xref: /freebsd/sys/net/bpf.c (revision 1e413cf93298b5b97441a21d9a50fdcd0ee9945e)
/*-
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bpf.h"
#include "opt_mac.h"
#include "opt_netgraph.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/time.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/ttycom.h>
#include <sys/uio.h>

#include <sys/event.h>
#include <sys/file.h>
#include <sys/poll.h>
#include <sys/proc.h>

#include <sys/socket.h>

#include <net/if.h>
#include <net/bpf.h>
#ifdef BPF_JITTER
#include <net/bpf_jitter.h>
#endif
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <net80211/ieee80211_freebsd.h>

#include <security/mac/mac_framework.h>

static MALLOC_DEFINE(M_BPF, "BPF", "BPF data");

#if defined(DEV_BPF) || defined(NETGRAPH_BPF)

#define PRINET  26			/* interruptible */

#define	M_SKIP_BPF	M_SKIP_FIREWALL
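/*
 * Reuse the firewall's mbuf-skip flag to mark packets injected through
 * bpfwrite() in feedback mode; bpf_mtap() and bpf_mtap2() clear the flag
 * and skip such packets, so a descriptor does not see its own
 * transmission twice (see the BIOCFEEDBACK handling in bpfwrite()).
 */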

/*
 * bpf_iflist is a list of BPF interface structures, each corresponding to a
 * specific DLT.  The same network interface might have several BPF interface
 * structures registered by different layers in the stack (e.g., 802.11
 * frames, Ethernet frames, etc.).
 */
static LIST_HEAD(, bpf_if)	bpf_iflist;
static struct mtx	bpf_mtx;		/* bpf global lock */
static int		bpf_bpfd_cnt;

static void	bpf_allocbufs(struct bpf_d *);
static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
static void	bpf_detachd(struct bpf_d *);
static void	bpf_freed(struct bpf_d *);
static void	bpf_mcopy(const void *, void *, size_t);
static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
		    struct sockaddr *, int *, struct bpf_insn *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static __inline void
		bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int,
		    u_int, void (*)(const void *, void *, size_t),
		    struct timeval *);
static void	reset_d(struct bpf_d *);
static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);
static void	filt_bpfdetach(struct knote *);
static int	filt_bpfread(struct knote *, long);
static void	bpf_drvinit(void *);
static void	bpf_clone(void *, struct ucred *, char *, int, struct cdev **);
static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);

SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
static int bpf_bufsize = 4096;
SYSCTL_INT(_net_bpf, OID_AUTO, bufsize, CTLFLAG_RW,
    &bpf_bufsize, 0, "Default bpf buffer size");
static int bpf_maxbufsize = BPF_MAXBUFSIZE;
SYSCTL_INT(_net_bpf, OID_AUTO, maxbufsize, CTLFLAG_RW,
    &bpf_maxbufsize, 0, "Maximum bpf buffer size");
static int bpf_maxinsns = BPF_MAXINSNS;
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
    &bpf_maxinsns, 0, "Maximum bpf program instructions");
SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_RW,
    bpf_stats_sysctl, "bpf statistics portal");

static	d_open_t	bpfopen;
static	d_close_t	bpfclose;
static	d_read_t	bpfread;
static	d_write_t	bpfwrite;
static	d_ioctl_t	bpfioctl;
static	d_poll_t	bpfpoll;
static	d_kqfilter_t	bpfkqfilter;

static struct cdevsw bpf_cdevsw = {
	.d_version =	D_VERSION,
	.d_open =	bpfopen,
	.d_close =	bpfclose,
	.d_read =	bpfread,
	.d_write =	bpfwrite,
	.d_ioctl =	bpfioctl,
	.d_poll =	bpfpoll,
	.d_name =	"bpf",
	.d_kqfilter =	bpfkqfilter,
};

static struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfdetach, filt_bpfread };

static int
bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
    struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
{
	const struct ieee80211_bpf_params *p;
	struct ether_header *eh;
	struct mbuf *m;
	int error;
	int len;
	int hlen;
	int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_IMPLINK;
		hlen = 0;
		break;

	case DLT_RAW:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_NULL:
		/*
		 * The null interface type requires a 4-byte pseudo header
		 * that corresponds to the address family of the packet.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;
		break;

	case DLT_ATM_RFC1483:
		/*
		 * The en(4) ATM driver requires a 4-byte ATM pseudo header.
		 * Though it isn't standard, the vpi:vci needs to be
		 * specified anyway.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;	/* This should match PPP_HDRLEN */
		break;

	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
		sockp->sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
		sockp->sa_family = AF_IEEE80211;
		sockp->sa_len = 12;	/* XXX != 0 */
		hlen = sizeof(struct ieee80211_bpf_params);
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;

	if (len - hlen > ifp->if_mtu)
		return (EMSGSIZE);

	if ((unsigned)len > MCLBYTES)
		return (EIO);

	if (len > MHLEN) {
		m = m_getcl(M_TRYWAIT, MT_DATA, M_PKTHDR);
	} else {
		MGETHDR(m, M_TRYWAIT, MT_DATA);
	}
	if (m == NULL)
		return (ENOBUFS);
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}

	error = uiomove(mtod(m, u_char *), len, uio);
	if (error)
		goto bad;

	slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
	if (slen == 0) {
		error = EPERM;
		goto bad;
	}

	/* Check for multicast destination */
	switch (linktype) {
	case DLT_EN10MB:
		eh = mtod(m, struct ether_header *);
		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
			    ETHER_ADDR_LEN) == 0)
				m->m_flags |= M_BCAST;
			else
				m->m_flags |= M_MCAST;
		}
		break;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (sockp->sa_family == AF_IEEE80211) {
			/*
			 * Collect true length from the parameter header
			 * NB: sockp is known to be zero'd so if we do a
			 *     short copy unspecified parameters will be
			 *     zero.
			 * NB: packet may not be aligned after stripping
			 *     bpf params
			 * XXX check ibp_vers
			 */
			p = mtod(m, const struct ieee80211_bpf_params *);
			hlen = p->ibp_len;
			if (hlen > sizeof(sockp->sa_data)) {
				error = EINVAL;
				goto bad;
			}
		}
		bcopy(m->m_data, sockp->sa_data, hlen);
	}
	*hdrlen = hlen;

	return (0);
bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	BPFIF_LOCK(bp);
	d->bd_bif = bp;
	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);

	bpf_bpfd_cnt++;
	BPFIF_UNLOCK(bp);
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(struct bpf_d *d)
{
	int error;
	struct bpf_if *bp;
	struct ifnet *ifp;

	bp = d->bd_bif;
	BPFIF_LOCK(bp);
	BPFD_LOCK(d);
	ifp = d->bd_bif->bif_ifp;

	/*
	 * Remove d from the interface's descriptor list.
	 */
	LIST_REMOVE(d, bd_next);

	bpf_bpfd_cnt--;
	d->bd_bif = NULL;
	BPFD_UNLOCK(d);
	BPFIF_UNLOCK(bp);

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		error = ifpromisc(ifp, 0);
		if (error != 0 && error != ENXIO) {
			/*
			 * ENXIO can happen if a pccard is unplugged.
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			if_printf(bp->bif_ifp,
				"bpf_detach: ifpromisc failed (%d)\n", error);
		}
	}
}

/*
 * Open the bpf device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
/* ARGSUSED */
static	int
bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d;

	mtx_lock(&bpf_mtx);
	d = dev->si_drv1;
	/*
	 * Each minor can be opened by only one process.  If the requested
	 * minor is in use, return EBUSY.
	 */
	if (d != NULL) {
		mtx_unlock(&bpf_mtx);
		return (EBUSY);
	}
	dev->si_drv1 = (struct bpf_d *)~0;	/* mark device in use */
	mtx_unlock(&bpf_mtx);

	if ((dev->si_flags & SI_NAMED) == 0)
		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
		    "bpf%d", dev2unit(dev));
	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	dev->si_drv1 = d;
	d->bd_bufsize = bpf_bufsize;
	d->bd_sig = SIGIO;
	d->bd_direction = BPF_D_INOUT;
	d->bd_pid = td->td_proc->p_pid;
#ifdef MAC
	mac_bpfdesc_init(d);
	mac_bpfdesc_create(td->td_ucred, d);
#endif
	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
	callout_init(&d->bd_callout, CALLOUT_MPSAFE);
	knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
static	int
bpfclose(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d = dev->si_drv1;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);
	funsetown(&d->bd_sigio);
	mtx_lock(&bpf_mtx);
	if (d->bd_bif)
		bpf_detachd(d);
	mtx_unlock(&bpf_mtx);
	selwakeuppri(&d->bd_sel, PRINET);
#ifdef MAC
	mac_bpfdesc_destroy(d);
#endif /* MAC */
	knlist_destroy(&d->bd_sel.si_note);
	bpf_freed(d);
	dev->si_drv1 = NULL;
	free(d, M_BPF);

	return (0);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL;
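/*
 * The descriptor thus cycles three equal-sized buffers: catchpacket()
 * appends packets to the store buffer, completed data waits in the hold
 * buffer for read(), and the free buffer stands by for the next rotation:
 *
 *	fbuf --> sbuf --> hbuf --> (drained by bpfread(), becomes fbuf)
 *
 * If the store buffer fills while the hold buffer is still unread
 * (bd_fbuf == NULL), the buffers cannot be rotated and catchpacket()
 * drops the packet, counting it in bd_dcount.
 */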
/*
 *  bpfread - read next chunk of packets from buffers
 */
static	int
bpfread(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d = dev->si_drv1;
	int timed_out;
	int error;

	/*
	 * Restrict the application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	BPFD_LOCK(d);
	d->bd_pid = curthread->td_proc->p_pid;
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
			/*
			 * One or more packets arrived either since the
			 * previous read or while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (ioflag & O_NONBLOCK) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
		     "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);

	BPFD_LOCK(d);
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	BPFD_UNLOCK(d);

	return (error);
}

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static __inline void
bpf_wakeup(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);
	if (d->bd_state == BPF_WAITING) {
		callout_stop(&d->bd_callout);
		d->bd_state = BPF_IDLE;
	}
	wakeup(d);
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	selwakeuppri(&d->bd_sel, PRINET);
	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
}

static void
bpf_timed_out(void *arg)
{
	struct bpf_d *d = (struct bpf_d *)arg;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
	BPFD_UNLOCK(d);
}

static int
bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d = dev->si_drv1;
	struct ifnet *ifp;
	struct mbuf *m, *mc;
	struct sockaddr dst;
	int error, hlen;

	d->bd_pid = curthread->td_proc->p_pid;
	if (d->bd_bif == NULL)
		return (ENXIO);

	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0)
		return (ENETDOWN);

	if (uio->uio_resid == 0)
		return (0);

	bzero(&dst, sizeof(dst));
	m = NULL;
	hlen = 0;
	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
	    &m, &dst, &hlen, d->bd_wfilter);
	if (error)
		return (error);

	if (d->bd_hdrcmplt)
		dst.sa_family = pseudo_AF_HDRCMPLT;

	if (d->bd_feedback) {
		mc = m_dup(m, M_DONTWAIT);
		if (mc != NULL)
			mc->m_pkthdr.rcvif = ifp;
		/* XXX Do not return the same packet twice. */
		if (d->bd_direction == BPF_D_INOUT)
			m->m_flags |= M_SKIP_BPF;
	} else
		mc = NULL;

	m->m_pkthdr.len -= hlen;
	m->m_len -= hlen;
	m->m_data += hlen;	/* XXX */

#ifdef MAC
	BPFD_LOCK(d);
	mac_bpfdesc_create_mbuf(d, m);
	if (mc != NULL)
		mac_bpfdesc_create_mbuf(d, mc);
	BPFD_UNLOCK(d);
#endif

	error = (*ifp->if_output)(ifp, m, &dst, NULL);

	if (mc != NULL) {
		if (error == 0)
			(*ifp->if_input)(ifp, mc);
		else
			m_freem(mc);
	}

	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
static void
reset_d(struct bpf_d *d)
{

	mtx_assert(&d->bd_mtx, MA_OWNED);
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
	d->bd_fcount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set ethernet read filter.
 *  BIOCSETWF		Set ethernet write filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLT		Get link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag.
 *  BIOCSHDRCMPLT	Set "header already complete" flag.
 *  BIOCGDIRECTION	Get packet direction flag.
 *  BIOCSDIRECTION	Set packet direction flag.
 *  BIOCLOCK		Set "locked" flag.
 *  BIOCFEEDBACK	Set packet feedback mode.
 */
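/*
 * A minimal sketch of the userland sequence these ioctls support: bind a
 * descriptor to an interface, enable immediate mode, and size the read
 * buffer to match the kernel buffer, since bpfread() rejects any other
 * length ("em0" below is only an example interface name):
 *
 *	struct ifreq ifr;
 *	u_int imm = 1, blen;
 *	int fd = open("/dev/bpf0", O_RDWR);
 *
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	ioctl(fd, BIOCIMMEDIATE, &imm);
 *	ioctl(fd, BIOCGBLEN, &blen);
 *	nread = read(fd, buf, blen);
 */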
/* ARGSUSED */
static	int
bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
    struct thread *td)
{
	struct bpf_d *d = dev->si_drv1;
	int error = 0;

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	d->bd_pid = td->td_proc->p_pid;
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);

	if (d->bd_locked == 1) {
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case BIOCFEEDBACK:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
			break;
		default:
			return (EPERM);
		}
	}
	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			BPFD_LOCK(d);
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			BPFD_UNLOCK(d);

			*(int *)addr = n;
			break;
		}

	case SIOCGIFADDR:
		{
			struct ifnet *ifp;

			if (d->bd_bif == NULL)
				error = EINVAL;
			else {
				ifp = d->bd_bif->bif_ifp;
				error = (*ifp->if_ioctl)(ifp, cmd, addr);
			}
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			d->bd_bufsize = size;
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		if (d->bd_promisc == 0) {
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			if (error == 0)
				d->bd_promisc = 1;
		}
		break;

	/*
	 * Get current data link type.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get a list of supported data link types.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Set data link type.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;
			struct ifreq *const ifr = (struct ifreq *)addr;

			strlcpy(ifr->ifr_name, ifp->if_xname,
			    sizeof(ifr->ifr_name));
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			/*
			 * Subtract 1 tick from tvtohz() since this isn't
			 * a one-shot timer.
			 */
			if ((error = itimerfix(tv)) == 0)
				d->bd_rtout = tvtohz(tv) - 1;
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	/*
	 * Get "header already complete" flag.
	 */
	case BIOCGHDRCMPLT:
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	/*
	 * Set "header already complete" flag.
	 */
	case BIOCSHDRCMPLT:
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get packet direction flag.
	 */
	case BIOCGDIRECTION:
		*(u_int *)addr = d->bd_direction;
		break;

	/*
	 * Set packet direction flag.
	 */
	case BIOCSDIRECTION:
		{
			u_int	direction;

			direction = *(u_int *)addr;
			switch (direction) {
			case BPF_D_IN:
			case BPF_D_INOUT:
			case BPF_D_OUT:
				d->bd_direction = direction;
				break;
			default:
				error = EINVAL;
			}
		}
		break;

	case BIOCFEEDBACK:
		d->bd_feedback = *(u_int *)addr;
		break;

	case BIOCLOCK:
		d->bd_locked = 1;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		*(int *)addr = fgetown(&d->bd_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(&d->bd_sigio);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
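/*
 * The simplest valid program userland can install here is a single
 * "accept the whole packet" instruction; a sketch of the caller's side:
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET + BPF_K, (u_int)-1)
 *	};
 *	struct bpf_program prog = { 1, insns };
 *	ioctl(fd, BIOCSETF, &prog);
 *
 * The filter's return value is used as the snapshot length, so returning
 * 0 rejects a packet and (u_int)-1 captures all of it.
 */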
static int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
{
	struct bpf_insn *fcode, *old;
	u_int wfilter, flen, size;
#ifdef BPF_JITTER
	bpf_jit_filter *ofunc;
#endif

	if (cmd == BIOCSETWF) {
		old = d->bd_wfilter;
		wfilter = 1;
#ifdef BPF_JITTER
		ofunc = NULL;
#endif
	} else {
		wfilter = 0;
		old = d->bd_rfilter;
#ifdef BPF_JITTER
		ofunc = d->bd_bfilter;
#endif
	}
	if (fp->bf_insns == NULL) {
		if (fp->bf_len != 0)
			return (EINVAL);
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = NULL;
		else {
			d->bd_rfilter = NULL;
#ifdef BPF_JITTER
			d->bd_bfilter = NULL;
#endif
		}
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
		if (ofunc != NULL)
			bpf_destroy_jit_filter(ofunc);
#endif
		return (0);
	}
	flen = fp->bf_len;
	if (flen > bpf_maxinsns)
		return (EINVAL);

	size = flen * sizeof(*fp->bf_insns);
	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = fcode;
		else {
			d->bd_rfilter = fcode;
#ifdef BPF_JITTER
			d->bd_bfilter = bpf_jitter(fcode, flen);
#endif
		}
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
		if (ofunc != NULL)
			bpf_destroy_jit_filter(ofunc);
#endif

		return (0);
	}
	free((caddr_t)fcode, M_BPF);
	return (EINVAL);
}

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	struct ifnet *theywant;

	theywant = ifunit(ifr->ifr_name);
	if (theywant == NULL || theywant->if_bpf == NULL)
		return (ENXIO);

	bp = theywant->if_bpf;
	/*
	 * Allocate the packet buffers if we need to.
	 * If we're already attached to requested interface,
	 * just flush the buffer.
	 */
	if (d->bd_sbuf == NULL)
		bpf_allocbufs(d);
	if (bp != d->bd_bif) {
		if (d->bd_bif)
			/*
			 * Detach if attached to something else.
			 */
			bpf_detachd(d);

		bpf_attachd(d, bp);
	}
	BPFD_LOCK(d);
	reset_d(d);
	BPFD_UNLOCK(d);
	return (0);
}

/*
 * Support for select() and poll() system calls
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
 */
static int
bpfpoll(struct cdev *dev, int events, struct thread *td)
{
	struct bpf_d *d;
	int revents;

	d = dev->si_drv1;
	if (d->bd_bif == NULL)
		return (ENXIO);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	revents = events & (POLLOUT | POLLWRNORM);
	BPFD_LOCK(d);
	d->bd_pid = td->td_proc->p_pid;
	if (events & (POLLIN | POLLRDNORM)) {
		if (bpf_ready(d))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			selrecord(td, &d->bd_sel);
			/* Start the read timeout if necessary. */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}
	BPFD_UNLOCK(d);
	return (revents);
}

/*
 * Support for kevent() system call.  Register EVFILT_READ filters and
 * reject all others.
 */
int
bpfkqfilter(struct cdev *dev, struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)dev->si_drv1;

	if (kn->kn_filter != EVFILT_READ)
		return (1);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	d->bd_pid = curthread->td_proc->p_pid;
	kn->kn_fop = &bpfread_filtops;
	kn->kn_hook = d;
	knlist_add(&d->bd_sel.si_note, kn, 1);
	BPFD_UNLOCK(d);

	return (0);
}

static void
filt_bpfdetach(struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	knlist_remove(&d->bd_sel.si_note, kn, 0);
}

static int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
	int ready;

	BPFD_LOCK_ASSERT(d);
	ready = bpf_ready(d);
	if (ready) {
		kn->kn_data = d->bd_slen;
		if (d->bd_hbuf)
			kn->kn_data += d->bd_hlen;
	} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		callout_reset(&d->bd_callout, d->bd_rtout,
		    bpf_timed_out, d);
		d->bd_state = BPF_WAITING;
	}

	return (ready);
}

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
	struct bpf_d *d;
	u_int slen;
	int gottime;
	struct timeval tv;

	gottime = 0;
	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		BPFD_LOCK(d);
		++d->bd_rcount;
#ifdef BPF_JITTER
		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL)
			slen = (*(d->bd_bfilter->func))(pkt, pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, pkt, pktlen, slen, bcopy, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
static void
bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, void *), dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

#define	BPF_CHECK_DIRECTION(d, m) \
	if (((d)->bd_direction == BPF_D_IN && (m)->m_pkthdr.rcvif == NULL) || \
	    ((d)->bd_direction == BPF_D_OUT && (m)->m_pkthdr.rcvif != NULL))
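/*
 * This expands to a bare "if", so the statement following each use below
 * (a "continue") runs only when the packet travels in a direction the
 * descriptor asked to ignore: rcvif is NULL for locally generated
 * packets and points to the receiving interface for incoming ones.
 */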

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	struct bpf_d *d;
	u_int pktlen, slen;
	int gottime;
	struct timeval tv;

	if (m->m_flags & M_SKIP_BPF) {
		m->m_flags &= ~M_SKIP_BPF;
		return;
	}

	gottime = 0;

	pktlen = m_length(m, NULL);

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		BPF_CHECK_DIRECTION(d, m)
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
#ifdef BPF_JITTER
		/* XXX We cannot handle multiple mbufs. */
		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL &&
		    m->m_next == NULL)
			slen = (*(d->bd_bfilter->func))(mtod(m, u_char *),
			    pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)m, pktlen, slen,
				    bpf_mcopy, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Incoming linkage from device drivers, when packet is in
 * an mbuf chain and to be prepended by a contiguous header.
 */
void
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
	struct mbuf mb;
	struct bpf_d *d;
	u_int pktlen, slen;
	int gottime;
	struct timeval tv;

	if (m->m_flags & M_SKIP_BPF) {
		m->m_flags &= ~M_SKIP_BPF;
		return;
	}

	gottime = 0;

	pktlen = m_length(m, NULL);
	/*
	 * Craft an on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only set up what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;
	pktlen += dlen;

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		BPF_CHECK_DIRECTION(d, m)
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)&mb, pktlen, slen,
				    bpf_mcopy, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

#undef	BPF_CHECK_DIRECTION

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  "cpfn" is the routine called to do the actual data
 * transfer.  bcopy is passed in to copy contiguous chunks, while
 * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
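/*
 * Each record written by catchpacket() is a struct bpf_hdr followed by
 * the captured bytes, and records start on longword boundaries, so a
 * reader can walk a buffer of n bytes with a sketch like:
 *
 *	u_char *p = buf;
 *	while (p < buf + n) {
 *		struct bpf_hdr *hp = (struct bpf_hdr *)p;
 *		(bh_caplen bytes of packet data at p + hp->bh_hdrlen)
 *		p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
 *	}
 */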
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
    void (*cpfn)(const void *, void *, size_t), struct timeval *tv)
{
	struct bpf_hdr *hp;
	int totlen, curlen;
	int hdrlen = d->bd_bif->bif_hdrlen;
	int do_wakeup = 0;

	BPFD_LOCK_ASSERT(d);
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;

	/*
	 * Append the bpf header.
	 */
	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	hp->bh_tstamp = *tv;
	hp->bh_datalen = pktlen;
	hp->bh_hdrlen = hdrlen;
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
	d->bd_slen = curlen + totlen;

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Initialize all nonzero fields of a descriptor.
 */
static void
bpf_allocbufs(struct bpf_d *d)
{

	KASSERT(d->bd_fbuf == NULL, ("bpf_allocbufs: bd_fbuf != NULL"));
	KASSERT(d->bd_sbuf == NULL, ("bpf_allocbufs: bd_sbuf != NULL"));
	KASSERT(d->bd_hbuf == NULL, ("bpf_allocbufs: bd_hbuf != NULL"));

	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
	d->bd_slen = 0;
	d->bd_hlen = 0;
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it hasn't yet been marked
	 * free.
	 */
	if (d->bd_sbuf != NULL) {
		free(d->bd_sbuf, M_BPF);
		if (d->bd_hbuf != NULL)
			free(d->bd_hbuf, M_BPF);
		if (d->bd_fbuf != NULL)
			free(d->bd_fbuf, M_BPF);
	}
	if (d->bd_rfilter) {
		free((caddr_t)d->bd_rfilter, M_BPF);
#ifdef BPF_JITTER
		bpf_destroy_jit_filter(d->bd_bfilter);
#endif
	}
	if (d->bd_wfilter)
		free((caddr_t)d->bd_wfilter, M_BPF);
	mtx_destroy(&d->bd_mtx);
}

/*
 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
 * fixed size of the link header (variable length headers not yet supported).
 */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

/*
 * Attach an interface to bpf.  ifp is a pointer to the structure
 * defining the interface to be attached, dlt is the link layer type,
 * and hdrlen is the fixed size of the link header (variable length
 * headers are not yet supported).
 */
void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{
	struct bpf_if *bp;

	bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
	if (bp == NULL)
		panic("bpfattach");

	LIST_INIT(&bp->bif_dlist);
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;
	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
	*driverp = bp;

	mtx_lock(&bpf_mtx);
	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
	mtx_unlock(&bpf_mtx);

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
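	/*
	 * For example, with DLT_EN10MB (hdrlen = 14) and SIZEOF_BPF_HDR = 18
	 * (its usual value where struct bpf_hdr occupies 20 padded bytes),
	 * this gives BPF_WORDALIGN(14 + 18) - 14 = 32 - 14 = 18: packet data
	 * starts 18 bytes into each record, and the network layer header,
	 * 14 bytes further on, lands at offset 32, a longword boundary.
	 */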
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;

	if (bootverbose)
		if_printf(ifp, "bpf attached\n");
}

/*
 * Detach bpf from an interface.  This involves detaching each descriptor
 * associated with the interface, and leaving bd_bif NULL.  Notify each
 * descriptor as it's detached so that any sleepers wake up and get
 * ENXIO.
 */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if	*bp;
	struct bpf_d	*d;

	/* Locate BPF interface information */
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (ifp == bp->bif_ifp)
			break;
	}

	/* Interface wasn't attached */
	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
		mtx_unlock(&bpf_mtx);
		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
		return;
	}

	LIST_REMOVE(bp, bif_next);
	mtx_unlock(&bpf_mtx);

	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
		bpf_detachd(d);
		BPFD_LOCK(d);
		bpf_wakeup(d);
		BPFD_UNLOCK(d);
	}

	mtx_destroy(&bp->bif_mtx);
	free(bp, M_BPF);
}

/*
 * Get a list of the available data link types of the interface.
 */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len) {
				mtx_unlock(&bpf_mtx);
				return (ENOMEM);
			}
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
		}
		n++;
	}
	mtx_unlock(&bpf_mtx);
	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	ifp = d->bd_bif->bif_ifp;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	mtx_unlock(&bpf_mtx);
	if (bp != NULL) {
		opromisc = d->bd_promisc;
		bpf_detachd(d);
		bpf_attachd(d, bp);
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		if (opromisc) {
			error = ifpromisc(bp->bif_ifp, 1);
			if (error)
				if_printf(bp->bif_ifp,
					"bpf_setdlt: ifpromisc failed (%d)\n",
					error);
			else
				d->bd_promisc = 1;
		}
	}
	return (bp == NULL ? EINVAL : 0);
}

static void
bpf_clone(void *arg, struct ucred *cred, char *name, int namelen,
    struct cdev **dev)
{
	int u;

	if (*dev != NULL)
		return;
	if (dev_stdclone(name, NULL, "bpf", &u) != 1)
		return;
	*dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", u);
	dev_ref(*dev);
	(*dev)->si_flags |= SI_CHEAPCLONE;
	return;
}

static void
bpf_drvinit(void *unused)
{

	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
	LIST_INIT(&bpf_iflist);
	EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
}

static void
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{

	bzero(d, sizeof(*d));
	BPFD_LOCK_ASSERT(bd);
	d->bd_immediate = bd->bd_immediate;
	d->bd_promisc = bd->bd_promisc;
	d->bd_hdrcmplt = bd->bd_hdrcmplt;
	d->bd_direction = bd->bd_direction;
	d->bd_feedback = bd->bd_feedback;
	d->bd_async = bd->bd_async;
	d->bd_rcount = bd->bd_rcount;
	d->bd_dcount = bd->bd_dcount;
	d->bd_fcount = bd->bd_fcount;
	d->bd_sig = bd->bd_sig;
	d->bd_slen = bd->bd_slen;
	d->bd_hlen = bd->bd_hlen;
	d->bd_bufsize = bd->bd_bufsize;
	d->bd_pid = bd->bd_pid;
	strlcpy(d->bd_ifname,
	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
	d->bd_locked = bd->bd_locked;
}

static int
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct xbpf_d *xbdbuf, *xbd;
	int index, error;
	struct bpf_if *bp;
	struct bpf_d *bd;
	/*
	 * XXX This is not technically correct.  It is possible for
	 * non-privileged users to open bpf devices.  It would make sense
	 * if the users who opened the devices were able to retrieve
	 * the statistics for them, too.
	 */
	error = priv_check(req->td, PRIV_NET_BPF);
	if (error)
		return (error);
	if (req->oldptr == NULL)
		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
	if (bpf_bpfd_cnt == 0)
		return (SYSCTL_OUT(req, 0, 0));
	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
	mtx_lock(&bpf_mtx);
	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
		mtx_unlock(&bpf_mtx);
		free(xbdbuf, M_BPF);
		return (ENOMEM);
	}
	index = 0;
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		BPFIF_LOCK(bp);
		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			xbd = &xbdbuf[index++];
			BPFD_LOCK(bd);
			bpfstats_fill_xbpf(xbd, bd);
			BPFD_UNLOCK(bd);
		}
		BPFIF_UNLOCK(bp);
	}
	mtx_unlock(&bpf_mtx);
	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
	free(xbdbuf, M_BPF);
	return (error);
}

SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL)

#else /* !DEV_BPF && !NETGRAPH_BPF */
/*
 * NOP stubs to allow bpf-using drivers to load and function.
 *
 * A 'better' implementation would allow the core bpf functionality
 * to be loaded at runtime.
 */
static struct bpf_if bp_null;

void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
}

void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
}

void
bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
{
}

void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{

	*driverp = &bp_null;
}

void
bpfdetach(struct ifnet *ifp)
{
}

u_int
bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
{
	return -1;	/* "no filter" behaviour */
}

int
bpf_validate(const struct bpf_insn *f, int len)
{
	return 0;		/* false */
}

#endif /* !DEV_BPF && !NETGRAPH_BPF */