/*-
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
 *
 * $FreeBSD$
 */

#include "opt_bpf.h"
#include "opt_mac.h"
#include "opt_netgraph.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/ttycom.h>
#include <sys/uio.h>

#include <sys/event.h>
#include <sys/file.h>
#include <sys/poll.h>
#include <sys/proc.h>

#include <sys/socket.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

static MALLOC_DEFINE(M_BPF, "BPF", "BPF data");

#if defined(DEV_BPF) || defined(NETGRAPH_BPF)

#define PRINET  26			/* interruptible */

/*
 * bpf_iflist is a list of BPF interface structures, each corresponding to a
 * specific DLT.  The same network interface might have several BPF interface
 * structures registered by different layers in the stack (e.g., 802.11
 * frames, Ethernet frames, etc.).
 */
static LIST_HEAD(, bpf_if)	bpf_iflist;
static struct mtx	bpf_mtx;		/* bpf global lock */
static int		bpf_bpfd_cnt;

static int	bpf_allocbufs(struct bpf_d *);
static void	bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
static void	bpf_detachd(struct bpf_d *d);
static void	bpf_freed(struct bpf_d *);
static void	bpf_mcopy(const void *, void *, size_t);
static int	bpf_movein(struct uio *, int, int,
		    struct mbuf **, struct sockaddr *, struct bpf_insn *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static __inline void	bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int,
		    u_int, void (*)(const void *, void *, size_t));
static void	reset_d(struct bpf_d *);
static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);
static void	filt_bpfdetach(struct knote *);
static int	filt_bpfread(struct knote *, long);
static void	bpf_drvinit(void *);
static void	bpf_clone(void *, struct ucred *, char *, int, struct cdev **);
static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);

/*
 * The default read buffer size is patchable.
 */
SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
static int bpf_bufsize = 4096;
SYSCTL_INT(_net_bpf, OID_AUTO, bufsize, CTLFLAG_RW,
    &bpf_bufsize, 0, "");
static int bpf_maxbufsize = BPF_MAXBUFSIZE;
SYSCTL_INT(_net_bpf, OID_AUTO, maxbufsize, CTLFLAG_RW,
    &bpf_maxbufsize, 0, "");
static int bpf_maxinsns = BPF_MAXINSNS;
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
    &bpf_maxinsns, 0, "Maximum bpf program instructions");
SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_RW,
    bpf_stats_sysctl, "bpf statistics portal");

static	d_open_t	bpfopen;
static	d_close_t	bpfclose;
static	d_read_t	bpfread;
static	d_write_t	bpfwrite;
static	d_ioctl_t	bpfioctl;
static	d_poll_t	bpfpoll;
static	d_kqfilter_t	bpfkqfilter;

static struct cdevsw bpf_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_NEEDGIANT,
	.d_open =	bpfopen,
	.d_close =	bpfclose,
	.d_read =	bpfread,
	.d_write =	bpfwrite,
	.d_ioctl =	bpfioctl,
	.d_poll =	bpfpoll,
	.d_name =	"bpf",
	.d_kqfilter =	bpfkqfilter,
};

static struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfdetach, filt_bpfread };

static int
bpf_movein(uio, linktype, mtu, mp, sockp, wfilter)
	struct uio *uio;
	int linktype;
	int mtu;
	struct mbuf **mp;
	struct sockaddr *sockp;
	struct bpf_insn *wfilter;
{
	struct mbuf *m;
	int error;
	int len;
	int hlen;
	int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_IMPLINK;
		hlen = 0;
		break;

	case DLT_RAW:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_NULL:
		/*
		 * Null interface types require a 4-byte pseudo header that
		 * corresponds to the address family of the packet.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;
		break;

	case DLT_ATM_RFC1483:
		/*
		 * The en(4) ATM driver requires a 4-byte ATM pseudo header.
		 * Though it isn't standard, the VPI:VCI pair needs to be
		 * specified anyway.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;	/* This should match PPP_HDRLEN */
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;

	if (len - hlen > mtu)
		return (EMSGSIZE);

	if ((unsigned)len > MCLBYTES)
		return (EIO);

	if (len > MHLEN) {
		m = m_getcl(M_TRYWAIT, MT_DATA, M_PKTHDR);
	} else {
		MGETHDR(m, M_TRYWAIT, MT_DATA);
	}
	if (m == NULL)
		return (ENOBUFS);
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}

	error = uiomove(mtod(m, u_char *), len, uio);
	if (error)
		goto bad;

	slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
	if (slen == 0) {
		error = EPERM;
		goto bad;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		bcopy(m->m_data, sockp->sa_data, hlen);
		m->m_pkthdr.len -= hlen;
		m->m_len -= hlen;
#if BSD >= 199103
		m->m_data += hlen; /* XXX */
#else
		m->m_off += hlen;
#endif
	}

	return (0);
bad:
	m_freem(m);
	return (error);
}
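
/*
 * Illustrative userland sketch (hedged, not part of the kernel API):
 * for a DLT_EN10MB descriptor, a writer supplies the full Ethernet
 * header inline and bpf_movein() above peels it off into the sockaddr.
 * The names buf, fd, dst_mac and src_mac are hypothetical:
 *
 *	u_char buf[ETHER_HDR_LEN + 64];
 *	struct ether_header *eh = (struct ether_header *)buf;
 *
 *	memcpy(eh->ether_dhost, dst_mac, ETHER_ADDR_LEN);
 *	memcpy(eh->ether_shost, src_mac, ETHER_ADDR_LEN);
 *	eh->ether_type = htons(ETHERTYPE_IP);
 *	(fill in the payload after the header, then)
 *	write(fd, buf, sizeof(buf));
 */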

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
static void
bpf_attachd(d, bp)
	struct bpf_d *d;
	struct bpf_if *bp;
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	BPFIF_LOCK(bp);
	d->bd_bif = bp;
	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);

	bpf_bpfd_cnt++;
	*bp->bif_driverp = bp;
	BPFIF_UNLOCK(bp);
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(d)
	struct bpf_d *d;
{
	int error;
	struct bpf_if *bp;
	struct ifnet *ifp;

	bp = d->bd_bif;
	BPFIF_LOCK(bp);
	BPFD_LOCK(d);
	ifp = d->bd_bif->bif_ifp;

	/*
	 * Remove d from the interface's descriptor list.
	 */
	LIST_REMOVE(d, bd_next);

	bpf_bpfd_cnt--;
	/*
	 * Let the driver know that there are no more listeners.
	 */
	if (LIST_EMPTY(&bp->bif_dlist))
		*bp->bif_driverp = NULL;

	d->bd_bif = NULL;
	BPFD_UNLOCK(d);
	BPFIF_UNLOCK(bp);

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		error = ifpromisc(ifp, 0);
		if (error != 0 && error != ENXIO) {
			/*
			 * ENXIO can happen if a pccard is unplugged.
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			if_printf(bp->bif_ifp,
				"bpf_detach: ifpromisc failed (%d)\n", error);
		}
	}
}

/*
 * Open the bpf device.  Returns ENXIO for an illegal minor device number,
 * EBUSY if the file is open by another process.
 */
/* ARGSUSED */
static	int
bpfopen(dev, flags, fmt, td)
	struct cdev *dev;
	int flags;
	int fmt;
	struct thread *td;
{
	struct bpf_d *d;

	mtx_lock(&bpf_mtx);
	d = dev->si_drv1;
	/*
	 * Each minor can be opened by only one process.  If the requested
	 * minor is in use, return EBUSY.
	 */
	if (d != NULL) {
		mtx_unlock(&bpf_mtx);
		return (EBUSY);
	}
	dev->si_drv1 = (struct bpf_d *)~0;	/* mark device in use */
	mtx_unlock(&bpf_mtx);

	if ((dev->si_flags & SI_NAMED) == 0)
		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
		    "bpf%d", dev2unit(dev));
	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	dev->si_drv1 = d;
	d->bd_bufsize = bpf_bufsize;
	d->bd_sig = SIGIO;
	d->bd_seesent = 1;
	d->bd_pid = td->td_proc->p_pid;
#ifdef MAC
	mac_init_bpfdesc(d);
	mac_create_bpfdesc(td->td_ucred, d);
#endif
	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
	callout_init(&d->bd_callout, NET_CALLOUT_MPSAFE);
	knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL);

	return (0);
}
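
/*
 * Userland sketch (hedged, not kernel code): since each bpf minor is
 * exclusive, consumers conventionally probe /dev/bpf0, /dev/bpf1, ...
 * until open() stops failing with EBUSY; the clone handler further
 * below creates the probed nodes on demand:
 *
 *	int i, fd = -1;
 *	char dev[16];
 *
 *	for (i = 0; fd < 0 && i < 256; i++) {
 *		snprintf(dev, sizeof(dev), "/dev/bpf%d", i);
 *		fd = open(dev, O_RDWR);
 *		if (fd < 0 && errno != EBUSY)
 *			break;
 *	}
 */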

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
static	int
bpfclose(dev, flags, fmt, td)
	struct cdev *dev;
	int flags;
	int fmt;
	struct thread *td;
{
	struct bpf_d *d = dev->si_drv1;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);
	funsetown(&d->bd_sigio);
	mtx_lock(&bpf_mtx);
	if (d->bd_bif)
		bpf_detachd(d);
	mtx_unlock(&bpf_mtx);
	selwakeuppri(&d->bd_sel, PRINET);
#ifdef MAC
	mac_destroy_bpfdesc(d);
#endif /* MAC */
	knlist_destroy(&d->bd_sel.si_note);
	bpf_freed(d);
	dev->si_drv1 = NULL;
	free(d, M_BPF);

	return (0);
}


/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL;
/*
 *  bpfread - read next chunk of packets from buffers
 */
static	int
bpfread(dev, uio, ioflag)
	struct cdev *dev;
	struct uio *uio;
	int ioflag;
{
	struct bpf_d *d = dev->si_drv1;
	int timed_out;
	int error;
	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
			/*
			 * One or more packets arrived since the previous
			 * read, or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (ioflag & O_NONBLOCK) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
		     "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);

	BPFD_LOCK(d);
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	BPFD_UNLOCK(d);

	return (error);
}
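
/*
 * Userland sketch (hedged): a successful read() returns zero or more
 * packets, each prefixed by a struct bpf_hdr and padded so that the
 * next header is longword aligned.  With buf and n standing for the
 * (hypothetical) read buffer and read() return value:
 *
 *	u_char *p = buf;
 *
 *	while (p < buf + n) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)p;
 *
 *		(process bh->bh_caplen bytes at p + bh->bh_hdrlen)
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */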


/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static __inline void
bpf_wakeup(d)
	struct bpf_d *d;
{

	BPFD_LOCK_ASSERT(d);
	if (d->bd_state == BPF_WAITING) {
		callout_stop(&d->bd_callout);
		d->bd_state = BPF_IDLE;
	}
	wakeup(d);
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	selwakeuppri(&d->bd_sel, PRINET);
	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
}

static void
bpf_timed_out(arg)
	void *arg;
{
	struct bpf_d *d = (struct bpf_d *)arg;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
	BPFD_UNLOCK(d);
}

static	int
bpfwrite(dev, uio, ioflag)
	struct cdev *dev;
	struct uio *uio;
	int ioflag;
{
	struct bpf_d *d = dev->si_drv1;
	struct ifnet *ifp;
	struct mbuf *m;
	int error;
	struct sockaddr dst;

	if (d->bd_bif == NULL)
		return (ENXIO);

	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0)
		return (ENETDOWN);

	if (uio->uio_resid == 0)
		return (0);

	bzero(&dst, sizeof(dst));
	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp->if_mtu,
	    &m, &dst, d->bd_wfilter);
	if (error)
		return (error);

	if (d->bd_hdrcmplt)
		dst.sa_family = pseudo_AF_HDRCMPLT;

#ifdef MAC
	BPFD_LOCK(d);
	mac_create_mbuf_from_bpfdesc(d, m);
	BPFD_UNLOCK(d);
#endif
	NET_LOCK_GIANT();
	error = (*ifp->if_output)(ifp, m, &dst, NULL);
	NET_UNLOCK_GIANT();
	/*
	 * The driver frees the mbuf.
	 */
	return (error);
}
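
/*
 * Userland sketch (hedged): by default the link level source address
 * of a written frame may be filled in by the stack.  A writer that
 * builds the entire header itself sets the "header complete" flag
 * first, which makes bpfwrite() tag the packet pseudo_AF_HDRCMPLT:
 *
 *	u_int one = 1;
 *
 *	ioctl(fd, BIOCSHDRCMPLT, &one);
 *	write(fd, frame, framelen);
 */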

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
static void
reset_d(d)
	struct bpf_d *d;
{

	mtx_assert(&d->bd_mtx, MA_OWNED);
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
	d->bd_fcount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set ethernet read filter.
 *  BIOCSETWF		Set ethernet write filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLT		Get link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 *  BIOCGSEESENT	Get "see packets sent" flag
 *  BIOCSSEESENT	Set "see packets sent" flag
 *  BIOCLOCK		Set "locked" flag
 */
/* ARGSUSED */
static	int
bpfioctl(dev, cmd, addr, flags, td)
	struct cdev *dev;
	u_long cmd;
	caddr_t addr;
	int flags;
	struct thread *td;
{
	struct bpf_d *d = dev->si_drv1;
	int error = 0;

	/*
	 * Refresh PID associated with this descriptor.
	 */
	d->bd_pid = td->td_proc->p_pid;
	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);

	if (d->bd_locked == 1) {
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
			break;
		default:
			return (EPERM);
		}
	}
	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			BPFD_LOCK(d);
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			BPFD_UNLOCK(d);

			*(int *)addr = n;
			break;
		}

	case SIOCGIFADDR:
		{
			struct ifnet *ifp;

			if (d->bd_bif == NULL)
				error = EINVAL;
			else {
				ifp = d->bd_bif->bif_ifp;
				error = (*ifp->if_ioctl)(ifp, cmd, addr);
			}
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			d->bd_bufsize = size;
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		if (d->bd_promisc == 0) {
			mtx_lock(&Giant);
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			mtx_unlock(&Giant);
			if (error == 0)
				d->bd_promisc = 1;
		}
		break;

	/*
	 * Get current data link type.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get a list of supported data link types.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Set data link type.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;
			struct ifreq *const ifr = (struct ifreq *)addr;

			strlcpy(ifr->ifr_name, ifp->if_xname,
			    sizeof(ifr->ifr_name));
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			/*
			 * Subtract 1 tick from tvtohz() since this isn't
			 * a one-shot timer.
			 */
			if ((error = itimerfix(tv)) == 0)
				d->bd_rtout = tvtohz(tv) - 1;
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	/*
	 * Get "header already complete" flag
	 */
	case BIOCGHDRCMPLT:
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCLOCK:
		d->bd_locked = 1;
		break;
	/*
	 * Set "header already complete" flag
	 */
	case BIOCSHDRCMPLT:
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get "see sent packets" flag
	 */
	case BIOCGSEESENT:
		*(u_int *)addr = d->bd_seesent;
		break;

	/*
	 * Set "see sent packets" flag
	 */
	case BIOCSSEESENT:
		d->bd_seesent = *(u_int *)addr;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		*(int *)addr = fgetown(&d->bd_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(&d->bd_sigio);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}
	return (error);
}
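
/*
 * Userland sketch (hedged) of a typical configuration sequence; fd and
 * "em0" are hypothetical.  BIOCSBLEN must precede BIOCSETIF, since
 * bpf_setif() allocates the buffers at their then-current size:
 *
 *	u_int blen = 32768;
 *	struct ifreq ifr;
 *	struct timeval tv = { 1, 0 };
 *
 *	ioctl(fd, BIOCSBLEN, &blen);
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	ioctl(fd, BIOCSRTIMEOUT, &tv);
 */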

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
static int
bpf_setf(d, fp, cmd)
	struct bpf_d *d;
	struct bpf_program *fp;
	u_long cmd;
{
	struct bpf_insn *fcode, *old;
	u_int wfilter, flen, size;

	if (cmd == BIOCSETWF) {
		old = d->bd_wfilter;
		wfilter = 1;
	} else {
		wfilter = 0;
		old = d->bd_rfilter;
	}
	if (fp->bf_insns == NULL) {
		if (fp->bf_len != 0)
			return (EINVAL);
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = NULL;
		else
			d->bd_rfilter = NULL;
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);
		return (0);
	}
	flen = fp->bf_len;
	if (flen > bpf_maxinsns)
		return (EINVAL);

	size = flen * sizeof(*fp->bf_insns);
	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = fcode;
		else
			d->bd_rfilter = fcode;
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);

		return (0);
	}
	free((caddr_t)fcode, M_BPF);
	return (EINVAL);
}
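
/*
 * Userland sketch (hedged): a filter arrives as an array of bpf_insn.
 * This example accepts only ARP frames on an Ethernet, truncating them
 * to 64 bytes; everything else is dropped by the "return 0" action:
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12),
 *		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ETHERTYPE_ARP, 0, 1),
 *		BPF_STMT(BPF_RET+BPF_K, 64),
 *		BPF_STMT(BPF_RET+BPF_K, 0),
 *	};
 *	struct bpf_program prog = { 4, insns };
 *
 *	ioctl(fd, BIOCSETF, &prog);
 */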

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(d, ifr)
	struct bpf_d *d;
	struct ifreq *ifr;
{
	struct bpf_if *bp;
	int error;
	struct ifnet *theywant;

	theywant = ifunit(ifr->ifr_name);
	if (theywant == NULL)
		return ENXIO;

	/*
	 * Look through attached interfaces for the named one.
	 */
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == NULL || ifp != theywant)
			continue;
		/* skip additional entry */
		if (bp->bif_driverp != &ifp->if_bpf)
			continue;

		mtx_unlock(&bpf_mtx);
		/*
		 * We found the requested interface.
		 * Allocate the packet buffers if we need to.
		 * If we're already attached to requested interface,
		 * just flush the buffer.
		 */
		if (d->bd_sbuf == NULL) {
			error = bpf_allocbufs(d);
			if (error != 0)
				return (error);
		}
		if (bp != d->bd_bif) {
			if (d->bd_bif)
				/*
				 * Detach if attached to something else.
				 */
				bpf_detachd(d);

			bpf_attachd(d, bp);
		}
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		return (0);
	}
	mtx_unlock(&bpf_mtx);
	/* Not found. */
	return (ENXIO);
}

/*
 * Support for select() and poll() system calls
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
 */
static int
bpfpoll(dev, events, td)
	struct cdev *dev;
	int events;
	struct thread *td;
{
	struct bpf_d *d;
	int revents;

	d = dev->si_drv1;
	if (d->bd_bif == NULL)
		return (ENXIO);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	d->bd_pid = td->td_proc->p_pid;
	revents = events & (POLLOUT | POLLWRNORM);
	BPFD_LOCK(d);
	if (events & (POLLIN | POLLRDNORM)) {
		if (bpf_ready(d))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			selrecord(td, &d->bd_sel);
			/* Start the read timeout if necessary. */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}
	BPFD_UNLOCK(d);
	return (revents);
}

/*
 * Support for kevent() system call.  Register EVFILT_READ filters and
 * reject all others.
 */
int
bpfkqfilter(dev, kn)
	struct cdev *dev;
	struct knote *kn;
{
	struct bpf_d *d = (struct bpf_d *)dev->si_drv1;

	if (kn->kn_filter != EVFILT_READ)
		return (1);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	d->bd_pid = curthread->td_proc->p_pid;
	kn->kn_fop = &bpfread_filtops;
	kn->kn_hook = d;
	knlist_add(&d->bd_sel.si_note, kn, 0);

	return (0);
}

static void
filt_bpfdetach(kn)
	struct knote *kn;
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	knlist_remove(&d->bd_sel.si_note, kn, 0);
}

static int
filt_bpfread(kn, hint)
	struct knote *kn;
	long hint;
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
	int ready;

	BPFD_LOCK_ASSERT(d);
	ready = bpf_ready(d);
	if (ready) {
		kn->kn_data = d->bd_slen;
		if (d->bd_hbuf)
			kn->kn_data += d->bd_hlen;
	}
	else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		callout_reset(&d->bd_callout, d->bd_rtout,
		    bpf_timed_out, d);
		d->bd_state = BPF_WAITING;
	}

	return (ready);
}

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
void
bpf_tap(bp, pkt, pktlen)
	struct bpf_if *bp;
	u_char *pkt;
	u_int pktlen;
{
	struct bpf_d *d;
	u_int slen;

	/*
	 * Lockless read to avoid cost of locking the interface if there are
	 * no descriptors attached.
	 */
	if (LIST_EMPTY(&bp->bif_dlist))
		return;

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		BPFD_LOCK(d);
		++d->bd_rcount;
		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
		if (slen != 0) {
			d->bd_fcount++;
#ifdef MAC
			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, pkt, pktlen, slen, bcopy);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
static void
bpf_mcopy(src_arg, dst_arg, len)
	const void *src_arg;
	void *dst_arg;
	size_t len;
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, void *), dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
void
bpf_mtap(bp, m)
	struct bpf_if *bp;
	struct mbuf *m;
{
	struct bpf_d *d;
	u_int pktlen, slen;

	/*
	 * Lockless read to avoid cost of locking the interface if there are
	 * no descriptors attached.
	 */
	if (LIST_EMPTY(&bp->bif_dlist))
		return;

	pktlen = m_length(m, NULL);

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
#ifdef MAC
			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)m, pktlen, slen,
				    bpf_mcopy);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}
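
/*
 * Driver-side sketch (hedged): callers guard the tap with a test of
 * the if_bpf cookie, which bpf_attachd()/bpf_detachd() maintain, so
 * the common no-listener case costs a single pointer check:
 *
 *	if (ifp->if_bpf)
 *		bpf_mtap(ifp->if_bpf, m);
 */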

/*
 * Incoming linkage from device drivers, when packet is in
 * an mbuf chain and to be prepended by a contiguous header.
 */
void
bpf_mtap2(bp, data, dlen, m)
	struct bpf_if *bp;
	void *data;
	u_int dlen;
	struct mbuf *m;
{
	struct mbuf mb;
	struct bpf_d *d;
	u_int pktlen, slen;

	/*
	 * Lockless read to avoid cost of locking the interface if there are
	 * no descriptors attached.
	 */
	if (LIST_EMPTY(&bp->bif_dlist))
		return;

	pktlen = m_length(m, NULL);
	/*
	 * Craft an on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only set up what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;
	pktlen += dlen;

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
#ifdef MAC
			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)&mb, pktlen, slen,
				    bpf_mcopy);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  "cpfn" is the routine called to do the actual data
 * transfer.  bcopy is passed in to copy contiguous chunks, while
 * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
static void
catchpacket(d, pkt, pktlen, snaplen, cpfn)
	struct bpf_d *d;
	u_char *pkt;
	u_int pktlen, snaplen;
	void (*cpfn)(const void *, void *, size_t);
{
	struct bpf_hdr *hp;
	int totlen, curlen;
	int hdrlen = d->bd_bif->bif_hdrlen;
	int do_wakeup = 0;

	BPFD_LOCK_ASSERT(d);
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	}
	else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;

	/*
	 * Append the bpf header.
	 */
	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	microtime(&hp->bh_tstamp);
	hp->bh_datalen = pktlen;
	hp->bh_hdrlen = hdrlen;
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
	d->bd_slen = curlen + totlen;

	if (do_wakeup)
		bpf_wakeup(d);
}
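
/*
 * Worked example of the arithmetic above (illustrative numbers, on a
 * platform where SIZEOF_BPF_HDR is 18): an Ethernet bif_hdrlen is
 * BPF_WORDALIGN(14 + 18) - 14 = 18, so a 60-byte packet caught whole
 * consumes totlen = 18 + 60 = 78 bytes; the following packet's header
 * then starts at BPF_WORDALIGN(curlen + 78), keeping every bpf_hdr
 * longword aligned.
 */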

/*
 * Initialize all nonzero fields of a descriptor.
 */
static int
bpf_allocbufs(d)
	struct bpf_d *d;
{
	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
	if (d->bd_fbuf == NULL)
		return (ENOBUFS);

	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
	if (d->bd_sbuf == NULL) {
		free(d->bd_fbuf, M_BPF);
		return (ENOBUFS);
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	return (0);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(d)
	struct bpf_d *d;
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it hasn't yet been marked
	 * free.
	 */
	if (d->bd_sbuf != NULL) {
		free(d->bd_sbuf, M_BPF);
		if (d->bd_hbuf != NULL)
			free(d->bd_hbuf, M_BPF);
		if (d->bd_fbuf != NULL)
			free(d->bd_fbuf, M_BPF);
	}
	if (d->bd_rfilter)
		free((caddr_t)d->bd_rfilter, M_BPF);
	if (d->bd_wfilter)
		free((caddr_t)d->bd_wfilter, M_BPF);
	mtx_destroy(&d->bd_mtx);
}

/*
 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
 * fixed size of the link header (variable length headers not yet supported).
 */
void
bpfattach(ifp, dlt, hdrlen)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

/*
 * Attach an interface to bpf.  ifp is a pointer to the structure
 * defining the interface to be attached, dlt is the link layer type,
 * and hdrlen is the fixed size of the link header (variable length
 * headers are not yet supported).
 */
void
bpfattach2(ifp, dlt, hdrlen, driverp)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
	struct bpf_if **driverp;
{
	struct bpf_if *bp;
	bp = (struct bpf_if *)malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
	if (bp == NULL)
		panic("bpfattach");

	LIST_INIT(&bp->bif_dlist);
	bp->bif_driverp = driverp;
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;
	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);

	mtx_lock(&bpf_mtx);
	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
	mtx_unlock(&bpf_mtx);

	*bp->bif_driverp = NULL;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;

	if (bootverbose)
		if_printf(ifp, "bpf attached\n");
}
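
/*
 * Driver sketch (hedged): an Ethernet driver attaches during interface
 * setup and detaches on teardown (in practice ether_ifattach() and
 * ether_ifdetach() do this on the driver's behalf):
 *
 *	bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
 *	...
 *	bpfdetach(ifp);
 */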

/*
 * Detach bpf from an interface.  This involves detaching each descriptor
 * associated with the interface, and leaving bd_bif NULL.  Notify each
 * descriptor as it's detached so that any sleepers wake up and get
 * ENXIO.
 */
void
bpfdetach(ifp)
	struct ifnet *ifp;
{
	struct bpf_if	*bp;
	struct bpf_d	*d;

	/* Locate BPF interface information */
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (ifp == bp->bif_ifp)
			break;
	}

	/* Interface wasn't attached */
	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
		mtx_unlock(&bpf_mtx);
		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
		return;
	}

	LIST_REMOVE(bp, bif_next);
	mtx_unlock(&bpf_mtx);

	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
		bpf_detachd(d);
		BPFD_LOCK(d);
		bpf_wakeup(d);
		BPFD_UNLOCK(d);
	}

	mtx_destroy(&bp->bif_mtx);
	free(bp, M_BPF);
}

/*
 * Get a list of the available data link types for the interface.
 */
static int
bpf_getdltlist(d, bfl)
	struct bpf_d *d;
	struct bpf_dltlist *bfl;
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len) {
				mtx_unlock(&bpf_mtx);
				return (ENOMEM);
			}
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
		}
		n++;
	}
	mtx_unlock(&bpf_mtx);
	bfl->bfl_len = n;
	return (error);
}
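
/*
 * Userland sketch (hedged): BIOCGDLTLIST is issued twice, first with
 * bfl_list NULL so the kernel only counts into bfl_len, then with a
 * buffer of that size:
 *
 *	struct bpf_dltlist bfl;
 *
 *	bfl.bfl_list = NULL;
 *	ioctl(fd, BIOCGDLTLIST, &bfl);
 *	bfl.bfl_list = malloc(bfl.bfl_len * sizeof(u_int));
 *	ioctl(fd, BIOCGDLTLIST, &bfl);
 */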

/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(d, dlt)
	struct bpf_d *d;
	u_int dlt;
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	ifp = d->bd_bif->bif_ifp;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	mtx_unlock(&bpf_mtx);
	if (bp != NULL) {
		opromisc = d->bd_promisc;
		bpf_detachd(d);
		bpf_attachd(d, bp);
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		if (opromisc) {
			error = ifpromisc(bp->bif_ifp, 1);
			if (error)
				if_printf(bp->bif_ifp,
					"bpf_setdlt: ifpromisc failed (%d)\n",
					error);
			else
				d->bd_promisc = 1;
		}
	}
	return (bp == NULL ? EINVAL : 0);
}

static void
bpf_clone(arg, cred, name, namelen, dev)
	void *arg;
	struct ucred *cred;
	char *name;
	int namelen;
	struct cdev **dev;
{
	int u;

	if (*dev != NULL)
		return;
	if (dev_stdclone(name, NULL, "bpf", &u) != 1)
		return;
	*dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", u);
	dev_ref(*dev);
	(*dev)->si_flags |= SI_CHEAPCLONE;
	return;
}

static void
bpf_drvinit(unused)
	void *unused;
{

	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
	LIST_INIT(&bpf_iflist);
	EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
}

static void
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{

	bzero(d, sizeof(*d));
	BPFD_LOCK_ASSERT(bd);
	d->bd_immediate = bd->bd_immediate;
	d->bd_promisc = bd->bd_promisc;
	d->bd_hdrcmplt = bd->bd_hdrcmplt;
	d->bd_seesent = bd->bd_seesent;
	d->bd_async = bd->bd_async;
	d->bd_rcount = bd->bd_rcount;
	d->bd_dcount = bd->bd_dcount;
	d->bd_fcount = bd->bd_fcount;
	d->bd_sig = bd->bd_sig;
	d->bd_slen = bd->bd_slen;
	d->bd_hlen = bd->bd_hlen;
	d->bd_bufsize = bd->bd_bufsize;
	d->bd_pid = bd->bd_pid;
	strlcpy(d->bd_ifname,
	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
	d->bd_locked = bd->bd_locked;
}

static int
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct xbpf_d *xbdbuf, *xbd;
	int index, error;
	struct bpf_if *bp;
	struct bpf_d *bd;

	/*
	 * XXX This is not technically correct.  It is possible for
	 * unprivileged users to open bpf devices.  It would make sense
	 * if the users who opened the devices were able to retrieve
	 * the statistics for them, too.
	 */
	error = suser(req->td);
	if (error)
		return (error);
	if (req->oldptr == NULL)
		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
	if (bpf_bpfd_cnt == 0)
		return (SYSCTL_OUT(req, 0, 0));
	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
	mtx_lock(&bpf_mtx);
	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
		mtx_unlock(&bpf_mtx);
		free(xbdbuf, M_BPF);
		return (ENOMEM);
	}
	index = 0;
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			xbd = &xbdbuf[index++];
			BPFD_LOCK(bd);
			bpfstats_fill_xbpf(xbd, bd);
			BPFD_UNLOCK(bd);
		}
	}
	mtx_unlock(&bpf_mtx);
	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
	free(xbdbuf, M_BPF);
	return (error);
}

SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL)

#else /* !DEV_BPF && !NETGRAPH_BPF */
/*
 * NOP stubs to allow bpf-using drivers to load and function.
 *
 * A 'better' implementation would allow the core bpf functionality
 * to be loaded at runtime.
 */

void
bpf_tap(bp, pkt, pktlen)
	struct bpf_if *bp;
	u_char *pkt;
	u_int pktlen;
{
}

void
bpf_mtap(bp, m)
	struct bpf_if *bp;
	struct mbuf *m;
{
}

void
bpf_mtap2(bp, d, l, m)
	struct bpf_if *bp;
	void *d;
	u_int l;
	struct mbuf *m;
{
}

void
bpfattach(ifp, dlt, hdrlen)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
{
}

void
bpfattach2(ifp, dlt, hdrlen, driverp)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
	struct bpf_if **driverp;
{
}

void
bpfdetach(ifp)
	struct ifnet *ifp;
{
}

u_int
bpf_filter(pc, p, wirelen, buflen)
	const struct bpf_insn *pc;
	u_char *p;
	u_int wirelen;
	u_int buflen;
{
	return -1;	/* "no filter" behaviour */
}

int
bpf_validate(f, len)
	const struct bpf_insn *f;
	int len;
{
	return 0;		/* false */
}

#endif /* !DEV_BPF && !NETGRAPH_BPF */