1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1990, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 * Copyright (c) 2019 Andrey V. Elsukov <ae@FreeBSD.org>
7 *
8 * This code is derived from the Stanford/CMU enet packet filter,
9 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
10 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
11 * Berkeley Laboratory.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 */
37
38 #include <sys/cdefs.h>
39 #include "opt_bpf.h"
40 #include "opt_netgraph.h"
41
42 #include <sys/param.h>
43 #include <sys/conf.h>
44 #include <sys/fcntl.h>
45 #include <sys/jail.h>
46 #include <sys/ktr.h>
47 #include <sys/lock.h>
48 #include <sys/malloc.h>
49 #include <sys/mbuf.h>
50 #include <sys/mutex.h>
51 #include <sys/time.h>
52 #include <sys/priv.h>
53 #include <sys/proc.h>
54 #include <sys/signalvar.h>
55 #include <sys/filio.h>
56 #include <sys/sockio.h>
57 #include <sys/ttycom.h>
58 #include <sys/uio.h>
59 #include <sys/sysent.h>
60 #include <sys/systm.h>
61
62 #include <sys/file.h>
63 #include <sys/poll.h>
64 #include <sys/proc.h>
65
66 #include <sys/socket.h>
67
68 #include <net/if.h>
69 #include <net/if_var.h>
70 #include <net/if_private.h>
71 #include <net/if_vlan_var.h>
72 #include <net/bpf.h>
73 #include <net/bpf_buffer.h>
74 #ifdef BPF_JITTER
75 #include <net/bpf_jitter.h>
76 #endif
77 #include <net/bpf_zerocopy.h>
78 #include <net/bpfdesc.h>
79 #include <net/vnet.h>
80
81 #include <sys/kernel.h>
82 #include <sys/sysctl.h>
83
84 #include <security/mac/mac_framework.h>
85
86 MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
87
88 struct bpf_if {
89 	struct bpfd_list bif_dlist;	/* descriptor list */
90 	LIST_ENTRY(bpf_if) bif_next;	/* list of all interfaces */
91 u_int bif_dlt; /* link layer type */
92 u_int bif_hdrlen; /* length of link header */
93 volatile u_int bif_refcnt;
94 struct bpfd_list bif_wlist; /* writer-only list */
95 const struct bif_methods *bif_methods;
96 void *bif_softc;
97 const char *bif_name;
98 struct epoch_context epoch_ctx;
99 };
100
101 /* See bpf_peers_present() in bpf.h. */
102 _Static_assert(offsetof(struct bpf_if, bif_dlist) == 0,
103 "bpf_if shall start with bif_dlist");
104
105 static inline void
106 bif_attachd(struct bpf_if *bp)
107 {
108 if (bp->bif_methods->bif_attachd != NULL)
109 bp->bif_methods->bif_attachd(bp->bif_softc);
110 }
111
112 static inline void
113 bif_detachd(struct bpf_if *bp)
114 {
115 if (bp->bif_methods->bif_detachd != NULL)
116 bp->bif_methods->bif_detachd(bp->bif_softc);
117 }
118
119 static inline uint32_t
120 bif_wrsize(struct bpf_if *bp)
121 {
122 if (bp->bif_methods->bif_wrsize != NULL)
123 return (bp->bif_methods->bif_wrsize(bp->bif_softc));
124 else
125 return (0);
126 }
127
128 static inline int
129 bif_promisc(struct bpf_if *bp, bool on)
130 {
131 if (bp->bif_methods->bif_promisc != NULL)
132 return (bp->bif_methods->bif_promisc(bp->bif_softc, on));
133 else
134 return (0);
135 }
136
137 #ifdef MAC
138 static inline int
139 bif_mac_check_receive(struct bpf_if *bp, struct bpf_d *d)
140 {
141 if (bp->bif_methods->bif_mac_check_receive != NULL)
142 return (bp->bif_methods->bif_mac_check_receive(bp->bif_softc,
143 d));
144 else
145 return (0);
146 }
147 #endif
148
149 /*
150 * XXXGL: Once we migrate to tapping KPI that would specify packet direction
151 * we no longer need bif_chkdir method.
152 */
153 static inline bool
154 bpf_chkdir(struct bpf_d *d, struct mbuf *m)
155 {
156 return (d->bd_bif->bif_methods->bif_chkdir(d->bd_bif->bif_softc, m,
157 d->bd_direction));
158 }
159
160 struct bpf_program_buffer {
161 struct epoch_context epoch_ctx;
162 #ifdef BPF_JITTER
163 bpf_jit_filter *func;
164 #endif
165 void *buffer[0];
166 };
167
168 #if defined(DEV_BPF) || defined(NETGRAPH_BPF)
169
170 #define PRINET 26 /* interruptible */
171 #define BPF_PRIO_MAX 7
172
173 #define SIZEOF_BPF_HDR(type) \
174 (offsetof(type, bh_hdrlen) + sizeof(((type *)0)->bh_hdrlen))
175
176 #ifdef COMPAT_FREEBSD32
177 #include <sys/mount.h>
178 #include <compat/freebsd32/freebsd32.h>
179 #define BPF_ALIGNMENT32 sizeof(int32_t)
180 #define BPF_WORDALIGN32(x) roundup2(x, BPF_ALIGNMENT32)
181
182 #ifndef BURN_BRIDGES
183 /*
184 * 32-bit version of structure prepended to each packet. We use this header
185 * instead of the standard one for 32-bit streams. We mark a stream as
186 * 32-bit the first time we see a 32-bit compat ioctl request.
187 */
188 struct bpf_hdr32 {
189 struct timeval32 bh_tstamp; /* time stamp */
190 uint32_t bh_caplen; /* length of captured portion */
191 uint32_t bh_datalen; /* original length of packet */
192 uint16_t bh_hdrlen; /* length of bpf header (this struct
193 plus alignment padding) */
194 };
195 #endif
196
197 struct bpf_program32 {
198 u_int bf_len;
199 uint32_t bf_insns;
200 };
201
202 struct bpf_dltlist32 {
203 u_int bfl_len;
204 u_int bfl_list;
205 };
206
207 #define BIOCSETF32 _IOW('B', 103, struct bpf_program32)
208 #define BIOCSRTIMEOUT32 _IOW('B', 109, struct timeval32)
209 #define BIOCGRTIMEOUT32 _IOR('B', 110, struct timeval32)
210 #define BIOCGDLTLIST32 _IOWR('B', 121, struct bpf_dltlist32)
211 #define BIOCSETWF32 _IOW('B', 123, struct bpf_program32)
212 #define BIOCSETFNR32 _IOW('B', 130, struct bpf_program32)
213 #endif
214
215 #define BPF_LOCK() sx_xlock(&bpf_sx)
216 #define BPF_UNLOCK() sx_xunlock(&bpf_sx)
217 #define BPF_LOCK_ASSERT() sx_assert(&bpf_sx, SA_XLOCKED)
218 /*
219 * bpf_iflist is a list of BPF interface structures, each corresponding to a
220 * specific DLT. The same network interface might have several BPF interface
221 * structures registered by different layers in the stack (i.e., 802.11
222 * frames, ethernet frames, etc).
223 */
224 VNET_DEFINE_STATIC(LIST_HEAD(, bpf_if), bpf_iflist) = LIST_HEAD_INITIALIZER();
225 #define V_bpf_iflist VNET(bpf_iflist)
226 static struct sx bpf_sx; /* bpf global lock */
227
228 static void bpfif_ref(struct bpf_if *);
229 static void bpfif_rele(struct bpf_if *);
230
231 static void bpfd_ref(struct bpf_d *);
232 static void bpfd_rele(struct bpf_d *);
233 static int bpf_attachd(struct bpf_d *d, struct bpf_if *);
234 static void bpf_detachd(struct bpf_d *, bool);
235 static void bpfd_free(epoch_context_t);
236 static void bpf_timed_out(void *);
237 static __inline void
238 bpf_wakeup(struct bpf_d *);
239 static void catchpacket(struct bpf_d *, u_char *, u_int, u_int,
240 void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
241 struct bintime *);
242 static void reset_d(struct bpf_d *);
243 static int bpf_getiflist(struct bpf_iflist *);
244 static int bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
245 static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
246 static int bpf_setdlt(struct bpf_d *, u_int);
247 static void filt_bpfdetach(struct knote *);
248 static int filt_bpfread(struct knote *, long);
249 static int filt_bpfwrite(struct knote *, long);
250 static void bpf_drvinit(void *);
251 static int bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);
252
253 SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
254 "bpf sysctl");
255 int bpf_maxinsns = BPF_MAXINSNS;
256 SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
257 &bpf_maxinsns, 0, "Maximum bpf program instructions");
258 static int bpf_zerocopy_enable = 0;
259 SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
260 &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
261 static SYSCTL_NODE(_net_bpf, OID_AUTO, stats,
262 CTLFLAG_VNET | CTLFLAG_MPSAFE | CTLFLAG_RW,
263 bpf_stats_sysctl, "bpf statistics portal");
264
265 VNET_DEFINE_STATIC(int, bpf_optimize_writers) = 0;
266 #define V_bpf_optimize_writers VNET(bpf_optimize_writers)
267 SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET | CTLFLAG_RWTUN,
268 &VNET_NAME(bpf_optimize_writers), 0,
269 "Do not send packets until BPF program is set");
270
271 static d_open_t bpfopen;
272 static d_read_t bpfread;
273 static d_write_t bpfwrite;
274 static d_ioctl_t bpfioctl;
275 static d_poll_t bpfpoll;
276 static d_kqfilter_t bpfkqfilter;
277
278 static struct cdevsw bpf_cdevsw = {
279 .d_version = D_VERSION,
280 .d_open = bpfopen,
281 .d_read = bpfread,
282 .d_write = bpfwrite,
283 .d_ioctl = bpfioctl,
284 .d_poll = bpfpoll,
285 .d_name = "bpf",
286 .d_kqfilter = bpfkqfilter,
287 };
288
289 static const struct filterops bpfread_filtops = {
290 .f_isfd = 1,
291 .f_detach = filt_bpfdetach,
292 .f_event = filt_bpfread,
293 .f_copy = knote_triv_copy,
294 };
295
296 static const struct filterops bpfwrite_filtops = {
297 .f_isfd = 1,
298 .f_detach = filt_bpfdetach,
299 .f_event = filt_bpfwrite,
300 .f_copy = knote_triv_copy,
301 };
302
303 /*
304 * LOCKING MODEL USED BY BPF
305 *
306 * Locks:
307 * 1) global lock (BPF_LOCK). An sx lock used to protect some global counters
308 * and all bpf_iflist changes, and to serialize ioctl access to bpf descriptors.
309 * 2) Descriptor lock. Mutex, used to protect BPF buffers and various
310 * structure fields used by bpf_*tap* code.
311 *
312 * Lock order: global lock, then descriptor lock.
313 *
314 * There are several possible consumers:
315 *
316 * 1. The kernel registers an interface pointer with bpfattach().
317 * Each call allocates a new bpf_if structure, references the ifnet
318 * pointer and links the bpf_if into the bpf_iflist chain. This is
319 * protected with the global lock.
320 *
321 * 2. A userland application issues ioctl() calls on a bpf_d descriptor.
322 * All such calls are serialized with the global lock. BPF filters can
323 * be changed, but the pointer to the old filter is freed using
324 * NET_EPOCH_CALL(), so it is safe for bpf_tap/bpf_mtap* code to access
325 * filter pointers even if a change happens during bpf_tap() execution.
326 * Destruction of a bpf_d descriptor is also done using NET_EPOCH_CALL().
327 *
328 * 3. A userland application can write packets into a bpf_d descriptor.
329 * Here we need to be sure that the ifnet won't disappear during bpfwrite().
330 *
331 * 4. The kernel invokes the bpf_tap/bpf_mtap* functions. Access to
332 * bif_dlist is protected with a net_epoch_preempt section, so it is
333 * safe to access the bpf_d descriptor inside the section.
334 *
335 * 5. The kernel invokes bpfdetach() on interface destruction. All lists
336 * are modified with the global lock held and the actual free() is done
337 * using NET_EPOCH_CALL().
338 */
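/*
 * Editor's illustration of the model above (not part of the original
 * file): readers run under the network epoch only, while configuration
 * paths take the global lock and defer frees to an epoch callback.
 *
 *	// tapping path: lockless list walk inside an epoch section
 *	NET_EPOCH_ENTER(et);
 *	CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next)
 *		(void)d;	// run filter, catchpacket(), ...
 *	NET_EPOCH_EXIT(et);
 *
 *	// configuration path: unlink under BPF_LOCK(), free only after a
 *	// grace period so concurrent tappers never see freed memory
 *	BPF_LOCK();
 *	CK_LIST_REMOVE(d, bd_next);
 *	BPF_UNLOCK();
 *	NET_EPOCH_CALL(bpfd_free, &d->epoch_ctx);
 */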
339
340 static void
341 bpfif_free(epoch_context_t ctx)
342 {
343 struct bpf_if *bp;
344
345 bp = __containerof(ctx, struct bpf_if, epoch_ctx);
346 free(bp, M_BPF);
347 }
348
349 static void
350 bpfif_ref(struct bpf_if *bp)
351 {
352
353 refcount_acquire(&bp->bif_refcnt);
354 }
355
356 static void
357 bpfif_rele(struct bpf_if *bp)
358 {
359
360 if (!refcount_release(&bp->bif_refcnt))
361 return;
362 NET_EPOCH_CALL(bpfif_free, &bp->epoch_ctx);
363 }
364
365 static void
366 bpfd_ref(struct bpf_d *d)
367 {
368
369 refcount_acquire(&d->bd_refcnt);
370 }
371
372 static void
373 bpfd_rele(struct bpf_d *d)
374 {
375
376 if (!refcount_release(&d->bd_refcnt))
377 return;
378 NET_EPOCH_CALL(bpfd_free, &d->epoch_ctx);
379 }
380
381 static struct bpf_program_buffer*
382 bpf_program_buffer_alloc(size_t size, int flags)
383 {
384
385 return (malloc(sizeof(struct bpf_program_buffer) + size,
386 M_BPF, flags));
387 }
388
389 static void
390 bpf_program_buffer_free(epoch_context_t ctx)
391 {
392 struct bpf_program_buffer *ptr;
393
394 ptr = __containerof(ctx, struct bpf_program_buffer, epoch_ctx);
395 #ifdef BPF_JITTER
396 if (ptr->func != NULL)
397 bpf_destroy_jit_filter(ptr->func);
398 #endif
399 free(ptr, M_BPF);
400 }
401
402 /*
403 * Wrapper functions for various buffering methods. If the set of buffer
404 * modes expands, we will probably want to introduce a switch data structure
405 * similar to protosw, etc.
406 */
407 static void
408 bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
409 u_int len)
410 {
411
412 BPFD_LOCK_ASSERT(d);
413
414 switch (d->bd_bufmode) {
415 case BPF_BUFMODE_BUFFER:
416 return (bpf_buffer_append_bytes(d, buf, offset, src, len));
417
418 case BPF_BUFMODE_ZBUF:
419 counter_u64_add(d->bd_zcopy, 1);
420 return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));
421
422 default:
423 panic("bpf_buf_append_bytes");
424 }
425 }
426
427 static void
428 bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
429 u_int len)
430 {
431
432 BPFD_LOCK_ASSERT(d);
433
434 switch (d->bd_bufmode) {
435 case BPF_BUFMODE_BUFFER:
436 return (bpf_buffer_append_mbuf(d, buf, offset, src, len));
437
438 case BPF_BUFMODE_ZBUF:
439 counter_u64_add(d->bd_zcopy, 1);
440 return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));
441
442 default:
443 panic("bpf_buf_append_mbuf");
444 }
445 }
446
447 /*
448 * This function gets called when the free buffer is re-assigned.
449 */
450 static void
451 bpf_buf_reclaimed(struct bpf_d *d)
452 {
453
454 BPFD_LOCK_ASSERT(d);
455
456 switch (d->bd_bufmode) {
457 case BPF_BUFMODE_BUFFER:
458 return;
459
460 case BPF_BUFMODE_ZBUF:
461 bpf_zerocopy_buf_reclaimed(d);
462 return;
463
464 default:
465 panic("bpf_buf_reclaimed");
466 }
467 }
468
469 /*
470 * If the buffer mechanism has a way to decide that a held buffer can be made
471 * free, then it is exposed via the bpf_canfreebuf() interface. (1) is
472 * returned if the buffer can be discarded, (0) is returned if it cannot.
473 */
474 static int
475 bpf_canfreebuf(struct bpf_d *d)
476 {
477
478 BPFD_LOCK_ASSERT(d);
479
480 switch (d->bd_bufmode) {
481 case BPF_BUFMODE_ZBUF:
482 return (bpf_zerocopy_canfreebuf(d));
483 }
484 return (0);
485 }
486
487 /*
488 * Allow the buffer model to indicate that the current store buffer is
489 * immutable, regardless of the appearance of space. Return (1) if the
490 * buffer is writable, and (0) if not.
491 */
492 static int
493 bpf_canwritebuf(struct bpf_d *d)
494 {
495 BPFD_LOCK_ASSERT(d);
496
497 switch (d->bd_bufmode) {
498 case BPF_BUFMODE_ZBUF:
499 return (bpf_zerocopy_canwritebuf(d));
500 }
501 return (1);
502 }
503
504 /*
505 * Notify buffer model that an attempt to write to the store buffer has
506 * resulted in a dropped packet, in which case the buffer may be considered
507 * full.
508 */
509 static void
510 bpf_buffull(struct bpf_d *d)
511 {
512
513 BPFD_LOCK_ASSERT(d);
514
515 switch (d->bd_bufmode) {
516 case BPF_BUFMODE_ZBUF:
517 bpf_zerocopy_buffull(d);
518 break;
519 }
520 }
521
522 /*
523 * Notify the buffer model that a buffer has moved into the hold position.
524 */
525 void
526 bpf_bufheld(struct bpf_d *d)
527 {
528
529 BPFD_LOCK_ASSERT(d);
530
531 switch (d->bd_bufmode) {
532 case BPF_BUFMODE_ZBUF:
533 bpf_zerocopy_bufheld(d);
534 break;
535 }
536 }
537
538 static void
539 bpf_free(struct bpf_d *d)
540 {
541
542 switch (d->bd_bufmode) {
543 case BPF_BUFMODE_BUFFER:
544 return (bpf_buffer_free(d));
545
546 case BPF_BUFMODE_ZBUF:
547 return (bpf_zerocopy_free(d));
548
549 default:
550 panic("bpf_buf_free");
551 }
552 }
553
554 static int
555 bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
556 {
557
558 if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
559 return (EOPNOTSUPP);
560 return (bpf_buffer_uiomove(d, buf, len, uio));
561 }
562
563 static int
564 bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
565 {
566
567 if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
568 return (EOPNOTSUPP);
569 return (bpf_buffer_ioctl_sblen(d, i));
570 }
571
572 static int
573 bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
574 {
575
576 if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
577 return (EOPNOTSUPP);
578 return (bpf_zerocopy_ioctl_getzmax(td, d, i));
579 }
580
581 static int
582 bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
583 {
584
585 if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
586 return (EOPNOTSUPP);
587 return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
588 }
589
590 static int
591 bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
592 {
593
594 if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
595 return (EOPNOTSUPP);
596 return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
597 }
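/*
 * Editor's sketch of a zero-copy session driven through the wrappers
 * above, assuming net.bpf.zerocopy_enable is set; buffer allocation and
 * error handling are abbreviated, struct bpf_zbuf is from <net/bpf.h>:
 *
 *	u_int mode = BPF_BUFMODE_ZBUF;
 *	size_t zmax;
 *	struct bpf_zbuf bz;
 *
 *	ioctl(fd, BIOCSETBUFMODE, &mode);
 *	ioctl(fd, BIOCGETZMAX, &zmax);
 *	bz.bz_buflen = zmax;
 *	bz.bz_bufa = mmap(NULL, zmax, PROT_READ | PROT_WRITE,
 *	    MAP_ANON, -1, 0);
 *	bz.bz_bufb = mmap(NULL, zmax, PROT_READ | PROT_WRITE,
 *	    MAP_ANON, -1, 0);
 *	ioctl(fd, BIOCSETZBUF, &bz);	// register both buffers
 *	ioctl(fd, BIOCSETIF, &ifr);	// then attach to a tap point
 */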
598
599 /*
600 * Check if we need to upgrade our descriptor @d from write-only mode.
601 */
602 static int
603 bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode,
604 int flen)
605 {
606 int is_snap, need_upgrade;
607
608 /*
609 * Check if we've already upgraded or the new filter is empty.
610 */
611 if (d->bd_writer == 0 || fcode == NULL)
612 return (0);
613
614 need_upgrade = 0;
615
616 /*
617 * Check if cmd looks like snaplen setting from
618 * pcap_bpf.c:pcap_open_live().
619 * Note we're not checking the .k value here: while
620 * pcap_open_live() definitely sets it to a non-zero value,
621 * we'd prefer to treat the k=0 (deny ALL) case the same way,
622 * i.e. do not consider upgrading immediately.
623 */
624 if (cmd == BIOCSETF && flen == 1 &&
625 fcode[0].code == (BPF_RET | BPF_K))
626 is_snap = 1;
627 else
628 is_snap = 0;
629
630 if (is_snap == 0) {
631 /*
632 * We're setting the first filter and it doesn't look
633 * like a snaplen setting. We're probably using bpf directly.
634 * Upgrade immediately.
635 */
636 need_upgrade = 1;
637 } else {
638 /*
639 * Do not require an upgrade on the first BIOCSETF
640 * (used by pcap_open_live() to set the snaplen).
641 */
642
643 if (--d->bd_writer == 0) {
644 /*
645 * The first snaplen filter has already
646 * been set. This is probably a catch-all
647 * filter.
648 */
649 need_upgrade = 1;
650 }
651 }
652
653 CTR5(KTR_NET,
654 "%s: filter function set by pid %d, "
655 "bd_writer counter %d, snap %d upgrade %d",
656 __func__, d->bd_pid, d->bd_writer,
657 is_snap, need_upgrade);
658
659 return (need_upgrade);
660 }
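/*
 * Editor's note: the snaplen-setting program installed by
 * pcap_open_live(), which the heuristic above matches, is a single
 * "accept snaplen bytes" instruction; with BPF_STMT() from <net/bpf.h>:
 *
 *	struct bpf_insn snap = BPF_STMT(BPF_RET | BPF_K, snaplen);
 */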
661
662 /*
663 * Detach a file from its interface.
664 */
665 static void
666 bpf_detachd(struct bpf_d *d, bool detached_ifp)
667 {
668 struct bpf_if *bp;
669 bool writer;
670
671 BPF_LOCK_ASSERT();
672 CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
673
674 /* Check if descriptor is attached */
675 if ((bp = d->bd_bif) == NULL)
676 return;
677
678 BPFD_LOCK(d);
679 CK_LIST_REMOVE(d, bd_next);
680 writer = (d->bd_writer > 0);
681 if (detached_ifp) {
682 d->bd_bif = NULL;
683 /*
684 * Notify the descriptor that it's detached, so that any
685 * sleepers wake up and get ENXIO.
686 */
687 bpf_wakeup(d);
688 }
689 BPFD_UNLOCK(d);
690
691 if (!writer)
692 bif_detachd(bp);
693
694 if (d->bd_promisc && !detached_ifp) {
695 d->bd_promisc = 0;
696 (void)bif_promisc(bp, false);
697 }
698
699 bpfif_rele(bp);
700 }
701
702 /*
703 * Close the descriptor by detaching it from its interface,
704 * deallocating its buffers, and marking it free.
705 */
706 static void
707 bpf_dtor(void *data)
708 {
709 struct bpf_d *d = data;
710
711 BPFD_LOCK(d);
712 if (d->bd_state == BPF_WAITING)
713 callout_stop(&d->bd_callout);
714 d->bd_state = BPF_IDLE;
715 BPFD_UNLOCK(d);
716 funsetown(&d->bd_sigio);
717 BPF_LOCK();
718 bpf_detachd(d, false);
719 BPF_UNLOCK();
720 #ifdef MAC
721 mac_bpfdesc_destroy(d);
722 #endif /* MAC */
723 seldrain(&d->bd_sel);
724 knlist_destroy(&d->bd_sel.si_note);
725 callout_drain(&d->bd_callout);
726 bpfd_rele(d);
727 }
728
729 /*
730 * Open the bpf device. A new descriptor is allocated for each open and
731 * attached to the file handle via devfs_set_cdevpriv().
732 */
733 /* ARGSUSED */
734 static int
735 bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
736 {
737 struct bpf_d *d;
738 int error;
739
740 d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
741 error = devfs_set_cdevpriv(d, bpf_dtor);
742 if (error != 0) {
743 free(d, M_BPF);
744 return (error);
745 }
746
747 /* Setup counters */
748 d->bd_rcount = counter_u64_alloc(M_WAITOK);
749 d->bd_dcount = counter_u64_alloc(M_WAITOK);
750 d->bd_fcount = counter_u64_alloc(M_WAITOK);
751 d->bd_wcount = counter_u64_alloc(M_WAITOK);
752 d->bd_wfcount = counter_u64_alloc(M_WAITOK);
753 d->bd_wdcount = counter_u64_alloc(M_WAITOK);
754 d->bd_zcopy = counter_u64_alloc(M_WAITOK);
755
756 /*
757 * For historical reasons, perform a one-time initialization call to
758 * the buffer routines, even though we're not yet committed to a
759 * particular buffer method.
760 */
761 bpf_buffer_init(d);
762 if ((flags & FREAD) == 0)
763 d->bd_writer = 2;
764 d->bd_bufmode = BPF_BUFMODE_BUFFER;
765 d->bd_sig = SIGIO;
766 d->bd_direction = BPF_D_INOUT;
767 refcount_init(&d->bd_refcnt, 1);
768 BPF_PID_REFRESH(d, td);
769 #ifdef MAC
770 mac_bpfdesc_init(d);
771 mac_bpfdesc_create(td->td_ucred, d);
772 #endif
773 mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF);
774 callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
775 knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);
776
777 /* Disable VLAN pcp tagging. */
778 d->bd_pcp = 0;
779
780 return (0);
781 }
782
783 /*
784 * bpfread - read next chunk of packets from buffers
785 */
786 static int
787 bpfread(struct cdev *dev, struct uio *uio, int ioflag)
788 {
789 struct bpf_d *d;
790 int error;
791 int non_block;
792 int timed_out;
793
794 error = devfs_get_cdevpriv((void **)&d);
795 if (error != 0)
796 return (error);
797
798 /*
799 * Restrict application to use a buffer the same size as
800 * the kernel buffers.
801 */
802 if (uio->uio_resid != d->bd_bufsize)
803 return (EINVAL);
804
805 non_block = ((ioflag & O_NONBLOCK) != 0);
806
807 BPFD_LOCK(d);
808 BPF_PID_REFRESH_CUR(d);
809 if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
810 BPFD_UNLOCK(d);
811 return (EOPNOTSUPP);
812 }
813 if (d->bd_state == BPF_WAITING)
814 callout_stop(&d->bd_callout);
815 timed_out = (d->bd_state == BPF_TIMED_OUT);
816 d->bd_state = BPF_IDLE;
817 while (d->bd_flags & BPFD_HBUF_INUSE) {
818 error = mtx_sleep(&d->bd_hbuf, &d->bd_lock, PRINET | PCATCH,
819 "bd_hbuf", 0);
820 if (error != 0) {
821 BPFD_UNLOCK(d);
822 return (error);
823 }
824 }
825 /*
826 * If the hold buffer is empty, then do a timed sleep, which
827 * ends when the timeout expires or when enough packets
828 * have arrived to fill the store buffer.
829 */
830 while (d->bd_hbuf == NULL) {
831 if (d->bd_slen != 0) {
832 /*
833 * One or more packets arrived since the previous
834 * read or while we were asleep.
835 */
836 if ((d->bd_flags & BPFD_IMMEDIATE) || non_block ||
837 timed_out) {
838 /*
839 * Rotate the buffers and return what's here
840 * if we are in immediate mode, non-blocking
841 * flag is set, or this descriptor timed out.
842 */
843 ROTATE_BUFFERS(d);
844 break;
845 }
846 }
847
848 /*
849 * No data is available, check to see if the bpf device
850 * is still pointed at a real interface. If not, return
851 * ENXIO so that the userland process knows to rebind
852 * it before using it again.
853 */
854 if (d->bd_bif == NULL) {
855 BPFD_UNLOCK(d);
856 return (ENXIO);
857 }
858
859 if (non_block) {
860 BPFD_UNLOCK(d);
861 return (EWOULDBLOCK);
862 }
863 error = msleep(d, &d->bd_lock, PRINET | PCATCH,
864 "bpf", d->bd_rtout);
865 if (error == EINTR || error == ERESTART) {
866 BPFD_UNLOCK(d);
867 return (error);
868 }
869 if (error == EWOULDBLOCK) {
870 /*
871 * On a timeout, return what's in the buffer,
872 * which may be nothing. If there is something
873 * in the store buffer, we can rotate the buffers.
874 */
875 if (d->bd_hbuf)
876 /*
877 * We filled up the buffer in between
878 * getting the timeout and arriving
879 * here, so we don't need to rotate.
880 */
881 break;
882
883 if (d->bd_slen == 0) {
884 BPFD_UNLOCK(d);
885 return (0);
886 }
887 ROTATE_BUFFERS(d);
888 break;
889 }
890 }
891 /*
892 * At this point, we know we have something in the hold slot.
893 */
894 d->bd_flags |= BPFD_HBUF_INUSE;
895 BPFD_UNLOCK(d);
896
897 /*
898 * Move data from hold buffer into user space.
899 * We know the entire buffer is transferred since
900 * we checked above that the read buffer is bpf_bufsize bytes.
901 *
902 * We do not have to worry about simultaneous reads because
903 * we waited for sole access to the hold buffer above.
904 */
905 error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);
906
907 BPFD_LOCK(d);
908 if (d->bd_flags & BPFD_HBUF_INUSE) {
909 KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf"));
910 d->bd_fbuf = d->bd_hbuf;
911 d->bd_hbuf = NULL;
912 d->bd_hlen = 0;
913 bpf_buf_reclaimed(d);
914 d->bd_flags &= ~BPFD_HBUF_INUSE;
915 wakeup(&d->bd_hbuf);
916 }
917 BPFD_UNLOCK(d);
918
919 return (error);
920 }
921
922 /*
923 * If there are processes sleeping on this descriptor, wake them up.
924 */
925 static __inline void
926 bpf_wakeup(struct bpf_d *d)
927 {
928
929 BPFD_LOCK_ASSERT(d);
930 if (d->bd_state == BPF_WAITING) {
931 callout_stop(&d->bd_callout);
932 d->bd_state = BPF_IDLE;
933 }
934 wakeup(d);
935 if ((d->bd_flags & BPFD_ASYNC) && d->bd_sig && d->bd_sigio)
936 pgsigio(&d->bd_sigio, d->bd_sig, 0);
937
938 selwakeuppri(&d->bd_sel, PRINET);
939 KNOTE_LOCKED(&d->bd_sel.si_note, 0);
940 }
941
942 static void
943 bpf_timed_out(void *arg)
944 {
945 struct bpf_d *d = (struct bpf_d *)arg;
946
947 BPFD_LOCK_ASSERT(d);
948
949 if (callout_pending(&d->bd_callout) ||
950 !callout_active(&d->bd_callout))
951 return;
952 if (d->bd_state == BPF_WAITING) {
953 d->bd_state = BPF_TIMED_OUT;
954 if (d->bd_slen != 0)
955 bpf_wakeup(d);
956 }
957 }
958
959 static int
960 bpf_ready(struct bpf_d *d)
961 {
962
963 BPFD_LOCK_ASSERT(d);
964
965 if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
966 return (1);
967 if (((d->bd_flags & BPFD_IMMEDIATE) || d->bd_state == BPF_TIMED_OUT) &&
968 d->bd_slen != 0)
969 return (1);
970 return (0);
971 }
972
973 static int
974 bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
975 {
976 struct epoch_tracker et;
977 struct bpf_if *bp;
978 struct bpf_d *d;
979 struct mbuf *m, *mc;
980 ssize_t len;
981 int error;
982
983 error = devfs_get_cdevpriv((void **)&d);
984 if (error != 0)
985 return (error);
986
987 if (uio->uio_resid == 0)
988 return (0);
989
990 BPFD_LOCK(d);
991 if ((bp = d->bd_bif) == NULL)
992 error = ENXIO;
993 else if (bp->bif_methods->bif_write == NULL)
994 error = EOPNOTSUPP;
995 if (error) {
996 BPFD_UNLOCK(d);
997 counter_u64_add(d->bd_wdcount, 1);
998 return (error);
999 }
1000 bpfd_ref(d);
1001 BPFD_UNLOCK(d);
1002
1003 len = uio->uio_resid;
1004 /* Allocate an mbuf, up to MJUM16BYTES bytes, for our write. */
1005 m = m_get3(len, M_WAITOK, MT_DATA, M_PKTHDR);
1006 if (m == NULL) {
1007 error = ENOMEM;
1008 goto fail_wref;
1009 }
1010 m->m_pkthdr.len = m->m_len = len;
1011
1012 error = uiomove(mtod(m, u_char *), len, uio);
1013 if (error)
1014 goto fail_wref;
1015
1016 if (bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len) == 0) {
1017 error = EPERM;
1018 goto fail_wref;
1019 }
1020
1021 if (d->bd_flags & BPFD_FEEDBACK) {
1022 mc = m_dup(m, M_WAITOK);
1023 /* Set M_PROMISC for outgoing packets to be discarded. */
1024 if (d->bd_direction == BPF_D_INOUT)
1025 m->m_flags |= M_PROMISC;
1026 } else
1027 mc = NULL;
1028
1029 /* XXXGL: should belong to bpf_ifnet.c */
1030 if (d->bd_pcp != 0)
1031 (void)vlan_set_pcp(m, d->bd_pcp);
1032
1033 BPFD_LOCK(d);
1034 #ifdef MAC
1035 mac_bpfdesc_create_mbuf(d, m);
1036 if (mc != NULL)
1037 mac_bpfdesc_create_mbuf(d, mc);
1038 #endif
1039 /*
1040 * Check that the descriptor is still attached to the interface.
1041 * Detachment can happen on bpfdetach() or if another thread did BIOCSDLT.
1042 */
1043 if (__predict_false(d->bd_bif != bp)) {
1044 BPFD_UNLOCK(d);
1045 m_freem(mc);
1046 error = ENXIO;
1047 goto fail_wref;
1048 }
1049 BPFD_UNLOCK(d);
1050
1051 NET_EPOCH_ENTER(et);
1052 error = bp->bif_methods->bif_write(bp->bif_softc, m, mc, d->bd_flags);
1053 NET_EPOCH_EXIT(et);
1054 if (error)
1055 counter_u64_add(d->bd_wdcount, 1);
1056 else
1057 counter_u64_add(d->bd_wfcount, 1);
1058 bpfd_rele(d);
1059
1060 return (error);
1061
1062 fail_wref:
1063 counter_u64_add(d->bd_wdcount, 1);
1064 bpfd_rele(d);
1065 m_freem(m);
1066 return (error);
1067 }
1068
1069 /*
1070 * Reset a descriptor by flushing its packet buffer and clearing the receive
1071 * and drop counts. This is doable for kernel-only buffers, but with
1072 * zero-copy buffers, we can't write to (or rotate) buffers that are
1073 * currently owned by userspace. It would be nice if we could encapsulate
1074 * this logic in the buffer code rather than here.
1075 */
1076 static void
1077 reset_d(struct bpf_d *d)
1078 {
1079
1080 BPFD_LOCK_ASSERT(d);
1081
1082 while (d->bd_flags & BPFD_HBUF_INUSE)
1083 mtx_sleep(&d->bd_hbuf, &d->bd_lock, PRINET, "bd_hbuf", 0);
1084 if ((d->bd_hbuf != NULL) &&
1085 (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
1086 /* Free the hold buffer. */
1087 d->bd_fbuf = d->bd_hbuf;
1088 d->bd_hbuf = NULL;
1089 d->bd_hlen = 0;
1090 bpf_buf_reclaimed(d);
1091 }
1092 if (bpf_canwritebuf(d))
1093 d->bd_slen = 0;
1094 counter_u64_zero(d->bd_rcount);
1095 counter_u64_zero(d->bd_dcount);
1096 counter_u64_zero(d->bd_fcount);
1097 counter_u64_zero(d->bd_wcount);
1098 counter_u64_zero(d->bd_wfcount);
1099 counter_u64_zero(d->bd_wdcount);
1100 counter_u64_zero(d->bd_zcopy);
1101 }
1102
1103 /*
1104 * FIONREAD Check for read packet available.
1105 * BIOCGETIFLIST Get list of all tap points.
1106 * BIOCGBLEN Get buffer len [for read()].
1107 * BIOCSETF Set read filter.
1108 * BIOCSETFNR Set read filter without resetting descriptor.
1109 * BIOCSETWF Set write filter.
1110 * BIOCFLUSH Flush read packet buffer.
1111 * BIOCPROMISC Put interface into promiscuous mode.
1112 * BIOCGDLT Get link layer type.
1113 * BIOCGETIF Get interface name.
1114 * BIOCSETIF Set interface.
1115 * BIOCSRTIMEOUT Set read timeout.
1116 * BIOCGRTIMEOUT Get read timeout.
1117 * BIOCGSTATS Get packet stats.
1118 * BIOCIMMEDIATE Set immediate mode.
1119 * BIOCVERSION Get filter language version.
1120 * BIOCGHDRCMPLT Get "header already complete" flag
1121 * BIOCSHDRCMPLT Set "header already complete" flag
1122 * BIOCGDIRECTION Get packet direction flag
1123 * BIOCSDIRECTION Set packet direction flag
1124 * BIOCGTSTAMP Get time stamp format and resolution.
1125 * BIOCSTSTAMP Set time stamp format and resolution.
1126 * BIOCLOCK Set "locked" flag
1127 * BIOCFEEDBACK Set packet feedback mode.
1128 * BIOCSETZBUF Set current zero-copy buffer locations.
1129 * BIOCGETZMAX Get maximum zero-copy buffer size.
1130 * BIOCROTZBUF Force rotation of zero-copy buffer
1131 * BIOCSETBUFMODE Set buffer mode.
1132 * BIOCGETBUFMODE Get current buffer mode.
1133 * BIOCSETVLANPCP Set VLAN PCP tag.
1134 */
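/*
 * Editor's sketch (not part of the original file): minimal userland use
 * of the ioctls above in the default buffered mode. "em0" and the lack
 * of error handling are illustrative only.
 *
 *	#include <sys/types.h>
 *	#include <sys/ioctl.h>
 *	#include <net/bpf.h>
 *	#include <net/if.h>
 *	#include <fcntl.h>
 *	#include <stdlib.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int
 *	capture(void)
 *	{
 *		struct ifreq ifr;
 *		struct bpf_hdr *bh;
 *		char *buf, *p;
 *		u_int blen, on = 1;
 *		ssize_t n;
 *		int fd;
 *
 *		fd = open("/dev/bpf", O_RDONLY);	// clone a descriptor
 *		ioctl(fd, BIOCGBLEN, &blen);		// kernel buffer size
 *		buf = malloc(blen);			// read() size must match
 *		memset(&ifr, 0, sizeof(ifr));
 *		strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *		ioctl(fd, BIOCSETIF, &ifr);		// attach to a tap point
 *		ioctl(fd, BIOCIMMEDIATE, &on);		// return packets ASAP
 *		n = read(fd, buf, blen);
 *		// Each packet is prefixed by a bpf_hdr; records are padded
 *		// to BPF_WORDALIGN() boundaries.
 *		for (p = buf; p < buf + n; ) {
 *			bh = (struct bpf_hdr *)p;
 *			// bh->bh_caplen bytes of data at p + bh->bh_hdrlen
 *			p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *		}
 *		close(fd);
 *		return (0);
 *	}
 */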
1135 /* ARGSUSED */
1136 static int
1137 bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
1138 struct thread *td)
1139 {
1140 struct bpf_d *d;
1141 int error;
1142
1143 error = devfs_get_cdevpriv((void **)&d);
1144 if (error != 0)
1145 return (error);
1146
1147 /*
1148 * Refresh PID associated with this descriptor.
1149 */
1150 BPFD_LOCK(d);
1151 BPF_PID_REFRESH(d, td);
1152 if (d->bd_state == BPF_WAITING)
1153 callout_stop(&d->bd_callout);
1154 d->bd_state = BPF_IDLE;
1155 BPFD_UNLOCK(d);
1156
1157 if (d->bd_flags & BPFD_LOCKED) {
1158 switch (cmd) {
1159 case BIOCGETIFLIST:
1160 case BIOCGBLEN:
1161 case BIOCFLUSH:
1162 case BIOCGDLT:
1163 case BIOCGDLTLIST:
1164 #ifdef COMPAT_FREEBSD32
1165 case BIOCGDLTLIST32:
1166 #endif
1167 case BIOCGETIF:
1168 case BIOCGRTIMEOUT:
1169 #if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1170 case BIOCGRTIMEOUT32:
1171 #endif
1172 case BIOCGSTATS:
1173 case BIOCVERSION:
1174 case BIOCGRSIG:
1175 case BIOCGHDRCMPLT:
1176 case BIOCSTSTAMP:
1177 case BIOCFEEDBACK:
1178 case FIONREAD:
1179 case BIOCLOCK:
1180 case BIOCSRTIMEOUT:
1181 #if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1182 case BIOCSRTIMEOUT32:
1183 #endif
1184 case BIOCIMMEDIATE:
1185 case TIOCGPGRP:
1186 case BIOCROTZBUF:
1187 break;
1188 default:
1189 return (EPERM);
1190 }
1191 }
1192 #ifdef COMPAT_FREEBSD32
1193 /*
1194 * If we see a 32-bit compat ioctl, mark the stream as 32-bit so
1195 * that it will get 32-bit packet headers.
1196 */
1197 switch (cmd) {
1198 case BIOCSETF32:
1199 case BIOCSETFNR32:
1200 case BIOCSETWF32:
1201 case BIOCGDLTLIST32:
1202 case BIOCGRTIMEOUT32:
1203 case BIOCSRTIMEOUT32:
1204 if (SV_CURPROC_FLAG(SV_ILP32)) {
1205 BPFD_LOCK(d);
1206 d->bd_compat32 = 1;
1207 BPFD_UNLOCK(d);
1208 }
1209 }
1210 #endif
1211
1212 #if defined(COMPAT_FREEBSD32)
1213 if (SV_CURPROC_FLAG(SV_ILP32)) {
1214 /*
1215 * On platforms other than amd64, BIOC[GS]RTIMEOUT32 is equal to
1216 * BIOC[GS]RTIMEOUT. Since this is difficult to handle in the
1217 * switch statement, map them.
1218 */
1219 if (cmd == BIOCSRTIMEOUT32)
1220 cmd = BIOCSRTIMEOUT;
1221 if (cmd == BIOCGRTIMEOUT32)
1222 cmd = BIOCGRTIMEOUT;
1223 }
1224 #endif
1225 CURVNET_SET(TD_TO_VNET(td));
1226 switch (cmd) {
1227 default:
1228 error = EINVAL;
1229 break;
1230
1231 /*
1232 * Check for read packet available.
1233 */
1234 case FIONREAD:
1235 {
1236 int n;
1237
1238 BPFD_LOCK(d);
1239 n = d->bd_slen;
1240 while (d->bd_flags & BPFD_HBUF_INUSE)
1241 mtx_sleep(&d->bd_hbuf, &d->bd_lock,
1242 PRINET, "bd_hbuf", 0);
1243 if (d->bd_hbuf)
1244 n += d->bd_hlen;
1245 BPFD_UNLOCK(d);
1246
1247 *(int *)addr = n;
1248 break;
1249 }
1250 /*
1251 * Get list of all tap points.
1252 */
1253 case BIOCGETIFLIST:
1254 error = bpf_getiflist((struct bpf_iflist *)addr);
1255 break;
1256
1257 /*
1258 * Get buffer len [for read()].
1259 */
1260 case BIOCGBLEN:
1261 BPFD_LOCK(d);
1262 *(u_int *)addr = d->bd_bufsize;
1263 BPFD_UNLOCK(d);
1264 break;
1265
1266 /*
1267 * Set buffer length.
1268 */
1269 case BIOCSBLEN:
1270 error = bpf_ioctl_sblen(d, (u_int *)addr);
1271 break;
1272
1273 /*
1274 * Set link layer read filter.
1275 */
1276 case BIOCSETF:
1277 case BIOCSETFNR:
1278 case BIOCSETWF:
1279 #ifdef COMPAT_FREEBSD32
1280 case BIOCSETF32:
1281 case BIOCSETFNR32:
1282 case BIOCSETWF32:
1283 #endif
1284 error = bpf_setf(d, (struct bpf_program *)addr, cmd);
1285 break;
1286
1287 /*
1288 * Flush read packet buffer.
1289 */
1290 case BIOCFLUSH:
1291 BPFD_LOCK(d);
1292 reset_d(d);
1293 BPFD_UNLOCK(d);
1294 break;
1295
1296 /*
1297 * Put interface into promiscuous mode.
1298 */
1299 case BIOCPROMISC:
1300 BPF_LOCK();
1301 if (d->bd_bif == NULL) {
1302 /*
1303 * No interface attached yet.
1304 */
1305 error = EINVAL;
1306 } else if (d->bd_promisc == 0) {
1307 struct bpf_if *bp = d->bd_bif;
1308
1309 if ((error = bif_promisc(bp, true)) == 0)
1310 d->bd_promisc = 1;
1311 }
1312 BPF_UNLOCK();
1313 break;
1314
1315 /*
1316 * Get current data link type.
1317 */
1318 case BIOCGDLT:
1319 BPF_LOCK();
1320 if (d->bd_bif == NULL)
1321 error = EINVAL;
1322 else
1323 *(u_int *)addr = d->bd_bif->bif_dlt;
1324 BPF_UNLOCK();
1325 break;
1326
1327 /*
1328 * Get a list of supported data link types.
1329 */
1330 #ifdef COMPAT_FREEBSD32
1331 case BIOCGDLTLIST32:
1332 {
1333 struct bpf_dltlist32 *list32;
1334 struct bpf_dltlist dltlist;
1335
1336 list32 = (struct bpf_dltlist32 *)addr;
1337 dltlist.bfl_len = list32->bfl_len;
1338 dltlist.bfl_list = PTRIN(list32->bfl_list);
1339 BPF_LOCK();
1340 if (d->bd_bif == NULL)
1341 error = EINVAL;
1342 else {
1343 error = bpf_getdltlist(d, &dltlist);
1344 if (error == 0)
1345 list32->bfl_len = dltlist.bfl_len;
1346 }
1347 BPF_UNLOCK();
1348 break;
1349 }
1350 #endif
1351
1352 case BIOCGDLTLIST:
1353 BPF_LOCK();
1354 if (d->bd_bif == NULL)
1355 error = EINVAL;
1356 else
1357 error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
1358 BPF_UNLOCK();
1359 break;
1360
1361 /*
1362 * Set data link type.
1363 */
1364 case BIOCSDLT:
1365 BPF_LOCK();
1366 if (d->bd_bif == NULL)
1367 error = EINVAL;
1368 else
1369 error = bpf_setdlt(d, *(u_int *)addr);
1370 BPF_UNLOCK();
1371 break;
1372
1373 /*
1374 * Get interface name.
1375 */
1376 case BIOCGETIF:
1377 BPF_LOCK();
1378 if (d->bd_bif == NULL)
1379 error = EINVAL;
1380 else {
1381 struct bpf_if *const bp = d->bd_bif;
1382 struct ifreq *const ifr = (struct ifreq *)addr;
1383
1384 strlcpy(ifr->ifr_name, bp->bif_name,
1385 sizeof(ifr->ifr_name));
1386 }
1387 BPF_UNLOCK();
1388 break;
1389
1390 /*
1391 * Set interface.
1392 */
1393 case BIOCSETIF: {
1394 struct ifreq *const ifr = (struct ifreq *)addr;
1395 struct bpf_if *bp;
1396
1397 /*
1398 * Behavior here depends on the buffering model. If we're
1399 * using kernel memory buffers, then we can allocate them here.
1400 * If we're using zero-copy, then the user process must have
1401 * registered buffers by the time we get here.
1402 */
1403 BPFD_LOCK(d);
1404 if (d->bd_bufmode == BPF_BUFMODE_BUFFER &&
1405 d->bd_sbuf == NULL) {
1406 u_int size;
1407
1408 size = d->bd_bufsize;
1409 BPFD_UNLOCK(d);
1410 error = bpf_buffer_ioctl_sblen(d, &size);
1411 if (error != 0)
1412 break;
1413 } else
1414 BPFD_UNLOCK(d);
1415 BPF_LOCK();
1416 /*
1417 * Look through attached interfaces for the named one.
1418 */
1419 LIST_FOREACH(bp, &V_bpf_iflist, bif_next) {
1420 if (strncmp(ifr->ifr_name, bp->bif_name,
1421 sizeof(ifr->ifr_name)) == 0)
1422 break;
1423 }
1424 if (bp != NULL)
1425 error = bpf_attachd(d, bp);
1426 else
1427 error = ENXIO;
1428 BPF_UNLOCK();
1429 break;
1430 }
1431 /*
1432 * Set read timeout.
1433 */
1434 case BIOCSRTIMEOUT:
1435 {
1436 struct timeval *tv = (struct timeval *)addr;
1437 #ifdef COMPAT_FREEBSD32
1438 struct timeval32 *tv32;
1439 struct timeval tv64;
1440
1441 if (SV_CURPROC_FLAG(SV_ILP32)) {
1442 tv32 = (struct timeval32 *)addr;
1443 tv = &tv64;
1444 tv->tv_sec = tv32->tv_sec;
1445 tv->tv_usec = tv32->tv_usec;
1446 }
1447 #endif
1448
1449 /*
1450 * Subtract 1 tick from tvtohz() since this isn't
1451 * a one-shot timer.
1452 */
1453 if ((error = itimerfix(tv)) == 0)
1454 d->bd_rtout = tvtohz(tv) - 1;
1455 break;
1456 }
1457
1458 /*
1459 * Get read timeout.
1460 */
1461 case BIOCGRTIMEOUT:
1462 {
1463 struct timeval *tv = (struct timeval *)addr;
1464 #ifdef COMPAT_FREEBSD32
1465 struct timeval32 *tv32;
1466 struct timeval tv64;
1467
1468 if (SV_CURPROC_FLAG(SV_ILP32))
1469 tv = &tv64;
1470 #endif
1471 tv->tv_sec = d->bd_rtout / hz;
1472 tv->tv_usec = (d->bd_rtout % hz) * tick;
1473 #ifdef COMPAT_FREEBSD32
1474 if (SV_CURPROC_FLAG(SV_ILP32)) {
1475 tv32 = (struct timeval32 *)addr;
1476 tv32->tv_sec = tv->tv_sec;
1477 tv32->tv_usec = tv->tv_usec;
1478 }
1479 #endif
1480 break;
1481 }
1482
1483 /*
1484 * Get packet stats.
1485 */
1486 case BIOCGSTATS:
1487 {
1488 struct bpf_stat *bs = (struct bpf_stat *)addr;
1489
1490 /* XXXCSJP overflow */
1491 bs->bs_recv = (u_int)counter_u64_fetch(d->bd_rcount);
1492 bs->bs_drop = (u_int)counter_u64_fetch(d->bd_dcount);
1493 break;
1494 }
1495
1496 /*
1497 * Set immediate mode.
1498 */
1499 case BIOCIMMEDIATE:
1500 BPFD_LOCK(d);
1501 d->bd_flags |= *(u_int *)addr ? BPFD_IMMEDIATE : 0;
1502 BPFD_UNLOCK(d);
1503 break;
1504
1505 case BIOCVERSION:
1506 {
1507 struct bpf_version *bv = (struct bpf_version *)addr;
1508
1509 bv->bv_major = BPF_MAJOR_VERSION;
1510 bv->bv_minor = BPF_MINOR_VERSION;
1511 break;
1512 }
1513
1514 /*
1515 * Get "header already complete" flag
1516 */
1517 case BIOCGHDRCMPLT:
1518 BPFD_LOCK(d);
1519 *(u_int *)addr = d->bd_flags & BPFD_HDRCMPLT ? 1 : 0;
1520 BPFD_UNLOCK(d);
1521 break;
1522
1523 /*
1524 * Set "header already complete" flag
1525 */
1526 case BIOCSHDRCMPLT:
1527 BPFD_LOCK(d);
1528 d->bd_flags |= *(u_int *)addr ? BPFD_HDRCMPLT : 0;
1529 BPFD_UNLOCK(d);
1530 break;
1531
1532 /*
1533 * Get packet direction flag
1534 */
1535 case BIOCGDIRECTION:
1536 BPFD_LOCK(d);
1537 *(u_int *)addr = d->bd_direction;
1538 BPFD_UNLOCK(d);
1539 break;
1540
1541 /*
1542 * Set packet direction flag
1543 */
1544 case BIOCSDIRECTION:
1545 {
1546 u_int direction;
1547
1548 direction = *(u_int *)addr;
1549 switch (direction) {
1550 case BPF_D_IN:
1551 case BPF_D_INOUT:
1552 case BPF_D_OUT:
1553 BPFD_LOCK(d);
1554 d->bd_direction = direction;
1555 BPFD_UNLOCK(d);
1556 break;
1557 default:
1558 error = EINVAL;
1559 }
1560 }
1561 break;
1562
1563 /*
1564 * Get packet timestamp format and resolution.
1565 */
1566 case BIOCGTSTAMP:
1567 BPFD_LOCK(d);
1568 *(u_int *)addr = d->bd_tstamp;
1569 BPFD_UNLOCK(d);
1570 break;
1571
1572 /*
1573 * Set packet timestamp format and resolution.
1574 */
1575 case BIOCSTSTAMP:
1576 {
1577 u_int func;
1578
1579 func = *(u_int *)addr;
1580 if (BPF_T_VALID(func))
1581 d->bd_tstamp = func;
1582 else
1583 error = EINVAL;
1584 }
1585 break;
1586
1587 case BIOCFEEDBACK:
1588 BPFD_LOCK(d);
1589 d->bd_flags |= *(u_int *)addr ? BPFD_FEEDBACK : 0;
1590 BPFD_UNLOCK(d);
1591 break;
1592
1593 case BIOCLOCK:
1594 BPFD_LOCK(d);
1595 d->bd_flags |= BPFD_LOCKED;
1596 BPFD_UNLOCK(d);
1597 break;
1598
1599 case FIONBIO: /* Non-blocking I/O */
1600 break;
1601
1602 case FIOASYNC: /* Send signal on receive packets */
1603 BPFD_LOCK(d);
1604 d->bd_flags |= *(u_int *)addr ? BPFD_ASYNC : 0;
1605 BPFD_UNLOCK(d);
1606 break;
1607
1608 case FIOSETOWN:
1609 /*
1610 * XXX: Add some sort of locking here?
1611 * fsetown() can sleep.
1612 */
1613 error = fsetown(*(int *)addr, &d->bd_sigio);
1614 break;
1615
1616 case FIOGETOWN:
1617 BPFD_LOCK(d);
1618 *(int *)addr = fgetown(&d->bd_sigio);
1619 BPFD_UNLOCK(d);
1620 break;
1621
1622 /* This is deprecated, FIOSETOWN should be used instead. */
1623 case TIOCSPGRP:
1624 error = fsetown(-(*(int *)addr), &d->bd_sigio);
1625 break;
1626
1627 /* This is deprecated, FIOGETOWN should be used instead. */
1628 case TIOCGPGRP:
1629 *(int *)addr = -fgetown(&d->bd_sigio);
1630 break;
1631
1632 case BIOCSRSIG: /* Set receive signal */
1633 {
1634 u_int sig;
1635
1636 sig = *(u_int *)addr;
1637
1638 if (sig >= NSIG)
1639 error = EINVAL;
1640 else {
1641 BPFD_LOCK(d);
1642 d->bd_sig = sig;
1643 BPFD_UNLOCK(d);
1644 }
1645 break;
1646 }
1647 case BIOCGRSIG:
1648 BPFD_LOCK(d);
1649 *(u_int *)addr = d->bd_sig;
1650 BPFD_UNLOCK(d);
1651 break;
1652
1653 case BIOCGETBUFMODE:
1654 BPFD_LOCK(d);
1655 *(u_int *)addr = d->bd_bufmode;
1656 BPFD_UNLOCK(d);
1657 break;
1658
1659 case BIOCSETBUFMODE:
1660 /*
1661 * Allow the buffering mode to be changed as long as we
1662 * haven't yet committed to a particular mode. Our
1663 * definition of commitment, for now, is whether or not a
1664 * buffer has been allocated or an interface attached, since
1665 * that's the point where things get tricky.
1666 */
1667 switch (*(u_int *)addr) {
1668 case BPF_BUFMODE_BUFFER:
1669 break;
1670
1671 case BPF_BUFMODE_ZBUF:
1672 if (bpf_zerocopy_enable)
1673 break;
1674 /* FALLTHROUGH */
1675
1676 default:
1677 CURVNET_RESTORE();
1678 return (EINVAL);
1679 }
1680
1681 BPFD_LOCK(d);
1682 if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
1683 d->bd_fbuf != NULL || d->bd_bif != NULL) {
1684 BPFD_UNLOCK(d);
1685 CURVNET_RESTORE();
1686 return (EBUSY);
1687 }
1688 d->bd_bufmode = *(u_int *)addr;
1689 BPFD_UNLOCK(d);
1690 break;
1691
1692 case BIOCGETZMAX:
1693 error = bpf_ioctl_getzmax(td, d, (size_t *)addr);
1694 break;
1695
1696 case BIOCSETZBUF:
1697 error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr);
1698 break;
1699
1700 case BIOCROTZBUF:
1701 error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr);
1702 break;
1703
1704 case BIOCSETVLANPCP:
1705 {
1706 u_int pcp;
1707
1708 pcp = *(u_int *)addr;
1709 if (pcp > BPF_PRIO_MAX || pcp < 0) {
1710 error = EINVAL;
1711 break;
1712 }
1713 d->bd_pcp = pcp;
1714 break;
1715 }
1716 }
1717 CURVNET_RESTORE();
1718 return (error);
1719 }
1720
1721 /*
1722 * Return the list of available tapping points, or report how much space is
1723 * required for a successful return.
1724 */
1725 static int
1726 bpf_getiflist(struct bpf_iflist *bi)
1727 {
1728 struct bpf_if *bp;
1729 u_int allsize, size, cnt;
1730 char *uaddr;
1731
1732 BPF_LOCK();
1733
1734 cnt = allsize = size = 0;
1735 LIST_FOREACH(bp, &V_bpf_iflist, bif_next) {
1736 allsize += strlen(bp->bif_name) + 1;
1737 if (++cnt == bi->bi_count)
1738 size = allsize;
1739 }
1740 if (size == 0)
1741 size = allsize;
1742
1743 if (bi->bi_size == 0) {
1744 BPF_UNLOCK();
1745 bi->bi_size = size;
1746 bi->bi_count = cnt;
1747 return (0);
1748 } else if (bi->bi_size < size) {
1749 BPF_UNLOCK();
1750 return (ENOSPC);
1751 }
1752
1753 uaddr = bi->bi_ubuf;
1754 cnt = 0;
1755 LIST_FOREACH(bp, &V_bpf_iflist, bif_next) {
1756 u_int len;
1757 int error;
1758
1759 len = strlen(bp->bif_name) + 1;
1760 if ((error = copyout(bp->bif_name, uaddr, len)) != 0) {
1761 BPF_UNLOCK();
1762 return (error);
1763 }
1764 if (++cnt == bi->bi_count)
1765 break;
1766 uaddr += len;
1767 }
1768 BPF_UNLOCK();
1769 bi->bi_count = cnt;
1770
1771 return (0);
1772 }
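/*
 * Editor's sketch of the two-pass protocol implemented above: a first
 * call with bi_size == 0 reports the space required, a second call with
 * a buffer fetches the NUL-separated names. Error handling omitted.
 *
 *	struct bpf_iflist bi;
 *
 *	memset(&bi, 0, sizeof(bi));
 *	ioctl(fd, BIOCGETIFLIST, &bi);		// sizing pass
 *	bi.bi_ubuf = malloc(bi.bi_size);
 *	ioctl(fd, BIOCGETIFLIST, &bi);		// copyout pass
 */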
1773
1774 /*
1775 * Set d's packet filter program to fp. If this file already has a filter,
1776 * free it and replace it. Returns EINVAL for bogus requests.
1777 *
1778 * Note we use global lock here to serialize bpf_setf() and bpf_setif()
1779 * calls.
1780 */
1781 static int
1782 bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
1783 {
1784 #ifdef COMPAT_FREEBSD32
1785 struct bpf_program fp_swab;
1786 struct bpf_program32 *fp32;
1787 #endif
1788 struct bpf_program_buffer *fcode;
1789 struct bpf_insn *filter;
1790 #ifdef BPF_JITTER
1791 bpf_jit_filter *jfunc;
1792 #endif
1793 size_t size;
1794 u_int flen;
1795 bool track_event;
1796
1797 #ifdef COMPAT_FREEBSD32
1798 switch (cmd) {
1799 case BIOCSETF32:
1800 case BIOCSETWF32:
1801 case BIOCSETFNR32:
1802 fp32 = (struct bpf_program32 *)fp;
1803 fp_swab.bf_len = fp32->bf_len;
1804 fp_swab.bf_insns =
1805 (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
1806 fp = &fp_swab;
1807 switch (cmd) {
1808 case BIOCSETF32:
1809 cmd = BIOCSETF;
1810 break;
1811 case BIOCSETWF32:
1812 cmd = BIOCSETWF;
1813 break;
1814 }
1815 break;
1816 }
1817 #endif
1818
1819 filter = NULL;
1820 #ifdef BPF_JITTER
1821 jfunc = NULL;
1822 #endif
1823 /*
1824 * Check the new filter's validity before acquiring any locks.
1825 * Allocate memory for the new filter, if needed.
1826 */
1827 flen = fp->bf_len;
1828 if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0))
1829 return (EINVAL);
1830 size = flen * sizeof(*fp->bf_insns);
1831 if (size > 0) {
1832 /* We're setting up new filter. Copy and check actual data. */
1833 fcode = bpf_program_buffer_alloc(size, M_WAITOK);
1834 filter = (struct bpf_insn *)fcode->buffer;
1835 if (copyin(fp->bf_insns, filter, size) != 0 ||
1836 !bpf_validate(filter, flen)) {
1837 free(fcode, M_BPF);
1838 return (EINVAL);
1839 }
1840 #ifdef BPF_JITTER
1841 if (cmd != BIOCSETWF) {
1842 /*
1843 * Filter is copied inside fcode and is
1844 * perfectly valid.
1845 */
1846 jfunc = bpf_jitter(filter, flen);
1847 }
1848 #endif
1849 }
1850
1851 track_event = false;
1852 fcode = NULL;
1853
1854 BPF_LOCK();
1855 BPFD_LOCK(d);
1856 /* Set up new filter. */
1857 if (cmd == BIOCSETWF) {
1858 if (d->bd_wfilter != NULL) {
1859 fcode = __containerof((void *)d->bd_wfilter,
1860 struct bpf_program_buffer, buffer);
1861 #ifdef BPF_JITTER
1862 fcode->func = NULL;
1863 #endif
1864 }
1865 d->bd_wfilter = filter;
1866 } else {
1867 if (d->bd_rfilter != NULL) {
1868 fcode = __containerof((void *)d->bd_rfilter,
1869 struct bpf_program_buffer, buffer);
1870 #ifdef BPF_JITTER
1871 fcode->func = d->bd_bfilter;
1872 #endif
1873 }
1874 d->bd_rfilter = filter;
1875 #ifdef BPF_JITTER
1876 d->bd_bfilter = jfunc;
1877 #endif
1878 if (cmd == BIOCSETF)
1879 reset_d(d);
1880
1881 if (bpf_check_upgrade(cmd, d, filter, flen) != 0) {
1882 /*
1883 * A filter can be set several times without
1884 * specifying an interface. In this case just mark d
1885 * as a reader.
1886 */
1887 d->bd_writer = 0;
1888 if (d->bd_bif != NULL) {
1889 /*
1890 * Remove descriptor from writers-only list
1891 * and add it to active readers list.
1892 */
1893 CK_LIST_REMOVE(d, bd_next);
1894 CK_LIST_INSERT_HEAD(&d->bd_bif->bif_dlist,
1895 d, bd_next);
1896 CTR2(KTR_NET,
1897 "%s: upgrade required by pid %d",
1898 __func__, d->bd_pid);
1899 track_event = true;
1900 }
1901 }
1902 }
1903 BPFD_UNLOCK(d);
1904
1905 if (fcode != NULL)
1906 NET_EPOCH_CALL(bpf_program_buffer_free, &fcode->epoch_ctx);
1907
1908 if (track_event)
1909 bif_attachd(d->bd_bif);
1910
1911 BPF_UNLOCK();
1912 return (0);
1913 }
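/*
 * Editor's sketch: installing a read filter with BIOCSETF. The program
 * below accepts every packet, truncated to 96 bytes; a real program
 * would come from bpf_asm or pcap_compile(). nitems() is from
 * <sys/param.h>.
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET | BPF_K, 96),
 *	};
 *	struct bpf_program prog = {
 *		.bf_len = nitems(insns),
 *		.bf_insns = insns,
 *	};
 *
 *	ioctl(fd, BIOCSETF, &prog);
 */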
1914
1915 /*
1916 * Attach the descriptor to a tap point, possibly detaching from the
1917 * old one, and reset the counters.
1918 * XXXGL: this KPI is subject to change
1919 */
1920 static int
1921 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
1922 {
1923 bool writer;
1924
1925 BPF_LOCK_ASSERT();
1926
1927 /*
1928 * At this point, we expect the buffer is already allocated. If not,
1929 * return an error.
1930 */
1931 switch (d->bd_bufmode) {
1932 case BPF_BUFMODE_BUFFER:
1933 case BPF_BUFMODE_ZBUF:
1934 if (d->bd_sbuf == NULL)
1935 return (EINVAL);
1936 break;
1937
1938 default:
1939 panic("%s: bufmode %d", __func__, d->bd_bufmode);
1940 }
1941
1942 if (bp == d->bd_bif) {
1943 BPFD_LOCK(d);
1944 reset_d(d);
1945 BPFD_UNLOCK(d);
1946 return (0);
1947 } else if (d->bd_bif != NULL)
1948 bpf_detachd(d, false);
1949
1950 /*
1951 * Save sysctl value to protect from sysctl change between reads.
1952 */
1953 writer = V_bpf_optimize_writers || (d->bd_writer > 0);
1954
1955 /*
1956 * Point d at bp, and add d to the interface's list.
1957 * Since there are many applications using BPF for
1958 * sending raw packets only (dhcpd, cdpd are good examples)
1959 * we can delay adding d to the list of active listeners until
1960 * some filter is configured.
1961 */
1962 BPFD_LOCK(d);
1963 /*
1964 * Hold a reference to the bpf_if while the descriptor uses this interface.
1965 */
1966 bpfif_ref(bp);
1967 d->bd_bif = bp;
1968 if (writer) {
1969 /* Add to writers-only list */
1970 CK_LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
1971 /*
1972 * We decrement bd_writer on every filter set operation.
1973 * First BIOCSETF is done by pcap_open_live() to set up
1974 * snap length. After that application usually sets its own
1975 * filter.
1976 */
1977 d->bd_writer = 2;
1978 } else
1979 CK_LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
1980
1981 reset_d(d);
1982
1983 /* Trigger EVFILT_WRITE events. */
1984 bpf_wakeup(d);
1985
1986 BPFD_UNLOCK(d);
1987
1988 CTR3(KTR_NET, "%s: called by pid %d, adding to %s list",
1989 __func__, d->bd_pid, d->bd_writer ? "writer" : "active");
1990
1991 if (!writer)
1992 bif_attachd(bp);
1993
1994 return (0);
1995 }
1996
1997 /*
1998 * Support for select() and poll() system calls
1999 *
2000 * Return true iff the specific operation will not block indefinitely.
2001 * Otherwise, return false but make a note that a selwakeup() must be done.
2002 */
2003 static int
2004 bpfpoll(struct cdev *dev, int events, struct thread *td)
2005 {
2006 struct bpf_d *d;
2007 int revents;
2008
2009 if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL)
2010 return (events &
2011 (POLLHUP | POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM));
2012
2013 /*
2014 * Refresh PID associated with this descriptor.
2015 */
2016 revents = events & (POLLOUT | POLLWRNORM);
2017 BPFD_LOCK(d);
2018 BPF_PID_REFRESH(d, td);
2019 if (events & (POLLIN | POLLRDNORM)) {
2020 if (bpf_ready(d))
2021 revents |= events & (POLLIN | POLLRDNORM);
2022 else {
2023 selrecord(td, &d->bd_sel);
2024 /* Start the read timeout if necessary. */
2025 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
2026 callout_reset(&d->bd_callout, d->bd_rtout,
2027 bpf_timed_out, d);
2028 d->bd_state = BPF_WAITING;
2029 }
2030 }
2031 }
2032 BPFD_UNLOCK(d);
2033 return (revents);
2034 }
2035
2036 /*
2037 * Support for kevent() system call. Register EVFILT_READ filters and
2038 * reject all others.
2039 */
2040 int
2041 bpfkqfilter(struct cdev *dev, struct knote *kn)
2042 {
2043 struct bpf_d *d;
2044
2045 if (devfs_get_cdevpriv((void **)&d) != 0)
2046 return (1);
2047
2048 switch (kn->kn_filter) {
2049 case EVFILT_READ:
2050 kn->kn_fop = &bpfread_filtops;
2051 break;
2052
2053 case EVFILT_WRITE:
2054 kn->kn_fop = &bpfwrite_filtops;
2055 break;
2056
2057 default:
2058 return (1);
2059 }
2060
2061 /*
2062 * Refresh PID associated with this descriptor.
2063 */
2064 BPFD_LOCK(d);
2065 BPF_PID_REFRESH_CUR(d);
2066 kn->kn_hook = d;
2067 knlist_add(&d->bd_sel.si_note, kn, 1);
2068 BPFD_UNLOCK(d);
2069
2070 return (0);
2071 }
2072
2073 static void
2074 filt_bpfdetach(struct knote *kn)
2075 {
2076 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2077
2078 knlist_remove(&d->bd_sel.si_note, kn, 0);
2079 }
2080
2081 static int
2082 filt_bpfread(struct knote *kn, long hint)
2083 {
2084 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2085 int ready;
2086
2087 BPFD_LOCK_ASSERT(d);
2088 ready = bpf_ready(d);
2089 if (ready) {
2090 kn->kn_data = d->bd_slen;
2091 /*
2092 * Ignore the hold buffer if it is being copied to user space.
2093 */
2094 if (!(d->bd_flags & BPFD_HBUF_INUSE) && d->bd_hbuf)
2095 kn->kn_data += d->bd_hlen;
2096 } else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
2097 callout_reset(&d->bd_callout, d->bd_rtout,
2098 bpf_timed_out, d);
2099 d->bd_state = BPF_WAITING;
2100 }
2101
2102 return (ready);
2103 }
2104
2105 static int
2106 filt_bpfwrite(struct knote *kn, long hint)
2107 {
2108 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2109
2110 BPFD_LOCK_ASSERT(d);
2111
2112 if (d->bd_bif == NULL) {
2113 kn->kn_data = 0;
2114 return (0);
2115 } else {
2116 kn->kn_data = bif_wrsize(d->bd_bif);
2117 return (1);
2118 }
2119 }
2120
2121 #define BPF_TSTAMP_NONE 0
2122 #define BPF_TSTAMP_FAST 1
2123 #define BPF_TSTAMP_NORMAL 2
2124 #define BPF_TSTAMP_EXTERN 3
2125
2126 static int
2127 bpf_ts_quality(int tstype)
2128 {
2129
2130 if (tstype == BPF_T_NONE)
2131 return (BPF_TSTAMP_NONE);
2132 if ((tstype & BPF_T_FAST) != 0)
2133 return (BPF_TSTAMP_FAST);
2134
2135 return (BPF_TSTAMP_NORMAL);
2136 }
2137
2138 static int
2139 bpf_gettime(struct bintime *bt, int tstype, struct mbuf *m)
2140 {
2141 struct timespec ts;
2142 struct m_tag *tag;
2143 int quality;
2144
2145 quality = bpf_ts_quality(tstype);
2146 if (quality == BPF_TSTAMP_NONE)
2147 return (quality);
2148
2149 if (m != NULL) {
2150 if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) {
2151 mbuf_tstmp2timespec(m, &ts);
2152 timespec2bintime(&ts, bt);
2153 return (BPF_TSTAMP_EXTERN);
2154 }
2155 tag = m_tag_locate(m, MTAG_BPF, MTAG_BPF_TIMESTAMP, NULL);
2156 if (tag != NULL) {
2157 *bt = *(struct bintime *)(tag + 1);
2158 return (BPF_TSTAMP_EXTERN);
2159 }
2160 }
2161 if (quality == BPF_TSTAMP_NORMAL)
2162 binuptime(bt);
2163 else
2164 getbinuptime(bt);
2165
2166 return (quality);
2167 }
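
/*
 * The quality ranking above lets a descriptor trade timestamp accuracy
 * for cost: BPF_T_NONE skips timestamping entirely, BPF_T_FAST variants
 * use the cheaper getbinuptime(9), and timestamps already supplied by
 * the driver (M_TSTMP) or an mbuf tag win outright.  Illustrative
 * sketch of a consumer selecting a mode with BIOCSTSTAMP (hypothetical
 * fd name):
 *
 *	u_int t = BPF_T_NANOTIME | BPF_T_MONOTONIC;
 *
 *	if (ioctl(bpf_fd, BIOCSTSTAMP, &t) == -1)
 *		err(1, "BIOCSTSTAMP");
 */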

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
	struct epoch_tracker et;
	struct bintime bt;
	struct bpf_d *d;
#ifdef BPF_JITTER
	bpf_jit_filter *bf;
#endif
	u_int slen;
	int gottime;

	gottime = BPF_TSTAMP_NONE;
	NET_EPOCH_ENTER(et);
	CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		counter_u64_add(d->bd_rcount, 1);
		/*
		 * NB: We don't check the direction here since there
		 * is no way for the caller to indicate to us whether this
		 * packet is inbound or outbound.  In the bpf_mtap() routines,
		 * we use the interface pointers on the mbuf to figure it out.
		 */
#ifdef BPF_JITTER
		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
		if (bf != NULL)
			slen = (*(bf->func))(pkt, pktlen, pktlen);
		else
#endif
			slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
		if (slen != 0) {
			/*
			 * Filter matches, so acquire the write lock.
			 */
			BPFD_LOCK(d);
			counter_u64_add(d->bd_fcount, 1);
			if (gottime < bpf_ts_quality(d->bd_tstamp))
				gottime = bpf_gettime(&bt, d->bd_tstamp,
				    NULL);
#ifdef MAC
			if (bif_mac_check_receive(bp, d) == 0)
#endif
				catchpacket(d, pkt, pktlen, slen,
				    bpf_append_bytes, &bt);
			BPFD_UNLOCK(d);
		}
	}
	NET_EPOCH_EXIT(et);
}

void
bpf_tap_if(if_t ifp, u_char *pkt, u_int pktlen)
{
	if (bpf_peers_present(ifp->if_bpf))
		bpf_tap(ifp->if_bpf, pkt, pktlen);
}
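
/*
 * Illustrative driver receive path (hypothetical softc and names): a
 * driver whose packet sits in a contiguous buffer can tap it before
 * building an mbuf.  bpf_tap_if() hides the bpf_peers_present() check,
 * so the call is cheap when nobody is listening.
 *
 *	static void
 *	mydrv_rxeof(struct mydrv_softc *sc, u_char *buf, u_int len)
 *	{
 *		bpf_tap_if(sc->sc_ifp, buf, len);
 *		...
 *	}
 */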

/*
 * Incoming linkage from device drivers, when the packet is in an mbuf chain.
 * The locking model is explained in bpf_tap().
 */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	struct epoch_tracker et;
	struct bintime bt;
	struct bpf_d *d;
#ifdef BPF_JITTER
	bpf_jit_filter *bf;
#endif
	u_int pktlen, slen;
	int gottime;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m_rcvif(m) == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	pktlen = m_length(m, NULL);
	gottime = BPF_TSTAMP_NONE;

	NET_EPOCH_ENTER(et);
	CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (bpf_chkdir(d, m))
			continue;
		counter_u64_add(d->bd_rcount, 1);
#ifdef BPF_JITTER
		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
		/* XXX We cannot handle multiple mbufs. */
		if (bf != NULL && m->m_next == NULL)
			slen = (*(bf->func))(mtod(m, u_char *), pktlen,
			    pktlen);
		else
#endif
			slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
		if (slen != 0) {
			BPFD_LOCK(d);

			counter_u64_add(d->bd_fcount, 1);
			if (gottime < bpf_ts_quality(d->bd_tstamp))
				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
#ifdef MAC
			if (bif_mac_check_receive(bp, d) == 0)
#endif
				catchpacket(d, (u_char *)m, pktlen, slen,
				    bpf_append_mbuf, &bt);
			BPFD_UNLOCK(d);
		}
	}
	NET_EPOCH_EXIT(et);
}

void
bpf_mtap_if(if_t ifp, struct mbuf *m)
{
	if (bpf_peers_present(ifp->if_bpf)) {
		M_ASSERTVALID(m);
		bpf_mtap(ifp->if_bpf, m);
	}
}

/*
 * Incoming linkage from device drivers, when the packet is in
 * an mbuf chain and is to be prepended by a contiguous header.
 */
void
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
	struct epoch_tracker et;
	struct bintime bt;
	struct mbuf mb;
	struct bpf_d *d;
	u_int pktlen, slen;
	int gottime;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	pktlen = m_length(m, NULL);
	/*
	 * Craft an on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only set up what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	mb.m_flags = 0;
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;
	pktlen += dlen;

	gottime = BPF_TSTAMP_NONE;

	NET_EPOCH_ENTER(et);
	CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (bpf_chkdir(d, m))
			continue;
		counter_u64_add(d->bd_rcount, 1);
		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
		if (slen != 0) {
			BPFD_LOCK(d);

			counter_u64_add(d->bd_fcount, 1);
			if (gottime < bpf_ts_quality(d->bd_tstamp))
				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
#ifdef MAC
			if (bif_mac_check_receive(bp, d) == 0)
#endif
				catchpacket(d, (u_char *)&mb, pktlen, slen,
				    bpf_append_mbuf, &bt);
			BPFD_UNLOCK(d);
		}
	}
	NET_EPOCH_EXIT(et);
}

void
bpf_mtap2_if(if_t ifp, void *data, u_int dlen, struct mbuf *m)
{
	if (bpf_peers_present(ifp->if_bpf)) {
		M_ASSERTVALID(m);
		bpf_mtap2(ifp->if_bpf, data, dlen, m);
	}
}
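
/*
 * Illustrative use of bpf_mtap2_if() (hypothetical header type): a
 * facility that synthesizes its link-layer header at tap time, e.g. a
 * radiotap-style pseudo header kept on the stack, can prepend it
 * without touching the mbuf chain.
 *
 *	struct myhdr mh;
 *
 *	mh.mh_flags = ...;
 *	bpf_mtap2_if(ifp, &mh, sizeof(mh), m);
 */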

#undef	BPF_TSTAMP_NONE
#undef	BPF_TSTAMP_FAST
#undef	BPF_TSTAMP_NORMAL
#undef	BPF_TSTAMP_EXTERN

static int
bpf_hdrlen(struct bpf_d *d)
{
	int hdrlen;

	hdrlen = d->bd_bif->bif_hdrlen;
#ifndef BURN_BRIDGES
	if (d->bd_tstamp == BPF_T_NONE ||
	    BPF_T_FORMAT(d->bd_tstamp) == BPF_T_MICROTIME)
#ifdef COMPAT_FREEBSD32
		if (d->bd_compat32)
			hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr32);
		else
#endif
			hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr);
	else
#endif
		hdrlen += SIZEOF_BPF_HDR(struct bpf_xhdr);
#ifdef COMPAT_FREEBSD32
	if (d->bd_compat32)
		hdrlen = BPF_WORDALIGN32(hdrlen);
	else
#endif
		hdrlen = BPF_WORDALIGN(hdrlen);

	return (hdrlen - d->bd_bif->bif_hdrlen);
}
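
/*
 * Worked example (assuming LP64 and an Ethernet tap point, i.e.
 * bif_hdrlen == 14): with the default BPF_T_MICROTIME format,
 * SIZEOF_BPF_HDR(struct bpf_hdr) is 26, 14 + 26 = 40 is already a
 * multiple of BPF_ALIGNMENT, and bpf_hdrlen() returns 40 - 14 = 26.
 * The point of aligning the sum before subtracting is that the payload
 * behind the link-layer header, not the bpf header itself, ends up
 * word-aligned in the store buffer.
 */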

static void
bpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype)
{
	struct bintime bt2, boottimebin;
	struct timeval tsm;
	struct timespec tsn;

	if ((tstype & BPF_T_MONOTONIC) == 0) {
		bt2 = *bt;
		getboottimebin(&boottimebin);
		bintime_add(&bt2, &boottimebin);
		bt = &bt2;
	}
	switch (BPF_T_FORMAT(tstype)) {
	case BPF_T_MICROTIME:
		bintime2timeval(bt, &tsm);
		ts->bt_sec = tsm.tv_sec;
		ts->bt_frac = tsm.tv_usec;
		break;
	case BPF_T_NANOTIME:
		bintime2timespec(bt, &tsn);
		ts->bt_sec = tsn.tv_sec;
		ts->bt_frac = tsn.tv_nsec;
		break;
	case BPF_T_BINTIME:
		ts->bt_sec = bt->sec;
		ts->bt_frac = bt->frac;
		break;
	}
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  "cpfn" is the routine called to do the actual data
 * transfer.  bpf_append_bytes is passed in to copy contiguous chunks,
 * while bpf_append_mbuf is passed in to copy mbuf chains.  In the
 * latter case, pkt is really an mbuf.
 */
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
    void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
    struct bintime *bt)
{
	static char zeroes[BPF_ALIGNMENT];
	struct bpf_xhdr hdr;
#ifndef BURN_BRIDGES
	struct bpf_hdr hdr_old;
#ifdef COMPAT_FREEBSD32
	struct bpf_hdr32 hdr32_old;
#endif
#endif
	int caplen, curlen, hdrlen, pad, totlen;
	int do_wakeup = 0;
	int do_timestamp;
	int tstype;

	BPFD_LOCK_ASSERT(d);
	if (d->bd_bif == NULL) {
		/* Descriptor was detached in a concurrent thread. */
		counter_u64_add(d->bd_dcount, 1);
		return;
	}

	/*
	 * Detect whether user space has released a buffer back to us, and if
	 * so, move it from being a hold buffer to a free buffer.  This may
	 * not be the best place to do it (for example, we might only want to
	 * run this check if we need the space), but for now it's a reliable
	 * spot to do it.
	 */
	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
		d->bd_hlen = 0;
		bpf_buf_reclaimed(d);
	}

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	hdrlen = bpf_hdrlen(d);
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 *
	 * Drop the packet if there's no room and no hope of room.
	 * If the packet would overflow the storage buffer or the storage
	 * buffer is considered immutable by the buffer model, try to rotate
	 * the buffer and wakeup pending processes.
	 */
#ifdef COMPAT_FREEBSD32
	if (d->bd_compat32)
		curlen = BPF_WORDALIGN32(d->bd_slen);
	else
#endif
		curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
		if (d->bd_fbuf == NULL) {
			/*
			 * There's no room in the store buffer, and no
			 * prospect of room, so drop the packet.  Notify the
			 * buffer model.
			 */
			bpf_buffull(d);
			counter_u64_add(d->bd_dcount, 1);
			return;
		}
		KASSERT(!(d->bd_flags & BPFD_HBUF_INUSE),
		    ("hold buffer is in use"));
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else {
		if ((d->bd_flags & BPFD_IMMEDIATE) ||
		    d->bd_state == BPF_TIMED_OUT) {
			/*
			 * Immediate mode is set, or the read timeout has
			 * already expired during a select call.  A packet
			 * arrived, so the reader should be woken up.
			 */
			do_wakeup = 1;
		}
		pad = curlen - d->bd_slen;
		KASSERT(pad >= 0 && pad <= sizeof(zeroes),
		    ("%s: invalid pad byte count %d", __func__, pad));
		if (pad > 0) {
			/* Zero pad bytes. */
			bpf_append_bytes(d, d->bd_sbuf, d->bd_slen, zeroes,
			    pad);
		}
	}

	caplen = totlen - hdrlen;
	tstype = d->bd_tstamp;
	do_timestamp = tstype != BPF_T_NONE;
#ifndef BURN_BRIDGES
	if (tstype == BPF_T_NONE || BPF_T_FORMAT(tstype) == BPF_T_MICROTIME) {
		struct bpf_ts ts;
		if (do_timestamp)
			bpf_bintime2ts(bt, &ts, tstype);
#ifdef COMPAT_FREEBSD32
		if (d->bd_compat32) {
			bzero(&hdr32_old, sizeof(hdr32_old));
			if (do_timestamp) {
				hdr32_old.bh_tstamp.tv_sec = ts.bt_sec;
				hdr32_old.bh_tstamp.tv_usec = ts.bt_frac;
			}
			hdr32_old.bh_datalen = pktlen;
			hdr32_old.bh_hdrlen = hdrlen;
			hdr32_old.bh_caplen = caplen;
			bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32_old,
			    sizeof(hdr32_old));
			goto copy;
		}
#endif
		bzero(&hdr_old, sizeof(hdr_old));
		if (do_timestamp) {
			hdr_old.bh_tstamp.tv_sec = ts.bt_sec;
			hdr_old.bh_tstamp.tv_usec = ts.bt_frac;
		}
		hdr_old.bh_datalen = pktlen;
		hdr_old.bh_hdrlen = hdrlen;
		hdr_old.bh_caplen = caplen;
		bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr_old,
		    sizeof(hdr_old));
		goto copy;
	}
#endif

	/*
	 * Append the bpf header.  Note we append the actual header size, but
	 * move forward the length of the header plus padding.
	 */
	bzero(&hdr, sizeof(hdr));
	if (do_timestamp)
		bpf_bintime2ts(bt, &hdr.bh_tstamp, tstype);
	hdr.bh_datalen = pktlen;
	hdr.bh_hdrlen = hdrlen;
	hdr.bh_caplen = caplen;
	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
#ifndef BURN_BRIDGES
copy:
#endif
	(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, caplen);
	d->bd_slen = curlen + totlen;

	if (do_wakeup)
		bpf_wakeup(d);
}
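
/*
 * Illustrative userland counterpart (assuming an xhdr-producing
 * timestamp format was selected with BIOCSTSTAMP; the default
 * BPF_T_MICROTIME mode uses struct bpf_hdr instead): records written
 * by catchpacket() are walked with bh_hdrlen/bh_caplen and the same
 * BPF_WORDALIGN() rounding applied above.
 *
 *	u_char *p = buf;
 *
 *	while (p < buf + nread) {
 *		struct bpf_xhdr *bh = (struct bpf_xhdr *)p;
 *
 *		handle_packet(p + bh->bh_hdrlen, bh->bh_caplen);
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */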

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpfd_free(epoch_context_t ctx)
{
	struct bpf_d *d;
	struct bpf_program_buffer *p;

	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and has not yet been marked
	 * free.
	 */
	d = __containerof(ctx, struct bpf_d, epoch_ctx);
	bpf_free(d);
	if (d->bd_rfilter != NULL) {
		p = __containerof((void *)d->bd_rfilter,
		    struct bpf_program_buffer, buffer);
#ifdef BPF_JITTER
		p->func = d->bd_bfilter;
#endif
		bpf_program_buffer_free(&p->epoch_ctx);
	}
	if (d->bd_wfilter != NULL) {
		p = __containerof((void *)d->bd_wfilter,
		    struct bpf_program_buffer, buffer);
#ifdef BPF_JITTER
		p->func = NULL;
#endif
		bpf_program_buffer_free(&p->epoch_ctx);
	}

	mtx_destroy(&d->bd_lock);
	counter_u64_free(d->bd_rcount);
	counter_u64_free(d->bd_dcount);
	counter_u64_free(d->bd_fcount);
	counter_u64_free(d->bd_wcount);
	counter_u64_free(d->bd_wfcount);
	counter_u64_free(d->bd_wdcount);
	counter_u64_free(d->bd_zcopy);
	free(d, M_BPF);
}

/*
 * Attach a tap point to bpf.
 * XXX: with the current KPI it is the consumer's responsibility to avoid
 * duplicates.
 */
struct bpf_if *
bpf_attach(const char *name, u_int dlt, u_int hdrlen,
    const struct bif_methods *methods, void *sc)
{
	struct bpf_if *bp;

	bp = malloc(sizeof(*bp), M_BPF, M_WAITOK | M_ZERO);

	CK_LIST_INIT(&bp->bif_dlist);
	CK_LIST_INIT(&bp->bif_wlist);
	bp->bif_dlt = dlt;
	bp->bif_hdrlen = hdrlen;
	bp->bif_softc = sc;
	bp->bif_name = name;
	bp->bif_methods = methods;
	refcount_init(&bp->bif_refcnt, 1);
	BPF_LOCK();
	LIST_INSERT_HEAD(&V_bpf_iflist, bp, bif_next);
	BPF_UNLOCK();

	return (bp);
}
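
/*
 * Illustrative provider (hypothetical names): a facility requesting a
 * tap point supplies its methods table and softc.  Only the arguments
 * shown are assumed here; see struct bif_methods for the full contract.
 *
 *	static const struct bif_methods mydrv_bpf_methods = {
 *		.bif_promisc = mydrv_bpf_promisc,
 *		...
 *	};
 *
 *	sc->sc_bpf = bpf_attach(if_name(sc->sc_ifp), DLT_EN10MB,
 *	    ETHER_HDR_LEN, &mydrv_bpf_methods, sc);
 */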

#ifdef VIMAGE
/*
 * Detach descriptors on interface's vmove event.
 * XXXGL: shouldn't be a special case, but a full detach.
 */
void
bpf_ifdetach(struct ifnet *ifp)
{
	struct bpf_if *bp;
	struct bpf_d *d;

	BPF_LOCK();
	LIST_FOREACH(bp, &V_bpf_iflist, bif_next) {
		/* XXXGL: assuming softc is ifnet here */
		if (bp->bif_softc != ifp)
			continue;

		/* Detach common descriptors */
		while ((d = CK_LIST_FIRST(&bp->bif_dlist)) != NULL) {
			bpf_detachd(d, true);
		}

		/* Detach writer-only descriptors */
		while ((d = CK_LIST_FIRST(&bp->bif_wlist)) != NULL) {
			bpf_detachd(d, true);
		}
	}
	BPF_UNLOCK();
}
#endif

/*
 * Detach a bpf tap point.  This involves detaching each descriptor
 * associated with the interface.  Notify each descriptor as it's detached
 * so that any sleepers wake up and get ENXIO.
 */
void
bpf_detach(struct bpf_if *bp)
{
	struct bpf_d *d;

	BPF_LOCK();
	LIST_REMOVE(bp, bif_next);

	CTR3(KTR_NET, "%s: scheduling free for encap %d for bp %p",
	    __func__, bp->bif_dlt, bp);

	/* Detach common descriptors */
	while ((d = CK_LIST_FIRST(&bp->bif_dlist)) != NULL) {
		bpf_detachd(d, true);
	}

	/* Detach writer-only descriptors */
	while ((d = CK_LIST_FIRST(&bp->bif_wlist)) != NULL) {
		bpf_detachd(d, true);
	}
	bpfif_rele(bp);
	BPF_UNLOCK();
}

#ifdef VIMAGE
/*
 * Move bpf to a different VNET.  This KPI is a crutch to support if_vmove
 * and is not supposed to be used anywhere else.
 */
void
bpf_vmove(struct bpf_if *bp)
{

	BPF_LOCK();
	LIST_REMOVE(bp, bif_next);
	LIST_INSERT_HEAD(&V_bpf_iflist, bp, bif_next);
	BPF_UNLOCK();
}
#endif

bool
bpf_peers_present_if(struct ifnet *ifp)
{
	return (bpf_peers_present(ifp->if_bpf));
}

/*
 * Get a list of the available data link types of the tap point.  If a tap
 * point attaches more than once, it is supposed to attach with different
 * DLTs but with the same name pointer.
 */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	const char *name;
	struct bpf_if *bp;
	u_int *lst;
	int error, n, n1;

	BPF_LOCK_ASSERT();

	name = d->bd_bif->bif_name;
	n1 = 0;
	LIST_FOREACH(bp, &V_bpf_iflist, bif_next) {
		if (bp->bif_name == name)
			n1++;
	}
	if (bfl->bfl_list == NULL) {
		bfl->bfl_len = n1;
		return (0);
	}
	if (n1 > bfl->bfl_len)
		return (ENOMEM);

	lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK);
	n = 0;
	LIST_FOREACH(bp, &V_bpf_iflist, bif_next) {
		if (bp->bif_name != name)
			continue;
		lst[n++] = bp->bif_dlt;
	}
	error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n);
	free(lst, M_TEMP);
	bfl->bfl_len = n;
	return (error);
}
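
/*
 * Illustrative userland use of the two-call convention above
 * (hypothetical fd name): the first BIOCGDLTLIST with bfl_list == NULL
 * only reports the count, the second fills the caller's array, and a
 * DLT is then selected with BIOCSDLT.
 *
 *	struct bpf_dltlist bfl = { 0 };
 *
 *	ioctl(bpf_fd, BIOCGDLTLIST, &bfl);
 *	bfl.bfl_list = calloc(bfl.bfl_len, sizeof(u_int));
 *	ioctl(bpf_fd, BIOCGDLTLIST, &bfl);
 *	ioctl(bpf_fd, BIOCSDLT, &bfl.bfl_list[0]);
 */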

/*
 * Set the data link type of a BPF descriptor.  The convention is that an
 * application first does BIOCSETIF and then BIOCSDLT, thus the descriptor
 * is supposed to be already attached.  Only one kernel facility provides
 * tapping points with the same name but different DLTs - ieee80211_radiotap.
 *
 * XXXGL: this function definitely looks suspicious, e.g. it clearly doesn't
 * clear promisc on the old bpf_if.  The convention about reference counting
 * is also unclear.
 */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int error, opromisc;
	const char *name;
	struct bpf_if *bp;

	BPF_LOCK_ASSERT();
	MPASS(d->bd_bif != NULL);

	/*
	 * It is safe to check bd_bif without BPFD_LOCK, it cannot be
	 * changed while we hold the global lock.
	 */
	if (d->bd_bif->bif_dlt == dlt)
		return (0);

	name = d->bd_bif->bif_name;
	LIST_FOREACH(bp, &V_bpf_iflist, bif_next) {
		if (bp->bif_name == name && bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return (EINVAL);

	opromisc = d->bd_promisc;
	bpf_attachd(d, bp);
	if (opromisc) {
		error = bp->bif_methods->bif_promisc(bp->bif_softc, true);
		if (error)
			printf("%s: bif_promisc on %s failed (%d)\n",
			    __func__, bp->bif_name, error);
		else
			d->bd_promisc = 1;
	}
	return (0);
}

static void
bpf_drvinit(void *unused)
{
	struct cdev *dev;

	sx_init(&bpf_sx, "bpf global lock");
	dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
	/* For compatibility */
	make_dev_alias(dev, "bpf0");
}

/*
 * Zero out the various packet counters associated with all of the bpf
 * descriptors.  At some point, we will probably want to get a bit more
 * granular and allow the user to specify descriptors to be zeroed.
 */
static void
bpf_zero_counters(void)
{
	struct bpf_if *bp;
	struct bpf_d *bd;

	BPF_LOCK();
	/*
	 * We are protected by the global lock here, interfaces and
	 * descriptors cannot be deleted while we hold it.
	 */
	LIST_FOREACH(bp, &V_bpf_iflist, bif_next) {
		CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			counter_u64_zero(bd->bd_rcount);
			counter_u64_zero(bd->bd_dcount);
			counter_u64_zero(bd->bd_fcount);
			counter_u64_zero(bd->bd_wcount);
			counter_u64_zero(bd->bd_wfcount);
			counter_u64_zero(bd->bd_wdcount);
			counter_u64_zero(bd->bd_zcopy);
		}
	}
	BPF_UNLOCK();
}

/*
 * Fill filter statistics
 */
static void
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{

	BPF_LOCK_ASSERT();
	bzero(d, sizeof(*d));
	d->bd_structsize = sizeof(*d);
	d->bd_immediate = bd->bd_flags & BPFD_IMMEDIATE ? 1 : 0;
	d->bd_promisc = bd->bd_promisc;
	d->bd_hdrcmplt = bd->bd_flags & BPFD_HDRCMPLT ? 1 : 0;
	d->bd_direction = bd->bd_direction;
	d->bd_feedback = bd->bd_flags & BPFD_FEEDBACK ? 1 : 0;
	d->bd_async = bd->bd_flags & BPFD_ASYNC ? 1 : 0;
	d->bd_rcount = counter_u64_fetch(bd->bd_rcount);
	d->bd_dcount = counter_u64_fetch(bd->bd_dcount);
	d->bd_fcount = counter_u64_fetch(bd->bd_fcount);
	d->bd_sig = bd->bd_sig;
	d->bd_slen = bd->bd_slen;
	d->bd_hlen = bd->bd_hlen;
	d->bd_bufsize = bd->bd_bufsize;
	d->bd_pid = bd->bd_pid;
	strlcpy(d->bd_ifname, bd->bd_bif->bif_name, sizeof(d->bd_ifname));
	d->bd_locked = bd->bd_flags & BPFD_LOCKED ? 1 : 0;
	d->bd_wcount = counter_u64_fetch(bd->bd_wcount);
	d->bd_wdcount = counter_u64_fetch(bd->bd_wdcount);
	d->bd_wfcount = counter_u64_fetch(bd->bd_wfcount);
	d->bd_zcopy = counter_u64_fetch(bd->bd_zcopy);
	d->bd_bufmode = bd->bd_bufmode;
}

/*
 * Handle `netstat -B' stats request
 */
static int
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
{
	static const struct xbpf_d zerostats;
	struct xbpf_d *xbdbuf, *xbd, tempstats;
	u_int bpfd_cnt, index;
	int error;
	struct bpf_if *bp;
	struct bpf_d *bd;

	/*
	 * XXX This is not technically correct.  It is possible for
	 * unprivileged users to open bpf devices.  It would make sense
	 * if the users who opened the devices were able to retrieve
	 * the statistics for them, too.
	 */
	error = priv_check(req->td, PRIV_NET_BPF);
	if (error)
		return (error);
	/*
	 * Check to see if the user is requesting that the counters be
	 * zeroed out.  Explicitly check that the supplied data is zeroed,
	 * as we aren't allowing the user to set the counters currently.
	 */
	if (req->newptr != NULL) {
		if (req->newlen != sizeof(tempstats))
			return (EINVAL);
		memset(&tempstats, 0, sizeof(tempstats));
		error = SYSCTL_IN(req, &tempstats, sizeof(tempstats));
		if (error)
			return (error);
		if (bcmp(&tempstats, &zerostats, sizeof(tempstats)) != 0)
			return (EINVAL);
		bpf_zero_counters();
		return (0);
	}
	bpfd_cnt = 0;
	BPF_LOCK();
	LIST_FOREACH(bp, &V_bpf_iflist, bif_next) {
		CK_LIST_FOREACH(bd, &bp->bif_wlist, bd_next)
			bpfd_cnt++;
		CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next)
			bpfd_cnt++;
	}
	if (bpfd_cnt == 0 || req->oldptr == NULL) {
		BPF_UNLOCK();
		return (SYSCTL_OUT(req, 0, bpfd_cnt * sizeof(*xbd)));
	}
	if (req->oldlen < bpfd_cnt * sizeof(*xbd)) {
		BPF_UNLOCK();
		return (ENOMEM);
	}
	xbdbuf = malloc(bpfd_cnt * sizeof(*xbd), M_BPF, M_WAITOK);
	index = 0;
	LIST_FOREACH(bp, &V_bpf_iflist, bif_next) {
		/* Send writers-only first */
		CK_LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
			MPASS(index <= bpfd_cnt);
			xbd = &xbdbuf[index++];
			bpfstats_fill_xbpf(xbd, bd);
		}
		CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			MPASS(index <= bpfd_cnt);
			xbd = &xbdbuf[index++];
			bpfstats_fill_xbpf(xbd, bd);
		}
	}
	BPF_UNLOCK();
	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
	free(xbdbuf, M_BPF);
	return (error);
}
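
/*
 * Illustrative userland consumer, matching what `netstat -B' does
 * (assuming the handler above is attached as net.bpf.stats): size the
 * buffer with a NULL oldp first, then fetch an array of struct xbpf_d.
 *
 *	size_t len = 0;
 *
 *	if (sysctlbyname("net.bpf.stats", NULL, &len, NULL, 0) == 0 &&
 *	    len > 0) {
 *		struct xbpf_d *xbd = malloc(len);
 *
 *		sysctlbyname("net.bpf.stats", xbd, &len, NULL, 0);
 *	}
 */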

SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL);

#else /* !DEV_BPF && !NETGRAPH_BPF */

/*
 * NOP stubs to allow bpf-using drivers to load and function.
 *
 * A 'better' implementation would allow the core bpf functionality
 * to be loaded at runtime.
 */

void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
}

void
bpf_tap_if(if_t ifp, u_char *pkt, u_int pktlen)
{
}

void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
}

void
bpf_mtap_if(if_t ifp, struct mbuf *m)
{
}

void
bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
{
}

void
bpf_mtap2_if(if_t ifp, void *data, u_int dlen, struct mbuf *m)
{
}

void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	static const struct bpfd_list dead_bpf_if = CK_LIST_HEAD_INITIALIZER();

	ifp->if_bpf = __DECONST(struct bpf_if *, &dead_bpf_if);
}

void
bpfdetach(struct ifnet *ifp)
{
}

bool
bpf_peers_present_if(struct ifnet *ifp)
{
	return (false);
}

u_int
bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
{
	return (-1);	/* "no filter" behaviour */
}

int
bpf_validate(const struct bpf_insn *f, int len)
{
	return (0);	/* false */
}

#endif /* !DEV_BPF && !NETGRAPH_BPF */