xref: /freebsd/sys/dev/netmap/netmap_monitor.c (revision 63d1fd5970ec814904aa0f4580b10a0d302d08b2)
1 /*
2  * Copyright (C) 2014-2016 Giuseppe Lettieri
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *   1. Redistributions of source code must retain the above copyright
9  *      notice, this list of conditions and the following disclaimer.
10  *   2. Redistributions in binary form must reproduce the above copyright
11  *      notice, this list of conditions and the following disclaimer in the
12  *      documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  * $FreeBSD$
29  *
30  * Monitors
31  *
32  * netmap monitors can be used to do monitoring of network traffic
33  * on another adapter, when the latter adapter is working in netmap mode.
34  *
35  * Monitors offer to userspace the same interface as any other netmap port,
36  * with as many pairs of netmap rings as the monitored adapter.
37  * However, only the rx rings are actually used. Each monitor rx ring receives
38  * the traffic transiting on both the tx and rx corresponding rings in the
39  * monitored adapter. During registration, the user can choose if she wants
40  * to intercept tx only, rx only, or both tx and rx traffic.
41  *
42  * If the monitor is not able to cope with the stream of frames, excess traffic
43  * will be dropped.
44  *
45  * If the monitored adapter leaves netmap mode, the monitor has to be restarted.
46  *
47  * Monitors can be either zero-copy or copy-based.
48  *
49  * Copy monitors see the frames before they are consumed:
50  *
51  *  - For tx traffic, this is when the application sends them, before they are
52  *    passed down to the adapter.
53  *
54  *  - For rx traffic, this is when they are received by the adapter, before
55  *    they are sent up to the application, if any (note that, if no
56  *    application is reading from a monitored ring, the ring will eventually
57  *    fill up and traffic will stop).
58  *
59  * Zero-copy monitors only see the frames after they have been consumed:
60  *
61  *  - For tx traffic, this is after the slots containing the frames have been
62  *    marked as free. Note that this may happen with a considerable delay after
63  *    frame transmission, since freeing of slots is often done lazily.
64  *
65  *  - For rx traffic, this is after the consumer on the monitored adapter
66  *    has released them. In most cases, the consumer is a userspace
67  *    application which may have modified the frame contents.
68  *
69  * Several copy monitors may be active on any ring.  Zero-copy monitors,
70  * instead, need exclusive access to each of the monitored rings.  This may
71  * change in the future, if we implement zero-copy monitor chaining.
72  *
73  */
74 
75 
76 #if defined(__FreeBSD__)
77 #include <sys/cdefs.h> /* prerequisite */
78 
79 #include <sys/types.h>
80 #include <sys/errno.h>
81 #include <sys/param.h>	/* defines used in kernel.h */
82 #include <sys/kernel.h>	/* types used in module initialization */
83 #include <sys/malloc.h>
84 #include <sys/poll.h>
85 #include <sys/lock.h>
86 #include <sys/rwlock.h>
87 #include <sys/selinfo.h>
88 #include <sys/sysctl.h>
89 #include <sys/socket.h> /* sockaddrs */
90 #include <net/if.h>
91 #include <net/if_var.h>
92 #include <machine/bus.h>	/* bus_dmamap_* */
93 #include <sys/refcount.h>
94 
95 
96 #elif defined(linux)
97 
98 #include "bsd_glue.h"
99 
100 #elif defined(__APPLE__)
101 
102 #warning OSX support is only partial
103 #include "osx_glue.h"
104 
105 #elif defined(_WIN32)
106 #include "win_glue.h"
107 #else
108 
109 #error	Unsupported platform
110 
111 #endif /* unsupported */
112 
113 /*
114  * common headers
115  */
116 
117 #include <net/netmap.h>
118 #include <dev/netmap/netmap_kern.h>
119 #include <dev/netmap/netmap_mem2.h>
120 
121 #ifdef WITH_MONITOR
122 
/* Largest number of slots accepted for a monitor ring: it is passed as the
 * upper limit to nm_bound_var() when netmap_get_monitor_na() sets
 * num_tx_desc and num_rx_desc.
 */
123 #define NM_MONITOR_MAXSLOTS 4096
124 
125 /*
126  ********************************************************************
127  * functions common to both kind of monitors
128  ********************************************************************
129  */
130 
131 /* nm_sync callback for the monitor's own tx rings.
132  * This makes no sense and always returns error
133  */
134 static int
135 netmap_monitor_txsync(struct netmap_kring *kring, int flags)
136 {
137         RD(1, "%s %x", kring->name, flags);
138 	return EIO;
139 }
140 
141 /* nm_sync callback for the monitor's own rx rings.
142  * Note that the lock in netmap_zmon_parent_sync only protects
143  * writers among themselves. Synchronization between writers
144  * (i.e., netmap_zmon_parent_txsync and netmap_zmon_parent_rxsync)
145  * and readers (i.e., netmap_zmon_rxsync) relies on memory barriers.
146  */
147 static int
148 netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
149 {
150         ND("%s %x", kring->name, flags);
151 	kring->nr_hwcur = kring->rcur;
152 	mb();
153         return 0;
154 }
155 
156 /* nm_krings_create callbacks for monitors.
157  */
158 static int
159 netmap_monitor_krings_create(struct netmap_adapter *na)
160 {
161 	int error = netmap_krings_create(na, 0);
162 	if (error)
163 		return error;
164 	/* override the host rings callbacks */
165 	na->tx_rings[na->num_tx_rings].nm_sync = netmap_monitor_txsync;
166 	na->rx_rings[na->num_rx_rings].nm_sync = netmap_monitor_rxsync;
167 	return 0;
168 }
169 
/*
 * nm_krings_delete callback for monitors.
 * Nothing monitor-specific is released here: the work is delegated to
 * the generic netmap_krings_delete().
 */
static void
netmap_monitor_krings_delete(struct netmap_adapter *na)
{
	netmap_krings_delete(na);
}
176 
177 
178 static u_int
179 nm_txrx2flag(enum txrx t)
180 {
181 	return (t == NR_RX ? NR_MONITOR_RX : NR_MONITOR_TX);
182 }
183 
184 /* allocate the monitors array in the monitored kring */
185 static int
186 nm_monitor_alloc(struct netmap_kring *kring, u_int n)
187 {
188 	size_t len;
189 	struct netmap_kring **nm;
190 
191 	if (n <= kring->max_monitors)
192 		/* we already have more entries that requested */
193 		return 0;
194 
195         len = sizeof(struct netmap_kring *) * n;
196 #ifndef _WIN32
197 	nm = realloc(kring->monitors, len, M_DEVBUF, M_NOWAIT | M_ZERO);
198 #else
199 	nm = realloc(kring->monitors, len, sizeof(struct netmap_kring *)*kring->max_monitors);
200 #endif
201 	if (nm == NULL)
202 		return ENOMEM;
203 
204 	kring->monitors = nm;
205 	kring->max_monitors = n;
206 
207 	return 0;
208 }
209 
210 /* deallocate the parent array in the parent adapter */
211 static void
212 nm_monitor_dealloc(struct netmap_kring *kring)
213 {
214 	if (kring->monitors) {
215 		if (kring->n_monitors > 0) {
216 			D("freeing not empty monitor array for %s (%d dangling monitors)!", kring->name,
217 					kring->n_monitors);
218 		}
219 		free(kring->monitors, M_DEVBUF);
220 		kring->monitors = NULL;
221 		kring->max_monitors = 0;
222 		kring->n_monitors = 0;
223 	}
224 }
225 
/*
 * Monitors work by replacing the nm_sync() callback and, for copy
 * monitors on rx rings, also the nm_notify() callback of the monitored
 * rings.  The replacement callbacks are defined further below.
 */
static int netmap_zmon_parent_txsync(struct netmap_kring *kring, int flags);
static int netmap_zmon_parent_rxsync(struct netmap_kring *kring, int flags);
static int netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags);
static int netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags);
static int netmap_monitor_parent_notify(struct netmap_kring *kring, int flags);
235 
236 
237 /* add the monitor mkring to the list of monitors of kring.
238  * If this is the first monitor, intercept the callbacks
239  */
240 static int
241 netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int zcopy)
242 {
243 	int error = NM_IRQ_COMPLETED;
244 
245 	/* sinchronize with concurrently running nm_sync()s */
246 	nm_kr_stop(kring, NM_KR_LOCKED);
247 	/* make sure the monitor array exists and is big enough */
248 	error = nm_monitor_alloc(kring, kring->n_monitors + 1);
249 	if (error)
250 		goto out;
251 	kring->monitors[kring->n_monitors] = mkring;
252 	mkring->mon_pos = kring->n_monitors;
253 	kring->n_monitors++;
254 	if (kring->n_monitors == 1) {
255 		/* this is the first monitor, intercept callbacks */
256 		ND("%s: intercept callbacks on %s", mkring->name, kring->name);
257 		kring->mon_sync = kring->nm_sync;
258 		/* zcopy monitors do not override nm_notify(), but
259 		 * we save the original one regardless, so that
260 		 * netmap_monitor_del() does not need to know the
261 		 * monitor type
262 		 */
263 		kring->mon_notify = kring->nm_notify;
264 		if (kring->tx == NR_TX) {
265 			kring->nm_sync = (zcopy ? netmap_zmon_parent_txsync :
266 						  netmap_monitor_parent_txsync);
267 		} else {
268 			kring->nm_sync = (zcopy ? netmap_zmon_parent_rxsync :
269 						  netmap_monitor_parent_rxsync);
270 			if (!zcopy) {
271 				/* also intercept notify */
272 				kring->nm_notify = netmap_monitor_parent_notify;
273 				kring->mon_tail = kring->nr_hwtail;
274 			}
275 		}
276 	}
277 
278 out:
279 	nm_kr_start(kring);
280 	return error;
281 }
282 
283 
284 /* remove the monitor mkring from the list of monitors of kring.
285  * If this is the last monitor, restore the original callbacks
286  */
287 static void
288 netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring)
289 {
290 	/* sinchronize with concurrently running nm_sync()s */
291 	nm_kr_stop(kring, NM_KR_LOCKED);
292 	kring->n_monitors--;
293 	if (mkring->mon_pos != kring->n_monitors) {
294 		kring->monitors[mkring->mon_pos] = kring->monitors[kring->n_monitors];
295 		kring->monitors[mkring->mon_pos]->mon_pos = mkring->mon_pos;
296 	}
297 	kring->monitors[kring->n_monitors] = NULL;
298 	if (kring->n_monitors == 0) {
299 		/* this was the last monitor, restore callbacks  and delete monitor array */
300 		ND("%s: restoring sync on %s: %p", mkring->name, kring->name, kring->mon_sync);
301 		kring->nm_sync = kring->mon_sync;
302 		kring->mon_sync = NULL;
303 		if (kring->tx == NR_RX) {
304 			ND("%s: restoring notify on %s: %p",
305 					mkring->name, kring->name, kring->mon_notify);
306 			kring->nm_notify = kring->mon_notify;
307 			kring->mon_notify = NULL;
308 		}
309 		nm_monitor_dealloc(kring);
310 	}
311 	nm_kr_start(kring);
312 }
313 
314 
315 /* This is called when the monitored adapter leaves netmap mode
316  * (see netmap_do_unregif).
317  * We need to notify the monitors that the monitored rings are gone.
318  * We do this by setting their mna->priv.np_na to NULL.
319  * Note that the rings are already stopped when this happens, so
320  * no monitor ring callback can be active.
321  */
322 void
323 netmap_monitor_stop(struct netmap_adapter *na)
324 {
325 	enum txrx t;
326 
327 	for_rx_tx(t) {
328 		u_int i;
329 
330 		for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
331 			struct netmap_kring *kring = &NMR(na, t)[i];
332 			u_int j;
333 
334 			for (j = 0; j < kring->n_monitors; j++) {
335 				struct netmap_kring *mkring =
336 					kring->monitors[j];
337 				struct netmap_monitor_adapter *mna =
338 					(struct netmap_monitor_adapter *)mkring->na;
339 				/* forget about this adapter */
340 				netmap_adapter_put(mna->priv.np_na);
341 				mna->priv.np_na = NULL;
342 			}
343 		}
344 	}
345 }
346 
347 
348 /* common functions for the nm_register() callbacks of both kind of
349  * monitors.
350  */
351 static int
352 netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
353 {
354 	struct netmap_monitor_adapter *mna =
355 		(struct netmap_monitor_adapter *)na;
356 	struct netmap_priv_d *priv = &mna->priv;
357 	struct netmap_adapter *pna = priv->np_na;
358 	struct netmap_kring *kring, *mkring;
359 	int i;
360 	enum txrx t;
361 
362 	ND("%p: onoff %d", na, onoff);
363 	if (onoff) {
364 		if (pna == NULL) {
365 			/* parent left netmap mode, fatal */
366 			D("%s: internal error", na->name);
367 			return ENXIO;
368 		}
369 		for_rx_tx(t) {
370 			if (mna->flags & nm_txrx2flag(t)) {
371 				for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
372 					kring = &NMR(pna, t)[i];
373 					mkring = &na->rx_rings[i];
374 					if (nm_kring_pending_on(mkring)) {
375 						netmap_monitor_add(mkring, kring, zmon);
376 						mkring->nr_mode = NKR_NETMAP_ON;
377 					}
378 				}
379 			}
380 		}
381 		na->na_flags |= NAF_NETMAP_ON;
382 	} else {
383 		if (na->active_fds == 0)
384 			na->na_flags &= ~NAF_NETMAP_ON;
385 		for_rx_tx(t) {
386 			if (mna->flags & nm_txrx2flag(t)) {
387 				for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
388 					mkring = &na->rx_rings[i];
389 					if (nm_kring_pending_off(mkring)) {
390 						mkring->nr_mode = NKR_NETMAP_OFF;
391 						/* we cannot access the parent krings if the parent
392 						 * has left netmap mode. This is signaled by a NULL
393 						 * pna pointer
394 						 */
395 						if (pna) {
396 							kring = &NMR(pna, t)[i];
397 							netmap_monitor_del(mkring, kring);
398 						}
399 					}
400 				}
401 			}
402 		}
403 	}
404 	return 0;
405 }
406 
407 /*
408  ****************************************************************
409  * functions specific for zero-copy monitors
410  ****************************************************************
411  */
412 
413 /*
414  * Common function for both zero-copy tx and rx nm_sync()
415  * callbacks
416  */
417 static int
418 netmap_zmon_parent_sync(struct netmap_kring *kring, int flags, enum txrx tx)
419 {
420 	struct netmap_kring *mkring = kring->monitors[0];
421 	struct netmap_ring *ring = kring->ring, *mring;
422 	int error = 0;
423 	int rel_slots, free_slots, busy, sent = 0;
424 	u_int beg, end, i;
425 	u_int lim = kring->nkr_num_slots - 1,
426 	      mlim; // = mkring->nkr_num_slots - 1;
427 
428 	if (mkring == NULL) {
429 		RD(5, "NULL monitor on %s", kring->name);
430 		return 0;
431 	}
432 	mring = mkring->ring;
433 	mlim = mkring->nkr_num_slots - 1;
434 
435 	/* get the relased slots (rel_slots) */
436 	if (tx == NR_TX) {
437 		beg = kring->nr_hwtail;
438 		error = kring->mon_sync(kring, flags);
439 		if (error)
440 			return error;
441 		end = kring->nr_hwtail;
442 	} else { /* NR_RX */
443 		beg = kring->nr_hwcur;
444 		end = kring->rhead;
445 	}
446 
447 	rel_slots = end - beg;
448 	if (rel_slots < 0)
449 		rel_slots += kring->nkr_num_slots;
450 
451 	if (!rel_slots) {
452 		/* no released slots, but we still need
453 		 * to call rxsync if this is a rx ring
454 		 */
455 		goto out_rxsync;
456 	}
457 
458 	/* we need to lock the monitor receive ring, since it
459 	 * is the target of bot tx and rx traffic from the monitored
460 	 * adapter
461 	 */
462 	mtx_lock(&mkring->q_lock);
463 	/* get the free slots available on the monitor ring */
464 	i = mkring->nr_hwtail;
465 	busy = i - mkring->nr_hwcur;
466 	if (busy < 0)
467 		busy += mkring->nkr_num_slots;
468 	free_slots = mlim - busy;
469 
470 	if (!free_slots)
471 		goto out;
472 
473 	/* swap min(free_slots, rel_slots) slots */
474 	if (free_slots < rel_slots) {
475 		beg += (rel_slots - free_slots);
476 		if (beg >= kring->nkr_num_slots)
477 			beg -= kring->nkr_num_slots;
478 		rel_slots = free_slots;
479 	}
480 
481 	sent = rel_slots;
482 	for ( ; rel_slots; rel_slots--) {
483 		struct netmap_slot *s = &ring->slot[beg];
484 		struct netmap_slot *ms = &mring->slot[i];
485 		uint32_t tmp;
486 
487 		tmp = ms->buf_idx;
488 		ms->buf_idx = s->buf_idx;
489 		s->buf_idx = tmp;
490 		ND(5, "beg %d buf_idx %d", beg, tmp);
491 
492 		tmp = ms->len;
493 		ms->len = s->len;
494 		s->len = tmp;
495 
496 		s->flags |= NS_BUF_CHANGED;
497 
498 		beg = nm_next(beg, lim);
499 		i = nm_next(i, mlim);
500 
501 	}
502 	mb();
503 	mkring->nr_hwtail = i;
504 
505 out:
506 	mtx_unlock(&mkring->q_lock);
507 
508 	if (sent) {
509 		/* notify the new frames to the monitor */
510 		mkring->nm_notify(mkring, 0);
511 	}
512 
513 out_rxsync:
514 	if (tx == NR_RX)
515 		error = kring->mon_sync(kring, flags);
516 
517 	return error;
518 }
519 
520 /* callback used to replace the nm_sync callback in the monitored tx rings */
521 static int
522 netmap_zmon_parent_txsync(struct netmap_kring *kring, int flags)
523 {
524         ND("%s %x", kring->name, flags);
525         return netmap_zmon_parent_sync(kring, flags, NR_TX);
526 }
527 
528 /* callback used to replace the nm_sync callback in the monitored rx rings */
529 static int
530 netmap_zmon_parent_rxsync(struct netmap_kring *kring, int flags)
531 {
532         ND("%s %x", kring->name, flags);
533         return netmap_zmon_parent_sync(kring, flags, NR_RX);
534 }
535 
536 
/*
 * nm_register callback for zero-copy monitors: the common code is run
 * with the zero-copy option set.
 */
static int
netmap_zmon_reg(struct netmap_adapter *na, int onoff)
{
	const int zcopy = 1;

	return netmap_monitor_reg_common(na, onoff, zcopy);
}
542 
543 /* nm_dtor callback for monitors */
544 static void
545 netmap_zmon_dtor(struct netmap_adapter *na)
546 {
547 	struct netmap_monitor_adapter *mna =
548 		(struct netmap_monitor_adapter *)na;
549 	struct netmap_priv_d *priv = &mna->priv;
550 	struct netmap_adapter *pna = priv->np_na;
551 
552 	netmap_adapter_put(pna);
553 }
554 
555 /*
556  ****************************************************************
557  * functions specific for copy monitors
558  ****************************************************************
559  */
560 
561 static void
562 netmap_monitor_parent_sync(struct netmap_kring *kring, u_int first_new, int new_slots)
563 {
564 	u_int j;
565 
566 	for (j = 0; j < kring->n_monitors; j++) {
567 		struct netmap_kring *mkring = kring->monitors[j];
568 		u_int i, mlim, beg;
569 		int free_slots, busy, sent = 0, m;
570 		u_int lim = kring->nkr_num_slots - 1;
571 		struct netmap_ring *ring = kring->ring, *mring = mkring->ring;
572 		u_int max_len = NETMAP_BUF_SIZE(mkring->na);
573 
574 		mlim = mkring->nkr_num_slots - 1;
575 
576 		/* we need to lock the monitor receive ring, since it
577 		 * is the target of bot tx and rx traffic from the monitored
578 		 * adapter
579 		 */
580 		mtx_lock(&mkring->q_lock);
581 		/* get the free slots available on the monitor ring */
582 		i = mkring->nr_hwtail;
583 		busy = i - mkring->nr_hwcur;
584 		if (busy < 0)
585 			busy += mkring->nkr_num_slots;
586 		free_slots = mlim - busy;
587 
588 		if (!free_slots)
589 			goto out;
590 
591 		/* copy min(free_slots, new_slots) slots */
592 		m = new_slots;
593 		beg = first_new;
594 		if (free_slots < m) {
595 			beg += (m - free_slots);
596 			if (beg >= kring->nkr_num_slots)
597 				beg -= kring->nkr_num_slots;
598 			m = free_slots;
599 		}
600 
601 		for ( ; m; m--) {
602 			struct netmap_slot *s = &ring->slot[beg];
603 			struct netmap_slot *ms = &mring->slot[i];
604 			u_int copy_len = s->len;
605 			char *src = NMB(kring->na, s),
606 			     *dst = NMB(mkring->na, ms);
607 
608 			if (unlikely(copy_len > max_len)) {
609 				RD(5, "%s->%s: truncating %d to %d", kring->name,
610 						mkring->name, copy_len, max_len);
611 				copy_len = max_len;
612 			}
613 
614 			memcpy(dst, src, copy_len);
615 			ms->len = copy_len;
616 			sent++;
617 
618 			beg = nm_next(beg, lim);
619 			i = nm_next(i, mlim);
620 		}
621 		mb();
622 		mkring->nr_hwtail = i;
623 	out:
624 		mtx_unlock(&mkring->q_lock);
625 
626 		if (sent) {
627 			/* notify the new frames to the monitor */
628 			mkring->nm_notify(mkring, 0);
629 		}
630 	}
631 }
632 
633 /* callback used to replace the nm_sync callback in the monitored tx rings */
634 static int
635 netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags)
636 {
637 	u_int first_new;
638 	int new_slots;
639 
640 	/* get the new slots */
641 	first_new = kring->nr_hwcur;
642         new_slots = kring->rhead - first_new;
643         if (new_slots < 0)
644                 new_slots += kring->nkr_num_slots;
645 	if (new_slots)
646 		netmap_monitor_parent_sync(kring, first_new, new_slots);
647 	return kring->mon_sync(kring, flags);
648 }
649 
650 /* callback used to replace the nm_sync callback in the monitored rx rings */
651 static int
652 netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags)
653 {
654 	u_int first_new;
655 	int new_slots, error;
656 
657 	/* get the new slots */
658 	error =  kring->mon_sync(kring, flags);
659 	if (error)
660 		return error;
661 	first_new = kring->mon_tail;
662         new_slots = kring->nr_hwtail - first_new;
663         if (new_slots < 0)
664                 new_slots += kring->nkr_num_slots;
665 	if (new_slots)
666 		netmap_monitor_parent_sync(kring, first_new, new_slots);
667 	kring->mon_tail = kring->nr_hwtail;
668 	return 0;
669 }
670 
671 /* callback used to replace the nm_notify() callback in the monitored rx rings */
672 static int
673 netmap_monitor_parent_notify(struct netmap_kring *kring, int flags)
674 {
675 	int (*notify)(struct netmap_kring*, int);
676 	ND(5, "%s %x", kring->name, flags);
677 	/* ?xsync callbacks have tryget called by their callers
678 	 * (NIOCREGIF and poll()), but here we have to call it
679 	 * by ourself
680 	 */
681 	if (nm_kr_tryget(kring, 0, NULL)) {
682 		/* in all cases, just skip the sync */
683 		return NM_IRQ_COMPLETED;
684 	}
685 	if (kring->n_monitors > 0) {
686 		netmap_monitor_parent_rxsync(kring, NAF_FORCE_READ);
687 		notify = kring->mon_notify;
688 	} else {
689 		/* we are no longer monitoring this ring, so both
690 		 * mon_sync and mon_notify are NULL
691 		 */
692 		notify = kring->nm_notify;
693 	}
694 	nm_kr_put(kring);
695         return notify(kring, flags);
696 }
697 
698 
/*
 * nm_register callback for copy monitors: the common code is run with
 * the zero-copy option cleared.
 */
static int
netmap_monitor_reg(struct netmap_adapter *na, int onoff)
{
	const int zcopy = 0;

	return netmap_monitor_reg_common(na, onoff, zcopy);
}
704 
705 static void
706 netmap_monitor_dtor(struct netmap_adapter *na)
707 {
708 	struct netmap_monitor_adapter *mna =
709 		(struct netmap_monitor_adapter *)na;
710 	struct netmap_priv_d *priv = &mna->priv;
711 	struct netmap_adapter *pna = priv->np_na;
712 
713 	netmap_adapter_put(pna);
714 }
715 
716 
717 /* check if nmr is a request for a monitor adapter that we can satisfy */
718 int
719 netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
720 {
721 	struct nmreq pnmr;
722 	struct netmap_adapter *pna; /* parent adapter */
723 	struct netmap_monitor_adapter *mna;
724 	struct ifnet *ifp = NULL;
725 	int i, error;
726 	enum txrx t;
727 	int zcopy = (nmr->nr_flags & NR_ZCOPY_MON);
728 	char monsuff[10] = "";
729 
730 	if ((nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
731 		if (nmr->nr_flags & NR_ZCOPY_MON) {
732 			/* the flag makes no sense unless you are
733 			 * creating a monitor
734 			 */
735 			return EINVAL;
736 		}
737 		ND("not a monitor");
738 		return 0;
739 	}
740 	/* this is a request for a monitor adapter */
741 
742 	ND("flags %x", nmr->nr_flags);
743 
744 	mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
745 	if (mna == NULL) {
746 		D("memory error");
747 		return ENOMEM;
748 	}
749 
750 	/* first, try to find the adapter that we want to monitor
751 	 * We use the same nmr, after we have turned off the monitor flags.
752 	 * In this way we can potentially monitor everything netmap understands,
753 	 * except other monitors.
754 	 */
755 	memcpy(&pnmr, nmr, sizeof(pnmr));
756 	pnmr.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON);
757 	error = netmap_get_na(&pnmr, &pna, &ifp, create);
758 	if (error) {
759 		D("parent lookup failed: %d", error);
760 		free(mna, M_DEVBUF);
761 		return error;
762 	}
763 	ND("found parent: %s", pna->name);
764 
765 	if (!nm_netmap_on(pna)) {
766 		/* parent not in netmap mode */
767 		/* XXX we can wait for the parent to enter netmap mode,
768 		 * by intercepting its nm_register callback (2014-03-16)
769 		 */
770 		D("%s not in netmap mode", pna->name);
771 		error = EINVAL;
772 		goto put_out;
773 	}
774 
775 	/* grab all the rings we need in the parent */
776 	mna->priv.np_na = pna;
777 	error = netmap_interp_ringid(&mna->priv, nmr->nr_ringid, nmr->nr_flags);
778 	if (error) {
779 		D("ringid error");
780 		goto put_out;
781 	}
782 	if (mna->priv.np_qlast[NR_TX] - mna->priv.np_qfirst[NR_TX] == 1) {
783 		snprintf(monsuff, 10, "-%d", mna->priv.np_qfirst[NR_TX]);
784 	}
785 	snprintf(mna->up.name, sizeof(mna->up.name), "%s%s/%s%s%s", pna->name,
786 			monsuff,
787 			zcopy ? "z" : "",
788 			(nmr->nr_flags & NR_MONITOR_RX) ? "r" : "",
789 			(nmr->nr_flags & NR_MONITOR_TX) ? "t" : "");
790 
791 	if (zcopy) {
792 		/* zero copy monitors need exclusive access to the monitored rings */
793 		for_rx_tx(t) {
794 			if (! (nmr->nr_flags & nm_txrx2flag(t)))
795 				continue;
796 			for (i = mna->priv.np_qfirst[t]; i < mna->priv.np_qlast[t]; i++) {
797 				struct netmap_kring *kring = &NMR(pna, t)[i];
798 				if (kring->n_monitors > 0) {
799 					error = EBUSY;
800 					D("ring %s already monitored by %s", kring->name,
801 							kring->monitors[0]->name);
802 					goto put_out;
803 				}
804 			}
805 		}
806 		mna->up.nm_register = netmap_zmon_reg;
807 		mna->up.nm_dtor = netmap_zmon_dtor;
808 		/* to have zero copy, we need to use the same memory allocator
809 		 * as the monitored port
810 		 */
811 		mna->up.nm_mem = pna->nm_mem;
812 		mna->up.na_lut = pna->na_lut;
813 	} else {
814 		/* normal monitors are incompatible with zero copy ones */
815 		for_rx_tx(t) {
816 			if (! (nmr->nr_flags & nm_txrx2flag(t)))
817 				continue;
818 			for (i = mna->priv.np_qfirst[t]; i < mna->priv.np_qlast[t]; i++) {
819 				struct netmap_kring *kring = &NMR(pna, t)[i];
820 				if (kring->n_monitors > 0 &&
821 				    kring->monitors[0]->na->nm_register == netmap_zmon_reg)
822 				{
823 					error = EBUSY;
824 					D("ring busy");
825 					goto put_out;
826 				}
827 			}
828 		}
829 		mna->up.nm_rxsync = netmap_monitor_rxsync;
830 		mna->up.nm_register = netmap_monitor_reg;
831 		mna->up.nm_dtor = netmap_monitor_dtor;
832 	}
833 
834 	/* the monitor supports the host rings iff the parent does */
835 	mna->up.na_flags = (pna->na_flags & NAF_HOST_RINGS);
836 	/* a do-nothing txsync: monitors cannot be used to inject packets */
837 	mna->up.nm_txsync = netmap_monitor_txsync;
838 	mna->up.nm_rxsync = netmap_monitor_rxsync;
839 	mna->up.nm_krings_create = netmap_monitor_krings_create;
840 	mna->up.nm_krings_delete = netmap_monitor_krings_delete;
841 	mna->up.num_tx_rings = 1; // XXX we don't need it, but field can't be zero
842 	/* we set the number of our rx_rings to be max(num_rx_rings, num_rx_rings)
843 	 * in the parent
844 	 */
845 	mna->up.num_rx_rings = pna->num_rx_rings;
846 	if (pna->num_tx_rings > pna->num_rx_rings)
847 		mna->up.num_rx_rings = pna->num_tx_rings;
848 	/* by default, the number of slots is the same as in
849 	 * the parent rings, but the user may ask for a different
850 	 * number
851 	 */
852 	mna->up.num_tx_desc = nmr->nr_tx_slots;
853 	nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
854 			1, NM_MONITOR_MAXSLOTS, NULL);
855 	mna->up.num_rx_desc = nmr->nr_rx_slots;
856 	nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
857 			1, NM_MONITOR_MAXSLOTS, NULL);
858 	error = netmap_attach_common(&mna->up);
859 	if (error) {
860 		D("attach_common error");
861 		goto put_out;
862 	}
863 
864 	/* remember the traffic directions we have to monitor */
865 	mna->flags = (nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX));
866 
867 	*na = &mna->up;
868 	netmap_adapter_get(*na);
869 
870 	/* keep the reference to the parent */
871 	ND("monitor ok");
872 
873 	/* drop the reference to the ifp, if any */
874 	if (ifp)
875 		if_rele(ifp);
876 
877 	return 0;
878 
879 put_out:
880 	netmap_unget_na(pna, ifp);
881 	free(mna, M_DEVBUF);
882 	return error;
883 }
884 
885 
886 #endif /* WITH_MONITOR */
887