xref: /freebsd/sys/dev/netmap/netmap_monitor.c (revision 389e4940069316fe667ffa263fa7d6390d0a960f)
1 /*
2  * Copyright (C) 2014-2016 Giuseppe Lettieri
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *   1. Redistributions of source code must retain the above copyright
9  *      notice, this list of conditions and the following disclaimer.
10  *   2. Redistributions in binary form must reproduce the above copyright
11  *      notice, this list of conditions and the following disclaimer in the
12  *      documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  * $FreeBSD$
29  *
30  * Monitors
31  *
32  * netmap monitors can be used to do monitoring of network traffic
33  * on another adapter, when the latter adapter is working in netmap mode.
34  *
35  * Monitors offer to userspace the same interface as any other netmap port,
36  * with as many pairs of netmap rings as the monitored adapter.
37  * However, only the rx rings are actually used. Each monitor rx ring receives
38  * the traffic transiting on both the tx and rx corresponding rings in the
39  * monitored adapter. During registration, the user can choose if she wants
40  * to intercept tx only, rx only, or both tx and rx traffic.
41  *
42  * If the monitor is not able to cope with the stream of frames, excess traffic
43  * will be dropped.
44  *
45  * If the monitored adapter leaves netmap mode, the monitor has to be restarted.
46  *
47  * Monitors can be either zero-copy or copy-based.
48  *
49  * Copy monitors see the frames before they are consumed:
50  *
51  *  - For tx traffic, this is when the application sends them, before they are
52  *    passed down to the adapter.
53  *
54  *  - For rx traffic, this is when they are received by the adapter, before
55  *    they are sent up to the application, if any (note that, if no
56  *    application is reading from a monitored ring, the ring will eventually
57  *    fill up and traffic will stop).
58  *
59  * Zero-copy monitors only see the frames after they have been consumed:
60  *
61  *  - For tx traffic, this is after the slots containing the frames have been
 *    marked as free. Note that this may happen at a considerable delay after
 *    frame transmission, since freeing of slots is often done lazily.
64  *
65  *  - For rx traffic, this is after the consumer on the monitored adapter
66  *    has released them. In most cases, the consumer is a userspace
67  *    application which may have modified the frame contents.
68  *
69  * Several copy or zero-copy monitors may be active on any ring.
70  *
71  */
72 
73 
74 #if defined(__FreeBSD__)
75 #include <sys/cdefs.h> /* prerequisite */
76 
77 #include <sys/types.h>
78 #include <sys/errno.h>
79 #include <sys/param.h>	/* defines used in kernel.h */
80 #include <sys/kernel.h>	/* types used in module initialization */
81 #include <sys/malloc.h>
82 #include <sys/poll.h>
83 #include <sys/lock.h>
84 #include <sys/rwlock.h>
85 #include <sys/selinfo.h>
86 #include <sys/sysctl.h>
87 #include <sys/socket.h> /* sockaddrs */
88 #include <net/if.h>
89 #include <net/if_var.h>
90 #include <machine/bus.h>	/* bus_dmamap_* */
91 #include <sys/refcount.h>
92 
93 
94 #elif defined(linux)
95 
96 #include "bsd_glue.h"
97 
98 #elif defined(__APPLE__)
99 
100 #warning OSX support is only partial
101 #include "osx_glue.h"
102 
103 #elif defined(_WIN32)
104 #include "win_glue.h"
105 #else
106 
107 #error	Unsupported platform
108 
109 #endif /* unsupported */
110 
111 /*
112  * common headers
113  */
114 
115 #include <net/netmap.h>
116 #include <dev/netmap/netmap_kern.h>
117 #include <dev/netmap/netmap_mem2.h>
118 
119 #ifdef WITH_MONITOR
120 
121 #define NM_MONITOR_MAXSLOTS 4096
122 
123 /*
124  ********************************************************************
125  * functions common to both kind of monitors
126  ********************************************************************
127  */
128 
129 static int netmap_zmon_reg(struct netmap_adapter *, int);
130 static int
131 nm_is_zmon(struct netmap_adapter *na)
132 {
133 	return na->nm_register == netmap_zmon_reg;
134 }
135 
136 /* nm_sync callback for the monitor's own tx rings.
137  * This makes no sense and always returns error
138  */
139 static int
140 netmap_monitor_txsync(struct netmap_kring *kring, int flags)
141 {
142 	RD(1, "%s %x", kring->name, flags);
143 	return EIO;
144 }
145 
/* nm_sync callback for the monitor's own rx rings.
 *
 * Publishes the slots released by the reader of the monitor ring:
 * nr_hwcur is advanced to rhead and a full memory barrier is issued.
 * Always returns 0.
 *
 * Note that the q_lock taken in netmap_zmon_parent_sync() and
 * netmap_monitor_parent_sync() only protects the writers among
 * themselves. Synchronization between the writers (the parent sync
 * callbacks) and the reader (this function) relies on memory barriers.
 */
static int
netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
{
	ND("%s %x", kring->name, flags);
	/* everything up to rhead has been consumed by the reader */
	kring->nr_hwcur = kring->rhead;
	mb();
	return 0;
}
160 
161 /* nm_krings_create callbacks for monitors.
162  */
163 static int
164 netmap_monitor_krings_create(struct netmap_adapter *na)
165 {
166 	int error = netmap_krings_create(na, 0);
167 	if (error)
168 		return error;
169 	/* override the host rings callbacks */
170 	na->tx_rings[na->num_tx_rings]->nm_sync = netmap_monitor_txsync;
171 	na->rx_rings[na->num_rx_rings]->nm_sync = netmap_monitor_rxsync;
172 	return 0;
173 }
174 
/* nm_krings_delete callback for monitors.
 * Monitors need no extra teardown: simply forward to the generic
 * netmap_krings_delete().
 */
static void
netmap_monitor_krings_delete(struct netmap_adapter *na)
{
	netmap_krings_delete(na);
}
181 
182 
183 static u_int
184 nm_txrx2flag(enum txrx t)
185 {
186 	return (t == NR_RX ? NR_MONITOR_RX : NR_MONITOR_TX);
187 }
188 
189 /* allocate the monitors array in the monitored kring */
190 static int
191 nm_monitor_alloc(struct netmap_kring *kring, u_int n)
192 {
193 	size_t old_len, len;
194 	struct netmap_kring **nm;
195 
196 	if (n <= kring->max_monitors)
197 		/* we already have more entries that requested */
198 		return 0;
199 
200 	old_len = sizeof(struct netmap_kring *)*kring->max_monitors;
201 	len = sizeof(struct netmap_kring *) * n;
202 	nm = nm_os_realloc(kring->monitors, len, old_len);
203 	if (nm == NULL)
204 		return ENOMEM;
205 
206 	kring->monitors = nm;
207 	kring->max_monitors = n;
208 
209 	return 0;
210 }
211 
212 /* deallocate the parent array in the parent adapter */
213 static void
214 nm_monitor_dealloc(struct netmap_kring *kring)
215 {
216 	if (kring->monitors) {
217 		if (kring->n_monitors > 0) {
218 			D("freeing not empty monitor array for %s (%d dangling monitors)!", kring->name,
219 					kring->n_monitors);
220 		}
221 		nm_os_free(kring->monitors);
222 		kring->monitors = NULL;
223 		kring->max_monitors = 0;
224 		kring->n_monitors = 0;
225 	}
226 }
227 
228 /* returns 1 iff kring has no monitors */
229 static inline int
230 nm_monitor_none(struct netmap_kring *kring)
231 {
232 	return kring->n_monitors == 0 &&
233 		kring->zmon_list[NR_TX].next == NULL &&
234 		kring->zmon_list[NR_RX].next == NULL;
235 }
236 
237 /*
238  * monitors work by replacing the nm_sync() and possibly the
239  * nm_notify() callbacks in the monitored rings.
240  */
241 static int netmap_zmon_parent_txsync(struct netmap_kring *, int);
242 static int netmap_zmon_parent_rxsync(struct netmap_kring *, int);
243 static int netmap_monitor_parent_txsync(struct netmap_kring *, int);
244 static int netmap_monitor_parent_rxsync(struct netmap_kring *, int);
245 static int netmap_monitor_parent_notify(struct netmap_kring *, int);
246 
247 /* add the monitor mkring to the list of monitors of kring.
248  * If this is the first monitor, intercept the callbacks
249  */
250 static int
251 netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int zmon)
252 {
253 	int error = NM_IRQ_COMPLETED;
254 	enum txrx t = kring->tx;
255 	struct netmap_zmon_list *z = &kring->zmon_list[t];
256 	struct netmap_zmon_list *mz = &mkring->zmon_list[t];
257 
258 	/* a zero-copy monitor which is not the first in the list
259 	 * must monitor the previous monitor
260 	 */
261 	if (zmon && z->prev != NULL)
262 		kring = z->prev;
263 
264 	/* synchronize with concurrently running nm_sync()s */
265 	nm_kr_stop(kring, NM_KR_LOCKED);
266 
267 	if (nm_monitor_none(kring)) {
268 		/* this is the first monitor, intercept callbacks */
269 		ND("intercept callbacks on %s", kring->name);
270 		kring->mon_sync = kring->nm_sync;
271 		kring->mon_notify = kring->nm_notify;
272 		if (kring->tx == NR_TX) {
273 			kring->nm_sync = netmap_monitor_parent_txsync;
274 		} else {
275 			kring->nm_sync = netmap_monitor_parent_rxsync;
276 			kring->nm_notify = netmap_monitor_parent_notify;
277 			kring->mon_tail = kring->nr_hwtail;
278 		}
279 	}
280 
281 	if (zmon) {
282 		/* append the zmon to the list */
283 		struct netmap_monitor_adapter *mna =
284 			(struct netmap_monitor_adapter *)mkring->na;
285 		struct netmap_adapter *pna;
286 
287 		if (z->prev != NULL)
288 			z->prev->zmon_list[t].next = mkring;
289 		mz->prev = z->prev;
290 		z->prev = mkring;
291 		if (z->next == NULL)
292 			z->next = mkring;
293 
294 		/* grap a reference to the previous netmap adapter
295 		 * in the chain (this may be the monitored port
296 		 * or another zero-copy monitor)
297 		 */
298 		pna = kring->na;
299 		netmap_adapter_get(pna);
300 		netmap_adapter_put(mna->priv.np_na);
301 		mna->priv.np_na = pna;
302 	} else {
303 		/* make sure the monitor array exists and is big enough */
304 		error = nm_monitor_alloc(kring, kring->n_monitors + 1);
305 		if (error)
306 			goto out;
307 		kring->monitors[kring->n_monitors] = mkring;
308 		mkring->mon_pos[kring->tx] = kring->n_monitors;
309 		kring->n_monitors++;
310 	}
311 
312 out:
313 	nm_kr_start(kring);
314 	return error;
315 }
316 
317 /* remove the monitor mkring from the list of monitors of kring.
318  * If this is the last monitor, restore the original callbacks
319  */
320 static void
321 netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring)
322 {
323 	struct netmap_zmon_list *mz = &mkring->zmon_list[kring->tx];
324 	int zmon = nm_is_zmon(mkring->na);
325 
326 
327 	if (zmon && mz->prev != NULL)
328 		kring = mz->prev;
329 
330 	/* synchronize with concurrently running nm_sync()s */
331 	nm_kr_stop(kring, NM_KR_LOCKED);
332 
333 	if (zmon) {
334 		/* remove the monitor from the list */
335 		if (mz->prev != NULL)
336 			mz->prev->zmon_list[kring->tx].next = mz->next;
337 		else
338 			kring->zmon_list[kring->tx].next = mz->next;
339 		if (mz->next != NULL) {
340 			mz->next->zmon_list[kring->tx].prev = mz->prev;
341 		} else {
342 			kring->zmon_list[kring->tx].prev = mz->prev;
343 		}
344 	} else {
345 		/* this is a copy monitor */
346 		uint32_t mon_pos = mkring->mon_pos[kring->tx];
347 		kring->n_monitors--;
348 		if (mon_pos != kring->n_monitors) {
349 			kring->monitors[mon_pos] =
350 				kring->monitors[kring->n_monitors];
351 			kring->monitors[mon_pos]->mon_pos[kring->tx] = mon_pos;
352 		}
353 		kring->monitors[kring->n_monitors] = NULL;
354 		if (kring->n_monitors == 0) {
355 			nm_monitor_dealloc(kring);
356 		}
357 	}
358 
359 	if (nm_monitor_none(kring)) {
360 		/* this was the last monitor, restore the callbacks */
361 		ND("%s: restoring sync on %s: %p", mkring->name, kring->name,
362 				kring->mon_sync);
363 		kring->nm_sync = kring->mon_sync;
364 		kring->mon_sync = NULL;
365 		if (kring->tx == NR_RX) {
366 			ND("%s: restoring notify on %s: %p",
367 					mkring->name, kring->name, kring->mon_notify);
368 			kring->nm_notify = kring->mon_notify;
369 			kring->mon_notify = NULL;
370 		}
371 	}
372 
373 	nm_kr_start(kring);
374 }
375 
376 
377 /* This is called when the monitored adapter leaves netmap mode
378  * (see netmap_do_unregif).
379  * We need to notify the monitors that the monitored rings are gone.
380  * We do this by setting their mna->priv.np_na to NULL.
381  * Note that the rings are already stopped when this happens, so
382  * no monitor ring callback can be active.
383  */
384 void
385 netmap_monitor_stop(struct netmap_adapter *na)
386 {
387 	enum txrx t;
388 
389 	for_rx_tx(t) {
390 		u_int i;
391 
392 		for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
393 			struct netmap_kring *kring = NMR(na, t)[i];
394 			struct netmap_kring *zkring;
395 			u_int j;
396 
397 			for (j = 0; j < kring->n_monitors; j++) {
398 				struct netmap_kring *mkring =
399 					kring->monitors[j];
400 				struct netmap_monitor_adapter *mna =
401 					(struct netmap_monitor_adapter *)mkring->na;
402 				/* forget about this adapter */
403 				if (mna->priv.np_na != NULL) {
404 					netmap_adapter_put(mna->priv.np_na);
405 					mna->priv.np_na = NULL;
406 				}
407 			}
408 
409 			zkring = kring->zmon_list[kring->tx].next;
410 			if (zkring != NULL) {
411 				struct netmap_monitor_adapter *next =
412 					(struct netmap_monitor_adapter *)zkring->na;
413 				struct netmap_monitor_adapter *this =
414 						(struct netmap_monitor_adapter *)na;
415 				struct netmap_adapter *pna = this->priv.np_na;
416 				/* let the next monitor forget about us */
417 				if (next->priv.np_na != NULL) {
418 					netmap_adapter_put(next->priv.np_na);
419 				}
420 				if (pna != NULL && nm_is_zmon(na)) {
421 					/* we are a monitor ourselves and we may
422 					 * need to pass down the reference to
423 					 * the previous adapter in the chain
424 					 */
425 					netmap_adapter_get(pna);
426 					next->priv.np_na = pna;
427 					continue;
428 				}
429 				next->priv.np_na = NULL;
430 			}
431 		}
432 	}
433 }
434 
435 
/* common function for the nm_register() callbacks of both kinds of
 * monitors.
 *
 * na:    the monitor adapter
 * onoff: non-zero to enter netmap mode, zero to leave it
 * zmon:  non-zero for a zero-copy monitor (only used when attaching)
 *
 * When switching on, every monitor kring with a pending "on" request
 * is put in NKR_NETMAP_ON mode and each such rx kring i is attached to
 * kring i of the parent, in the direction(s) selected by mna->flags.
 * When switching off, the same krings are detached, unless the parent
 * is already gone (priv.np_na == NULL).
 *
 * Returns ENXIO if asked to switch on while the parent is gone,
 * 0 otherwise.
 */
static int
netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
{
	struct netmap_monitor_adapter *mna =
		(struct netmap_monitor_adapter *)na;
	struct netmap_priv_d *priv = &mna->priv;
	struct netmap_adapter *pna = priv->np_na;
	struct netmap_kring *kring, *mkring;
	int i;
	enum txrx t, s;

	ND("%p: onoff %d", na, onoff);
	if (onoff) {
		if (pna == NULL) {
			/* parent left netmap mode, fatal */
			D("%s: internal error", na->name);
			return ENXIO;
		}
		for_rx_tx(t) {
			/* the "+ 1" accounts for the host ring */
			for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
				mkring = NMR(na, t)[i];
				if (!nm_kring_pending_on(mkring))
					continue;
				mkring->nr_mode = NKR_NETMAP_ON;
				/* only the rx krings of the monitor are
				 * attached to the parent
				 */
				if (t == NR_TX)
					continue;
				for_rx_tx(s) {
					/* the parent has no kring i in this
					 * direction
					 */
					if (i > nma_get_nrings(pna, s))
						continue;
					if (mna->flags & nm_txrx2flag(s)) {
						kring = NMR(pna, s)[i];
						/* NOTE(review): the value
						 * returned by
						 * netmap_monitor_add() is
						 * not checked here
						 */
						netmap_monitor_add(mkring, kring, zmon);
					}
				}
			}
		}
		na->na_flags |= NAF_NETMAP_ON;
	} else {
		if (na->active_fds == 0)
			na->na_flags &= ~NAF_NETMAP_ON;
		for_rx_tx(t) {
			for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
				mkring = NMR(na, t)[i];
				if (!nm_kring_pending_off(mkring))
					continue;
				mkring->nr_mode = NKR_NETMAP_OFF;
				/* tx krings were never attached */
				if (t == NR_TX)
					continue;
				/* we cannot access the parent krings if the parent
				 * has left netmap mode. This is signaled by a NULL
				 * pna pointer
				 */
				if (pna == NULL)
					continue;
				for_rx_tx(s) {
					/* mirror the checks done when
					 * attaching
					 */
					if (i > nma_get_nrings(pna, s))
						continue;
					if (mna->flags & nm_txrx2flag(s)) {
						kring = NMR(pna, s)[i];
						netmap_monitor_del(mkring, kring);
					}
				}
			}
		}
	}
	return 0;
}
506 
507 /*
508  ****************************************************************
509  * functions specific for zero-copy monitors
510  ****************************************************************
511  */
512 
/*
 * Common function for both zero-copy tx and rx nm_sync()
 * callbacks.
 *
 * kring: the monitored kring
 * flags: passed unchanged to the original nm_sync (kring->mon_sync)
 * tx:    NR_TX or NR_RX, selects the zero-copy monitor list to serve
 *
 * The slots released on the monitored ring are handed over to the
 * first zero-copy monitor by swapping buffers (buf_idx and len)
 * between the monitored ring and the monitor ring; no data is copied.
 * If the monitor ring does not have room for all of them, the oldest
 * released slots are skipped.
 *
 * Returns 0 if there is no monitor on the list, otherwise the value
 * returned by the original nm_sync.
 */
static int
netmap_zmon_parent_sync(struct netmap_kring *kring, int flags, enum txrx tx)
{
	struct netmap_kring *mkring = kring->zmon_list[tx].next;
	struct netmap_ring *ring = kring->ring, *mring;
	int error = 0;
	int rel_slots, free_slots, busy, sent = 0;
	u_int beg, end, i;
	u_int lim = kring->nkr_num_slots - 1,
	      mlim; // = mkring->nkr_num_slots - 1;

	if (mkring == NULL) {
		RD(5, "NULL monitor on %s", kring->name);
		return 0;
	}
	mring = mkring->ring;
	mlim = mkring->nkr_num_slots - 1;

	/* get the released slots (rel_slots) */
	if (tx == NR_TX) {
		/* tx: the slots freed by the original txsync, i.e. the
		 * ones between the old and the new nr_hwtail
		 */
		beg = kring->nr_hwtail + 1;
		error = kring->mon_sync(kring, flags);
		if (error)
			return error;
		end = kring->nr_hwtail + 1;
	} else { /* NR_RX */
		/* rx: the slots returned by the consumer, which the
		 * original rxsync (called at the end) has not seen yet
		 */
		beg = kring->nr_hwcur;
		end = kring->rhead;
	}

	rel_slots = end - beg;
	if (rel_slots < 0)
		rel_slots += kring->nkr_num_slots;

	if (!rel_slots) {
		/* no released slots, but we still need
		 * to call rxsync if this is a rx ring
		 */
		goto out_rxsync;
	}

	/* we need to lock the monitor receive ring, since it
	 * is the target of both tx and rx traffic from the monitored
	 * adapter
	 */
	mtx_lock(&mkring->q_lock);
	/* get the free slots available on the monitor ring */
	i = mkring->nr_hwtail;
	busy = i - mkring->nr_hwcur;
	if (busy < 0)
		busy += mkring->nkr_num_slots;
	free_slots = mlim - busy;

	if (!free_slots)
		goto out;

	/* swap min(free_slots, rel_slots) slots */
	if (free_slots < rel_slots) {
		/* not enough room: skip the oldest released slots */
		beg += (rel_slots - free_slots);
		rel_slots = free_slots;
	}
	/* beg may have gone past the end of the ring, either here or
	 * in the "+ 1" of the tx case above
	 */
	if (unlikely(beg >= kring->nkr_num_slots))
		beg -= kring->nkr_num_slots;

	sent = rel_slots;
	for ( ; rel_slots; rel_slots--) {
		struct netmap_slot *s = &ring->slot[beg];
		struct netmap_slot *ms = &mring->slot[i];
		uint32_t tmp;

		/* exchange the buffers of the two slots */
		tmp = ms->buf_idx;
		ms->buf_idx = s->buf_idx;
		s->buf_idx = tmp;
		ND(5, "beg %d buf_idx %d", beg, tmp);

		/* ... and their lengths */
		tmp = ms->len;
		ms->len = s->len;
		s->len = tmp;

		/* flag the buffer change on the monitored slot */
		s->flags |= NS_BUF_CHANGED;

		beg = nm_next(beg, lim);
		i = nm_next(i, mlim);

	}
	/* make the slot updates visible before publishing the new tail */
	mb();
	mkring->nr_hwtail = i;

out:
	mtx_unlock(&mkring->q_lock);

	if (sent) {
		/* notify the new frames to the monitor */
		mkring->nm_notify(mkring, 0);
	}

out_rxsync:
	if (tx == NR_RX)
		error = kring->mon_sync(kring, flags);

	return error;
}
619 
/* tx flavour of netmap_zmon_parent_sync(): serves the zero-copy monitors
 * of a monitored tx ring. It is called by netmap_monitor_parent_txsync()
 * when the NR_TX zero-copy list of the kring is not empty.
 */
static int
netmap_zmon_parent_txsync(struct netmap_kring *kring, int flags)
{
	return netmap_zmon_parent_sync(kring, flags, NR_TX);
}
626 
/* rx flavour of netmap_zmon_parent_sync(): serves the zero-copy monitors
 * of a monitored rx ring. It is called by netmap_monitor_parent_rxsync()
 * when the NR_RX zero-copy list of the kring is not empty.
 */
static int
netmap_zmon_parent_rxsync(struct netmap_kring *kring, int flags)
{
	return netmap_zmon_parent_sync(kring, flags, NR_RX);
}
633 
/* nm_register callback for zero-copy monitors: forwards to
 * netmap_monitor_reg_common() with the zero-copy flag set.
 * The address of this function is also what nm_is_zmon() compares
 * against to recognize zero-copy monitors.
 */
static int
netmap_zmon_reg(struct netmap_adapter *na, int onoff)
{
	return netmap_monitor_reg_common(na, onoff, 1 /* zcopy */);
}
639 
640 /* nm_dtor callback for monitors */
641 static void
642 netmap_zmon_dtor(struct netmap_adapter *na)
643 {
644 	struct netmap_monitor_adapter *mna =
645 		(struct netmap_monitor_adapter *)na;
646 	struct netmap_priv_d *priv = &mna->priv;
647 	struct netmap_adapter *pna = priv->np_na;
648 
649 	netmap_adapter_put(pna);
650 }
651 
652 /*
653  ****************************************************************
654  * functions specific for copy monitors
655  ****************************************************************
656  */
657 
/* Copy new_slots slots of the monitored kring, starting at index
 * first_new, into the rx ring of every copy monitor attached to kring.
 *
 * kring:     the monitored kring
 * first_new: index of the first slot to copy
 * new_slots: number of slots to copy
 *
 * Each monitor is served independently: if its ring does not have room
 * for all the slots, the oldest ones are skipped for that monitor.
 * Frames longer than the monitor buffer size are truncated. A monitor
 * is notified only if at least one slot was copied into its ring.
 */
static void
netmap_monitor_parent_sync(struct netmap_kring *kring, u_int first_new, int new_slots)
{
	u_int j;

	for (j = 0; j < kring->n_monitors; j++) {
		struct netmap_kring *mkring = kring->monitors[j];
		u_int i, mlim, beg;
		int free_slots, busy, sent = 0, m;
		u_int lim = kring->nkr_num_slots - 1;
		struct netmap_ring *ring = kring->ring, *mring = mkring->ring;
		/* largest frame that fits in a monitor buffer */
		u_int max_len = NETMAP_BUF_SIZE(mkring->na);

		mlim = mkring->nkr_num_slots - 1;

		/* we need to lock the monitor receive ring, since it
		 * is the target of both tx and rx traffic from the monitored
		 * adapter
		 */
		mtx_lock(&mkring->q_lock);
		/* get the free slots available on the monitor ring */
		i = mkring->nr_hwtail;
		busy = i - mkring->nr_hwcur;
		if (busy < 0)
			busy += mkring->nkr_num_slots;
		free_slots = mlim - busy;

		if (!free_slots)
			goto out;

		/* copy min(free_slots, new_slots) slots */
		m = new_slots;
		beg = first_new;
		if (free_slots < m) {
			/* not enough room: skip the oldest new slots */
			beg += (m - free_slots);
			if (beg >= kring->nkr_num_slots)
				beg -= kring->nkr_num_slots;
			m = free_slots;
		}

		for ( ; m; m--) {
			struct netmap_slot *s = &ring->slot[beg];
			struct netmap_slot *ms = &mring->slot[i];
			u_int copy_len = s->len;
			char *src = NMB(kring->na, s),
			     *dst = NMB(mkring->na, ms);

			if (unlikely(copy_len > max_len)) {
				RD(5, "%s->%s: truncating %d to %d", kring->name,
						mkring->name, copy_len, max_len);
				copy_len = max_len;
			}

			memcpy(dst, src, copy_len);
			ms->len = copy_len;
			sent++;

			beg = nm_next(beg, lim);
			i = nm_next(i, mlim);
		}
		/* make the copied data visible before publishing the
		 * new tail
		 */
		mb();
		mkring->nr_hwtail = i;
	out:
		mtx_unlock(&mkring->q_lock);

		if (sent) {
			/* notify the new frames to the monitor */
			mkring->nm_notify(mkring, 0);
		}
	}
}
729 
730 /* callback used to replace the nm_sync callback in the monitored tx rings */
731 static int
732 netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags)
733 {
734 	u_int first_new;
735 	int new_slots;
736 
737 	/* get the new slots */
738 	if (kring->n_monitors > 0) {
739 		first_new = kring->nr_hwcur;
740 		new_slots = kring->rhead - first_new;
741 		if (new_slots < 0)
742 			new_slots += kring->nkr_num_slots;
743 		if (new_slots)
744 			netmap_monitor_parent_sync(kring, first_new, new_slots);
745 	}
746 	if (kring->zmon_list[NR_TX].next != NULL) {
747 		return netmap_zmon_parent_txsync(kring, flags);
748 	}
749 	return kring->mon_sync(kring, flags);
750 }
751 
752 /* callback used to replace the nm_sync callback in the monitored rx rings */
753 static int
754 netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags)
755 {
756 	u_int first_new;
757 	int new_slots, error;
758 
759 	/* get the new slots */
760 	if (kring->zmon_list[NR_RX].next != NULL) {
761 		error = netmap_zmon_parent_rxsync(kring, flags);
762 	} else {
763 		error =  kring->mon_sync(kring, flags);
764 	}
765 	if (error)
766 		return error;
767 	if (kring->n_monitors > 0) {
768 		first_new = kring->mon_tail;
769 		new_slots = kring->nr_hwtail - first_new;
770 		if (new_slots < 0)
771 			new_slots += kring->nkr_num_slots;
772 		if (new_slots)
773 			netmap_monitor_parent_sync(kring, first_new, new_slots);
774 		kring->mon_tail = kring->nr_hwtail;
775 	}
776 	return 0;
777 }
778 
/* callback used to replace the nm_notify() callback in the monitored rx rings.
 *
 * If the kring can be acquired and has copy monitors, a forced rxsync is
 * run so that the monitors see the new frames; then the notification is
 * forwarded to the proper callback. If the kring cannot be acquired,
 * everything is skipped and NM_IRQ_COMPLETED is returned.
 */
static int
netmap_monitor_parent_notify(struct netmap_kring *kring, int flags)
{
	int (*notify)(struct netmap_kring*, int);
	ND(5, "%s %x", kring->name, flags);
	/* ?xsync callbacks have tryget called by their callers
	 * (NIOCREGIF and poll()), but here we have to call it
	 * by ourself
	 */
	if (nm_kr_tryget(kring, 0, NULL)) {
		/* in all cases, just skip the sync */
		return NM_IRQ_COMPLETED;
	}
	if (kring->n_monitors > 0) {
		/* the value returned by the sync is not used here */
		netmap_monitor_parent_rxsync(kring, NAF_FORCE_READ);
	}
	if (nm_monitor_none(kring)) {
		/* we are no longer monitoring this ring, so both
		 * mon_sync and mon_notify are NULL
		 */
		notify = kring->nm_notify;
	} else {
		notify = kring->mon_notify;
	}
	/* the callback was chosen while holding the kring; it is
	 * invoked after the kring has been released
	 */
	nm_kr_put(kring);
	return notify(kring, flags);
}
807 
808 
/* nm_register callback for copy monitors: forwards to
 * netmap_monitor_reg_common() with the zero-copy flag cleared.
 */
static int
netmap_monitor_reg(struct netmap_adapter *na, int onoff)
{
	return netmap_monitor_reg_common(na, onoff, 0 /* no zcopy */);
}
814 
815 static void
816 netmap_monitor_dtor(struct netmap_adapter *na)
817 {
818 	struct netmap_monitor_adapter *mna =
819 		(struct netmap_monitor_adapter *)na;
820 	struct netmap_priv_d *priv = &mna->priv;
821 	struct netmap_adapter *pna = priv->np_na;
822 
823 	netmap_adapter_put(pna);
824 }
825 
826 
827 /* check if req is a request for a monitor adapter that we can satisfy */
828 int
829 netmap_get_monitor_na(struct nmreq_header *hdr, struct netmap_adapter **na,
830 			struct netmap_mem_d *nmd, int create)
831 {
832 	struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
833 	struct nmreq_register preq;
834 	struct netmap_adapter *pna; /* parent adapter */
835 	struct netmap_monitor_adapter *mna;
836 	struct ifnet *ifp = NULL;
837 	int  error;
838 	int zcopy = (req->nr_flags & NR_ZCOPY_MON);
839 	char monsuff[10] = "";
840 
841 	if (zcopy) {
842 		req->nr_flags |= (NR_MONITOR_TX | NR_MONITOR_RX);
843 	}
844 	if ((req->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
845 		ND("not a monitor");
846 		return 0;
847 	}
848 	/* this is a request for a monitor adapter */
849 
850 	ND("flags %lx", req->nr_flags);
851 
852 	/* First, try to find the adapter that we want to monitor.
853 	 * We use the same req, after we have turned off the monitor flags.
854 	 * In this way we can potentially monitor everything netmap understands,
855 	 * except other monitors.
856 	 */
857 	memcpy(&preq, req, sizeof(preq));
858 	preq.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON);
859 	hdr->nr_body = (uintptr_t)&preq;
860 	error = netmap_get_na(hdr, &pna, &ifp, nmd, create);
861 	hdr->nr_body = (uintptr_t)req;
862 	if (error) {
863 		D("parent lookup failed: %d", error);
864 		return error;
865 	}
866 	ND("found parent: %s", pna->name);
867 
868 	if (!nm_netmap_on(pna)) {
869 		/* parent not in netmap mode */
870 		/* XXX we can wait for the parent to enter netmap mode,
871 		 * by intercepting its nm_register callback (2014-03-16)
872 		 */
873 		D("%s not in netmap mode", pna->name);
874 		error = EINVAL;
875 		goto put_out;
876 	}
877 
878 	mna = nm_os_malloc(sizeof(*mna));
879 	if (mna == NULL) {
880 		D("memory error");
881 		error = ENOMEM;
882 		goto put_out;
883 	}
884 	mna->priv.np_na = pna;
885 
886 	/* grab all the rings we need in the parent */
887 	error = netmap_interp_ringid(&mna->priv, req->nr_mode, req->nr_ringid,
888 					req->nr_flags);
889 	if (error) {
890 		D("ringid error");
891 		goto free_out;
892 	}
893 	if (mna->priv.np_qlast[NR_TX] - mna->priv.np_qfirst[NR_TX] == 1) {
894 		snprintf(monsuff, 10, "-%d", mna->priv.np_qfirst[NR_TX]);
895 	}
896 	snprintf(mna->up.name, sizeof(mna->up.name), "%s%s/%s%s%s", pna->name,
897 			monsuff,
898 			zcopy ? "z" : "",
899 			(req->nr_flags & NR_MONITOR_RX) ? "r" : "",
900 			(req->nr_flags & NR_MONITOR_TX) ? "t" : "");
901 
902 	/* the monitor supports the host rings iff the parent does */
903 	mna->up.na_flags |= (pna->na_flags & NAF_HOST_RINGS);
904 	/* a do-nothing txsync: monitors cannot be used to inject packets */
905 	mna->up.nm_txsync = netmap_monitor_txsync;
906 	mna->up.nm_rxsync = netmap_monitor_rxsync;
907 	mna->up.nm_krings_create = netmap_monitor_krings_create;
908 	mna->up.nm_krings_delete = netmap_monitor_krings_delete;
909 	mna->up.num_tx_rings = 1; // XXX what should we do here with chained zmons?
910 	/* we set the number of our rx_rings to be max(num_rx_rings, num_rx_rings)
911 	 * in the parent
912 	 */
913 	mna->up.num_rx_rings = pna->num_rx_rings;
914 	if (pna->num_tx_rings > pna->num_rx_rings)
915 		mna->up.num_rx_rings = pna->num_tx_rings;
916 	/* by default, the number of slots is the same as in
917 	 * the parent rings, but the user may ask for a different
918 	 * number
919 	 */
920 	mna->up.num_tx_desc = req->nr_tx_slots;
921 	nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
922 			1, NM_MONITOR_MAXSLOTS, NULL);
923 	mna->up.num_rx_desc = req->nr_rx_slots;
924 	nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
925 			1, NM_MONITOR_MAXSLOTS, NULL);
926 	if (zcopy) {
927 		mna->up.nm_register = netmap_zmon_reg;
928 		mna->up.nm_dtor = netmap_zmon_dtor;
929 		/* to have zero copy, we need to use the same memory allocator
930 		 * as the monitored port
931 		 */
932 		mna->up.nm_mem = netmap_mem_get(pna->nm_mem);
933 		/* and the allocator cannot be changed */
934 		mna->up.na_flags |= NAF_MEM_OWNER;
935 	} else {
936 		mna->up.nm_register = netmap_monitor_reg;
937 		mna->up.nm_dtor = netmap_monitor_dtor;
938 		mna->up.nm_mem = netmap_mem_private_new(
939 				mna->up.num_tx_rings,
940 				mna->up.num_tx_desc,
941 				mna->up.num_rx_rings,
942 				mna->up.num_rx_desc,
943 				0, /* extra bufs */
944 				0, /* pipes */
945 				&error);
946 		if (mna->up.nm_mem == NULL)
947 			goto put_out;
948 	}
949 
950 	error = netmap_attach_common(&mna->up);
951 	if (error) {
952 		D("attach_common error");
953 		goto mem_put_out;
954 	}
955 
956 	/* remember the traffic directions we have to monitor */
957 	mna->flags = (req->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON));
958 
959 	*na = &mna->up;
960 	netmap_adapter_get(*na);
961 
962 	/* keep the reference to the parent */
963 	ND("monitor ok");
964 
965 	/* drop the reference to the ifp, if any */
966 	if (ifp)
967 		if_rele(ifp);
968 
969 	return 0;
970 
971 mem_put_out:
972 	netmap_mem_put(mna->up.nm_mem);
973 free_out:
974 	nm_os_free(mna);
975 put_out:
976 	netmap_unget_na(pna, ifp);
977 	return error;
978 }
979 
980 
981 #endif /* WITH_MONITOR */
982