xref: /freebsd/sys/dev/netmap/netmap_monitor.c (revision d0b2dbfa0ecf2bbc9709efc5e20baf8e4b44bbbf)
1 /*
2  * Copyright (C) 2014-2016 Giuseppe Lettieri
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *   1. Redistributions of source code must retain the above copyright
9  *      notice, this list of conditions and the following disclaimer.
10  *   2. Redistributions in binary form must reproduce the above copyright
11  *      notice, this list of conditions and the following disclaimer in the
12  *      documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  *
29  * Monitors
30  *
31  * netmap monitors can be used to do monitoring of network traffic
32  * on another adapter, when the latter adapter is working in netmap mode.
33  *
34  * Monitors offer to userspace the same interface as any other netmap port,
35  * with as many pairs of netmap rings as the monitored adapter.
36  * However, only the rx rings are actually used. Each monitor rx ring receives
37  * the traffic transiting on both the tx and rx corresponding rings in the
38  * monitored adapter. During registration, the user can choose if she wants
39  * to intercept tx only, rx only, or both tx and rx traffic.
40  * The slots containing traffic intercepted in the tx direction will have
41  * the NS_TXMON flag set.
42  *
43  * If the monitor is not able to cope with the stream of frames, excess traffic
44  * will be dropped.
45  *
46  * If the monitored adapter leaves netmap mode, the monitor has to be restarted.
47  *
48  * Monitors can be either zero-copy or copy-based.
49  *
50  * Copy monitors see the frames before they are consumed:
51  *
52  *  - For tx traffic, this is when the application sends them, before they are
53  *    passed down to the adapter.
54  *
55  *  - For rx traffic, this is when they are received by the adapter, before
56  *    they are sent up to the application, if any (note that, if no
57  *    application is reading from a monitored ring, the ring will eventually
58  *    fill up and traffic will stop).
59  *
60  * Zero-copy monitors only see the frames after they have been consumed:
61  *
62  *  - For tx traffic, this is after the slots containing the frames have been
63  *    marked as free. Note that this may happen at a considerable delay after
64  *    frame transmission, since freeing of slots is often done lazily.
65  *
66  *  - For rx traffic, this is after the consumer on the monitored adapter
67  *    has released them. In most cases, the consumer is a userspace
68  *    application which may have modified the frame contents.
69  *
70  * Several copy or zero-copy monitors may be active on any ring.
71  *
72  */
73 
74 
75 #if defined(__FreeBSD__)
76 #include <sys/cdefs.h> /* prerequisite */
77 
78 #include <sys/types.h>
79 #include <sys/errno.h>
80 #include <sys/param.h>	/* defines used in kernel.h */
81 #include <sys/kernel.h>	/* types used in module initialization */
82 #include <sys/malloc.h>
83 #include <sys/poll.h>
84 #include <sys/lock.h>
85 #include <sys/rwlock.h>
86 #include <sys/selinfo.h>
87 #include <sys/sysctl.h>
88 #include <sys/socket.h> /* sockaddrs */
89 #include <net/if.h>
90 #include <net/if_var.h>
91 #include <machine/bus.h>	/* bus_dmamap_* */
92 #include <sys/refcount.h>
93 
94 
95 #elif defined(linux)
96 
97 #include "bsd_glue.h"
98 
99 #elif defined(__APPLE__)
100 
101 #warning OSX support is only partial
102 #include "osx_glue.h"
103 
104 #elif defined(_WIN32)
105 #include "win_glue.h"
106 #else
107 
108 #error	Unsupported platform
109 
110 #endif /* unsupported */
111 
112 /*
113  * common headers
114  */
115 
116 #include <net/netmap.h>
117 #include <dev/netmap/netmap_kern.h>
118 #include <dev/netmap/netmap_mem2.h>
119 
120 #ifdef WITH_MONITOR
121 
122 #define NM_MONITOR_MAXSLOTS 4096
123 
124 /*
125  ********************************************************************
126  * functions common to both kind of monitors
127  ********************************************************************
128  */
129 
130 static int netmap_zmon_reg(struct netmap_adapter *, int);
131 static int
132 nm_is_zmon(struct netmap_adapter *na)
133 {
134 	return na->nm_register == netmap_zmon_reg;
135 }
136 
137 /* nm_sync callback for the monitor's own tx rings.
138  * This makes no sense and always returns error
139  */
140 static int
141 netmap_monitor_txsync(struct netmap_kring *kring, int flags)
142 {
143 	nm_prlim(1, "%s %x", kring->name, flags);
144 	return EIO;
145 }
146 
147 /* nm_sync callback for the monitor's own rx rings.
148  * Note that the lock in netmap_zmon_parent_sync only protects
149  * writers among themselves. Synchronization between writers
150  * (i.e., netmap_zmon_parent_txsync and netmap_zmon_parent_rxsync)
151  * and readers (i.e., netmap_zmon_rxsync) relies on memory barriers.
152  */
153 static int
154 netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
155 {
156 	struct netmap_monitor_adapter *mna =
157 		(struct netmap_monitor_adapter *)kring->na;
158 	if (unlikely(mna->priv.np_na == NULL)) {
159 		/* parent left netmap mode */
160 		return EIO;
161 	}
162 	nm_prdis("%s %x", kring->name, flags);
163 	kring->nr_hwcur = kring->rhead;
164 	mb();
165 	return 0;
166 }
167 
168 /* nm_krings_create callbacks for monitors.
169  */
170 static int
171 netmap_monitor_krings_create(struct netmap_adapter *na)
172 {
173 	int error = netmap_krings_create(na, 0);
174 	enum txrx t;
175 
176 	if (error)
177 		return error;
178 	/* override the host rings callbacks */
179 	for_rx_tx(t) {
180 		int i;
181 		u_int first = nma_get_nrings(na, t);
182 		for (i = 0; i < nma_get_host_nrings(na, t); i++) {
183 			struct netmap_kring *kring = NMR(na, t)[first + i];
184 			kring->nm_sync = t == NR_TX ? netmap_monitor_txsync :
185 						      netmap_monitor_rxsync;
186 		}
187 	}
188 	return 0;
189 }
190 
/* nm_krings_delete callback for monitors: simply forwards the request
 * to netmap_krings_delete().
 */
static void
netmap_monitor_krings_delete(struct netmap_adapter *na)
{
	netmap_krings_delete(na);
}
197 
198 
199 static u_int
200 nm_txrx2flag(enum txrx t)
201 {
202 	return (t == NR_RX ? NR_MONITOR_RX : NR_MONITOR_TX);
203 }
204 
205 /* allocate the monitors array in the monitored kring */
206 static int
207 nm_monitor_alloc(struct netmap_kring *kring, u_int n)
208 {
209 	size_t old_len, len;
210 	struct netmap_kring **nm;
211 
212 	if (n <= kring->max_monitors)
213 		/* we already have more entries that requested */
214 		return 0;
215 
216 	old_len = sizeof(struct netmap_kring *)*kring->max_monitors;
217 	len = sizeof(struct netmap_kring *) * n;
218 	nm = nm_os_realloc(kring->monitors, len, old_len);
219 	if (nm == NULL)
220 		return ENOMEM;
221 
222 	kring->monitors = nm;
223 	kring->max_monitors = n;
224 
225 	return 0;
226 }
227 
228 /* deallocate the parent array in the parent adapter */
229 static void
230 nm_monitor_dealloc(struct netmap_kring *kring)
231 {
232 	if (kring->monitors) {
233 		if (kring->n_monitors > 0) {
234 			nm_prerr("freeing not empty monitor array for %s (%d dangling monitors)!",
235 			    kring->name, kring->n_monitors);
236 		}
237 		nm_os_free(kring->monitors);
238 		kring->monitors = NULL;
239 		kring->max_monitors = 0;
240 		kring->n_monitors = 0;
241 	}
242 }
243 
244 /* returns 1 iff kring has no monitors */
245 static inline int
246 nm_monitor_none(struct netmap_kring *kring)
247 {
248 	return kring->n_monitors == 0 &&
249 		kring->zmon_list[NR_TX].next == NULL &&
250 		kring->zmon_list[NR_RX].next == NULL;
251 }
252 
253 /*
254  * monitors work by replacing the nm_sync() and possibly the
255  * nm_notify() callbacks in the monitored rings.
256  */
257 static int netmap_zmon_parent_txsync(struct netmap_kring *, int);
258 static int netmap_zmon_parent_rxsync(struct netmap_kring *, int);
259 static int netmap_monitor_parent_txsync(struct netmap_kring *, int);
260 static int netmap_monitor_parent_rxsync(struct netmap_kring *, int);
261 static int netmap_monitor_parent_notify(struct netmap_kring *, int);
262 
/* Do-nothing sync callback: ignores its arguments and returns 0.
 * It is saved as mon_sync when the intercepted kring has no nm_sync
 * of its own (see nm_monitor_intercept_callbacks).
 */
static int
nm_monitor_dummycb(struct netmap_kring *kring, int flags)
{
	(void)flags;
	(void)kring;
	return 0;
}
270 
271 static void
272 nm_monitor_intercept_callbacks(struct netmap_kring *kring)
273 {
274 	nm_prdis("intercept callbacks on %s", kring->name);
275 	kring->mon_sync = kring->nm_sync != NULL ?
276 		kring->nm_sync : nm_monitor_dummycb;
277 	kring->mon_notify = kring->nm_notify;
278 	if (kring->tx == NR_TX) {
279 		kring->nm_sync = netmap_monitor_parent_txsync;
280 	} else {
281 		kring->nm_sync = netmap_monitor_parent_rxsync;
282 		kring->nm_notify = netmap_monitor_parent_notify;
283 		kring->mon_tail = kring->nr_hwtail;
284 	}
285 }
286 
287 static void
288 nm_monitor_restore_callbacks(struct netmap_kring *kring)
289 {
290 	nm_prdis("restoring callbacks on %s", kring->name);
291 	kring->nm_sync = kring->mon_sync;
292 	kring->mon_sync = NULL;
293 	if (kring->tx == NR_RX) {
294 		kring->nm_notify = kring->mon_notify;
295 	}
296 	kring->mon_notify = NULL;
297 }
298 
299 static struct netmap_kring *
300 nm_zmon_list_head(struct netmap_kring *mkring, enum txrx t)
301 {
302 	struct netmap_adapter *na = mkring->na;
303 	struct netmap_kring *kring = mkring;
304 	struct netmap_zmon_list *z = &kring->zmon_list[t];
305 	/* reach the head of the list */
306 	while (nm_is_zmon(na) && z->prev != NULL) {
307 		kring = z->prev;
308 		na = kring->na;
309 		z = &kring->zmon_list[t];
310 	}
311 	return nm_is_zmon(na) ? NULL : kring;
312 }
313 
314 /* add the monitor mkring to the list of monitors of kring.
315  * If this is the first monitor, intercept the callbacks
316  */
317 static int
318 netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int zmon)
319 {
320 	int error = NM_IRQ_COMPLETED;
321 	enum txrx t = kring->tx;
322 	struct netmap_zmon_list *z = &kring->zmon_list[t];
323 	struct netmap_zmon_list *mz = &mkring->zmon_list[t];
324 	struct netmap_kring *ikring = kring;
325 
326 	/* a zero-copy monitor which is not the first in the list
327 	 * must monitor the previous monitor
328 	 */
329 	if (zmon && z->prev != NULL)
330 		ikring = z->prev; /* tail of the list */
331 
332 	/* synchronize with concurrently running nm_sync()s */
333 	nm_kr_stop(kring, NM_KR_LOCKED);
334 
335 	if (nm_monitor_none(ikring)) {
336 		/* this is the first monitor, intercept the callbacks */
337 		nm_prdis("%s: intercept callbacks on %s", mkring->name, ikring->name);
338 		nm_monitor_intercept_callbacks(ikring);
339 	}
340 
341 	if (zmon) {
342 		/* append the zmon to the list */
343 		ikring->zmon_list[t].next = mkring;
344 		z->prev = mkring; /* new tail */
345 		mz->prev = ikring;
346 		mz->next = NULL;
347 		/* grab a reference to the previous netmap adapter
348 		 * in the chain (this may be the monitored port
349 		 * or another zero-copy monitor)
350 		 */
351 		netmap_adapter_get(ikring->na);
352 	} else {
353 		/* make sure the monitor array exists and is big enough */
354 		error = nm_monitor_alloc(kring, kring->n_monitors + 1);
355 		if (error)
356 			goto out;
357 		kring->monitors[kring->n_monitors] = mkring;
358 		mkring->mon_pos[kring->tx] = kring->n_monitors;
359 		kring->n_monitors++;
360 	}
361 
362 out:
363 	nm_kr_start(kring);
364 	return error;
365 }
366 
367 /* remove the monitor mkring from the list of monitors of kring.
368  * If this is the last monitor, restore the original callbacks
369  */
370 static void
371 netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring, enum txrx t)
372 {
373 	int zmon = nm_is_zmon(mkring->na);
374 	struct netmap_zmon_list *mz = &mkring->zmon_list[t];
375 	struct netmap_kring *ikring = kring;
376 
377 
378 	if (zmon) {
379 		/* get to the head of the list */
380 		kring = nm_zmon_list_head(mkring, t);
381 		ikring = mz->prev;
382 	}
383 
384 	/* synchronize with concurrently running nm_sync()s
385 	 * if kring is NULL (orphaned list) the monitored port
386 	 * has exited netmap mode, so there is nothing to stop
387 	 */
388 	if (kring != NULL)
389 		nm_kr_stop(kring, NM_KR_LOCKED);
390 
391 	if (zmon) {
392 		/* remove the monitor from the list */
393 		if (mz->next != NULL) {
394 			mz->next->zmon_list[t].prev = mz->prev;
395 			/* we also need to let the next monitor drop the
396 			 * reference to us and grab the reference to the
397 			 * previous ring owner, instead
398 			 */
399 			if (mz->prev != NULL)
400 				netmap_adapter_get(mz->prev->na);
401 			netmap_adapter_put(mkring->na);
402 		} else if (kring != NULL) {
403 			/* in the monitored kring, prev is actually the
404 			 * pointer to the tail of the list
405 			 */
406 			kring->zmon_list[t].prev =
407 				(mz->prev != kring ? mz->prev : NULL);
408 		}
409 		if (mz->prev != NULL) {
410 			netmap_adapter_put(mz->prev->na);
411 			mz->prev->zmon_list[t].next = mz->next;
412 		}
413 		mz->prev = NULL;
414 		mz->next = NULL;
415 	} else {
416 		/* this is a copy monitor */
417 		uint32_t mon_pos = mkring->mon_pos[kring->tx];
418 		kring->n_monitors--;
419 		if (mon_pos != kring->n_monitors) {
420 			kring->monitors[mon_pos] =
421 				kring->monitors[kring->n_monitors];
422 			kring->monitors[mon_pos]->mon_pos[kring->tx] = mon_pos;
423 		}
424 		kring->monitors[kring->n_monitors] = NULL;
425 		if (kring->n_monitors == 0) {
426 			nm_monitor_dealloc(kring);
427 		}
428 	}
429 
430 	if (ikring != NULL && nm_monitor_none(ikring)) {
431 		/* this was the last monitor, restore the callbacks */
432 		nm_monitor_restore_callbacks(ikring);
433 	}
434 
435 	if (kring != NULL)
436 		nm_kr_start(kring);
437 }
438 
439 
440 /* This is called when the monitored adapter leaves netmap mode
441  * (see netmap_do_unregif).
442  * We need to notify the monitors that the monitored rings are gone.
443  * We do this by setting their mna->priv.np_na to NULL.
444  * Note that the rings are already stopped when this happens, so
445  * no monitor ring callback can be active.
446  */
447 void
448 netmap_monitor_stop(struct netmap_adapter *na)
449 {
450 	enum txrx t;
451 
452 	for_rx_tx(t) {
453 		u_int i;
454 
455 		for (i = 0; i < netmap_all_rings(na, t); i++) {
456 			struct netmap_kring *kring = NMR(na, t)[i];
457 			struct netmap_zmon_list *z = &kring->zmon_list[t];
458 			u_int j;
459 
460 			if (nm_monitor_none(kring))
461 				continue;
462 
463 			for (j = 0; j < kring->n_monitors; j++) {
464 				struct netmap_kring *mkring =
465 					kring->monitors[j];
466 				struct netmap_monitor_adapter *mna =
467 					(struct netmap_monitor_adapter *)mkring->na;
468 				/* forget about this adapter */
469 				if (mna->priv.np_na != NULL) {
470 					netmap_adapter_put(mna->priv.np_na);
471 					mna->priv.np_na = NULL;
472 				}
473 				kring->monitors[j] = NULL;
474 			}
475 			kring->n_monitors = 0;
476 			nm_monitor_dealloc(kring);
477 
478 			if (!nm_is_zmon(na)) {
479 				/* we are the head of at most one list */
480 				struct netmap_kring *zkring;
481 				for (zkring = z->next; zkring != NULL;
482 						zkring = zkring->zmon_list[t].next)
483 				{
484 					struct netmap_monitor_adapter *next =
485 						(struct netmap_monitor_adapter *)zkring->na;
486 					/* let the monitor forget about us */
487 					netmap_adapter_put(next->priv.np_na); /* nop if null */
488 					next->priv.np_na = NULL;
489 					/* drop the additional ref taken in netmap_monitor_add() */
490 					netmap_adapter_put(zkring->zmon_list[t].prev->na);
491 				}
492 				/* orphan the zmon list */
493 				if (z->next != NULL)
494 					z->next->zmon_list[t].prev = NULL;
495 				z->next = NULL;
496 				z->prev = NULL;
497 			}
498 
499 			nm_monitor_restore_callbacks(kring);
500 		}
501 	}
502 }
503 
504 
505 /* common functions for the nm_register() callbacks of both kind of
506  * monitors.
507  */
508 static int
509 netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
510 {
511 	struct netmap_monitor_adapter *mna =
512 		(struct netmap_monitor_adapter *)na;
513 	struct netmap_priv_d *priv = &mna->priv;
514 	struct netmap_adapter *pna = priv->np_na;
515 	struct netmap_kring *kring, *mkring;
516 	int i;
517 	enum txrx t, s;
518 
519 	nm_prdis("%p: onoff %d", na, onoff);
520 	if (onoff) {
521 		if (pna == NULL) {
522 			/* parent left netmap mode, fatal */
523 			nm_prerr("%s: parent left netmap mode", na->name);
524 			return ENXIO;
525 		}
526 		for_rx_tx(t) {
527 			for (i = 0; i < netmap_all_rings(na, t); i++) {
528 				mkring = NMR(na, t)[i];
529 				if (!nm_kring_pending_on(mkring))
530 					continue;
531 				mkring->nr_mode = NKR_NETMAP_ON;
532 				if (t == NR_TX)
533 					continue;
534 				for_rx_tx(s) {
535 					if (i > nma_get_nrings(pna, s))
536 						continue;
537 					if (mna->flags & nm_txrx2flag(s)) {
538 						kring = NMR(pna, s)[i];
539 						netmap_monitor_add(mkring, kring, zmon);
540 					}
541 				}
542 			}
543 		}
544 		na->na_flags |= NAF_NETMAP_ON;
545 	} else {
546 		if (na->active_fds == 0)
547 			na->na_flags &= ~NAF_NETMAP_ON;
548 		for_rx_tx(t) {
549 			for (i = 0; i < netmap_all_rings(na, t); i++) {
550 				mkring = NMR(na, t)[i];
551 				if (!nm_kring_pending_off(mkring))
552 					continue;
553 				mkring->nr_mode = NKR_NETMAP_OFF;
554 				if (t == NR_TX)
555 					continue;
556 				/* we cannot access the parent krings if the parent
557 				 * has left netmap mode. This is signaled by a NULL
558 				 * pna pointer
559 				 */
560 				if (pna == NULL)
561 					continue;
562 				for_rx_tx(s) {
563 					if (i > nma_get_nrings(pna, s))
564 						continue;
565 					if (mna->flags & nm_txrx2flag(s)) {
566 						kring = NMR(pna, s)[i];
567 						netmap_monitor_del(mkring, kring, s);
568 					}
569 				}
570 			}
571 		}
572 	}
573 	return 0;
574 }
575 
576 /*
577  ****************************************************************
578  * functions specific for zero-copy monitors
579  ****************************************************************
580  */
581 
582 /*
583  * Common function for both zero-copy tx and rx nm_sync()
584  * callbacks
585  */
586 static int
587 netmap_zmon_parent_sync(struct netmap_kring *kring, int flags, enum txrx tx)
588 {
589 	struct netmap_kring *mkring = kring->zmon_list[tx].next;
590 	struct netmap_ring *ring = kring->ring, *mring;
591 	int error = 0;
592 	int rel_slots, free_slots, busy, sent = 0;
593 	u_int beg, end, i;
594 	u_int lim = kring->nkr_num_slots - 1,
595 	      mlim; // = mkring->nkr_num_slots - 1;
596 	uint16_t txmon = kring->tx == NR_TX ? NS_TXMON : 0;
597 
598 	if (mkring == NULL) {
599 		nm_prlim(5, "NULL monitor on %s", kring->name);
600 		return 0;
601 	}
602 	mring = mkring->ring;
603 	mlim = mkring->nkr_num_slots - 1;
604 
605 	/* get the released slots (rel_slots) */
606 	if (tx == NR_TX) {
607 		beg = kring->nr_hwtail + 1;
608 		error = kring->mon_sync(kring, flags);
609 		if (error)
610 			return error;
611 		end = kring->nr_hwtail + 1;
612 	} else { /* NR_RX */
613 		beg = kring->nr_hwcur;
614 		end = kring->rhead;
615 	}
616 
617 	rel_slots = end - beg;
618 	if (rel_slots < 0)
619 		rel_slots += kring->nkr_num_slots;
620 
621 	if (!rel_slots) {
622 		/* no released slots, but we still need
623 		 * to call rxsync if this is a rx ring
624 		 */
625 		goto out_rxsync;
626 	}
627 
628 	/* we need to lock the monitor receive ring, since it
629 	 * is the target of bot tx and rx traffic from the monitored
630 	 * adapter
631 	 */
632 	mtx_lock(&mkring->q_lock);
633 	/* get the free slots available on the monitor ring */
634 	i = mkring->nr_hwtail;
635 	busy = i - mkring->nr_hwcur;
636 	if (busy < 0)
637 		busy += mkring->nkr_num_slots;
638 	free_slots = mlim - busy;
639 
640 	if (!free_slots)
641 		goto out;
642 
643 	/* swap min(free_slots, rel_slots) slots */
644 	if (free_slots < rel_slots) {
645 		beg += (rel_slots - free_slots);
646 		rel_slots = free_slots;
647 	}
648 	if (unlikely(beg >= kring->nkr_num_slots))
649 		beg -= kring->nkr_num_slots;
650 
651 	sent = rel_slots;
652 	for ( ; rel_slots; rel_slots--) {
653 		struct netmap_slot *s = &ring->slot[beg];
654 		struct netmap_slot *ms = &mring->slot[i];
655 		uint32_t tmp;
656 
657 		tmp = ms->buf_idx;
658 		ms->buf_idx = s->buf_idx;
659 		s->buf_idx = tmp;
660 		nm_prdis(5, "beg %d buf_idx %d", beg, tmp);
661 
662 		tmp = ms->len;
663 		ms->len = s->len;
664 		s->len = tmp;
665 
666 		ms->flags = (s->flags & ~NS_TXMON) | txmon;
667 		s->flags |= NS_BUF_CHANGED;
668 
669 		beg = nm_next(beg, lim);
670 		i = nm_next(i, mlim);
671 
672 	}
673 	mb();
674 	mkring->nr_hwtail = i;
675 
676 out:
677 	mtx_unlock(&mkring->q_lock);
678 
679 	if (sent) {
680 		/* notify the new frames to the monitor */
681 		mkring->nm_notify(mkring, 0);
682 	}
683 
684 out_rxsync:
685 	if (tx == NR_RX)
686 		error = kring->mon_sync(kring, flags);
687 
688 	return error;
689 }
690 
691 /* callback used to replace the nm_sync callback in the monitored tx rings */
692 static int
693 netmap_zmon_parent_txsync(struct netmap_kring *kring, int flags)
694 {
695 	return netmap_zmon_parent_sync(kring, flags, NR_TX);
696 }
697 
698 /* callback used to replace the nm_sync callback in the monitored rx rings */
699 static int
700 netmap_zmon_parent_rxsync(struct netmap_kring *kring, int flags)
701 {
702 	return netmap_zmon_parent_sync(kring, flags, NR_RX);
703 }
704 
/* nm_register callback for zero-copy monitors: the common code is
 * called with the zero-copy flag set
 */
static int
netmap_zmon_reg(struct netmap_adapter *na, int onoff)
{
	const int zcopy = 1;

	return netmap_monitor_reg_common(na, onoff, zcopy);
}
710 
711 /* nm_dtor callback for monitors */
712 static void
713 netmap_zmon_dtor(struct netmap_adapter *na)
714 {
715 	struct netmap_monitor_adapter *mna =
716 		(struct netmap_monitor_adapter *)na;
717 	struct netmap_priv_d *priv = &mna->priv;
718 	struct netmap_adapter *pna = priv->np_na;
719 
720 	netmap_adapter_put(pna);
721 }
722 
723 /*
724  ****************************************************************
725  * functions specific for copy monitors
726  ****************************************************************
727  */
728 
729 static void
730 netmap_monitor_parent_sync(struct netmap_kring *kring, u_int first_new, int new_slots)
731 {
732 	u_int j;
733 	uint16_t txmon = kring->tx == NR_TX ? NS_TXMON : 0;
734 
735 	for (j = 0; j < kring->n_monitors; j++) {
736 		struct netmap_kring *mkring = kring->monitors[j];
737 		u_int i, mlim, beg;
738 		int free_slots, busy, sent = 0, m;
739 		u_int lim = kring->nkr_num_slots - 1;
740 		struct netmap_ring *ring = kring->ring, *mring = mkring->ring;
741 		u_int max_len;
742 		mlim = mkring->nkr_num_slots - 1;
743 
744 		/* we need to lock the monitor receive ring, since it
745 		 * is the target of bot tx and rx traffic from the monitored
746 		 * adapter
747 		 */
748 		mtx_lock(&mkring->q_lock);
749 		/* get the free slots available on the monitor ring */
750 		i = mkring->nr_hwtail;
751 		busy = i - mkring->nr_hwcur;
752 		if (busy < 0)
753 			busy += mkring->nkr_num_slots;
754 		free_slots = mlim - busy;
755 
756 		if (!free_slots)
757 			goto out;
758 
759 		/* copy min(free_slots, new_slots) slots */
760 		m = new_slots;
761 		beg = first_new;
762 		if (free_slots < m) {
763 			beg += (m - free_slots);
764 			if (beg >= kring->nkr_num_slots)
765 				beg -= kring->nkr_num_slots;
766 			m = free_slots;
767 		}
768 
769 		for ( ; m; m--) {
770 			struct netmap_slot *s = &ring->slot[beg];
771 			struct netmap_slot *ms = &mring->slot[i];
772 			u_int copy_len = s->len;
773 			char *src = NMB_O(kring, s),
774 			     *dst = NMB_O(mkring, ms);
775 
776 			max_len = NETMAP_BUF_SIZE(mkring->na) - nm_get_offset(mkring, ms);
777 			if (unlikely(copy_len > max_len)) {
778 				nm_prlim(5, "%s->%s: truncating %d to %d", kring->name,
779 						mkring->name, copy_len, max_len);
780 				copy_len = max_len;
781 			}
782 
783 			memcpy(dst, src, copy_len);
784 			ms->len = copy_len;
785 			ms->flags = (s->flags & ~NS_TXMON) | txmon;
786 			sent++;
787 
788 			beg = nm_next(beg, lim);
789 			i = nm_next(i, mlim);
790 		}
791 		mb();
792 		mkring->nr_hwtail = i;
793 	out:
794 		mtx_unlock(&mkring->q_lock);
795 
796 		if (sent) {
797 			/* notify the new frames to the monitor */
798 			mkring->nm_notify(mkring, 0);
799 		}
800 	}
801 }
802 
803 /* callback used to replace the nm_sync callback in the monitored tx rings */
804 static int
805 netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags)
806 {
807 	u_int first_new;
808 	int new_slots;
809 
810 	/* get the new slots */
811 	if (kring->n_monitors > 0) {
812 		first_new = kring->nr_hwcur;
813 		new_slots = kring->rhead - first_new;
814 		if (new_slots < 0)
815 			new_slots += kring->nkr_num_slots;
816 		if (new_slots)
817 			netmap_monitor_parent_sync(kring, first_new, new_slots);
818 	}
819 	if (kring->zmon_list[NR_TX].next != NULL) {
820 		return netmap_zmon_parent_txsync(kring, flags);
821 	}
822 	return kring->mon_sync(kring, flags);
823 }
824 
825 /* callback used to replace the nm_sync callback in the monitored rx rings */
826 static int
827 netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags)
828 {
829 	u_int first_new;
830 	int new_slots, error;
831 
832 	/* get the new slots */
833 	if (kring->zmon_list[NR_RX].next != NULL) {
834 		error = netmap_zmon_parent_rxsync(kring, flags);
835 	} else {
836 		error =  kring->mon_sync(kring, flags);
837 	}
838 	if (error)
839 		return error;
840 	if (kring->n_monitors > 0) {
841 		first_new = kring->mon_tail;
842 		new_slots = kring->nr_hwtail - first_new;
843 		if (new_slots < 0)
844 			new_slots += kring->nkr_num_slots;
845 		if (new_slots)
846 			netmap_monitor_parent_sync(kring, first_new, new_slots);
847 		kring->mon_tail = kring->nr_hwtail;
848 	}
849 	return 0;
850 }
851 
852 /* callback used to replace the nm_notify() callback in the monitored rx rings */
853 static int
854 netmap_monitor_parent_notify(struct netmap_kring *kring, int flags)
855 {
856 	int (*notify)(struct netmap_kring*, int);
857 	nm_prdis(5, "%s %x", kring->name, flags);
858 	/* ?xsync callbacks have tryget called by their callers
859 	 * (NIOCREGIF and poll()), but here we have to call it
860 	 * by ourself
861 	 */
862 	if (nm_kr_tryget(kring, 0, NULL)) {
863 		/* in all cases, just skip the sync */
864 		return NM_IRQ_COMPLETED;
865 	}
866 	if (kring->n_monitors > 0) {
867 		netmap_monitor_parent_rxsync(kring, NAF_FORCE_READ);
868 	}
869 	if (nm_monitor_none(kring)) {
870 		/* we are no longer monitoring this ring, so both
871 		 * mon_sync and mon_notify are NULL
872 		 */
873 		notify = kring->nm_notify;
874 	} else {
875 		notify = kring->mon_notify;
876 	}
877 	nm_kr_put(kring);
878 	return notify(kring, flags);
879 }
880 
881 
/* nm_register callback for copy monitors: the common code is called
 * with the zero-copy flag cleared
 */
static int
netmap_monitor_reg(struct netmap_adapter *na, int onoff)
{
	const int zcopy = 0;

	return netmap_monitor_reg_common(na, onoff, zcopy);
}
887 
888 static void
889 netmap_monitor_dtor(struct netmap_adapter *na)
890 {
891 	struct netmap_monitor_adapter *mna =
892 		(struct netmap_monitor_adapter *)na;
893 	struct netmap_priv_d *priv = &mna->priv;
894 	struct netmap_adapter *pna = priv->np_na;
895 
896 	netmap_adapter_put(pna);
897 }
898 
899 
900 /* check if req is a request for a monitor adapter that we can satisfy */
901 int
902 netmap_get_monitor_na(struct nmreq_header *hdr, struct netmap_adapter **na,
903 			struct netmap_mem_d *nmd, int create)
904 {
905 	struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
906 	struct nmreq_register preq;
907 	struct netmap_adapter *pna; /* parent adapter */
908 	struct netmap_monitor_adapter *mna;
909 	if_t ifp = NULL;
910 	int  error;
911 	int zcopy = (req->nr_flags & NR_ZCOPY_MON);
912 
913 	if (zcopy) {
914 		req->nr_flags |= (NR_MONITOR_TX | NR_MONITOR_RX);
915 	}
916 	if ((req->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
917 		nm_prdis("not a monitor");
918 		return 0;
919 	}
920 	/* this is a request for a monitor adapter */
921 
922 	nm_prdis("flags %lx", req->nr_flags);
923 
924 	/* First, try to find the adapter that we want to monitor.
925 	 * We use the same req, after we have turned off the monitor flags.
926 	 * In this way we can potentially monitor everything netmap understands,
927 	 * except other monitors.
928 	 */
929 	memcpy(&preq, req, sizeof(preq));
930 	preq.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON);
931 	hdr->nr_body = (uintptr_t)&preq;
932 	error = netmap_get_na(hdr, &pna, &ifp, nmd, create);
933 	hdr->nr_body = (uintptr_t)req;
934 	if (error) {
935 		nm_prerr("parent lookup failed: %d", error);
936 		return error;
937 	}
938 	nm_prdis("found parent: %s", pna->name);
939 
940 	if (!nm_netmap_on(pna)) {
941 		/* parent not in netmap mode */
942 		/* XXX we can wait for the parent to enter netmap mode,
943 		 * by intercepting its nm_register callback (2014-03-16)
944 		 */
945 		nm_prerr("%s not in netmap mode", pna->name);
946 		error = EINVAL;
947 		goto put_out;
948 	}
949 
950 	mna = nm_os_malloc(sizeof(*mna));
951 	if (mna == NULL) {
952 		error = ENOMEM;
953 		goto put_out;
954 	}
955 	mna->priv.np_na = pna;
956 
957 	/* grab all the rings we need in the parent */
958 	error = netmap_interp_ringid(&mna->priv, hdr);
959 	if (error) {
960 		nm_prerr("ringid error");
961 		goto free_out;
962 	}
963 	snprintf(mna->up.name, sizeof(mna->up.name), "%s/%s%s%s#%lu", pna->name,
964 			zcopy ? "z" : "",
965 			(req->nr_flags & NR_MONITOR_RX) ? "r" : "",
966 			(req->nr_flags & NR_MONITOR_TX) ? "t" : "",
967 			pna->monitor_id++);
968 
969 	/* the monitor supports the host rings iff the parent does */
970 	mna->up.na_flags |= (pna->na_flags & NAF_HOST_RINGS) & ~NAF_OFFSETS;
971 	if (!zcopy)
972 		mna->up.na_flags |= NAF_OFFSETS;
973 	/* a do-nothing txsync: monitors cannot be used to inject packets */
974 	mna->up.nm_txsync = netmap_monitor_txsync;
975 	mna->up.nm_rxsync = netmap_monitor_rxsync;
976 	mna->up.nm_krings_create = netmap_monitor_krings_create;
977 	mna->up.nm_krings_delete = netmap_monitor_krings_delete;
978 	mna->up.num_tx_rings = 1; // XXX what should we do here with chained zmons?
979 	/* we set the number of our rx_rings to be max(num_rx_rings, num_rx_rings)
980 	 * in the parent
981 	 */
982 	mna->up.num_rx_rings = pna->num_rx_rings;
983 	if (pna->num_tx_rings > pna->num_rx_rings)
984 		mna->up.num_rx_rings = pna->num_tx_rings;
985 	/* by default, the number of slots is the same as in
986 	 * the parent rings, but the user may ask for a different
987 	 * number
988 	 */
989 	mna->up.num_tx_desc = req->nr_tx_slots;
990 	nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
991 			1, NM_MONITOR_MAXSLOTS, NULL);
992 	mna->up.num_rx_desc = req->nr_rx_slots;
993 	nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
994 			1, NM_MONITOR_MAXSLOTS, NULL);
995 	if (zcopy) {
996 		mna->up.nm_register = netmap_zmon_reg;
997 		mna->up.nm_dtor = netmap_zmon_dtor;
998 		/* to have zero copy, we need to use the same memory allocator
999 		 * as the monitored port
1000 		 */
1001 		mna->up.nm_mem = netmap_mem_get(pna->nm_mem);
1002 		/* and the allocator cannot be changed */
1003 		mna->up.na_flags |= NAF_MEM_OWNER;
1004 	} else {
1005 		mna->up.nm_register = netmap_monitor_reg;
1006 		mna->up.nm_dtor = netmap_monitor_dtor;
1007 		mna->up.nm_mem = netmap_mem_private_new(
1008 				mna->up.num_tx_rings,
1009 				mna->up.num_tx_desc,
1010 				mna->up.num_rx_rings,
1011 				mna->up.num_rx_desc,
1012 				0, /* extra bufs */
1013 				0, /* pipes */
1014 				&error);
1015 		if (mna->up.nm_mem == NULL)
1016 			goto put_out;
1017 	}
1018 
1019 	error = netmap_attach_common(&mna->up);
1020 	if (error) {
1021 		nm_prerr("netmap_attach_common failed");
1022 		goto mem_put_out;
1023 	}
1024 
1025 	/* remember the traffic directions we have to monitor */
1026 	mna->flags = (req->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON));
1027 
1028 	*na = &mna->up;
1029 	netmap_adapter_get(*na);
1030 
1031 	/* keep the reference to the parent */
1032 	nm_prdis("monitor ok");
1033 
1034 	/* drop the reference to the ifp, if any */
1035 	if (ifp)
1036 		if_rele(ifp);
1037 
1038 	return 0;
1039 
1040 mem_put_out:
1041 	netmap_mem_put(mna->up.nm_mem);
1042 free_out:
1043 	nm_os_free(mna);
1044 put_out:
1045 	netmap_unget_na(pna, ifp);
1046 	return error;
1047 }
1048 
1049 
1050 #endif /* WITH_MONITOR */
1051