xref: /freebsd/sys/dev/netmap/netmap_monitor.c (revision 058ac3e8063366dafa634d9107642e12b038bf09)
1 /*
2  * Copyright (C) 2014-2016 Giuseppe Lettieri
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *   1. Redistributions of source code must retain the above copyright
9  *      notice, this list of conditions and the following disclaimer.
10  *   2. Redistributions in binary form must reproduce the above copyright
11  *      notice, this list of conditions and the following disclaimer in the
12  *      documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  * $FreeBSD$
29  *
30  * Monitors
31  *
32  * netmap monitors can be used to do monitoring of network traffic
33  * on another adapter, when the latter adapter is working in netmap mode.
34  *
35  * Monitors offer to userspace the same interface as any other netmap port,
36  * with as many pairs of netmap rings as the monitored adapter.
37  * However, only the rx rings are actually used. Each monitor rx ring receives
38  * the traffic transiting on both the tx and rx corresponding rings in the
39  * monitored adapter. During registration, the user can choose if she wants
40  * to intercept tx only, rx only, or both tx and rx traffic.
41  * The slots containing traffic intercepted in the tx direction will have
42  * the NS_TXMON flag set.
43  *
44  * If the monitor is not able to cope with the stream of frames, excess traffic
45  * will be dropped.
46  *
47  * If the monitored adapter leaves netmap mode, the monitor has to be restarted.
48  *
49  * Monitors can be either zero-copy or copy-based.
50  *
51  * Copy monitors see the frames before they are consumed:
52  *
53  *  - For tx traffic, this is when the application sends them, before they are
54  *    passed down to the adapter.
55  *
56  *  - For rx traffic, this is when they are received by the adapter, before
57  *    they are sent up to the application, if any (note that, if no
58  *    application is reading from a monitored ring, the ring will eventually
59  *    fill up and traffic will stop).
60  *
61  * Zero-copy monitors only see the frames after they have been consumed:
62  *
63  *  - For tx traffic, this is after the slots containing the frames have been
64  *    marked as free. Note that this may happen at a considerable delay after
65  *    frame transmission, since freeing of slots is often done lazily.
66  *
67  *  - For rx traffic, this is after the consumer on the monitored adapter
68  *    has released them. In most cases, the consumer is a userspace
69  *    application which may have modified the frame contents.
70  *
71  * Several copy or zero-copy monitors may be active on any ring.
72  *
73  */
74 
75 
76 #if defined(__FreeBSD__)
77 #include <sys/cdefs.h> /* prerequisite */
78 
79 #include <sys/types.h>
80 #include <sys/errno.h>
81 #include <sys/param.h>	/* defines used in kernel.h */
82 #include <sys/kernel.h>	/* types used in module initialization */
83 #include <sys/malloc.h>
84 #include <sys/poll.h>
85 #include <sys/lock.h>
86 #include <sys/rwlock.h>
87 #include <sys/selinfo.h>
88 #include <sys/sysctl.h>
89 #include <sys/socket.h> /* sockaddrs */
90 #include <net/if.h>
91 #include <net/if_var.h>
92 #include <machine/bus.h>	/* bus_dmamap_* */
93 #include <sys/refcount.h>
94 
95 
96 #elif defined(linux)
97 
98 #include "bsd_glue.h"
99 
100 #elif defined(__APPLE__)
101 
102 #warning OSX support is only partial
103 #include "osx_glue.h"
104 
105 #elif defined(_WIN32)
106 #include "win_glue.h"
107 #else
108 
109 #error	Unsupported platform
110 
111 #endif /* unsupported */
112 
113 /*
114  * common headers
115  */
116 
117 #include <net/netmap.h>
118 #include <dev/netmap/netmap_kern.h>
119 #include <dev/netmap/netmap_mem2.h>
120 
121 #ifdef WITH_MONITOR
122 
123 #define NM_MONITOR_MAXSLOTS 4096
124 
125 /*
126  ********************************************************************
127  * functions common to both kinds of monitors
128  ********************************************************************
129  */
130 
131 static int netmap_zmon_reg(struct netmap_adapter *, int);
132 static int
133 nm_is_zmon(struct netmap_adapter *na)
134 {
135 	return na->nm_register == netmap_zmon_reg;
136 }
137 
138 /* nm_sync callback for the monitor's own tx rings.
139  * This makes no sense and always returns error
140  */
141 static int
142 netmap_monitor_txsync(struct netmap_kring *kring, int flags)
143 {
144 	nm_prlim(1, "%s %x", kring->name, flags);
145 	return EIO;
146 }
147 
148 /* nm_sync callback for the monitor's own rx rings.
149  * Note that the lock in netmap_zmon_parent_sync only protects
150  * writers among themselves. Synchronization between writers
151  * (i.e., netmap_zmon_parent_txsync and netmap_zmon_parent_rxsync)
152  * and readers (i.e., netmap_zmon_rxsync) relies on memory barriers.
153  */
154 static int
155 netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
156 {
157 	struct netmap_monitor_adapter *mna =
158 		(struct netmap_monitor_adapter *)kring->na;
159 	if (unlikely(mna->priv.np_na == NULL)) {
160 		/* parent left netmap mode */
161 		return EIO;
162 	}
163 	nm_prdis("%s %x", kring->name, flags);
164 	kring->nr_hwcur = kring->rhead;
165 	mb();
166 	return 0;
167 }
168 
169 /* nm_krings_create callbacks for monitors.
170  */
171 static int
172 netmap_monitor_krings_create(struct netmap_adapter *na)
173 {
174 	int error = netmap_krings_create(na, 0);
175 	enum txrx t;
176 
177 	if (error)
178 		return error;
179 	/* override the host rings callbacks */
180 	for_rx_tx(t) {
181 		int i;
182 		u_int first = nma_get_nrings(na, t);
183 		for (i = 0; i < nma_get_host_nrings(na, t); i++) {
184 			struct netmap_kring *kring = NMR(na, t)[first + i];
185 			kring->nm_sync = t == NR_TX ? netmap_monitor_txsync :
186 						      netmap_monitor_rxsync;
187 		}
188 	}
189 	return 0;
190 }
191 
/* nm_krings_delete callback for monitors: plain delegation to the
 * generic netmap_krings_delete()
 */
static void
netmap_monitor_krings_delete(struct netmap_adapter *na)
{
	/* nothing monitor-specific is undone here */
	netmap_krings_delete(na);
}
198 
199 
200 static u_int
201 nm_txrx2flag(enum txrx t)
202 {
203 	return (t == NR_RX ? NR_MONITOR_RX : NR_MONITOR_TX);
204 }
205 
206 /* allocate the monitors array in the monitored kring */
207 static int
208 nm_monitor_alloc(struct netmap_kring *kring, u_int n)
209 {
210 	size_t old_len, len;
211 	struct netmap_kring **nm;
212 
213 	if (n <= kring->max_monitors)
214 		/* we already have more entries that requested */
215 		return 0;
216 
217 	old_len = sizeof(struct netmap_kring *)*kring->max_monitors;
218 	len = sizeof(struct netmap_kring *) * n;
219 	nm = nm_os_realloc(kring->monitors, len, old_len);
220 	if (nm == NULL)
221 		return ENOMEM;
222 
223 	kring->monitors = nm;
224 	kring->max_monitors = n;
225 
226 	return 0;
227 }
228 
229 /* deallocate the parent array in the parent adapter */
230 static void
231 nm_monitor_dealloc(struct netmap_kring *kring)
232 {
233 	if (kring->monitors) {
234 		if (kring->n_monitors > 0) {
235 			nm_prerr("freeing not empty monitor array for %s (%d dangling monitors)!",
236 			    kring->name, kring->n_monitors);
237 		}
238 		nm_os_free(kring->monitors);
239 		kring->monitors = NULL;
240 		kring->max_monitors = 0;
241 		kring->n_monitors = 0;
242 	}
243 }
244 
245 /* returns 1 iff kring has no monitors */
246 static inline int
247 nm_monitor_none(struct netmap_kring *kring)
248 {
249 	return kring->n_monitors == 0 &&
250 		kring->zmon_list[NR_TX].next == NULL &&
251 		kring->zmon_list[NR_RX].next == NULL;
252 }
253 
/*
 * Monitors work by replacing the nm_sync() and possibly the
 * nm_notify() callbacks in the monitored rings. These are the
 * replacement callbacks.
 */
static int netmap_zmon_parent_txsync(struct netmap_kring *, int);
static int netmap_zmon_parent_rxsync(struct netmap_kring *, int);
static int netmap_monitor_parent_txsync(struct netmap_kring *, int);
static int netmap_monitor_parent_rxsync(struct netmap_kring *, int);
static int netmap_monitor_parent_notify(struct netmap_kring *, int);

/* do-nothing sync callback: ignores its arguments and reports
 * success. It stands in for the original nm_sync of a monitored ring
 * that has none.
 */
static int
nm_monitor_dummycb(struct netmap_kring *kring, int flags)
{
	(void)flags;
	(void)kring;

	return 0;
}
271 
272 static void
273 nm_monitor_intercept_callbacks(struct netmap_kring *kring)
274 {
275 	nm_prdis("intercept callbacks on %s", kring->name);
276 	kring->mon_sync = kring->nm_sync != NULL ?
277 		kring->nm_sync : nm_monitor_dummycb;
278 	kring->mon_notify = kring->nm_notify;
279 	if (kring->tx == NR_TX) {
280 		kring->nm_sync = netmap_monitor_parent_txsync;
281 	} else {
282 		kring->nm_sync = netmap_monitor_parent_rxsync;
283 		kring->nm_notify = netmap_monitor_parent_notify;
284 		kring->mon_tail = kring->nr_hwtail;
285 	}
286 }
287 
288 static void
289 nm_monitor_restore_callbacks(struct netmap_kring *kring)
290 {
291 	nm_prdis("restoring callbacks on %s", kring->name);
292 	kring->nm_sync = kring->mon_sync;
293 	kring->mon_sync = NULL;
294 	if (kring->tx == NR_RX) {
295 		kring->nm_notify = kring->mon_notify;
296 	}
297 	kring->mon_notify = NULL;
298 }
299 
300 static struct netmap_kring *
301 nm_zmon_list_head(struct netmap_kring *mkring, enum txrx t)
302 {
303 	struct netmap_adapter *na = mkring->na;
304 	struct netmap_kring *kring = mkring;
305 	struct netmap_zmon_list *z = &kring->zmon_list[t];
306 	/* reach the head of the list */
307 	while (nm_is_zmon(na) && z->prev != NULL) {
308 		kring = z->prev;
309 		na = kring->na;
310 		z = &kring->zmon_list[t];
311 	}
312 	return nm_is_zmon(na) ? NULL : kring;
313 }
314 
315 /* add the monitor mkring to the list of monitors of kring.
316  * If this is the first monitor, intercept the callbacks
317  */
318 static int
319 netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int zmon)
320 {
321 	int error = NM_IRQ_COMPLETED;
322 	enum txrx t = kring->tx;
323 	struct netmap_zmon_list *z = &kring->zmon_list[t];
324 	struct netmap_zmon_list *mz = &mkring->zmon_list[t];
325 	struct netmap_kring *ikring = kring;
326 
327 	/* a zero-copy monitor which is not the first in the list
328 	 * must monitor the previous monitor
329 	 */
330 	if (zmon && z->prev != NULL)
331 		ikring = z->prev; /* tail of the list */
332 
333 	/* synchronize with concurrently running nm_sync()s */
334 	nm_kr_stop(kring, NM_KR_LOCKED);
335 
336 	if (nm_monitor_none(ikring)) {
337 		/* this is the first monitor, intercept the callbacks */
338 		nm_prdis("%s: intercept callbacks on %s", mkring->name, ikring->name);
339 		nm_monitor_intercept_callbacks(ikring);
340 	}
341 
342 	if (zmon) {
343 		/* append the zmon to the list */
344 		ikring->zmon_list[t].next = mkring;
345 		z->prev = mkring; /* new tail */
346 		mz->prev = ikring;
347 		mz->next = NULL;
348 		/* grab a reference to the previous netmap adapter
349 		 * in the chain (this may be the monitored port
350 		 * or another zero-copy monitor)
351 		 */
352 		netmap_adapter_get(ikring->na);
353 	} else {
354 		/* make sure the monitor array exists and is big enough */
355 		error = nm_monitor_alloc(kring, kring->n_monitors + 1);
356 		if (error)
357 			goto out;
358 		kring->monitors[kring->n_monitors] = mkring;
359 		mkring->mon_pos[kring->tx] = kring->n_monitors;
360 		kring->n_monitors++;
361 	}
362 
363 out:
364 	nm_kr_start(kring);
365 	return error;
366 }
367 
368 /* remove the monitor mkring from the list of monitors of kring.
369  * If this is the last monitor, restore the original callbacks
370  */
371 static void
372 netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring, enum txrx t)
373 {
374 	int zmon = nm_is_zmon(mkring->na);
375 	struct netmap_zmon_list *mz = &mkring->zmon_list[t];
376 	struct netmap_kring *ikring = kring;
377 
378 
379 	if (zmon) {
380 		/* get to the head of the list */
381 		kring = nm_zmon_list_head(mkring, t);
382 		ikring = mz->prev;
383 	}
384 
385 	/* synchronize with concurrently running nm_sync()s
386 	 * if kring is NULL (orphaned list) the monitored port
387 	 * has exited netmap mode, so there is nothing to stop
388 	 */
389 	if (kring != NULL)
390 		nm_kr_stop(kring, NM_KR_LOCKED);
391 
392 	if (zmon) {
393 		/* remove the monitor from the list */
394 		if (mz->next != NULL) {
395 			mz->next->zmon_list[t].prev = mz->prev;
396 			/* we also need to let the next monitor drop the
397 			 * reference to us and grab the reference to the
398 			 * previous ring owner, instead
399 			 */
400 			if (mz->prev != NULL)
401 				netmap_adapter_get(mz->prev->na);
402 			netmap_adapter_put(mkring->na);
403 		} else if (kring != NULL) {
404 			/* in the monitored kring, prev is actually the
405 			 * pointer to the tail of the list
406 			 */
407 			kring->zmon_list[t].prev =
408 				(mz->prev != kring ? mz->prev : NULL);
409 		}
410 		if (mz->prev != NULL) {
411 			netmap_adapter_put(mz->prev->na);
412 			mz->prev->zmon_list[t].next = mz->next;
413 		}
414 		mz->prev = NULL;
415 		mz->next = NULL;
416 	} else {
417 		/* this is a copy monitor */
418 		uint32_t mon_pos = mkring->mon_pos[kring->tx];
419 		kring->n_monitors--;
420 		if (mon_pos != kring->n_monitors) {
421 			kring->monitors[mon_pos] =
422 				kring->monitors[kring->n_monitors];
423 			kring->monitors[mon_pos]->mon_pos[kring->tx] = mon_pos;
424 		}
425 		kring->monitors[kring->n_monitors] = NULL;
426 		if (kring->n_monitors == 0) {
427 			nm_monitor_dealloc(kring);
428 		}
429 	}
430 
431 	if (ikring != NULL && nm_monitor_none(ikring)) {
432 		/* this was the last monitor, restore the callbacks */
433 		nm_monitor_restore_callbacks(ikring);
434 	}
435 
436 	if (kring != NULL)
437 		nm_kr_start(kring);
438 }
439 
440 
441 /* This is called when the monitored adapter leaves netmap mode
442  * (see netmap_do_unregif).
443  * We need to notify the monitors that the monitored rings are gone.
444  * We do this by setting their mna->priv.np_na to NULL.
445  * Note that the rings are already stopped when this happens, so
446  * no monitor ring callback can be active.
447  */
448 void
449 netmap_monitor_stop(struct netmap_adapter *na)
450 {
451 	enum txrx t;
452 
453 	for_rx_tx(t) {
454 		u_int i;
455 
456 		for (i = 0; i < netmap_all_rings(na, t); i++) {
457 			struct netmap_kring *kring = NMR(na, t)[i];
458 			struct netmap_zmon_list *z = &kring->zmon_list[t];
459 			u_int j;
460 
461 			if (nm_monitor_none(kring))
462 				continue;
463 
464 			for (j = 0; j < kring->n_monitors; j++) {
465 				struct netmap_kring *mkring =
466 					kring->monitors[j];
467 				struct netmap_monitor_adapter *mna =
468 					(struct netmap_monitor_adapter *)mkring->na;
469 				/* forget about this adapter */
470 				if (mna->priv.np_na != NULL) {
471 					netmap_adapter_put(mna->priv.np_na);
472 					mna->priv.np_na = NULL;
473 				}
474 				kring->monitors[j] = NULL;
475 			}
476 			kring->n_monitors = 0;
477 			nm_monitor_dealloc(kring);
478 
479 			if (!nm_is_zmon(na)) {
480 				/* we are the head of at most one list */
481 				struct netmap_kring *zkring;
482 				for (zkring = z->next; zkring != NULL;
483 						zkring = zkring->zmon_list[t].next)
484 				{
485 					struct netmap_monitor_adapter *next =
486 						(struct netmap_monitor_adapter *)zkring->na;
487 					/* let the monitor forget about us */
488 					netmap_adapter_put(next->priv.np_na); /* nop if null */
489 					next->priv.np_na = NULL;
490 					/* drop the additional ref taken in netmap_monitor_add() */
491 					netmap_adapter_put(zkring->zmon_list[t].prev->na);
492 				}
493 				/* orphan the zmon list */
494 				if (z->next != NULL)
495 					z->next->zmon_list[t].prev = NULL;
496 				z->next = NULL;
497 				z->prev = NULL;
498 			}
499 
500 			nm_monitor_restore_callbacks(kring);
501 		}
502 	}
503 }
504 
505 
506 /* common functions for the nm_register() callbacks of both kind of
507  * monitors.
508  */
509 static int
510 netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
511 {
512 	struct netmap_monitor_adapter *mna =
513 		(struct netmap_monitor_adapter *)na;
514 	struct netmap_priv_d *priv = &mna->priv;
515 	struct netmap_adapter *pna = priv->np_na;
516 	struct netmap_kring *kring, *mkring;
517 	int i;
518 	enum txrx t, s;
519 
520 	nm_prdis("%p: onoff %d", na, onoff);
521 	if (onoff) {
522 		if (pna == NULL) {
523 			/* parent left netmap mode, fatal */
524 			nm_prerr("%s: parent left netmap mode", na->name);
525 			return ENXIO;
526 		}
527 		for_rx_tx(t) {
528 			for (i = 0; i < netmap_all_rings(na, t); i++) {
529 				mkring = NMR(na, t)[i];
530 				if (!nm_kring_pending_on(mkring))
531 					continue;
532 				mkring->nr_mode = NKR_NETMAP_ON;
533 				if (t == NR_TX)
534 					continue;
535 				for_rx_tx(s) {
536 					if (i > nma_get_nrings(pna, s))
537 						continue;
538 					if (mna->flags & nm_txrx2flag(s)) {
539 						kring = NMR(pna, s)[i];
540 						netmap_monitor_add(mkring, kring, zmon);
541 					}
542 				}
543 			}
544 		}
545 		na->na_flags |= NAF_NETMAP_ON;
546 	} else {
547 		if (na->active_fds == 0)
548 			na->na_flags &= ~NAF_NETMAP_ON;
549 		for_rx_tx(t) {
550 			for (i = 0; i < netmap_all_rings(na, t); i++) {
551 				mkring = NMR(na, t)[i];
552 				if (!nm_kring_pending_off(mkring))
553 					continue;
554 				mkring->nr_mode = NKR_NETMAP_OFF;
555 				if (t == NR_TX)
556 					continue;
557 				/* we cannot access the parent krings if the parent
558 				 * has left netmap mode. This is signaled by a NULL
559 				 * pna pointer
560 				 */
561 				if (pna == NULL)
562 					continue;
563 				for_rx_tx(s) {
564 					if (i > nma_get_nrings(pna, s))
565 						continue;
566 					if (mna->flags & nm_txrx2flag(s)) {
567 						kring = NMR(pna, s)[i];
568 						netmap_monitor_del(mkring, kring, s);
569 					}
570 				}
571 			}
572 		}
573 	}
574 	return 0;
575 }
576 
577 /*
578  ****************************************************************
579  * functions specific for zero-copy monitors
580  ****************************************************************
581  */
582 
583 /*
584  * Common function for both zero-copy tx and rx nm_sync()
585  * callbacks
586  */
587 static int
588 netmap_zmon_parent_sync(struct netmap_kring *kring, int flags, enum txrx tx)
589 {
590 	struct netmap_kring *mkring = kring->zmon_list[tx].next;
591 	struct netmap_ring *ring = kring->ring, *mring;
592 	int error = 0;
593 	int rel_slots, free_slots, busy, sent = 0;
594 	u_int beg, end, i;
595 	u_int lim = kring->nkr_num_slots - 1,
596 	      mlim; // = mkring->nkr_num_slots - 1;
597 	uint16_t txmon = kring->tx == NR_TX ? NS_TXMON : 0;
598 
599 	if (mkring == NULL) {
600 		nm_prlim(5, "NULL monitor on %s", kring->name);
601 		return 0;
602 	}
603 	mring = mkring->ring;
604 	mlim = mkring->nkr_num_slots - 1;
605 
606 	/* get the released slots (rel_slots) */
607 	if (tx == NR_TX) {
608 		beg = kring->nr_hwtail + 1;
609 		error = kring->mon_sync(kring, flags);
610 		if (error)
611 			return error;
612 		end = kring->nr_hwtail + 1;
613 	} else { /* NR_RX */
614 		beg = kring->nr_hwcur;
615 		end = kring->rhead;
616 	}
617 
618 	rel_slots = end - beg;
619 	if (rel_slots < 0)
620 		rel_slots += kring->nkr_num_slots;
621 
622 	if (!rel_slots) {
623 		/* no released slots, but we still need
624 		 * to call rxsync if this is a rx ring
625 		 */
626 		goto out_rxsync;
627 	}
628 
629 	/* we need to lock the monitor receive ring, since it
630 	 * is the target of bot tx and rx traffic from the monitored
631 	 * adapter
632 	 */
633 	mtx_lock(&mkring->q_lock);
634 	/* get the free slots available on the monitor ring */
635 	i = mkring->nr_hwtail;
636 	busy = i - mkring->nr_hwcur;
637 	if (busy < 0)
638 		busy += mkring->nkr_num_slots;
639 	free_slots = mlim - busy;
640 
641 	if (!free_slots)
642 		goto out;
643 
644 	/* swap min(free_slots, rel_slots) slots */
645 	if (free_slots < rel_slots) {
646 		beg += (rel_slots - free_slots);
647 		rel_slots = free_slots;
648 	}
649 	if (unlikely(beg >= kring->nkr_num_slots))
650 		beg -= kring->nkr_num_slots;
651 
652 	sent = rel_slots;
653 	for ( ; rel_slots; rel_slots--) {
654 		struct netmap_slot *s = &ring->slot[beg];
655 		struct netmap_slot *ms = &mring->slot[i];
656 		uint32_t tmp;
657 
658 		tmp = ms->buf_idx;
659 		ms->buf_idx = s->buf_idx;
660 		s->buf_idx = tmp;
661 		nm_prdis(5, "beg %d buf_idx %d", beg, tmp);
662 
663 		tmp = ms->len;
664 		ms->len = s->len;
665 		s->len = tmp;
666 
667 		ms->flags = (s->flags & ~NS_TXMON) | txmon;
668 		s->flags |= NS_BUF_CHANGED;
669 
670 		beg = nm_next(beg, lim);
671 		i = nm_next(i, mlim);
672 
673 	}
674 	mb();
675 	mkring->nr_hwtail = i;
676 
677 out:
678 	mtx_unlock(&mkring->q_lock);
679 
680 	if (sent) {
681 		/* notify the new frames to the monitor */
682 		mkring->nm_notify(mkring, 0);
683 	}
684 
685 out_rxsync:
686 	if (tx == NR_RX)
687 		error = kring->mon_sync(kring, flags);
688 
689 	return error;
690 }
691 
692 /* callback used to replace the nm_sync callback in the monitored tx rings */
693 static int
694 netmap_zmon_parent_txsync(struct netmap_kring *kring, int flags)
695 {
696 	return netmap_zmon_parent_sync(kring, flags, NR_TX);
697 }
698 
699 /* callback used to replace the nm_sync callback in the monitored rx rings */
700 static int
701 netmap_zmon_parent_rxsync(struct netmap_kring *kring, int flags)
702 {
703 	return netmap_zmon_parent_sync(kring, flags, NR_RX);
704 }
705 
/* nm_register callback for zero-copy monitors: the common code,
 * with the zero-copy flag set
 */
static int
netmap_zmon_reg(struct netmap_adapter *na, int onoff)
{
	const int zcopy = 1;

	return netmap_monitor_reg_common(na, onoff, zcopy);
}
711 
712 /* nm_dtor callback for monitors */
713 static void
714 netmap_zmon_dtor(struct netmap_adapter *na)
715 {
716 	struct netmap_monitor_adapter *mna =
717 		(struct netmap_monitor_adapter *)na;
718 	struct netmap_priv_d *priv = &mna->priv;
719 	struct netmap_adapter *pna = priv->np_na;
720 
721 	netmap_adapter_put(pna);
722 }
723 
724 /*
725  ****************************************************************
726  * functions specific for copy monitors
727  ****************************************************************
728  */
729 
730 static void
731 netmap_monitor_parent_sync(struct netmap_kring *kring, u_int first_new, int new_slots)
732 {
733 	u_int j;
734 	uint16_t txmon = kring->tx == NR_TX ? NS_TXMON : 0;
735 
736 	for (j = 0; j < kring->n_monitors; j++) {
737 		struct netmap_kring *mkring = kring->monitors[j];
738 		u_int i, mlim, beg;
739 		int free_slots, busy, sent = 0, m;
740 		u_int lim = kring->nkr_num_slots - 1;
741 		struct netmap_ring *ring = kring->ring, *mring = mkring->ring;
742 		u_int max_len;
743 		mlim = mkring->nkr_num_slots - 1;
744 
745 		/* we need to lock the monitor receive ring, since it
746 		 * is the target of bot tx and rx traffic from the monitored
747 		 * adapter
748 		 */
749 		mtx_lock(&mkring->q_lock);
750 		/* get the free slots available on the monitor ring */
751 		i = mkring->nr_hwtail;
752 		busy = i - mkring->nr_hwcur;
753 		if (busy < 0)
754 			busy += mkring->nkr_num_slots;
755 		free_slots = mlim - busy;
756 
757 		if (!free_slots)
758 			goto out;
759 
760 		/* copy min(free_slots, new_slots) slots */
761 		m = new_slots;
762 		beg = first_new;
763 		if (free_slots < m) {
764 			beg += (m - free_slots);
765 			if (beg >= kring->nkr_num_slots)
766 				beg -= kring->nkr_num_slots;
767 			m = free_slots;
768 		}
769 
770 		for ( ; m; m--) {
771 			struct netmap_slot *s = &ring->slot[beg];
772 			struct netmap_slot *ms = &mring->slot[i];
773 			u_int copy_len = s->len;
774 			char *src = NMB_O(kring, s),
775 			     *dst = NMB_O(mkring, ms);
776 
777 			max_len = NETMAP_BUF_SIZE(mkring->na) - nm_get_offset(mkring, ms);
778 			if (unlikely(copy_len > max_len)) {
779 				nm_prlim(5, "%s->%s: truncating %d to %d", kring->name,
780 						mkring->name, copy_len, max_len);
781 				copy_len = max_len;
782 			}
783 
784 			memcpy(dst, src, copy_len);
785 			ms->len = copy_len;
786 			ms->flags = (s->flags & ~NS_TXMON) | txmon;
787 			sent++;
788 
789 			beg = nm_next(beg, lim);
790 			i = nm_next(i, mlim);
791 		}
792 		mb();
793 		mkring->nr_hwtail = i;
794 	out:
795 		mtx_unlock(&mkring->q_lock);
796 
797 		if (sent) {
798 			/* notify the new frames to the monitor */
799 			mkring->nm_notify(mkring, 0);
800 		}
801 	}
802 }
803 
804 /* callback used to replace the nm_sync callback in the monitored tx rings */
805 static int
806 netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags)
807 {
808 	u_int first_new;
809 	int new_slots;
810 
811 	/* get the new slots */
812 	if (kring->n_monitors > 0) {
813 		first_new = kring->nr_hwcur;
814 		new_slots = kring->rhead - first_new;
815 		if (new_slots < 0)
816 			new_slots += kring->nkr_num_slots;
817 		if (new_slots)
818 			netmap_monitor_parent_sync(kring, first_new, new_slots);
819 	}
820 	if (kring->zmon_list[NR_TX].next != NULL) {
821 		return netmap_zmon_parent_txsync(kring, flags);
822 	}
823 	return kring->mon_sync(kring, flags);
824 }
825 
826 /* callback used to replace the nm_sync callback in the monitored rx rings */
827 static int
828 netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags)
829 {
830 	u_int first_new;
831 	int new_slots, error;
832 
833 	/* get the new slots */
834 	if (kring->zmon_list[NR_RX].next != NULL) {
835 		error = netmap_zmon_parent_rxsync(kring, flags);
836 	} else {
837 		error =  kring->mon_sync(kring, flags);
838 	}
839 	if (error)
840 		return error;
841 	if (kring->n_monitors > 0) {
842 		first_new = kring->mon_tail;
843 		new_slots = kring->nr_hwtail - first_new;
844 		if (new_slots < 0)
845 			new_slots += kring->nkr_num_slots;
846 		if (new_slots)
847 			netmap_monitor_parent_sync(kring, first_new, new_slots);
848 		kring->mon_tail = kring->nr_hwtail;
849 	}
850 	return 0;
851 }
852 
853 /* callback used to replace the nm_notify() callback in the monitored rx rings */
854 static int
855 netmap_monitor_parent_notify(struct netmap_kring *kring, int flags)
856 {
857 	int (*notify)(struct netmap_kring*, int);
858 	nm_prdis(5, "%s %x", kring->name, flags);
859 	/* ?xsync callbacks have tryget called by their callers
860 	 * (NIOCREGIF and poll()), but here we have to call it
861 	 * by ourself
862 	 */
863 	if (nm_kr_tryget(kring, 0, NULL)) {
864 		/* in all cases, just skip the sync */
865 		return NM_IRQ_COMPLETED;
866 	}
867 	if (kring->n_monitors > 0) {
868 		netmap_monitor_parent_rxsync(kring, NAF_FORCE_READ);
869 	}
870 	if (nm_monitor_none(kring)) {
871 		/* we are no longer monitoring this ring, so both
872 		 * mon_sync and mon_notify are NULL
873 		 */
874 		notify = kring->nm_notify;
875 	} else {
876 		notify = kring->mon_notify;
877 	}
878 	nm_kr_put(kring);
879 	return notify(kring, flags);
880 }
881 
882 
/* nm_register callback for copy monitors: the common code, with the
 * zero-copy flag clear
 */
static int
netmap_monitor_reg(struct netmap_adapter *na, int onoff)
{
	const int zcopy = 0;

	return netmap_monitor_reg_common(na, onoff, zcopy);
}
888 
889 static void
890 netmap_monitor_dtor(struct netmap_adapter *na)
891 {
892 	struct netmap_monitor_adapter *mna =
893 		(struct netmap_monitor_adapter *)na;
894 	struct netmap_priv_d *priv = &mna->priv;
895 	struct netmap_adapter *pna = priv->np_na;
896 
897 	netmap_adapter_put(pna);
898 }
899 
900 
901 /* check if req is a request for a monitor adapter that we can satisfy */
902 int
903 netmap_get_monitor_na(struct nmreq_header *hdr, struct netmap_adapter **na,
904 			struct netmap_mem_d *nmd, int create)
905 {
906 	struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
907 	struct nmreq_register preq;
908 	struct netmap_adapter *pna; /* parent adapter */
909 	struct netmap_monitor_adapter *mna;
910 	struct ifnet *ifp = NULL;
911 	int  error;
912 	int zcopy = (req->nr_flags & NR_ZCOPY_MON);
913 
914 	if (zcopy) {
915 		req->nr_flags |= (NR_MONITOR_TX | NR_MONITOR_RX);
916 	}
917 	if ((req->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
918 		nm_prdis("not a monitor");
919 		return 0;
920 	}
921 	/* this is a request for a monitor adapter */
922 
923 	nm_prdis("flags %lx", req->nr_flags);
924 
925 	/* First, try to find the adapter that we want to monitor.
926 	 * We use the same req, after we have turned off the monitor flags.
927 	 * In this way we can potentially monitor everything netmap understands,
928 	 * except other monitors.
929 	 */
930 	memcpy(&preq, req, sizeof(preq));
931 	preq.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON);
932 	hdr->nr_body = (uintptr_t)&preq;
933 	error = netmap_get_na(hdr, &pna, &ifp, nmd, create);
934 	hdr->nr_body = (uintptr_t)req;
935 	if (error) {
936 		nm_prerr("parent lookup failed: %d", error);
937 		return error;
938 	}
939 	nm_prdis("found parent: %s", pna->name);
940 
941 	if (!nm_netmap_on(pna)) {
942 		/* parent not in netmap mode */
943 		/* XXX we can wait for the parent to enter netmap mode,
944 		 * by intercepting its nm_register callback (2014-03-16)
945 		 */
946 		nm_prerr("%s not in netmap mode", pna->name);
947 		error = EINVAL;
948 		goto put_out;
949 	}
950 
951 	mna = nm_os_malloc(sizeof(*mna));
952 	if (mna == NULL) {
953 		error = ENOMEM;
954 		goto put_out;
955 	}
956 	mna->priv.np_na = pna;
957 
958 	/* grab all the rings we need in the parent */
959 	error = netmap_interp_ringid(&mna->priv, hdr);
960 	if (error) {
961 		nm_prerr("ringid error");
962 		goto free_out;
963 	}
964 	snprintf(mna->up.name, sizeof(mna->up.name), "%s/%s%s%s#%lu", pna->name,
965 			zcopy ? "z" : "",
966 			(req->nr_flags & NR_MONITOR_RX) ? "r" : "",
967 			(req->nr_flags & NR_MONITOR_TX) ? "t" : "",
968 			pna->monitor_id++);
969 
970 	/* the monitor supports the host rings iff the parent does */
971 	mna->up.na_flags |= (pna->na_flags & NAF_HOST_RINGS) & ~NAF_OFFSETS;
972 	if (!zcopy)
973 		mna->up.na_flags |= NAF_OFFSETS;
974 	/* a do-nothing txsync: monitors cannot be used to inject packets */
975 	mna->up.nm_txsync = netmap_monitor_txsync;
976 	mna->up.nm_rxsync = netmap_monitor_rxsync;
977 	mna->up.nm_krings_create = netmap_monitor_krings_create;
978 	mna->up.nm_krings_delete = netmap_monitor_krings_delete;
979 	mna->up.num_tx_rings = 1; // XXX what should we do here with chained zmons?
980 	/* we set the number of our rx_rings to be max(num_rx_rings, num_rx_rings)
981 	 * in the parent
982 	 */
983 	mna->up.num_rx_rings = pna->num_rx_rings;
984 	if (pna->num_tx_rings > pna->num_rx_rings)
985 		mna->up.num_rx_rings = pna->num_tx_rings;
986 	/* by default, the number of slots is the same as in
987 	 * the parent rings, but the user may ask for a different
988 	 * number
989 	 */
990 	mna->up.num_tx_desc = req->nr_tx_slots;
991 	nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
992 			1, NM_MONITOR_MAXSLOTS, NULL);
993 	mna->up.num_rx_desc = req->nr_rx_slots;
994 	nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
995 			1, NM_MONITOR_MAXSLOTS, NULL);
996 	if (zcopy) {
997 		mna->up.nm_register = netmap_zmon_reg;
998 		mna->up.nm_dtor = netmap_zmon_dtor;
999 		/* to have zero copy, we need to use the same memory allocator
1000 		 * as the monitored port
1001 		 */
1002 		mna->up.nm_mem = netmap_mem_get(pna->nm_mem);
1003 		/* and the allocator cannot be changed */
1004 		mna->up.na_flags |= NAF_MEM_OWNER;
1005 	} else {
1006 		mna->up.nm_register = netmap_monitor_reg;
1007 		mna->up.nm_dtor = netmap_monitor_dtor;
1008 		mna->up.nm_mem = netmap_mem_private_new(
1009 				mna->up.num_tx_rings,
1010 				mna->up.num_tx_desc,
1011 				mna->up.num_rx_rings,
1012 				mna->up.num_rx_desc,
1013 				0, /* extra bufs */
1014 				0, /* pipes */
1015 				&error);
1016 		if (mna->up.nm_mem == NULL)
1017 			goto put_out;
1018 	}
1019 
1020 	error = netmap_attach_common(&mna->up);
1021 	if (error) {
1022 		nm_prerr("netmap_attach_common failed");
1023 		goto mem_put_out;
1024 	}
1025 
1026 	/* remember the traffic directions we have to monitor */
1027 	mna->flags = (req->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON));
1028 
1029 	*na = &mna->up;
1030 	netmap_adapter_get(*na);
1031 
1032 	/* keep the reference to the parent */
1033 	nm_prdis("monitor ok");
1034 
1035 	/* drop the reference to the ifp, if any */
1036 	if (ifp)
1037 		if_rele(ifp);
1038 
1039 	return 0;
1040 
1041 mem_put_out:
1042 	netmap_mem_put(mna->up.nm_mem);
1043 free_out:
1044 	nm_os_free(mna);
1045 put_out:
1046 	netmap_unget_na(pna, ifp);
1047 	return error;
1048 }
1049 
1050 
1051 #endif /* WITH_MONITOR */
1052