xref: /freebsd/sys/dev/netmap/netmap_monitor.c (revision 7815283df299be63807225a9fe9b6e54406eae28)
1 /*
2  * Copyright (C) 2014-2016 Giuseppe Lettieri
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *   1. Redistributions of source code must retain the above copyright
9  *      notice, this list of conditions and the following disclaimer.
10  *   2. Redistributions in binary form must reproduce the above copyright
11  *      notice, this list of conditions and the following disclaimer in the
12  *      documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  * $FreeBSD$
29  *
30  * Monitors
31  *
32  * netmap monitors can be used to do monitoring of network traffic
33  * on another adapter, when the latter adapter is working in netmap mode.
34  *
35  * Monitors offer to userspace the same interface as any other netmap port,
36  * with as many pairs of netmap rings as the monitored adapter.
37  * However, only the rx rings are actually used. Each monitor rx ring receives
38  * the traffic transiting on both the tx and rx corresponding rings in the
39  * monitored adapter. During registration, the user can choose if she wants
40  * to intercept tx only, rx only, or both tx and rx traffic.
41  *
42  * If the monitor is not able to cope with the stream of frames, excess traffic
43  * will be dropped.
44  *
45  * If the monitored adapter leaves netmap mode, the monitor has to be restarted.
46  *
47  * Monitors can be either zero-copy or copy-based.
48  *
49  * Copy monitors see the frames before they are consumed:
50  *
51  *  - For tx traffic, this is when the application sends them, before they are
52  *    passed down to the adapter.
53  *
54  *  - For rx traffic, this is when they are received by the adapter, before
55  *    they are sent up to the application, if any (note that, if no
56  *    application is reading from a monitored ring, the ring will eventually
57  *    fill up and traffic will stop).
58  *
59  * Zero-copy monitors only see the frames after they have been consumed:
60  *
61  *  - For tx traffic, this is after the slots containing the frames have been
62  *    marked as free. Note that this may happen after a considerable delay from
63  *    frame transmission, since freeing of slots is often done lazily.
64  *
65  *  - For rx traffic, this is after the consumer on the monitored adapter
66  *    has released them. In most cases, the consumer is a userspace
67  *    application which may have modified the frame contents.
68  *
69  * Several copy or zero-copy monitors may be active on any ring.
70  *
71  */
72 
73 
74 #if defined(__FreeBSD__)
75 #include <sys/cdefs.h> /* prerequisite */
76 
77 #include <sys/types.h>
78 #include <sys/errno.h>
79 #include <sys/param.h>	/* defines used in kernel.h */
80 #include <sys/kernel.h>	/* types used in module initialization */
81 #include <sys/malloc.h>
82 #include <sys/poll.h>
83 #include <sys/lock.h>
84 #include <sys/rwlock.h>
85 #include <sys/selinfo.h>
86 #include <sys/sysctl.h>
87 #include <sys/socket.h> /* sockaddrs */
88 #include <net/if.h>
89 #include <net/if_var.h>
90 #include <machine/bus.h>	/* bus_dmamap_* */
91 #include <sys/refcount.h>
92 
93 
94 #elif defined(linux)
95 
96 #include "bsd_glue.h"
97 
98 #elif defined(__APPLE__)
99 
100 #warning OSX support is only partial
101 #include "osx_glue.h"
102 
103 #elif defined(_WIN32)
104 #include "win_glue.h"
105 #else
106 
107 #error	Unsupported platform
108 
109 #endif /* unsupported */
110 
111 /*
112  * common headers
113  */
114 
115 #include <net/netmap.h>
116 #include <dev/netmap/netmap_kern.h>
117 #include <dev/netmap/netmap_mem2.h>
118 
119 #ifdef WITH_MONITOR
120 
121 #define NM_MONITOR_MAXSLOTS 4096
122 
123 /*
124  ********************************************************************
125  * functions common to both kinds of monitors
126  ********************************************************************
127  */
128 
129 static int netmap_zmon_reg(struct netmap_adapter *, int);
130 static int
131 nm_is_zmon(struct netmap_adapter *na)
132 {
133 	return na->nm_register == netmap_zmon_reg;
134 }
135 
136 /* nm_sync callback for the monitor's own tx rings.
137  * This makes no sense and always returns error
138  */
139 static int
140 netmap_monitor_txsync(struct netmap_kring *kring, int flags)
141 {
142 	RD(1, "%s %x", kring->name, flags);
143 	return EIO;
144 }
145 
146 /* nm_sync callback for the monitor's own rx rings.
147  * Note that the lock in netmap_zmon_parent_sync only protects
148  * writers among themselves. Synchronization between writers
149  * (i.e., netmap_zmon_parent_txsync and netmap_zmon_parent_rxsync)
150  * and readers (i.e., netmap_zmon_rxsync) relies on memory barriers.
151  */
152 static int
153 netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
154 {
155 	struct netmap_monitor_adapter *mna =
156 		(struct netmap_monitor_adapter *)kring->na;
157 	if (unlikely(mna->priv.np_na == NULL)) {
158 		/* parent left netmap mode */
159 		return EIO;
160 	}
161 	ND("%s %x", kring->name, flags);
162 	kring->nr_hwcur = kring->rhead;
163 	mb();
164 	return 0;
165 }
166 
167 /* nm_krings_create callbacks for monitors.
168  */
169 static int
170 netmap_monitor_krings_create(struct netmap_adapter *na)
171 {
172 	int error = netmap_krings_create(na, 0);
173 	enum txrx t;
174 
175 	if (error)
176 		return error;
177 	/* override the host rings callbacks */
178 	for_rx_tx(t) {
179 		int i;
180 		u_int first = nma_get_nrings(na, t);
181 		for (i = 0; i < nma_get_host_nrings(na, t); i++) {
182 			struct netmap_kring *kring = NMR(na, t)[first + i];
183 			kring->nm_sync = t == NR_TX ? netmap_monitor_txsync :
184 						      netmap_monitor_rxsync;
185 		}
186 	}
187 	return 0;
188 }
189 
/* nm_krings_delete callback for monitors.
 * Nothing monitor-specific to undo here: just hand the adapter to the
 * generic netmap_krings_delete().
 */
static void
netmap_monitor_krings_delete(struct netmap_adapter *na)
{

	netmap_krings_delete(na);
}
196 
197 
198 static u_int
199 nm_txrx2flag(enum txrx t)
200 {
201 	return (t == NR_RX ? NR_MONITOR_RX : NR_MONITOR_TX);
202 }
203 
204 /* allocate the monitors array in the monitored kring */
205 static int
206 nm_monitor_alloc(struct netmap_kring *kring, u_int n)
207 {
208 	size_t old_len, len;
209 	struct netmap_kring **nm;
210 
211 	if (n <= kring->max_monitors)
212 		/* we already have more entries that requested */
213 		return 0;
214 
215 	old_len = sizeof(struct netmap_kring *)*kring->max_monitors;
216 	len = sizeof(struct netmap_kring *) * n;
217 	nm = nm_os_realloc(kring->monitors, len, old_len);
218 	if (nm == NULL)
219 		return ENOMEM;
220 
221 	kring->monitors = nm;
222 	kring->max_monitors = n;
223 
224 	return 0;
225 }
226 
227 /* deallocate the parent array in the parent adapter */
228 static void
229 nm_monitor_dealloc(struct netmap_kring *kring)
230 {
231 	if (kring->monitors) {
232 		if (kring->n_monitors > 0) {
233 			D("freeing not empty monitor array for %s (%d dangling monitors)!", kring->name,
234 					kring->n_monitors);
235 		}
236 		nm_os_free(kring->monitors);
237 		kring->monitors = NULL;
238 		kring->max_monitors = 0;
239 		kring->n_monitors = 0;
240 	}
241 }
242 
243 /* returns 1 iff kring has no monitors */
244 static inline int
245 nm_monitor_none(struct netmap_kring *kring)
246 {
247 	return kring->n_monitors == 0 &&
248 		kring->zmon_list[NR_TX].next == NULL &&
249 		kring->zmon_list[NR_RX].next == NULL;
250 }
251 
252 /*
253  * monitors work by replacing the nm_sync() and possibly the
254  * nm_notify() callbacks in the monitored rings.
255  */
256 static int netmap_zmon_parent_txsync(struct netmap_kring *, int);
257 static int netmap_zmon_parent_rxsync(struct netmap_kring *, int);
258 static int netmap_monitor_parent_txsync(struct netmap_kring *, int);
259 static int netmap_monitor_parent_rxsync(struct netmap_kring *, int);
260 static int netmap_monitor_parent_notify(struct netmap_kring *, int);
261 
262 static void
263 nm_monitor_intercept_callbacks(struct netmap_kring *kring)
264 {
265 	ND("intercept callbacks on %s", kring->name);
266 	kring->mon_sync = kring->nm_sync;
267 	kring->mon_notify = kring->nm_notify;
268 	if (kring->tx == NR_TX) {
269 		kring->nm_sync = netmap_monitor_parent_txsync;
270 	} else {
271 		kring->nm_sync = netmap_monitor_parent_rxsync;
272 		kring->nm_notify = netmap_monitor_parent_notify;
273 		kring->mon_tail = kring->nr_hwtail;
274 	}
275 }
276 
277 static void
278 nm_monitor_restore_callbacks(struct netmap_kring *kring)
279 {
280 	ND("restoring callbacks on %s", kring->name);
281 	kring->nm_sync = kring->mon_sync;
282 	kring->mon_sync = NULL;
283 	if (kring->tx == NR_RX) {
284 		kring->nm_notify = kring->mon_notify;
285 	}
286 	kring->mon_notify = NULL;
287 }
288 
289 static struct netmap_kring *
290 nm_zmon_list_head(struct netmap_kring *mkring, enum txrx t)
291 {
292 	struct netmap_adapter *na = mkring->na;
293 	struct netmap_kring *kring = mkring;
294 	struct netmap_zmon_list *z = &kring->zmon_list[t];
295 	/* reach the head of the list */
296 	while (nm_is_zmon(na) && z->prev != NULL) {
297 		kring = z->prev;
298 		na = kring->na;
299 		z = &kring->zmon_list[t];
300 	}
301 	return nm_is_zmon(na) ? NULL : kring;
302 }
303 
304 /* add the monitor mkring to the list of monitors of kring.
305  * If this is the first monitor, intercept the callbacks
306  */
307 static int
308 netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int zmon)
309 {
310 	int error = NM_IRQ_COMPLETED;
311 	enum txrx t = kring->tx;
312 	struct netmap_zmon_list *z = &kring->zmon_list[t];
313 	struct netmap_zmon_list *mz = &mkring->zmon_list[t];
314 	struct netmap_kring *ikring = kring;
315 
316 	/* a zero-copy monitor which is not the first in the list
317 	 * must monitor the previous monitor
318 	 */
319 	if (zmon && z->prev != NULL)
320 		ikring = z->prev; /* tail of the list */
321 
322 	/* synchronize with concurrently running nm_sync()s */
323 	nm_kr_stop(kring, NM_KR_LOCKED);
324 
325 	if (nm_monitor_none(ikring)) {
326 		/* this is the first monitor, intercept the callbacks */
327 		ND("%s: intercept callbacks on %s", mkring->name, ikring->name);
328 		nm_monitor_intercept_callbacks(ikring);
329 	}
330 
331 	if (zmon) {
332 		/* append the zmon to the list */
333 		ikring->zmon_list[t].next = mkring;
334 		z->prev = mkring; /* new tail */
335 		mz->prev = ikring;
336 		mz->next = NULL;
337 		/* grab a reference to the previous netmap adapter
338 		 * in the chain (this may be the monitored port
339 		 * or another zero-copy monitor)
340 		 */
341 		netmap_adapter_get(ikring->na);
342 	} else {
343 		/* make sure the monitor array exists and is big enough */
344 		error = nm_monitor_alloc(kring, kring->n_monitors + 1);
345 		if (error)
346 			goto out;
347 		kring->monitors[kring->n_monitors] = mkring;
348 		mkring->mon_pos[kring->tx] = kring->n_monitors;
349 		kring->n_monitors++;
350 	}
351 
352 out:
353 	nm_kr_start(kring);
354 	return error;
355 }
356 
357 /* remove the monitor mkring from the list of monitors of kring.
358  * If this is the last monitor, restore the original callbacks
359  */
360 static void
361 netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring, enum txrx t)
362 {
363 	int zmon = nm_is_zmon(mkring->na);
364 	struct netmap_zmon_list *mz = &mkring->zmon_list[t];
365 	struct netmap_kring *ikring = kring;
366 
367 
368 	if (zmon) {
369 		/* get to the head of the list */
370 		kring = nm_zmon_list_head(mkring, t);
371 		ikring = mz->prev;
372 	}
373 
374 	/* synchronize with concurrently running nm_sync()s
375 	 * if kring is NULL (orphaned list) the monitored port
376 	 * has exited netmap mode, so there is nothing to stop
377 	 */
378 	if (kring != NULL)
379 		nm_kr_stop(kring, NM_KR_LOCKED);
380 
381 	if (zmon) {
382 		/* remove the monitor from the list */
383 		if (mz->next != NULL) {
384 			mz->next->zmon_list[t].prev = mz->prev;
385 			/* we also need to let the next monitor drop the
386 			 * reference to us and grab the reference to the
387 			 * previous ring owner, instead
388 			 */
389 			if (mz->prev != NULL)
390 				netmap_adapter_get(mz->prev->na);
391 			netmap_adapter_put(mkring->na);
392 		} else if (kring != NULL) {
393 			/* in the monitored kring, prev is actually the
394 			 * pointer to the tail of the list
395 			 */
396 			kring->zmon_list[t].prev =
397 				(mz->prev != kring ? mz->prev : NULL);
398 		}
399 		if (mz->prev != NULL) {
400 			netmap_adapter_put(mz->prev->na);
401 			mz->prev->zmon_list[t].next = mz->next;
402 		}
403 		mz->prev = NULL;
404 		mz->next = NULL;
405 	} else {
406 		/* this is a copy monitor */
407 		uint32_t mon_pos = mkring->mon_pos[kring->tx];
408 		kring->n_monitors--;
409 		if (mon_pos != kring->n_monitors) {
410 			kring->monitors[mon_pos] =
411 				kring->monitors[kring->n_monitors];
412 			kring->monitors[mon_pos]->mon_pos[kring->tx] = mon_pos;
413 		}
414 		kring->monitors[kring->n_monitors] = NULL;
415 		if (kring->n_monitors == 0) {
416 			nm_monitor_dealloc(kring);
417 		}
418 	}
419 
420 	if (ikring != NULL && nm_monitor_none(ikring)) {
421 		/* this was the last monitor, restore the callbacks */
422 		nm_monitor_restore_callbacks(ikring);
423 	}
424 
425 	if (kring != NULL)
426 		nm_kr_start(kring);
427 }
428 
429 
430 /* This is called when the monitored adapter leaves netmap mode
431  * (see netmap_do_unregif).
432  * We need to notify the monitors that the monitored rings are gone.
433  * We do this by setting their mna->priv.np_na to NULL.
434  * Note that the rings are already stopped when this happens, so
435  * no monitor ring callback can be active.
436  */
437 void
438 netmap_monitor_stop(struct netmap_adapter *na)
439 {
440 	enum txrx t;
441 
442 	for_rx_tx(t) {
443 		u_int i;
444 
445 		for (i = 0; i < netmap_all_rings(na, t); i++) {
446 			struct netmap_kring *kring = NMR(na, t)[i];
447 			struct netmap_zmon_list *z = &kring->zmon_list[t];
448 			u_int j;
449 
450 			for (j = 0; j < kring->n_monitors; j++) {
451 				struct netmap_kring *mkring =
452 					kring->monitors[j];
453 				struct netmap_monitor_adapter *mna =
454 					(struct netmap_monitor_adapter *)mkring->na;
455 				/* forget about this adapter */
456 				if (mna->priv.np_na != NULL) {
457 					netmap_adapter_put(mna->priv.np_na);
458 					mna->priv.np_na = NULL;
459 				}
460 				kring->monitors[j] = NULL;
461 			}
462 
463 			if (!nm_is_zmon(na)) {
464 				/* we are the head of at most one list */
465 				struct netmap_kring *zkring;
466 				for (zkring = z->next; zkring != NULL;
467 						zkring = zkring->zmon_list[t].next)
468 				{
469 					struct netmap_monitor_adapter *next =
470 						(struct netmap_monitor_adapter *)zkring->na;
471 					/* let the monitor forget about us */
472 					netmap_adapter_put(next->priv.np_na); /* nop if null */
473 					next->priv.np_na = NULL;
474 				}
475 				/* orhpan the zmon list */
476 				if (z->next != NULL)
477 					z->next->zmon_list[t].prev = NULL;
478 				z->next = NULL;
479 				z->prev = NULL;
480 			}
481 
482 			if (!nm_monitor_none(kring)) {
483 
484 				kring->n_monitors = 0;
485 				nm_monitor_dealloc(kring);
486 				nm_monitor_restore_callbacks(kring);
487 			}
488 		}
489 	}
490 }
491 
492 
493 /* common functions for the nm_register() callbacks of both kind of
494  * monitors.
495  */
496 static int
497 netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
498 {
499 	struct netmap_monitor_adapter *mna =
500 		(struct netmap_monitor_adapter *)na;
501 	struct netmap_priv_d *priv = &mna->priv;
502 	struct netmap_adapter *pna = priv->np_na;
503 	struct netmap_kring *kring, *mkring;
504 	int i;
505 	enum txrx t, s;
506 
507 	ND("%p: onoff %d", na, onoff);
508 	if (onoff) {
509 		if (pna == NULL) {
510 			/* parent left netmap mode, fatal */
511 			D("%s: internal error", na->name);
512 			return ENXIO;
513 		}
514 		for_rx_tx(t) {
515 			for (i = 0; i < netmap_all_rings(na, t); i++) {
516 				mkring = NMR(na, t)[i];
517 				if (!nm_kring_pending_on(mkring))
518 					continue;
519 				mkring->nr_mode = NKR_NETMAP_ON;
520 				if (t == NR_TX)
521 					continue;
522 				for_rx_tx(s) {
523 					if (i > nma_get_nrings(pna, s))
524 						continue;
525 					if (mna->flags & nm_txrx2flag(s)) {
526 						kring = NMR(pna, s)[i];
527 						netmap_monitor_add(mkring, kring, zmon);
528 					}
529 				}
530 			}
531 		}
532 		na->na_flags |= NAF_NETMAP_ON;
533 	} else {
534 		if (na->active_fds == 0)
535 			na->na_flags &= ~NAF_NETMAP_ON;
536 		for_rx_tx(t) {
537 			for (i = 0; i < netmap_all_rings(na, t); i++) {
538 				mkring = NMR(na, t)[i];
539 				if (!nm_kring_pending_off(mkring))
540 					continue;
541 				mkring->nr_mode = NKR_NETMAP_OFF;
542 				if (t == NR_TX)
543 					continue;
544 				/* we cannot access the parent krings if the parent
545 				 * has left netmap mode. This is signaled by a NULL
546 				 * pna pointer
547 				 */
548 				if (pna == NULL)
549 					continue;
550 				for_rx_tx(s) {
551 					if (i > nma_get_nrings(pna, s))
552 						continue;
553 					if (mna->flags & nm_txrx2flag(s)) {
554 						kring = NMR(pna, s)[i];
555 						netmap_monitor_del(mkring, kring, s);
556 					}
557 				}
558 			}
559 		}
560 	}
561 	return 0;
562 }
563 
564 /*
565  ****************************************************************
566  * functions specific for zero-copy monitors
567  ****************************************************************
568  */
569 
570 /*
571  * Common function for both zero-copy tx and rx nm_sync()
572  * callbacks
573  */
574 static int
575 netmap_zmon_parent_sync(struct netmap_kring *kring, int flags, enum txrx tx)
576 {
577 	struct netmap_kring *mkring = kring->zmon_list[tx].next;
578 	struct netmap_ring *ring = kring->ring, *mring;
579 	int error = 0;
580 	int rel_slots, free_slots, busy, sent = 0;
581 	u_int beg, end, i;
582 	u_int lim = kring->nkr_num_slots - 1,
583 	      mlim; // = mkring->nkr_num_slots - 1;
584 
585 	if (mkring == NULL) {
586 		RD(5, "NULL monitor on %s", kring->name);
587 		return 0;
588 	}
589 	mring = mkring->ring;
590 	mlim = mkring->nkr_num_slots - 1;
591 
592 	/* get the relased slots (rel_slots) */
593 	if (tx == NR_TX) {
594 		beg = kring->nr_hwtail + 1;
595 		error = kring->mon_sync(kring, flags);
596 		if (error)
597 			return error;
598 		end = kring->nr_hwtail + 1;
599 	} else { /* NR_RX */
600 		beg = kring->nr_hwcur;
601 		end = kring->rhead;
602 	}
603 
604 	rel_slots = end - beg;
605 	if (rel_slots < 0)
606 		rel_slots += kring->nkr_num_slots;
607 
608 	if (!rel_slots) {
609 		/* no released slots, but we still need
610 		 * to call rxsync if this is a rx ring
611 		 */
612 		goto out_rxsync;
613 	}
614 
615 	/* we need to lock the monitor receive ring, since it
616 	 * is the target of bot tx and rx traffic from the monitored
617 	 * adapter
618 	 */
619 	mtx_lock(&mkring->q_lock);
620 	/* get the free slots available on the monitor ring */
621 	i = mkring->nr_hwtail;
622 	busy = i - mkring->nr_hwcur;
623 	if (busy < 0)
624 		busy += mkring->nkr_num_slots;
625 	free_slots = mlim - busy;
626 
627 	if (!free_slots)
628 		goto out;
629 
630 	/* swap min(free_slots, rel_slots) slots */
631 	if (free_slots < rel_slots) {
632 		beg += (rel_slots - free_slots);
633 		rel_slots = free_slots;
634 	}
635 	if (unlikely(beg >= kring->nkr_num_slots))
636 		beg -= kring->nkr_num_slots;
637 
638 	sent = rel_slots;
639 	for ( ; rel_slots; rel_slots--) {
640 		struct netmap_slot *s = &ring->slot[beg];
641 		struct netmap_slot *ms = &mring->slot[i];
642 		uint32_t tmp;
643 
644 		tmp = ms->buf_idx;
645 		ms->buf_idx = s->buf_idx;
646 		s->buf_idx = tmp;
647 		ND(5, "beg %d buf_idx %d", beg, tmp);
648 
649 		tmp = ms->len;
650 		ms->len = s->len;
651 		s->len = tmp;
652 
653 		ms->flags = s->flags;
654 		s->flags |= NS_BUF_CHANGED;
655 
656 		beg = nm_next(beg, lim);
657 		i = nm_next(i, mlim);
658 
659 	}
660 	mb();
661 	mkring->nr_hwtail = i;
662 
663 out:
664 	mtx_unlock(&mkring->q_lock);
665 
666 	if (sent) {
667 		/* notify the new frames to the monitor */
668 		mkring->nm_notify(mkring, 0);
669 	}
670 
671 out_rxsync:
672 	if (tx == NR_RX)
673 		error = kring->mon_sync(kring, flags);
674 
675 	return error;
676 }
677 
678 /* callback used to replace the nm_sync callback in the monitored tx rings */
679 static int
680 netmap_zmon_parent_txsync(struct netmap_kring *kring, int flags)
681 {
682 	return netmap_zmon_parent_sync(kring, flags, NR_TX);
683 }
684 
685 /* callback used to replace the nm_sync callback in the monitored rx rings */
686 static int
687 netmap_zmon_parent_rxsync(struct netmap_kring *kring, int flags)
688 {
689 	return netmap_zmon_parent_sync(kring, flags, NR_RX);
690 }
691 
/* nm_register callback for zero-copy monitors: the common code with
 * the zmon argument set. Its address also identifies zero-copy
 * monitors (see nm_is_zmon).
 */
static int
netmap_zmon_reg(struct netmap_adapter *na, int onoff)
{

	return (netmap_monitor_reg_common(na, onoff, 1 /* zcopy */));
}
697 
698 /* nm_dtor callback for monitors */
699 static void
700 netmap_zmon_dtor(struct netmap_adapter *na)
701 {
702 	struct netmap_monitor_adapter *mna =
703 		(struct netmap_monitor_adapter *)na;
704 	struct netmap_priv_d *priv = &mna->priv;
705 	struct netmap_adapter *pna = priv->np_na;
706 
707 	netmap_adapter_put(pna);
708 }
709 
710 /*
711  ****************************************************************
712  * functions specific for copy monitors
713  ****************************************************************
714  */
715 
716 static void
717 netmap_monitor_parent_sync(struct netmap_kring *kring, u_int first_new, int new_slots)
718 {
719 	u_int j;
720 
721 	for (j = 0; j < kring->n_monitors; j++) {
722 		struct netmap_kring *mkring = kring->monitors[j];
723 		u_int i, mlim, beg;
724 		int free_slots, busy, sent = 0, m;
725 		u_int lim = kring->nkr_num_slots - 1;
726 		struct netmap_ring *ring = kring->ring, *mring = mkring->ring;
727 		u_int max_len = NETMAP_BUF_SIZE(mkring->na);
728 
729 		mlim = mkring->nkr_num_slots - 1;
730 
731 		/* we need to lock the monitor receive ring, since it
732 		 * is the target of bot tx and rx traffic from the monitored
733 		 * adapter
734 		 */
735 		mtx_lock(&mkring->q_lock);
736 		/* get the free slots available on the monitor ring */
737 		i = mkring->nr_hwtail;
738 		busy = i - mkring->nr_hwcur;
739 		if (busy < 0)
740 			busy += mkring->nkr_num_slots;
741 		free_slots = mlim - busy;
742 
743 		if (!free_slots)
744 			goto out;
745 
746 		/* copy min(free_slots, new_slots) slots */
747 		m = new_slots;
748 		beg = first_new;
749 		if (free_slots < m) {
750 			beg += (m - free_slots);
751 			if (beg >= kring->nkr_num_slots)
752 				beg -= kring->nkr_num_slots;
753 			m = free_slots;
754 		}
755 
756 		for ( ; m; m--) {
757 			struct netmap_slot *s = &ring->slot[beg];
758 			struct netmap_slot *ms = &mring->slot[i];
759 			u_int copy_len = s->len;
760 			char *src = NMB(kring->na, s),
761 			     *dst = NMB(mkring->na, ms);
762 
763 			if (unlikely(copy_len > max_len)) {
764 				RD(5, "%s->%s: truncating %d to %d", kring->name,
765 						mkring->name, copy_len, max_len);
766 				copy_len = max_len;
767 			}
768 
769 			memcpy(dst, src, copy_len);
770 			ms->len = copy_len;
771 			ms->flags = s->flags;
772 			sent++;
773 
774 			beg = nm_next(beg, lim);
775 			i = nm_next(i, mlim);
776 		}
777 		mb();
778 		mkring->nr_hwtail = i;
779 	out:
780 		mtx_unlock(&mkring->q_lock);
781 
782 		if (sent) {
783 			/* notify the new frames to the monitor */
784 			mkring->nm_notify(mkring, 0);
785 		}
786 	}
787 }
788 
789 /* callback used to replace the nm_sync callback in the monitored tx rings */
790 static int
791 netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags)
792 {
793 	u_int first_new;
794 	int new_slots;
795 
796 	/* get the new slots */
797 	if (kring->n_monitors > 0) {
798 		first_new = kring->nr_hwcur;
799 		new_slots = kring->rhead - first_new;
800 		if (new_slots < 0)
801 			new_slots += kring->nkr_num_slots;
802 		if (new_slots)
803 			netmap_monitor_parent_sync(kring, first_new, new_slots);
804 	}
805 	if (kring->zmon_list[NR_TX].next != NULL) {
806 		return netmap_zmon_parent_txsync(kring, flags);
807 	}
808 	return kring->mon_sync(kring, flags);
809 }
810 
811 /* callback used to replace the nm_sync callback in the monitored rx rings */
812 static int
813 netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags)
814 {
815 	u_int first_new;
816 	int new_slots, error;
817 
818 	/* get the new slots */
819 	if (kring->zmon_list[NR_RX].next != NULL) {
820 		error = netmap_zmon_parent_rxsync(kring, flags);
821 	} else {
822 		error =  kring->mon_sync(kring, flags);
823 	}
824 	if (error)
825 		return error;
826 	if (kring->n_monitors > 0) {
827 		first_new = kring->mon_tail;
828 		new_slots = kring->nr_hwtail - first_new;
829 		if (new_slots < 0)
830 			new_slots += kring->nkr_num_slots;
831 		if (new_slots)
832 			netmap_monitor_parent_sync(kring, first_new, new_slots);
833 		kring->mon_tail = kring->nr_hwtail;
834 	}
835 	return 0;
836 }
837 
838 /* callback used to replace the nm_notify() callback in the monitored rx rings */
839 static int
840 netmap_monitor_parent_notify(struct netmap_kring *kring, int flags)
841 {
842 	int (*notify)(struct netmap_kring*, int);
843 	ND(5, "%s %x", kring->name, flags);
844 	/* ?xsync callbacks have tryget called by their callers
845 	 * (NIOCREGIF and poll()), but here we have to call it
846 	 * by ourself
847 	 */
848 	if (nm_kr_tryget(kring, 0, NULL)) {
849 		/* in all cases, just skip the sync */
850 		return NM_IRQ_COMPLETED;
851 	}
852 	if (kring->n_monitors > 0) {
853 		netmap_monitor_parent_rxsync(kring, NAF_FORCE_READ);
854 	}
855 	if (nm_monitor_none(kring)) {
856 		/* we are no longer monitoring this ring, so both
857 		 * mon_sync and mon_notify are NULL
858 		 */
859 		notify = kring->nm_notify;
860 	} else {
861 		notify = kring->mon_notify;
862 	}
863 	nm_kr_put(kring);
864 	return notify(kring, flags);
865 }
866 
867 
/* nm_register callback for copy monitors: the common code with the
 * zmon argument cleared
 */
static int
netmap_monitor_reg(struct netmap_adapter *na, int onoff)
{

	return (netmap_monitor_reg_common(na, onoff, 0 /* no zcopy */));
}
873 
874 static void
875 netmap_monitor_dtor(struct netmap_adapter *na)
876 {
877 	struct netmap_monitor_adapter *mna =
878 		(struct netmap_monitor_adapter *)na;
879 	struct netmap_priv_d *priv = &mna->priv;
880 	struct netmap_adapter *pna = priv->np_na;
881 
882 	netmap_adapter_put(pna);
883 }
884 
885 
886 /* check if req is a request for a monitor adapter that we can satisfy */
887 int
888 netmap_get_monitor_na(struct nmreq_header *hdr, struct netmap_adapter **na,
889 			struct netmap_mem_d *nmd, int create)
890 {
891 	struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
892 	struct nmreq_register preq;
893 	struct netmap_adapter *pna; /* parent adapter */
894 	struct netmap_monitor_adapter *mna;
895 	struct ifnet *ifp = NULL;
896 	int  error;
897 	int zcopy = (req->nr_flags & NR_ZCOPY_MON);
898 
899 	if (zcopy) {
900 		req->nr_flags |= (NR_MONITOR_TX | NR_MONITOR_RX);
901 	}
902 	if ((req->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
903 		ND("not a monitor");
904 		return 0;
905 	}
906 	/* this is a request for a monitor adapter */
907 
908 	ND("flags %lx", req->nr_flags);
909 
910 	/* First, try to find the adapter that we want to monitor.
911 	 * We use the same req, after we have turned off the monitor flags.
912 	 * In this way we can potentially monitor everything netmap understands,
913 	 * except other monitors.
914 	 */
915 	memcpy(&preq, req, sizeof(preq));
916 	preq.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON);
917 	hdr->nr_body = (uintptr_t)&preq;
918 	error = netmap_get_na(hdr, &pna, &ifp, nmd, create);
919 	hdr->nr_body = (uintptr_t)req;
920 	if (error) {
921 		D("parent lookup failed: %d", error);
922 		return error;
923 	}
924 	ND("found parent: %s", pna->name);
925 
926 	if (!nm_netmap_on(pna)) {
927 		/* parent not in netmap mode */
928 		/* XXX we can wait for the parent to enter netmap mode,
929 		 * by intercepting its nm_register callback (2014-03-16)
930 		 */
931 		D("%s not in netmap mode", pna->name);
932 		error = EINVAL;
933 		goto put_out;
934 	}
935 
936 	mna = nm_os_malloc(sizeof(*mna));
937 	if (mna == NULL) {
938 		D("memory error");
939 		error = ENOMEM;
940 		goto put_out;
941 	}
942 	mna->priv.np_na = pna;
943 
944 	/* grab all the rings we need in the parent */
945 	error = netmap_interp_ringid(&mna->priv, req->nr_mode, req->nr_ringid,
946 					req->nr_flags);
947 	if (error) {
948 		D("ringid error");
949 		goto free_out;
950 	}
951 	snprintf(mna->up.name, sizeof(mna->up.name), "%s/%s%s%s#%lu", pna->name,
952 			zcopy ? "z" : "",
953 			(req->nr_flags & NR_MONITOR_RX) ? "r" : "",
954 			(req->nr_flags & NR_MONITOR_TX) ? "t" : "",
955 			pna->monitor_id++);
956 
957 	/* the monitor supports the host rings iff the parent does */
958 	mna->up.na_flags |= (pna->na_flags & NAF_HOST_RINGS);
959 	/* a do-nothing txsync: monitors cannot be used to inject packets */
960 	mna->up.nm_txsync = netmap_monitor_txsync;
961 	mna->up.nm_rxsync = netmap_monitor_rxsync;
962 	mna->up.nm_krings_create = netmap_monitor_krings_create;
963 	mna->up.nm_krings_delete = netmap_monitor_krings_delete;
964 	mna->up.num_tx_rings = 1; // XXX what should we do here with chained zmons?
965 	/* we set the number of our rx_rings to be max(num_rx_rings, num_rx_rings)
966 	 * in the parent
967 	 */
968 	mna->up.num_rx_rings = pna->num_rx_rings;
969 	if (pna->num_tx_rings > pna->num_rx_rings)
970 		mna->up.num_rx_rings = pna->num_tx_rings;
971 	/* by default, the number of slots is the same as in
972 	 * the parent rings, but the user may ask for a different
973 	 * number
974 	 */
975 	mna->up.num_tx_desc = req->nr_tx_slots;
976 	nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
977 			1, NM_MONITOR_MAXSLOTS, NULL);
978 	mna->up.num_rx_desc = req->nr_rx_slots;
979 	nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
980 			1, NM_MONITOR_MAXSLOTS, NULL);
981 	if (zcopy) {
982 		mna->up.nm_register = netmap_zmon_reg;
983 		mna->up.nm_dtor = netmap_zmon_dtor;
984 		/* to have zero copy, we need to use the same memory allocator
985 		 * as the monitored port
986 		 */
987 		mna->up.nm_mem = netmap_mem_get(pna->nm_mem);
988 		/* and the allocator cannot be changed */
989 		mna->up.na_flags |= NAF_MEM_OWNER;
990 	} else {
991 		mna->up.nm_register = netmap_monitor_reg;
992 		mna->up.nm_dtor = netmap_monitor_dtor;
993 		mna->up.nm_mem = netmap_mem_private_new(
994 				mna->up.num_tx_rings,
995 				mna->up.num_tx_desc,
996 				mna->up.num_rx_rings,
997 				mna->up.num_rx_desc,
998 				0, /* extra bufs */
999 				0, /* pipes */
1000 				&error);
1001 		if (mna->up.nm_mem == NULL)
1002 			goto put_out;
1003 	}
1004 
1005 	error = netmap_attach_common(&mna->up);
1006 	if (error) {
1007 		D("attach_common error");
1008 		goto mem_put_out;
1009 	}
1010 
1011 	/* remember the traffic directions we have to monitor */
1012 	mna->flags = (req->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON));
1013 
1014 	*na = &mna->up;
1015 	netmap_adapter_get(*na);
1016 
1017 	/* keep the reference to the parent */
1018 	ND("monitor ok");
1019 
1020 	/* drop the reference to the ifp, if any */
1021 	if (ifp)
1022 		if_rele(ifp);
1023 
1024 	return 0;
1025 
1026 mem_put_out:
1027 	netmap_mem_put(mna->up.nm_mem);
1028 free_out:
1029 	nm_os_free(mna);
1030 put_out:
1031 	netmap_unget_na(pna, ifp);
1032 	return error;
1033 }
1034 
1035 
1036 #endif /* WITH_MONITOR */
1037