xref: /freebsd/sys/dev/netmap/netmap_monitor.c (revision a812392203d7c4c3f0db9d8a0f3391374c49c71f)
1 /*
2  * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *   1. Redistributions of source code must retain the above copyright
8  *      notice, this list of conditions and the following disclaimer.
9  *   2. Redistributions in binary form must reproduce the above copyright
10  *      notice, this list of conditions and the following disclaimer in the
11  *      documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 /*
27  * $FreeBSD$
28  *
29  * Monitors
30  *
31  * netmap monitors can be used to do zero-copy monitoring of network traffic
32  * on another adapter, when the latter adapter is working in netmap mode.
33  *
34  * Monitors offer to userspace the same interface as any other netmap port,
35  * with as many pairs of netmap rings as the monitored adapter.
36  * However, only the rx rings are actually used. Each monitor rx ring receives
37  * the traffic transiting on both the tx and rx corresponding rings in the
38  * monitored adapter. During registration, the user can choose if she wants
39  * to intercept tx only, rx only, or both tx and rx traffic.
40  *
41  * The monitor only sees the frames after they have been consumed in the
42  * monitored adapter:
43  *
44  *  - For tx traffic, this is after the slots containing the frames have been
45  *    marked as free. Note that this may happen with a considerable delay after
46  *    frame transmission, since freeing of slots is often done lazily.
47  *
48  *  - For rx traffic, this is after the consumer on the monitored adapter
49  *    has released them. In most cases, the consumer is a userspace
50  *    application which may have modified the frame contents.
51  *
52  * If the monitor is not able to cope with the stream of frames, excess traffic
53  * will be dropped.
54  *
55  * Each ring can be monitored by at most one monitor. This may change in the
56  * future, if we implement monitor chaining.
57  *
58  */
59 
60 
61 #if defined(__FreeBSD__)
62 #include <sys/cdefs.h> /* prerequisite */
63 
64 #include <sys/types.h>
65 #include <sys/errno.h>
66 #include <sys/param.h>	/* defines used in kernel.h */
67 #include <sys/kernel.h>	/* types used in module initialization */
68 #include <sys/malloc.h>
69 #include <sys/poll.h>
70 #include <sys/lock.h>
71 #include <sys/rwlock.h>
72 #include <sys/selinfo.h>
73 #include <sys/sysctl.h>
74 #include <sys/socket.h> /* sockaddrs */
75 #include <net/if.h>
76 #include <net/if_var.h>
77 #include <machine/bus.h>	/* bus_dmamap_* */
78 #include <sys/refcount.h>
79 
80 
81 #elif defined(linux)
82 
83 #include "bsd_glue.h"
84 
85 #elif defined(__APPLE__)
86 
87 #warning OSX support is only partial
88 #include "osx_glue.h"
89 
90 #else
91 
92 #error	Unsupported platform
93 
94 #endif /* unsupported */
95 
96 /*
97  * common headers
98  */
99 
100 #include <net/netmap.h>
101 #include <dev/netmap/netmap_kern.h>
102 #include <dev/netmap/netmap_mem2.h>
103 
104 #ifdef WITH_MONITOR
105 
106 #define NM_MONITOR_MAXSLOTS 4096
107 
108 /* monitor works by replacing the nm_sync callbacks in the monitored rings.
109  * The actions to be performed are the same on both tx and rx rings, so we
110  * have collected them here
111  */
112 static int
113 netmap_monitor_parent_sync(struct netmap_kring *kring, int flags, u_int* ringptr)
114 {
115 	struct netmap_monitor_adapter *mna = kring->monitor;
116 	struct netmap_kring *mkring = &mna->up.rx_rings[kring->ring_id];
117 	struct netmap_ring *ring = kring->ring, *mring = mkring->ring;
118 	int error;
119 	int rel_slots, free_slots, busy;
120 	u_int beg, end, i;
121 	u_int lim = kring->nkr_num_slots - 1,
122 	      mlim = mkring->nkr_num_slots - 1;
123 
124 	/* get the relased slots (rel_slots) */
125 	beg = *ringptr;
126 	error = kring->save_sync(kring, flags);
127 	if (error)
128 		return error;
129 	end = *ringptr;
130 	rel_slots = end - beg;
131 	if (rel_slots < 0)
132 		rel_slots += kring->nkr_num_slots;
133 
134 	if (!rel_slots) {
135 		return 0;
136 	}
137 
138 	/* we need to lock the monitor receive ring, since it
139 	 * is the target of bot tx and rx traffic from the monitored
140 	 * adapter
141 	 */
142 	mtx_lock(&mkring->q_lock);
143 	/* get the free slots available on the monitor ring */
144 	i = mkring->nr_hwtail;
145 	busy = i - mkring->nr_hwcur;
146 	if (busy < 0)
147 		busy += mkring->nkr_num_slots;
148 	free_slots = mlim - busy;
149 
150 	if (!free_slots) {
151 		mtx_unlock(&mkring->q_lock);
152 		return 0;
153 	}
154 
155 	/* swap min(free_slots, rel_slots) slots */
156 	if (free_slots < rel_slots) {
157 		beg += (rel_slots - free_slots);
158 		if (beg > lim)
159 			beg = 0;
160 		rel_slots = free_slots;
161 	}
162 
163 	for ( ; rel_slots; rel_slots--) {
164 		struct netmap_slot *s = &ring->slot[beg];
165 		struct netmap_slot *ms = &mring->slot[i];
166 		uint32_t tmp;
167 
168 		tmp = ms->buf_idx;
169 		ms->buf_idx = s->buf_idx;
170 		s->buf_idx = tmp;
171 
172 		tmp = ms->len;
173 		ms->len = s->len;
174 		s->len = tmp;
175 
176 		s->flags |= NS_BUF_CHANGED;
177 
178 		beg = nm_next(beg, lim);
179 		i = nm_next(i, mlim);
180 
181 	}
182 	mb();
183 	mkring->nr_hwtail = i;
184 
185 	mtx_unlock(&mkring->q_lock);
186 	/* notify the new frames to the monitor */
187 	mna->up.nm_notify(&mna->up, mkring->ring_id, NR_RX, 0);
188 	return 0;
189 }
190 
191 /* callback used to replace the nm_sync callback in the monitored tx rings */
192 static int
193 netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags)
194 {
195         ND("%s %x", kring->name, flags);
196         return netmap_monitor_parent_sync(kring, flags, &kring->nr_hwtail);
197 }
198 
199 /* callback used to replace the nm_sync callback in the monitored rx rings */
200 static int
201 netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags)
202 {
203         ND("%s %x", kring->name, flags);
204         return netmap_monitor_parent_sync(kring, flags, &kring->rcur);
205 }
206 
207 /* nm_sync callback for the monitor's own tx rings.
208  * This makes no sense and always returns error
209  */
210 static int
211 netmap_monitor_txsync(struct netmap_kring *kring, int flags)
212 {
213         D("%s %x", kring->name, flags);
214 	return EIO;
215 }
216 
217 /* nm_sync callback for the monitor's own rx rings.
218  * Note that the lock in netmap_monitor_parent_sync only protects
219  * writers among themselves. Synchronization between writers
220  * (i.e., netmap_monitor_parent_txsync and netmap_monitor_parent_rxsync)
221  * and readers (i.e., netmap_monitor_rxsync) relies on memory barriers.
222  */
223 static int
224 netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
225 {
226         ND("%s %x", kring->name, flags);
227 	kring->nr_hwcur = kring->rcur;
228 	mb();
229 	nm_rxsync_finalize(kring);
230         return 0;
231 }
232 
/* nm_krings_create callback for monitors.
 * The default krings-create callback used for hardware adapters would
 * also work, but monitors have no use for the mbq, so we call
 * netmap_krings_create directly with 0 as its second argument.
 */
static int
netmap_monitor_krings_create(struct netmap_adapter *na)
{
	int error;

	error = netmap_krings_create(na, 0);
	return error;
}
242 
243 
244 /* nm_register callback for monitors.
245  *
246  * On registration, replace the nm_sync callbacks in the monitored
247  * rings with our own, saving the previous ones in the monitored
248  * rings themselves, where they are used by netmap_monitor_parent_sync.
249  *
250  * On de-registration, restore the original callbacks. We need to
251  * stop traffic while we are doing this, since the monitored adapter may
252  * have already started executing a netmap_monitor_parent_sync
253  * and may not like the kring->save_sync pointer to become NULL.
254  */
255 static int
256 netmap_monitor_reg(struct netmap_adapter *na, int onoff)
257 {
258 	struct netmap_monitor_adapter *mna =
259 		(struct netmap_monitor_adapter *)na;
260 	struct netmap_priv_d *priv = &mna->priv;
261 	struct netmap_adapter *pna = priv->np_na;
262 	struct netmap_kring *kring;
263 	int i;
264 
265 	ND("%p: onoff %d", na, onoff);
266 	if (onoff) {
267 		if (!nm_netmap_on(pna)) {
268 			/* parent left netmap mode, fatal */
269 			return ENXIO;
270 		}
271 		if (mna->flags & NR_MONITOR_TX) {
272 			for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
273 				kring = &pna->tx_rings[i];
274 				kring->save_sync = kring->nm_sync;
275 				kring->nm_sync = netmap_monitor_parent_txsync;
276 			}
277 		}
278 		if (mna->flags & NR_MONITOR_RX) {
279 			for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
280 				kring = &pna->rx_rings[i];
281 				kring->save_sync = kring->nm_sync;
282 				kring->nm_sync = netmap_monitor_parent_rxsync;
283 			}
284 		}
285 		na->na_flags |= NAF_NETMAP_ON;
286 	} else {
287 		if (!nm_netmap_on(pna)) {
288 			/* parent left netmap mode, nothing to restore */
289 			return 0;
290 		}
291 		na->na_flags &= ~NAF_NETMAP_ON;
292 		if (mna->flags & NR_MONITOR_TX) {
293 			for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
294 				netmap_set_txring(pna, i, 1 /* stopped */);
295 				kring = &pna->tx_rings[i];
296 				kring->nm_sync = kring->save_sync;
297 				kring->save_sync = NULL;
298 				netmap_set_txring(pna, i, 0 /* enabled */);
299 			}
300 		}
301 		if (mna->flags & NR_MONITOR_RX) {
302 			for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
303 				netmap_set_rxring(pna, i, 1 /* stopped */);
304 				kring = &pna->rx_rings[i];
305 				kring->nm_sync = kring->save_sync;
306 				kring->save_sync = NULL;
307 				netmap_set_rxring(pna, i, 0 /* enabled */);
308 			}
309 		}
310 	}
311 	return 0;
312 }

/* nm_krings_delete callback for monitors.
 * Nothing monitor-specific to undo: just hand the adapter over
 * to netmap_krings_delete.
 */
static void
netmap_monitor_krings_delete(struct netmap_adapter *na)
{
	netmap_krings_delete(na);
}
319 
320 
321 /* nm_dtor callback for monitors */
322 static void
323 netmap_monitor_dtor(struct netmap_adapter *na)
324 {
325 	struct netmap_monitor_adapter *mna =
326 		(struct netmap_monitor_adapter *)na;
327 	struct netmap_priv_d *priv = &mna->priv;
328 	struct netmap_adapter *pna = priv->np_na;
329 	int i;
330 
331 	ND("%p", na);
332 	if (nm_netmap_on(pna)) {
333 		/* parent still in netmap mode, mark its krings as free */
334 		if (mna->flags & NR_MONITOR_TX) {
335 			for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
336 				pna->tx_rings[i].monitor = NULL;
337 			}
338 		}
339 		if (mna->flags & NR_MONITOR_RX) {
340 			for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
341 				pna->rx_rings[i].monitor = NULL;
342 			}
343 		}
344 	}
345 	netmap_adapter_put(pna);
346 }
347 
348 
349 /* check if nmr is a request for a monitor adapter that we can satisfy */
350 int
351 netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
352 {
353 	struct nmreq pnmr;
354 	struct netmap_adapter *pna; /* parent adapter */
355 	struct netmap_monitor_adapter *mna;
356 	int i, error;
357 
358 	if ((nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
359 		ND("not a monitor");
360 		return 0;
361 	}
362 	/* this is a request for a monitor adapter */
363 
364 	D("flags %x", nmr->nr_flags);
365 
366 	mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
367 	if (mna == NULL) {
368 		D("memory error");
369 		return ENOMEM;
370 	}
371 
372 	/* first, try to find the adapter that we want to monitor
373 	 * We use the same nmr, after we have turned off the monitor flags.
374 	 * In this way we can potentially monitor everything netmap understands,
375 	 * except other monitors.
376 	 */
377 	memcpy(&pnmr, nmr, sizeof(pnmr));
378 	pnmr.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX);
379 	error = netmap_get_na(&pnmr, &pna, create);
380 	if (error) {
381 		D("parent lookup failed: %d", error);
382 		return error;
383 	}
384 	D("found parent: %s", pna->name);
385 
386 	if (!nm_netmap_on(pna)) {
387 		/* parent not in netmap mode */
388 		/* XXX we can wait for the parent to enter netmap mode,
389 		 * by intercepting its nm_register callback (2014-03-16)
390 		 */
391 		D("%s not in netmap mode", pna->name);
392 		error = EINVAL;
393 		goto put_out;
394 	}
395 
396 	/* grab all the rings we need in the parent */
397 	mna->priv.np_na = pna;
398 	error = netmap_interp_ringid(&mna->priv, nmr->nr_ringid, nmr->nr_flags);
399 	if (error) {
400 		D("ringid error");
401 		goto put_out;
402 	}
403 	if (nmr->nr_flags & NR_MONITOR_TX) {
404 		for (i = mna->priv.np_txqfirst; i < mna->priv.np_txqlast; i++) {
405 			struct netmap_kring *kring = &pna->tx_rings[i];
406 			if (kring->monitor) {
407 				error = EBUSY;
408 				D("ring busy");
409 				goto release_out;
410 			}
411 			kring->monitor = mna;
412 		}
413 	}
414 	if (nmr->nr_flags & NR_MONITOR_RX) {
415 		for (i = mna->priv.np_rxqfirst; i < mna->priv.np_rxqlast; i++) {
416 			struct netmap_kring *kring = &pna->rx_rings[i];
417 			if (kring->monitor) {
418 				error = EBUSY;
419 				D("ring busy");
420 				goto release_out;
421 			}
422 			kring->monitor = mna;
423 		}
424 	}
425 
426 	snprintf(mna->up.name, sizeof(mna->up.name), "mon:%s", pna->name);
427 
428 	/* the monitor supports the host rings iff the parent does */
429 	mna->up.na_flags = (pna->na_flags & NAF_HOST_RINGS);
430 	mna->up.nm_txsync = netmap_monitor_txsync;
431 	mna->up.nm_rxsync = netmap_monitor_rxsync;
432 	mna->up.nm_register = netmap_monitor_reg;
433 	mna->up.nm_dtor = netmap_monitor_dtor;
434 	mna->up.nm_krings_create = netmap_monitor_krings_create;
435 	mna->up.nm_krings_delete = netmap_monitor_krings_delete;
436 	mna->up.nm_mem = pna->nm_mem;
437 	mna->up.na_lut = pna->na_lut;
438 	mna->up.na_lut_objtotal = pna->na_lut_objtotal;
439 	mna->up.na_lut_objsize = pna->na_lut_objsize;
440 
441 	mna->up.num_tx_rings = 1; // XXX we don't need it, but field can't be zero
442 	/* we set the number of our rx_rings to be max(num_rx_rings, num_rx_rings)
443 	 * in the parent
444 	 */
445 	mna->up.num_rx_rings = pna->num_rx_rings;
446 	if (pna->num_tx_rings > pna->num_rx_rings)
447 		mna->up.num_rx_rings = pna->num_tx_rings;
448 	/* by default, the number of slots is the same as in
449 	 * the parent rings, but the user may ask for a different
450 	 * number
451 	 */
452 	mna->up.num_tx_desc = nmr->nr_tx_slots;
453 	nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
454 			1, NM_MONITOR_MAXSLOTS, NULL);
455 	mna->up.num_rx_desc = nmr->nr_rx_slots;
456 	nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
457 			1, NM_MONITOR_MAXSLOTS, NULL);
458 	error = netmap_attach_common(&mna->up);
459 	if (error) {
460 		D("attach_common error");
461 		goto release_out;
462 	}
463 
464 	/* remember the traffic directions we have to monitor */
465 	mna->flags = (nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX));
466 
467 	*na = &mna->up;
468 	netmap_adapter_get(*na);
469 
470 	/* write the configuration back */
471 	nmr->nr_tx_rings = mna->up.num_tx_rings;
472 	nmr->nr_rx_rings = mna->up.num_rx_rings;
473 	nmr->nr_tx_slots = mna->up.num_tx_desc;
474 	nmr->nr_rx_slots = mna->up.num_rx_desc;
475 
476 	/* keep the reference to the parent */
477 	D("monitor ok");
478 
479 	return 0;
480 
481 release_out:
482 	D("monitor error");
483 	for (i = mna->priv.np_txqfirst; i < mna->priv.np_txqlast; i++) {
484 		if (pna->tx_rings[i].monitor == mna)
485 			pna->tx_rings[i].monitor = NULL;
486 	}
487 	for (i = mna->priv.np_rxqfirst; i < mna->priv.np_rxqlast; i++) {
488 		if (pna->rx_rings[i].monitor == mna)
489 			pna->rx_rings[i].monitor = NULL;
490 	}
491 put_out:
492 	netmap_adapter_put(pna);
493 	free(mna, M_DEVBUF);
494 	return error;
495 }
496 
497 
498 #endif /* WITH_MONITOR */
499