xref: /freebsd/sys/dev/netmap/netmap_generic.c (revision 0572ccaa4543b0abef8ef81e384c1d04de9f3da1)
1 /*
2  * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *   1. Redistributions of source code must retain the above copyright
8  *      notice, this list of conditions and the following disclaimer.
9  *   2. Redistributions in binary form must reproduce the above copyright
10  *      notice, this list of conditions and the following disclaimer in the
11  *      documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 /*
27  * This module implements netmap support on top of standard,
28  * unmodified device drivers.
29  *
30  * A NIOCREGIF request is handled here if the device does not
31  * have native support. TX and RX rings are emulated as follows:
32  *
33  * NIOCREGIF
34  *	We preallocate a block of TX mbufs (roughly as many as
35  *	tx descriptors; the number is not critical) to speed up
36  *	operation during transmissions. The refcount on most of
37  *	these buffers is artificially bumped up so we can recycle
38  *	them more easily. Also, the destructor is intercepted
39  *	so we use it as an interrupt notification to wake up
40  *	processes blocked on a poll().
41  *
42  *	For each receive ring we allocate one "struct mbq"
43  *	(an mbuf tailq plus a spinlock). We intercept packets
44  *	(through if_input)
45  *	on the receive path and put them in the mbq from which
46  *	netmap receive routines can grab them.
47  *
48  * TX:
 *	in the generic_netmap_txsync() routine, netmap buffers are copied
 *	(or linked, in the future) to the preallocated mbufs
51  *	and pushed to the transmit queue. Some of these mbufs
52  *	(those with NS_REPORT, or otherwise every half ring)
53  *	have the refcount=1, others have refcount=2.
54  *	When the destructor is invoked, we take that as
55  *	a notification that all mbufs up to that one in
56  *	the specific ring have been completed, and generate
57  *	the equivalent of a transmit interrupt.
58  *
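 * RX:
 *	generic_rx_handler() puts every intercepted mbuf on the mbq
 *	of its receive ring. generic_netmap_rxsync() later dequeues
 *	the mbufs, copies their data into the netmap buffers of the
 *	ring and frees them.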
61  */
62 
63 #ifdef __FreeBSD__
64 
65 #include <sys/cdefs.h> /* prerequisite */
66 __FBSDID("$FreeBSD$");
67 
68 #include <sys/types.h>
69 #include <sys/errno.h>
70 #include <sys/malloc.h>
71 #include <sys/lock.h>   /* PROT_EXEC */
72 #include <sys/rwlock.h>
73 #include <sys/socket.h> /* sockaddrs */
74 #include <sys/selinfo.h>
75 #include <net/if.h>
76 #include <net/if_var.h>
77 #include <machine/bus.h>        /* bus_dmamap_* in netmap_kern.h */
78 
79 // XXX temporary - D() defined here
80 #include <net/netmap.h>
81 #include <dev/netmap/netmap_kern.h>
82 #include <dev/netmap/netmap_mem2.h>
83 
/*
 * Stand-ins for names used by the code shared with the Linux build.
 *
 * rtnl_lock()/rtnl_unlock() only log that they were called. They expand
 * to a single expression-statement WITHOUT a trailing semicolon, so that
 * "if (x) rtnl_lock(); else ..." parses as intended.
 * MBUF_TXQ()/MBUF_RXQ() both read the flowid stored in the packet header.
 * smp_mb() expands to nothing.
 */
#define rtnl_lock()	D("rtnl_lock called")
#define rtnl_unlock()	D("rtnl_unlock called")
#define MBUF_TXQ(m)	((m)->m_pkthdr.flowid)
#define MBUF_RXQ(m)	((m)->m_pkthdr.flowid)
#define smp_mb()
89 
90 /*
91  * mbuf wrappers
92  */
93 
94 /*
95  * we allocate an EXT_PACKET
96  */
/* Get a packet-header mbuf with a cluster; 'len' is not used by this expansion. */
#define netmap_get_mbuf(len) m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR|M_NOFREE)

/*
 * Install 'fn' as the ext_free routine of mbuf 'm' and switch its
 * external storage type to EXT_EXTREF.
 * Open points from the original author: we also need to add an M_NOFREE
 * flag, and then clear the flag and chain into uma_zfree(zone_pack, mf)
 * (or reinstall the buffer ?).
 * Both arguments are parenthesized so any expression can be passed.
 */
#define SET_MBUF_DESTRUCTOR(m, fn)	do {		\
	(m)->m_ext.ext_free = (void *)(fn);	\
	(m)->m_ext.ext_type = EXT_EXTREF;	\
} while (0)


/* Current reference count of the external storage, or -1 if ref_cnt is NULL. */
#define GET_MBUF_REFCNT(m)	((m)->m_ext.ref_cnt ? *(m)->m_ext.ref_cnt : -1)
111 
112 
113 
114 #else /* linux */
115 
116 #include "bsd_glue.h"
117 
118 #include <linux/rtnetlink.h>    /* rtnl_[un]lock() */
119 #include <linux/ethtool.h>      /* struct ethtool_ops, get_ringparam */
120 #include <linux/hrtimer.h>
121 
122 //#define RATE  /* Enables communication statistics. */
123 
124 //#define REG_RESET
125 
126 #endif /* linux */
127 
128 
129 /* Common headers. */
130 #include <net/netmap.h>
131 #include <dev/netmap/netmap_kern.h>
132 #include <dev/netmap/netmap_mem2.h>
133 
134 
135 
136 /* ======================== usage stats =========================== */
137 
138 #ifdef RATE
139 #define IFRATE(x) x
/* Event counters sampled by the rate timer (only built with RATE defined). */
struct rate_stats {
	unsigned long txpkt;	/* packets handed to the driver by txsync */
	unsigned long txsync;	/* generic_netmap_txsync() invocations */
	unsigned long txirq;	/* tx mbuf destructor invocations */
	unsigned long rxpkt;	/* packets copied into rx rings by rxsync */
	unsigned long rxirq;	/* rx notifications passed up by the rx handler */
	unsigned long rxsync;	/* generic_netmap_rxsync() invocations */
};
148 
149 struct rate_context {
150 	unsigned refcount;
151 	struct timer_list timer;
152 	struct rate_stats new;
153 	struct rate_stats old;
154 };
155 
/* Reporting period, in seconds. */
#define RATE_PERIOD  2
/*
 * Print the per-second rate of one counter of struct rate_stats.
 * Relies on two names at the expansion site: 'cur' (a struct rate_stats
 * holding the current counters) and 'ctx' (pointer to the rate_context
 * whose 'old' member holds the previous ones).
 * Expands to one expression without a trailing semicolon.
 */
#define RATE_PRINTK(field) \
	printk( #field " = %lu Hz\n", (cur.field - ctx->old.field)/RATE_PERIOD)
159 static void rate_callback(unsigned long arg)
160 {
161 	struct rate_context * ctx = (struct rate_context *)arg;
162 	struct rate_stats cur = ctx->new;
163 	int r;
164 
165 	RATE_PRINTK(txpkt);
166 	RATE_PRINTK(txsync);
167 	RATE_PRINTK(txirq);
168 	RATE_PRINTK(rxpkt);
169 	RATE_PRINTK(rxsync);
170 	RATE_PRINTK(rxirq);
171 	printk("\n");
172 
173 	ctx->old = cur;
174 	r = mod_timer(&ctx->timer, jiffies +
175 			msecs_to_jiffies(RATE_PERIOD * 1000));
176 	if (unlikely(r))
177 		D("[v1000] Error: mod_timer()");
178 }
179 
180 static struct rate_context rate_ctx;
181 
182 #else /* !RATE */
183 #define IFRATE(x)
184 #endif /* !RATE */
185 
186 
187 /* =============== GENERIC NETMAP ADAPTER SUPPORT ================= */
/* Size of the mbufs in the Tx pool. */
#define GENERIC_BUF_SIZE        netmap_buf_size
189 
190 /*
191  * Wrapper used by the generic adapter layer to notify
192  * the poller threads. Differently from netmap_rx_irq(), we check
193  * only IFCAP_NETMAP instead of NAF_NATIVE_ON to enable the irq.
194  */
195 static void
196 netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done)
197 {
198 	if (unlikely(!(ifp->if_capenable & IFCAP_NETMAP)))
199 		return;
200 
201 	netmap_common_irq(ifp, q, work_done);
202 }
203 
204 
205 /* Enable/disable netmap mode for a generic network interface. */
206 static int
207 generic_netmap_register(struct netmap_adapter *na, int enable)
208 {
209 	struct ifnet *ifp = na->ifp;
210 	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
211 	struct mbuf *m;
212 	int error;
213 	int i, r;
214 
215 	if (!na)
216 		return EINVAL;
217 
218 #ifdef REG_RESET
219 	error = ifp->netdev_ops->ndo_stop(ifp);
220 	if (error) {
221 		return error;
222 	}
223 #endif /* REG_RESET */
224 
225 	if (enable) { /* Enable netmap mode. */
226 		/* Init the mitigation support. */
227 		gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit),
228 					M_DEVBUF, M_NOWAIT | M_ZERO);
229 		if (!gna->mit) {
230 			D("mitigation allocation failed");
231 			error = ENOMEM;
232 			goto out;
233 		}
234 		for (r=0; r<na->num_rx_rings; r++)
235 			netmap_mitigation_init(&gna->mit[r], na);
236 
237 		/* Initialize the rx queue, as generic_rx_handler() can
238 		 * be called as soon as netmap_catch_rx() returns.
239 		 */
240 		for (r=0; r<na->num_rx_rings; r++) {
241 			mbq_safe_init(&na->rx_rings[r].rx_queue);
242 		}
243 
244 		/*
245 		 * Preallocate packet buffers for the tx rings.
246 		 */
247 		for (r=0; r<na->num_tx_rings; r++)
248 			na->tx_rings[r].tx_pool = NULL;
249 		for (r=0; r<na->num_tx_rings; r++) {
250 			na->tx_rings[r].tx_pool = malloc(na->num_tx_desc * sizeof(struct mbuf *),
251 					M_DEVBUF, M_NOWAIT | M_ZERO);
252 			if (!na->tx_rings[r].tx_pool) {
253 				D("tx_pool allocation failed");
254 				error = ENOMEM;
255 				goto free_tx_pools;
256 			}
257 			for (i=0; i<na->num_tx_desc; i++)
258 				na->tx_rings[r].tx_pool[i] = NULL;
259 			for (i=0; i<na->num_tx_desc; i++) {
260 				m = netmap_get_mbuf(GENERIC_BUF_SIZE);
261 				if (!m) {
262 					D("tx_pool[%d] allocation failed", i);
263 					error = ENOMEM;
264 					goto free_tx_pools;
265 				}
266 				na->tx_rings[r].tx_pool[i] = m;
267 			}
268 		}
269 		rtnl_lock();
270 		/* Prepare to intercept incoming traffic. */
271 		error = netmap_catch_rx(na, 1);
272 		if (error) {
273 			D("netdev_rx_handler_register() failed (%d)", error);
274 			goto register_handler;
275 		}
276 		ifp->if_capenable |= IFCAP_NETMAP;
277 
278 		/* Make netmap control the packet steering. */
279 		netmap_catch_tx(gna, 1);
280 
281 		rtnl_unlock();
282 
283 #ifdef RATE
284 		if (rate_ctx.refcount == 0) {
285 			D("setup_timer()");
286 			memset(&rate_ctx, 0, sizeof(rate_ctx));
287 			setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx);
288 			if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) {
289 				D("Error: mod_timer()");
290 			}
291 		}
292 		rate_ctx.refcount++;
293 #endif /* RATE */
294 
295 	} else if (na->tx_rings[0].tx_pool) {
296 		/* Disable netmap mode. We enter here only if the previous
297 		   generic_netmap_register(na, 1) was successfull.
298 		   If it was not, na->tx_rings[0].tx_pool was set to NULL by the
299 		   error handling code below. */
300 		rtnl_lock();
301 
302 		ifp->if_capenable &= ~IFCAP_NETMAP;
303 
304 		/* Release packet steering control. */
305 		netmap_catch_tx(gna, 0);
306 
307 		/* Do not intercept packets on the rx path. */
308 		netmap_catch_rx(na, 0);
309 
310 		rtnl_unlock();
311 
312 		/* Free the mbufs going to the netmap rings */
313 		for (r=0; r<na->num_rx_rings; r++) {
314 			mbq_safe_purge(&na->rx_rings[r].rx_queue);
315 			mbq_safe_destroy(&na->rx_rings[r].rx_queue);
316 		}
317 
318 		for (r=0; r<na->num_rx_rings; r++)
319 			netmap_mitigation_cleanup(&gna->mit[r]);
320 		free(gna->mit, M_DEVBUF);
321 
322 		for (r=0; r<na->num_tx_rings; r++) {
323 			for (i=0; i<na->num_tx_desc; i++) {
324 				m_freem(na->tx_rings[r].tx_pool[i]);
325 			}
326 			free(na->tx_rings[r].tx_pool, M_DEVBUF);
327 		}
328 
329 #ifdef RATE
330 		if (--rate_ctx.refcount == 0) {
331 			D("del_timer()");
332 			del_timer(&rate_ctx.timer);
333 		}
334 #endif
335 	}
336 
337 #ifdef REG_RESET
338 	error = ifp->netdev_ops->ndo_open(ifp);
339 	if (error) {
340 		goto free_tx_pools;
341 	}
342 #endif
343 
344 	return 0;
345 
346 register_handler:
347 	rtnl_unlock();
348 free_tx_pools:
349 	for (r=0; r<na->num_tx_rings; r++) {
350 		if (na->tx_rings[r].tx_pool == NULL)
351 			continue;
352 		for (i=0; i<na->num_tx_desc; i++)
353 			if (na->tx_rings[r].tx_pool[i])
354 				m_freem(na->tx_rings[r].tx_pool[i]);
355 		free(na->tx_rings[r].tx_pool, M_DEVBUF);
356 		na->tx_rings[r].tx_pool = NULL;
357 	}
358 	for (r=0; r<na->num_rx_rings; r++) {
359 		netmap_mitigation_cleanup(&gna->mit[r]);
360 		mbq_safe_destroy(&na->rx_rings[r].rx_queue);
361 	}
362 	free(gna->mit, M_DEVBUF);
363 out:
364 
365 	return error;
366 }
367 
368 /*
369  * Callback invoked when the device driver frees an mbuf used
370  * by netmap to transmit a packet. This usually happens when
371  * the NIC notifies the driver that transmission is completed.
372  */
373 static void
374 generic_mbuf_destructor(struct mbuf *m)
375 {
376 	if (netmap_verbose)
377 		D("Tx irq (%p) queue %d", m, MBUF_TXQ(m));
378 	netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL);
379 #ifdef __FreeBSD__
380 	m->m_ext.ext_type = EXT_PACKET;
381 	m->m_ext.ext_free = NULL;
382 	if (*(m->m_ext.ref_cnt) == 0)
383 		*(m->m_ext.ref_cnt) = 1;
384 	uma_zfree(zone_pack, m);
385 #endif /* __FreeBSD__ */
386 	IFRATE(rate_ctx.new.txirq++);
387 }
388 
389 /* Record completed transmissions and update hwtail.
390  *
391  * The oldest tx buffer not yet completed is at nr_hwtail + 1,
392  * nr_hwcur is the first unsent buffer.
393  */
394 static u_int
395 generic_netmap_tx_clean(struct netmap_kring *kring)
396 {
397 	u_int const lim = kring->nkr_num_slots - 1;
398 	u_int nm_i = nm_next(kring->nr_hwtail, lim);
399 	u_int hwcur = kring->nr_hwcur;
400 	u_int n = 0;
401 	struct mbuf **tx_pool = kring->tx_pool;
402 
403 	while (nm_i != hwcur) { /* buffers not completed */
404 		struct mbuf *m = tx_pool[nm_i];
405 
406 		if (unlikely(m == NULL)) {
407 			/* this is done, try to replenish the entry */
408 			tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
409 			if (unlikely(m == NULL)) {
410 				D("mbuf allocation failed, XXX error");
411 				// XXX how do we proceed ? break ?
412 				return -ENOMEM;
413 			}
414 		} else if (GET_MBUF_REFCNT(m) != 1) {
415 			break; /* This mbuf is still busy: its refcnt is 2. */
416 		}
417 		n++;
418 		nm_i = nm_next(nm_i, lim);
419 	}
420 	kring->nr_hwtail = nm_prev(nm_i, lim);
421 	ND("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail);
422 
423 	return n;
424 }
425 
426 
427 /*
428  * We have pending packets in the driver between nr_hwtail +1 and hwcur.
429  * Compute a position in the middle, to be used to generate
430  * a notification.
431  */
432 static inline u_int
433 generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur)
434 {
435 	u_int n = kring->nkr_num_slots;
436 	u_int ntc = nm_next(kring->nr_hwtail, n-1);
437 	u_int e;
438 
439 	if (hwcur >= ntc) {
440 		e = (hwcur + ntc) / 2;
441 	} else { /* wrap around */
442 		e = (hwcur + n + ntc) / 2;
443 		if (e >= n) {
444 			e -= n;
445 		}
446 	}
447 
448 	if (unlikely(e >= n)) {
449 		D("This cannot happen");
450 		e = 0;
451 	}
452 
453 	return e;
454 }
455 
456 /*
457  * We have pending packets in the driver between nr_hwtail+1 and hwcur.
458  * Schedule a notification approximately in the middle of the two.
459  * There is a race but this is only called within txsync which does
460  * a double check.
461  */
462 static void
463 generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
464 {
465 	struct mbuf *m;
466 	u_int e;
467 
468 	if (nm_next(kring->nr_hwtail, kring->nkr_num_slots -1) == hwcur) {
469 		return; /* all buffers are free */
470 	}
471 	e = generic_tx_event_middle(kring, hwcur);
472 
473 	m = kring->tx_pool[e];
474 	if (m == NULL) {
475 		/* This can happen if there is already an event on the netmap
476 		   slot 'e': There is nothing to do. */
477 		return;
478 	}
479 	ND("Event at %d mbuf %p refcnt %d", e, m, GET_MBUF_REFCNT(m));
480 	kring->tx_pool[e] = NULL;
481 	SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
482 
483 	// XXX wmb() ?
484 	/* Decrement the refcount an free it if we have the last one. */
485 	m_freem(m);
486 	smp_mb();
487 }
488 
489 
490 /*
491  * generic_netmap_txsync() transforms netmap buffers into mbufs
492  * and passes them to the standard device driver
493  * (ndo_start_xmit() or ifp->if_transmit() ).
494  * On linux this is not done directly, but using dev_queue_xmit(),
495  * since it implements the TX flow control (and takes some locks).
496  */
497 static int
498 generic_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
499 {
500 	struct ifnet *ifp = na->ifp;
501 	struct netmap_kring *kring = &na->tx_rings[ring_nr];
502 	struct netmap_ring *ring = kring->ring;
503 	u_int nm_i;	/* index into the netmap ring */ // j
504 	u_int const lim = kring->nkr_num_slots - 1;
505 	u_int const head = kring->rhead;
506 
507 	IFRATE(rate_ctx.new.txsync++);
508 
509 	// TODO: handle the case of mbuf allocation failure
510 
511 	rmb();
512 
513 	/*
514 	 * First part: process new packets to send.
515 	 */
516 	nm_i = kring->nr_hwcur;
517 	if (nm_i != head) {	/* we have new packets to send */
518 		while (nm_i != head) {
519 			struct netmap_slot *slot = &ring->slot[nm_i];
520 			u_int len = slot->len;
521 			void *addr = NMB(slot);
522 
523 			/* device-specific */
524 			struct mbuf *m;
525 			int tx_ret;
526 
527 			NM_CHECK_ADDR_LEN(addr, len);
528 
529 			/* Tale a mbuf from the tx pool and copy in the user packet. */
530 			m = kring->tx_pool[nm_i];
531 			if (unlikely(!m)) {
532 				RD(5, "This should never happen");
533 				kring->tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
534 				if (unlikely(m == NULL)) {
535 					D("mbuf allocation failed");
536 					break;
537 				}
538 			}
539 			/* XXX we should ask notifications when NS_REPORT is set,
540 			 * or roughly every half frame. We can optimize this
541 			 * by lazily requesting notifications only when a
542 			 * transmission fails. Probably the best way is to
543 			 * break on failures and set notifications when
544 			 * ring->cur == ring->tail || nm_i != cur
545 			 */
546 			tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr);
547 			if (unlikely(tx_ret)) {
548 				RD(5, "start_xmit failed: err %d [nm_i %u, head %u, hwtail %u]",
549 						tx_ret, nm_i, head, kring->nr_hwtail);
550 				/*
551 				 * No room for this mbuf in the device driver.
552 				 * Request a notification FOR A PREVIOUS MBUF,
553 				 * then call generic_netmap_tx_clean(kring) to do the
554 				 * double check and see if we can free more buffers.
555 				 * If there is space continue, else break;
556 				 * NOTE: the double check is necessary if the problem
557 				 * occurs in the txsync call after selrecord().
558 				 * Also, we need some way to tell the caller that not
559 				 * all buffers were queued onto the device (this was
560 				 * not a problem with native netmap driver where space
561 				 * is preallocated). The bridge has a similar problem
562 				 * and we solve it there by dropping the excess packets.
563 				 */
564 				generic_set_tx_event(kring, nm_i);
565 				if (generic_netmap_tx_clean(kring)) { /* space now available */
566 					continue;
567 				} else {
568 					break;
569 				}
570 			}
571 			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
572 			nm_i = nm_next(nm_i, lim);
573 			IFRATE(rate_ctx.new.txpkt ++);
574 		}
575 
576 		/* Update hwcur to the next slot to transmit. */
577 		kring->nr_hwcur = nm_i; /* not head, we could break early */
578 	}
579 
580 	/*
581 	 * Second, reclaim completed buffers
582 	 */
583 	if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
584 		/* No more available slots? Set a notification event
585 		 * on a netmap slot that will be cleaned in the future.
586 		 * No doublecheck is performed, since txsync() will be
587 		 * called twice by netmap_poll().
588 		 */
589 		generic_set_tx_event(kring, nm_i);
590 	}
591 	ND("tx #%d, hwtail = %d", n, kring->nr_hwtail);
592 
593 	generic_netmap_tx_clean(kring);
594 
595 	nm_txsync_finalize(kring);
596 
597 	return 0;
598 }
599 
600 
601 /*
602  * This handler is registered (through netmap_catch_rx())
603  * within the attached network interface
604  * in the RX subsystem, so that every mbuf passed up by
605  * the driver can be stolen to the network stack.
606  * Stolen packets are put in a queue where the
607  * generic_netmap_rxsync() callback can extract them.
608  */
609 void
610 generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
611 {
612 	struct netmap_adapter *na = NA(ifp);
613 	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
614 	u_int work_done;
615 	u_int rr = MBUF_RXQ(m); // receive ring number
616 
617 	if (rr >= na->num_rx_rings) {
618 		rr = rr % na->num_rx_rings; // XXX expensive...
619 	}
620 
621 	/* limit the size of the queue */
622 	if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) {
623 		m_freem(m);
624 	} else {
625 		mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m);
626 	}
627 
628 	if (netmap_generic_mit < 32768) {
629 		/* no rx mitigation, pass notification up */
630 		netmap_generic_irq(na->ifp, rr, &work_done);
631 		IFRATE(rate_ctx.new.rxirq++);
632 	} else {
633 		/* same as send combining, filter notification if there is a
634 		 * pending timer, otherwise pass it up and start a timer.
635 		 */
636 		if (likely(netmap_mitigation_active(&gna->mit[rr]))) {
637 			/* Record that there is some pending work. */
638 			gna->mit[rr].mit_pending = 1;
639 		} else {
640 			netmap_generic_irq(na->ifp, rr, &work_done);
641 			IFRATE(rate_ctx.new.rxirq++);
642 			netmap_mitigation_start(&gna->mit[rr]);
643 		}
644 	}
645 }
646 
647 /*
648  * generic_netmap_rxsync() extracts mbufs from the queue filled by
649  * generic_netmap_rx_handler() and puts their content in the netmap
650  * receive ring.
651  * Access must be protected because the rx handler is asynchronous,
652  */
653 static int
654 generic_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
655 {
656 	struct netmap_kring *kring = &na->rx_rings[ring_nr];
657 	struct netmap_ring *ring = kring->ring;
658 	u_int nm_i;	/* index into the netmap ring */ //j,
659 	u_int n;
660 	u_int const lim = kring->nkr_num_slots - 1;
661 	u_int const head = nm_rxsync_prologue(kring);
662 	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
663 
664 	if (head > lim)
665 		return netmap_ring_reinit(kring);
666 
667 	/*
668 	 * First part: import newly received packets.
669 	 */
670 	if (netmap_no_pendintr || force_update) {
671 		/* extract buffers from the rx queue, stop at most one
672 		 * slot before nr_hwcur (stop_i)
673 		 */
674 		uint16_t slot_flags = kring->nkr_slot_flags;
675 		u_int stop_i = nm_prev(kring->nr_hwcur, lim);
676 
677 		nm_i = kring->nr_hwtail; /* first empty slot in the receive ring */
678 		for (n = 0; nm_i != stop_i; n++) {
679 			int len;
680 			void *addr = NMB(&ring->slot[nm_i]);
681 			struct mbuf *m;
682 
683 			/* we only check the address here on generic rx rings */
684 			if (addr == netmap_buffer_base) { /* Bad buffer */
685 				return netmap_ring_reinit(kring);
686 			}
687 			/*
688 			 * Call the locked version of the function.
689 			 * XXX Ideally we could grab a batch of mbufs at once
690 			 * and save some locking overhead.
691 			 */
692 			m = mbq_safe_dequeue(&kring->rx_queue);
693 			if (!m)	/* no more data */
694 				break;
695 			len = MBUF_LEN(m);
696 			m_copydata(m, 0, len, addr);
697 			ring->slot[nm_i].len = len;
698 			ring->slot[nm_i].flags = slot_flags;
699 			m_freem(m);
700 			nm_i = nm_next(nm_i, lim);
701 		}
702 		if (n) {
703 			kring->nr_hwtail = nm_i;
704 			IFRATE(rate_ctx.new.rxpkt += n);
705 		}
706 		kring->nr_kflags &= ~NKR_PENDINTR;
707 	}
708 
709 	// XXX should we invert the order ?
710 	/*
711 	 * Second part: skip past packets that userspace has released.
712 	 */
713 	nm_i = kring->nr_hwcur;
714 	if (nm_i != head) {
715 		/* Userspace has released some packets. */
716 		for (n = 0; nm_i != head; n++) {
717 			struct netmap_slot *slot = &ring->slot[nm_i];
718 
719 			slot->flags &= ~NS_BUF_CHANGED;
720 			nm_i = nm_next(nm_i, lim);
721 		}
722 		kring->nr_hwcur = head;
723 	}
724 	/* tell userspace that there might be new packets. */
725 	nm_rxsync_finalize(kring);
726 	IFRATE(rate_ctx.new.rxsync++);
727 
728 	return 0;
729 }
730 
731 static void
732 generic_netmap_dtor(struct netmap_adapter *na)
733 {
734 	struct ifnet *ifp = na->ifp;
735 	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na;
736 	struct netmap_adapter *prev_na = gna->prev;
737 
738 	if (prev_na != NULL) {
739 		D("Released generic NA %p", gna);
740 		if_rele(na->ifp);
741 		netmap_adapter_put(prev_na);
742 	}
743 	if (ifp != NULL) {
744 		WNA(ifp) = prev_na;
745 		D("Restored native NA %p", prev_na);
746 		na->ifp = NULL;
747 	}
748 }
749 
750 /*
751  * generic_netmap_attach() makes it possible to use netmap on
752  * a device without native netmap support.
753  * This is less performant than native support but potentially
754  * faster than raw sockets or similar schemes.
755  *
756  * In this "emulated" mode, netmap rings do not necessarily
757  * have the same size as those in the NIC. We use a default
758  * value and possibly override it if the OS has ways to fetch the
759  * actual configuration.
760  */
761 int
762 generic_netmap_attach(struct ifnet *ifp)
763 {
764 	struct netmap_adapter *na;
765 	struct netmap_generic_adapter *gna;
766 	int retval;
767 	u_int num_tx_desc, num_rx_desc;
768 
769 	num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */
770 
771 	generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc);
772 	ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);
773 
774 	gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO);
775 	if (gna == NULL) {
776 		D("no memory on attach, give up");
777 		return ENOMEM;
778 	}
779 	na = (struct netmap_adapter *)gna;
780 	na->ifp = ifp;
781 	na->num_tx_desc = num_tx_desc;
782 	na->num_rx_desc = num_rx_desc;
783 	na->nm_register = &generic_netmap_register;
784 	na->nm_txsync = &generic_netmap_txsync;
785 	na->nm_rxsync = &generic_netmap_rxsync;
786 	na->nm_dtor = &generic_netmap_dtor;
787 	/* when using generic, IFCAP_NETMAP is set so we force
788 	 * NAF_SKIP_INTR to use the regular interrupt handler
789 	 */
790 	na->na_flags = NAF_SKIP_INTR | NAF_HOST_RINGS;
791 
792 	ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
793 			ifp->num_tx_queues, ifp->real_num_tx_queues,
794 			ifp->tx_queue_len);
795 	ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
796 			ifp->num_rx_queues, ifp->real_num_rx_queues);
797 
798 	generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);
799 
800 	retval = netmap_attach_common(na);
801 	if (retval) {
802 		free(gna, M_DEVBUF);
803 	}
804 
805 	return retval;
806 }
807