xref: /freebsd/sys/dev/netmap/netmap.c (revision e39e854e27f53a784c3982cbeb68f4ad1cfd9162)
1 /*
2  * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *   1. Redistributions of source code must retain the above copyright
8  *      notice, this list of conditions and the following disclaimer.
9  *   2. Redistributions in binary form must reproduce the above copyright
10  *      notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 /*
27  * This module supports memory mapped access to network devices,
28  * see netmap(4).
29  *
30  * The module uses a large memory pool allocated by the kernel
31  * and accessible as mmapped memory by multiple userspace threads/processes.
32  * The memory pool contains packet buffers and "netmap rings",
33  * i.e. user-accessible copies of the interface's queues.
34  *
35  * Access to the network card works like this:
36  * 1. a process/thread issues one or more open() calls on /dev/netmap,
37  *    to create select()able file descriptors on which events are reported.
38  * 2. on each descriptor, the process issues an ioctl() to identify
39  *    the interface that should report events to the file descriptor.
40  * 3. on each descriptor, the process issues an mmap() request to
41  *    map the shared memory region within the process' address space.
42  *    The list of interesting queues is indicated by a location in
43  *    the shared memory region.
44  * 4. using the functions in the netmap(4) userspace API, a process
45  *    can look up the occupation state of a queue, access memory buffers,
46  *    and retrieve received packets or enqueue packets to transmit.
47  * 5. using some ioctl()s the process can synchronize the userspace view
48  *    of the queue with the actual status in the kernel. This includes both
49  *    receiving the notification of new packets, and transmitting new
50  *    packets on the output interface.
51  * 6. select() or poll() can be used to wait for events on individual
52  *    transmit or receive queues (or all queues for a given interface).
53  */
54 
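/*
 * A minimal userspace sketch of steps 1-6 above (illustrative only:
 * it assumes the NETMAP_IF()/NETMAP_TXRING()/NETMAP_BUF()/
 * NETMAP_RING_NEXT() helpers from <net/netmap_user.h>, uses "em0" as a
 * hypothetical interface, and omits all error handling; "frame"/"len"
 * stand for a previously built packet):
 *
 *	struct nmreq req;
 *	struct netmap_if *nifp;
 *	struct netmap_ring *txring;
 *	struct netmap_slot *slot;
 *	char *mem;
 *	int fd = open("/dev/netmap", O_RDWR);
 *
 *	bzero(&req, sizeof(req));
 *	strlcpy(req.nr_name, "em0", sizeof(req.nr_name));
 *	req.nr_version = NETMAP_API;
 *	ioctl(fd, NIOCREGIF, &req);		// bind fd to em0
 *	mem = mmap(0, req.nr_memsize, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, 0);			// map the shared region
 *	nifp = NETMAP_IF(mem, req.nr_offset);	// locate the netmap_if
 *	txring = NETMAP_TXRING(nifp, 0);	// first hardware tx ring
 *	for (;;) {
 *		struct pollfd fds = { .fd = fd, .events = POLLOUT };
 *
 *		poll(&fds, 1, -1);		// wait for free tx slots
 *		slot = &txring->slot[txring->cur];
 *		memcpy(NETMAP_BUF(txring, slot->buf_idx), frame, len);
 *		slot->len = len;
 *		txring->cur = NETMAP_RING_NEXT(txring, txring->cur);
 *		txring->avail--;
 *		ioctl(fd, NIOCTXSYNC, NULL);	// tell the kernel to send
 *	}
 */
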
55 #include <sys/cdefs.h> /* prerequisite */
56 __FBSDID("$FreeBSD$");
57 
58 #include <sys/types.h>
59 #include <sys/module.h>
60 #include <sys/errno.h>
61 #include <sys/param.h>	/* defines used in kernel.h */
62 #include <sys/jail.h>
63 #include <sys/kernel.h>	/* types used in module initialization */
64 #include <sys/conf.h>	/* cdevsw struct */
65 #include <sys/uio.h>	/* uio struct */
66 #include <sys/sockio.h>
67 #include <sys/socketvar.h>	/* struct socket */
68 #include <sys/malloc.h>
69 #include <sys/mman.h>	/* PROT_EXEC */
70 #include <sys/poll.h>
71 #include <sys/proc.h>
72 #include <vm/vm.h>	/* vtophys */
73 #include <vm/pmap.h>	/* vtophys */
74 #include <sys/socket.h> /* sockaddrs */
75 #include <machine/bus.h>
76 #include <sys/selinfo.h>
77 #include <sys/sysctl.h>
78 #include <net/if.h>
79 #include <net/bpf.h>		/* BIOCIMMEDIATE */
80 #include <net/vnet.h>
81 #include <net/netmap.h>
82 #include <dev/netmap/netmap_kern.h>
83 #include <machine/bus.h>	/* bus_dmamap_* */
84 
85 MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");
86 
87 /*
88  * lock and unlock for the netmap memory allocator
89  */
90 #define NMA_LOCK()	mtx_lock(&nm_mem->nm_mtx);
91 #define NMA_UNLOCK()	mtx_unlock(&nm_mem->nm_mtx);
92 struct netmap_mem_d;
93 static struct netmap_mem_d *nm_mem;	/* Our memory allocator. */
94 
95 u_int netmap_total_buffers;
96 char *netmap_buffer_base;	/* address of an invalid buffer */
97 
98 /* user-controlled variables */
99 int netmap_verbose;
100 
101 static int netmap_no_timestamp; /* don't timestamp on rxsync */
102 
103 SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args");
104 SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
105     CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
106 SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
107     CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
108 int netmap_buf_size = 2048;
109 TUNABLE_INT("hw.netmap.buf_size", &netmap_buf_size);
110 SYSCTL_INT(_dev_netmap, OID_AUTO, buf_size,
111     CTLFLAG_RD, &netmap_buf_size, 0, "Size of packet buffers");
112 int netmap_mitigate = 1;
113 SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
114 int netmap_no_pendintr = 1;
115 SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr,
116     CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets.");
117 
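/*
 * Usage note (based on the declarations above): the sysctl knobs are
 * exposed under dev.netmap.*, e.g.
 *
 *	sysctl dev.netmap.verbose=1
 *
 * while the buffer size is a loader tunable, e.g. in /boot/loader.conf:
 *
 *	hw.netmap.buf_size=2048
 */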
118 
119 /*------------- memory allocator -----------------*/
120 #ifdef NETMAP_MEM2
121 #include "netmap_mem2.c"
122 #else /* !NETMAP_MEM2 */
123 #include "netmap_mem1.c"
124 #endif /* !NETMAP_MEM2 */
125 /*------------ end of memory allocator ----------*/
126 
127 /* Structure associated with each thread which registered an interface. */
128 struct netmap_priv_d {
129 	struct netmap_if *np_nifp;	/* netmap interface descriptor. */
130 
131 	struct ifnet	*np_ifp;	/* device for which we hold a reference */
132 	int		np_ringid;	/* from the ioctl */
133 	u_int		np_qfirst, np_qlast;	/* range of rings to scan */
134 	uint16_t	np_txpoll;
135 };
136 
137 
138 /*
139  * File descriptor's private data destructor.
140  *
141  * Call nm_register(ifp,0) to stop netmap mode on the interface and
142  * revert to normal operation. We expect that np_ifp has not gone away.
143  */
144 static void
145 netmap_dtor_locked(void *data)
146 {
147 	struct netmap_priv_d *priv = data;
148 	struct ifnet *ifp = priv->np_ifp;
149 	struct netmap_adapter *na = NA(ifp);
150 	struct netmap_if *nifp = priv->np_nifp;
151 
152 	na->refcount--;
153 	if (na->refcount <= 0) {	/* last instance */
154 		u_int i, j, lim;
155 
156 		D("deleting last netmap instance for %s", ifp->if_xname);
157 		/*
158 		 * there is a race here with *_netmap_task() and
159 		 * netmap_poll(), which don't run under NETMAP_REG_LOCK.
160 		 * na->refcount == 0 && na->ifp->if_capenable & IFCAP_NETMAP
161 		 * (aka NETMAP_DELETING(na)) are a unique marker that the
162 		 * device is dying.
163 		 * Before destroying data structures we sleep a bit, and then complete
164 		 * the job. NIOCREGIF callers should detect the condition and
165 		 * loop until they can continue; the other routines
166 		 * should check the condition at entry and quit if
167 		 * they cannot run.
168 		 */
169 		na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
170 		tsleep(na, 0, "NIOCUNREG", 4);
171 		na->nm_lock(ifp, NETMAP_REG_LOCK, 0);
172 		na->nm_register(ifp, 0); /* off, clear IFCAP_NETMAP */
173 		/* Wake up any sleeping threads. netmap_poll will
174 		 * then return POLLERR
175 		 */
176 		for (i = 0; i < na->num_tx_rings + 1; i++)
177 			selwakeuppri(&na->tx_rings[i].si, PI_NET);
178 		for (i = 0; i < na->num_rx_rings + 1; i++)
179 			selwakeuppri(&na->rx_rings[i].si, PI_NET);
180 		selwakeuppri(&na->tx_si, PI_NET);
181 		selwakeuppri(&na->rx_si, PI_NET);
182 		/* release all buffers */
183 		NMA_LOCK();
184 		for (i = 0; i < na->num_tx_rings + 1; i++) {
185 			struct netmap_ring *ring = na->tx_rings[i].ring;
186 			lim = na->tx_rings[i].nkr_num_slots;
187 			for (j = 0; j < lim; j++)
188 				netmap_free_buf(nifp, ring->slot[j].buf_idx);
189 		}
190 		for (i = 0; i < na->num_rx_rings + 1; i++) {
191 			struct netmap_ring *ring = na->rx_rings[i].ring;
192 			lim = na->rx_rings[i].nkr_num_slots;
193 			for (j = 0; j < lim; j++)
194 				netmap_free_buf(nifp, ring->slot[j].buf_idx);
195 		}
196 		NMA_UNLOCK();
197 		netmap_free_rings(na);
198 		wakeup(na);
199 	}
200 	netmap_if_free(nifp);
201 }
202 
203 
204 static void
205 netmap_dtor(void *data)
206 {
207 	struct netmap_priv_d *priv = data;
208 	struct ifnet *ifp = priv->np_ifp;
209 	struct netmap_adapter *na = NA(ifp);
210 
211 	na->nm_lock(ifp, NETMAP_REG_LOCK, 0);
212 	netmap_dtor_locked(data);
213 	na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
214 
215 	if_rele(ifp);
216 	bzero(priv, sizeof(*priv));	/* XXX for safety */
217 	free(priv, M_DEVBUF);
218 }
219 
220 
221 /*
222  * mmap(2) support for the "netmap" device.
223  *
224  * Expose all the memory previously allocated by our custom memory
225  * allocator: this way the user has only to issue a single mmap(2), and
226  * can work on all the data structures flawlessly.
227  *
228  * Return 0 on success, -1 otherwise.
229  */
230 
231 static int
232 netmap_mmap(__unused struct cdev *dev,
233 #if __FreeBSD_version < 900000
234 		vm_offset_t offset, vm_paddr_t *paddr, int nprot
235 #else
236 		vm_ooffset_t offset, vm_paddr_t *paddr, int nprot,
237 		__unused vm_memattr_t *memattr
238 #endif
239 	)
240 {
241 	if (nprot & PROT_EXEC)
242 		return (-1);	// XXX -1 or EINVAL ?
243 
244 	ND("request for offset 0x%x", (uint32_t)offset);
245 	*paddr = netmap_ofstophys(offset);
246 
247 	return (0);
248 }
249 
250 
251 /*
252  * Handlers for synchronization of the queues from/to the host.
253  *
254  * netmap_sync_to_host() passes packets up. We are called from a
255  * system call in user process context, and the only contention
256  * can be among multiple user threads erroneously calling
257  * this routine concurrently. In principle we should not even
258  * need to lock.
259  */
260 static void
261 netmap_sync_to_host(struct netmap_adapter *na)
262 {
263 	struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
264 	struct netmap_ring *ring = kring->ring;
265 	struct mbuf *head = NULL, *tail = NULL, *m;
266 	u_int k, n, lim = kring->nkr_num_slots - 1;
267 
268 	k = ring->cur;
269 	if (k > lim) {
270 		netmap_ring_reinit(kring);
271 		return;
272 	}
273 	// na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0);
274 
275 	/* Take packets from hwcur to cur and pass them up.
276 	 * In case of no buffers we give up. At the end of the loop,
277 	 * If we run out of mbufs we give up. At the end of the loop,
278 	 */
279 	for (n = kring->nr_hwcur; n != k;) {
280 		struct netmap_slot *slot = &ring->slot[n];
281 
282 		n = (n == lim) ? 0 : n + 1;
283 		if (slot->len < 14 || slot->len > NETMAP_BUF_SIZE) {
284 			D("bad pkt at %d len %d", n, slot->len);
285 			continue;
286 		}
287 		m = m_devget(NMB(slot), slot->len, 0, na->ifp, NULL);
288 
289 		if (m == NULL)
290 			break;
291 		if (tail)
292 			tail->m_nextpkt = m;
293 		else
294 			head = m;
295 		tail = m;
296 		m->m_nextpkt = NULL;
297 	}
298 	kring->nr_hwcur = k;
299 	kring->nr_hwavail = ring->avail = lim;
300 	// na->nm_lock(na->ifp, NETMAP_CORE_UNLOCK, 0);
301 
302 	/* send packets up, outside the lock */
303 	while ((m = head) != NULL) {
304 		head = head->m_nextpkt;
305 		m->m_nextpkt = NULL;
306 		if (netmap_verbose & NM_VERB_HOST)
307 			D("sending up pkt %p size %d", m, MBUF_LEN(m));
308 		NM_SEND_UP(na->ifp, m);
309 	}
310 }
311 
312 /*
313  * rxsync backend for packets coming from the host stack.
314  * They have been put in the queue by netmap_start() so we
315  * need to protect access to the kring using a lock.
316  *
317  * This routine also does the selrecord if called from the poll handler
318  * (we know because td != NULL).
319  */
320 static void
321 netmap_sync_from_host(struct netmap_adapter *na, struct thread *td)
322 {
323 	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
324 	struct netmap_ring *ring = kring->ring;
325 	u_int j, n, lim = kring->nkr_num_slots;
326 	u_int k = ring->cur, resvd = ring->reserved;
327 
328 	na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0);
329 	if (k >= lim) {
330 		netmap_ring_reinit(kring);
		na->nm_lock(na->ifp, NETMAP_CORE_UNLOCK, 0);	/* do not leak the core lock */
331 		return;
332 	}
333 	/* new packets are already set in nr_hwavail */
334 	/* skip past packets that userspace has released */
335 	j = kring->nr_hwcur;
336 	if (resvd > 0) {
337 		if (resvd + ring->avail >= lim + 1) {
338 			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
339 			ring->reserved = resvd = 0; // XXX panic...
340 		}
341 		k = (k >= resvd) ? k - resvd : k + lim - resvd;
342 	}
343 	if (j != k) {
344 		n = k >= j ? k - j : k + lim - j;
345 		kring->nr_hwavail -= n;
346 		kring->nr_hwcur = k;
347 	}
348 	k = ring->avail = kring->nr_hwavail - resvd;
349 	if (k == 0 && td)
350 		selrecord(td, &kring->si);
351 	if (k && (netmap_verbose & NM_VERB_HOST))
352 		D("%d pkts from stack", k);
353 	na->nm_lock(na->ifp, NETMAP_CORE_UNLOCK, 0);
354 }
355 
356 
357 /*
358  * Get a refcounted reference to an interface.
359  * Return ENXIO if the interface does not exist, EINVAL if netmap
360  * is not supported by the interface.
361  * If successful, the caller holds a reference that must be released with if_rele().
362  */
363 static int
364 get_ifp(const char *name, struct ifnet **ifp)
365 {
366 	*ifp = ifunit_ref(name);
367 	if (*ifp == NULL)
368 		return (ENXIO);
369 	/* can do this if the capability exists and if_pspare[0]
370 	 * points to the netmap descriptor.
371 	 */
372 	if ((*ifp)->if_capabilities & IFCAP_NETMAP && NA(*ifp))
373 		return 0;	/* valid pointer, we hold the refcount */
374 	if_rele(*ifp);
375 	return EINVAL;	// not NETMAP capable
376 }
377 
378 
379 /*
380  * Error routine called when txsync/rxsync detects an error.
381  * Can't do much more than resetting cur = hwcur, avail = hwavail.
382  * Return 1 on reinit.
383  *
384  * This routine is only called by the upper half of the kernel.
385  * It only reads hwcur (which is changed only by the upper half, too)
386  * and hwavail (which may be changed by the lower half, but only on
387  * a tx ring and only to increase it, so any error will be recovered
388  * on the next call). For the above, we don't strictly need to call
389  * it under lock.
390  */
391 int
392 netmap_ring_reinit(struct netmap_kring *kring)
393 {
394 	struct netmap_ring *ring = kring->ring;
395 	u_int i, lim = kring->nkr_num_slots - 1;
396 	int errors = 0;
397 
398 	D("called for %s", kring->na->ifp->if_xname);
399 	if (ring->cur > lim)
400 		errors++;
401 	for (i = 0; i <= lim; i++) {
402 		u_int idx = ring->slot[i].buf_idx;
403 		u_int len = ring->slot[i].len;
404 		if (idx < 2 || idx >= netmap_total_buffers) {
405 			if (!errors++)
406 				D("bad buffer at slot %d idx %d len %d ", i, idx, len);
407 			ring->slot[i].buf_idx = 0;
408 			ring->slot[i].len = 0;
409 		} else if (len > NETMAP_BUF_SIZE) {
410 			ring->slot[i].len = 0;
411 			if (!errors++)
412 				D("bad len %d at slot %d idx %d",
413 					len, i, idx);
414 		}
415 	}
416 	if (errors) {
417 		int pos = kring - kring->na->tx_rings;
418 		int n = kring->na->num_tx_rings + 1;
419 
420 		D("total %d errors", errors);
421 		errors++;
422 		D("%s %s[%d] reinit, cur %d -> %d avail %d -> %d",
423 			kring->na->ifp->if_xname,
424 			pos < n ?  "TX" : "RX", pos < n ? pos : pos - n,
425 			ring->cur, kring->nr_hwcur,
426 			ring->avail, kring->nr_hwavail);
427 		ring->cur = kring->nr_hwcur;
428 		ring->avail = kring->nr_hwavail;
429 	}
430 	return (errors ? 1 : 0);
431 }
432 
433 
434 /*
435  * Set the ring ID. For devices with a single queue, a request
436  * for all rings is the same as a single ring.
437  */
438 static int
439 netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid)
440 {
441 	struct ifnet *ifp = priv->np_ifp;
442 	struct netmap_adapter *na = NA(ifp);
443 	u_int i = ringid & NETMAP_RING_MASK;
444 	/* initially (np_qfirst == np_qlast) we don't want to lock */
445 	int need_lock = (priv->np_qfirst != priv->np_qlast);
446 	int lim = na->num_rx_rings;
447 
448 	if (na->num_tx_rings > lim)
449 		lim = na->num_tx_rings;
450 	if ( (ringid & NETMAP_HW_RING) && i >= lim) {
451 		D("invalid ring id %d", i);
452 		return (EINVAL);
453 	}
454 	if (need_lock)
455 		na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
456 	priv->np_ringid = ringid;
457 	if (ringid & NETMAP_SW_RING) {
458 		priv->np_qfirst = NETMAP_SW_RING;
459 		priv->np_qlast = 0;
460 	} else if (ringid & NETMAP_HW_RING) {
461 		priv->np_qfirst = i;
462 		priv->np_qlast = i + 1;
463 	} else {
464 		priv->np_qfirst = 0;
465 		priv->np_qlast = NETMAP_HW_RING;
466 	}
467 	priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
468 	if (need_lock)
469 		na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
470 	if (ringid & NETMAP_SW_RING)
471 		D("ringid %s set to SW RING", ifp->if_xname);
472 	else if (ringid & NETMAP_HW_RING)
473 		D("ringid %s set to HW RING %d", ifp->if_xname,
474 			priv->np_qfirst);
475 	else
476 		D("ringid %s set to all %d HW RINGS", ifp->if_xname, lim);
477 	return 0;
478 }
479 
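/*
 * Examples of the nr_ringid encodings parsed above (a sketch; "nmr" is
 * the struct nmreq passed to the NIOCREGIF ioctl):
 *
 *	nmr.nr_ringid = 0;			// bind all hardware rings
 *	nmr.nr_ringid = NETMAP_HW_RING | 2;	// bind hardware ring 2 only
 *	nmr.nr_ringid = NETMAP_SW_RING;		// bind the host (software) ring
 *	nmr.nr_ringid = NETMAP_NO_TX_POLL;	// all rings, no txsync on poll()
 */
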
480 /*
481  * ioctl(2) support for the "netmap" device.
482  *
483  * The following is a list of accepted commands:
484  * - NIOCGINFO
485  * - SIOCGIFADDR	just for convenience
486  * - NIOCREGIF
487  * - NIOCUNREGIF
488  * - NIOCTXSYNC
489  * - NIOCRXSYNC
490  *
491  * Return 0 on success, errno otherwise.
492  */
493 static int
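/*
 * Example of NIOCGINFO used only to learn the size of the shared memory
 * region (a sketch; leaving nr_name empty, as handled below, skips the
 * interface lookup):
 *
 *	struct nmreq req;
 *
 *	bzero(&req, sizeof(req));
 *	req.nr_version = NETMAP_API;
 *	ioctl(fd, NIOCGINFO, &req);
 *	printf("netmap memory: %u bytes\n", (unsigned)req.nr_memsize);
 */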
494 netmap_ioctl(__unused struct cdev *dev, u_long cmd, caddr_t data,
495 	__unused int fflag, struct thread *td)
496 {
497 	struct netmap_priv_d *priv = NULL;
498 	struct ifnet *ifp;
499 	struct nmreq *nmr = (struct nmreq *) data;
500 	struct netmap_adapter *na;
501 	int error;
502 	u_int i, lim;
503 	struct netmap_if *nifp;
504 
505 	CURVNET_SET(TD_TO_VNET(td));
506 
507 	error = devfs_get_cdevpriv((void **)&priv);
508 	if (error != ENOENT && error != 0) {
509 		CURVNET_RESTORE();
510 		return (error);
511 	}
512 
513 	error = 0;	/* Could be ENOENT */
514 	switch (cmd) {
515 	case NIOCGINFO:		/* return capabilities etc */
516 		/* memsize is always valid */
517 		nmr->nr_memsize = nm_mem->nm_totalsize;
518 		nmr->nr_offset = 0;
519 		nmr->nr_rx_rings = nmr->nr_tx_rings = 0;
520 		nmr->nr_rx_slots = nmr->nr_tx_slots = 0;
521 		if (nmr->nr_version != NETMAP_API) {
522 			D("API mismatch got %d have %d",
523 				nmr->nr_version, NETMAP_API);
524 			nmr->nr_version = NETMAP_API;
525 			error = EINVAL;
526 			break;
527 		}
528 		if (nmr->nr_name[0] == '\0')	/* just get memory info */
529 			break;
530 		error = get_ifp(nmr->nr_name, &ifp); /* get a refcount */
531 		if (error)
532 			break;
533 		na = NA(ifp); /* retrieve netmap_adapter */
534 		nmr->nr_rx_rings = na->num_rx_rings;
535 		nmr->nr_tx_rings = na->num_tx_rings;
536 		nmr->nr_rx_slots = na->num_rx_desc;
537 		nmr->nr_tx_slots = na->num_tx_desc;
538 		if_rele(ifp);	/* return the refcount */
539 		break;
540 
541 	case NIOCREGIF:
542 		if (nmr->nr_version != NETMAP_API) {
543 			nmr->nr_version = NETMAP_API;
544 			error = EINVAL;
545 			break;
546 		}
547 		if (priv != NULL) {	/* thread already registered */
548 			error = netmap_set_ringid(priv, nmr->nr_ringid);
549 			break;
550 		}
551 		/* find the interface and a reference */
552 		error = get_ifp(nmr->nr_name, &ifp); /* keep reference */
553 		if (error)
554 			break;
555 		na = NA(ifp); /* retrieve netmap adapter */
556 		/*
557 		 * Allocate the private per-thread structure.
558 		 * XXX perhaps we can use a blocking malloc ?
559 		 */
560 		priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
561 			      M_NOWAIT | M_ZERO);
562 		if (priv == NULL) {
563 			error = ENOMEM;
564 			if_rele(ifp);   /* return the refcount */
565 			break;
566 		}
567 
568 		for (i = 10; i > 0; i--) {
569 			na->nm_lock(ifp, NETMAP_REG_LOCK, 0);
570 			if (!NETMAP_DELETING(na))
571 				break;
572 			na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
573 			tsleep(na, 0, "NIOCREGIF", hz/10);
574 		}
575 		if (i == 0) {
576 			D("too many NIOCREGIF attempts, give up");
577 			error = EINVAL;
578 			free(priv, M_DEVBUF);
579 			if_rele(ifp);	/* return the refcount */
580 			break;
581 		}
582 
583 		priv->np_ifp = ifp;	/* store the reference */
584 		error = netmap_set_ringid(priv, nmr->nr_ringid);
585 		if (error)
586 			goto error;
587 		priv->np_nifp = nifp = netmap_if_new(nmr->nr_name, na);
588 		if (nifp == NULL) { /* allocation failed */
589 			error = ENOMEM;
590 		} else if (ifp->if_capenable & IFCAP_NETMAP) {
591 			/* was already set */
592 		} else {
593 			/* Otherwise set the card in netmap mode
594 			 * and make it use the shared buffers.
595 			 */
596 			error = na->nm_register(ifp, 1); /* mode on */
597 			if (error)
598 				netmap_dtor_locked(priv);
599 		}
600 
601 		if (error) {	/* reg. failed, release priv and ref */
602 error:
603 			na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
604 			if_rele(ifp);	/* return the refcount */
605 			bzero(priv, sizeof(*priv));
606 			free(priv, M_DEVBUF);
607 			break;
608 		}
609 
610 		na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
611 		error = devfs_set_cdevpriv(priv, netmap_dtor);
612 
613 		if (error != 0) {
614 			/* could not assign the private storage for the
615 			 * thread, call the destructor explicitly.
616 			 */
617 			netmap_dtor(priv);
618 			break;
619 		}
620 
621 		/* return the offset of the netmap_if object */
622 		nmr->nr_rx_rings = na->num_rx_rings;
623 		nmr->nr_tx_rings = na->num_tx_rings;
624 		nmr->nr_rx_slots = na->num_rx_desc;
625 		nmr->nr_tx_slots = na->num_tx_desc;
626 		nmr->nr_memsize = nm_mem->nm_totalsize;
627 		nmr->nr_offset = netmap_if_offset(nifp);
628 		break;
629 
630 	case NIOCUNREGIF:
631 		if (priv == NULL) {
632 			error = ENXIO;
633 			break;
634 		}
635 
636 		/* the interface is unregistered inside the
637 		   destructor of the private data. */
638 		devfs_clear_cdevpriv();
639 		break;
640 
641 	case NIOCTXSYNC:
642 	case NIOCRXSYNC:
643 		if (priv == NULL) {
644 			error = ENXIO;
645 			break;
646 		}
647 		ifp = priv->np_ifp;	/* we have a reference */
648 		na = NA(ifp); /* retrieve netmap adapter */
649 		if (priv->np_qfirst == NETMAP_SW_RING) { /* host rings */
650 			if (cmd == NIOCTXSYNC)
651 				netmap_sync_to_host(na);
652 			else
653 				netmap_sync_from_host(na, NULL);
654 			break;
655 		}
656 		/* find the last ring to scan */
657 		lim = priv->np_qlast;
658 		if (lim == NETMAP_HW_RING)
659 			lim = (cmd == NIOCTXSYNC) ?
660 			    na->num_tx_rings : na->num_rx_rings;
661 
662 		for (i = priv->np_qfirst; i < lim; i++) {
663 			if (cmd == NIOCTXSYNC) {
664 				struct netmap_kring *kring = &na->tx_rings[i];
665 				if (netmap_verbose & NM_VERB_TXSYNC)
666 					D("pre txsync ring %d cur %d hwcur %d",
667 					    i, kring->ring->cur,
668 					    kring->nr_hwcur);
669 				na->nm_txsync(ifp, i, 1 /* do lock */);
670 				if (netmap_verbose & NM_VERB_TXSYNC)
671 					D("post txsync ring %d cur %d hwcur %d",
672 					    i, kring->ring->cur,
673 					    kring->nr_hwcur);
674 			} else {
675 				na->nm_rxsync(ifp, i, 1 /* do lock */);
676 				microtime(&na->rx_rings[i].ring->ts);
677 			}
678 		}
679 
680 		break;
681 
682 	case BIOCIMMEDIATE:
683 	case BIOCGHDRCMPLT:
684 	case BIOCSHDRCMPLT:
685 	case BIOCSSEESENT:
686 		D("ignore BIOCIMMEDIATE/BIOCGHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT");
687 		break;
688 
689 	default:	/* allow device-specific ioctls */
690 	    {
691 		struct socket so;
692 		bzero(&so, sizeof(so));
693 		error = get_ifp(nmr->nr_name, &ifp); /* keep reference */
694 		if (error)
695 			break;
696 		so.so_vnet = ifp->if_vnet;
697 		// so->so_proto not null.
698 		error = ifioctl(&so, cmd, data, td);
699 		if_rele(ifp);
700 		break;
701 	    }
702 	}
703 
704 	CURVNET_RESTORE();
705 	return (error);
706 }
707 
708 
709 /*
710  * select(2) and poll(2) handlers for the "netmap" device.
711  *
712  * Can be called for one or more queues.
713  * Return the event mask corresponding to ready events.
714  * If there are no ready events, do a selrecord on either the individual
715  * selfd or on the global one.
716  * Device-dependent parts (locking and sync of tx/rx rings)
717  * are done through callbacks.
718  */
719 static int
720 netmap_poll(__unused struct cdev *dev, int events, struct thread *td)
721 {
722 	struct netmap_priv_d *priv = NULL;
723 	struct netmap_adapter *na;
724 	struct ifnet *ifp;
725 	struct netmap_kring *kring;
726 	u_int core_lock, i, check_all, want_tx, want_rx, revents = 0;
727 	u_int lim_tx, lim_rx;
728 	enum {NO_CL, NEED_CL, LOCKED_CL }; /* see below */
729 
730 	if (devfs_get_cdevpriv((void **)&priv) != 0 || priv == NULL)
731 		return POLLERR;
732 
733 	ifp = priv->np_ifp;
734 	// XXX check for deleting() ?
735 	if ( (ifp->if_capenable & IFCAP_NETMAP) == 0)
736 		return POLLERR;
737 
738 	if (netmap_verbose & 0x8000)
739 		D("device %s events 0x%x", ifp->if_xname, events);
740 	want_tx = events & (POLLOUT | POLLWRNORM);
741 	want_rx = events & (POLLIN | POLLRDNORM);
742 
743 	na = NA(ifp); /* retrieve netmap adapter */
744 
745 	lim_tx = na->num_tx_rings;
746 	lim_rx = na->num_rx_rings;
747 	/* how many queues we are scanning */
748 	if (priv->np_qfirst == NETMAP_SW_RING) {
749 		if (priv->np_txpoll || want_tx) {
750 			/* push any packets up, then we are always ready */
751 			kring = &na->tx_rings[lim_tx];
752 			netmap_sync_to_host(na);
753 			revents |= want_tx;
754 		}
755 		if (want_rx) {
756 			kring = &na->rx_rings[lim_rx];
757 			if (kring->ring->avail == 0)
758 				netmap_sync_from_host(na, td);
759 			if (kring->ring->avail > 0) {
760 				revents |= want_rx;
761 			}
762 		}
763 		return (revents);
764 	}
765 
766 	/*
767 	 * check_all is set if the card has more than one queue and
768 	 * the client is polling all of them. If true, we sleep on
769 	 * the "global" selfd, otherwise we sleep on individual selfd
770 	 * (we can only sleep on one of them per direction).
771 	 * The interrupt routine in the driver should always wake on
772 	 * the individual selfd, and also on the global one if the card
773 	 * has more than one ring.
774 	 *
775 	 * If the card has only one lock, we just use that.
776 	 * If the card has separate ring locks, we just use those
777 	 * unless we are doing check_all, in which case the whole
778 	 * loop is wrapped by the global lock.
779 	 * We acquire locks only when necessary: if poll is called
780 	 * when buffers are available, we can just return without locks.
781 	 *
782 	 * rxsync() is only called if we run out of buffers on a POLLIN.
783 	 * txsync() is called if we run out of buffers on POLLOUT, or
784 	 * there are pending packets to send. The latter can be disabled
785  * by passing NETMAP_NO_TX_POLL in the NIOCREGIF call.
786 	 */
787 	check_all = (priv->np_qlast == NETMAP_HW_RING) && (lim_tx > 1 || lim_rx > 1);
788 
789 	/*
790 	 * core_lock indicates what to do with the core lock.
791 	 * The core lock is used when either the card has no individual
792  * locks, or it has individual locks but we are checking all
793 	 * rings so we need the core lock to avoid missing wakeup events.
794 	 *
795 	 * It has three possible states:
796 	 * NO_CL	we don't need to use the core lock, e.g.
797 	 *		because we are protected by individual locks.
798 	 * NEED_CL	we need the core lock. In this case, when we
799 	 *		call the lock routine, move to LOCKED_CL
800 	 *		to remember to release the lock once done.
801 	 * LOCKED_CL	core lock is set, so we need to release it.
802 	 */
803 	core_lock = (check_all || !na->separate_locks) ? NEED_CL : NO_CL;
804 	if (priv->np_qlast != NETMAP_HW_RING) {
805 		lim_tx = lim_rx = priv->np_qlast;
806 	}
807 
808 	/*
809 	 * We start with a lock-free round, which is good if we have
810 	 * data available. If this fails, then lock and call the sync
811 	 * routines.
812 	 */
813 	for (i = priv->np_qfirst; want_rx && i < lim_rx; i++) {
814 		kring = &na->rx_rings[i];
815 		if (kring->ring->avail > 0) {
816 			revents |= want_rx;
817 			want_rx = 0;	/* also breaks the loop */
818 		}
819 	}
820 	for (i = priv->np_qfirst; want_tx && i < lim_tx; i++) {
821 		kring = &na->tx_rings[i];
822 		if (kring->ring->avail > 0) {
823 			revents |= want_tx;
824 			want_tx = 0;	/* also breaks the loop */
825 		}
826 	}
827 
828 	/*
829 	 * If we need to push packets out (priv->np_txpoll) or want_tx is
830 	 * still set, we do need to run the txsync calls (on all rings,
831 	 * to avoid having the tx rings stall).
832 	 */
833 	if (priv->np_txpoll || want_tx) {
834 		for (i = priv->np_qfirst; i < lim_tx; i++) {
835 			kring = &na->tx_rings[i];
836 			/*
837 			 * Skip the current ring if want_tx == 0
838 			 * (we have already done a successful sync on
839 			 * a previous ring) AND kring->cur == kring->hwcur
840 			 * (there are no pending transmissions for this ring).
841 			 */
842 			if (!want_tx && kring->ring->cur == kring->nr_hwcur)
843 				continue;
844 			if (core_lock == NEED_CL) {
845 				na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
846 				core_lock = LOCKED_CL;
847 			}
848 			if (na->separate_locks)
849 				na->nm_lock(ifp, NETMAP_TX_LOCK, i);
850 			if (netmap_verbose & NM_VERB_TXSYNC)
851 				D("send %d on %s %d",
852 					kring->ring->cur,
853 					ifp->if_xname, i);
854 			if (na->nm_txsync(ifp, i, 0 /* no lock */))
855 				revents |= POLLERR;
856 
857 			/* Check avail/call selrecord only if called with POLLOUT */
858 			if (want_tx) {
859 				if (kring->ring->avail > 0) {
860 					/* stop at the first ring. We don't risk
861 					 * starvation.
862 					 */
863 					revents |= want_tx;
864 					want_tx = 0;
865 				} else if (!check_all)
866 					selrecord(td, &kring->si);
867 			}
868 			if (na->separate_locks)
869 				na->nm_lock(ifp, NETMAP_TX_UNLOCK, i);
870 		}
871 	}
872 
873 	/*
874 	 * Now, if want_rx is still set, we need to lock and rxsync.
875 	 * Do it on all rings, because otherwise some rings would starve.
876 	 */
877 	if (want_rx) {
878 		for (i = priv->np_qfirst; i < lim_rx; i++) {
879 			kring = &na->rx_rings[i];
880 			if (core_lock == NEED_CL) {
881 				na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
882 				core_lock = LOCKED_CL;
883 			}
884 			if (na->separate_locks)
885 				na->nm_lock(ifp, NETMAP_RX_LOCK, i);
886 
887 			if (na->nm_rxsync(ifp, i, 0 /* no lock */))
888 				revents |= POLLERR;
889 			if (netmap_no_timestamp == 0 ||
890 					kring->ring->flags & NR_TIMESTAMP) {
891 				microtime(&kring->ring->ts);
892 			}
893 
894 			if (kring->ring->avail > 0)
895 				revents |= want_rx;
896 			else if (!check_all)
897 				selrecord(td, &kring->si);
898 			if (na->separate_locks)
899 				na->nm_lock(ifp, NETMAP_RX_UNLOCK, i);
900 		}
901 	}
902 	if (check_all && revents == 0) { /* signal on the global queue */
903 		if (want_tx)
904 			selrecord(td, &na->tx_si);
905 		if (want_rx)
906 			selrecord(td, &na->rx_si);
907 	}
908 	if (core_lock == LOCKED_CL)
909 		na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
910 
911 	return (revents);
912 }
913 
914 /*------- driver support routines ------*/
915 
916 /*
917  * Default lock wrapper, used when the driver does not supply its own nm_lock.
918  */
919 static void
920 netmap_lock_wrapper(struct ifnet *dev, int what, u_int queueid)
921 {
922 	struct netmap_adapter *na = NA(dev);
923 
924 	switch (what) {
925 #ifdef linux	/* some systems do not need a lock on register */
926 	case NETMAP_REG_LOCK:
927 	case NETMAP_REG_UNLOCK:
928 		break;
929 #endif /* linux */
930 
931 	case NETMAP_CORE_LOCK:
932 		mtx_lock(&na->core_lock);
933 		break;
934 
935 	case NETMAP_CORE_UNLOCK:
936 		mtx_unlock(&na->core_lock);
937 		break;
938 
939 	case NETMAP_TX_LOCK:
940 		mtx_lock(&na->tx_rings[queueid].q_lock);
941 		break;
942 
943 	case NETMAP_TX_UNLOCK:
944 		mtx_unlock(&na->tx_rings[queueid].q_lock);
945 		break;
946 
947 	case NETMAP_RX_LOCK:
948 		mtx_lock(&na->rx_rings[queueid].q_lock);
949 		break;
950 
951 	case NETMAP_RX_UNLOCK:
952 		mtx_unlock(&na->rx_rings[queueid].q_lock);
953 		break;
954 	}
955 }
956 
957 
958 /*
959  * Initialize a ``netmap_adapter`` object created by a driver on attach.
960  * We allocate a block of memory with room for a struct netmap_adapter
961  * plus two sets of N+1 struct netmap_kring (where N is the number
962  * of hardware rings):
963  * krings	0..N-1	are for the hardware queues;
964  * kring	N	is for the host stack queue.
965  * The selinfo for "all queues" is in the adapter's tx_si/rx_si fields.
966  * Return 0 on success, ENOMEM otherwise.
967  *
968  * na->num_tx_rings can be set for cards with different tx/rx setups
969  */
970 int
971 netmap_attach(struct netmap_adapter *na, int num_queues)
972 {
973 	int i, n, size;
974 	void *buf;
975 	struct ifnet *ifp = na->ifp;
976 
977 	if (ifp == NULL) {
978 		D("ifp not set, giving up");
979 		return EINVAL;
980 	}
981 	/* clear other fields ? */
982 	na->refcount = 0;
983 	if (na->num_tx_rings == 0)
984 		na->num_tx_rings = num_queues;
985 	na->num_rx_rings = num_queues;
986 	/* on each direction we have N+1 resources
987 	 * 0..N-1	are the hardware rings
988 	 * N		is the ring attached to the stack.
989 	 */
990 	n = na->num_rx_rings + na->num_tx_rings + 2;
991 	size = sizeof(*na) + n * sizeof(struct netmap_kring);
992 
993 	buf = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
994 	if (buf) {
995 		WNA(ifp) = buf;
996 		na->tx_rings = (void *)((char *)buf + sizeof(*na));
997 		na->rx_rings = na->tx_rings + na->num_tx_rings + 1;
998 		bcopy(na, buf, sizeof(*na));
999 		ifp->if_capabilities |= IFCAP_NETMAP;
1000 
1001 		na = buf;
1002 		if (na->nm_lock == NULL)
1003 			na->nm_lock = netmap_lock_wrapper;
1004 		mtx_init(&na->core_lock, "netmap core lock", NULL, MTX_DEF);
1005 		for (i = 0 ; i < na->num_tx_rings + 1; i++)
1006 			mtx_init(&na->tx_rings[i].q_lock, "netmap txq lock", NULL, MTX_DEF);
1007 		for (i = 0 ; i < na->num_rx_rings + 1; i++)
1008 			mtx_init(&na->rx_rings[i].q_lock, "netmap rxq lock", NULL, MTX_DEF);
1009 	}
1010 #ifdef linux
1011 	D("netdev_ops %p", ifp->netdev_ops);
1012 	/* prepare a clone of the netdev ops */
1013 	na->nm_ndo = *ifp->netdev_ops;
1014 	na->nm_ndo.ndo_start_xmit = netmap_start_linux;
1015 #endif
1016 	D("%s for %s", buf ? "ok" : "failed", ifp->if_xname);
1017 
1018 	return (buf ? 0 : ENOMEM);
1019 }
1020 
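/*
 * A sketch of how a network driver might call netmap_attach() at attach
 * time (hypothetical "foo" driver and softc; the nm_* callbacks are the
 * driver's own txsync/rxsync/register implementations):
 *
 *	struct netmap_adapter na;
 *
 *	bzero(&na, sizeof(na));
 *	na.ifp = sc->ifp;
 *	na.separate_locks = 0;
 *	na.num_tx_desc = sc->num_tx_desc;
 *	na.num_rx_desc = sc->num_rx_desc;
 *	na.nm_txsync = foo_netmap_txsync;
 *	na.nm_rxsync = foo_netmap_rxsync;
 *	na.nm_register = foo_netmap_reg;
 *	netmap_attach(&na, sc->num_queues);
 */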
1021 
1022 /*
1023  * Free the allocated memory linked to the given ``netmap_adapter``
1024  * object.
1025  */
1026 void
1027 netmap_detach(struct ifnet *ifp)
1028 {
1029 	u_int i;
1030 	struct netmap_adapter *na = NA(ifp);
1031 
1032 	if (!na)
1033 		return;
1034 
1035 	for (i = 0; i < na->num_tx_rings + 1; i++) {
1036 		knlist_destroy(&na->tx_rings[i].si.si_note);
1037 		mtx_destroy(&na->tx_rings[i].q_lock);
1038 	}
1039 	for (i = 0; i < na->num_rx_rings + 1; i++) {
1040 		knlist_destroy(&na->rx_rings[i].si.si_note);
1041 		mtx_destroy(&na->rx_rings[i].q_lock);
1042 	}
1043 	knlist_destroy(&na->tx_si.si_note);
1044 	knlist_destroy(&na->rx_si.si_note);
1045 	bzero(na, sizeof(*na));
1046 	WNA(ifp) = NULL;
1047 	free(na, M_DEVBUF);
1048 }
1049 
1050 
1051 /*
1052  * Intercept packets from the network stack and pass them
1053  * to netmap as incoming packets on the 'software' ring.
1054  * We are not locked when called.
1055  */
1056 int
1057 netmap_start(struct ifnet *ifp, struct mbuf *m)
1058 {
1059 	struct netmap_adapter *na = NA(ifp);
1060 	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
1061 	u_int i, len = MBUF_LEN(m);
1062 	int error = EBUSY, lim = kring->nkr_num_slots - 1;
1063 	struct netmap_slot *slot;
1064 
1065 	if (netmap_verbose & NM_VERB_HOST)
1066 		D("%s packet %d len %d from the stack", ifp->if_xname,
1067 			kring->nr_hwcur + kring->nr_hwavail, len);
1068 	na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
1069 	if (kring->nr_hwavail >= lim) {
1070 		if (netmap_verbose)
1071 			D("stack ring %s full\n", ifp->if_xname);
1072 		goto done;	/* no space */
1073 	}
1074 	if (len > NETMAP_BUF_SIZE) {
1075 		D("drop packet size %d > %d", len, NETMAP_BUF_SIZE);
1076 		goto done;	/* too long for us */
1077 	}
1078 
1079 	/* compute the insert position */
1080 	i = kring->nr_hwcur + kring->nr_hwavail;
1081 	if (i > lim)
1082 		i -= lim + 1;
1083 	slot = &kring->ring->slot[i];
1084 	m_copydata(m, 0, len, NMB(slot));
1085 	slot->len = len;
1086 	kring->nr_hwavail++;
1087 	if (netmap_verbose  & NM_VERB_HOST)
1088 		D("wake up host ring %s %d", na->ifp->if_xname, na->num_rx_rings);
1089 	selwakeuppri(&kring->si, PI_NET);
1090 	error = 0;
1091 done:
1092 	na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
1093 
1094 	/* release the mbuf in either cases of success or failure. As an
1095 	/* release the mbuf in either case, success or failure. As an
1096 	 * only when really necessary.
1097 	 */
1098 	m_freem(m);
1099 
1100 	return (error);
1101 }
1102 
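/*
 * Note (a hedged sketch, not code from this file): a driver's
 * nm_register() callback typically diverts the stack's transmit path to
 * the function above while netmap mode is on, along the lines of:
 *
 *	na->if_transmit = ifp->if_transmit;	// save the regular method
 *	ifp->if_transmit = netmap_start;	// divert stack output to netmap
 *
 * and restores ifp->if_transmit when netmap mode is turned off.
 */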
1103 
1104 /*
1105  * netmap_reset() is called by the driver routines when reinitializing
1106  * a ring. The driver is in charge of locking to protect the kring.
1107  * If netmap mode is not set just return NULL.
1108  */
1109 struct netmap_slot *
1110 netmap_reset(struct netmap_adapter *na, enum txrx tx, int n,
1111 	u_int new_cur)
1112 {
1113 	struct netmap_kring *kring;
1114 	int new_hwofs, lim;
1115 
1116 	if (na == NULL)
1117 		return NULL;	/* no netmap support here */
1118 	if (!(na->ifp->if_capenable & IFCAP_NETMAP))
1119 		return NULL;	/* nothing to reinitialize */
1120 
1121 	if (tx == NR_TX) {
1122 		kring = na->tx_rings + n;
1123 		new_hwofs = kring->nr_hwcur - new_cur;
1124 	} else {
1125 		kring = na->rx_rings + n;
1126 		new_hwofs = kring->nr_hwcur + kring->nr_hwavail - new_cur;
1127 	}
1128 	lim = kring->nkr_num_slots - 1;
1129 	if (new_hwofs > lim)
1130 		new_hwofs -= lim + 1;
1131 
1132 	/* Always set the new offset value and realign the ring. */
1133 	kring->nkr_hwofs = new_hwofs;
1134 	if (tx == NR_TX)
1135 		kring->nr_hwavail = kring->nkr_num_slots - 1;
1136 	D("new hwofs %d on %s %s[%d]",
1137 			kring->nkr_hwofs, na->ifp->if_xname,
1138 			tx == NR_TX ? "TX" : "RX", n);
1139 
1140 	/*
1141 	 * Wake up on the individual and the global selinfo.
1142 	 * We do the wakeup here, but the ring is not yet reconfigured.
1143 	 * However, we are under lock so there are no races.
1144 	 */
1145 	selwakeuppri(&kring->si, PI_NET);
1146 	selwakeuppri(tx == NR_TX ? &na->tx_si : &na->rx_si, PI_NET);
1147 	return kring->ring->slot;
1148 }
1149 
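/*
 * A sketch of the intended use from a driver's ring initialization path
 * (hypothetical names; real drivers also reprogram descriptors and DMA
 * maps with the buffer addresses):
 *
 *	struct netmap_slot *slot = netmap_reset(na, NR_TX, ring_nr, 0);
 *
 *	if (slot != NULL) {			// interface is in netmap mode
 *		for (j = 0; j < num_slots; j++) {
 *			void *addr = NMB(slot + j);	// netmap buffer for slot j
 *			// ... point hardware descriptor j at addr ...
 *		}
 *	}
 */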
1150 
1151 /*
1152  * Default functions to handle rx/tx interrupts.
1153  * We have 4 cases:
1154  * 1 ring, single lock:
1155  *	lock(core); wake(i=0); unlock(core)
1156  * N rings, single lock:
1157  *	lock(core); wake(i); wake(N+1) unlock(core)
1158  * 1 ring, separate locks: (i=0)
1159  *	lock(i); wake(i); unlock(i)
1160  * N rings, separate locks:
1161  *	lock(i); wake(i); unlock(i); lock(core) wake(N+1) unlock(core)
1162  * work_done is non-null on the RX path.
1163  */
1164 int
1165 netmap_rx_irq(struct ifnet *ifp, int q, int *work_done)
1166 {
1167 	struct netmap_adapter *na;
1168 	struct netmap_kring *r;
1169 	NM_SELINFO_T *main_wq;
1170 
1171 	if (!(ifp->if_capenable & IFCAP_NETMAP))
1172 		return 0;
1173 	na = NA(ifp);
1174 	if (work_done) { /* RX path */
1175 		r = na->rx_rings + q;
1176 		r->nr_kflags |= NKR_PENDINTR;
1177 		main_wq = (na->num_rx_rings > 1) ? &na->rx_si : NULL;
1178 	} else { /* tx path */
1179 		r = na->tx_rings + q;
1180 		main_wq = (na->num_tx_rings > 1) ? &na->tx_si : NULL;
1181 		work_done = &q; /* dummy */
1182 	}
1183 	if (na->separate_locks) {
1184 		mtx_lock(&r->q_lock);
1185 		selwakeuppri(&r->si, PI_NET);
1186 		mtx_unlock(&r->q_lock);
1187 		if (main_wq) {
1188 			mtx_lock(&na->core_lock);
1189 			selwakeuppri(main_wq, PI_NET);
1190 			mtx_unlock(&na->core_lock);
1191 		}
1192 	} else {
1193 		mtx_lock(&na->core_lock);
1194 		selwakeuppri(&r->si, PI_NET);
1195 		if (main_wq)
1196 			selwakeuppri(main_wq, PI_NET);
1197 		mtx_unlock(&na->core_lock);
1198 	}
1199 	*work_done = 1; /* do not fire napi again */
1200 	return 1;
1201 }
1202 
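/*
 * Typical use from a driver interrupt (or taskqueue) handler, a sketch
 * with hypothetical per-queue state "que":
 *
 *	int work_done = 0;
 *
 *	if (netmap_rx_irq(adapter->ifp, que->me, &work_done))
 *		return;		// netmap handled it, skip the normal rx path
 *
 * On the TX side the call is the same but with a NULL work_done pointer,
 * which is how the function above tells the two paths apart.
 */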
1203 
1204 static struct cdevsw netmap_cdevsw = {
1205 	.d_version = D_VERSION,
1206 	.d_name = "netmap",
1207 	.d_mmap = netmap_mmap,
1208 	.d_ioctl = netmap_ioctl,
1209 	.d_poll = netmap_poll,
1210 };
1211 
1212 
1213 static struct cdev *netmap_dev; /* /dev/netmap character device. */
1214 
1215 
1216 /*
1217  * Module loader.
1218  *
1219  * Create the /dev/netmap device and initialize all global
1220  * variables.
1221  *
1222  * Return 0 on success, errno on failure.
1223  */
1224 static int
1225 netmap_init(void)
1226 {
1227 	int error;
1228 
1229 	error = netmap_memory_init();
1230 	if (error != 0) {
1231 		printf("netmap: unable to initialize the memory allocator.\n");
1232 		return (error);
1233 	}
1234 	printf("netmap: loaded module with %d Mbytes\n",
1235 		(int)(nm_mem->nm_totalsize >> 20));
1236 	netmap_dev = make_dev(&netmap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0660,
1237 			      "netmap");
1238 	return (error);
1239 }
1240 
1241 
1242 /*
1243  * Module unloader.
1244  *
1245  * Free all the memory, and destroy the ``/dev/netmap`` device.
1246  */
1247 static void
1248 netmap_fini(void)
1249 {
1250 	destroy_dev(netmap_dev);
1251 	netmap_memory_fini();
1252 	printf("netmap: unloaded module.\n");
1253 }
1254 
1255 
1256 /*
1257  * Kernel entry point.
1258  *
1259  * Initialize/finalize the module and return.
1260  *
1261  * Return 0 on success, errno on failure.
1262  */
1263 static int
1264 netmap_loader(__unused struct module *module, int event, __unused void *arg)
1265 {
1266 	int error = 0;
1267 
1268 	switch (event) {
1269 	case MOD_LOAD:
1270 		error = netmap_init();
1271 		break;
1272 
1273 	case MOD_UNLOAD:
1274 		netmap_fini();
1275 		break;
1276 
1277 	default:
1278 		error = EOPNOTSUPP;
1279 		break;
1280 	}
1281 
1282 	return (error);
1283 }
1284 
1285 
1286 DEV_MODULE(netmap, netmap_loader, NULL);
1287