xref: /freebsd/sys/dev/netmap/netmap.c (revision 5ab0d24d489e9451e3467e43965d21d5e5a755aa)
168b8534bSLuigi Rizzo /*
2849bec0eSLuigi Rizzo  * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved.
368b8534bSLuigi Rizzo  *
468b8534bSLuigi Rizzo  * Redistribution and use in source and binary forms, with or without
568b8534bSLuigi Rizzo  * modification, are permitted provided that the following conditions
668b8534bSLuigi Rizzo  * are met:
768b8534bSLuigi Rizzo  *   1. Redistributions of source code must retain the above copyright
868b8534bSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer.
968b8534bSLuigi Rizzo  *   2. Redistributions in binary form must reproduce the above copyright
1068b8534bSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer in the
1168b8534bSLuigi Rizzo  *    documentation and/or other materials provided with the distribution.
1268b8534bSLuigi Rizzo  *
1368b8534bSLuigi Rizzo  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1468b8534bSLuigi Rizzo  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1568b8534bSLuigi Rizzo  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1668b8534bSLuigi Rizzo  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1768b8534bSLuigi Rizzo  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1868b8534bSLuigi Rizzo  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
1968b8534bSLuigi Rizzo  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2068b8534bSLuigi Rizzo  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2168b8534bSLuigi Rizzo  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2268b8534bSLuigi Rizzo  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2368b8534bSLuigi Rizzo  * SUCH DAMAGE.
2468b8534bSLuigi Rizzo  */
2568b8534bSLuigi Rizzo 
26ce3ee1e7SLuigi Rizzo 
27ce3ee1e7SLuigi Rizzo #ifdef __FreeBSD__
28ce3ee1e7SLuigi Rizzo #define TEST_STUFF	// test code, does not compile yet on linux
29ce3ee1e7SLuigi Rizzo #endif /* __FreeBSD__ */
30f196ce38SLuigi Rizzo 
3168b8534bSLuigi Rizzo /*
3268b8534bSLuigi Rizzo  * This module supports memory mapped access to network devices,
3368b8534bSLuigi Rizzo  * see netmap(4).
3468b8534bSLuigi Rizzo  *
3568b8534bSLuigi Rizzo  * The module uses a large, memory pool allocated by the kernel
3668b8534bSLuigi Rizzo  * and accessible as mmapped memory by multiple userspace threads/processes.
3768b8534bSLuigi Rizzo  * The memory pool contains packet buffers and "netmap rings",
3868b8534bSLuigi Rizzo  * i.e. user-accessible copies of the interface's queues.
3968b8534bSLuigi Rizzo  *
4068b8534bSLuigi Rizzo  * Access to the network card works like this:
4168b8534bSLuigi Rizzo  * 1. a process/thread issues one or more open() on /dev/netmap, to create
 *    a select()able file descriptor on which events are reported.
4368b8534bSLuigi Rizzo  * 2. on each descriptor, the process issues an ioctl() to identify
4468b8534bSLuigi Rizzo  *    the interface that should report events to the file descriptor.
4568b8534bSLuigi Rizzo  * 3. on each descriptor, the process issues an mmap() request to
4668b8534bSLuigi Rizzo  *    map the shared memory region within the process' address space.
4768b8534bSLuigi Rizzo  *    The list of interesting queues is indicated by a location in
4868b8534bSLuigi Rizzo  *    the shared memory region.
4968b8534bSLuigi Rizzo  * 4. using the functions in the netmap(4) userspace API, a process
5068b8534bSLuigi Rizzo  *    can look up the occupation state of a queue, access memory buffers,
5168b8534bSLuigi Rizzo  *    and retrieve received packets or enqueue packets to transmit.
5268b8534bSLuigi Rizzo  * 5. using some ioctl()s the process can synchronize the userspace view
5368b8534bSLuigi Rizzo  *    of the queue with the actual status in the kernel. This includes both
5468b8534bSLuigi Rizzo  *    receiving the notification of new packets, and transmitting new
5568b8534bSLuigi Rizzo  *    packets on the output interface.
5668b8534bSLuigi Rizzo  * 6. select() or poll() can be used to wait for events on individual
5768b8534bSLuigi Rizzo  *    transmit or receive queues (or all queues for a given interface).
58ce3ee1e7SLuigi Rizzo  *
59ce3ee1e7SLuigi Rizzo 
60ce3ee1e7SLuigi Rizzo 		SYNCHRONIZATION (USER)
61ce3ee1e7SLuigi Rizzo 
62ce3ee1e7SLuigi Rizzo The netmap rings and data structures may be shared among multiple
63ce3ee1e7SLuigi Rizzo user threads or even independent processes.
64ce3ee1e7SLuigi Rizzo Any synchronization among those threads/processes is delegated
65ce3ee1e7SLuigi Rizzo to the threads themselves. Only one thread at a time can be in
66ce3ee1e7SLuigi Rizzo a system call on the same netmap ring. The OS does not enforce
67ce3ee1e7SLuigi Rizzo this and only guarantees against system crashes in case of
68ce3ee1e7SLuigi Rizzo invalid usage.
69ce3ee1e7SLuigi Rizzo 
70ce3ee1e7SLuigi Rizzo 		LOCKING (INTERNAL)
71ce3ee1e7SLuigi Rizzo 
72ce3ee1e7SLuigi Rizzo Within the kernel, access to the netmap rings is protected as follows:
73ce3ee1e7SLuigi Rizzo 
74ce3ee1e7SLuigi Rizzo - a spinlock on each ring, to handle producer/consumer races on
75ce3ee1e7SLuigi Rizzo   RX rings attached to the host stack (against multiple host
76ce3ee1e7SLuigi Rizzo   threads writing from the host stack to the same ring),
77ce3ee1e7SLuigi Rizzo   and on 'destination' rings attached to a VALE switch
78ce3ee1e7SLuigi Rizzo   (i.e. RX rings in VALE ports, and TX rings in NIC/host ports)
79ce3ee1e7SLuigi Rizzo   protecting multiple active senders for the same destination)
80ce3ee1e7SLuigi Rizzo 
81ce3ee1e7SLuigi Rizzo - an atomic variable to guarantee that there is at most one
82ce3ee1e7SLuigi Rizzo   instance of *_*xsync() on the ring at any time.
83ce3ee1e7SLuigi Rizzo   For rings connected to user file
84ce3ee1e7SLuigi Rizzo   descriptors, an atomic_test_and_set() protects this, and the
85ce3ee1e7SLuigi Rizzo   lock on the ring is not actually used.
86ce3ee1e7SLuigi Rizzo   For NIC RX rings connected to a VALE switch, an atomic_test_and_set()
87ce3ee1e7SLuigi Rizzo   is also used to prevent multiple executions (the driver might indeed
88ce3ee1e7SLuigi Rizzo   already guarantee this).
89ce3ee1e7SLuigi Rizzo   For NIC TX rings connected to a VALE switch, the lock arbitrates
90ce3ee1e7SLuigi Rizzo   access to the queue (both when allocating buffers and when pushing
91ce3ee1e7SLuigi Rizzo   them out).
92ce3ee1e7SLuigi Rizzo 
93ce3ee1e7SLuigi Rizzo - *xsync() should be protected against initializations of the card.
94ce3ee1e7SLuigi Rizzo   On FreeBSD most devices have the reset routine protected by
95ce3ee1e7SLuigi Rizzo   a RING lock (ixgbe, igb, em) or core lock (re). lem is missing
96ce3ee1e7SLuigi Rizzo   the RING protection on rx_reset(), this should be added.
97ce3ee1e7SLuigi Rizzo 
98ce3ee1e7SLuigi Rizzo   On linux there is an external lock on the tx path, which probably
99ce3ee1e7SLuigi Rizzo   also arbitrates access to the reset routine. XXX to be revised
100ce3ee1e7SLuigi Rizzo 
101ce3ee1e7SLuigi Rizzo - a per-interface core_lock protecting access from the host stack
102ce3ee1e7SLuigi Rizzo   while interfaces may be detached from netmap mode.
103ce3ee1e7SLuigi Rizzo   XXX there should be no need for this lock if we detach the interfaces
104ce3ee1e7SLuigi Rizzo   only while they are down.
105ce3ee1e7SLuigi Rizzo 
106ce3ee1e7SLuigi Rizzo 
107ce3ee1e7SLuigi Rizzo --- VALE SWITCH ---
108ce3ee1e7SLuigi Rizzo 
109ce3ee1e7SLuigi Rizzo NMG_LOCK() serializes all modifications to switches and ports.
110ce3ee1e7SLuigi Rizzo A switch cannot be deleted until all ports are gone.
111ce3ee1e7SLuigi Rizzo 
112ce3ee1e7SLuigi Rizzo For each switch, an SX lock (RWlock on linux) protects
113ce3ee1e7SLuigi Rizzo deletion of ports. When configuring or deleting a new port, the
114ce3ee1e7SLuigi Rizzo lock is acquired in exclusive mode (after holding NMG_LOCK).
115ce3ee1e7SLuigi Rizzo When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
116ce3ee1e7SLuigi Rizzo The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
118ce3ee1e7SLuigi Rizzo Hence it is important that sleepable shared locks are used.
119ce3ee1e7SLuigi Rizzo 
120ce3ee1e7SLuigi Rizzo On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
122ce3ee1e7SLuigi Rizzo packets are copied from source to destination, and then
123ce3ee1e7SLuigi Rizzo the lock is acquired again and the receive ring is updated.
124ce3ee1e7SLuigi Rizzo (A similar thing is done on the tx ring for NIC and host stack
125ce3ee1e7SLuigi Rizzo ports attached to the switch)
126ce3ee1e7SLuigi Rizzo 
12768b8534bSLuigi Rizzo  */
12868b8534bSLuigi Rizzo 
129ce3ee1e7SLuigi Rizzo /*
130ce3ee1e7SLuigi Rizzo  * OS-specific code that is used only within this file.
131ce3ee1e7SLuigi Rizzo  * Other OS-specific code that must be accessed by drivers
132ce3ee1e7SLuigi Rizzo  * is present in netmap_kern.h
133ce3ee1e7SLuigi Rizzo  */
13401c7d25fSLuigi Rizzo 
135ce3ee1e7SLuigi Rizzo #if defined(__FreeBSD__)
13668b8534bSLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */
13768b8534bSLuigi Rizzo __FBSDID("$FreeBSD$");
13868b8534bSLuigi Rizzo 
13968b8534bSLuigi Rizzo #include <sys/types.h>
14068b8534bSLuigi Rizzo #include <sys/module.h>
14168b8534bSLuigi Rizzo #include <sys/errno.h>
14268b8534bSLuigi Rizzo #include <sys/param.h>	/* defines used in kernel.h */
143506cc70cSLuigi Rizzo #include <sys/jail.h>
14468b8534bSLuigi Rizzo #include <sys/kernel.h>	/* types used in module initialization */
14568b8534bSLuigi Rizzo #include <sys/conf.h>	/* cdevsw struct */
14668b8534bSLuigi Rizzo #include <sys/uio.h>	/* uio struct */
14768b8534bSLuigi Rizzo #include <sys/sockio.h>
14868b8534bSLuigi Rizzo #include <sys/socketvar.h>	/* struct socket */
14968b8534bSLuigi Rizzo #include <sys/malloc.h>
15068b8534bSLuigi Rizzo #include <sys/mman.h>	/* PROT_EXEC */
15168b8534bSLuigi Rizzo #include <sys/poll.h>
152506cc70cSLuigi Rizzo #include <sys/proc.h>
15389f6b863SAttilio Rao #include <sys/rwlock.h>
15468b8534bSLuigi Rizzo #include <vm/vm.h>	/* vtophys */
15568b8534bSLuigi Rizzo #include <vm/pmap.h>	/* vtophys */
156ce3ee1e7SLuigi Rizzo #include <vm/vm_param.h>
157ce3ee1e7SLuigi Rizzo #include <vm/vm_object.h>
158ce3ee1e7SLuigi Rizzo #include <vm/vm_page.h>
159ce3ee1e7SLuigi Rizzo #include <vm/vm_pager.h>
160ce3ee1e7SLuigi Rizzo #include <vm/uma.h>
16168b8534bSLuigi Rizzo #include <sys/socket.h> /* sockaddrs */
16268b8534bSLuigi Rizzo #include <sys/selinfo.h>
16368b8534bSLuigi Rizzo #include <sys/sysctl.h>
16468b8534bSLuigi Rizzo #include <net/if.h>
16576039bc8SGleb Smirnoff #include <net/if_var.h>
16668b8534bSLuigi Rizzo #include <net/bpf.h>		/* BIOCIMMEDIATE */
167506cc70cSLuigi Rizzo #include <net/vnet.h>
16868b8534bSLuigi Rizzo #include <machine/bus.h>	/* bus_dmamap_* */
169ce3ee1e7SLuigi Rizzo #include <sys/endian.h>
170ce3ee1e7SLuigi Rizzo #include <sys/refcount.h>
17168b8534bSLuigi Rizzo 
172ce3ee1e7SLuigi Rizzo #define prefetch(x)	__builtin_prefetch(x)
17368b8534bSLuigi Rizzo 
174ce3ee1e7SLuigi Rizzo #define BDG_RWLOCK_T		struct rwlock // struct rwlock
175ce3ee1e7SLuigi Rizzo 
176ce3ee1e7SLuigi Rizzo #define	BDG_RWINIT(b)		\
177ce3ee1e7SLuigi Rizzo 	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
178ce3ee1e7SLuigi Rizzo #define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
179ce3ee1e7SLuigi Rizzo #define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)
180ce3ee1e7SLuigi Rizzo #define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
181ce3ee1e7SLuigi Rizzo #define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
182ce3ee1e7SLuigi Rizzo #define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
183ce3ee1e7SLuigi Rizzo #define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)
184ce3ee1e7SLuigi Rizzo 
185ce3ee1e7SLuigi Rizzo 
186ce3ee1e7SLuigi Rizzo /* netmap global lock.
187ce3ee1e7SLuigi Rizzo  * normally called within the user thread (upon a system call)
188ce3ee1e7SLuigi Rizzo  * or when a file descriptor or process is terminated
189ce3ee1e7SLuigi Rizzo  * (last close or last munmap)
190ce3ee1e7SLuigi Rizzo  */
191ce3ee1e7SLuigi Rizzo 
192ce3ee1e7SLuigi Rizzo #define NMG_LOCK_T		struct mtx
193ce3ee1e7SLuigi Rizzo #define NMG_LOCK_INIT()		mtx_init(&netmap_global_lock, "netmap global lock", NULL, MTX_DEF)
194ce3ee1e7SLuigi Rizzo #define NMG_LOCK_DESTROY()	mtx_destroy(&netmap_global_lock)
195ce3ee1e7SLuigi Rizzo #define NMG_LOCK()		mtx_lock(&netmap_global_lock)
196ce3ee1e7SLuigi Rizzo #define NMG_UNLOCK()		mtx_unlock(&netmap_global_lock)
197ce3ee1e7SLuigi Rizzo #define NMG_LOCK_ASSERT()	mtx_assert(&netmap_global_lock, MA_OWNED)
198ce3ee1e7SLuigi Rizzo 
199ce3ee1e7SLuigi Rizzo 
200ce3ee1e7SLuigi Rizzo /* atomic operations */
201ce3ee1e7SLuigi Rizzo #include <machine/atomic.h>
202ce3ee1e7SLuigi Rizzo #define NM_ATOMIC_TEST_AND_SET(p)	(!atomic_cmpset_acq_int((p), 0, 1))
203ce3ee1e7SLuigi Rizzo #define NM_ATOMIC_CLEAR(p)		atomic_store_rel_int((p), 0)
204ce3ee1e7SLuigi Rizzo 
205ce3ee1e7SLuigi Rizzo 
206ce3ee1e7SLuigi Rizzo #elif defined(linux)
207ce3ee1e7SLuigi Rizzo 
208ce3ee1e7SLuigi Rizzo #include "bsd_glue.h"
209ce3ee1e7SLuigi Rizzo 
210ce3ee1e7SLuigi Rizzo static netdev_tx_t linux_netmap_start_xmit(struct sk_buff *, struct net_device *);
211ce3ee1e7SLuigi Rizzo 
212ce3ee1e7SLuigi Rizzo static struct device_driver*
213ce3ee1e7SLuigi Rizzo linux_netmap_find_driver(struct device *dev)
214ce3ee1e7SLuigi Rizzo {
215ce3ee1e7SLuigi Rizzo 	struct device_driver *dd;
216ce3ee1e7SLuigi Rizzo 
217ce3ee1e7SLuigi Rizzo 	while ( (dd = dev->driver) == NULL ) {
218ce3ee1e7SLuigi Rizzo 		if ( (dev = dev->parent) == NULL )
219ce3ee1e7SLuigi Rizzo 			return NULL;
220ce3ee1e7SLuigi Rizzo 	}
221ce3ee1e7SLuigi Rizzo 	return dd;
222ce3ee1e7SLuigi Rizzo }
223ce3ee1e7SLuigi Rizzo 
/*
 * Look up a net_device by name and pin it for netmap use:
 * take a reference on the device (dev_get_by_name) and on the
 * module owning its driver (try_module_get), so neither can go
 * away while the interface is in use.
 * Returns the device on success, NULL on failure (no references
 * held on the failure path). The matching release is if_rele().
 */
static struct net_device*
ifunit_ref(const char *name)
{
	struct net_device *ifp = dev_get_by_name(&init_net, name);
	struct device_driver *dd;

	if (ifp == NULL)
		return NULL;

	/* a device without a driver cannot be pinned; bail out */
	if ( (dd = linux_netmap_find_driver(&ifp->dev)) == NULL )
		goto error;

	if (!try_module_get(dd->owner))
		goto error;

	return ifp;
error:
	dev_put(ifp);	/* undo dev_get_by_name() */
	return NULL;
}
244ce3ee1e7SLuigi Rizzo 
/*
 * Release the references taken by ifunit_ref(): drop the device
 * reference and, if the owning driver can still be located, the
 * module reference as well.
 * NOTE(review): dd is looked up before dev_put() but dereferenced
 * after it; this assumes the driver structure outlives the device
 * reference (the module reference taken in ifunit_ref() should
 * guarantee that) -- confirm.
 */
static void
if_rele(struct net_device *ifp)
{
	struct device_driver *dd;
	dd = linux_netmap_find_driver(&ifp->dev);
	dev_put(ifp);
	if (dd)
		module_put(dd->owner);
}
254ce3ee1e7SLuigi Rizzo 
255ce3ee1e7SLuigi Rizzo // XXX a mtx would suffice here too 20130404 gl
256ce3ee1e7SLuigi Rizzo #define NMG_LOCK_T		struct semaphore
257ce3ee1e7SLuigi Rizzo #define NMG_LOCK_INIT()		sema_init(&netmap_global_lock, 1)
258ce3ee1e7SLuigi Rizzo #define NMG_LOCK_DESTROY()
259ce3ee1e7SLuigi Rizzo #define NMG_LOCK()		down(&netmap_global_lock)
260ce3ee1e7SLuigi Rizzo #define NMG_UNLOCK()		up(&netmap_global_lock)
261ce3ee1e7SLuigi Rizzo #define NMG_LOCK_ASSERT()	//	XXX to be completed
262ce3ee1e7SLuigi Rizzo 
263ce3ee1e7SLuigi Rizzo 
264ce3ee1e7SLuigi Rizzo #elif defined(__APPLE__)
265ce3ee1e7SLuigi Rizzo 
266ce3ee1e7SLuigi Rizzo #warning OSX support is only partial
267ce3ee1e7SLuigi Rizzo #include "osx_glue.h"
268ce3ee1e7SLuigi Rizzo 
269ce3ee1e7SLuigi Rizzo #else
270ce3ee1e7SLuigi Rizzo 
271ce3ee1e7SLuigi Rizzo #error	Unsupported platform
272ce3ee1e7SLuigi Rizzo 
273ce3ee1e7SLuigi Rizzo #endif /* unsupported */
274ce3ee1e7SLuigi Rizzo 
275ce3ee1e7SLuigi Rizzo /*
276ce3ee1e7SLuigi Rizzo  * common headers
277ce3ee1e7SLuigi Rizzo  */
2780b8ed8e0SLuigi Rizzo #include <net/netmap.h>
2790b8ed8e0SLuigi Rizzo #include <dev/netmap/netmap_kern.h>
280ce3ee1e7SLuigi Rizzo #include <dev/netmap/netmap_mem2.h>
2810b8ed8e0SLuigi Rizzo 
282ce3ee1e7SLuigi Rizzo 
283ce3ee1e7SLuigi Rizzo MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");
284ce3ee1e7SLuigi Rizzo 
285ce3ee1e7SLuigi Rizzo /*
286ce3ee1e7SLuigi Rizzo  * The following variables are used by the drivers and replicate
287ce3ee1e7SLuigi Rizzo  * fields in the global memory pool. They only refer to buffers
288ce3ee1e7SLuigi Rizzo  * used by physical interfaces.
289ce3ee1e7SLuigi Rizzo  */
2905819da83SLuigi Rizzo u_int netmap_total_buffers;
2918241616dSLuigi Rizzo u_int netmap_buf_size;
292ce3ee1e7SLuigi Rizzo char *netmap_buffer_base;	/* also address of an invalid buffer */
2935819da83SLuigi Rizzo 
2945819da83SLuigi Rizzo /* user-controlled variables */
2955819da83SLuigi Rizzo int netmap_verbose;
2965819da83SLuigi Rizzo 
2975819da83SLuigi Rizzo static int netmap_no_timestamp; /* don't timestamp on rxsync */
2985819da83SLuigi Rizzo 
2995819da83SLuigi Rizzo SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args");
3005819da83SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
3015819da83SLuigi Rizzo     CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
3025819da83SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
3035819da83SLuigi Rizzo     CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
3045819da83SLuigi Rizzo int netmap_mitigate = 1;
3055819da83SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
306c85cb1a0SLuigi Rizzo int netmap_no_pendintr = 1;
3075819da83SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr,
3085819da83SLuigi Rizzo     CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets.");
309f18be576SLuigi Rizzo int netmap_txsync_retry = 2;
310f18be576SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
311f18be576SLuigi Rizzo     &netmap_txsync_retry, 0 , "Number of txsync loops in bridge's flush.");
3125819da83SLuigi Rizzo 
313f196ce38SLuigi Rizzo int netmap_drop = 0;	/* debugging */
314f196ce38SLuigi Rizzo int netmap_flags = 0;	/* debug flags */
315091fd0abSLuigi Rizzo int netmap_fwd = 0;	/* force transparent mode */
316ce3ee1e7SLuigi Rizzo int netmap_mmap_unreg = 0; /* allow mmap of unregistered fds */
317f196ce38SLuigi Rizzo 
318f196ce38SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, drop, CTLFLAG_RW, &netmap_drop, 0 , "");
319f196ce38SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
320091fd0abSLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
321ce3ee1e7SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, mmap_unreg, CTLFLAG_RW, &netmap_mmap_unreg, 0, "");
322f196ce38SLuigi Rizzo 
323ce3ee1e7SLuigi Rizzo NMG_LOCK_T	netmap_global_lock;
324ce3ee1e7SLuigi Rizzo 
325ce3ee1e7SLuigi Rizzo /*
326ce3ee1e7SLuigi Rizzo  * protect against multiple threads using the same ring.
327ce3ee1e7SLuigi Rizzo  * also check that the ring has not been stopped.
328ce3ee1e7SLuigi Rizzo  */
329ce3ee1e7SLuigi Rizzo #define NM_KR_BUSY	1
330ce3ee1e7SLuigi Rizzo #define NM_KR_STOPPED	2
331ce3ee1e7SLuigi Rizzo static void nm_kr_put(struct netmap_kring *kr);
/*
 * Try to mark the kring busy on behalf of the caller.
 * Returns 0 on success (the caller must later release with
 * nm_kr_put()), NM_KR_STOPPED if the ring has been stopped, or
 * NM_KR_BUSY if another thread already owns it.
 * nkr_stopped is re-checked after acquiring nr_busy so that a
 * concurrent nm_disable_ring() cannot be missed.
 */
static __inline int nm_kr_tryget(struct netmap_kring *kr)
{
	/* check a first time without taking the lock
	 * to avoid starvation for nm_kr_get()
	 */
	if (unlikely(kr->nkr_stopped)) {
		ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
		return NM_KR_STOPPED;
	}
	if (unlikely(NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)))
		return NM_KR_BUSY;
	/* check a second time with lock held */
	if (unlikely(kr->nkr_stopped)) {
		ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
		nm_kr_put(kr);	/* do not leave nr_busy set on failure */
		return NM_KR_STOPPED;
	}
	return 0;
}
351ce3ee1e7SLuigi Rizzo 
/* Release the busy flag acquired by nm_kr_tryget() or nm_kr_get(). */
static __inline void nm_kr_put(struct netmap_kring *kr)
{
	NM_ATOMIC_CLEAR(&kr->nr_busy);
}
356ce3ee1e7SLuigi Rizzo 
/*
 * Unconditionally acquire the kring's busy flag, sleeping briefly
 * between attempts until the current holder releases it.
 * Unlike nm_kr_tryget(), this does not check nkr_stopped, so it can
 * be used by nm_disable_ring() after the ring has been stopped.
 */
static void nm_kr_get(struct netmap_kring *kr)
{
	while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy))
		tsleep(kr, 0, "NM_KR_GET", 4);
}
362ce3ee1e7SLuigi Rizzo 
/*
 * Stop a kring and wait for in-flight users to drain.
 * The order matters: set nkr_stopped first so new nm_kr_tryget()
 * calls fail, then take (and release) the busy flag to wait out the
 * current owner, and finally cycle through q_lock to flush any
 * thread still holding the ring lock.
 */
static void nm_disable_ring(struct netmap_kring *kr)
{
	kr->nkr_stopped = 1;	/* reject new users from now on */
	nm_kr_get(kr);		/* wait for the current user, if any */
	mtx_lock(&kr->q_lock);	/* drain holders of the ring lock */
	mtx_unlock(&kr->q_lock);
	nm_kr_put(kr);
}
371ce3ee1e7SLuigi Rizzo 
372ce3ee1e7SLuigi Rizzo void netmap_disable_all_rings(struct ifnet *ifp)
373ce3ee1e7SLuigi Rizzo {
374ce3ee1e7SLuigi Rizzo 	struct netmap_adapter *na;
375ce3ee1e7SLuigi Rizzo 	int i;
376ce3ee1e7SLuigi Rizzo 
377ce3ee1e7SLuigi Rizzo 	if (!(ifp->if_capenable & IFCAP_NETMAP))
378ce3ee1e7SLuigi Rizzo 		return;
379ce3ee1e7SLuigi Rizzo 
380ce3ee1e7SLuigi Rizzo 	na = NA(ifp);
381ce3ee1e7SLuigi Rizzo 
382ce3ee1e7SLuigi Rizzo 	for (i = 0; i < na->num_tx_rings + 1; i++) {
383ce3ee1e7SLuigi Rizzo 		nm_disable_ring(na->tx_rings + i);
384ce3ee1e7SLuigi Rizzo 		selwakeuppri(&na->tx_rings[i].si, PI_NET);
385ce3ee1e7SLuigi Rizzo 	}
386ce3ee1e7SLuigi Rizzo 	for (i = 0; i < na->num_rx_rings + 1; i++) {
387ce3ee1e7SLuigi Rizzo 		nm_disable_ring(na->rx_rings + i);
388ce3ee1e7SLuigi Rizzo 		selwakeuppri(&na->rx_rings[i].si, PI_NET);
389ce3ee1e7SLuigi Rizzo 	}
390ce3ee1e7SLuigi Rizzo 	selwakeuppri(&na->tx_si, PI_NET);
391ce3ee1e7SLuigi Rizzo 	selwakeuppri(&na->rx_si, PI_NET);
392ce3ee1e7SLuigi Rizzo }
393ce3ee1e7SLuigi Rizzo 
394ce3ee1e7SLuigi Rizzo void netmap_enable_all_rings(struct ifnet *ifp)
395ce3ee1e7SLuigi Rizzo {
396ce3ee1e7SLuigi Rizzo 	struct netmap_adapter *na;
397ce3ee1e7SLuigi Rizzo 	int i;
398ce3ee1e7SLuigi Rizzo 
399ce3ee1e7SLuigi Rizzo 	if (!(ifp->if_capenable & IFCAP_NETMAP))
400ce3ee1e7SLuigi Rizzo 		return;
401ce3ee1e7SLuigi Rizzo 
402ce3ee1e7SLuigi Rizzo 	na = NA(ifp);
403ce3ee1e7SLuigi Rizzo 	for (i = 0; i < na->num_tx_rings + 1; i++) {
404ce3ee1e7SLuigi Rizzo 		D("enabling %p", na->tx_rings + i);
405ce3ee1e7SLuigi Rizzo 		na->tx_rings[i].nkr_stopped = 0;
406ce3ee1e7SLuigi Rizzo 	}
407ce3ee1e7SLuigi Rizzo 	for (i = 0; i < na->num_rx_rings + 1; i++) {
408ce3ee1e7SLuigi Rizzo 		D("enabling %p", na->rx_rings + i);
409ce3ee1e7SLuigi Rizzo 		na->rx_rings[i].nkr_stopped = 0;
410ce3ee1e7SLuigi Rizzo 	}
411ce3ee1e7SLuigi Rizzo }
412ce3ee1e7SLuigi Rizzo 
413ce3ee1e7SLuigi Rizzo 
414ce3ee1e7SLuigi Rizzo /*
415ce3ee1e7SLuigi Rizzo  * generic bound_checking function
416ce3ee1e7SLuigi Rizzo  */
417ce3ee1e7SLuigi Rizzo u_int
418ce3ee1e7SLuigi Rizzo nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg)
419ce3ee1e7SLuigi Rizzo {
420ce3ee1e7SLuigi Rizzo 	u_int oldv = *v;
421ce3ee1e7SLuigi Rizzo 	const char *op = NULL;
422ce3ee1e7SLuigi Rizzo 
423ce3ee1e7SLuigi Rizzo 	if (dflt < lo)
424ce3ee1e7SLuigi Rizzo 		dflt = lo;
425ce3ee1e7SLuigi Rizzo 	if (dflt > hi)
426ce3ee1e7SLuigi Rizzo 		dflt = hi;
427ce3ee1e7SLuigi Rizzo 	if (oldv < lo) {
428ce3ee1e7SLuigi Rizzo 		*v = dflt;
429ce3ee1e7SLuigi Rizzo 		op = "Bump";
430ce3ee1e7SLuigi Rizzo 	} else if (oldv > hi) {
431ce3ee1e7SLuigi Rizzo 		*v = hi;
432ce3ee1e7SLuigi Rizzo 		op = "Clamp";
433ce3ee1e7SLuigi Rizzo 	}
434ce3ee1e7SLuigi Rizzo 	if (op && msg)
435ce3ee1e7SLuigi Rizzo 		printf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
436ce3ee1e7SLuigi Rizzo 	return *v;
437ce3ee1e7SLuigi Rizzo }
438ce3ee1e7SLuigi Rizzo 
439ce3ee1e7SLuigi Rizzo /*
440ce3ee1e7SLuigi Rizzo  * packet-dump function, user-supplied or static buffer.
441ce3ee1e7SLuigi Rizzo  * The destination buffer must be at least 30+4*len
442ce3ee1e7SLuigi Rizzo  */
/*
 * Hexdump the first lim bytes of the len-byte buffer p into dst and
 * return the buffer used. When dst == NULL a static scratch buffer
 * is used instead; in that case lim is clamped so the 8192-byte
 * scratch buffer can never overflow (each 16-byte row emits at most
 * 7 offset + 48 hex + 16 char + 1 newline = 72 characters, plus a
 * header line of roughly 50 bytes, so 1600 input bytes are safe).
 */
const char *
nm_dump_buf(char *p, int len, int lim, char *dst)
{
	static char _dst[8192];
	int i, j, i0;
	static char hex[] ="0123456789abcdef";
	char *o;	/* output position */

#define P_HI(x)	hex[((x) & 0xf0)>>4]
#define P_LO(x)	hex[((x) & 0xf)]
#define P_C(x)	((x) >= 0x20 && (x) <= 0x7e ? (x) : '.')
	if (!dst)
		dst = _dst;
	if (lim <= 0 || lim > len)
		lim = len;
	/* bound the output when writing into the static buffer */
	if (dst == _dst && lim > 1600)
		lim = 1600;
	o = dst;
	sprintf(o, "buf 0x%p len %d lim %d\n", p, len, lim);
	o += strlen(o);
	/* hexdump routine: one row per 16 input bytes */
	for (i = 0; i < lim; ) {
		sprintf(o, "%5d: ", i);
		o += strlen(o);
		memset(o, ' ', 48);	/* blank the hex columns */
		i0 = i;
		for (j=0; j < 16 && i < lim; i++, j++) {
			o[j*3] = P_HI(p[i]);
			o[j*3+1] = P_LO(p[i]);
		}
		i = i0;	/* rewind and emit the printable-character column */
		for (j=0; j < 16 && i < lim; i++, j++)
			o[j + 48] = P_C(p[i]);
		o[j+48] = '\n';
		o += j+49;
	}
	*o = '\0';
#undef P_HI
#undef P_LO
#undef P_C
	return dst;
}
483f196ce38SLuigi Rizzo 
484f196ce38SLuigi Rizzo /*
485f18be576SLuigi Rizzo  * system parameters (most of them in netmap_kern.h)
486f18be576SLuigi Rizzo  * NM_NAME	prefix for switch port names, default "vale"
487ce3ee1e7SLuigi Rizzo  * NM_BDG_MAXPORTS	number of ports
488f18be576SLuigi Rizzo  * NM_BRIDGES	max number of switches in the system.
489f18be576SLuigi Rizzo  *	XXX should become a sysctl or tunable
490f196ce38SLuigi Rizzo  *
491f18be576SLuigi Rizzo  * Switch ports are named valeX:Y where X is the switch name and Y
492f18be576SLuigi Rizzo  * is the port. If Y matches a physical interface name, the port is
493f18be576SLuigi Rizzo  * connected to a physical device.
494f18be576SLuigi Rizzo  *
495f18be576SLuigi Rizzo  * Unlike physical interfaces, switch ports use their own memory region
496f18be576SLuigi Rizzo  * for rings and buffers.
497f196ce38SLuigi Rizzo  * The virtual interfaces use per-queue lock instead of core lock.
498f196ce38SLuigi Rizzo  * In the tx loop, we aggregate traffic in batches to make all operations
499ce3ee1e7SLuigi Rizzo  * faster. The batch size is bridge_batch.
500f196ce38SLuigi Rizzo  */
501f18be576SLuigi Rizzo #define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
502ce3ee1e7SLuigi Rizzo #define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
503f196ce38SLuigi Rizzo #define NM_BRIDGE_RINGSIZE	1024	/* in the device */
504f196ce38SLuigi Rizzo #define NM_BDG_HASH		1024	/* forwarding table entries */
505f196ce38SLuigi Rizzo #define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
506ce3ee1e7SLuigi Rizzo #define NM_MULTISEG		64	/* max size of a chain of bufs */
507ce3ee1e7SLuigi Rizzo /* actual size of the tables */
508ce3ee1e7SLuigi Rizzo #define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
509ce3ee1e7SLuigi Rizzo /* NM_FT_NULL terminates a list of slots in the ft */
510ce3ee1e7SLuigi Rizzo #define NM_FT_NULL		NM_BDG_BATCH_MAX
511f18be576SLuigi Rizzo #define	NM_BRIDGES		8	/* number of bridges */
512d4b42e08SLuigi Rizzo 
513d4b42e08SLuigi Rizzo 
514ce3ee1e7SLuigi Rizzo /*
515ce3ee1e7SLuigi Rizzo  * bridge_batch is set via sysctl to the max batch size to be
516ce3ee1e7SLuigi Rizzo  * used in the bridge. The actual value may be larger as the
517ce3ee1e7SLuigi Rizzo  * last packet in the block may overflow the size.
518ce3ee1e7SLuigi Rizzo  */
519ce3ee1e7SLuigi Rizzo int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
520ce3ee1e7SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
52101c7d25fSLuigi Rizzo 
522f196ce38SLuigi Rizzo 
523849bec0eSLuigi Rizzo /*
524849bec0eSLuigi Rizzo  * These are used to handle reference counters for bridge ports.
525849bec0eSLuigi Rizzo  */
526849bec0eSLuigi Rizzo #define	ADD_BDG_REF(ifp)	refcount_acquire(&NA(ifp)->na_bdg_refcount)
527849bec0eSLuigi Rizzo #define	DROP_BDG_REF(ifp)	refcount_release(&NA(ifp)->na_bdg_refcount)
528849bec0eSLuigi Rizzo 
529ce3ee1e7SLuigi Rizzo /* The bridge references the buffers using the device specific look up table */
530ce3ee1e7SLuigi Rizzo static inline void *
531ce3ee1e7SLuigi Rizzo BDG_NMB(struct netmap_mem_d *nmd, struct netmap_slot *slot)
532ce3ee1e7SLuigi Rizzo {
533ce3ee1e7SLuigi Rizzo 	struct lut_entry *lut = nmd->pools[NETMAP_BUF_POOL].lut;
534ce3ee1e7SLuigi Rizzo 	uint32_t i = slot->buf_idx;
535ce3ee1e7SLuigi Rizzo 	return (unlikely(i >= nmd->pools[NETMAP_BUF_POOL].objtotal)) ?  lut[0].vaddr : lut[i].vaddr;
536ce3ee1e7SLuigi Rizzo }
537ce3ee1e7SLuigi Rizzo 
538*5ab0d24dSLuigi Rizzo static int bdg_netmap_attach(struct netmap_adapter *);
539f196ce38SLuigi Rizzo static int bdg_netmap_reg(struct ifnet *ifp, int onoff);
540ce3ee1e7SLuigi Rizzo int kern_netmap_regif(struct nmreq *nmr);
541f18be576SLuigi Rizzo 
542ce3ee1e7SLuigi Rizzo /*
543ce3ee1e7SLuigi Rizzo  * Each transmit queue accumulates a batch of packets into
544ce3ee1e7SLuigi Rizzo  * a structure before forwarding. Packets to the same
545ce3ee1e7SLuigi Rizzo  * destination are put in a list using ft_next as a link field.
546ce3ee1e7SLuigi Rizzo  * ft_frags and ft_next are valid only on the first fragment.
547ce3ee1e7SLuigi Rizzo  */
struct nm_bdg_fwd {	/* forwarding entry for a bridge */
	void *ft_buf;		/* netmap or indirect buffer */
	uint8_t ft_frags;	/* how many fragments (only on 1st frag) */
	uint8_t _ft_port;	/* dst port (unused) */
	uint16_t ft_flags;	/* flags, e.g. indirect */
	uint16_t ft_len;	/* src fragment len */
	uint16_t ft_next;	/* next packet to same destination
				 * (NM_FT_NULL terminates the list) */
};
556f18be576SLuigi Rizzo 
557ce3ee1e7SLuigi Rizzo /*
558ce3ee1e7SLuigi Rizzo  * For each output interface, nm_bdg_q is used to construct a list.
559ce3ee1e7SLuigi Rizzo  * bq_len is the number of output buffers (we can have coalescing
560ce3ee1e7SLuigi Rizzo  * during the copy).
561f18be576SLuigi Rizzo  */
struct nm_bdg_q {
	uint16_t bq_head;	/* first entry of the list, presumably an
				 * index into the nm_bdg_fwd array chained
				 * through ft_next -- TODO confirm */
	uint16_t bq_tail;	/* last entry, for O(1) append -- TODO confirm */
	uint32_t bq_len;	/* number of buffers */
};
567f196ce38SLuigi Rizzo 
568ce3ee1e7SLuigi Rizzo /* XXX revise this */
struct nm_hash_ent {	/* forwarding-table entry for the learning bridge */
	uint64_t	mac;	/* the top 2 bytes are the epoch */
	uint64_t	ports;	/* NOTE(review): presumably a bitmap of
				 * destination ports -- confirm */
};
573f196ce38SLuigi Rizzo 
574f196ce38SLuigi Rizzo /*
575ce3ee1e7SLuigi Rizzo  * nm_bridge is a descriptor for a VALE switch.
576849bec0eSLuigi Rizzo  * Interfaces for a bridge are all in bdg_ports[].
577f196ce38SLuigi Rizzo  * The array has fixed size, an empty entry does not terminate
578ce3ee1e7SLuigi Rizzo  * the search, but lookups only occur on attach/detach so we
579849bec0eSLuigi Rizzo  * don't mind if they are slow.
580849bec0eSLuigi Rizzo  *
581ce3ee1e7SLuigi Rizzo  * The bridge is non blocking on the transmit ports: excess
582ce3ee1e7SLuigi Rizzo  * packets are dropped if there is no room on the output port.
583849bec0eSLuigi Rizzo  *
584849bec0eSLuigi Rizzo  * bdg_lock protects accesses to the bdg_ports array.
585f18be576SLuigi Rizzo  * This is a rw lock (or equivalent).
586f196ce38SLuigi Rizzo  */
587f196ce38SLuigi Rizzo struct nm_bridge {
588f18be576SLuigi Rizzo 	/* XXX what is the proper alignment/layout ? */
589ce3ee1e7SLuigi Rizzo 	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
590ce3ee1e7SLuigi Rizzo 	int		bdg_namelen;
591ce3ee1e7SLuigi Rizzo 	uint32_t	bdg_active_ports; /* 0 means free */
592ce3ee1e7SLuigi Rizzo 	char		bdg_basename[IFNAMSIZ];
593ce3ee1e7SLuigi Rizzo 
594ce3ee1e7SLuigi Rizzo 	/* Indexes of active ports (up to active_ports)
595ce3ee1e7SLuigi Rizzo 	 * and all other remaining ports.
596ce3ee1e7SLuigi Rizzo 	 */
597ce3ee1e7SLuigi Rizzo 	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];
598ce3ee1e7SLuigi Rizzo 
599f18be576SLuigi Rizzo 	struct netmap_adapter *bdg_ports[NM_BDG_MAXPORTS];
600f18be576SLuigi Rizzo 
601ce3ee1e7SLuigi Rizzo 
602f18be576SLuigi Rizzo 	/*
603f18be576SLuigi Rizzo 	 * The function to decide the destination port.
604f18be576SLuigi Rizzo 	 * It returns either of an index of the destination port,
605f18be576SLuigi Rizzo 	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
606f18be576SLuigi Rizzo 	 * forward this packet.  ring_nr is the source ring index, and the
607f18be576SLuigi Rizzo 	 * function may overwrite this value to forward this packet to a
608f18be576SLuigi Rizzo 	 * different ring index.
609f18be576SLuigi Rizzo 	 * This function must be set by netmap_bdgctl().
610f18be576SLuigi Rizzo 	 */
611f18be576SLuigi Rizzo 	bdg_lookup_fn_t nm_bdg_lookup;
612f196ce38SLuigi Rizzo 
613ce3ee1e7SLuigi Rizzo 	/* the forwarding table, MAC+ports.
614ce3ee1e7SLuigi Rizzo 	 * XXX should be changed to an argument to be passed to
615ce3ee1e7SLuigi Rizzo 	 * the lookup function, and allocated on attach
616ce3ee1e7SLuigi Rizzo 	 */
617f196ce38SLuigi Rizzo 	struct nm_hash_ent ht[NM_BDG_HASH];
618f196ce38SLuigi Rizzo };
619f196ce38SLuigi Rizzo 
620f196ce38SLuigi Rizzo 
621ce3ee1e7SLuigi Rizzo /*
622ce3ee1e7SLuigi Rizzo  * XXX in principle nm_bridges could be created dynamically
623ce3ee1e7SLuigi Rizzo  * Right now we have a static array and deletions are protected
624ce3ee1e7SLuigi Rizzo  * by an exclusive lock.
625f18be576SLuigi Rizzo  */
626ce3ee1e7SLuigi Rizzo struct nm_bridge nm_bridges[NM_BRIDGES];
627f18be576SLuigi Rizzo 
628ce3ee1e7SLuigi Rizzo 
629ce3ee1e7SLuigi Rizzo /*
630ce3ee1e7SLuigi Rizzo  * A few function to tell which kind of port are we using.
631ce3ee1e7SLuigi Rizzo  * XXX should we hold a lock ?
632ce3ee1e7SLuigi Rizzo  *
633ce3ee1e7SLuigi Rizzo  * nma_is_vp()		virtual port
634ce3ee1e7SLuigi Rizzo  * nma_is_host()	port connected to the host stack
635ce3ee1e7SLuigi Rizzo  * nma_is_hw()		port connected to a NIC
636ce3ee1e7SLuigi Rizzo  */
637ce3ee1e7SLuigi Rizzo int nma_is_vp(struct netmap_adapter *na);
638ce3ee1e7SLuigi Rizzo int
639f18be576SLuigi Rizzo nma_is_vp(struct netmap_adapter *na)
640f18be576SLuigi Rizzo {
641f18be576SLuigi Rizzo 	return na->nm_register == bdg_netmap_reg;
642f18be576SLuigi Rizzo }
643ce3ee1e7SLuigi Rizzo 
644f18be576SLuigi Rizzo static __inline int
645f18be576SLuigi Rizzo nma_is_host(struct netmap_adapter *na)
646f18be576SLuigi Rizzo {
647f18be576SLuigi Rizzo 	return na->nm_register == NULL;
648f18be576SLuigi Rizzo }
649ce3ee1e7SLuigi Rizzo 
650f18be576SLuigi Rizzo static __inline int
651f18be576SLuigi Rizzo nma_is_hw(struct netmap_adapter *na)
652f18be576SLuigi Rizzo {
653f18be576SLuigi Rizzo 	/* In case of sw adapter, nm_register is NULL */
654f18be576SLuigi Rizzo 	return !nma_is_vp(na) && !nma_is_host(na);
655f18be576SLuigi Rizzo }
656f18be576SLuigi Rizzo 
657ce3ee1e7SLuigi Rizzo 
658f18be576SLuigi Rizzo /*
659ce3ee1e7SLuigi Rizzo  * If the NIC is owned by the kernel
660f18be576SLuigi Rizzo  * (i.e., bridge), neither another bridge nor user can use it;
661f18be576SLuigi Rizzo  * if the NIC is owned by a user, only users can share it.
662ce3ee1e7SLuigi Rizzo  * Evaluation must be done under NMG_LOCK().
663f18be576SLuigi Rizzo  */
664f18be576SLuigi Rizzo #define NETMAP_OWNED_BY_KERN(ifp)	(!nma_is_vp(NA(ifp)) && NA(ifp)->na_bdg)
665f18be576SLuigi Rizzo #define NETMAP_OWNED_BY_ANY(ifp) \
666f18be576SLuigi Rizzo 	(NETMAP_OWNED_BY_KERN(ifp) || (NA(ifp)->refcount > 0))
667f196ce38SLuigi Rizzo 
668f196ce38SLuigi Rizzo /*
669f196ce38SLuigi Rizzo  * NA(ifp)->bdg_port	port index
670f196ce38SLuigi Rizzo  */
671f196ce38SLuigi Rizzo 
672ce3ee1e7SLuigi Rizzo 
673ce3ee1e7SLuigi Rizzo /*
674ce3ee1e7SLuigi Rizzo  * this is a slightly optimized copy routine which rounds
675ce3ee1e7SLuigi Rizzo  * to multiple of 64 bytes and is often faster than dealing
676ce3ee1e7SLuigi Rizzo  * with other odd sizes. We assume there is enough room
677ce3ee1e7SLuigi Rizzo  * in the source and destination buffers.
678ce3ee1e7SLuigi Rizzo  *
679ce3ee1e7SLuigi Rizzo  * XXX only for multiples of 64 bytes, non overlapped.
680ce3ee1e7SLuigi Rizzo  */
681f196ce38SLuigi Rizzo static inline void
682f196ce38SLuigi Rizzo pkt_copy(void *_src, void *_dst, int l)
683f196ce38SLuigi Rizzo {
684f196ce38SLuigi Rizzo         uint64_t *src = _src;
685f196ce38SLuigi Rizzo         uint64_t *dst = _dst;
686f196ce38SLuigi Rizzo         if (unlikely(l >= 1024)) {
687ce3ee1e7SLuigi Rizzo                 memcpy(dst, src, l);
688f196ce38SLuigi Rizzo                 return;
689f196ce38SLuigi Rizzo         }
690f196ce38SLuigi Rizzo         for (; likely(l > 0); l-=64) {
691f196ce38SLuigi Rizzo                 *dst++ = *src++;
692f196ce38SLuigi Rizzo                 *dst++ = *src++;
693f196ce38SLuigi Rizzo                 *dst++ = *src++;
694f196ce38SLuigi Rizzo                 *dst++ = *src++;
695f196ce38SLuigi Rizzo                 *dst++ = *src++;
696f196ce38SLuigi Rizzo                 *dst++ = *src++;
697f196ce38SLuigi Rizzo                 *dst++ = *src++;
698f196ce38SLuigi Rizzo                 *dst++ = *src++;
699f196ce38SLuigi Rizzo         }
700f196ce38SLuigi Rizzo }
701f196ce38SLuigi Rizzo 
702f18be576SLuigi Rizzo 
703ce3ee1e7SLuigi Rizzo #ifdef TEST_STUFF
704ce3ee1e7SLuigi Rizzo struct xxx {
705ce3ee1e7SLuigi Rizzo 	char *name;
706ce3ee1e7SLuigi Rizzo 	void (*fn)(uint32_t);
707ce3ee1e7SLuigi Rizzo };
708ce3ee1e7SLuigi Rizzo 
709ce3ee1e7SLuigi Rizzo 
710ce3ee1e7SLuigi Rizzo static void
711ce3ee1e7SLuigi Rizzo nm_test_defmtx(uint32_t n)
712ce3ee1e7SLuigi Rizzo {
713ce3ee1e7SLuigi Rizzo 	uint32_t i;
714ce3ee1e7SLuigi Rizzo 	struct mtx m;
715ce3ee1e7SLuigi Rizzo 	mtx_init(&m, "test", NULL, MTX_DEF);
716ce3ee1e7SLuigi Rizzo 	for (i = 0; i < n; i++) { mtx_lock(&m); mtx_unlock(&m); }
717ce3ee1e7SLuigi Rizzo 	mtx_destroy(&m);
718ce3ee1e7SLuigi Rizzo 	return;
719ce3ee1e7SLuigi Rizzo }
720ce3ee1e7SLuigi Rizzo 
721ce3ee1e7SLuigi Rizzo static void
722ce3ee1e7SLuigi Rizzo nm_test_spinmtx(uint32_t n)
723ce3ee1e7SLuigi Rizzo {
724ce3ee1e7SLuigi Rizzo 	uint32_t i;
725ce3ee1e7SLuigi Rizzo 	struct mtx m;
726ce3ee1e7SLuigi Rizzo 	mtx_init(&m, "test", NULL, MTX_SPIN);
727ce3ee1e7SLuigi Rizzo 	for (i = 0; i < n; i++) { mtx_lock(&m); mtx_unlock(&m); }
728ce3ee1e7SLuigi Rizzo 	mtx_destroy(&m);
729ce3ee1e7SLuigi Rizzo 	return;
730ce3ee1e7SLuigi Rizzo }
731ce3ee1e7SLuigi Rizzo 
732ce3ee1e7SLuigi Rizzo static void
733ce3ee1e7SLuigi Rizzo nm_test_rlock(uint32_t n)
734ce3ee1e7SLuigi Rizzo {
735ce3ee1e7SLuigi Rizzo 	uint32_t i;
736ce3ee1e7SLuigi Rizzo 	struct rwlock m;
737ce3ee1e7SLuigi Rizzo 	rw_init(&m, "test");
738ce3ee1e7SLuigi Rizzo 	for (i = 0; i < n; i++) { rw_rlock(&m); rw_runlock(&m); }
739ce3ee1e7SLuigi Rizzo 	rw_destroy(&m);
740ce3ee1e7SLuigi Rizzo 	return;
741ce3ee1e7SLuigi Rizzo }
742ce3ee1e7SLuigi Rizzo 
743ce3ee1e7SLuigi Rizzo static void
744ce3ee1e7SLuigi Rizzo nm_test_wlock(uint32_t n)
745ce3ee1e7SLuigi Rizzo {
746ce3ee1e7SLuigi Rizzo 	uint32_t i;
747ce3ee1e7SLuigi Rizzo 	struct rwlock m;
748ce3ee1e7SLuigi Rizzo 	rw_init(&m, "test");
749ce3ee1e7SLuigi Rizzo 	for (i = 0; i < n; i++) { rw_wlock(&m); rw_wunlock(&m); }
750ce3ee1e7SLuigi Rizzo 	rw_destroy(&m);
751ce3ee1e7SLuigi Rizzo 	return;
752ce3ee1e7SLuigi Rizzo }
753ce3ee1e7SLuigi Rizzo 
754ce3ee1e7SLuigi Rizzo static void
755ce3ee1e7SLuigi Rizzo nm_test_slock(uint32_t n)
756ce3ee1e7SLuigi Rizzo {
757ce3ee1e7SLuigi Rizzo 	uint32_t i;
758ce3ee1e7SLuigi Rizzo 	struct sx m;
759ce3ee1e7SLuigi Rizzo 	sx_init(&m, "test");
760ce3ee1e7SLuigi Rizzo 	for (i = 0; i < n; i++) { sx_slock(&m); sx_sunlock(&m); }
761ce3ee1e7SLuigi Rizzo 	sx_destroy(&m);
762ce3ee1e7SLuigi Rizzo 	return;
763ce3ee1e7SLuigi Rizzo }
764ce3ee1e7SLuigi Rizzo 
765ce3ee1e7SLuigi Rizzo static void
766ce3ee1e7SLuigi Rizzo nm_test_xlock(uint32_t n)
767ce3ee1e7SLuigi Rizzo {
768ce3ee1e7SLuigi Rizzo 	uint32_t i;
769ce3ee1e7SLuigi Rizzo 	struct sx m;
770ce3ee1e7SLuigi Rizzo 	sx_init(&m, "test");
771ce3ee1e7SLuigi Rizzo 	for (i = 0; i < n; i++) { sx_xlock(&m); sx_xunlock(&m); }
772ce3ee1e7SLuigi Rizzo 	sx_destroy(&m);
773ce3ee1e7SLuigi Rizzo 	return;
774ce3ee1e7SLuigi Rizzo }
775ce3ee1e7SLuigi Rizzo 
776ce3ee1e7SLuigi Rizzo 
777ce3ee1e7SLuigi Rizzo struct xxx nm_tests[] = {
778ce3ee1e7SLuigi Rizzo 	{ "defmtx", nm_test_defmtx },
779ce3ee1e7SLuigi Rizzo 	{ "spinmtx", nm_test_spinmtx },
780ce3ee1e7SLuigi Rizzo 	{ "rlock", nm_test_rlock },
781ce3ee1e7SLuigi Rizzo 	{ "wlock", nm_test_wlock },
782ce3ee1e7SLuigi Rizzo 	{ "slock", nm_test_slock },
783ce3ee1e7SLuigi Rizzo 	{ "xlock", nm_test_xlock },
784ce3ee1e7SLuigi Rizzo };
785ce3ee1e7SLuigi Rizzo 
786ce3ee1e7SLuigi Rizzo static int
787ce3ee1e7SLuigi Rizzo nm_test(struct nmreq *nmr)
788ce3ee1e7SLuigi Rizzo {
789ce3ee1e7SLuigi Rizzo 	uint32_t scale, n, test;
790ce3ee1e7SLuigi Rizzo 	static int old_test = -1;
791ce3ee1e7SLuigi Rizzo 
792ce3ee1e7SLuigi Rizzo 	test = nmr->nr_cmd;
793ce3ee1e7SLuigi Rizzo 	scale = nmr->nr_offset;
794ce3ee1e7SLuigi Rizzo 	n = sizeof(nm_tests) / sizeof(struct xxx) - 1;
795ce3ee1e7SLuigi Rizzo 	if (test > n) {
796ce3ee1e7SLuigi Rizzo 		D("test index too high, max %d", n);
797ce3ee1e7SLuigi Rizzo 		return 0;
798ce3ee1e7SLuigi Rizzo 	}
799ce3ee1e7SLuigi Rizzo 
800ce3ee1e7SLuigi Rizzo 	if (old_test != test) {
801ce3ee1e7SLuigi Rizzo 		D("test %s scale %d", nm_tests[test].name, scale);
802ce3ee1e7SLuigi Rizzo 		old_test = test;
803ce3ee1e7SLuigi Rizzo 	}
804ce3ee1e7SLuigi Rizzo 	nm_tests[test].fn(scale);
805ce3ee1e7SLuigi Rizzo 	return 0;
806ce3ee1e7SLuigi Rizzo }
807ce3ee1e7SLuigi Rizzo #endif /* TEST_STUFF */
808ce3ee1e7SLuigi Rizzo 
809f196ce38SLuigi Rizzo /*
810f196ce38SLuigi Rizzo  * locate a bridge among the existing ones.
811ce3ee1e7SLuigi Rizzo  * MUST BE CALLED WITH NMG_LOCK()
812ce3ee1e7SLuigi Rizzo  *
813f196ce38SLuigi Rizzo  * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
814f196ce38SLuigi Rizzo  * We assume that this is called with a name of at least NM_NAME chars.
815f196ce38SLuigi Rizzo  */
816f196ce38SLuigi Rizzo static struct nm_bridge *
817f18be576SLuigi Rizzo nm_find_bridge(const char *name, int create)
818f196ce38SLuigi Rizzo {
819f18be576SLuigi Rizzo 	int i, l, namelen;
820f196ce38SLuigi Rizzo 	struct nm_bridge *b = NULL;
821f196ce38SLuigi Rizzo 
822ce3ee1e7SLuigi Rizzo 	NMG_LOCK_ASSERT();
823ce3ee1e7SLuigi Rizzo 
824f196ce38SLuigi Rizzo 	namelen = strlen(NM_NAME);	/* base length */
825ce3ee1e7SLuigi Rizzo 	l = name ? strlen(name) : 0;		/* actual length */
826ce3ee1e7SLuigi Rizzo 	if (l < namelen) {
827ce3ee1e7SLuigi Rizzo 		D("invalid bridge name %s", name ? name : NULL);
828ce3ee1e7SLuigi Rizzo 		return NULL;
829ce3ee1e7SLuigi Rizzo 	}
830f196ce38SLuigi Rizzo 	for (i = namelen + 1; i < l; i++) {
831f196ce38SLuigi Rizzo 		if (name[i] == ':') {
832f196ce38SLuigi Rizzo 			namelen = i;
833f196ce38SLuigi Rizzo 			break;
834f196ce38SLuigi Rizzo 		}
835f196ce38SLuigi Rizzo 	}
836f196ce38SLuigi Rizzo 	if (namelen >= IFNAMSIZ)
837f196ce38SLuigi Rizzo 		namelen = IFNAMSIZ;
838f196ce38SLuigi Rizzo 	ND("--- prefix is '%.*s' ---", namelen, name);
839f196ce38SLuigi Rizzo 
840f18be576SLuigi Rizzo 	/* lookup the name, remember empty slot if there is one */
841f18be576SLuigi Rizzo 	for (i = 0; i < NM_BRIDGES; i++) {
842f18be576SLuigi Rizzo 		struct nm_bridge *x = nm_bridges + i;
843f18be576SLuigi Rizzo 
844ce3ee1e7SLuigi Rizzo 		if (x->bdg_active_ports == 0) {
845f18be576SLuigi Rizzo 			if (create && b == NULL)
846f18be576SLuigi Rizzo 				b = x;	/* record empty slot */
847ce3ee1e7SLuigi Rizzo 		} else if (x->bdg_namelen != namelen) {
848f18be576SLuigi Rizzo 			continue;
849ce3ee1e7SLuigi Rizzo 		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
850f196ce38SLuigi Rizzo 			ND("found '%.*s' at %d", namelen, name, i);
851f18be576SLuigi Rizzo 			b = x;
852f196ce38SLuigi Rizzo 			break;
853f196ce38SLuigi Rizzo 		}
854f196ce38SLuigi Rizzo 	}
855f18be576SLuigi Rizzo 	if (i == NM_BRIDGES && b) { /* name not found, can create entry */
856ce3ee1e7SLuigi Rizzo 		/* initialize the bridge */
857ce3ee1e7SLuigi Rizzo 		strncpy(b->bdg_basename, name, namelen);
858ce3ee1e7SLuigi Rizzo 		ND("create new bridge %s with ports %d", b->bdg_basename,
859ce3ee1e7SLuigi Rizzo 			b->bdg_active_ports);
860ce3ee1e7SLuigi Rizzo 		b->bdg_namelen = namelen;
861ce3ee1e7SLuigi Rizzo 		b->bdg_active_ports = 0;
862ce3ee1e7SLuigi Rizzo 		for (i = 0; i < NM_BDG_MAXPORTS; i++)
863ce3ee1e7SLuigi Rizzo 			b->bdg_port_index[i] = i;
864f18be576SLuigi Rizzo 		/* set the default function */
865f18be576SLuigi Rizzo 		b->nm_bdg_lookup = netmap_bdg_learning;
866f18be576SLuigi Rizzo 		/* reset the MAC address table */
867f18be576SLuigi Rizzo 		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
868f196ce38SLuigi Rizzo 	}
869f196ce38SLuigi Rizzo 	return b;
870f196ce38SLuigi Rizzo }
871f18be576SLuigi Rizzo 
872f18be576SLuigi Rizzo 
873f18be576SLuigi Rizzo /*
874f18be576SLuigi Rizzo  * Free the forwarding tables for rings attached to switch ports.
875f18be576SLuigi Rizzo  */
876f18be576SLuigi Rizzo static void
877f18be576SLuigi Rizzo nm_free_bdgfwd(struct netmap_adapter *na)
878f18be576SLuigi Rizzo {
879f18be576SLuigi Rizzo 	int nrings, i;
880f18be576SLuigi Rizzo 	struct netmap_kring *kring;
881f18be576SLuigi Rizzo 
882ce3ee1e7SLuigi Rizzo 	NMG_LOCK_ASSERT();
883f18be576SLuigi Rizzo 	nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
884f18be576SLuigi Rizzo 	kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
885f18be576SLuigi Rizzo 	for (i = 0; i < nrings; i++) {
886f18be576SLuigi Rizzo 		if (kring[i].nkr_ft) {
887f18be576SLuigi Rizzo 			free(kring[i].nkr_ft, M_DEVBUF);
888f18be576SLuigi Rizzo 			kring[i].nkr_ft = NULL; /* protect from freeing twice */
889f18be576SLuigi Rizzo 		}
890f18be576SLuigi Rizzo 	}
891f18be576SLuigi Rizzo 	if (nma_is_hw(na))
892f18be576SLuigi Rizzo 		nm_free_bdgfwd(SWNA(na->ifp));
893f18be576SLuigi Rizzo }
894f18be576SLuigi Rizzo 
895f18be576SLuigi Rizzo 
896f18be576SLuigi Rizzo /*
897f18be576SLuigi Rizzo  * Allocate the forwarding tables for the rings attached to the bridge ports.
898f18be576SLuigi Rizzo  */
899f18be576SLuigi Rizzo static int
900f18be576SLuigi Rizzo nm_alloc_bdgfwd(struct netmap_adapter *na)
901f18be576SLuigi Rizzo {
902f18be576SLuigi Rizzo 	int nrings, l, i, num_dstq;
903f18be576SLuigi Rizzo 	struct netmap_kring *kring;
904f18be576SLuigi Rizzo 
905ce3ee1e7SLuigi Rizzo 	NMG_LOCK_ASSERT();
906f18be576SLuigi Rizzo 	/* all port:rings + broadcast */
907f18be576SLuigi Rizzo 	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
908ce3ee1e7SLuigi Rizzo 	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
909f18be576SLuigi Rizzo 	l += sizeof(struct nm_bdg_q) * num_dstq;
910ce3ee1e7SLuigi Rizzo 	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
911f18be576SLuigi Rizzo 
912f18be576SLuigi Rizzo 	nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
913f18be576SLuigi Rizzo 	kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
914f18be576SLuigi Rizzo 	for (i = 0; i < nrings; i++) {
915f18be576SLuigi Rizzo 		struct nm_bdg_fwd *ft;
916f18be576SLuigi Rizzo 		struct nm_bdg_q *dstq;
917f18be576SLuigi Rizzo 		int j;
918f18be576SLuigi Rizzo 
919f18be576SLuigi Rizzo 		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
920f18be576SLuigi Rizzo 		if (!ft) {
921f18be576SLuigi Rizzo 			nm_free_bdgfwd(na);
922f18be576SLuigi Rizzo 			return ENOMEM;
923f18be576SLuigi Rizzo 		}
924ce3ee1e7SLuigi Rizzo 		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
925ce3ee1e7SLuigi Rizzo 		for (j = 0; j < num_dstq; j++) {
926ce3ee1e7SLuigi Rizzo 			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
927ce3ee1e7SLuigi Rizzo 			dstq[j].bq_len = 0;
928ce3ee1e7SLuigi Rizzo 		}
929f18be576SLuigi Rizzo 		kring[i].nkr_ft = ft;
930f18be576SLuigi Rizzo 	}
931f18be576SLuigi Rizzo 	if (nma_is_hw(na))
932f18be576SLuigi Rizzo 		nm_alloc_bdgfwd(SWNA(na->ifp));
933f18be576SLuigi Rizzo 	return 0;
934f18be576SLuigi Rizzo }
935f18be576SLuigi Rizzo 
936ae10d1afSLuigi Rizzo 
937ae10d1afSLuigi Rizzo /*
938ae10d1afSLuigi Rizzo  * Fetch configuration from the device, to cope with dynamic
939ae10d1afSLuigi Rizzo  * reconfigurations after loading the module.
940ae10d1afSLuigi Rizzo  */
941ae10d1afSLuigi Rizzo static int
942ae10d1afSLuigi Rizzo netmap_update_config(struct netmap_adapter *na)
943ae10d1afSLuigi Rizzo {
944ae10d1afSLuigi Rizzo 	struct ifnet *ifp = na->ifp;
945ae10d1afSLuigi Rizzo 	u_int txr, txd, rxr, rxd;
946ae10d1afSLuigi Rizzo 
947ae10d1afSLuigi Rizzo 	txr = txd = rxr = rxd = 0;
948ae10d1afSLuigi Rizzo 	if (na->nm_config) {
949ae10d1afSLuigi Rizzo 		na->nm_config(ifp, &txr, &txd, &rxr, &rxd);
950ae10d1afSLuigi Rizzo 	} else {
951ae10d1afSLuigi Rizzo 		/* take whatever we had at init time */
952ae10d1afSLuigi Rizzo 		txr = na->num_tx_rings;
953ae10d1afSLuigi Rizzo 		txd = na->num_tx_desc;
954ae10d1afSLuigi Rizzo 		rxr = na->num_rx_rings;
955ae10d1afSLuigi Rizzo 		rxd = na->num_rx_desc;
956ae10d1afSLuigi Rizzo 	}
957ae10d1afSLuigi Rizzo 
958ae10d1afSLuigi Rizzo 	if (na->num_tx_rings == txr && na->num_tx_desc == txd &&
959ae10d1afSLuigi Rizzo 	    na->num_rx_rings == rxr && na->num_rx_desc == rxd)
960ae10d1afSLuigi Rizzo 		return 0; /* nothing changed */
961ae10d1afSLuigi Rizzo 	if (netmap_verbose || na->refcount > 0) {
962ae10d1afSLuigi Rizzo 		D("stored config %s: txring %d x %d, rxring %d x %d",
963ae10d1afSLuigi Rizzo 			ifp->if_xname,
964ae10d1afSLuigi Rizzo 			na->num_tx_rings, na->num_tx_desc,
965ae10d1afSLuigi Rizzo 			na->num_rx_rings, na->num_rx_desc);
966ae10d1afSLuigi Rizzo 		D("new config %s: txring %d x %d, rxring %d x %d",
967ae10d1afSLuigi Rizzo 			ifp->if_xname, txr, txd, rxr, rxd);
968ae10d1afSLuigi Rizzo 	}
969ae10d1afSLuigi Rizzo 	if (na->refcount == 0) {
970ae10d1afSLuigi Rizzo 		D("configuration changed (but fine)");
971ae10d1afSLuigi Rizzo 		na->num_tx_rings = txr;
972ae10d1afSLuigi Rizzo 		na->num_tx_desc = txd;
973ae10d1afSLuigi Rizzo 		na->num_rx_rings = rxr;
974ae10d1afSLuigi Rizzo 		na->num_rx_desc = rxd;
975ae10d1afSLuigi Rizzo 		return 0;
976ae10d1afSLuigi Rizzo 	}
977ae10d1afSLuigi Rizzo 	D("configuration changed while active, this is bad...");
978ae10d1afSLuigi Rizzo 	return 1;
979ae10d1afSLuigi Rizzo }
980ae10d1afSLuigi Rizzo 
981ce3ee1e7SLuigi Rizzo static struct netmap_if *
982ce3ee1e7SLuigi Rizzo netmap_if_new(const char *ifname, struct netmap_adapter *na)
983ce3ee1e7SLuigi Rizzo {
984ce3ee1e7SLuigi Rizzo 	if (netmap_update_config(na)) {
985ce3ee1e7SLuigi Rizzo 		/* configuration mismatch, report and fail */
986ce3ee1e7SLuigi Rizzo 		return NULL;
987ce3ee1e7SLuigi Rizzo 	}
988ce3ee1e7SLuigi Rizzo 	return netmap_mem_if_new(ifname, na);
989ce3ee1e7SLuigi Rizzo }
99068b8534bSLuigi Rizzo 
9918241616dSLuigi Rizzo 
9928241616dSLuigi Rizzo /* Structure associated to each thread which registered an interface.
9938241616dSLuigi Rizzo  *
9948241616dSLuigi Rizzo  * The first 4 fields of this structure are written by NIOCREGIF and
9958241616dSLuigi Rizzo  * read by poll() and NIOC?XSYNC.
9968241616dSLuigi Rizzo  * There is low contention among writers (actually, a correct user program
9978241616dSLuigi Rizzo  * should have no contention among writers) and among writers and readers,
9988241616dSLuigi Rizzo  * so we use a single global lock to protect the structure initialization.
9998241616dSLuigi Rizzo  * Since initialization involves the allocation of memory, we reuse the memory
10008241616dSLuigi Rizzo  * allocator lock.
10018241616dSLuigi Rizzo  * Read access to the structure is lock free. Readers must check that
10028241616dSLuigi Rizzo  * np_nifp is not NULL before using the other fields.
10038241616dSLuigi Rizzo  * If np_nifp is NULL initialization has not been performed, so they should
10048241616dSLuigi Rizzo  * return an error to userlevel.
10058241616dSLuigi Rizzo  *
10068241616dSLuigi Rizzo  * The ref_done field is used to regulate access to the refcount in the
10078241616dSLuigi Rizzo  * memory allocator. The refcount must be incremented at most once for
10088241616dSLuigi Rizzo  * each open("/dev/netmap"). The increment is performed by the first
10098241616dSLuigi Rizzo  * function that calls netmap_get_memory() (currently called by
10108241616dSLuigi Rizzo  * mmap(), NIOCGINFO and NIOCREGIF).
10118241616dSLuigi Rizzo  * If the refcount is incremented, it is then decremented when the
10128241616dSLuigi Rizzo  * private structure is destroyed.
10138241616dSLuigi Rizzo  */
101468b8534bSLuigi Rizzo struct netmap_priv_d {
1015ce3ee1e7SLuigi Rizzo 	struct netmap_if * volatile np_nifp;	/* netmap if descriptor. */
101668b8534bSLuigi Rizzo 
1017ce3ee1e7SLuigi Rizzo 	struct ifnet	*np_ifp;	/* device for which we hold a ref. */
101868b8534bSLuigi Rizzo 	int		np_ringid;	/* from the ioctl */
101968b8534bSLuigi Rizzo 	u_int		np_qfirst, np_qlast;	/* range of rings to scan */
102068b8534bSLuigi Rizzo 	uint16_t	np_txpoll;
10218241616dSLuigi Rizzo 
1022ce3ee1e7SLuigi Rizzo 	struct netmap_mem_d *np_mref;	/* use with NMG_LOCK held */
1023ce3ee1e7SLuigi Rizzo #ifdef __FreeBSD__
1024ce3ee1e7SLuigi Rizzo 	int		np_refcount;	/* use with NMG_LOCK held */
1025ce3ee1e7SLuigi Rizzo #endif /* __FreeBSD__ */
102668b8534bSLuigi Rizzo };
102768b8534bSLuigi Rizzo 
1028ce3ee1e7SLuigi Rizzo /* grab a reference to the memory allocator, if we don't have one already.  The
1029ce3ee1e7SLuigi Rizzo  * reference is taken from the netmap_adapter registered with the priv.
1030ce3ee1e7SLuigi Rizzo  *
1031ce3ee1e7SLuigi Rizzo  */
1032ce3ee1e7SLuigi Rizzo static int
1033ce3ee1e7SLuigi Rizzo netmap_get_memory_locked(struct netmap_priv_d* p)
1034ce3ee1e7SLuigi Rizzo {
1035ce3ee1e7SLuigi Rizzo 	struct netmap_mem_d *nmd;
1036ce3ee1e7SLuigi Rizzo 	int error = 0;
1037ce3ee1e7SLuigi Rizzo 
1038ce3ee1e7SLuigi Rizzo 	if (p->np_ifp == NULL) {
1039ce3ee1e7SLuigi Rizzo 		if (!netmap_mmap_unreg)
1040ce3ee1e7SLuigi Rizzo 			return ENODEV;
1041ce3ee1e7SLuigi Rizzo 		/* for compatibility with older versions of the API
1042ce3ee1e7SLuigi Rizzo  		 * we use the global allocator when no interface has been
1043ce3ee1e7SLuigi Rizzo  		 * registered
1044ce3ee1e7SLuigi Rizzo  		 */
1045ce3ee1e7SLuigi Rizzo 		nmd = &nm_mem;
1046ce3ee1e7SLuigi Rizzo 	} else {
1047ce3ee1e7SLuigi Rizzo 		nmd = NA(p->np_ifp)->nm_mem;
1048ce3ee1e7SLuigi Rizzo 	}
1049ce3ee1e7SLuigi Rizzo 	if (p->np_mref == NULL) {
1050ce3ee1e7SLuigi Rizzo 		error = netmap_mem_finalize(nmd);
1051ce3ee1e7SLuigi Rizzo 		if (!error)
1052ce3ee1e7SLuigi Rizzo 			p->np_mref = nmd;
1053ce3ee1e7SLuigi Rizzo 	} else if (p->np_mref != nmd) {
1054ce3ee1e7SLuigi Rizzo 		/* a virtual port has been registered, but previous
1055ce3ee1e7SLuigi Rizzo  		 * syscalls already used the global allocator.
1056ce3ee1e7SLuigi Rizzo  		 * We cannot continue
1057ce3ee1e7SLuigi Rizzo  		 */
1058ce3ee1e7SLuigi Rizzo 		error = ENODEV;
1059ce3ee1e7SLuigi Rizzo 	}
1060ce3ee1e7SLuigi Rizzo 	return error;
1061ce3ee1e7SLuigi Rizzo }
106268b8534bSLuigi Rizzo 
10638241616dSLuigi Rizzo static int
10648241616dSLuigi Rizzo netmap_get_memory(struct netmap_priv_d* p)
10658241616dSLuigi Rizzo {
1066ce3ee1e7SLuigi Rizzo 	int error;
1067ce3ee1e7SLuigi Rizzo 	NMG_LOCK();
1068ce3ee1e7SLuigi Rizzo 	error = netmap_get_memory_locked(p);
1069ce3ee1e7SLuigi Rizzo 	NMG_UNLOCK();
10708241616dSLuigi Rizzo 	return error;
10718241616dSLuigi Rizzo }
10728241616dSLuigi Rizzo 
1073ce3ee1e7SLuigi Rizzo static int
1074ce3ee1e7SLuigi Rizzo netmap_have_memory_locked(struct netmap_priv_d* p)
1075ce3ee1e7SLuigi Rizzo {
1076ce3ee1e7SLuigi Rizzo 	return p->np_mref != NULL;
1077ce3ee1e7SLuigi Rizzo }
1078ce3ee1e7SLuigi Rizzo 
1079ce3ee1e7SLuigi Rizzo static void
1080ce3ee1e7SLuigi Rizzo netmap_drop_memory_locked(struct netmap_priv_d* p)
1081ce3ee1e7SLuigi Rizzo {
1082ce3ee1e7SLuigi Rizzo 	if (p->np_mref) {
1083ce3ee1e7SLuigi Rizzo 		netmap_mem_deref(p->np_mref);
1084ce3ee1e7SLuigi Rizzo 		p->np_mref = NULL;
1085ce3ee1e7SLuigi Rizzo 	}
1086ce3ee1e7SLuigi Rizzo }
1087ce3ee1e7SLuigi Rizzo 
108868b8534bSLuigi Rizzo /*
108968b8534bSLuigi Rizzo  * File descriptor's private data destructor.
109068b8534bSLuigi Rizzo  *
109168b8534bSLuigi Rizzo  * Call nm_register(ifp,0) to stop netmap mode on the interface and
109268b8534bSLuigi Rizzo  * revert to normal operation. We expect that np_ifp has not gone.
1093ce3ee1e7SLuigi Rizzo  * The second argument is the nifp to work on. In some cases it is
1094ce3ee1e7SLuigi Rizzo  * not attached yet to the netmap_priv_d so we need to pass it as
1095ce3ee1e7SLuigi Rizzo  * a separate argument.
109668b8534bSLuigi Rizzo  */
1097ce3ee1e7SLuigi Rizzo /* call with NMG_LOCK held */
109868b8534bSLuigi Rizzo static void
1099ce3ee1e7SLuigi Rizzo netmap_do_unregif(struct netmap_priv_d *priv, struct netmap_if *nifp)
110068b8534bSLuigi Rizzo {
110168b8534bSLuigi Rizzo 	struct ifnet *ifp = priv->np_ifp;
110268b8534bSLuigi Rizzo 	struct netmap_adapter *na = NA(ifp);
110368b8534bSLuigi Rizzo 
1104ce3ee1e7SLuigi Rizzo 	NMG_LOCK_ASSERT();
110568b8534bSLuigi Rizzo 	na->refcount--;
110668b8534bSLuigi Rizzo 	if (na->refcount <= 0) {	/* last instance */
1107ce3ee1e7SLuigi Rizzo 		u_int i;
110868b8534bSLuigi Rizzo 
1109ae10d1afSLuigi Rizzo 		if (netmap_verbose)
1110ae10d1afSLuigi Rizzo 			D("deleting last instance for %s", ifp->if_xname);
111168b8534bSLuigi Rizzo 		/*
1112f18be576SLuigi Rizzo 		 * (TO CHECK) This function is only called
1113f18be576SLuigi Rizzo 		 * when the last reference to this file descriptor goes
1114f18be576SLuigi Rizzo 		 * away. This means we cannot have any pending poll()
1115f18be576SLuigi Rizzo 		 * or interrupt routine operating on the structure.
1116ce3ee1e7SLuigi Rizzo 		 * XXX The file may be closed in a thread while
1117ce3ee1e7SLuigi Rizzo 		 * another thread is using it.
1118ce3ee1e7SLuigi Rizzo 		 * Linux keeps the file opened until the last reference
1119ce3ee1e7SLuigi Rizzo 		 * by any outstanding ioctl/poll or mmap is gone.
1120ce3ee1e7SLuigi Rizzo 		 * FreeBSD does not track mmap()s (but we do) and
1121ce3ee1e7SLuigi Rizzo 		 * wakes up any sleeping poll(). Need to check what
1122ce3ee1e7SLuigi Rizzo 		 * happens if the close() occurs while a concurrent
1123ce3ee1e7SLuigi Rizzo 		 * syscall is running.
112468b8534bSLuigi Rizzo 		 */
112568b8534bSLuigi Rizzo 		na->nm_register(ifp, 0); /* off, clear IFCAP_NETMAP */
112668b8534bSLuigi Rizzo 		/* Wake up any sleeping threads. netmap_poll will
112768b8534bSLuigi Rizzo 		 * then return POLLERR
1128ce3ee1e7SLuigi Rizzo 		 * XXX The wake up now must happen during *_down(), when
1129ce3ee1e7SLuigi Rizzo 		 * we order all activities to stop. -gl
113068b8534bSLuigi Rizzo 		 */
1131f18be576SLuigi Rizzo 		nm_free_bdgfwd(na);
1132d76bf4ffSLuigi Rizzo 		for (i = 0; i < na->num_tx_rings + 1; i++) {
11332f70fca5SEd Maste 			mtx_destroy(&na->tx_rings[i].q_lock);
113464ae02c3SLuigi Rizzo 		}
1135d76bf4ffSLuigi Rizzo 		for (i = 0; i < na->num_rx_rings + 1; i++) {
11362f70fca5SEd Maste 			mtx_destroy(&na->rx_rings[i].q_lock);
113768b8534bSLuigi Rizzo 		}
11382f70fca5SEd Maste 		/* XXX kqueue(9) needed; these will mirror knlist_init. */
11392f70fca5SEd Maste 		/* knlist_destroy(&na->tx_si.si_note); */
11402f70fca5SEd Maste 		/* knlist_destroy(&na->rx_si.si_note); */
1141f18be576SLuigi Rizzo 		if (nma_is_hw(na))
1142f18be576SLuigi Rizzo 			SWNA(ifp)->tx_rings = SWNA(ifp)->rx_rings = NULL;
114368b8534bSLuigi Rizzo 	}
1144ce3ee1e7SLuigi Rizzo 	/*
1145ce3ee1e7SLuigi Rizzo 	 * netmap_mem_if_delete() deletes the nifp, and if this is
1146ce3ee1e7SLuigi Rizzo 	 * the last instance also buffers, rings and krings.
1147ce3ee1e7SLuigi Rizzo 	 */
1148ce3ee1e7SLuigi Rizzo 	netmap_mem_if_delete(na, nifp);
11495819da83SLuigi Rizzo }
115068b8534bSLuigi Rizzo 
1151f18be576SLuigi Rizzo 
1152ce3ee1e7SLuigi Rizzo /* we assume netmap adapter exists
1153ce3ee1e7SLuigi Rizzo  * Called with NMG_LOCK held
1154ce3ee1e7SLuigi Rizzo  */
1155f196ce38SLuigi Rizzo static void
1156f196ce38SLuigi Rizzo nm_if_rele(struct ifnet *ifp)
1157f196ce38SLuigi Rizzo {
1158ce3ee1e7SLuigi Rizzo 	int i, is_hw, hw, sw, lim;
1159f196ce38SLuigi Rizzo 	struct nm_bridge *b;
1160f18be576SLuigi Rizzo 	struct netmap_adapter *na;
1161ce3ee1e7SLuigi Rizzo 	uint8_t tmp[NM_BDG_MAXPORTS];
1162f196ce38SLuigi Rizzo 
1163ce3ee1e7SLuigi Rizzo 	NMG_LOCK_ASSERT();
1164f18be576SLuigi Rizzo 	/* I can be called not only for get_ifp()-ed references where netmap's
1165f18be576SLuigi Rizzo 	 * capability is guaranteed, but also for non-netmap-capable NICs.
1166f18be576SLuigi Rizzo 	 */
1167f18be576SLuigi Rizzo 	if (!NETMAP_CAPABLE(ifp) || !NA(ifp)->na_bdg) {
1168f196ce38SLuigi Rizzo 		if_rele(ifp);
1169f196ce38SLuigi Rizzo 		return;
1170f196ce38SLuigi Rizzo 	}
1171f18be576SLuigi Rizzo 	na = NA(ifp);
1172f18be576SLuigi Rizzo 	b = na->na_bdg;
1173f18be576SLuigi Rizzo 	is_hw = nma_is_hw(na);
1174f18be576SLuigi Rizzo 
1175ce3ee1e7SLuigi Rizzo 	ND("%s has %d references", ifp->if_xname, NA(ifp)->na_bdg_refcount);
1176f18be576SLuigi Rizzo 
1177ce3ee1e7SLuigi Rizzo 	if (!DROP_BDG_REF(ifp))
1178ce3ee1e7SLuigi Rizzo 		return;
1179ce3ee1e7SLuigi Rizzo 
1180ce3ee1e7SLuigi Rizzo 	/*
1181ce3ee1e7SLuigi Rizzo 	New algorithm:
1182ce3ee1e7SLuigi Rizzo 	make a copy of bdg_port_index;
1183ce3ee1e7SLuigi Rizzo 	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
1184ce3ee1e7SLuigi Rizzo 	in the array of bdg_port_index, replacing them with
1185ce3ee1e7SLuigi Rizzo 	entries from the bottom of the array;
1186ce3ee1e7SLuigi Rizzo 	decrement bdg_active_ports;
1187ce3ee1e7SLuigi Rizzo 	acquire BDG_WLOCK() and copy back the array.
1188ce3ee1e7SLuigi Rizzo 	 */
1189ce3ee1e7SLuigi Rizzo 
1190ce3ee1e7SLuigi Rizzo 	hw = NA(ifp)->bdg_port;
1191ce3ee1e7SLuigi Rizzo 	sw = (is_hw && SWNA(ifp)->na_bdg) ? SWNA(ifp)->bdg_port : -1;
1192ce3ee1e7SLuigi Rizzo 	lim = b->bdg_active_ports;
1193ce3ee1e7SLuigi Rizzo 
1194ce3ee1e7SLuigi Rizzo 	ND("detach %d and %d (lim %d)", hw, sw, lim);
1195ce3ee1e7SLuigi Rizzo 	/* make a copy of the list of active ports, update it,
1196ce3ee1e7SLuigi Rizzo 	 * and then copy back within BDG_WLOCK().
1197ce3ee1e7SLuigi Rizzo 	 */
1198ce3ee1e7SLuigi Rizzo 	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
1199ce3ee1e7SLuigi Rizzo 	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
1200ce3ee1e7SLuigi Rizzo 		if (hw >= 0 && tmp[i] == hw) {
1201ce3ee1e7SLuigi Rizzo 			ND("detach hw %d at %d", hw, i);
1202ce3ee1e7SLuigi Rizzo 			lim--; /* point to last active port */
1203ce3ee1e7SLuigi Rizzo 			tmp[i] = tmp[lim]; /* swap with i */
1204ce3ee1e7SLuigi Rizzo 			tmp[lim] = hw;	/* now this is inactive */
1205ce3ee1e7SLuigi Rizzo 			hw = -1;
1206ce3ee1e7SLuigi Rizzo 		} else if (sw >= 0 && tmp[i] == sw) {
1207ce3ee1e7SLuigi Rizzo 			ND("detach sw %d at %d", sw, i);
1208ce3ee1e7SLuigi Rizzo 			lim--;
1209ce3ee1e7SLuigi Rizzo 			tmp[i] = tmp[lim];
1210ce3ee1e7SLuigi Rizzo 			tmp[lim] = sw;
1211ce3ee1e7SLuigi Rizzo 			sw = -1;
1212ce3ee1e7SLuigi Rizzo 		} else {
1213ce3ee1e7SLuigi Rizzo 			i++;
1214ce3ee1e7SLuigi Rizzo 		}
1215ce3ee1e7SLuigi Rizzo 	}
1216ce3ee1e7SLuigi Rizzo 	if (hw >= 0 || sw >= 0) {
1217ce3ee1e7SLuigi Rizzo 		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
1218ce3ee1e7SLuigi Rizzo 	}
1219ce3ee1e7SLuigi Rizzo 	hw = NA(ifp)->bdg_port;
1220ce3ee1e7SLuigi Rizzo 	sw = (is_hw && SWNA(ifp)->na_bdg) ?  SWNA(ifp)->bdg_port : -1;
1221ce3ee1e7SLuigi Rizzo 
1222ce3ee1e7SLuigi Rizzo 	BDG_WLOCK(b);
1223ce3ee1e7SLuigi Rizzo 	b->bdg_ports[hw] = NULL;
1224f18be576SLuigi Rizzo 	na->na_bdg = NULL;
1225ce3ee1e7SLuigi Rizzo 	if (sw >= 0) {
1226ce3ee1e7SLuigi Rizzo 		b->bdg_ports[sw] = NULL;
1227f18be576SLuigi Rizzo 		SWNA(ifp)->na_bdg = NULL;
1228f196ce38SLuigi Rizzo 	}
1229ce3ee1e7SLuigi Rizzo 	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
1230ce3ee1e7SLuigi Rizzo 	b->bdg_active_ports = lim;
1231f18be576SLuigi Rizzo 	BDG_WUNLOCK(b);
1232ce3ee1e7SLuigi Rizzo 
1233ce3ee1e7SLuigi Rizzo 	ND("now %d active ports", lim);
1234ce3ee1e7SLuigi Rizzo 	if (lim == 0) {
1235ce3ee1e7SLuigi Rizzo 		ND("marking bridge %s as free", b->bdg_basename);
1236f18be576SLuigi Rizzo 		b->nm_bdg_lookup = NULL;
1237f18be576SLuigi Rizzo 	}
1238ce3ee1e7SLuigi Rizzo 
1239ce3ee1e7SLuigi Rizzo 	if (is_hw) {
1240f18be576SLuigi Rizzo 		if_rele(ifp);
1241f18be576SLuigi Rizzo 	} else {
1242ce3ee1e7SLuigi Rizzo 		if (na->na_flags & NAF_MEM_OWNER)
1243ce3ee1e7SLuigi Rizzo 			netmap_mem_private_delete(na->nm_mem);
1244f18be576SLuigi Rizzo 		bzero(na, sizeof(*na));
1245f18be576SLuigi Rizzo 		free(na, M_DEVBUF);
1246f18be576SLuigi Rizzo 		bzero(ifp, sizeof(*ifp));
1247f18be576SLuigi Rizzo 		free(ifp, M_DEVBUF);
1248f18be576SLuigi Rizzo 	}
1249ce3ee1e7SLuigi Rizzo }
1250ce3ee1e7SLuigi Rizzo 
1251ce3ee1e7SLuigi Rizzo 
1252ce3ee1e7SLuigi Rizzo /*
1253ce3ee1e7SLuigi Rizzo  * returns 1 if this is the last instance and we can free priv
1254ce3ee1e7SLuigi Rizzo  */
1255ce3ee1e7SLuigi Rizzo static int
1256ce3ee1e7SLuigi Rizzo netmap_dtor_locked(struct netmap_priv_d *priv)
1257ce3ee1e7SLuigi Rizzo {
1258ce3ee1e7SLuigi Rizzo 	struct ifnet *ifp = priv->np_ifp;
1259ce3ee1e7SLuigi Rizzo 
1260ce3ee1e7SLuigi Rizzo #ifdef __FreeBSD__
1261ce3ee1e7SLuigi Rizzo 	/*
1262ce3ee1e7SLuigi Rizzo 	 * np_refcount is the number of active mmaps on
1263ce3ee1e7SLuigi Rizzo 	 * this file descriptor
1264ce3ee1e7SLuigi Rizzo 	 */
1265ce3ee1e7SLuigi Rizzo 	if (--priv->np_refcount > 0) {
1266ce3ee1e7SLuigi Rizzo 		return 0;
1267ce3ee1e7SLuigi Rizzo 	}
1268ce3ee1e7SLuigi Rizzo #endif /* __FreeBSD__ */
1269ce3ee1e7SLuigi Rizzo 	if (ifp) {
1270ce3ee1e7SLuigi Rizzo 		netmap_do_unregif(priv, priv->np_nifp);
1271ce3ee1e7SLuigi Rizzo 	}
1272ce3ee1e7SLuigi Rizzo 	netmap_drop_memory_locked(priv);
1273ce3ee1e7SLuigi Rizzo 	if (ifp) {
1274ce3ee1e7SLuigi Rizzo 		nm_if_rele(ifp); /* might also destroy *na */
1275ce3ee1e7SLuigi Rizzo 	}
1276ce3ee1e7SLuigi Rizzo 	return 1;
1277f196ce38SLuigi Rizzo }
12785819da83SLuigi Rizzo 
12795819da83SLuigi Rizzo static void
12805819da83SLuigi Rizzo netmap_dtor(void *data)
12815819da83SLuigi Rizzo {
12825819da83SLuigi Rizzo 	struct netmap_priv_d *priv = data;
1283ce3ee1e7SLuigi Rizzo 	int last_instance;
12845819da83SLuigi Rizzo 
1285ce3ee1e7SLuigi Rizzo 	NMG_LOCK();
1286ce3ee1e7SLuigi Rizzo 	last_instance = netmap_dtor_locked(priv);
1287ce3ee1e7SLuigi Rizzo 	NMG_UNLOCK();
1288ce3ee1e7SLuigi Rizzo 	if (last_instance) {
1289ce3ee1e7SLuigi Rizzo 		bzero(priv, sizeof(*priv));	/* for safety */
129068b8534bSLuigi Rizzo 		free(priv, M_DEVBUF);
129168b8534bSLuigi Rizzo 	}
1292ce3ee1e7SLuigi Rizzo }
129368b8534bSLuigi Rizzo 
1294f18be576SLuigi Rizzo 
12958241616dSLuigi Rizzo #ifdef __FreeBSD__
12968241616dSLuigi Rizzo 
1297f18be576SLuigi Rizzo /*
1298f18be576SLuigi Rizzo  * In order to track whether pages are still mapped, we hook into
1299f18be576SLuigi Rizzo  * the standard cdev_pager and intercept the constructor and
1300f18be576SLuigi Rizzo  * destructor.
1301f18be576SLuigi Rizzo  */
13028241616dSLuigi Rizzo 
/*
 * Per-mapping state handed to the cdev pager callbacks below.
 * Allocated in netmap_mmap_single(), freed in netmap_dev_pager_dtor().
 */
struct netmap_vm_handle_t {
	struct cdev 		*dev;	/* device; referenced while mapped */
	struct netmap_priv_d	*priv;	/* owning file descriptor state */
};
1307f18be576SLuigi Rizzo 
13088241616dSLuigi Rizzo static int
13098241616dSLuigi Rizzo netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
13108241616dSLuigi Rizzo     vm_ooffset_t foff, struct ucred *cred, u_short *color)
13118241616dSLuigi Rizzo {
1312ce3ee1e7SLuigi Rizzo 	struct netmap_vm_handle_t *vmh = handle;
1313ce3ee1e7SLuigi Rizzo 	D("handle %p size %jd prot %d foff %jd",
1314ce3ee1e7SLuigi Rizzo 		handle, (intmax_t)size, prot, (intmax_t)foff);
1315ce3ee1e7SLuigi Rizzo 	dev_ref(vmh->dev);
1316ce3ee1e7SLuigi Rizzo 	return 0;
13178241616dSLuigi Rizzo }
13188241616dSLuigi Rizzo 
1319f18be576SLuigi Rizzo 
13208241616dSLuigi Rizzo static void
13218241616dSLuigi Rizzo netmap_dev_pager_dtor(void *handle)
13228241616dSLuigi Rizzo {
1323ce3ee1e7SLuigi Rizzo 	struct netmap_vm_handle_t *vmh = handle;
1324ce3ee1e7SLuigi Rizzo 	struct cdev *dev = vmh->dev;
1325ce3ee1e7SLuigi Rizzo 	struct netmap_priv_d *priv = vmh->priv;
1326ce3ee1e7SLuigi Rizzo 	D("handle %p", handle);
1327ce3ee1e7SLuigi Rizzo 	netmap_dtor(priv);
1328ce3ee1e7SLuigi Rizzo 	free(vmh, M_DEVBUF);
1329ce3ee1e7SLuigi Rizzo 	dev_rel(dev);
1330ce3ee1e7SLuigi Rizzo }
1331ce3ee1e7SLuigi Rizzo 
/*
 * Pager fault handler: resolve a fault on the netmap memory object
 * by mapping the corresponding physical address of the netmap
 * shared-memory region into a fictitious page.
 * Called with the VM object write-locked.
 */
static int
netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
	int prot, vm_page_t *mres)
{
	struct netmap_vm_handle_t *vmh = object->handle;
	struct netmap_priv_d *priv = vmh->priv;
	vm_paddr_t paddr;
	vm_page_t page;
	vm_memattr_t memattr;
	vm_pindex_t pidx;

	ND("object %p offset %jd prot %d mres %p",
			object, (intmax_t)offset, prot, mres);
	memattr = object->memattr;
	pidx = OFF_TO_IDX(offset);
	/* translate the object offset to a physical address in the
	 * netmap memory allocator; 0 means the offset is out of range */
	paddr = netmap_mem_ofstophys(priv->np_mref, offset);
	if (paddr == 0)
		return VM_PAGER_FAIL;

	if (((*mres)->flags & PG_FICTITIOUS) != 0) {
		/*
		 * If the passed in result page is a fake page, update it with
		 * the new physical address.
		 */
		page = *mres;
		vm_page_updatefake(page, paddr, memattr);
	} else {
		/*
		 * Replace the passed in reqpage page with our own fake page and
		 * free up the all of the original pages.
		 */
#ifndef VM_OBJECT_WUNLOCK	/* FreeBSD < 10.x */
#define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK
#define VM_OBJECT_WLOCK	VM_OBJECT_LOCK
#endif /* VM_OBJECT_WUNLOCK */

		/* drop the object lock across the allocation, then
		 * re-acquire it to swap the fake page in */
		VM_OBJECT_WUNLOCK(object);
		page = vm_page_getfake(paddr, memattr);
		VM_OBJECT_WLOCK(object);
		vm_page_lock(*mres);
		vm_page_free(*mres);
		vm_page_unlock(*mres);
		*mres = page;
		vm_page_insert(page, object, pidx);
	}
	page->valid = VM_PAGE_BITS_ALL;
	return (VM_PAGER_OK);
}
13808241616dSLuigi Rizzo 
13818241616dSLuigi Rizzo 
/* cdev pager callbacks used to track the lifetime of netmap mappings */
static struct cdev_pager_ops netmap_cdev_pager_ops = {
        .cdev_pg_ctor = netmap_dev_pager_ctor,
        .cdev_pg_dtor = netmap_dev_pager_dtor,
        .cdev_pg_fault = netmap_dev_pager_fault,
};
13878241616dSLuigi Rizzo 
1388f18be576SLuigi Rizzo 
/*
 * d_mmap_single handler: build the VM object backing an mmap() of
 * the netmap device. Allocates a netmap_vm_handle_t, bumps the
 * priv refcount (dropped by netmap_dev_pager_dtor via netmap_dtor
 * when the mapping goes away) and creates an OBJT_DEVICE pager
 * object with the callbacks above.
 * Returns 0 on success or an errno; on failure nothing is left
 * referenced or allocated.
 */
static int
netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
	vm_size_t objsize,  vm_object_t *objp, int prot)
{
	int error;
	struct netmap_vm_handle_t *vmh;
	struct netmap_priv_d *priv;
	vm_object_t obj;

	D("cdev %p foff %jd size %jd objp %p prot %d", cdev,
	    (intmax_t )*foff, (intmax_t )objsize, objp, prot);

	vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF,
			      M_NOWAIT | M_ZERO);
	if (vmh == NULL)
		return ENOMEM;
	vmh->dev = cdev;

	NMG_LOCK();
	error = devfs_get_cdevpriv((void**)&priv);
	if (error)
		goto err_unlock;
	vmh->priv = priv;
	/* one reference per mapping; see netmap_dtor_locked() */
	priv->np_refcount++;
	NMG_UNLOCK();

	/* make sure the shared memory region is set up */
	error = netmap_get_memory(priv);
	if (error)
		goto err_deref;

	obj = cdev_pager_allocate(vmh, OBJT_DEVICE,
		&netmap_cdev_pager_ops, objsize, prot,
		*foff, NULL);
	if (obj == NULL) {
		D("cdev_pager_allocate failed");
		error = EINVAL;
		goto err_deref;
	}

	*objp = obj;
	return 0;

err_deref:
	/* undo the refcount taken above, under the global lock */
	NMG_LOCK();
	priv->np_refcount--;
err_unlock:
	NMG_UNLOCK();
// err:
	free(vmh, M_DEVBUF);
	return error;
}
14408241616dSLuigi Rizzo 
1441f18be576SLuigi Rizzo 
1442ce3ee1e7SLuigi Rizzo // XXX can we remove this ?
14438241616dSLuigi Rizzo static int
14448241616dSLuigi Rizzo netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
14458241616dSLuigi Rizzo {
1446ae10d1afSLuigi Rizzo 	if (netmap_verbose)
1447ae10d1afSLuigi Rizzo 		D("dev %p fflag 0x%x devtype %d td %p",
1448ae10d1afSLuigi Rizzo 			dev, fflag, devtype, td);
14498241616dSLuigi Rizzo 	return 0;
14508241616dSLuigi Rizzo }
14518241616dSLuigi Rizzo 
1452f18be576SLuigi Rizzo 
14538241616dSLuigi Rizzo static int
14548241616dSLuigi Rizzo netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
14558241616dSLuigi Rizzo {
14568241616dSLuigi Rizzo 	struct netmap_priv_d *priv;
14578241616dSLuigi Rizzo 	int error;
14588241616dSLuigi Rizzo 
1459ce3ee1e7SLuigi Rizzo 	(void)dev;
1460ce3ee1e7SLuigi Rizzo 	(void)oflags;
1461ce3ee1e7SLuigi Rizzo 	(void)devtype;
1462ce3ee1e7SLuigi Rizzo 	(void)td;
1463ce3ee1e7SLuigi Rizzo 
1464ce3ee1e7SLuigi Rizzo 	// XXX wait or nowait ?
14658241616dSLuigi Rizzo 	priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
14668241616dSLuigi Rizzo 			      M_NOWAIT | M_ZERO);
14678241616dSLuigi Rizzo 	if (priv == NULL)
14688241616dSLuigi Rizzo 		return ENOMEM;
14698241616dSLuigi Rizzo 
14708241616dSLuigi Rizzo 	error = devfs_set_cdevpriv(priv, netmap_dtor);
14718241616dSLuigi Rizzo 	if (error)
14728241616dSLuigi Rizzo 	        return error;
14738241616dSLuigi Rizzo 
1474ce3ee1e7SLuigi Rizzo 	priv->np_refcount = 1;
1475ce3ee1e7SLuigi Rizzo 
14768241616dSLuigi Rizzo 	return 0;
147768b8534bSLuigi Rizzo }
1478f196ce38SLuigi Rizzo #endif /* __FreeBSD__ */
147968b8534bSLuigi Rizzo 
148068b8534bSLuigi Rizzo 
148168b8534bSLuigi Rizzo /*
148202ad4083SLuigi Rizzo  * Handlers for synchronization of the queues from/to the host.
1483091fd0abSLuigi Rizzo  * Netmap has two operating modes:
1484091fd0abSLuigi Rizzo  * - in the default mode, the rings connected to the host stack are
1485091fd0abSLuigi Rizzo  *   just another ring pair managed by userspace;
1486091fd0abSLuigi Rizzo  * - in transparent mode (XXX to be defined) incoming packets
1487091fd0abSLuigi Rizzo  *   (from the host or the NIC) are marked as NS_FORWARD upon
1488091fd0abSLuigi Rizzo  *   arrival, and the user application has a chance to reset the
1489091fd0abSLuigi Rizzo  *   flag for packets that should be dropped.
1490091fd0abSLuigi Rizzo  *   On the RXSYNC or poll(), packets in RX rings between
 *   kring->nr_hwcur and ring->cur with NS_FORWARD still set are moved
1492091fd0abSLuigi Rizzo  *   to the other side.
1493091fd0abSLuigi Rizzo  * The transfer NIC --> host is relatively easy, just encapsulate
1494091fd0abSLuigi Rizzo  * into mbufs and we are done. The host --> NIC side is slightly
1495091fd0abSLuigi Rizzo  * harder because there might not be room in the tx ring so it
1496091fd0abSLuigi Rizzo  * might take a while before releasing the buffer.
1497091fd0abSLuigi Rizzo  */
1498091fd0abSLuigi Rizzo 
1499f18be576SLuigi Rizzo 
1500091fd0abSLuigi Rizzo /*
1501091fd0abSLuigi Rizzo  * pass a chain of buffers to the host stack as coming from 'dst'
1502091fd0abSLuigi Rizzo  */
1503091fd0abSLuigi Rizzo static void
1504091fd0abSLuigi Rizzo netmap_send_up(struct ifnet *dst, struct mbuf *head)
1505091fd0abSLuigi Rizzo {
1506091fd0abSLuigi Rizzo 	struct mbuf *m;
1507091fd0abSLuigi Rizzo 
1508091fd0abSLuigi Rizzo 	/* send packets up, outside the lock */
1509091fd0abSLuigi Rizzo 	while ((m = head) != NULL) {
1510091fd0abSLuigi Rizzo 		head = head->m_nextpkt;
1511091fd0abSLuigi Rizzo 		m->m_nextpkt = NULL;
1512091fd0abSLuigi Rizzo 		if (netmap_verbose & NM_VERB_HOST)
1513091fd0abSLuigi Rizzo 			D("sending up pkt %p size %d", m, MBUF_LEN(m));
1514091fd0abSLuigi Rizzo 		NM_SEND_UP(dst, m);
1515091fd0abSLuigi Rizzo 	}
1516091fd0abSLuigi Rizzo }
1517091fd0abSLuigi Rizzo 
/* simple FIFO of mbufs, linked through m_nextpkt */
struct mbq {
	struct mbuf *head;	/* first packet, NULL if empty */
	struct mbuf *tail;	/* last packet, for O(1) append */
	int count;		/* number of packets queued */
};
1523091fd0abSLuigi Rizzo 
1524f18be576SLuigi Rizzo 
1525091fd0abSLuigi Rizzo /*
1526091fd0abSLuigi Rizzo  * put a copy of the buffers marked NS_FORWARD into an mbuf chain.
1527091fd0abSLuigi Rizzo  * Run from hwcur to cur - reserved
1528091fd0abSLuigi Rizzo  */
1529091fd0abSLuigi Rizzo static void
1530091fd0abSLuigi Rizzo netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
1531091fd0abSLuigi Rizzo {
1532091fd0abSLuigi Rizzo 	/* Take packets from hwcur to cur-reserved and pass them up.
1533091fd0abSLuigi Rizzo 	 * In case of no buffers we give up. At the end of the loop,
1534091fd0abSLuigi Rizzo 	 * the queue is drained in all cases.
1535091fd0abSLuigi Rizzo 	 * XXX handle reserved
1536091fd0abSLuigi Rizzo 	 */
1537ce3ee1e7SLuigi Rizzo 	u_int lim = kring->nkr_num_slots - 1;
1538091fd0abSLuigi Rizzo 	struct mbuf *m, *tail = q->tail;
1539ce3ee1e7SLuigi Rizzo 	u_int k = kring->ring->cur, n = kring->ring->reserved;
1540ce3ee1e7SLuigi Rizzo 	struct netmap_mem_d *nmd = kring->na->nm_mem;
1541091fd0abSLuigi Rizzo 
1542ce3ee1e7SLuigi Rizzo 	/* compute the final position, ring->cur - ring->reserved */
1543ce3ee1e7SLuigi Rizzo 	if (n > 0) {
1544ce3ee1e7SLuigi Rizzo 		if (k < n)
1545ce3ee1e7SLuigi Rizzo 			k += kring->nkr_num_slots;
1546ce3ee1e7SLuigi Rizzo 		k += n;
1547ce3ee1e7SLuigi Rizzo 	}
1548091fd0abSLuigi Rizzo 	for (n = kring->nr_hwcur; n != k;) {
1549091fd0abSLuigi Rizzo 		struct netmap_slot *slot = &kring->ring->slot[n];
1550091fd0abSLuigi Rizzo 
1551ce3ee1e7SLuigi Rizzo 		n = nm_next(n, lim);
1552091fd0abSLuigi Rizzo 		if ((slot->flags & NS_FORWARD) == 0 && !force)
1553091fd0abSLuigi Rizzo 			continue;
1554ce3ee1e7SLuigi Rizzo 		if (slot->len < 14 || slot->len > NETMAP_BDG_BUF_SIZE(nmd)) {
1555091fd0abSLuigi Rizzo 			D("bad pkt at %d len %d", n, slot->len);
1556091fd0abSLuigi Rizzo 			continue;
1557091fd0abSLuigi Rizzo 		}
1558091fd0abSLuigi Rizzo 		slot->flags &= ~NS_FORWARD; // XXX needed ?
1559ce3ee1e7SLuigi Rizzo 		/* XXX adapt to the case of a multisegment packet */
1560ce3ee1e7SLuigi Rizzo 		m = m_devget(BDG_NMB(nmd, slot), slot->len, 0, kring->na->ifp, NULL);
1561091fd0abSLuigi Rizzo 
1562091fd0abSLuigi Rizzo 		if (m == NULL)
1563091fd0abSLuigi Rizzo 			break;
1564091fd0abSLuigi Rizzo 		if (tail)
1565091fd0abSLuigi Rizzo 			tail->m_nextpkt = m;
1566091fd0abSLuigi Rizzo 		else
1567091fd0abSLuigi Rizzo 			q->head = m;
1568091fd0abSLuigi Rizzo 		tail = m;
1569091fd0abSLuigi Rizzo 		q->count++;
1570091fd0abSLuigi Rizzo 		m->m_nextpkt = NULL;
1571091fd0abSLuigi Rizzo 	}
1572091fd0abSLuigi Rizzo 	q->tail = tail;
1573091fd0abSLuigi Rizzo }
1574091fd0abSLuigi Rizzo 
1575f18be576SLuigi Rizzo 
/*
 * The host ring has packets from nr_hwcur to (cur - reserved)
 * to be sent down to the NIC.
 * We need to use the queue lock on the source (host RX ring)
 * to protect against netmap_transmit.
 * If the user is well behaved we do not need to acquire locks
 * on the destination(s),
 * so we only need to make sure that there are no panics because
 * of user errors.
 * XXX verify
 *
 * We scan the tx rings, which have just been
 * flushed so nr_hwcur == cur. Pushing packets down means
 * increment cur and decrement avail.
 * XXX to be verified
 */
static void
netmap_sw_to_nic(struct netmap_adapter *na)
{
	/* source: the host RX ring (last entry of rx_rings) */
	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
	struct netmap_kring *k1 = &na->tx_rings[0];
	u_int i, howmany, src_lim, dst_lim;

	/* XXX we should also check that the carrier is on */
	if (kring->nkr_stopped)
		return;

	mtx_lock(&kring->q_lock);

	if (kring->nkr_stopped)	/* re-check under the lock */
		goto out;

	howmany = kring->nr_hwavail;	/* XXX otherwise cur - reserved - nr_hwcur */

	src_lim = kring->nkr_num_slots - 1;
	for (i = 0; howmany > 0 && i < na->num_tx_rings; i++, k1++) {
		ND("%d packets left to ring %d (space %d)", howmany, i, k1->nr_hwavail);
		dst_lim = k1->nkr_num_slots - 1;
		while (howmany > 0 && k1->ring->avail > 0) {
			struct netmap_slot *src, *dst, tmp;
			/* swap the buffers between source and destination
			 * slots (zero-copy), marking both NS_BUF_CHANGED so
			 * the respective owners re-load the buffer address */
			src = &kring->ring->slot[kring->nr_hwcur];
			dst = &k1->ring->slot[k1->ring->cur];
			tmp = *src;
			src->buf_idx = dst->buf_idx;
			src->flags = NS_BUF_CHANGED;

			dst->buf_idx = tmp.buf_idx;
			dst->len = tmp.len;
			dst->flags = NS_BUF_CHANGED;
			ND("out len %d buf %d from %d to %d",
				dst->len, dst->buf_idx,
				kring->nr_hwcur, k1->ring->cur);

			kring->nr_hwcur = nm_next(kring->nr_hwcur, src_lim);
			howmany--;
			kring->nr_hwavail--;
			k1->ring->cur = nm_next(k1->ring->cur, dst_lim);
			k1->ring->avail--;
		}
		kring->ring->cur = kring->nr_hwcur; // XXX
		/* NOTE(review): k1 is also advanced by the for-loop header,
		 * so it moves by TWO krings per iteration — only every other
		 * tx ring is used, and for num_tx_rings > 1 k1 can point past
		 * tx_rings[num_tx_rings]. Looks like a bug ("XXX why?" below
		 * is the original author's own doubt) — confirm before fixing. */
		k1++; // XXX why?
	}
out:
	mtx_unlock(&kring->q_lock);
}
1641091fd0abSLuigi Rizzo 
1642f18be576SLuigi Rizzo 
1643091fd0abSLuigi Rizzo /*
1644ce3ee1e7SLuigi Rizzo  * netmap_txsync_to_host() passes packets up. We are called from a
164502ad4083SLuigi Rizzo  * system call in user process context, and the only contention
164602ad4083SLuigi Rizzo  * can be among multiple user threads erroneously calling
1647091fd0abSLuigi Rizzo  * this routine concurrently.
164868b8534bSLuigi Rizzo  */
164968b8534bSLuigi Rizzo static void
1650ce3ee1e7SLuigi Rizzo netmap_txsync_to_host(struct netmap_adapter *na)
165168b8534bSLuigi Rizzo {
1652d76bf4ffSLuigi Rizzo 	struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
165368b8534bSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
1654091fd0abSLuigi Rizzo 	u_int k, lim = kring->nkr_num_slots - 1;
1655ce3ee1e7SLuigi Rizzo 	struct mbq q = { NULL, NULL, 0 };
165668b8534bSLuigi Rizzo 
1657ce3ee1e7SLuigi Rizzo 	if (nm_kr_tryget(kring)) {
1658ce3ee1e7SLuigi Rizzo 		D("ring %p busy (user error)", kring);
165902ad4083SLuigi Rizzo 		return;
166002ad4083SLuigi Rizzo 	}
1661ce3ee1e7SLuigi Rizzo 	k = ring->cur;
1662ce3ee1e7SLuigi Rizzo 	if (k > lim) {
1663ce3ee1e7SLuigi Rizzo 		D("invalid ring index in stack TX kring %p", kring);
1664ce3ee1e7SLuigi Rizzo 		netmap_ring_reinit(kring);
1665ce3ee1e7SLuigi Rizzo 		nm_kr_put(kring);
1666ce3ee1e7SLuigi Rizzo 		return;
1667ce3ee1e7SLuigi Rizzo 	}
166868b8534bSLuigi Rizzo 
166968b8534bSLuigi Rizzo 	/* Take packets from hwcur to cur and pass them up.
167068b8534bSLuigi Rizzo 	 * In case of no buffers we give up. At the end of the loop,
167168b8534bSLuigi Rizzo 	 * the queue is drained in all cases.
167268b8534bSLuigi Rizzo 	 */
1673091fd0abSLuigi Rizzo 	netmap_grab_packets(kring, &q, 1);
167402ad4083SLuigi Rizzo 	kring->nr_hwcur = k;
167568b8534bSLuigi Rizzo 	kring->nr_hwavail = ring->avail = lim;
167668b8534bSLuigi Rizzo 
1677ce3ee1e7SLuigi Rizzo 	nm_kr_put(kring);
1678091fd0abSLuigi Rizzo 	netmap_send_up(na->ifp, q.head);
167968b8534bSLuigi Rizzo }
168068b8534bSLuigi Rizzo 
1681f18be576SLuigi Rizzo 
1682ce3ee1e7SLuigi Rizzo /*
1683ce3ee1e7SLuigi Rizzo  * This is the 'txsync' handler to send from a software ring to the
1684ce3ee1e7SLuigi Rizzo  * host stack.
1685ce3ee1e7SLuigi Rizzo  */
1686f18be576SLuigi Rizzo /* SWNA(ifp)->txrings[0] is always NA(ifp)->txrings[NA(ifp)->num_txrings] */
1687f18be576SLuigi Rizzo static int
1688ce3ee1e7SLuigi Rizzo netmap_bdg_to_host(struct ifnet *ifp, u_int ring_nr, int flags)
1689f18be576SLuigi Rizzo {
1690f18be576SLuigi Rizzo 	(void)ring_nr;
1691ce3ee1e7SLuigi Rizzo 	(void)flags;
1692ce3ee1e7SLuigi Rizzo 	if (netmap_verbose > 255)
1693ce3ee1e7SLuigi Rizzo 		RD(5, "sync to host %s ring %d", ifp->if_xname, ring_nr);
1694ce3ee1e7SLuigi Rizzo 	netmap_txsync_to_host(NA(ifp));
1695f18be576SLuigi Rizzo 	return 0;
1696f18be576SLuigi Rizzo }
1697f18be576SLuigi Rizzo 
1698f18be576SLuigi Rizzo 
/*
 * rxsync backend for packets coming from the host stack.
 * They have been put in the queue by netmap_transmit() so we
 * need to protect access to the kring using a lock.
 *
 * This routine also does the selrecord if called from the poll handler
 * (we know because td != NULL).
 *
 * NOTE: on linux, selrecord() is defined as a macro and uses pwait
 *     as an additional hidden argument.
 */
static void
netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait)
{
	/* the host RX ring is the last entry of rx_rings */
	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
	struct netmap_ring *ring = kring->ring;
	u_int j, n, lim = kring->nkr_num_slots;
	u_int k = ring->cur, resvd = ring->reserved;

	(void)pwait;	/* disable unused warnings */

	if (kring->nkr_stopped) /* check a first time without lock */
		return;

	/* XXX as an optimization we could reuse na->core_lock */
	mtx_lock(&kring->q_lock);

	if (kring->nkr_stopped)  /* check again with lock held */
		goto unlock_out;

	if (k >= lim) {
		/* userspace handed us a bogus cur: reset the ring */
		netmap_ring_reinit(kring);
		goto unlock_out;
	}
	/* new packets are already set in nr_hwavail */
	/* skip past packets that userspace has released */
	j = kring->nr_hwcur;
	if (resvd > 0) {
		if (resvd + ring->avail >= lim + 1) {
			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
			ring->reserved = resvd = 0; // XXX panic...
		}
		/* effective cur is cur - reserved, modulo ring size */
		k = (k >= resvd) ? k - resvd : k + lim - resvd;
        }
	if (j != k) {
		/* userspace consumed n slots: advance hwcur and shrink hwavail */
		n = k >= j ? k - j : k + lim - j;
		kring->nr_hwavail -= n;
		kring->nr_hwcur = k;
	}
	/* publish how many packets are available to userspace */
	k = ring->avail = kring->nr_hwavail - resvd;
	if (k == 0 && td)
		selrecord(td, &kring->si);	/* nothing ready: arm poll */
	if (k && (netmap_verbose & NM_VERB_HOST))
		D("%d pkts from stack", k);
unlock_out:

	mtx_unlock(&kring->q_lock);
}
175768b8534bSLuigi Rizzo 
175868b8534bSLuigi Rizzo 
175968b8534bSLuigi Rizzo /*
1760ce3ee1e7SLuigi Rizzo  * MUST BE CALLED UNDER NMG_LOCK()
1761ce3ee1e7SLuigi Rizzo  *
176268b8534bSLuigi Rizzo  * get a refcounted reference to an interface.
1763ce3ee1e7SLuigi Rizzo  * This is always called in the execution of an ioctl().
1764ce3ee1e7SLuigi Rizzo  *
176568b8534bSLuigi Rizzo  * Return ENXIO if the interface does not exist, EINVAL if netmap
176668b8534bSLuigi Rizzo  * is not supported by the interface.
176768b8534bSLuigi Rizzo  * If successful, hold a reference.
1768f18be576SLuigi Rizzo  *
1769ce3ee1e7SLuigi Rizzo  * When the NIC is attached to a bridge, reference is managed
1770f18be576SLuigi Rizzo  * at na->na_bdg_refcount using ADD/DROP_BDG_REF() as well as
1771f18be576SLuigi Rizzo  * virtual ports.  Hence, on the final DROP_BDG_REF(), the NIC
1772f18be576SLuigi Rizzo  * is detached from the bridge, then ifp's refcount is dropped (this
1773f18be576SLuigi Rizzo  * is equivalent to that ifp is destroyed in case of virtual ports.
1774f18be576SLuigi Rizzo  *
1775f18be576SLuigi Rizzo  * This function uses if_rele() when we want to prevent the NIC from
1776f18be576SLuigi Rizzo  * being detached from the bridge in error handling.  But once refcount
1777f18be576SLuigi Rizzo  * is acquired by this function, it must be released using nm_if_rele().
177868b8534bSLuigi Rizzo  */
177968b8534bSLuigi Rizzo static int
1780ce3ee1e7SLuigi Rizzo get_ifp(struct nmreq *nmr, struct ifnet **ifp, int create)
178168b8534bSLuigi Rizzo {
1782f18be576SLuigi Rizzo 	const char *name = nmr->nr_name;
1783f18be576SLuigi Rizzo 	int namelen = strlen(name);
1784f196ce38SLuigi Rizzo 	struct ifnet *iter = NULL;
1785f18be576SLuigi Rizzo 	int no_prefix = 0;
1786f196ce38SLuigi Rizzo 
1787ce3ee1e7SLuigi Rizzo 	/* first try to see if this is a bridge port. */
1788f196ce38SLuigi Rizzo 	struct nm_bridge *b;
1789f18be576SLuigi Rizzo 	struct netmap_adapter *na;
1790ce3ee1e7SLuigi Rizzo 	int i, j, cand = -1, cand2 = -1;
1791ce3ee1e7SLuigi Rizzo 	int needed;
1792f196ce38SLuigi Rizzo 
1793ce3ee1e7SLuigi Rizzo 	NMG_LOCK_ASSERT();
1794ce3ee1e7SLuigi Rizzo 	*ifp = NULL;	/* default */
1795f18be576SLuigi Rizzo 	if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
1796ce3ee1e7SLuigi Rizzo 		no_prefix = 1;	/* no VALE prefix */
1797ce3ee1e7SLuigi Rizzo 		goto no_bridge_port;
1798f18be576SLuigi Rizzo 	}
1799ce3ee1e7SLuigi Rizzo 
1800ce3ee1e7SLuigi Rizzo 	b = nm_find_bridge(name, create);
1801f196ce38SLuigi Rizzo 	if (b == NULL) {
1802f196ce38SLuigi Rizzo 		D("no bridges available for '%s'", name);
1803f196ce38SLuigi Rizzo 		return (ENXIO);
1804f196ce38SLuigi Rizzo 	}
1805ce3ee1e7SLuigi Rizzo 
1806ce3ee1e7SLuigi Rizzo 	/* Now we are sure that name starts with the bridge's name,
1807ce3ee1e7SLuigi Rizzo 	 * lookup the port in the bridge. We need to scan the entire
1808ce3ee1e7SLuigi Rizzo 	 * list. It is not important to hold a WLOCK on the bridge
1809ce3ee1e7SLuigi Rizzo 	 * during the search because NMG_LOCK already guarantees
1810ce3ee1e7SLuigi Rizzo 	 * that there are no other possible writers.
1811ce3ee1e7SLuigi Rizzo 	 */
1812ce3ee1e7SLuigi Rizzo 
1813f196ce38SLuigi Rizzo 	/* lookup in the local list of ports */
1814ce3ee1e7SLuigi Rizzo 	for (j = 0; j < b->bdg_active_ports; j++) {
1815ce3ee1e7SLuigi Rizzo 		i = b->bdg_port_index[j];
1816ce3ee1e7SLuigi Rizzo 		na = b->bdg_ports[i];
1817ce3ee1e7SLuigi Rizzo 		// KASSERT(na != NULL);
1818f18be576SLuigi Rizzo 		iter = na->ifp;
1819f18be576SLuigi Rizzo 		/* XXX make sure the name only contains one : */
1820f18be576SLuigi Rizzo 		if (!strcmp(iter->if_xname, name) /* virtual port */ ||
1821ce3ee1e7SLuigi Rizzo 		    (namelen > b->bdg_namelen && !strcmp(iter->if_xname,
1822ce3ee1e7SLuigi Rizzo 		    name + b->bdg_namelen + 1)) /* NIC */) {
1823f196ce38SLuigi Rizzo 			ADD_BDG_REF(iter);
1824ce3ee1e7SLuigi Rizzo 			ND("found existing if %s refs %d", name,
1825ce3ee1e7SLuigi Rizzo 				NA(iter)->na_bdg_refcount);
1826ce3ee1e7SLuigi Rizzo 			*ifp = iter;
1827ce3ee1e7SLuigi Rizzo 			/* we are done, this is surely netmap capable */
1828ce3ee1e7SLuigi Rizzo 			return 0;
1829f196ce38SLuigi Rizzo 		}
1830f196ce38SLuigi Rizzo 	}
1831ce3ee1e7SLuigi Rizzo 	/* not found, should we create it? */
1832ce3ee1e7SLuigi Rizzo 	if (!create)
1833ce3ee1e7SLuigi Rizzo 		return ENXIO;
1834ce3ee1e7SLuigi Rizzo 	/* yes we should, see if we have space to attach entries */
1835ce3ee1e7SLuigi Rizzo 	needed = 2; /* in some cases we only need 1 */
1836ce3ee1e7SLuigi Rizzo 	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
1837ce3ee1e7SLuigi Rizzo 		D("bridge full %d, cannot create new port", b->bdg_active_ports);
1838f196ce38SLuigi Rizzo 		return EINVAL;
1839f196ce38SLuigi Rizzo 	}
1840ce3ee1e7SLuigi Rizzo 	/* record the next two ports available, but do not allocate yet */
1841ce3ee1e7SLuigi Rizzo 	cand = b->bdg_port_index[b->bdg_active_ports];
1842ce3ee1e7SLuigi Rizzo 	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
1843ce3ee1e7SLuigi Rizzo 	ND("+++ bridge %s port %s used %d avail %d %d",
1844ce3ee1e7SLuigi Rizzo 		b->bdg_basename, name, b->bdg_active_ports, cand, cand2);
1845ce3ee1e7SLuigi Rizzo 
1846f18be576SLuigi Rizzo 	/*
1847f18be576SLuigi Rizzo 	 * try see if there is a matching NIC with this name
1848f18be576SLuigi Rizzo 	 * (after the bridge's name)
1849f18be576SLuigi Rizzo 	 */
1850ce3ee1e7SLuigi Rizzo 	iter = ifunit_ref(name + b->bdg_namelen + 1);
1851f18be576SLuigi Rizzo 	if (!iter) { /* this is a virtual port */
1852f18be576SLuigi Rizzo 		/* Create a temporary NA with arguments, then
1853f18be576SLuigi Rizzo 		 * bdg_netmap_attach() will allocate the real one
1854f18be576SLuigi Rizzo 		 * and attach it to the ifp
1855f18be576SLuigi Rizzo 		 */
1856f18be576SLuigi Rizzo 		struct netmap_adapter tmp_na;
1857*5ab0d24dSLuigi Rizzo 		int error;
1858f18be576SLuigi Rizzo 
1859ce3ee1e7SLuigi Rizzo 		if (nmr->nr_cmd) {
1860ce3ee1e7SLuigi Rizzo 			/* nr_cmd must be 0 for a virtual port */
1861ce3ee1e7SLuigi Rizzo 			return EINVAL;
1862ce3ee1e7SLuigi Rizzo 		}
1863f18be576SLuigi Rizzo 		bzero(&tmp_na, sizeof(tmp_na));
1864f18be576SLuigi Rizzo 		/* bound checking */
1865f18be576SLuigi Rizzo 		tmp_na.num_tx_rings = nmr->nr_tx_rings;
1866ce3ee1e7SLuigi Rizzo 		nm_bound_var(&tmp_na.num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1867ce3ee1e7SLuigi Rizzo 		nmr->nr_tx_rings = tmp_na.num_tx_rings; // write back
1868f18be576SLuigi Rizzo 		tmp_na.num_rx_rings = nmr->nr_rx_rings;
1869ce3ee1e7SLuigi Rizzo 		nm_bound_var(&tmp_na.num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1870ce3ee1e7SLuigi Rizzo 		nmr->nr_rx_rings = tmp_na.num_rx_rings; // write back
1871ce3ee1e7SLuigi Rizzo 		nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1872ce3ee1e7SLuigi Rizzo 				1, NM_BDG_MAXSLOTS, NULL);
1873ce3ee1e7SLuigi Rizzo 		tmp_na.num_tx_desc = nmr->nr_tx_slots;
1874ce3ee1e7SLuigi Rizzo 		nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1875ce3ee1e7SLuigi Rizzo 				1, NM_BDG_MAXSLOTS, NULL);
1876ce3ee1e7SLuigi Rizzo 		tmp_na.num_rx_desc = nmr->nr_rx_slots;
1877f18be576SLuigi Rizzo 
1878ce3ee1e7SLuigi Rizzo 	 	/* create a struct ifnet for the new port.
1879ce3ee1e7SLuigi Rizzo 		 * need M_NOWAIT as we are under nma_lock
1880ce3ee1e7SLuigi Rizzo 		 */
1881f18be576SLuigi Rizzo 		iter = malloc(sizeof(*iter), M_DEVBUF, M_NOWAIT | M_ZERO);
1882f196ce38SLuigi Rizzo 		if (!iter)
1883ce3ee1e7SLuigi Rizzo 			return ENOMEM;
1884ce3ee1e7SLuigi Rizzo 
1885f196ce38SLuigi Rizzo 		strcpy(iter->if_xname, name);
1886f18be576SLuigi Rizzo 		tmp_na.ifp = iter;
1887f18be576SLuigi Rizzo 		/* bdg_netmap_attach creates a struct netmap_adapter */
1888*5ab0d24dSLuigi Rizzo 		error = bdg_netmap_attach(&tmp_na);
1889*5ab0d24dSLuigi Rizzo 		if (error) {
1890*5ab0d24dSLuigi Rizzo 			D("error %d", error);
1891*5ab0d24dSLuigi Rizzo 			free(iter, M_DEVBUF);
1892*5ab0d24dSLuigi Rizzo 			return error;
1893*5ab0d24dSLuigi Rizzo 		}
1894ce3ee1e7SLuigi Rizzo 		cand2 = -1;	/* only need one port */
1895f18be576SLuigi Rizzo 	} else if (NETMAP_CAPABLE(iter)) { /* this is a NIC */
1896ce3ee1e7SLuigi Rizzo 		/* make sure the NIC is not already in use */
1897ce3ee1e7SLuigi Rizzo 		if (NETMAP_OWNED_BY_ANY(iter)) {
1898ce3ee1e7SLuigi Rizzo 			D("NIC %s busy, cannot attach to bridge",
1899ce3ee1e7SLuigi Rizzo 				iter->if_xname);
1900f18be576SLuigi Rizzo 			if_rele(iter); /* don't detach from bridge */
1901ce3ee1e7SLuigi Rizzo 			return EINVAL;
1902f18be576SLuigi Rizzo 		}
1903ce3ee1e7SLuigi Rizzo 		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
1904ce3ee1e7SLuigi Rizzo 			cand2 = -1; /* only need one port */
1905ce3ee1e7SLuigi Rizzo 	} else { /* not a netmap-capable NIC */
1906ce3ee1e7SLuigi Rizzo 		if_rele(iter); /* don't detach from bridge */
1907ce3ee1e7SLuigi Rizzo 		return EINVAL;
1908ce3ee1e7SLuigi Rizzo 	}
1909ce3ee1e7SLuigi Rizzo 	na = NA(iter);
1910ce3ee1e7SLuigi Rizzo 
1911ce3ee1e7SLuigi Rizzo 	BDG_WLOCK(b);
1912ce3ee1e7SLuigi Rizzo 	na->bdg_port = cand;
1913ce3ee1e7SLuigi Rizzo 	ND("NIC  %p to bridge port %d", NA(iter), cand);
1914ce3ee1e7SLuigi Rizzo 	/* bind the port to the bridge (virtual ports are not active) */
1915ce3ee1e7SLuigi Rizzo 	b->bdg_ports[cand] = na;
1916ce3ee1e7SLuigi Rizzo 	na->na_bdg = b;
1917ce3ee1e7SLuigi Rizzo 	b->bdg_active_ports++;
1918ce3ee1e7SLuigi Rizzo 	if (cand2 >= 0) {
1919ce3ee1e7SLuigi Rizzo 		/* also bind the host stack to the bridge */
1920ce3ee1e7SLuigi Rizzo 		b->bdg_ports[cand2] = SWNA(iter);
1921f18be576SLuigi Rizzo 		SWNA(iter)->bdg_port = cand2;
1922f18be576SLuigi Rizzo 		SWNA(iter)->na_bdg = b;
1923ce3ee1e7SLuigi Rizzo 		b->bdg_active_ports++;
1924ce3ee1e7SLuigi Rizzo 		ND("host %p to bridge port %d", SWNA(iter), cand2);
1925f18be576SLuigi Rizzo 	}
1926ce3ee1e7SLuigi Rizzo 	ADD_BDG_REF(iter);	// XXX one or two ?
1927ce3ee1e7SLuigi Rizzo 	ND("if %s refs %d", name, NA(iter)->na_bdg_refcount);
1928f18be576SLuigi Rizzo 	BDG_WUNLOCK(b);
1929ce3ee1e7SLuigi Rizzo 	*ifp = iter;
1930ce3ee1e7SLuigi Rizzo 	return 0;
1931ce3ee1e7SLuigi Rizzo 
1932ce3ee1e7SLuigi Rizzo no_bridge_port:
1933f196ce38SLuigi Rizzo 	*ifp = iter;
1934f196ce38SLuigi Rizzo 	if (! *ifp)
193568b8534bSLuigi Rizzo 		*ifp = ifunit_ref(name);
193668b8534bSLuigi Rizzo 	if (*ifp == NULL)
193768b8534bSLuigi Rizzo 		return (ENXIO);
1938ce3ee1e7SLuigi Rizzo 
1939f18be576SLuigi Rizzo 	if (NETMAP_CAPABLE(*ifp)) {
1940f18be576SLuigi Rizzo 		/* Users cannot use the NIC attached to a bridge directly */
1941f18be576SLuigi Rizzo 		if (no_prefix && NETMAP_OWNED_BY_KERN(*ifp)) {
1942f18be576SLuigi Rizzo 			if_rele(*ifp); /* don't detach from bridge */
1943f18be576SLuigi Rizzo 			return EINVAL;
1944f18be576SLuigi Rizzo 		} else
194568b8534bSLuigi Rizzo 			return 0;	/* valid pointer, we hold the refcount */
1946f18be576SLuigi Rizzo 	}
1947f196ce38SLuigi Rizzo 	nm_if_rele(*ifp);
194868b8534bSLuigi Rizzo 	return EINVAL;	// not NETMAP capable
194968b8534bSLuigi Rizzo }
195068b8534bSLuigi Rizzo 
195168b8534bSLuigi Rizzo 
195268b8534bSLuigi Rizzo /*
195368b8534bSLuigi Rizzo  * Error routine called when txsync/rxsync detects an error.
195468b8534bSLuigi Rizzo  * Can't do much more than resetting cur = hwcur, avail = hwavail.
195568b8534bSLuigi Rizzo  * Return 1 on reinit.
1956506cc70cSLuigi Rizzo  *
1957506cc70cSLuigi Rizzo  * This routine is only called by the upper half of the kernel.
1958506cc70cSLuigi Rizzo  * It only reads hwcur (which is changed only by the upper half, too)
1959506cc70cSLuigi Rizzo  * and hwavail (which may be changed by the lower half, but only on
1960506cc70cSLuigi Rizzo  * a tx ring and only to increase it, so any error will be recovered
1961506cc70cSLuigi Rizzo  * on the next call). For the above, we don't strictly need to call
1962506cc70cSLuigi Rizzo  * it under lock.
196368b8534bSLuigi Rizzo  */
196468b8534bSLuigi Rizzo int
196568b8534bSLuigi Rizzo netmap_ring_reinit(struct netmap_kring *kring)
196668b8534bSLuigi Rizzo {
196768b8534bSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
196868b8534bSLuigi Rizzo 	u_int i, lim = kring->nkr_num_slots - 1;
196968b8534bSLuigi Rizzo 	int errors = 0;
197068b8534bSLuigi Rizzo 
1971ce3ee1e7SLuigi Rizzo 	// XXX KASSERT nm_kr_tryget
19728241616dSLuigi Rizzo 	RD(10, "called for %s", kring->na->ifp->if_xname);
197368b8534bSLuigi Rizzo 	if (ring->cur > lim)
197468b8534bSLuigi Rizzo 		errors++;
197568b8534bSLuigi Rizzo 	for (i = 0; i <= lim; i++) {
197668b8534bSLuigi Rizzo 		u_int idx = ring->slot[i].buf_idx;
197768b8534bSLuigi Rizzo 		u_int len = ring->slot[i].len;
197868b8534bSLuigi Rizzo 		if (idx < 2 || idx >= netmap_total_buffers) {
197968b8534bSLuigi Rizzo 			if (!errors++)
198068b8534bSLuigi Rizzo 				D("bad buffer at slot %d idx %d len %d ", i, idx, len);
198168b8534bSLuigi Rizzo 			ring->slot[i].buf_idx = 0;
198268b8534bSLuigi Rizzo 			ring->slot[i].len = 0;
1983ce3ee1e7SLuigi Rizzo 		} else if (len > NETMAP_BDG_BUF_SIZE(kring->na->nm_mem)) {
198468b8534bSLuigi Rizzo 			ring->slot[i].len = 0;
198568b8534bSLuigi Rizzo 			if (!errors++)
198668b8534bSLuigi Rizzo 				D("bad len %d at slot %d idx %d",
198768b8534bSLuigi Rizzo 					len, i, idx);
198868b8534bSLuigi Rizzo 		}
198968b8534bSLuigi Rizzo 	}
199068b8534bSLuigi Rizzo 	if (errors) {
199168b8534bSLuigi Rizzo 		int pos = kring - kring->na->tx_rings;
1992d76bf4ffSLuigi Rizzo 		int n = kring->na->num_tx_rings + 1;
199368b8534bSLuigi Rizzo 
19948241616dSLuigi Rizzo 		RD(10, "total %d errors", errors);
199568b8534bSLuigi Rizzo 		errors++;
19968241616dSLuigi Rizzo 		RD(10, "%s %s[%d] reinit, cur %d -> %d avail %d -> %d",
199768b8534bSLuigi Rizzo 			kring->na->ifp->if_xname,
199868b8534bSLuigi Rizzo 			pos < n ?  "TX" : "RX", pos < n ? pos : pos - n,
199968b8534bSLuigi Rizzo 			ring->cur, kring->nr_hwcur,
200068b8534bSLuigi Rizzo 			ring->avail, kring->nr_hwavail);
200168b8534bSLuigi Rizzo 		ring->cur = kring->nr_hwcur;
200268b8534bSLuigi Rizzo 		ring->avail = kring->nr_hwavail;
200368b8534bSLuigi Rizzo 	}
200468b8534bSLuigi Rizzo 	return (errors ? 1 : 0);
200568b8534bSLuigi Rizzo }
200668b8534bSLuigi Rizzo 
200768b8534bSLuigi Rizzo 
200868b8534bSLuigi Rizzo /*
200968b8534bSLuigi Rizzo  * Set the ring ID. For devices with a single queue, a request
201068b8534bSLuigi Rizzo  * for all rings is the same as a single ring.
201168b8534bSLuigi Rizzo  */
201268b8534bSLuigi Rizzo static int
201368b8534bSLuigi Rizzo netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid)
201468b8534bSLuigi Rizzo {
201568b8534bSLuigi Rizzo 	struct ifnet *ifp = priv->np_ifp;
201668b8534bSLuigi Rizzo 	struct netmap_adapter *na = NA(ifp);
201768b8534bSLuigi Rizzo 	u_int i = ringid & NETMAP_RING_MASK;
201864ae02c3SLuigi Rizzo 	/* initially (np_qfirst == np_qlast) we don't want to lock */
2019ce3ee1e7SLuigi Rizzo 	u_int lim = na->num_rx_rings;
202068b8534bSLuigi Rizzo 
2021d76bf4ffSLuigi Rizzo 	if (na->num_tx_rings > lim)
2022d76bf4ffSLuigi Rizzo 		lim = na->num_tx_rings;
202364ae02c3SLuigi Rizzo 	if ( (ringid & NETMAP_HW_RING) && i >= lim) {
202468b8534bSLuigi Rizzo 		D("invalid ring id %d", i);
202568b8534bSLuigi Rizzo 		return (EINVAL);
202668b8534bSLuigi Rizzo 	}
202768b8534bSLuigi Rizzo 	priv->np_ringid = ringid;
202868b8534bSLuigi Rizzo 	if (ringid & NETMAP_SW_RING) {
202964ae02c3SLuigi Rizzo 		priv->np_qfirst = NETMAP_SW_RING;
203064ae02c3SLuigi Rizzo 		priv->np_qlast = 0;
203168b8534bSLuigi Rizzo 	} else if (ringid & NETMAP_HW_RING) {
203268b8534bSLuigi Rizzo 		priv->np_qfirst = i;
203368b8534bSLuigi Rizzo 		priv->np_qlast = i + 1;
203468b8534bSLuigi Rizzo 	} else {
203568b8534bSLuigi Rizzo 		priv->np_qfirst = 0;
203664ae02c3SLuigi Rizzo 		priv->np_qlast = NETMAP_HW_RING ;
203768b8534bSLuigi Rizzo 	}
203868b8534bSLuigi Rizzo 	priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
2039ae10d1afSLuigi Rizzo     if (netmap_verbose) {
204068b8534bSLuigi Rizzo 	if (ringid & NETMAP_SW_RING)
204168b8534bSLuigi Rizzo 		D("ringid %s set to SW RING", ifp->if_xname);
204268b8534bSLuigi Rizzo 	else if (ringid & NETMAP_HW_RING)
204368b8534bSLuigi Rizzo 		D("ringid %s set to HW RING %d", ifp->if_xname,
204468b8534bSLuigi Rizzo 			priv->np_qfirst);
204568b8534bSLuigi Rizzo 	else
204664ae02c3SLuigi Rizzo 		D("ringid %s set to all %d HW RINGS", ifp->if_xname, lim);
2047ae10d1afSLuigi Rizzo     }
204868b8534bSLuigi Rizzo 	return 0;
204968b8534bSLuigi Rizzo }
205068b8534bSLuigi Rizzo 
2051f18be576SLuigi Rizzo 
2052f18be576SLuigi Rizzo /*
2053f18be576SLuigi Rizzo  * possibly move the interface to netmap-mode.
2054f18be576SLuigi Rizzo  * If success it returns a pointer to netmap_if, otherwise NULL.
2055ce3ee1e7SLuigi Rizzo  * This must be called with NMG_LOCK held.
2056f18be576SLuigi Rizzo  */
static struct netmap_if *
netmap_do_regif(struct netmap_priv_d *priv, struct ifnet *ifp,
	uint16_t ringid, int *err)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_if *nifp = NULL;
	int error, need_mem;

	NMG_LOCK_ASSERT();
	/* ring configuration may have changed, fetch from the card */
	netmap_update_config(na);
	priv->np_ifp = ifp;     /* store the reference */
	/* validate and record the requested ring id in priv */
	error = netmap_set_ringid(priv, ringid);
	if (error)
		goto out;
	/* ensure allocators are ready */
	need_mem = !netmap_have_memory_locked(priv);
	if (need_mem) {
		error = netmap_get_memory_locked(priv);
		ND("get_memory returned %d", error);
		if (error)
			goto out;
	}
	/* allocate the netmap_if visible to userspace */
	nifp = netmap_if_new(ifp->if_xname, na);
	if (nifp == NULL) { /* allocation failed */
		/* we should drop the allocator, but only
		 * if we were the ones who grabbed it
		 */
		if (need_mem)
			netmap_drop_memory_locked(priv);
		error = ENOMEM;
		goto out;
	}
	/* account for this extra user of the adapter */
	na->refcount++;
	if (ifp->if_capenable & IFCAP_NETMAP) {
		/* was already set */
	} else {
		u_int i;
		/* Otherwise set the card in netmap mode
		 * and make it use the shared buffers.
		 *
		 * If the interface is attached to a bridge, lock it.
		 */
		if (NETMAP_OWNED_BY_KERN(ifp))
			BDG_WLOCK(NA(ifp)->na_bdg);
		/* init per-queue locks; the extra slot (+1) is for
		 * the host (software) ring
		 */
		for (i = 0 ; i < na->num_tx_rings + 1; i++)
			mtx_init(&na->tx_rings[i].q_lock, "nm_txq_lock",
			    NULL, MTX_DEF);
		for (i = 0 ; i < na->num_rx_rings + 1; i++) {
			mtx_init(&na->rx_rings[i].q_lock, "nm_rxq_lock",
			    NULL, MTX_DEF);
		}
		/* for a hw adapter, point the sw adapter's rings at
		 * the hw adapter's host rings (the last entries)
		 */
		if (nma_is_hw(na)) {
			SWNA(ifp)->tx_rings = &na->tx_rings[na->num_tx_rings];
			SWNA(ifp)->rx_rings = &na->rx_rings[na->num_rx_rings];
		}
		/*
		 * do not core lock because the race is harmless here,
		 * there cannot be any traffic to netmap_transmit()
		 */
		error = na->nm_register(ifp, 1); /* mode on */
		// XXX do we need to nm_alloc_bdgfwd() in all cases ?
		if (!error)
			error = nm_alloc_bdgfwd(na);
		if (error) {
			/* roll back: this also drops refcount and,
			 * if needed, the memory allocator
			 */
			netmap_do_unregif(priv, nifp);
			nifp = NULL;
		}
		if (NETMAP_OWNED_BY_KERN(ifp))
			BDG_WUNLOCK(NA(ifp)->na_bdg);

	}
out:
	*err = error;
	if (nifp != NULL) {
		/*
		 * advertise that the interface is ready by setting np_nifp.
		 * The barrier is needed because readers (poll and *SYNC)
		 * check for priv->np_nifp != NULL without locking
		 */
		wmb(); /* make sure previous writes are visible to all CPUs */
		priv->np_nifp = nifp;
	}
	return nifp;
}
2142f18be576SLuigi Rizzo 
2143f18be576SLuigi Rizzo /* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */
static int
nm_bdg_attach(struct nmreq *nmr)
{
	struct ifnet *ifp;
	struct netmap_if *nifp;
	struct netmap_priv_d *npriv;
	int error;

	/* allocate the kernel-owned private descriptor up front,
	 * before taking NMG_LOCK
	 */
	npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (npriv == NULL)
		return ENOMEM;
	NMG_LOCK();
	error = get_ifp(nmr, &ifp, 1 /* create if not exists */);
	if (error) /* no device, or another bridge or user owns the device */
		goto unlock_exit;
	/* get_ifp() sets na_bdg if this is a physical interface
	 * that we can attach to a switch.
	 */
	if (!NETMAP_OWNED_BY_KERN(ifp)) {
		/* got reference to a virtual port or direct access to a NIC.
		 * perhaps specified no bridge prefix or wrong NIC name
		 */
		error = EINVAL;
		goto unref_exit;
	}

	if (NA(ifp)->refcount > 0) { /* already registered */
		error = EBUSY;
		/* drop the bridge reference taken by get_ifp(),
		 * but leave the port attached to the bridge
		 */
		DROP_BDG_REF(ifp);
		goto unlock_exit;
	}

	/* put the NIC in netmap mode on behalf of the kernel */
	nifp = netmap_do_regif(npriv, ifp, nmr->nr_ringid, &error);
	if (!nifp) {
		goto unref_exit;
	}

	/* remember the kernel-owned priv so detach can unregister */
	NA(ifp)->na_kpriv = npriv;
	NMG_UNLOCK();
	ND("registered %s to netmap-mode", ifp->if_xname);
	return 0;

unref_exit:
	nm_if_rele(ifp);
unlock_exit:
	NMG_UNLOCK();
	/* error paths: release the unused private descriptor */
	bzero(npriv, sizeof(*npriv));
	free(npriv, M_DEVBUF);
	return error;
}
2194f18be576SLuigi Rizzo 
/* Detach a NIC from a VALE switch: undo what nm_bdg_attach() did. */
static int
nm_bdg_detach(struct nmreq *nmr)
{
	struct ifnet *ifp;
	int error;
	int last_instance;

	NMG_LOCK();
	error = get_ifp(nmr, &ifp, 0 /* don't create */);
	if (error) { /* no device, or another bridge or user owns the device */
		goto unlock_exit;
	}
	/* XXX do we need to check this ? */
	if (!NETMAP_OWNED_BY_KERN(ifp)) {
		/* got reference to a virtual port or direct access to a NIC.
		 * perhaps specified no bridge's prefix or wrong NIC's name
		 */
		error = EINVAL;
		goto unref_exit;
	}

	if (NA(ifp)->refcount == 0) { /* not registered */
		error = EINVAL;
		goto unref_exit;
	}

	DROP_BDG_REF(ifp); /* the one from get_ifp */
	last_instance = netmap_dtor_locked(NA(ifp)->na_kpriv); /* unregister */
	NMG_UNLOCK();
	if (!last_instance) {
		/* userspace still has the rings mmapped; refuse to free */
		D("--- error, trying to detach an entry with active mmaps");
		error = EINVAL;
	} else {
		/* last user gone: reclaim the kernel-owned priv
		 * allocated in nm_bdg_attach()
		 */
		struct netmap_priv_d *npriv = NA(ifp)->na_kpriv;
		NA(ifp)->na_kpriv = NULL;

		bzero(npriv, sizeof(*npriv));
		free(npriv, M_DEVBUF);
	}
	return error;

unref_exit:
	nm_if_rele(ifp);
unlock_exit:
	NMG_UNLOCK();
	return error;
}
2242f18be576SLuigi Rizzo 
2243f18be576SLuigi Rizzo 
2244f18be576SLuigi Rizzo /* Initialize necessary fields of sw adapter located in right after hw's
2245f18be576SLuigi Rizzo  * one.  sw adapter attaches a pair of sw rings of the netmap-mode NIC.
2246f18be576SLuigi Rizzo  * It is always activated and deactivated at the same time with the hw's one.
2247f18be576SLuigi Rizzo  * Thus we don't need refcounting on the sw adapter.
2248f18be576SLuigi Rizzo  * Regardless of NIC's feature we use separate lock so that anybody can lock
2249f18be576SLuigi Rizzo  * me independently from the hw adapter.
2250f18be576SLuigi Rizzo  * Make sure nm_register is NULL to be handled as FALSE in nma_is_hw
2251f18be576SLuigi Rizzo  */
2252f18be576SLuigi Rizzo static void
2253f18be576SLuigi Rizzo netmap_attach_sw(struct ifnet *ifp)
2254f18be576SLuigi Rizzo {
2255f18be576SLuigi Rizzo 	struct netmap_adapter *hw_na = NA(ifp);
2256f18be576SLuigi Rizzo 	struct netmap_adapter *na = SWNA(ifp);
2257f18be576SLuigi Rizzo 
2258f18be576SLuigi Rizzo 	na->ifp = ifp;
2259f18be576SLuigi Rizzo 	na->num_rx_rings = na->num_tx_rings = 1;
2260f18be576SLuigi Rizzo 	na->num_tx_desc = hw_na->num_tx_desc;
2261f18be576SLuigi Rizzo 	na->num_rx_desc = hw_na->num_rx_desc;
2262f18be576SLuigi Rizzo 	na->nm_txsync = netmap_bdg_to_host;
2263ce3ee1e7SLuigi Rizzo 	/* we use the same memory allocator as the
2264ce3ee1e7SLuigi Rizzo 	 * the hw adapter */
2265ce3ee1e7SLuigi Rizzo 	na->nm_mem = hw_na->nm_mem;
2266f18be576SLuigi Rizzo }
2267f18be576SLuigi Rizzo 
2268f18be576SLuigi Rizzo 
2269ce3ee1e7SLuigi Rizzo /* exported to kernel callers, e.g. OVS ?
2270ce3ee1e7SLuigi Rizzo  * Entry point.
2271ce3ee1e7SLuigi Rizzo  * Called without NMG_LOCK.
2272ce3ee1e7SLuigi Rizzo  */
int
netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
{
	struct nm_bridge *b;
	struct netmap_adapter *na;
	struct ifnet *iter;
	char *name = nmr->nr_name;
	int cmd = nmr->nr_cmd, namelen = strlen(name);
	int error = 0, i, j;

	switch (cmd) {
	case NETMAP_BDG_ATTACH:
		error = nm_bdg_attach(nmr);
		break;

	case NETMAP_BDG_DETACH:
		error = nm_bdg_detach(nmr);
		break;

	case NETMAP_BDG_LIST:
		/* this is used to enumerate bridges and ports */
		if (namelen) { /* look up indexes of bridge and port */
			/* a name was given: it must start with NM_NAME */
			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
				error = EINVAL;
				break;
			}
			NMG_LOCK();
			b = nm_find_bridge(name, 0 /* don't create */);
			if (!b) {
				error = ENOENT;
				NMG_UNLOCK();
				break;
			}

			/* scan the active ports of this bridge for a
			 * name match; ENOENT if none found
			 */
			error = ENOENT;
			for (j = 0; j < b->bdg_active_ports; j++) {
				i = b->bdg_port_index[j];
				na = b->bdg_ports[i];
				if (na == NULL) {
					D("---AAAAAAAAARGH-------");
					continue;
				}
				iter = na->ifp;
				/* the former and the latter identify a
				 * virtual port and a NIC, respectively
				 */
				if (!strcmp(iter->if_xname, name) ||
				    (namelen > b->bdg_namelen &&
				    !strcmp(iter->if_xname,
				    name + b->bdg_namelen + 1))) {
					/* bridge index */
					nmr->nr_arg1 = b - nm_bridges;
					nmr->nr_arg2 = i; /* port index */
					error = 0;
					break;
				}
			}
			NMG_UNLOCK();
		} else {
			/* return the first non-empty entry starting from
			 * bridge nr_arg1 and port nr_arg2.
			 *
			 * Users can detect the end of the same bridge by
			 * seeing the new and old value of nr_arg1, and can
			 * detect the end of all the bridge by error != 0
			 */
			i = nmr->nr_arg1;
			j = nmr->nr_arg2;

			NMG_LOCK();
			for (error = ENOENT; i < NM_BRIDGES; i++) {
				b = nm_bridges + i;
				if (j >= b->bdg_active_ports) {
					j = 0; /* following bridges scan from 0 */
					continue;
				}
				/* found a port: report its indexes and name */
				nmr->nr_arg1 = i;
				nmr->nr_arg2 = j;
				j = b->bdg_port_index[j];
				na = b->bdg_ports[j];
				iter = na->ifp;
				strncpy(name, iter->if_xname, (size_t)IFNAMSIZ);
				error = 0;
				break;
			}
			NMG_UNLOCK();
		}
		break;

	case NETMAP_BDG_LOOKUP_REG:
		/* register a lookup function to the given bridge.
		 * nmr->nr_name may be just bridge's name (including ':'
		 * if it is not just NM_NAME).
		 */
		if (!func) {
			error = EINVAL;
			break;
		}
		NMG_LOCK();
		b = nm_find_bridge(name, 0 /* don't create */);
		if (!b) {
			error = EINVAL;
		} else {
			b->nm_bdg_lookup = func;
		}
		NMG_UNLOCK();
		break;

	default:
		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
		error = EINVAL;
		break;
	}
	return error;
}
2388f18be576SLuigi Rizzo 
2389f18be576SLuigi Rizzo 
239068b8534bSLuigi Rizzo /*
239168b8534bSLuigi Rizzo  * ioctl(2) support for the "netmap" device.
239268b8534bSLuigi Rizzo  *
239368b8534bSLuigi Rizzo  * Following a list of accepted commands:
239468b8534bSLuigi Rizzo  * - NIOCGINFO
239568b8534bSLuigi Rizzo  * - SIOCGIFADDR	just for convenience
239668b8534bSLuigi Rizzo  * - NIOCREGIF
239768b8534bSLuigi Rizzo  * - NIOCUNREGIF
239868b8534bSLuigi Rizzo  * - NIOCTXSYNC
239968b8534bSLuigi Rizzo  * - NIOCRXSYNC
240068b8534bSLuigi Rizzo  *
240168b8534bSLuigi Rizzo  * Return 0 on success, errno otherwise.
240268b8534bSLuigi Rizzo  */
240368b8534bSLuigi Rizzo static int
24040b8ed8e0SLuigi Rizzo netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
24050b8ed8e0SLuigi Rizzo 	int fflag, struct thread *td)
240668b8534bSLuigi Rizzo {
240768b8534bSLuigi Rizzo 	struct netmap_priv_d *priv = NULL;
2408ce3ee1e7SLuigi Rizzo 	struct ifnet *ifp = NULL;
240968b8534bSLuigi Rizzo 	struct nmreq *nmr = (struct nmreq *) data;
2410ce3ee1e7SLuigi Rizzo 	struct netmap_adapter *na = NULL;
241168b8534bSLuigi Rizzo 	int error;
241264ae02c3SLuigi Rizzo 	u_int i, lim;
241368b8534bSLuigi Rizzo 	struct netmap_if *nifp;
2414ce3ee1e7SLuigi Rizzo 	struct netmap_kring *krings;
241568b8534bSLuigi Rizzo 
24160b8ed8e0SLuigi Rizzo 	(void)dev;	/* UNUSED */
24170b8ed8e0SLuigi Rizzo 	(void)fflag;	/* UNUSED */
2418f196ce38SLuigi Rizzo #ifdef linux
2419f196ce38SLuigi Rizzo #define devfs_get_cdevpriv(pp)				\
2420f196ce38SLuigi Rizzo 	({ *(struct netmap_priv_d **)pp = ((struct file *)td)->private_data; 	\
2421f196ce38SLuigi Rizzo 		(*pp ? 0 : ENOENT); })
2422f196ce38SLuigi Rizzo 
2423f196ce38SLuigi Rizzo /* devfs_set_cdevpriv cannot fail on linux */
2424f196ce38SLuigi Rizzo #define devfs_set_cdevpriv(p, fn)				\
2425f196ce38SLuigi Rizzo 	({ ((struct file *)td)->private_data = p; (p ? 0 : EINVAL); })
2426f196ce38SLuigi Rizzo 
2427f196ce38SLuigi Rizzo 
2428f196ce38SLuigi Rizzo #define devfs_clear_cdevpriv()	do {				\
2429f196ce38SLuigi Rizzo 		netmap_dtor(priv); ((struct file *)td)->private_data = 0;	\
2430f196ce38SLuigi Rizzo 	} while (0)
2431f196ce38SLuigi Rizzo #endif /* linux */
2432f196ce38SLuigi Rizzo 
2433506cc70cSLuigi Rizzo 	CURVNET_SET(TD_TO_VNET(td));
2434506cc70cSLuigi Rizzo 
243568b8534bSLuigi Rizzo 	error = devfs_get_cdevpriv((void **)&priv);
24368241616dSLuigi Rizzo 	if (error) {
2437506cc70cSLuigi Rizzo 		CURVNET_RESTORE();
24388241616dSLuigi Rizzo 		/* XXX ENOENT should be impossible, since the priv
24398241616dSLuigi Rizzo 		 * is now created in the open */
24408241616dSLuigi Rizzo 		return (error == ENOENT ? ENXIO : error);
2441506cc70cSLuigi Rizzo 	}
244268b8534bSLuigi Rizzo 
2443f196ce38SLuigi Rizzo 	nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0';	/* truncate name */
244468b8534bSLuigi Rizzo 	switch (cmd) {
244568b8534bSLuigi Rizzo 	case NIOCGINFO:		/* return capabilities etc */
244664ae02c3SLuigi Rizzo 		if (nmr->nr_version != NETMAP_API) {
2447ce3ee1e7SLuigi Rizzo #ifdef TEST_STUFF
2448ce3ee1e7SLuigi Rizzo 			/* some test code for locks etc */
2449ce3ee1e7SLuigi Rizzo 			if (nmr->nr_version == 666) {
2450ce3ee1e7SLuigi Rizzo 				error = nm_test(nmr);
2451ce3ee1e7SLuigi Rizzo 				break;
2452ce3ee1e7SLuigi Rizzo 			}
2453ce3ee1e7SLuigi Rizzo #endif /* TEST_STUFF */
245464ae02c3SLuigi Rizzo 			D("API mismatch got %d have %d",
245564ae02c3SLuigi Rizzo 				nmr->nr_version, NETMAP_API);
245664ae02c3SLuigi Rizzo 			nmr->nr_version = NETMAP_API;
245764ae02c3SLuigi Rizzo 			error = EINVAL;
245864ae02c3SLuigi Rizzo 			break;
245964ae02c3SLuigi Rizzo 		}
2460f18be576SLuigi Rizzo 		if (nmr->nr_cmd == NETMAP_BDG_LIST) {
2461f18be576SLuigi Rizzo 			error = netmap_bdg_ctl(nmr, NULL);
2462f18be576SLuigi Rizzo 			break;
2463f18be576SLuigi Rizzo 		}
2464ce3ee1e7SLuigi Rizzo 
2465ce3ee1e7SLuigi Rizzo 		NMG_LOCK();
2466ce3ee1e7SLuigi Rizzo 		do {
2467ce3ee1e7SLuigi Rizzo 			/* memsize is always valid */
2468ce3ee1e7SLuigi Rizzo 			struct netmap_mem_d *nmd = &nm_mem;
2469ce3ee1e7SLuigi Rizzo 			u_int memflags;
2470ce3ee1e7SLuigi Rizzo 
2471ce3ee1e7SLuigi Rizzo 			if (nmr->nr_name[0] != '\0') {
2472ce3ee1e7SLuigi Rizzo 				/* get a refcount */
2473ce3ee1e7SLuigi Rizzo 				error = get_ifp(nmr, &ifp, 1 /* create */);
24748241616dSLuigi Rizzo 				if (error)
24758241616dSLuigi Rizzo 					break;
2476ce3ee1e7SLuigi Rizzo 				na = NA(ifp);  /* retrieve the netmap adapter */
2477ce3ee1e7SLuigi Rizzo 				nmd = na->nm_mem; /* and its memory allocator */
2478ce3ee1e7SLuigi Rizzo 			}
2479ce3ee1e7SLuigi Rizzo 
2480ce3ee1e7SLuigi Rizzo 			error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags);
2481ce3ee1e7SLuigi Rizzo 			if (error)
2482ce3ee1e7SLuigi Rizzo 				break;
2483ce3ee1e7SLuigi Rizzo 			if (na == NULL) /* only memory info */
2484ce3ee1e7SLuigi Rizzo 				break;
24858241616dSLuigi Rizzo 			nmr->nr_offset = 0;
24868241616dSLuigi Rizzo 			nmr->nr_rx_slots = nmr->nr_tx_slots = 0;
2487ae10d1afSLuigi Rizzo 			netmap_update_config(na);
2488d76bf4ffSLuigi Rizzo 			nmr->nr_rx_rings = na->num_rx_rings;
2489d76bf4ffSLuigi Rizzo 			nmr->nr_tx_rings = na->num_tx_rings;
249064ae02c3SLuigi Rizzo 			nmr->nr_rx_slots = na->num_rx_desc;
249164ae02c3SLuigi Rizzo 			nmr->nr_tx_slots = na->num_tx_desc;
2492ce3ee1e7SLuigi Rizzo 			if (memflags & NETMAP_MEM_PRIVATE)
2493ce3ee1e7SLuigi Rizzo 				nmr->nr_ringid |= NETMAP_PRIV_MEM;
2494ce3ee1e7SLuigi Rizzo 		} while (0);
2495ce3ee1e7SLuigi Rizzo 		if (ifp)
2496f196ce38SLuigi Rizzo 			nm_if_rele(ifp);	/* return the refcount */
2497ce3ee1e7SLuigi Rizzo 		NMG_UNLOCK();
249868b8534bSLuigi Rizzo 		break;
249968b8534bSLuigi Rizzo 
250068b8534bSLuigi Rizzo 	case NIOCREGIF:
250164ae02c3SLuigi Rizzo 		if (nmr->nr_version != NETMAP_API) {
250264ae02c3SLuigi Rizzo 			nmr->nr_version = NETMAP_API;
250364ae02c3SLuigi Rizzo 			error = EINVAL;
250464ae02c3SLuigi Rizzo 			break;
250564ae02c3SLuigi Rizzo 		}
2506f18be576SLuigi Rizzo 		/* possibly attach/detach NIC and VALE switch */
2507f18be576SLuigi Rizzo 		i = nmr->nr_cmd;
2508f18be576SLuigi Rizzo 		if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH) {
2509f18be576SLuigi Rizzo 			error = netmap_bdg_ctl(nmr, NULL);
2510f18be576SLuigi Rizzo 			break;
2511f18be576SLuigi Rizzo 		} else if (i != 0) {
2512f18be576SLuigi Rizzo 			D("nr_cmd must be 0 not %d", i);
2513f18be576SLuigi Rizzo 			error = EINVAL;
2514f18be576SLuigi Rizzo 			break;
2515f18be576SLuigi Rizzo 		}
2516f18be576SLuigi Rizzo 
25178241616dSLuigi Rizzo 		/* protect access to priv from concurrent NIOCREGIF */
2518ce3ee1e7SLuigi Rizzo 		NMG_LOCK();
2519ce3ee1e7SLuigi Rizzo 		do {
2520ce3ee1e7SLuigi Rizzo 			u_int memflags;
2521ce3ee1e7SLuigi Rizzo 
25228241616dSLuigi Rizzo 			if (priv->np_ifp != NULL) {	/* thread already registered */
2523506cc70cSLuigi Rizzo 				error = netmap_set_ringid(priv, nmr->nr_ringid);
2524506cc70cSLuigi Rizzo 				break;
2525506cc70cSLuigi Rizzo 			}
252668b8534bSLuigi Rizzo 			/* find the interface and a reference */
2527ce3ee1e7SLuigi Rizzo 			error = get_ifp(nmr, &ifp, 1 /* create */); /* keep reference */
252868b8534bSLuigi Rizzo 			if (error)
2529ce3ee1e7SLuigi Rizzo 				break;
2530ce3ee1e7SLuigi Rizzo 			if (NETMAP_OWNED_BY_KERN(ifp)) {
2531f18be576SLuigi Rizzo 				nm_if_rele(ifp);
2532ce3ee1e7SLuigi Rizzo 				error = EBUSY;
2533ce3ee1e7SLuigi Rizzo 				break;
2534f196ce38SLuigi Rizzo 			}
2535f18be576SLuigi Rizzo 			nifp = netmap_do_regif(priv, ifp, nmr->nr_ringid, &error);
2536f18be576SLuigi Rizzo 			if (!nifp) {    /* reg. failed, release priv and ref */
2537f196ce38SLuigi Rizzo 				nm_if_rele(ifp);        /* return the refcount */
25388241616dSLuigi Rizzo 				priv->np_ifp = NULL;
25398241616dSLuigi Rizzo 				priv->np_nifp = NULL;
2540ce3ee1e7SLuigi Rizzo 				break;
254168b8534bSLuigi Rizzo 			}
254268b8534bSLuigi Rizzo 
254368b8534bSLuigi Rizzo 			/* return the offset of the netmap_if object */
2544f18be576SLuigi Rizzo 			na = NA(ifp); /* retrieve netmap adapter */
2545d76bf4ffSLuigi Rizzo 			nmr->nr_rx_rings = na->num_rx_rings;
2546d76bf4ffSLuigi Rizzo 			nmr->nr_tx_rings = na->num_tx_rings;
254764ae02c3SLuigi Rizzo 			nmr->nr_rx_slots = na->num_rx_desc;
254864ae02c3SLuigi Rizzo 			nmr->nr_tx_slots = na->num_tx_desc;
2549ce3ee1e7SLuigi Rizzo 			error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags);
2550ce3ee1e7SLuigi Rizzo 			if (error) {
2551ce3ee1e7SLuigi Rizzo 				nm_if_rele(ifp);
2552ce3ee1e7SLuigi Rizzo 				break;
2553ce3ee1e7SLuigi Rizzo 			}
2554ce3ee1e7SLuigi Rizzo 			if (memflags & NETMAP_MEM_PRIVATE) {
2555ce3ee1e7SLuigi Rizzo 				nmr->nr_ringid |= NETMAP_PRIV_MEM;
25563d819cb6SLuigi Rizzo 				*(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
2557ce3ee1e7SLuigi Rizzo 			}
2558ce3ee1e7SLuigi Rizzo 			nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);
2559ce3ee1e7SLuigi Rizzo 		} while (0);
2560ce3ee1e7SLuigi Rizzo 		NMG_UNLOCK();
256168b8534bSLuigi Rizzo 		break;
256268b8534bSLuigi Rizzo 
256368b8534bSLuigi Rizzo 	case NIOCUNREGIF:
25648241616dSLuigi Rizzo 		// XXX we have no data here ?
25658241616dSLuigi Rizzo 		D("deprecated, data is %p", nmr);
25668241616dSLuigi Rizzo 		error = EINVAL;
256768b8534bSLuigi Rizzo 		break;
256868b8534bSLuigi Rizzo 
256968b8534bSLuigi Rizzo 	case NIOCTXSYNC:
257068b8534bSLuigi Rizzo 	case NIOCRXSYNC:
25718241616dSLuigi Rizzo 		nifp = priv->np_nifp;
25728241616dSLuigi Rizzo 
25738241616dSLuigi Rizzo 		if (nifp == NULL) {
2574506cc70cSLuigi Rizzo 			error = ENXIO;
2575506cc70cSLuigi Rizzo 			break;
2576506cc70cSLuigi Rizzo 		}
25778241616dSLuigi Rizzo 		rmb(); /* make sure following reads are not from cache */
25788241616dSLuigi Rizzo 
257968b8534bSLuigi Rizzo 		ifp = priv->np_ifp;	/* we have a reference */
25808241616dSLuigi Rizzo 
25818241616dSLuigi Rizzo 		if (ifp == NULL) {
25828241616dSLuigi Rizzo 			D("Internal error: nifp != NULL && ifp == NULL");
25838241616dSLuigi Rizzo 			error = ENXIO;
25848241616dSLuigi Rizzo 			break;
25858241616dSLuigi Rizzo 		}
25868241616dSLuigi Rizzo 
258768b8534bSLuigi Rizzo 		na = NA(ifp); /* retrieve netmap adapter */
258864ae02c3SLuigi Rizzo 		if (priv->np_qfirst == NETMAP_SW_RING) { /* host rings */
258968b8534bSLuigi Rizzo 			if (cmd == NIOCTXSYNC)
2590ce3ee1e7SLuigi Rizzo 				netmap_txsync_to_host(na);
259168b8534bSLuigi Rizzo 			else
2592ce3ee1e7SLuigi Rizzo 				netmap_rxsync_from_host(na, NULL, NULL);
2593506cc70cSLuigi Rizzo 			break;
259468b8534bSLuigi Rizzo 		}
259564ae02c3SLuigi Rizzo 		/* find the last ring to scan */
259664ae02c3SLuigi Rizzo 		lim = priv->np_qlast;
259764ae02c3SLuigi Rizzo 		if (lim == NETMAP_HW_RING)
25983c0caf6cSLuigi Rizzo 			lim = (cmd == NIOCTXSYNC) ?
2599d76bf4ffSLuigi Rizzo 			    na->num_tx_rings : na->num_rx_rings;
260068b8534bSLuigi Rizzo 
2601ce3ee1e7SLuigi Rizzo 		krings = (cmd == NIOCTXSYNC) ? na->tx_rings : na->rx_rings;
260264ae02c3SLuigi Rizzo 		for (i = priv->np_qfirst; i < lim; i++) {
2603ce3ee1e7SLuigi Rizzo 			struct netmap_kring *kring = krings + i;
2604ce3ee1e7SLuigi Rizzo 			if (nm_kr_tryget(kring)) {
2605ce3ee1e7SLuigi Rizzo 				error = EBUSY;
2606ce3ee1e7SLuigi Rizzo 				goto out;
2607ce3ee1e7SLuigi Rizzo 			}
260868b8534bSLuigi Rizzo 			if (cmd == NIOCTXSYNC) {
260968b8534bSLuigi Rizzo 				if (netmap_verbose & NM_VERB_TXSYNC)
26103c0caf6cSLuigi Rizzo 					D("pre txsync ring %d cur %d hwcur %d",
261168b8534bSLuigi Rizzo 					    i, kring->ring->cur,
261268b8534bSLuigi Rizzo 					    kring->nr_hwcur);
2613ce3ee1e7SLuigi Rizzo 				na->nm_txsync(ifp, i, NAF_FORCE_RECLAIM);
261468b8534bSLuigi Rizzo 				if (netmap_verbose & NM_VERB_TXSYNC)
26153c0caf6cSLuigi Rizzo 					D("post txsync ring %d cur %d hwcur %d",
261668b8534bSLuigi Rizzo 					    i, kring->ring->cur,
261768b8534bSLuigi Rizzo 					    kring->nr_hwcur);
261868b8534bSLuigi Rizzo 			} else {
2619ce3ee1e7SLuigi Rizzo 				na->nm_rxsync(ifp, i, NAF_FORCE_READ);
262068b8534bSLuigi Rizzo 				microtime(&na->rx_rings[i].ring->ts);
262168b8534bSLuigi Rizzo 			}
2622ce3ee1e7SLuigi Rizzo 			nm_kr_put(kring);
262368b8534bSLuigi Rizzo 		}
262468b8534bSLuigi Rizzo 
262568b8534bSLuigi Rizzo 		break;
262668b8534bSLuigi Rizzo 
2627f196ce38SLuigi Rizzo #ifdef __FreeBSD__
262868b8534bSLuigi Rizzo 	case BIOCIMMEDIATE:
262968b8534bSLuigi Rizzo 	case BIOCGHDRCMPLT:
263068b8534bSLuigi Rizzo 	case BIOCSHDRCMPLT:
263168b8534bSLuigi Rizzo 	case BIOCSSEESENT:
263268b8534bSLuigi Rizzo 		D("ignore BIOCIMMEDIATE/BIOCSHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT");
263368b8534bSLuigi Rizzo 		break;
263468b8534bSLuigi Rizzo 
2635babc7c12SLuigi Rizzo 	default:	/* allow device-specific ioctls */
263668b8534bSLuigi Rizzo 	    {
263768b8534bSLuigi Rizzo 		struct socket so;
2638ce3ee1e7SLuigi Rizzo 
263968b8534bSLuigi Rizzo 		bzero(&so, sizeof(so));
2640ce3ee1e7SLuigi Rizzo 		NMG_LOCK();
2641ce3ee1e7SLuigi Rizzo 		error = get_ifp(nmr, &ifp, 0 /* don't create */); /* keep reference */
2642ce3ee1e7SLuigi Rizzo 		if (error) {
2643ce3ee1e7SLuigi Rizzo 			NMG_UNLOCK();
264468b8534bSLuigi Rizzo 			break;
2645ce3ee1e7SLuigi Rizzo 		}
264668b8534bSLuigi Rizzo 		so.so_vnet = ifp->if_vnet;
264768b8534bSLuigi Rizzo 		// so->so_proto not null.
264868b8534bSLuigi Rizzo 		error = ifioctl(&so, cmd, data, td);
2649f196ce38SLuigi Rizzo 		nm_if_rele(ifp);
2650ce3ee1e7SLuigi Rizzo 		NMG_UNLOCK();
2651babc7c12SLuigi Rizzo 		break;
265268b8534bSLuigi Rizzo 	    }
2653f196ce38SLuigi Rizzo 
2654f196ce38SLuigi Rizzo #else /* linux */
2655f196ce38SLuigi Rizzo 	default:
2656f196ce38SLuigi Rizzo 		error = EOPNOTSUPP;
2657f196ce38SLuigi Rizzo #endif /* linux */
265868b8534bSLuigi Rizzo 	}
2659ce3ee1e7SLuigi Rizzo out:
266068b8534bSLuigi Rizzo 
2661506cc70cSLuigi Rizzo 	CURVNET_RESTORE();
266268b8534bSLuigi Rizzo 	return (error);
266368b8534bSLuigi Rizzo }
266468b8534bSLuigi Rizzo 
266568b8534bSLuigi Rizzo 
266668b8534bSLuigi Rizzo /*
266768b8534bSLuigi Rizzo  * select(2) and poll(2) handlers for the "netmap" device.
266868b8534bSLuigi Rizzo  *
266968b8534bSLuigi Rizzo  * Can be called for one or more queues.
 * Return the event mask corresponding to ready events.
267168b8534bSLuigi Rizzo  * If there are no ready events, do a selrecord on either individual
2672ce3ee1e7SLuigi Rizzo  * selinfo or on the global one.
267368b8534bSLuigi Rizzo  * Device-dependent parts (locking and sync of tx/rx rings)
267468b8534bSLuigi Rizzo  * are done through callbacks.
2675f196ce38SLuigi Rizzo  *
267601c7d25fSLuigi Rizzo  * On linux, arguments are really pwait, the poll table, and 'td' is struct file *
267701c7d25fSLuigi Rizzo  * The first one is remapped to pwait as selrecord() uses the name as an
267801c7d25fSLuigi Rizzo  * hidden argument.
267968b8534bSLuigi Rizzo  */
static int
netmap_poll(struct cdev *dev, int events, struct thread *td)
{
	struct netmap_priv_d *priv = NULL;
	struct netmap_adapter *na;
	struct ifnet *ifp;
	struct netmap_kring *kring;
	u_int i, check_all, want_tx, want_rx, revents = 0;
	u_int lim_tx, lim_rx, host_forwarded = 0;
	struct mbq q = { NULL, NULL, 0 };	/* packets grabbed for the host stack */
	void *pwait = dev;	/* linux compatibility */

		int retry_tx = 1;

	(void)pwait;

	/* fetch the per-open private state; fail if the fd is not ours */
	if (devfs_get_cdevpriv((void **)&priv) != 0 || priv == NULL)
		return POLLERR;

	/* NIOCREGIF must have completed before polling makes sense */
	if (priv->np_nifp == NULL) {
		D("No if registered");
		return POLLERR;
	}
	rmb(); /* make sure following reads are not from cache */

	ifp = priv->np_ifp;
	// XXX check for deleting() ?
	if ( (ifp->if_capenable & IFCAP_NETMAP) == 0)
		return POLLERR;

	if (netmap_verbose & 0x8000)
		D("device %s events 0x%x", ifp->if_xname, events);
	want_tx = events & (POLLOUT | POLLWRNORM);
	want_rx = events & (POLLIN | POLLRDNORM);

	na = NA(ifp); /* retrieve netmap adapter */

	lim_tx = na->num_tx_rings;
	lim_rx = na->num_rx_rings;

	if (priv->np_qfirst == NETMAP_SW_RING) {
		/* handle the host stack ring */
		if (priv->np_txpoll || want_tx) {
			/* push any packets up, then we are always ready */
			netmap_txsync_to_host(na);
			revents |= want_tx;
		}
		if (want_rx) {
			/* rx_rings[lim_rx] is the host stack rx ring */
			kring = &na->rx_rings[lim_rx];
			if (kring->ring->avail == 0)
				netmap_rxsync_from_host(na, td, dev);
			if (kring->ring->avail > 0) {
				revents |= want_rx;
			}
		}
		return (revents);
	}

	/* if we are in transparent mode, check also the host rx ring */
	kring = &na->rx_rings[lim_rx];
	if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
			&& want_rx
			&& (netmap_fwd || kring->ring->flags & NR_FORWARD) ) {
		if (kring->ring->avail == 0)
			netmap_rxsync_from_host(na, td, dev);
		if (kring->ring->avail > 0)
			revents |= want_rx;
	}

	/*
	 * check_all is set if the card has more than one queue AND
	 * the client is polling all of them. If true, we sleep on
	 * the "global" selinfo, otherwise we sleep on individual selinfo
	 * (FreeBSD only allows two selinfo's per file descriptor).
	 * The interrupt routine in the driver wake one or the other
	 * (or both) depending on which clients are active.
	 *
	 * rxsync() is only called if we run out of buffers on a POLLIN.
	 * txsync() is called if we run out of buffers on POLLOUT, or
	 * there are pending packets to send. The latter can be disabled
	 * passing NETMAP_NO_TX_POLL in the NIOCREG call.
	 */
	check_all = (priv->np_qlast == NETMAP_HW_RING) && (lim_tx > 1 || lim_rx > 1);

	/* a single-ring registration restricts the range we scan */
	if (priv->np_qlast != NETMAP_HW_RING) {
		lim_tx = lim_rx = priv->np_qlast;
	}

	/*
	 * We start with a lock free round which is good if we have
	 * data available. If this fails, then lock and call the sync
	 * routines.
	 */
	for (i = priv->np_qfirst; want_rx && i < lim_rx; i++) {
		kring = &na->rx_rings[i];
		if (kring->ring->avail > 0) {
			revents |= want_rx;
			want_rx = 0;	/* also breaks the loop */
		}
	}
	for (i = priv->np_qfirst; want_tx && i < lim_tx; i++) {
		kring = &na->tx_rings[i];
		if (kring->ring->avail > 0) {
			revents |= want_tx;
			want_tx = 0;	/* also breaks the loop */
		}
	}

	/*
	 * If we to push packets out (priv->np_txpoll) or want_tx is
	 * still set, we do need to run the txsync calls (on all rings,
	 * to avoid that the tx rings stall).
	 */
	if (priv->np_txpoll || want_tx) {
		/* If we really want to be woken up (want_tx),
		 * do a selrecord, either on the global or on
		 * the private structure.  Then issue the txsync
		 * so there is no race in the selrecord/selwait
		 */
flush_tx:
		for (i = priv->np_qfirst; i < lim_tx; i++) {
			kring = &na->tx_rings[i];
			/*
			 * Skip this ring if want_tx == 0
			 * (we have already done a successful sync on
			 * a previous ring) AND kring->cur == kring->hwcur
			 * (there are no pending transmissions for this ring).
			 */
			if (!want_tx && kring->ring->cur == kring->nr_hwcur)
				continue;
			/* make sure only one user thread is doing this */
			if (nm_kr_tryget(kring)) {
				ND("ring %p busy is %d", kring, (int)kring->nr_busy);
				revents |= POLLERR;
				goto out;
			}

			if (netmap_verbose & NM_VERB_TXSYNC)
				D("send %d on %s %d",
					kring->ring->cur, ifp->if_xname, i);
			if (na->nm_txsync(ifp, i, 0))
				revents |= POLLERR;

			/* Check avail/call selrecord only if called with POLLOUT */
			if (want_tx) {
				if (kring->ring->avail > 0) {
					/* stop at the first ring. We don't risk
					 * starvation.
					 */
					revents |= want_tx;
					want_tx = 0;
				}
			}
			nm_kr_put(kring);
		}
		/* still nothing to send: record interest once, then retry
		 * the whole tx pass so we cannot miss a wakeup.
		 */
		if (want_tx && retry_tx) {
			selrecord(td, check_all ?
			    &na->tx_si : &na->tx_rings[priv->np_qfirst].si);
			retry_tx = 0;
			goto flush_tx;
		}
	}

	/*
	 * now if want_rx is still set we need to lock and rxsync.
	 * Do it on all rings because otherwise we starve.
	 */
	if (want_rx) {
		int retry_rx = 1;
do_retry_rx:
		for (i = priv->np_qfirst; i < lim_rx; i++) {
			kring = &na->rx_rings[i];

			if (nm_kr_tryget(kring)) {
				revents |= POLLERR;
				goto out;
			}

			/* XXX NR_FORWARD should only be read on
			 * physical or NIC ports
			 */
			if (netmap_fwd ||kring->ring->flags & NR_FORWARD) {
				ND(10, "forwarding some buffers up %d to %d",
				    kring->nr_hwcur, kring->ring->cur);
				netmap_grab_packets(kring, &q, netmap_fwd);
			}

			if (na->nm_rxsync(ifp, i, 0))
				revents |= POLLERR;
			if (netmap_no_timestamp == 0 ||
					kring->ring->flags & NR_TIMESTAMP) {
				microtime(&kring->ring->ts);
			}

			if (kring->ring->avail > 0) {
				revents |= want_rx;
				retry_rx = 0;	/* data found, no need to sleep/retry */
			}
			nm_kr_put(kring);
		}
		/* nothing ready: record interest and do one more pass */
		if (retry_rx) {
			retry_rx = 0;
			selrecord(td, check_all ?
			    &na->rx_si : &na->rx_rings[priv->np_qfirst].si);
			goto do_retry_rx;
		}
	}

	/* forward host to the netmap ring.
	 * I am accessing nr_hwavail without lock, but netmap_transmit
	 * can only increment it, so the operation is safe.
	 */
	kring = &na->rx_rings[lim_rx];
	if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
			&& (netmap_fwd || kring->ring->flags & NR_FORWARD)
			 && kring->nr_hwavail > 0 && !host_forwarded) {
		netmap_sw_to_nic(na);
		host_forwarded = 1; /* prevent another pass */
		want_rx = 0;
		goto flush_tx;
	}

	/* push any packets grabbed from the rx rings up the host stack */
	if (q.head)
		netmap_send_up(na->ifp, q.head);

out:

	return (revents);
}
290968b8534bSLuigi Rizzo 
291068b8534bSLuigi Rizzo /*------- driver support routines ------*/
291168b8534bSLuigi Rizzo 
2912f18be576SLuigi Rizzo 
/*
 * Initialize a ``netmap_adapter`` object created by driver on attach.
 * We allocate memory for one netmap_adapter (for a VALE port) or two
 * (for a NIC port, where the second adapter is attached to the host
 * stack rings via netmap_attach_sw()).  The krings themselves are
 * allocated later, by netmap_if_new().
 * Return 0 on success, EINVAL/ENOMEM otherwise.
 *
 * By default the receive and transmit adapter ring counts are both initialized
 * to num_queues.  na->num_tx_rings can be set for cards with different tx/rx
 * setups.
 */
292768b8534bSLuigi Rizzo int
2928ce3ee1e7SLuigi Rizzo netmap_attach(struct netmap_adapter *arg, u_int num_queues)
292968b8534bSLuigi Rizzo {
2930ae10d1afSLuigi Rizzo 	struct netmap_adapter *na = NULL;
2931ae10d1afSLuigi Rizzo 	struct ifnet *ifp = arg ? arg->ifp : NULL;
2932ce3ee1e7SLuigi Rizzo 	size_t len;
293368b8534bSLuigi Rizzo 
2934ae10d1afSLuigi Rizzo 	if (arg == NULL || ifp == NULL)
2935ae10d1afSLuigi Rizzo 		goto fail;
2936ce3ee1e7SLuigi Rizzo 	/* a VALE port uses two endpoints */
2937f18be576SLuigi Rizzo 	len = nma_is_vp(arg) ? sizeof(*na) : sizeof(*na) * 2;
2938f18be576SLuigi Rizzo 	na = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO);
2939ae10d1afSLuigi Rizzo 	if (na == NULL)
2940ae10d1afSLuigi Rizzo 		goto fail;
2941ae10d1afSLuigi Rizzo 	WNA(ifp) = na;
2942ae10d1afSLuigi Rizzo 	*na = *arg; /* copy everything, trust the driver to not pass junk */
2943ae10d1afSLuigi Rizzo 	NETMAP_SET_CAPABLE(ifp);
2944d76bf4ffSLuigi Rizzo 	if (na->num_tx_rings == 0)
2945d76bf4ffSLuigi Rizzo 		na->num_tx_rings = num_queues;
2946d76bf4ffSLuigi Rizzo 	na->num_rx_rings = num_queues;
2947ae10d1afSLuigi Rizzo 	na->refcount = na->na_single = na->na_multi = 0;
2948ae10d1afSLuigi Rizzo 	/* Core lock initialized here, others after netmap_if_new. */
2949ae10d1afSLuigi Rizzo 	mtx_init(&na->core_lock, "netmap core lock", MTX_NETWORK_LOCK, MTX_DEF);
295064ae02c3SLuigi Rizzo #ifdef linux
2951f18be576SLuigi Rizzo 	if (ifp->netdev_ops) {
2952f18be576SLuigi Rizzo 		ND("netdev_ops %p", ifp->netdev_ops);
2953f18be576SLuigi Rizzo 		/* prepare a clone of the netdev ops */
2954f18be576SLuigi Rizzo #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
2955f18be576SLuigi Rizzo 		na->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
2956f18be576SLuigi Rizzo #else
2957849bec0eSLuigi Rizzo 		na->nm_ndo = *ifp->netdev_ops;
2958f18be576SLuigi Rizzo #endif
2959f18be576SLuigi Rizzo 	}
2960ce3ee1e7SLuigi Rizzo 	na->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
2961ce3ee1e7SLuigi Rizzo #endif /* linux */
2962ce3ee1e7SLuigi Rizzo 	na->nm_mem = arg->nm_mem ? arg->nm_mem : &nm_mem;
2963f18be576SLuigi Rizzo 	if (!nma_is_vp(arg))
2964f18be576SLuigi Rizzo 		netmap_attach_sw(ifp);
2965ae10d1afSLuigi Rizzo 	D("success for %s", ifp->if_xname);
2966ae10d1afSLuigi Rizzo 	return 0;
296768b8534bSLuigi Rizzo 
2968ae10d1afSLuigi Rizzo fail:
2969ae10d1afSLuigi Rizzo 	D("fail, arg %p ifp %p na %p", arg, ifp, na);
2970849bec0eSLuigi Rizzo 	netmap_detach(ifp);
2971ae10d1afSLuigi Rizzo 	return (na ? EINVAL : ENOMEM);
297268b8534bSLuigi Rizzo }
297368b8534bSLuigi Rizzo 
297468b8534bSLuigi Rizzo 
297568b8534bSLuigi Rizzo /*
297668b8534bSLuigi Rizzo  * Free the allocated memory linked to the given ``netmap_adapter``
297768b8534bSLuigi Rizzo  * object.
297868b8534bSLuigi Rizzo  */
void
netmap_detach(struct ifnet *ifp)
{
	struct netmap_adapter *na = NA(ifp);

	/* nothing to do if the interface was never attached */
	if (!na)
		return;

	mtx_destroy(&na->core_lock);

	if (na->tx_rings) { /* XXX should not happen */
		D("freeing leftover tx_rings");
		free(na->tx_rings, M_DEVBUF);
	}
	/* release the private memory allocator, if we own one */
	if (na->na_flags & NAF_MEM_OWNER)
		netmap_mem_private_delete(na->nm_mem);
	/* clear before freeing, and detach from the ifnet, so stale
	 * pointers cannot be mistaken for a live adapter */
	bzero(na, sizeof(*na));
	WNA(ifp) = NULL;
	free(na, M_DEVBUF);
}
299968b8534bSLuigi Rizzo 
300068b8534bSLuigi Rizzo 
3001f18be576SLuigi Rizzo int
3002ce3ee1e7SLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
3003ce3ee1e7SLuigi Rizzo 	struct netmap_adapter *na, u_int ring_nr);
3004f18be576SLuigi Rizzo 
3005f18be576SLuigi Rizzo 
300668b8534bSLuigi Rizzo /*
300702ad4083SLuigi Rizzo  * Intercept packets from the network stack and pass them
300802ad4083SLuigi Rizzo  * to netmap as incoming packets on the 'software' ring.
3009ce3ee1e7SLuigi Rizzo  * We rely on the OS to make sure that the ifp and na do not go
3010ce3ee1e7SLuigi Rizzo  * away (typically the caller checks for IFF_DRV_RUNNING or the like).
3011ce3ee1e7SLuigi Rizzo  * In nm_register() or whenever there is a reinitialization,
3012ce3ee1e7SLuigi Rizzo  * we make sure to access the core lock and per-ring locks
3013ce3ee1e7SLuigi Rizzo  * so that IFCAP_NETMAP is visible here.
301468b8534bSLuigi Rizzo  */
int
netmap_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_kring *kring;
	u_int i, len = MBUF_LEN(m);
	u_int error = EBUSY, lim;	/* default error if we do not enqueue */
	struct netmap_slot *slot;

	// XXX [Linux] we do not need this lock
	// if we follow the down/configure/up protocol -gl
	// mtx_lock(&na->core_lock);
	if ( (ifp->if_capenable & IFCAP_NETMAP) == 0) {
		/* interface not in netmap mode anymore */
		error = ENXIO;
		goto done;
	}

	/* rx_rings[num_rx_rings] is the host stack ring */
	kring = &na->rx_rings[na->num_rx_rings];
	lim = kring->nkr_num_slots - 1;
	if (netmap_verbose & NM_VERB_HOST)
		D("%s packet %d len %d from the stack", ifp->if_xname,
			kring->nr_hwcur + kring->nr_hwavail, len);
	// XXX reconsider long packets if we handle fragments
	if (len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) { /* too long for us */
		D("%s from_host, drop packet size %d > %d", ifp->if_xname,
			len, NETMAP_BDG_BUF_SIZE(na->nm_mem));
		goto done;
	}
	if (SWNA(ifp)->na_bdg) {
		/* the host port is attached to a VALE switch: push the
		 * packet directly to the bridge instead of the host ring */
		struct nm_bdg_fwd *ft;
		char *dst;

		na = SWNA(ifp); /* we operate on the host port */
		ft = na->rx_rings[0].nkr_ft;
		dst = BDG_NMB(na->nm_mem, &na->rx_rings[0].ring->slot[0]);

		/* use slot 0 in the ft, there is nothing queued here */
		/* XXX we can save the copy calling m_copydata in nm_bdg_flush,
		 * need a special flag for this.
		 */
		m_copydata(m, 0, (int)len, dst);
		ft->ft_flags = 0;
		ft->ft_len = len;
		ft->ft_buf = dst;
		ft->ft_next = NM_FT_NULL;
		ft->ft_frags = 1;	/* single-fragment packet */
		if (netmap_verbose & NM_VERB_HOST)
			RD(5, "pkt %p size %d to bridge port %d",
				dst, len, na->bdg_port);
		nm_bdg_flush(ft, 1, na, 0);
		na = NA(ifp);	/* back to the regular object/lock */
		error = 0;
		goto done;
	}

	/* protect against other instances of netmap_transmit,
	 * and userspace invocations of rxsync().
	 * XXX could reuse core_lock
	 */
	// XXX [Linux] there can be no other instances of netmap_transmit
	// on this same ring, but we still need this lock to protect
	// concurrent access from netmap_sw_to_nic() -gl
	mtx_lock(&kring->q_lock);
	if (kring->nr_hwavail >= lim) {
		/* no room in the host ring: drop (error stays EBUSY) */
		if (netmap_verbose)
			D("stack ring %s full\n", ifp->if_xname);
	} else {
		/* compute the insert position */
		i = nm_kr_rxpos(kring);
		slot = &kring->ring->slot[i];
		m_copydata(m, 0, (int)len, BDG_NMB(na->nm_mem, slot));
		slot->len = len;
		slot->flags = kring->nkr_slot_flags;
		kring->nr_hwavail++;
		if (netmap_verbose  & NM_VERB_HOST)
			D("wake up host ring %s %d", na->ifp->if_xname, na->num_rx_rings);
		/* wake up any poll(2) sleeper on this ring */
		selwakeuppri(&kring->si, PI_NET);
		error = 0;
	}
	mtx_unlock(&kring->q_lock);

done:
	// mtx_unlock(&na->core_lock);

	/* release the mbuf in either cases of success or failure. As an
	 * alternative, put the mbuf in a free list and free the list
	 * only when really necessary.
	 */
	m_freem(m);

	return (error);
}
310868b8534bSLuigi Rizzo 
310968b8534bSLuigi Rizzo 
311068b8534bSLuigi Rizzo /*
311168b8534bSLuigi Rizzo  * netmap_reset() is called by the driver routines when reinitializing
311268b8534bSLuigi Rizzo  * a ring. The driver is in charge of locking to protect the kring.
311368b8534bSLuigi Rizzo  * If netmap mode is not set just return NULL.
311468b8534bSLuigi Rizzo  */
311568b8534bSLuigi Rizzo struct netmap_slot *
3116ce3ee1e7SLuigi Rizzo netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
311768b8534bSLuigi Rizzo 	u_int new_cur)
311868b8534bSLuigi Rizzo {
311968b8534bSLuigi Rizzo 	struct netmap_kring *kring;
3120506cc70cSLuigi Rizzo 	int new_hwofs, lim;
312168b8534bSLuigi Rizzo 
3122ce3ee1e7SLuigi Rizzo 	if (na == NULL) {
3123ce3ee1e7SLuigi Rizzo 		D("NULL na, should not happen");
312468b8534bSLuigi Rizzo 		return NULL;	/* no netmap support here */
3125ce3ee1e7SLuigi Rizzo 	}
3126ce3ee1e7SLuigi Rizzo 	if (!(na->ifp->if_capenable & IFCAP_NETMAP)) {
3127ce3ee1e7SLuigi Rizzo 		D("interface not in netmap mode");
312868b8534bSLuigi Rizzo 		return NULL;	/* nothing to reinitialize */
3129ce3ee1e7SLuigi Rizzo 	}
313068b8534bSLuigi Rizzo 
3131ce3ee1e7SLuigi Rizzo 	/* XXX note- in the new scheme, we are not guaranteed to be
3132ce3ee1e7SLuigi Rizzo 	 * under lock (e.g. when called on a device reset).
3133ce3ee1e7SLuigi Rizzo 	 * In this case, we should set a flag and do not trust too
3134ce3ee1e7SLuigi Rizzo 	 * much the values. In practice: TODO
3135ce3ee1e7SLuigi Rizzo 	 * - set a RESET flag somewhere in the kring
3136ce3ee1e7SLuigi Rizzo 	 * - do the processing in a conservative way
3137ce3ee1e7SLuigi Rizzo 	 * - let the *sync() fixup at the end.
3138ce3ee1e7SLuigi Rizzo 	 */
313964ae02c3SLuigi Rizzo 	if (tx == NR_TX) {
31408241616dSLuigi Rizzo 		if (n >= na->num_tx_rings)
31418241616dSLuigi Rizzo 			return NULL;
314264ae02c3SLuigi Rizzo 		kring = na->tx_rings + n;
3143506cc70cSLuigi Rizzo 		new_hwofs = kring->nr_hwcur - new_cur;
314464ae02c3SLuigi Rizzo 	} else {
31458241616dSLuigi Rizzo 		if (n >= na->num_rx_rings)
31468241616dSLuigi Rizzo 			return NULL;
314764ae02c3SLuigi Rizzo 		kring = na->rx_rings + n;
3148506cc70cSLuigi Rizzo 		new_hwofs = kring->nr_hwcur + kring->nr_hwavail - new_cur;
314964ae02c3SLuigi Rizzo 	}
315064ae02c3SLuigi Rizzo 	lim = kring->nkr_num_slots - 1;
3151506cc70cSLuigi Rizzo 	if (new_hwofs > lim)
3152506cc70cSLuigi Rizzo 		new_hwofs -= lim + 1;
3153506cc70cSLuigi Rizzo 
3154ce3ee1e7SLuigi Rizzo 	/* Always set the new offset value and realign the ring. */
3155ce3ee1e7SLuigi Rizzo 	D("%s hwofs %d -> %d, hwavail %d -> %d",
3156ce3ee1e7SLuigi Rizzo 		tx == NR_TX ? "TX" : "RX",
3157ce3ee1e7SLuigi Rizzo 		kring->nkr_hwofs, new_hwofs,
3158ce3ee1e7SLuigi Rizzo 		kring->nr_hwavail,
3159ce3ee1e7SLuigi Rizzo 		tx == NR_TX ? lim : kring->nr_hwavail);
3160506cc70cSLuigi Rizzo 	kring->nkr_hwofs = new_hwofs;
3161506cc70cSLuigi Rizzo 	if (tx == NR_TX)
3162ce3ee1e7SLuigi Rizzo 		kring->nr_hwavail = lim;
3163506cc70cSLuigi Rizzo 
3164f196ce38SLuigi Rizzo #if 0 // def linux
3165f196ce38SLuigi Rizzo 	/* XXX check that the mappings are correct */
3166f196ce38SLuigi Rizzo 	/* need ring_nr, adapter->pdev, direction */
3167f196ce38SLuigi Rizzo 	buffer_info->dma = dma_map_single(&pdev->dev, addr, adapter->rx_buffer_len, DMA_FROM_DEVICE);
3168f196ce38SLuigi Rizzo 	if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
3169f196ce38SLuigi Rizzo 		D("error mapping rx netmap buffer %d", i);
3170f196ce38SLuigi Rizzo 		// XXX fix error handling
3171f196ce38SLuigi Rizzo 	}
3172f196ce38SLuigi Rizzo 
3173f196ce38SLuigi Rizzo #endif /* linux */
317468b8534bSLuigi Rizzo 	/*
3175ce3ee1e7SLuigi Rizzo 	 * Wakeup on the individual and global selwait
3176506cc70cSLuigi Rizzo 	 * We do the wakeup here, but the ring is not yet reconfigured.
3177506cc70cSLuigi Rizzo 	 * However, we are under lock so there are no races.
317868b8534bSLuigi Rizzo 	 */
317968b8534bSLuigi Rizzo 	selwakeuppri(&kring->si, PI_NET);
318064ae02c3SLuigi Rizzo 	selwakeuppri(tx == NR_TX ? &na->tx_si : &na->rx_si, PI_NET);
318168b8534bSLuigi Rizzo 	return kring->ring->slot;
318268b8534bSLuigi Rizzo }
318368b8534bSLuigi Rizzo 
318468b8534bSLuigi Rizzo 
3185ce3ee1e7SLuigi Rizzo /*
3186ce3ee1e7SLuigi Rizzo  * Grab packets from a kring, move them into the ft structure
3187ce3ee1e7SLuigi Rizzo  * associated to the tx (input) port. Max one instance per port,
3188ce3ee1e7SLuigi Rizzo  * filtered on input (ioctl, poll or XXX).
3189ce3ee1e7SLuigi Rizzo  * Returns the next position in the ring.
3190ce3ee1e7SLuigi Rizzo  */
3191f18be576SLuigi Rizzo static int
3192f18be576SLuigi Rizzo nm_bdg_preflush(struct netmap_adapter *na, u_int ring_nr,
3193f18be576SLuigi Rizzo 	struct netmap_kring *kring, u_int end)
3194f18be576SLuigi Rizzo {
3195f18be576SLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
3196ce3ee1e7SLuigi Rizzo 	struct nm_bdg_fwd *ft;
3197f18be576SLuigi Rizzo 	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
3198f18be576SLuigi Rizzo 	u_int ft_i = 0;	/* start from 0 */
3199ce3ee1e7SLuigi Rizzo 	u_int frags = 1; /* how many frags ? */
3200ce3ee1e7SLuigi Rizzo 	struct nm_bridge *b = na->na_bdg;
3201f18be576SLuigi Rizzo 
3202ce3ee1e7SLuigi Rizzo 	/* To protect against modifications to the bridge we acquire a
3203ce3ee1e7SLuigi Rizzo 	 * shared lock, waiting if we can sleep (if the source port is
3204ce3ee1e7SLuigi Rizzo 	 * attached to a user process) or with a trylock otherwise (NICs).
3205ce3ee1e7SLuigi Rizzo 	 */
3206ce3ee1e7SLuigi Rizzo 	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
3207ce3ee1e7SLuigi Rizzo 	if (na->na_flags & NAF_BDG_MAYSLEEP)
3208ce3ee1e7SLuigi Rizzo 		BDG_RLOCK(b);
3209ce3ee1e7SLuigi Rizzo 	else if (!BDG_RTRYLOCK(b))
3210ce3ee1e7SLuigi Rizzo 		return 0;
3211ce3ee1e7SLuigi Rizzo 	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
3212ce3ee1e7SLuigi Rizzo 	ft = kring->nkr_ft;
3213ce3ee1e7SLuigi Rizzo 
3214ce3ee1e7SLuigi Rizzo 	for (; likely(j != end); j = nm_next(j, lim)) {
3215f18be576SLuigi Rizzo 		struct netmap_slot *slot = &ring->slot[j];
3216ce3ee1e7SLuigi Rizzo 		char *buf;
3217f18be576SLuigi Rizzo 
3218ce3ee1e7SLuigi Rizzo 		ft[ft_i].ft_len = slot->len;
321985233a7dSLuigi Rizzo 		ft[ft_i].ft_flags = slot->flags;
322085233a7dSLuigi Rizzo 
322185233a7dSLuigi Rizzo 		ND("flags is 0x%x", slot->flags);
322285233a7dSLuigi Rizzo 		/* this slot goes into a list so initialize the link field */
3223ce3ee1e7SLuigi Rizzo 		ft[ft_i].ft_next = NM_FT_NULL;
322485233a7dSLuigi Rizzo 		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
32253d819cb6SLuigi Rizzo 			(void *)(uintptr_t)slot->ptr : BDG_NMB(na->nm_mem, slot);
322685233a7dSLuigi Rizzo 		prefetch(buf);
3227ce3ee1e7SLuigi Rizzo 		++ft_i;
3228ce3ee1e7SLuigi Rizzo 		if (slot->flags & NS_MOREFRAG) {
3229ce3ee1e7SLuigi Rizzo 			frags++;
3230ce3ee1e7SLuigi Rizzo 			continue;
3231ce3ee1e7SLuigi Rizzo 		}
3232ce3ee1e7SLuigi Rizzo 		if (unlikely(netmap_verbose && frags > 1))
3233ce3ee1e7SLuigi Rizzo 			RD(5, "%d frags at %d", frags, ft_i - frags);
3234ce3ee1e7SLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags;
3235ce3ee1e7SLuigi Rizzo 		frags = 1;
3236ce3ee1e7SLuigi Rizzo 		if (unlikely((int)ft_i >= bridge_batch))
3237f18be576SLuigi Rizzo 			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
3238f18be576SLuigi Rizzo 	}
3239ce3ee1e7SLuigi Rizzo 	if (frags > 1) {
3240ce3ee1e7SLuigi Rizzo 		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
3241ce3ee1e7SLuigi Rizzo 		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
3242ce3ee1e7SLuigi Rizzo 		ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
3243ce3ee1e7SLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags - 1;
3244ce3ee1e7SLuigi Rizzo 	}
3245f18be576SLuigi Rizzo 	if (ft_i)
3246f18be576SLuigi Rizzo 		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
3247ce3ee1e7SLuigi Rizzo 	BDG_RUNLOCK(b);
3248f18be576SLuigi Rizzo 	return j;
3249f18be576SLuigi Rizzo }
3250f18be576SLuigi Rizzo 
3251f18be576SLuigi Rizzo 
3252f18be576SLuigi Rizzo /*
3253ce3ee1e7SLuigi Rizzo  * Pass packets from nic to the bridge.
3254ce3ee1e7SLuigi Rizzo  * XXX TODO check locking: this is called from the interrupt
3255ce3ee1e7SLuigi Rizzo  * handler so we should make sure that the interface is not
3256ce3ee1e7SLuigi Rizzo  * disconnected while passing down an interrupt.
3257ce3ee1e7SLuigi Rizzo  *
3258f18be576SLuigi Rizzo  * Note, no user process can access this NIC so we can ignore
3259f18be576SLuigi Rizzo  * the info in the 'ring'.
3260f18be576SLuigi Rizzo  */
static void
netmap_nic_to_bdg(struct ifnet *ifp, u_int ring_nr)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_kring *kring = &na->rx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int j, k;

	/* make sure that only one thread is ever in here,
	 * after which we can unlock. Probably unnecessary XXX.
	 */
	if (nm_kr_tryget(kring))
		return;		/* ring busy/stopped; packets picked up later */
	/* fetch packets that have arrived.
	 * XXX maybe do this in a loop ?
	 */
	if (na->nm_rxsync(ifp, ring_nr, 0))
		goto put_out;
	if (kring->nr_hwavail == 0 && netmap_verbose) {
		/* NOTE(review): when nr_hwavail == 0 but netmap_verbose is
		 * off, we do NOT take this early exit; the preflush below
		 * then scans an empty range, which is harmless but wasted
		 * work — confirm this is intended.
		 */
		D("how strange, interrupt with no packets on %s",
			ifp->if_xname);
		goto put_out;
	}
	/* position up to which to forward, computed by nm_kr_rxpos() */
	k = nm_kr_rxpos(kring);

	/* forward slots [nr_hwcur .. k) to the bridge; returns next pos */
	j = nm_bdg_preflush(na, ring_nr, kring, k);

	/* we consume everything, but we cannot update kring directly
	 * because the nic may have destroyed the info in the NIC ring.
	 * So we need to call rxsync again to restore it.
	 */
	ring->cur = j;
	ring->avail = 0;
	na->nm_rxsync(ifp, ring_nr, 0);

put_out:
	nm_kr_put(kring);
	return;
}
3300f18be576SLuigi Rizzo 
3301f18be576SLuigi Rizzo 
330268b8534bSLuigi Rizzo /*
3303ce3ee1e7SLuigi Rizzo  * Default functions to handle rx/tx interrupts from a physical device.
3304ce3ee1e7SLuigi Rizzo  * "work_done" is non-null on the RX path, NULL for the TX path.
3305ce3ee1e7SLuigi Rizzo  * We rely on the OS to make sure that there is only one active
3306ce3ee1e7SLuigi Rizzo  * instance per queue, and that there is appropriate locking.
3307849bec0eSLuigi Rizzo  *
3308ce3ee1e7SLuigi Rizzo  * If the card is not in netmap mode, simply return 0,
3309ce3ee1e7SLuigi Rizzo  * so that the caller proceeds with regular processing.
3310ce3ee1e7SLuigi Rizzo  *
3311ce3ee1e7SLuigi Rizzo  * If the card is connected to a netmap file descriptor,
3312ce3ee1e7SLuigi Rizzo  * do a selwakeup on the individual queue, plus one on the global one
3313ce3ee1e7SLuigi Rizzo  * if needed (multiqueue card _and_ there are multiqueue listeners),
3314ce3ee1e7SLuigi Rizzo  * and return 1.
3315ce3ee1e7SLuigi Rizzo  *
3316ce3ee1e7SLuigi Rizzo  * Finally, if called on rx from an interface connected to a switch,
3317ce3ee1e7SLuigi Rizzo  * calls the proper forwarding routine, and return 1.
33181a26580eSLuigi Rizzo  */
3319babc7c12SLuigi Rizzo int
3320ce3ee1e7SLuigi Rizzo netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done)
33211a26580eSLuigi Rizzo {
33221a26580eSLuigi Rizzo 	struct netmap_adapter *na;
3323ce3ee1e7SLuigi Rizzo 	struct netmap_kring *kring;
33241a26580eSLuigi Rizzo 
33251a26580eSLuigi Rizzo 	if (!(ifp->if_capenable & IFCAP_NETMAP))
33261a26580eSLuigi Rizzo 		return 0;
3327849bec0eSLuigi Rizzo 
3328ce3ee1e7SLuigi Rizzo 	q &= NETMAP_RING_MASK;
3329849bec0eSLuigi Rizzo 
3330ce3ee1e7SLuigi Rizzo 	if (netmap_verbose)
3331ce3ee1e7SLuigi Rizzo 		RD(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
33321a26580eSLuigi Rizzo 	na = NA(ifp);
33338241616dSLuigi Rizzo 	if (na->na_flags & NAF_SKIP_INTR) {
33348241616dSLuigi Rizzo 		ND("use regular interrupt");
33358241616dSLuigi Rizzo 		return 0;
33368241616dSLuigi Rizzo 	}
33378241616dSLuigi Rizzo 
333864ae02c3SLuigi Rizzo 	if (work_done) { /* RX path */
33398241616dSLuigi Rizzo 		if (q >= na->num_rx_rings)
3340849bec0eSLuigi Rizzo 			return 0;	// not a physical queue
3341ce3ee1e7SLuigi Rizzo 		kring = na->rx_rings + q;
3342ce3ee1e7SLuigi Rizzo 		kring->nr_kflags |= NKR_PENDINTR;	// XXX atomic ?
3343ce3ee1e7SLuigi Rizzo 		if (na->na_bdg != NULL) {
3344ce3ee1e7SLuigi Rizzo 			netmap_nic_to_bdg(ifp, q);
3345ce3ee1e7SLuigi Rizzo 		} else {
3346ce3ee1e7SLuigi Rizzo 			selwakeuppri(&kring->si, PI_NET);
3347ce3ee1e7SLuigi Rizzo 			if (na->num_rx_rings > 1 /* or multiple listeners */ )
3348ce3ee1e7SLuigi Rizzo 				selwakeuppri(&na->rx_si, PI_NET);
3349ce3ee1e7SLuigi Rizzo 		}
3350ce3ee1e7SLuigi Rizzo 		*work_done = 1; /* do not fire napi again */
3351849bec0eSLuigi Rizzo 	} else { /* TX path */
33528241616dSLuigi Rizzo 		if (q >= na->num_tx_rings)
3353849bec0eSLuigi Rizzo 			return 0;	// not a physical queue
3354ce3ee1e7SLuigi Rizzo 		kring = na->tx_rings + q;
3355ce3ee1e7SLuigi Rizzo 		selwakeuppri(&kring->si, PI_NET);
3356ce3ee1e7SLuigi Rizzo 		if (na->num_tx_rings > 1 /* or multiple listeners */ )
3357ce3ee1e7SLuigi Rizzo 			selwakeuppri(&na->tx_si, PI_NET);
335864ae02c3SLuigi Rizzo 	}
33591a26580eSLuigi Rizzo 	return 1;
33601a26580eSLuigi Rizzo }
33611a26580eSLuigi Rizzo 
336264ae02c3SLuigi Rizzo 
336301c7d25fSLuigi Rizzo #ifdef linux	/* linux-specific routines */
336401c7d25fSLuigi Rizzo 
3365f18be576SLuigi Rizzo 
336601c7d25fSLuigi Rizzo /*
336701c7d25fSLuigi Rizzo  * Remap linux arguments into the FreeBSD call.
336801c7d25fSLuigi Rizzo  * - pwait is the poll table, passed as 'dev';
336901c7d25fSLuigi Rizzo  *   If pwait == NULL someone else already woke up before. We can report
337001c7d25fSLuigi Rizzo  *   events but they are filtered upstream.
337101c7d25fSLuigi Rizzo  *   If pwait != NULL, then pwait->key contains the list of events.
337201c7d25fSLuigi Rizzo  * - events is computed from pwait as above.
337301c7d25fSLuigi Rizzo  * - file is passed as 'td';
337401c7d25fSLuigi Rizzo  */
static u_int
linux_netmap_poll(struct file * file, struct poll_table_struct *pwait)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
	/* very old kernels do not expose the event mask in pwait */
	int events = POLLIN | POLLOUT; /* XXX maybe... */
#elif LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
	/* pwait may be NULL if a wakeup already occurred */
	int events = pwait ? pwait->key : POLLIN | POLLOUT;
#else /* in 3.4.0 field 'key' was renamed to '_key' */
	int events = pwait ? pwait->_key : POLLIN | POLLOUT;
#endif
	/* forward to the FreeBSD-style handler; pwait doubles as 'dev' */
	return netmap_poll((void *)pwait, events, (void *)file);
}
338701c7d25fSLuigi Rizzo 
3388f18be576SLuigi Rizzo 
338901c7d25fSLuigi Rizzo static int
339042a3a5bdSLuigi Rizzo linux_netmap_mmap(struct file *f, struct vm_area_struct *vma)
339101c7d25fSLuigi Rizzo {
33928241616dSLuigi Rizzo 	int error = 0;
3393ce3ee1e7SLuigi Rizzo 	unsigned long off, va;
3394ce3ee1e7SLuigi Rizzo 	vm_ooffset_t pa;
3395ce3ee1e7SLuigi Rizzo 	struct netmap_priv_d *priv = f->private_data;
339601c7d25fSLuigi Rizzo 	/*
339701c7d25fSLuigi Rizzo 	 * vma->vm_start: start of mapping user address space
339801c7d25fSLuigi Rizzo 	 * vma->vm_end: end of the mapping user address space
33998241616dSLuigi Rizzo 	 * vma->vm_pfoff: offset of first page in the device
340001c7d25fSLuigi Rizzo 	 */
340101c7d25fSLuigi Rizzo 
340201c7d25fSLuigi Rizzo 	// XXX security checks
340301c7d25fSLuigi Rizzo 
3404ce3ee1e7SLuigi Rizzo 	error = netmap_get_memory(priv);
34058241616dSLuigi Rizzo 	ND("get_memory returned %d", error);
34068241616dSLuigi Rizzo 	if (error)
34078241616dSLuigi Rizzo 	    return -error;
34088241616dSLuigi Rizzo 
3409ce3ee1e7SLuigi Rizzo 	if ((vma->vm_start & ~PAGE_MASK) || (vma->vm_end & ~PAGE_MASK)) {
3410ce3ee1e7SLuigi Rizzo 		ND("vm_start = %lx vm_end = %lx", vma->vm_start, vma->vm_end);
3411ce3ee1e7SLuigi Rizzo 		return -EINVAL;
3412ce3ee1e7SLuigi Rizzo 	}
34138241616dSLuigi Rizzo 
3414ce3ee1e7SLuigi Rizzo 	for (va = vma->vm_start, off = vma->vm_pgoff;
3415ce3ee1e7SLuigi Rizzo 	     va < vma->vm_end;
3416ce3ee1e7SLuigi Rizzo 	     va += PAGE_SIZE, off++)
3417ce3ee1e7SLuigi Rizzo 	{
3418ce3ee1e7SLuigi Rizzo 		pa = netmap_mem_ofstophys(priv->np_mref, off << PAGE_SHIFT);
3419ce3ee1e7SLuigi Rizzo 		if (pa == 0)
3420ce3ee1e7SLuigi Rizzo 			return -EINVAL;
342101c7d25fSLuigi Rizzo 
3422ce3ee1e7SLuigi Rizzo 		ND("va %lx pa %p", va, pa);
3423ce3ee1e7SLuigi Rizzo 		error = remap_pfn_range(vma, va, pa >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot);
3424ce3ee1e7SLuigi Rizzo 		if (error)
3425ce3ee1e7SLuigi Rizzo 			return error;
3426ce3ee1e7SLuigi Rizzo 	}
342701c7d25fSLuigi Rizzo 	return 0;
342801c7d25fSLuigi Rizzo }
342901c7d25fSLuigi Rizzo 
3430f18be576SLuigi Rizzo 
3431ce3ee1e7SLuigi Rizzo /*
3432ce3ee1e7SLuigi Rizzo  * This one is probably already protected by the netif lock XXX
3433ce3ee1e7SLuigi Rizzo  */
static netdev_tx_t
linux_netmap_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	/* divert the outgoing skb into netmap via netmap_transmit();
	 * the skb is consumed in either case. */
	netmap_transmit(dev, skb);
	return (NETDEV_TX_OK);
}
344001c7d25fSLuigi Rizzo 
344101c7d25fSLuigi Rizzo 
/* The ioctl entry point moved from .ioctl to .unlocked_ioctl (with a
 * different signature) in kernel 2.6.36; pick the right one here. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)	// XXX was 37
#define LIN_IOCTL_NAME	.ioctl
int
linux_netmap_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long data /* arg */)
#else
#define LIN_IOCTL_NAME	.unlocked_ioctl
long
linux_netmap_ioctl(struct file *file, u_int cmd, u_long data /* arg */)
#endif
{
	int ret;
	struct nmreq nmr;
	bzero(&nmr, sizeof(nmr));

	if (cmd == NIOCTXSYNC || cmd == NIOCRXSYNC) {
		data = 0;	/* no argument required here */
	}
	/* copy the nmreq in from userspace, if present */
	if (data && copy_from_user(&nmr, (void *)data, sizeof(nmr) ) != 0)
		return -EFAULT;
	/* run the FreeBSD-style handler on the kernel copy */
	ret = netmap_ioctl(NULL, cmd, (caddr_t)&nmr, 0, (void *)file);
	/* copy results back to userspace */
	if (data && copy_to_user((void*)data, &nmr, sizeof(nmr) ) != 0)
		return -EFAULT;
	/* netmap_ioctl returns positive errors, linux wants them negative */
	return -ret;
}
346601c7d25fSLuigi Rizzo 
346701c7d25fSLuigi Rizzo 
static int
netmap_release(struct inode *inode, struct file *file)
{
	(void)inode;	/* UNUSED */
	/* tear down the per-open state allocated in linux_netmap_open() */
	if (file->private_data)
		netmap_dtor(file->private_data);
	return (0);
}
347601c7d25fSLuigi Rizzo 
3477f18be576SLuigi Rizzo 
34788241616dSLuigi Rizzo static int
34798241616dSLuigi Rizzo linux_netmap_open(struct inode *inode, struct file *file)
34808241616dSLuigi Rizzo {
34818241616dSLuigi Rizzo 	struct netmap_priv_d *priv;
34828241616dSLuigi Rizzo 	(void)inode;	/* UNUSED */
34838241616dSLuigi Rizzo 
34848241616dSLuigi Rizzo 	priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
34858241616dSLuigi Rizzo 			      M_NOWAIT | M_ZERO);
34868241616dSLuigi Rizzo 	if (priv == NULL)
34878241616dSLuigi Rizzo 		return -ENOMEM;
34888241616dSLuigi Rizzo 
34898241616dSLuigi Rizzo 	file->private_data = priv;
34908241616dSLuigi Rizzo 
34918241616dSLuigi Rizzo 	return (0);
34928241616dSLuigi Rizzo }
349301c7d25fSLuigi Rizzo 
3494f18be576SLuigi Rizzo 
/* file operations for the netmap misc device */
static struct file_operations netmap_fops = {
    .owner = THIS_MODULE,
    .open = linux_netmap_open,		/* allocate per-open state */
    .mmap = linux_netmap_mmap,		/* map the netmap memory region */
    LIN_IOCTL_NAME = linux_netmap_ioctl,	/* .ioctl or .unlocked_ioctl */
    .poll = linux_netmap_poll,
    .release = netmap_release,		/* free per-open state */
};
350301c7d25fSLuigi Rizzo 
3504f18be576SLuigi Rizzo 
/* register as a misc device; the minor number is assigned dynamically */
static struct miscdevice netmap_cdevsw = {	/* same name as FreeBSD */
	MISC_DYNAMIC_MINOR,	/* .minor */
	"netmap",		/* .name, appears as /dev/netmap */
	&netmap_fops,		/* .fops */
};
351001c7d25fSLuigi Rizzo 
351101c7d25fSLuigi Rizzo static int netmap_init(void);
351201c7d25fSLuigi Rizzo static void netmap_fini(void);
351301c7d25fSLuigi Rizzo 
3514f18be576SLuigi Rizzo 
/* Errors have negative values on linux, so flip the sign of
 * the value returned by netmap_init(). */
static int linux_netmap_init(void)
{
	return -netmap_init();
}

module_init(linux_netmap_init);
module_exit(netmap_fini);
/* export certain symbols to other modules (netmap-aware NIC drivers) */
EXPORT_SYMBOL(netmap_attach);		// driver attach routines
EXPORT_SYMBOL(netmap_detach);		// driver detach routines
EXPORT_SYMBOL(netmap_ring_reinit);	// ring init on error
EXPORT_SYMBOL(netmap_buffer_lut);
EXPORT_SYMBOL(netmap_total_buffers);	// index check
EXPORT_SYMBOL(netmap_buffer_base);
EXPORT_SYMBOL(netmap_reset);		// ring init routines
EXPORT_SYMBOL(netmap_buf_size);
EXPORT_SYMBOL(netmap_rx_irq);		// default irq handler
EXPORT_SYMBOL(netmap_no_pendintr);	// XXX mitigation - should go away
EXPORT_SYMBOL(netmap_bdg_ctl);		// bridge configuration routine
EXPORT_SYMBOL(netmap_bdg_learning);	// the default lookup function
EXPORT_SYMBOL(netmap_disable_all_rings);
EXPORT_SYMBOL(netmap_enable_all_rings);


MODULE_AUTHOR("http://info.iet.unipi.it/~luigi/netmap/");
MODULE_DESCRIPTION("The netmap packet I/O framework");
MODULE_LICENSE("Dual BSD/GPL"); /* the code here is all BSD. */
354301c7d25fSLuigi Rizzo 
354401c7d25fSLuigi Rizzo #else /* __FreeBSD__ */
354501c7d25fSLuigi Rizzo 
3546f18be576SLuigi Rizzo 
/* character device switch for /dev/netmap on FreeBSD */
static struct cdevsw netmap_cdevsw = {
	.d_version = D_VERSION,
	.d_name = "netmap",
	.d_open = netmap_open,
	.d_mmap_single = netmap_mmap_single,	/* maps the shared memory region */
	.d_ioctl = netmap_ioctl,
	.d_poll = netmap_poll,
	.d_close = netmap_close,
};
355601c7d25fSLuigi Rizzo #endif /* __FreeBSD__ */
3557babc7c12SLuigi Rizzo 
3558f196ce38SLuigi Rizzo /*
3559f196ce38SLuigi Rizzo  *---- support for virtual bridge -----
3560f196ce38SLuigi Rizzo  */
3561f196ce38SLuigi Rizzo 
3562f196ce38SLuigi Rizzo /* ----- FreeBSD if_bridge hash function ------- */
3563f196ce38SLuigi Rizzo 
3564f196ce38SLuigi Rizzo /*
3565f196ce38SLuigi Rizzo  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
3566f196ce38SLuigi Rizzo  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
3567f196ce38SLuigi Rizzo  *
3568f196ce38SLuigi Rizzo  * http://www.burtleburtle.net/bob/hash/spooky.html
3569f196ce38SLuigi Rizzo  */
/* one round of the Jenkins mixer over three 32-bit words; wrapped in
 * do { } while (0) so it expands to a single statement */
#define mix(a, b, c)                                                    \
do {                                                                    \
        a -= b; a -= c; a ^= (c >> 13);                                 \
        b -= c; b -= a; b ^= (a << 8);                                  \
        c -= a; c -= b; c ^= (b >> 13);                                 \
        a -= b; a -= c; a ^= (c >> 12);                                 \
        b -= c; b -= a; b ^= (a << 16);                                 \
        c -= a; c -= b; c ^= (b >> 5);                                  \
        a -= b; a -= c; a ^= (c >> 3);                                  \
        b -= c; b -= a; b ^= (a << 10);                                 \
        c -= a; c -= b; c ^= (b >> 15);                                 \
} while (/*CONSTCOND*/0)
3582f196ce38SLuigi Rizzo 
3583f196ce38SLuigi Rizzo static __inline uint32_t
3584f196ce38SLuigi Rizzo nm_bridge_rthash(const uint8_t *addr)
3585f196ce38SLuigi Rizzo {
3586f196ce38SLuigi Rizzo         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
3587f196ce38SLuigi Rizzo 
3588f196ce38SLuigi Rizzo         b += addr[5] << 8;
3589f196ce38SLuigi Rizzo         b += addr[4];
3590f196ce38SLuigi Rizzo         a += addr[3] << 24;
3591f196ce38SLuigi Rizzo         a += addr[2] << 16;
3592f196ce38SLuigi Rizzo         a += addr[1] << 8;
3593f196ce38SLuigi Rizzo         a += addr[0];
3594f196ce38SLuigi Rizzo 
3595f196ce38SLuigi Rizzo         mix(a, b, c);
3596f196ce38SLuigi Rizzo #define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
3597f196ce38SLuigi Rizzo         return (c & BRIDGE_RTHASH_MASK);
3598f196ce38SLuigi Rizzo }
3599f196ce38SLuigi Rizzo 
3600f196ce38SLuigi Rizzo #undef mix
3601f196ce38SLuigi Rizzo 
3602f196ce38SLuigi Rizzo 
3603f196ce38SLuigi Rizzo static int
3604f196ce38SLuigi Rizzo bdg_netmap_reg(struct ifnet *ifp, int onoff)
3605f196ce38SLuigi Rizzo {
3606f18be576SLuigi Rizzo 	/* the interface is already attached to the bridge,
3607f18be576SLuigi Rizzo 	 * so we only need to toggle IFCAP_NETMAP.
3608f196ce38SLuigi Rizzo 	 */
3609f18be576SLuigi Rizzo 	if (onoff) {
3610f196ce38SLuigi Rizzo 		ifp->if_capenable |= IFCAP_NETMAP;
3611f196ce38SLuigi Rizzo 	} else {
3612f196ce38SLuigi Rizzo 		ifp->if_capenable &= ~IFCAP_NETMAP;
3613f196ce38SLuigi Rizzo 	}
3614f18be576SLuigi Rizzo 	return 0;
3615f196ce38SLuigi Rizzo }
3616f196ce38SLuigi Rizzo 
3617f196ce38SLuigi Rizzo 
3618f18be576SLuigi Rizzo /*
3619f18be576SLuigi Rizzo  * Lookup function for a learning bridge.
3620f18be576SLuigi Rizzo  * Update the hash table with the source address,
3621f18be576SLuigi Rizzo  * and then returns the destination port index, and the
3622f18be576SLuigi Rizzo  * ring in *dst_ring (at the moment, always use ring 0)
3623f18be576SLuigi Rizzo  */
u_int
netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
		struct netmap_adapter *na)
{
	struct nm_hash_ent *ht = na->na_bdg->ht;	/* bridge forwarding table */
	uint32_t sh, dh;	/* hash indexes for src and dst MAC */
	u_int dst, mysrc = na->bdg_port;
	uint64_t smac, dmac;

	/* need at least a full ethernet header (12 MAC bytes + ethertype) */
	if (buf_len < 14) {
		D("invalid buf length %d", buf_len);
		return NM_BDG_NOPORT;
	}
	/* Load dst MAC (bytes 0..5) and src MAC (bytes 6..11) with two
	 * little-endian 64-bit reads.
	 * NOTE(review): these are unaligned loads through a cast, which
	 * assumes the platform tolerates them and the buffer alignment is
	 * adequate — confirm on strict-alignment architectures.
	 */
	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
	smac = le64toh(*(uint64_t *)(buf + 4));
	smac >>= 16;	/* drop bytes 4..5, keep bytes 6..11 (src MAC) */

	/*
	 * The hash is somewhat expensive, there might be some
	 * worthwhile optimizations here.
	 */
	if ((buf[6] & 1) == 0) { /* valid src (group bit clear) */
		uint8_t *s = buf+6;
		sh = nm_bridge_rthash(s); // XXX hash of source
		/* update source port forwarding entry */
		ht[sh].mac = smac;	/* XXX expire ? */
		ht[sh].ports = mysrc;
		if (netmap_verbose)
		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
	}
	/* default: flood; refined below if we know the destination */
	dst = NM_BDG_BROADCAST;
	if ((buf[0] & 1) == 0) { /* unicast */
		dh = nm_bridge_rthash(buf); // XXX hash of dst
		if (ht[dh].mac == dmac) {	/* found dst */
			dst = ht[dh].ports;
		}
		/* XXX otherwise return NM_BDG_UNKNOWN ? */
	}
	/* at the moment all traffic is delivered to ring 0 of the dest */
	*dst_ring = 0;
	return dst;
}
3666f196ce38SLuigi Rizzo 
3667f18be576SLuigi Rizzo 
/*
 * This flush routine supports only unicast and broadcast but a large
 * number of ports, and lets us replace the learn and dispatch functions.
 *
 * Forward a batch of n packets (stored in the work area ft) coming
 * from ring ring_nr of source port na to their destinations on the
 * bridge. Two passes:
 *   pass 1: run the bridge lookup function on each packet and chain it
 *           into a per-(destination port, ring) queue;
 *   pass 2: for each destination, lease slots on the target ring, copy
 *           the payloads, and notify the receiver (selwakeup for
 *           virtual ports, txsync for hardware ports).
 * Always returns 0; packets with no valid destination are dropped.
 */
int
nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_adapter *na,
		u_int ring_nr)
{
	struct nm_bdg_q *dst_ents, *brddst;
	uint16_t num_dsts = 0, *dsts;
	struct nm_bridge *b = na->na_bdg;
	u_int i, j, me = na->bdg_port;

	/*
	 * The work area (pointed by ft) is followed by an array of
	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
	 * queues per port plus one for the broadcast traffic.
	 * Then we have an array of destination indexes.
	 */
	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);

	/* first pass: find a destination for each packet in the batch.
	 * i advances by ft_frags so all fragments of a packet move together.
	 */
	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
		uint16_t dst_port, d_i;
		struct nm_bdg_q *d;

		ND("slot %d frags %d", i, ft[i].ft_frags);
		dst_port = b->nm_bdg_lookup(ft[i].ft_buf, ft[i].ft_len,
			&dst_ring, na);
		if (netmap_verbose > 255)
			RD(5, "slot %d port %d -> %d", i, me, dst_port);
		if (dst_port == NM_BDG_NOPORT)
			continue; /* this packet is identified to be dropped */
		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
			continue; /* out-of-range port from the lookup fn */
		else if (dst_port == NM_BDG_BROADCAST)
			dst_ring = 0; /* broadcasts always go to ring 0 */
		else if (unlikely(dst_port == me ||
		    !b->bdg_ports[dst_port]))
			continue; /* never loop back, skip detached ports */

		/* get a position in the scratch pad */
		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
		d = dst_ents + d_i;

		/* append the first fragment to the list */
		if (d->bq_head == NM_FT_NULL) { /* new destination */
			d->bq_head = d->bq_tail = i;
			/* remember this position to be scanned later.
			 * Broadcast queues are handled separately below.
			 */
			if (dst_port != NM_BDG_BROADCAST)
				dsts[num_dsts++] = d_i;
		} else {
			ft[d->bq_tail].ft_next = i;
			d->bq_tail = i;
		}
		d->bq_len += ft[i].ft_frags;
	}

	/*
	 * Broadcast traffic goes to ring 0 on all destinations.
	 * So we need to add these rings to the list of ports to scan.
	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
	 * expensive. We should keep a compact list of active destinations
	 * so we could shorten this loop.
	 */
	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
	if (brddst->bq_head != NM_FT_NULL) {
		for (j = 0; likely(j < b->bdg_active_ports); j++) {
			uint16_t d_i;
			i = b->bdg_port_index[j];
			if (unlikely(i == me))
				continue;
			d_i = i * NM_BDG_MAXRINGS;
			/* only add ports not already scheduled for unicast */
			if (dst_ents[d_i].bq_head == NM_FT_NULL)
				dsts[num_dsts++] = d_i;
		}
	}

	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
	/* second pass: scan destinations (XXX will be modular somehow) */
	for (i = 0; i < num_dsts; i++) {
		struct ifnet *dst_ifp;
		struct netmap_adapter *dst_na;
		struct netmap_kring *kring;
		struct netmap_ring *ring;
		u_int dst_nr, is_vp, lim, j, sent = 0, d_i, next, brd_next;
		u_int needed, howmany;
		int retry = netmap_txsync_retry;
		struct nm_bdg_q *d;
		uint32_t my_start = 0, lease_idx = 0;
		int nrings;

		d_i = dsts[i];
		ND("second pass %d port %d", i, d_i);
		d = dst_ents + d_i;
		// XXX fix the division
		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
		/* protect from the lookup function returning an inactive
		 * destination port
		 */
		if (unlikely(dst_na == NULL))
			goto cleanup;
		if (dst_na->na_flags & NAF_SW_ONLY)
			goto cleanup;
		dst_ifp = dst_na->ifp;
		/*
		 * The interface may be in !netmap mode in two cases:
		 * - when na is attached but not activated yet;
		 * - when na is being deactivated but is still attached.
		 */
		if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
			ND("not in netmap mode!");
			goto cleanup;
		}

		/* there is at least one either unicast or broadcast packet */
		brd_next = brddst->bq_head;
		next = d->bq_head;
		/* we need to reserve this many slots. If fewer are
		 * available, some packets will be dropped.
		 * Packets may have multiple fragments, so there is a
		 * chance that we may not use all of the slots
		 * we have claimed, so we will need to handle the leftover
		 * ones when we regain the lock.
		 */
		needed = d->bq_len + brddst->bq_len;

		is_vp = nma_is_vp(dst_na);
		ND(5, "pass 2 dst %d is %x %s",
			i, d_i, is_vp ? "virtual" : "nic/host");
		/* virtual ports receive on rx rings, hw ports transmit
		 * on tx rings; fold the ring index into the valid range.
		 */
		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
		if (is_vp) { /* virtual port */
			nrings = dst_na->num_rx_rings;
		} else {
			nrings = dst_na->num_tx_rings;
		}
		if (dst_nr >= nrings)
			dst_nr = dst_nr % nrings;
		kring = is_vp ?  &dst_na->rx_rings[dst_nr] :
				&dst_na->tx_rings[dst_nr];
		ring = kring->ring;
		lim = kring->nkr_num_slots - 1;

retry:

		/* reserve the buffers in the queue and an entry
		 * to report completion, and drop lock.
		 * XXX this might become a helper function.
		 */
		mtx_lock(&kring->q_lock);
		if (kring->nkr_stopped) {
			mtx_unlock(&kring->q_lock);
			goto cleanup;
		}
		/* on physical interfaces, do a txsync to recover
		 * slots for packets already transmitted.
		 * XXX maybe we could be optimistic and rely on a retry
		 * in case of failure.
		 */
		if (nma_is_hw(dst_na)) {
			dst_na->nm_txsync(dst_ifp, dst_nr, 0);
		}
		my_start = j = kring->nkr_hwlease;
		howmany = nm_kr_space(kring, is_vp);
		if (needed < howmany)
			howmany = needed;
		lease_idx = nm_kr_lease(kring, howmany, is_vp);
		mtx_unlock(&kring->q_lock);

		/* only retry if we need more than available slots */
		if (retry && needed <= howmany)
			retry = 0;

		/* copy to the destination queue, merging the unicast and
		 * broadcast lists in batch-index order (lock not held).
		 */
		while (howmany > 0) {
			struct netmap_slot *slot;
			struct nm_bdg_fwd *ft_p, *ft_end;
			u_int cnt;

			/* find the queue from which we pick next packet.
			 * NM_FT_NULL is always higher than valid indexes
			 * so we never dereference it if the other list
			 * has packets (and if both are empty we never
			 * get here).
			 */
			if (next < brd_next) {
				ft_p = ft + next;
				next = ft_p->ft_next;
			} else { /* insert broadcast */
				ft_p = ft + brd_next;
				brd_next = ft_p->ft_next;
			}
			cnt = ft_p->ft_frags; // cnt > 0
			/* a multi-fragment packet is copied whole or not
			 * at all.
			 */
			if (unlikely(cnt > howmany))
			    break; /* no more space */
			howmany -= cnt;
			if (netmap_verbose && cnt > 1)
				RD(5, "rx %d frags to %d", cnt, j);
			ft_end = ft_p + cnt;
			do {
			    void *dst, *src = ft_p->ft_buf;
			    /* round the copy length to a multiple of 64 */
			    size_t len = (ft_p->ft_len + 63) & ~63;

			    slot = &ring->slot[j];
			    dst = BDG_NMB(dst_na->nm_mem, slot);
			    /* round to a multiple of 64 */

			    ND("send %d %d bytes at %s:%d",
				i, ft_p->ft_len, dst_ifp->if_xname, j);
			    if (ft_p->ft_flags & NS_INDIRECT) {
				/* payload still in userspace, fetch it */
				if (copyin(src, dst, len)) {
					// invalid user pointer, pretend len is 0
					ft_p->ft_len = 0;
				}
			    } else {
				//memcpy(dst, src, len);
				pkt_copy(src, dst, (int)len);
			    }
			    slot->len = ft_p->ft_len;
			    slot->flags = (cnt << 8)| NS_MOREFRAG;
			    j = nm_next(j, lim);
			    ft_p++;
			    sent++;
			} while (ft_p != ft_end);
			slot->flags = (cnt << 8); /* clear flag on last entry */
			/* are we done ? */
			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
				break;
		}
		{
		    /* Report completion of our lease and, if every earlier
		     * lease is also complete, advance the ring and notify.
		     */
		    uint32_t *p = kring->nkr_leases; /* shorthand */
		    uint32_t update_pos;
		    int still_locked = 1;

		    mtx_lock(&kring->q_lock);
		    if (unlikely(howmany > 0)) {
			/* not used all bufs. If i am the last one
			 * i can recover the slots, otherwise must
			 * fill them with 0 to mark empty packets.
			 */
			ND("leftover %d bufs", howmany);
			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
			    /* yes i am the last one */
			    ND("roll back nkr_hwlease to %d", j);
			    kring->nkr_hwlease = j;
			} else {
			    while (howmany-- > 0) {
				ring->slot[j].len = 0;
				ring->slot[j].flags = 0;
				j = nm_next(j, lim);
			    }
			}
		    }
		    p[lease_idx] = j; /* report I am done */

		    update_pos = is_vp ? nm_kr_rxpos(kring) : ring->cur;

		    if (my_start == update_pos) {
			/* all slots before my_start have been reported,
			 * so scan subsequent leases to see if other ranges
			 * have been completed, and do a selwakeup or txsync.
		         */
			while (lease_idx != kring->nkr_lease_idx &&
				p[lease_idx] != NR_NOSLOT) {
			    j = p[lease_idx];
			    p[lease_idx] = NR_NOSLOT;
			    lease_idx = nm_next(lease_idx, lim);
			}
			/* j is the new 'write' position. j != my_start
			 * means there are new buffers to report
			 */
			if (likely(j != my_start)) {
			    if (is_vp) {
				uint32_t old_avail = kring->nr_hwavail;

				kring->nr_hwavail = (j >= kring->nr_hwcur) ?
					j - kring->nr_hwcur :
					j + lim + 1 - kring->nr_hwcur;
				if (kring->nr_hwavail < old_avail) {
					D("avail shrink %d -> %d",
						old_avail, kring->nr_hwavail);
				}
				still_locked = 0;
				mtx_unlock(&kring->q_lock);
				selwakeuppri(&kring->si, PI_NET);
			    } else {
				ring->cur = j;
				/* XXX update avail ? */
				still_locked = 0;
				dst_na->nm_txsync(dst_ifp, dst_nr, 0);
				mtx_unlock(&kring->q_lock);

				/* retry to send more packets */
				if (nma_is_hw(dst_na) && retry--)
					goto retry;
			    }
			}
		    }
		    if (still_locked)
			mtx_unlock(&kring->q_lock);
		}
cleanup:
		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
		d->bq_len = 0;
	}
	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
	brddst->bq_len = 0;
	return 0;
}
3980f196ce38SLuigi Rizzo 
3981f18be576SLuigi Rizzo 
3982f196ce38SLuigi Rizzo /*
3983ce3ee1e7SLuigi Rizzo  * main dispatch routine for the bridge.
3984ce3ee1e7SLuigi Rizzo  * We already know that only one thread is running this.
3985ce3ee1e7SLuigi Rizzo  * we must run nm_bdg_preflush without lock.
3986f196ce38SLuigi Rizzo  */
3987f196ce38SLuigi Rizzo static int
3988ce3ee1e7SLuigi Rizzo bdg_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
3989f196ce38SLuigi Rizzo {
3990f196ce38SLuigi Rizzo 	struct netmap_adapter *na = NA(ifp);
3991f196ce38SLuigi Rizzo 	struct netmap_kring *kring = &na->tx_rings[ring_nr];
3992f196ce38SLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
3993ce3ee1e7SLuigi Rizzo 	u_int j, k, lim = kring->nkr_num_slots - 1;
3994f196ce38SLuigi Rizzo 
3995f196ce38SLuigi Rizzo 	k = ring->cur;
3996f196ce38SLuigi Rizzo 	if (k > lim)
3997f196ce38SLuigi Rizzo 		return netmap_ring_reinit(kring);
3998f196ce38SLuigi Rizzo 
3999ce3ee1e7SLuigi Rizzo 	if (bridge_batch <= 0) { /* testing only */
4000f196ce38SLuigi Rizzo 		j = k; // used all
4001f196ce38SLuigi Rizzo 		goto done;
4002f196ce38SLuigi Rizzo 	}
4003ce3ee1e7SLuigi Rizzo 	if (bridge_batch > NM_BDG_BATCH)
4004ce3ee1e7SLuigi Rizzo 		bridge_batch = NM_BDG_BATCH;
4005f196ce38SLuigi Rizzo 
4006f18be576SLuigi Rizzo 	j = nm_bdg_preflush(na, ring_nr, kring, k);
4007f196ce38SLuigi Rizzo 	if (j != k)
4008f196ce38SLuigi Rizzo 		D("early break at %d/ %d, avail %d", j, k, kring->nr_hwavail);
4009ce3ee1e7SLuigi Rizzo 	/* k-j modulo ring size is the number of slots processed */
4010ce3ee1e7SLuigi Rizzo 	if (k < j)
4011ce3ee1e7SLuigi Rizzo 		k += kring->nkr_num_slots;
4012ce3ee1e7SLuigi Rizzo 	kring->nr_hwavail = lim - (k - j);
4013f196ce38SLuigi Rizzo 
4014f196ce38SLuigi Rizzo done:
4015f196ce38SLuigi Rizzo 	kring->nr_hwcur = j;
4016f196ce38SLuigi Rizzo 	ring->avail = kring->nr_hwavail;
4017f196ce38SLuigi Rizzo 	if (netmap_verbose)
4018ce3ee1e7SLuigi Rizzo 		D("%s ring %d flags %d", ifp->if_xname, ring_nr, flags);
4019f196ce38SLuigi Rizzo 	return 0;
4020f196ce38SLuigi Rizzo }
4021f196ce38SLuigi Rizzo 
4022f18be576SLuigi Rizzo 
/*
 * user process reading from a VALE switch.
 * Already protected against concurrent calls from userspace,
 * but we must acquire the queue's lock to protect against
 * writers on the same queue.
 * Returns 0 on success, or the error from netmap_ring_reinit()
 * if the ring state was found corrupted.
 */
static int
bdg_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_kring *kring = &na->rx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int j, lim = kring->nkr_num_slots - 1;
	u_int k = ring->cur, resvd = ring->reserved;
	int n;

	mtx_lock(&kring->q_lock);
	if (k > lim) {
		/* cursor out of range: userspace corrupted the ring */
		D("ouch dangerous reset!!!");
		n = netmap_ring_reinit(kring);
		goto done;
	}

	/* skip past packets that userspace has released */
	j = kring->nr_hwcur;    /* netmap ring index */
	if (resvd > 0) {
		/* slots held back by userspace are not released;
		 * back up k so they are not reclaimed.
		 */
		if (resvd + ring->avail >= lim + 1) {
			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
			ring->reserved = resvd = 0; // XXX panic...
		}
		k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
	}

	if (j != k) { /* userspace has released some packets. */
		n = k - j;
		if (n < 0)
			n += kring->nkr_num_slots;
		ND("userspace releases %d packets", n);
                for (n = 0; likely(j != k); n++) {
                        struct netmap_slot *slot = &ring->slot[j];
                        void *addr = BDG_NMB(na->nm_mem, slot);

                        if (addr == netmap_buffer_base) { /* bad buf */
				D("bad buffer index %d, ignore ?",
					slot->buf_idx);
                        }
			/* buffers on a VALE port are never swapped,
			 * so any buffer-change request is dropped here.
			 */
			slot->flags &= ~NS_BUF_CHANGED;
                        j = nm_next(j, lim);
                }
                kring->nr_hwavail -= n;
                kring->nr_hwcur = k;
        }
        /* tell userspace that there are new packets */
        ring->avail = kring->nr_hwavail - resvd;
	n = 0;
done:
	mtx_unlock(&kring->q_lock);
	return n;
}
4082f196ce38SLuigi Rizzo 
4083f18be576SLuigi Rizzo 
4084*5ab0d24dSLuigi Rizzo static int
4085f18be576SLuigi Rizzo bdg_netmap_attach(struct netmap_adapter *arg)
4086f196ce38SLuigi Rizzo {
4087f196ce38SLuigi Rizzo 	struct netmap_adapter na;
4088f196ce38SLuigi Rizzo 
4089f196ce38SLuigi Rizzo 	ND("attaching virtual bridge");
4090f196ce38SLuigi Rizzo 	bzero(&na, sizeof(na));
4091f196ce38SLuigi Rizzo 
4092f18be576SLuigi Rizzo 	na.ifp = arg->ifp;
4093ce3ee1e7SLuigi Rizzo 	na.na_flags = NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
4094f18be576SLuigi Rizzo 	na.num_tx_rings = arg->num_tx_rings;
4095f18be576SLuigi Rizzo 	na.num_rx_rings = arg->num_rx_rings;
4096ce3ee1e7SLuigi Rizzo 	na.num_tx_desc = arg->num_tx_desc;
4097ce3ee1e7SLuigi Rizzo 	na.num_rx_desc = arg->num_rx_desc;
4098f196ce38SLuigi Rizzo 	na.nm_txsync = bdg_netmap_txsync;
4099f196ce38SLuigi Rizzo 	na.nm_rxsync = bdg_netmap_rxsync;
4100f196ce38SLuigi Rizzo 	na.nm_register = bdg_netmap_reg;
4101ce3ee1e7SLuigi Rizzo 	na.nm_mem = netmap_mem_private_new(arg->ifp->if_xname,
4102ce3ee1e7SLuigi Rizzo 			na.num_tx_rings, na.num_tx_desc,
4103ce3ee1e7SLuigi Rizzo 			na.num_rx_rings, na.num_rx_desc);
4104*5ab0d24dSLuigi Rizzo 	return netmap_attach(&na, na.num_tx_rings);
4105f196ce38SLuigi Rizzo }
4106f196ce38SLuigi Rizzo 
4107babc7c12SLuigi Rizzo 
4108babc7c12SLuigi Rizzo static struct cdev *netmap_dev; /* /dev/netmap character device. */
4109babc7c12SLuigi Rizzo 
4110babc7c12SLuigi Rizzo 
41111a26580eSLuigi Rizzo /*
411268b8534bSLuigi Rizzo  * Module loader.
411368b8534bSLuigi Rizzo  *
411468b8534bSLuigi Rizzo  * Create the /dev/netmap device and initialize all global
411568b8534bSLuigi Rizzo  * variables.
411668b8534bSLuigi Rizzo  *
411768b8534bSLuigi Rizzo  * Return 0 on success, errno on failure.
411868b8534bSLuigi Rizzo  */
411968b8534bSLuigi Rizzo static int
412068b8534bSLuigi Rizzo netmap_init(void)
412168b8534bSLuigi Rizzo {
4122ce3ee1e7SLuigi Rizzo 	int i, error;
412368b8534bSLuigi Rizzo 
4124ce3ee1e7SLuigi Rizzo 	NMG_LOCK_INIT();
4125ce3ee1e7SLuigi Rizzo 
4126ce3ee1e7SLuigi Rizzo 	error = netmap_mem_init();
412768b8534bSLuigi Rizzo 	if (error != 0) {
412842a3a5bdSLuigi Rizzo 		printf("netmap: unable to initialize the memory allocator.\n");
412968b8534bSLuigi Rizzo 		return (error);
413068b8534bSLuigi Rizzo 	}
41318241616dSLuigi Rizzo 	printf("netmap: loaded module\n");
413268b8534bSLuigi Rizzo 	netmap_dev = make_dev(&netmap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0660,
413368b8534bSLuigi Rizzo 			      "netmap");
4134f196ce38SLuigi Rizzo 
4135f18be576SLuigi Rizzo 	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
4136f196ce38SLuigi Rizzo 	for (i = 0; i < NM_BRIDGES; i++)
4137ce3ee1e7SLuigi Rizzo 		BDG_RWINIT(&nm_bridges[i]);
4138babc7c12SLuigi Rizzo 	return (error);
413968b8534bSLuigi Rizzo }
414068b8534bSLuigi Rizzo 
414168b8534bSLuigi Rizzo 
414268b8534bSLuigi Rizzo /*
414368b8534bSLuigi Rizzo  * Module unloader.
414468b8534bSLuigi Rizzo  *
414568b8534bSLuigi Rizzo  * Free all the memory, and destroy the ``/dev/netmap`` device.
414668b8534bSLuigi Rizzo  */
static void
netmap_fini(void)
{
	destroy_dev(netmap_dev);	/* remove /dev/netmap */
	netmap_mem_fini();		/* release the memory allocator */
	NMG_LOCK_DESTROY();
	printf("netmap: unloaded module.\n");
}
415568b8534bSLuigi Rizzo 
415668b8534bSLuigi Rizzo 
4157f196ce38SLuigi Rizzo #ifdef __FreeBSD__
415868b8534bSLuigi Rizzo /*
415968b8534bSLuigi Rizzo  * Kernel entry point.
416068b8534bSLuigi Rizzo  *
416168b8534bSLuigi Rizzo  * Initialize/finalize the module and return.
416268b8534bSLuigi Rizzo  *
416368b8534bSLuigi Rizzo  * Return 0 on success, errno on failure.
416468b8534bSLuigi Rizzo  */
416568b8534bSLuigi Rizzo static int
416668b8534bSLuigi Rizzo netmap_loader(__unused struct module *module, int event, __unused void *arg)
416768b8534bSLuigi Rizzo {
416868b8534bSLuigi Rizzo 	int error = 0;
416968b8534bSLuigi Rizzo 
417068b8534bSLuigi Rizzo 	switch (event) {
417168b8534bSLuigi Rizzo 	case MOD_LOAD:
417268b8534bSLuigi Rizzo 		error = netmap_init();
417368b8534bSLuigi Rizzo 		break;
417468b8534bSLuigi Rizzo 
417568b8534bSLuigi Rizzo 	case MOD_UNLOAD:
417668b8534bSLuigi Rizzo 		netmap_fini();
417768b8534bSLuigi Rizzo 		break;
417868b8534bSLuigi Rizzo 
417968b8534bSLuigi Rizzo 	default:
418068b8534bSLuigi Rizzo 		error = EOPNOTSUPP;
418168b8534bSLuigi Rizzo 		break;
418268b8534bSLuigi Rizzo 	}
418368b8534bSLuigi Rizzo 
418468b8534bSLuigi Rizzo 	return (error);
418568b8534bSLuigi Rizzo }
418668b8534bSLuigi Rizzo 
418768b8534bSLuigi Rizzo 
418868b8534bSLuigi Rizzo DEV_MODULE(netmap, netmap_loader, NULL);
4189f196ce38SLuigi Rizzo #endif /* __FreeBSD__ */
4190