netmap.c (f9b2a21c9eb4d2715be82dc9049eae29fdb40d17) → netmap.c (ce3ee1e7c4cac5b86bbc15daac68f2129aa42187)
1/*
2 * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.

--- 9 unchanged lines hidden (view full) ---

18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
1/*
2 * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.

--- 9 unchanged lines hidden (view full) ---

18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26#define NM_BRIDGE
27
26
27#ifdef __FreeBSD__
28#define TEST_STUFF // test code, does not compile yet on linux
29#endif /* __FreeBSD__ */
30
28/*
29 * This module supports memory mapped access to network devices,
30 * see netmap(4).
31 *
 32 * The module uses a large memory pool allocated by the kernel
33 * and accessible as mmapped memory by multiple userspace threads/processes.
34 * The memory pool contains packet buffers and "netmap rings",
35 * i.e. user-accessible copies of the interface's queues.

--- 11 unchanged lines hidden (view full) ---

47 * can look up the occupation state of a queue, access memory buffers,
48 * and retrieve received packets or enqueue packets to transmit.
49 * 5. using some ioctl()s the process can synchronize the userspace view
50 * of the queue with the actual status in the kernel. This includes both
51 * receiving the notification of new packets, and transmitting new
52 * packets on the output interface.
53 * 6. select() or poll() can be used to wait for events on individual
54 * transmit or receive queues (or all queues for a given interface).
31/*
32 * This module supports memory mapped access to network devices,
33 * see netmap(4).
34 *
 35 * The module uses a large memory pool allocated by the kernel
36 * and accessible as mmapped memory by multiple userspace threads/processes.
37 * The memory pool contains packet buffers and "netmap rings",
38 * i.e. user-accessible copies of the interface's queues.

--- 11 unchanged lines hidden (view full) ---

50 * can look up the occupation state of a queue, access memory buffers,
51 * and retrieve received packets or enqueue packets to transmit.
52 * 5. using some ioctl()s the process can synchronize the userspace view
53 * of the queue with the actual status in the kernel. This includes both
54 * receiving the notification of new packets, and transmitting new
55 * packets on the output interface.
56 * 6. select() or poll() can be used to wait for events on individual
57 * transmit or receive queues (or all queues for a given interface).
55 */
58 *
56
59
57#ifdef linux
58#include "bsd_glue.h"
59static netdev_tx_t linux_netmap_start(struct sk_buff *skb, struct net_device *dev);
60#endif /* linux */
60 SYNCHRONIZATION (USER)
61
61
62#ifdef __APPLE__
63#include "osx_glue.h"
64#endif /* __APPLE__ */
62The netmap rings and data structures may be shared among multiple
63user threads or even independent processes.
64Any synchronization among those threads/processes is delegated
65to the threads themselves. Only one thread at a time can be in
66a system call on the same netmap ring. The OS does not enforce
67this and only guarantees against system crashes in case of
68invalid usage.
65
69
66#ifdef __FreeBSD__
70 LOCKING (INTERNAL)
71
72Within the kernel, access to the netmap rings is protected as follows:
73
74- a spinlock on each ring, to handle producer/consumer races on
75 RX rings attached to the host stack (against multiple host
76 threads writing from the host stack to the same ring),
77 and on 'destination' rings attached to a VALE switch
78 (i.e. RX rings in VALE ports, and TX rings in NIC/host ports)
 79 protecting multiple active senders for the same destination.
80
81- an atomic variable to guarantee that there is at most one
82 instance of *_*xsync() on the ring at any time.
83 For rings connected to user file
84 descriptors, an atomic_test_and_set() protects this, and the
85 lock on the ring is not actually used.
86 For NIC RX rings connected to a VALE switch, an atomic_test_and_set()
87 is also used to prevent multiple executions (the driver might indeed
88 already guarantee this).
89 For NIC TX rings connected to a VALE switch, the lock arbitrates
90 access to the queue (both when allocating buffers and when pushing
91 them out).
92
93- *xsync() should be protected against initializations of the card.
94 On FreeBSD most devices have the reset routine protected by
95 a RING lock (ixgbe, igb, em) or core lock (re). lem is missing
 96 * the RING protection on rx_reset(); this should be added.
97
98 On linux there is an external lock on the tx path, which probably
99 also arbitrates access to the reset routine. XXX to be revised
100
101- a per-interface core_lock protecting access from the host stack
102 while interfaces may be detached from netmap mode.
103 XXX there should be no need for this lock if we detach the interfaces
104 only while they are down.
105
106
107--- VALE SWITCH ---
108
109NMG_LOCK() serializes all modifications to switches and ports.
110A switch cannot be deleted until all ports are gone.
111
112For each switch, an SX lock (RWlock on linux) protects
 113deletion of ports. When configuring a new port or deleting an existing one, the
114lock is acquired in exclusive mode (after holding NMG_LOCK).
115When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
116The lock is held throughout the entire forwarding cycle,
 117during which the thread may incur a page fault.
118Hence it is important that sleepable shared locks are used.
119
120On the rx ring, the per-port lock is grabbed initially to reserve
 121a number of slots in the ring, then the lock is released,
122packets are copied from source to destination, and then
123the lock is acquired again and the receive ring is updated.
124(A similar thing is done on the tx ring for NIC and host stack
125ports attached to the switch)
126
127 */
128
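/*
 * A minimal userspace sketch of the six usage steps described above,
 * assuming the <net/netmap_user.h> helpers of this API generation
 * (rings still expose the cur/avail fields). Names and error handling
 * are illustrative only.
 */
#include <fcntl.h>
#include <poll.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <net/if.h>
#include <net/netmap.h>
#include <net/netmap_user.h>

static int
nm_example_rx_loop(const char *ifname)
{
	struct nmreq req;
	struct pollfd pfd;
	struct netmap_if *nifp;
	struct netmap_ring *rxr;
	void *mem;
	int fd;

	fd = open("/dev/netmap", O_RDWR);	/* 1. open the device */
	if (fd < 0)
		return -1;
	memset(&req, 0, sizeof(req));
	strncpy(req.nr_name, ifname, sizeof(req.nr_name) - 1);
	req.nr_version = NETMAP_API;
	if (ioctl(fd, NIOCREGIF, &req) < 0)	/* 2. put the NIC in netmap mode */
		return -1;
	mem = mmap(NULL, req.nr_memsize, PROT_READ | PROT_WRITE,
	    MAP_SHARED, fd, 0);			/* 3. map buffers and rings */
	if (mem == MAP_FAILED)
		return -1;
	nifp = NETMAP_IF(mem, req.nr_offset);	/* 4. locate the netmap_if ... */
	rxr = NETMAP_RXRING(nifp, 0);		/*    ... and the first RX ring */
	pfd.fd = fd;
	pfd.events = POLLIN;
	for (;;) {
		poll(&pfd, 1, -1);		/* 6. wait for traffic (runs rxsync) */
		while (rxr->avail > 0) {	/* 5. consume the ring */
			struct netmap_slot *slot = &rxr->slot[rxr->cur];
			char *buf = NETMAP_BUF(rxr, slot->buf_idx);

			/* ... process slot->len bytes at buf ... */
			(void)buf;
			rxr->cur = NETMAP_RING_NEXT(rxr, rxr->cur);
			rxr->avail--;
		}
	}
}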
129/*
130 * OS-specific code that is used only within this file.
131 * Other OS-specific code that must be accessed by drivers
132 * is present in netmap_kern.h
133 */
134
135#if defined(__FreeBSD__)
67#include <sys/cdefs.h> /* prerequisite */
68__FBSDID("$FreeBSD$");
69
70#include <sys/types.h>
71#include <sys/module.h>
72#include <sys/errno.h>
73#include <sys/param.h> /* defines used in kernel.h */
74#include <sys/jail.h>

--- 4 unchanged lines hidden (view full) ---

79#include <sys/socketvar.h> /* struct socket */
80#include <sys/malloc.h>
81#include <sys/mman.h> /* PROT_EXEC */
82#include <sys/poll.h>
83#include <sys/proc.h>
84#include <sys/rwlock.h>
85#include <vm/vm.h> /* vtophys */
86#include <vm/pmap.h> /* vtophys */
136#include <sys/cdefs.h> /* prerequisite */
137__FBSDID("$FreeBSD$");
138
139#include <sys/types.h>
140#include <sys/module.h>
141#include <sys/errno.h>
142#include <sys/param.h> /* defines used in kernel.h */
143#include <sys/jail.h>

--- 4 unchanged lines hidden (view full) ---

148#include <sys/socketvar.h> /* struct socket */
149#include <sys/malloc.h>
150#include <sys/mman.h> /* PROT_EXEC */
151#include <sys/poll.h>
152#include <sys/proc.h>
153#include <sys/rwlock.h>
154#include <vm/vm.h> /* vtophys */
155#include <vm/pmap.h> /* vtophys */
156#include <vm/vm_param.h>
157#include <vm/vm_object.h>
158#include <vm/vm_page.h>
159#include <vm/vm_pager.h>
160#include <vm/uma.h>
87#include <sys/socket.h> /* sockaddrs */
161#include <sys/socket.h> /* sockaddrs */
88#include <machine/bus.h>
89#include <sys/selinfo.h>
90#include <sys/sysctl.h>
91#include <net/if.h>
92#include <net/if_var.h>
93#include <net/bpf.h> /* BIOCIMMEDIATE */
94#include <net/vnet.h>
95#include <machine/bus.h> /* bus_dmamap_* */
162#include <sys/selinfo.h>
163#include <sys/sysctl.h>
164#include <net/if.h>
165#include <net/if_var.h>
166#include <net/bpf.h> /* BIOCIMMEDIATE */
167#include <net/vnet.h>
168#include <machine/bus.h> /* bus_dmamap_* */
169#include <sys/endian.h>
170#include <sys/refcount.h>
96
171
97MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");
98#endif /* __FreeBSD__ */
172#define prefetch(x) __builtin_prefetch(x)
99
173
174#define BDG_RWLOCK_T struct rwlock // struct rwlock
175
176#define BDG_RWINIT(b) \
177 rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
178#define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock)
179#define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock)
180#define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock)
181#define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock)
182#define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock)
183#define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock)
184
185
186/* netmap global lock.
187 * normally called within the user thread (upon a system call)
188 * or when a file descriptor or process is terminated
189 * (last close or last munmap)
190 */
191
192#define NMG_LOCK_T struct mtx
193#define NMG_LOCK_INIT() mtx_init(&netmap_global_lock, "netmap global lock", NULL, MTX_DEF)
194#define NMG_LOCK_DESTROY() mtx_destroy(&netmap_global_lock)
195#define NMG_LOCK() mtx_lock(&netmap_global_lock)
196#define NMG_UNLOCK() mtx_unlock(&netmap_global_lock)
197#define NMG_LOCK_ASSERT() mtx_assert(&netmap_global_lock, MA_OWNED)
198
199
200/* atomic operations */
201#include <machine/atomic.h>
202#define NM_ATOMIC_TEST_AND_SET(p) (!atomic_cmpset_acq_int((p), 0, 1))
203#define NM_ATOMIC_CLEAR(p) atomic_store_rel_int((p), 0)
204
205
206#elif defined(linux)
207
208#include "bsd_glue.h"
209
210static netdev_tx_t linux_netmap_start_xmit(struct sk_buff *, struct net_device *);
211
212static struct device_driver*
213linux_netmap_find_driver(struct device *dev)
214{
215 struct device_driver *dd;
216
217 while ( (dd = dev->driver) == NULL ) {
218 if ( (dev = dev->parent) == NULL )
219 return NULL;
220 }
221 return dd;
222}
223
224static struct net_device*
225ifunit_ref(const char *name)
226{
227 struct net_device *ifp = dev_get_by_name(&init_net, name);
228 struct device_driver *dd;
229
230 if (ifp == NULL)
231 return NULL;
232
233 if ( (dd = linux_netmap_find_driver(&ifp->dev)) == NULL )
234 goto error;
235
236 if (!try_module_get(dd->owner))
237 goto error;
238
239 return ifp;
240error:
241 dev_put(ifp);
242 return NULL;
243}
244
245static void
246if_rele(struct net_device *ifp)
247{
248 struct device_driver *dd;
249 dd = linux_netmap_find_driver(&ifp->dev);
250 dev_put(ifp);
251 if (dd)
252 module_put(dd->owner);
253}
254
255// XXX a mtx would suffice here too 20130404 gl
256#define NMG_LOCK_T struct semaphore
257#define NMG_LOCK_INIT() sema_init(&netmap_global_lock, 1)
258#define NMG_LOCK_DESTROY()
259#define NMG_LOCK() down(&netmap_global_lock)
260#define NMG_UNLOCK() up(&netmap_global_lock)
261#define NMG_LOCK_ASSERT() // XXX to be completed
262
263
264#elif defined(__APPLE__)
265
266#warning OSX support is only partial
267#include "osx_glue.h"
268
269#else
270
271#error Unsupported platform
272
273#endif /* unsupported */
274
275/*
276 * common headers
277 */
100#include <net/netmap.h>
101#include <dev/netmap/netmap_kern.h>
278#include <net/netmap.h>
279#include <dev/netmap/netmap_kern.h>
280#include <dev/netmap/netmap_mem2.h>
102
281
103/* XXX the following variables must be deprecated and included in nm_mem */
282
283MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");
284
285/*
286 * The following variables are used by the drivers and replicate
287 * fields in the global memory pool. They only refer to buffers
288 * used by physical interfaces.
289 */
104u_int netmap_total_buffers;
105u_int netmap_buf_size;
290u_int netmap_total_buffers;
291u_int netmap_buf_size;
106char *netmap_buffer_base; /* address of an invalid buffer */
292char *netmap_buffer_base; /* also address of an invalid buffer */
107
108/* user-controlled variables */
109int netmap_verbose;
110
111static int netmap_no_timestamp; /* don't timestamp on rxsync */
112
113SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args");
114SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,

--- 7 unchanged lines hidden (view full) ---

122 CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets.");
123int netmap_txsync_retry = 2;
124SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
125 &netmap_txsync_retry, 0 , "Number of txsync loops in bridge's flush.");
126
127int netmap_drop = 0; /* debugging */
128int netmap_flags = 0; /* debug flags */
129int netmap_fwd = 0; /* force transparent mode */
293
294/* user-controlled variables */
295int netmap_verbose;
296
297static int netmap_no_timestamp; /* don't timestamp on rxsync */
298
299SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args");
300SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,

--- 7 unchanged lines hidden (view full) ---

308 CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets.");
309int netmap_txsync_retry = 2;
310SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
311 &netmap_txsync_retry, 0 , "Number of txsync loops in bridge's flush.");
312
313int netmap_drop = 0; /* debugging */
314int netmap_flags = 0; /* debug flags */
315int netmap_fwd = 0; /* force transparent mode */
316int netmap_mmap_unreg = 0; /* allow mmap of unregistered fds */
130
131SYSCTL_INT(_dev_netmap, OID_AUTO, drop, CTLFLAG_RW, &netmap_drop, 0 , "");
132SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
133SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
317
318SYSCTL_INT(_dev_netmap, OID_AUTO, drop, CTLFLAG_RW, &netmap_drop, 0 , "");
319SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
320SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
321SYSCTL_INT(_dev_netmap, OID_AUTO, mmap_unreg, CTLFLAG_RW, &netmap_mmap_unreg, 0, "");
134
322
135#ifdef NM_BRIDGE /* support for netmap virtual switch, called VALE */
323NMG_LOCK_T netmap_global_lock;
136
137/*
324
325/*
326 * protect against multiple threads using the same ring.
327 * also check that the ring has not been stopped.
328 */
329#define NM_KR_BUSY 1
330#define NM_KR_STOPPED 2
331static void nm_kr_put(struct netmap_kring *kr);
332static __inline int nm_kr_tryget(struct netmap_kring *kr)
333{
334 /* check a first time without taking the lock
335 * to avoid starvation for nm_kr_get()
336 */
337 if (unlikely(kr->nkr_stopped)) {
338 ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
339 return NM_KR_STOPPED;
340 }
341 if (unlikely(NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)))
342 return NM_KR_BUSY;
343 /* check a second time with lock held */
344 if (unlikely(kr->nkr_stopped)) {
345 ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
346 nm_kr_put(kr);
347 return NM_KR_STOPPED;
348 }
349 return 0;
350}
351
352static __inline void nm_kr_put(struct netmap_kring *kr)
353{
354 NM_ATOMIC_CLEAR(&kr->nr_busy);
355}
356
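/*
 * Sketch of the intended caller pattern for the busy/stopped protocol
 * above (illustrative helper, not present in either revision): a sync
 * routine simply skips a ring that is stopped or owned by another thread.
 */
static inline int
nm_kr_example_sync(struct netmap_kring *kr)
{
	int error = nm_kr_tryget(kr);

	if (error)
		return error;	/* NM_KR_BUSY or NM_KR_STOPPED */
	/* ... run the txsync/rxsync body on this kring ... */
	nm_kr_put(kr);		/* let other threads (and nm_kr_get()) in */
	return 0;
}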
357static void nm_kr_get(struct netmap_kring *kr)
358{
359 while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy))
360 tsleep(kr, 0, "NM_KR_GET", 4);
361}
362
363static void nm_disable_ring(struct netmap_kring *kr)
364{
365 kr->nkr_stopped = 1;
366 nm_kr_get(kr);
367 mtx_lock(&kr->q_lock);
368 mtx_unlock(&kr->q_lock);
369 nm_kr_put(kr);
370}
371
372void netmap_disable_all_rings(struct ifnet *ifp)
373{
374 struct netmap_adapter *na;
375 int i;
376
377 if (!(ifp->if_capenable & IFCAP_NETMAP))
378 return;
379
380 na = NA(ifp);
381
382 for (i = 0; i < na->num_tx_rings + 1; i++) {
383 nm_disable_ring(na->tx_rings + i);
384 selwakeuppri(&na->tx_rings[i].si, PI_NET);
385 }
386 for (i = 0; i < na->num_rx_rings + 1; i++) {
387 nm_disable_ring(na->rx_rings + i);
388 selwakeuppri(&na->rx_rings[i].si, PI_NET);
389 }
390 selwakeuppri(&na->tx_si, PI_NET);
391 selwakeuppri(&na->rx_si, PI_NET);
392}
393
394void netmap_enable_all_rings(struct ifnet *ifp)
395{
396 struct netmap_adapter *na;
397 int i;
398
399 if (!(ifp->if_capenable & IFCAP_NETMAP))
400 return;
401
402 na = NA(ifp);
403 for (i = 0; i < na->num_tx_rings + 1; i++) {
404 D("enabling %p", na->tx_rings + i);
405 na->tx_rings[i].nkr_stopped = 0;
406 }
407 for (i = 0; i < na->num_rx_rings + 1; i++) {
408 D("enabling %p", na->rx_rings + i);
409 na->rx_rings[i].nkr_stopped = 0;
410 }
411}
412
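/*
 * Sketch of the intended driver-side usage (hypothetical helper, not a
 * real driver entry point): quiesce all netmap krings around a hardware
 * reinitialization so that no *xsync() runs while rings are rebuilt.
 */
static inline void
nm_example_reinit(struct ifnet *ifp)
{
	netmap_disable_all_rings(ifp);	/* mark krings stopped, wake sleepers */
	/* ... reset and reprogram the NIC rings here ... */
	netmap_enable_all_rings(ifp);	/* clear nkr_stopped, admit users again */
}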
413
414/*
415 * generic bound_checking function
416 */
417u_int
418nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg)
419{
420 u_int oldv = *v;
421 const char *op = NULL;
422
423 if (dflt < lo)
424 dflt = lo;
425 if (dflt > hi)
426 dflt = hi;
427 if (oldv < lo) {
428 *v = dflt;
429 op = "Bump";
430 } else if (oldv > hi) {
431 *v = hi;
432 op = "Clamp";
433 }
434 if (op && msg)
435 printf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
436 return *v;
437}
438
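/*
 * Example (hypothetical variable and limits): clamp a user tunable into
 * a sane range, logging the adjustment on the console.
 */
static inline void
nm_example_bound(u_int *num_slots)
{
	nm_bound_var(num_slots, 1024 /* default */, 64 /* min */,
	    4096 /* max */, "ring slots");
}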
439/*
 440 * packet-dump function; writes into a user-supplied or static buffer.
441 * The destination buffer must be at least 30+4*len
442 */
443const char *
444nm_dump_buf(char *p, int len, int lim, char *dst)
445{
446 static char _dst[8192];
447 int i, j, i0;
448 static char hex[] ="0123456789abcdef";
449 char *o; /* output position */
450
451#define P_HI(x) hex[((x) & 0xf0)>>4]
452#define P_LO(x) hex[((x) & 0xf)]
453#define P_C(x) ((x) >= 0x20 && (x) <= 0x7e ? (x) : '.')
454 if (!dst)
455 dst = _dst;
456 if (lim <= 0 || lim > len)
457 lim = len;
458 o = dst;
459 sprintf(o, "buf 0x%p len %d lim %d\n", p, len, lim);
460 o += strlen(o);
461 /* hexdump routine */
462 for (i = 0; i < lim; ) {
463 sprintf(o, "%5d: ", i);
464 o += strlen(o);
465 memset(o, ' ', 48);
466 i0 = i;
467 for (j=0; j < 16 && i < lim; i++, j++) {
468 o[j*3] = P_HI(p[i]);
469 o[j*3+1] = P_LO(p[i]);
470 }
471 i = i0;
472 for (j=0; j < 16 && i < lim; i++, j++)
473 o[j + 48] = P_C(p[i]);
474 o[j+48] = '\n';
475 o += j+49;
476 }
477 *o = '\0';
478#undef P_HI
479#undef P_LO
480#undef P_C
481 return dst;
482}
483
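/*
 * Example use (sketch): log the first 64 bytes of a frame through the
 * D() macro, letting nm_dump_buf() fall back to its static scratch buffer.
 */
static inline void
nm_example_dump(char *frame, int len)
{
	D("%s", nm_dump_buf(frame, len, 64, NULL));
}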
484/*
138 * system parameters (most of them in netmap_kern.h)
139 * NM_NAME prefix for switch port names, default "vale"
485 * system parameters (most of them in netmap_kern.h)
486 * NM_NAME prefix for switch port names, default "vale"
140 * NM_MAXPORTS number of ports
487 * NM_BDG_MAXPORTS number of ports
141 * NM_BRIDGES max number of switches in the system.
142 * XXX should become a sysctl or tunable
143 *
144 * Switch ports are named valeX:Y where X is the switch name and Y
145 * is the port. If Y matches a physical interface name, the port is
146 * connected to a physical device.
147 *
148 * Unlike physical interfaces, switch ports use their own memory region
149 * for rings and buffers.
150 * The virtual interfaces use per-queue lock instead of core lock.
151 * In the tx loop, we aggregate traffic in batches to make all operations
488 * NM_BRIDGES max number of switches in the system.
489 * XXX should become a sysctl or tunable
490 *
491 * Switch ports are named valeX:Y where X is the switch name and Y
492 * is the port. If Y matches a physical interface name, the port is
493 * connected to a physical device.
494 *
495 * Unlike physical interfaces, switch ports use their own memory region
496 * for rings and buffers.
497 * The virtual interfaces use per-queue lock instead of core lock.
498 * In the tx loop, we aggregate traffic in batches to make all operations
152 * faster. The batch size is NM_BDG_BATCH
499 * faster. The batch size is bridge_batch.
153 */
154#define NM_BDG_MAXRINGS 16 /* XXX unclear how many. */
500 */
501#define NM_BDG_MAXRINGS 16 /* XXX unclear how many. */
502#define NM_BDG_MAXSLOTS 4096 /* XXX same as above */
155#define NM_BRIDGE_RINGSIZE 1024 /* in the device */
156#define NM_BDG_HASH 1024 /* forwarding table entries */
157#define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */
503#define NM_BRIDGE_RINGSIZE 1024 /* in the device */
504#define NM_BDG_HASH 1024 /* forwarding table entries */
505#define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */
506#define NM_MULTISEG 64 /* max size of a chain of bufs */
507/* actual size of the tables */
508#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG)
509/* NM_FT_NULL terminates a list of slots in the ft */
510#define NM_FT_NULL NM_BDG_BATCH_MAX
158#define NM_BRIDGES 8 /* number of bridges */
159
160
511#define NM_BRIDGES 8 /* number of bridges */
512
513
161int netmap_bridge = NM_BDG_BATCH; /* bridge batch size */
162SYSCTL_INT(_dev_netmap, OID_AUTO, bridge, CTLFLAG_RW, &netmap_bridge, 0 , "");
514/*
515 * bridge_batch is set via sysctl to the max batch size to be
516 * used in the bridge. The actual value may be larger as the
517 * last packet in the block may overflow the size.
518 */
519int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
520SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
163
521
164#ifdef linux
165
522
166#define refcount_acquire(_a) atomic_add(1, (atomic_t *)_a)
167#define refcount_release(_a) atomic_dec_and_test((atomic_t *)_a)
168
169#else /* !linux */
170
171#ifdef __FreeBSD__
172#include <sys/endian.h>
173#include <sys/refcount.h>
174#endif /* __FreeBSD__ */
175
176#define prefetch(x) __builtin_prefetch(x)
177
178#endif /* !linux */
179
180/*
181 * These are used to handle reference counters for bridge ports.
182 */
183#define ADD_BDG_REF(ifp) refcount_acquire(&NA(ifp)->na_bdg_refcount)
184#define DROP_BDG_REF(ifp) refcount_release(&NA(ifp)->na_bdg_refcount)
185
523/*
524 * These are used to handle reference counters for bridge ports.
525 */
526#define ADD_BDG_REF(ifp) refcount_acquire(&NA(ifp)->na_bdg_refcount)
527#define DROP_BDG_REF(ifp) refcount_release(&NA(ifp)->na_bdg_refcount)
528
 529/* The bridge references the buffers using the device-specific lookup table */
530static inline void *
531BDG_NMB(struct netmap_mem_d *nmd, struct netmap_slot *slot)
532{
533 struct lut_entry *lut = nmd->pools[NETMAP_BUF_POOL].lut;
534 uint32_t i = slot->buf_idx;
535 return (unlikely(i >= nmd->pools[NETMAP_BUF_POOL].objtotal)) ? lut[0].vaddr : lut[i].vaddr;
536}
537
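/*
 * Usage sketch (hypothetical helper): translate a slot of a VALE port
 * into a kernel virtual address before copying its payload. Out-of-range
 * indexes resolve to buffer 0, so the source address is always valid.
 */
static inline void
nm_example_bdg_copy(struct netmap_adapter *na, struct netmap_slot *slot,
	void *dst)
{
	memcpy(dst, BDG_NMB(na->nm_mem, slot), slot->len);
}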
186static void bdg_netmap_attach(struct netmap_adapter *);
187static int bdg_netmap_reg(struct ifnet *ifp, int onoff);
538static void bdg_netmap_attach(struct netmap_adapter *);
539static int bdg_netmap_reg(struct ifnet *ifp, int onoff);
188static int kern_netmap_regif(struct nmreq *nmr);
540int kern_netmap_regif(struct nmreq *nmr);
189
541
190/* per-tx-queue entry */
542/*
543 * Each transmit queue accumulates a batch of packets into
544 * a structure before forwarding. Packets to the same
545 * destination are put in a list using ft_next as a link field.
546 * ft_frags and ft_next are valid only on the first fragment.
547 */
191struct nm_bdg_fwd { /* forwarding entry for a bridge */
548struct nm_bdg_fwd { /* forwarding entry for a bridge */
192 void *ft_buf;
193 uint16_t _ft_dst; /* dst port, unused */
549 void *ft_buf; /* netmap or indirect buffer */
550 uint8_t ft_frags; /* how many fragments (only on 1st frag) */
551 uint8_t _ft_port; /* dst port (unused) */
194 uint16_t ft_flags; /* flags, e.g. indirect */
552 uint16_t ft_flags; /* flags, e.g. indirect */
195 uint16_t ft_len; /* src len */
553 uint16_t ft_len; /* src fragment len */
196 uint16_t ft_next; /* next packet to same destination */
197};
198
554 uint16_t ft_next; /* next packet to same destination */
555};
556
199/* We need to build a list of buffers going to each destination.
 200 * Each buffer is in one entry of struct nm_bdg_fwd; we use ft_next
 201 * to build the list, and struct nm_bdg_q below for the queue.
 202 * The structure should be compact because potentially we have a lot
203 * of destinations.
557/*
558 * For each output interface, nm_bdg_q is used to construct a list.
559 * bq_len is the number of output buffers (we can have coalescing
560 * during the copy).
204 */
205struct nm_bdg_q {
206 uint16_t bq_head;
207 uint16_t bq_tail;
561 */
562struct nm_bdg_q {
563 uint16_t bq_head;
564 uint16_t bq_tail;
565 uint32_t bq_len; /* number of buffers */
208};
209
566};
567
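/*
 * Sketch (illustrative helper, not in either revision): how a slot i of
 * the ft[] array is appended to a per-destination queue, using ft_next
 * as the link field and NM_FT_NULL as the list terminator.
 */
static inline void
nm_example_bdg_enqueue(struct nm_bdg_fwd *ft, struct nm_bdg_q *d, uint16_t i)
{
	ft[i].ft_next = NM_FT_NULL;		/* the new entry becomes the tail */
	if (d->bq_head == NM_FT_NULL)
		d->bq_head = i;			/* first entry in the queue */
	else
		ft[d->bq_tail].ft_next = i;	/* link after the old tail */
	d->bq_tail = i;
	d->bq_len++;
}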
568/* XXX revise this */
210struct nm_hash_ent {
211 uint64_t mac; /* the top 2 bytes are the epoch */
212 uint64_t ports;
213};
214
215/*
569struct nm_hash_ent {
570 uint64_t mac; /* the top 2 bytes are the epoch */
571 uint64_t ports;
572};
573
574/*
575 * nm_bridge is a descriptor for a VALE switch.
216 * Interfaces for a bridge are all in bdg_ports[].
217 * The array has fixed size, an empty entry does not terminate
576 * Interfaces for a bridge are all in bdg_ports[].
577 * The array has fixed size, an empty entry does not terminate
218 * the search. But lookups only occur on attach/detach so we
578 * the search, but lookups only occur on attach/detach so we
219 * don't mind if they are slow.
220 *
579 * don't mind if they are slow.
580 *
221 * The bridge is non blocking on the transmit ports.
581 * The bridge is non blocking on the transmit ports: excess
582 * packets are dropped if there is no room on the output port.
222 *
223 * bdg_lock protects accesses to the bdg_ports array.
224 * This is a rw lock (or equivalent).
225 */
226struct nm_bridge {
583 *
584 * bdg_lock protects accesses to the bdg_ports array.
585 * This is a rw lock (or equivalent).
586 */
587struct nm_bridge {
227 int namelen; /* 0 means free */
228
229 /* XXX what is the proper alignment/layout ? */
588 /* XXX what is the proper alignment/layout ? */
230 NM_RWLOCK_T bdg_lock; /* protects bdg_ports */
589 BDG_RWLOCK_T bdg_lock; /* protects bdg_ports */
590 int bdg_namelen;
591 uint32_t bdg_active_ports; /* 0 means free */
592 char bdg_basename[IFNAMSIZ];
593
594 /* Indexes of active ports (up to active_ports)
595 * and all other remaining ports.
596 */
597 uint8_t bdg_port_index[NM_BDG_MAXPORTS];
598
231 struct netmap_adapter *bdg_ports[NM_BDG_MAXPORTS];
232
599 struct netmap_adapter *bdg_ports[NM_BDG_MAXPORTS];
600
233 char basename[IFNAMSIZ];
601
234 /*
235 * The function to decide the destination port.
 236 * It returns either an index of the destination port,
237 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
238 * forward this packet. ring_nr is the source ring index, and the
239 * function may overwrite this value to forward this packet to a
240 * different ring index.
241 * This function must be set by netmap_bdgctl().
242 */
243 bdg_lookup_fn_t nm_bdg_lookup;
244
602 /*
603 * The function to decide the destination port.
 604 * It returns either an index of the destination port,
605 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
606 * forward this packet. ring_nr is the source ring index, and the
607 * function may overwrite this value to forward this packet to a
608 * different ring index.
609 * This function must be set by netmap_bdgctl().
610 */
611 bdg_lookup_fn_t nm_bdg_lookup;
612
245 /* the forwarding table, MAC+ports */
613 /* the forwarding table, MAC+ports.
614 * XXX should be changed to an argument to be passed to
615 * the lookup function, and allocated on attach
616 */
246 struct nm_hash_ent ht[NM_BDG_HASH];
247};
248
617 struct nm_hash_ent ht[NM_BDG_HASH];
618};
619
620
621/*
622 * XXX in principle nm_bridges could be created dynamically
623 * Right now we have a static array and deletions are protected
624 * by an exclusive lock.
625 */
249struct nm_bridge nm_bridges[NM_BRIDGES];
626struct nm_bridge nm_bridges[NM_BRIDGES];
250NM_LOCK_T netmap_bridge_mutex;
251
627
252/* other OS will have these macros defined in their own glue code. */
253
628
254#ifdef __FreeBSD__
255#define BDG_LOCK() mtx_lock(&netmap_bridge_mutex)
256#define BDG_UNLOCK() mtx_unlock(&netmap_bridge_mutex)
257#define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock)
258#define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock)
259#define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock)
260#define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock)
261
262/* set/get variables. OS-specific macros may wrap these
263 * assignments into read/write lock or similar
629/*
 630 * A few functions to tell which kind of port we are using.
631 * XXX should we hold a lock ?
632 *
633 * nma_is_vp() virtual port
634 * nma_is_host() port connected to the host stack
635 * nma_is_hw() port connected to a NIC
264 */
636 */
265#define BDG_SET_VAR(lval, p) (lval = p)
266#define BDG_GET_VAR(lval) (lval)
267#define BDG_FREE(p) free(p, M_DEVBUF)
268#endif /* __FreeBSD__ */
269
270static __inline int
637int nma_is_vp(struct netmap_adapter *na);
638int
271nma_is_vp(struct netmap_adapter *na)
272{
273 return na->nm_register == bdg_netmap_reg;
274}
639nma_is_vp(struct netmap_adapter *na)
640{
641 return na->nm_register == bdg_netmap_reg;
642}
643
275static __inline int
276nma_is_host(struct netmap_adapter *na)
277{
278 return na->nm_register == NULL;
279}
644static __inline int
645nma_is_host(struct netmap_adapter *na)
646{
647 return na->nm_register == NULL;
648}
649
280static __inline int
281nma_is_hw(struct netmap_adapter *na)
282{
283 /* In case of sw adapter, nm_register is NULL */
284 return !nma_is_vp(na) && !nma_is_host(na);
285}
286
650static __inline int
651nma_is_hw(struct netmap_adapter *na)
652{
653 /* In case of sw adapter, nm_register is NULL */
654 return !nma_is_vp(na) && !nma_is_host(na);
655}
656
657
287/*
658/*
288 * Regarding holding a NIC, if the NIC is owned by the kernel
659 * If the NIC is owned by the kernel
289 * (i.e., bridge), neither another bridge nor user can use it;
290 * if the NIC is owned by a user, only users can share it.
660 * (i.e., bridge), neither another bridge nor user can use it;
661 * if the NIC is owned by a user, only users can share it.
291 * Evaluation must be done under NMA_LOCK().
662 * Evaluation must be done under NMG_LOCK().
292 */
293#define NETMAP_OWNED_BY_KERN(ifp) (!nma_is_vp(NA(ifp)) && NA(ifp)->na_bdg)
294#define NETMAP_OWNED_BY_ANY(ifp) \
295 (NETMAP_OWNED_BY_KERN(ifp) || (NA(ifp)->refcount > 0))
296
297/*
298 * NA(ifp)->bdg_port port index
299 */
300
663 */
664#define NETMAP_OWNED_BY_KERN(ifp) (!nma_is_vp(NA(ifp)) && NA(ifp)->na_bdg)
665#define NETMAP_OWNED_BY_ANY(ifp) \
666 (NETMAP_OWNED_BY_KERN(ifp) || (NA(ifp)->refcount > 0))
667
668/*
669 * NA(ifp)->bdg_port port index
670 */
671
301// XXX only for multiples of 64 bytes, non overlapped.
672
673/*
674 * this is a slightly optimized copy routine which rounds
675 * to multiple of 64 bytes and is often faster than dealing
676 * with other odd sizes. We assume there is enough room
677 * in the source and destination buffers.
678 *
679 * XXX only for multiples of 64 bytes, non overlapped.
680 */
302static inline void
303pkt_copy(void *_src, void *_dst, int l)
304{
305 uint64_t *src = _src;
306 uint64_t *dst = _dst;
307 if (unlikely(l >= 1024)) {
681static inline void
682pkt_copy(void *_src, void *_dst, int l)
683{
684 uint64_t *src = _src;
685 uint64_t *dst = _dst;
686 if (unlikely(l >= 1024)) {
308 bcopy(src, dst, l);
687 memcpy(dst, src, l);
309 return;
310 }
311 for (; likely(l > 0); l-=64) {
312 *dst++ = *src++;
313 *dst++ = *src++;
314 *dst++ = *src++;
315 *dst++ = *src++;
316 *dst++ = *src++;
317 *dst++ = *src++;
318 *dst++ = *src++;
319 *dst++ = *src++;
320 }
321}
322
323
688 return;
689 }
690 for (; likely(l > 0); l-=64) {
691 *dst++ = *src++;
692 *dst++ = *src++;
693 *dst++ = *src++;
694 *dst++ = *src++;
695 *dst++ = *src++;
696 *dst++ = *src++;
697 *dst++ = *src++;
698 *dst++ = *src++;
699 }
700}
701
702
703#ifdef TEST_STUFF
704struct xxx {
705 char *name;
706 void (*fn)(uint32_t);
707};
708
709
710static void
711nm_test_defmtx(uint32_t n)
712{
713 uint32_t i;
714 struct mtx m;
715 mtx_init(&m, "test", NULL, MTX_DEF);
716 for (i = 0; i < n; i++) { mtx_lock(&m); mtx_unlock(&m); }
717 mtx_destroy(&m);
718 return;
719}
720
721static void
722nm_test_spinmtx(uint32_t n)
723{
724 uint32_t i;
725 struct mtx m;
726 mtx_init(&m, "test", NULL, MTX_SPIN);
727 for (i = 0; i < n; i++) { mtx_lock(&m); mtx_unlock(&m); }
728 mtx_destroy(&m);
729 return;
730}
731
732static void
733nm_test_rlock(uint32_t n)
734{
735 uint32_t i;
736 struct rwlock m;
737 rw_init(&m, "test");
738 for (i = 0; i < n; i++) { rw_rlock(&m); rw_runlock(&m); }
739 rw_destroy(&m);
740 return;
741}
742
743static void
744nm_test_wlock(uint32_t n)
745{
746 uint32_t i;
747 struct rwlock m;
748 rw_init(&m, "test");
749 for (i = 0; i < n; i++) { rw_wlock(&m); rw_wunlock(&m); }
750 rw_destroy(&m);
751 return;
752}
753
754static void
755nm_test_slock(uint32_t n)
756{
757 uint32_t i;
758 struct sx m;
759 sx_init(&m, "test");
760 for (i = 0; i < n; i++) { sx_slock(&m); sx_sunlock(&m); }
761 sx_destroy(&m);
762 return;
763}
764
765static void
766nm_test_xlock(uint32_t n)
767{
768 uint32_t i;
769 struct sx m;
770 sx_init(&m, "test");
771 for (i = 0; i < n; i++) { sx_xlock(&m); sx_xunlock(&m); }
772 sx_destroy(&m);
773 return;
774}
775
776
777struct xxx nm_tests[] = {
778 { "defmtx", nm_test_defmtx },
779 { "spinmtx", nm_test_spinmtx },
780 { "rlock", nm_test_rlock },
781 { "wlock", nm_test_wlock },
782 { "slock", nm_test_slock },
783 { "xlock", nm_test_xlock },
784};
785
786static int
787nm_test(struct nmreq *nmr)
788{
789 uint32_t scale, n, test;
790 static int old_test = -1;
791
792 test = nmr->nr_cmd;
793 scale = nmr->nr_offset;
794 n = sizeof(nm_tests) / sizeof(struct xxx) - 1;
795 if (test > n) {
796 D("test index too high, max %d", n);
797 return 0;
798 }
799
800 if (old_test != test) {
801 D("test %s scale %d", nm_tests[test].name, scale);
802 old_test = test;
803 }
804 nm_tests[test].fn(scale);
805 return 0;
806}
807#endif /* TEST_STUFF */
808
324/*
325 * locate a bridge among the existing ones.
809/*
810 * locate a bridge among the existing ones.
811 * MUST BE CALLED WITH NMG_LOCK()
812 *
326 * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
327 * We assume that this is called with a name of at least NM_NAME chars.
328 */
329static struct nm_bridge *
330nm_find_bridge(const char *name, int create)
331{
332 int i, l, namelen;
333 struct nm_bridge *b = NULL;
334
813 * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
814 * We assume that this is called with a name of at least NM_NAME chars.
815 */
816static struct nm_bridge *
817nm_find_bridge(const char *name, int create)
818{
819 int i, l, namelen;
820 struct nm_bridge *b = NULL;
821
822 NMG_LOCK_ASSERT();
823
335 namelen = strlen(NM_NAME); /* base length */
824 namelen = strlen(NM_NAME); /* base length */
336 l = strlen(name); /* actual length */
825 l = name ? strlen(name) : 0; /* actual length */
826 if (l < namelen) {
827 D("invalid bridge name %s", name ? name : NULL);
828 return NULL;
829 }
337 for (i = namelen + 1; i < l; i++) {
338 if (name[i] == ':') {
339 namelen = i;
340 break;
341 }
342 }
343 if (namelen >= IFNAMSIZ)
344 namelen = IFNAMSIZ;
345 ND("--- prefix is '%.*s' ---", namelen, name);
346
830 for (i = namelen + 1; i < l; i++) {
831 if (name[i] == ':') {
832 namelen = i;
833 break;
834 }
835 }
836 if (namelen >= IFNAMSIZ)
837 namelen = IFNAMSIZ;
838 ND("--- prefix is '%.*s' ---", namelen, name);
839
347 BDG_LOCK();
348 /* lookup the name, remember empty slot if there is one */
349 for (i = 0; i < NM_BRIDGES; i++) {
350 struct nm_bridge *x = nm_bridges + i;
351
840 /* lookup the name, remember empty slot if there is one */
841 for (i = 0; i < NM_BRIDGES; i++) {
842 struct nm_bridge *x = nm_bridges + i;
843
352 if (x->namelen == 0) {
844 if (x->bdg_active_ports == 0) {
353 if (create && b == NULL)
354 b = x; /* record empty slot */
845 if (create && b == NULL)
846 b = x; /* record empty slot */
355 } else if (x->namelen != namelen) {
847 } else if (x->bdg_namelen != namelen) {
356 continue;
848 continue;
357 } else if (strncmp(name, x->basename, namelen) == 0) {
849 } else if (strncmp(name, x->bdg_basename, namelen) == 0) {
358 ND("found '%.*s' at %d", namelen, name, i);
359 b = x;
360 break;
361 }
362 }
363 if (i == NM_BRIDGES && b) { /* name not found, can create entry */
850 ND("found '%.*s' at %d", namelen, name, i);
851 b = x;
852 break;
853 }
854 }
855 if (i == NM_BRIDGES && b) { /* name not found, can create entry */
364 strncpy(b->basename, name, namelen);
365 b->namelen = namelen;
856 /* initialize the bridge */
857 strncpy(b->bdg_basename, name, namelen);
858 ND("create new bridge %s with ports %d", b->bdg_basename,
859 b->bdg_active_ports);
860 b->bdg_namelen = namelen;
861 b->bdg_active_ports = 0;
862 for (i = 0; i < NM_BDG_MAXPORTS; i++)
863 b->bdg_port_index[i] = i;
366 /* set the default function */
367 b->nm_bdg_lookup = netmap_bdg_learning;
368 /* reset the MAC address table */
369 bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
370 }
864 /* set the default function */
865 b->nm_bdg_lookup = netmap_bdg_learning;
866 /* reset the MAC address table */
867 bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
868 }
371 BDG_UNLOCK();
372 return b;
373}
374
375
376/*
377 * Free the forwarding tables for rings attached to switch ports.
378 */
379static void
380nm_free_bdgfwd(struct netmap_adapter *na)
381{
382 int nrings, i;
383 struct netmap_kring *kring;
384
869 return b;
870}
871
872
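/*
 * Usage sketch (hypothetical port names): with NM_NAME == "vale", both
 * lookups below resolve to the same switch, created on the first call.
 */
static inline void
nm_example_find_bridge(void)
{
	struct nm_bridge *b1, *b2;

	NMG_LOCK();
	b1 = nm_find_bridge("vale7:eth0", 1 /* create */);
	b2 = nm_find_bridge("vale7:vi1", 0 /* lookup only */);
	if (b1 != NULL && b1 == b2)
		ND("bridge %s has %d active ports", b1->bdg_basename,
		    b1->bdg_active_ports);
	NMG_UNLOCK();
}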
873/*
874 * Free the forwarding tables for rings attached to switch ports.
875 */
876static void
877nm_free_bdgfwd(struct netmap_adapter *na)
878{
879 int nrings, i;
880 struct netmap_kring *kring;
881
882 NMG_LOCK_ASSERT();
385 nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
386 kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
387 for (i = 0; i < nrings; i++) {
388 if (kring[i].nkr_ft) {
389 free(kring[i].nkr_ft, M_DEVBUF);
390 kring[i].nkr_ft = NULL; /* protect from freeing twice */
391 }
392 }

--- 6 unchanged lines hidden (view full) ---

399 * Allocate the forwarding tables for the rings attached to the bridge ports.
400 */
401static int
402nm_alloc_bdgfwd(struct netmap_adapter *na)
403{
404 int nrings, l, i, num_dstq;
405 struct netmap_kring *kring;
406
883 nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
884 kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
885 for (i = 0; i < nrings; i++) {
886 if (kring[i].nkr_ft) {
887 free(kring[i].nkr_ft, M_DEVBUF);
888 kring[i].nkr_ft = NULL; /* protect from freeing twice */
889 }
890 }

--- 6 unchanged lines hidden (view full) ---

897 * Allocate the forwarding tables for the rings attached to the bridge ports.
898 */
899static int
900nm_alloc_bdgfwd(struct netmap_adapter *na)
901{
902 int nrings, l, i, num_dstq;
903 struct netmap_kring *kring;
904
905 NMG_LOCK_ASSERT();
407 /* all port:rings + broadcast */
408 num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
906 /* all port:rings + broadcast */
907 num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
409 l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH;
908 l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
410 l += sizeof(struct nm_bdg_q) * num_dstq;
909 l += sizeof(struct nm_bdg_q) * num_dstq;
411 l += sizeof(uint16_t) * NM_BDG_BATCH;
910 l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
412
413 nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
414 kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
415 for (i = 0; i < nrings; i++) {
416 struct nm_bdg_fwd *ft;
417 struct nm_bdg_q *dstq;
418 int j;
419
420 ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
421 if (!ft) {
422 nm_free_bdgfwd(na);
423 return ENOMEM;
424 }
911
912 nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
913 kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
914 for (i = 0; i < nrings; i++) {
915 struct nm_bdg_fwd *ft;
916 struct nm_bdg_q *dstq;
917 int j;
918
919 ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
920 if (!ft) {
921 nm_free_bdgfwd(na);
922 return ENOMEM;
923 }
425 dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH);
426 for (j = 0; j < num_dstq; j++)
427 dstq[j].bq_head = dstq[j].bq_tail = NM_BDG_BATCH;
924 dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
925 for (j = 0; j < num_dstq; j++) {
926 dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
927 dstq[j].bq_len = 0;
928 }
428 kring[i].nkr_ft = ft;
429 }
430 if (nma_is_hw(na))
431 nm_alloc_bdgfwd(SWNA(na->ifp));
432 return 0;
433}
434
929 kring[i].nkr_ft = ft;
930 }
931 if (nma_is_hw(na))
932 nm_alloc_bdgfwd(SWNA(na->ifp));
933 return 0;
934}
935
435#endif /* NM_BRIDGE */
436
936
437
438/*
439 * Fetch configuration from the device, to cope with dynamic
440 * reconfigurations after loading the module.
441 */
442static int
443netmap_update_config(struct netmap_adapter *na)
444{
445 struct ifnet *ifp = na->ifp;

--- 28 unchanged lines hidden (view full) ---

474 na->num_rx_rings = rxr;
475 na->num_rx_desc = rxd;
476 return 0;
477 }
478 D("configuration changed while active, this is bad...");
479 return 1;
480}
481
937/*
938 * Fetch configuration from the device, to cope with dynamic
939 * reconfigurations after loading the module.
940 */
941static int
942netmap_update_config(struct netmap_adapter *na)
943{
944 struct ifnet *ifp = na->ifp;

--- 28 unchanged lines hidden (view full) ---

973 na->num_rx_rings = rxr;
974 na->num_rx_desc = rxd;
975 return 0;
976 }
977 D("configuration changed while active, this is bad...");
978 return 1;
979}
980
482/*------------- memory allocator -----------------*/
483#include "netmap_mem2.c"
484/*------------ end of memory allocator ----------*/
981static struct netmap_if *
982netmap_if_new(const char *ifname, struct netmap_adapter *na)
983{
984 if (netmap_update_config(na)) {
985 /* configuration mismatch, report and fail */
986 return NULL;
987 }
988 return netmap_mem_if_new(ifname, na);
989}
485
486
487/* Structure associated to each thread which registered an interface.
488 *
489 * The first 4 fields of this structure are written by NIOCREGIF and
490 * read by poll() and NIOC?XSYNC.
491 * There is low contention among writers (actually, a correct user program
492 * should have no contention among writers) and among writers and readers,

--- 9 unchanged lines hidden (view full) ---

502 * memory allocator. The refcount must be incremented at most once for
503 * each open("/dev/netmap"). The increment is performed by the first
504 * function that calls netmap_get_memory() (currently called by
505 * mmap(), NIOCGINFO and NIOCREGIF).
506 * If the refcount is incremented, it is then decremented when the
507 * private structure is destroyed.
508 */
509struct netmap_priv_d {
990
991
992/* Structure associated to each thread which registered an interface.
993 *
994 * The first 4 fields of this structure are written by NIOCREGIF and
995 * read by poll() and NIOC?XSYNC.
996 * There is low contention among writers (actually, a correct user program
997 * should have no contention among writers) and among writers and readers,

--- 9 unchanged lines hidden (view full) ---

1007 * memory allocator. The refcount must be incremented at most once for
1008 * each open("/dev/netmap"). The increment is performed by the first
1009 * function that calls netmap_get_memory() (currently called by
1010 * mmap(), NIOCGINFO and NIOCREGIF).
1011 * If the refcount is incremented, it is then decremented when the
1012 * private structure is destroyed.
1013 */
1014struct netmap_priv_d {
510 struct netmap_if * volatile np_nifp; /* netmap interface descriptor. */
1015 struct netmap_if * volatile np_nifp; /* netmap if descriptor. */
511
1016
512 struct ifnet *np_ifp; /* device for which we hold a reference */
1017 struct ifnet *np_ifp; /* device for which we hold a ref. */
513 int np_ringid; /* from the ioctl */
514 u_int np_qfirst, np_qlast; /* range of rings to scan */
515 uint16_t np_txpoll;
516
1018 int np_ringid; /* from the ioctl */
1019 u_int np_qfirst, np_qlast; /* range of rings to scan */
1020 uint16_t np_txpoll;
1021
517 unsigned long ref_done; /* use with NMA_LOCK held */
1022 struct netmap_mem_d *np_mref; /* use with NMG_LOCK held */
1023#ifdef __FreeBSD__
1024 int np_refcount; /* use with NMG_LOCK held */
1025#endif /* __FreeBSD__ */
518};
519
1026};
1027
520
1028/* grab a reference to the memory allocator, if we don't have one already. The
1029 * reference is taken from the netmap_adapter registered with the priv.
1030 *
1031 */
521static int
1032static int
522netmap_get_memory(struct netmap_priv_d* p)
1033netmap_get_memory_locked(struct netmap_priv_d* p)
523{
1034{
1035 struct netmap_mem_d *nmd;
524 int error = 0;
1036 int error = 0;
525 NMA_LOCK();
526 if (!p->ref_done) {
527 error = netmap_memory_finalize();
1037
1038 if (p->np_ifp == NULL) {
1039 if (!netmap_mmap_unreg)
1040 return ENODEV;
1041 /* for compatibility with older versions of the API
1042 * we use the global allocator when no interface has been
1043 * registered
1044 */
1045 nmd = &nm_mem;
1046 } else {
1047 nmd = NA(p->np_ifp)->nm_mem;
1048 }
1049 if (p->np_mref == NULL) {
1050 error = netmap_mem_finalize(nmd);
528 if (!error)
1051 if (!error)
529 p->ref_done = 1;
1052 p->np_mref = nmd;
1053 } else if (p->np_mref != nmd) {
1054 /* a virtual port has been registered, but previous
1055 * syscalls already used the global allocator.
1056 * We cannot continue
1057 */
1058 error = ENODEV;
530 }
1059 }
531 NMA_UNLOCK();
532 return error;
533}
534
1060 return error;
1061}
1062
1063static int
1064netmap_get_memory(struct netmap_priv_d* p)
1065{
1066 int error;
1067 NMG_LOCK();
1068 error = netmap_get_memory_locked(p);
1069 NMG_UNLOCK();
1070 return error;
1071}
1072
1073static int
1074netmap_have_memory_locked(struct netmap_priv_d* p)
1075{
1076 return p->np_mref != NULL;
1077}
1078
1079static void
1080netmap_drop_memory_locked(struct netmap_priv_d* p)
1081{
1082 if (p->np_mref) {
1083 netmap_mem_deref(p->np_mref);
1084 p->np_mref = NULL;
1085 }
1086}
1087
535/*
536 * File descriptor's private data destructor.
537 *
538 * Call nm_register(ifp,0) to stop netmap mode on the interface and
539 * revert to normal operation. We expect that np_ifp has not gone.
1088/*
1089 * File descriptor's private data destructor.
1090 *
1091 * Call nm_register(ifp,0) to stop netmap mode on the interface and
1092 * revert to normal operation. We expect that np_ifp has not gone.
1093 * The second argument is the nifp to work on. In some cases it is
1094 * not attached yet to the netmap_priv_d so we need to pass it as
1095 * a separate argument.
540 */
1096 */
541/* call with NMA_LOCK held */
1097/* call with NMG_LOCK held */
542static void
1098static void
543netmap_dtor_locked(void *data)
1099netmap_do_unregif(struct netmap_priv_d *priv, struct netmap_if *nifp)
544{
1100{
545 struct netmap_priv_d *priv = data;
546 struct ifnet *ifp = priv->np_ifp;
547 struct netmap_adapter *na = NA(ifp);
1101 struct ifnet *ifp = priv->np_ifp;
1102 struct netmap_adapter *na = NA(ifp);
548 struct netmap_if *nifp = priv->np_nifp;
549
1103
1104 NMG_LOCK_ASSERT();
550 na->refcount--;
551 if (na->refcount <= 0) { /* last instance */
1105 na->refcount--;
1106 if (na->refcount <= 0) { /* last instance */
552 u_int i, j, lim;
1107 u_int i;
553
554 if (netmap_verbose)
555 D("deleting last instance for %s", ifp->if_xname);
556 /*
557 * (TO CHECK) This function is only called
558 * when the last reference to this file descriptor goes
559 * away. This means we cannot have any pending poll()
560 * or interrupt routine operating on the structure.
1108
1109 if (netmap_verbose)
1110 D("deleting last instance for %s", ifp->if_xname);
1111 /*
1112 * (TO CHECK) This function is only called
1113 * when the last reference to this file descriptor goes
1114 * away. This means we cannot have any pending poll()
1115 * or interrupt routine operating on the structure.
1116 * XXX The file may be closed in a thread while
1117 * another thread is using it.
1118 * Linux keeps the file opened until the last reference
1119 * by any outstanding ioctl/poll or mmap is gone.
1120 * FreeBSD does not track mmap()s (but we do) and
1121 * wakes up any sleeping poll(). Need to check what
1122 * happens if the close() occurs while a concurrent
1123 * syscall is running.
561 */
562 na->nm_register(ifp, 0); /* off, clear IFCAP_NETMAP */
563 /* Wake up any sleeping threads. netmap_poll will
564 * then return POLLERR
1124 */
1125 na->nm_register(ifp, 0); /* off, clear IFCAP_NETMAP */
1126 /* Wake up any sleeping threads. netmap_poll will
1127 * then return POLLERR
1128 * XXX The wake up now must happen during *_down(), when
1129 * we order all activities to stop. -gl
565 */
1130 */
566 for (i = 0; i < na->num_tx_rings + 1; i++)
567 selwakeuppri(&na->tx_rings[i].si, PI_NET);
568 for (i = 0; i < na->num_rx_rings + 1; i++)
569 selwakeuppri(&na->rx_rings[i].si, PI_NET);
570 selwakeuppri(&na->tx_si, PI_NET);
571 selwakeuppri(&na->rx_si, PI_NET);
572#ifdef NM_BRIDGE
573 nm_free_bdgfwd(na);
1131 nm_free_bdgfwd(na);
574#endif /* NM_BRIDGE */
575 /* release all buffers */
576 for (i = 0; i < na->num_tx_rings + 1; i++) {
1132 for (i = 0; i < na->num_tx_rings + 1; i++) {
577 struct netmap_ring *ring = na->tx_rings[i].ring;
578 lim = na->tx_rings[i].nkr_num_slots;
579 for (j = 0; j < lim; j++)
580 netmap_free_buf(nifp, ring->slot[j].buf_idx);
581 /* knlist_destroy(&na->tx_rings[i].si.si_note); */
582 mtx_destroy(&na->tx_rings[i].q_lock);
583 }
584 for (i = 0; i < na->num_rx_rings + 1; i++) {
1133 mtx_destroy(&na->tx_rings[i].q_lock);
1134 }
1135 for (i = 0; i < na->num_rx_rings + 1; i++) {
585 struct netmap_ring *ring = na->rx_rings[i].ring;
586 lim = na->rx_rings[i].nkr_num_slots;
587 for (j = 0; j < lim; j++)
588 netmap_free_buf(nifp, ring->slot[j].buf_idx);
589 /* knlist_destroy(&na->rx_rings[i].si.si_note); */
590 mtx_destroy(&na->rx_rings[i].q_lock);
591 }
592 /* XXX kqueue(9) needed; these will mirror knlist_init. */
593 /* knlist_destroy(&na->tx_si.si_note); */
594 /* knlist_destroy(&na->rx_si.si_note); */
1136 mtx_destroy(&na->rx_rings[i].q_lock);
1137 }
1138 /* XXX kqueue(9) needed; these will mirror knlist_init. */
1139 /* knlist_destroy(&na->tx_si.si_note); */
1140 /* knlist_destroy(&na->rx_si.si_note); */
595 netmap_free_rings(na);
596 if (nma_is_hw(na))
597 SWNA(ifp)->tx_rings = SWNA(ifp)->rx_rings = NULL;
598 }
1141 if (nma_is_hw(na))
1142 SWNA(ifp)->tx_rings = SWNA(ifp)->rx_rings = NULL;
1143 }
599 netmap_if_free(nifp);
1144 /*
1145 * netmap_mem_if_delete() deletes the nifp, and if this is
1146 * the last instance also buffers, rings and krings.
1147 */
1148 netmap_mem_if_delete(na, nifp);
600}
601
602
1149}
1150
1151
603/* we assume netmap adapter exists */
1152/* we assume netmap adapter exists
1153 * Called with NMG_LOCK held
1154 */
604static void
605nm_if_rele(struct ifnet *ifp)
606{
1155static void
1156nm_if_rele(struct ifnet *ifp)
1157{
607#ifndef NM_BRIDGE
608 if_rele(ifp);
609#else /* NM_BRIDGE */
610 int i, full = 0, is_hw;
1158 int i, is_hw, hw, sw, lim;
611 struct nm_bridge *b;
612 struct netmap_adapter *na;
1159 struct nm_bridge *b;
1160 struct netmap_adapter *na;
1161 uint8_t tmp[NM_BDG_MAXPORTS];
613
1162
1163 NMG_LOCK_ASSERT();
614 /* I can be called not only for get_ifp()-ed references where netmap's
615 * capability is guaranteed, but also for non-netmap-capable NICs.
616 */
617 if (!NETMAP_CAPABLE(ifp) || !NA(ifp)->na_bdg) {
618 if_rele(ifp);
619 return;
620 }
1164 /* I can be called not only for get_ifp()-ed references where netmap's
1165 * capability is guaranteed, but also for non-netmap-capable NICs.
1166 */
1167 if (!NETMAP_CAPABLE(ifp) || !NA(ifp)->na_bdg) {
1168 if_rele(ifp);
1169 return;
1170 }
621 if (!DROP_BDG_REF(ifp))
622 return;
623
624 na = NA(ifp);
625 b = na->na_bdg;
626 is_hw = nma_is_hw(na);
627
1171 na = NA(ifp);
1172 b = na->na_bdg;
1173 is_hw = nma_is_hw(na);
1174
628 BDG_WLOCK(b);
629 ND("want to disconnect %s from the bridge", ifp->if_xname);
630 full = 0;
631 /* remove the entry from the bridge, also check
632 * if there are any leftover interfaces
633 * XXX we should optimize this code, e.g. going directly
634 * to na->bdg_port, and having a counter of ports that
635 * are connected. But it is not in a critical path.
636 * In NIC's case, index of sw na is always higher than hw na
1175 ND("%s has %d references", ifp->if_xname, NA(ifp)->na_bdg_refcount);
1176
1177 if (!DROP_BDG_REF(ifp))
1178 return;
1179
1180 /*
1181 New algorithm:
1182 make a copy of bdg_port_index;
1183 lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
1184 in the array of bdg_port_index, replacing them with
1185 entries from the bottom of the array;
1186 decrement bdg_active_ports;
1187 acquire BDG_WLOCK() and copy back the array.
637 */
1188 */
638 for (i = 0; i < NM_BDG_MAXPORTS; i++) {
639 struct netmap_adapter *tmp = BDG_GET_VAR(b->bdg_ports[i]);
1189
1190 hw = NA(ifp)->bdg_port;
1191 sw = (is_hw && SWNA(ifp)->na_bdg) ? SWNA(ifp)->bdg_port : -1;
1192 lim = b->bdg_active_ports;
640
1193
641 if (tmp == na) {
642 /* disconnect from bridge */
643 BDG_SET_VAR(b->bdg_ports[i], NULL);
644 na->na_bdg = NULL;
645 if (is_hw && SWNA(ifp)->na_bdg) {
646 /* disconnect sw adapter too */
647 int j = SWNA(ifp)->bdg_port;
648 BDG_SET_VAR(b->bdg_ports[j], NULL);
649 SWNA(ifp)->na_bdg = NULL;
650 }
651 } else if (tmp != NULL) {
652 full = 1;
1194 ND("detach %d and %d (lim %d)", hw, sw, lim);
1195 /* make a copy of the list of active ports, update it,
1196 * and then copy back within BDG_WLOCK().
1197 */
1198 memcpy(tmp, b->bdg_port_index, sizeof(tmp));
1199 for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
1200 if (hw >= 0 && tmp[i] == hw) {
1201 ND("detach hw %d at %d", hw, i);
1202 lim--; /* point to last active port */
1203 tmp[i] = tmp[lim]; /* swap with i */
1204 tmp[lim] = hw; /* now this is inactive */
1205 hw = -1;
1206 } else if (sw >= 0 && tmp[i] == sw) {
1207 ND("detach sw %d at %d", sw, i);
1208 lim--;
1209 tmp[i] = tmp[lim];
1210 tmp[lim] = sw;
1211 sw = -1;
1212 } else {
1213 i++;
653 }
654 }
1214 }
1215 }
1216 if (hw >= 0 || sw >= 0) {
1217 D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
1218 }
1219 hw = NA(ifp)->bdg_port;
1220 sw = (is_hw && SWNA(ifp)->na_bdg) ? SWNA(ifp)->bdg_port : -1;
1221
1222 BDG_WLOCK(b);
1223 b->bdg_ports[hw] = NULL;
1224 na->na_bdg = NULL;
1225 if (sw >= 0) {
1226 b->bdg_ports[sw] = NULL;
1227 SWNA(ifp)->na_bdg = NULL;
1228 }
1229 memcpy(b->bdg_port_index, tmp, sizeof(tmp));
1230 b->bdg_active_ports = lim;
655 BDG_WUNLOCK(b);
1231 BDG_WUNLOCK(b);
656 if (full == 0) {
657 ND("marking bridge %d as free", b - nm_bridges);
658 b->namelen = 0;
1232
1233 ND("now %d active ports", lim);
1234 if (lim == 0) {
1235 ND("marking bridge %s as free", b->bdg_basename);
659 b->nm_bdg_lookup = NULL;
660 }
1236 b->nm_bdg_lookup = NULL;
1237 }
661 if (na->na_bdg) { /* still attached to the bridge */
662 D("ouch, cannot find ifp to remove");
663 } else if (is_hw) {
1238
1239 if (is_hw) {
664 if_rele(ifp);
665 } else {
1240 if_rele(ifp);
1241 } else {
1242 if (na->na_flags & NAF_MEM_OWNER)
1243 netmap_mem_private_delete(na->nm_mem);
666 bzero(na, sizeof(*na));
667 free(na, M_DEVBUF);
668 bzero(ifp, sizeof(*ifp));
669 free(ifp, M_DEVBUF);
670 }
1244 bzero(na, sizeof(*na));
1245 free(na, M_DEVBUF);
1246 bzero(ifp, sizeof(*ifp));
1247 free(ifp, M_DEVBUF);
1248 }
671#endif /* NM_BRIDGE */
672}
673
1249}
1250
674static void
675netmap_dtor(void *data)
1251
1252/*
1253 * returns 1 if this is the last instance and we can free priv
1254 */
1255static int
1256netmap_dtor_locked(struct netmap_priv_d *priv)
676{
1257{
677 struct netmap_priv_d *priv = data;
678 struct ifnet *ifp = priv->np_ifp;
679
1258 struct ifnet *ifp = priv->np_ifp;
1259
680 NMA_LOCK();
1260#ifdef __FreeBSD__
1261 /*
1262 * np_refcount is the number of active mmaps on
1263 * this file descriptor
1264 */
1265 if (--priv->np_refcount > 0) {
1266 return 0;
1267 }
1268#endif /* __FreeBSD__ */
681 if (ifp) {
1269 if (ifp) {
682 struct netmap_adapter *na = NA(ifp);
1270 netmap_do_unregif(priv, priv->np_nifp);
1271 }
1272 netmap_drop_memory_locked(priv);
1273 if (ifp) {
1274 nm_if_rele(ifp); /* might also destroy *na */
1275 }
1276 return 1;
1277}
683
1278
684 if (na->na_bdg)
685 BDG_WLOCK(na->na_bdg);
686 na->nm_lock(ifp, NETMAP_REG_LOCK, 0);
687 netmap_dtor_locked(data);
688 na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
689 if (na->na_bdg)
690 BDG_WUNLOCK(na->na_bdg);
1279static void
1280netmap_dtor(void *data)
1281{
1282 struct netmap_priv_d *priv = data;
1283 int last_instance;
691
1284
692 nm_if_rele(ifp); /* might also destroy *na */
1285 NMG_LOCK();
1286 last_instance = netmap_dtor_locked(priv);
1287 NMG_UNLOCK();
1288 if (last_instance) {
1289 bzero(priv, sizeof(*priv)); /* for safety */
1290 free(priv, M_DEVBUF);
693 }
1291 }
694 if (priv->ref_done) {
695 netmap_memory_deref();
696 }
697 NMA_UNLOCK();
698 bzero(priv, sizeof(*priv)); /* XXX for safety */
699 free(priv, M_DEVBUF);
700}
701
702
703#ifdef __FreeBSD__
1292}
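/*
 * Editor's note (illustrative trace, not part of the source): on FreeBSD
 * the reference counting above pairs up as follows.  netmap_open() starts
 * priv at np_refcount = 1 (dropped by the cdevpriv destructor, i.e. this
 * netmap_dtor), and every successful netmap_mmap_single() adds one
 * reference that the corresponding pager destructor drops:
 *
 *	open()                np_refcount = 1
 *	mmap()                np_refcount = 2
 *	close() / cdev dtor   np_refcount = 1   (priv kept alive by the mapping)
 *	munmap() / pager dtor np_refcount = 0   -> netmap_dtor_locked() frees priv
 */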
1293
1294
1295#ifdef __FreeBSD__
704#include <vm/vm.h>
705#include <vm/vm_param.h>
706#include <vm/vm_object.h>
707#include <vm/vm_page.h>
708#include <vm/vm_pager.h>
709#include <vm/uma.h>
710
711/*
712 * In order to track whether pages are still mapped, we hook into
713 * the standard cdev_pager and intercept the constructor and
714 * destructor.
1296
1297/*
1298 * In order to track whether pages are still mapped, we hook into
1299 * the standard cdev_pager and intercept the constructor and
1300 * destructor.
715 * XXX but then ? Do we really use the information ?
716 * Need to investigate.
717 */
1301 */
718static struct cdev_pager_ops saved_cdev_pager_ops;
719
1302
1303struct netmap_vm_handle_t {
1304 struct cdev *dev;
1305 struct netmap_priv_d *priv;
1306};
720
721static int
722netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
723 vm_ooffset_t foff, struct ucred *cred, u_short *color)
724{
1307
1308static int
1309netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
1310 vm_ooffset_t foff, struct ucred *cred, u_short *color)
1311{
725 if (netmap_verbose)
726 D("first mmap for %p", handle);
727 return saved_cdev_pager_ops.cdev_pg_ctor(handle,
728 size, prot, foff, cred, color);
1312 struct netmap_vm_handle_t *vmh = handle;
1313 D("handle %p size %jd prot %d foff %jd",
1314 handle, (intmax_t)size, prot, (intmax_t)foff);
1315 dev_ref(vmh->dev);
1316 return 0;
729}
730
731
732static void
733netmap_dev_pager_dtor(void *handle)
734{
1317}
1318
1319
1320static void
1321netmap_dev_pager_dtor(void *handle)
1322{
735 saved_cdev_pager_ops.cdev_pg_dtor(handle);
736 ND("ready to release memory for %p", handle);
1323 struct netmap_vm_handle_t *vmh = handle;
1324 struct cdev *dev = vmh->dev;
1325 struct netmap_priv_d *priv = vmh->priv;
1326 D("handle %p", handle);
1327 netmap_dtor(priv);
1328 free(vmh, M_DEVBUF);
1329 dev_rel(dev);
737}
738
1330}
1331
1332static int
1333netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
1334 int prot, vm_page_t *mres)
1335{
1336 struct netmap_vm_handle_t *vmh = object->handle;
1337 struct netmap_priv_d *priv = vmh->priv;
1338 vm_paddr_t paddr;
1339 vm_page_t page;
1340 vm_memattr_t memattr;
1341 vm_pindex_t pidx;
739
1342
1343 ND("object %p offset %jd prot %d mres %p",
1344 object, (intmax_t)offset, prot, mres);
1345 memattr = object->memattr;
1346 pidx = OFF_TO_IDX(offset);
1347 paddr = netmap_mem_ofstophys(priv->np_mref, offset);
1348 if (paddr == 0)
1349 return VM_PAGER_FAIL;
1350
1351 if (((*mres)->flags & PG_FICTITIOUS) != 0) {
1352 /*
1353 * If the passed in result page is a fake page, update it with
1354 * the new physical address.
1355 */
1356 page = *mres;
1357 vm_page_updatefake(page, paddr, memattr);
1358 } else {
1359 /*
1360 * Replace the passed in reqpage page with our own fake page and
1361 * free up all of the original pages.
1362 */
1363#ifndef VM_OBJECT_WUNLOCK /* FreeBSD < 10.x */
1364#define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK
1365#define VM_OBJECT_WLOCK VM_OBJECT_LOCK
1366#endif /* VM_OBJECT_WUNLOCK */
1367
1368 VM_OBJECT_WUNLOCK(object);
1369 page = vm_page_getfake(paddr, memattr);
1370 VM_OBJECT_WLOCK(object);
1371 vm_page_lock(*mres);
1372 vm_page_free(*mres);
1373 vm_page_unlock(*mres);
1374 *mres = page;
1375 vm_page_insert(page, object, pidx);
1376 }
1377 page->valid = VM_PAGE_BITS_ALL;
1378 return (VM_PAGER_OK);
1379}
1380
1381
740static struct cdev_pager_ops netmap_cdev_pager_ops = {
741 .cdev_pg_ctor = netmap_dev_pager_ctor,
742 .cdev_pg_dtor = netmap_dev_pager_dtor,
1382static struct cdev_pager_ops netmap_cdev_pager_ops = {
1383 .cdev_pg_ctor = netmap_dev_pager_ctor,
1384 .cdev_pg_dtor = netmap_dev_pager_dtor,
743 .cdev_pg_fault = NULL,
1385 .cdev_pg_fault = netmap_dev_pager_fault,
744};
745
746
1386};
1387
1388
747// XXX check whether we need netmap_mmap_single _and_ netmap_mmap
748static int
749netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
750 vm_size_t objsize, vm_object_t *objp, int prot)
751{
1389static int
1390netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
1391 vm_size_t objsize, vm_object_t *objp, int prot)
1392{
1393 int error;
1394 struct netmap_vm_handle_t *vmh;
1395 struct netmap_priv_d *priv;
752 vm_object_t obj;
753
1396 vm_object_t obj;
1397
754 ND("cdev %p foff %jd size %jd objp %p prot %d", cdev,
1398 D("cdev %p foff %jd size %jd objp %p prot %d", cdev,
755 (intmax_t )*foff, (intmax_t )objsize, objp, prot);
1399 (intmax_t )*foff, (intmax_t )objsize, objp, prot);
756 obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
757 curthread->td_ucred);
758 ND("returns obj %p", obj);
759 if (obj == NULL)
760 return EINVAL;
761 if (saved_cdev_pager_ops.cdev_pg_fault == NULL) {
762 ND("initialize cdev_pager_ops");
763 saved_cdev_pager_ops = *(obj->un_pager.devp.ops);
764 netmap_cdev_pager_ops.cdev_pg_fault =
765 saved_cdev_pager_ops.cdev_pg_fault;
766 };
767 obj->un_pager.devp.ops = &netmap_cdev_pager_ops;
768 *objp = obj;
769 return 0;
770}
771#endif /* __FreeBSD__ */
1400
1401 vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF,
1402 M_NOWAIT | M_ZERO);
1403 if (vmh == NULL)
1404 return ENOMEM;
1405 vmh->dev = cdev;
772
1406
1407 NMG_LOCK();
1408 error = devfs_get_cdevpriv((void**)&priv);
1409 if (error)
1410 goto err_unlock;
1411 vmh->priv = priv;
1412 priv->np_refcount++;
1413 NMG_UNLOCK();
773
1414
774/*
775 * mmap(2) support for the "netmap" device.
776 *
777 * Expose all the memory previously allocated by our custom memory
778 * allocator: this way the user has only to issue a single mmap(2), and
779 * can work on all the data structures flawlessly.
780 *
781 * Return 0 on success, -1 otherwise.
782 */
783
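/*
 * Editor's note: an illustrative userspace sketch of the single-mmap model
 * described above (not part of this file; "em0" is a placeholder interface
 * name, the calls are the public ones from <net/netmap_user.h>):
 *
 *	struct nmreq req;
 *	int fd = open("/dev/netmap", O_RDWR);
 *
 *	bzero(&req, sizeof(req));
 *	req.nr_version = NETMAP_API;
 *	strlcpy(req.nr_name, "em0", sizeof(req.nr_name));
 *	ioctl(fd, NIOCREGIF, &req);		// put em0 in netmap mode
 *	void *mem = mmap(NULL, req.nr_memsize,	// one mmap covers rings and buffers
 *	    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *	struct netmap_if *nifp = NETMAP_IF(mem, req.nr_offset);
 */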
784#ifdef __FreeBSD__
785static int
786netmap_mmap(__unused struct cdev *dev,
787#if __FreeBSD_version < 900000
788 vm_offset_t offset, vm_paddr_t *paddr, int nprot
789#else
790 vm_ooffset_t offset, vm_paddr_t *paddr, int nprot,
791 __unused vm_memattr_t *memattr
792#endif
793 )
794{
795 int error = 0;
796 struct netmap_priv_d *priv;
797
798 if (nprot & PROT_EXEC)
799 return (-1); // XXX -1 or EINVAL ?
800
801 error = devfs_get_cdevpriv((void **)&priv);
802 if (error == EBADF) { /* called on fault, memory is initialized */
803 ND(5, "handling fault at ofs 0x%x", offset);
804 error = 0;
805 } else if (error == 0) /* make sure memory is set */
806 error = netmap_get_memory(priv);
1415 error = netmap_get_memory(priv);
807 if (error)
1416 if (error)
808 return (error);
1417 goto err_deref;
809
1418
810 ND("request for offset 0x%x", (uint32_t)offset);
811 *paddr = netmap_ofstophys(offset);
1419 obj = cdev_pager_allocate(vmh, OBJT_DEVICE,
1420 &netmap_cdev_pager_ops, objsize, prot,
1421 *foff, NULL);
1422 if (obj == NULL) {
1423 D("cdev_pager_allocate failed");
1424 error = EINVAL;
1425 goto err_deref;
1426 }
1427
1428 *objp = obj;
1429 return 0;
812
1430
813 return (*paddr ? 0 : ENOMEM);
1431err_deref:
1432 NMG_LOCK();
1433 priv->np_refcount--;
1434err_unlock:
1435 NMG_UNLOCK();
1436// err:
1437 free(vmh, M_DEVBUF);
1438 return error;
814}
815
816
1439}
1440
1441
1442// XXX can we remove this ?
817static int
818netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
819{
820 if (netmap_verbose)
821 D("dev %p fflag 0x%x devtype %d td %p",
822 dev, fflag, devtype, td);
823 return 0;
824}
825
826
827static int
828netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
829{
830 struct netmap_priv_d *priv;
831 int error;
832
1443static int
1444netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
1445{
1446 if (netmap_verbose)
1447 D("dev %p fflag 0x%x devtype %d td %p",
1448 dev, fflag, devtype, td);
1449 return 0;
1450}
1451
1452
1453static int
1454netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
1455{
1456 struct netmap_priv_d *priv;
1457 int error;
1458
1459 (void)dev;
1460 (void)oflags;
1461 (void)devtype;
1462 (void)td;
1463
1464 // XXX wait or nowait ?
833 priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
834 M_NOWAIT | M_ZERO);
835 if (priv == NULL)
836 return ENOMEM;
837
838 error = devfs_set_cdevpriv(priv, netmap_dtor);
839 if (error)
840 return error;
841
1465 priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
1466 M_NOWAIT | M_ZERO);
1467 if (priv == NULL)
1468 return ENOMEM;
1469
1470 error = devfs_set_cdevpriv(priv, netmap_dtor);
1471 if (error)
1472 return error;
1473
1474 priv->np_refcount = 1;
1475
842 return 0;
843}
844#endif /* __FreeBSD__ */
845
846
847/*
848 * Handlers for synchronization of the queues from/to the host.
849 * Netmap has two operating modes:

--- 45 unchanged lines hidden (view full) ---

895static void
896netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
897{
898 /* Take packets from hwcur to cur-reserved and pass them up.
899 * In case of no buffers we give up. At the end of the loop,
900 * the queue is drained in all cases.
901 * XXX handle reserved
902 */
1476 return 0;
1477}
1478#endif /* __FreeBSD__ */
1479
1480
1481/*
1482 * Handlers for synchronization of the queues from/to the host.
1483 * Netmap has two operating modes:

--- 45 unchanged lines hidden (view full) ---

1529static void
1530netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
1531{
1532 /* Take packets from hwcur to cur-reserved and pass them up.
1533 * In case of no buffers we give up. At the end of the loop,
1534 * the queue is drained in all cases.
1535 * XXX handle reserved
1536 */
903 int k = kring->ring->cur - kring->ring->reserved;
904 u_int n, lim = kring->nkr_num_slots - 1;
1537 u_int lim = kring->nkr_num_slots - 1;
905 struct mbuf *m, *tail = q->tail;
1538 struct mbuf *m, *tail = q->tail;
1539 u_int k = kring->ring->cur, n = kring->ring->reserved;
1540 struct netmap_mem_d *nmd = kring->na->nm_mem;
906
1541
907 if (k < 0)
908 k = k + kring->nkr_num_slots;
1542 /* compute the final position, ring->cur - ring->reserved */
1543 if (n > 0) {
1544 if (k < n)
1545 k += kring->nkr_num_slots;
1546 k -= n;
1547 }
909 for (n = kring->nr_hwcur; n != k;) {
910 struct netmap_slot *slot = &kring->ring->slot[n];
911
1548 for (n = kring->nr_hwcur; n != k;) {
1549 struct netmap_slot *slot = &kring->ring->slot[n];
1550
912 n = (n == lim) ? 0 : n + 1;
1551 n = nm_next(n, lim);
913 if ((slot->flags & NS_FORWARD) == 0 && !force)
914 continue;
1552 if ((slot->flags & NS_FORWARD) == 0 && !force)
1553 continue;
915 if (slot->len < 14 || slot->len > NETMAP_BUF_SIZE) {
1554 if (slot->len < 14 || slot->len > NETMAP_BDG_BUF_SIZE(nmd)) {
916 D("bad pkt at %d len %d", n, slot->len);
917 continue;
918 }
919 slot->flags &= ~NS_FORWARD; // XXX needed ?
1555 D("bad pkt at %d len %d", n, slot->len);
1556 continue;
1557 }
1558 slot->flags &= ~NS_FORWARD; // XXX needed ?
920 m = m_devget(NMB(slot), slot->len, 0, kring->na->ifp, NULL);
1559 /* XXX adapt to the case of a multisegment packet */
1560 m = m_devget(BDG_NMB(nmd, slot), slot->len, 0, kring->na->ifp, NULL);
921
922 if (m == NULL)
923 break;
924 if (tail)
925 tail->m_nextpkt = m;
926 else
927 q->head = m;
928 tail = m;
929 q->count++;
930 m->m_nextpkt = NULL;
931 }
932 q->tail = tail;
933}
934
935
936/*
1561
1562 if (m == NULL)
1563 break;
1564 if (tail)
1565 tail->m_nextpkt = m;
1566 else
1567 q->head = m;
1568 tail = m;
1569 q->count++;
1570 m->m_nextpkt = NULL;
1571 }
1572 q->tail = tail;
1573}
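/*
 * Editor's note (illustrative worked example): with nkr_num_slots = 8,
 * cur = 2 and reserved = 5, the wraparound above computes
 * k = 2 + 8 - 5 = 5, i.e. (cur - reserved) mod 8, so the scan starting
 * at nr_hwcur stops before the slots userspace still holds as reserved.
 */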
1574
1575
1576/*
937 * called under main lock to send packets from the host to the NIC
938 * The host ring has packets from nr_hwcur to (cur - reserved)
1577 * The host ring has packets from nr_hwcur to (cur - reserved)
939 * to be sent down. We scan the tx rings, which have just been
1578 * to be sent down to the NIC.
1579 * We need to use the queue lock on the source (host RX ring)
1580 * to protect against netmap_transmit.
1581 * If the user is well behaved we do not need to acquire locks
1582 * on the destination(s),
1583 * so we only need to make sure that there are no panics because
1584 * of user errors.
1585 * XXX verify
1586 *
1587 * We scan the tx rings, which have just been
940 * flushed so nr_hwcur == cur. Pushing packets down means
941 * increment cur and decrement avail.
942 * XXX to be verified
943 */
944static void
945netmap_sw_to_nic(struct netmap_adapter *na)
946{
947 struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
948 struct netmap_kring *k1 = &na->tx_rings[0];
1588 * flushed so nr_hwcur == cur. Pushing packets down means
1589 * increment cur and decrement avail.
1590 * XXX to be verified
1591 */
1592static void
1593netmap_sw_to_nic(struct netmap_adapter *na)
1594{
1595 struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
1596 struct netmap_kring *k1 = &na->tx_rings[0];
949 int i, howmany, src_lim, dst_lim;
1597 u_int i, howmany, src_lim, dst_lim;
950
1598
1599 /* XXX we should also check that the carrier is on */
1600 if (kring->nkr_stopped)
1601 return;
1602
1603 mtx_lock(&kring->q_lock);
1604
1605 if (kring->nkr_stopped)
1606 goto out;
1607
951 howmany = kring->nr_hwavail; /* XXX otherwise cur - reserved - nr_hwcur */
952
1608 howmany = kring->nr_hwavail; /* XXX otherwise cur - reserved - nr_hwcur */
1609
953 src_lim = kring->nkr_num_slots;
1610 src_lim = kring->nkr_num_slots - 1;
954 for (i = 0; howmany > 0 && i < na->num_tx_rings; i++, k1++) {
955 ND("%d packets left to ring %d (space %d)", howmany, i, k1->nr_hwavail);
1611 for (i = 0; howmany > 0 && i < na->num_tx_rings; i++, k1++) {
1612 ND("%d packets left to ring %d (space %d)", howmany, i, k1->nr_hwavail);
956 dst_lim = k1->nkr_num_slots;
1613 dst_lim = k1->nkr_num_slots - 1;
957 while (howmany > 0 && k1->ring->avail > 0) {
958 struct netmap_slot *src, *dst, tmp;
959 src = &kring->ring->slot[kring->nr_hwcur];
960 dst = &k1->ring->slot[k1->ring->cur];
961 tmp = *src;
962 src->buf_idx = dst->buf_idx;
963 src->flags = NS_BUF_CHANGED;
964
965 dst->buf_idx = tmp.buf_idx;
966 dst->len = tmp.len;
967 dst->flags = NS_BUF_CHANGED;
968 ND("out len %d buf %d from %d to %d",
969 dst->len, dst->buf_idx,
970 kring->nr_hwcur, k1->ring->cur);
971
1614 while (howmany > 0 && k1->ring->avail > 0) {
1615 struct netmap_slot *src, *dst, tmp;
1616 src = &kring->ring->slot[kring->nr_hwcur];
1617 dst = &k1->ring->slot[k1->ring->cur];
1618 tmp = *src;
1619 src->buf_idx = dst->buf_idx;
1620 src->flags = NS_BUF_CHANGED;
1621
1622 dst->buf_idx = tmp.buf_idx;
1623 dst->len = tmp.len;
1624 dst->flags = NS_BUF_CHANGED;
1625 ND("out len %d buf %d from %d to %d",
1626 dst->len, dst->buf_idx,
1627 kring->nr_hwcur, k1->ring->cur);
1628
972 if (++kring->nr_hwcur >= src_lim)
973 kring->nr_hwcur = 0;
1629 kring->nr_hwcur = nm_next(kring->nr_hwcur, src_lim);
974 howmany--;
975 kring->nr_hwavail--;
1630 howmany--;
1631 kring->nr_hwavail--;
976 if (++k1->ring->cur >= dst_lim)
977 k1->ring->cur = 0;
1632 k1->ring->cur = nm_next(k1->ring->cur, dst_lim);
978 k1->ring->avail--;
979 }
980 kring->ring->cur = kring->nr_hwcur; // XXX
1633 k1->ring->avail--;
1634 }
1635 kring->ring->cur = kring->nr_hwcur; // XXX
981 k1++;
1636 k1++; // XXX why?
982 }
1637 }
1638out:
1639 mtx_unlock(&kring->q_lock);
983}
984
985
986/*
1640}
1641
1642
1643/*
987 * netmap_sync_to_host() passes packets up. We are called from a
1644 * netmap_txsync_to_host() passes packets up. We are called from a
988 * system call in user process context, and the only contention
989 * can be among multiple user threads erroneously calling
990 * this routine concurrently.
991 */
992static void
1645 * system call in user process context, and the only contention
1646 * can be among multiple user threads erroneously calling
1647 * this routine concurrently.
1648 */
1649static void
993netmap_sync_to_host(struct netmap_adapter *na)
1650netmap_txsync_to_host(struct netmap_adapter *na)
994{
995 struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
996 struct netmap_ring *ring = kring->ring;
997 u_int k, lim = kring->nkr_num_slots - 1;
1651{
1652 struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
1653 struct netmap_ring *ring = kring->ring;
1654 u_int k, lim = kring->nkr_num_slots - 1;
998 struct mbq q = { NULL, NULL };
1655 struct mbq q = { NULL, NULL, 0 };
999
1656
1657 if (nm_kr_tryget(kring)) {
1658 D("ring %p busy (user error)", kring);
1659 return;
1660 }
1000 k = ring->cur;
1001 if (k > lim) {
1661 k = ring->cur;
1662 if (k > lim) {
1663 D("invalid ring index in stack TX kring %p", kring);
1002 netmap_ring_reinit(kring);
1664 netmap_ring_reinit(kring);
1665 nm_kr_put(kring);
1003 return;
1004 }
1666 return;
1667 }
1005 // na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0);
1006
1007 /* Take packets from hwcur to cur and pass them up.
1008 * In case of no buffers we give up. At the end of the loop,
1009 * the queue is drained in all cases.
1010 */
1011 netmap_grab_packets(kring, &q, 1);
1012 kring->nr_hwcur = k;
1013 kring->nr_hwavail = ring->avail = lim;
1668
1669 /* Take packets from hwcur to cur and pass them up.
1670 * In case of no buffers we give up. At the end of the loop,
1671 * the queue is drained in all cases.
1672 */
1673 netmap_grab_packets(kring, &q, 1);
1674 kring->nr_hwcur = k;
1675 kring->nr_hwavail = ring->avail = lim;
1014 // na->nm_lock(na->ifp, NETMAP_CORE_UNLOCK, 0);
1015
1676
1677 nm_kr_put(kring);
1016 netmap_send_up(na->ifp, q.head);
1017}
1018
1019
1678 netmap_send_up(na->ifp, q.head);
1679}
1680
1681
1682/*
1683 * This is the 'txsync' handler to send from a software ring to the
1684 * host stack.
1685 */
1020/* SWNA(ifp)->txrings[0] is always NA(ifp)->txrings[NA(ifp)->num_txrings] */
1021static int
1686/* SWNA(ifp)->txrings[0] is always NA(ifp)->txrings[NA(ifp)->num_txrings] */
1687static int
1022netmap_bdg_to_host(struct ifnet *ifp, u_int ring_nr, int do_lock)
1688netmap_bdg_to_host(struct ifnet *ifp, u_int ring_nr, int flags)
1023{
1024 (void)ring_nr;
1689{
1690 (void)ring_nr;
1025 (void)do_lock;
1026 netmap_sync_to_host(NA(ifp));
1691 (void)flags;
1692 if (netmap_verbose > 255)
1693 RD(5, "sync to host %s ring %d", ifp->if_xname, ring_nr);
1694 netmap_txsync_to_host(NA(ifp));
1027 return 0;
1028}
1029
1030
1031/*
1032 * rxsync backend for packets coming from the host stack.
1695 return 0;
1696}
1697
1698
1699/*
1700 * rxsync backend for packets coming from the host stack.
1033 * They have been put in the queue by netmap_start() so we
1701 * They have been put in the queue by netmap_transmit() so we
1034 * need to protect access to the kring using a lock.
1035 *
1036 * This routine also does the selrecord if called from the poll handler
1037 * (we know because td != NULL).
1038 *
1039 * NOTE: on linux, selrecord() is defined as a macro and uses pwait
1040 * as an additional hidden argument.
1041 */
1042static void
1702 * need to protect access to the kring using a lock.
1703 *
1704 * This routine also does the selrecord if called from the poll handler
1705 * (we know because td != NULL).
1706 *
1707 * NOTE: on linux, selrecord() is defined as a macro and uses pwait
1708 * as an additional hidden argument.
1709 */
1710static void
1043netmap_sync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait)
1711netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait)
1044{
1045 struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
1046 struct netmap_ring *ring = kring->ring;
1047 u_int j, n, lim = kring->nkr_num_slots;
1048 u_int k = ring->cur, resvd = ring->reserved;
1049
1050 (void)pwait; /* disable unused warnings */
1712{
1713 struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
1714 struct netmap_ring *ring = kring->ring;
1715 u_int j, n, lim = kring->nkr_num_slots;
1716 u_int k = ring->cur, resvd = ring->reserved;
1717
1718 (void)pwait; /* disable unused warnings */
1051 na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0);
1719
1720 if (kring->nkr_stopped) /* check a first time without lock */
1721 return;
1722
1723 /* XXX as an optimization we could reuse na->core_lock */
1724 mtx_lock(&kring->q_lock);
1725
1726 if (kring->nkr_stopped) /* check again with lock held */
1727 goto unlock_out;
1728
1052 if (k >= lim) {
1053 netmap_ring_reinit(kring);
1729 if (k >= lim) {
1730 netmap_ring_reinit(kring);
1054 return;
1731 goto unlock_out;
1055 }
1056 /* new packets are already set in nr_hwavail */
1057 /* skip past packets that userspace has released */
1058 j = kring->nr_hwcur;
1059 if (resvd > 0) {
1060 if (resvd + ring->avail >= lim + 1) {
1061 D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
1062 ring->reserved = resvd = 0; // XXX panic...

--- 5 unchanged lines hidden (view full) ---

1068 kring->nr_hwavail -= n;
1069 kring->nr_hwcur = k;
1070 }
1071 k = ring->avail = kring->nr_hwavail - resvd;
1072 if (k == 0 && td)
1073 selrecord(td, &kring->si);
1074 if (k && (netmap_verbose & NM_VERB_HOST))
1075 D("%d pkts from stack", k);
1732 }
1733 /* new packets are already set in nr_hwavail */
1734 /* skip past packets that userspace has released */
1735 j = kring->nr_hwcur;
1736 if (resvd > 0) {
1737 if (resvd + ring->avail >= lim + 1) {
1738 D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
1739 ring->reserved = resvd = 0; // XXX panic...

--- 5 unchanged lines hidden (view full) ---

1745 kring->nr_hwavail -= n;
1746 kring->nr_hwcur = k;
1747 }
1748 k = ring->avail = kring->nr_hwavail - resvd;
1749 if (k == 0 && td)
1750 selrecord(td, &kring->si);
1751 if (k && (netmap_verbose & NM_VERB_HOST))
1752 D("%d pkts from stack", k);
1076 na->nm_lock(na->ifp, NETMAP_CORE_UNLOCK, 0);
1753unlock_out:
1754
1755 mtx_unlock(&kring->q_lock);
1077}
1078
1079
1080/*
1756}
1757
1758
1759/*
1760 * MUST BE CALLED UNDER NMG_LOCK()
1761 *
1081 * get a refcounted reference to an interface.
1762 * get a refcounted reference to an interface.
1763 * This is always called in the execution of an ioctl().
1764 *
1082 * Return ENXIO if the interface does not exist, EINVAL if netmap
1083 * is not supported by the interface.
1084 * If successful, hold a reference.
1085 *
1765 * Return ENXIO if the interface does not exist, EINVAL if netmap
1766 * is not supported by the interface.
1767 * If successful, hold a reference.
1768 *
1086 * During the NIC is attached to a bridge, reference is managed
1769 * When the NIC is attached to a bridge, reference is managed
1087 * at na->na_bdg_refcount using ADD/DROP_BDG_REF() as well as
1088 * virtual ports. Hence, on the final DROP_BDG_REF(), the NIC
1089 * is detached from the bridge and ifp's refcount is dropped (for
1090 * virtual ports this is equivalent to destroying the ifp).
1091 *
1092 * This function uses if_rele() when we want to prevent the NIC from
1093 * being detached from the bridge in error handling. But once refcount
1094 * is acquired by this function, it must be released using nm_if_rele().
1095 */
1096static int
1770 * at na->na_bdg_refcount using ADD/DROP_BDG_REF() as well as
1771 * virtual ports. Hence, on the final DROP_BDG_REF(), the NIC
1772 * is detached from the bridge and ifp's refcount is dropped (for
1773 * virtual ports this is equivalent to destroying the ifp).
1774 *
1775 * This function uses if_rele() when we want to prevent the NIC from
1776 * being detached from the bridge in error handling. But once refcount
1777 * is acquired by this function, it must be released using nm_if_rele().
1778 */
1779static int
1097get_ifp(struct nmreq *nmr, struct ifnet **ifp)
1780get_ifp(struct nmreq *nmr, struct ifnet **ifp, int create)
1098{
1099 const char *name = nmr->nr_name;
1100 int namelen = strlen(name);
1781{
1782 const char *name = nmr->nr_name;
1783 int namelen = strlen(name);
1101#ifdef NM_BRIDGE
1102 struct ifnet *iter = NULL;
1103 int no_prefix = 0;
1104
1784 struct ifnet *iter = NULL;
1785 int no_prefix = 0;
1786
1105 do {
1106 struct nm_bridge *b;
1107 struct netmap_adapter *na;
1108 int i, cand = -1, cand2 = -1;
1787 /* first try to see if this is a bridge port. */
1788 struct nm_bridge *b;
1789 struct netmap_adapter *na;
1790 int i, j, cand = -1, cand2 = -1;
1791 int needed;
1109
1792
1110 if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
1111 no_prefix = 1;
1112 break;
1793 NMG_LOCK_ASSERT();
1794 *ifp = NULL; /* default */
1795 if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
1796 no_prefix = 1; /* no VALE prefix */
1797 goto no_bridge_port;
1798 }
1799
1800 b = nm_find_bridge(name, create);
1801 if (b == NULL) {
1802 D("no bridges available for '%s'", name);
1803 return (ENXIO);
1804 }
1805
1806 /* Now we are sure that name starts with the bridge's name,
1807 * lookup the port in the bridge. We need to scan the entire
1808 * list. It is not important to hold a WLOCK on the bridge
1809 * during the search because NMG_LOCK already guarantees
1810 * that there are no other possible writers.
1811 */
1812
1813 /* lookup in the local list of ports */
1814 for (j = 0; j < b->bdg_active_ports; j++) {
1815 i = b->bdg_port_index[j];
1816 na = b->bdg_ports[i];
1817 // KASSERT(na != NULL);
1818 iter = na->ifp;
1819 /* XXX make sure the name only contains one : */
1820 if (!strcmp(iter->if_xname, name) /* virtual port */ ||
1821 (namelen > b->bdg_namelen && !strcmp(iter->if_xname,
1822 name + b->bdg_namelen + 1)) /* NIC */) {
1823 ADD_BDG_REF(iter);
1824 ND("found existing if %s refs %d", name,
1825 NA(iter)->na_bdg_refcount);
1826 *ifp = iter;
1827 /* we are done, this is surely netmap capable */
1828 return 0;
1113 }
1829 }
1114 b = nm_find_bridge(name, 1 /* create a new one if no exist */ );
1115 if (b == NULL) {
1116 D("no bridges available for '%s'", name);
1117 return (ENXIO);
1118 }
1119 /* Now we are sure that name starts with the bridge's name */
1120 BDG_WLOCK(b);
1121 /* lookup in the local list of ports */
1122 for (i = 0; i < NM_BDG_MAXPORTS; i++) {
1123 na = BDG_GET_VAR(b->bdg_ports[i]);
1124 if (na == NULL) {
1125 if (cand == -1)
1126 cand = i; /* potential insert point */
1127 else if (cand2 == -1)
1128 cand2 = i; /* for host stack */
1129 continue;
1130 }
1131 iter = na->ifp;
1132 /* XXX make sure the name only contains one : */
1133 if (!strcmp(iter->if_xname, name) /* virtual port */ ||
1134 (namelen > b->namelen && !strcmp(iter->if_xname,
1135 name + b->namelen + 1)) /* NIC */) {
1136 ADD_BDG_REF(iter);
1137 ND("found existing interface");
1138 BDG_WUNLOCK(b);
1139 break;
1140 }
1141 }
1142 if (i < NM_BDG_MAXPORTS) /* already unlocked */
1143 break;
1144 if (cand == -1) {
1145 D("bridge full, cannot create new port");
1146no_port:
1147 BDG_WUNLOCK(b);
1148 *ifp = NULL;
1830 }
1831 /* not found, should we create it? */
1832 if (!create)
1833 return ENXIO;
1834 /* yes we should, see if we have space to attach entries */
1835 needed = 2; /* in some cases we only need 1 */
1836 if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
1837 D("bridge full %d, cannot create new port", b->bdg_active_ports);
1838 return EINVAL;
1839 }
1840 /* record the next two ports available, but do not allocate yet */
1841 cand = b->bdg_port_index[b->bdg_active_ports];
1842 cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
1843 ND("+++ bridge %s port %s used %d avail %d %d",
1844 b->bdg_basename, name, b->bdg_active_ports, cand, cand2);
1845
1846 /*
1847 * try to see if there is a matching NIC with this name
1848 * (after the bridge's name)
1849 */
1850 iter = ifunit_ref(name + b->bdg_namelen + 1);
1851 if (!iter) { /* this is a virtual port */
1852 /* Create a temporary NA with arguments, then
1853 * bdg_netmap_attach() will allocate the real one
1854 * and attach it to the ifp
1855 */
1856 struct netmap_adapter tmp_na;
1857
1858 if (nmr->nr_cmd) {
1859 /* nr_cmd must be 0 for a virtual port */
1149 return EINVAL;
1150 }
1860 return EINVAL;
1861 }
1151 ND("create new bridge port %s", name);
1152 /*
1153 * create a struct ifnet for the new port.
1154 * The forwarding table is attached to the kring(s).
1862 bzero(&tmp_na, sizeof(tmp_na));
1863 /* bound checking */
1864 tmp_na.num_tx_rings = nmr->nr_tx_rings;
1865 nm_bound_var(&tmp_na.num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1866 nmr->nr_tx_rings = tmp_na.num_tx_rings; // write back
1867 tmp_na.num_rx_rings = nmr->nr_rx_rings;
1868 nm_bound_var(&tmp_na.num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1869 nmr->nr_rx_rings = tmp_na.num_rx_rings; // write back
1870 nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1871 1, NM_BDG_MAXSLOTS, NULL);
1872 tmp_na.num_tx_desc = nmr->nr_tx_slots;
1873 nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1874 1, NM_BDG_MAXSLOTS, NULL);
1875 tmp_na.num_rx_desc = nmr->nr_rx_slots;
1876
1877 /* create a struct ifnet for the new port.
1878 * need M_NOWAIT as we are under nma_lock
1155 */
1879 */
1156 /*
1157 * try to see if there is a matching NIC with this name
1158 * (after the bridge's name)
1159 */
1160 iter = ifunit_ref(name + b->namelen + 1);
1161 if (!iter) { /* this is a virtual port */
1162 /* Create a temporary NA with arguments, then
1163 * bdg_netmap_attach() will allocate the real one
1164 * and attach it to the ifp
1165 */
1166 struct netmap_adapter tmp_na;
1880 iter = malloc(sizeof(*iter), M_DEVBUF, M_NOWAIT | M_ZERO);
1881 if (!iter)
1882 return ENOMEM;
1167
1883
1168 if (nmr->nr_cmd) /* nr_cmd must be for a NIC */
1169 goto no_port;
1170 bzero(&tmp_na, sizeof(tmp_na));
1171 /* bound checking */
1172 if (nmr->nr_tx_rings < 1)
1173 nmr->nr_tx_rings = 1;
1174 if (nmr->nr_tx_rings > NM_BDG_MAXRINGS)
1175 nmr->nr_tx_rings = NM_BDG_MAXRINGS;
1176 tmp_na.num_tx_rings = nmr->nr_tx_rings;
1177 if (nmr->nr_rx_rings < 1)
1178 nmr->nr_rx_rings = 1;
1179 if (nmr->nr_rx_rings > NM_BDG_MAXRINGS)
1180 nmr->nr_rx_rings = NM_BDG_MAXRINGS;
1181 tmp_na.num_rx_rings = nmr->nr_rx_rings;
1884 strcpy(iter->if_xname, name);
1885 tmp_na.ifp = iter;
1886 /* bdg_netmap_attach creates a struct netmap_adapter */
1887 bdg_netmap_attach(&tmp_na);
1888 cand2 = -1; /* only need one port */
1889 } else if (NETMAP_CAPABLE(iter)) { /* this is a NIC */
1890 /* make sure the NIC is not already in use */
1891 if (NETMAP_OWNED_BY_ANY(iter)) {
1892 D("NIC %s busy, cannot attach to bridge",
1893 iter->if_xname);
1894 if_rele(iter); /* don't detach from bridge */
1895 return EINVAL;
1896 }
1897 if (nmr->nr_arg1 != NETMAP_BDG_HOST)
1898 cand2 = -1; /* only need one port */
1899 } else { /* not a netmap-capable NIC */
1900 if_rele(iter); /* don't detach from bridge */
1901 return EINVAL;
1902 }
1903 na = NA(iter);
1182
1904
1183 iter = malloc(sizeof(*iter), M_DEVBUF, M_NOWAIT | M_ZERO);
1184 if (!iter)
1185 goto no_port;
1186 strcpy(iter->if_xname, name);
1187 tmp_na.ifp = iter;
1188 /* bdg_netmap_attach creates a struct netmap_adapter */
1189 bdg_netmap_attach(&tmp_na);
1190 } else if (NETMAP_CAPABLE(iter)) { /* this is a NIC */
1191 /* cannot attach the NIC that any user or another
1192 * bridge already holds.
1193 */
1194 if (NETMAP_OWNED_BY_ANY(iter) || cand2 == -1) {
1195ifunit_rele:
1196 if_rele(iter); /* don't detach from bridge */
1197 goto no_port;
1198 }
1199 /* bind the host stack to the bridge */
1200 if (nmr->nr_arg1 == NETMAP_BDG_HOST) {
1201 BDG_SET_VAR(b->bdg_ports[cand2], SWNA(iter));
1202 SWNA(iter)->bdg_port = cand2;
1203 SWNA(iter)->na_bdg = b;
1204 }
1205 } else /* not a netmap-capable NIC */
1206 goto ifunit_rele;
1207 na = NA(iter);
1208 na->bdg_port = cand;
1209 /* bind the port to the bridge (virtual ports are not active) */
1210 BDG_SET_VAR(b->bdg_ports[cand], na);
1211 na->na_bdg = b;
1212 ADD_BDG_REF(iter);
1213 BDG_WUNLOCK(b);
1214 ND("attaching virtual bridge %p", b);
1215 } while (0);
1905 BDG_WLOCK(b);
1906 na->bdg_port = cand;
1907 ND("NIC %p to bridge port %d", NA(iter), cand);
1908 /* bind the port to the bridge (virtual ports are not active) */
1909 b->bdg_ports[cand] = na;
1910 na->na_bdg = b;
1911 b->bdg_active_ports++;
1912 if (cand2 >= 0) {
1913 /* also bind the host stack to the bridge */
1914 b->bdg_ports[cand2] = SWNA(iter);
1915 SWNA(iter)->bdg_port = cand2;
1916 SWNA(iter)->na_bdg = b;
1917 b->bdg_active_ports++;
1918 ND("host %p to bridge port %d", SWNA(iter), cand2);
1919 }
1920 ADD_BDG_REF(iter); // XXX one or two ?
1921 ND("if %s refs %d", name, NA(iter)->na_bdg_refcount);
1922 BDG_WUNLOCK(b);
1216 *ifp = iter;
1923 *ifp = iter;
1924 return 0;
1925
1926no_bridge_port:
1927 *ifp = iter;
1217 if (! *ifp)
1928 if (! *ifp)
1218#endif /* NM_BRIDGE */
1219 *ifp = ifunit_ref(name);
1929 *ifp = ifunit_ref(name);
1220 if (*ifp == NULL)
1221 return (ENXIO);
1930 if (*ifp == NULL)
1931 return (ENXIO);
1222 /* can do this if the capability exists and if_pspare[0]
1223 * points to the netmap descriptor.
1224 */
1932
1225 if (NETMAP_CAPABLE(*ifp)) {
1933 if (NETMAP_CAPABLE(*ifp)) {
1226#ifdef NM_BRIDGE
1227 /* Users cannot use the NIC attached to a bridge directly */
1228 if (no_prefix && NETMAP_OWNED_BY_KERN(*ifp)) {
1229 if_rele(*ifp); /* don't detach from bridge */
1230 return EINVAL;
1231 } else
1934 /* Users cannot use the NIC attached to a bridge directly */
1935 if (no_prefix && NETMAP_OWNED_BY_KERN(*ifp)) {
1936 if_rele(*ifp); /* don't detach from bridge */
1937 return EINVAL;
1938 } else
1232#endif /* NM_BRIDGE */
1233 return 0; /* valid pointer, we hold the refcount */
1939 return 0; /* valid pointer, we hold the refcount */
1234 }
1235 nm_if_rele(*ifp);
1236 return EINVAL; // not NETMAP capable
1237}
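/*
 * Editor's note (illustrative only, assuming NM_NAME is "vale"): in the
 * lookup above a request for "vale0:em0" attaches the physical NIC em0 to
 * the switch vale0, "vale0:p1" (no matching ifunit) creates a virtual port
 * named "vale0:p1" on the same switch, and a bare "em0" takes the
 * no_bridge_port path and is opened as a regular netmap-capable NIC.
 */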
1238
1239
1240/*
1241 * Error routine called when txsync/rxsync detects an error.

--- 9 unchanged lines hidden (view full) ---

1251 */
1252int
1253netmap_ring_reinit(struct netmap_kring *kring)
1254{
1255 struct netmap_ring *ring = kring->ring;
1256 u_int i, lim = kring->nkr_num_slots - 1;
1257 int errors = 0;
1258
1940 }
1941 nm_if_rele(*ifp);
1942 return EINVAL; // not NETMAP capable
1943}
1944
1945
1946/*
1947 * Error routine called when txsync/rxsync detects an error.

--- 9 unchanged lines hidden (view full) ---

1957 */
1958int
1959netmap_ring_reinit(struct netmap_kring *kring)
1960{
1961 struct netmap_ring *ring = kring->ring;
1962 u_int i, lim = kring->nkr_num_slots - 1;
1963 int errors = 0;
1964
1965 // XXX KASSERT nm_kr_tryget
1259 RD(10, "called for %s", kring->na->ifp->if_xname);
1260 if (ring->cur > lim)
1261 errors++;
1262 for (i = 0; i <= lim; i++) {
1263 u_int idx = ring->slot[i].buf_idx;
1264 u_int len = ring->slot[i].len;
1265 if (idx < 2 || idx >= netmap_total_buffers) {
1266 if (!errors++)
1267 D("bad buffer at slot %d idx %d len %d ", i, idx, len);
1268 ring->slot[i].buf_idx = 0;
1269 ring->slot[i].len = 0;
1966 RD(10, "called for %s", kring->na->ifp->if_xname);
1967 if (ring->cur > lim)
1968 errors++;
1969 for (i = 0; i <= lim; i++) {
1970 u_int idx = ring->slot[i].buf_idx;
1971 u_int len = ring->slot[i].len;
1972 if (idx < 2 || idx >= netmap_total_buffers) {
1973 if (!errors++)
1974 D("bad buffer at slot %d idx %d len %d ", i, idx, len);
1975 ring->slot[i].buf_idx = 0;
1976 ring->slot[i].len = 0;
1270 } else if (len > NETMAP_BUF_SIZE) {
1977 } else if (len > NETMAP_BDG_BUF_SIZE(kring->na->nm_mem)) {
1271 ring->slot[i].len = 0;
1272 if (!errors++)
1273 D("bad len %d at slot %d idx %d",
1274 len, i, idx);
1275 }
1276 }
1277 if (errors) {
1278 int pos = kring - kring->na->tx_rings;

--- 19 unchanged lines hidden (view full) ---

1298 */
1299static int
1300netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid)
1301{
1302 struct ifnet *ifp = priv->np_ifp;
1303 struct netmap_adapter *na = NA(ifp);
1304 u_int i = ringid & NETMAP_RING_MASK;
1305 /* initially (np_qfirst == np_qlast) we don't want to lock */
1978 ring->slot[i].len = 0;
1979 if (!errors++)
1980 D("bad len %d at slot %d idx %d",
1981 len, i, idx);
1982 }
1983 }
1984 if (errors) {
1985 int pos = kring - kring->na->tx_rings;

--- 19 unchanged lines hidden (view full) ---

2005 */
2006static int
2007netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid)
2008{
2009 struct ifnet *ifp = priv->np_ifp;
2010 struct netmap_adapter *na = NA(ifp);
2011 u_int i = ringid & NETMAP_RING_MASK;
2012 /* initially (np_qfirst == np_qlast) we don't want to lock */
1306 int need_lock = (priv->np_qfirst != priv->np_qlast);
1307 int lim = na->num_rx_rings;
2013 u_int lim = na->num_rx_rings;
1308
1309 if (na->num_tx_rings > lim)
1310 lim = na->num_tx_rings;
1311 if ( (ringid & NETMAP_HW_RING) && i >= lim) {
1312 D("invalid ring id %d", i);
1313 return (EINVAL);
1314 }
2014
2015 if (na->num_tx_rings > lim)
2016 lim = na->num_tx_rings;
2017 if ( (ringid & NETMAP_HW_RING) && i >= lim) {
2018 D("invalid ring id %d", i);
2019 return (EINVAL);
2020 }
1315 if (need_lock)
1316 na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
1317 priv->np_ringid = ringid;
1318 if (ringid & NETMAP_SW_RING) {
1319 priv->np_qfirst = NETMAP_SW_RING;
1320 priv->np_qlast = 0;
1321 } else if (ringid & NETMAP_HW_RING) {
1322 priv->np_qfirst = i;
1323 priv->np_qlast = i + 1;
1324 } else {
1325 priv->np_qfirst = 0;
1326 priv->np_qlast = NETMAP_HW_RING ;
1327 }
1328 priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
2021 priv->np_ringid = ringid;
2022 if (ringid & NETMAP_SW_RING) {
2023 priv->np_qfirst = NETMAP_SW_RING;
2024 priv->np_qlast = 0;
2025 } else if (ringid & NETMAP_HW_RING) {
2026 priv->np_qfirst = i;
2027 priv->np_qlast = i + 1;
2028 } else {
2029 priv->np_qfirst = 0;
2030 priv->np_qlast = NETMAP_HW_RING ;
2031 }
2032 priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
1329 if (need_lock)
1330 na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
1331 if (netmap_verbose) {
1332 if (ringid & NETMAP_SW_RING)
1333 D("ringid %s set to SW RING", ifp->if_xname);
1334 else if (ringid & NETMAP_HW_RING)
1335 D("ringid %s set to HW RING %d", ifp->if_xname,
1336 priv->np_qfirst);
1337 else
1338 D("ringid %s set to all %d HW RINGS", ifp->if_xname, lim);
1339 }
1340 return 0;
1341}
1342
1343
1344/*
1345 * possibly move the interface to netmap-mode.
1346 * On success it returns a pointer to the netmap_if, otherwise NULL.
2033 if (netmap_verbose) {
2034 if (ringid & NETMAP_SW_RING)
2035 D("ringid %s set to SW RING", ifp->if_xname);
2036 else if (ringid & NETMAP_HW_RING)
2037 D("ringid %s set to HW RING %d", ifp->if_xname,
2038 priv->np_qfirst);
2039 else
2040 D("ringid %s set to all %d HW RINGS", ifp->if_xname, lim);
2041 }
2042 return 0;
2043}
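/*
 * Editor's note: an illustrative sketch of the nr_ringid values a userspace
 * program can pass through NIOCREGIF (the constants are the public ones
 * from net/netmap.h, the numeric ring index is a placeholder):
 *
 *	req.nr_ringid = 0;			// bind all hardware rings
 *	req.nr_ringid = NETMAP_HW_RING | 2;	// bind only hw ring 2
 *	req.nr_ringid = NETMAP_SW_RING;		// bind only the host (sw) rings
 *	req.nr_ringid |= NETMAP_NO_TX_POLL;	// do not txsync on poll()
 */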
2044
2045
2046/*
2047 * possibly move the interface to netmap-mode.
2048 * On success it returns a pointer to the netmap_if, otherwise NULL.
1347 * This must be called with NMA_LOCK held.
2049 * This must be called with NMG_LOCK held.
1348 */
1349static struct netmap_if *
1350netmap_do_regif(struct netmap_priv_d *priv, struct ifnet *ifp,
1351 uint16_t ringid, int *err)
1352{
1353 struct netmap_adapter *na = NA(ifp);
1354 struct netmap_if *nifp = NULL;
2050 */
2051static struct netmap_if *
2052netmap_do_regif(struct netmap_priv_d *priv, struct ifnet *ifp,
2053 uint16_t ringid, int *err)
2054{
2055 struct netmap_adapter *na = NA(ifp);
2056 struct netmap_if *nifp = NULL;
1355 int i, error;
2057 int error, need_mem;
1356
2058
1357 if (na->na_bdg)
1358 BDG_WLOCK(na->na_bdg);
1359 na->nm_lock(ifp, NETMAP_REG_LOCK, 0);
1360
2059 NMG_LOCK_ASSERT();
1361 /* ring configuration may have changed, fetch from the card */
1362 netmap_update_config(na);
1363 priv->np_ifp = ifp; /* store the reference */
1364 error = netmap_set_ringid(priv, ringid);
1365 if (error)
1366 goto out;
2060 /* ring configuration may have changed, fetch from the card */
2061 netmap_update_config(na);
2062 priv->np_ifp = ifp; /* store the reference */
2063 error = netmap_set_ringid(priv, ringid);
2064 if (error)
2065 goto out;
2066 /* ensure allocators are ready */
2067 need_mem = !netmap_have_memory_locked(priv);
2068 if (need_mem) {
2069 error = netmap_get_memory_locked(priv);
2070 ND("get_memory returned %d", error);
2071 if (error)
2072 goto out;
2073 }
1367 nifp = netmap_if_new(ifp->if_xname, na);
1368 if (nifp == NULL) { /* allocation failed */
2074 nifp = netmap_if_new(ifp->if_xname, na);
2075 if (nifp == NULL) { /* allocation failed */
2076 /* we should drop the allocator, but only
2077 * if we were the ones who grabbed it
2078 */
2079 if (need_mem)
2080 netmap_drop_memory_locked(priv);
1369 error = ENOMEM;
2081 error = ENOMEM;
1370 } else if (ifp->if_capenable & IFCAP_NETMAP) {
2082 goto out;
2083 }
2084 na->refcount++;
2085 if (ifp->if_capenable & IFCAP_NETMAP) {
1371 /* was already set */
1372 } else {
2086 /* was already set */
2087 } else {
2088 u_int i;
1373 /* Otherwise set the card in netmap mode
1374 * and make it use the shared buffers.
2089 /* Otherwise set the card in netmap mode
2090 * and make it use the shared buffers.
2091 *
2092 * If the interface is attached to a bridge, lock it.
1375 */
2093 */
2094 if (NETMAP_OWNED_BY_KERN(ifp))
2095 BDG_WLOCK(NA(ifp)->na_bdg);
1376 for (i = 0 ; i < na->num_tx_rings + 1; i++)
1377 mtx_init(&na->tx_rings[i].q_lock, "nm_txq_lock",
2096 for (i = 0 ; i < na->num_tx_rings + 1; i++)
2097 mtx_init(&na->tx_rings[i].q_lock, "nm_txq_lock",
1378 MTX_NETWORK_LOCK, MTX_DEF);
2098 NULL, MTX_DEF);
1379 for (i = 0 ; i < na->num_rx_rings + 1; i++) {
1380 mtx_init(&na->rx_rings[i].q_lock, "nm_rxq_lock",
2099 for (i = 0 ; i < na->num_rx_rings + 1; i++) {
2100 mtx_init(&na->rx_rings[i].q_lock, "nm_rxq_lock",
1381 MTX_NETWORK_LOCK, MTX_DEF);
2101 NULL, MTX_DEF);
1382 }
1383 if (nma_is_hw(na)) {
1384 SWNA(ifp)->tx_rings = &na->tx_rings[na->num_tx_rings];
1385 SWNA(ifp)->rx_rings = &na->rx_rings[na->num_rx_rings];
1386 }
2102 }
2103 if (nma_is_hw(na)) {
2104 SWNA(ifp)->tx_rings = &na->tx_rings[na->num_tx_rings];
2105 SWNA(ifp)->rx_rings = &na->rx_rings[na->num_rx_rings];
2106 }
2107 /*
2108 * do not core lock because the race is harmless here,
2109 * there cannot be any traffic to netmap_transmit()
2110 */
1387 error = na->nm_register(ifp, 1); /* mode on */
2111 error = na->nm_register(ifp, 1); /* mode on */
1388#ifdef NM_BRIDGE
2112 // XXX do we need to nm_alloc_bdgfwd() in all cases ?
1389 if (!error)
1390 error = nm_alloc_bdgfwd(na);
2113 if (!error)
2114 error = nm_alloc_bdgfwd(na);
1391#endif /* NM_BRIDGE */
1392 if (error) {
2115 if (error) {
1393 netmap_dtor_locked(priv);
1394 /* nifp is not yet in priv, so free it separately */
1395 netmap_if_free(nifp);
2116 netmap_do_unregif(priv, nifp);
1396 nifp = NULL;
1397 }
2117 nifp = NULL;
2118 }
2119 if (NETMAP_OWNED_BY_KERN(ifp))
2120 BDG_WUNLOCK(NA(ifp)->na_bdg);
1398
1399 }
1400out:
1401 *err = error;
2121
2122 }
2123out:
2124 *err = error;
1402 na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
1403 if (na->na_bdg)
1404 BDG_WUNLOCK(na->na_bdg);
2125 if (nifp != NULL) {
2126 /*
2127 * advertise that the interface is ready by setting np_nifp.
2128 * The barrier is needed because readers (poll and *SYNC)
2129 * check for priv->np_nifp != NULL without locking
2130 */
2131 wmb(); /* make sure previous writes are visible to all CPUs */
2132 priv->np_nifp = nifp;
2133 }
1405 return nifp;
1406}
1407
2134 return nifp;
2135}
2136
1408
1409/* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */
1410static int
2137/* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */
2138static int
1411kern_netmap_regif(struct nmreq *nmr)
2139nm_bdg_attach(struct nmreq *nmr)
1412{
1413 struct ifnet *ifp;
1414 struct netmap_if *nifp;
1415 struct netmap_priv_d *npriv;
1416 int error;
1417
1418 npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
1419 if (npriv == NULL)
1420 return ENOMEM;
2140{
2141 struct ifnet *ifp;
2142 struct netmap_if *nifp;
2143 struct netmap_priv_d *npriv;
2144 int error;
2145
2146 npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
2147 if (npriv == NULL)
2148 return ENOMEM;
1421 error = netmap_get_memory(npriv);
1422 if (error) {
1423free_exit:
1424 bzero(npriv, sizeof(*npriv));
1425 free(npriv, M_DEVBUF);
1426 return error;
1427 }
1428
1429 NMA_LOCK();
1430 error = get_ifp(nmr, &ifp);
1431 if (error) { /* no device, or another bridge or user owns the device */
1432 NMA_UNLOCK();
1433 goto free_exit;
1434 } else if (!NETMAP_OWNED_BY_KERN(ifp)) {
2149 NMG_LOCK();
2150 error = get_ifp(nmr, &ifp, 1 /* create if not exists */);
2151 if (error) /* no device, or another bridge or user owns the device */
2152 goto unlock_exit;
2153 /* get_ifp() sets na_bdg if this is a physical interface
2154 * that we can attach to a switch.
2155 */
2156 if (!NETMAP_OWNED_BY_KERN(ifp)) {
1435 /* got reference to a virtual port or direct access to a NIC.
2157 /* got reference to a virtual port or direct access to a NIC.
1436 * perhaps specified no bridge's prefix or wrong NIC's name
2158 * perhaps specified no bridge prefix or wrong NIC name
1437 */
1438 error = EINVAL;
2159 */
2160 error = EINVAL;
1439unref_exit:
1440 nm_if_rele(ifp);
1441 NMA_UNLOCK();
1442 goto free_exit;
2161 goto unref_exit;
1443 }
1444
2162 }
2163
1445 if (nmr->nr_cmd == NETMAP_BDG_DETACH) {
1446 if (NA(ifp)->refcount == 0) { /* not registered */
1447 error = EINVAL;
1448 goto unref_exit;
1449 }
1450 NMA_UNLOCK();
1451
1452 netmap_dtor(NA(ifp)->na_kpriv); /* unregister */
1453 NA(ifp)->na_kpriv = NULL;
1454 nm_if_rele(ifp); /* detach from the bridge */
1455 goto free_exit;
1456 } else if (NA(ifp)->refcount > 0) { /* already registered */
1457 error = EINVAL;
1458 goto unref_exit;
2164 if (NA(ifp)->refcount > 0) { /* already registered */
2165 error = EBUSY;
2166 DROP_BDG_REF(ifp);
2167 goto unlock_exit;
1459 }
1460
1461 nifp = netmap_do_regif(npriv, ifp, nmr->nr_ringid, &error);
2168 }
2169
2170 nifp = netmap_do_regif(npriv, ifp, nmr->nr_ringid, &error);
1462 if (!nifp)
2171 if (!nifp) {
1463 goto unref_exit;
2172 goto unref_exit;
1464 wmb(); // XXX do we need it ?
1465 npriv->np_nifp = nifp;
2173 }
2174
1466 NA(ifp)->na_kpriv = npriv;
2175 NA(ifp)->na_kpriv = npriv;
1467 NMA_UNLOCK();
1468 D("registered %s to netmap-mode", ifp->if_xname);
2176 NMG_UNLOCK();
2177 ND("registered %s to netmap-mode", ifp->if_xname);
1469 return 0;
2178 return 0;
2179
2180unref_exit:
2181 nm_if_rele(ifp);
2182unlock_exit:
2183 NMG_UNLOCK();
2184 bzero(npriv, sizeof(*npriv));
2185 free(npriv, M_DEVBUF);
2186 return error;
1470}
1471
2187}
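/*
 * Editor's note: an illustrative userspace sketch of how the attach and
 * detach commands above are typically issued (interface and switch names
 * are placeholders, fd is an open /dev/netmap descriptor):
 *
 *	struct nmreq req;
 *
 *	bzero(&req, sizeof(req));
 *	req.nr_version = NETMAP_API;
 *	strlcpy(req.nr_name, "vale0:em0", sizeof(req.nr_name));
 *	req.nr_cmd = NETMAP_BDG_ATTACH;		/* or NETMAP_BDG_DETACH */
 *	req.nr_arg1 = NETMAP_BDG_HOST;		/* optionally attach the host stack too */
 *	ioctl(fd, NIOCREGIF, &req);
 */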
2188
1472
1473/* CORE_LOCK is not necessary */
1474static void
1475netmap_swlock_wrapper(struct ifnet *dev, int what, u_int queueid)
2189static int
2190nm_bdg_detach(struct nmreq *nmr)
1476{
2191{
1477 struct netmap_adapter *na = SWNA(dev);
2192 struct ifnet *ifp;
2193 int error;
2194 int last_instance;
1478
2195
1479 switch (what) {
1480 case NETMAP_TX_LOCK:
1481 mtx_lock(&na->tx_rings[queueid].q_lock);
1482 break;
2196 NMG_LOCK();
2197 error = get_ifp(nmr, &ifp, 0 /* don't create */);
2198 if (error) { /* no device, or another bridge or user owns the device */
2199 goto unlock_exit;
2200 }
2201 /* XXX do we need to check this ? */
2202 if (!NETMAP_OWNED_BY_KERN(ifp)) {
2203 /* got reference to a virtual port or direct access to a NIC.
2204 * perhaps specified no bridge prefix or wrong NIC name
2205 */
2206 error = EINVAL;
2207 goto unref_exit;
2208 }
1483
2209
1484 case NETMAP_TX_UNLOCK:
1485 mtx_unlock(&na->tx_rings[queueid].q_lock);
1486 break;
2210 if (NA(ifp)->refcount == 0) { /* not registered */
2211 error = EINVAL;
2212 goto unref_exit;
2213 }
1487
2214
1488 case NETMAP_RX_LOCK:
1489 mtx_lock(&na->rx_rings[queueid].q_lock);
1490 break;
2215 DROP_BDG_REF(ifp); /* the one from get_ifp */
2216 last_instance = netmap_dtor_locked(NA(ifp)->na_kpriv); /* unregister */
2217 NMG_UNLOCK();
2218 if (!last_instance) {
2219 D("--- error, trying to detach an entry with active mmaps");
2220 error = EINVAL;
2221 } else {
2222 struct netmap_priv_d *npriv = NA(ifp)->na_kpriv;
2223 NA(ifp)->na_kpriv = NULL;
1491
2224
1492 case NETMAP_RX_UNLOCK:
1493 mtx_unlock(&na->rx_rings[queueid].q_lock);
1494 break;
2225 bzero(npriv, sizeof(*npriv));
2226 free(npriv, M_DEVBUF);
1495 }
2227 }
2228 return error;
2229
2230unref_exit:
2231 nm_if_rele(ifp);
2232unlock_exit:
2233 NMG_UNLOCK();
2234 return error;
1496}
1497
1498
1499/* Initialize the necessary fields of the sw adapter located right after the
1500 * hw one. The sw adapter attaches a pair of sw rings to the netmap-mode NIC.
1501 * It is always activated and deactivated at the same time as the hw one.
1502 * Thus we don't need refcounting on the sw adapter.
1503 * Regardless of the NIC's features we use a separate lock so that it can be
1504 * locked independently of the hw adapter.
1505 * Make sure nm_register is NULL to be handled as FALSE in nma_is_hw
1506 */
1507static void
1508netmap_attach_sw(struct ifnet *ifp)
1509{
1510 struct netmap_adapter *hw_na = NA(ifp);
1511 struct netmap_adapter *na = SWNA(ifp);
1512
1513 na->ifp = ifp;
2235}
2236
2237
2238/* Initialize the necessary fields of the sw adapter located right after the
2239 * hw one. The sw adapter attaches a pair of sw rings to the netmap-mode NIC.
2240 * It is always activated and deactivated at the same time as the hw one.
2241 * Thus we don't need refcounting on the sw adapter.
2242 * Regardless of the NIC's features we use a separate lock so that it can be
2243 * locked independently of the hw adapter.
2244 * Make sure nm_register is NULL to be handled as FALSE in nma_is_hw
2245 */
2246static void
2247netmap_attach_sw(struct ifnet *ifp)
2248{
2249 struct netmap_adapter *hw_na = NA(ifp);
2250 struct netmap_adapter *na = SWNA(ifp);
2251
2252 na->ifp = ifp;
1514 na->separate_locks = 1;
1515 na->nm_lock = netmap_swlock_wrapper;
1516 na->num_rx_rings = na->num_tx_rings = 1;
1517 na->num_tx_desc = hw_na->num_tx_desc;
1518 na->num_rx_desc = hw_na->num_rx_desc;
1519 na->nm_txsync = netmap_bdg_to_host;
2253 na->num_rx_rings = na->num_tx_rings = 1;
2254 na->num_tx_desc = hw_na->num_tx_desc;
2255 na->num_rx_desc = hw_na->num_rx_desc;
2256 na->nm_txsync = netmap_bdg_to_host;
2257 /* we use the same memory allocator
2258 * as the hw adapter */
2259 na->nm_mem = hw_na->nm_mem;
1520}
1521
1522
2260}
2261
2262
1523/* exported to kernel callers */
2263/* exported to kernel callers, e.g. OVS ?
2264 * Entry point.
2265 * Called without NMG_LOCK.
2266 */
1524int
1525netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
1526{
1527 struct nm_bridge *b;
1528 struct netmap_adapter *na;
1529 struct ifnet *iter;
1530 char *name = nmr->nr_name;
1531 int cmd = nmr->nr_cmd, namelen = strlen(name);
1532 int error = 0, i, j;
1533
1534 switch (cmd) {
1535 case NETMAP_BDG_ATTACH:
2267int
2268netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
2269{
2270 struct nm_bridge *b;
2271 struct netmap_adapter *na;
2272 struct ifnet *iter;
2273 char *name = nmr->nr_name;
2274 int cmd = nmr->nr_cmd, namelen = strlen(name);
2275 int error = 0, i, j;
2276
2277 switch (cmd) {
2278 case NETMAP_BDG_ATTACH:
2279 error = nm_bdg_attach(nmr);
2280 break;
2281
1536 case NETMAP_BDG_DETACH:
2282 case NETMAP_BDG_DETACH:
1537 error = kern_netmap_regif(nmr);
2283 error = nm_bdg_detach(nmr);
1538 break;
1539
1540 case NETMAP_BDG_LIST:
1541 /* this is used to enumerate bridges and ports */
1542 if (namelen) { /* look up indexes of bridge and port */
1543 if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
1544 error = EINVAL;
1545 break;
1546 }
2284 break;
2285
2286 case NETMAP_BDG_LIST:
2287 /* this is used to enumerate bridges and ports */
2288 if (namelen) { /* look up indexes of bridge and port */
2289 if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
2290 error = EINVAL;
2291 break;
2292 }
2293 NMG_LOCK();
1547 b = nm_find_bridge(name, 0 /* don't create */);
1548 if (!b) {
1549 error = ENOENT;
2294 b = nm_find_bridge(name, 0 /* don't create */);
2295 if (!b) {
2296 error = ENOENT;
2297 NMG_UNLOCK();
1550 break;
1551 }
1552
2298 break;
2299 }
2300
1553 BDG_RLOCK(b);
1554 error = ENOENT;
2301 error = ENOENT;
1555 for (i = 0; i < NM_BDG_MAXPORTS; i++) {
1556 na = BDG_GET_VAR(b->bdg_ports[i]);
1557 if (na == NULL)
2302 for (j = 0; j < b->bdg_active_ports; j++) {
2303 i = b->bdg_port_index[j];
2304 na = b->bdg_ports[i];
2305 if (na == NULL) {
2306 D("---AAAAAAAAARGH-------");
1558 continue;
2307 continue;
2308 }
1559 iter = na->ifp;
1560 /* the former and the latter identify a
1561 * virtual port and a NIC, respectively
1562 */
1563 if (!strcmp(iter->if_xname, name) ||
2309 iter = na->ifp;
2310 /* the former and the latter identify a
2311 * virtual port and a NIC, respectively
2312 */
2313 if (!strcmp(iter->if_xname, name) ||
1564 (namelen > b->namelen &&
2314 (namelen > b->bdg_namelen &&
1565 !strcmp(iter->if_xname,
2315 !strcmp(iter->if_xname,
1566 name + b->namelen + 1))) {
2316 name + b->bdg_namelen + 1))) {
1567 /* bridge index */
1568 nmr->nr_arg1 = b - nm_bridges;
1569 nmr->nr_arg2 = i; /* port index */
1570 error = 0;
1571 break;
1572 }
1573 }
2317 /* bridge index */
2318 nmr->nr_arg1 = b - nm_bridges;
2319 nmr->nr_arg2 = i; /* port index */
2320 error = 0;
2321 break;
2322 }
2323 }
1574 BDG_RUNLOCK(b);
2324 NMG_UNLOCK();
1575 } else {
1576 /* return the first non-empty entry starting from
1577 * bridge nr_arg1 and port nr_arg2.
1578 *
1579 * Users can detect the end of the same bridge by
1580 * seeing the new and old value of nr_arg1, and can
1581 * detect the end of all the bridges by error != 0
1582 */
1583 i = nmr->nr_arg1;
1584 j = nmr->nr_arg2;
1585
2325 } else {
2326 /* return the first non-empty entry starting from
2327 * bridge nr_arg1 and port nr_arg2.
2328 *
2329 * Users can detect the end of the same bridge by
2330 * seeing the new and old value of nr_arg1, and can
2331 * detect the end of all the bridges by error != 0
2332 */
2333 i = nmr->nr_arg1;
2334 j = nmr->nr_arg2;
2335
1586 for (error = ENOENT; error && i < NM_BRIDGES; i++) {
2336 NMG_LOCK();
2337 for (error = ENOENT; i < NM_BRIDGES; i++) {
1587 b = nm_bridges + i;
2338 b = nm_bridges + i;
1588 BDG_RLOCK(b);
1589 for (; j < NM_BDG_MAXPORTS; j++) {
1590 na = BDG_GET_VAR(b->bdg_ports[j]);
1591 if (na == NULL)
1592 continue;
1593 iter = na->ifp;
1594 nmr->nr_arg1 = i;
1595 nmr->nr_arg2 = j;
1596 strncpy(name, iter->if_xname, IFNAMSIZ);
1597 error = 0;
1598 break;
2339 if (j >= b->bdg_active_ports) {
2340 j = 0; /* following bridges scan from 0 */
2341 continue;
1599 }
2342 }
1600 BDG_RUNLOCK(b);
1601 j = 0; /* following bridges scan from 0 */
2343 nmr->nr_arg1 = i;
2344 nmr->nr_arg2 = j;
2345 j = b->bdg_port_index[j];
2346 na = b->bdg_ports[j];
2347 iter = na->ifp;
2348 strncpy(name, iter->if_xname, (size_t)IFNAMSIZ);
2349 error = 0;
2350 break;
1602 }
2351 }
2352 NMG_UNLOCK();
1603 }
1604 break;
1605
1606 case NETMAP_BDG_LOOKUP_REG:
1607 /* register a lookup function to the given bridge.
1608 * nmr->nr_name may be just bridge's name (including ':'
1609 * if it is not just NM_NAME).
1610 */
1611 if (!func) {
1612 error = EINVAL;
1613 break;
1614 }
2353 }
2354 break;
2355
2356 case NETMAP_BDG_LOOKUP_REG:
2357 /* register a lookup function to the given bridge.
2358 * nmr->nr_name may be just bridge's name (including ':'
2359 * if it is not just NM_NAME).
2360 */
2361 if (!func) {
2362 error = EINVAL;
2363 break;
2364 }
2365 NMG_LOCK();
1615 b = nm_find_bridge(name, 0 /* don't create */);
1616 if (!b) {
1617 error = EINVAL;
2366 b = nm_find_bridge(name, 0 /* don't create */);
2367 if (!b) {
2368 error = EINVAL;
1618 break;
2369 } else {
2370 b->nm_bdg_lookup = func;
1619 }
2371 }
1620 BDG_WLOCK(b);
1621 b->nm_bdg_lookup = func;
1622 BDG_WUNLOCK(b);
2372 NMG_UNLOCK();
1623 break;
2373 break;
2374
1624 default:
1625 D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
1626 error = EINVAL;
1627 break;
1628 }
1629 return error;
1630}
1631

--- 11 unchanged lines hidden (view full) ---

1643 *
1644 * Return 0 on success, errno otherwise.
1645 */
1646static int
1647netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
1648 int fflag, struct thread *td)
1649{
1650 struct netmap_priv_d *priv = NULL;
2375 default:
2376 D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
2377 error = EINVAL;
2378 break;
2379 }
2380 return error;
2381}
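/*
 * Editor's note: an illustrative userspace sketch of the NETMAP_BDG_LIST
 * enumeration protocol described above, driven through NIOCGINFO (fd is an
 * open /dev/netmap descriptor):
 *
 *	bzero(&req, sizeof(req));
 *	req.nr_version = NETMAP_API;
 *	req.nr_cmd = NETMAP_BDG_LIST;
 *	req.nr_arg1 = req.nr_arg2 = 0;		/* start from the first bridge/port */
 *	while (ioctl(fd, NIOCGINFO, &req) == 0) {
 *		printf("bridge %d port %d: %s\n",
 *		    req.nr_arg1, req.nr_arg2, req.nr_name);
 *		req.nr_name[0] = '\0';		/* keep scanning instead of looking up */
 *		req.nr_arg2++;			/* ask for the next port */
 *	}
 */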
2382

--- 11 unchanged lines hidden (view full) ---

2394 *
2395 * Return 0 on success, errno otherwise.
2396 */
2397static int
2398netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
2399 int fflag, struct thread *td)
2400{
2401 struct netmap_priv_d *priv = NULL;
1651 struct ifnet *ifp;
2402 struct ifnet *ifp = NULL;
1652 struct nmreq *nmr = (struct nmreq *) data;
2403 struct nmreq *nmr = (struct nmreq *) data;
1653 struct netmap_adapter *na;
2404 struct netmap_adapter *na = NULL;
1654 int error;
1655 u_int i, lim;
1656 struct netmap_if *nifp;
2405 int error;
2406 u_int i, lim;
2407 struct netmap_if *nifp;
2408 struct netmap_kring *krings;
1657
1658 (void)dev; /* UNUSED */
1659 (void)fflag; /* UNUSED */
1660#ifdef linux
1661#define devfs_get_cdevpriv(pp) \
1662 ({ *(struct netmap_priv_d **)pp = ((struct file *)td)->private_data; \
1663 (*pp ? 0 : ENOENT); })
1664

--- 16 unchanged lines hidden (view full) ---

1681 * is now created in the open */
1682 return (error == ENOENT ? ENXIO : error);
1683 }
1684
1685 nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0'; /* truncate name */
1686 switch (cmd) {
1687 case NIOCGINFO: /* return capabilities etc */
1688 if (nmr->nr_version != NETMAP_API) {
2409
2410 (void)dev; /* UNUSED */
2411 (void)fflag; /* UNUSED */
2412#ifdef linux
2413#define devfs_get_cdevpriv(pp) \
2414 ({ *(struct netmap_priv_d **)pp = ((struct file *)td)->private_data; \
2415 (*pp ? 0 : ENOENT); })
2416

--- 16 unchanged lines hidden (view full) ---

2433 * is now created in the open */
2434 return (error == ENOENT ? ENXIO : error);
2435 }
2436
2437 nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0'; /* truncate name */
2438 switch (cmd) {
2439 case NIOCGINFO: /* return capabilities etc */
2440 if (nmr->nr_version != NETMAP_API) {
2441#ifdef TEST_STUFF
2442 /* some test code for locks etc */
2443 if (nmr->nr_version == 666) {
2444 error = nm_test(nmr);
2445 break;
2446 }
2447#endif /* TEST_STUFF */
1689 D("API mismatch got %d have %d",
1690 nmr->nr_version, NETMAP_API);
1691 nmr->nr_version = NETMAP_API;
1692 error = EINVAL;
1693 break;
1694 }
1695 if (nmr->nr_cmd == NETMAP_BDG_LIST) {
1696 error = netmap_bdg_ctl(nmr, NULL);
1697 break;
1698 }
2448 D("API mismatch got %d have %d",
2449 nmr->nr_version, NETMAP_API);
2450 nmr->nr_version = NETMAP_API;
2451 error = EINVAL;
2452 break;
2453 }
2454 if (nmr->nr_cmd == NETMAP_BDG_LIST) {
2455 error = netmap_bdg_ctl(nmr, NULL);
2456 break;
2457 }
1699 /* update configuration */
1700 error = netmap_get_memory(priv);
1701 ND("get_memory returned %d", error);
1702 if (error)
1703 break;
1704 /* memsize is always valid */
1705 nmr->nr_memsize = nm_mem.nm_totalsize;
1706 nmr->nr_offset = 0;
1707 nmr->nr_rx_slots = nmr->nr_tx_slots = 0;
1708 if (nmr->nr_name[0] == '\0') /* just get memory info */
1709 break;
1710 /* lock because get_ifp and update_config see na->refcount */
1711 NMA_LOCK();
1712 error = get_ifp(nmr, &ifp); /* get a refcount */
1713 if (error) {
1714 NMA_UNLOCK();
1715 break;
1716 }
1717 na = NA(ifp); /* retrieve netmap_adapter */
1718 netmap_update_config(na);
1719 NMA_UNLOCK();
1720 nmr->nr_rx_rings = na->num_rx_rings;
1721 nmr->nr_tx_rings = na->num_tx_rings;
1722 nmr->nr_rx_slots = na->num_rx_desc;
1723 nmr->nr_tx_slots = na->num_tx_desc;
1724 nm_if_rele(ifp); /* return the refcount */
2458
2459 NMG_LOCK();
2460 do {
2461 /* memsize is always valid */
2462 struct netmap_mem_d *nmd = &nm_mem;
2463 u_int memflags;
2464
2465 if (nmr->nr_name[0] != '\0') {
2466 /* get a refcount */
2467 error = get_ifp(nmr, &ifp, 1 /* create */);
2468 if (error)
2469 break;
2470 na = NA(ifp); /* retrieve the netmap adapter */
2471 nmd = na->nm_mem; /* and its memory allocator */
2472 }
2473
2474 error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags);
2475 if (error)
2476 break;
2477 if (na == NULL) /* only memory info */
2478 break;
2479 nmr->nr_offset = 0;
2480 nmr->nr_rx_slots = nmr->nr_tx_slots = 0;
2481 netmap_update_config(na);
2482 nmr->nr_rx_rings = na->num_rx_rings;
2483 nmr->nr_tx_rings = na->num_tx_rings;
2484 nmr->nr_rx_slots = na->num_rx_desc;
2485 nmr->nr_tx_slots = na->num_tx_desc;
2486 if (memflags & NETMAP_MEM_PRIVATE)
2487 nmr->nr_ringid |= NETMAP_PRIV_MEM;
2488 } while (0);
2489 if (ifp)
2490 nm_if_rele(ifp); /* return the refcount */
2491 NMG_UNLOCK();
1725 break;
1726
1727 case NIOCREGIF:
1728 if (nmr->nr_version != NETMAP_API) {
1729 nmr->nr_version = NETMAP_API;
1730 error = EINVAL;
1731 break;
1732 }
1733 /* possibly attach/detach NIC and VALE switch */
1734 i = nmr->nr_cmd;
1735 if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH) {
1736 error = netmap_bdg_ctl(nmr, NULL);
1737 break;
1738 } else if (i != 0) {
1739 D("nr_cmd must be 0 not %d", i);
1740 error = EINVAL;
1741 break;
1742 }
1743
2492 break;
2493
2494 case NIOCREGIF:
2495 if (nmr->nr_version != NETMAP_API) {
2496 nmr->nr_version = NETMAP_API;
2497 error = EINVAL;
2498 break;
2499 }
2500 /* possibly attach/detach NIC and VALE switch */
2501 i = nmr->nr_cmd;
2502 if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH) {
2503 error = netmap_bdg_ctl(nmr, NULL);
2504 break;
2505 } else if (i != 0) {
2506 D("nr_cmd must be 0 not %d", i);
2507 error = EINVAL;
2508 break;
2509 }
2510
1744 /* ensure allocators are ready */
1745 error = netmap_get_memory(priv);
1746 ND("get_memory returned %d", error);
1747 if (error)
1748 break;
1749
1750 /* protect access to priv from concurrent NIOCREGIF */
2511 /* protect access to priv from concurrent NIOCREGIF */
1751 NMA_LOCK();
1752 if (priv->np_ifp != NULL) { /* thread already registered */
1753 error = netmap_set_ringid(priv, nmr->nr_ringid);
1754unlock_out:
1755 NMA_UNLOCK();
1756 break;
1757 }
1758 /* find the interface and a reference */
1759 error = get_ifp(nmr, &ifp); /* keep reference */
1760 if (error)
1761 goto unlock_out;
1762 else if (NETMAP_OWNED_BY_KERN(ifp)) {
1763 nm_if_rele(ifp);
1764 goto unlock_out;
1765 }
1766 nifp = netmap_do_regif(priv, ifp, nmr->nr_ringid, &error);
1767 if (!nifp) { /* reg. failed, release priv and ref */
1768 nm_if_rele(ifp); /* return the refcount */
1769 priv->np_ifp = NULL;
1770 priv->np_nifp = NULL;
1771 goto unlock_out;
1772 }
2512 NMG_LOCK();
2513 do {
2514 u_int memflags;
1773
2515
1774 /* the following assignment is a commitment.
1775 * Readers (i.e., poll and *SYNC) check for
1776 * np_nifp != NULL without locking
1777 */
1778 wmb(); /* make sure previous writes are visible to all CPUs */
1779 priv->np_nifp = nifp;
1780 NMA_UNLOCK();
2516 if (priv->np_ifp != NULL) { /* thread already registered */
2517 error = netmap_set_ringid(priv, nmr->nr_ringid);
2518 break;
2519 }
2520 /* find the interface and a reference */
2521 error = get_ifp(nmr, &ifp, 1 /* create */); /* keep reference */
2522 if (error)
2523 break;
2524 if (NETMAP_OWNED_BY_KERN(ifp)) {
2525 nm_if_rele(ifp);
2526 error = EBUSY;
2527 break;
2528 }
2529 nifp = netmap_do_regif(priv, ifp, nmr->nr_ringid, &error);
2530 if (!nifp) { /* reg. failed, release priv and ref */
2531 nm_if_rele(ifp); /* return the refcount */
2532 priv->np_ifp = NULL;
2533 priv->np_nifp = NULL;
2534 break;
2535 }
1781
2536
1782 /* return the offset of the netmap_if object */
1783 na = NA(ifp); /* retrieve netmap adapter */
1784 nmr->nr_rx_rings = na->num_rx_rings;
1785 nmr->nr_tx_rings = na->num_tx_rings;
1786 nmr->nr_rx_slots = na->num_rx_desc;
1787 nmr->nr_tx_slots = na->num_tx_desc;
1788 nmr->nr_memsize = nm_mem.nm_totalsize;
1789 nmr->nr_offset = netmap_if_offset(nifp);
2537 /* return the offset of the netmap_if object */
2538 na = NA(ifp); /* retrieve netmap adapter */
2539 nmr->nr_rx_rings = na->num_rx_rings;
2540 nmr->nr_tx_rings = na->num_tx_rings;
2541 nmr->nr_rx_slots = na->num_rx_desc;
2542 nmr->nr_tx_slots = na->num_tx_desc;
2543 error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags);
2544 if (error) {
2545 nm_if_rele(ifp);
2546 break;
2547 }
2548 if (memflags & NETMAP_MEM_PRIVATE) {
2549 nmr->nr_ringid |= NETMAP_PRIV_MEM;
2550 *(uint32_t *)&nifp->ni_flags |= NI_PRIV_MEM;
2551 }
2552 nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);
2553 } while (0);
2554 NMG_UNLOCK();
1790 break;
1791
1792 case NIOCUNREGIF:
1793 // XXX we have no data here ?
1794 D("deprecated, data is %p", nmr);
1795 error = EINVAL;
1796 break;
1797
1798 case NIOCTXSYNC:
1799 case NIOCRXSYNC:
1800 nifp = priv->np_nifp;
1801
1802 if (nifp == NULL) {
1803 error = ENXIO;
1804 break;
1805 }
1806 rmb(); /* make sure following reads are not from cache */
1807
2555 break;
2556
2557 case NIOCUNREGIF:
2558 // XXX we have no data here ?
2559 D("deprecated, data is %p", nmr);
2560 error = EINVAL;
2561 break;
2562
2563 case NIOCTXSYNC:
2564 case NIOCRXSYNC:
2565 nifp = priv->np_nifp;
2566
2567 if (nifp == NULL) {
2568 error = ENXIO;
2569 break;
2570 }
2571 rmb(); /* make sure following reads are not from cache */
2572
1808
1809 ifp = priv->np_ifp; /* we have a reference */
1810
1811 if (ifp == NULL) {
1812 D("Internal error: nifp != NULL && ifp == NULL");
1813 error = ENXIO;
1814 break;
1815 }
1816
1817 na = NA(ifp); /* retrieve netmap adapter */
1818 if (priv->np_qfirst == NETMAP_SW_RING) { /* host rings */
1819 if (cmd == NIOCTXSYNC)
2573 ifp = priv->np_ifp; /* we have a reference */
2574
2575 if (ifp == NULL) {
2576 D("Internal error: nifp != NULL && ifp == NULL");
2577 error = ENXIO;
2578 break;
2579 }
2580
2581 na = NA(ifp); /* retrieve netmap adapter */
2582 if (priv->np_qfirst == NETMAP_SW_RING) { /* host rings */
2583 if (cmd == NIOCTXSYNC)
1820 netmap_sync_to_host(na);
2584 netmap_txsync_to_host(na);
1821 else
2585 else
1822 netmap_sync_from_host(na, NULL, NULL);
2586 netmap_rxsync_from_host(na, NULL, NULL);
1823 break;
1824 }
1825 /* find the last ring to scan */
1826 lim = priv->np_qlast;
1827 if (lim == NETMAP_HW_RING)
1828 lim = (cmd == NIOCTXSYNC) ?
1829 na->num_tx_rings : na->num_rx_rings;
1830
2587 break;
2588 }
2589 /* find the last ring to scan */
2590 lim = priv->np_qlast;
2591 if (lim == NETMAP_HW_RING)
2592 lim = (cmd == NIOCTXSYNC) ?
2593 na->num_tx_rings : na->num_rx_rings;
2594
2595 krings = (cmd == NIOCTXSYNC) ? na->tx_rings : na->rx_rings;
1831 for (i = priv->np_qfirst; i < lim; i++) {
2596 for (i = priv->np_qfirst; i < lim; i++) {
2597 struct netmap_kring *kring = krings + i;
2598 if (nm_kr_tryget(kring)) {
2599 error = EBUSY;
2600 goto out;
2601 }
1832 if (cmd == NIOCTXSYNC) {
2602 if (cmd == NIOCTXSYNC) {
1833 struct netmap_kring *kring = &na->tx_rings[i];
1834 if (netmap_verbose & NM_VERB_TXSYNC)
1835 D("pre txsync ring %d cur %d hwcur %d",
1836 i, kring->ring->cur,
1837 kring->nr_hwcur);
2603 if (netmap_verbose & NM_VERB_TXSYNC)
2604 D("pre txsync ring %d cur %d hwcur %d",
2605 i, kring->ring->cur,
2606 kring->nr_hwcur);
1838 na->nm_txsync(ifp, i, 1 /* do lock */);
2607 na->nm_txsync(ifp, i, NAF_FORCE_RECLAIM);
1839 if (netmap_verbose & NM_VERB_TXSYNC)
1840 D("post txsync ring %d cur %d hwcur %d",
1841 i, kring->ring->cur,
1842 kring->nr_hwcur);
1843 } else {
2608 if (netmap_verbose & NM_VERB_TXSYNC)
2609 D("post txsync ring %d cur %d hwcur %d",
2610 i, kring->ring->cur,
2611 kring->nr_hwcur);
2612 } else {
1844 na->nm_rxsync(ifp, i, 1 /* do lock */);
2613 na->nm_rxsync(ifp, i, NAF_FORCE_READ);
1845 microtime(&na->rx_rings[i].ring->ts);
1846 }
2614 microtime(&na->rx_rings[i].ring->ts);
2615 }
2616 nm_kr_put(kring);
1847 }
1848
1849 break;
1850
1851#ifdef __FreeBSD__
1852 case BIOCIMMEDIATE:
1853 case BIOCGHDRCMPLT:
1854 case BIOCSHDRCMPLT:
1855 case BIOCSSEESENT:
1856 D("ignore BIOCIMMEDIATE/BIOCGHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT");
1857 break;
1858
1859 default: /* allow device-specific ioctls */
1860 {
1861 struct socket so;
2617 }
2618
2619 break;
2620
2621#ifdef __FreeBSD__
2622 case BIOCIMMEDIATE:
2623 case BIOCGHDRCMPLT:
2624 case BIOCSHDRCMPLT:
2625 case BIOCSSEESENT:
2626 D("ignore BIOCIMMEDIATE/BIOCGHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT");
2627 break;
2628
2629 default: /* allow device-specific ioctls */
2630 {
2631 struct socket so;
2632
1862 bzero(&so, sizeof(so));
2633 bzero(&so, sizeof(so));
1863 error = get_ifp(nmr, &ifp); /* keep reference */
1864 if (error)
2634 NMG_LOCK();
2635 error = get_ifp(nmr, &ifp, 0 /* don't create */); /* keep reference */
2636 if (error) {
2637 NMG_UNLOCK();
1865 break;
2638 break;
2639 }
1866 so.so_vnet = ifp->if_vnet;
1867 // so->so_proto not null.
1868 error = ifioctl(&so, cmd, data, td);
1869 nm_if_rele(ifp);
2640 so.so_vnet = ifp->if_vnet;
2641 // so->so_proto not null.
2642 error = ifioctl(&so, cmd, data, td);
2643 nm_if_rele(ifp);
2644 NMG_UNLOCK();
1870 break;
1871 }
1872
1873#else /* linux */
1874 default:
1875 error = EOPNOTSUPP;
1876#endif /* linux */
1877 }
2645 break;
2646 }
2647
2648#else /* linux */
2649 default:
2650 error = EOPNOTSUPP;
2651#endif /* linux */
2652 }
2653out:
1878
1879 CURVNET_RESTORE();
1880 return (error);
1881}
1882
1883
1884/*
1885 * select(2) and poll(2) handlers for the "netmap" device.
1886 *
1887 * Can be called for one or more queues.
1888 * Return the event mask corresponding to ready events.
1889 * If there are no ready events, do a selrecord on either individual
2654
2655 CURVNET_RESTORE();
2656 return (error);
2657}
2658
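/*
 * Illustrative userspace sketch (editor's addition, not part of the
 * original sources): the open/register/map sequence served by the
 * NIOCGINFO/NIOCREGIF cases above, assuming the public netmap(4)
 * headers (NETMAP_IF comes from net/netmap_user.h).  Error handling is
 * collapsed and the file descriptor is leaked on failure.
 */
#if 0	/* usage sketch only, not compiled with the module */
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <string.h>
#include <net/netmap.h>
#include <net/netmap_user.h>

static struct netmap_if *
example_register(const char *ifname, int *fdp)
{
	struct nmreq req;
	void *mem;
	int fd;

	fd = open("/dev/netmap", O_RDWR);
	if (fd < 0)
		return NULL;
	memset(&req, 0, sizeof(req));
	req.nr_version = NETMAP_API;	/* checked first by the handler */
	strncpy(req.nr_name, ifname, sizeof(req.nr_name) - 1);
	req.nr_ringid = 0;		/* bind all hardware rings */
	if (ioctl(fd, NIOCREGIF, &req) < 0)
		return NULL;		/* on success fills nr_memsize and nr_offset */
	mem = mmap(NULL, req.nr_memsize, PROT_READ | PROT_WRITE,
	    MAP_SHARED, fd, 0);
	if (mem == MAP_FAILED)
		return NULL;
	*fdp = fd;
	return NETMAP_IF(mem, req.nr_offset);
}
#endif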
2659
2660/*
2661 * select(2) and poll(2) handlers for the "netmap" device.
2662 *
2663 * Can be called for one or more queues.
2664 * Return the event mask corresponding to ready events.
2665 * If there are no ready events, do a selrecord on either individual
1890 * selfd or on the global one.
2666 * selinfo or on the global one.
1891 * Device-dependent parts (locking and sync of tx/rx rings)
1892 * are done through callbacks.
1893 *
1894 * On linux, arguments are really pwait, the poll table, and 'td' is struct file *
1895 * The first one is remapped to pwait as selrecord() uses the name as a
1896 * hidden argument.
1897 */
1898static int
1899netmap_poll(struct cdev *dev, int events, struct thread *td)
1900{
1901 struct netmap_priv_d *priv = NULL;
1902 struct netmap_adapter *na;
1903 struct ifnet *ifp;
1904 struct netmap_kring *kring;
2667 * Device-dependent parts (locking and sync of tx/rx rings)
2668 * are done through callbacks.
2669 *
2670 * On linux, arguments are really pwait, the poll table, and 'td' is struct file *
2671 * The first one is remapped to pwait as selrecord() uses the name as a
2672 * hidden argument.
2673 */
2674static int
2675netmap_poll(struct cdev *dev, int events, struct thread *td)
2676{
2677 struct netmap_priv_d *priv = NULL;
2678 struct netmap_adapter *na;
2679 struct ifnet *ifp;
2680 struct netmap_kring *kring;
1905 u_int core_lock, i, check_all, want_tx, want_rx, revents = 0;
2681 u_int i, check_all, want_tx, want_rx, revents = 0;
1906 u_int lim_tx, lim_rx, host_forwarded = 0;
1907 struct mbq q = { NULL, NULL, 0 };
2682 u_int lim_tx, lim_rx, host_forwarded = 0;
2683 struct mbq q = { NULL, NULL, 0 };
1908 enum {NO_CL, NEED_CL, LOCKED_CL }; /* see below */
1909 void *pwait = dev; /* linux compatibility */
1910
2684 void *pwait = dev; /* linux compatibility */
2685
2686 int retry_tx = 1;
2687
1911 (void)pwait;
1912
1913 if (devfs_get_cdevpriv((void **)&priv) != 0 || priv == NULL)
1914 return POLLERR;
1915
1916 if (priv->np_nifp == NULL) {
1917 D("No if registered");
1918 return POLLERR;

--- 9 unchanged lines hidden (view full) ---

1928 D("device %s events 0x%x", ifp->if_xname, events);
1929 want_tx = events & (POLLOUT | POLLWRNORM);
1930 want_rx = events & (POLLIN | POLLRDNORM);
1931
1932 na = NA(ifp); /* retrieve netmap adapter */
1933
1934 lim_tx = na->num_tx_rings;
1935 lim_rx = na->num_rx_rings;
2688 (void)pwait;
2689
2690 if (devfs_get_cdevpriv((void **)&priv) != 0 || priv == NULL)
2691 return POLLERR;
2692
2693 if (priv->np_nifp == NULL) {
2694 D("No if registered");
2695 return POLLERR;

--- 9 unchanged lines hidden (view full) ---

2705 D("device %s events 0x%x", ifp->if_xname, events);
2706 want_tx = events & (POLLOUT | POLLWRNORM);
2707 want_rx = events & (POLLIN | POLLRDNORM);
2708
2709 na = NA(ifp); /* retrieve netmap adapter */
2710
2711 lim_tx = na->num_tx_rings;
2712 lim_rx = na->num_rx_rings;
1936 /* how many queues we are scanning */
2713
1937 if (priv->np_qfirst == NETMAP_SW_RING) {
2714 if (priv->np_qfirst == NETMAP_SW_RING) {
2715 /* handle the host stack ring */
1938 if (priv->np_txpoll || want_tx) {
1939 /* push any packets up, then we are always ready */
2716 if (priv->np_txpoll || want_tx) {
2717 /* push any packets up, then we are always ready */
1940 netmap_sync_to_host(na);
2718 netmap_txsync_to_host(na);
1941 revents |= want_tx;
1942 }
1943 if (want_rx) {
1944 kring = &na->rx_rings[lim_rx];
1945 if (kring->ring->avail == 0)
2719 revents |= want_tx;
2720 }
2721 if (want_rx) {
2722 kring = &na->rx_rings[lim_rx];
2723 if (kring->ring->avail == 0)
1946 netmap_sync_from_host(na, td, dev);
2724 netmap_rxsync_from_host(na, td, dev);
1947 if (kring->ring->avail > 0) {
1948 revents |= want_rx;
1949 }
1950 }
1951 return (revents);
1952 }
1953
1954 /* if we are in transparent mode, check also the host rx ring */
1955 kring = &na->rx_rings[lim_rx];
1956 if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
1957 && want_rx
1958 && (netmap_fwd || kring->ring->flags & NR_FORWARD) ) {
1959 if (kring->ring->avail == 0)
2725 if (kring->ring->avail > 0) {
2726 revents |= want_rx;
2727 }
2728 }
2729 return (revents);
2730 }
2731
2732 /* if we are in transparent mode, check also the host rx ring */
2733 kring = &na->rx_rings[lim_rx];
2734 if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
2735 && want_rx
2736 && (netmap_fwd || kring->ring->flags & NR_FORWARD) ) {
2737 if (kring->ring->avail == 0)
1960 netmap_sync_from_host(na, td, dev);
2738 netmap_rxsync_from_host(na, td, dev);
1961 if (kring->ring->avail > 0)
1962 revents |= want_rx;
1963 }
1964
1965 /*
2739 if (kring->ring->avail > 0)
2740 revents |= want_rx;
2741 }
2742
2743 /*
1966 * check_all is set if the card has more than one queue and
2744 * check_all is set if the card has more than one queue AND
1967 * the client is polling all of them. If true, we sleep on
2745 * the client is polling all of them. If true, we sleep on
1968 * the "global" selfd, otherwise we sleep on individual selfd
1969 * (we can only sleep on one of them per direction).
1970 * The interrupt routine in the driver should always wake on
1971 * the individual selfd, and also on the global one if the card
1972 * has more than one ring.
2746 * the "global" selinfo, otherwise we sleep on individual selinfo
2747 * (FreeBSD only allows two selinfo's per file descriptor).
2748 * The interrupt routine in the driver wakes one or the other
2749 * (or both) depending on which clients are active.
1973 *
2750 *
1974 * If the card has only one lock, we just use that.
1975 * If the card has separate ring locks, we just use those
1976 * unless we are doing check_all, in which case the whole
1977 * loop is wrapped by the global lock.
1978 * We acquire locks only when necessary: if poll is called
1979 * when buffers are available, we can just return without locks.
1980 *
1981 * rxsync() is only called if we run out of buffers on a POLLIN.
1982 * txsync() is called if we run out of buffers on POLLOUT, or
1983 * there are pending packets to send. The latter can be disabled
1984 * passing NETMAP_NO_TX_POLL in the NIOCREGIF call.
1985 */
1986 check_all = (priv->np_qlast == NETMAP_HW_RING) && (lim_tx > 1 || lim_rx > 1);
1987
2751 * rxsync() is only called if we run out of buffers on a POLLIN.
2752 * txsync() is called if we run out of buffers on POLLOUT, or
2753 * there are pending packets to send. The latter can be disabled
2754 * passing NETMAP_NO_TX_POLL in the NIOCREGIF call.
2755 */
2756 check_all = (priv->np_qlast == NETMAP_HW_RING) && (lim_tx > 1 || lim_rx > 1);
2757
1988 /*
1989 * core_lock indicates what to do with the core lock.
1990 * The core lock is used when either the card has no individual
1991 * locks, or it has individual locks but we are checking all
1992 * rings so we need the core lock to avoid missing wakeup events.
1993 *
1994 * It has three possible states:
1995 * NO_CL we don't need to use the core lock, e.g.
1996 * because we are protected by individual locks.
1997 * NEED_CL we need the core lock. In this case, when we
1998 * call the lock routine, move to LOCKED_CL
1999 * to remember to release the lock once done.
2000 * LOCKED_CL core lock is set, so we need to release it.
2001 */
2002 core_lock = (check_all || !na->separate_locks) ? NEED_CL : NO_CL;
2003#ifdef NM_BRIDGE
2004 /* the bridge uses separate locks */
2005 if (na->nm_register == bdg_netmap_reg) {
2006 ND("not using core lock for %s", ifp->if_xname);
2007 core_lock = NO_CL;
2008 }
2009#endif /* NM_BRIDGE */
2010 if (priv->np_qlast != NETMAP_HW_RING) {
2011 lim_tx = lim_rx = priv->np_qlast;
2012 }
2013
2014 /*
2015 * We start with a lock free round which is good if we have
2016 * data available. If this fails, then lock and call the sync
2017 * routines.

--- 14 unchanged lines hidden (view full) ---

2032 }
2033
2034 /*
2035 * If we need to push packets out (priv->np_txpoll) or want_tx is
2036 * still set, we do need to run the txsync calls (on all rings,
2037 * to avoid that the tx rings stall).
2038 */
2039 if (priv->np_txpoll || want_tx) {
2758 if (priv->np_qlast != NETMAP_HW_RING) {
2759 lim_tx = lim_rx = priv->np_qlast;
2760 }
2761
2762 /*
2763 * We start with a lock free round which is good if we have
2764 * data available. If this fails, then lock and call the sync
2765 * routines.

--- 14 unchanged lines hidden (view full) ---

2780 }
2781
2782 /*
2783 * If we need to push packets out (priv->np_txpoll) or want_tx is
2784 * still set, we do need to run the txsync calls (on all rings,
2785 * to avoid that the tx rings stall).
2786 */
2787 if (priv->np_txpoll || want_tx) {
2788 /* If we really want to be woken up (want_tx),
2789 * do a selrecord, either on the global or on
2790 * the private structure. Then issue the txsync
2791 * so there is no race in the selrecord/selwait
2792 */
2040flush_tx:
2041 for (i = priv->np_qfirst; i < lim_tx; i++) {
2042 kring = &na->tx_rings[i];
2043 /*
2793flush_tx:
2794 for (i = priv->np_qfirst; i < lim_tx; i++) {
2795 kring = &na->tx_rings[i];
2796 /*
2044 * Skip the current ring if want_tx == 0
2797 * Skip this ring if want_tx == 0
2045 * (we have already done a successful sync on
2046 * a previous ring) AND kring->cur == kring->hwcur
2047 * (there are no pending transmissions for this ring).
2048 */
2049 if (!want_tx && kring->ring->cur == kring->nr_hwcur)
2050 continue;
2798 * (we have already done a successful sync on
2799 * a previous ring) AND kring->cur == kring->hwcur
2800 * (there are no pending transmissions for this ring).
2801 */
2802 if (!want_tx && kring->ring->cur == kring->nr_hwcur)
2803 continue;
2051 if (core_lock == NEED_CL) {
2052 na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
2053 core_lock = LOCKED_CL;
2804 /* make sure only one user thread is doing this */
2805 if (nm_kr_tryget(kring)) {
2806 ND("ring %p busy is %d", kring, (int)kring->nr_busy);
2807 revents |= POLLERR;
2808 goto out;
2054 }
2809 }
2055 if (na->separate_locks)
2056 na->nm_lock(ifp, NETMAP_TX_LOCK, i);
2810
2057 if (netmap_verbose & NM_VERB_TXSYNC)
2058 D("send %d on %s %d",
2811 if (netmap_verbose & NM_VERB_TXSYNC)
2812 D("send %d on %s %d",
2059 kring->ring->cur,
2060 ifp->if_xname, i);
2061 if (na->nm_txsync(ifp, i, 0 /* no lock */))
2813 kring->ring->cur, ifp->if_xname, i);
2814 if (na->nm_txsync(ifp, i, 0))
2062 revents |= POLLERR;
2063
2064 /* Check avail/call selrecord only if called with POLLOUT */
2065 if (want_tx) {
2066 if (kring->ring->avail > 0) {
2067 /* stop at the first ring. We don't risk
2068 * starvation.
2069 */
2070 revents |= want_tx;
2071 want_tx = 0;
2815 revents |= POLLERR;
2816
2817 /* Check avail/call selrecord only if called with POLLOUT */
2818 if (want_tx) {
2819 if (kring->ring->avail > 0) {
2820 /* stop at the first ring. We don't risk
2821 * starvation.
2822 */
2823 revents |= want_tx;
2824 want_tx = 0;
2072 } else if (!check_all)
2073 selrecord(td, &kring->si);
2825 }
2074 }
2826 }
2075 if (na->separate_locks)
2076 na->nm_lock(ifp, NETMAP_TX_UNLOCK, i);
2827 nm_kr_put(kring);
2077 }
2828 }
2829 if (want_tx && retry_tx) {
2830 selrecord(td, check_all ?
2831 &na->tx_si : &na->tx_rings[priv->np_qfirst].si);
2832 retry_tx = 0;
2833 goto flush_tx;
2834 }
2078 }
2079
2080 /*
2081 * now if want_rx is still set we need to lock and rxsync.
2082 * Do it on all rings because otherwise we starve.
2083 */
2084 if (want_rx) {
2835 }
2836
2837 /*
2838 * now if want_rx is still set we need to lock and rxsync.
2839 * Do it on all rings because otherwise we starve.
2840 */
2841 if (want_rx) {
2842 int retry_rx = 1;
2843do_retry_rx:
2085 for (i = priv->np_qfirst; i < lim_rx; i++) {
2086 kring = &na->rx_rings[i];
2844 for (i = priv->np_qfirst; i < lim_rx; i++) {
2845 kring = &na->rx_rings[i];
2087 if (core_lock == NEED_CL) {
2088 na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
2089 core_lock = LOCKED_CL;
2846
2847 if (nm_kr_tryget(kring)) {
2848 revents |= POLLERR;
2849 goto out;
2090 }
2850 }
2091 if (na->separate_locks)
2092 na->nm_lock(ifp, NETMAP_RX_LOCK, i);
2851
2852 /* XXX NR_FORWARD should only be read on
2853 * physical or NIC ports
2854 */
2093 if (netmap_fwd || kring->ring->flags & NR_FORWARD) {
2094 ND(10, "forwarding some buffers up %d to %d",
2095 kring->nr_hwcur, kring->ring->cur);
2096 netmap_grab_packets(kring, &q, netmap_fwd);
2097 }
2098
2855 if (netmap_fwd || kring->ring->flags & NR_FORWARD) {
2856 ND(10, "forwarding some buffers up %d to %d",
2857 kring->nr_hwcur, kring->ring->cur);
2858 netmap_grab_packets(kring, &q, netmap_fwd);
2859 }
2860
2099 if (na->nm_rxsync(ifp, i, 0 /* no lock */))
2861 if (na->nm_rxsync(ifp, i, 0))
2100 revents |= POLLERR;
2101 if (netmap_no_timestamp == 0 ||
2102 kring->ring->flags & NR_TIMESTAMP) {
2103 microtime(&kring->ring->ts);
2104 }
2105
2862 revents |= POLLERR;
2863 if (netmap_no_timestamp == 0 ||
2864 kring->ring->flags & NR_TIMESTAMP) {
2865 microtime(&kring->ring->ts);
2866 }
2867
2106 if (kring->ring->avail > 0)
2868 if (kring->ring->avail > 0) {
2107 revents |= want_rx;
2869 revents |= want_rx;
2108 else if (!check_all)
2109 selrecord(td, &kring->si);
2110 if (na->separate_locks)
2111 na->nm_lock(ifp, NETMAP_RX_UNLOCK, i);
2870 retry_rx = 0;
2871 }
2872 nm_kr_put(kring);
2112 }
2873 }
2874 if (retry_rx) {
2875 retry_rx = 0;
2876 selrecord(td, check_all ?
2877 &na->rx_si : &na->rx_rings[priv->np_qfirst].si);
2878 goto do_retry_rx;
2879 }
2113 }
2880 }
2114 if (check_all && revents == 0) { /* signal on the global queue */
2115 if (want_tx)
2116 selrecord(td, &na->tx_si);
2117 if (want_rx)
2118 selrecord(td, &na->rx_si);
2119 }
2120
2881
2121 /* forward host to the netmap ring */
2882 /* forward host to the netmap ring.
2883 * I am accessing nr_hwavail without lock, but netmap_transmit
2884 * can only increment it, so the operation is safe.
2885 */
2122 kring = &na->rx_rings[lim_rx];
2886 kring = &na->rx_rings[lim_rx];
2123 if (kring->nr_hwavail > 0)
2124 ND("host rx %d has %d packets", lim_rx, kring->nr_hwavail);
2125 if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
2126 && (netmap_fwd || kring->ring->flags & NR_FORWARD)
2127 && kring->nr_hwavail > 0 && !host_forwarded) {
2887 if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
2888 && (netmap_fwd || kring->ring->flags & NR_FORWARD)
2889 && kring->nr_hwavail > 0 && !host_forwarded) {
2128 if (core_lock == NEED_CL) {
2129 na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
2130 core_lock = LOCKED_CL;
2131 }
2132 netmap_sw_to_nic(na);
2133 host_forwarded = 1; /* prevent another pass */
2134 want_rx = 0;
2135 goto flush_tx;
2136 }
2137
2890 netmap_sw_to_nic(na);
2891 host_forwarded = 1; /* prevent another pass */
2892 want_rx = 0;
2893 goto flush_tx;
2894 }
2895
2138 if (core_lock == LOCKED_CL)
2139 na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
2140 if (q.head)
2141 netmap_send_up(na->ifp, q.head);
2142
2896 if (q.head)
2897 netmap_send_up(na->ifp, q.head);
2898
2899out:
2900
2143 return (revents);
2144}
2145
2146/*------- driver support routines ------*/
2147
2148
2149/*
2901 return (revents);
2902}
2903
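/*
 * Illustrative userspace sketch (editor's addition, not part of the
 * original sources): a minimal poll() loop against the handler above.
 * POLLIN waits for received packets and POLLOUT for transmit space;
 * the implicit txsync on every poll can be disabled by registering
 * with NETMAP_NO_TX_POLL, as noted in the comments.
 */
#if 0	/* usage sketch only, not compiled with the module */
#include <poll.h>

static void
example_poll_loop(int fd)	/* fd already bound with NIOCREGIF */
{
	struct pollfd pfd;

	pfd.fd = fd;
	pfd.events = POLLIN;
	for (;;) {
		if (poll(&pfd, 1, 1000 /* ms */) <= 0)
			continue;	/* timeout or error, retry */
		if (pfd.revents & POLLIN) {
			/* the kernel has rxsync'ed the bound rings:
			 * walk them, consume the new slots, then advance
			 * cur/avail before calling poll() again.
			 */
		}
	}
}
#endif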
2904/*------- driver support routines ------*/
2905
2906
2907/*
2150 * default lock wrapper.
2151 */
2152static void
2153netmap_lock_wrapper(struct ifnet *dev, int what, u_int queueid)
2154{
2155 struct netmap_adapter *na = NA(dev);
2156
2157 switch (what) {
2158#ifdef linux /* some systems do not need a lock on register */
2159 case NETMAP_REG_LOCK:
2160 case NETMAP_REG_UNLOCK:
2161 break;
2162#endif /* linux */
2163
2164 case NETMAP_CORE_LOCK:
2165 mtx_lock(&na->core_lock);
2166 break;
2167
2168 case NETMAP_CORE_UNLOCK:
2169 mtx_unlock(&na->core_lock);
2170 break;
2171
2172 case NETMAP_TX_LOCK:
2173 mtx_lock(&na->tx_rings[queueid].q_lock);
2174 break;
2175
2176 case NETMAP_TX_UNLOCK:
2177 mtx_unlock(&na->tx_rings[queueid].q_lock);
2178 break;
2179
2180 case NETMAP_RX_LOCK:
2181 mtx_lock(&na->rx_rings[queueid].q_lock);
2182 break;
2183
2184 case NETMAP_RX_UNLOCK:
2185 mtx_unlock(&na->rx_rings[queueid].q_lock);
2186 break;
2187 }
2188}
2189
2190
2191/*
2192 * Initialize a ``netmap_adapter`` object created by driver on attach.
2193 * We allocate a block of memory with room for a struct netmap_adapter
2194 * plus two sets of N+2 struct netmap_kring (where N is the number
2195 * of hardware rings):
2196 * krings 0..N-1 are for the hardware queues.
2197 * kring N is for the host stack queue
2198 * kring N+1 is only used for the selinfo for all queues.
2199 * Return 0 on success, ENOMEM otherwise.
2200 *
2201 * By default the receive and transmit adapter ring counts are both initialized
2202 * to num_queues. na->num_tx_rings can be set for cards with different tx/rx
2203 * setups.
2204 */
2205int
2908 * Initialize a ``netmap_adapter`` object created by driver on attach.
2909 * We allocate a block of memory with room for a struct netmap_adapter
2910 * plus two sets of N+2 struct netmap_kring (where N is the number
2911 * of hardware rings):
2912 * krings 0..N-1 are for the hardware queues.
2913 * kring N is for the host stack queue
2914 * kring N+1 is only used for the selinfo for all queues.
2915 * Return 0 on success, ENOMEM otherwise.
2916 *
2917 * By default the receive and transmit adapter ring counts are both initialized
2918 * to num_queues. na->num_tx_rings can be set for cards with different tx/rx
2919 * setups.
2920 */
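/*
 * Illustrative driver-side sketch (editor's addition, not part of the
 * original sources): how a NIC driver typically fills a netmap_adapter
 * and calls netmap_attach() at attach time.  The foo_* names and the
 * softc layout are placeholders; only a representative subset of the
 * fields copied by netmap_attach() is shown.
 */
#if 0	/* usage sketch only, not compiled with the module */
static void
foo_netmap_attach(struct foo_softc *sc)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));
	na.ifp = sc->ifp;
	na.num_tx_desc = sc->num_tx_desc;
	na.num_rx_desc = sc->num_rx_desc;
	na.nm_txsync = foo_netmap_txsync;	/* driver-provided callbacks */
	na.nm_rxsync = foo_netmap_rxsync;
	na.nm_register = foo_netmap_reg;
	netmap_attach(&na, sc->num_queues);	/* one tx/rx kring pair per queue */
}
#endif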
2921int
2206netmap_attach(struct netmap_adapter *arg, int num_queues)
2922netmap_attach(struct netmap_adapter *arg, u_int num_queues)
2207{
2208 struct netmap_adapter *na = NULL;
2209 struct ifnet *ifp = arg ? arg->ifp : NULL;
2923{
2924 struct netmap_adapter *na = NULL;
2925 struct ifnet *ifp = arg ? arg->ifp : NULL;
2210 int len;
2926 size_t len;
2211
2212 if (arg == NULL || ifp == NULL)
2213 goto fail;
2927
2928 if (arg == NULL || ifp == NULL)
2929 goto fail;
2930 /* a VALE port uses two endpoints */
2214 len = nma_is_vp(arg) ? sizeof(*na) : sizeof(*na) * 2;
2215 na = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO);
2216 if (na == NULL)
2217 goto fail;
2218 WNA(ifp) = na;
2219 *na = *arg; /* copy everything, trust the driver to not pass junk */
2220 NETMAP_SET_CAPABLE(ifp);
2221 if (na->num_tx_rings == 0)
2222 na->num_tx_rings = num_queues;
2223 na->num_rx_rings = num_queues;
2224 na->refcount = na->na_single = na->na_multi = 0;
2225 /* Core lock initialized here, others after netmap_if_new. */
2226 mtx_init(&na->core_lock, "netmap core lock", MTX_NETWORK_LOCK, MTX_DEF);
2931 len = nma_is_vp(arg) ? sizeof(*na) : sizeof(*na) * 2;
2932 na = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO);
2933 if (na == NULL)
2934 goto fail;
2935 WNA(ifp) = na;
2936 *na = *arg; /* copy everything, trust the driver to not pass junk */
2937 NETMAP_SET_CAPABLE(ifp);
2938 if (na->num_tx_rings == 0)
2939 na->num_tx_rings = num_queues;
2940 na->num_rx_rings = num_queues;
2941 na->refcount = na->na_single = na->na_multi = 0;
2942 /* Core lock initialized here, others after netmap_if_new. */
2943 mtx_init(&na->core_lock, "netmap core lock", MTX_NETWORK_LOCK, MTX_DEF);
2227 if (na->nm_lock == NULL) {
2228 ND("using default locks for %s", ifp->if_xname);
2229 na->nm_lock = netmap_lock_wrapper;
2230 }
2231#ifdef linux
2232 if (ifp->netdev_ops) {
2233 ND("netdev_ops %p", ifp->netdev_ops);
2234 /* prepare a clone of the netdev ops */
2235#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
2236 na->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
2237#else
2238 na->nm_ndo = *ifp->netdev_ops;
2239#endif
2240 }
2944#ifdef linux
2945 if (ifp->netdev_ops) {
2946 ND("netdev_ops %p", ifp->netdev_ops);
2947 /* prepare a clone of the netdev ops */
2948#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
2949 na->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
2950#else
2951 na->nm_ndo = *ifp->netdev_ops;
2952#endif
2953 }
2241 na->nm_ndo.ndo_start_xmit = linux_netmap_start;
2242#endif
2954 na->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
2955#endif /* linux */
2956 na->nm_mem = arg->nm_mem ? arg->nm_mem : &nm_mem;
2243 if (!nma_is_vp(arg))
2244 netmap_attach_sw(ifp);
2245 D("success for %s", ifp->if_xname);
2246 return 0;
2247
2248fail:
2249 D("fail, arg %p ifp %p na %p", arg, ifp, na);
2250 netmap_detach(ifp);

--- 14 unchanged lines hidden (view full) ---

2265 return;
2266
2267 mtx_destroy(&na->core_lock);
2268
2269 if (na->tx_rings) { /* XXX should not happen */
2270 D("freeing leftover tx_rings");
2271 free(na->tx_rings, M_DEVBUF);
2272 }
2957 if (!nma_is_vp(arg))
2958 netmap_attach_sw(ifp);
2959 D("success for %s", ifp->if_xname);
2960 return 0;
2961
2962fail:
2963 D("fail, arg %p ifp %p na %p", arg, ifp, na);
2964 netmap_detach(ifp);

--- 14 unchanged lines hidden (view full) ---

2979 return;
2980
2981 mtx_destroy(&na->core_lock);
2982
2983 if (na->tx_rings) { /* XXX should not happen */
2984 D("freeing leftover tx_rings");
2985 free(na->tx_rings, M_DEVBUF);
2986 }
2987 if (na->na_flags & NAF_MEM_OWNER)
2988 netmap_mem_private_delete(na->nm_mem);
2273 bzero(na, sizeof(*na));
2274 WNA(ifp) = NULL;
2275 free(na, M_DEVBUF);
2276}
2277
2278
2279int
2989 bzero(na, sizeof(*na));
2990 WNA(ifp) = NULL;
2991 free(na, M_DEVBUF);
2992}
2993
2994
2995int
2280nm_bdg_flush(struct nm_bdg_fwd *ft, int n, struct netmap_adapter *na, u_int ring_nr);
2996nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
2997 struct netmap_adapter *na, u_int ring_nr);
2281
2998
2282/* we don't need to lock myself */
2283static int
2284bdg_netmap_start(struct ifnet *ifp, struct mbuf *m)
2285{
2286 struct netmap_adapter *na = SWNA(ifp);
2287 struct nm_bdg_fwd *ft = na->rx_rings[0].nkr_ft;
2288 char *buf = NMB(&na->rx_rings[0].ring->slot[0]);
2289 u_int len = MBUF_LEN(m);
2290
2999
2291 if (!na->na_bdg) /* SWNA is not configured to be attached */
2292 return EBUSY;
2293 m_copydata(m, 0, len, buf);
2294 ft->ft_flags = 0; // XXX could be indirect ?
2295 ft->ft_len = len;
2296 ft->ft_buf = buf;
2297 ft->ft_next = NM_BDG_BATCH; // XXX is it needed ?
2298 nm_bdg_flush(ft, 1, na, 0);
2299
2300 /* release the mbuf in either case of success or failure. As an
2301 * alternative, put the mbuf in a free list and free the list
2302 * only when really necessary.
2303 */
2304 m_freem(m);
2305
2306 return (0);
2307}
2308
2309
2310/*
2311 * Intercept packets from the network stack and pass them
2312 * to netmap as incoming packets on the 'software' ring.
3000/*
3001 * Intercept packets from the network stack and pass them
3002 * to netmap as incoming packets on the 'software' ring.
2313 * We are not locked when called.
3003 * We rely on the OS to make sure that the ifp and na do not go
3004 * away (typically the caller checks for IFF_DRV_RUNNING or the like).
3005 * In nm_register() or whenever there is a reinitialization,
3006 * we make sure to access the core lock and per-ring locks
3007 * so that IFCAP_NETMAP is visible here.
2314 */
2315int
3008 */
3009int
2316netmap_start(struct ifnet *ifp, struct mbuf *m)
3010netmap_transmit(struct ifnet *ifp, struct mbuf *m)
2317{
2318 struct netmap_adapter *na = NA(ifp);
3011{
3012 struct netmap_adapter *na = NA(ifp);
2319 struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
3013 struct netmap_kring *kring;
2320 u_int i, len = MBUF_LEN(m);
3014 u_int i, len = MBUF_LEN(m);
2321 u_int error = EBUSY, lim = kring->nkr_num_slots - 1;
3015 u_int error = EBUSY, lim;
2322 struct netmap_slot *slot;
2323
3016 struct netmap_slot *slot;
3017
3018 // XXX [Linux] we do not need this lock
3019 // if we follow the down/configure/up protocol -gl
3020 // mtx_lock(&na->core_lock);
3021 if ( (ifp->if_capenable & IFCAP_NETMAP) == 0) {
3022 /* interface not in netmap mode anymore */
3023 error = ENXIO;
3024 goto done;
3025 }
3026
3027 kring = &na->rx_rings[na->num_rx_rings];
3028 lim = kring->nkr_num_slots - 1;
2324 if (netmap_verbose & NM_VERB_HOST)
2325 D("%s packet %d len %d from the stack", ifp->if_xname,
2326 kring->nr_hwcur + kring->nr_hwavail, len);
3029 if (netmap_verbose & NM_VERB_HOST)
3030 D("%s packet %d len %d from the stack", ifp->if_xname,
3031 kring->nr_hwcur + kring->nr_hwavail, len);
2327 if (len > NETMAP_BUF_SIZE) { /* too long for us */
3032 // XXX reconsider long packets if we handle fragments
3033 if (len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) { /* too long for us */
2328 D("%s from_host, drop packet size %d > %d", ifp->if_xname,
3034 D("%s from_host, drop packet size %d > %d", ifp->if_xname,
2329 len, NETMAP_BUF_SIZE);
2330 m_freem(m);
2331 return EINVAL;
3035 len, NETMAP_BDG_BUF_SIZE(na->nm_mem));
3036 goto done;
2332 }
3037 }
2333 if (na->na_bdg)
2334 return bdg_netmap_start(ifp, m);
3038 if (SWNA(ifp)->na_bdg) {
3039 struct nm_bdg_fwd *ft;
3040 char *dst;
2335
3041
2336 na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
3042 na = SWNA(ifp); /* we operate on the host port */
3043 ft = na->rx_rings[0].nkr_ft;
3044 dst = BDG_NMB(na->nm_mem, &na->rx_rings[0].ring->slot[0]);
3045
3046 /* use slot 0 in the ft, there is nothing queued here */
3047 /* XXX we can save the copy calling m_copydata in nm_bdg_flush,
3048 * need a special flag for this.
3049 */
3050 m_copydata(m, 0, (int)len, dst);
3051 ft->ft_flags = 0;
3052 ft->ft_len = len;
3053 ft->ft_buf = dst;
3054 ft->ft_next = NM_FT_NULL;
3055 ft->ft_frags = 1;
3056 if (netmap_verbose & NM_VERB_HOST)
3057 RD(5, "pkt %p size %d to bridge port %d",
3058 dst, len, na->bdg_port);
3059 nm_bdg_flush(ft, 1, na, 0);
3060 na = NA(ifp); /* back to the regular object/lock */
3061 error = 0;
3062 goto done;
3063 }
3064
3065 /* protect against other instances of netmap_transmit,
3066 * and userspace invocations of rxsync().
3067 * XXX could reuse core_lock
3068 */
3069 // XXX [Linux] there can be no other instances of netmap_transmit
3070 // on this same ring, but we still need this lock to protect
3071 // concurrent access from netmap_sw_to_nic() -gl
3072 mtx_lock(&kring->q_lock);
2337 if (kring->nr_hwavail >= lim) {
2338 if (netmap_verbose)
2339 D("stack ring %s full\n", ifp->if_xname);
3073 if (kring->nr_hwavail >= lim) {
3074 if (netmap_verbose)
3075 D("stack ring %s full\n", ifp->if_xname);
2340 goto done; /* no space */
3076 } else {
3077 /* compute the insert position */
3078 i = nm_kr_rxpos(kring);
3079 slot = &kring->ring->slot[i];
3080 m_copydata(m, 0, (int)len, BDG_NMB(na->nm_mem, slot));
3081 slot->len = len;
3082 slot->flags = kring->nkr_slot_flags;
3083 kring->nr_hwavail++;
3084 if (netmap_verbose & NM_VERB_HOST)
3085 D("wake up host ring %s %d", na->ifp->if_xname, na->num_rx_rings);
3086 selwakeuppri(&kring->si, PI_NET);
3087 error = 0;
2341 }
3088 }
3089 mtx_unlock(&kring->q_lock);
2342
3090
2343 /* compute the insert position */
2344 i = kring->nr_hwcur + kring->nr_hwavail;
2345 if (i > lim)
2346 i -= lim + 1;
2347 slot = &kring->ring->slot[i];
2348 m_copydata(m, 0, len, NMB(slot));
2349 slot->len = len;
2350 slot->flags = kring->nkr_slot_flags;
2351 kring->nr_hwavail++;
2352 if (netmap_verbose & NM_VERB_HOST)
2353 D("wake up host ring %s %d", na->ifp->if_xname, na->num_rx_rings);
2354 selwakeuppri(&kring->si, PI_NET);
2355 error = 0;
2356done:
3091done:
2357 na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
3092 // mtx_unlock(&na->core_lock);
2358
2359 /* release the mbuf in either case of success or failure. As an
2360 * alternative, put the mbuf in a free list and free the list
2361 * only when really necessary.
2362 */
2363 m_freem(m);
2364
2365 return (error);
2366}
2367
2368
2369/*
2370 * netmap_reset() is called by the driver routines when reinitializing
2371 * a ring. The driver is in charge of locking to protect the kring.
2372 * If netmap mode is not set just return NULL.
2373 */
2374struct netmap_slot *
3093
3094 /* release the mbuf in either case of success or failure. As an
3095 * alternative, put the mbuf in a free list and free the list
3096 * only when really necessary.
3097 */
3098 m_freem(m);
3099
3100 return (error);
3101}
3102
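/*
 * Editor's note, illustrative only: the insert position on the host rx
 * ring is the first slot past the packets already queued.  The old code
 * computed it inline as hwcur + hwavail modulo the ring size; the new
 * code delegates to nm_kr_rxpos(), assumed here to perform the same
 * circular arithmetic.
 */
#if 0	/* equivalent computation, for illustration */
static u_int
example_rxpos(const struct netmap_kring *kring)
{
	u_int lim = kring->nkr_num_slots - 1;
	u_int i = kring->nr_hwcur + kring->nr_hwavail;

	if (i > lim)
		i -= lim + 1;	/* wrap around the ring */
	return i;
}
#endif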
3103
3104/*
3105 * netmap_reset() is called by the driver routines when reinitializing
3106 * a ring. The driver is in charge of locking to protect the kring.
3107 * If netmap mode is not set just return NULL.
3108 */
3109struct netmap_slot *
2375netmap_reset(struct netmap_adapter *na, enum txrx tx, int n,
3110netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
2376 u_int new_cur)
2377{
2378 struct netmap_kring *kring;
2379 int new_hwofs, lim;
2380
3111 u_int new_cur)
3112{
3113 struct netmap_kring *kring;
3114 int new_hwofs, lim;
3115
2381 if (na == NULL)
3116 if (na == NULL) {
3117 D("NULL na, should not happen");
2382 return NULL; /* no netmap support here */
3118 return NULL; /* no netmap support here */
2383 if (!(na->ifp->if_capenable & IFCAP_NETMAP))
3119 }
3120 if (!(na->ifp->if_capenable & IFCAP_NETMAP)) {
3121 D("interface not in netmap mode");
2384 return NULL; /* nothing to reinitialize */
3122 return NULL; /* nothing to reinitialize */
3123 }
2385
3124
3125 /* XXX note- in the new scheme, we are not guaranteed to be
3126 * under lock (e.g. when called on a device reset).
3127 * In this case, we should set a flag and do not trust too
3128 * much the values. In practice: TODO
3129 * - set a RESET flag somewhere in the kring
3130 * - do the processing in a conservative way
3131 * - let the *sync() fixup at the end.
3132 */
2386 if (tx == NR_TX) {
2387 if (n >= na->num_tx_rings)
2388 return NULL;
2389 kring = na->tx_rings + n;
2390 new_hwofs = kring->nr_hwcur - new_cur;
2391 } else {
2392 if (n >= na->num_rx_rings)
2393 return NULL;
2394 kring = na->rx_rings + n;
2395 new_hwofs = kring->nr_hwcur + kring->nr_hwavail - new_cur;
2396 }
2397 lim = kring->nkr_num_slots - 1;
2398 if (new_hwofs > lim)
2399 new_hwofs -= lim + 1;
2400
3133 if (tx == NR_TX) {
3134 if (n >= na->num_tx_rings)
3135 return NULL;
3136 kring = na->tx_rings + n;
3137 new_hwofs = kring->nr_hwcur - new_cur;
3138 } else {
3139 if (n >= na->num_rx_rings)
3140 return NULL;
3141 kring = na->rx_rings + n;
3142 new_hwofs = kring->nr_hwcur + kring->nr_hwavail - new_cur;
3143 }
3144 lim = kring->nkr_num_slots - 1;
3145 if (new_hwofs > lim)
3146 new_hwofs -= lim + 1;
3147
2401 /* Alwayws set the new offset value and realign the ring. */
3148 /* Always set the new offset value and realign the ring. */
3149 D("%s hwofs %d -> %d, hwavail %d -> %d",
3150 tx == NR_TX ? "TX" : "RX",
3151 kring->nkr_hwofs, new_hwofs,
3152 kring->nr_hwavail,
3153 tx == NR_TX ? lim : kring->nr_hwavail);
2402 kring->nkr_hwofs = new_hwofs;
2403 if (tx == NR_TX)
3154 kring->nkr_hwofs = new_hwofs;
3155 if (tx == NR_TX)
2404 kring->nr_hwavail = kring->nkr_num_slots - 1;
2405 ND(10, "new hwofs %d on %s %s[%d]",
2406 kring->nkr_hwofs, na->ifp->if_xname,
2407 tx == NR_TX ? "TX" : "RX", n);
3156 kring->nr_hwavail = lim;
2408
2409#if 0 // def linux
2410 /* XXX check that the mappings are correct */
2411 /* need ring_nr, adapter->pdev, direction */
2412 buffer_info->dma = dma_map_single(&pdev->dev, addr, adapter->rx_buffer_len, DMA_FROM_DEVICE);
2413 if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
2414 D("error mapping rx netmap buffer %d", i);
2415 // XXX fix error handling
2416 }
2417
2418#endif /* linux */
2419 /*
3157
3158#if 0 // def linux
3159 /* XXX check that the mappings are correct */
3160 /* need ring_nr, adapter->pdev, direction */
3161 buffer_info->dma = dma_map_single(&pdev->dev, addr, adapter->rx_buffer_len, DMA_FROM_DEVICE);
3162 if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
3163 D("error mapping rx netmap buffer %d", i);
3164 // XXX fix error handling
3165 }
3166
3167#endif /* linux */
3168 /*
2420 * Wakeup on the individual and global lock
3169 * Wakeup on the individual and global selwait
2421 * We do the wakeup here, but the ring is not yet reconfigured.
2422 * However, we are under lock so there are no races.
2423 */
2424 selwakeuppri(&kring->si, PI_NET);
2425 selwakeuppri(tx == NR_TX ? &na->tx_si : &na->rx_si, PI_NET);
2426 return kring->ring->slot;
2427}
2428
2429
3170 * We do the wakeup here, but the ring is not yet reconfigured.
3171 * However, we are under lock so there are no races.
3172 */
3173 selwakeuppri(&kring->si, PI_NET);
3174 selwakeuppri(tx == NR_TX ? &na->tx_si : &na->rx_si, PI_NET);
3175 return kring->ring->slot;
3176}
3177
3178
2430/* returns the next position in the ring */
3179/*
3180 * Grab packets from a kring, move them into the ft structure
3181 * associated to the tx (input) port. Max one instance per port,
3182 * filtered on input (ioctl, poll or XXX).
3183 * Returns the next position in the ring.
3184 */
2431static int
2432nm_bdg_preflush(struct netmap_adapter *na, u_int ring_nr,
2433 struct netmap_kring *kring, u_int end)
2434{
2435 struct netmap_ring *ring = kring->ring;
3185static int
3186nm_bdg_preflush(struct netmap_adapter *na, u_int ring_nr,
3187 struct netmap_kring *kring, u_int end)
3188{
3189 struct netmap_ring *ring = kring->ring;
2436 struct nm_bdg_fwd *ft = kring->nkr_ft;
3190 struct nm_bdg_fwd *ft;
2437 u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
2438 u_int ft_i = 0; /* start from 0 */
3191 u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
3192 u_int ft_i = 0; /* start from 0 */
3193 u_int frags = 1; /* how many frags ? */
3194 struct nm_bridge *b = na->na_bdg;
2439
3195
2440 for (; likely(j != end); j = unlikely(j == lim) ? 0 : j+1) {
3196 /* To protect against modifications to the bridge we acquire a
3197 * shared lock, waiting if we can sleep (if the source port is
3198 * attached to a user process) or with a trylock otherwise (NICs).
3199 */
3200 ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
3201 if (na->na_flags & NAF_BDG_MAYSLEEP)
3202 BDG_RLOCK(b);
3203 else if (!BDG_RTRYLOCK(b))
3204 return 0;
3205 ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
3206 ft = kring->nkr_ft;
3207
3208 for (; likely(j != end); j = nm_next(j, lim)) {
2441 struct netmap_slot *slot = &ring->slot[j];
3209 struct netmap_slot *slot = &ring->slot[j];
2442 char *buf = NMB(slot);
2443 int len = ft[ft_i].ft_len = slot->len;
3210 char *buf;
2444
3211
3212 ft[ft_i].ft_len = slot->len;
2445 ft[ft_i].ft_flags = slot->flags;
2446
2447 ND("flags is 0x%x", slot->flags);
2448 /* this slot goes into a list so initialize the link field */
3213 ft[ft_i].ft_flags = slot->flags;
3214
3215 ND("flags is 0x%x", slot->flags);
3216 /* this slot goes into a list so initialize the link field */
2449 ft[ft_i].ft_next = NM_BDG_BATCH; /* equivalent to NULL */
2450 if (unlikely(len < 14))
2451 continue;
3217 ft[ft_i].ft_next = NM_FT_NULL;
2452 buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
3218 buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
2453 *((void **)buf) : buf;
3219 (void *)slot->ptr : BDG_NMB(na->nm_mem, slot);
2454 prefetch(buf);
3220 prefetch(buf);
2455 if (unlikely(++ft_i == netmap_bridge))
3221 ++ft_i;
3222 if (slot->flags & NS_MOREFRAG) {
3223 frags++;
3224 continue;
3225 }
3226 if (unlikely(netmap_verbose && frags > 1))
3227 RD(5, "%d frags at %d", frags, ft_i - frags);
3228 ft[ft_i - frags].ft_frags = frags;
3229 frags = 1;
3230 if (unlikely((int)ft_i >= bridge_batch))
2456 ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
2457 }
3231 ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
3232 }
3233 if (frags > 1) {
3234 D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
3235 // ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
3236 ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
3237 ft[ft_i - frags].ft_frags = frags - 1;
3238 }
2458 if (ft_i)
2459 ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
3239 if (ft_i)
3240 ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
3241 BDG_RUNLOCK(b);
2460 return j;
2461}
2462
2463
2464/*
3242 return j;
3243}
3244
3245
3246/*
2465 * Pass packets from nic to the bridge. Must be called with
2466 * proper locks on the source interface.
3247 * Pass packets from nic to the bridge.
3248 * XXX TODO check locking: this is called from the interrupt
3249 * handler so we should make sure that the interface is not
3250 * disconnected while passing down an interrupt.
3251 *
2467 * Note, no user process can access this NIC so we can ignore
2468 * the info in the 'ring'.
2469 */
2470static void
2471netmap_nic_to_bdg(struct ifnet *ifp, u_int ring_nr)
2472{
2473 struct netmap_adapter *na = NA(ifp);
2474 struct netmap_kring *kring = &na->rx_rings[ring_nr];
2475 struct netmap_ring *ring = kring->ring;
3252 * Note, no user process can access this NIC so we can ignore
3253 * the info in the 'ring'.
3254 */
3255static void
3256netmap_nic_to_bdg(struct ifnet *ifp, u_int ring_nr)
3257{
3258 struct netmap_adapter *na = NA(ifp);
3259 struct netmap_kring *kring = &na->rx_rings[ring_nr];
3260 struct netmap_ring *ring = kring->ring;
2476 int j, k, lim = kring->nkr_num_slots - 1;
3261 u_int j, k;
2477
3262
2478 /* fetch packets that have arrived */
2479 na->nm_rxsync(ifp, ring_nr, 0);
2480 /* XXX we don't count reserved, but it should be 0 */
2481 j = kring->nr_hwcur;
2482 k = j + kring->nr_hwavail;
2483 if (k > lim)
2484 k -= lim + 1;
2485 if (k == j && netmap_verbose) {
3263 /* make sure that only one thread is ever in here,
3264 * after which we can unlock. Probably unnecessary XXX.
3265 */
3266 if (nm_kr_tryget(kring))
3267 return;
3268 /* fetch packets that have arrived.
3269 * XXX maybe do this in a loop ?
3270 */
3271 if (na->nm_rxsync(ifp, ring_nr, 0))
3272 goto put_out;
3273 if (kring->nr_hwavail == 0 && netmap_verbose) {
2486 D("how strange, interrupt with no packets on %s",
2487 ifp->if_xname);
3274 D("how strange, interrupt with no packets on %s",
3275 ifp->if_xname);
2488 return;
3276 goto put_out;
2489 }
3277 }
3278 k = nm_kr_rxpos(kring);
2490
2491 j = nm_bdg_preflush(na, ring_nr, kring, k);
2492
2493 /* we consume everything, but we cannot update kring directly
2494 * because the nic may have destroyed the info in the NIC ring.
2495 * So we need to call rxsync again to restore it.
2496 */
2497 ring->cur = j;
2498 ring->avail = 0;
2499 na->nm_rxsync(ifp, ring_nr, 0);
3279
3280 j = nm_bdg_preflush(na, ring_nr, kring, k);
3281
3282 /* we consume everything, but we cannot update kring directly
3283 * because the nic may have destroyed the info in the NIC ring.
3284 * So we need to call rxsync again to restore it.
3285 */
3286 ring->cur = j;
3287 ring->avail = 0;
3288 na->nm_rxsync(ifp, ring_nr, 0);
3289
3290put_out:
3291 nm_kr_put(kring);
2500 return;
2501}
2502
2503
2504/*
3292 return;
3293}
3294
3295
3296/*
2505 * Default functions to handle rx/tx interrupts
2506 * we have 4 cases:
2507 * 1 ring, single lock:
2508 * lock(core); wake(i=0); unlock(core)
2509 * N rings, single lock:
2510 * lock(core); wake(i); wake(N+1) unlock(core)
2511 * 1 ring, separate locks: (i=0)
2512 * lock(i); wake(i); unlock(i)
2513 * N rings, separate locks:
2514 * lock(i); wake(i); unlock(i); lock(core) wake(N+1) unlock(core)
2515 * work_done is non-null on the RX path.
3297 * Default functions to handle rx/tx interrupts from a physical device.
3298 * "work_done" is non-null on the RX path, NULL for the TX path.
3299 * We rely on the OS to make sure that there is only one active
3300 * instance per queue, and that there is appropriate locking.
2516 *
3301 *
2517 * The 'q' argument also includes a flag to tell whether the queue is
2518 * already locked on enter, and whether it should remain locked on exit.
2519 * This helps adapting to different defaults in drivers and OSes.
3302 * If the card is not in netmap mode, simply return 0,
3303 * so that the caller proceeds with regular processing.
3304 *
3305 * If the card is connected to a netmap file descriptor,
3306 * do a selwakeup on the individual queue, plus one on the global one
3307 * if needed (multiqueue card _and_ there are multiqueue listeners),
3308 * and return 1.
3309 *
3310 * Finally, if called on rx from an interface connected to a switch,
3311 * calls the proper forwarding routine, and return 1.
2520 */
2521int
3312 */
3313int
2522netmap_rx_irq(struct ifnet *ifp, int q, int *work_done)
3314netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done)
2523{
2524 struct netmap_adapter *na;
3315{
3316 struct netmap_adapter *na;
2525 struct netmap_kring *r;
2526 NM_SELINFO_T *main_wq;
2527 int locktype, unlocktype, nic_to_bridge, lock;
3317 struct netmap_kring *kring;
2528
2529 if (!(ifp->if_capenable & IFCAP_NETMAP))
2530 return 0;
2531
3318
3319 if (!(ifp->if_capenable & IFCAP_NETMAP))
3320 return 0;
3321
2532 lock = q & (NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT);
2533 q = q & NETMAP_RING_MASK;
3322 q &= NETMAP_RING_MASK;
2534
3323
2535 ND(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
3324 if (netmap_verbose)
3325 RD(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
2536 na = NA(ifp);
2537 if (na->na_flags & NAF_SKIP_INTR) {
2538 ND("use regular interrupt");
2539 return 0;
2540 }
2541
2542 if (work_done) { /* RX path */
2543 if (q >= na->num_rx_rings)
2544 return 0; // not a physical queue
3326 na = NA(ifp);
3327 if (na->na_flags & NAF_SKIP_INTR) {
3328 ND("use regular interrupt");
3329 return 0;
3330 }
3331
3332 if (work_done) { /* RX path */
3333 if (q >= na->num_rx_rings)
3334 return 0; // not a physical queue
2545 r = na->rx_rings + q;
2546 r->nr_kflags |= NKR_PENDINTR;
2547 main_wq = (na->num_rx_rings > 1) ? &na->rx_si : NULL;
2548 /* set a flag if the NIC is attached to a VALE switch */
2549 nic_to_bridge = (na->na_bdg != NULL);
2550 locktype = NETMAP_RX_LOCK;
2551 unlocktype = NETMAP_RX_UNLOCK;
3335 kring = na->rx_rings + q;
3336 kring->nr_kflags |= NKR_PENDINTR; // XXX atomic ?
3337 if (na->na_bdg != NULL) {
3338 netmap_nic_to_bdg(ifp, q);
3339 } else {
3340 selwakeuppri(&kring->si, PI_NET);
3341 if (na->num_rx_rings > 1 /* or multiple listeners */ )
3342 selwakeuppri(&na->rx_si, PI_NET);
3343 }
3344 *work_done = 1; /* do not fire napi again */
2552 } else { /* TX path */
2553 if (q >= na->num_tx_rings)
2554 return 0; // not a physical queue
3345 } else { /* TX path */
3346 if (q >= na->num_tx_rings)
3347 return 0; // not a physical queue
2555 r = na->tx_rings + q;
2556 main_wq = (na->num_tx_rings > 1) ? &na->tx_si : NULL;
2557 work_done = &q; /* dummy */
2558 nic_to_bridge = 0;
2559 locktype = NETMAP_TX_LOCK;
2560 unlocktype = NETMAP_TX_UNLOCK;
3348 kring = na->tx_rings + q;
3349 selwakeuppri(&kring->si, PI_NET);
3350 if (na->num_tx_rings > 1 /* or multiple listeners */ )
3351 selwakeuppri(&na->tx_si, PI_NET);
2561 }
3352 }
2562 if (na->separate_locks) {
2563 if (!(lock & NETMAP_LOCKED_ENTER))
2564 na->nm_lock(ifp, locktype, q);
2565 /* If a NIC is attached to a bridge, flush packets
2566 * (and no need to wakeup anyone). Otherwise, wakeup
2567 * possible processes waiting for packets.
2568 */
2569 if (nic_to_bridge)
2570 netmap_nic_to_bdg(ifp, q);
2571 else
2572 selwakeuppri(&r->si, PI_NET);
2573 na->nm_lock(ifp, unlocktype, q);
2574 if (main_wq && !nic_to_bridge) {
2575 na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
2576 selwakeuppri(main_wq, PI_NET);
2577 na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
2578 }
2579 /* lock the queue again if requested */
2580 if (lock & NETMAP_LOCKED_EXIT)
2581 na->nm_lock(ifp, locktype, q);
2582 } else {
2583 if (!(lock & NETMAP_LOCKED_ENTER))
2584 na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
2585 if (nic_to_bridge)
2586 netmap_nic_to_bdg(ifp, q);
2587 else {
2588 selwakeuppri(&r->si, PI_NET);
2589 if (main_wq)
2590 selwakeuppri(main_wq, PI_NET);
2591 }
2592 if (!(lock & NETMAP_LOCKED_EXIT))
2593 na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
2594 }
2595 *work_done = 1; /* do not fire napi again */
2596 return 1;
2597}
2598
2599
2600#ifdef linux /* linux-specific routines */
2601
2602
2603/*

--- 17 unchanged lines hidden (view full) ---

2621#endif
2622 return netmap_poll((void *)pwait, events, (void *)file);
2623}
2624
2625
2626static int
2627linux_netmap_mmap(struct file *f, struct vm_area_struct *vma)
2628{
3353 return 1;
3354}
3355
3356
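/*
 * Illustrative driver-side sketch (editor's addition, not part of the
 * original sources): how a NIC receive-cleanup routine defers to
 * netmap_rx_irq() as described above, falling back to the normal mbuf
 * path when the interface is not in netmap mode.  Names are placeholders.
 */
#if 0	/* usage sketch only, not compiled with the module */
static void
foo_rxeof(struct foo_rx_ring *rxr)
{
	u_int work_done = 0;

	if (netmap_rx_irq(rxr->sc->ifp, rxr->me, &work_done))
		return;		/* netmap consumed the interrupt */
	/* ... regular mbuf receive processing ... */
}
#endif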
3357#ifdef linux /* linux-specific routines */
3358
3359
3360/*

--- 17 unchanged lines hidden (view full) ---

3378#endif
3379 return netmap_poll((void *)pwait, events, (void *)file);
3380}
3381
3382
3383static int
3384linux_netmap_mmap(struct file *f, struct vm_area_struct *vma)
3385{
2629 int lut_skip, i, j;
2630 int user_skip = 0;
2631 struct lut_entry *l_entry;
2632 int error = 0;
3386 int error = 0;
2633 unsigned long off, tomap;
3387 unsigned long off, va;
3388 vm_ooffset_t pa;
3389 struct netmap_priv_d *priv = f->private_data;
2634 /*
2635 * vma->vm_start: start of mapping user address space
2636 * vma->vm_end: end of the mapping user address space
2637 * vma->vm_pfoff: offset of first page in the device
2638 */
2639
2640 // XXX security checks
2641
3390 /*
3391 * vma->vm_start: start of mapping user address space
3392 * vma->vm_end: end of the mapping user address space
3393 * vma->vm_pfoff: offset of first page in the device
3394 */
3395
3396 // XXX security checks
3397
2642 error = netmap_get_memory(f->private_data);
3398 error = netmap_get_memory(priv);
2643 ND("get_memory returned %d", error);
2644 if (error)
2645 return -error;
2646
3399 ND("get_memory returned %d", error);
3400 if (error)
3401 return -error;
3402
2647 off = vma->vm_pgoff << PAGE_SHIFT; /* offset in bytes */
2648 tomap = vma->vm_end - vma->vm_start;
2649 for (i = 0; i < NETMAP_POOLS_NR; i++) { /* loop through obj_pools */
2650 const struct netmap_obj_pool *p = &nm_mem.pools[i];
2651 /*
2652 * In each pool memory is allocated in clusters
2653 * of size _clustsize, each containing clustentries
2654 * entries. For each object k we already store the
2655 * vtophys mapping in lut[k] so we use that, scanning
2656 * the lut[] array in steps of clustentries,
2657 * and we map each cluster (not individual pages,
2658 * it would be overkill -- XXX slow ? 20130415).
2659 */
2660
2661 /*
2662 * We interpret vm_pgoff as an offset into the whole
2663 * netmap memory, as if all clusters were contiguous.
2664 */
2665 for (lut_skip = 0, j = 0; j < p->_numclusters; j++, lut_skip += p->clustentries) {
2666 unsigned long paddr, mapsize;
2667 if (p->_clustsize <= off) {
2668 off -= p->_clustsize;
2669 continue;
2670 }
2671 l_entry = &p->lut[lut_skip]; /* first obj in the cluster */
2672 paddr = l_entry->paddr + off;
2673 mapsize = p->_clustsize - off;
2674 off = 0;
2675 if (mapsize > tomap)
2676 mapsize = tomap;
2677 ND("remap_pfn_range(%lx, %lx, %lx)",
2678 vma->vm_start + user_skip,
2679 paddr >> PAGE_SHIFT, mapsize);
2680 if (remap_pfn_range(vma, vma->vm_start + user_skip,
2681 paddr >> PAGE_SHIFT, mapsize,
2682 vma->vm_page_prot))
2683 return -EAGAIN; // XXX check return value
2684 user_skip += mapsize;
2685 tomap -= mapsize;
2686 if (tomap == 0)
2687 goto done;
2688 }
3403 if ((vma->vm_start & ~PAGE_MASK) || (vma->vm_end & ~PAGE_MASK)) {
3404 ND("vm_start = %lx vm_end = %lx", vma->vm_start, vma->vm_end);
3405 return -EINVAL;
2689 }
3406 }
2690done:
2691
3407
3408 for (va = vma->vm_start, off = vma->vm_pgoff;
3409 va < vma->vm_end;
3410 va += PAGE_SIZE, off++)
3411 {
3412 pa = netmap_mem_ofstophys(priv->np_mref, off << PAGE_SHIFT);
3413 if (pa == 0)
3414 return -EINVAL;
3415
3416 ND("va %lx pa %p", va, pa);
3417 error = remap_pfn_range(vma, va, pa >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot);
3418 if (error)
3419 return error;
3420 }
2692 return 0;
2693}
2694
2695
3421 return 0;
3422}
3423
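/*
 * Illustration only (not from the original source): the page-by-page
 * remap_pfn_range() loop above is what backs the mmap() a user process
 * performs on /dev/netmap. A minimal sketch of the corresponding
 * userspace sequence, assuming the usual nmreq/NIOCREGIF interface from
 * net/netmap.h and net/netmap_user.h ("eth0" is just a placeholder,
 * headers and error checks omitted for brevity):
 *
 *	struct nmreq req;
 *	struct netmap_if *nifp;
 *	void *mem;
 *	int fd = open("/dev/netmap", O_RDWR);
 *
 *	bzero(&req, sizeof(req));
 *	req.nr_version = NETMAP_API;
 *	strncpy(req.nr_name, "eth0", sizeof(req.nr_name));
 *	ioctl(fd, NIOCREGIF, &req);		// attach to the interface
 *	mem = mmap(NULL, req.nr_memsize, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, 0);			// serviced by the loop above
 *	nifp = NETMAP_IF(mem, req.nr_offset);	// our netmap_if inside the map
 */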
3424
3425/*
3426 * This one is probably already protected by the netif lock XXX
3427 */
2696static netdev_tx_t
3428static netdev_tx_t
2697linux_netmap_start(struct sk_buff *skb, struct net_device *dev)
3429linux_netmap_start_xmit(struct sk_buff *skb, struct net_device *dev)
2698{
3430{
2699 netmap_start(dev, skb);
3431 netmap_transmit(dev, skb);
2700 return (NETDEV_TX_OK);
2701}
2702
2703
3432 return (NETDEV_TX_OK);
3433}
3434
3435
2704#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) // XXX was 38
3436#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) // XXX was 37
2705#define LIN_IOCTL_NAME .ioctl
2706int
2707linux_netmap_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long data /* arg */)
2708#else
2709#define LIN_IOCTL_NAME .unlocked_ioctl
2710long
2711linux_netmap_ioctl(struct file *file, u_int cmd, u_long data /* arg */)
2712#endif
2713{
2714 int ret;
2715 struct nmreq nmr;
2716 bzero(&nmr, sizeof(nmr));
2717
3437#define LIN_IOCTL_NAME .ioctl
3438int
3439linux_netmap_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long data /* arg */)
3440#else
3441#define LIN_IOCTL_NAME .unlocked_ioctl
3442long
3443linux_netmap_ioctl(struct file *file, u_int cmd, u_long data /* arg */)
3444#endif
3445{
3446 int ret;
3447 struct nmreq nmr;
3448 bzero(&nmr, sizeof(nmr));
3449
3450 if (cmd == NIOCTXSYNC || cmd == NIOCRXSYNC) {
3451 data = 0; /* no argument required here */
3452 }
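	/* note: from userspace these two are typically issued with a NULL
	 * argument, e.g. ioctl(fd, NIOCTXSYNC, NULL), which is why no
	 * nmreq is copied in or out for them.
	 */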
2718 if (data && copy_from_user(&nmr, (void *)data, sizeof(nmr) ) != 0)
2719 return -EFAULT;
2720 ret = netmap_ioctl(NULL, cmd, (caddr_t)&nmr, 0, (void *)file);
2721 if (data && copy_to_user((void*)data, &nmr, sizeof(nmr) ) != 0)
2722 return -EFAULT;
2723 return -ret;
2724}
2725

--- 61 unchanged lines hidden ---

2787EXPORT_SYMBOL(netmap_total_buffers); // index check
2788EXPORT_SYMBOL(netmap_buffer_base);
2789EXPORT_SYMBOL(netmap_reset); // ring init routines
2790EXPORT_SYMBOL(netmap_buf_size);
2791EXPORT_SYMBOL(netmap_rx_irq); // default irq handler
2792EXPORT_SYMBOL(netmap_no_pendintr); // XXX mitigation - should go away
2793EXPORT_SYMBOL(netmap_bdg_ctl); // bridge configuration routine
2794EXPORT_SYMBOL(netmap_bdg_learning); // the default lookup function
3453 if (data && copy_from_user(&nmr, (void *)data, sizeof(nmr) ) != 0)
3454 return -EFAULT;
3455 ret = netmap_ioctl(NULL, cmd, (caddr_t)&nmr, 0, (void *)file);
3456 if (data && copy_to_user((void*)data, &nmr, sizeof(nmr) ) != 0)
3457 return -EFAULT;
3458 return -ret;
3459}
3460

--- 61 unchanged lines hidden ---

3522EXPORT_SYMBOL(netmap_total_buffers); // index check
3523EXPORT_SYMBOL(netmap_buffer_base);
3524EXPORT_SYMBOL(netmap_reset); // ring init routines
3525EXPORT_SYMBOL(netmap_buf_size);
3526EXPORT_SYMBOL(netmap_rx_irq); // default irq handler
3527EXPORT_SYMBOL(netmap_no_pendintr); // XXX mitigation - should go away
3528EXPORT_SYMBOL(netmap_bdg_ctl); // bridge configuration routine
3529EXPORT_SYMBOL(netmap_bdg_learning); // the default lookup function
3530EXPORT_SYMBOL(netmap_disable_all_rings);
3531EXPORT_SYMBOL(netmap_enable_all_rings);
2795
2796
2797MODULE_AUTHOR("http://info.iet.unipi.it/~luigi/netmap/");
2798MODULE_DESCRIPTION("The netmap packet I/O framework");
2799MODULE_LICENSE("Dual BSD/GPL"); /* the code here is all BSD. */
2800
2801#else /* __FreeBSD__ */
2802
2803
2804static struct cdevsw netmap_cdevsw = {
2805 .d_version = D_VERSION,
2806 .d_name = "netmap",
2807 .d_open = netmap_open,
3532
3533
3534MODULE_AUTHOR("http://info.iet.unipi.it/~luigi/netmap/");
3535MODULE_DESCRIPTION("The netmap packet I/O framework");
3536MODULE_LICENSE("Dual BSD/GPL"); /* the code here is all BSD. */
3537
3538#else /* __FreeBSD__ */
3539
3540
3541static struct cdevsw netmap_cdevsw = {
3542 .d_version = D_VERSION,
3543 .d_name = "netmap",
3544 .d_open = netmap_open,
2808 .d_mmap = netmap_mmap,
2809 .d_mmap_single = netmap_mmap_single,
2810 .d_ioctl = netmap_ioctl,
2811 .d_poll = netmap_poll,
2812 .d_close = netmap_close,
2813};
2814#endif /* __FreeBSD__ */
2815
3545 .d_mmap_single = netmap_mmap_single,
3546 .d_ioctl = netmap_ioctl,
3547 .d_poll = netmap_poll,
3548 .d_close = netmap_close,
3549};
3550#endif /* __FreeBSD__ */
3551
2816#ifdef NM_BRIDGE
2817/*
2818 *---- support for virtual bridge -----
2819 */
2820
2821/* ----- FreeBSD if_bridge hash function ------- */
2822
2823/*
2824 * The following hash function is adapted from "Hash Functions" by Bob Jenkins

--- 32 unchanged lines hidden ---

2857}
2858
2859#undef mix
2860
2861
2862static int
2863bdg_netmap_reg(struct ifnet *ifp, int onoff)
2864{
3552/*
3553 *---- support for virtual bridge -----
3554 */
3555
3556/* ----- FreeBSD if_bridge hash function ------- */
3557
3558/*
3559 * The following hash function is adapted from "Hash Functions" by Bob Jenkins

--- 32 unchanged lines hidden ---

3592}
3593
3594#undef mix
3595
3596
3597static int
3598bdg_netmap_reg(struct ifnet *ifp, int onoff)
3599{
2865 // struct nm_bridge *b = NA(ifp)->na_bdg;
2866
2867 /* the interface is already attached to the bridge,
2868 * so we only need to toggle IFCAP_NETMAP.
3600 /* the interface is already attached to the bridge,
3601 * so we only need to toggle IFCAP_NETMAP.
2869 * Locking is not necessary (we are already under
2870 * NMA_LOCK, and the port is not in use during this call).
2871 */
3602 */
2872 /* BDG_WLOCK(b); */
2873 if (onoff) {
2874 ifp->if_capenable |= IFCAP_NETMAP;
2875 } else {
2876 ifp->if_capenable &= ~IFCAP_NETMAP;
2877 }
3603 if (onoff) {
3604 ifp->if_capenable |= IFCAP_NETMAP;
3605 } else {
3606 ifp->if_capenable &= ~IFCAP_NETMAP;
3607 }
2878 /* BDG_WUNLOCK(b); */
2879 return 0;
2880}
2881
2882
2883/*
2884 * Lookup function for a learning bridge.
2885 * Update the hash table with the source address,
2886 * and then return the destination port index, and the
2887 * ring in *dst_ring (at the moment, always use ring 0)
2888 */
2889u_int
3608 return 0;
3609}
3610
3611
3612/*
3613 * Lookup function for a learning bridge.
3614 * Update the hash table with the source address,
3615 * and then return the destination port index, and the
3616 * ring in *dst_ring (at the moment, always use ring 0)
3617 */
3618u_int
2890netmap_bdg_learning(char *buf, u_int len, uint8_t *dst_ring,
3619netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
2891 struct netmap_adapter *na)
2892{
2893 struct nm_hash_ent *ht = na->na_bdg->ht;
2894 uint32_t sh, dh;
2895 u_int dst, mysrc = na->bdg_port;
2896 uint64_t smac, dmac;
2897
3620 struct netmap_adapter *na)
3621{
3622 struct nm_hash_ent *ht = na->na_bdg->ht;
3623 uint32_t sh, dh;
3624 u_int dst, mysrc = na->bdg_port;
3625 uint64_t smac, dmac;
3626
3627 if (buf_len < 14) {
3628 D("invalid buf length %d", buf_len);
3629 return NM_BDG_NOPORT;
3630 }
2898 dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
2899 smac = le64toh(*(uint64_t *)(buf + 4));
2900 smac >>= 16;
2901
2902 /*
2903 * The hash is somewhat expensive, there might be some
2904 * worthwhile optimizations here.
2905 */
2906 if ((buf[6] & 1) == 0) { /* valid src */
2907 uint8_t *s = buf+6;
3631 dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
3632 smac = le64toh(*(uint64_t *)(buf + 4));
3633 smac >>= 16;
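	/* dmac = bytes 0..5 of the frame (destination MAC), from an 8-byte
	 * little-endian load masked to 48 bits; smac is an 8-byte load at
	 * offset 4 shifted right by 16, i.e. bytes 6..11 (source MAC).
	 */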
3634
3635 /*
3636 * The hash is somewhat expensive, there might be some
3637 * worthwhile optimizations here.
3638 */
3639 if ((buf[6] & 1) == 0) { /* valid src */
3640 uint8_t *s = buf+6;
2908 sh = nm_bridge_rthash(buf+6); // XXX hash of source
3641 sh = nm_bridge_rthash(s); // XXX hash of source
2909 /* update source port forwarding entry */
2910 ht[sh].mac = smac; /* XXX expire ? */
2911 ht[sh].ports = mysrc;
2912 if (netmap_verbose)
2913 D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
2914 s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
2915 }
2916 dst = NM_BDG_BROADCAST;

--- 9 unchanged lines hidden ---

2926}
2927
2928
2929/*
2930 * This flush routine supports only unicast and broadcast but a large
2931 * number of ports, and lets us replace the learn and dispatch functions.
2932 */
2933int
3642 /* update source port forwarding entry */
3643 ht[sh].mac = smac; /* XXX expire ? */
3644 ht[sh].ports = mysrc;
3645 if (netmap_verbose)
3646 D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
3647 s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
3648 }
3649 dst = NM_BDG_BROADCAST;

--- 9 unchanged lines hidden (view full) ---

3659}
3660
3661
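/*
 * Illustration only (not from the original source): the lookup used by
 * nm_bdg_flush() below is replaceable (see netmap_bdg_ctl() and the
 * default netmap_bdg_learning() exported above), and a replacement must
 * keep the same signature. A hypothetical minimal lookup that ignores
 * the frame contents and statically cross-connects ports 0 and 1 might
 * look like this:
 */
#if 0	/* sketch only, not compiled */
static u_int
example_static_lookup(char *buf, u_int buf_len, uint8_t *dst_ring,
	struct netmap_adapter *na)
{
	(void)buf;
	(void)buf_len;
	*dst_ring = 0;	/* always deliver to ring 0 of the destination */
	/* frames from port 0 go to port 1 and vice versa;
	 * anything else is dropped (NM_BDG_NOPORT).
	 */
	if (na->bdg_port == 0)
		return 1;
	if (na->bdg_port == 1)
		return 0;
	return NM_BDG_NOPORT;
}
#endif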
3662/*
3663 * This flush routine supports only unicast and broadcast but a large
3664 * number of ports, and lets us replace the learn and dispatch functions.
3665 */
3666int
2934nm_bdg_flush(struct nm_bdg_fwd *ft, int n, struct netmap_adapter *na,
3667nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_adapter *na,
2935 u_int ring_nr)
2936{
2937 struct nm_bdg_q *dst_ents, *brddst;
2938 uint16_t num_dsts = 0, *dsts;
2939 struct nm_bridge *b = na->na_bdg;
3668 u_int ring_nr)
3669{
3670 struct nm_bdg_q *dst_ents, *brddst;
3671 uint16_t num_dsts = 0, *dsts;
3672 struct nm_bridge *b = na->na_bdg;
2940 u_int i, me = na->bdg_port;
3673 u_int i, j, me = na->bdg_port;
2941
3674
2942 dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH);
3675 /*
3676 * The work area (pointed by ft) is followed by an array of
3677 * pointers to queues, dst_ents; there are NM_BDG_MAXRINGS
3678 * queues per port plus one for the broadcast traffic.
3679 * Then we have an array of destination indexes.
3680 */
3681 dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
2943 dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
2944
3682 dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
3683
2945 BDG_RLOCK(b);
2946
2947 /* first pass: find a destination */
2948 for (i = 0; likely(i < n); i++) {
2949 uint8_t *buf = ft[i].ft_buf;
2950 uint8_t dst_ring = ring_nr;
3684 /* first pass: find a destination for each packet in the batch */
3685 for (i = 0; likely(i < n); i += ft[i].ft_frags) {
3686 uint8_t dst_ring = ring_nr; /* default, same ring as origin */
2951 uint16_t dst_port, d_i;
2952 struct nm_bdg_q *d;
2953
3687 uint16_t dst_port, d_i;
3688 struct nm_bdg_q *d;
3689
2954 dst_port = b->nm_bdg_lookup(buf, ft[i].ft_len, &dst_ring, na);
2955 if (dst_port == NM_BDG_NOPORT) {
3690 ND("slot %d frags %d", i, ft[i].ft_frags);
3691 dst_port = b->nm_bdg_lookup(ft[i].ft_buf, ft[i].ft_len,
3692 &dst_ring, na);
3693 if (netmap_verbose > 255)
3694 RD(5, "slot %d port %d -> %d", i, me, dst_port);
3695 if (dst_port == NM_BDG_NOPORT)
2956 continue; /* this packet is identified to be dropped */
3696 continue; /* this packet is identified to be dropped */
2957 } else if (unlikely(dst_port > NM_BDG_MAXPORTS)) {
3697 else if (unlikely(dst_port > NM_BDG_MAXPORTS))
2958 continue;
3698 continue;
2959 } else if (dst_port == NM_BDG_BROADCAST) {
3699 else if (dst_port == NM_BDG_BROADCAST)
2960 dst_ring = 0; /* broadcasts always go to ring 0 */
3700 dst_ring = 0; /* broadcasts always go to ring 0 */
2961 } else if (unlikely(dst_port == me ||
2962 !BDG_GET_VAR(b->bdg_ports[dst_port]))) {
3701 else if (unlikely(dst_port == me ||
3702 !b->bdg_ports[dst_port]))
2963 continue;
3703 continue;
2964 }
2965
2966 /* get a position in the scratch pad */
2967 d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
2968 d = dst_ents + d_i;
3704
3705 /* get a position in the scratch pad */
3706 d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
3707 d = dst_ents + d_i;
2969 if (d->bq_head == NM_BDG_BATCH) { /* new destination */
3708
3709 /* append the first fragment to the list */
3710 if (d->bq_head == NM_FT_NULL) { /* new destination */
2970 d->bq_head = d->bq_tail = i;
2971 /* remember this position to be scanned later */
2972 if (dst_port != NM_BDG_BROADCAST)
2973 dsts[num_dsts++] = d_i;
2974 } else {
2975 ft[d->bq_tail].ft_next = i;
2976 d->bq_tail = i;
2977 }
3711 d->bq_head = d->bq_tail = i;
3712 /* remember this position to be scanned later */
3713 if (dst_port != NM_BDG_BROADCAST)
3714 dsts[num_dsts++] = d_i;
3715 } else {
3716 ft[d->bq_tail].ft_next = i;
3717 d->bq_tail = i;
3718 }
3719 d->bq_len += ft[i].ft_frags;
2978 }
2979
3720 }
3721
2980 /* if there is a broadcast, set ring 0 of all ports to be scanned
2981 * XXX This would be optimized by recording the highest index of active
2982 * ports.
3722 /*
3723 * Broadcast traffic goes to ring 0 on all destinations.
3724 * So we need to add these rings to the list of ports to scan.
3725 * XXX we scan only the active ports recorded in bdg_port_index[];
3726 * a compact list of actual broadcast destinations could shorten
3727 * this loop further.
2983 */
2984 brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
3728 */
3729 brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
2985 if (brddst->bq_head != NM_BDG_BATCH) {
2986 for (i = 0; likely(i < NM_BDG_MAXPORTS); i++) {
2987 uint16_t d_i = i * NM_BDG_MAXRINGS;
2988 if (unlikely(i == me) || !BDG_GET_VAR(b->bdg_ports[i]))
3730 if (brddst->bq_head != NM_FT_NULL) {
3731 for (j = 0; likely(j < b->bdg_active_ports); j++) {
3732 uint16_t d_i;
3733 i = b->bdg_port_index[j];
3734 if (unlikely(i == me))
2989 continue;
3735 continue;
2990 else if (dst_ents[d_i].bq_head == NM_BDG_BATCH)
3736 d_i = i * NM_BDG_MAXRINGS;
3737 if (dst_ents[d_i].bq_head == NM_FT_NULL)
2991 dsts[num_dsts++] = d_i;
2992 }
2993 }
2994
3738 dsts[num_dsts++] = d_i;
3739 }
3740 }
3741
3742 ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
2995 /* second pass: scan destinations (XXX will be modular somehow) */
2996 for (i = 0; i < num_dsts; i++) {
2997 struct ifnet *dst_ifp;
2998 struct netmap_adapter *dst_na;
2999 struct netmap_kring *kring;
3000 struct netmap_ring *ring;
3001 u_int dst_nr, is_vp, lim, j, sent = 0, d_i, next, brd_next;
3743 /* second pass: scan destinations (XXX will be modular somehow) */
3744 for (i = 0; i < num_dsts; i++) {
3745 struct ifnet *dst_ifp;
3746 struct netmap_adapter *dst_na;
3747 struct netmap_kring *kring;
3748 struct netmap_ring *ring;
3749 u_int dst_nr, is_vp, lim, j, sent = 0, d_i, next, brd_next;
3002 int howmany, retry = netmap_txsync_retry;
3750 u_int needed, howmany;
3751 int retry = netmap_txsync_retry;
3003 struct nm_bdg_q *d;
3752 struct nm_bdg_q *d;
3753 uint32_t my_start = 0, lease_idx = 0;
3754 int nrings;
3004
3005 d_i = dsts[i];
3755
3756 d_i = dsts[i];
3757 ND("second pass %d port %d", i, d_i);
3006 d = dst_ents + d_i;
3758 d = dst_ents + d_i;
3007 dst_na = BDG_GET_VAR(b->bdg_ports[d_i/NM_BDG_MAXRINGS]);
3759 // XXX fix the division
3760 dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
3008 /* protect from the lookup function returning an inactive
3009 * destination port
3010 */
3011 if (unlikely(dst_na == NULL))
3761 /* protect from the lookup function returning an inactive
3762 * destination port
3763 */
3764 if (unlikely(dst_na == NULL))
3012 continue;
3013 else if (dst_na->na_flags & NAF_SW_ONLY)
3014 continue;
3765 goto cleanup;
3766 if (dst_na->na_flags & NAF_SW_ONLY)
3767 goto cleanup;
3015 dst_ifp = dst_na->ifp;
3016 /*
3017 * The interface may be in !netmap mode in two cases:
3018 * - when na is attached but not activated yet;
3019 * - when na is being deactivated but is still attached.
3020 */
3768 dst_ifp = dst_na->ifp;
3769 /*
3770 * The interface may be in !netmap mode in two cases:
3771 * - when na is attached but not activated yet;
3772 * - when na is being deactivated but is still attached.
3773 */
3021 if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP)))
3022 continue;
3774 if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
3775 ND("not in netmap mode!");
3776 goto cleanup;
3777 }
3023
3024 /* there is at least one either unicast or broadcast packet */
3025 brd_next = brddst->bq_head;
3026 next = d->bq_head;
3778
3779 /* there is at least one either unicast or broadcast packet */
3780 brd_next = brddst->bq_head;
3781 next = d->bq_head;
3782 /* we need to reserve this many slots. If fewer are
3783 * available, some packets will be dropped.
3784 * Packets may have multiple fragments, so there is a
3785 * chance that we may not use all of the slots
3786 * we have claimed, so we will need to handle the leftover
3787 * ones when we regain the lock.
3788 */
3789 needed = d->bq_len + brddst->bq_len;
3027
3028 is_vp = nma_is_vp(dst_na);
3790
3791 is_vp = nma_is_vp(dst_na);
3792 ND(5, "pass 2 dst %d is %x %s",
3793 i, d_i, is_vp ? "virtual" : "nic/host");
3029 dst_nr = d_i & (NM_BDG_MAXRINGS-1);
3030 if (is_vp) { /* virtual port */
3794 dst_nr = d_i & (NM_BDG_MAXRINGS-1);
3795 if (is_vp) { /* virtual port */
3031 if (dst_nr >= dst_na->num_rx_rings)
3032 dst_nr = dst_nr % dst_na->num_rx_rings;
3033 kring = &dst_na->rx_rings[dst_nr];
3034 ring = kring->ring;
3035 lim = kring->nkr_num_slots - 1;
3036 dst_na->nm_lock(dst_ifp, NETMAP_RX_LOCK, dst_nr);
3037 j = kring->nr_hwcur + kring->nr_hwavail;
3038 if (j > lim)
3039 j -= kring->nkr_num_slots;
3040 howmany = lim - kring->nr_hwavail;
3041 } else { /* hw or sw adapter */
3042 if (dst_nr >= dst_na->num_tx_rings)
3043 dst_nr = dst_nr % dst_na->num_tx_rings;
3044 kring = &dst_na->tx_rings[dst_nr];
3045 ring = kring->ring;
3046 lim = kring->nkr_num_slots - 1;
3047 dst_na->nm_lock(dst_ifp, NETMAP_TX_LOCK, dst_nr);
3796 nrings = dst_na->num_rx_rings;
3797 } else {
3798 nrings = dst_na->num_tx_rings;
3799 }
3800 if (dst_nr >= nrings)
3801 dst_nr = dst_nr % nrings;
3802 kring = is_vp ? &dst_na->rx_rings[dst_nr] :
3803 &dst_na->tx_rings[dst_nr];
3804 ring = kring->ring;
3805 lim = kring->nkr_num_slots - 1;
3806
3048retry:
3807retry:
3808
3809 /* reserve the buffers in the queue and an entry
3810 * to report completion, and drop lock.
3811 * XXX this might become a helper function.
3812 */
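	/* Lease protocol used below (nm_kr_space()/nm_kr_lease() are defined
	 * elsewhere in netmap): while holding q_lock we claim a contiguous
	 * range of slots starting at nkr_hwlease and take a lease index;
	 * the copies then run without the lock, and completions are folded
	 * back in order through kring->nkr_leases[] once the lock is retaken.
	 */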
3813 mtx_lock(&kring->q_lock);
3814 if (kring->nkr_stopped) {
3815 mtx_unlock(&kring->q_lock);
3816 goto cleanup;
3817 }
3818 /* on physical interfaces, do a txsync to recover
3819 * slots for packets already transmitted.
3820 * XXX maybe we could be optimistic and rely on a retry
3821 * in case of failure.
3822 */
3823 if (nma_is_hw(dst_na)) {
3049 dst_na->nm_txsync(dst_ifp, dst_nr, 0);
3824 dst_na->nm_txsync(dst_ifp, dst_nr, 0);
3050 /* see nm_bdg_flush() */
3051 j = kring->nr_hwcur;
3052 howmany = kring->nr_hwavail;
3053 }
3825 }
3054 while (howmany-- > 0) {
3826 my_start = j = kring->nkr_hwlease;
3827 howmany = nm_kr_space(kring, is_vp);
3828 if (needed < howmany)
3829 howmany = needed;
3830 lease_idx = nm_kr_lease(kring, howmany, is_vp);
3831 mtx_unlock(&kring->q_lock);
3832
3833 /* only retry if we need more than available slots */
3834 if (retry && needed <= howmany)
3835 retry = 0;
3836
3837 /* copy to the destination queue */
3838 while (howmany > 0) {
3055 struct netmap_slot *slot;
3839 struct netmap_slot *slot;
3056 struct nm_bdg_fwd *ft_p;
3840 struct nm_bdg_fwd *ft_p, *ft_end;
3841 u_int cnt;
3057
3842
3058 /* our 'NULL' is always higher than valid indexes
3843 /* find the queue from which we pick the next packet.
3844 * NM_FT_NULL is always higher than valid indexes
3059 * so we never dereference it if the other list
3845 * so we never dereference it if the other list
3060 * has packets (and if both are NULL we never
3846 * has packets (and if both are empty we never
3061 * get here).
3062 */
3063 if (next < brd_next) {
3064 ft_p = ft + next;
3065 next = ft_p->ft_next;
3847 * get here).
3848 */
3849 if (next < brd_next) {
3850 ft_p = ft + next;
3851 next = ft_p->ft_next;
3066 ND("j %d uni %d next %d %d",
3067 j, ft_p - ft, next, brd_next);
3068 } else { /* insert broadcast */
3069 ft_p = ft + brd_next;
3070 brd_next = ft_p->ft_next;
3852 } else { /* insert broadcast */
3853 ft_p = ft + brd_next;
3854 brd_next = ft_p->ft_next;
3071 ND("j %d brd %d next %d %d",
3072 j, ft_p - ft, next, brd_next);
3073 }
3855 }
3074 slot = &ring->slot[j];
3075 ND("send %d %d bytes at %s:%d", i, ft_p->ft_len, dst_ifp->if_xname, j);
3076 if (ft_p->ft_flags & NS_INDIRECT) {
3077 ND("copying from INDIRECT source");
3078 copyin(ft_p->ft_buf, NMB(slot),
3079 (ft_p->ft_len + 63) & ~63);
3080 } else {
3081 pkt_copy(ft_p->ft_buf, NMB(slot), ft_p->ft_len);
3082 }
3083 slot->len = ft_p->ft_len;
3084 j = unlikely(j == lim) ? 0: j + 1; /* XXX to be macro-ed */
3085 sent++;
3856 cnt = ft_p->ft_frags; // cnt > 0
3857 if (unlikely(cnt > howmany))
3858 break; /* no more space */
3859 howmany -= cnt;
3860 if (netmap_verbose && cnt > 1)
3861 RD(5, "rx %d frags to %d", cnt, j);
3862 ft_end = ft_p + cnt;
3863 do {
3864 void *dst, *src = ft_p->ft_buf;
3865 size_t len = (ft_p->ft_len + 63) & ~63; /* round to a multiple of 64 */
3866
3867 slot = &ring->slot[j];
3868 dst = BDG_NMB(dst_na->nm_mem, slot);
3869
3870
3871 ND("send %d %d bytes at %s:%d",
3872 i, ft_p->ft_len, dst_ifp->if_xname, j);
3873 if (ft_p->ft_flags & NS_INDIRECT) {
3874 if (copyin(src, dst, len)) {
3875 // invalid user pointer, pretend len is 0
3876 ft_p->ft_len = 0;
3877 }
3878 } else {
3879 //memcpy(dst, src, len);
3880 pkt_copy(src, dst, (int)len);
3881 }
3882 slot->len = ft_p->ft_len;
3883 slot->flags = (cnt << 8)| NS_MOREFRAG;
3884 j = nm_next(j, lim);
3885 ft_p++;
3886 sent++;
3887 } while (ft_p != ft_end);
3888 slot->flags = (cnt << 8); /* clear flag on last entry */
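	/* each slot of a multi-fragment packet carries the fragment count in
	 * the high byte of its flags; NS_MOREFRAG is set on all but the last
	 * slot, whose flags have just been rewritten above.
	 */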
3086 /* are we done ? */
3889 /* are we done ? */
3087 if (next == NM_BDG_BATCH && brd_next == NM_BDG_BATCH)
3890 if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
3088 break;
3089 }
3891 break;
3892 }
3090 if (netmap_verbose && (howmany < 0))
3091 D("rx ring full on %s", dst_ifp->if_xname);
3092 if (is_vp) {
3093 if (sent) {
3094 kring->nr_hwavail += sent;
3095 selwakeuppri(&kring->si, PI_NET);
3893 {
3894 /* current position */
3895 uint32_t *p = kring->nkr_leases; /* shorthand */
3896 uint32_t update_pos;
3897 int still_locked = 1;
3898
3899 mtx_lock(&kring->q_lock);
3900 if (unlikely(howmany > 0)) {
3901 /* we did not use all the buffers. If we are the last
3902 * lease holder we can recover the slots, otherwise we must
3903 * fill them with 0 to mark empty packets.
3904 */
3905 ND("leftover %d bufs", howmany);
3906 if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
3907 /* yes i am the last one */
3908 ND("roll back nkr_hwlease to %d", j);
3909 kring->nkr_hwlease = j;
3910 } else {
3911 while (howmany-- > 0) {
3912 ring->slot[j].len = 0;
3913 ring->slot[j].flags = 0;
3914 j = nm_next(j, lim);
3915 }
3096 }
3916 }
3097 dst_na->nm_lock(dst_ifp, NETMAP_RX_UNLOCK, dst_nr);
3098 } else {
3099 if (sent) {
3100 ring->avail -= sent;
3917 }
3918 p[lease_idx] = j; /* report I am done */
3919
3920 update_pos = is_vp ? nm_kr_rxpos(kring) : ring->cur;
3921
3922 if (my_start == update_pos) {
3923 /* all slots before my_start have been reported,
3924 * so scan subsequent leases to see if other ranges
3925 * have been completed, and do a selwakeup or txsync.
3926 */
3927 while (lease_idx != kring->nkr_lease_idx &&
3928 p[lease_idx] != NR_NOSLOT) {
3929 j = p[lease_idx];
3930 p[lease_idx] = NR_NOSLOT;
3931 lease_idx = nm_next(lease_idx, lim);
3932 }
3933 /* j is the new 'write' position. j != my_start
3934 * means there are new buffers to report
3935 */
3936 if (likely(j != my_start)) {
3937 if (is_vp) {
3938 uint32_t old_avail = kring->nr_hwavail;
3939
3940 kring->nr_hwavail = (j >= kring->nr_hwcur) ?
3941 j - kring->nr_hwcur :
3942 j + lim + 1 - kring->nr_hwcur;
3943 if (kring->nr_hwavail < old_avail) {
3944 D("avail shrink %d -> %d",
3945 old_avail, kring->nr_hwavail);
3946 }
3947 still_locked = 0;
3948 mtx_unlock(&kring->q_lock);
3949 selwakeuppri(&kring->si, PI_NET);
3950 } else {
3101 ring->cur = j;
3951 ring->cur = j;
3952 /* XXX update avail ? */
3953 still_locked = 0;
3102 dst_na->nm_txsync(dst_ifp, dst_nr, 0);
3954 dst_na->nm_txsync(dst_ifp, dst_nr, 0);
3955 mtx_unlock(&kring->q_lock);
3956
3957 /* retry to send more packets */
3958 if (nma_is_hw(dst_na) && retry--)
3959 goto retry;
3960 }
3103 }
3961 }
3104 /* retry to send more packets */
3105 if (nma_is_hw(dst_na) && howmany < 0 && retry--)
3106 goto retry;
3107 dst_na->nm_lock(dst_ifp, NETMAP_TX_UNLOCK, dst_nr);
3962 }
3963 if (still_locked)
3964 mtx_unlock(&kring->q_lock);
3108 }
3965 }
3109 /* NM_BDG_BATCH means 'no packet' */
3110 d->bq_head = d->bq_tail = NM_BDG_BATCH; /* cleanup */
3966cleanup:
3967 d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
3968 d->bq_len = 0;
3111 }
3969 }
3112 brddst->bq_head = brddst->bq_tail = NM_BDG_BATCH; /* cleanup */
3113 BDG_RUNLOCK(b);
3970 brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
3971 brddst->bq_len = 0;
3114 return 0;
3115}
3116
3117
3118/*
3972 return 0;
3973}
3974
3975
3976/*
3119 * main dispatch routine
3977 * main dispatch routine for the bridge.
3978 * We already know that only one thread is running this.
3979 * We must run nm_bdg_preflush without the lock.
3120 */
3121static int
3980 */
3981static int
3122bdg_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
3982bdg_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
3123{
3124 struct netmap_adapter *na = NA(ifp);
3125 struct netmap_kring *kring = &na->tx_rings[ring_nr];
3126 struct netmap_ring *ring = kring->ring;
3983{
3984 struct netmap_adapter *na = NA(ifp);
3985 struct netmap_kring *kring = &na->tx_rings[ring_nr];
3986 struct netmap_ring *ring = kring->ring;
3127 int i, j, k, lim = kring->nkr_num_slots - 1;
3987 u_int j, k, lim = kring->nkr_num_slots - 1;
3128
3129 k = ring->cur;
3130 if (k > lim)
3131 return netmap_ring_reinit(kring);
3988
3989 k = ring->cur;
3990 if (k > lim)
3991 return netmap_ring_reinit(kring);
3132 if (do_lock)
3133 na->nm_lock(ifp, NETMAP_TX_LOCK, ring_nr);
3134
3992
3135 if (netmap_bridge <= 0) { /* testing only */
3993 if (bridge_batch <= 0) { /* testing only */
3136 j = k; // used all
3137 goto done;
3138 }
3994 j = k; // used all
3995 goto done;
3996 }
3139 if (netmap_bridge > NM_BDG_BATCH)
3140 netmap_bridge = NM_BDG_BATCH;
3997 if (bridge_batch > NM_BDG_BATCH)
3998 bridge_batch = NM_BDG_BATCH;
3141
3142 j = nm_bdg_preflush(na, ring_nr, kring, k);
3999
4000 j = nm_bdg_preflush(na, ring_nr, kring, k);
3143 i = k - j;
3144 if (i < 0)
3145 i += kring->nkr_num_slots;
3146 kring->nr_hwavail = kring->nkr_num_slots - 1 - i;
3147 if (j != k)
3148 D("early break at %d/ %d, avail %d", j, k, kring->nr_hwavail);
4001 if (j != k)
4002 D("early break at %d/ %d, avail %d", j, k, kring->nr_hwavail);
4003 /* k-j modulo ring size is the number of slots processed */
4004 if (k < j)
4005 k += kring->nkr_num_slots;
4006 kring->nr_hwavail = lim - (k - j);
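	/* e.g. with nkr_num_slots == 1024 (lim == 1023), cur k == 10 and
	 * preflush stopping at j == 1000: k becomes 1034, k - j == 34,
	 * and nr_hwavail == 1023 - 34 == 989.
	 */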
3149
3150done:
3151 kring->nr_hwcur = j;
3152 ring->avail = kring->nr_hwavail;
4007
4008done:
4009 kring->nr_hwcur = j;
4010 ring->avail = kring->nr_hwavail;
3153 if (do_lock)
3154 na->nm_lock(ifp, NETMAP_TX_UNLOCK, ring_nr);
3155
3156 if (netmap_verbose)
4011 if (netmap_verbose)
3157 D("%s ring %d lock %d", ifp->if_xname, ring_nr, do_lock);
4012 D("%s ring %d flags %d", ifp->if_xname, ring_nr, flags);
3158 return 0;
3159}
3160
3161
4013 return 0;
4014}
4015
4016
4017/*
4018 * user process reading from a VALE switch.
4019 * Already protected against concurrent calls from userspace,
4020 * but we must acquire the queue's lock to protect against
4021 * writers on the same queue.
4022 */
3162static int
4023static int
3163bdg_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
4024bdg_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
3164{
3165 struct netmap_adapter *na = NA(ifp);
3166 struct netmap_kring *kring = &na->rx_rings[ring_nr];
3167 struct netmap_ring *ring = kring->ring;
3168 u_int j, lim = kring->nkr_num_slots - 1;
3169 u_int k = ring->cur, resvd = ring->reserved;
3170 int n;
3171
4025{
4026 struct netmap_adapter *na = NA(ifp);
4027 struct netmap_kring *kring = &na->rx_rings[ring_nr];
4028 struct netmap_ring *ring = kring->ring;
4029 u_int j, lim = kring->nkr_num_slots - 1;
4030 u_int k = ring->cur, resvd = ring->reserved;
4031 int n;
4032
3172 ND("%s ring %d lock %d avail %d",
3173 ifp->if_xname, ring_nr, do_lock, kring->nr_hwavail);
4033 mtx_lock(&kring->q_lock);
4034 if (k > lim) {
4035 D("ouch dangerous reset!!!");
4036 n = netmap_ring_reinit(kring);
4037 goto done;
4038 }
3174
4039
3175 if (k > lim)
3176 return netmap_ring_reinit(kring);
3177 if (do_lock)
3178 na->nm_lock(ifp, NETMAP_RX_LOCK, ring_nr);
3179
3180 /* skip past packets that userspace has released */
3181 j = kring->nr_hwcur; /* netmap ring index */
3182 if (resvd > 0) {
3183 if (resvd + ring->avail >= lim + 1) {
3184 D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
3185 ring->reserved = resvd = 0; // XXX panic...
3186 }
3187 k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
3188 }
3189
3190 if (j != k) { /* userspace has released some packets. */
3191 n = k - j;
3192 if (n < 0)
3193 n += kring->nkr_num_slots;
3194 ND("userspace releases %d packets", n);
3195 for (n = 0; likely(j != k); n++) {
3196 struct netmap_slot *slot = &ring->slot[j];
4040 /* skip past packets that userspace has released */
4041 j = kring->nr_hwcur; /* netmap ring index */
4042 if (resvd > 0) {
4043 if (resvd + ring->avail >= lim + 1) {
4044 D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
4045 ring->reserved = resvd = 0; // XXX panic...
4046 }
4047 k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
4048 }
4049
4050 if (j != k) { /* userspace has released some packets. */
4051 n = k - j;
4052 if (n < 0)
4053 n += kring->nkr_num_slots;
4054 ND("userspace releases %d packets", n);
4055 for (n = 0; likely(j != k); n++) {
4056 struct netmap_slot *slot = &ring->slot[j];
3197 void *addr = NMB(slot);
4057 void *addr = BDG_NMB(na->nm_mem, slot);
3198
3199 if (addr == netmap_buffer_base) { /* bad buf */
4058
4059 if (addr == netmap_buffer_base) { /* bad buf */
3200 if (do_lock)
3201 na->nm_lock(ifp, NETMAP_RX_UNLOCK, ring_nr);
3202 return netmap_ring_reinit(kring);
4060 D("bad buffer index %d, ignore ?",
4061 slot->buf_idx);
3203 }
4062 }
3204 /* decrease refcount for buffer */
3205
3206 slot->flags &= ~NS_BUF_CHANGED;
4063 slot->flags &= ~NS_BUF_CHANGED;
3207 j = unlikely(j == lim) ? 0 : j + 1;
4064 j = nm_next(j, lim);
3208 }
3209 kring->nr_hwavail -= n;
3210 kring->nr_hwcur = k;
3211 }
3212 /* tell userspace that there are new packets */
3213 ring->avail = kring->nr_hwavail - resvd;
4065 }
4066 kring->nr_hwavail -= n;
4067 kring->nr_hwcur = k;
4068 }
4069 /* tell userspace that there are new packets */
4070 ring->avail = kring->nr_hwavail - resvd;
3214
3215 if (do_lock)
3216 na->nm_lock(ifp, NETMAP_RX_UNLOCK, ring_nr);
3217 return 0;
4071 n = 0;
4072done:
4073 mtx_unlock(&kring->q_lock);
4074 return n;
3218}
3219
3220
3221static void
3222bdg_netmap_attach(struct netmap_adapter *arg)
3223{
3224 struct netmap_adapter na;
3225
3226 ND("attaching virtual bridge");
3227 bzero(&na, sizeof(na));
3228
3229 na.ifp = arg->ifp;
4075}
4076
4077
4078static void
4079bdg_netmap_attach(struct netmap_adapter *arg)
4080{
4081 struct netmap_adapter na;
4082
4083 ND("attaching virtual bridge");
4084 bzero(&na, sizeof(na));
4085
4086 na.ifp = arg->ifp;
3230 na.separate_locks = 1;
4087 na.na_flags = NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
3231 na.num_tx_rings = arg->num_tx_rings;
3232 na.num_rx_rings = arg->num_rx_rings;
4088 na.num_tx_rings = arg->num_tx_rings;
4089 na.num_rx_rings = arg->num_rx_rings;
3233 na.num_tx_desc = NM_BRIDGE_RINGSIZE;
3234 na.num_rx_desc = NM_BRIDGE_RINGSIZE;
4090 na.num_tx_desc = arg->num_tx_desc;
4091 na.num_rx_desc = arg->num_rx_desc;
3235 na.nm_txsync = bdg_netmap_txsync;
3236 na.nm_rxsync = bdg_netmap_rxsync;
3237 na.nm_register = bdg_netmap_reg;
4092 na.nm_txsync = bdg_netmap_txsync;
4093 na.nm_rxsync = bdg_netmap_rxsync;
4094 na.nm_register = bdg_netmap_reg;
4095 na.nm_mem = netmap_mem_private_new(arg->ifp->if_xname,
4096 na.num_tx_rings, na.num_tx_desc,
4097 na.num_rx_rings, na.num_rx_desc);
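	/* each VALE port now owns a private allocator sized for its own
	 * rings (hence NAF_MEM_OWNER above), rather than carving its rings
	 * out of the global netmap memory pool.
	 */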
3238 netmap_attach(&na, na.num_tx_rings);
3239}
3240
4098 netmap_attach(&na, na.num_tx_rings);
4099}
4100
3241#endif /* NM_BRIDGE */
3242
3243static struct cdev *netmap_dev; /* /dev/netmap character device. */
3244
3245
3246/*
3247 * Module loader.
3248 *
3249 * Create the /dev/netmap device and initialize all global
3250 * variables.
3251 *
3252 * Return 0 on success, errno on failure.
3253 */
3254static int
3255netmap_init(void)
3256{
4101
4102static struct cdev *netmap_dev; /* /dev/netmap character device. */
4103
4104
4105/*
4106 * Module loader.
4107 *
4108 * Create the /dev/netmap device and initialize all global
4109 * variables.
4110 *
4111 * Return 0 on success, errno on failure.
4112 */
4113static int
4114netmap_init(void)
4115{
3257 int error;
4116 int i, error;
3258
4117
3259 error = netmap_memory_init();
4118 NMG_LOCK_INIT();
4119
4120 error = netmap_mem_init();
3260 if (error != 0) {
3261 printf("netmap: unable to initialize the memory allocator.\n");
3262 return (error);
3263 }
3264 printf("netmap: loaded module\n");
3265 netmap_dev = make_dev(&netmap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0660,
3266 "netmap");
3267
4121 if (error != 0) {
4122 printf("netmap: unable to initialize the memory allocator.\n");
4123 return (error);
4124 }
4125 printf("netmap: loaded module\n");
4126 netmap_dev = make_dev(&netmap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0660,
4127 "netmap");
4128
3268#ifdef NM_BRIDGE
3269 {
3270 int i;
3271 mtx_init(&netmap_bridge_mutex, "netmap_bridge_mutex",
3272 MTX_NETWORK_LOCK, MTX_DEF);
3273 bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
3274 for (i = 0; i < NM_BRIDGES; i++)
4129 bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
4130 for (i = 0; i < NM_BRIDGES; i++)
3275 rw_init(&nm_bridges[i].bdg_lock, "bdg lock");
3276 }
3277#endif
4131 BDG_RWINIT(&nm_bridges[i]);
3278 return (error);
3279}
3280
3281
3282/*
3283 * Module unloader.
3284 *
3285 * Free all the memory, and destroy the ``/dev/netmap`` device.
3286 */
3287static void
3288netmap_fini(void)
3289{
3290 destroy_dev(netmap_dev);
4132 return (error);
4133}
4134
4135
4136/*
4137 * Module unloader.
4138 *
4139 * Free all the memory, and destroy the ``/dev/netmap`` device.
4140 */
4141static void
4142netmap_fini(void)
4143{
4144 destroy_dev(netmap_dev);
3291 netmap_memory_fini();
4145 netmap_mem_fini();
4146 NMG_LOCK_DESTROY();
3292 printf("netmap: unloaded module.\n");
3293}
3294
3295
3296#ifdef __FreeBSD__
3297/*
3298 * Kernel entry point.
3299 *

--- 29 unchanged lines hidden ---
4147 printf("netmap: unloaded module.\n");
4148}
4149
4150
4151#ifdef __FreeBSD__
4152/*
4153 * Kernel entry point.
4154 *

--- 29 unchanged lines hidden ---