168b8534bSLuigi Rizzo /* 2849bec0eSLuigi Rizzo * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved. 368b8534bSLuigi Rizzo * 468b8534bSLuigi Rizzo * Redistribution and use in source and binary forms, with or without 568b8534bSLuigi Rizzo * modification, are permitted provided that the following conditions 668b8534bSLuigi Rizzo * are met: 768b8534bSLuigi Rizzo * 1. Redistributions of source code must retain the above copyright 868b8534bSLuigi Rizzo * notice, this list of conditions and the following disclaimer. 968b8534bSLuigi Rizzo * 2. Redistributions in binary form must reproduce the above copyright 1068b8534bSLuigi Rizzo * notice, this list of conditions and the following disclaimer in the 1168b8534bSLuigi Rizzo * documentation and/or other materials provided with the distribution. 1268b8534bSLuigi Rizzo * 1368b8534bSLuigi Rizzo * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1468b8534bSLuigi Rizzo * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1568b8534bSLuigi Rizzo * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1668b8534bSLuigi Rizzo * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 1768b8534bSLuigi Rizzo * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1868b8534bSLuigi Rizzo * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 1968b8534bSLuigi Rizzo * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2068b8534bSLuigi Rizzo * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2168b8534bSLuigi Rizzo * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2268b8534bSLuigi Rizzo * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2368b8534bSLuigi Rizzo * SUCH DAMAGE. 
2468b8534bSLuigi Rizzo */ 2568b8534bSLuigi Rizzo 26*ce3ee1e7SLuigi Rizzo 27*ce3ee1e7SLuigi Rizzo #ifdef __FreeBSD__ 28*ce3ee1e7SLuigi Rizzo #define TEST_STUFF // test code, does not compile yet on linux 29*ce3ee1e7SLuigi Rizzo #endif /* __FreeBSD__ */ 30f196ce38SLuigi Rizzo 3168b8534bSLuigi Rizzo /* 3268b8534bSLuigi Rizzo * This module supports memory mapped access to network devices, 3368b8534bSLuigi Rizzo * see netmap(4). 3468b8534bSLuigi Rizzo * 3568b8534bSLuigi Rizzo * The module uses a large, memory pool allocated by the kernel 3668b8534bSLuigi Rizzo * and accessible as mmapped memory by multiple userspace threads/processes. 3768b8534bSLuigi Rizzo * The memory pool contains packet buffers and "netmap rings", 3868b8534bSLuigi Rizzo * i.e. user-accessible copies of the interface's queues. 3968b8534bSLuigi Rizzo * 4068b8534bSLuigi Rizzo * Access to the network card works like this: 4168b8534bSLuigi Rizzo * 1. a process/thread issues one or more open() on /dev/netmap, to create 4268b8534bSLuigi Rizzo * select()able file descriptor on which events are reported. 4368b8534bSLuigi Rizzo * 2. on each descriptor, the process issues an ioctl() to identify 4468b8534bSLuigi Rizzo * the interface that should report events to the file descriptor. 4568b8534bSLuigi Rizzo * 3. on each descriptor, the process issues an mmap() request to 4668b8534bSLuigi Rizzo * map the shared memory region within the process' address space. 4768b8534bSLuigi Rizzo * The list of interesting queues is indicated by a location in 4868b8534bSLuigi Rizzo * the shared memory region. 4968b8534bSLuigi Rizzo * 4. using the functions in the netmap(4) userspace API, a process 5068b8534bSLuigi Rizzo * can look up the occupation state of a queue, access memory buffers, 5168b8534bSLuigi Rizzo * and retrieve received packets or enqueue packets to transmit. 5268b8534bSLuigi Rizzo * 5. 
using some ioctl()s the process can synchronize the userspace view 5368b8534bSLuigi Rizzo * of the queue with the actual status in the kernel. This includes both 5468b8534bSLuigi Rizzo * receiving the notification of new packets, and transmitting new 5568b8534bSLuigi Rizzo * packets on the output interface. 5668b8534bSLuigi Rizzo * 6. select() or poll() can be used to wait for events on individual 5768b8534bSLuigi Rizzo * transmit or receive queues (or all queues for a given interface). 58*ce3ee1e7SLuigi Rizzo * 59*ce3ee1e7SLuigi Rizzo 60*ce3ee1e7SLuigi Rizzo SYNCHRONIZATION (USER) 61*ce3ee1e7SLuigi Rizzo 62*ce3ee1e7SLuigi Rizzo The netmap rings and data structures may be shared among multiple 63*ce3ee1e7SLuigi Rizzo user threads or even independent processes. 64*ce3ee1e7SLuigi Rizzo Any synchronization among those threads/processes is delegated 65*ce3ee1e7SLuigi Rizzo to the threads themselves. Only one thread at a time can be in 66*ce3ee1e7SLuigi Rizzo a system call on the same netmap ring. The OS does not enforce 67*ce3ee1e7SLuigi Rizzo this and only guarantees against system crashes in case of 68*ce3ee1e7SLuigi Rizzo invalid usage. 69*ce3ee1e7SLuigi Rizzo 70*ce3ee1e7SLuigi Rizzo LOCKING (INTERNAL) 71*ce3ee1e7SLuigi Rizzo 72*ce3ee1e7SLuigi Rizzo Within the kernel, access to the netmap rings is protected as follows: 73*ce3ee1e7SLuigi Rizzo 74*ce3ee1e7SLuigi Rizzo - a spinlock on each ring, to handle producer/consumer races on 75*ce3ee1e7SLuigi Rizzo RX rings attached to the host stack (against multiple host 76*ce3ee1e7SLuigi Rizzo threads writing from the host stack to the same ring), 77*ce3ee1e7SLuigi Rizzo and on 'destination' rings attached to a VALE switch 78*ce3ee1e7SLuigi Rizzo (i.e. 
RX rings in VALE ports, and TX rings in NIC/host ports) 79*ce3ee1e7SLuigi Rizzo protecting multiple active senders for the same destination) 80*ce3ee1e7SLuigi Rizzo 81*ce3ee1e7SLuigi Rizzo - an atomic variable to guarantee that there is at most one 82*ce3ee1e7SLuigi Rizzo instance of *_*xsync() on the ring at any time. 83*ce3ee1e7SLuigi Rizzo For rings connected to user file 84*ce3ee1e7SLuigi Rizzo descriptors, an atomic_test_and_set() protects this, and the 85*ce3ee1e7SLuigi Rizzo lock on the ring is not actually used. 86*ce3ee1e7SLuigi Rizzo For NIC RX rings connected to a VALE switch, an atomic_test_and_set() 87*ce3ee1e7SLuigi Rizzo is also used to prevent multiple executions (the driver might indeed 88*ce3ee1e7SLuigi Rizzo already guarantee this). 89*ce3ee1e7SLuigi Rizzo For NIC TX rings connected to a VALE switch, the lock arbitrates 90*ce3ee1e7SLuigi Rizzo access to the queue (both when allocating buffers and when pushing 91*ce3ee1e7SLuigi Rizzo them out). 92*ce3ee1e7SLuigi Rizzo 93*ce3ee1e7SLuigi Rizzo - *xsync() should be protected against initializations of the card. 94*ce3ee1e7SLuigi Rizzo On FreeBSD most devices have the reset routine protected by 95*ce3ee1e7SLuigi Rizzo a RING lock (ixgbe, igb, em) or core lock (re). lem is missing 96*ce3ee1e7SLuigi Rizzo the RING protection on rx_reset(), this should be added. 97*ce3ee1e7SLuigi Rizzo 98*ce3ee1e7SLuigi Rizzo On linux there is an external lock on the tx path, which probably 99*ce3ee1e7SLuigi Rizzo also arbitrates access to the reset routine. XXX to be revised 100*ce3ee1e7SLuigi Rizzo 101*ce3ee1e7SLuigi Rizzo - a per-interface core_lock protecting access from the host stack 102*ce3ee1e7SLuigi Rizzo while interfaces may be detached from netmap mode. 103*ce3ee1e7SLuigi Rizzo XXX there should be no need for this lock if we detach the interfaces 104*ce3ee1e7SLuigi Rizzo only while they are down. 
105*ce3ee1e7SLuigi Rizzo 106*ce3ee1e7SLuigi Rizzo 107*ce3ee1e7SLuigi Rizzo --- VALE SWITCH --- 108*ce3ee1e7SLuigi Rizzo 109*ce3ee1e7SLuigi Rizzo NMG_LOCK() serializes all modifications to switches and ports. 110*ce3ee1e7SLuigi Rizzo A switch cannot be deleted until all ports are gone. 111*ce3ee1e7SLuigi Rizzo 112*ce3ee1e7SLuigi Rizzo For each switch, an SX lock (RWlock on linux) protects 113*ce3ee1e7SLuigi Rizzo deletion of ports. When configuring or deleting a new port, the 114*ce3ee1e7SLuigi Rizzo lock is acquired in exclusive mode (after holding NMG_LOCK). 115*ce3ee1e7SLuigi Rizzo When forwarding, the lock is acquired in shared mode (without NMG_LOCK). 116*ce3ee1e7SLuigi Rizzo The lock is held throughout the entire forwarding cycle, 117*ce3ee1e7SLuigi Rizzo during which the thread may incur in a page fault. 118*ce3ee1e7SLuigi Rizzo Hence it is important that sleepable shared locks are used. 119*ce3ee1e7SLuigi Rizzo 120*ce3ee1e7SLuigi Rizzo On the rx ring, the per-port lock is grabbed initially to reserve 121*ce3ee1e7SLuigi Rizzo a number of slot in the ring, then the lock is released, 122*ce3ee1e7SLuigi Rizzo packets are copied from source to destination, and then 123*ce3ee1e7SLuigi Rizzo the lock is acquired again and the receive ring is updated. 124*ce3ee1e7SLuigi Rizzo (A similar thing is done on the tx ring for NIC and host stack 125*ce3ee1e7SLuigi Rizzo ports attached to the switch) 126*ce3ee1e7SLuigi Rizzo 12768b8534bSLuigi Rizzo */ 12868b8534bSLuigi Rizzo 129*ce3ee1e7SLuigi Rizzo /* 130*ce3ee1e7SLuigi Rizzo * OS-specific code that is used only within this file. 
131*ce3ee1e7SLuigi Rizzo * Other OS-specific code that must be accessed by drivers 132*ce3ee1e7SLuigi Rizzo * is present in netmap_kern.h 133*ce3ee1e7SLuigi Rizzo */ 13401c7d25fSLuigi Rizzo 135*ce3ee1e7SLuigi Rizzo #if defined(__FreeBSD__) 13668b8534bSLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */ 13768b8534bSLuigi Rizzo __FBSDID("$FreeBSD$"); 13868b8534bSLuigi Rizzo 13968b8534bSLuigi Rizzo #include <sys/types.h> 14068b8534bSLuigi Rizzo #include <sys/module.h> 14168b8534bSLuigi Rizzo #include <sys/errno.h> 14268b8534bSLuigi Rizzo #include <sys/param.h> /* defines used in kernel.h */ 143506cc70cSLuigi Rizzo #include <sys/jail.h> 14468b8534bSLuigi Rizzo #include <sys/kernel.h> /* types used in module initialization */ 14568b8534bSLuigi Rizzo #include <sys/conf.h> /* cdevsw struct */ 14668b8534bSLuigi Rizzo #include <sys/uio.h> /* uio struct */ 14768b8534bSLuigi Rizzo #include <sys/sockio.h> 14868b8534bSLuigi Rizzo #include <sys/socketvar.h> /* struct socket */ 14968b8534bSLuigi Rizzo #include <sys/malloc.h> 15068b8534bSLuigi Rizzo #include <sys/mman.h> /* PROT_EXEC */ 15168b8534bSLuigi Rizzo #include <sys/poll.h> 152506cc70cSLuigi Rizzo #include <sys/proc.h> 15389f6b863SAttilio Rao #include <sys/rwlock.h> 15468b8534bSLuigi Rizzo #include <vm/vm.h> /* vtophys */ 15568b8534bSLuigi Rizzo #include <vm/pmap.h> /* vtophys */ 156*ce3ee1e7SLuigi Rizzo #include <vm/vm_param.h> 157*ce3ee1e7SLuigi Rizzo #include <vm/vm_object.h> 158*ce3ee1e7SLuigi Rizzo #include <vm/vm_page.h> 159*ce3ee1e7SLuigi Rizzo #include <vm/vm_pager.h> 160*ce3ee1e7SLuigi Rizzo #include <vm/uma.h> 16168b8534bSLuigi Rizzo #include <sys/socket.h> /* sockaddrs */ 16268b8534bSLuigi Rizzo #include <sys/selinfo.h> 16368b8534bSLuigi Rizzo #include <sys/sysctl.h> 16468b8534bSLuigi Rizzo #include <net/if.h> 16576039bc8SGleb Smirnoff #include <net/if_var.h> 16668b8534bSLuigi Rizzo #include <net/bpf.h> /* BIOCIMMEDIATE */ 167506cc70cSLuigi Rizzo #include <net/vnet.h> 16868b8534bSLuigi Rizzo #include 
<machine/bus.h> /* bus_dmamap_* */ 169*ce3ee1e7SLuigi Rizzo #include <sys/endian.h> 170*ce3ee1e7SLuigi Rizzo #include <sys/refcount.h> 17168b8534bSLuigi Rizzo 172*ce3ee1e7SLuigi Rizzo #define prefetch(x) __builtin_prefetch(x) 17368b8534bSLuigi Rizzo 174*ce3ee1e7SLuigi Rizzo #define BDG_RWLOCK_T struct rwlock // struct rwlock 175*ce3ee1e7SLuigi Rizzo 176*ce3ee1e7SLuigi Rizzo #define BDG_RWINIT(b) \ 177*ce3ee1e7SLuigi Rizzo rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS) 178*ce3ee1e7SLuigi Rizzo #define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock) 179*ce3ee1e7SLuigi Rizzo #define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock) 180*ce3ee1e7SLuigi Rizzo #define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock) 181*ce3ee1e7SLuigi Rizzo #define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock) 182*ce3ee1e7SLuigi Rizzo #define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock) 183*ce3ee1e7SLuigi Rizzo #define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock) 184*ce3ee1e7SLuigi Rizzo 185*ce3ee1e7SLuigi Rizzo 186*ce3ee1e7SLuigi Rizzo /* netmap global lock. 
 * normally called within the user thread (upon a system call)
 * or when a file descriptor or process is terminated
 * (last close or last munmap)
 */

/* On FreeBSD the global lock is a plain default mutex. */
#define NMG_LOCK_T		struct mtx
#define NMG_LOCK_INIT()		mtx_init(&netmap_global_lock, "netmap global lock", NULL, MTX_DEF)
#define NMG_LOCK_DESTROY()	mtx_destroy(&netmap_global_lock)
#define NMG_LOCK()		mtx_lock(&netmap_global_lock)
#define NMG_UNLOCK()		mtx_unlock(&netmap_global_lock)
#define NMG_LOCK_ASSERT()	mtx_assert(&netmap_global_lock, MA_OWNED)


/* atomic operations */
#include <machine/atomic.h>
/* Returns non-zero if the flag was ALREADY set (i.e. the acquire failed);
 * acquire/release semantics pair with NM_ATOMIC_CLEAR below. */
#define NM_ATOMIC_TEST_AND_SET(p)	(!atomic_cmpset_acq_int((p), 0, 1))
#define NM_ATOMIC_CLEAR(p)		atomic_store_rel_int((p), 0)


#elif defined(linux)

#include "bsd_glue.h"

static netdev_tx_t linux_netmap_start_xmit(struct sk_buff *, struct net_device *);

/*
 * Walk up the device hierarchy (dev->parent chain) until a device that
 * has a bound driver is found. Returns NULL if no ancestor has a driver
 * (e.g. a purely virtual interface).
 */
static struct device_driver*
linux_netmap_find_driver(struct device *dev)
{
	struct device_driver *dd;

	while ( (dd = dev->driver) == NULL ) {
		if ( (dev = dev->parent) == NULL )
			return NULL;
	}
	return dd;
}

/*
 * FreeBSD-style lookup-with-hold emulated on Linux:
 * take a reference on the net_device (dev_get_by_name) and pin the
 * owning driver module (try_module_get) so neither can disappear
 * while netmap uses the interface. On any failure the device
 * reference is dropped and NULL is returned.
 */
static struct net_device*
ifunit_ref(const char *name)
{
	struct net_device *ifp = dev_get_by_name(&init_net, name);
	struct device_driver *dd;

	if (ifp == NULL)
		return NULL;

	if ( (dd = linux_netmap_find_driver(&ifp->dev)) == NULL )
		goto error;

	if (!try_module_get(dd->owner))
		goto error;

	return ifp;
error:
	dev_put(ifp);
	return NULL;
}

/*
 * Release the references taken by ifunit_ref(): drop the device
 * reference and unpin the driver module (if one was found).
 */
static void
if_rele(struct net_device *ifp)
{
	struct device_driver *dd;
	dd = linux_netmap_find_driver(&ifp->dev);
	dev_put(ifp);
	if (dd)
		module_put(dd->owner);
}

/* On Linux the global lock is a semaphore initialized to 1
 * (i.e. used as a sleepable mutex). */
// XXX a mtx would suffice here too 20130404 gl
#define NMG_LOCK_T		struct semaphore
#define NMG_LOCK_INIT()		sema_init(&netmap_global_lock, 1)
#define NMG_LOCK_DESTROY()
#define NMG_LOCK()		down(&netmap_global_lock)
#define NMG_UNLOCK()		up(&netmap_global_lock)
#define NMG_LOCK_ASSERT()	// XXX to be completed


#elif defined(__APPLE__)

#warning OSX support is only partial
#include "osx_glue.h"

#else

#error Unsupported platform

#endif /* unsupported */

/*
 * common headers
 */
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>


MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");

/*
 * The following variables are used by the drivers and replicate
 * fields in the global memory pool. They only refer to buffers
 * used by physical interfaces.
 */
u_int netmap_total_buffers;	/* number of buffers in the pool */
u_int netmap_buf_size;		/* size of each buffer */
char *netmap_buffer_base;	/* also address of an invalid buffer */

/* user-controlled variables */
int netmap_verbose;		/* enable verbose diagnostics */

static int netmap_no_timestamp; /* don't timestamp on rxsync */

SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args");
SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
    CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
    CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
int netmap_mitigate = 1;	/* interrupt mitigation on/off */
SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
int netmap_no_pendintr = 1;	/* always scan for new packets on *sync */
SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr,
    CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets.");
int netmap_txsync_retry = 2;	/* txsync retries in the bridge flush loop */
SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
    &netmap_txsync_retry, 0 , "Number of txsync loops in bridge's flush.");

int netmap_drop = 0;	/* debugging */
int netmap_flags = 0;	/* debug flags */
int netmap_fwd = 0;	/* force transparent mode */
int netmap_mmap_unreg = 0; /* allow mmap of unregistered fds */

SYSCTL_INT(_dev_netmap, OID_AUTO, drop, CTLFLAG_RW, &netmap_drop, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, mmap_unreg, CTLFLAG_RW, &netmap_mmap_unreg, 0, "");

/* the one global lock instance, type is per-platform (see above) */
NMG_LOCK_T	netmap_global_lock;

/*
 * protect against multiple threads using the same ring.
 * also check that the ring has not been stopped.
 */
#define NM_KR_BUSY	1	/* some other thread holds the ring */
#define NM_KR_STOPPED	2	/* the ring is being shut down / reset */
static void nm_kr_put(struct netmap_kring *kr);

/*
 * Try to grab exclusive use of a kring for a *sync operation.
 * Returns 0 on success (caller must later call nm_kr_put()),
 * NM_KR_BUSY if another thread owns it, NM_KR_STOPPED if the
 * ring is stopped. Non-blocking by design.
 */
static __inline int nm_kr_tryget(struct netmap_kring *kr)
{
	/* check a first time without taking the lock
	 * to avoid starvation for nm_kr_get()
	 */
	if (unlikely(kr->nkr_stopped)) {
		ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
		return NM_KR_STOPPED;
	}
	if (unlikely(NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)))
		return NM_KR_BUSY;
	/* check a second time with lock held: the stop flag may have
	 * been raised between the first check and our acquire, in
	 * which case we must back out the busy flag before returning.
	 */
	if (unlikely(kr->nkr_stopped)) {
		ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
		nm_kr_put(kr);
		return NM_KR_STOPPED;
	}
	return 0;
}

/* Release exclusive use of the kring (pairs with nm_kr_tryget/nm_kr_get). */
static __inline void nm_kr_put(struct netmap_kring *kr)
{
	NM_ATOMIC_CLEAR(&kr->nr_busy);
}

/*
 * Blocking acquire of the busy flag: spin with a short tsleep()
 * (4 ticks) until the flag can be taken. Used on the stop path,
 * where waiting is acceptable.
 */
static void nm_kr_get(struct netmap_kring *kr)
{
	while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy))
		tsleep(kr, 0, "NM_KR_GET", 4);
}

/*
 * Stop a ring: raise the stopped flag, then acquire (and release)
 * both the busy flag and the ring spinlock. The lock/unlock pair
 * acts as a barrier guaranteeing that any thread that was inside
 * the ring has drained before we return.
 */
static void nm_disable_ring(struct netmap_kring *kr)
{
	kr->nkr_stopped = 1;
	nm_kr_get(kr);
	mtx_lock(&kr->q_lock);
	mtx_unlock(&kr->q_lock);
	nm_kr_put(kr);
}

/*
 * Stop all tx/rx rings of an interface in netmap mode (including the
 * extra host rings, hence the "+ 1"), waking up any sleeping selecters
 * so they can notice the state change. No-op if the interface is not
 * in netmap mode.
 */
void netmap_disable_all_rings(struct ifnet *ifp)
{
	struct netmap_adapter *na;
	int i;

	if (!(ifp->if_capenable & IFCAP_NETMAP))
		return;

	na = NA(ifp);

	for (i = 0; i < na->num_tx_rings + 1; i++) {
		nm_disable_ring(na->tx_rings + i);
		selwakeuppri(&na->tx_rings[i].si, PI_NET);
	}
	for (i = 0; i < na->num_rx_rings + 1; i++) {
		nm_disable_ring(na->rx_rings + i);
		selwakeuppri(&na->rx_rings[i].si, PI_NET);
	}
	selwakeuppri(&na->tx_si, PI_NET);
	selwakeuppri(&na->rx_si, PI_NET);
}

/*
 * Re-enable all rings of an interface in netmap mode by clearing the
 * stopped flag. No draining is needed here: stopped rings have no
 * active users by construction (see nm_disable_ring).
 */
void netmap_enable_all_rings(struct ifnet *ifp)
{
	struct netmap_adapter *na;
	int i;

	if (!(ifp->if_capenable & IFCAP_NETMAP))
		return;

	na = NA(ifp);
	for (i = 0; i < na->num_tx_rings + 1; i++) {
		D("enabling %p", na->tx_rings + i);
		na->tx_rings[i].nkr_stopped = 0;
	}
	for (i = 0; i < na->num_rx_rings + 1; i++) {
		D("enabling %p", na->rx_rings + i);
		na->rx_rings[i].nkr_stopped = 0;
	}
}
/*
 * generic bound_checking function.
 *
 * Force *v into the range [lo, hi]: a value below lo is reset to the
 * (range-clamped) default dflt, a value above hi is clamped to hi.
 * If the value was changed and msg is non-NULL, a one-line report is
 * printed. Returns the final value of *v.
 */
u_int
nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg)
{
	u_int prev = *v;		/* remember incoming value for the report */
	const char *action = NULL;	/* set only if we had to adjust *v */

	/* first make sure the fallback default itself lies in [lo, hi] */
	if (dflt < lo)
		dflt = lo;
	if (dflt > hi)
		dflt = hi;
	if (prev < lo) {
		*v = dflt;	/* too small: fall back to the default */
		action = "Bump";
	} else if (prev > hi) {
		*v = hi;	/* too large: clamp to the maximum */
		action = "Clamp";
	}
	if (action != NULL && msg != NULL)
		printf("%s %s to %d (was %d)\n", action, msg, *v, prev);
	return *v;
}

/*
 * packet-dump function, writing into a user-supplied buffer or, if
 * dst == NULL, into a static (non-reentrant) 8 KiB buffer.
 * Output is a header line followed by classic hexdump rows:
 * "%5d: " offset, 48 columns of hex (16 bytes), 16 printable chars.
 * NOTE(review): each 16-byte row takes 72 output characters plus a
 * ~30-char header, so dst should hold at least
 * 30 + 72 * ((lim + 15) / 16) bytes — the historical "30+4*len"
 * estimate is too small; confirm callers pass enough space.
 */
const char *
nm_dump_buf(char *p, int len, int lim, char *dst)
{
	static char _dst[8192];	/* shared fallback buffer */
	static char hexdigits[] = "0123456789abcdef";
	char *out;		/* current write position in dst */
	int pos, col, row_start;

	if (!dst)
		dst = _dst;
	/* a non-positive or oversized limit means "dump everything" */
	if (lim <= 0 || lim > len)
		lim = len;
	out = dst;
	sprintf(out, "buf 0x%p len %d lim %d\n", p, len, lim);
	out += strlen(out);
	/* hexdump routine: one row of up to 16 bytes per iteration */
	for (pos = 0; pos < lim;) {
		sprintf(out, "%5d: ", pos);
		out += strlen(out);
		memset(out, ' ', 48);	/* blank the hex area (16 * 3 cols) */
		row_start = pos;
		/* first pass: two hex digits per byte */
		for (col = 0; col < 16 && pos < lim; pos++, col++) {
			out[col * 3] = hexdigits[(p[pos] & 0xf0) >> 4];
			out[col * 3 + 1] = hexdigits[p[pos] & 0xf];
		}
		/* second pass over the same bytes: printable ASCII or '.' */
		pos = row_start;
		for (col = 0; col < 16 && pos < lim; pos++, col++)
			out[col + 48] = (p[pos] >= 0x20 && p[pos] <= 0x7e) ?
			    p[pos] : '.';
		out[col + 48] = '\n';
		out += col + 49;
	}
	*out = '\0';
	return dst;
}
494f18be576SLuigi Rizzo * 495f18be576SLuigi Rizzo * Unlike physical interfaces, switch ports use their own memory region 496f18be576SLuigi Rizzo * for rings and buffers. 497f196ce38SLuigi Rizzo * The virtual interfaces use per-queue lock instead of core lock. 498f196ce38SLuigi Rizzo * In the tx loop, we aggregate traffic in batches to make all operations 499*ce3ee1e7SLuigi Rizzo * faster. The batch size is bridge_batch. 500f196ce38SLuigi Rizzo */ 501f18be576SLuigi Rizzo #define NM_BDG_MAXRINGS 16 /* XXX unclear how many. */ 502*ce3ee1e7SLuigi Rizzo #define NM_BDG_MAXSLOTS 4096 /* XXX same as above */ 503f196ce38SLuigi Rizzo #define NM_BRIDGE_RINGSIZE 1024 /* in the device */ 504f196ce38SLuigi Rizzo #define NM_BDG_HASH 1024 /* forwarding table entries */ 505f196ce38SLuigi Rizzo #define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */ 506*ce3ee1e7SLuigi Rizzo #define NM_MULTISEG 64 /* max size of a chain of bufs */ 507*ce3ee1e7SLuigi Rizzo /* actual size of the tables */ 508*ce3ee1e7SLuigi Rizzo #define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG) 509*ce3ee1e7SLuigi Rizzo /* NM_FT_NULL terminates a list of slots in the ft */ 510*ce3ee1e7SLuigi Rizzo #define NM_FT_NULL NM_BDG_BATCH_MAX 511f18be576SLuigi Rizzo #define NM_BRIDGES 8 /* number of bridges */ 512d4b42e08SLuigi Rizzo 513d4b42e08SLuigi Rizzo 514*ce3ee1e7SLuigi Rizzo /* 515*ce3ee1e7SLuigi Rizzo * bridge_batch is set via sysctl to the max batch size to be 516*ce3ee1e7SLuigi Rizzo * used in the bridge. The actual value may be larger as the 517*ce3ee1e7SLuigi Rizzo * last packet in the block may overflow the size. 518*ce3ee1e7SLuigi Rizzo */ 519*ce3ee1e7SLuigi Rizzo int bridge_batch = NM_BDG_BATCH; /* bridge batch size */ 520*ce3ee1e7SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , ""); 52101c7d25fSLuigi Rizzo 522f196ce38SLuigi Rizzo 523849bec0eSLuigi Rizzo /* 524849bec0eSLuigi Rizzo * These are used to handle reference counters for bridge ports. 
 */
/* bridge-port reference counting on the adapter attached to ifp */
#define ADD_BDG_REF(ifp)	refcount_acquire(&NA(ifp)->na_bdg_refcount)
#define DROP_BDG_REF(ifp)	refcount_release(&NA(ifp)->na_bdg_refcount)

/* The bridge references the buffers using the device specific look up table */
static inline void *
BDG_NMB(struct netmap_mem_d *nmd, struct netmap_slot *slot)
{
	struct lut_entry *lut = nmd->pools[NETMAP_BUF_POOL].lut;
	uint32_t i = slot->buf_idx;
	/* out-of-range indexes are redirected to buffer 0 rather than
	 * dereferencing past the lut (defensive clamp, not an error path) */
	return (unlikely(i >= nmd->pools[NETMAP_BUF_POOL].objtotal)) ? lut[0].vaddr : lut[i].vaddr;
}

static void bdg_netmap_attach(struct netmap_adapter *);
static int bdg_netmap_reg(struct ifnet *ifp, int onoff);
int kern_netmap_regif(struct nmreq *nmr);

/*
 * Each transmit queue accumulates a batch of packets into
 * a structure before forwarding. Packets to the same
 * destination are put in a list using ft_next as a link field.
 * ft_frags and ft_next are valid only on the first fragment.
 */
struct nm_bdg_fwd {	/* forwarding entry for a bridge */
	void *ft_buf;		/* netmap or indirect buffer */
	uint8_t ft_frags;	/* how many fragments (only on 1st frag) */
	uint8_t _ft_port;	/* dst port (unused) */
	uint16_t ft_flags;	/* flags, e.g. indirect */
	uint16_t ft_len;	/* src fragment len */
	uint16_t ft_next;	/* next packet to same destination */
};

/*
 * For each output interface, nm_bdg_q is used to construct a list.
 * bq_len is the number of output buffers (we can have coalescing
 * during the copy).
 */
struct nm_bdg_q {
	uint16_t bq_head;
	uint16_t bq_tail;
	uint32_t bq_len;	/* number of buffers */
};

/* XXX revise this */
struct nm_hash_ent {
	uint64_t mac;	/* the top 2 bytes are the epoch */
	uint64_t ports;
};

/*
 * nm_bridge is a descriptor for a VALE switch.
 * Interfaces for a bridge are all in bdg_ports[].
 * The array has fixed size, an empty entry does not terminate
 * the search, but lookups only occur on attach/detach so we
 * don't mind if they are slow.
 *
 * The bridge is non blocking on the transmit ports: excess
 * packets are dropped if there is no room on the output port.
 *
 * bdg_lock protects accesses to the bdg_ports array.
 * This is a rw lock (or equivalent).
 */
struct nm_bridge {
	/* XXX what is the proper alignment/layout ? */
	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
	int		bdg_namelen;	/* length of the prefix stored in bdg_basename */
	uint32_t	bdg_active_ports; /* 0 means free */
	char		bdg_basename[IFNAMSIZ];	/* NOTE: not necessarily NUL-terminated;
						 * always compared with strncmp(bdg_namelen) */

	/* Indexes of active ports (up to active_ports)
	 * and all other remaining ports.
	 */
	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];

	struct netmap_adapter *bdg_ports[NM_BDG_MAXPORTS];


	/*
	 * The function to decide the destination port.
	 * It returns either of an index of the destination port,
	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
	 * forward this packet.  ring_nr is the source ring index, and the
	 * function may overwrite this value to forward this packet to a
	 * different ring index.
	 * This function must be set by netmap_bdgctl().
	 */
	bdg_lookup_fn_t nm_bdg_lookup;

	/* the forwarding table, MAC+ports.
	 * XXX should be changed to an argument to be passed to
	 * the lookup function, and allocated on attach
	 */
	struct nm_hash_ent ht[NM_BDG_HASH];
};


/*
 * XXX in principle nm_bridges could be created dynamically
 * Right now we have a static array and deletions are protected
 * by an exclusive lock.
 */
struct nm_bridge nm_bridges[NM_BRIDGES];


/*
 * A few function to tell which kind of port are we using.
 * XXX should we hold a lock ?
 *
 * nma_is_vp()		virtual port
 * nma_is_host()	port connected to the host stack
 * nma_is_hw()		port connected to a NIC
 *
 * The discrimination relies only on which nm_register callback
 * the adapter was given: virtual ports use bdg_netmap_reg, the
 * host-stack adapter has no register callback at all.
 */
int nma_is_vp(struct netmap_adapter *na);
int
nma_is_vp(struct netmap_adapter *na)
{
	return na->nm_register == bdg_netmap_reg;
}

static __inline int
nma_is_host(struct netmap_adapter *na)
{
	return na->nm_register == NULL;
}

static __inline int
nma_is_hw(struct netmap_adapter *na)
{
	/* In case of sw adapter, nm_register is NULL */
	return !nma_is_vp(na) && !nma_is_host(na);
}


/*
 * If the NIC is owned by the kernel
 * (i.e., bridge), neither another bridge nor user can use it;
 * if the NIC is owned by a user, only users can share it.
 * Evaluation must be done under NMG_LOCK().
 */
#define NETMAP_OWNED_BY_KERN(ifp)	(!nma_is_vp(NA(ifp)) && NA(ifp)->na_bdg)
#define NETMAP_OWNED_BY_ANY(ifp) \
	(NETMAP_OWNED_BY_KERN(ifp) || (NA(ifp)->refcount > 0))

/*
 * NA(ifp)->bdg_port	port index
 */


/*
 * this is a slightly optimized copy routine which rounds
 * to multiple of 64 bytes and is often faster than dealing
 * with other odd sizes. We assume there is enough room
 * in the source and destination buffers.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
	uint64_t *src = _src;
	uint64_t *dst = _dst;
	/* for large packets the plain memcpy is at least as fast */
	if (unlikely(l >= 1024)) {
		memcpy(dst, src, l);
		return;
	}
	/* copy 64 bytes (8 x uint64_t) per iteration; note that a
	 * length that is not a multiple of 64 is rounded UP, so both
	 * buffers must have room for the rounded size (see XXX above).
	 */
	for (; likely(l > 0); l-=64) {
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
	}
}


#ifdef TEST_STUFF
/* name/function pair used by the nm_test dispatcher below */
struct xxx {
	char *name;
	void (*fn)(uint32_t);
};

Rizzo 709*ce3ee1e7SLuigi Rizzo 710*ce3ee1e7SLuigi Rizzo static void 711*ce3ee1e7SLuigi Rizzo nm_test_defmtx(uint32_t n) 712*ce3ee1e7SLuigi Rizzo { 713*ce3ee1e7SLuigi Rizzo uint32_t i; 714*ce3ee1e7SLuigi Rizzo struct mtx m; 715*ce3ee1e7SLuigi Rizzo mtx_init(&m, "test", NULL, MTX_DEF); 716*ce3ee1e7SLuigi Rizzo for (i = 0; i < n; i++) { mtx_lock(&m); mtx_unlock(&m); } 717*ce3ee1e7SLuigi Rizzo mtx_destroy(&m); 718*ce3ee1e7SLuigi Rizzo return; 719*ce3ee1e7SLuigi Rizzo } 720*ce3ee1e7SLuigi Rizzo 721*ce3ee1e7SLuigi Rizzo static void 722*ce3ee1e7SLuigi Rizzo nm_test_spinmtx(uint32_t n) 723*ce3ee1e7SLuigi Rizzo { 724*ce3ee1e7SLuigi Rizzo uint32_t i; 725*ce3ee1e7SLuigi Rizzo struct mtx m; 726*ce3ee1e7SLuigi Rizzo mtx_init(&m, "test", NULL, MTX_SPIN); 727*ce3ee1e7SLuigi Rizzo for (i = 0; i < n; i++) { mtx_lock(&m); mtx_unlock(&m); } 728*ce3ee1e7SLuigi Rizzo mtx_destroy(&m); 729*ce3ee1e7SLuigi Rizzo return; 730*ce3ee1e7SLuigi Rizzo } 731*ce3ee1e7SLuigi Rizzo 732*ce3ee1e7SLuigi Rizzo static void 733*ce3ee1e7SLuigi Rizzo nm_test_rlock(uint32_t n) 734*ce3ee1e7SLuigi Rizzo { 735*ce3ee1e7SLuigi Rizzo uint32_t i; 736*ce3ee1e7SLuigi Rizzo struct rwlock m; 737*ce3ee1e7SLuigi Rizzo rw_init(&m, "test"); 738*ce3ee1e7SLuigi Rizzo for (i = 0; i < n; i++) { rw_rlock(&m); rw_runlock(&m); } 739*ce3ee1e7SLuigi Rizzo rw_destroy(&m); 740*ce3ee1e7SLuigi Rizzo return; 741*ce3ee1e7SLuigi Rizzo } 742*ce3ee1e7SLuigi Rizzo 743*ce3ee1e7SLuigi Rizzo static void 744*ce3ee1e7SLuigi Rizzo nm_test_wlock(uint32_t n) 745*ce3ee1e7SLuigi Rizzo { 746*ce3ee1e7SLuigi Rizzo uint32_t i; 747*ce3ee1e7SLuigi Rizzo struct rwlock m; 748*ce3ee1e7SLuigi Rizzo rw_init(&m, "test"); 749*ce3ee1e7SLuigi Rizzo for (i = 0; i < n; i++) { rw_wlock(&m); rw_wunlock(&m); } 750*ce3ee1e7SLuigi Rizzo rw_destroy(&m); 751*ce3ee1e7SLuigi Rizzo return; 752*ce3ee1e7SLuigi Rizzo } 753*ce3ee1e7SLuigi Rizzo 754*ce3ee1e7SLuigi Rizzo static void 755*ce3ee1e7SLuigi Rizzo nm_test_slock(uint32_t n) 756*ce3ee1e7SLuigi Rizzo { 757*ce3ee1e7SLuigi Rizzo 
uint32_t i; 758*ce3ee1e7SLuigi Rizzo struct sx m; 759*ce3ee1e7SLuigi Rizzo sx_init(&m, "test"); 760*ce3ee1e7SLuigi Rizzo for (i = 0; i < n; i++) { sx_slock(&m); sx_sunlock(&m); } 761*ce3ee1e7SLuigi Rizzo sx_destroy(&m); 762*ce3ee1e7SLuigi Rizzo return; 763*ce3ee1e7SLuigi Rizzo } 764*ce3ee1e7SLuigi Rizzo 765*ce3ee1e7SLuigi Rizzo static void 766*ce3ee1e7SLuigi Rizzo nm_test_xlock(uint32_t n) 767*ce3ee1e7SLuigi Rizzo { 768*ce3ee1e7SLuigi Rizzo uint32_t i; 769*ce3ee1e7SLuigi Rizzo struct sx m; 770*ce3ee1e7SLuigi Rizzo sx_init(&m, "test"); 771*ce3ee1e7SLuigi Rizzo for (i = 0; i < n; i++) { sx_xlock(&m); sx_xunlock(&m); } 772*ce3ee1e7SLuigi Rizzo sx_destroy(&m); 773*ce3ee1e7SLuigi Rizzo return; 774*ce3ee1e7SLuigi Rizzo } 775*ce3ee1e7SLuigi Rizzo 776*ce3ee1e7SLuigi Rizzo 777*ce3ee1e7SLuigi Rizzo struct xxx nm_tests[] = { 778*ce3ee1e7SLuigi Rizzo { "defmtx", nm_test_defmtx }, 779*ce3ee1e7SLuigi Rizzo { "spinmtx", nm_test_spinmtx }, 780*ce3ee1e7SLuigi Rizzo { "rlock", nm_test_rlock }, 781*ce3ee1e7SLuigi Rizzo { "wlock", nm_test_wlock }, 782*ce3ee1e7SLuigi Rizzo { "slock", nm_test_slock }, 783*ce3ee1e7SLuigi Rizzo { "xlock", nm_test_xlock }, 784*ce3ee1e7SLuigi Rizzo }; 785*ce3ee1e7SLuigi Rizzo 786*ce3ee1e7SLuigi Rizzo static int 787*ce3ee1e7SLuigi Rizzo nm_test(struct nmreq *nmr) 788*ce3ee1e7SLuigi Rizzo { 789*ce3ee1e7SLuigi Rizzo uint32_t scale, n, test; 790*ce3ee1e7SLuigi Rizzo static int old_test = -1; 791*ce3ee1e7SLuigi Rizzo 792*ce3ee1e7SLuigi Rizzo test = nmr->nr_cmd; 793*ce3ee1e7SLuigi Rizzo scale = nmr->nr_offset; 794*ce3ee1e7SLuigi Rizzo n = sizeof(nm_tests) / sizeof(struct xxx) - 1; 795*ce3ee1e7SLuigi Rizzo if (test > n) { 796*ce3ee1e7SLuigi Rizzo D("test index too high, max %d", n); 797*ce3ee1e7SLuigi Rizzo return 0; 798*ce3ee1e7SLuigi Rizzo } 799*ce3ee1e7SLuigi Rizzo 800*ce3ee1e7SLuigi Rizzo if (old_test != test) { 801*ce3ee1e7SLuigi Rizzo D("test %s scale %d", nm_tests[test].name, scale); 802*ce3ee1e7SLuigi Rizzo old_test = test; 803*ce3ee1e7SLuigi Rizzo } 
804*ce3ee1e7SLuigi Rizzo nm_tests[test].fn(scale); 805*ce3ee1e7SLuigi Rizzo return 0; 806*ce3ee1e7SLuigi Rizzo } 807*ce3ee1e7SLuigi Rizzo #endif /* TEST_STUFF */ 808*ce3ee1e7SLuigi Rizzo 809f196ce38SLuigi Rizzo /* 810f196ce38SLuigi Rizzo * locate a bridge among the existing ones. 811*ce3ee1e7SLuigi Rizzo * MUST BE CALLED WITH NMG_LOCK() 812*ce3ee1e7SLuigi Rizzo * 813f196ce38SLuigi Rizzo * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME. 814f196ce38SLuigi Rizzo * We assume that this is called with a name of at least NM_NAME chars. 815f196ce38SLuigi Rizzo */ 816f196ce38SLuigi Rizzo static struct nm_bridge * 817f18be576SLuigi Rizzo nm_find_bridge(const char *name, int create) 818f196ce38SLuigi Rizzo { 819f18be576SLuigi Rizzo int i, l, namelen; 820f196ce38SLuigi Rizzo struct nm_bridge *b = NULL; 821f196ce38SLuigi Rizzo 822*ce3ee1e7SLuigi Rizzo NMG_LOCK_ASSERT(); 823*ce3ee1e7SLuigi Rizzo 824f196ce38SLuigi Rizzo namelen = strlen(NM_NAME); /* base length */ 825*ce3ee1e7SLuigi Rizzo l = name ? strlen(name) : 0; /* actual length */ 826*ce3ee1e7SLuigi Rizzo if (l < namelen) { 827*ce3ee1e7SLuigi Rizzo D("invalid bridge name %s", name ? 
name : NULL); 828*ce3ee1e7SLuigi Rizzo return NULL; 829*ce3ee1e7SLuigi Rizzo } 830f196ce38SLuigi Rizzo for (i = namelen + 1; i < l; i++) { 831f196ce38SLuigi Rizzo if (name[i] == ':') { 832f196ce38SLuigi Rizzo namelen = i; 833f196ce38SLuigi Rizzo break; 834f196ce38SLuigi Rizzo } 835f196ce38SLuigi Rizzo } 836f196ce38SLuigi Rizzo if (namelen >= IFNAMSIZ) 837f196ce38SLuigi Rizzo namelen = IFNAMSIZ; 838f196ce38SLuigi Rizzo ND("--- prefix is '%.*s' ---", namelen, name); 839f196ce38SLuigi Rizzo 840f18be576SLuigi Rizzo /* lookup the name, remember empty slot if there is one */ 841f18be576SLuigi Rizzo for (i = 0; i < NM_BRIDGES; i++) { 842f18be576SLuigi Rizzo struct nm_bridge *x = nm_bridges + i; 843f18be576SLuigi Rizzo 844*ce3ee1e7SLuigi Rizzo if (x->bdg_active_ports == 0) { 845f18be576SLuigi Rizzo if (create && b == NULL) 846f18be576SLuigi Rizzo b = x; /* record empty slot */ 847*ce3ee1e7SLuigi Rizzo } else if (x->bdg_namelen != namelen) { 848f18be576SLuigi Rizzo continue; 849*ce3ee1e7SLuigi Rizzo } else if (strncmp(name, x->bdg_basename, namelen) == 0) { 850f196ce38SLuigi Rizzo ND("found '%.*s' at %d", namelen, name, i); 851f18be576SLuigi Rizzo b = x; 852f196ce38SLuigi Rizzo break; 853f196ce38SLuigi Rizzo } 854f196ce38SLuigi Rizzo } 855f18be576SLuigi Rizzo if (i == NM_BRIDGES && b) { /* name not found, can create entry */ 856*ce3ee1e7SLuigi Rizzo /* initialize the bridge */ 857*ce3ee1e7SLuigi Rizzo strncpy(b->bdg_basename, name, namelen); 858*ce3ee1e7SLuigi Rizzo ND("create new bridge %s with ports %d", b->bdg_basename, 859*ce3ee1e7SLuigi Rizzo b->bdg_active_ports); 860*ce3ee1e7SLuigi Rizzo b->bdg_namelen = namelen; 861*ce3ee1e7SLuigi Rizzo b->bdg_active_ports = 0; 862*ce3ee1e7SLuigi Rizzo for (i = 0; i < NM_BDG_MAXPORTS; i++) 863*ce3ee1e7SLuigi Rizzo b->bdg_port_index[i] = i; 864f18be576SLuigi Rizzo /* set the default function */ 865f18be576SLuigi Rizzo b->nm_bdg_lookup = netmap_bdg_learning; 866f18be576SLuigi Rizzo /* reset the MAC address table */ 867f18be576SLuigi 
Rizzo bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); 868f196ce38SLuigi Rizzo } 869f196ce38SLuigi Rizzo return b; 870f196ce38SLuigi Rizzo } 871f18be576SLuigi Rizzo 872f18be576SLuigi Rizzo 873f18be576SLuigi Rizzo /* 874f18be576SLuigi Rizzo * Free the forwarding tables for rings attached to switch ports. 875f18be576SLuigi Rizzo */ 876f18be576SLuigi Rizzo static void 877f18be576SLuigi Rizzo nm_free_bdgfwd(struct netmap_adapter *na) 878f18be576SLuigi Rizzo { 879f18be576SLuigi Rizzo int nrings, i; 880f18be576SLuigi Rizzo struct netmap_kring *kring; 881f18be576SLuigi Rizzo 882*ce3ee1e7SLuigi Rizzo NMG_LOCK_ASSERT(); 883f18be576SLuigi Rizzo nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings; 884f18be576SLuigi Rizzo kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings; 885f18be576SLuigi Rizzo for (i = 0; i < nrings; i++) { 886f18be576SLuigi Rizzo if (kring[i].nkr_ft) { 887f18be576SLuigi Rizzo free(kring[i].nkr_ft, M_DEVBUF); 888f18be576SLuigi Rizzo kring[i].nkr_ft = NULL; /* protect from freeing twice */ 889f18be576SLuigi Rizzo } 890f18be576SLuigi Rizzo } 891f18be576SLuigi Rizzo if (nma_is_hw(na)) 892f18be576SLuigi Rizzo nm_free_bdgfwd(SWNA(na->ifp)); 893f18be576SLuigi Rizzo } 894f18be576SLuigi Rizzo 895f18be576SLuigi Rizzo 896f18be576SLuigi Rizzo /* 897f18be576SLuigi Rizzo * Allocate the forwarding tables for the rings attached to the bridge ports. 
898f18be576SLuigi Rizzo */ 899f18be576SLuigi Rizzo static int 900f18be576SLuigi Rizzo nm_alloc_bdgfwd(struct netmap_adapter *na) 901f18be576SLuigi Rizzo { 902f18be576SLuigi Rizzo int nrings, l, i, num_dstq; 903f18be576SLuigi Rizzo struct netmap_kring *kring; 904f18be576SLuigi Rizzo 905*ce3ee1e7SLuigi Rizzo NMG_LOCK_ASSERT(); 906f18be576SLuigi Rizzo /* all port:rings + broadcast */ 907f18be576SLuigi Rizzo num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1; 908*ce3ee1e7SLuigi Rizzo l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX; 909f18be576SLuigi Rizzo l += sizeof(struct nm_bdg_q) * num_dstq; 910*ce3ee1e7SLuigi Rizzo l += sizeof(uint16_t) * NM_BDG_BATCH_MAX; 911f18be576SLuigi Rizzo 912f18be576SLuigi Rizzo nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings; 913f18be576SLuigi Rizzo kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings; 914f18be576SLuigi Rizzo for (i = 0; i < nrings; i++) { 915f18be576SLuigi Rizzo struct nm_bdg_fwd *ft; 916f18be576SLuigi Rizzo struct nm_bdg_q *dstq; 917f18be576SLuigi Rizzo int j; 918f18be576SLuigi Rizzo 919f18be576SLuigi Rizzo ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO); 920f18be576SLuigi Rizzo if (!ft) { 921f18be576SLuigi Rizzo nm_free_bdgfwd(na); 922f18be576SLuigi Rizzo return ENOMEM; 923f18be576SLuigi Rizzo } 924*ce3ee1e7SLuigi Rizzo dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); 925*ce3ee1e7SLuigi Rizzo for (j = 0; j < num_dstq; j++) { 926*ce3ee1e7SLuigi Rizzo dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL; 927*ce3ee1e7SLuigi Rizzo dstq[j].bq_len = 0; 928*ce3ee1e7SLuigi Rizzo } 929f18be576SLuigi Rizzo kring[i].nkr_ft = ft; 930f18be576SLuigi Rizzo } 931f18be576SLuigi Rizzo if (nma_is_hw(na)) 932f18be576SLuigi Rizzo nm_alloc_bdgfwd(SWNA(na->ifp)); 933f18be576SLuigi Rizzo return 0; 934f18be576SLuigi Rizzo } 935f18be576SLuigi Rizzo 936ae10d1afSLuigi Rizzo 937ae10d1afSLuigi Rizzo /* 938ae10d1afSLuigi Rizzo * Fetch configuration from the device, to cope with dynamic 939ae10d1afSLuigi Rizzo * reconfigurations after loading the 
module. 940ae10d1afSLuigi Rizzo */ 941ae10d1afSLuigi Rizzo static int 942ae10d1afSLuigi Rizzo netmap_update_config(struct netmap_adapter *na) 943ae10d1afSLuigi Rizzo { 944ae10d1afSLuigi Rizzo struct ifnet *ifp = na->ifp; 945ae10d1afSLuigi Rizzo u_int txr, txd, rxr, rxd; 946ae10d1afSLuigi Rizzo 947ae10d1afSLuigi Rizzo txr = txd = rxr = rxd = 0; 948ae10d1afSLuigi Rizzo if (na->nm_config) { 949ae10d1afSLuigi Rizzo na->nm_config(ifp, &txr, &txd, &rxr, &rxd); 950ae10d1afSLuigi Rizzo } else { 951ae10d1afSLuigi Rizzo /* take whatever we had at init time */ 952ae10d1afSLuigi Rizzo txr = na->num_tx_rings; 953ae10d1afSLuigi Rizzo txd = na->num_tx_desc; 954ae10d1afSLuigi Rizzo rxr = na->num_rx_rings; 955ae10d1afSLuigi Rizzo rxd = na->num_rx_desc; 956ae10d1afSLuigi Rizzo } 957ae10d1afSLuigi Rizzo 958ae10d1afSLuigi Rizzo if (na->num_tx_rings == txr && na->num_tx_desc == txd && 959ae10d1afSLuigi Rizzo na->num_rx_rings == rxr && na->num_rx_desc == rxd) 960ae10d1afSLuigi Rizzo return 0; /* nothing changed */ 961ae10d1afSLuigi Rizzo if (netmap_verbose || na->refcount > 0) { 962ae10d1afSLuigi Rizzo D("stored config %s: txring %d x %d, rxring %d x %d", 963ae10d1afSLuigi Rizzo ifp->if_xname, 964ae10d1afSLuigi Rizzo na->num_tx_rings, na->num_tx_desc, 965ae10d1afSLuigi Rizzo na->num_rx_rings, na->num_rx_desc); 966ae10d1afSLuigi Rizzo D("new config %s: txring %d x %d, rxring %d x %d", 967ae10d1afSLuigi Rizzo ifp->if_xname, txr, txd, rxr, rxd); 968ae10d1afSLuigi Rizzo } 969ae10d1afSLuigi Rizzo if (na->refcount == 0) { 970ae10d1afSLuigi Rizzo D("configuration changed (but fine)"); 971ae10d1afSLuigi Rizzo na->num_tx_rings = txr; 972ae10d1afSLuigi Rizzo na->num_tx_desc = txd; 973ae10d1afSLuigi Rizzo na->num_rx_rings = rxr; 974ae10d1afSLuigi Rizzo na->num_rx_desc = rxd; 975ae10d1afSLuigi Rizzo return 0; 976ae10d1afSLuigi Rizzo } 977ae10d1afSLuigi Rizzo D("configuration changed while active, this is bad..."); 978ae10d1afSLuigi Rizzo return 1; 979ae10d1afSLuigi Rizzo } 980ae10d1afSLuigi Rizzo 
/*
 * Refresh the adapter configuration and create the netmap_if.
 * Returns NULL if the device geometry changed while in use.
 */
static struct netmap_if *
netmap_if_new(const char *ifname, struct netmap_adapter *na)
{
	if (netmap_update_config(na)) {
		/* configuration mismatch, report and fail */
		return NULL;
	}
	return netmap_mem_if_new(ifname, na);
}


/* Structure associated to each thread which registered an interface.
 *
 * The first 4 fields of this structure are written by NIOCREGIF and
 * read by poll() and NIOC?XSYNC.
 * There is low contention among writers (actually, a correct user program
 * should have no contention among writers) and among writers and readers,
 * so we use a single global lock to protect the structure initialization.
 * Since initialization involves the allocation of memory, we reuse the memory
 * allocator lock.
 * Read access to the structure is lock free. Readers must check that
 * np_nifp is not NULL before using the other fields.
 * If np_nifp is NULL initialization has not been performed, so they should
 * return an error to userlevel.
 *
 * The ref_done field is used to regulate access to the refcount in the
 * memory allocator. The refcount must be incremented at most once for
 * each open("/dev/netmap"). The increment is performed by the first
 * function that calls netmap_get_memory() (currently called by
 * mmap(), NIOCGINFO and NIOCREGIF).
 * If the refcount is incremented, it is then decremented when the
 * private structure is destroyed.
 */
struct netmap_priv_d {
	/* volatile: written once under NMG_LOCK, read lock-free (see above) */
	struct netmap_if * volatile np_nifp;	/* netmap if descriptor. */

	struct ifnet	*np_ifp;	/* device for which we hold a ref. */
	int		np_ringid;	/* from the ioctl */
	u_int		np_qfirst, np_qlast;	/* range of rings to scan */
	uint16_t	np_txpoll;

	struct netmap_mem_d *np_mref;	/* use with NMG_LOCK held */
#ifdef __FreeBSD__
	int		np_refcount;	/* use with NMG_LOCK held */
#endif /* __FreeBSD__ */
};

/* grab a reference to the memory allocator, if we don't have one already. The
 * reference is taken from the netmap_adapter registered with the priv.
 *
 */
static int
netmap_get_memory_locked(struct netmap_priv_d* p)
{
	struct netmap_mem_d *nmd;
	int error = 0;

	if (p->np_ifp == NULL) {
		if (!netmap_mmap_unreg)
			return ENODEV;
		/* for compatibility with older versions of the API
		 * we use the global allocator when no interface has been
		 * registered
		 */
		nmd = &nm_mem;
	} else {
		/* use the allocator of the interface's own adapter */
		nmd = NA(p->np_ifp)->nm_mem;
	}
	if (p->np_mref == NULL) {
		/* first reference: finalize the allocator and record it */
		error = netmap_mem_finalize(nmd);
		if (!error)
			p->np_mref = nmd;
	} else if (p->np_mref != nmd) {
		/* a virtual port has been registered, but previous
		 * syscalls already used the global allocator.
		 * We cannot continue
		 */
		error = ENODEV;
	}
	return error;
}

/* locking wrapper around netmap_get_memory_locked() */
static int
netmap_get_memory(struct netmap_priv_d* p)
{
	int error;
	NMG_LOCK();
	error = netmap_get_memory_locked(p);
	NMG_UNLOCK();
	return error;
}

/* true iff this priv already holds a memory-allocator reference */
static int
netmap_have_memory_locked(struct netmap_priv_d* p)
{
	return p->np_mref != NULL;
}

/* release the memory-allocator reference, if any (NMG_LOCK held) */
static void
netmap_drop_memory_locked(struct netmap_priv_d* p)
{
	if (p->np_mref) {
		netmap_mem_deref(p->np_mref);
		p->np_mref = NULL;
	}
}

/*
 * File descriptor's private data destructor.
 *
 * Call nm_register(ifp,0) to stop netmap mode on the interface and
 * revert to normal operation. We expect that np_ifp has not gone.
 * The second argument is the nifp to work on. In some cases it is
 * not attached yet to the netmap_priv_d so we need to pass it as
 * a separate argument.
 */
/* call with NMG_LOCK held */
static void
netmap_do_unregif(struct netmap_priv_d *priv, struct netmap_if *nifp)
{
	struct ifnet *ifp = priv->np_ifp;
	struct netmap_adapter *na = NA(ifp);

	NMG_LOCK_ASSERT();
	na->refcount--;
	if (na->refcount <= 0) {	/* last instance */
		u_int i;

		if (netmap_verbose)
			D("deleting last instance for %s", ifp->if_xname);
		/*
		 * (TO CHECK) This function is only called
		 * when the last reference to this file descriptor goes
		 * away. This means we cannot have any pending poll()
		 * or interrupt routine operating on the structure.
		 * XXX The file may be closed in a thread while
		 * another thread is using it.
		 * Linux keeps the file opened until the last reference
		 * by any outstanding ioctl/poll or mmap is gone.
		 * FreeBSD does not track mmap()s (but we do) and
		 * wakes up any sleeping poll(). Need to check what
		 * happens if the close() occurs while a concurrent
		 * syscall is running.
		 */
		na->nm_register(ifp, 0); /* off, clear IFCAP_NETMAP */
		/* Wake up any sleeping threads. netmap_poll will
		 * then return POLLERR
		 * XXX The wake up now must happen during *_down(), when
		 * we order all activities to stop. -gl
		 */
		nm_free_bdgfwd(na);
		/* +1 accounts for the extra ring attached to the host stack */
		for (i = 0; i < na->num_tx_rings + 1; i++) {
			mtx_destroy(&na->tx_rings[i].q_lock);
		}
		for (i = 0; i < na->num_rx_rings + 1; i++) {
			mtx_destroy(&na->rx_rings[i].q_lock);
		}
		/* XXX kqueue(9) needed; these will mirror knlist_init. */
		/* knlist_destroy(&na->tx_si.si_note); */
		/* knlist_destroy(&na->rx_si.si_note); */
		if (nma_is_hw(na))
			SWNA(ifp)->tx_rings = SWNA(ifp)->rx_rings = NULL;
	}
	/*
	 * netmap_mem_if_delete() deletes the nifp, and if this is
	 * the last instance also buffers, rings and krings.
	 */
	netmap_mem_if_delete(na, nifp);
}


/* we assume netmap adapter exists
 * Called with NMG_LOCK held
 *
 * Drop one bridge reference on ifp; on the last reference, detach the
 * port (and its host-stack companion, if any) from the bridge and
 * release or free the underlying adapter.
 */
static void
nm_if_rele(struct ifnet *ifp)
{
	int i, is_hw, hw, sw, lim;
	struct nm_bridge *b;
	struct netmap_adapter *na;
	uint8_t tmp[NM_BDG_MAXPORTS];

	NMG_LOCK_ASSERT();
	/* I can be called not only for get_ifp()-ed references where netmap's
	 * capability is guaranteed, but also for non-netmap-capable NICs.
	 */
	if (!NETMAP_CAPABLE(ifp) || !NA(ifp)->na_bdg) {
		if_rele(ifp);
		return;
	}
	na = NA(ifp);
	b = na->na_bdg;
	is_hw = nma_is_hw(na);

	ND("%s has %d references", ifp->if_xname, NA(ifp)->na_bdg_refcount);

	if (!DROP_BDG_REF(ifp))
		return;	/* not the last reference: nothing else to do */

	/*
	New algorithm:
	make a copy of bdg_port_index;
	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
	in the array of bdg_port_index, replacing them with
	entries from the bottom of the array;
	decrement bdg_active_ports;
	acquire BDG_WLOCK() and copy back the array.
	 */

	hw = NA(ifp)->bdg_port;
	sw = (is_hw && SWNA(ifp)->na_bdg) ? SWNA(ifp)->bdg_port : -1;
	lim = b->bdg_active_ports;

	ND("detach %d and %d (lim %d)", hw, sw, lim);
	/* make a copy of the list of active ports, update it,
	 * and then copy back within BDG_WLOCK().
	 */
	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
		if (hw >= 0 && tmp[i] == hw) {
			ND("detach hw %d at %d", hw, i);
			lim--; /* point to last active port */
			tmp[i] = tmp[lim]; /* swap with i */
			tmp[lim] = hw;	/* now this is inactive */
			hw = -1;	/* mark as handled */
		} else if (sw >= 0 && tmp[i] == sw) {
			ND("detach sw %d at %d", sw, i);
			lim--;
			tmp[i] = tmp[lim];
			tmp[lim] = sw;
			sw = -1;
		} else {
			i++;
		}
	}
	if (hw >= 0 || sw >= 0) {
		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
	}
	/* reload the indexes (hw/sw were consumed by the loop above) */
	hw = NA(ifp)->bdg_port;
	sw = (is_hw && SWNA(ifp)->na_bdg) ? SWNA(ifp)->bdg_port : -1;

	BDG_WLOCK(b);
	b->bdg_ports[hw] = NULL;
	na->na_bdg = NULL;
	if (sw >= 0) {
		b->bdg_ports[sw] = NULL;
		SWNA(ifp)->na_bdg = NULL;
	}
	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
	b->bdg_active_ports = lim;
	BDG_WUNLOCK(b);

	ND("now %d active ports", lim);
	if (lim == 0) {
		ND("marking bridge %s as free", b->bdg_basename);
		b->nm_bdg_lookup = NULL;	/* bridge slot becomes reusable */
	}

	if (is_hw) {
		if_rele(ifp);
	} else {
		/* a virtual port: na and ifp were allocated by us, free them */
		if (na->na_flags & NAF_MEM_OWNER)
			netmap_mem_private_delete(na->nm_mem);
		bzero(na, sizeof(*na));
		free(na, M_DEVBUF);
		bzero(ifp, sizeof(*ifp));
		free(ifp, M_DEVBUF);
	}
}


/*
 * returns 1 if this is the last instance and we can free priv
 */
static int
netmap_dtor_locked(struct netmap_priv_d *priv)
{
	struct ifnet *ifp = priv->np_ifp;

#ifdef __FreeBSD__
	/*
	 * np_refcount is the number of active mmaps on
	 * this file descriptor
	 */
	if (--priv->np_refcount > 0) {
		return 0;
	}
#endif /* __FreeBSD__ */
	if (ifp) {
		netmap_do_unregif(priv, priv->np_nifp);
	}
	netmap_drop_memory_locked(priv);
	if (ifp) {
		nm_if_rele(ifp); /* might also destroy *na */
	}
	return 1;
}

/* cdev destructor: tear down the priv and free it on last instance */
static void
netmap_dtor(void *data)
{
	struct netmap_priv_d *priv = data;
	int last_instance;

	NMG_LOCK();
	last_instance = netmap_dtor_locked(priv);
	NMG_UNLOCK();
	if (last_instance) {
		bzero(priv, sizeof(*priv)); /* for safety */
		free(priv, M_DEVBUF);
	}
}


#ifdef __FreeBSD__

/*
 * In order to track whether pages are still mapped, we hook into
 * the standard cdev_pager and intercept the constructor and
 * destructor.
 */

/*
 * Per-mapping state handed to the cdev pager: the device node (so we
 * can hold a reference on it for the lifetime of the VM object) and
 * the per-open-file netmap state whose memory backs the mapping.
 */
struct netmap_vm_handle_t {
	struct cdev 		*dev;
	struct netmap_priv_d	*priv;
};

/*
 * Pager constructor: called when the VM object for a netmap mapping is
 * created. Takes a reference on the device so it cannot go away while
 * pages are mapped. Always succeeds.
 */
static int
netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred, u_short *color)
{
	struct netmap_vm_handle_t *vmh = handle;
	D("handle %p size %jd prot %d foff %jd",
		handle, (intmax_t)size, prot, (intmax_t)foff);
	dev_ref(vmh->dev);
	return 0;
}


/*
 * Pager destructor: the last mapping of the VM object is gone.
 * Drop the private-state reference taken in netmap_mmap_single()
 * (netmap_dtor() decrements np_refcount and tears down on last use),
 * free the handle, and release the device reference.
 */
static void
netmap_dev_pager_dtor(void *handle)
{
	struct netmap_vm_handle_t *vmh = handle;
	struct cdev *dev = vmh->dev;
	struct netmap_priv_d *priv = vmh->priv;
	D("handle %p", handle);
	netmap_dtor(priv);
	free(vmh, M_DEVBUF);
	dev_rel(dev);
}

/*
 * Page-fault handler for the netmap VM object: translate the object
 * offset to the physical address of the corresponding netmap memory
 * page and install a fictitious page for it.
 * Returns VM_PAGER_FAIL if the offset does not map to netmap memory.
 */
static int
netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
	int prot, vm_page_t *mres)
{
	struct netmap_vm_handle_t *vmh = object->handle;
	struct netmap_priv_d *priv = vmh->priv;
	vm_paddr_t paddr;
	vm_page_t page;
	vm_memattr_t memattr;
	vm_pindex_t pidx;

	ND("object %p offset %jd prot %d mres %p",
			object, (intmax_t)offset, prot, mres);
	memattr = object->memattr;
	pidx = OFF_TO_IDX(offset);
	paddr = netmap_mem_ofstophys(priv->np_mref, offset);
	if (paddr == 0)
		return VM_PAGER_FAIL;

	if (((*mres)->flags & PG_FICTITIOUS) != 0) {
		/*
		 * If the passed in result page is a fake page, update it with
		 * the new physical address.
		 */
		page = *mres;
		vm_page_updatefake(page, paddr, memattr);
	} else {
		/*
		 * Replace the passed in reqpage page with our own fake page
		 * and free up all of the original pages.
		 */
#ifndef VM_OBJECT_WUNLOCK	/* FreeBSD < 10.x */
#define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK
#define VM_OBJECT_WLOCK	VM_OBJECT_LOCK
#endif /* VM_OBJECT_WUNLOCK */

		/* drop the object lock across the page allocation */
		VM_OBJECT_WUNLOCK(object);
		page = vm_page_getfake(paddr, memattr);
		VM_OBJECT_WLOCK(object);
		vm_page_lock(*mres);
		vm_page_free(*mres);
		vm_page_unlock(*mres);
		*mres = page;
		vm_page_insert(page, object, pidx);
	}
	page->valid = VM_PAGE_BITS_ALL;
	return (VM_PAGER_OK);
}


static struct cdev_pager_ops netmap_cdev_pager_ops = {
	.cdev_pg_ctor = netmap_dev_pager_ctor,
	.cdev_pg_dtor = netmap_dev_pager_dtor,
	.cdev_pg_fault = netmap_dev_pager_fault,
};


/*
 * d_mmap_single handler: create (or look up) the device-pager VM
 * object for an mmap() of /dev/netmap.
 * Takes an extra reference on the caller's netmap_priv_d (under
 * NMG_LOCK) so the state survives until the mapping is destroyed;
 * the reference is dropped in netmap_dev_pager_dtor() via
 * netmap_dtor(), or on the error paths below.
 */
static int
netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
	vm_size_t objsize, vm_object_t *objp, int prot)
{
	int error;
	struct netmap_vm_handle_t *vmh;
	struct netmap_priv_d *priv;
	vm_object_t obj;

	D("cdev %p foff %jd size %jd objp %p prot %d", cdev,
	    (intmax_t )*foff, (intmax_t )objsize, objp, prot);

	vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF,
			      M_NOWAIT | M_ZERO);
	if (vmh == NULL)
		return ENOMEM;
	vmh->dev = cdev;

	NMG_LOCK();
	error = devfs_get_cdevpriv((void**)&priv);
	if (error)
		goto err_unlock;
	vmh->priv = priv;
	priv->np_refcount++;	/* dropped in netmap_dev_pager_dtor() */
	NMG_UNLOCK();

	/* make sure the memory region backing the mapping exists */
	error = netmap_get_memory(priv);
	if (error)
		goto err_deref;

	obj = cdev_pager_allocate(vmh, OBJT_DEVICE,
		&netmap_cdev_pager_ops, objsize, prot,
		*foff, NULL);
	if (obj == NULL) {
		D("cdev_pager_allocate failed");
		error = EINVAL;
		goto err_deref;
	}

	*objp = obj;
	return 0;

err_deref:
	NMG_LOCK();
	priv->np_refcount--;
err_unlock:
	NMG_UNLOCK();
// err:
	free(vmh, M_DEVBUF);
	return error;
}


// XXX can we remove this ?
14438241616dSLuigi Rizzo static int 14448241616dSLuigi Rizzo netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td) 14458241616dSLuigi Rizzo { 1446ae10d1afSLuigi Rizzo if (netmap_verbose) 1447ae10d1afSLuigi Rizzo D("dev %p fflag 0x%x devtype %d td %p", 1448ae10d1afSLuigi Rizzo dev, fflag, devtype, td); 14498241616dSLuigi Rizzo return 0; 14508241616dSLuigi Rizzo } 14518241616dSLuigi Rizzo 1452f18be576SLuigi Rizzo 14538241616dSLuigi Rizzo static int 14548241616dSLuigi Rizzo netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 14558241616dSLuigi Rizzo { 14568241616dSLuigi Rizzo struct netmap_priv_d *priv; 14578241616dSLuigi Rizzo int error; 14588241616dSLuigi Rizzo 1459*ce3ee1e7SLuigi Rizzo (void)dev; 1460*ce3ee1e7SLuigi Rizzo (void)oflags; 1461*ce3ee1e7SLuigi Rizzo (void)devtype; 1462*ce3ee1e7SLuigi Rizzo (void)td; 1463*ce3ee1e7SLuigi Rizzo 1464*ce3ee1e7SLuigi Rizzo // XXX wait or nowait ? 14658241616dSLuigi Rizzo priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF, 14668241616dSLuigi Rizzo M_NOWAIT | M_ZERO); 14678241616dSLuigi Rizzo if (priv == NULL) 14688241616dSLuigi Rizzo return ENOMEM; 14698241616dSLuigi Rizzo 14708241616dSLuigi Rizzo error = devfs_set_cdevpriv(priv, netmap_dtor); 14718241616dSLuigi Rizzo if (error) 14728241616dSLuigi Rizzo return error; 14738241616dSLuigi Rizzo 1474*ce3ee1e7SLuigi Rizzo priv->np_refcount = 1; 1475*ce3ee1e7SLuigi Rizzo 14768241616dSLuigi Rizzo return 0; 147768b8534bSLuigi Rizzo } 1478f196ce38SLuigi Rizzo #endif /* __FreeBSD__ */ 147968b8534bSLuigi Rizzo 148068b8534bSLuigi Rizzo 148168b8534bSLuigi Rizzo /* 148202ad4083SLuigi Rizzo * Handlers for synchronization of the queues from/to the host. 
 * Netmap has two operating modes:
 * - in the default mode, the rings connected to the host stack are
 *   just another ring pair managed by userspace;
 * - in transparent mode (XXX to be defined) incoming packets
 *   (from the host or the NIC) are marked as NS_FORWARD upon
 *   arrival, and the user application has a chance to reset the
 *   flag for packets that should be dropped.
 *   On the RXSYNC or poll(), packets in RX rings between
 *   kring->nr_hwcur and ring->cur with NS_FORWARD still set are moved
 *   to the other side.
 * The transfer NIC --> host is relatively easy, just encapsulate
 * into mbufs and we are done. The host --> NIC side is slightly
 * harder because there might not be room in the tx ring so it
 * might take a while before releasing the buffer.
1497091fd0abSLuigi Rizzo */ 1498091fd0abSLuigi Rizzo 1499f18be576SLuigi Rizzo 1500091fd0abSLuigi Rizzo /* 1501091fd0abSLuigi Rizzo * pass a chain of buffers to the host stack as coming from 'dst' 1502091fd0abSLuigi Rizzo */ 1503091fd0abSLuigi Rizzo static void 1504091fd0abSLuigi Rizzo netmap_send_up(struct ifnet *dst, struct mbuf *head) 1505091fd0abSLuigi Rizzo { 1506091fd0abSLuigi Rizzo struct mbuf *m; 1507091fd0abSLuigi Rizzo 1508091fd0abSLuigi Rizzo /* send packets up, outside the lock */ 1509091fd0abSLuigi Rizzo while ((m = head) != NULL) { 1510091fd0abSLuigi Rizzo head = head->m_nextpkt; 1511091fd0abSLuigi Rizzo m->m_nextpkt = NULL; 1512091fd0abSLuigi Rizzo if (netmap_verbose & NM_VERB_HOST) 1513091fd0abSLuigi Rizzo D("sending up pkt %p size %d", m, MBUF_LEN(m)); 1514091fd0abSLuigi Rizzo NM_SEND_UP(dst, m); 1515091fd0abSLuigi Rizzo } 1516091fd0abSLuigi Rizzo } 1517091fd0abSLuigi Rizzo 1518091fd0abSLuigi Rizzo struct mbq { 1519091fd0abSLuigi Rizzo struct mbuf *head; 1520091fd0abSLuigi Rizzo struct mbuf *tail; 1521091fd0abSLuigi Rizzo int count; 1522091fd0abSLuigi Rizzo }; 1523091fd0abSLuigi Rizzo 1524f18be576SLuigi Rizzo 1525091fd0abSLuigi Rizzo /* 1526091fd0abSLuigi Rizzo * put a copy of the buffers marked NS_FORWARD into an mbuf chain. 1527091fd0abSLuigi Rizzo * Run from hwcur to cur - reserved 1528091fd0abSLuigi Rizzo */ 1529091fd0abSLuigi Rizzo static void 1530091fd0abSLuigi Rizzo netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force) 1531091fd0abSLuigi Rizzo { 1532091fd0abSLuigi Rizzo /* Take packets from hwcur to cur-reserved and pass them up. 1533091fd0abSLuigi Rizzo * In case of no buffers we give up. At the end of the loop, 1534091fd0abSLuigi Rizzo * the queue is drained in all cases. 
1535091fd0abSLuigi Rizzo * XXX handle reserved 1536091fd0abSLuigi Rizzo */ 1537*ce3ee1e7SLuigi Rizzo u_int lim = kring->nkr_num_slots - 1; 1538091fd0abSLuigi Rizzo struct mbuf *m, *tail = q->tail; 1539*ce3ee1e7SLuigi Rizzo u_int k = kring->ring->cur, n = kring->ring->reserved; 1540*ce3ee1e7SLuigi Rizzo struct netmap_mem_d *nmd = kring->na->nm_mem; 1541091fd0abSLuigi Rizzo 1542*ce3ee1e7SLuigi Rizzo /* compute the final position, ring->cur - ring->reserved */ 1543*ce3ee1e7SLuigi Rizzo if (n > 0) { 1544*ce3ee1e7SLuigi Rizzo if (k < n) 1545*ce3ee1e7SLuigi Rizzo k += kring->nkr_num_slots; 1546*ce3ee1e7SLuigi Rizzo k += n; 1547*ce3ee1e7SLuigi Rizzo } 1548091fd0abSLuigi Rizzo for (n = kring->nr_hwcur; n != k;) { 1549091fd0abSLuigi Rizzo struct netmap_slot *slot = &kring->ring->slot[n]; 1550091fd0abSLuigi Rizzo 1551*ce3ee1e7SLuigi Rizzo n = nm_next(n, lim); 1552091fd0abSLuigi Rizzo if ((slot->flags & NS_FORWARD) == 0 && !force) 1553091fd0abSLuigi Rizzo continue; 1554*ce3ee1e7SLuigi Rizzo if (slot->len < 14 || slot->len > NETMAP_BDG_BUF_SIZE(nmd)) { 1555091fd0abSLuigi Rizzo D("bad pkt at %d len %d", n, slot->len); 1556091fd0abSLuigi Rizzo continue; 1557091fd0abSLuigi Rizzo } 1558091fd0abSLuigi Rizzo slot->flags &= ~NS_FORWARD; // XXX needed ? 
1559*ce3ee1e7SLuigi Rizzo /* XXX adapt to the case of a multisegment packet */ 1560*ce3ee1e7SLuigi Rizzo m = m_devget(BDG_NMB(nmd, slot), slot->len, 0, kring->na->ifp, NULL); 1561091fd0abSLuigi Rizzo 1562091fd0abSLuigi Rizzo if (m == NULL) 1563091fd0abSLuigi Rizzo break; 1564091fd0abSLuigi Rizzo if (tail) 1565091fd0abSLuigi Rizzo tail->m_nextpkt = m; 1566091fd0abSLuigi Rizzo else 1567091fd0abSLuigi Rizzo q->head = m; 1568091fd0abSLuigi Rizzo tail = m; 1569091fd0abSLuigi Rizzo q->count++; 1570091fd0abSLuigi Rizzo m->m_nextpkt = NULL; 1571091fd0abSLuigi Rizzo } 1572091fd0abSLuigi Rizzo q->tail = tail; 1573091fd0abSLuigi Rizzo } 1574091fd0abSLuigi Rizzo 1575f18be576SLuigi Rizzo 1576091fd0abSLuigi Rizzo /* 1577091fd0abSLuigi Rizzo * The host ring has packets from nr_hwcur to (cur - reserved) 1578*ce3ee1e7SLuigi Rizzo * to be sent down to the NIC. 1579*ce3ee1e7SLuigi Rizzo * We need to use the queue lock on the source (host RX ring) 1580*ce3ee1e7SLuigi Rizzo * to protect against netmap_transmit. 1581*ce3ee1e7SLuigi Rizzo * If the user is well behaved we do not need to acquire locks 1582*ce3ee1e7SLuigi Rizzo * on the destination(s), 1583*ce3ee1e7SLuigi Rizzo * so we only need to make sure that there are no panics because 1584*ce3ee1e7SLuigi Rizzo * of user errors. 1585*ce3ee1e7SLuigi Rizzo * XXX verify 1586*ce3ee1e7SLuigi Rizzo * 1587*ce3ee1e7SLuigi Rizzo * We scan the tx rings, which have just been 1588091fd0abSLuigi Rizzo * flushed so nr_hwcur == cur. Pushing packets down means 1589091fd0abSLuigi Rizzo * increment cur and decrement avail. 
 * XXX to be verified
 */
static void
netmap_sw_to_nic(struct netmap_adapter *na)
{
	/* source: the host RX kring (last entry of rx_rings) */
	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
	struct netmap_kring *k1 = &na->tx_rings[0];
	u_int i, howmany, src_lim, dst_lim;

	/* XXX we should also check that the carrier is on */
	if (kring->nkr_stopped)
		return;

	mtx_lock(&kring->q_lock);

	if (kring->nkr_stopped)	/* re-check under the lock */
		goto out;

	howmany = kring->nr_hwavail;	/* XXX otherwise cur - reserved - nr_hwcur */

	src_lim = kring->nkr_num_slots - 1;
	for (i = 0; howmany > 0 && i < na->num_tx_rings; i++, k1++) {
		ND("%d packets left to ring %d (space %d)", howmany, i, k1->nr_hwavail);
		dst_lim = k1->nkr_num_slots - 1;
		/* swap buffers between the host slot and the NIC tx slot
		 * (zero-copy: only indices and lengths move); both sides
		 * are flagged NS_BUF_CHANGED so the kernel reloads the
		 * buffer addresses.
		 */
		while (howmany > 0 && k1->ring->avail > 0) {
			struct netmap_slot *src, *dst, tmp;
			src = &kring->ring->slot[kring->nr_hwcur];
			dst = &k1->ring->slot[k1->ring->cur];
			tmp = *src;
			src->buf_idx = dst->buf_idx;
			src->flags = NS_BUF_CHANGED;

			dst->buf_idx = tmp.buf_idx;
			dst->len = tmp.len;
			dst->flags = NS_BUF_CHANGED;
			ND("out len %d buf %d from %d to %d",
				dst->len, dst->buf_idx,
				kring->nr_hwcur, k1->ring->cur);

			kring->nr_hwcur = nm_next(kring->nr_hwcur, src_lim);
			howmany--;
			kring->nr_hwavail--;
			k1->ring->cur = nm_next(k1->ring->cur, dst_lim);
			k1->ring->avail--;
		}
		kring->ring->cur = kring->nr_hwcur; // XXX
		/* NOTE(review): k1 is also advanced in the for-header, so
		 * this extra increment makes the loop visit every OTHER tx
		 * ring (0, 2, 4, ...); the original author flagged it too —
		 * confirm whether the header's k1++ or this one should go.
		 */
		k1++; // XXX why?
	}
out:
	mtx_unlock(&kring->q_lock);
}


/*
 * netmap_txsync_to_host() passes packets up. We are called from a
 * system call in user process context, and the only contention
 * can be among multiple user threads erroneously calling
 * this routine concurrently.
 */
static void
netmap_txsync_to_host(struct netmap_adapter *na)
{
	/* the host TX kring is the last entry of tx_rings */
	struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
	struct netmap_ring *ring = kring->ring;
	u_int k, lim = kring->nkr_num_slots - 1;
	struct mbq q = { NULL, NULL, 0 };

	/* nm_kr_tryget fails only if another thread is already in,
	 * which is a user error (concurrent syscalls on the host ring)
	 */
	if (nm_kr_tryget(kring)) {
		D("ring %p busy (user error)", kring);
		return;
	}
	k = ring->cur;
	if (k > lim) {
		D("invalid ring index in stack TX kring %p", kring);
		netmap_ring_reinit(kring);
		nm_kr_put(kring);
		return;
	}

	/* Take packets from hwcur to cur and pass them up.
	 * In case of no buffers we give up. At the end of the loop,
	 * the queue is drained in all cases.
	 */
	netmap_grab_packets(kring, &q, 1);	/* force: grab everything */
	kring->nr_hwcur = k;
	kring->nr_hwavail = ring->avail = lim;

	nm_kr_put(kring);
	netmap_send_up(na->ifp, q.head);	/* outside the kring */
}


/*
 * This is the 'txsync' handler to send from a software ring to the
 * host stack.
 */
/* SWNA(ifp)->txrings[0] is always NA(ifp)->txrings[NA(ifp)->num_txrings] */
static int
netmap_bdg_to_host(struct ifnet *ifp, u_int ring_nr, int flags)
{
	(void)ring_nr;
	(void)flags;
	if (netmap_verbose > 255)
		RD(5, "sync to host %s ring %d", ifp->if_xname, ring_nr);
	netmap_txsync_to_host(NA(ifp));
	return 0;
}


/*
 * rxsync backend for packets coming from the host stack.
 * They have been put in the queue by netmap_transmit() so we
 * need to protect access to the kring using a lock.
 *
 * This routine also does the selrecord if called from the poll handler
 * (we know because td != NULL).
 *
 * NOTE: on linux, selrecord() is defined as a macro and uses pwait
 * as an additional hidden argument.
 */
static void
netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait)
{
	/* the host RX kring is the last entry of rx_rings */
	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
	struct netmap_ring *ring = kring->ring;
	u_int j, n, lim = kring->nkr_num_slots;
	u_int k = ring->cur, resvd = ring->reserved;

	(void)pwait;	/* disable unused warnings */

	if (kring->nkr_stopped) /* check a first time without lock */
		return;

	/* XXX as an optimization we could reuse na->core_lock */
	mtx_lock(&kring->q_lock);

	if (kring->nkr_stopped) /* check again with lock held */
		goto unlock_out;

	if (k >= lim) {
		netmap_ring_reinit(kring);
		goto unlock_out;
	}
	/* new packets are already set in nr_hwavail */
	/* skip past packets that userspace has released */
	j = kring->nr_hwcur;
	if (resvd > 0) {
		/* sanity check: reserved + avail cannot exceed the ring */
		if (resvd + ring->avail >= lim + 1) {
			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
			ring->reserved = resvd = 0; // XXX panic...
		}
		/* back up cur by 'reserved', modulo the ring size */
		k = (k >= resvd) ? k - resvd : k + lim - resvd;
	}
	if (j != k) {
		/* userspace consumed (k - j) mod lim slots */
		n = k >= j ? k - j : k + lim - j;
		kring->nr_hwavail -= n;
		kring->nr_hwcur = k;
	}
	k = ring->avail = kring->nr_hwavail - resvd;
	/* nothing to report: register for wakeup if called from poll() */
	if (k == 0 && td)
		selrecord(td, &kring->si);
	if (k && (netmap_verbose & NM_VERB_HOST))
		D("%d pkts from stack", k);
unlock_out:

	mtx_unlock(&kring->q_lock);
}


/*
 * MUST BE CALLED UNDER NMG_LOCK()
 *
 * get a refcounted reference to an interface.
 * This is always called in the execution of an ioctl().
 *
 * Return ENXIO if the interface does not exist, EINVAL if netmap
 * is not supported by the interface.
 * If successful, hold a reference.
 *
 * When the NIC is attached to a bridge, reference is managed
 * at na->na_bdg_refcount using ADD/DROP_BDG_REF() as well as
 * virtual ports. Hence, on the final DROP_BDG_REF(), the NIC
 * is detached from the bridge, then ifp's refcount is dropped (this
 * is equivalent to that ifp is destroyed in case of virtual ports.
 *
 * This function uses if_rele() when we want to prevent the NIC from
 * being detached from the bridge in error handling.
 * But once refcount
 * is acquired by this function, it must be released using nm_if_rele().
 */
static int
get_ifp(struct nmreq *nmr, struct ifnet **ifp, int create)
{
	const char *name = nmr->nr_name;
	int namelen = strlen(name);
	struct ifnet *iter = NULL;
	int no_prefix = 0;	/* set when name lacks the NM_NAME (vale) prefix */

	/* first try to see if this is a bridge port. */
	struct nm_bridge *b;
	struct netmap_adapter *na;
	int i, j, cand = -1, cand2 = -1;	/* candidate bridge port slots */
	int needed;

	NMG_LOCK_ASSERT();
	*ifp = NULL;	/* default */
	if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
		no_prefix = 1;	/* no VALE prefix */
		goto no_bridge_port;
	}

	b = nm_find_bridge(name, create);
	if (b == NULL) {
		D("no bridges available for '%s'", name);
		return (ENXIO);
	}

	/* Now we are sure that name starts with the bridge's name,
	 * lookup the port in the bridge. We need to scan the entire
	 * list. It is not important to hold a WLOCK on the bridge
	 * during the search because NMG_LOCK already guarantees
	 * that there are no other possible writers.
	 */

	/* lookup in the local list of ports */
	for (j = 0; j < b->bdg_active_ports; j++) {
		i = b->bdg_port_index[j];
		na = b->bdg_ports[i];
		// KASSERT(na != NULL);
		iter = na->ifp;
		/* XXX make sure the name only contains one : */
		if (!strcmp(iter->if_xname, name) /* virtual port */ ||
		    (namelen > b->bdg_namelen && !strcmp(iter->if_xname,
		    name + b->bdg_namelen + 1)) /* NIC */) {
			ADD_BDG_REF(iter);
			ND("found existing if %s refs %d", name,
				NA(iter)->na_bdg_refcount);
			*ifp = iter;
			/* we are done, this is surely netmap capable */
			return 0;
		}
	}
	/* not found, should we create it? */
	if (!create)
		return ENXIO;
	/* yes we should, see if we have space to attach entries */
	needed = 2; /* in some cases we only need 1 */
	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
		D("bridge full %d, cannot create new port", b->bdg_active_ports);
		return EINVAL;
	}
	/* record the next two ports available, but do not allocate yet */
	cand = b->bdg_port_index[b->bdg_active_ports];
	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
	ND("+++ bridge %s port %s used %d avail %d %d",
		b->bdg_basename, name, b->bdg_active_ports, cand, cand2);

	/*
	 * try see if there is a matching NIC with this name
	 * (after the bridge's name)
	 */
	iter = ifunit_ref(name + b->bdg_namelen + 1);
	if (!iter) { /* this is a virtual port */
		/* Create a temporary NA with arguments, then
		 * bdg_netmap_attach() will allocate the real one
		 * and attach it to the ifp
		 */
		struct netmap_adapter tmp_na;

		if (nmr->nr_cmd) {
			/* nr_cmd must be 0 for a virtual port */
			return EINVAL;
		}
		bzero(&tmp_na, sizeof(tmp_na));
		/* bound checking: clamp the requested ring counts and
		 * sizes into [1, NM_BDG_MAXRINGS/NM_BDG_MAXSLOTS] and
		 * report the effective values back to userspace.
		 */
		tmp_na.num_tx_rings = nmr->nr_tx_rings;
		nm_bound_var(&tmp_na.num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
		nmr->nr_tx_rings = tmp_na.num_tx_rings; // write back
		tmp_na.num_rx_rings = nmr->nr_rx_rings;
		nm_bound_var(&tmp_na.num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
		nmr->nr_rx_rings = tmp_na.num_rx_rings; // write back
		nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
				1, NM_BDG_MAXSLOTS, NULL);
		tmp_na.num_tx_desc = nmr->nr_tx_slots;
		nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
				1, NM_BDG_MAXSLOTS, NULL);
		tmp_na.num_rx_desc = nmr->nr_rx_slots;

		/* create a struct ifnet for the new port.
		 * need M_NOWAIT as we are under nma_lock
		 */
		iter = malloc(sizeof(*iter), M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!iter)
			return ENOMEM;

		strcpy(iter->if_xname, name);
		tmp_na.ifp = iter;
		/* bdg_netmap_attach creates a struct netmap_adapter */
		bdg_netmap_attach(&tmp_na);
		cand2 = -1;	/* only need one port */
	} else if (NETMAP_CAPABLE(iter)) { /* this is a NIC */
		/* make sure the NIC is not already in use */
		if (NETMAP_OWNED_BY_ANY(iter)) {
			D("NIC %s busy, cannot attach to bridge",
				iter->if_xname);
			if_rele(iter); /* don't detach from bridge */
			return EINVAL;
		}
		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
			cand2 = -1; /* only need one port */
	} else { /* not a netmap-capable NIC */
		if_rele(iter); /* don't detach from bridge */
		return EINVAL;
	}
	na = NA(iter);

	BDG_WLOCK(b);
	na->bdg_port = cand;
	ND("NIC  %p to bridge port %d", NA(iter), cand);
	/* bind the port to the bridge (virtual ports are not active) */
	b->bdg_ports[cand] = na;
	na->na_bdg = b;
	b->bdg_active_ports++;
	if (cand2 >= 0) {
		/* also bind the host stack to the bridge */
		b->bdg_ports[cand2] = SWNA(iter);
		SWNA(iter)->bdg_port = cand2;
		SWNA(iter)->na_bdg = b;
		b->bdg_active_ports++;
		ND("host %p to bridge port %d", SWNA(iter), cand2);
	}
	ADD_BDG_REF(iter);	// XXX one or two ?
	ND("if %s refs %d", name, NA(iter)->na_bdg_refcount);
	BDG_WUNLOCK(b);
	*ifp = iter;
	return 0;

no_bridge_port:
	/* plain NIC without the VALE prefix */
	*ifp = iter;
	if (! *ifp)
		*ifp = ifunit_ref(name);
	if (*ifp == NULL)
		return (ENXIO);

	if (NETMAP_CAPABLE(*ifp)) {
		/* Users cannot use the NIC attached to a bridge directly */
		if (no_prefix && NETMAP_OWNED_BY_KERN(*ifp)) {
			if_rele(*ifp); /* don't detach from bridge */
			return EINVAL;
		} else
			return 0;	/* valid pointer, we hold the refcount */
	}
	nm_if_rele(*ifp);
	return EINVAL;	// not NETMAP capable
}


/*
 * Error routine called when txsync/rxsync detects an error.
 * Can't do much more than resetting cur = hwcur, avail = hwavail.
 * Return 1 on reinit.
 *
 * This routine is only called by the upper half of the kernel.
 * It only reads hwcur (which is changed only by the upper half, too)
 * and hwavail (which may be changed by the lower half, but only on
 * a tx ring and only to increase it, so any error will be recovered
 * on the next call). For the above, we don't strictly need to call
 * it under lock.
195768b8534bSLuigi Rizzo */ 195868b8534bSLuigi Rizzo int 195968b8534bSLuigi Rizzo netmap_ring_reinit(struct netmap_kring *kring) 196068b8534bSLuigi Rizzo { 196168b8534bSLuigi Rizzo struct netmap_ring *ring = kring->ring; 196268b8534bSLuigi Rizzo u_int i, lim = kring->nkr_num_slots - 1; 196368b8534bSLuigi Rizzo int errors = 0; 196468b8534bSLuigi Rizzo 1965*ce3ee1e7SLuigi Rizzo // XXX KASSERT nm_kr_tryget 19668241616dSLuigi Rizzo RD(10, "called for %s", kring->na->ifp->if_xname); 196768b8534bSLuigi Rizzo if (ring->cur > lim) 196868b8534bSLuigi Rizzo errors++; 196968b8534bSLuigi Rizzo for (i = 0; i <= lim; i++) { 197068b8534bSLuigi Rizzo u_int idx = ring->slot[i].buf_idx; 197168b8534bSLuigi Rizzo u_int len = ring->slot[i].len; 197268b8534bSLuigi Rizzo if (idx < 2 || idx >= netmap_total_buffers) { 197368b8534bSLuigi Rizzo if (!errors++) 197468b8534bSLuigi Rizzo D("bad buffer at slot %d idx %d len %d ", i, idx, len); 197568b8534bSLuigi Rizzo ring->slot[i].buf_idx = 0; 197668b8534bSLuigi Rizzo ring->slot[i].len = 0; 1977*ce3ee1e7SLuigi Rizzo } else if (len > NETMAP_BDG_BUF_SIZE(kring->na->nm_mem)) { 197868b8534bSLuigi Rizzo ring->slot[i].len = 0; 197968b8534bSLuigi Rizzo if (!errors++) 198068b8534bSLuigi Rizzo D("bad len %d at slot %d idx %d", 198168b8534bSLuigi Rizzo len, i, idx); 198268b8534bSLuigi Rizzo } 198368b8534bSLuigi Rizzo } 198468b8534bSLuigi Rizzo if (errors) { 198568b8534bSLuigi Rizzo int pos = kring - kring->na->tx_rings; 1986d76bf4ffSLuigi Rizzo int n = kring->na->num_tx_rings + 1; 198768b8534bSLuigi Rizzo 19888241616dSLuigi Rizzo RD(10, "total %d errors", errors); 198968b8534bSLuigi Rizzo errors++; 19908241616dSLuigi Rizzo RD(10, "%s %s[%d] reinit, cur %d -> %d avail %d -> %d", 199168b8534bSLuigi Rizzo kring->na->ifp->if_xname, 199268b8534bSLuigi Rizzo pos < n ? "TX" : "RX", pos < n ? 
pos : pos - n, 199368b8534bSLuigi Rizzo ring->cur, kring->nr_hwcur, 199468b8534bSLuigi Rizzo ring->avail, kring->nr_hwavail); 199568b8534bSLuigi Rizzo ring->cur = kring->nr_hwcur; 199668b8534bSLuigi Rizzo ring->avail = kring->nr_hwavail; 199768b8534bSLuigi Rizzo } 199868b8534bSLuigi Rizzo return (errors ? 1 : 0); 199968b8534bSLuigi Rizzo } 200068b8534bSLuigi Rizzo 200168b8534bSLuigi Rizzo 200268b8534bSLuigi Rizzo /* 200368b8534bSLuigi Rizzo * Set the ring ID. For devices with a single queue, a request 200468b8534bSLuigi Rizzo * for all rings is the same as a single ring. 200568b8534bSLuigi Rizzo */ 200668b8534bSLuigi Rizzo static int 200768b8534bSLuigi Rizzo netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid) 200868b8534bSLuigi Rizzo { 200968b8534bSLuigi Rizzo struct ifnet *ifp = priv->np_ifp; 201068b8534bSLuigi Rizzo struct netmap_adapter *na = NA(ifp); 201168b8534bSLuigi Rizzo u_int i = ringid & NETMAP_RING_MASK; 201264ae02c3SLuigi Rizzo /* initially (np_qfirst == np_qlast) we don't want to lock */ 2013*ce3ee1e7SLuigi Rizzo u_int lim = na->num_rx_rings; 201468b8534bSLuigi Rizzo 2015d76bf4ffSLuigi Rizzo if (na->num_tx_rings > lim) 2016d76bf4ffSLuigi Rizzo lim = na->num_tx_rings; 201764ae02c3SLuigi Rizzo if ( (ringid & NETMAP_HW_RING) && i >= lim) { 201868b8534bSLuigi Rizzo D("invalid ring id %d", i); 201968b8534bSLuigi Rizzo return (EINVAL); 202068b8534bSLuigi Rizzo } 202168b8534bSLuigi Rizzo priv->np_ringid = ringid; 202268b8534bSLuigi Rizzo if (ringid & NETMAP_SW_RING) { 202364ae02c3SLuigi Rizzo priv->np_qfirst = NETMAP_SW_RING; 202464ae02c3SLuigi Rizzo priv->np_qlast = 0; 202568b8534bSLuigi Rizzo } else if (ringid & NETMAP_HW_RING) { 202668b8534bSLuigi Rizzo priv->np_qfirst = i; 202768b8534bSLuigi Rizzo priv->np_qlast = i + 1; 202868b8534bSLuigi Rizzo } else { 202968b8534bSLuigi Rizzo priv->np_qfirst = 0; 203064ae02c3SLuigi Rizzo priv->np_qlast = NETMAP_HW_RING ; 203168b8534bSLuigi Rizzo } 203268b8534bSLuigi Rizzo priv->np_txpoll = (ringid & 
NETMAP_NO_TX_POLL) ? 0 : 1; 2033ae10d1afSLuigi Rizzo if (netmap_verbose) { 203468b8534bSLuigi Rizzo if (ringid & NETMAP_SW_RING) 203568b8534bSLuigi Rizzo D("ringid %s set to SW RING", ifp->if_xname); 203668b8534bSLuigi Rizzo else if (ringid & NETMAP_HW_RING) 203768b8534bSLuigi Rizzo D("ringid %s set to HW RING %d", ifp->if_xname, 203868b8534bSLuigi Rizzo priv->np_qfirst); 203968b8534bSLuigi Rizzo else 204064ae02c3SLuigi Rizzo D("ringid %s set to all %d HW RINGS", ifp->if_xname, lim); 2041ae10d1afSLuigi Rizzo } 204268b8534bSLuigi Rizzo return 0; 204368b8534bSLuigi Rizzo } 204468b8534bSLuigi Rizzo 2045f18be576SLuigi Rizzo 2046f18be576SLuigi Rizzo /* 2047f18be576SLuigi Rizzo * possibly move the interface to netmap-mode. 2048f18be576SLuigi Rizzo * If success it returns a pointer to netmap_if, otherwise NULL. 2049*ce3ee1e7SLuigi Rizzo * This must be called with NMG_LOCK held. 2050f18be576SLuigi Rizzo */ 2051f18be576SLuigi Rizzo static struct netmap_if * 2052f18be576SLuigi Rizzo netmap_do_regif(struct netmap_priv_d *priv, struct ifnet *ifp, 2053f18be576SLuigi Rizzo uint16_t ringid, int *err) 2054f18be576SLuigi Rizzo { 2055f18be576SLuigi Rizzo struct netmap_adapter *na = NA(ifp); 2056f18be576SLuigi Rizzo struct netmap_if *nifp = NULL; 2057*ce3ee1e7SLuigi Rizzo int error, need_mem; 2058f18be576SLuigi Rizzo 2059*ce3ee1e7SLuigi Rizzo NMG_LOCK_ASSERT(); 2060f18be576SLuigi Rizzo /* ring configuration may have changed, fetch from the card */ 2061f18be576SLuigi Rizzo netmap_update_config(na); 2062f18be576SLuigi Rizzo priv->np_ifp = ifp; /* store the reference */ 2063f18be576SLuigi Rizzo error = netmap_set_ringid(priv, ringid); 2064f18be576SLuigi Rizzo if (error) 2065f18be576SLuigi Rizzo goto out; 2066*ce3ee1e7SLuigi Rizzo /* ensure allocators are ready */ 2067*ce3ee1e7SLuigi Rizzo need_mem = !netmap_have_memory_locked(priv); 2068*ce3ee1e7SLuigi Rizzo if (need_mem) { 2069*ce3ee1e7SLuigi Rizzo error = netmap_get_memory_locked(priv); 2070*ce3ee1e7SLuigi Rizzo ND("get_memory 
returned %d", error); 2071*ce3ee1e7SLuigi Rizzo if (error) 2072*ce3ee1e7SLuigi Rizzo goto out; 2073*ce3ee1e7SLuigi Rizzo } 2074f18be576SLuigi Rizzo nifp = netmap_if_new(ifp->if_xname, na); 2075f18be576SLuigi Rizzo if (nifp == NULL) { /* allocation failed */ 2076*ce3ee1e7SLuigi Rizzo /* we should drop the allocator, but only 2077*ce3ee1e7SLuigi Rizzo * if we were the ones who grabbed it 2078*ce3ee1e7SLuigi Rizzo */ 2079*ce3ee1e7SLuigi Rizzo if (need_mem) 2080*ce3ee1e7SLuigi Rizzo netmap_drop_memory_locked(priv); 2081f18be576SLuigi Rizzo error = ENOMEM; 2082*ce3ee1e7SLuigi Rizzo goto out; 2083*ce3ee1e7SLuigi Rizzo } 2084*ce3ee1e7SLuigi Rizzo na->refcount++; 2085*ce3ee1e7SLuigi Rizzo if (ifp->if_capenable & IFCAP_NETMAP) { 2086f18be576SLuigi Rizzo /* was already set */ 2087f18be576SLuigi Rizzo } else { 2088*ce3ee1e7SLuigi Rizzo u_int i; 2089f18be576SLuigi Rizzo /* Otherwise set the card in netmap mode 2090f18be576SLuigi Rizzo * and make it use the shared buffers. 2091*ce3ee1e7SLuigi Rizzo * 2092*ce3ee1e7SLuigi Rizzo * If the interface is attached to a bridge, lock it. 
2093f18be576SLuigi Rizzo */ 2094*ce3ee1e7SLuigi Rizzo if (NETMAP_OWNED_BY_KERN(ifp)) 2095*ce3ee1e7SLuigi Rizzo BDG_WLOCK(NA(ifp)->na_bdg); 2096f18be576SLuigi Rizzo for (i = 0 ; i < na->num_tx_rings + 1; i++) 2097f18be576SLuigi Rizzo mtx_init(&na->tx_rings[i].q_lock, "nm_txq_lock", 2098*ce3ee1e7SLuigi Rizzo NULL, MTX_DEF); 2099f18be576SLuigi Rizzo for (i = 0 ; i < na->num_rx_rings + 1; i++) { 2100f18be576SLuigi Rizzo mtx_init(&na->rx_rings[i].q_lock, "nm_rxq_lock", 2101*ce3ee1e7SLuigi Rizzo NULL, MTX_DEF); 2102f18be576SLuigi Rizzo } 2103f18be576SLuigi Rizzo if (nma_is_hw(na)) { 2104f18be576SLuigi Rizzo SWNA(ifp)->tx_rings = &na->tx_rings[na->num_tx_rings]; 2105f18be576SLuigi Rizzo SWNA(ifp)->rx_rings = &na->rx_rings[na->num_rx_rings]; 2106f18be576SLuigi Rizzo } 2107*ce3ee1e7SLuigi Rizzo /* 2108*ce3ee1e7SLuigi Rizzo * do not core lock because the race is harmless here, 2109*ce3ee1e7SLuigi Rizzo * there cannot be any traffic to netmap_transmit() 2110*ce3ee1e7SLuigi Rizzo */ 2111f18be576SLuigi Rizzo error = na->nm_register(ifp, 1); /* mode on */ 2112*ce3ee1e7SLuigi Rizzo // XXX do we need to nm_alloc_bdgfwd() in all cases ? 2113f18be576SLuigi Rizzo if (!error) 2114f18be576SLuigi Rizzo error = nm_alloc_bdgfwd(na); 2115f18be576SLuigi Rizzo if (error) { 2116*ce3ee1e7SLuigi Rizzo netmap_do_unregif(priv, nifp); 2117f18be576SLuigi Rizzo nifp = NULL; 2118f18be576SLuigi Rizzo } 2119*ce3ee1e7SLuigi Rizzo if (NETMAP_OWNED_BY_KERN(ifp)) 2120*ce3ee1e7SLuigi Rizzo BDG_WUNLOCK(NA(ifp)->na_bdg); 2121f18be576SLuigi Rizzo 2122f18be576SLuigi Rizzo } 2123f18be576SLuigi Rizzo out: 2124f18be576SLuigi Rizzo *err = error; 2125*ce3ee1e7SLuigi Rizzo if (nifp != NULL) { 2126*ce3ee1e7SLuigi Rizzo /* 2127*ce3ee1e7SLuigi Rizzo * advertise that the interface is ready bt setting ni_nifp. 
2128*ce3ee1e7SLuigi Rizzo * The barrier is needed because readers (poll and *SYNC) 2129*ce3ee1e7SLuigi Rizzo * check for priv->np_nifp != NULL without locking 2130*ce3ee1e7SLuigi Rizzo */ 2131*ce3ee1e7SLuigi Rizzo wmb(); /* make sure previous writes are visible to all CPUs */ 2132*ce3ee1e7SLuigi Rizzo priv->np_nifp = nifp; 2133*ce3ee1e7SLuigi Rizzo } 2134f18be576SLuigi Rizzo return nifp; 2135f18be576SLuigi Rizzo } 2136f18be576SLuigi Rizzo 2137f18be576SLuigi Rizzo /* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */ 2138f18be576SLuigi Rizzo static int 2139*ce3ee1e7SLuigi Rizzo nm_bdg_attach(struct nmreq *nmr) 2140f18be576SLuigi Rizzo { 2141f18be576SLuigi Rizzo struct ifnet *ifp; 2142f18be576SLuigi Rizzo struct netmap_if *nifp; 2143f18be576SLuigi Rizzo struct netmap_priv_d *npriv; 2144f18be576SLuigi Rizzo int error; 2145f18be576SLuigi Rizzo 2146f18be576SLuigi Rizzo npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO); 2147f18be576SLuigi Rizzo if (npriv == NULL) 2148f18be576SLuigi Rizzo return ENOMEM; 2149*ce3ee1e7SLuigi Rizzo NMG_LOCK(); 2150*ce3ee1e7SLuigi Rizzo error = get_ifp(nmr, &ifp, 1 /* create if not exists */); 2151*ce3ee1e7SLuigi Rizzo if (error) /* no device, or another bridge or user owns the device */ 2152*ce3ee1e7SLuigi Rizzo goto unlock_exit; 2153*ce3ee1e7SLuigi Rizzo /* get_ifp() sets na_bdg if this is a physical interface 2154*ce3ee1e7SLuigi Rizzo * that we can attach to a switch. 2155*ce3ee1e7SLuigi Rizzo */ 2156*ce3ee1e7SLuigi Rizzo if (!NETMAP_OWNED_BY_KERN(ifp)) { 2157*ce3ee1e7SLuigi Rizzo /* got reference to a virtual port or direct access to a NIC. 
2158*ce3ee1e7SLuigi Rizzo * perhaps specified no bridge prefix or wrong NIC name 2159*ce3ee1e7SLuigi Rizzo */ 2160*ce3ee1e7SLuigi Rizzo error = EINVAL; 2161*ce3ee1e7SLuigi Rizzo goto unref_exit; 2162*ce3ee1e7SLuigi Rizzo } 2163*ce3ee1e7SLuigi Rizzo 2164*ce3ee1e7SLuigi Rizzo if (NA(ifp)->refcount > 0) { /* already registered */ 2165*ce3ee1e7SLuigi Rizzo error = EBUSY; 2166*ce3ee1e7SLuigi Rizzo DROP_BDG_REF(ifp); 2167*ce3ee1e7SLuigi Rizzo goto unlock_exit; 2168*ce3ee1e7SLuigi Rizzo } 2169*ce3ee1e7SLuigi Rizzo 2170*ce3ee1e7SLuigi Rizzo nifp = netmap_do_regif(npriv, ifp, nmr->nr_ringid, &error); 2171*ce3ee1e7SLuigi Rizzo if (!nifp) { 2172*ce3ee1e7SLuigi Rizzo goto unref_exit; 2173*ce3ee1e7SLuigi Rizzo } 2174*ce3ee1e7SLuigi Rizzo 2175*ce3ee1e7SLuigi Rizzo NA(ifp)->na_kpriv = npriv; 2176*ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 2177*ce3ee1e7SLuigi Rizzo ND("registered %s to netmap-mode", ifp->if_xname); 2178*ce3ee1e7SLuigi Rizzo return 0; 2179*ce3ee1e7SLuigi Rizzo 2180*ce3ee1e7SLuigi Rizzo unref_exit: 2181*ce3ee1e7SLuigi Rizzo nm_if_rele(ifp); 2182*ce3ee1e7SLuigi Rizzo unlock_exit: 2183*ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 2184f18be576SLuigi Rizzo bzero(npriv, sizeof(*npriv)); 2185f18be576SLuigi Rizzo free(npriv, M_DEVBUF); 2186f18be576SLuigi Rizzo return error; 2187f18be576SLuigi Rizzo } 2188f18be576SLuigi Rizzo 2189*ce3ee1e7SLuigi Rizzo static int 2190*ce3ee1e7SLuigi Rizzo nm_bdg_detach(struct nmreq *nmr) 2191*ce3ee1e7SLuigi Rizzo { 2192*ce3ee1e7SLuigi Rizzo struct ifnet *ifp; 2193*ce3ee1e7SLuigi Rizzo int error; 2194*ce3ee1e7SLuigi Rizzo int last_instance; 2195*ce3ee1e7SLuigi Rizzo 2196*ce3ee1e7SLuigi Rizzo NMG_LOCK(); 2197*ce3ee1e7SLuigi Rizzo error = get_ifp(nmr, &ifp, 0 /* don't create */); 2198f18be576SLuigi Rizzo if (error) { /* no device, or another bridge or user owns the device */ 2199*ce3ee1e7SLuigi Rizzo goto unlock_exit; 2200*ce3ee1e7SLuigi Rizzo } 2201*ce3ee1e7SLuigi Rizzo /* XXX do we need to check this ? 
*/ 2202*ce3ee1e7SLuigi Rizzo if (!NETMAP_OWNED_BY_KERN(ifp)) { 2203f18be576SLuigi Rizzo /* got reference to a virtual port or direct access to a NIC. 2204f18be576SLuigi Rizzo * perhaps specified no bridge's prefix or wrong NIC's name 2205f18be576SLuigi Rizzo */ 2206f18be576SLuigi Rizzo error = EINVAL; 2207*ce3ee1e7SLuigi Rizzo goto unref_exit; 2208f18be576SLuigi Rizzo } 2209f18be576SLuigi Rizzo 2210f18be576SLuigi Rizzo if (NA(ifp)->refcount == 0) { /* not registered */ 2211f18be576SLuigi Rizzo error = EINVAL; 2212f18be576SLuigi Rizzo goto unref_exit; 2213f18be576SLuigi Rizzo } 2214f18be576SLuigi Rizzo 2215*ce3ee1e7SLuigi Rizzo DROP_BDG_REF(ifp); /* the one from get_ifp */ 2216*ce3ee1e7SLuigi Rizzo last_instance = netmap_dtor_locked(NA(ifp)->na_kpriv); /* unregister */ 2217*ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 2218*ce3ee1e7SLuigi Rizzo if (!last_instance) { 2219*ce3ee1e7SLuigi Rizzo D("--- error, trying to detach an entry with active mmaps"); 2220f18be576SLuigi Rizzo error = EINVAL; 2221*ce3ee1e7SLuigi Rizzo } else { 2222*ce3ee1e7SLuigi Rizzo struct netmap_priv_d *npriv = NA(ifp)->na_kpriv; 2223*ce3ee1e7SLuigi Rizzo NA(ifp)->na_kpriv = NULL; 2224*ce3ee1e7SLuigi Rizzo 2225*ce3ee1e7SLuigi Rizzo bzero(npriv, sizeof(*npriv)); 2226*ce3ee1e7SLuigi Rizzo free(npriv, M_DEVBUF); 2227f18be576SLuigi Rizzo } 2228*ce3ee1e7SLuigi Rizzo return error; 2229f18be576SLuigi Rizzo 2230*ce3ee1e7SLuigi Rizzo unref_exit: 2231*ce3ee1e7SLuigi Rizzo nm_if_rele(ifp); 2232*ce3ee1e7SLuigi Rizzo unlock_exit: 2233*ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 2234*ce3ee1e7SLuigi Rizzo return error; 2235f18be576SLuigi Rizzo } 2236f18be576SLuigi Rizzo 2237f18be576SLuigi Rizzo 2238f18be576SLuigi Rizzo /* Initialize necessary fields of sw adapter located in right after hw's 2239f18be576SLuigi Rizzo * one. sw adapter attaches a pair of sw rings of the netmap-mode NIC. 2240f18be576SLuigi Rizzo * It is always activated and deactivated at the same tie with the hw's one. 
2241f18be576SLuigi Rizzo * Thus we don't need refcounting on the sw adapter. 2242f18be576SLuigi Rizzo * Regardless of NIC's feature we use separate lock so that anybody can lock 2243f18be576SLuigi Rizzo * me independently from the hw adapter. 2244f18be576SLuigi Rizzo * Make sure nm_register is NULL to be handled as FALSE in nma_is_hw 2245f18be576SLuigi Rizzo */ 2246f18be576SLuigi Rizzo static void 2247f18be576SLuigi Rizzo netmap_attach_sw(struct ifnet *ifp) 2248f18be576SLuigi Rizzo { 2249f18be576SLuigi Rizzo struct netmap_adapter *hw_na = NA(ifp); 2250f18be576SLuigi Rizzo struct netmap_adapter *na = SWNA(ifp); 2251f18be576SLuigi Rizzo 2252f18be576SLuigi Rizzo na->ifp = ifp; 2253f18be576SLuigi Rizzo na->num_rx_rings = na->num_tx_rings = 1; 2254f18be576SLuigi Rizzo na->num_tx_desc = hw_na->num_tx_desc; 2255f18be576SLuigi Rizzo na->num_rx_desc = hw_na->num_rx_desc; 2256f18be576SLuigi Rizzo na->nm_txsync = netmap_bdg_to_host; 2257*ce3ee1e7SLuigi Rizzo /* we use the same memory allocator as the 2258*ce3ee1e7SLuigi Rizzo * the hw adapter */ 2259*ce3ee1e7SLuigi Rizzo na->nm_mem = hw_na->nm_mem; 2260f18be576SLuigi Rizzo } 2261f18be576SLuigi Rizzo 2262f18be576SLuigi Rizzo 2263*ce3ee1e7SLuigi Rizzo /* exported to kernel callers, e.g. OVS ? 2264*ce3ee1e7SLuigi Rizzo * Entry point. 2265*ce3ee1e7SLuigi Rizzo * Called without NMG_LOCK. 
2266*ce3ee1e7SLuigi Rizzo */ 2267f18be576SLuigi Rizzo int 2268f18be576SLuigi Rizzo netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func) 2269f18be576SLuigi Rizzo { 2270f18be576SLuigi Rizzo struct nm_bridge *b; 2271f18be576SLuigi Rizzo struct netmap_adapter *na; 2272f18be576SLuigi Rizzo struct ifnet *iter; 2273f18be576SLuigi Rizzo char *name = nmr->nr_name; 2274f18be576SLuigi Rizzo int cmd = nmr->nr_cmd, namelen = strlen(name); 2275f18be576SLuigi Rizzo int error = 0, i, j; 2276f18be576SLuigi Rizzo 2277f18be576SLuigi Rizzo switch (cmd) { 2278f18be576SLuigi Rizzo case NETMAP_BDG_ATTACH: 2279*ce3ee1e7SLuigi Rizzo error = nm_bdg_attach(nmr); 2280*ce3ee1e7SLuigi Rizzo break; 2281*ce3ee1e7SLuigi Rizzo 2282f18be576SLuigi Rizzo case NETMAP_BDG_DETACH: 2283*ce3ee1e7SLuigi Rizzo error = nm_bdg_detach(nmr); 2284f18be576SLuigi Rizzo break; 2285f18be576SLuigi Rizzo 2286f18be576SLuigi Rizzo case NETMAP_BDG_LIST: 2287f18be576SLuigi Rizzo /* this is used to enumerate bridges and ports */ 2288f18be576SLuigi Rizzo if (namelen) { /* look up indexes of bridge and port */ 2289f18be576SLuigi Rizzo if (strncmp(name, NM_NAME, strlen(NM_NAME))) { 2290f18be576SLuigi Rizzo error = EINVAL; 2291f18be576SLuigi Rizzo break; 2292f18be576SLuigi Rizzo } 2293*ce3ee1e7SLuigi Rizzo NMG_LOCK(); 2294f18be576SLuigi Rizzo b = nm_find_bridge(name, 0 /* don't create */); 2295f18be576SLuigi Rizzo if (!b) { 2296f18be576SLuigi Rizzo error = ENOENT; 2297*ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 2298f18be576SLuigi Rizzo break; 2299f18be576SLuigi Rizzo } 2300f18be576SLuigi Rizzo 2301f18be576SLuigi Rizzo error = ENOENT; 2302*ce3ee1e7SLuigi Rizzo for (j = 0; j < b->bdg_active_ports; j++) { 2303*ce3ee1e7SLuigi Rizzo i = b->bdg_port_index[j]; 2304*ce3ee1e7SLuigi Rizzo na = b->bdg_ports[i]; 2305*ce3ee1e7SLuigi Rizzo if (na == NULL) { 2306*ce3ee1e7SLuigi Rizzo D("---AAAAAAAAARGH-------"); 2307f18be576SLuigi Rizzo continue; 2308*ce3ee1e7SLuigi Rizzo } 2309f18be576SLuigi Rizzo iter = na->ifp; 2310f18be576SLuigi Rizzo /* the 
former and the latter identify a 2311f18be576SLuigi Rizzo * virtual port and a NIC, respectively 2312f18be576SLuigi Rizzo */ 2313f18be576SLuigi Rizzo if (!strcmp(iter->if_xname, name) || 2314*ce3ee1e7SLuigi Rizzo (namelen > b->bdg_namelen && 2315f18be576SLuigi Rizzo !strcmp(iter->if_xname, 2316*ce3ee1e7SLuigi Rizzo name + b->bdg_namelen + 1))) { 2317f18be576SLuigi Rizzo /* bridge index */ 2318f18be576SLuigi Rizzo nmr->nr_arg1 = b - nm_bridges; 2319f18be576SLuigi Rizzo nmr->nr_arg2 = i; /* port index */ 2320f18be576SLuigi Rizzo error = 0; 2321f18be576SLuigi Rizzo break; 2322f18be576SLuigi Rizzo } 2323f18be576SLuigi Rizzo } 2324*ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 2325f18be576SLuigi Rizzo } else { 2326f18be576SLuigi Rizzo /* return the first non-empty entry starting from 2327f18be576SLuigi Rizzo * bridge nr_arg1 and port nr_arg2. 2328f18be576SLuigi Rizzo * 2329f18be576SLuigi Rizzo * Users can detect the end of the same bridge by 2330f18be576SLuigi Rizzo * seeing the new and old value of nr_arg1, and can 2331f18be576SLuigi Rizzo * detect the end of all the bridge by error != 0 2332f18be576SLuigi Rizzo */ 2333f18be576SLuigi Rizzo i = nmr->nr_arg1; 2334f18be576SLuigi Rizzo j = nmr->nr_arg2; 2335f18be576SLuigi Rizzo 2336*ce3ee1e7SLuigi Rizzo NMG_LOCK(); 2337*ce3ee1e7SLuigi Rizzo for (error = ENOENT; i < NM_BRIDGES; i++) { 2338f18be576SLuigi Rizzo b = nm_bridges + i; 2339*ce3ee1e7SLuigi Rizzo if (j >= b->bdg_active_ports) { 2340*ce3ee1e7SLuigi Rizzo j = 0; /* following bridges scan from 0 */ 2341f18be576SLuigi Rizzo continue; 2342*ce3ee1e7SLuigi Rizzo } 2343f18be576SLuigi Rizzo nmr->nr_arg1 = i; 2344f18be576SLuigi Rizzo nmr->nr_arg2 = j; 2345*ce3ee1e7SLuigi Rizzo j = b->bdg_port_index[j]; 2346*ce3ee1e7SLuigi Rizzo na = b->bdg_ports[j]; 2347*ce3ee1e7SLuigi Rizzo iter = na->ifp; 2348*ce3ee1e7SLuigi Rizzo strncpy(name, iter->if_xname, (size_t)IFNAMSIZ); 2349f18be576SLuigi Rizzo error = 0; 2350f18be576SLuigi Rizzo break; 2351f18be576SLuigi Rizzo } 2352*ce3ee1e7SLuigi Rizzo 
NMG_UNLOCK(); 2353f18be576SLuigi Rizzo } 2354f18be576SLuigi Rizzo break; 2355f18be576SLuigi Rizzo 2356f18be576SLuigi Rizzo case NETMAP_BDG_LOOKUP_REG: 2357f18be576SLuigi Rizzo /* register a lookup function to the given bridge. 2358f18be576SLuigi Rizzo * nmr->nr_name may be just bridge's name (including ':' 2359f18be576SLuigi Rizzo * if it is not just NM_NAME). 2360f18be576SLuigi Rizzo */ 2361f18be576SLuigi Rizzo if (!func) { 2362f18be576SLuigi Rizzo error = EINVAL; 2363f18be576SLuigi Rizzo break; 2364f18be576SLuigi Rizzo } 2365*ce3ee1e7SLuigi Rizzo NMG_LOCK(); 2366f18be576SLuigi Rizzo b = nm_find_bridge(name, 0 /* don't create */); 2367f18be576SLuigi Rizzo if (!b) { 2368f18be576SLuigi Rizzo error = EINVAL; 2369*ce3ee1e7SLuigi Rizzo } else { 2370f18be576SLuigi Rizzo b->nm_bdg_lookup = func; 2371*ce3ee1e7SLuigi Rizzo } 2372*ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 2373f18be576SLuigi Rizzo break; 2374*ce3ee1e7SLuigi Rizzo 2375f18be576SLuigi Rizzo default: 2376f18be576SLuigi Rizzo D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd); 2377f18be576SLuigi Rizzo error = EINVAL; 2378f18be576SLuigi Rizzo break; 2379f18be576SLuigi Rizzo } 2380f18be576SLuigi Rizzo return error; 2381f18be576SLuigi Rizzo } 2382f18be576SLuigi Rizzo 2383f18be576SLuigi Rizzo 238468b8534bSLuigi Rizzo /* 238568b8534bSLuigi Rizzo * ioctl(2) support for the "netmap" device. 238668b8534bSLuigi Rizzo * 238768b8534bSLuigi Rizzo * Following a list of accepted commands: 238868b8534bSLuigi Rizzo * - NIOCGINFO 238968b8534bSLuigi Rizzo * - SIOCGIFADDR just for convenience 239068b8534bSLuigi Rizzo * - NIOCREGIF 239168b8534bSLuigi Rizzo * - NIOCUNREGIF 239268b8534bSLuigi Rizzo * - NIOCTXSYNC 239368b8534bSLuigi Rizzo * - NIOCRXSYNC 239468b8534bSLuigi Rizzo * 239568b8534bSLuigi Rizzo * Return 0 on success, errno otherwise. 
239668b8534bSLuigi Rizzo */ 239768b8534bSLuigi Rizzo static int 23980b8ed8e0SLuigi Rizzo netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, 23990b8ed8e0SLuigi Rizzo int fflag, struct thread *td) 240068b8534bSLuigi Rizzo { 240168b8534bSLuigi Rizzo struct netmap_priv_d *priv = NULL; 2402*ce3ee1e7SLuigi Rizzo struct ifnet *ifp = NULL; 240368b8534bSLuigi Rizzo struct nmreq *nmr = (struct nmreq *) data; 2404*ce3ee1e7SLuigi Rizzo struct netmap_adapter *na = NULL; 240568b8534bSLuigi Rizzo int error; 240664ae02c3SLuigi Rizzo u_int i, lim; 240768b8534bSLuigi Rizzo struct netmap_if *nifp; 2408*ce3ee1e7SLuigi Rizzo struct netmap_kring *krings; 240968b8534bSLuigi Rizzo 24100b8ed8e0SLuigi Rizzo (void)dev; /* UNUSED */ 24110b8ed8e0SLuigi Rizzo (void)fflag; /* UNUSED */ 2412f196ce38SLuigi Rizzo #ifdef linux 2413f196ce38SLuigi Rizzo #define devfs_get_cdevpriv(pp) \ 2414f196ce38SLuigi Rizzo ({ *(struct netmap_priv_d **)pp = ((struct file *)td)->private_data; \ 2415f196ce38SLuigi Rizzo (*pp ? 0 : ENOENT); }) 2416f196ce38SLuigi Rizzo 2417f196ce38SLuigi Rizzo /* devfs_set_cdevpriv cannot fail on linux */ 2418f196ce38SLuigi Rizzo #define devfs_set_cdevpriv(p, fn) \ 2419f196ce38SLuigi Rizzo ({ ((struct file *)td)->private_data = p; (p ? 0 : EINVAL); }) 2420f196ce38SLuigi Rizzo 2421f196ce38SLuigi Rizzo 2422f196ce38SLuigi Rizzo #define devfs_clear_cdevpriv() do { \ 2423f196ce38SLuigi Rizzo netmap_dtor(priv); ((struct file *)td)->private_data = 0; \ 2424f196ce38SLuigi Rizzo } while (0) 2425f196ce38SLuigi Rizzo #endif /* linux */ 2426f196ce38SLuigi Rizzo 2427506cc70cSLuigi Rizzo CURVNET_SET(TD_TO_VNET(td)); 2428506cc70cSLuigi Rizzo 242968b8534bSLuigi Rizzo error = devfs_get_cdevpriv((void **)&priv); 24308241616dSLuigi Rizzo if (error) { 2431506cc70cSLuigi Rizzo CURVNET_RESTORE(); 24328241616dSLuigi Rizzo /* XXX ENOENT should be impossible, since the priv 24338241616dSLuigi Rizzo * is now created in the open */ 24348241616dSLuigi Rizzo return (error == ENOENT ? 
ENXIO : error); 2435506cc70cSLuigi Rizzo } 243668b8534bSLuigi Rizzo 2437f196ce38SLuigi Rizzo nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0'; /* truncate name */ 243868b8534bSLuigi Rizzo switch (cmd) { 243968b8534bSLuigi Rizzo case NIOCGINFO: /* return capabilities etc */ 244064ae02c3SLuigi Rizzo if (nmr->nr_version != NETMAP_API) { 2441*ce3ee1e7SLuigi Rizzo #ifdef TEST_STUFF 2442*ce3ee1e7SLuigi Rizzo /* some test code for locks etc */ 2443*ce3ee1e7SLuigi Rizzo if (nmr->nr_version == 666) { 2444*ce3ee1e7SLuigi Rizzo error = nm_test(nmr); 2445*ce3ee1e7SLuigi Rizzo break; 2446*ce3ee1e7SLuigi Rizzo } 2447*ce3ee1e7SLuigi Rizzo #endif /* TEST_STUFF */ 244864ae02c3SLuigi Rizzo D("API mismatch got %d have %d", 244964ae02c3SLuigi Rizzo nmr->nr_version, NETMAP_API); 245064ae02c3SLuigi Rizzo nmr->nr_version = NETMAP_API; 245164ae02c3SLuigi Rizzo error = EINVAL; 245264ae02c3SLuigi Rizzo break; 245364ae02c3SLuigi Rizzo } 2454f18be576SLuigi Rizzo if (nmr->nr_cmd == NETMAP_BDG_LIST) { 2455f18be576SLuigi Rizzo error = netmap_bdg_ctl(nmr, NULL); 2456f18be576SLuigi Rizzo break; 2457f18be576SLuigi Rizzo } 2458*ce3ee1e7SLuigi Rizzo 2459*ce3ee1e7SLuigi Rizzo NMG_LOCK(); 2460*ce3ee1e7SLuigi Rizzo do { 2461*ce3ee1e7SLuigi Rizzo /* memsize is always valid */ 2462*ce3ee1e7SLuigi Rizzo struct netmap_mem_d *nmd = &nm_mem; 2463*ce3ee1e7SLuigi Rizzo u_int memflags; 2464*ce3ee1e7SLuigi Rizzo 2465*ce3ee1e7SLuigi Rizzo if (nmr->nr_name[0] != '\0') { 2466*ce3ee1e7SLuigi Rizzo /* get a refcount */ 2467*ce3ee1e7SLuigi Rizzo error = get_ifp(nmr, &ifp, 1 /* create */); 24688241616dSLuigi Rizzo if (error) 24698241616dSLuigi Rizzo break; 2470*ce3ee1e7SLuigi Rizzo na = NA(ifp); /* retrieve the netmap adapter */ 2471*ce3ee1e7SLuigi Rizzo nmd = na->nm_mem; /* and its memory allocator */ 2472*ce3ee1e7SLuigi Rizzo } 2473*ce3ee1e7SLuigi Rizzo 2474*ce3ee1e7SLuigi Rizzo error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags); 2475*ce3ee1e7SLuigi Rizzo if (error) 2476*ce3ee1e7SLuigi Rizzo break; 
2477*ce3ee1e7SLuigi Rizzo if (na == NULL) /* only memory info */ 2478*ce3ee1e7SLuigi Rizzo break; 24798241616dSLuigi Rizzo nmr->nr_offset = 0; 24808241616dSLuigi Rizzo nmr->nr_rx_slots = nmr->nr_tx_slots = 0; 2481ae10d1afSLuigi Rizzo netmap_update_config(na); 2482d76bf4ffSLuigi Rizzo nmr->nr_rx_rings = na->num_rx_rings; 2483d76bf4ffSLuigi Rizzo nmr->nr_tx_rings = na->num_tx_rings; 248464ae02c3SLuigi Rizzo nmr->nr_rx_slots = na->num_rx_desc; 248564ae02c3SLuigi Rizzo nmr->nr_tx_slots = na->num_tx_desc; 2486*ce3ee1e7SLuigi Rizzo if (memflags & NETMAP_MEM_PRIVATE) 2487*ce3ee1e7SLuigi Rizzo nmr->nr_ringid |= NETMAP_PRIV_MEM; 2488*ce3ee1e7SLuigi Rizzo } while (0); 2489*ce3ee1e7SLuigi Rizzo if (ifp) 2490f196ce38SLuigi Rizzo nm_if_rele(ifp); /* return the refcount */ 2491*ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 249268b8534bSLuigi Rizzo break; 249368b8534bSLuigi Rizzo 249468b8534bSLuigi Rizzo case NIOCREGIF: 249564ae02c3SLuigi Rizzo if (nmr->nr_version != NETMAP_API) { 249664ae02c3SLuigi Rizzo nmr->nr_version = NETMAP_API; 249764ae02c3SLuigi Rizzo error = EINVAL; 249864ae02c3SLuigi Rizzo break; 249964ae02c3SLuigi Rizzo } 2500f18be576SLuigi Rizzo /* possibly attach/detach NIC and VALE switch */ 2501f18be576SLuigi Rizzo i = nmr->nr_cmd; 2502f18be576SLuigi Rizzo if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH) { 2503f18be576SLuigi Rizzo error = netmap_bdg_ctl(nmr, NULL); 2504f18be576SLuigi Rizzo break; 2505f18be576SLuigi Rizzo } else if (i != 0) { 2506f18be576SLuigi Rizzo D("nr_cmd must be 0 not %d", i); 2507f18be576SLuigi Rizzo error = EINVAL; 2508f18be576SLuigi Rizzo break; 2509f18be576SLuigi Rizzo } 2510f18be576SLuigi Rizzo 25118241616dSLuigi Rizzo /* protect access to priv from concurrent NIOCREGIF */ 2512*ce3ee1e7SLuigi Rizzo NMG_LOCK(); 2513*ce3ee1e7SLuigi Rizzo do { 2514*ce3ee1e7SLuigi Rizzo u_int memflags; 2515*ce3ee1e7SLuigi Rizzo 25168241616dSLuigi Rizzo if (priv->np_ifp != NULL) { /* thread already registered */ 2517506cc70cSLuigi Rizzo error = 
netmap_set_ringid(priv, nmr->nr_ringid); 2518506cc70cSLuigi Rizzo break; 2519506cc70cSLuigi Rizzo } 252068b8534bSLuigi Rizzo /* find the interface and a reference */ 2521*ce3ee1e7SLuigi Rizzo error = get_ifp(nmr, &ifp, 1 /* create */); /* keep reference */ 252268b8534bSLuigi Rizzo if (error) 2523*ce3ee1e7SLuigi Rizzo break; 2524*ce3ee1e7SLuigi Rizzo if (NETMAP_OWNED_BY_KERN(ifp)) { 2525f18be576SLuigi Rizzo nm_if_rele(ifp); 2526*ce3ee1e7SLuigi Rizzo error = EBUSY; 2527*ce3ee1e7SLuigi Rizzo break; 2528f196ce38SLuigi Rizzo } 2529f18be576SLuigi Rizzo nifp = netmap_do_regif(priv, ifp, nmr->nr_ringid, &error); 2530f18be576SLuigi Rizzo if (!nifp) { /* reg. failed, release priv and ref */ 2531f196ce38SLuigi Rizzo nm_if_rele(ifp); /* return the refcount */ 25328241616dSLuigi Rizzo priv->np_ifp = NULL; 25338241616dSLuigi Rizzo priv->np_nifp = NULL; 2534*ce3ee1e7SLuigi Rizzo break; 253568b8534bSLuigi Rizzo } 253668b8534bSLuigi Rizzo 253768b8534bSLuigi Rizzo /* return the offset of the netmap_if object */ 2538f18be576SLuigi Rizzo na = NA(ifp); /* retrieve netmap adapter */ 2539d76bf4ffSLuigi Rizzo nmr->nr_rx_rings = na->num_rx_rings; 2540d76bf4ffSLuigi Rizzo nmr->nr_tx_rings = na->num_tx_rings; 254164ae02c3SLuigi Rizzo nmr->nr_rx_slots = na->num_rx_desc; 254264ae02c3SLuigi Rizzo nmr->nr_tx_slots = na->num_tx_desc; 2543*ce3ee1e7SLuigi Rizzo error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags); 2544*ce3ee1e7SLuigi Rizzo if (error) { 2545*ce3ee1e7SLuigi Rizzo nm_if_rele(ifp); 2546*ce3ee1e7SLuigi Rizzo break; 2547*ce3ee1e7SLuigi Rizzo } 2548*ce3ee1e7SLuigi Rizzo if (memflags & NETMAP_MEM_PRIVATE) { 2549*ce3ee1e7SLuigi Rizzo nmr->nr_ringid |= NETMAP_PRIV_MEM; 2550*ce3ee1e7SLuigi Rizzo *(uint32_t *)&nifp->ni_flags |= NI_PRIV_MEM; 2551*ce3ee1e7SLuigi Rizzo } 2552*ce3ee1e7SLuigi Rizzo nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp); 2553*ce3ee1e7SLuigi Rizzo } while (0); 2554*ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 255568b8534bSLuigi Rizzo break; 255668b8534bSLuigi 
Rizzo 255768b8534bSLuigi Rizzo case NIOCUNREGIF: 25588241616dSLuigi Rizzo // XXX we have no data here ? 25598241616dSLuigi Rizzo D("deprecated, data is %p", nmr); 25608241616dSLuigi Rizzo error = EINVAL; 256168b8534bSLuigi Rizzo break; 256268b8534bSLuigi Rizzo 256368b8534bSLuigi Rizzo case NIOCTXSYNC: 256468b8534bSLuigi Rizzo case NIOCRXSYNC: 25658241616dSLuigi Rizzo nifp = priv->np_nifp; 25668241616dSLuigi Rizzo 25678241616dSLuigi Rizzo if (nifp == NULL) { 2568506cc70cSLuigi Rizzo error = ENXIO; 2569506cc70cSLuigi Rizzo break; 2570506cc70cSLuigi Rizzo } 25718241616dSLuigi Rizzo rmb(); /* make sure following reads are not from cache */ 25728241616dSLuigi Rizzo 257368b8534bSLuigi Rizzo ifp = priv->np_ifp; /* we have a reference */ 25748241616dSLuigi Rizzo 25758241616dSLuigi Rizzo if (ifp == NULL) { 25768241616dSLuigi Rizzo D("Internal error: nifp != NULL && ifp == NULL"); 25778241616dSLuigi Rizzo error = ENXIO; 25788241616dSLuigi Rizzo break; 25798241616dSLuigi Rizzo } 25808241616dSLuigi Rizzo 258168b8534bSLuigi Rizzo na = NA(ifp); /* retrieve netmap adapter */ 258264ae02c3SLuigi Rizzo if (priv->np_qfirst == NETMAP_SW_RING) { /* host rings */ 258368b8534bSLuigi Rizzo if (cmd == NIOCTXSYNC) 2584*ce3ee1e7SLuigi Rizzo netmap_txsync_to_host(na); 258568b8534bSLuigi Rizzo else 2586*ce3ee1e7SLuigi Rizzo netmap_rxsync_from_host(na, NULL, NULL); 2587506cc70cSLuigi Rizzo break; 258868b8534bSLuigi Rizzo } 258964ae02c3SLuigi Rizzo /* find the last ring to scan */ 259064ae02c3SLuigi Rizzo lim = priv->np_qlast; 259164ae02c3SLuigi Rizzo if (lim == NETMAP_HW_RING) 25923c0caf6cSLuigi Rizzo lim = (cmd == NIOCTXSYNC) ? 2593d76bf4ffSLuigi Rizzo na->num_tx_rings : na->num_rx_rings; 259468b8534bSLuigi Rizzo 2595*ce3ee1e7SLuigi Rizzo krings = (cmd == NIOCTXSYNC) ? 
na->tx_rings : na->rx_rings; 259664ae02c3SLuigi Rizzo for (i = priv->np_qfirst; i < lim; i++) { 2597*ce3ee1e7SLuigi Rizzo struct netmap_kring *kring = krings + i; 2598*ce3ee1e7SLuigi Rizzo if (nm_kr_tryget(kring)) { 2599*ce3ee1e7SLuigi Rizzo error = EBUSY; 2600*ce3ee1e7SLuigi Rizzo goto out; 2601*ce3ee1e7SLuigi Rizzo } 260268b8534bSLuigi Rizzo if (cmd == NIOCTXSYNC) { 260368b8534bSLuigi Rizzo if (netmap_verbose & NM_VERB_TXSYNC) 26043c0caf6cSLuigi Rizzo D("pre txsync ring %d cur %d hwcur %d", 260568b8534bSLuigi Rizzo i, kring->ring->cur, 260668b8534bSLuigi Rizzo kring->nr_hwcur); 2607*ce3ee1e7SLuigi Rizzo na->nm_txsync(ifp, i, NAF_FORCE_RECLAIM); 260868b8534bSLuigi Rizzo if (netmap_verbose & NM_VERB_TXSYNC) 26093c0caf6cSLuigi Rizzo D("post txsync ring %d cur %d hwcur %d", 261068b8534bSLuigi Rizzo i, kring->ring->cur, 261168b8534bSLuigi Rizzo kring->nr_hwcur); 261268b8534bSLuigi Rizzo } else { 2613*ce3ee1e7SLuigi Rizzo na->nm_rxsync(ifp, i, NAF_FORCE_READ); 261468b8534bSLuigi Rizzo microtime(&na->rx_rings[i].ring->ts); 261568b8534bSLuigi Rizzo } 2616*ce3ee1e7SLuigi Rizzo nm_kr_put(kring); 261768b8534bSLuigi Rizzo } 261868b8534bSLuigi Rizzo 261968b8534bSLuigi Rizzo break; 262068b8534bSLuigi Rizzo 2621f196ce38SLuigi Rizzo #ifdef __FreeBSD__ 262268b8534bSLuigi Rizzo case BIOCIMMEDIATE: 262368b8534bSLuigi Rizzo case BIOCGHDRCMPLT: 262468b8534bSLuigi Rizzo case BIOCSHDRCMPLT: 262568b8534bSLuigi Rizzo case BIOCSSEESENT: 262668b8534bSLuigi Rizzo D("ignore BIOCIMMEDIATE/BIOCSHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT"); 262768b8534bSLuigi Rizzo break; 262868b8534bSLuigi Rizzo 2629babc7c12SLuigi Rizzo default: /* allow device-specific ioctls */ 263068b8534bSLuigi Rizzo { 263168b8534bSLuigi Rizzo struct socket so; 2632*ce3ee1e7SLuigi Rizzo 263368b8534bSLuigi Rizzo bzero(&so, sizeof(so)); 2634*ce3ee1e7SLuigi Rizzo NMG_LOCK(); 2635*ce3ee1e7SLuigi Rizzo error = get_ifp(nmr, &ifp, 0 /* don't create */); /* keep reference */ 2636*ce3ee1e7SLuigi Rizzo if (error) { 2637*ce3ee1e7SLuigi 
Rizzo NMG_UNLOCK(); 263868b8534bSLuigi Rizzo break; 2639*ce3ee1e7SLuigi Rizzo } 264068b8534bSLuigi Rizzo so.so_vnet = ifp->if_vnet; 264168b8534bSLuigi Rizzo // so->so_proto not null. 264268b8534bSLuigi Rizzo error = ifioctl(&so, cmd, data, td); 2643f196ce38SLuigi Rizzo nm_if_rele(ifp); 2644*ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 2645babc7c12SLuigi Rizzo break; 264668b8534bSLuigi Rizzo } 2647f196ce38SLuigi Rizzo 2648f196ce38SLuigi Rizzo #else /* linux */ 2649f196ce38SLuigi Rizzo default: 2650f196ce38SLuigi Rizzo error = EOPNOTSUPP; 2651f196ce38SLuigi Rizzo #endif /* linux */ 265268b8534bSLuigi Rizzo } 2653*ce3ee1e7SLuigi Rizzo out: 265468b8534bSLuigi Rizzo 2655506cc70cSLuigi Rizzo CURVNET_RESTORE(); 265668b8534bSLuigi Rizzo return (error); 265768b8534bSLuigi Rizzo } 265868b8534bSLuigi Rizzo 265968b8534bSLuigi Rizzo 266068b8534bSLuigi Rizzo /* 266168b8534bSLuigi Rizzo * select(2) and poll(2) handlers for the "netmap" device. 266268b8534bSLuigi Rizzo * 266368b8534bSLuigi Rizzo * Can be called for one or more queues. 266468b8534bSLuigi Rizzo * Return true the event mask corresponding to ready events. 266568b8534bSLuigi Rizzo * If there are no ready events, do a selrecord on either individual 2666*ce3ee1e7SLuigi Rizzo * selinfo or on the global one. 266768b8534bSLuigi Rizzo * Device-dependent parts (locking and sync of tx/rx rings) 266868b8534bSLuigi Rizzo * are done through callbacks. 2669f196ce38SLuigi Rizzo * 267001c7d25fSLuigi Rizzo * On linux, arguments are really pwait, the poll table, and 'td' is struct file * 267101c7d25fSLuigi Rizzo * The first one is remapped to pwait as selrecord() uses the name as an 267201c7d25fSLuigi Rizzo * hidden argument. 
 */
static int
netmap_poll(struct cdev *dev, int events, struct thread *td)
{
	struct netmap_priv_d *priv = NULL;
	struct netmap_adapter *na;
	struct ifnet *ifp;
	struct netmap_kring *kring;
	u_int i, check_all, want_tx, want_rx, revents = 0;
	u_int lim_tx, lim_rx, host_forwarded = 0;
	struct mbq q = { NULL, NULL, 0 };	/* packets grabbed for the host stack */
	void *pwait = dev;	/* linux compatibility */

	/* allow exactly one extra pass over the tx rings after a selrecord,
	 * so there is no window between the availability check and the wait
	 */
	int retry_tx = 1;

	(void)pwait;

	if (devfs_get_cdevpriv((void **)&priv) != 0 || priv == NULL)
		return POLLERR;

	if (priv->np_nifp == NULL) {
		D("No if registered");
		return POLLERR;
	}
	rmb(); /* make sure following reads are not from cache */

	ifp = priv->np_ifp;
	// XXX check for deleting() ?
	if ( (ifp->if_capenable & IFCAP_NETMAP) == 0)
		return POLLERR;

	if (netmap_verbose & 0x8000)
		D("device %s events 0x%x", ifp->if_xname, events);
	want_tx = events & (POLLOUT | POLLWRNORM);
	want_rx = events & (POLLIN | POLLRDNORM);

	na = NA(ifp); /* retrieve netmap adapter */

	lim_tx = na->num_tx_rings;
	lim_rx = na->num_rx_rings;

	if (priv->np_qfirst == NETMAP_SW_RING) {
		/* handle the host stack ring: no selrecord here, the caller
		 * is reported ready as soon as a sync has been attempted
		 */
		if (priv->np_txpoll || want_tx) {
			/* push any packets up, then we are always ready */
			netmap_txsync_to_host(na);
			revents |= want_tx;
		}
		if (want_rx) {
			kring = &na->rx_rings[lim_rx];	/* host rx ring is past the hw rings */
			if (kring->ring->avail == 0)
				netmap_rxsync_from_host(na, td, dev);
			if (kring->ring->avail > 0) {
				revents |= want_rx;
			}
		}
		return (revents);
	}

	/* if we are in transparent mode, check also the host rx ring */
	kring = &na->rx_rings[lim_rx];
	if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
	    && want_rx
	    && (netmap_fwd || kring->ring->flags & NR_FORWARD) ) {
		if (kring->ring->avail == 0)
			netmap_rxsync_from_host(na, td, dev);
		if (kring->ring->avail > 0)
			revents |= want_rx;
	}

	/*
	 * check_all is set if the card has more than one queue AND
	 * the client is polling all of them. If true, we sleep on
	 * the "global" selinfo, otherwise we sleep on individual selinfo
	 * (FreeBSD only allows two selinfo's per file descriptor).
	 * The interrupt routine in the driver wake one or the other
	 * (or both) depending on which clients are active.
	 *
	 * rxsync() is only called if we run out of buffers on a POLLIN.
	 * txsync() is called if we run out of buffers on POLLOUT, or
	 * there are pending packets to send. The latter can be disabled
	 * passing NETMAP_NO_TX_POLL in the NIOCREG call.
	 */
	check_all = (priv->np_qlast == NETMAP_HW_RING) && (lim_tx > 1 || lim_rx > 1);

	if (priv->np_qlast != NETMAP_HW_RING) {
		lim_tx = lim_rx = priv->np_qlast;
	}

	/*
	 * We start with a lock free round which is good if we have
	 * data available. If this fails, then lock and call the sync
	 * routines.
	 */
	for (i = priv->np_qfirst; want_rx && i < lim_rx; i++) {
		kring = &na->rx_rings[i];
		if (kring->ring->avail > 0) {
			revents |= want_rx;
			want_rx = 0;	/* also breaks the loop */
		}
	}
	for (i = priv->np_qfirst; want_tx && i < lim_tx; i++) {
		kring = &na->tx_rings[i];
		if (kring->ring->avail > 0) {
			revents |= want_tx;
			want_tx = 0;	/* also breaks the loop */
		}
	}

	/*
	 * If we to push packets out (priv->np_txpoll) or want_tx is
	 * still set, we do need to run the txsync calls (on all rings,
	 * to avoid that the tx rings stall).
	 */
	if (priv->np_txpoll || want_tx) {
		/* If we really want to be woken up (want_tx),
		 * do a selrecord, either on the global or on
		 * the private structure. Then issue the txsync
		 * so there is no race in the selrecord/selwait
		 */
flush_tx:
		for (i = priv->np_qfirst; i < lim_tx; i++) {
			kring = &na->tx_rings[i];
			/*
			 * Skip this ring if want_tx == 0
			 * (we have already done a successful sync on
			 * a previous ring) AND kring->cur == kring->hwcur
			 * (there are no pending transmissions for this ring).
			 */
			if (!want_tx && kring->ring->cur == kring->nr_hwcur)
				continue;
			/* make sure only one user thread is doing this */
			if (nm_kr_tryget(kring)) {
				ND("ring %p busy is %d", kring, (int)kring->nr_busy);
				revents |= POLLERR;
				goto out;
			}

			if (netmap_verbose & NM_VERB_TXSYNC)
				D("send %d on %s %d",
					kring->ring->cur, ifp->if_xname, i);
			if (na->nm_txsync(ifp, i, 0))
				revents |= POLLERR;

			/* Check avail/call selrecord only if called with POLLOUT */
			if (want_tx) {
				if (kring->ring->avail > 0) {
					/* stop at the first ring. We don't risk
					 * starvation.
					 */
					revents |= want_tx;
					want_tx = 0;
				}
			}
			nm_kr_put(kring);
		}
		if (want_tx && retry_tx) {
			/* record interest, then do one more sync pass so we
			 * cannot miss a wakeup between check and sleep
			 */
			selrecord(td, check_all ?
			    &na->tx_si : &na->tx_rings[priv->np_qfirst].si);
			retry_tx = 0;
			goto flush_tx;
		}
	}

	/*
	 * now if want_rx is still set we need to lock and rxsync.
	 * Do it on all rings because otherwise we starve.
	 */
	if (want_rx) {
		int retry_rx = 1;	/* same selrecord-then-retry scheme as tx */
do_retry_rx:
		for (i = priv->np_qfirst; i < lim_rx; i++) {
			kring = &na->rx_rings[i];

			if (nm_kr_tryget(kring)) {
				revents |= POLLERR;
				goto out;
			}

			/* XXX NR_FORWARD should only be read on
			 * physical or NIC ports
			 */
			if (netmap_fwd ||kring->ring->flags & NR_FORWARD) {
				ND(10, "forwarding some buffers up %d to %d",
				    kring->nr_hwcur, kring->ring->cur);
				netmap_grab_packets(kring, &q, netmap_fwd);
			}

			if (na->nm_rxsync(ifp, i, 0))
				revents |= POLLERR;
			if (netmap_no_timestamp == 0 ||
			    kring->ring->flags & NR_TIMESTAMP) {
				microtime(&kring->ring->ts);
			}

			if (kring->ring->avail > 0) {
				revents |= want_rx;
				retry_rx = 0;	/* data found, no need to wait */
			}
			nm_kr_put(kring);
		}
		if (retry_rx) {
			retry_rx = 0;
			selrecord(td, check_all ?
			    &na->rx_si : &na->rx_rings[priv->np_qfirst].si);
			goto do_retry_rx;
		}
	}

	/* forward host to the netmap ring.
	 * I am accessing nr_hwavail without lock, but netmap_transmit
	 * can only increment it, so the operation is safe.
	 */
	kring = &na->rx_rings[lim_rx];
	if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
	    && (netmap_fwd || kring->ring->flags & NR_FORWARD)
	    && kring->nr_hwavail > 0 && !host_forwarded) {
		netmap_sw_to_nic(na);
		host_forwarded = 1; /* prevent another pass */
		want_rx = 0;
		goto flush_tx;
	}

	if (q.head)
		netmap_send_up(na->ifp, q.head);

out:

	return (revents);
}

/*------- driver support routines ------*/


/*
 * Initialize a ``netmap_adapter`` object created by driver on attach.
 * We allocate a block of memory with room for a struct netmap_adapter
 * plus two sets of N+2 struct netmap_kring (where N is the number
 * of hardware rings):
 * krings 0..N-1 are for the hardware queues.
 * kring N is for the host stack queue
 * kring N+1 is only used for the selinfo for all queues.
 * Return 0 on success, ENOMEM otherwise.
 *
 * By default the receive and transmit adapter ring counts are both initialized
 * to num_queues.
 * na->num_tx_rings can be set for cards with different tx/rx
 * setups.
 */
int
netmap_attach(struct netmap_adapter *arg, u_int num_queues)
{
	struct netmap_adapter *na = NULL;
	struct ifnet *ifp = arg ? arg->ifp : NULL;
	size_t len;

	if (arg == NULL || ifp == NULL)
		goto fail;
	/* a VALE port uses two endpoints */
	len = nma_is_vp(arg) ? sizeof(*na) : sizeof(*na) * 2;
	na = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (na == NULL)
		goto fail;
	WNA(ifp) = na;	/* link the adapter to the ifnet */
	*na = *arg; /* copy everything, trust the driver to not pass junk */
	NETMAP_SET_CAPABLE(ifp);
	if (na->num_tx_rings == 0)
		na->num_tx_rings = num_queues;
	na->num_rx_rings = num_queues;
	na->refcount = na->na_single = na->na_multi = 0;
	/* Core lock initialized here, others after netmap_if_new. */
	mtx_init(&na->core_lock, "netmap core lock", MTX_NETWORK_LOCK, MTX_DEF);
#ifdef linux
	if (ifp->netdev_ops) {
		ND("netdev_ops %p", ifp->netdev_ops);
		/* prepare a clone of the netdev ops */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
		/* old kernels: netdev_ops is a single function pointer */
		na->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
#else
		na->nm_ndo = *ifp->netdev_ops;
#endif
	}
	/* intercept packets the stack hands to the device */
	na->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
#endif /* linux */
	/* use the global allocator unless the driver supplied its own */
	na->nm_mem = arg->nm_mem ? arg->nm_mem : &nm_mem;
	if (!nma_is_vp(arg))
		netmap_attach_sw(ifp);	/* second endpoint for the host stack */
	D("success for %s", ifp->if_xname);
	return 0;

fail:
	D("fail, arg %p ifp %p na %p", arg, ifp, na);
	/* netmap_detach is safe to call here: it is a no-op when na == NULL */
	netmap_detach(ifp);
	return (na ? EINVAL : ENOMEM);
}


/*
 * Free the allocated memory linked to the given ``netmap_adapter``
 * object.
 */
void
netmap_detach(struct ifnet *ifp)
{
	struct netmap_adapter *na = NA(ifp);

	if (!na)
		return;		/* never attached, nothing to free */

	mtx_destroy(&na->core_lock);

	if (na->tx_rings) { /* XXX should not happen */
		D("freeing leftover tx_rings");
		free(na->tx_rings, M_DEVBUF);
	}
	/* if this adapter owns a private allocator, release it too */
	if (na->na_flags & NAF_MEM_OWNER)
		netmap_mem_private_delete(na->nm_mem);
	bzero(na, sizeof(*na));
	WNA(ifp) = NULL;	/* unlink from the ifnet before freeing */
	free(na, M_DEVBUF);
}


/* forward declaration, defined later in this file */
int
nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
	struct netmap_adapter *na, u_int ring_nr);


/*
 * Intercept packets from the network stack and pass them
 * to netmap as incoming packets on the 'software' ring.
 * We rely on the OS to make sure that the ifp and na do not go
 * away (typically the caller checks for IFF_DRV_RUNNING or the like).
 * In nm_register() or whenever there is a reinitialization,
 * we make sure to access the core lock and per-ring locks
 * so that IFCAP_NETMAP is visible here.
 */
int
netmap_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_kring *kring;
	u_int i, len = MBUF_LEN(m);
	u_int error = EBUSY, lim;
	struct netmap_slot *slot;

	/* Returns 0 on success, ENXIO/EBUSY otherwise.
	 * The mbuf is always consumed (m_freem at the end), whether or not
	 * the packet was queued.
	 */
	// XXX [Linux] we do not need this lock
	// if we follow the down/configure/up protocol -gl
	// mtx_lock(&na->core_lock);
	if ( (ifp->if_capenable & IFCAP_NETMAP) == 0) {
		/* interface not in netmap mode anymore */
		error = ENXIO;
		goto done;
	}

	/* the host rx ring is the one past the hardware rings */
	kring = &na->rx_rings[na->num_rx_rings];
	lim = kring->nkr_num_slots - 1;
	if (netmap_verbose & NM_VERB_HOST)
		D("%s packet %d len %d from the stack", ifp->if_xname,
			kring->nr_hwcur + kring->nr_hwavail, len);
	// XXX reconsider long packets if we handle fragments
	if (len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) { /* too long for us */
		D("%s from_host, drop packet size %d > %d", ifp->if_xname,
			len, NETMAP_BDG_BUF_SIZE(na->nm_mem));
		goto done;	/* error stays EBUSY */
	}
	if (SWNA(ifp)->na_bdg) {
		/* host port is attached to a VALE bridge: forward the
		 * packet directly through the bridge instead of the ring
		 */
		struct nm_bdg_fwd *ft;
		char *dst;

		na = SWNA(ifp); /* we operate on the host port */
		ft = na->rx_rings[0].nkr_ft;
		dst = BDG_NMB(na->nm_mem, &na->rx_rings[0].ring->slot[0]);

		/* use slot 0 in the ft, there is nothing queued here */
		/* XXX we can save the copy calling m_copydata in nm_bdg_flush,
		 * need a special flag for this.
		 */
		m_copydata(m, 0, (int)len, dst);
		ft->ft_flags = 0;
		ft->ft_len = len;
		ft->ft_buf = dst;
		ft->ft_next = NM_FT_NULL;
		ft->ft_frags = 1;	/* single-fragment packet */
		if (netmap_verbose & NM_VERB_HOST)
			RD(5, "pkt %p size %d to bridge port %d",
				dst, len, na->bdg_port);
		nm_bdg_flush(ft, 1, na, 0);
		na = NA(ifp);	/* back to the regular object/lock */
		error = 0;
		goto done;
	}

	/* protect against other instances of netmap_transmit,
	 * and userspace invocations of rxsync().
	 * XXX could reuse core_lock
	 */
	// XXX [Linux] there can be no other instances of netmap_transmit
	// on this same ring, but we still need this lock to protect
	// concurrent access from netmap_sw_to_nic() -gl
	mtx_lock(&kring->q_lock);
	if (kring->nr_hwavail >= lim) {
		/* ring full: drop the packet (error stays EBUSY) */
		if (netmap_verbose)
			D("stack ring %s full\n", ifp->if_xname);
	} else {
		/* compute the insert position */
		i = nm_kr_rxpos(kring);
		slot = &kring->ring->slot[i];
		m_copydata(m, 0, (int)len, BDG_NMB(na->nm_mem, slot));
		slot->len = len;
		slot->flags = kring->nkr_slot_flags;
		kring->nr_hwavail++;
		if (netmap_verbose & NM_VERB_HOST)
			D("wake up host ring %s %d", na->ifp->if_xname, na->num_rx_rings);
		selwakeuppri(&kring->si, PI_NET);	/* wake poll()ers */
		error = 0;
	}
	mtx_unlock(&kring->q_lock);

done:
	// mtx_unlock(&na->core_lock);

	/* release the mbuf in either cases of success or failure. As an
	 * alternative, put the mbuf in a free list and free the list
	 * only when really necessary.
	 */
	m_freem(m);

	return (error);
}


/*
 * netmap_reset() is called by the driver routines when reinitializing
 * a ring. The driver is in charge of locking to protect the kring.
 * If netmap mode is not set just return NULL.
 * On success returns the slot array of the ring, so the driver can
 * re-program the NIC from the netmap buffers.
 */
struct netmap_slot *
netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
	u_int new_cur)
{
	struct netmap_kring *kring;
	int new_hwofs, lim;

	if (na == NULL) {
		D("NULL na, should not happen");
		return NULL;	/* no netmap support here */
	}
	if (!(na->ifp->if_capenable & IFCAP_NETMAP)) {
		D("interface not in netmap mode");
		return NULL;	/* nothing to reinitialize */
	}

	/* XXX note- in the new scheme, we are not guaranteed to be
	 * under lock (e.g. when called on a device reset).
	 * In this case, we should set a flag and do not trust too
	 * much the values. In practice: TODO
	 * - set a RESET flag somewhere in the kring
	 * - do the processing in a conservative way
	 * - let the *sync() fixup at the end.
	 */
	if (tx == NR_TX) {
		if (n >= na->num_tx_rings)
			return NULL;
		kring = na->tx_rings + n;
		/* offset so that the user-visible 'cur' stays in place */
		new_hwofs = kring->nr_hwcur - new_cur;
	} else {
		if (n >= na->num_rx_rings)
			return NULL;
		kring = na->rx_rings + n;
		new_hwofs = kring->nr_hwcur + kring->nr_hwavail - new_cur;
	}
	lim = kring->nkr_num_slots - 1;
	if (new_hwofs > lim)
		new_hwofs -= lim + 1;	/* wrap the offset into the ring */

	/* Always set the new offset value and realign the ring. */
	D("%s hwofs %d -> %d, hwavail %d -> %d",
		tx == NR_TX ? "TX" : "RX",
		kring->nkr_hwofs, new_hwofs,
		kring->nr_hwavail,
		tx == NR_TX ? lim : kring->nr_hwavail);
	kring->nkr_hwofs = new_hwofs;
	if (tx == NR_TX)
		kring->nr_hwavail = lim;	/* after reset, all tx slots are free */

#if 0 // def linux
	/* XXX check that the mappings are correct */
	/* need ring_nr, adapter->pdev, direction */
	buffer_info->dma = dma_map_single(&pdev->dev, addr, adapter->rx_buffer_len, DMA_FROM_DEVICE);
	if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
		D("error mapping rx netmap buffer %d", i);
		// XXX fix error handling
	}

#endif /* linux */
	/*
	 * Wakeup on the individual and global selwait
	 * We do the wakeup here, but the ring is not yet reconfigured.
	 * However, we are under lock so there are no races.
	 */
	selwakeuppri(&kring->si, PI_NET);
	selwakeuppri(tx == NR_TX ? &na->tx_si : &na->rx_si, PI_NET);
	return kring->ring->slot;
}


/*
 * Grab packets from a kring, move them into the ft structure
 * associated to the tx (input) port. Max one instance per port,
 * filtered on input (ioctl, poll or XXX).
 * Returns the next position in the ring.
3184*ce3ee1e7SLuigi Rizzo */ 3185f18be576SLuigi Rizzo static int 3186f18be576SLuigi Rizzo nm_bdg_preflush(struct netmap_adapter *na, u_int ring_nr, 3187f18be576SLuigi Rizzo struct netmap_kring *kring, u_int end) 3188f18be576SLuigi Rizzo { 3189f18be576SLuigi Rizzo struct netmap_ring *ring = kring->ring; 3190*ce3ee1e7SLuigi Rizzo struct nm_bdg_fwd *ft; 3191f18be576SLuigi Rizzo u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1; 3192f18be576SLuigi Rizzo u_int ft_i = 0; /* start from 0 */ 3193*ce3ee1e7SLuigi Rizzo u_int frags = 1; /* how many frags ? */ 3194*ce3ee1e7SLuigi Rizzo struct nm_bridge *b = na->na_bdg; 3195f18be576SLuigi Rizzo 3196*ce3ee1e7SLuigi Rizzo /* To protect against modifications to the bridge we acquire a 3197*ce3ee1e7SLuigi Rizzo * shared lock, waiting if we can sleep (if the source port is 3198*ce3ee1e7SLuigi Rizzo * attached to a user process) or with a trylock otherwise (NICs). 3199*ce3ee1e7SLuigi Rizzo */ 3200*ce3ee1e7SLuigi Rizzo ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j); 3201*ce3ee1e7SLuigi Rizzo if (na->na_flags & NAF_BDG_MAYSLEEP) 3202*ce3ee1e7SLuigi Rizzo BDG_RLOCK(b); 3203*ce3ee1e7SLuigi Rizzo else if (!BDG_RTRYLOCK(b)) 3204*ce3ee1e7SLuigi Rizzo return 0; 3205*ce3ee1e7SLuigi Rizzo ND(5, "rlock acquired for %d packets", ((j > end ? 
lim+1 : 0) + end) - j); 3206*ce3ee1e7SLuigi Rizzo ft = kring->nkr_ft; 3207*ce3ee1e7SLuigi Rizzo 3208*ce3ee1e7SLuigi Rizzo for (; likely(j != end); j = nm_next(j, lim)) { 3209f18be576SLuigi Rizzo struct netmap_slot *slot = &ring->slot[j]; 3210*ce3ee1e7SLuigi Rizzo char *buf; 3211f18be576SLuigi Rizzo 3212*ce3ee1e7SLuigi Rizzo ft[ft_i].ft_len = slot->len; 321385233a7dSLuigi Rizzo ft[ft_i].ft_flags = slot->flags; 321485233a7dSLuigi Rizzo 321585233a7dSLuigi Rizzo ND("flags is 0x%x", slot->flags); 321685233a7dSLuigi Rizzo /* this slot goes into a list so initialize the link field */ 3217*ce3ee1e7SLuigi Rizzo ft[ft_i].ft_next = NM_FT_NULL; 321885233a7dSLuigi Rizzo buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ? 3219*ce3ee1e7SLuigi Rizzo (void *)slot->ptr : BDG_NMB(na->nm_mem, slot); 322085233a7dSLuigi Rizzo prefetch(buf); 3221*ce3ee1e7SLuigi Rizzo ++ft_i; 3222*ce3ee1e7SLuigi Rizzo if (slot->flags & NS_MOREFRAG) { 3223*ce3ee1e7SLuigi Rizzo frags++; 3224*ce3ee1e7SLuigi Rizzo continue; 3225*ce3ee1e7SLuigi Rizzo } 3226*ce3ee1e7SLuigi Rizzo if (unlikely(netmap_verbose && frags > 1)) 3227*ce3ee1e7SLuigi Rizzo RD(5, "%d frags at %d", frags, ft_i - frags); 3228*ce3ee1e7SLuigi Rizzo ft[ft_i - frags].ft_frags = frags; 3229*ce3ee1e7SLuigi Rizzo frags = 1; 3230*ce3ee1e7SLuigi Rizzo if (unlikely((int)ft_i >= bridge_batch)) 3231f18be576SLuigi Rizzo ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); 3232f18be576SLuigi Rizzo } 3233*ce3ee1e7SLuigi Rizzo if (frags > 1) { 3234*ce3ee1e7SLuigi Rizzo D("truncate incomplete fragment at %d (%d frags)", ft_i, frags); 3235*ce3ee1e7SLuigi Rizzo // ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG 3236*ce3ee1e7SLuigi Rizzo ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG; 3237*ce3ee1e7SLuigi Rizzo ft[ft_i - frags].ft_frags = frags - 1; 3238*ce3ee1e7SLuigi Rizzo } 3239f18be576SLuigi Rizzo if (ft_i) 3240f18be576SLuigi Rizzo ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); 3241*ce3ee1e7SLuigi Rizzo BDG_RUNLOCK(b); 3242f18be576SLuigi Rizzo return j; 3243f18be576SLuigi Rizzo } 
3244f18be576SLuigi Rizzo 3245f18be576SLuigi Rizzo 3246f18be576SLuigi Rizzo /* 3247*ce3ee1e7SLuigi Rizzo * Pass packets from nic to the bridge. 3248*ce3ee1e7SLuigi Rizzo * XXX TODO check locking: this is called from the interrupt 3249*ce3ee1e7SLuigi Rizzo * handler so we should make sure that the interface is not 3250*ce3ee1e7SLuigi Rizzo * disconnected while passing down an interrupt. 3251*ce3ee1e7SLuigi Rizzo * 3252f18be576SLuigi Rizzo * Note, no user process can access this NIC so we can ignore 3253f18be576SLuigi Rizzo * the info in the 'ring'. 3254f18be576SLuigi Rizzo */ 3255f18be576SLuigi Rizzo static void 3256f18be576SLuigi Rizzo netmap_nic_to_bdg(struct ifnet *ifp, u_int ring_nr) 3257f18be576SLuigi Rizzo { 3258f18be576SLuigi Rizzo struct netmap_adapter *na = NA(ifp); 3259f18be576SLuigi Rizzo struct netmap_kring *kring = &na->rx_rings[ring_nr]; 3260f18be576SLuigi Rizzo struct netmap_ring *ring = kring->ring; 3261*ce3ee1e7SLuigi Rizzo u_int j, k; 3262f18be576SLuigi Rizzo 3263*ce3ee1e7SLuigi Rizzo /* make sure that only one thread is ever in here, 3264*ce3ee1e7SLuigi Rizzo * after which we can unlock. Probably unnecessary XXX. 3265*ce3ee1e7SLuigi Rizzo */ 3266*ce3ee1e7SLuigi Rizzo if (nm_kr_tryget(kring)) 3267*ce3ee1e7SLuigi Rizzo return; 3268*ce3ee1e7SLuigi Rizzo /* fetch packets that have arrived. 3269*ce3ee1e7SLuigi Rizzo * XXX maybe do this in a loop ? 
3270*ce3ee1e7SLuigi Rizzo */ 3271*ce3ee1e7SLuigi Rizzo if (na->nm_rxsync(ifp, ring_nr, 0)) 3272*ce3ee1e7SLuigi Rizzo goto put_out; 3273*ce3ee1e7SLuigi Rizzo if (kring->nr_hwavail == 0 && netmap_verbose) { 3274f18be576SLuigi Rizzo D("how strange, interrupt with no packets on %s", 3275f18be576SLuigi Rizzo ifp->if_xname); 3276*ce3ee1e7SLuigi Rizzo goto put_out; 3277f18be576SLuigi Rizzo } 3278*ce3ee1e7SLuigi Rizzo k = nm_kr_rxpos(kring); 3279f18be576SLuigi Rizzo 3280f18be576SLuigi Rizzo j = nm_bdg_preflush(na, ring_nr, kring, k); 3281f18be576SLuigi Rizzo 3282f18be576SLuigi Rizzo /* we consume everything, but we cannot update kring directly 3283f18be576SLuigi Rizzo * because the nic may have destroyed the info in the NIC ring. 3284f18be576SLuigi Rizzo * So we need to call rxsync again to restore it. 3285f18be576SLuigi Rizzo */ 3286f18be576SLuigi Rizzo ring->cur = j; 3287f18be576SLuigi Rizzo ring->avail = 0; 3288f18be576SLuigi Rizzo na->nm_rxsync(ifp, ring_nr, 0); 3289*ce3ee1e7SLuigi Rizzo 3290*ce3ee1e7SLuigi Rizzo put_out: 3291*ce3ee1e7SLuigi Rizzo nm_kr_put(kring); 3292f18be576SLuigi Rizzo return; 3293f18be576SLuigi Rizzo } 3294f18be576SLuigi Rizzo 3295f18be576SLuigi Rizzo 329668b8534bSLuigi Rizzo /* 3297*ce3ee1e7SLuigi Rizzo * Default functions to handle rx/tx interrupts from a physical device. 3298*ce3ee1e7SLuigi Rizzo * "work_done" is non-null on the RX path, NULL for the TX path. 3299*ce3ee1e7SLuigi Rizzo * We rely on the OS to make sure that there is only one active 3300*ce3ee1e7SLuigi Rizzo * instance per queue, and that there is appropriate locking. 3301849bec0eSLuigi Rizzo * 3302*ce3ee1e7SLuigi Rizzo * If the card is not in netmap mode, simply return 0, 3303*ce3ee1e7SLuigi Rizzo * so that the caller proceeds with regular processing. 
3304*ce3ee1e7SLuigi Rizzo * 3305*ce3ee1e7SLuigi Rizzo * If the card is connected to a netmap file descriptor, 3306*ce3ee1e7SLuigi Rizzo * do a selwakeup on the individual queue, plus one on the global one 3307*ce3ee1e7SLuigi Rizzo * if needed (multiqueue card _and_ there are multiqueue listeners), 3308*ce3ee1e7SLuigi Rizzo * and return 1. 3309*ce3ee1e7SLuigi Rizzo * 3310*ce3ee1e7SLuigi Rizzo * Finally, if called on rx from an interface connected to a switch, 3311*ce3ee1e7SLuigi Rizzo * calls the proper forwarding routine, and return 1. 33121a26580eSLuigi Rizzo */ 3313babc7c12SLuigi Rizzo int 3314*ce3ee1e7SLuigi Rizzo netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done) 33151a26580eSLuigi Rizzo { 33161a26580eSLuigi Rizzo struct netmap_adapter *na; 3317*ce3ee1e7SLuigi Rizzo struct netmap_kring *kring; 33181a26580eSLuigi Rizzo 33191a26580eSLuigi Rizzo if (!(ifp->if_capenable & IFCAP_NETMAP)) 33201a26580eSLuigi Rizzo return 0; 3321849bec0eSLuigi Rizzo 3322*ce3ee1e7SLuigi Rizzo q &= NETMAP_RING_MASK; 3323849bec0eSLuigi Rizzo 3324*ce3ee1e7SLuigi Rizzo if (netmap_verbose) 3325*ce3ee1e7SLuigi Rizzo RD(5, "received %s queue %d", work_done ? "RX" : "TX" , q); 33261a26580eSLuigi Rizzo na = NA(ifp); 33278241616dSLuigi Rizzo if (na->na_flags & NAF_SKIP_INTR) { 33288241616dSLuigi Rizzo ND("use regular interrupt"); 33298241616dSLuigi Rizzo return 0; 33308241616dSLuigi Rizzo } 33318241616dSLuigi Rizzo 333264ae02c3SLuigi Rizzo if (work_done) { /* RX path */ 33338241616dSLuigi Rizzo if (q >= na->num_rx_rings) 3334849bec0eSLuigi Rizzo return 0; // not a physical queue 3335*ce3ee1e7SLuigi Rizzo kring = na->rx_rings + q; 3336*ce3ee1e7SLuigi Rizzo kring->nr_kflags |= NKR_PENDINTR; // XXX atomic ? 
3337*ce3ee1e7SLuigi Rizzo if (na->na_bdg != NULL) { 3338*ce3ee1e7SLuigi Rizzo netmap_nic_to_bdg(ifp, q); 3339*ce3ee1e7SLuigi Rizzo } else { 3340*ce3ee1e7SLuigi Rizzo selwakeuppri(&kring->si, PI_NET); 3341*ce3ee1e7SLuigi Rizzo if (na->num_rx_rings > 1 /* or multiple listeners */ ) 3342*ce3ee1e7SLuigi Rizzo selwakeuppri(&na->rx_si, PI_NET); 3343*ce3ee1e7SLuigi Rizzo } 3344*ce3ee1e7SLuigi Rizzo *work_done = 1; /* do not fire napi again */ 3345849bec0eSLuigi Rizzo } else { /* TX path */ 33468241616dSLuigi Rizzo if (q >= na->num_tx_rings) 3347849bec0eSLuigi Rizzo return 0; // not a physical queue 3348*ce3ee1e7SLuigi Rizzo kring = na->tx_rings + q; 3349*ce3ee1e7SLuigi Rizzo selwakeuppri(&kring->si, PI_NET); 3350*ce3ee1e7SLuigi Rizzo if (na->num_tx_rings > 1 /* or multiple listeners */ ) 3351*ce3ee1e7SLuigi Rizzo selwakeuppri(&na->tx_si, PI_NET); 335264ae02c3SLuigi Rizzo } 33531a26580eSLuigi Rizzo return 1; 33541a26580eSLuigi Rizzo } 33551a26580eSLuigi Rizzo 335664ae02c3SLuigi Rizzo 335701c7d25fSLuigi Rizzo #ifdef linux /* linux-specific routines */ 335801c7d25fSLuigi Rizzo 3359f18be576SLuigi Rizzo 336001c7d25fSLuigi Rizzo /* 336101c7d25fSLuigi Rizzo * Remap linux arguments into the FreeBSD call. 336201c7d25fSLuigi Rizzo * - pwait is the poll table, passed as 'dev'; 336301c7d25fSLuigi Rizzo * If pwait == NULL someone else already woke up before. We can report 336401c7d25fSLuigi Rizzo * events but they are filtered upstream. 336501c7d25fSLuigi Rizzo * If pwait != NULL, then pwait->key contains the list of events. 336601c7d25fSLuigi Rizzo * - events is computed from pwait as above. 336701c7d25fSLuigi Rizzo * - file is passed as 'td'; 336801c7d25fSLuigi Rizzo */ 336901c7d25fSLuigi Rizzo static u_int 337001c7d25fSLuigi Rizzo linux_netmap_poll(struct file * file, struct poll_table_struct *pwait) 337101c7d25fSLuigi Rizzo { 3372849bec0eSLuigi Rizzo #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) 3373849bec0eSLuigi Rizzo int events = POLLIN | POLLOUT; /* XXX maybe... 
*/ 3374849bec0eSLuigi Rizzo #elif LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) 337501c7d25fSLuigi Rizzo int events = pwait ? pwait->key : POLLIN | POLLOUT; 337601c7d25fSLuigi Rizzo #else /* in 3.4.0 field 'key' was renamed to '_key' */ 337701c7d25fSLuigi Rizzo int events = pwait ? pwait->_key : POLLIN | POLLOUT; 337801c7d25fSLuigi Rizzo #endif 337901c7d25fSLuigi Rizzo return netmap_poll((void *)pwait, events, (void *)file); 338001c7d25fSLuigi Rizzo } 338101c7d25fSLuigi Rizzo 3382f18be576SLuigi Rizzo 338301c7d25fSLuigi Rizzo static int 338442a3a5bdSLuigi Rizzo linux_netmap_mmap(struct file *f, struct vm_area_struct *vma) 338501c7d25fSLuigi Rizzo { 33868241616dSLuigi Rizzo int error = 0; 3387*ce3ee1e7SLuigi Rizzo unsigned long off, va; 3388*ce3ee1e7SLuigi Rizzo vm_ooffset_t pa; 3389*ce3ee1e7SLuigi Rizzo struct netmap_priv_d *priv = f->private_data; 339001c7d25fSLuigi Rizzo /* 339101c7d25fSLuigi Rizzo * vma->vm_start: start of mapping user address space 339201c7d25fSLuigi Rizzo * vma->vm_end: end of the mapping user address space 33938241616dSLuigi Rizzo * vma->vm_pfoff: offset of first page in the device 339401c7d25fSLuigi Rizzo */ 339501c7d25fSLuigi Rizzo 339601c7d25fSLuigi Rizzo // XXX security checks 339701c7d25fSLuigi Rizzo 3398*ce3ee1e7SLuigi Rizzo error = netmap_get_memory(priv); 33998241616dSLuigi Rizzo ND("get_memory returned %d", error); 34008241616dSLuigi Rizzo if (error) 34018241616dSLuigi Rizzo return -error; 34028241616dSLuigi Rizzo 3403*ce3ee1e7SLuigi Rizzo if ((vma->vm_start & ~PAGE_MASK) || (vma->vm_end & ~PAGE_MASK)) { 3404*ce3ee1e7SLuigi Rizzo ND("vm_start = %lx vm_end = %lx", vma->vm_start, vma->vm_end); 3405*ce3ee1e7SLuigi Rizzo return -EINVAL; 3406*ce3ee1e7SLuigi Rizzo } 34078241616dSLuigi Rizzo 3408*ce3ee1e7SLuigi Rizzo for (va = vma->vm_start, off = vma->vm_pgoff; 3409*ce3ee1e7SLuigi Rizzo va < vma->vm_end; 3410*ce3ee1e7SLuigi Rizzo va += PAGE_SIZE, off++) 3411*ce3ee1e7SLuigi Rizzo { 3412*ce3ee1e7SLuigi Rizzo pa = 
netmap_mem_ofstophys(priv->np_mref, off << PAGE_SHIFT); 3413*ce3ee1e7SLuigi Rizzo if (pa == 0) 3414*ce3ee1e7SLuigi Rizzo return -EINVAL; 341501c7d25fSLuigi Rizzo 3416*ce3ee1e7SLuigi Rizzo ND("va %lx pa %p", va, pa); 3417*ce3ee1e7SLuigi Rizzo error = remap_pfn_range(vma, va, pa >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot); 3418*ce3ee1e7SLuigi Rizzo if (error) 3419*ce3ee1e7SLuigi Rizzo return error; 3420*ce3ee1e7SLuigi Rizzo } 342101c7d25fSLuigi Rizzo return 0; 342201c7d25fSLuigi Rizzo } 342301c7d25fSLuigi Rizzo 3424f18be576SLuigi Rizzo 3425*ce3ee1e7SLuigi Rizzo /* 3426*ce3ee1e7SLuigi Rizzo * This one is probably already protected by the netif lock XXX 3427*ce3ee1e7SLuigi Rizzo */ 342801c7d25fSLuigi Rizzo static netdev_tx_t 3429*ce3ee1e7SLuigi Rizzo linux_netmap_start_xmit(struct sk_buff *skb, struct net_device *dev) 343001c7d25fSLuigi Rizzo { 3431*ce3ee1e7SLuigi Rizzo netmap_transmit(dev, skb); 343201c7d25fSLuigi Rizzo return (NETDEV_TX_OK); 343301c7d25fSLuigi Rizzo } 343401c7d25fSLuigi Rizzo 343501c7d25fSLuigi Rizzo 3436*ce3ee1e7SLuigi Rizzo #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) // XXX was 37 343701c7d25fSLuigi Rizzo #define LIN_IOCTL_NAME .ioctl 343801c7d25fSLuigi Rizzo int 343901c7d25fSLuigi Rizzo linux_netmap_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long data /* arg */) 344001c7d25fSLuigi Rizzo #else 344101c7d25fSLuigi Rizzo #define LIN_IOCTL_NAME .unlocked_ioctl 344201c7d25fSLuigi Rizzo long 344301c7d25fSLuigi Rizzo linux_netmap_ioctl(struct file *file, u_int cmd, u_long data /* arg */) 344401c7d25fSLuigi Rizzo #endif 344501c7d25fSLuigi Rizzo { 344601c7d25fSLuigi Rizzo int ret; 344701c7d25fSLuigi Rizzo struct nmreq nmr; 344801c7d25fSLuigi Rizzo bzero(&nmr, sizeof(nmr)); 344901c7d25fSLuigi Rizzo 3450*ce3ee1e7SLuigi Rizzo if (cmd == NIOCTXSYNC || cmd == NIOCRXSYNC) { 3451*ce3ee1e7SLuigi Rizzo data = 0; /* no argument required here */ 3452*ce3ee1e7SLuigi Rizzo } 345301c7d25fSLuigi Rizzo if (data && copy_from_user(&nmr, (void *)data, 
sizeof(nmr) ) != 0) 345401c7d25fSLuigi Rizzo return -EFAULT; 345501c7d25fSLuigi Rizzo ret = netmap_ioctl(NULL, cmd, (caddr_t)&nmr, 0, (void *)file); 345601c7d25fSLuigi Rizzo if (data && copy_to_user((void*)data, &nmr, sizeof(nmr) ) != 0) 345701c7d25fSLuigi Rizzo return -EFAULT; 345801c7d25fSLuigi Rizzo return -ret; 345901c7d25fSLuigi Rizzo } 346001c7d25fSLuigi Rizzo 346101c7d25fSLuigi Rizzo 346201c7d25fSLuigi Rizzo static int 34630b8ed8e0SLuigi Rizzo netmap_release(struct inode *inode, struct file *file) 346401c7d25fSLuigi Rizzo { 34650b8ed8e0SLuigi Rizzo (void)inode; /* UNUSED */ 346601c7d25fSLuigi Rizzo if (file->private_data) 346701c7d25fSLuigi Rizzo netmap_dtor(file->private_data); 346801c7d25fSLuigi Rizzo return (0); 346901c7d25fSLuigi Rizzo } 347001c7d25fSLuigi Rizzo 3471f18be576SLuigi Rizzo 34728241616dSLuigi Rizzo static int 34738241616dSLuigi Rizzo linux_netmap_open(struct inode *inode, struct file *file) 34748241616dSLuigi Rizzo { 34758241616dSLuigi Rizzo struct netmap_priv_d *priv; 34768241616dSLuigi Rizzo (void)inode; /* UNUSED */ 34778241616dSLuigi Rizzo 34788241616dSLuigi Rizzo priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF, 34798241616dSLuigi Rizzo M_NOWAIT | M_ZERO); 34808241616dSLuigi Rizzo if (priv == NULL) 34818241616dSLuigi Rizzo return -ENOMEM; 34828241616dSLuigi Rizzo 34838241616dSLuigi Rizzo file->private_data = priv; 34848241616dSLuigi Rizzo 34858241616dSLuigi Rizzo return (0); 34868241616dSLuigi Rizzo } 348701c7d25fSLuigi Rizzo 3488f18be576SLuigi Rizzo 348901c7d25fSLuigi Rizzo static struct file_operations netmap_fops = { 3490f18be576SLuigi Rizzo .owner = THIS_MODULE, 34918241616dSLuigi Rizzo .open = linux_netmap_open, 349242a3a5bdSLuigi Rizzo .mmap = linux_netmap_mmap, 349301c7d25fSLuigi Rizzo LIN_IOCTL_NAME = linux_netmap_ioctl, 349401c7d25fSLuigi Rizzo .poll = linux_netmap_poll, 349501c7d25fSLuigi Rizzo .release = netmap_release, 349601c7d25fSLuigi Rizzo }; 349701c7d25fSLuigi Rizzo 3498f18be576SLuigi Rizzo 349901c7d25fSLuigi Rizzo 
static struct miscdevice netmap_cdevsw = { /* same name as FreeBSD */ 350001c7d25fSLuigi Rizzo MISC_DYNAMIC_MINOR, 350101c7d25fSLuigi Rizzo "netmap", 350201c7d25fSLuigi Rizzo &netmap_fops, 350301c7d25fSLuigi Rizzo }; 350401c7d25fSLuigi Rizzo 350501c7d25fSLuigi Rizzo static int netmap_init(void); 350601c7d25fSLuigi Rizzo static void netmap_fini(void); 350701c7d25fSLuigi Rizzo 3508f18be576SLuigi Rizzo 350942a3a5bdSLuigi Rizzo /* Errors have negative values on linux */ 351042a3a5bdSLuigi Rizzo static int linux_netmap_init(void) 351142a3a5bdSLuigi Rizzo { 351242a3a5bdSLuigi Rizzo return -netmap_init(); 351342a3a5bdSLuigi Rizzo } 351442a3a5bdSLuigi Rizzo 351542a3a5bdSLuigi Rizzo module_init(linux_netmap_init); 351601c7d25fSLuigi Rizzo module_exit(netmap_fini); 351701c7d25fSLuigi Rizzo /* export certain symbols to other modules */ 351801c7d25fSLuigi Rizzo EXPORT_SYMBOL(netmap_attach); // driver attach routines 351901c7d25fSLuigi Rizzo EXPORT_SYMBOL(netmap_detach); // driver detach routines 352001c7d25fSLuigi Rizzo EXPORT_SYMBOL(netmap_ring_reinit); // ring init on error 352101c7d25fSLuigi Rizzo EXPORT_SYMBOL(netmap_buffer_lut); 352201c7d25fSLuigi Rizzo EXPORT_SYMBOL(netmap_total_buffers); // index check 352301c7d25fSLuigi Rizzo EXPORT_SYMBOL(netmap_buffer_base); 352401c7d25fSLuigi Rizzo EXPORT_SYMBOL(netmap_reset); // ring init routines 352501c7d25fSLuigi Rizzo EXPORT_SYMBOL(netmap_buf_size); 352601c7d25fSLuigi Rizzo EXPORT_SYMBOL(netmap_rx_irq); // default irq handler 352701c7d25fSLuigi Rizzo EXPORT_SYMBOL(netmap_no_pendintr); // XXX mitigation - should go away 3528f18be576SLuigi Rizzo EXPORT_SYMBOL(netmap_bdg_ctl); // bridge configuration routine 3529f18be576SLuigi Rizzo EXPORT_SYMBOL(netmap_bdg_learning); // the default lookup function 3530*ce3ee1e7SLuigi Rizzo EXPORT_SYMBOL(netmap_disable_all_rings); 3531*ce3ee1e7SLuigi Rizzo EXPORT_SYMBOL(netmap_enable_all_rings); 353201c7d25fSLuigi Rizzo 353301c7d25fSLuigi Rizzo 353401c7d25fSLuigi Rizzo 
MODULE_AUTHOR("http://info.iet.unipi.it/~luigi/netmap/"); 353501c7d25fSLuigi Rizzo MODULE_DESCRIPTION("The netmap packet I/O framework"); 353601c7d25fSLuigi Rizzo MODULE_LICENSE("Dual BSD/GPL"); /* the code here is all BSD. */ 353701c7d25fSLuigi Rizzo 353801c7d25fSLuigi Rizzo #else /* __FreeBSD__ */ 353901c7d25fSLuigi Rizzo 3540f18be576SLuigi Rizzo 3541babc7c12SLuigi Rizzo static struct cdevsw netmap_cdevsw = { 3542babc7c12SLuigi Rizzo .d_version = D_VERSION, 3543babc7c12SLuigi Rizzo .d_name = "netmap", 35448241616dSLuigi Rizzo .d_open = netmap_open, 35458241616dSLuigi Rizzo .d_mmap_single = netmap_mmap_single, 3546babc7c12SLuigi Rizzo .d_ioctl = netmap_ioctl, 3547babc7c12SLuigi Rizzo .d_poll = netmap_poll, 35488241616dSLuigi Rizzo .d_close = netmap_close, 3549babc7c12SLuigi Rizzo }; 355001c7d25fSLuigi Rizzo #endif /* __FreeBSD__ */ 3551babc7c12SLuigi Rizzo 3552f196ce38SLuigi Rizzo /* 3553f196ce38SLuigi Rizzo *---- support for virtual bridge ----- 3554f196ce38SLuigi Rizzo */ 3555f196ce38SLuigi Rizzo 3556f196ce38SLuigi Rizzo /* ----- FreeBSD if_bridge hash function ------- */ 3557f196ce38SLuigi Rizzo 3558f196ce38SLuigi Rizzo /* 3559f196ce38SLuigi Rizzo * The following hash function is adapted from "Hash Functions" by Bob Jenkins 3560f196ce38SLuigi Rizzo * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). 
3561f196ce38SLuigi Rizzo * 3562f196ce38SLuigi Rizzo * http://www.burtleburtle.net/bob/hash/spooky.html 3563f196ce38SLuigi Rizzo */ 3564f196ce38SLuigi Rizzo #define mix(a, b, c) \ 3565f196ce38SLuigi Rizzo do { \ 3566f196ce38SLuigi Rizzo a -= b; a -= c; a ^= (c >> 13); \ 3567f196ce38SLuigi Rizzo b -= c; b -= a; b ^= (a << 8); \ 3568f196ce38SLuigi Rizzo c -= a; c -= b; c ^= (b >> 13); \ 3569f196ce38SLuigi Rizzo a -= b; a -= c; a ^= (c >> 12); \ 3570f196ce38SLuigi Rizzo b -= c; b -= a; b ^= (a << 16); \ 3571f196ce38SLuigi Rizzo c -= a; c -= b; c ^= (b >> 5); \ 3572f196ce38SLuigi Rizzo a -= b; a -= c; a ^= (c >> 3); \ 3573f196ce38SLuigi Rizzo b -= c; b -= a; b ^= (a << 10); \ 3574f196ce38SLuigi Rizzo c -= a; c -= b; c ^= (b >> 15); \ 3575f196ce38SLuigi Rizzo } while (/*CONSTCOND*/0) 3576f196ce38SLuigi Rizzo 3577f196ce38SLuigi Rizzo static __inline uint32_t 3578f196ce38SLuigi Rizzo nm_bridge_rthash(const uint8_t *addr) 3579f196ce38SLuigi Rizzo { 3580f196ce38SLuigi Rizzo uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key 3581f196ce38SLuigi Rizzo 3582f196ce38SLuigi Rizzo b += addr[5] << 8; 3583f196ce38SLuigi Rizzo b += addr[4]; 3584f196ce38SLuigi Rizzo a += addr[3] << 24; 3585f196ce38SLuigi Rizzo a += addr[2] << 16; 3586f196ce38SLuigi Rizzo a += addr[1] << 8; 3587f196ce38SLuigi Rizzo a += addr[0]; 3588f196ce38SLuigi Rizzo 3589f196ce38SLuigi Rizzo mix(a, b, c); 3590f196ce38SLuigi Rizzo #define BRIDGE_RTHASH_MASK (NM_BDG_HASH-1) 3591f196ce38SLuigi Rizzo return (c & BRIDGE_RTHASH_MASK); 3592f196ce38SLuigi Rizzo } 3593f196ce38SLuigi Rizzo 3594f196ce38SLuigi Rizzo #undef mix 3595f196ce38SLuigi Rizzo 3596f196ce38SLuigi Rizzo 3597f196ce38SLuigi Rizzo static int 3598f196ce38SLuigi Rizzo bdg_netmap_reg(struct ifnet *ifp, int onoff) 3599f196ce38SLuigi Rizzo { 3600f18be576SLuigi Rizzo /* the interface is already attached to the bridge, 3601f18be576SLuigi Rizzo * so we only need to toggle IFCAP_NETMAP. 
3602f196ce38SLuigi Rizzo */ 3603f18be576SLuigi Rizzo if (onoff) { 3604f196ce38SLuigi Rizzo ifp->if_capenable |= IFCAP_NETMAP; 3605f196ce38SLuigi Rizzo } else { 3606f196ce38SLuigi Rizzo ifp->if_capenable &= ~IFCAP_NETMAP; 3607f196ce38SLuigi Rizzo } 3608f18be576SLuigi Rizzo return 0; 3609f196ce38SLuigi Rizzo } 3610f196ce38SLuigi Rizzo 3611f196ce38SLuigi Rizzo 3612f18be576SLuigi Rizzo /* 3613f18be576SLuigi Rizzo * Lookup function for a learning bridge. 3614f18be576SLuigi Rizzo * Update the hash table with the source address, 3615f18be576SLuigi Rizzo * and then returns the destination port index, and the 3616f18be576SLuigi Rizzo * ring in *dst_ring (at the moment, always use ring 0) 3617f18be576SLuigi Rizzo */ 3618f18be576SLuigi Rizzo u_int 3619*ce3ee1e7SLuigi Rizzo netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring, 3620f18be576SLuigi Rizzo struct netmap_adapter *na) 3621f196ce38SLuigi Rizzo { 3622f18be576SLuigi Rizzo struct nm_hash_ent *ht = na->na_bdg->ht; 3623f196ce38SLuigi Rizzo uint32_t sh, dh; 3624f18be576SLuigi Rizzo u_int dst, mysrc = na->bdg_port; 3625f196ce38SLuigi Rizzo uint64_t smac, dmac; 3626f196ce38SLuigi Rizzo 3627*ce3ee1e7SLuigi Rizzo if (buf_len < 14) { 3628*ce3ee1e7SLuigi Rizzo D("invalid buf length %d", buf_len); 3629*ce3ee1e7SLuigi Rizzo return NM_BDG_NOPORT; 3630*ce3ee1e7SLuigi Rizzo } 3631f196ce38SLuigi Rizzo dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff; 3632f196ce38SLuigi Rizzo smac = le64toh(*(uint64_t *)(buf + 4)); 3633f196ce38SLuigi Rizzo smac >>= 16; 3634f18be576SLuigi Rizzo 3635f196ce38SLuigi Rizzo /* 3636f196ce38SLuigi Rizzo * The hash is somewhat expensive, there might be some 3637f196ce38SLuigi Rizzo * worthwhile optimizations here. 
3638f196ce38SLuigi Rizzo */ 3639f196ce38SLuigi Rizzo if ((buf[6] & 1) == 0) { /* valid src */ 3640f196ce38SLuigi Rizzo uint8_t *s = buf+6; 3641*ce3ee1e7SLuigi Rizzo sh = nm_bridge_rthash(s); // XXX hash of source 3642f196ce38SLuigi Rizzo /* update source port forwarding entry */ 3643f18be576SLuigi Rizzo ht[sh].mac = smac; /* XXX expire ? */ 3644f18be576SLuigi Rizzo ht[sh].ports = mysrc; 3645f196ce38SLuigi Rizzo if (netmap_verbose) 3646f196ce38SLuigi Rizzo D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d", 3647f18be576SLuigi Rizzo s[0], s[1], s[2], s[3], s[4], s[5], mysrc); 3648f196ce38SLuigi Rizzo } 3649f18be576SLuigi Rizzo dst = NM_BDG_BROADCAST; 3650f196ce38SLuigi Rizzo if ((buf[0] & 1) == 0) { /* unicast */ 3651f196ce38SLuigi Rizzo dh = nm_bridge_rthash(buf); // XXX hash of dst 3652f18be576SLuigi Rizzo if (ht[dh].mac == dmac) { /* found dst */ 3653f18be576SLuigi Rizzo dst = ht[dh].ports; 3654f196ce38SLuigi Rizzo } 3655f18be576SLuigi Rizzo /* XXX otherwise return NM_BDG_UNKNOWN ? */ 3656f196ce38SLuigi Rizzo } 3657f18be576SLuigi Rizzo *dst_ring = 0; 3658f18be576SLuigi Rizzo return dst; 3659f196ce38SLuigi Rizzo } 3660f196ce38SLuigi Rizzo 3661f18be576SLuigi Rizzo 3662f18be576SLuigi Rizzo /* 3663f18be576SLuigi Rizzo * This flush routine supports only unicast and broadcast but a large 3664f18be576SLuigi Rizzo * number of ports, and lets us replace the learn and dispatch functions. 
3665f18be576SLuigi Rizzo */ 3666f18be576SLuigi Rizzo int 3667*ce3ee1e7SLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_adapter *na, 3668f18be576SLuigi Rizzo u_int ring_nr) 3669f18be576SLuigi Rizzo { 3670f18be576SLuigi Rizzo struct nm_bdg_q *dst_ents, *brddst; 3671f18be576SLuigi Rizzo uint16_t num_dsts = 0, *dsts; 3672f18be576SLuigi Rizzo struct nm_bridge *b = na->na_bdg; 3673*ce3ee1e7SLuigi Rizzo u_int i, j, me = na->bdg_port; 3674f18be576SLuigi Rizzo 3675*ce3ee1e7SLuigi Rizzo /* 3676*ce3ee1e7SLuigi Rizzo * The work area (pointed by ft) is followed by an array of 3677*ce3ee1e7SLuigi Rizzo * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS 3678*ce3ee1e7SLuigi Rizzo * queues per port plus one for the broadcast traffic. 3679*ce3ee1e7SLuigi Rizzo * Then we have an array of destination indexes. 3680*ce3ee1e7SLuigi Rizzo */ 3681*ce3ee1e7SLuigi Rizzo dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); 3682f18be576SLuigi Rizzo dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1); 3683f18be576SLuigi Rizzo 3684*ce3ee1e7SLuigi Rizzo /* first pass: find a destination for each packet in the batch */ 3685*ce3ee1e7SLuigi Rizzo for (i = 0; likely(i < n); i += ft[i].ft_frags) { 3686*ce3ee1e7SLuigi Rizzo uint8_t dst_ring = ring_nr; /* default, same ring as origin */ 3687f18be576SLuigi Rizzo uint16_t dst_port, d_i; 3688f18be576SLuigi Rizzo struct nm_bdg_q *d; 3689f18be576SLuigi Rizzo 3690*ce3ee1e7SLuigi Rizzo ND("slot %d frags %d", i, ft[i].ft_frags); 3691*ce3ee1e7SLuigi Rizzo dst_port = b->nm_bdg_lookup(ft[i].ft_buf, ft[i].ft_len, 3692*ce3ee1e7SLuigi Rizzo &dst_ring, na); 3693*ce3ee1e7SLuigi Rizzo if (netmap_verbose > 255) 3694*ce3ee1e7SLuigi Rizzo RD(5, "slot %d port %d -> %d", i, me, dst_port); 3695*ce3ee1e7SLuigi Rizzo if (dst_port == NM_BDG_NOPORT) 3696f18be576SLuigi Rizzo continue; /* this packet is identified to be dropped */ 3697*ce3ee1e7SLuigi Rizzo else if (unlikely(dst_port > NM_BDG_MAXPORTS)) 3698f18be576SLuigi Rizzo 
continue; 3699*ce3ee1e7SLuigi Rizzo else if (dst_port == NM_BDG_BROADCAST) 3700f18be576SLuigi Rizzo dst_ring = 0; /* broadcasts always go to ring 0 */ 3701*ce3ee1e7SLuigi Rizzo else if (unlikely(dst_port == me || 3702*ce3ee1e7SLuigi Rizzo !b->bdg_ports[dst_port])) 3703f18be576SLuigi Rizzo continue; 3704f18be576SLuigi Rizzo 3705f18be576SLuigi Rizzo /* get a position in the scratch pad */ 3706f18be576SLuigi Rizzo d_i = dst_port * NM_BDG_MAXRINGS + dst_ring; 3707f18be576SLuigi Rizzo d = dst_ents + d_i; 3708*ce3ee1e7SLuigi Rizzo 3709*ce3ee1e7SLuigi Rizzo /* append the first fragment to the list */ 3710*ce3ee1e7SLuigi Rizzo if (d->bq_head == NM_FT_NULL) { /* new destination */ 3711f18be576SLuigi Rizzo d->bq_head = d->bq_tail = i; 3712f18be576SLuigi Rizzo /* remember this position to be scanned later */ 3713f18be576SLuigi Rizzo if (dst_port != NM_BDG_BROADCAST) 3714f18be576SLuigi Rizzo dsts[num_dsts++] = d_i; 371585233a7dSLuigi Rizzo } else { 3716f18be576SLuigi Rizzo ft[d->bq_tail].ft_next = i; 3717f18be576SLuigi Rizzo d->bq_tail = i; 3718f18be576SLuigi Rizzo } 3719*ce3ee1e7SLuigi Rizzo d->bq_len += ft[i].ft_frags; 372085233a7dSLuigi Rizzo } 3721f18be576SLuigi Rizzo 3722*ce3ee1e7SLuigi Rizzo /* 3723*ce3ee1e7SLuigi Rizzo * Broadcast traffic goes to ring 0 on all destinations. 3724*ce3ee1e7SLuigi Rizzo * So we need to add these rings to the list of ports to scan. 3725*ce3ee1e7SLuigi Rizzo * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is 3726*ce3ee1e7SLuigi Rizzo * expensive. We should keep a compact list of active destinations 3727*ce3ee1e7SLuigi Rizzo * so we could shorten this loop. 
3728f18be576SLuigi Rizzo */ 3729f18be576SLuigi Rizzo brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS; 3730*ce3ee1e7SLuigi Rizzo if (brddst->bq_head != NM_FT_NULL) { 3731*ce3ee1e7SLuigi Rizzo for (j = 0; likely(j < b->bdg_active_ports); j++) { 3732*ce3ee1e7SLuigi Rizzo uint16_t d_i; 3733*ce3ee1e7SLuigi Rizzo i = b->bdg_port_index[j]; 3734*ce3ee1e7SLuigi Rizzo if (unlikely(i == me)) 3735f18be576SLuigi Rizzo continue; 3736*ce3ee1e7SLuigi Rizzo d_i = i * NM_BDG_MAXRINGS; 3737*ce3ee1e7SLuigi Rizzo if (dst_ents[d_i].bq_head == NM_FT_NULL) 3738f18be576SLuigi Rizzo dsts[num_dsts++] = d_i; 3739f18be576SLuigi Rizzo } 3740f18be576SLuigi Rizzo } 3741f18be576SLuigi Rizzo 3742*ce3ee1e7SLuigi Rizzo ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts); 3743f18be576SLuigi Rizzo /* second pass: scan destinations (XXX will be modular somehow) */ 3744f18be576SLuigi Rizzo for (i = 0; i < num_dsts; i++) { 3745f18be576SLuigi Rizzo struct ifnet *dst_ifp; 3746f18be576SLuigi Rizzo struct netmap_adapter *dst_na; 3747f196ce38SLuigi Rizzo struct netmap_kring *kring; 3748f196ce38SLuigi Rizzo struct netmap_ring *ring; 3749f18be576SLuigi Rizzo u_int dst_nr, is_vp, lim, j, sent = 0, d_i, next, brd_next; 3750*ce3ee1e7SLuigi Rizzo u_int needed, howmany; 3751*ce3ee1e7SLuigi Rizzo int retry = netmap_txsync_retry; 3752f18be576SLuigi Rizzo struct nm_bdg_q *d; 3753*ce3ee1e7SLuigi Rizzo uint32_t my_start = 0, lease_idx = 0; 3754*ce3ee1e7SLuigi Rizzo int nrings; 3755f196ce38SLuigi Rizzo 3756f18be576SLuigi Rizzo d_i = dsts[i]; 3757*ce3ee1e7SLuigi Rizzo ND("second pass %d port %d", i, d_i); 3758f18be576SLuigi Rizzo d = dst_ents + d_i; 3759*ce3ee1e7SLuigi Rizzo // XXX fix the division 3760*ce3ee1e7SLuigi Rizzo dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS]; 3761f18be576SLuigi Rizzo /* protect from the lookup function returning an inactive 3762f18be576SLuigi Rizzo * destination port 3763f18be576SLuigi Rizzo */ 3764f18be576SLuigi Rizzo if (unlikely(dst_na == NULL)) 3765*ce3ee1e7SLuigi Rizzo goto cleanup; 
3766*ce3ee1e7SLuigi Rizzo if (dst_na->na_flags & NAF_SW_ONLY) 3767*ce3ee1e7SLuigi Rizzo goto cleanup; 3768f18be576SLuigi Rizzo dst_ifp = dst_na->ifp; 3769f18be576SLuigi Rizzo /* 3770f18be576SLuigi Rizzo * The interface may be in !netmap mode in two cases: 3771f18be576SLuigi Rizzo * - when na is attached but not activated yet; 3772f18be576SLuigi Rizzo * - when na is being deactivated but is still attached. 3773f18be576SLuigi Rizzo */ 3774*ce3ee1e7SLuigi Rizzo if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) { 3775*ce3ee1e7SLuigi Rizzo ND("not in netmap mode!"); 3776*ce3ee1e7SLuigi Rizzo goto cleanup; 3777*ce3ee1e7SLuigi Rizzo } 3778f196ce38SLuigi Rizzo 3779f18be576SLuigi Rizzo /* there is at least one either unicast or broadcast packet */ 3780f18be576SLuigi Rizzo brd_next = brddst->bq_head; 3781f18be576SLuigi Rizzo next = d->bq_head; 3782*ce3ee1e7SLuigi Rizzo /* we need to reserve this many slots. If fewer are 3783*ce3ee1e7SLuigi Rizzo * available, some packets will be dropped. 3784*ce3ee1e7SLuigi Rizzo * Packets may have multiple fragments, so we may not use 3785*ce3ee1e7SLuigi Rizzo * there is a chance that we may not use all of the slots 3786*ce3ee1e7SLuigi Rizzo * we have claimed, so we will need to handle the leftover 3787*ce3ee1e7SLuigi Rizzo * ones when we regain the lock. 3788*ce3ee1e7SLuigi Rizzo */ 3789*ce3ee1e7SLuigi Rizzo needed = d->bq_len + brddst->bq_len; 3790f18be576SLuigi Rizzo 3791f18be576SLuigi Rizzo is_vp = nma_is_vp(dst_na); 3792*ce3ee1e7SLuigi Rizzo ND(5, "pass 2 dst %d is %x %s", 3793*ce3ee1e7SLuigi Rizzo i, d_i, is_vp ? 
"virtual" : "nic/host"); 3794f18be576SLuigi Rizzo dst_nr = d_i & (NM_BDG_MAXRINGS-1); 3795f18be576SLuigi Rizzo if (is_vp) { /* virtual port */ 3796*ce3ee1e7SLuigi Rizzo nrings = dst_na->num_rx_rings; 3797*ce3ee1e7SLuigi Rizzo } else { 3798*ce3ee1e7SLuigi Rizzo nrings = dst_na->num_tx_rings; 3799f18be576SLuigi Rizzo } 3800*ce3ee1e7SLuigi Rizzo if (dst_nr >= nrings) 3801*ce3ee1e7SLuigi Rizzo dst_nr = dst_nr % nrings; 3802*ce3ee1e7SLuigi Rizzo kring = is_vp ? &dst_na->rx_rings[dst_nr] : 3803*ce3ee1e7SLuigi Rizzo &dst_na->tx_rings[dst_nr]; 3804*ce3ee1e7SLuigi Rizzo ring = kring->ring; 3805*ce3ee1e7SLuigi Rizzo lim = kring->nkr_num_slots - 1; 3806f18be576SLuigi Rizzo 3807*ce3ee1e7SLuigi Rizzo retry: 3808*ce3ee1e7SLuigi Rizzo 3809*ce3ee1e7SLuigi Rizzo /* reserve the buffers in the queue and an entry 3810*ce3ee1e7SLuigi Rizzo * to report completion, and drop lock. 3811*ce3ee1e7SLuigi Rizzo * XXX this might become a helper function. 3812*ce3ee1e7SLuigi Rizzo */ 3813*ce3ee1e7SLuigi Rizzo mtx_lock(&kring->q_lock); 3814*ce3ee1e7SLuigi Rizzo if (kring->nkr_stopped) { 3815*ce3ee1e7SLuigi Rizzo mtx_unlock(&kring->q_lock); 3816*ce3ee1e7SLuigi Rizzo goto cleanup; 3817*ce3ee1e7SLuigi Rizzo } 3818*ce3ee1e7SLuigi Rizzo /* on physical interfaces, do a txsync to recover 3819*ce3ee1e7SLuigi Rizzo * slots for packets already transmitted. 3820*ce3ee1e7SLuigi Rizzo * XXX maybe we could be optimistic and rely on a retry 3821*ce3ee1e7SLuigi Rizzo * in case of failure. 
3822*ce3ee1e7SLuigi Rizzo */ 3823*ce3ee1e7SLuigi Rizzo if (nma_is_hw(dst_na)) { 3824*ce3ee1e7SLuigi Rizzo dst_na->nm_txsync(dst_ifp, dst_nr, 0); 3825*ce3ee1e7SLuigi Rizzo } 3826*ce3ee1e7SLuigi Rizzo my_start = j = kring->nkr_hwlease; 3827*ce3ee1e7SLuigi Rizzo howmany = nm_kr_space(kring, is_vp); 3828*ce3ee1e7SLuigi Rizzo if (needed < howmany) 3829*ce3ee1e7SLuigi Rizzo howmany = needed; 3830*ce3ee1e7SLuigi Rizzo lease_idx = nm_kr_lease(kring, howmany, is_vp); 3831*ce3ee1e7SLuigi Rizzo mtx_unlock(&kring->q_lock); 3832*ce3ee1e7SLuigi Rizzo 3833*ce3ee1e7SLuigi Rizzo /* only retry if we need more than available slots */ 3834*ce3ee1e7SLuigi Rizzo if (retry && needed <= howmany) 3835*ce3ee1e7SLuigi Rizzo retry = 0; 3836*ce3ee1e7SLuigi Rizzo 3837*ce3ee1e7SLuigi Rizzo /* copy to the destination queue */ 3838*ce3ee1e7SLuigi Rizzo while (howmany > 0) { 3839*ce3ee1e7SLuigi Rizzo struct netmap_slot *slot; 3840*ce3ee1e7SLuigi Rizzo struct nm_bdg_fwd *ft_p, *ft_end; 3841*ce3ee1e7SLuigi Rizzo u_int cnt; 3842*ce3ee1e7SLuigi Rizzo 3843*ce3ee1e7SLuigi Rizzo /* find the queue from which we pick next packet. 3844*ce3ee1e7SLuigi Rizzo * NM_FT_NULL is always higher than valid indexes 384585233a7dSLuigi Rizzo * so we never dereference it if the other list 3846*ce3ee1e7SLuigi Rizzo * has packets (and if both are empty we never 384785233a7dSLuigi Rizzo * get here). 
384885233a7dSLuigi Rizzo */ 3849f18be576SLuigi Rizzo if (next < brd_next) { 3850f18be576SLuigi Rizzo ft_p = ft + next; 3851f18be576SLuigi Rizzo next = ft_p->ft_next; 3852f18be576SLuigi Rizzo } else { /* insert broadcast */ 3853f18be576SLuigi Rizzo ft_p = ft + brd_next; 3854f18be576SLuigi Rizzo brd_next = ft_p->ft_next; 3855f18be576SLuigi Rizzo } 3856*ce3ee1e7SLuigi Rizzo cnt = ft_p->ft_frags; // cnt > 0 3857*ce3ee1e7SLuigi Rizzo if (unlikely(cnt > howmany)) 3858*ce3ee1e7SLuigi Rizzo break; /* no more space */ 3859*ce3ee1e7SLuigi Rizzo howmany -= cnt; 3860*ce3ee1e7SLuigi Rizzo if (netmap_verbose && cnt > 1) 3861*ce3ee1e7SLuigi Rizzo RD(5, "rx %d frags to %d", cnt, j); 3862*ce3ee1e7SLuigi Rizzo ft_end = ft_p + cnt; 3863*ce3ee1e7SLuigi Rizzo do { 3864*ce3ee1e7SLuigi Rizzo void *dst, *src = ft_p->ft_buf; 3865*ce3ee1e7SLuigi Rizzo size_t len = (ft_p->ft_len + 63) & ~63; 3866*ce3ee1e7SLuigi Rizzo 3867f196ce38SLuigi Rizzo slot = &ring->slot[j]; 3868*ce3ee1e7SLuigi Rizzo dst = BDG_NMB(dst_na->nm_mem, slot); 3869*ce3ee1e7SLuigi Rizzo /* round to a multiple of 64 */ 3870*ce3ee1e7SLuigi Rizzo 3871*ce3ee1e7SLuigi Rizzo ND("send %d %d bytes at %s:%d", 3872*ce3ee1e7SLuigi Rizzo i, ft_p->ft_len, dst_ifp->if_xname, j); 387385233a7dSLuigi Rizzo if (ft_p->ft_flags & NS_INDIRECT) { 3874*ce3ee1e7SLuigi Rizzo if (copyin(src, dst, len)) { 3875*ce3ee1e7SLuigi Rizzo // invalid user pointer, pretend len is 0 3876*ce3ee1e7SLuigi Rizzo ft_p->ft_len = 0; 3877*ce3ee1e7SLuigi Rizzo } 387885233a7dSLuigi Rizzo } else { 3879*ce3ee1e7SLuigi Rizzo //memcpy(dst, src, len); 3880*ce3ee1e7SLuigi Rizzo pkt_copy(src, dst, (int)len); 388185233a7dSLuigi Rizzo } 3882f18be576SLuigi Rizzo slot->len = ft_p->ft_len; 3883*ce3ee1e7SLuigi Rizzo slot->flags = (cnt << 8)| NS_MOREFRAG; 3884*ce3ee1e7SLuigi Rizzo j = nm_next(j, lim); 3885*ce3ee1e7SLuigi Rizzo ft_p++; 3886f196ce38SLuigi Rizzo sent++; 3887*ce3ee1e7SLuigi Rizzo } while (ft_p != ft_end); 3888*ce3ee1e7SLuigi Rizzo slot->flags = (cnt << 8); /* clear flag on 
last entry */ 388985233a7dSLuigi Rizzo /* are we done ? */ 3890*ce3ee1e7SLuigi Rizzo if (next == NM_FT_NULL && brd_next == NM_FT_NULL) 3891f18be576SLuigi Rizzo break; 3892f196ce38SLuigi Rizzo } 3893*ce3ee1e7SLuigi Rizzo { 3894*ce3ee1e7SLuigi Rizzo /* current position */ 3895*ce3ee1e7SLuigi Rizzo uint32_t *p = kring->nkr_leases; /* shorthand */ 3896*ce3ee1e7SLuigi Rizzo uint32_t update_pos; 3897*ce3ee1e7SLuigi Rizzo int still_locked = 1; 3898*ce3ee1e7SLuigi Rizzo 3899*ce3ee1e7SLuigi Rizzo mtx_lock(&kring->q_lock); 3900*ce3ee1e7SLuigi Rizzo if (unlikely(howmany > 0)) { 3901*ce3ee1e7SLuigi Rizzo /* not used all bufs. If i am the last one 3902*ce3ee1e7SLuigi Rizzo * i can recover the slots, otherwise must 3903*ce3ee1e7SLuigi Rizzo * fill them with 0 to mark empty packets. 3904*ce3ee1e7SLuigi Rizzo */ 3905*ce3ee1e7SLuigi Rizzo ND("leftover %d bufs", howmany); 3906*ce3ee1e7SLuigi Rizzo if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) { 3907*ce3ee1e7SLuigi Rizzo /* yes i am the last one */ 3908*ce3ee1e7SLuigi Rizzo ND("roll back nkr_hwlease to %d", j); 3909*ce3ee1e7SLuigi Rizzo kring->nkr_hwlease = j; 3910f18be576SLuigi Rizzo } else { 3911*ce3ee1e7SLuigi Rizzo while (howmany-- > 0) { 3912*ce3ee1e7SLuigi Rizzo ring->slot[j].len = 0; 3913*ce3ee1e7SLuigi Rizzo ring->slot[j].flags = 0; 3914*ce3ee1e7SLuigi Rizzo j = nm_next(j, lim); 3915*ce3ee1e7SLuigi Rizzo } 3916*ce3ee1e7SLuigi Rizzo } 3917*ce3ee1e7SLuigi Rizzo } 3918*ce3ee1e7SLuigi Rizzo p[lease_idx] = j; /* report I am done */ 3919*ce3ee1e7SLuigi Rizzo 3920*ce3ee1e7SLuigi Rizzo update_pos = is_vp ? nm_kr_rxpos(kring) : ring->cur; 3921*ce3ee1e7SLuigi Rizzo 3922*ce3ee1e7SLuigi Rizzo if (my_start == update_pos) { 3923*ce3ee1e7SLuigi Rizzo /* all slots before my_start have been reported, 3924*ce3ee1e7SLuigi Rizzo * so scan subsequent leases to see if other ranges 3925*ce3ee1e7SLuigi Rizzo * have been completed, and to a selwakeup or txsync. 
3926*ce3ee1e7SLuigi Rizzo */ 3927*ce3ee1e7SLuigi Rizzo while (lease_idx != kring->nkr_lease_idx && 3928*ce3ee1e7SLuigi Rizzo p[lease_idx] != NR_NOSLOT) { 3929*ce3ee1e7SLuigi Rizzo j = p[lease_idx]; 3930*ce3ee1e7SLuigi Rizzo p[lease_idx] = NR_NOSLOT; 3931*ce3ee1e7SLuigi Rizzo lease_idx = nm_next(lease_idx, lim); 3932*ce3ee1e7SLuigi Rizzo } 3933*ce3ee1e7SLuigi Rizzo /* j is the new 'write' position. j != my_start 3934*ce3ee1e7SLuigi Rizzo * means there are new buffers to report 3935*ce3ee1e7SLuigi Rizzo */ 3936*ce3ee1e7SLuigi Rizzo if (likely(j != my_start)) { 3937*ce3ee1e7SLuigi Rizzo if (is_vp) { 3938*ce3ee1e7SLuigi Rizzo uint32_t old_avail = kring->nr_hwavail; 3939*ce3ee1e7SLuigi Rizzo 3940*ce3ee1e7SLuigi Rizzo kring->nr_hwavail = (j >= kring->nr_hwcur) ? 3941*ce3ee1e7SLuigi Rizzo j - kring->nr_hwcur : 3942*ce3ee1e7SLuigi Rizzo j + lim + 1 - kring->nr_hwcur; 3943*ce3ee1e7SLuigi Rizzo if (kring->nr_hwavail < old_avail) { 3944*ce3ee1e7SLuigi Rizzo D("avail shrink %d -> %d", 3945*ce3ee1e7SLuigi Rizzo old_avail, kring->nr_hwavail); 3946*ce3ee1e7SLuigi Rizzo } 3947*ce3ee1e7SLuigi Rizzo still_locked = 0; 3948*ce3ee1e7SLuigi Rizzo mtx_unlock(&kring->q_lock); 3949*ce3ee1e7SLuigi Rizzo selwakeuppri(&kring->si, PI_NET); 3950*ce3ee1e7SLuigi Rizzo } else { 3951f18be576SLuigi Rizzo ring->cur = j; 3952*ce3ee1e7SLuigi Rizzo /* XXX update avail ? 
*/ 3953*ce3ee1e7SLuigi Rizzo still_locked = 0; 3954f18be576SLuigi Rizzo dst_na->nm_txsync(dst_ifp, dst_nr, 0); 3955*ce3ee1e7SLuigi Rizzo mtx_unlock(&kring->q_lock); 3956*ce3ee1e7SLuigi Rizzo 3957f18be576SLuigi Rizzo /* retry to send more packets */ 3958*ce3ee1e7SLuigi Rizzo if (nma_is_hw(dst_na) && retry--) 3959f18be576SLuigi Rizzo goto retry; 3960f18be576SLuigi Rizzo } 3961f18be576SLuigi Rizzo } 3962*ce3ee1e7SLuigi Rizzo } 3963*ce3ee1e7SLuigi Rizzo if (still_locked) 3964*ce3ee1e7SLuigi Rizzo mtx_unlock(&kring->q_lock); 3965*ce3ee1e7SLuigi Rizzo } 3966*ce3ee1e7SLuigi Rizzo cleanup: 3967*ce3ee1e7SLuigi Rizzo d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */ 3968*ce3ee1e7SLuigi Rizzo d->bq_len = 0; 3969*ce3ee1e7SLuigi Rizzo } 3970*ce3ee1e7SLuigi Rizzo brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */ 3971*ce3ee1e7SLuigi Rizzo brddst->bq_len = 0; 3972f196ce38SLuigi Rizzo return 0; 3973f196ce38SLuigi Rizzo } 3974f196ce38SLuigi Rizzo 3975f18be576SLuigi Rizzo 3976f196ce38SLuigi Rizzo /* 3977*ce3ee1e7SLuigi Rizzo * main dispatch routine for the bridge. 3978*ce3ee1e7SLuigi Rizzo * We already know that only one thread is running this. 3979*ce3ee1e7SLuigi Rizzo * we must run nm_bdg_preflush without lock. 
3980f196ce38SLuigi Rizzo */ 3981f196ce38SLuigi Rizzo static int 3982*ce3ee1e7SLuigi Rizzo bdg_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags) 3983f196ce38SLuigi Rizzo { 3984f196ce38SLuigi Rizzo struct netmap_adapter *na = NA(ifp); 3985f196ce38SLuigi Rizzo struct netmap_kring *kring = &na->tx_rings[ring_nr]; 3986f196ce38SLuigi Rizzo struct netmap_ring *ring = kring->ring; 3987*ce3ee1e7SLuigi Rizzo u_int j, k, lim = kring->nkr_num_slots - 1; 3988f196ce38SLuigi Rizzo 3989f196ce38SLuigi Rizzo k = ring->cur; 3990f196ce38SLuigi Rizzo if (k > lim) 3991f196ce38SLuigi Rizzo return netmap_ring_reinit(kring); 3992f196ce38SLuigi Rizzo 3993*ce3ee1e7SLuigi Rizzo if (bridge_batch <= 0) { /* testing only */ 3994f196ce38SLuigi Rizzo j = k; // used all 3995f196ce38SLuigi Rizzo goto done; 3996f196ce38SLuigi Rizzo } 3997*ce3ee1e7SLuigi Rizzo if (bridge_batch > NM_BDG_BATCH) 3998*ce3ee1e7SLuigi Rizzo bridge_batch = NM_BDG_BATCH; 3999f196ce38SLuigi Rizzo 4000f18be576SLuigi Rizzo j = nm_bdg_preflush(na, ring_nr, kring, k); 4001f196ce38SLuigi Rizzo if (j != k) 4002f196ce38SLuigi Rizzo D("early break at %d/ %d, avail %d", j, k, kring->nr_hwavail); 4003*ce3ee1e7SLuigi Rizzo /* k-j modulo ring size is the number of slots processed */ 4004*ce3ee1e7SLuigi Rizzo if (k < j) 4005*ce3ee1e7SLuigi Rizzo k += kring->nkr_num_slots; 4006*ce3ee1e7SLuigi Rizzo kring->nr_hwavail = lim - (k - j); 4007f196ce38SLuigi Rizzo 4008f196ce38SLuigi Rizzo done: 4009f196ce38SLuigi Rizzo kring->nr_hwcur = j; 4010f196ce38SLuigi Rizzo ring->avail = kring->nr_hwavail; 4011f196ce38SLuigi Rizzo if (netmap_verbose) 4012*ce3ee1e7SLuigi Rizzo D("%s ring %d flags %d", ifp->if_xname, ring_nr, flags); 4013f196ce38SLuigi Rizzo return 0; 4014f196ce38SLuigi Rizzo } 4015f196ce38SLuigi Rizzo 4016f18be576SLuigi Rizzo 4017*ce3ee1e7SLuigi Rizzo /* 4018*ce3ee1e7SLuigi Rizzo * user process reading from a VALE switch. 
4019*ce3ee1e7SLuigi Rizzo * Already protected against concurrent calls from userspace, 4020*ce3ee1e7SLuigi Rizzo * but we must acquire the queue's lock to protect against 4021*ce3ee1e7SLuigi Rizzo * writers on the same queue. 4022*ce3ee1e7SLuigi Rizzo */ 4023f196ce38SLuigi Rizzo static int 4024*ce3ee1e7SLuigi Rizzo bdg_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags) 4025f196ce38SLuigi Rizzo { 4026f196ce38SLuigi Rizzo struct netmap_adapter *na = NA(ifp); 4027f196ce38SLuigi Rizzo struct netmap_kring *kring = &na->rx_rings[ring_nr]; 4028f196ce38SLuigi Rizzo struct netmap_ring *ring = kring->ring; 4029f18be576SLuigi Rizzo u_int j, lim = kring->nkr_num_slots - 1; 4030f196ce38SLuigi Rizzo u_int k = ring->cur, resvd = ring->reserved; 4031f18be576SLuigi Rizzo int n; 4032f196ce38SLuigi Rizzo 4033*ce3ee1e7SLuigi Rizzo mtx_lock(&kring->q_lock); 4034*ce3ee1e7SLuigi Rizzo if (k > lim) { 4035*ce3ee1e7SLuigi Rizzo D("ouch dangerous reset!!!"); 4036*ce3ee1e7SLuigi Rizzo n = netmap_ring_reinit(kring); 4037*ce3ee1e7SLuigi Rizzo goto done; 4038*ce3ee1e7SLuigi Rizzo } 4039f196ce38SLuigi Rizzo 4040f196ce38SLuigi Rizzo /* skip past packets that userspace has released */ 4041f196ce38SLuigi Rizzo j = kring->nr_hwcur; /* netmap ring index */ 4042f196ce38SLuigi Rizzo if (resvd > 0) { 4043f196ce38SLuigi Rizzo if (resvd + ring->avail >= lim + 1) { 4044f196ce38SLuigi Rizzo D("XXX invalid reserve/avail %d %d", resvd, ring->avail); 4045f196ce38SLuigi Rizzo ring->reserved = resvd = 0; // XXX panic... 4046f196ce38SLuigi Rizzo } 4047f196ce38SLuigi Rizzo k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd; 4048f196ce38SLuigi Rizzo } 4049f196ce38SLuigi Rizzo 4050f196ce38SLuigi Rizzo if (j != k) { /* userspace has released some packets. 
*/ 4051f196ce38SLuigi Rizzo n = k - j; 4052f196ce38SLuigi Rizzo if (n < 0) 4053f196ce38SLuigi Rizzo n += kring->nkr_num_slots; 4054f196ce38SLuigi Rizzo ND("userspace releases %d packets", n); 4055f196ce38SLuigi Rizzo for (n = 0; likely(j != k); n++) { 4056f196ce38SLuigi Rizzo struct netmap_slot *slot = &ring->slot[j]; 4057*ce3ee1e7SLuigi Rizzo void *addr = BDG_NMB(na->nm_mem, slot); 4058f196ce38SLuigi Rizzo 4059f196ce38SLuigi Rizzo if (addr == netmap_buffer_base) { /* bad buf */ 4060*ce3ee1e7SLuigi Rizzo D("bad buffer index %d, ignore ?", 4061*ce3ee1e7SLuigi Rizzo slot->buf_idx); 4062f196ce38SLuigi Rizzo } 4063f196ce38SLuigi Rizzo slot->flags &= ~NS_BUF_CHANGED; 4064*ce3ee1e7SLuigi Rizzo j = nm_next(j, lim); 4065f196ce38SLuigi Rizzo } 4066f196ce38SLuigi Rizzo kring->nr_hwavail -= n; 4067f196ce38SLuigi Rizzo kring->nr_hwcur = k; 4068f196ce38SLuigi Rizzo } 4069f196ce38SLuigi Rizzo /* tell userspace that there are new packets */ 4070f196ce38SLuigi Rizzo ring->avail = kring->nr_hwavail - resvd; 4071*ce3ee1e7SLuigi Rizzo n = 0; 4072*ce3ee1e7SLuigi Rizzo done: 4073*ce3ee1e7SLuigi Rizzo mtx_unlock(&kring->q_lock); 4074*ce3ee1e7SLuigi Rizzo return n; 4075f196ce38SLuigi Rizzo } 4076f196ce38SLuigi Rizzo 4077f18be576SLuigi Rizzo 4078f196ce38SLuigi Rizzo static void 4079f18be576SLuigi Rizzo bdg_netmap_attach(struct netmap_adapter *arg) 4080f196ce38SLuigi Rizzo { 4081f196ce38SLuigi Rizzo struct netmap_adapter na; 4082f196ce38SLuigi Rizzo 4083f196ce38SLuigi Rizzo ND("attaching virtual bridge"); 4084f196ce38SLuigi Rizzo bzero(&na, sizeof(na)); 4085f196ce38SLuigi Rizzo 4086f18be576SLuigi Rizzo na.ifp = arg->ifp; 4087*ce3ee1e7SLuigi Rizzo na.na_flags = NAF_BDG_MAYSLEEP | NAF_MEM_OWNER; 4088f18be576SLuigi Rizzo na.num_tx_rings = arg->num_tx_rings; 4089f18be576SLuigi Rizzo na.num_rx_rings = arg->num_rx_rings; 4090*ce3ee1e7SLuigi Rizzo na.num_tx_desc = arg->num_tx_desc; 4091*ce3ee1e7SLuigi Rizzo na.num_rx_desc = arg->num_rx_desc; 4092f196ce38SLuigi Rizzo na.nm_txsync = 
bdg_netmap_txsync; 4093f196ce38SLuigi Rizzo na.nm_rxsync = bdg_netmap_rxsync; 4094f196ce38SLuigi Rizzo na.nm_register = bdg_netmap_reg; 4095*ce3ee1e7SLuigi Rizzo na.nm_mem = netmap_mem_private_new(arg->ifp->if_xname, 4096*ce3ee1e7SLuigi Rizzo na.num_tx_rings, na.num_tx_desc, 4097*ce3ee1e7SLuigi Rizzo na.num_rx_rings, na.num_rx_desc); 4098f18be576SLuigi Rizzo netmap_attach(&na, na.num_tx_rings); 4099f196ce38SLuigi Rizzo } 4100f196ce38SLuigi Rizzo 4101babc7c12SLuigi Rizzo 4102babc7c12SLuigi Rizzo static struct cdev *netmap_dev; /* /dev/netmap character device. */ 4103babc7c12SLuigi Rizzo 4104babc7c12SLuigi Rizzo 41051a26580eSLuigi Rizzo /* 410668b8534bSLuigi Rizzo * Module loader. 410768b8534bSLuigi Rizzo * 410868b8534bSLuigi Rizzo * Create the /dev/netmap device and initialize all global 410968b8534bSLuigi Rizzo * variables. 411068b8534bSLuigi Rizzo * 411168b8534bSLuigi Rizzo * Return 0 on success, errno on failure. 411268b8534bSLuigi Rizzo */ 411368b8534bSLuigi Rizzo static int 411468b8534bSLuigi Rizzo netmap_init(void) 411568b8534bSLuigi Rizzo { 4116*ce3ee1e7SLuigi Rizzo int i, error; 411768b8534bSLuigi Rizzo 4118*ce3ee1e7SLuigi Rizzo NMG_LOCK_INIT(); 4119*ce3ee1e7SLuigi Rizzo 4120*ce3ee1e7SLuigi Rizzo error = netmap_mem_init(); 412168b8534bSLuigi Rizzo if (error != 0) { 412242a3a5bdSLuigi Rizzo printf("netmap: unable to initialize the memory allocator.\n"); 412368b8534bSLuigi Rizzo return (error); 412468b8534bSLuigi Rizzo } 41258241616dSLuigi Rizzo printf("netmap: loaded module\n"); 412668b8534bSLuigi Rizzo netmap_dev = make_dev(&netmap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0660, 412768b8534bSLuigi Rizzo "netmap"); 4128f196ce38SLuigi Rizzo 4129f18be576SLuigi Rizzo bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */ 4130f196ce38SLuigi Rizzo for (i = 0; i < NM_BRIDGES; i++) 4131*ce3ee1e7SLuigi Rizzo BDG_RWINIT(&nm_bridges[i]); 4132babc7c12SLuigi Rizzo return (error); 413368b8534bSLuigi Rizzo } 413468b8534bSLuigi Rizzo 413568b8534bSLuigi Rizzo 
413668b8534bSLuigi Rizzo /* 413768b8534bSLuigi Rizzo * Module unloader. 413868b8534bSLuigi Rizzo * 413968b8534bSLuigi Rizzo * Free all the memory, and destroy the ``/dev/netmap`` device. 414068b8534bSLuigi Rizzo */ 414168b8534bSLuigi Rizzo static void 414268b8534bSLuigi Rizzo netmap_fini(void) 414368b8534bSLuigi Rizzo { 414468b8534bSLuigi Rizzo destroy_dev(netmap_dev); 4145*ce3ee1e7SLuigi Rizzo netmap_mem_fini(); 4146*ce3ee1e7SLuigi Rizzo NMG_LOCK_DESTROY(); 414768b8534bSLuigi Rizzo printf("netmap: unloaded module.\n"); 414868b8534bSLuigi Rizzo } 414968b8534bSLuigi Rizzo 415068b8534bSLuigi Rizzo 4151f196ce38SLuigi Rizzo #ifdef __FreeBSD__ 415268b8534bSLuigi Rizzo /* 415368b8534bSLuigi Rizzo * Kernel entry point. 415468b8534bSLuigi Rizzo * 415568b8534bSLuigi Rizzo * Initialize/finalize the module and return. 415668b8534bSLuigi Rizzo * 415768b8534bSLuigi Rizzo * Return 0 on success, errno on failure. 415868b8534bSLuigi Rizzo */ 415968b8534bSLuigi Rizzo static int 416068b8534bSLuigi Rizzo netmap_loader(__unused struct module *module, int event, __unused void *arg) 416168b8534bSLuigi Rizzo { 416268b8534bSLuigi Rizzo int error = 0; 416368b8534bSLuigi Rizzo 416468b8534bSLuigi Rizzo switch (event) { 416568b8534bSLuigi Rizzo case MOD_LOAD: 416668b8534bSLuigi Rizzo error = netmap_init(); 416768b8534bSLuigi Rizzo break; 416868b8534bSLuigi Rizzo 416968b8534bSLuigi Rizzo case MOD_UNLOAD: 417068b8534bSLuigi Rizzo netmap_fini(); 417168b8534bSLuigi Rizzo break; 417268b8534bSLuigi Rizzo 417368b8534bSLuigi Rizzo default: 417468b8534bSLuigi Rizzo error = EOPNOTSUPP; 417568b8534bSLuigi Rizzo break; 417668b8534bSLuigi Rizzo } 417768b8534bSLuigi Rizzo 417868b8534bSLuigi Rizzo return (error); 417968b8534bSLuigi Rizzo } 418068b8534bSLuigi Rizzo 418168b8534bSLuigi Rizzo 418268b8534bSLuigi Rizzo DEV_MODULE(netmap, netmap_loader, NULL); 4183f196ce38SLuigi Rizzo #endif /* __FreeBSD__ */ 4184