xref: /freebsd/sys/dev/netmap/netmap.c (revision 37e3a6d349581b4dd0aebf24be7b1b159a698dcf)
168b8534bSLuigi Rizzo /*
2*37e3a6d3SLuigi Rizzo  * Copyright (C) 2011-2014 Matteo Landi
3*37e3a6d3SLuigi Rizzo  * Copyright (C) 2011-2016 Luigi Rizzo
4*37e3a6d3SLuigi Rizzo  * Copyright (C) 2011-2016 Giuseppe Lettieri
5*37e3a6d3SLuigi Rizzo  * Copyright (C) 2011-2016 Vincenzo Maffione
6*37e3a6d3SLuigi Rizzo  * All rights reserved.
768b8534bSLuigi Rizzo  *
868b8534bSLuigi Rizzo  * Redistribution and use in source and binary forms, with or without
968b8534bSLuigi Rizzo  * modification, are permitted provided that the following conditions
1068b8534bSLuigi Rizzo  * are met:
1168b8534bSLuigi Rizzo  *   1. Redistributions of source code must retain the above copyright
1268b8534bSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer.
1368b8534bSLuigi Rizzo  *   2. Redistributions in binary form must reproduce the above copyright
1468b8534bSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer in the
1568b8534bSLuigi Rizzo  *      documentation and/or other materials provided with the distribution.
1668b8534bSLuigi Rizzo  *
1768b8534bSLuigi Rizzo  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1868b8534bSLuigi Rizzo  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1968b8534bSLuigi Rizzo  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2068b8534bSLuigi Rizzo  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2168b8534bSLuigi Rizzo  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2268b8534bSLuigi Rizzo  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2368b8534bSLuigi Rizzo  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2468b8534bSLuigi Rizzo  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2568b8534bSLuigi Rizzo  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2668b8534bSLuigi Rizzo  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2768b8534bSLuigi Rizzo  * SUCH DAMAGE.
2868b8534bSLuigi Rizzo  */
2968b8534bSLuigi Rizzo 
30ce3ee1e7SLuigi Rizzo 
3168b8534bSLuigi Rizzo /*
32f9790aebSLuigi Rizzo  * $FreeBSD$
33f9790aebSLuigi Rizzo  *
3468b8534bSLuigi Rizzo  * This module supports memory mapped access to network devices,
3568b8534bSLuigi Rizzo  * see netmap(4).
3668b8534bSLuigi Rizzo  *
3768b8534bSLuigi Rizzo  * The module uses a large, memory pool allocated by the kernel
3868b8534bSLuigi Rizzo  * and accessible as mmapped memory by multiple userspace threads/processes.
3968b8534bSLuigi Rizzo  * The memory pool contains packet buffers and "netmap rings",
4068b8534bSLuigi Rizzo  * i.e. user-accessible copies of the interface's queues.
4168b8534bSLuigi Rizzo  *
4268b8534bSLuigi Rizzo  * Access to the network card works like this:
4368b8534bSLuigi Rizzo  * 1. a process/thread issues one or more open() on /dev/netmap, to create
4468b8534bSLuigi Rizzo  *    select()able file descriptor on which events are reported.
4568b8534bSLuigi Rizzo  * 2. on each descriptor, the process issues an ioctl() to identify
4668b8534bSLuigi Rizzo  *    the interface that should report events to the file descriptor.
4768b8534bSLuigi Rizzo  * 3. on each descriptor, the process issues an mmap() request to
4868b8534bSLuigi Rizzo  *    map the shared memory region within the process' address space.
4968b8534bSLuigi Rizzo  *    The list of interesting queues is indicated by a location in
5068b8534bSLuigi Rizzo  *    the shared memory region.
5168b8534bSLuigi Rizzo  * 4. using the functions in the netmap(4) userspace API, a process
5268b8534bSLuigi Rizzo  *    can look up the occupation state of a queue, access memory buffers,
5368b8534bSLuigi Rizzo  *    and retrieve received packets or enqueue packets to transmit.
5468b8534bSLuigi Rizzo  * 5. using some ioctl()s the process can synchronize the userspace view
5568b8534bSLuigi Rizzo  *    of the queue with the actual status in the kernel. This includes both
5668b8534bSLuigi Rizzo  *    receiving the notification of new packets, and transmitting new
5768b8534bSLuigi Rizzo  *    packets on the output interface.
5868b8534bSLuigi Rizzo  * 6. select() or poll() can be used to wait for events on individual
5968b8534bSLuigi Rizzo  *    transmit or receive queues (or all queues for a given interface).
60ce3ee1e7SLuigi Rizzo  *
61ce3ee1e7SLuigi Rizzo 
62ce3ee1e7SLuigi Rizzo 		SYNCHRONIZATION (USER)
63ce3ee1e7SLuigi Rizzo 
64ce3ee1e7SLuigi Rizzo The netmap rings and data structures may be shared among multiple
65ce3ee1e7SLuigi Rizzo user threads or even independent processes.
66ce3ee1e7SLuigi Rizzo Any synchronization among those threads/processes is delegated
67ce3ee1e7SLuigi Rizzo to the threads themselves. Only one thread at a time can be in
68ce3ee1e7SLuigi Rizzo a system call on the same netmap ring. The OS does not enforce
69ce3ee1e7SLuigi Rizzo this and only guarantees against system crashes in case of
70ce3ee1e7SLuigi Rizzo invalid usage.
71ce3ee1e7SLuigi Rizzo 
72ce3ee1e7SLuigi Rizzo 		LOCKING (INTERNAL)
73ce3ee1e7SLuigi Rizzo 
74ce3ee1e7SLuigi Rizzo Within the kernel, access to the netmap rings is protected as follows:
75ce3ee1e7SLuigi Rizzo 
76ce3ee1e7SLuigi Rizzo - a spinlock on each ring, to handle producer/consumer races on
77ce3ee1e7SLuigi Rizzo   RX rings attached to the host stack (against multiple host
78ce3ee1e7SLuigi Rizzo   threads writing from the host stack to the same ring),
79ce3ee1e7SLuigi Rizzo   and on 'destination' rings attached to a VALE switch
80ce3ee1e7SLuigi Rizzo   (i.e. RX rings in VALE ports, and TX rings in NIC/host ports)
81ce3ee1e7SLuigi Rizzo   protecting multiple active senders for the same destination)
82ce3ee1e7SLuigi Rizzo 
83ce3ee1e7SLuigi Rizzo - an atomic variable to guarantee that there is at most one
84ce3ee1e7SLuigi Rizzo   instance of *_*xsync() on the ring at any time.
85ce3ee1e7SLuigi Rizzo   For rings connected to user file
86ce3ee1e7SLuigi Rizzo   descriptors, an atomic_test_and_set() protects this, and the
87ce3ee1e7SLuigi Rizzo   lock on the ring is not actually used.
88ce3ee1e7SLuigi Rizzo   For NIC RX rings connected to a VALE switch, an atomic_test_and_set()
89ce3ee1e7SLuigi Rizzo   is also used to prevent multiple executions (the driver might indeed
90ce3ee1e7SLuigi Rizzo   already guarantee this).
91ce3ee1e7SLuigi Rizzo   For NIC TX rings connected to a VALE switch, the lock arbitrates
92ce3ee1e7SLuigi Rizzo   access to the queue (both when allocating buffers and when pushing
93ce3ee1e7SLuigi Rizzo   them out).
94ce3ee1e7SLuigi Rizzo 
95ce3ee1e7SLuigi Rizzo - *xsync() should be protected against initializations of the card.
96ce3ee1e7SLuigi Rizzo   On FreeBSD most devices have the reset routine protected by
97ce3ee1e7SLuigi Rizzo   a RING lock (ixgbe, igb, em) or core lock (re). lem is missing
98ce3ee1e7SLuigi Rizzo   the RING protection on rx_reset(), this should be added.
99ce3ee1e7SLuigi Rizzo 
100ce3ee1e7SLuigi Rizzo   On linux there is an external lock on the tx path, which probably
101ce3ee1e7SLuigi Rizzo   also arbitrates access to the reset routine. XXX to be revised
102ce3ee1e7SLuigi Rizzo 
103ce3ee1e7SLuigi Rizzo - a per-interface core_lock protecting access from the host stack
104ce3ee1e7SLuigi Rizzo   while interfaces may be detached from netmap mode.
105ce3ee1e7SLuigi Rizzo   XXX there should be no need for this lock if we detach the interfaces
106ce3ee1e7SLuigi Rizzo   only while they are down.
107ce3ee1e7SLuigi Rizzo 
108ce3ee1e7SLuigi Rizzo 
109ce3ee1e7SLuigi Rizzo --- VALE SWITCH ---
110ce3ee1e7SLuigi Rizzo 
111ce3ee1e7SLuigi Rizzo NMG_LOCK() serializes all modifications to switches and ports.
112ce3ee1e7SLuigi Rizzo A switch cannot be deleted until all ports are gone.
113ce3ee1e7SLuigi Rizzo 
114ce3ee1e7SLuigi Rizzo For each switch, an SX lock (RWlock on linux) protects
115ce3ee1e7SLuigi Rizzo deletion of ports. When configuring or deleting a new port, the
116ce3ee1e7SLuigi Rizzo lock is acquired in exclusive mode (after holding NMG_LOCK).
117ce3ee1e7SLuigi Rizzo When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
118ce3ee1e7SLuigi Rizzo The lock is held throughout the entire forwarding cycle,
119ce3ee1e7SLuigi Rizzo during which the thread may incur a page fault.
120ce3ee1e7SLuigi Rizzo Hence it is important that sleepable shared locks are used.
121ce3ee1e7SLuigi Rizzo 
122ce3ee1e7SLuigi Rizzo On the rx ring, the per-port lock is grabbed initially to reserve
123ce3ee1e7SLuigi Rizzo a number of slots in the ring, then the lock is released,
124ce3ee1e7SLuigi Rizzo packets are copied from source to destination, and then
125ce3ee1e7SLuigi Rizzo the lock is acquired again and the receive ring is updated.
126ce3ee1e7SLuigi Rizzo (A similar thing is done on the tx ring for NIC and host stack
127ce3ee1e7SLuigi Rizzo ports attached to the switch)
128ce3ee1e7SLuigi Rizzo 
12968b8534bSLuigi Rizzo  */
13068b8534bSLuigi Rizzo 
1314bf50f18SLuigi Rizzo 
1324bf50f18SLuigi Rizzo /* --- internals ----
1334bf50f18SLuigi Rizzo  *
1344bf50f18SLuigi Rizzo  * Roadmap to the code that implements the above.
1354bf50f18SLuigi Rizzo  *
1364bf50f18SLuigi Rizzo  * > 1. a process/thread issues one or more open() on /dev/netmap, to create
1374bf50f18SLuigi Rizzo  * >    select()able file descriptor on which events are reported.
1384bf50f18SLuigi Rizzo  *
1394bf50f18SLuigi Rizzo  *  	Internally, we allocate a netmap_priv_d structure, that will be
140*37e3a6d3SLuigi Rizzo  *  	initialized on ioctl(NIOCREGIF). There is one netmap_priv_d
141*37e3a6d3SLuigi Rizzo  *  	structure for each open().
1424bf50f18SLuigi Rizzo  *
1434bf50f18SLuigi Rizzo  *      os-specific:
144*37e3a6d3SLuigi Rizzo  *  	    FreeBSD: see netmap_open() (netmap_freebsd.c)
145*37e3a6d3SLuigi Rizzo  *  	    linux:   see linux_netmap_open() (netmap_linux.c)
1464bf50f18SLuigi Rizzo  *
1474bf50f18SLuigi Rizzo  * > 2. on each descriptor, the process issues an ioctl() to identify
1484bf50f18SLuigi Rizzo  * >    the interface that should report events to the file descriptor.
1494bf50f18SLuigi Rizzo  *
1504bf50f18SLuigi Rizzo  * 	Implemented by netmap_ioctl(), NIOCREGIF case, with nmr->nr_cmd==0.
1514bf50f18SLuigi Rizzo  * 	Most important things happen in netmap_get_na() and
1524bf50f18SLuigi Rizzo  * 	netmap_do_regif(), called from there. Additional details can be
1534bf50f18SLuigi Rizzo  * 	found in the comments above those functions.
1544bf50f18SLuigi Rizzo  *
1554bf50f18SLuigi Rizzo  * 	In all cases, this action creates/takes-a-reference-to a
1564bf50f18SLuigi Rizzo  * 	netmap_*_adapter describing the port, and allocates a netmap_if
1574bf50f18SLuigi Rizzo  * 	and all necessary netmap rings, filling them with netmap buffers.
1584bf50f18SLuigi Rizzo  *
1594bf50f18SLuigi Rizzo  *      In this phase, the sync callbacks for each ring are set (these are used
1604bf50f18SLuigi Rizzo  *      in steps 5 and 6 below).  The callbacks depend on the type of adapter.
1614bf50f18SLuigi Rizzo  *      The adapter creation/initialization code puts them in the
1624bf50f18SLuigi Rizzo  * 	netmap_adapter (fields na->nm_txsync and na->nm_rxsync).  Then, they
1634bf50f18SLuigi Rizzo  * 	are copied from there to the netmap_kring's during netmap_do_regif(), by
1644bf50f18SLuigi Rizzo  * 	the nm_krings_create() callback.  All the nm_krings_create callbacks
1654bf50f18SLuigi Rizzo  * 	actually call netmap_krings_create() to perform this and the other
1664bf50f18SLuigi Rizzo  * 	common stuff. netmap_krings_create() also takes care of the host rings,
1674bf50f18SLuigi Rizzo  * 	if needed, by setting their sync callbacks appropriately.
1684bf50f18SLuigi Rizzo  *
1694bf50f18SLuigi Rizzo  * 	Additional actions depend on the kind of netmap_adapter that has been
1704bf50f18SLuigi Rizzo  * 	registered:
1714bf50f18SLuigi Rizzo  *
1724bf50f18SLuigi Rizzo  * 	- netmap_hw_adapter:  	     [netmap.c]
1734bf50f18SLuigi Rizzo  * 	     This is a system netdev/ifp with native netmap support.
1744bf50f18SLuigi Rizzo  * 	     The ifp is detached from the host stack by redirecting:
1754bf50f18SLuigi Rizzo  * 	       - transmissions (from the network stack) to netmap_transmit()
1764bf50f18SLuigi Rizzo  * 	       - receive notifications to the nm_notify() callback for
1774bf50f18SLuigi Rizzo  * 	         this adapter. The callback is normally netmap_notify(), unless
1784bf50f18SLuigi Rizzo  * 	         the ifp is attached to a bridge using bwrap, in which case it
1794bf50f18SLuigi Rizzo  * 	         is netmap_bwrap_intr_notify().
1804bf50f18SLuigi Rizzo  *
1814bf50f18SLuigi Rizzo  * 	- netmap_generic_adapter:      [netmap_generic.c]
1824bf50f18SLuigi Rizzo  * 	      A system netdev/ifp without native netmap support.
1834bf50f18SLuigi Rizzo  *
1844bf50f18SLuigi Rizzo  * 	(the decision about native/non native support is taken in
1854bf50f18SLuigi Rizzo  * 	 netmap_get_hw_na(), called by netmap_get_na())
1864bf50f18SLuigi Rizzo  *
1874bf50f18SLuigi Rizzo  * 	- netmap_vp_adapter 		[netmap_vale.c]
1884bf50f18SLuigi Rizzo  * 	      Returned by netmap_get_bdg_na().
1894bf50f18SLuigi Rizzo  * 	      This is a persistent or ephemeral VALE port. Ephemeral ports
1904bf50f18SLuigi Rizzo  * 	      are created on the fly if they don't already exist, and are
1914bf50f18SLuigi Rizzo  * 	      always attached to a bridge.
192453130d9SPedro F. Giffuni  * 	      Persistent VALE ports must be created separately, and
1934bf50f18SLuigi Rizzo  * 	      then attached like normal NICs. The NIOCREGIF we are examining
1944bf50f18SLuigi Rizzo  * 	      will find them only if they had previously been created and
1954bf50f18SLuigi Rizzo  * 	      attached (see VALE_CTL below).
1964bf50f18SLuigi Rizzo  *
1974bf50f18SLuigi Rizzo  * 	- netmap_pipe_adapter 	      [netmap_pipe.c]
1984bf50f18SLuigi Rizzo  * 	      Returned by netmap_get_pipe_na().
1994bf50f18SLuigi Rizzo  * 	      Both pipe ends are created, if they didn't already exist.
2004bf50f18SLuigi Rizzo  *
2014bf50f18SLuigi Rizzo  * 	- netmap_monitor_adapter      [netmap_monitor.c]
2024bf50f18SLuigi Rizzo  * 	      Returned by netmap_get_monitor_na().
2034bf50f18SLuigi Rizzo  * 	      If successful, the nm_sync callbacks of the monitored adapter
2044bf50f18SLuigi Rizzo  * 	      will be intercepted by the returned monitor.
2054bf50f18SLuigi Rizzo  *
2064bf50f18SLuigi Rizzo  * 	- netmap_bwrap_adapter	      [netmap_vale.c]
2074bf50f18SLuigi Rizzo  * 	      Cannot be obtained in this way, see VALE_CTL below
2084bf50f18SLuigi Rizzo  *
2094bf50f18SLuigi Rizzo  *
2104bf50f18SLuigi Rizzo  * 	os-specific:
2114bf50f18SLuigi Rizzo  * 	    linux: we first go through linux_netmap_ioctl() to
2124bf50f18SLuigi Rizzo  * 	           adapt the FreeBSD interface to the linux one.
2134bf50f18SLuigi Rizzo  *
2144bf50f18SLuigi Rizzo  *
2154bf50f18SLuigi Rizzo  * > 3. on each descriptor, the process issues an mmap() request to
2164bf50f18SLuigi Rizzo  * >    map the shared memory region within the process' address space.
2174bf50f18SLuigi Rizzo  * >    The list of interesting queues is indicated by a location in
2184bf50f18SLuigi Rizzo  * >    the shared memory region.
2194bf50f18SLuigi Rizzo  *
2204bf50f18SLuigi Rizzo  *      os-specific:
2214bf50f18SLuigi Rizzo  *  	    FreeBSD: netmap_mmap_single (netmap_freebsd.c).
2224bf50f18SLuigi Rizzo  *  	    linux:   linux_netmap_mmap (netmap_linux.c).
2234bf50f18SLuigi Rizzo  *
2244bf50f18SLuigi Rizzo  * > 4. using the functions in the netmap(4) userspace API, a process
2254bf50f18SLuigi Rizzo  * >    can look up the occupation state of a queue, access memory buffers,
2264bf50f18SLuigi Rizzo  * >    and retrieve received packets or enqueue packets to transmit.
2274bf50f18SLuigi Rizzo  *
2284bf50f18SLuigi Rizzo  * 	these actions do not involve the kernel.
2294bf50f18SLuigi Rizzo  *
2304bf50f18SLuigi Rizzo  * > 5. using some ioctl()s the process can synchronize the userspace view
2314bf50f18SLuigi Rizzo  * >    of the queue with the actual status in the kernel. This includes both
2324bf50f18SLuigi Rizzo  * >    receiving the notification of new packets, and transmitting new
2334bf50f18SLuigi Rizzo  * >    packets on the output interface.
2344bf50f18SLuigi Rizzo  *
2354bf50f18SLuigi Rizzo  * 	These are implemented in netmap_ioctl(), NIOCTXSYNC and NIOCRXSYNC
2364bf50f18SLuigi Rizzo  * 	cases. They invoke the nm_sync callbacks on the netmap_kring
2374bf50f18SLuigi Rizzo  * 	structures, as initialized in step 2 and maybe later modified
2384bf50f18SLuigi Rizzo  * 	by a monitor. Monitors, however, will always call the original
2394bf50f18SLuigi Rizzo  * 	callback before doing anything else.
2404bf50f18SLuigi Rizzo  *
2414bf50f18SLuigi Rizzo  *
2424bf50f18SLuigi Rizzo  * > 6. select() or poll() can be used to wait for events on individual
2434bf50f18SLuigi Rizzo  * >    transmit or receive queues (or all queues for a given interface).
2444bf50f18SLuigi Rizzo  *
2454bf50f18SLuigi Rizzo  * 	Implemented in netmap_poll(). This will call the same nm_sync()
2464bf50f18SLuigi Rizzo  * 	callbacks as in step 5 above.
2474bf50f18SLuigi Rizzo  *
2484bf50f18SLuigi Rizzo  * 	os-specific:
2494bf50f18SLuigi Rizzo  * 		linux: we first go through linux_netmap_poll() to adapt
2504bf50f18SLuigi Rizzo  * 		       the FreeBSD interface to the linux one.
2514bf50f18SLuigi Rizzo  *
2524bf50f18SLuigi Rizzo  *
2534bf50f18SLuigi Rizzo  *  ----  VALE_CTL -----
2544bf50f18SLuigi Rizzo  *
2554bf50f18SLuigi Rizzo  *  VALE switches are controlled by issuing a NIOCREGIF with a non-null
2564bf50f18SLuigi Rizzo  *  nr_cmd in the nmreq structure. These subcommands are handled by
2574bf50f18SLuigi Rizzo  *  netmap_bdg_ctl() in netmap_vale.c. Persistent VALE ports are created
2584bf50f18SLuigi Rizzo  *  and destroyed by issuing the NETMAP_BDG_NEWIF and NETMAP_BDG_DELIF
2594bf50f18SLuigi Rizzo  *  subcommands, respectively.
2604bf50f18SLuigi Rizzo  *
2614bf50f18SLuigi Rizzo  *  Any network interface known to the system (including a persistent VALE
2624bf50f18SLuigi Rizzo  *  port) can be attached to a VALE switch by issuing the
2634bf50f18SLuigi Rizzo  *  NETMAP_BDG_ATTACH subcommand. After the attachment, persistent VALE ports
2644bf50f18SLuigi Rizzo  *  look exactly like ephemeral VALE ports (as created in step 2 above).  The
2654bf50f18SLuigi Rizzo  *  attachment of other interfaces, instead, requires the creation of a
2664bf50f18SLuigi Rizzo  *  netmap_bwrap_adapter.  Moreover, the attached interface must be put in
2674bf50f18SLuigi Rizzo  *  netmap mode. This may require the creation of a netmap_generic_adapter if
2684bf50f18SLuigi Rizzo  *  we have no native support for the interface, or if generic adapters have
2694bf50f18SLuigi Rizzo  *  been forced by sysctl.
2704bf50f18SLuigi Rizzo  *
2714bf50f18SLuigi Rizzo  *  Both persistent VALE ports and bwraps are handled by netmap_get_bdg_na(),
2724bf50f18SLuigi Rizzo  *  called by nm_bdg_ctl_attach(), and discriminated by the nm_bdg_attach()
2734bf50f18SLuigi Rizzo  *  callback.  In the case of the bwrap, the callback creates the
2744bf50f18SLuigi Rizzo  *  netmap_bwrap_adapter.  The initialization of the bwrap is then
2754bf50f18SLuigi Rizzo  *  completed by calling netmap_do_regif() on it, in the nm_bdg_ctl()
2764bf50f18SLuigi Rizzo  *  callback (netmap_bwrap_bdg_ctl in netmap_vale.c).
2774bf50f18SLuigi Rizzo  *  A generic adapter for the wrapped ifp will be created if needed, when
2784bf50f18SLuigi Rizzo  *  netmap_get_bdg_na() calls netmap_get_hw_na().
2794bf50f18SLuigi Rizzo  *
2804bf50f18SLuigi Rizzo  *
2814bf50f18SLuigi Rizzo  *  ---- DATAPATHS -----
2824bf50f18SLuigi Rizzo  *
2834bf50f18SLuigi Rizzo  *              -= SYSTEM DEVICE WITH NATIVE SUPPORT =-
2844bf50f18SLuigi Rizzo  *
2854bf50f18SLuigi Rizzo  *    na == NA(ifp) == netmap_hw_adapter created in DEVICE_netmap_attach()
2864bf50f18SLuigi Rizzo  *
2874bf50f18SLuigi Rizzo  *    - tx from netmap userspace:
2884bf50f18SLuigi Rizzo  *	 concurrently:
2894bf50f18SLuigi Rizzo  *           1) ioctl(NIOCTXSYNC)/netmap_poll() in process context
2904bf50f18SLuigi Rizzo  *                kring->nm_sync() == DEVICE_netmap_txsync()
2914bf50f18SLuigi Rizzo  *           2) device interrupt handler
2924bf50f18SLuigi Rizzo  *                na->nm_notify()  == netmap_notify()
2934bf50f18SLuigi Rizzo  *    - rx from netmap userspace:
2944bf50f18SLuigi Rizzo  *       concurrently:
2954bf50f18SLuigi Rizzo  *           1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
2964bf50f18SLuigi Rizzo  *                kring->nm_sync() == DEVICE_netmap_rxsync()
2974bf50f18SLuigi Rizzo  *           2) device interrupt handler
2984bf50f18SLuigi Rizzo  *                na->nm_notify()  == netmap_notify()
299847bf383SLuigi Rizzo  *    - rx from host stack
3004bf50f18SLuigi Rizzo  *       concurrently:
3014bf50f18SLuigi Rizzo  *           1) host stack
3024bf50f18SLuigi Rizzo  *                netmap_transmit()
3034bf50f18SLuigi Rizzo  *                  na->nm_notify  == netmap_notify()
3044bf50f18SLuigi Rizzo  *           2) ioctl(NIOCRXSYNC)/netmap_poll() in process context
305*37e3a6d3SLuigi Rizzo  *                kring->nm_sync() == netmap_rxsync_from_host
3064bf50f18SLuigi Rizzo  *                  netmap_rxsync_from_host(na, NULL, NULL)
3074bf50f18SLuigi Rizzo  *    - tx to host stack
3084bf50f18SLuigi Rizzo  *           ioctl(NIOCTXSYNC)/netmap_poll() in process context
309*37e3a6d3SLuigi Rizzo  *             kring->nm_sync() == netmap_txsync_to_host
3104bf50f18SLuigi Rizzo  *               netmap_txsync_to_host(na)
311*37e3a6d3SLuigi Rizzo  *                 nm_os_send_up()
312*37e3a6d3SLuigi Rizzo  *                   FreeBSD: na->if_input() == ether_input()
3134bf50f18SLuigi Rizzo  *                   linux: netif_rx() with NM_MAGIC_PRIORITY_RX
3144bf50f18SLuigi Rizzo  *
3154bf50f18SLuigi Rizzo  *
3164bf50f18SLuigi Rizzo  *               -= SYSTEM DEVICE WITH GENERIC SUPPORT =-
3174bf50f18SLuigi Rizzo  *
318847bf383SLuigi Rizzo  *    na == NA(ifp) == generic_netmap_adapter created in generic_netmap_attach()
319847bf383SLuigi Rizzo  *
320847bf383SLuigi Rizzo  *    - tx from netmap userspace:
321847bf383SLuigi Rizzo  *       concurrently:
322847bf383SLuigi Rizzo  *           1) ioctl(NIOCTXSYNC)/netmap_poll() in process context
323847bf383SLuigi Rizzo  *               kring->nm_sync() == generic_netmap_txsync()
324*37e3a6d3SLuigi Rizzo  *                   nm_os_generic_xmit_frame()
325847bf383SLuigi Rizzo  *                       linux:   dev_queue_xmit() with NM_MAGIC_PRIORITY_TX
326*37e3a6d3SLuigi Rizzo  *                           ifp->ndo_start_xmit == generic_ndo_start_xmit()
327*37e3a6d3SLuigi Rizzo  *                               gna->save_start_xmit == orig. dev. start_xmit
328847bf383SLuigi Rizzo  *                       FreeBSD: na->if_transmit() == orig. dev if_transmit
329847bf383SLuigi Rizzo  *           2) generic_mbuf_destructor()
330847bf383SLuigi Rizzo  *                   na->nm_notify() == netmap_notify()
331847bf383SLuigi Rizzo  *    - rx from netmap userspace:
332847bf383SLuigi Rizzo  *           1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
333847bf383SLuigi Rizzo  *               kring->nm_sync() == generic_netmap_rxsync()
334847bf383SLuigi Rizzo  *                   mbq_safe_dequeue()
335847bf383SLuigi Rizzo  *           2) device driver
336847bf383SLuigi Rizzo  *               generic_rx_handler()
337847bf383SLuigi Rizzo  *                   mbq_safe_enqueue()
338847bf383SLuigi Rizzo  *                   na->nm_notify() == netmap_notify()
339*37e3a6d3SLuigi Rizzo  *    - rx from host stack
340*37e3a6d3SLuigi Rizzo  *        FreeBSD: same as native
341*37e3a6d3SLuigi Rizzo  *        Linux: same as native except:
342847bf383SLuigi Rizzo  *           1) host stack
343*37e3a6d3SLuigi Rizzo  *               dev_queue_xmit() without NM_MAGIC_PRIORITY_TX
344*37e3a6d3SLuigi Rizzo  *                   ifp->ndo_start_xmit == generic_ndo_start_xmit()
345847bf383SLuigi Rizzo  *                       netmap_transmit()
346847bf383SLuigi Rizzo  *                           na->nm_notify() == netmap_notify()
347*37e3a6d3SLuigi Rizzo  *    - tx to host stack (same as native):
3484bf50f18SLuigi Rizzo  *
3494bf50f18SLuigi Rizzo  *
350847bf383SLuigi Rizzo  *                           -= VALE =-
3514bf50f18SLuigi Rizzo  *
352847bf383SLuigi Rizzo  *   INCOMING:
3534bf50f18SLuigi Rizzo  *
354847bf383SLuigi Rizzo  *      - VALE ports:
355847bf383SLuigi Rizzo  *          ioctl(NIOCTXSYNC)/netmap_poll() in process context
356847bf383SLuigi Rizzo  *              kring->nm_sync() == netmap_vp_txsync()
3574bf50f18SLuigi Rizzo  *
358847bf383SLuigi Rizzo  *      - system device with native support:
359847bf383SLuigi Rizzo  *         from cable:
360847bf383SLuigi Rizzo  *             interrupt
361847bf383SLuigi Rizzo  *                na->nm_notify() == netmap_bwrap_intr_notify(ring_nr != host ring)
362847bf383SLuigi Rizzo  *                     kring->nm_sync() == DEVICE_netmap_rxsync()
363847bf383SLuigi Rizzo  *                     netmap_vp_txsync()
364847bf383SLuigi Rizzo  *                     kring->nm_sync() == DEVICE_netmap_rxsync()
365847bf383SLuigi Rizzo  *         from host stack:
366847bf383SLuigi Rizzo  *             netmap_transmit()
367847bf383SLuigi Rizzo  *                na->nm_notify() == netmap_bwrap_intr_notify(ring_nr == host ring)
368*37e3a6d3SLuigi Rizzo  *                     kring->nm_sync() == netmap_rxsync_from_host()
369847bf383SLuigi Rizzo  *                     netmap_vp_txsync()
3704bf50f18SLuigi Rizzo  *
371847bf383SLuigi Rizzo  *      - system device with generic support:
372847bf383SLuigi Rizzo  *         from device driver:
373847bf383SLuigi Rizzo  *            generic_rx_handler()
374847bf383SLuigi Rizzo  *                na->nm_notify() == netmap_bwrap_intr_notify(ring_nr != host ring)
375847bf383SLuigi Rizzo  *                     kring->nm_sync() == generic_netmap_rxsync()
376847bf383SLuigi Rizzo  *                     netmap_vp_txsync()
377847bf383SLuigi Rizzo  *                     kring->nm_sync() == generic_netmap_rxsync()
378847bf383SLuigi Rizzo  *         from host stack:
379847bf383SLuigi Rizzo  *            netmap_transmit()
380847bf383SLuigi Rizzo  *                na->nm_notify() == netmap_bwrap_intr_notify(ring_nr == host ring)
381*37e3a6d3SLuigi Rizzo  *                     kring->nm_sync() == netmap_rxsync_from_host()
382847bf383SLuigi Rizzo  *                     netmap_vp_txsync()
3834bf50f18SLuigi Rizzo  *
384847bf383SLuigi Rizzo  *   (all cases) --> nm_bdg_flush()
385847bf383SLuigi Rizzo  *                      dest_na->nm_notify() == (see below)
3864bf50f18SLuigi Rizzo  *
387847bf383SLuigi Rizzo  *   OUTGOING:
3884bf50f18SLuigi Rizzo  *
389847bf383SLuigi Rizzo  *      - VALE ports:
390847bf383SLuigi Rizzo  *         concurrently:
391847bf383SLuigi Rizzo  *             1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
392847bf383SLuigi Rizzo  *                    kring->nm_sync() == netmap_vp_rxsync()
393847bf383SLuigi Rizzo  *             2) from nm_bdg_flush()
394847bf383SLuigi Rizzo  *                    na->nm_notify() == netmap_notify()
3954bf50f18SLuigi Rizzo  *
396847bf383SLuigi Rizzo  *      - system device with native support:
397847bf383SLuigi Rizzo  *          to cable:
398847bf383SLuigi Rizzo  *             na->nm_notify() == netmap_bwrap_notify()
399847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
400847bf383SLuigi Rizzo  *                 kring->nm_sync() == DEVICE_netmap_txsync()
401847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
402847bf383SLuigi Rizzo  *          to host stack:
403847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
404*37e3a6d3SLuigi Rizzo  *                 kring->nm_sync() == netmap_txsync_to_host
405847bf383SLuigi Rizzo  *                 netmap_vp_rxsync_locked()
4064bf50f18SLuigi Rizzo  *
407847bf383SLuigi Rizzo  *      - system device with generic adapter:
408847bf383SLuigi Rizzo  *          to device driver:
409847bf383SLuigi Rizzo  *             na->nm_notify() == netmap_bwrap_notify()
410847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
411847bf383SLuigi Rizzo  *                 kring->nm_sync() == generic_netmap_txsync()
412847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
413847bf383SLuigi Rizzo  *          to host stack:
414847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
415*37e3a6d3SLuigi Rizzo  *                 kring->nm_sync() == netmap_txsync_to_host
416847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
4174bf50f18SLuigi Rizzo  *
4184bf50f18SLuigi Rizzo  */
4194bf50f18SLuigi Rizzo 
420ce3ee1e7SLuigi Rizzo /*
421ce3ee1e7SLuigi Rizzo  * OS-specific code that is used only within this file.
422ce3ee1e7SLuigi Rizzo  * Other OS-specific code that must be accessed by drivers
423ce3ee1e7SLuigi Rizzo  * is present in netmap_kern.h
424ce3ee1e7SLuigi Rizzo  */
42501c7d25fSLuigi Rizzo 
426ce3ee1e7SLuigi Rizzo #if defined(__FreeBSD__)
42768b8534bSLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */
42868b8534bSLuigi Rizzo #include <sys/types.h>
42968b8534bSLuigi Rizzo #include <sys/errno.h>
43068b8534bSLuigi Rizzo #include <sys/param.h>	/* defines used in kernel.h */
43168b8534bSLuigi Rizzo #include <sys/kernel.h>	/* types used in module initialization */
432f9790aebSLuigi Rizzo #include <sys/conf.h>	/* cdevsw struct, UID, GID */
43389e3fd52SLuigi Rizzo #include <sys/filio.h>	/* FIONBIO */
43468b8534bSLuigi Rizzo #include <sys/sockio.h>
43568b8534bSLuigi Rizzo #include <sys/socketvar.h>	/* struct socket */
43668b8534bSLuigi Rizzo #include <sys/malloc.h>
43768b8534bSLuigi Rizzo #include <sys/poll.h>
43889f6b863SAttilio Rao #include <sys/rwlock.h>
43968b8534bSLuigi Rizzo #include <sys/socket.h> /* sockaddrs */
44068b8534bSLuigi Rizzo #include <sys/selinfo.h>
44168b8534bSLuigi Rizzo #include <sys/sysctl.h>
442339f59c0SGleb Smirnoff #include <sys/jail.h>
443339f59c0SGleb Smirnoff #include <net/vnet.h>
44468b8534bSLuigi Rizzo #include <net/if.h>
44576039bc8SGleb Smirnoff #include <net/if_var.h>
44668b8534bSLuigi Rizzo #include <net/bpf.h>		/* BIOCIMMEDIATE */
44768b8534bSLuigi Rizzo #include <machine/bus.h>	/* bus_dmamap_* */
448ce3ee1e7SLuigi Rizzo #include <sys/endian.h>
449ce3ee1e7SLuigi Rizzo #include <sys/refcount.h>
45068b8534bSLuigi Rizzo 
45168b8534bSLuigi Rizzo 
452ce3ee1e7SLuigi Rizzo #elif defined(linux)
453ce3ee1e7SLuigi Rizzo 
454ce3ee1e7SLuigi Rizzo #include "bsd_glue.h"
455ce3ee1e7SLuigi Rizzo 
456ce3ee1e7SLuigi Rizzo #elif defined(__APPLE__)
457ce3ee1e7SLuigi Rizzo 
458ce3ee1e7SLuigi Rizzo #warning OSX support is only partial
459ce3ee1e7SLuigi Rizzo #include "osx_glue.h"
460ce3ee1e7SLuigi Rizzo 
461*37e3a6d3SLuigi Rizzo #elif defined (_WIN32)
462*37e3a6d3SLuigi Rizzo 
463*37e3a6d3SLuigi Rizzo #include "win_glue.h"
464*37e3a6d3SLuigi Rizzo 
465ce3ee1e7SLuigi Rizzo #else
466ce3ee1e7SLuigi Rizzo 
467ce3ee1e7SLuigi Rizzo #error	Unsupported platform
468ce3ee1e7SLuigi Rizzo 
469ce3ee1e7SLuigi Rizzo #endif /* unsupported */
470ce3ee1e7SLuigi Rizzo 
471ce3ee1e7SLuigi Rizzo /*
472ce3ee1e7SLuigi Rizzo  * common headers
473ce3ee1e7SLuigi Rizzo  */
4740b8ed8e0SLuigi Rizzo #include <net/netmap.h>
4750b8ed8e0SLuigi Rizzo #include <dev/netmap/netmap_kern.h>
476ce3ee1e7SLuigi Rizzo #include <dev/netmap/netmap_mem2.h>
4770b8ed8e0SLuigi Rizzo 
478ce3ee1e7SLuigi Rizzo 
4795819da83SLuigi Rizzo /* user-controlled variables */
/* Verbosity for diagnostic printouts; defaults to 0 (quiet) as an
 * uninitialized file-scope int.  NOTE(review): the exact meaning of
 * non-zero values is defined by the code that tests this flag later
 * in the file -- confirm there. */
4805819da83SLuigi Rizzo int netmap_verbose;
4815819da83SLuigi Rizzo 
4825819da83SLuigi Rizzo static int netmap_no_timestamp; /* don't timestamp on rxsync */
/* NOTE(review): the three knobs below appear to control interrupt
 * mitigation, handling of pending interrupts, and the number of
 * txsync retries respectively; their precise effect is implemented
 * elsewhere in this file -- verify against the code that reads them. */
4835819da83SLuigi Rizzo int netmap_mitigate = 1;
484c85cb1a0SLuigi Rizzo int netmap_no_pendintr = 1;
485f18be576SLuigi Rizzo int netmap_txsync_retry = 2;
/* NOTE(review): presumably toggles an adaptive I/O strategy; disabled
 * by default -- confirm semantics where this variable is used. */
4864bf50f18SLuigi Rizzo int netmap_adaptive_io = 0;
487f196ce38SLuigi Rizzo int netmap_flags = 0;	/* debug flags */
488*37e3a6d3SLuigi Rizzo static int netmap_fwd = 0;	/* force transparent mode */
489f196ce38SLuigi Rizzo 
490f9790aebSLuigi Rizzo /*
491f9790aebSLuigi Rizzo  * netmap_admode selects the netmap mode to use.
492f9790aebSLuigi Rizzo  * Invalid values are reset to NETMAP_ADMODE_BEST
493f9790aebSLuigi Rizzo  */
494f9790aebSLuigi Rizzo enum {	NETMAP_ADMODE_BEST = 0,	/* use native, fallback to generic */
495f9790aebSLuigi Rizzo 	NETMAP_ADMODE_NATIVE,	/* either native or none */
496f9790aebSLuigi Rizzo 	NETMAP_ADMODE_GENERIC,	/* force generic */
497f9790aebSLuigi Rizzo 	NETMAP_ADMODE_LAST };
498f9790aebSLuigi Rizzo static int netmap_admode = NETMAP_ADMODE_BEST;
499f9790aebSLuigi Rizzo 
500*37e3a6d3SLuigi Rizzo /* netmap_generic_mit controls mitigation of RX notifications for
501*37e3a6d3SLuigi Rizzo  * the generic netmap adapter. The value is a time interval in
502*37e3a6d3SLuigi Rizzo  * nanoseconds. */
503*37e3a6d3SLuigi Rizzo int netmap_generic_mit = 100*1000;
504*37e3a6d3SLuigi Rizzo 
505*37e3a6d3SLuigi Rizzo /* We use by default netmap-aware qdiscs with generic netmap adapters,
506*37e3a6d3SLuigi Rizzo  * even if there can be a little performance hit with hardware NICs.
507*37e3a6d3SLuigi Rizzo  * However, using the qdisc is the safer approach, for two reasons:
508*37e3a6d3SLuigi Rizzo  * 1) it prevents non-fifo qdiscs to break the TX notification
509*37e3a6d3SLuigi Rizzo  *    scheme, which is based on mbuf destructors when txqdisc is
510*37e3a6d3SLuigi Rizzo  *    not used.
511*37e3a6d3SLuigi Rizzo  * 2) it makes it possible to transmit over software devices that
512*37e3a6d3SLuigi Rizzo  *    change skb->dev, like bridge, veth, ...
513*37e3a6d3SLuigi Rizzo  *
514*37e3a6d3SLuigi Rizzo  * Anyway users looking for the best performance should
515*37e3a6d3SLuigi Rizzo  * use native adapters.
516*37e3a6d3SLuigi Rizzo  */
517*37e3a6d3SLuigi Rizzo int netmap_generic_txqdisc = 1;
518*37e3a6d3SLuigi Rizzo 
519*37e3a6d3SLuigi Rizzo /* Default number of slots and queues for generic adapters. */
520*37e3a6d3SLuigi Rizzo int netmap_generic_ringsize = 1024;
521*37e3a6d3SLuigi Rizzo int netmap_generic_rings = 1;
522*37e3a6d3SLuigi Rizzo 
523*37e3a6d3SLuigi Rizzo /* Non-zero if ptnet devices are allowed to use virtio-net headers. */
524*37e3a6d3SLuigi Rizzo int ptnet_vnet_hdr = 1;
525*37e3a6d3SLuigi Rizzo 
526*37e3a6d3SLuigi Rizzo /*
527*37e3a6d3SLuigi Rizzo  * SYSCTL calls are grouped between SYSBEGIN and SYSEND to be emulated
528*37e3a6d3SLuigi Rizzo  * in some other operating systems
529*37e3a6d3SLuigi Rizzo  */
530*37e3a6d3SLuigi Rizzo SYSBEGIN(main_init);
531*37e3a6d3SLuigi Rizzo 
532*37e3a6d3SLuigi Rizzo SYSCTL_DECL(_dev_netmap);
533*37e3a6d3SLuigi Rizzo SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args");
534*37e3a6d3SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
535*37e3a6d3SLuigi Rizzo     CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
536*37e3a6d3SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
537*37e3a6d3SLuigi Rizzo     CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
538*37e3a6d3SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
539*37e3a6d3SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr,
540*37e3a6d3SLuigi Rizzo     CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets.");
541*37e3a6d3SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
542*37e3a6d3SLuigi Rizzo     &netmap_txsync_retry, 0 , "Number of txsync loops in bridge's flush.");
543*37e3a6d3SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, adaptive_io, CTLFLAG_RW,
544*37e3a6d3SLuigi Rizzo     &netmap_adaptive_io, 0 , "Adaptive I/O on paravirt");
545f9790aebSLuigi Rizzo 
546f196ce38SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
547091fd0abSLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
548f9790aebSLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0 , "");
549f9790aebSLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit, 0 , "");
550f9790aebSLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, &netmap_generic_ringsize, 0 , "");
551f0ea3689SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW, &netmap_generic_rings, 0 , "");
552*37e3a6d3SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, generic_txqdisc, CTLFLAG_RW, &netmap_generic_txqdisc, 0 , "");
553*37e3a6d3SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr, 0 , "");
554*37e3a6d3SLuigi Rizzo 
555*37e3a6d3SLuigi Rizzo SYSEND;
556f196ce38SLuigi Rizzo 
557ce3ee1e7SLuigi Rizzo NMG_LOCK_T	netmap_global_lock;
558ce3ee1e7SLuigi Rizzo 
/*
 * mark the ring as stopped, and run through the locks
 * to make sure other users get to see it.
 * stopped must be either NM_KR_STOPPED (for unbounded stop)
 * or NM_KR_LOCKED (brief stop for mutual exclusion purposes)
 */
static void
netmap_disable_ring(struct netmap_kring *kr, int stopped)
{
	/* Publish the stop state first so new users back off. */
	nm_kr_stop(kr, stopped);
	// XXX check if nm_kr_stop is sufficient
	/* Cycle the queue lock so any user currently holding it
	 * is guaranteed to have observed the stopped state before
	 * we proceed. The lock is not kept: acquire+release is the
	 * synchronization barrier, not mutual exclusion. */
	mtx_lock(&kr->q_lock);
	mtx_unlock(&kr->q_lock);
	/* Drop the busy reference taken by nm_kr_stop(). */
	nm_kr_put(kr);
}
574ce3ee1e7SLuigi Rizzo 
575847bf383SLuigi Rizzo /* stop or enable a single ring */
5764bf50f18SLuigi Rizzo void
577847bf383SLuigi Rizzo netmap_set_ring(struct netmap_adapter *na, u_int ring_id, enum txrx t, int stopped)
5784bf50f18SLuigi Rizzo {
5794bf50f18SLuigi Rizzo 	if (stopped)
580*37e3a6d3SLuigi Rizzo 		netmap_disable_ring(NMR(na, t) + ring_id, stopped);
5814bf50f18SLuigi Rizzo 	else
582847bf383SLuigi Rizzo 		NMR(na, t)[ring_id].nkr_stopped = 0;
5834bf50f18SLuigi Rizzo }
5844bf50f18SLuigi Rizzo 
585f9790aebSLuigi Rizzo 
58689cc2556SLuigi Rizzo /* stop or enable all the rings of na */
5874bf50f18SLuigi Rizzo void
5884bf50f18SLuigi Rizzo netmap_set_all_rings(struct netmap_adapter *na, int stopped)
589ce3ee1e7SLuigi Rizzo {
590ce3ee1e7SLuigi Rizzo 	int i;
591847bf383SLuigi Rizzo 	enum txrx t;
592ce3ee1e7SLuigi Rizzo 
5934bf50f18SLuigi Rizzo 	if (!nm_netmap_on(na))
594ce3ee1e7SLuigi Rizzo 		return;
595ce3ee1e7SLuigi Rizzo 
596847bf383SLuigi Rizzo 	for_rx_tx(t) {
597847bf383SLuigi Rizzo 		for (i = 0; i < netmap_real_rings(na, t); i++) {
598847bf383SLuigi Rizzo 			netmap_set_ring(na, i, t, stopped);
599ce3ee1e7SLuigi Rizzo 		}
600ce3ee1e7SLuigi Rizzo 	}
601ce3ee1e7SLuigi Rizzo }
602ce3ee1e7SLuigi Rizzo 
/*
 * Convenience function used in drivers.  Waits for current txsync()s/rxsync()s
 * to finish and prevents any new one from starting.  Call this before turning
 * netmap mode off, or before removing the hardware rings (e.g., on module
 * unload).
 */
609f9790aebSLuigi Rizzo void
610f9790aebSLuigi Rizzo netmap_disable_all_rings(struct ifnet *ifp)
611f9790aebSLuigi Rizzo {
612*37e3a6d3SLuigi Rizzo 	if (NM_NA_VALID(ifp)) {
613*37e3a6d3SLuigi Rizzo 		netmap_set_all_rings(NA(ifp), NM_KR_STOPPED);
614*37e3a6d3SLuigi Rizzo 	}
615f9790aebSLuigi Rizzo }
616f9790aebSLuigi Rizzo 
61789cc2556SLuigi Rizzo /*
61889cc2556SLuigi Rizzo  * Convenience function used in drivers.  Re-enables rxsync and txsync on the
61989cc2556SLuigi Rizzo  * adapter's rings In linux drivers, this should be placed near each
62089cc2556SLuigi Rizzo  * napi_enable().
62189cc2556SLuigi Rizzo  */
void
netmap_enable_all_rings(struct ifnet *ifp)
{
	/* interfaces without a valid netmap adapter are ignored */
	if (!NM_NA_VALID(ifp))
		return;
	netmap_set_all_rings(NA(ifp), 0 /* enabled */);
}
629f9790aebSLuigi Rizzo 
630*37e3a6d3SLuigi Rizzo void
631*37e3a6d3SLuigi Rizzo netmap_make_zombie(struct ifnet *ifp)
632*37e3a6d3SLuigi Rizzo {
633*37e3a6d3SLuigi Rizzo 	if (NM_NA_VALID(ifp)) {
634*37e3a6d3SLuigi Rizzo 		struct netmap_adapter *na = NA(ifp);
635*37e3a6d3SLuigi Rizzo 		netmap_set_all_rings(na, NM_KR_LOCKED);
636*37e3a6d3SLuigi Rizzo 		na->na_flags |= NAF_ZOMBIE;
637*37e3a6d3SLuigi Rizzo 		netmap_set_all_rings(na, 0);
638*37e3a6d3SLuigi Rizzo 	}
639*37e3a6d3SLuigi Rizzo }
640*37e3a6d3SLuigi Rizzo 
641*37e3a6d3SLuigi Rizzo void
642*37e3a6d3SLuigi Rizzo netmap_undo_zombie(struct ifnet *ifp)
643*37e3a6d3SLuigi Rizzo {
644*37e3a6d3SLuigi Rizzo 	if (NM_NA_VALID(ifp)) {
645*37e3a6d3SLuigi Rizzo 		struct netmap_adapter *na = NA(ifp);
646*37e3a6d3SLuigi Rizzo 		if (na->na_flags & NAF_ZOMBIE) {
647*37e3a6d3SLuigi Rizzo 			netmap_set_all_rings(na, NM_KR_LOCKED);
648*37e3a6d3SLuigi Rizzo 			na->na_flags &= ~NAF_ZOMBIE;
649*37e3a6d3SLuigi Rizzo 			netmap_set_all_rings(na, 0);
650*37e3a6d3SLuigi Rizzo 		}
651*37e3a6d3SLuigi Rizzo 	}
652*37e3a6d3SLuigi Rizzo }
653f9790aebSLuigi Rizzo 
654ce3ee1e7SLuigi Rizzo /*
655ce3ee1e7SLuigi Rizzo  * generic bound_checking function
656ce3ee1e7SLuigi Rizzo  */
/*
 * Generic bound-checking helper: force *v into [lo, hi].
 * Values below lo are bumped to the (clamped) default dflt,
 * values above hi are clamped to hi. If msg is non-NULL a
 * one-line diagnostic is printed when *v is adjusted.
 * Returns the (possibly corrected) value of *v.
 */
u_int
nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg)
{
	u_int prev = *v;
	const char *action = NULL;

	/* first make sure the default itself lies within the bounds */
	if (dflt < lo)
		dflt = lo;
	if (dflt > hi)
		dflt = hi;
	if (prev < lo) {
		*v = dflt;
		action = "Bump";
	} else if (prev > hi) {
		*v = hi;
		action = "Clamp";
	}
	if (action != NULL && msg != NULL)
		printf("%s %s to %d (was %d)\n", action, msg, *v, prev);
	return *v;
}
678ce3ee1e7SLuigi Rizzo 
679f9790aebSLuigi Rizzo 
680ce3ee1e7SLuigi Rizzo /*
681ce3ee1e7SLuigi Rizzo  * packet-dump function, user-supplied or static buffer.
682ce3ee1e7SLuigi Rizzo  * The destination buffer must be at least 30+4*len
683ce3ee1e7SLuigi Rizzo  */
/* Hexdump 'lim' bytes (capped to 'len') of buffer 'p' into 'dst';
 * if dst is NULL a static 8 KiB scratch buffer is used instead
 * (NOTE(review): the static fallback is not thread-safe).
 * Output format: a header line, then rows of 16 bytes as hex pairs
 * in columns 0..47 and printable ASCII in columns 48..63.
 * Returns the destination buffer. */
const char *
nm_dump_buf(char *p, int len, int lim, char *dst)
{
	static char _dst[8192];
	int i, j, i0;
	static char hex[] ="0123456789abcdef";
	char *o;	/* output position */

#define P_HI(x)	hex[((x) & 0xf0)>>4]
#define P_LO(x)	hex[((x) & 0xf)]
#define P_C(x)	((x) >= 0x20 && (x) <= 0x7e ? (x) : '.')
	if (!dst)
		dst = _dst;
	/* a non-positive or too-large limit means "dump everything" */
	if (lim <= 0 || lim > len)
		lim = len;
	o = dst;
	sprintf(o, "buf 0x%p len %d lim %d\n", p, len, lim);
	o += strlen(o);
	/* hexdump routine */
	for (i = 0; i < lim; ) {
		sprintf(o, "%5d: ", i);
		o += strlen(o);
		/* blank out the 48-char hex area; unused cells stay spaces */
		memset(o, ' ', 48);
		i0 = i;
		/* first pass: hex pairs, 3 columns per byte */
		for (j=0; j < 16 && i < lim; i++, j++) {
			o[j*3] = P_HI(p[i]);
			o[j*3+1] = P_LO(p[i]);
		}
		/* second pass over the same bytes: printable ASCII column */
		i = i0;
		for (j=0; j < 16 && i < lim; i++, j++)
			o[j + 48] = P_C(p[i]);
		o[j+48] = '\n';
		o += j+49;
	}
	*o = '\0';
#undef P_HI
#undef P_LO
#undef P_C
	return dst;
}
724f196ce38SLuigi Rizzo 
725f18be576SLuigi Rizzo 
726ae10d1afSLuigi Rizzo /*
727ae10d1afSLuigi Rizzo  * Fetch configuration from the device, to cope with dynamic
728ae10d1afSLuigi Rizzo  * reconfigurations after loading the module.
729ae10d1afSLuigi Rizzo  */
73089cc2556SLuigi Rizzo /* call with NMG_LOCK held */
/* Re-read ring counts/sizes from the device via na->nm_config and,
 * if nothing is bound to the adapter, adopt the new values.
 * Returns 0 on success (including "nothing changed"), 1 if the
 * configuration changed while file descriptors are still active,
 * which cannot be honored. Caller holds NMG_LOCK. */
int
netmap_update_config(struct netmap_adapter *na)
{
	u_int txr, txd, rxr, rxd;

	txr = txd = rxr = rxd = 0;
	/* if the adapter has no nm_config callback, or it fails,
	 * keep the values recorded at attach time */
	if (na->nm_config == NULL ||
	    na->nm_config(na, &txr, &txd, &rxr, &rxd))
	{
		/* take whatever we had at init time */
		txr = na->num_tx_rings;
		txd = na->num_tx_desc;
		rxr = na->num_rx_rings;
		rxd = na->num_rx_desc;
	}

	if (na->num_tx_rings == txr && na->num_tx_desc == txd &&
	    na->num_rx_rings == rxr && na->num_rx_desc == rxd)
		return 0; /* nothing changed */
	if (netmap_verbose || na->active_fds > 0) {
		D("stored config %s: txring %d x %d, rxring %d x %d",
			na->name,
			na->num_tx_rings, na->num_tx_desc,
			na->num_rx_rings, na->num_rx_desc);
		D("new config %s: txring %d x %d, rxring %d x %d",
			na->name, txr, txd, rxr, rxd);
	}
	/* only safe to adopt the new geometry with no active users */
	if (na->active_fds == 0) {
		D("configuration changed (but fine)");
		na->num_tx_rings = txr;
		na->num_tx_desc = txd;
		na->num_rx_rings = rxr;
		na->num_rx_desc = rxd;
		return 0;
	}
	D("configuration changed while active, this is bad...");
	return 1;
}
769ae10d1afSLuigi Rizzo 
770*37e3a6d3SLuigi Rizzo /* nm_sync callbacks for the host rings */
771*37e3a6d3SLuigi Rizzo static int netmap_txsync_to_host(struct netmap_kring *kring, int flags);
772*37e3a6d3SLuigi Rizzo static int netmap_rxsync_from_host(struct netmap_kring *kring, int flags);
773f0ea3689SLuigi Rizzo 
774f0ea3689SLuigi Rizzo /* create the krings array and initialize the fields common to all adapters.
775f0ea3689SLuigi Rizzo  * The array layout is this:
776f0ea3689SLuigi Rizzo  *
777f0ea3689SLuigi Rizzo  *                    +----------+
778f0ea3689SLuigi Rizzo  * na->tx_rings ----->|          | \
779f0ea3689SLuigi Rizzo  *                    |          |  } na->num_tx_ring
780f0ea3689SLuigi Rizzo  *                    |          | /
781f0ea3689SLuigi Rizzo  *                    +----------+
782f0ea3689SLuigi Rizzo  *                    |          |    host tx kring
783f0ea3689SLuigi Rizzo  * na->rx_rings ----> +----------+
784f0ea3689SLuigi Rizzo  *                    |          | \
785f0ea3689SLuigi Rizzo  *                    |          |  } na->num_rx_rings
786f0ea3689SLuigi Rizzo  *                    |          | /
787f0ea3689SLuigi Rizzo  *                    +----------+
788f0ea3689SLuigi Rizzo  *                    |          |    host rx kring
789f0ea3689SLuigi Rizzo  *                    +----------+
790f0ea3689SLuigi Rizzo  * na->tailroom ----->|          | \
791f0ea3689SLuigi Rizzo  *                    |          |  } tailroom bytes
792f0ea3689SLuigi Rizzo  *                    |          | /
793f0ea3689SLuigi Rizzo  *                    +----------+
794f0ea3689SLuigi Rizzo  *
795f0ea3689SLuigi Rizzo  * Note: for compatibility, host krings are created even when not needed.
796f0ea3689SLuigi Rizzo  * The tailroom space is currently used by vale ports for allocating leases.
797f0ea3689SLuigi Rizzo  */
79889cc2556SLuigi Rizzo /* call with NMG_LOCK held */
/* Allocate the krings array (tx rings, host tx ring, rx rings,
 * host rx ring, then 'tailroom' extra bytes) as one contiguous
 * block, and initialize the fields common to all adapters.
 * Returns 0 or ENOMEM. Caller holds NMG_LOCK. */
int
netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
{
	u_int i, len, ndesc;
	struct netmap_kring *kring;
	u_int n[NR_TXRX];	/* ring count per direction, host ring included */
	enum txrx t;

	/* account for the (possibly fake) host rings */
	n[NR_TX] = na->num_tx_rings + 1;
	n[NR_RX] = na->num_rx_rings + 1;

	len = (n[NR_TX] + n[NR_RX]) * sizeof(struct netmap_kring) + tailroom;

	na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (na->tx_rings == NULL) {
		D("Cannot allocate krings");
		return ENOMEM;
	}
	/* rx krings follow the tx krings in the same allocation */
	na->rx_rings = na->tx_rings + n[NR_TX];

	/*
	 * All fields in krings are 0 except the one initialized below.
	 * but better be explicit on important kring fields.
	 */
	for_rx_tx(t) {
		ndesc = nma_get_ndesc(na, t);
		for (i = 0; i < n[t]; i++) {
			kring = &NMR(na, t)[i];
			bzero(kring, sizeof(*kring));
			kring->na = na;
			kring->ring_id = i;
			kring->tx = t;
			kring->nkr_num_slots = ndesc;
			kring->nr_mode = NKR_NETMAP_OFF;
			kring->nr_pending_mode = NKR_NETMAP_OFF;
			/* hardware rings use the adapter sync callbacks,
			 * the trailing host ring uses the host-stack ones */
			if (i < nma_get_nrings(na, t)) {
				kring->nm_sync = (t == NR_TX ? na->nm_txsync : na->nm_rxsync);
			} else {
				kring->nm_sync = (t == NR_TX ?
						netmap_txsync_to_host:
						netmap_rxsync_from_host);
			}
			kring->nm_notify = na->nm_notify;
			kring->rhead = kring->rcur = kring->nr_hwcur = 0;
			/*
			 * IMPORTANT: Always keep one slot empty.
			 */
			kring->rtail = kring->nr_hwtail = (t == NR_TX ? ndesc - 1 : 0);
			snprintf(kring->name, sizeof(kring->name) - 1, "%s %s%d", na->name,
					nm_txrx2str(t), i);
			ND("ktx %s h %d c %d t %d",
				kring->name, kring->rhead, kring->rcur, kring->rtail);
			mtx_init(&kring->q_lock, (t == NR_TX ? "nm_txq_lock" : "nm_rxq_lock"), NULL, MTX_DEF);
			nm_os_selinfo_init(&kring->si);
		}
		/* global (per-direction) wait queue */
		nm_os_selinfo_init(&na->si[t]);
	}

	/* tailroom (used e.g. by vale ports for leases) sits after the rx krings */
	na->tailroom = na->rx_rings + n[NR_RX];

	return 0;
}
862f9790aebSLuigi Rizzo 
863f9790aebSLuigi Rizzo 
864f0ea3689SLuigi Rizzo /* undo the actions performed by netmap_krings_create */
86589cc2556SLuigi Rizzo /* call with NMG_LOCK held */
866f9790aebSLuigi Rizzo void
867f9790aebSLuigi Rizzo netmap_krings_delete(struct netmap_adapter *na)
868f9790aebSLuigi Rizzo {
869f0ea3689SLuigi Rizzo 	struct netmap_kring *kring = na->tx_rings;
870847bf383SLuigi Rizzo 	enum txrx t;
871847bf383SLuigi Rizzo 
872847bf383SLuigi Rizzo 	for_rx_tx(t)
873*37e3a6d3SLuigi Rizzo 		nm_os_selinfo_uninit(&na->si[t]);
874f9790aebSLuigi Rizzo 
875f0ea3689SLuigi Rizzo 	/* we rely on the krings layout described above */
876f0ea3689SLuigi Rizzo 	for ( ; kring != na->tailroom; kring++) {
877f0ea3689SLuigi Rizzo 		mtx_destroy(&kring->q_lock);
878*37e3a6d3SLuigi Rizzo 		nm_os_selinfo_uninit(&kring->si);
879f9790aebSLuigi Rizzo 	}
880f9790aebSLuigi Rizzo 	free(na->tx_rings, M_DEVBUF);
881f9790aebSLuigi Rizzo 	na->tx_rings = na->rx_rings = na->tailroom = NULL;
882f9790aebSLuigi Rizzo }
883f9790aebSLuigi Rizzo 
884f9790aebSLuigi Rizzo 
88517885a7bSLuigi Rizzo /*
88617885a7bSLuigi Rizzo  * Destructor for NIC ports. They also have an mbuf queue
88717885a7bSLuigi Rizzo  * on the rings connected to the host so we need to purge
88817885a7bSLuigi Rizzo  * them first.
88917885a7bSLuigi Rizzo  */
89089cc2556SLuigi Rizzo /* call with NMG_LOCK held */
/* Destructor for NIC ports: purge and finalize the mbuf queue
 * attached to the host rx ring before deleting the krings,
 * since pending host-bound mbufs would otherwise leak. */
void
netmap_hw_krings_delete(struct netmap_adapter *na)
{
	/* the host rx ring is the one past the hardware rx rings */
	struct mbq *q = &na->rx_rings[na->num_rx_rings].rx_queue;

	ND("destroy sw mbq with len %d", mbq_len(q));
	mbq_purge(q);
	mbq_safe_fini(q);
	netmap_krings_delete(na);
}
90117885a7bSLuigi Rizzo 
90217885a7bSLuigi Rizzo 
903f9790aebSLuigi Rizzo 
90468b8534bSLuigi Rizzo /*
905847bf383SLuigi Rizzo  * Undo everything that was done in netmap_do_regif(). In particular,
906847bf383SLuigi Rizzo  * call nm_register(ifp,0) to stop netmap mode on the interface and
9074bf50f18SLuigi Rizzo  * revert to normal operation.
90868b8534bSLuigi Rizzo  */
909ce3ee1e7SLuigi Rizzo /* call with NMG_LOCK held */
910847bf383SLuigi Rizzo static void netmap_unset_ringid(struct netmap_priv_d *);
911*37e3a6d3SLuigi Rizzo static void netmap_krings_put(struct netmap_priv_d *);
912*37e3a6d3SLuigi Rizzo void
913847bf383SLuigi Rizzo netmap_do_unregif(struct netmap_priv_d *priv)
91468b8534bSLuigi Rizzo {
915f9790aebSLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
91668b8534bSLuigi Rizzo 
917ce3ee1e7SLuigi Rizzo 	NMG_LOCK_ASSERT();
918f9790aebSLuigi Rizzo 	na->active_fds--;
919*37e3a6d3SLuigi Rizzo 	/* unset nr_pending_mode and possibly release exclusive mode */
920*37e3a6d3SLuigi Rizzo 	netmap_krings_put(priv);
921847bf383SLuigi Rizzo 
922847bf383SLuigi Rizzo #ifdef	WITH_MONITOR
923*37e3a6d3SLuigi Rizzo 	/* XXX check whether we have to do something with monitor
924*37e3a6d3SLuigi Rizzo 	 * when rings change nr_mode. */
925*37e3a6d3SLuigi Rizzo 	if (na->active_fds <= 0) {
926847bf383SLuigi Rizzo 		/* walk through all the rings and tell any monitor
927847bf383SLuigi Rizzo 		 * that the port is going to exit netmap mode
928847bf383SLuigi Rizzo 		 */
929847bf383SLuigi Rizzo 		netmap_monitor_stop(na);
930*37e3a6d3SLuigi Rizzo 	}
931847bf383SLuigi Rizzo #endif
932*37e3a6d3SLuigi Rizzo 
933*37e3a6d3SLuigi Rizzo 	if (na->active_fds <= 0 || nm_kring_pending(priv)) {
934*37e3a6d3SLuigi Rizzo 		na->nm_register(na, 0);
935*37e3a6d3SLuigi Rizzo 	}
936*37e3a6d3SLuigi Rizzo 
937*37e3a6d3SLuigi Rizzo 	/* delete rings and buffers that are no longer needed */
938*37e3a6d3SLuigi Rizzo 	netmap_mem_rings_delete(na);
939*37e3a6d3SLuigi Rizzo 
940*37e3a6d3SLuigi Rizzo 	if (na->active_fds <= 0) {	/* last instance */
94168b8534bSLuigi Rizzo 		/*
942*37e3a6d3SLuigi Rizzo 		 * (TO CHECK) We enter here
943f18be576SLuigi Rizzo 		 * when the last reference to this file descriptor goes
944f18be576SLuigi Rizzo 		 * away. This means we cannot have any pending poll()
945f18be576SLuigi Rizzo 		 * or interrupt routine operating on the structure.
946ce3ee1e7SLuigi Rizzo 		 * XXX The file may be closed in a thread while
947ce3ee1e7SLuigi Rizzo 		 * another thread is using it.
948ce3ee1e7SLuigi Rizzo 		 * Linux keeps the file opened until the last reference
949ce3ee1e7SLuigi Rizzo 		 * by any outstanding ioctl/poll or mmap is gone.
950ce3ee1e7SLuigi Rizzo 		 * FreeBSD does not track mmap()s (but we do) and
951ce3ee1e7SLuigi Rizzo 		 * wakes up any sleeping poll(). Need to check what
952ce3ee1e7SLuigi Rizzo 		 * happens if the close() occurs while a concurrent
953ce3ee1e7SLuigi Rizzo 		 * syscall is running.
95468b8534bSLuigi Rizzo 		 */
955*37e3a6d3SLuigi Rizzo 		if (netmap_verbose)
956*37e3a6d3SLuigi Rizzo 			D("deleting last instance for %s", na->name);
957*37e3a6d3SLuigi Rizzo 
958*37e3a6d3SLuigi Rizzo                 if (nm_netmap_on(na)) {
959*37e3a6d3SLuigi Rizzo                     D("BUG: netmap on while going to delete the krings");
960*37e3a6d3SLuigi Rizzo                 }
961*37e3a6d3SLuigi Rizzo 
962f9790aebSLuigi Rizzo 		na->nm_krings_delete(na);
96368b8534bSLuigi Rizzo 	}
964*37e3a6d3SLuigi Rizzo 
965847bf383SLuigi Rizzo 	/* possibily decrement counter of tx_si/rx_si users */
966847bf383SLuigi Rizzo 	netmap_unset_ringid(priv);
967f9790aebSLuigi Rizzo 	/* delete the nifp */
968847bf383SLuigi Rizzo 	netmap_mem_if_delete(na, priv->np_nifp);
969847bf383SLuigi Rizzo 	/* drop the allocator */
970847bf383SLuigi Rizzo 	netmap_mem_deref(na->nm_mem, na);
971847bf383SLuigi Rizzo 	/* mark the priv as unregistered */
972847bf383SLuigi Rizzo 	priv->np_na = NULL;
973847bf383SLuigi Rizzo 	priv->np_nifp = NULL;
9745819da83SLuigi Rizzo }
97568b8534bSLuigi Rizzo 
97689cc2556SLuigi Rizzo /* call with NMG_LOCK held */
977f0ea3689SLuigi Rizzo static __inline int
978847bf383SLuigi Rizzo nm_si_user(struct netmap_priv_d *priv, enum txrx t)
979f0ea3689SLuigi Rizzo {
980f0ea3689SLuigi Rizzo 	return (priv->np_na != NULL &&
981847bf383SLuigi Rizzo 		(priv->np_qlast[t] - priv->np_qfirst[t] > 1));
982f0ea3689SLuigi Rizzo }
983f0ea3689SLuigi Rizzo 
984*37e3a6d3SLuigi Rizzo struct netmap_priv_d*
985*37e3a6d3SLuigi Rizzo netmap_priv_new(void)
986*37e3a6d3SLuigi Rizzo {
987*37e3a6d3SLuigi Rizzo 	struct netmap_priv_d *priv;
988*37e3a6d3SLuigi Rizzo 
989*37e3a6d3SLuigi Rizzo 	priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
990*37e3a6d3SLuigi Rizzo 			      M_NOWAIT | M_ZERO);
991*37e3a6d3SLuigi Rizzo 	if (priv == NULL)
992*37e3a6d3SLuigi Rizzo 		return NULL;
993*37e3a6d3SLuigi Rizzo 	priv->np_refs = 1;
994*37e3a6d3SLuigi Rizzo 	nm_os_get_module();
995*37e3a6d3SLuigi Rizzo 	return priv;
996*37e3a6d3SLuigi Rizzo }
997*37e3a6d3SLuigi Rizzo 
998ce3ee1e7SLuigi Rizzo /*
9998fd44c93SLuigi Rizzo  * Destructor of the netmap_priv_d, called when the fd is closed
10008fd44c93SLuigi Rizzo  * Action: undo all the things done by NIOCREGIF,
10018fd44c93SLuigi Rizzo  * On FreeBSD we need to track whether there are active mmap()s,
10028fd44c93SLuigi Rizzo  * and we use np_active_mmaps for that. On linux, the field is always 0.
10038fd44c93SLuigi Rizzo  * Return: 1 if we can free priv, 0 otherwise.
100489cc2556SLuigi Rizzo  *
1005ce3ee1e7SLuigi Rizzo  */
100689cc2556SLuigi Rizzo /* call with NMG_LOCK held */
/* Drop one reference to the priv; on the last one, unregister from
 * the adapter (if still bound), release the adapter/ifp references
 * and free the priv itself. Caller holds NMG_LOCK. */
void
netmap_priv_delete(struct netmap_priv_d *priv)
{
	struct netmap_adapter *na = priv->np_na;

	/* number of active references to this fd */
	if (--priv->np_refs > 0) {
		return;
	}
	/* last reference: release the module pin taken in netmap_priv_new() */
	nm_os_put_module();
	if (na) {
		netmap_do_unregif(priv);
	}
	netmap_unget_na(na, priv->np_ifp);
	bzero(priv, sizeof(*priv));	/* for safety */
	free(priv, M_DEVBUF);
}
10245819da83SLuigi Rizzo 
1025f9790aebSLuigi Rizzo 
102689cc2556SLuigi Rizzo /* call with NMG_LOCK *not* held */
/* File-descriptor destructor: take the global netmap lock and drop
 * this fd's reference to its priv. Call with NMG_LOCK *not* held. */
void
netmap_dtor(void *data)
{
	struct netmap_priv_d *priv = (struct netmap_priv_d *)data;

	NMG_LOCK();
	netmap_priv_delete(priv);
	NMG_UNLOCK();
}
103668b8534bSLuigi Rizzo 
1037f18be576SLuigi Rizzo 
103868b8534bSLuigi Rizzo 
103968b8534bSLuigi Rizzo 
104068b8534bSLuigi Rizzo /*
104102ad4083SLuigi Rizzo  * Handlers for synchronization of the queues from/to the host.
1042091fd0abSLuigi Rizzo  * Netmap has two operating modes:
1043091fd0abSLuigi Rizzo  * - in the default mode, the rings connected to the host stack are
1044091fd0abSLuigi Rizzo  *   just another ring pair managed by userspace;
1045091fd0abSLuigi Rizzo  * - in transparent mode (XXX to be defined) incoming packets
1046091fd0abSLuigi Rizzo  *   (from the host or the NIC) are marked as NS_FORWARD upon
1047091fd0abSLuigi Rizzo  *   arrival, and the user application has a chance to reset the
1048091fd0abSLuigi Rizzo  *   flag for packets that should be dropped.
1049091fd0abSLuigi Rizzo  *   On the RXSYNC or poll(), packets in RX rings between
1050091fd0abSLuigi Rizzo  *   kring->nr_kcur and ring->cur with NS_FORWARD still set are moved
1051091fd0abSLuigi Rizzo  *   to the other side.
1052091fd0abSLuigi Rizzo  * The transfer NIC --> host is relatively easy, just encapsulate
1053091fd0abSLuigi Rizzo  * into mbufs and we are done. The host --> NIC side is slightly
1054091fd0abSLuigi Rizzo  * harder because there might not be room in the tx ring so it
1055091fd0abSLuigi Rizzo  * might take a while before releasing the buffer.
1056091fd0abSLuigi Rizzo  */
1057091fd0abSLuigi Rizzo 
1058f18be576SLuigi Rizzo 
1059091fd0abSLuigi Rizzo /*
1060091fd0abSLuigi Rizzo  * pass a chain of buffers to the host stack as coming from 'dst'
106117885a7bSLuigi Rizzo  * We do not need to lock because the queue is private.
1062091fd0abSLuigi Rizzo  */
/* Pass a chain of buffers to the host stack as coming from 'dst'.
 * Drains queue q, linking the mbufs through nm_os_send_up() and then
 * delivering the whole chain with a final NULL-mbuf call (NOTE(review):
 * that final-call flush semantics is nm_os_send_up()'s contract —
 * confirm against its definition). No locking needed: q is private. */
static void
netmap_send_up(struct ifnet *dst, struct mbq *q)
{
	struct mbuf *m;
	struct mbuf *head = NULL, *prev = NULL;

	/* send packets up, outside the lock */
	while ((m = mbq_dequeue(q)) != NULL) {
		if (netmap_verbose & NM_VERB_HOST)
			D("sending up pkt %p size %d", m, MBUF_LEN(m));
		prev = nm_os_send_up(dst, m, prev);
		/* remember the first mbuf as the head of the chain */
		if (head == NULL)
			head = prev;
	}
	if (head)
		nm_os_send_up(dst, NULL, head);
	mbq_fini(q);
}
1081091fd0abSLuigi Rizzo 
1082f18be576SLuigi Rizzo 
1083091fd0abSLuigi Rizzo /*
1084091fd0abSLuigi Rizzo  * put a copy of the buffers marked NS_FORWARD into an mbuf chain.
108517885a7bSLuigi Rizzo  * Take packets from hwcur to ring->head marked NS_FORWARD (or forced)
108617885a7bSLuigi Rizzo  * and pass them up. Drop remaining packets in the unlikely event
108717885a7bSLuigi Rizzo  * of an mbuf shortage.
1088091fd0abSLuigi Rizzo  */
/* Copy slots marked NS_FORWARD (or all slots, if 'force') between
 * nr_hwcur and rhead into mbufs and append them to q, to be later
 * passed up to the host stack. Stops early (dropping the rest) if
 * mbuf allocation fails. */
static void
netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
{
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	u_int n;
	struct netmap_adapter *na = kring->na;

	for (n = kring->nr_hwcur; n != head; n = nm_next(n, lim)) {
		struct mbuf *m;
		struct netmap_slot *slot = &kring->ring->slot[n];

		if ((slot->flags & NS_FORWARD) == 0 && !force)
			continue;
		/* skip runts (shorter than an ethernet header) and
		 * slots claiming more than a netmap buffer can hold */
		if (slot->len < 14 || slot->len > NETMAP_BUF_SIZE(na)) {
			RD(5, "bad pkt at %d len %d", n, slot->len);
			continue;
		}
		slot->flags &= ~NS_FORWARD; // XXX needed ?
		/* XXX TODO: adapt to the case of a multisegment packet */
		m = m_devget(NMB(na, slot), slot->len, 0, na->ifp, NULL);

		if (m == NULL)
			break;
		mbq_enqueue(q, m);
	}
}
1116091fd0abSLuigi Rizzo 
1117*37e3a6d3SLuigi Rizzo static inline int
1118*37e3a6d3SLuigi Rizzo _nm_may_forward(struct netmap_kring *kring)
1119*37e3a6d3SLuigi Rizzo {
1120*37e3a6d3SLuigi Rizzo 	return	((netmap_fwd || kring->ring->flags & NR_FORWARD) &&
1121*37e3a6d3SLuigi Rizzo 		 kring->na->na_flags & NAF_HOST_RINGS &&
1122*37e3a6d3SLuigi Rizzo 		 kring->tx == NR_RX);
1123*37e3a6d3SLuigi Rizzo }
1124*37e3a6d3SLuigi Rizzo 
1125*37e3a6d3SLuigi Rizzo static inline int
1126*37e3a6d3SLuigi Rizzo nm_may_forward_up(struct netmap_kring *kring)
1127*37e3a6d3SLuigi Rizzo {
1128*37e3a6d3SLuigi Rizzo 	return	_nm_may_forward(kring) &&
1129*37e3a6d3SLuigi Rizzo 		 kring->ring_id != kring->na->num_rx_rings;
1130*37e3a6d3SLuigi Rizzo }
1131*37e3a6d3SLuigi Rizzo 
1132*37e3a6d3SLuigi Rizzo static inline int
1133*37e3a6d3SLuigi Rizzo nm_may_forward_down(struct netmap_kring *kring)
1134*37e3a6d3SLuigi Rizzo {
1135*37e3a6d3SLuigi Rizzo 	return	_nm_may_forward(kring) &&
1136*37e3a6d3SLuigi Rizzo 		 kring->ring_id == kring->na->num_rx_rings;
1137*37e3a6d3SLuigi Rizzo }
1138f18be576SLuigi Rizzo 
1139091fd0abSLuigi Rizzo /*
114017885a7bSLuigi Rizzo  * Send to the NIC rings packets marked NS_FORWARD between
114117885a7bSLuigi Rizzo  * kring->nr_hwcur and kring->rhead
114217885a7bSLuigi Rizzo  * Called under kring->rx_queue.lock on the sw rx ring,
1143091fd0abSLuigi Rizzo  */
static u_int
netmap_sw_to_nic(struct netmap_adapter *na)
{
	/* the host RX ring is the one past the last hardware RX ring */
	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
	struct netmap_slot *rxslot = kring->ring->slot;
	u_int i, rxcur = kring->nr_hwcur;
	u_int const head = kring->rhead;
	u_int const src_lim = kring->nkr_num_slots - 1;
	u_int sent = 0;

	/* scan rings to find space, then fill as much as possible */
	for (i = 0; i < na->num_tx_rings; i++) {
		struct netmap_kring *kdst = &na->tx_rings[i];
		struct netmap_ring *rdst = kdst->ring;
		u_int const dst_lim = kdst->nkr_num_slots - 1;

		/* XXX do we trust ring or kring->rcur,rtail ? */
		for (; rxcur != head && !nm_ring_empty(rdst);
		     rxcur = nm_next(rxcur, src_lim) ) {
			struct netmap_slot *src, *dst, tmp;
			u_int dst_head = rdst->head;

			src = &rxslot[rxcur];
			/* honor the per-slot mark unless forwarding is
			 * globally enabled via the netmap_fwd sysctl */
			if ((src->flags & NS_FORWARD) == 0 && !netmap_fwd)
				continue;

			sent++;

			dst = &rdst->slot[dst_head];

			tmp = *src;

			/* Zero-copy transfer: swap the buffers between
			 * source and destination slots, and mark both
			 * NS_BUF_CHANGED so the owners re-map them. */
			src->buf_idx = dst->buf_idx;
			src->flags = NS_BUF_CHANGED;

			dst->buf_idx = tmp.buf_idx;
			dst->len = tmp.len;
			dst->flags = NS_BUF_CHANGED;

			/* advance the destination ring's head and cur */
			rdst->head = rdst->cur = nm_next(dst_head, dst_lim);
		}
		/* if (sent) XXX txsync ? */
	}
	/* number of packets moved to the NIC rings */
	return sent;
}
1189091fd0abSLuigi Rizzo 
1190f18be576SLuigi Rizzo 
1191091fd0abSLuigi Rizzo /*
1192ce3ee1e7SLuigi Rizzo  * netmap_txsync_to_host() passes packets up. We are called from a
119302ad4083SLuigi Rizzo  * system call in user process context, and the only contention
119402ad4083SLuigi Rizzo  * can be among multiple user threads erroneously calling
1195091fd0abSLuigi Rizzo  * this routine concurrently.
119668b8534bSLuigi Rizzo  */
static int
netmap_txsync_to_host(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	struct mbq q;

	/* Take packets from hwcur to head and pass them up.
	 * force head = cur since netmap_grab_packets() stops at head
	 * In case of no buffers we give up. At the end of the loop,
	 * the queue is drained in all cases.
	 */
	mbq_init(&q);
	netmap_grab_packets(kring, &q, 1 /* force */);
	ND("have %d pkts in queue", mbq_len(&q));
	kring->nr_hwcur = head;
	/* set hwtail one slot before head (mod ring size), making the
	 * whole ring available to userspace again */
	kring->nr_hwtail = head + lim;
	if (kring->nr_hwtail > lim)
		kring->nr_hwtail -= lim + 1;

	/* deliver the collected mbufs to the host stack */
	netmap_send_up(na->ifp, &q);
	/* always succeeds */
	return 0;
}
1221f18be576SLuigi Rizzo 
1222f18be576SLuigi Rizzo 
122368b8534bSLuigi Rizzo /*
122402ad4083SLuigi Rizzo  * rxsync backend for packets coming from the host stack.
122517885a7bSLuigi Rizzo  * They have been put in kring->rx_queue by netmap_transmit().
122617885a7bSLuigi Rizzo  * We protect access to the kring using kring->rx_queue.lock
122702ad4083SLuigi Rizzo  *
 * NOTE(review): the reference to 'sr' below is stale — the function
 * now takes only (kring, flags), so the selrecord for the poll
 * handler must be performed by the caller; confirm against the
 * poll path.
12304bf50f18SLuigi Rizzo  *
123117885a7bSLuigi Rizzo  * returns the number of packets delivered to tx queues in
123217885a7bSLuigi Rizzo  * transparent mode, or a negative value if error
123368b8534bSLuigi Rizzo  */
static int
netmap_rxsync_from_host(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i, n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	int ret = 0;
	struct mbq *q = &kring->rx_queue, fq;

	mbq_init(&fq); /* fq holds packets to be freed */

	/* rx_queue.lock also protects this kring (see comment above) */
	mbq_lock(q);

	/* First part: import newly received packets */
	n = mbq_len(q);
	if (n) { /* grab packets from the queue */
		struct mbuf *m;
		uint32_t stop_i;

		nm_i = kring->nr_hwtail;
		stop_i = nm_prev(nm_i, lim);	/* keep one slot free */
		while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) {
			int len = MBUF_LEN(m);
			struct netmap_slot *slot = &ring->slot[nm_i];

			/* copy the mbuf payload into the netmap buffer */
			m_copydata(m, 0, len, NMB(na, slot));
			ND("nm %d len %d", nm_i, len);
			if (netmap_verbose)
                                D("%s", nm_dump_buf(NMB(na, slot),len, 128, NULL));

			slot->len = len;
			slot->flags = kring->nkr_slot_flags;
			nm_i = nm_next(nm_i, lim);
			/* defer freeing until after the lock is dropped */
			mbq_enqueue(&fq, m);
		}
		kring->nr_hwtail = nm_i;
	}

	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) { /* something was released */
		if (nm_may_forward_down(kring)) {
			/* transparent mode: push released slots down
			 * to the NIC tx rings */
			ret = netmap_sw_to_nic(na);
			if (ret > 0) {
				kring->nr_kflags |= NR_FORWARD;
				ret = 0;
			}
		}
		kring->nr_hwcur = head;
	}

	mbq_unlock(q);

	/* free the consumed mbufs outside the lock */
	mbq_purge(&fq);
	mbq_fini(&fq);

	return ret;
}
129668b8534bSLuigi Rizzo 
129768b8534bSLuigi Rizzo 
1298f9790aebSLuigi Rizzo /* Get a netmap adapter for the port.
1299f9790aebSLuigi Rizzo  *
1300f9790aebSLuigi Rizzo  * If it is possible to satisfy the request, return 0
1301f9790aebSLuigi Rizzo  * with *na containing the netmap adapter found.
1302f9790aebSLuigi Rizzo  * Otherwise return an error code, with *na containing NULL.
1303f9790aebSLuigi Rizzo  *
1304f9790aebSLuigi Rizzo  * When the port is attached to a bridge, we always return
1305f9790aebSLuigi Rizzo  * EBUSY.
1306f9790aebSLuigi Rizzo  * Otherwise, if the port is already bound to a file descriptor,
1307f9790aebSLuigi Rizzo  * then we unconditionally return the existing adapter into *na.
1308f9790aebSLuigi Rizzo  * In all the other cases, we return (into *na) either native,
1309f9790aebSLuigi Rizzo  * generic or NULL, according to the following table:
1310f9790aebSLuigi Rizzo  *
1311f9790aebSLuigi Rizzo  *					native_support
1312f9790aebSLuigi Rizzo  * active_fds   dev.netmap.admode         YES     NO
1313f9790aebSLuigi Rizzo  * -------------------------------------------------------
1314f9790aebSLuigi Rizzo  *    >0              *                 NA(ifp) NA(ifp)
1315f9790aebSLuigi Rizzo  *
1316f9790aebSLuigi Rizzo  *     0        NETMAP_ADMODE_BEST      NATIVE  GENERIC
1317f9790aebSLuigi Rizzo  *     0        NETMAP_ADMODE_NATIVE    NATIVE   NULL
1318f9790aebSLuigi Rizzo  *     0        NETMAP_ADMODE_GENERIC   GENERIC GENERIC
1319f9790aebSLuigi Rizzo  *
1320f9790aebSLuigi Rizzo  */
1321*37e3a6d3SLuigi Rizzo static void netmap_hw_dtor(struct netmap_adapter *); /* needed by NM_IS_NATIVE() */
int
netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
{
	/* generic support */
	int i = netmap_admode;	/* Take a snapshot. */
	struct netmap_adapter *prev_na;
	int error = 0;

	*na = NULL; /* default */

	/* reset in case of invalid value */
	if (i < NETMAP_ADMODE_BEST || i >= NETMAP_ADMODE_LAST)
		i = netmap_admode = NETMAP_ADMODE_BEST;

	/* is there already a netmap adapter attached to this port? */
	if (NM_NA_VALID(ifp)) {
		prev_na = NA(ifp);
		/* If an adapter already exists, return it if
		 * there are active file descriptors or if
		 * netmap is not forced to use generic
		 * adapters.
		 */
		if (NETMAP_OWNED_BY_ANY(prev_na)
			|| i != NETMAP_ADMODE_GENERIC
			|| prev_na->na_flags & NAF_FORCE_NATIVE
#ifdef WITH_PIPES
			/* ugly, but we cannot allow an adapter switch
			 * if some pipe is referring to this one
			 */
			|| prev_na->na_next_pipe > 0
#endif
		) {
			*na = prev_na;
			return 0;
		}
	}

	/* If there isn't native support and netmap is not allowed
	 * to use generic adapters, we cannot satisfy the request.
	 */
	if (!NM_IS_NATIVE(ifp) && i == NETMAP_ADMODE_NATIVE)
		return EOPNOTSUPP;

	/* Otherwise, create a generic adapter and return it,
	 * saving the previously used netmap adapter, if any.
	 *
	 * Note that here 'prev_na', if not NULL, MUST be a
	 * native adapter, and CANNOT be a generic one. This is
	 * true because generic adapters are created on demand, and
	 * destroyed when not used anymore. Therefore, if the adapter
	 * currently attached to an interface 'ifp' is generic, it
	 * must be that
	 * (NA(ifp)->active_fds > 0 || NETMAP_OWNED_BY_KERN(NA(ifp))).
	 * Consequently, if NA(ifp) is generic, we will enter one of
	 * the branches above. This ensures that we never override
	 * a generic adapter with another generic adapter.
	 */
	error = generic_netmap_attach(ifp);
	if (error)
		return error;

	/* the generic adapter is now attached to ifp */
	*na = NA(ifp);
	return 0;
}
1385f9790aebSLuigi Rizzo 
1386f9790aebSLuigi Rizzo 
138768b8534bSLuigi Rizzo /*
1388ce3ee1e7SLuigi Rizzo  * MUST BE CALLED UNDER NMG_LOCK()
1389ce3ee1e7SLuigi Rizzo  *
1390f2637526SLuigi Rizzo  * Get a refcounted reference to a netmap adapter attached
1391f2637526SLuigi Rizzo  * to the interface specified by nmr.
1392ce3ee1e7SLuigi Rizzo  * This is always called in the execution of an ioctl().
1393ce3ee1e7SLuigi Rizzo  *
1394f2637526SLuigi Rizzo  * Return ENXIO if the interface specified by the request does
1395f2637526SLuigi Rizzo  * not exist, ENOTSUP if netmap is not supported by the interface,
1396f2637526SLuigi Rizzo  * EBUSY if the interface is already attached to a bridge,
1397f2637526SLuigi Rizzo  * EINVAL if parameters are invalid, ENOMEM if needed resources
1398f2637526SLuigi Rizzo  * could not be allocated.
1399f2637526SLuigi Rizzo  * If successful, hold a reference to the netmap adapter.
1400f18be576SLuigi Rizzo  *
1401*37e3a6d3SLuigi Rizzo  * If the interface specified by nmr is a system one, also keep
1402*37e3a6d3SLuigi Rizzo  * a reference to it and return a valid *ifp.
140368b8534bSLuigi Rizzo  */
int
netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na,
	      struct ifnet **ifp, int create)
{
	int error = 0;
	struct netmap_adapter *ret = NULL;

	*na = NULL;     /* default return value */
	*ifp = NULL;

	NMG_LOCK_ASSERT();

	/* We cascade through all possible types of netmap adapter.
	 * All netmap_get_*_na() functions return an error and an na,
	 * with the following combinations:
	 *
	 * error    na
	 *   0	   NULL		type doesn't match
	 *  !0	   NULL		type matches, but na creation/lookup failed
	 *   0	  !NULL		type matches and na created/found
	 *  !0    !NULL		impossible
	 */

	/* try to see if this is a ptnetmap port */
	error = netmap_get_pt_host_na(nmr, na, create);
	if (error || *na != NULL)
		return error;

	/* try to see if this is a monitor port */
	error = netmap_get_monitor_na(nmr, na, create);
	if (error || *na != NULL)
		return error;

	/* try to see if this is a pipe port */
	error = netmap_get_pipe_na(nmr, na, create);
	if (error || *na != NULL)
		return error;

	/* try to see if this is a bridge port */
	error = netmap_get_bdg_na(nmr, na, create);
	if (error)
		return error;

	if (*na != NULL) /* valid match in netmap_get_bdg_na() */
		goto out;

	/*
	 * This must be a hardware na, lookup the name in the system.
	 * Note that by hardware we actually mean "it shows up in ifconfig".
	 * This may still be a tap, a veth/epair, or even a
	 * persistent VALE port.
	 */
	*ifp = ifunit_ref(nmr->nr_name);
	if (*ifp == NULL) {
	        return ENXIO;
	}

	error = netmap_get_hw_na(*ifp, &ret);
	if (error)
		goto out;

	*na = ret;
	netmap_adapter_get(ret);	/* reference for the caller */

out:
	/* on error, drop any references taken along the way;
	 * netmap_unget_na() undoes the successful case */
	if (error) {
		if (ret)
			netmap_adapter_put(ret);
		if (*ifp) {
			if_rele(*ifp);
			*ifp = NULL;
		}
	}

	return error;
}
1480ce3ee1e7SLuigi Rizzo 
/* undo netmap_get_na(): drop the ifnet and adapter references it took.
 * Either argument may be NULL (e.g. for ports that are not system
 * interfaces, *ifp is left NULL by netmap_get_na()). */
void
netmap_unget_na(struct netmap_adapter *na, struct ifnet *ifp)
{
	if (ifp)
		if_rele(ifp);
	if (na)
		netmap_adapter_put(na);
}
1490*37e3a6d3SLuigi Rizzo 
1491*37e3a6d3SLuigi Rizzo 
/*
 * Validation helper for the *sync prologue routines below: if the
 * condition 't' holds, log (rate-limited) the ring/kring state and
 * make the caller return kring->nkr_num_slots, which callers treat
 * as an error value (it is out of range for a slot index).
 * NOTE: expands uses of 'kring', 'ring', 'head' and 'cur', which
 * must be in scope at the expansion site, and contains a 'return'.
 */
#define NM_FAIL_ON(t) do {						\
	if (unlikely(t)) {						\
		RD(5, "%s: fail '" #t "' "				\
			"h %d c %d t %d "				\
			"rh %d rc %d rt %d "				\
			"hc %d ht %d",					\
			kring->name,					\
			head, cur, ring->tail,				\
			kring->rhead, kring->rcur, kring->rtail,	\
			kring->nr_hwcur, kring->nr_hwtail);		\
		return kring->nkr_num_slots;				\
	}								\
} while (0)
1505ce3ee1e7SLuigi Rizzo 
1506f9790aebSLuigi Rizzo /*
1507f9790aebSLuigi Rizzo  * validate parameters on entry for *_txsync()
1508f9790aebSLuigi Rizzo  * Returns ring->cur if ok, or something >= kring->nkr_num_slots
150917885a7bSLuigi Rizzo  * in case of error.
1510f9790aebSLuigi Rizzo  *
151117885a7bSLuigi Rizzo  * rhead, rcur and rtail=hwtail are stored from previous round.
151217885a7bSLuigi Rizzo  * hwcur is the next packet to send to the ring.
1513f9790aebSLuigi Rizzo  *
151417885a7bSLuigi Rizzo  * We want
151517885a7bSLuigi Rizzo  *    hwcur <= *rhead <= head <= cur <= tail = *rtail <= hwtail
1516f9790aebSLuigi Rizzo  *
151717885a7bSLuigi Rizzo  * hwcur, rhead, rtail and hwtail are reliable
1518f9790aebSLuigi Rizzo  */
u_int
nm_txsync_prologue(struct netmap_kring *kring, struct netmap_ring *ring)
{
	u_int head = ring->head; /* read only once */
	u_int cur = ring->cur; /* read only once */
	u_int n = kring->nkr_num_slots;

	ND(5, "%s kcur %d ktail %d head %d cur %d tail %d",
		kring->name,
		kring->nr_hwcur, kring->nr_hwtail,
		ring->head, ring->cur, ring->tail);
#if 1 /* kernel sanity checks; but we can trust the kring. */
	NM_FAIL_ON(kring->nr_hwcur >= n || kring->rhead >= n ||
	    kring->rtail >= n ||  kring->nr_hwtail >= n);
#endif /* kernel sanity checks */
	/*
	 * user sanity checks. We only use head,
	 * A, B, ... are possible positions for head:
	 *
	 *  0    A  rhead   B  rtail   C  n-1
	 *  0    D  rtail   E  rhead   F  n-1
	 *
	 * B, F, D are valid. A, C, E are wrong
	 */
	if (kring->rtail >= kring->rhead) {
		/* want rhead <= head <= rtail */
		NM_FAIL_ON(head < kring->rhead || head > kring->rtail);
		/* and also head <= cur <= rtail */
		NM_FAIL_ON(cur < head || cur > kring->rtail);
	} else { /* here rtail < rhead */
		/* we need head outside rtail .. rhead */
		NM_FAIL_ON(head > kring->rtail && head < kring->rhead);

		/* two cases now: head <= rtail or head >= rhead  */
		if (head <= kring->rtail) {
			/* want head <= cur <= rtail */
			NM_FAIL_ON(cur < head || cur > kring->rtail);
		} else { /* head >= rhead */
			/* cur must be outside rtail..head */
			NM_FAIL_ON(cur > kring->rtail && cur < head);
		}
	}
	if (ring->tail != kring->rtail) {
		/* userspace must not move tail; restore the saved value */
		RD(5, "tail overwritten was %d need %d",
			ring->tail, kring->rtail);
		ring->tail = kring->rtail;
	}
	/* commit the validated head/cur into the kring */
	kring->rhead = head;
	kring->rcur = cur;
	return head;
}
157068b8534bSLuigi Rizzo 
157168b8534bSLuigi Rizzo 
157268b8534bSLuigi Rizzo /*
1573f9790aebSLuigi Rizzo  * validate parameters on entry for *_rxsync()
157417885a7bSLuigi Rizzo  * Returns ring->head if ok, kring->nkr_num_slots on error.
1575f9790aebSLuigi Rizzo  *
157617885a7bSLuigi Rizzo  * For a valid configuration,
157717885a7bSLuigi Rizzo  * hwcur <= head <= cur <= tail <= hwtail
1578f9790aebSLuigi Rizzo  *
157917885a7bSLuigi Rizzo  * We only consider head and cur.
158017885a7bSLuigi Rizzo  * hwcur and hwtail are reliable.
1581f9790aebSLuigi Rizzo  *
1582f9790aebSLuigi Rizzo  */
u_int
nm_rxsync_prologue(struct netmap_kring *kring, struct netmap_ring *ring)
{
	uint32_t const n = kring->nkr_num_slots;
	uint32_t head, cur;

	ND(5,"%s kc %d kt %d h %d c %d t %d",
		kring->name,
		kring->nr_hwcur, kring->nr_hwtail,
		ring->head, ring->cur, ring->tail);
	/*
	 * Before storing the new values, we should check they do not
	 * move backwards. However:
	 * - head is not an issue because the previous value is hwcur;
	 * - cur could in principle go back, however it does not matter
	 *   because we are processing a brand new rxsync()
	 */
	cur = kring->rcur = ring->cur;	/* read only once */
	head = kring->rhead = ring->head;	/* read only once */
#if 1 /* kernel sanity checks */
	NM_FAIL_ON(kring->nr_hwcur >= n || kring->nr_hwtail >= n);
#endif /* kernel sanity checks */
	/* user sanity checks */
	if (kring->nr_hwtail >= kring->nr_hwcur) {
		/* want hwcur <= rhead <= hwtail */
		NM_FAIL_ON(head < kring->nr_hwcur || head > kring->nr_hwtail);
		/* and also rhead <= rcur <= hwtail */
		NM_FAIL_ON(cur < head || cur > kring->nr_hwtail);
	} else {
		/* we need rhead outside hwtail..hwcur */
		NM_FAIL_ON(head < kring->nr_hwcur && head > kring->nr_hwtail);
		/* two cases now: head <= hwtail or head >= hwcur  */
		if (head <= kring->nr_hwtail) {
			/* want head <= cur <= hwtail */
			NM_FAIL_ON(cur < head || cur > kring->nr_hwtail);
		} else {
			/* cur must be outside hwtail..head */
			NM_FAIL_ON(cur < head && cur > kring->nr_hwtail);
		}
	}
	if (ring->tail != kring->rtail) {
		/* userspace must not move tail; restore the saved value */
		RD(5, "%s tail overwritten was %d need %d",
			kring->name,
			ring->tail, kring->rtail);
		ring->tail = kring->rtail;
	}
	/* head is the validated value, < nkr_num_slots */
	return head;
}
1631f9790aebSLuigi Rizzo 
163217885a7bSLuigi Rizzo 
1633f9790aebSLuigi Rizzo /*
163468b8534bSLuigi Rizzo  * Error routine called when txsync/rxsync detects an error.
163517885a7bSLuigi Rizzo  * Can't do much more than resetting head =cur = hwcur, tail = hwtail
163668b8534bSLuigi Rizzo  * Return 1 on reinit.
1637506cc70cSLuigi Rizzo  *
1638506cc70cSLuigi Rizzo  * This routine is only called by the upper half of the kernel.
1639506cc70cSLuigi Rizzo  * It only reads hwcur (which is changed only by the upper half, too)
164017885a7bSLuigi Rizzo  * and hwtail (which may be changed by the lower half, but only on
1641506cc70cSLuigi Rizzo  * a tx ring and only to increase it, so any error will be recovered
1642506cc70cSLuigi Rizzo  * on the next call). For the above, we don't strictly need to call
1643506cc70cSLuigi Rizzo  * it under lock.
164468b8534bSLuigi Rizzo  */
int
netmap_ring_reinit(struct netmap_kring *kring)
{
	struct netmap_ring *ring = kring->ring;
	u_int i, lim = kring->nkr_num_slots - 1;
	int errors = 0;

	// XXX KASSERT nm_kr_tryget
	RD(10, "called for %s", kring->name);
	// XXX probably wrong to trust userspace
	kring->rhead = ring->head;
	kring->rcur  = ring->cur;
	kring->rtail = ring->tail;

	/* count out-of-range ring pointers */
	if (ring->cur > lim)
		errors++;
	if (ring->head > lim)
		errors++;
	if (ring->tail > lim)
		errors++;
	/* scrub every slot with a bad buffer index or length */
	for (i = 0; i <= lim; i++) {
		u_int idx = ring->slot[i].buf_idx;
		u_int len = ring->slot[i].len;
		/* indices 0 and 1 are reserved; objtotal bounds the pool */
		if (idx < 2 || idx >= kring->na->na_lut.objtotal) {
			RD(5, "bad index at slot %d idx %d len %d ", i, idx, len);
			ring->slot[i].buf_idx = 0;
			ring->slot[i].len = 0;
		} else if (len > NETMAP_BUF_SIZE(kring->na)) {
			ring->slot[i].len = 0;
			RD(5, "bad len at slot %d idx %d len %d", i, idx, len);
		}
	}
	if (errors) {
		RD(10, "total %d errors", errors);
		RD(10, "%s reinit, cur %d -> %d tail %d -> %d",
			kring->name,
			ring->cur, kring->nr_hwcur,
			ring->tail, kring->nr_hwtail);
		/* resync the user-visible pointers to the kernel view */
		ring->head = kring->rhead = kring->nr_hwcur;
		ring->cur  = kring->rcur  = kring->nr_hwcur;
		ring->tail = kring->rtail = kring->nr_hwtail;
	}
	return (errors ? 1 : 0);
}
168968b8534bSLuigi Rizzo 
16904bf50f18SLuigi Rizzo /* interpret the ringid and flags fields of an nmreq, by translating them
16914bf50f18SLuigi Rizzo  * into a pair of intervals of ring indices:
16924bf50f18SLuigi Rizzo  *
16934bf50f18SLuigi Rizzo  * [priv->np_txqfirst, priv->np_txqlast) and
16944bf50f18SLuigi Rizzo  * [priv->np_rxqfirst, priv->np_rxqlast)
16954bf50f18SLuigi Rizzo  *
169668b8534bSLuigi Rizzo  */
16974bf50f18SLuigi Rizzo int
16984bf50f18SLuigi Rizzo netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
169968b8534bSLuigi Rizzo {
1700f9790aebSLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
1701f0ea3689SLuigi Rizzo 	u_int j, i = ringid & NETMAP_RING_MASK;
1702f0ea3689SLuigi Rizzo 	u_int reg = flags & NR_REG_MASK;
1703*37e3a6d3SLuigi Rizzo 	int excluded_direction[] = { NR_TX_RINGS_ONLY, NR_RX_RINGS_ONLY };
1704847bf383SLuigi Rizzo 	enum txrx t;
170568b8534bSLuigi Rizzo 
1706f0ea3689SLuigi Rizzo 	if (reg == NR_REG_DEFAULT) {
1707f0ea3689SLuigi Rizzo 		/* convert from old ringid to flags */
170868b8534bSLuigi Rizzo 		if (ringid & NETMAP_SW_RING) {
1709f0ea3689SLuigi Rizzo 			reg = NR_REG_SW;
171068b8534bSLuigi Rizzo 		} else if (ringid & NETMAP_HW_RING) {
1711f0ea3689SLuigi Rizzo 			reg = NR_REG_ONE_NIC;
171268b8534bSLuigi Rizzo 		} else {
1713f0ea3689SLuigi Rizzo 			reg = NR_REG_ALL_NIC;
1714f0ea3689SLuigi Rizzo 		}
1715f0ea3689SLuigi Rizzo 		D("deprecated API, old ringid 0x%x -> ringid %x reg %d", ringid, i, reg);
1716f0ea3689SLuigi Rizzo 	}
1717*37e3a6d3SLuigi Rizzo 
1718*37e3a6d3SLuigi Rizzo 	if ((flags & NR_PTNETMAP_HOST) && (reg != NR_REG_ALL_NIC ||
1719*37e3a6d3SLuigi Rizzo 			flags & (NR_RX_RINGS_ONLY|NR_TX_RINGS_ONLY))) {
1720*37e3a6d3SLuigi Rizzo 		D("Error: only NR_REG_ALL_NIC supported with netmap passthrough");
1721*37e3a6d3SLuigi Rizzo 		return EINVAL;
1722*37e3a6d3SLuigi Rizzo 	}
1723*37e3a6d3SLuigi Rizzo 
1724*37e3a6d3SLuigi Rizzo 	for_rx_tx(t) {
1725*37e3a6d3SLuigi Rizzo 		if (flags & excluded_direction[t]) {
1726*37e3a6d3SLuigi Rizzo 			priv->np_qfirst[t] = priv->np_qlast[t] = 0;
1727*37e3a6d3SLuigi Rizzo 			continue;
1728*37e3a6d3SLuigi Rizzo 		}
1729f0ea3689SLuigi Rizzo 		switch (reg) {
1730f0ea3689SLuigi Rizzo 		case NR_REG_ALL_NIC:
1731f0ea3689SLuigi Rizzo 		case NR_REG_PIPE_MASTER:
1732f0ea3689SLuigi Rizzo 		case NR_REG_PIPE_SLAVE:
1733847bf383SLuigi Rizzo 			priv->np_qfirst[t] = 0;
1734847bf383SLuigi Rizzo 			priv->np_qlast[t] = nma_get_nrings(na, t);
1735*37e3a6d3SLuigi Rizzo 			ND("ALL/PIPE: %s %d %d", nm_txrx2str(t),
1736*37e3a6d3SLuigi Rizzo 				priv->np_qfirst[t], priv->np_qlast[t]);
1737f0ea3689SLuigi Rizzo 			break;
1738f0ea3689SLuigi Rizzo 		case NR_REG_SW:
1739f0ea3689SLuigi Rizzo 		case NR_REG_NIC_SW:
1740f0ea3689SLuigi Rizzo 			if (!(na->na_flags & NAF_HOST_RINGS)) {
1741f0ea3689SLuigi Rizzo 				D("host rings not supported");
1742f0ea3689SLuigi Rizzo 				return EINVAL;
1743f0ea3689SLuigi Rizzo 			}
1744847bf383SLuigi Rizzo 			priv->np_qfirst[t] = (reg == NR_REG_SW ?
1745847bf383SLuigi Rizzo 				nma_get_nrings(na, t) : 0);
1746847bf383SLuigi Rizzo 			priv->np_qlast[t] = nma_get_nrings(na, t) + 1;
1747*37e3a6d3SLuigi Rizzo 			ND("%s: %s %d %d", reg == NR_REG_SW ? "SW" : "NIC+SW",
1748*37e3a6d3SLuigi Rizzo 				nm_txrx2str(t),
1749*37e3a6d3SLuigi Rizzo 				priv->np_qfirst[t], priv->np_qlast[t]);
1750f0ea3689SLuigi Rizzo 			break;
1751f0ea3689SLuigi Rizzo 		case NR_REG_ONE_NIC:
1752f0ea3689SLuigi Rizzo 			if (i >= na->num_tx_rings && i >= na->num_rx_rings) {
1753f0ea3689SLuigi Rizzo 				D("invalid ring id %d", i);
1754f0ea3689SLuigi Rizzo 				return EINVAL;
1755f0ea3689SLuigi Rizzo 			}
1756f0ea3689SLuigi Rizzo 			/* if not enough rings, use the first one */
1757f0ea3689SLuigi Rizzo 			j = i;
1758847bf383SLuigi Rizzo 			if (j >= nma_get_nrings(na, t))
1759f0ea3689SLuigi Rizzo 				j = 0;
1760847bf383SLuigi Rizzo 			priv->np_qfirst[t] = j;
1761847bf383SLuigi Rizzo 			priv->np_qlast[t] = j + 1;
1762*37e3a6d3SLuigi Rizzo 			ND("ONE_NIC: %s %d %d", nm_txrx2str(t),
1763*37e3a6d3SLuigi Rizzo 				priv->np_qfirst[t], priv->np_qlast[t]);
1764f0ea3689SLuigi Rizzo 			break;
1765f0ea3689SLuigi Rizzo 		default:
1766f0ea3689SLuigi Rizzo 			D("invalid regif type %d", reg);
1767f0ea3689SLuigi Rizzo 			return EINVAL;
176868b8534bSLuigi Rizzo 		}
1769*37e3a6d3SLuigi Rizzo 	}
1770f0ea3689SLuigi Rizzo 	priv->np_flags = (flags & ~NR_REG_MASK) | reg;
17714bf50f18SLuigi Rizzo 
1772ae10d1afSLuigi Rizzo 	if (netmap_verbose) {
1773f0ea3689SLuigi Rizzo 		D("%s: tx [%d,%d) rx [%d,%d) id %d",
17744bf50f18SLuigi Rizzo 			na->name,
1775847bf383SLuigi Rizzo 			priv->np_qfirst[NR_TX],
1776847bf383SLuigi Rizzo 			priv->np_qlast[NR_TX],
1777847bf383SLuigi Rizzo 			priv->np_qfirst[NR_RX],
1778847bf383SLuigi Rizzo 			priv->np_qlast[NR_RX],
1779f0ea3689SLuigi Rizzo 			i);
1780ae10d1afSLuigi Rizzo 	}
178168b8534bSLuigi Rizzo 	return 0;
178268b8534bSLuigi Rizzo }
178368b8534bSLuigi Rizzo 
17844bf50f18SLuigi Rizzo 
17854bf50f18SLuigi Rizzo /*
17864bf50f18SLuigi Rizzo  * Set the ring ID. For devices with a single queue, a request
17874bf50f18SLuigi Rizzo  * for all rings is the same as a single ring.
17884bf50f18SLuigi Rizzo  */
17894bf50f18SLuigi Rizzo static int
17904bf50f18SLuigi Rizzo netmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
17914bf50f18SLuigi Rizzo {
17924bf50f18SLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
17934bf50f18SLuigi Rizzo 	int error;
1794847bf383SLuigi Rizzo 	enum txrx t;
17954bf50f18SLuigi Rizzo 
17964bf50f18SLuigi Rizzo 	error = netmap_interp_ringid(priv, ringid, flags);
17974bf50f18SLuigi Rizzo 	if (error) {
17984bf50f18SLuigi Rizzo 		return error;
17994bf50f18SLuigi Rizzo 	}
18004bf50f18SLuigi Rizzo 
18014bf50f18SLuigi Rizzo 	priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
18024bf50f18SLuigi Rizzo 
18034bf50f18SLuigi Rizzo 	/* optimization: count the users registered for more than
18044bf50f18SLuigi Rizzo 	 * one ring, which are the ones sleeping on the global queue.
18054bf50f18SLuigi Rizzo 	 * The default netmap_notify() callback will then
18064bf50f18SLuigi Rizzo 	 * avoid signaling the global queue if nobody is using it
18074bf50f18SLuigi Rizzo 	 */
1808847bf383SLuigi Rizzo 	for_rx_tx(t) {
1809847bf383SLuigi Rizzo 		if (nm_si_user(priv, t))
1810847bf383SLuigi Rizzo 			na->si_users[t]++;
1811847bf383SLuigi Rizzo 	}
18124bf50f18SLuigi Rizzo 	return 0;
18134bf50f18SLuigi Rizzo }
18144bf50f18SLuigi Rizzo 
1815847bf383SLuigi Rizzo static void
1816847bf383SLuigi Rizzo netmap_unset_ringid(struct netmap_priv_d *priv)
1817847bf383SLuigi Rizzo {
1818847bf383SLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
1819847bf383SLuigi Rizzo 	enum txrx t;
1820847bf383SLuigi Rizzo 
1821847bf383SLuigi Rizzo 	for_rx_tx(t) {
1822847bf383SLuigi Rizzo 		if (nm_si_user(priv, t))
1823847bf383SLuigi Rizzo 			na->si_users[t]--;
1824847bf383SLuigi Rizzo 		priv->np_qfirst[t] = priv->np_qlast[t] = 0;
1825847bf383SLuigi Rizzo 	}
1826847bf383SLuigi Rizzo 	priv->np_flags = 0;
1827847bf383SLuigi Rizzo 	priv->np_txpoll = 0;
1828847bf383SLuigi Rizzo }
1829847bf383SLuigi Rizzo 
1830847bf383SLuigi Rizzo 
1831*37e3a6d3SLuigi Rizzo /* Set the nr_pending_mode for the requested rings.
1832*37e3a6d3SLuigi Rizzo  * If requested, also try to get exclusive access to the rings, provided
1833*37e3a6d3SLuigi Rizzo  * the rings we want to bind are not exclusively owned by a previous bind.
1834847bf383SLuigi Rizzo  */
1835847bf383SLuigi Rizzo static int
1836*37e3a6d3SLuigi Rizzo netmap_krings_get(struct netmap_priv_d *priv)
1837847bf383SLuigi Rizzo {
1838847bf383SLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
1839847bf383SLuigi Rizzo 	u_int i;
1840847bf383SLuigi Rizzo 	struct netmap_kring *kring;
1841847bf383SLuigi Rizzo 	int excl = (priv->np_flags & NR_EXCLUSIVE);
1842847bf383SLuigi Rizzo 	enum txrx t;
1843847bf383SLuigi Rizzo 
1844847bf383SLuigi Rizzo 	ND("%s: grabbing tx [%d, %d) rx [%d, %d)",
1845847bf383SLuigi Rizzo 			na->name,
1846847bf383SLuigi Rizzo 			priv->np_qfirst[NR_TX],
1847847bf383SLuigi Rizzo 			priv->np_qlast[NR_TX],
1848847bf383SLuigi Rizzo 			priv->np_qfirst[NR_RX],
1849847bf383SLuigi Rizzo 			priv->np_qlast[NR_RX]);
1850847bf383SLuigi Rizzo 
1851847bf383SLuigi Rizzo 	/* first round: check that all the requested rings
1852847bf383SLuigi Rizzo 	 * are neither alread exclusively owned, nor we
1853847bf383SLuigi Rizzo 	 * want exclusive ownership when they are already in use
1854847bf383SLuigi Rizzo 	 */
1855847bf383SLuigi Rizzo 	for_rx_tx(t) {
1856847bf383SLuigi Rizzo 		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
1857847bf383SLuigi Rizzo 			kring = &NMR(na, t)[i];
1858847bf383SLuigi Rizzo 			if ((kring->nr_kflags & NKR_EXCLUSIVE) ||
1859847bf383SLuigi Rizzo 			    (kring->users && excl))
1860847bf383SLuigi Rizzo 			{
1861847bf383SLuigi Rizzo 				ND("ring %s busy", kring->name);
1862847bf383SLuigi Rizzo 				return EBUSY;
1863847bf383SLuigi Rizzo 			}
1864847bf383SLuigi Rizzo 		}
1865847bf383SLuigi Rizzo 	}
1866847bf383SLuigi Rizzo 
1867*37e3a6d3SLuigi Rizzo 	/* second round: increment usage count (possibly marking them
1868*37e3a6d3SLuigi Rizzo 	 * as exclusive) and set the nr_pending_mode
1869847bf383SLuigi Rizzo 	 */
1870847bf383SLuigi Rizzo 	for_rx_tx(t) {
1871847bf383SLuigi Rizzo 		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
1872847bf383SLuigi Rizzo 			kring = &NMR(na, t)[i];
1873847bf383SLuigi Rizzo 			kring->users++;
1874847bf383SLuigi Rizzo 			if (excl)
1875847bf383SLuigi Rizzo 				kring->nr_kflags |= NKR_EXCLUSIVE;
1876*37e3a6d3SLuigi Rizzo 	                kring->nr_pending_mode = NKR_NETMAP_ON;
1877847bf383SLuigi Rizzo 		}
1878847bf383SLuigi Rizzo 	}
1879847bf383SLuigi Rizzo 
1880847bf383SLuigi Rizzo 	return 0;
1881847bf383SLuigi Rizzo 
1882847bf383SLuigi Rizzo }
1883847bf383SLuigi Rizzo 
1884*37e3a6d3SLuigi Rizzo /* Undo netmap_krings_get(). This is done by clearing the exclusive mode
1885*37e3a6d3SLuigi Rizzo  * if was asked on regif, and unset the nr_pending_mode if we are the
1886*37e3a6d3SLuigi Rizzo  * last users of the involved rings. */
1887847bf383SLuigi Rizzo static void
1888*37e3a6d3SLuigi Rizzo netmap_krings_put(struct netmap_priv_d *priv)
1889847bf383SLuigi Rizzo {
1890847bf383SLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
1891847bf383SLuigi Rizzo 	u_int i;
1892847bf383SLuigi Rizzo 	struct netmap_kring *kring;
1893847bf383SLuigi Rizzo 	int excl = (priv->np_flags & NR_EXCLUSIVE);
1894847bf383SLuigi Rizzo 	enum txrx t;
1895847bf383SLuigi Rizzo 
1896847bf383SLuigi Rizzo 	ND("%s: releasing tx [%d, %d) rx [%d, %d)",
1897847bf383SLuigi Rizzo 			na->name,
1898847bf383SLuigi Rizzo 			priv->np_qfirst[NR_TX],
1899847bf383SLuigi Rizzo 			priv->np_qlast[NR_TX],
1900847bf383SLuigi Rizzo 			priv->np_qfirst[NR_RX],
1901847bf383SLuigi Rizzo 			priv->np_qlast[MR_RX]);
1902847bf383SLuigi Rizzo 
1903847bf383SLuigi Rizzo 
1904847bf383SLuigi Rizzo 	for_rx_tx(t) {
1905847bf383SLuigi Rizzo 		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
1906847bf383SLuigi Rizzo 			kring = &NMR(na, t)[i];
1907847bf383SLuigi Rizzo 			if (excl)
1908847bf383SLuigi Rizzo 				kring->nr_kflags &= ~NKR_EXCLUSIVE;
1909847bf383SLuigi Rizzo 			kring->users--;
1910*37e3a6d3SLuigi Rizzo 			if (kring->users == 0)
1911*37e3a6d3SLuigi Rizzo 				kring->nr_pending_mode = NKR_NETMAP_OFF;
1912847bf383SLuigi Rizzo 		}
1913847bf383SLuigi Rizzo 	}
1914847bf383SLuigi Rizzo }
1915847bf383SLuigi Rizzo 
/*
 * possibly move the interface to netmap-mode.
 * If success it returns a pointer to netmap_if, otherwise NULL.
 * This must be called with NMG_LOCK held.
 *
 * The following na callbacks are called in the process:
 *
 * na->nm_config()			[by netmap_update_config]
 * (get current number and size of rings)
 *
 *  	We have a generic one for linux (netmap_linux_config).
 *  	The bwrap has to override this, since it has to forward
 *  	the request to the wrapped adapter (netmap_bwrap_config).
 *
 *
 * na->nm_krings_create()
 * (create and init the krings array)
 *
 * 	One of the following:
 *
 *	* netmap_hw_krings_create, 			(hw ports)
 *		creates the standard layout for the krings
 * 		and adds the mbq (used for the host rings).
 *
 * 	* netmap_vp_krings_create			(VALE ports)
 * 		add leases and scratchpads
 *
 * 	* netmap_pipe_krings_create			(pipes)
 * 		create the krings and rings of both ends and
 * 		cross-link them
 *
 *      * netmap_monitor_krings_create 			(monitors)
 *      	avoid allocating the mbq
 *
 *      * netmap_bwrap_krings_create			(bwraps)
 *      	create both the bwrap krings array,
 *      	the krings array of the wrapped adapter, and
 *      	(if needed) the fake array for the host adapter
 *
 * na->nm_register(, 1)
 * (put the adapter in netmap mode)
 *
 * 	This may be one of the following:
 *
 * 	* netmap_hw_reg				        (hw ports)
 * 		checks that the ifp is still there, then calls
 * 		the hardware specific callback;
 *
 * 	* netmap_vp_reg					(VALE ports)
 *		If the port is connected to a bridge,
 *		set the NAF_NETMAP_ON flag under the
 *		bridge write lock.
 *
 *	* netmap_pipe_reg				(pipes)
 *		inform the other pipe end that it is no
 *		longer responsible for the lifetime of this
 *		pipe end
 *
 *	* netmap_monitor_reg				(monitors)
 *		intercept the sync callbacks of the monitored
 *		rings
 *
 *	* netmap_bwrap_reg				(bwraps)
 *		cross-link the bwrap and hwna rings,
 *		forward the request to the hwna, override
 *		the hwna notify callback (to get the frames
 *		coming from outside go through the bridge).
 *
 * Returns 0 on success, an errno value otherwise; on failure every
 * step already performed is undone (see the err_* labels below) and
 * priv->np_na is reset to NULL.
 */
int
netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
	uint16_t ringid, uint32_t flags)
{
	struct netmap_if *nifp = NULL;
	int error;

	NMG_LOCK_ASSERT();
	/* ring configuration may have changed, fetch from the card */
	netmap_update_config(na);
	priv->np_na = na;     /* store the reference */
	/* translate (ringid, flags) into the ring intervals of this binding */
	error = netmap_set_ringid(priv, ringid, flags);
	if (error)
		goto err;
	/* make sure the shared memory region is mapped and finalized */
	error = netmap_mem_finalize(na->nm_mem, na);
	if (error)
		goto err;

	if (na->active_fds == 0) {
		/*
		 * If this is the first registration of the adapter,
		 * create the  in-kernel view of the netmap rings,
		 * the netmap krings.
		 */

		/*
		 * Depending on the adapter, this may also create
		 * the netmap rings themselves
		 */
		error = na->nm_krings_create(na);
		if (error)
			goto err_drop_mem;

	}

	/* now the krings must exist and we can check whether some
	 * previous bind has exclusive ownership on them, and set
	 * nr_pending_mode
	 */
	error = netmap_krings_get(priv);
	if (error)
		goto err_del_krings;

	/* create all needed missing netmap rings */
	error = netmap_mem_rings_create(na);
	if (error)
		goto err_rel_excl;

	/* in all cases, create a new netmap if */
	nifp = netmap_mem_if_new(na);
	if (nifp == NULL) {
		error = ENOMEM;
		goto err_del_rings;
	}

	if (na->active_fds == 0) {
		/* cache the allocator info in the na */
		error = netmap_mem_get_lut(na->nm_mem, &na->na_lut);
		if (error)
			goto err_del_if;
		ND("lut %p bufs %u size %u", na->na_lut.lut, na->na_lut.objtotal,
					    na->na_lut.objsize);
	}

	if (nm_kring_pending(priv)) {
		/* Some kring is switching mode, tell the adapter to
		 * react on this. */
		error = na->nm_register(na, 1);
		if (error)
			goto err_put_lut;
	}

	/* Commit the reference. */
	na->active_fds++;

	/*
	 * advertise that the interface is ready by setting np_nifp.
	 * The barrier is needed because readers (poll, *SYNC and mmap)
	 * check for priv->np_nifp != NULL without locking
	 */
	mb(); /* make sure previous writes are visible to all CPUs */
	priv->np_nifp = nifp;

	return 0;

	/* error unwinding: each label undoes one setup step and falls
	 * through to undo all the earlier ones, in reverse order */
err_put_lut:
	if (na->active_fds == 0)
		memset(&na->na_lut, 0, sizeof(na->na_lut));
err_del_if:
	netmap_mem_if_delete(na, nifp);
err_rel_excl:
	netmap_krings_put(priv);
err_del_rings:
	netmap_mem_rings_delete(na);
err_del_krings:
	if (na->active_fds == 0)
		na->nm_krings_delete(na);
err_drop_mem:
	netmap_mem_deref(na->nm_mem, na);
err:
	priv->np_na = NULL;
	return error;
}
2089847bf383SLuigi Rizzo 
2090847bf383SLuigi Rizzo 
2091847bf383SLuigi Rizzo /*
2092*37e3a6d3SLuigi Rizzo  * update kring and ring at the end of rxsync/txsync.
2093847bf383SLuigi Rizzo  */
2094847bf383SLuigi Rizzo static inline void
2095*37e3a6d3SLuigi Rizzo nm_sync_finalize(struct netmap_kring *kring)
2096847bf383SLuigi Rizzo {
2097*37e3a6d3SLuigi Rizzo 	/*
2098*37e3a6d3SLuigi Rizzo 	 * Update ring tail to what the kernel knows
2099*37e3a6d3SLuigi Rizzo 	 * After txsync: head/rhead/hwcur might be behind cur/rcur
2100*37e3a6d3SLuigi Rizzo 	 * if no carrier.
2101*37e3a6d3SLuigi Rizzo 	 */
2102847bf383SLuigi Rizzo 	kring->ring->tail = kring->rtail = kring->nr_hwtail;
2103847bf383SLuigi Rizzo 
2104847bf383SLuigi Rizzo 	ND(5, "%s now hwcur %d hwtail %d head %d cur %d tail %d",
2105847bf383SLuigi Rizzo 		kring->name, kring->nr_hwcur, kring->nr_hwtail,
2106847bf383SLuigi Rizzo 		kring->rhead, kring->rcur, kring->rtail);
2107847bf383SLuigi Rizzo }
2108847bf383SLuigi Rizzo 
210968b8534bSLuigi Rizzo /*
211068b8534bSLuigi Rizzo  * ioctl(2) support for the "netmap" device.
211168b8534bSLuigi Rizzo  *
211268b8534bSLuigi Rizzo  * Following a list of accepted commands:
211368b8534bSLuigi Rizzo  * - NIOCGINFO
211468b8534bSLuigi Rizzo  * - SIOCGIFADDR	just for convenience
211568b8534bSLuigi Rizzo  * - NIOCREGIF
211668b8534bSLuigi Rizzo  * - NIOCTXSYNC
211768b8534bSLuigi Rizzo  * - NIOCRXSYNC
211868b8534bSLuigi Rizzo  *
211968b8534bSLuigi Rizzo  * Return 0 on success, errno otherwise.
212068b8534bSLuigi Rizzo  */
2121f9790aebSLuigi Rizzo int
2122*37e3a6d3SLuigi Rizzo netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread *td)
212368b8534bSLuigi Rizzo {
212468b8534bSLuigi Rizzo 	struct nmreq *nmr = (struct nmreq *) data;
2125ce3ee1e7SLuigi Rizzo 	struct netmap_adapter *na = NULL;
2126*37e3a6d3SLuigi Rizzo 	struct ifnet *ifp = NULL;
2127*37e3a6d3SLuigi Rizzo 	int error = 0;
2128f0ea3689SLuigi Rizzo 	u_int i, qfirst, qlast;
212968b8534bSLuigi Rizzo 	struct netmap_if *nifp;
2130ce3ee1e7SLuigi Rizzo 	struct netmap_kring *krings;
2131847bf383SLuigi Rizzo 	enum txrx t;
213268b8534bSLuigi Rizzo 
213317885a7bSLuigi Rizzo 	if (cmd == NIOCGINFO || cmd == NIOCREGIF) {
213417885a7bSLuigi Rizzo 		/* truncate name */
213517885a7bSLuigi Rizzo 		nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0';
213617885a7bSLuigi Rizzo 		if (nmr->nr_version != NETMAP_API) {
213717885a7bSLuigi Rizzo 			D("API mismatch for %s got %d need %d",
213817885a7bSLuigi Rizzo 				nmr->nr_name,
213917885a7bSLuigi Rizzo 				nmr->nr_version, NETMAP_API);
214017885a7bSLuigi Rizzo 			nmr->nr_version = NETMAP_API;
2141f0ea3689SLuigi Rizzo 		}
2142f0ea3689SLuigi Rizzo 		if (nmr->nr_version < NETMAP_MIN_API ||
2143f0ea3689SLuigi Rizzo 		    nmr->nr_version > NETMAP_MAX_API) {
214417885a7bSLuigi Rizzo 			return EINVAL;
214517885a7bSLuigi Rizzo 		}
214617885a7bSLuigi Rizzo 	}
214768b8534bSLuigi Rizzo 
214868b8534bSLuigi Rizzo 	switch (cmd) {
214968b8534bSLuigi Rizzo 	case NIOCGINFO:		/* return capabilities etc */
2150f18be576SLuigi Rizzo 		if (nmr->nr_cmd == NETMAP_BDG_LIST) {
2151f18be576SLuigi Rizzo 			error = netmap_bdg_ctl(nmr, NULL);
2152f18be576SLuigi Rizzo 			break;
2153f18be576SLuigi Rizzo 		}
2154ce3ee1e7SLuigi Rizzo 
2155ce3ee1e7SLuigi Rizzo 		NMG_LOCK();
2156ce3ee1e7SLuigi Rizzo 		do {
2157ce3ee1e7SLuigi Rizzo 			/* memsize is always valid */
2158ce3ee1e7SLuigi Rizzo 			struct netmap_mem_d *nmd = &nm_mem;
2159ce3ee1e7SLuigi Rizzo 			u_int memflags;
2160ce3ee1e7SLuigi Rizzo 
2161ce3ee1e7SLuigi Rizzo 			if (nmr->nr_name[0] != '\0') {
2162*37e3a6d3SLuigi Rizzo 
2163ce3ee1e7SLuigi Rizzo 				/* get a refcount */
2164*37e3a6d3SLuigi Rizzo 				error = netmap_get_na(nmr, &na, &ifp, 1 /* create */);
2165*37e3a6d3SLuigi Rizzo 				if (error) {
2166*37e3a6d3SLuigi Rizzo 					na = NULL;
2167*37e3a6d3SLuigi Rizzo 					ifp = NULL;
21688241616dSLuigi Rizzo 					break;
2169*37e3a6d3SLuigi Rizzo 				}
2170f9790aebSLuigi Rizzo 				nmd = na->nm_mem; /* get memory allocator */
2171ce3ee1e7SLuigi Rizzo 			}
2172ce3ee1e7SLuigi Rizzo 
2173f0ea3689SLuigi Rizzo 			error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags,
2174f0ea3689SLuigi Rizzo 				&nmr->nr_arg2);
2175ce3ee1e7SLuigi Rizzo 			if (error)
2176ce3ee1e7SLuigi Rizzo 				break;
2177ce3ee1e7SLuigi Rizzo 			if (na == NULL) /* only memory info */
2178ce3ee1e7SLuigi Rizzo 				break;
21798241616dSLuigi Rizzo 			nmr->nr_offset = 0;
21808241616dSLuigi Rizzo 			nmr->nr_rx_slots = nmr->nr_tx_slots = 0;
2181ae10d1afSLuigi Rizzo 			netmap_update_config(na);
2182d76bf4ffSLuigi Rizzo 			nmr->nr_rx_rings = na->num_rx_rings;
2183d76bf4ffSLuigi Rizzo 			nmr->nr_tx_rings = na->num_tx_rings;
218464ae02c3SLuigi Rizzo 			nmr->nr_rx_slots = na->num_rx_desc;
218564ae02c3SLuigi Rizzo 			nmr->nr_tx_slots = na->num_tx_desc;
2186ce3ee1e7SLuigi Rizzo 		} while (0);
2187*37e3a6d3SLuigi Rizzo 		netmap_unget_na(na, ifp);
2188ce3ee1e7SLuigi Rizzo 		NMG_UNLOCK();
218968b8534bSLuigi Rizzo 		break;
219068b8534bSLuigi Rizzo 
219168b8534bSLuigi Rizzo 	case NIOCREGIF:
2192f18be576SLuigi Rizzo 		/* possibly attach/detach NIC and VALE switch */
2193f18be576SLuigi Rizzo 		i = nmr->nr_cmd;
2194f9790aebSLuigi Rizzo 		if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH
21954bf50f18SLuigi Rizzo 				|| i == NETMAP_BDG_VNET_HDR
21964bf50f18SLuigi Rizzo 				|| i == NETMAP_BDG_NEWIF
2197*37e3a6d3SLuigi Rizzo 				|| i == NETMAP_BDG_DELIF
2198*37e3a6d3SLuigi Rizzo 				|| i == NETMAP_BDG_POLLING_ON
2199*37e3a6d3SLuigi Rizzo 				|| i == NETMAP_BDG_POLLING_OFF) {
2200f18be576SLuigi Rizzo 			error = netmap_bdg_ctl(nmr, NULL);
2201f18be576SLuigi Rizzo 			break;
2202*37e3a6d3SLuigi Rizzo 		} else if (i == NETMAP_PT_HOST_CREATE || i == NETMAP_PT_HOST_DELETE) {
2203*37e3a6d3SLuigi Rizzo 			error = ptnetmap_ctl(nmr, priv->np_na);
2204*37e3a6d3SLuigi Rizzo 			break;
2205*37e3a6d3SLuigi Rizzo 		} else if (i == NETMAP_VNET_HDR_GET) {
2206*37e3a6d3SLuigi Rizzo 			struct ifnet *ifp;
2207*37e3a6d3SLuigi Rizzo 
2208*37e3a6d3SLuigi Rizzo 			NMG_LOCK();
2209*37e3a6d3SLuigi Rizzo 			error = netmap_get_na(nmr, &na, &ifp, 0);
2210*37e3a6d3SLuigi Rizzo 			if (na && !error) {
2211*37e3a6d3SLuigi Rizzo 				nmr->nr_arg1 = na->virt_hdr_len;
2212*37e3a6d3SLuigi Rizzo 			}
2213*37e3a6d3SLuigi Rizzo 			netmap_unget_na(na, ifp);
2214*37e3a6d3SLuigi Rizzo 			NMG_UNLOCK();
2215*37e3a6d3SLuigi Rizzo 			break;
2216f18be576SLuigi Rizzo 		} else if (i != 0) {
2217f18be576SLuigi Rizzo 			D("nr_cmd must be 0 not %d", i);
2218f18be576SLuigi Rizzo 			error = EINVAL;
2219f18be576SLuigi Rizzo 			break;
2220f18be576SLuigi Rizzo 		}
2221f18be576SLuigi Rizzo 
22228241616dSLuigi Rizzo 		/* protect access to priv from concurrent NIOCREGIF */
2223ce3ee1e7SLuigi Rizzo 		NMG_LOCK();
2224ce3ee1e7SLuigi Rizzo 		do {
2225ce3ee1e7SLuigi Rizzo 			u_int memflags;
2226*37e3a6d3SLuigi Rizzo 			struct ifnet *ifp;
2227ce3ee1e7SLuigi Rizzo 
2228847bf383SLuigi Rizzo 			if (priv->np_nifp != NULL) {	/* thread already registered */
2229f0ea3689SLuigi Rizzo 				error = EBUSY;
2230506cc70cSLuigi Rizzo 				break;
2231506cc70cSLuigi Rizzo 			}
223268b8534bSLuigi Rizzo 			/* find the interface and a reference */
2233*37e3a6d3SLuigi Rizzo 			error = netmap_get_na(nmr, &na, &ifp,
2234*37e3a6d3SLuigi Rizzo 					      1 /* create */); /* keep reference */
223568b8534bSLuigi Rizzo 			if (error)
2236ce3ee1e7SLuigi Rizzo 				break;
2237f9790aebSLuigi Rizzo 			if (NETMAP_OWNED_BY_KERN(na)) {
2238*37e3a6d3SLuigi Rizzo 				netmap_unget_na(na, ifp);
2239ce3ee1e7SLuigi Rizzo 				error = EBUSY;
2240ce3ee1e7SLuigi Rizzo 				break;
2241f196ce38SLuigi Rizzo 			}
2242*37e3a6d3SLuigi Rizzo 
2243*37e3a6d3SLuigi Rizzo 			if (na->virt_hdr_len && !(nmr->nr_flags & NR_ACCEPT_VNET_HDR)) {
2244*37e3a6d3SLuigi Rizzo 				netmap_unget_na(na, ifp);
2245*37e3a6d3SLuigi Rizzo 				error = EIO;
2246*37e3a6d3SLuigi Rizzo 				break;
2247*37e3a6d3SLuigi Rizzo 			}
2248*37e3a6d3SLuigi Rizzo 
2249847bf383SLuigi Rizzo 			error = netmap_do_regif(priv, na, nmr->nr_ringid, nmr->nr_flags);
2250847bf383SLuigi Rizzo 			if (error) {    /* reg. failed, release priv and ref */
2251*37e3a6d3SLuigi Rizzo 				netmap_unget_na(na, ifp);
2252ce3ee1e7SLuigi Rizzo 				break;
225368b8534bSLuigi Rizzo 			}
2254847bf383SLuigi Rizzo 			nifp = priv->np_nifp;
2255f0ea3689SLuigi Rizzo 			priv->np_td = td; // XXX kqueue, debugging only
225668b8534bSLuigi Rizzo 
225768b8534bSLuigi Rizzo 			/* return the offset of the netmap_if object */
2258d76bf4ffSLuigi Rizzo 			nmr->nr_rx_rings = na->num_rx_rings;
2259d76bf4ffSLuigi Rizzo 			nmr->nr_tx_rings = na->num_tx_rings;
226064ae02c3SLuigi Rizzo 			nmr->nr_rx_slots = na->num_rx_desc;
226164ae02c3SLuigi Rizzo 			nmr->nr_tx_slots = na->num_tx_desc;
2262f0ea3689SLuigi Rizzo 			error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags,
2263f0ea3689SLuigi Rizzo 				&nmr->nr_arg2);
2264ce3ee1e7SLuigi Rizzo 			if (error) {
2265847bf383SLuigi Rizzo 				netmap_do_unregif(priv);
2266*37e3a6d3SLuigi Rizzo 				netmap_unget_na(na, ifp);
2267ce3ee1e7SLuigi Rizzo 				break;
2268ce3ee1e7SLuigi Rizzo 			}
2269ce3ee1e7SLuigi Rizzo 			if (memflags & NETMAP_MEM_PRIVATE) {
22703d819cb6SLuigi Rizzo 				*(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
2271ce3ee1e7SLuigi Rizzo 			}
2272847bf383SLuigi Rizzo 			for_rx_tx(t) {
2273847bf383SLuigi Rizzo 				priv->np_si[t] = nm_si_user(priv, t) ?
2274847bf383SLuigi Rizzo 					&na->si[t] : &NMR(na, t)[priv->np_qfirst[t]].si;
2275847bf383SLuigi Rizzo 			}
2276f0ea3689SLuigi Rizzo 
2277f0ea3689SLuigi Rizzo 			if (nmr->nr_arg3) {
2278*37e3a6d3SLuigi Rizzo 				if (netmap_verbose)
2279f0ea3689SLuigi Rizzo 					D("requested %d extra buffers", nmr->nr_arg3);
2280f0ea3689SLuigi Rizzo 				nmr->nr_arg3 = netmap_extra_alloc(na,
2281f0ea3689SLuigi Rizzo 					&nifp->ni_bufs_head, nmr->nr_arg3);
2282*37e3a6d3SLuigi Rizzo 				if (netmap_verbose)
2283f0ea3689SLuigi Rizzo 					D("got %d extra buffers", nmr->nr_arg3);
2284f0ea3689SLuigi Rizzo 			}
2285ce3ee1e7SLuigi Rizzo 			nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);
2286*37e3a6d3SLuigi Rizzo 
2287*37e3a6d3SLuigi Rizzo 			/* store ifp reference so that priv destructor may release it */
2288*37e3a6d3SLuigi Rizzo 			priv->np_ifp = ifp;
2289ce3ee1e7SLuigi Rizzo 		} while (0);
2290ce3ee1e7SLuigi Rizzo 		NMG_UNLOCK();
229168b8534bSLuigi Rizzo 		break;
229268b8534bSLuigi Rizzo 
229368b8534bSLuigi Rizzo 	case NIOCTXSYNC:
229468b8534bSLuigi Rizzo 	case NIOCRXSYNC:
22958241616dSLuigi Rizzo 		nifp = priv->np_nifp;
22968241616dSLuigi Rizzo 
22978241616dSLuigi Rizzo 		if (nifp == NULL) {
2298506cc70cSLuigi Rizzo 			error = ENXIO;
2299506cc70cSLuigi Rizzo 			break;
2300506cc70cSLuigi Rizzo 		}
23016641c68bSLuigi Rizzo 		mb(); /* make sure following reads are not from cache */
23028241616dSLuigi Rizzo 
2303f9790aebSLuigi Rizzo 		na = priv->np_na;      /* we have a reference */
23048241616dSLuigi Rizzo 
2305f9790aebSLuigi Rizzo 		if (na == NULL) {
2306f9790aebSLuigi Rizzo 			D("Internal error: nifp != NULL && na == NULL");
23078241616dSLuigi Rizzo 			error = ENXIO;
23088241616dSLuigi Rizzo 			break;
23098241616dSLuigi Rizzo 		}
23108241616dSLuigi Rizzo 
2311847bf383SLuigi Rizzo 		t = (cmd == NIOCTXSYNC ? NR_TX : NR_RX);
2312847bf383SLuigi Rizzo 		krings = NMR(na, t);
2313847bf383SLuigi Rizzo 		qfirst = priv->np_qfirst[t];
2314847bf383SLuigi Rizzo 		qlast = priv->np_qlast[t];
231568b8534bSLuigi Rizzo 
2316f0ea3689SLuigi Rizzo 		for (i = qfirst; i < qlast; i++) {
2317ce3ee1e7SLuigi Rizzo 			struct netmap_kring *kring = krings + i;
2318*37e3a6d3SLuigi Rizzo 			struct netmap_ring *ring = kring->ring;
2319*37e3a6d3SLuigi Rizzo 
2320*37e3a6d3SLuigi Rizzo 			if (unlikely(nm_kr_tryget(kring, 1, &error))) {
2321*37e3a6d3SLuigi Rizzo 				error = (error ? EIO : 0);
2322*37e3a6d3SLuigi Rizzo 				continue;
2323ce3ee1e7SLuigi Rizzo 			}
2324*37e3a6d3SLuigi Rizzo 
232568b8534bSLuigi Rizzo 			if (cmd == NIOCTXSYNC) {
232668b8534bSLuigi Rizzo 				if (netmap_verbose & NM_VERB_TXSYNC)
23273c0caf6cSLuigi Rizzo 					D("pre txsync ring %d cur %d hwcur %d",
2328*37e3a6d3SLuigi Rizzo 					    i, ring->cur,
232968b8534bSLuigi Rizzo 					    kring->nr_hwcur);
2330*37e3a6d3SLuigi Rizzo 				if (nm_txsync_prologue(kring, ring) >= kring->nkr_num_slots) {
233117885a7bSLuigi Rizzo 					netmap_ring_reinit(kring);
2332847bf383SLuigi Rizzo 				} else if (kring->nm_sync(kring, NAF_FORCE_RECLAIM) == 0) {
2333*37e3a6d3SLuigi Rizzo 					nm_sync_finalize(kring);
233417885a7bSLuigi Rizzo 				}
233568b8534bSLuigi Rizzo 				if (netmap_verbose & NM_VERB_TXSYNC)
23363c0caf6cSLuigi Rizzo 					D("post txsync ring %d cur %d hwcur %d",
2337*37e3a6d3SLuigi Rizzo 					    i, ring->cur,
233868b8534bSLuigi Rizzo 					    kring->nr_hwcur);
233968b8534bSLuigi Rizzo 			} else {
2340*37e3a6d3SLuigi Rizzo 				if (nm_rxsync_prologue(kring, ring) >= kring->nkr_num_slots) {
2341847bf383SLuigi Rizzo 					netmap_ring_reinit(kring);
2342847bf383SLuigi Rizzo 				} else if (kring->nm_sync(kring, NAF_FORCE_READ) == 0) {
2343*37e3a6d3SLuigi Rizzo 					nm_sync_finalize(kring);
2344847bf383SLuigi Rizzo 				}
2345*37e3a6d3SLuigi Rizzo 				microtime(&ring->ts);
234668b8534bSLuigi Rizzo 			}
2347ce3ee1e7SLuigi Rizzo 			nm_kr_put(kring);
234868b8534bSLuigi Rizzo 		}
234968b8534bSLuigi Rizzo 
235068b8534bSLuigi Rizzo 		break;
235168b8534bSLuigi Rizzo 
2352847bf383SLuigi Rizzo #ifdef WITH_VALE
23534bf50f18SLuigi Rizzo 	case NIOCCONFIG:
23544bf50f18SLuigi Rizzo 		error = netmap_bdg_config(nmr);
23554bf50f18SLuigi Rizzo 		break;
2356847bf383SLuigi Rizzo #endif
2357f196ce38SLuigi Rizzo #ifdef __FreeBSD__
235889e3fd52SLuigi Rizzo 	case FIONBIO:
235989e3fd52SLuigi Rizzo 	case FIOASYNC:
236089e3fd52SLuigi Rizzo 		ND("FIONBIO/FIOASYNC are no-ops");
236189e3fd52SLuigi Rizzo 		break;
236289e3fd52SLuigi Rizzo 
236368b8534bSLuigi Rizzo 	case BIOCIMMEDIATE:
236468b8534bSLuigi Rizzo 	case BIOCGHDRCMPLT:
236568b8534bSLuigi Rizzo 	case BIOCSHDRCMPLT:
236668b8534bSLuigi Rizzo 	case BIOCSSEESENT:
236768b8534bSLuigi Rizzo 		D("ignore BIOCIMMEDIATE/BIOCSHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT");
236868b8534bSLuigi Rizzo 		break;
236968b8534bSLuigi Rizzo 
2370babc7c12SLuigi Rizzo 	default:	/* allow device-specific ioctls */
237168b8534bSLuigi Rizzo 	    {
2372b3d37588SLuigi Rizzo 		struct ifnet *ifp = ifunit_ref(nmr->nr_name);
2373b3d37588SLuigi Rizzo 		if (ifp == NULL) {
2374b3d37588SLuigi Rizzo 			error = ENXIO;
2375b3d37588SLuigi Rizzo 		} else {
237668b8534bSLuigi Rizzo 			struct socket so;
2377ce3ee1e7SLuigi Rizzo 
237868b8534bSLuigi Rizzo 			bzero(&so, sizeof(so));
237968b8534bSLuigi Rizzo 			so.so_vnet = ifp->if_vnet;
238068b8534bSLuigi Rizzo 			// so->so_proto not null.
238168b8534bSLuigi Rizzo 			error = ifioctl(&so, cmd, data, td);
2382b3d37588SLuigi Rizzo 			if_rele(ifp);
2383b3d37588SLuigi Rizzo 		}
2384babc7c12SLuigi Rizzo 		break;
238568b8534bSLuigi Rizzo 	    }
2386f196ce38SLuigi Rizzo 
2387f196ce38SLuigi Rizzo #else /* linux */
2388f196ce38SLuigi Rizzo 	default:
2389f196ce38SLuigi Rizzo 		error = EOPNOTSUPP;
2390f196ce38SLuigi Rizzo #endif /* linux */
239168b8534bSLuigi Rizzo 	}
239268b8534bSLuigi Rizzo 
239368b8534bSLuigi Rizzo 	return (error);
239468b8534bSLuigi Rizzo }
239568b8534bSLuigi Rizzo 
239668b8534bSLuigi Rizzo 
239768b8534bSLuigi Rizzo /*
239868b8534bSLuigi Rizzo  * select(2) and poll(2) handlers for the "netmap" device.
239968b8534bSLuigi Rizzo  *
240068b8534bSLuigi Rizzo  * Can be called for one or more queues.
 * Return the event mask corresponding to ready events.
240268b8534bSLuigi Rizzo  * If there are no ready events, do a selrecord on either individual
2403ce3ee1e7SLuigi Rizzo  * selinfo or on the global one.
240468b8534bSLuigi Rizzo  * Device-dependent parts (locking and sync of tx/rx rings)
240568b8534bSLuigi Rizzo  * are done through callbacks.
2406f196ce38SLuigi Rizzo  *
240701c7d25fSLuigi Rizzo  * On linux, arguments are really pwait, the poll table, and 'td' is struct file *
 * The first one is remapped to pwait as selrecord() uses the name as a
 * hidden argument.
241068b8534bSLuigi Rizzo  */
int
netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
{
	struct netmap_adapter *na;
	struct netmap_kring *kring;
	struct netmap_ring *ring;
	u_int i, check_all_tx, check_all_rx, want[NR_TXRX], revents = 0;
	/* aliases so the tx/rx-symmetric code below stays readable */
#define want_tx want[NR_TX]
#define want_rx want[NR_RX]
	struct mbq q;		/* packets from hw queues to host stack */
	enum txrx t;

	/*
	 * In order to avoid nested locks, we need to "double check"
	 * txsync and rxsync if we decide to do a selrecord().
	 * retry_tx (and retry_rx, later) prevent looping forever.
	 */
	int retry_tx = 1, retry_rx = 1;

	/* transparent mode: send_down is 1 if we have found some
	 * packets to forward during the rx scan and we have not
	 * sent them down to the nic yet
	 */
	int send_down = 0;

	mbq_init(&q);

	if (priv->np_nifp == NULL) {
		D("No if registered");
		return POLLERR;
	}
	mb(); /* make sure following reads are not from cache */

	na = priv->np_na;	/* the adapter this fd is bound to */

	if (!nm_netmap_on(na))
		return POLLERR;

	if (netmap_verbose & 0x8000)
		D("device %s events 0x%x", na->name, events);
	want_tx = events & (POLLOUT | POLLWRNORM);
	want_rx = events & (POLLIN | POLLRDNORM);

	/*
	 * check_all_{tx|rx} are set if the card has more than one queue AND
	 * the file descriptor is bound to all of them. If so, we sleep on
	 * the "global" selinfo, otherwise we sleep on individual selinfo
	 * (FreeBSD only allows two selinfo's per file descriptor).
	 * The interrupt routine in the driver wake one or the other
	 * (or both) depending on which clients are active.
	 *
	 * rxsync() is only called if we run out of buffers on a POLLIN.
	 * txsync() is called if we run out of buffers on POLLOUT, or
	 * there are pending packets to send. The latter can be disabled
	 * passing NETMAP_NO_TX_POLL in the NIOCREG call.
	 */
	check_all_tx = nm_si_user(priv, NR_TX);
	check_all_rx = nm_si_user(priv, NR_RX);

	/*
	 * We start with a lock free round which is cheap if we have
	 * slots available. If this fails, then lock and call the sync
	 * routines.
	 */
#if 1 /* new code- call rx if any of the ring needs to release or read buffers */
	if (want_tx) {
		t = NR_TX;
		for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) {
			kring = &NMR(na, t)[i];
			/* XXX compare ring->cur and kring->tail */
			if (!nm_ring_empty(kring->ring)) {
				revents |= want[t];
				want[t] = 0;	/* also breaks the loop */
			}
		}
	}
	if (want_rx) {
		want_rx = 0; /* look for a reason to run the handlers */
		t = NR_RX;
		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
			kring = &NMR(na, t)[i];
			if (kring->ring->cur == kring->ring->tail /* try fetch new buffers */
			    || kring->rhead != kring->ring->head /* release buffers */) {
				want_rx = 1;
			}
		}
		if (!want_rx)
			revents |= events & (POLLIN | POLLRDNORM); /* we have data */
	}
#else /* old code */
	for_rx_tx(t) {
		for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) {
			kring = &NMR(na, t)[i];
			/* XXX compare ring->cur and kring->tail */
			if (!nm_ring_empty(kring->ring)) {
				revents |= want[t];
				want[t] = 0;	/* also breaks the loop */
			}
		}
	}
#endif /* old code */

	/*
	 * If we want to push packets out (priv->np_txpoll) or
	 * want_tx is still set, we must issue txsync calls
	 * (on all rings, to avoid that the tx rings stall).
	 * XXX should also check cur != hwcur on the tx rings.
	 * Fortunately, normal tx mode has np_txpoll set.
	 */
	if (priv->np_txpoll || want_tx) {
		/*
		 * The first round checks if anyone is ready, if not
		 * do a selrecord and another round to handle races.
		 * want_tx goes to 0 if any space is found, and is
		 * used to skip rings with no pending transmissions.
		 */
flush_tx:
		for (i = priv->np_qfirst[NR_TX]; i < priv->np_qlast[NR_TX]; i++) {
			int found = 0;

			kring = &na->tx_rings[i];
			ring = kring->ring;

			/* nothing queued and nothing wanted: skip the ring
			 * (unless we still owe a send_down pass) */
			if (!send_down && !want_tx && ring->cur == kring->nr_hwcur)
				continue;

			/* could not acquire the kring (busy/stopped): skip */
			if (nm_kr_tryget(kring, 1, &revents))
				continue;

			if (nm_txsync_prologue(kring, ring) >= kring->nkr_num_slots) {
				/* userspace left the ring inconsistent */
				netmap_ring_reinit(kring);
				revents |= POLLERR;
			} else {
				if (kring->nm_sync(kring, 0))
					revents |= POLLERR;
				else
					nm_sync_finalize(kring);
			}

			/*
			 * If we found new slots, notify potential
			 * listeners on the same ring.
			 * Since we just did a txsync, look at the copies
			 * of cur,tail in the kring.
			 */
			found = kring->rcur != kring->rtail;
			nm_kr_put(kring);
			if (found) { /* notify other listeners */
				revents |= want_tx;
				want_tx = 0;
				kring->nm_notify(kring, 0);
			}
		}
		/* if there were any packet to forward we must have handled them by now */
		send_down = 0;
		if (want_tx && retry_tx && sr) {
			/* nothing ready yet: record ourselves and do one
			 * more pass to close the wakeup race */
			nm_os_selrecord(sr, check_all_tx ?
			    &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si);
			retry_tx = 0;
			goto flush_tx;
		}
	}

	/*
	 * If want_rx is still set scan receive rings.
	 * Do it on all rings because otherwise we starve.
	 */
	if (want_rx) {
		/* two rounds here for race avoidance */
do_retry_rx:
		for (i = priv->np_qfirst[NR_RX]; i < priv->np_qlast[NR_RX]; i++) {
			int found = 0;

			kring = &na->rx_rings[i];
			ring = kring->ring;

			if (unlikely(nm_kr_tryget(kring, 1, &revents)))
				continue;

			if (nm_rxsync_prologue(kring, ring) >= kring->nkr_num_slots) {
				/* note: no 'continue' here, the rxsync below
				 * still runs on the reinitialized ring */
				netmap_ring_reinit(kring);
				revents |= POLLERR;
			}
			/* now we can use kring->rcur, rtail */

			/*
			 * transparent mode support: collect packets
			 * from the rxring(s).
			 */
			if (nm_may_forward_up(kring)) {
				ND(10, "forwarding some buffers up %d to %d",
				    kring->nr_hwcur, ring->cur);
				netmap_grab_packets(kring, &q, netmap_fwd);
			}

			kring->nr_kflags &= ~NR_FORWARD;
			if (kring->nm_sync(kring, 0))
				revents |= POLLERR;
			else
				nm_sync_finalize(kring);
			send_down |= (kring->nr_kflags & NR_FORWARD); /* host ring only */
			if (netmap_no_timestamp == 0 ||
					ring->flags & NR_TIMESTAMP) {
				microtime(&ring->ts);
			}
			found = kring->rcur != kring->rtail;
			nm_kr_put(kring);
			if (found) {
				revents |= want_rx;
				retry_rx = 0;
				kring->nm_notify(kring, 0);
			}
		}

		if (retry_rx && sr) {
			nm_os_selrecord(sr, check_all_rx ?
			    &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si);
		}
		if (send_down > 0 || retry_rx) {
			retry_rx = 0;
			if (send_down)
				goto flush_tx; /* and retry_rx */
			else
				goto do_retry_rx;
		}
	}

	/*
	 * Transparent mode: marked bufs on rx rings between
	 * kring->nr_hwcur and ring->head
	 * are passed to the other endpoint.
	 *
	 * Transparent mode requires to bind all
	 * rings to a single file descriptor.
	 */

	/* flush mbufs collected for the host stack; tx_rings[num_tx_rings]
	 * is the host-stack kring (see the kring layout comment above
	 * _netmap_attach) */
	if (q.head && !nm_kr_tryget(&na->tx_rings[na->num_tx_rings], 1, &revents)) {
		netmap_send_up(na->ifp, &q);
		nm_kr_put(&na->tx_rings[na->num_tx_rings]);
	}

	return (revents);
#undef want_tx
#undef want_rx
}
265668b8534bSLuigi Rizzo 
265717885a7bSLuigi Rizzo 
265817885a7bSLuigi Rizzo /*-------------------- driver support routines -------------------*/
265968b8534bSLuigi Rizzo 
266089cc2556SLuigi Rizzo /* default notify callback */
2661f9790aebSLuigi Rizzo static int
2662847bf383SLuigi Rizzo netmap_notify(struct netmap_kring *kring, int flags)
2663f9790aebSLuigi Rizzo {
2664847bf383SLuigi Rizzo 	struct netmap_adapter *na = kring->na;
2665847bf383SLuigi Rizzo 	enum txrx t = kring->tx;
2666f9790aebSLuigi Rizzo 
2667*37e3a6d3SLuigi Rizzo 	nm_os_selwakeup(&kring->si);
266889cc2556SLuigi Rizzo 	/* optimization: avoid a wake up on the global
266989cc2556SLuigi Rizzo 	 * queue if nobody has registered for more
267089cc2556SLuigi Rizzo 	 * than one ring
267189cc2556SLuigi Rizzo 	 */
2672847bf383SLuigi Rizzo 	if (na->si_users[t] > 0)
2673*37e3a6d3SLuigi Rizzo 		nm_os_selwakeup(&na->si[t]);
2674847bf383SLuigi Rizzo 
2675*37e3a6d3SLuigi Rizzo 	return NM_IRQ_COMPLETED;
2676f9790aebSLuigi Rizzo }
2677f9790aebSLuigi Rizzo 
#if 0
/* Disabled alternative notify callback (Windows-style, uses KeSetEvent).
 * Kept for reference only: it is compiled out and refers to a 'notes'
 * object that is not defined in this file. Candidate for removal.
 */
static int
netmap_notify(struct netmap_adapter *na, u_int n_ring,
enum txrx tx, int flags)
{
	if (tx == NR_TX) {
		KeSetEvent(notes->TX_EVENT, 0, FALSE);
	}
	else
	{
		KeSetEvent(notes->RX_EVENT, 0, FALSE);
	}
	return 0;
}
#endif
2693f9790aebSLuigi Rizzo 
269489cc2556SLuigi Rizzo /* called by all routines that create netmap_adapters.
2695*37e3a6d3SLuigi Rizzo  * provide some defaults and get a reference to the
2696*37e3a6d3SLuigi Rizzo  * memory allocator
269789cc2556SLuigi Rizzo  */
int
netmap_attach_common(struct netmap_adapter *na)
{
	/* a netmap port must expose at least one tx and one rx ring */
	if (na->num_tx_rings == 0 || na->num_rx_rings == 0) {
		D("%s: invalid rings tx %d rx %d",
			na->name, na->num_tx_rings, na->num_rx_rings);
		return EINVAL;
	}

#ifdef __FreeBSD__
	if (na->na_flags & NAF_HOST_RINGS && na->ifp) {
		na->if_input = na->ifp->if_input; /* for netmap_send_up */
	}
#endif /* __FreeBSD__ */
	if (na->nm_krings_create == NULL) {
		/* we assume that we have been called by a driver,
		 * since other port types all provide their own
		 * nm_krings_create
		 */
		na->nm_krings_create = netmap_hw_krings_create;
		na->nm_krings_delete = netmap_hw_krings_delete;
	}
	if (na->nm_notify == NULL)
		na->nm_notify = netmap_notify;
	na->active_fds = 0;

	if (na->nm_mem == NULL)
		/* use the global allocator */
		na->nm_mem = &nm_mem;
	/* the adapter keeps a reference to its allocator; released
	 * by netmap_mem_put() in netmap_detach_common() */
	netmap_mem_get(na->nm_mem);
#ifdef WITH_VALE
	if (na->nm_bdg_attach == NULL)
		/* no special nm_bdg_attach callback. On VALE
		 * attach, we need to interpose a bwrap
		 */
		na->nm_bdg_attach = netmap_bwrap_attach;
#endif

	return 0;
}
2738f9790aebSLuigi Rizzo 
2739f9790aebSLuigi Rizzo 
274089cc2556SLuigi Rizzo /* standard cleanup, called by all destructors */
2741f9790aebSLuigi Rizzo void
2742f9790aebSLuigi Rizzo netmap_detach_common(struct netmap_adapter *na)
2743f9790aebSLuigi Rizzo {
2744f9790aebSLuigi Rizzo 	if (na->tx_rings) { /* XXX should not happen */
2745f9790aebSLuigi Rizzo 		D("freeing leftover tx_rings");
2746f9790aebSLuigi Rizzo 		na->nm_krings_delete(na);
2747f9790aebSLuigi Rizzo 	}
2748f0ea3689SLuigi Rizzo 	netmap_pipe_dealloc(na);
2749847bf383SLuigi Rizzo 	if (na->nm_mem)
2750847bf383SLuigi Rizzo 		netmap_mem_put(na->nm_mem);
2751f9790aebSLuigi Rizzo 	bzero(na, sizeof(*na));
2752f9790aebSLuigi Rizzo 	free(na, M_DEVBUF);
2753f9790aebSLuigi Rizzo }
2754f9790aebSLuigi Rizzo 
2755*37e3a6d3SLuigi Rizzo /* Wrapper for the register callback provided netmap-enabled
2756*37e3a6d3SLuigi Rizzo  * hardware drivers.
2757*37e3a6d3SLuigi Rizzo  * nm_iszombie(na) means that the driver module has been
27584bf50f18SLuigi Rizzo  * unloaded, so we cannot call into it.
2759*37e3a6d3SLuigi Rizzo  * nm_os_ifnet_lock() must guarantee mutual exclusion with
2760*37e3a6d3SLuigi Rizzo  * module unloading.
27614bf50f18SLuigi Rizzo  */
27624bf50f18SLuigi Rizzo static int
2763*37e3a6d3SLuigi Rizzo netmap_hw_reg(struct netmap_adapter *na, int onoff)
27644bf50f18SLuigi Rizzo {
27654bf50f18SLuigi Rizzo 	struct netmap_hw_adapter *hwna =
27664bf50f18SLuigi Rizzo 		(struct netmap_hw_adapter*)na;
2767*37e3a6d3SLuigi Rizzo 	int error = 0;
27684bf50f18SLuigi Rizzo 
2769*37e3a6d3SLuigi Rizzo 	nm_os_ifnet_lock();
27704bf50f18SLuigi Rizzo 
2771*37e3a6d3SLuigi Rizzo 	if (nm_iszombie(na)) {
2772*37e3a6d3SLuigi Rizzo 		if (onoff) {
2773*37e3a6d3SLuigi Rizzo 			error = ENXIO;
2774*37e3a6d3SLuigi Rizzo 		} else if (na != NULL) {
2775*37e3a6d3SLuigi Rizzo 			na->na_flags &= ~NAF_NETMAP_ON;
2776*37e3a6d3SLuigi Rizzo 		}
2777*37e3a6d3SLuigi Rizzo 		goto out;
2778*37e3a6d3SLuigi Rizzo 	}
2779*37e3a6d3SLuigi Rizzo 
2780*37e3a6d3SLuigi Rizzo 	error = hwna->nm_hw_register(na, onoff);
2781*37e3a6d3SLuigi Rizzo 
2782*37e3a6d3SLuigi Rizzo out:
2783*37e3a6d3SLuigi Rizzo 	nm_os_ifnet_unlock();
2784*37e3a6d3SLuigi Rizzo 
2785*37e3a6d3SLuigi Rizzo 	return error;
2786*37e3a6d3SLuigi Rizzo }
2787*37e3a6d3SLuigi Rizzo 
2788*37e3a6d3SLuigi Rizzo static void
2789*37e3a6d3SLuigi Rizzo netmap_hw_dtor(struct netmap_adapter *na)
2790*37e3a6d3SLuigi Rizzo {
2791*37e3a6d3SLuigi Rizzo 	if (nm_iszombie(na) || na->ifp == NULL)
2792*37e3a6d3SLuigi Rizzo 		return;
2793*37e3a6d3SLuigi Rizzo 
2794*37e3a6d3SLuigi Rizzo 	WNA(na->ifp) = NULL;
27954bf50f18SLuigi Rizzo }
27964bf50f18SLuigi Rizzo 
2797f18be576SLuigi Rizzo 
/*
 * Allocate a ``netmap_adapter`` object, and initialize it from the
 * 'arg' passed by the driver on attach.
 * We allocate a block of memory with room for a struct netmap_adapter
 * plus two sets of N+2 struct netmap_kring (where N is the number
 * of hardware rings):
 * krings	0..N-1	are for the hardware queues.
 * kring	N	is for the host stack queue
 * kring	N+1	is only used for the selinfo for all queues. // XXX still true ?
 * Return 0 on success, ENOMEM otherwise.
 * (Note: the failure path also returns EINVAL when 'arg' is bad but
 * the allocation itself succeeded, see the final return.)
 */
static int
_netmap_attach(struct netmap_adapter *arg, size_t size)
{
	struct netmap_hw_adapter *hwna = NULL;
	struct ifnet *ifp = NULL;

	if (arg == NULL || arg->ifp == NULL)
		goto fail;
	ifp = arg->ifp;
	hwna = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (hwna == NULL)
		goto fail;
	hwna->up = *arg;	/* copy the caller-provided template */
	hwna->up.na_flags |= NAF_HOST_RINGS | NAF_NATIVE;
	strncpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name));
	/* interpose our register wrapper, which serializes against
	 * module unload (see netmap_hw_reg); keep the driver's own
	 * callback aside in nm_hw_register */
	hwna->nm_hw_register = hwna->up.nm_register;
	hwna->up.nm_register = netmap_hw_reg;
	if (netmap_attach_common(&hwna->up)) {
		free(hwna, M_DEVBUF);
		goto fail;
	}
	netmap_adapter_get(&hwna->up);

	NM_ATTACH_NA(ifp, &hwna->up);

#ifdef linux
	if (ifp->netdev_ops) {
		/* prepare a clone of the netdev ops */
#ifndef NETMAP_LINUX_HAVE_NETDEV_OPS
		/* NOTE(review): on pre-netdev_ops kernels the field holds
		 * the old-style pointer; looks intentional — confirm against
		 * the linux glue code */
		hwna->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
#else
		hwna->nm_ndo = *ifp->netdev_ops;
#endif /* NETMAP_LINUX_HAVE_NETDEV_OPS */
	}
	hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
	if (ifp->ethtool_ops) {
		hwna->nm_eto = *ifp->ethtool_ops;
	}
	hwna->nm_eto.set_ringparam = linux_netmap_set_ringparam;
#ifdef NETMAP_LINUX_HAVE_SET_CHANNELS
	hwna->nm_eto.set_channels = linux_netmap_set_channels;
#endif /* NETMAP_LINUX_HAVE_SET_CHANNELS */
	if (arg->nm_config == NULL) {
		hwna->up.nm_config = netmap_linux_config;
	}
#endif /* linux */
	if (arg->nm_dtor == NULL) {
		hwna->up.nm_dtor = netmap_hw_dtor;
	}

	if_printf(ifp, "netmap queues/slots: TX %d/%d, RX %d/%d\n",
	    hwna->up.num_tx_rings, hwna->up.num_tx_desc,
	    hwna->up.num_rx_rings, hwna->up.num_rx_desc);
	return 0;

fail:
	D("fail, arg %p ifp %p na %p", arg, ifp, hwna);
	return (hwna ? EINVAL : ENOMEM);
}
286868b8534bSLuigi Rizzo 
286968b8534bSLuigi Rizzo 
2870*37e3a6d3SLuigi Rizzo int
2871*37e3a6d3SLuigi Rizzo netmap_attach(struct netmap_adapter *arg)
2872*37e3a6d3SLuigi Rizzo {
2873*37e3a6d3SLuigi Rizzo 	return _netmap_attach(arg, sizeof(struct netmap_hw_adapter));
2874*37e3a6d3SLuigi Rizzo }
2875*37e3a6d3SLuigi Rizzo 
2876*37e3a6d3SLuigi Rizzo 
#ifdef WITH_PTNETMAP_GUEST
/* Attach routine for adapters living in a para-virtualized guest:
 * build a pass-through memory allocator, then attach like a regular
 * hw adapter. Returns 0 on success, an errno value otherwise.
 */
int
netmap_pt_guest_attach(struct netmap_adapter *arg,
		       void *csb,
		       unsigned int nifp_offset,
		       nm_pt_guest_ptctl_t ptctl)
{
	struct netmap_pt_guest_adapter *ptna;
	struct ifnet *ifp;
	int error;

	/* the original code guarded 'arg' only for the ifp read and then
	 * dereferenced it unconditionally below; fail early instead
	 * (_netmap_attach() would reject a NULL ifp anyway) */
	if (arg == NULL || arg->ifp == NULL)
		return EINVAL;
	ifp = arg->ifp;

	/* get allocator */
	arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, ptctl);
	if (arg->nm_mem == NULL)
		return ENOMEM;
	arg->na_flags |= NAF_MEM_OWNER;
	error = _netmap_attach(arg, sizeof(struct netmap_pt_guest_adapter));
	if (error) {
		/* drop the allocator reference taken above, otherwise
		 * it would be leaked on failure */
		netmap_mem_put(arg->nm_mem);
		arg->nm_mem = NULL;
		return error;
	}

	/* get the netmap_pt_guest_adapter */
	ptna = (struct netmap_pt_guest_adapter *) NA(ifp);
	ptna->csb = csb;

	/* Initialize a separate pass-through netmap adapter that is going to
	 * be used by the ptnet driver only, and so never exposed to netmap
	 * applications. We only need a subset of the available fields. */
	memset(&ptna->dr, 0, sizeof(ptna->dr));
	ptna->dr.up.ifp = ifp;
	ptna->dr.up.nm_mem = ptna->hwup.up.nm_mem;
	netmap_mem_get(ptna->dr.up.nm_mem);
	ptna->dr.up.nm_config = ptna->hwup.up.nm_config;

	ptna->backend_regifs = 0;

	return 0;
}
#endif /* WITH_PTNETMAP_GUEST */
2915*37e3a6d3SLuigi Rizzo 
2916*37e3a6d3SLuigi Rizzo 
2917f9790aebSLuigi Rizzo void
2918f9790aebSLuigi Rizzo NM_DBG(netmap_adapter_get)(struct netmap_adapter *na)
2919f9790aebSLuigi Rizzo {
2920f9790aebSLuigi Rizzo 	if (!na) {
2921f9790aebSLuigi Rizzo 		return;
2922f9790aebSLuigi Rizzo 	}
2923f9790aebSLuigi Rizzo 
2924f9790aebSLuigi Rizzo 	refcount_acquire(&na->na_refcount);
2925f9790aebSLuigi Rizzo }
2926f9790aebSLuigi Rizzo 
2927f9790aebSLuigi Rizzo 
2928f9790aebSLuigi Rizzo /* returns 1 iff the netmap_adapter is destroyed */
2929f9790aebSLuigi Rizzo int
2930f9790aebSLuigi Rizzo NM_DBG(netmap_adapter_put)(struct netmap_adapter *na)
2931f9790aebSLuigi Rizzo {
2932f9790aebSLuigi Rizzo 	if (!na)
2933f9790aebSLuigi Rizzo 		return 1;
2934f9790aebSLuigi Rizzo 
2935f9790aebSLuigi Rizzo 	if (!refcount_release(&na->na_refcount))
2936f9790aebSLuigi Rizzo 		return 0;
2937f9790aebSLuigi Rizzo 
2938f9790aebSLuigi Rizzo 	if (na->nm_dtor)
2939f9790aebSLuigi Rizzo 		na->nm_dtor(na);
2940f9790aebSLuigi Rizzo 
2941f9790aebSLuigi Rizzo 	netmap_detach_common(na);
2942f9790aebSLuigi Rizzo 
2943f9790aebSLuigi Rizzo 	return 1;
2944f9790aebSLuigi Rizzo }
2945f9790aebSLuigi Rizzo 
294689cc2556SLuigi Rizzo /* nm_krings_create callback for all hardware native adapters */
2947f9790aebSLuigi Rizzo int
2948f9790aebSLuigi Rizzo netmap_hw_krings_create(struct netmap_adapter *na)
2949f9790aebSLuigi Rizzo {
2950f0ea3689SLuigi Rizzo 	int ret = netmap_krings_create(na, 0);
295117885a7bSLuigi Rizzo 	if (ret == 0) {
295217885a7bSLuigi Rizzo 		/* initialize the mbq for the sw rx ring */
295317885a7bSLuigi Rizzo 		mbq_safe_init(&na->rx_rings[na->num_rx_rings].rx_queue);
295417885a7bSLuigi Rizzo 		ND("initialized sw rx queue %d", na->num_rx_rings);
295517885a7bSLuigi Rizzo 	}
295617885a7bSLuigi Rizzo 	return ret;
2957f9790aebSLuigi Rizzo }
2958f9790aebSLuigi Rizzo 
2959f9790aebSLuigi Rizzo 
2960f9790aebSLuigi Rizzo 
296168b8534bSLuigi Rizzo /*
296289cc2556SLuigi Rizzo  * Called on module unload by the netmap-enabled drivers
296368b8534bSLuigi Rizzo  */
296468b8534bSLuigi Rizzo void
296568b8534bSLuigi Rizzo netmap_detach(struct ifnet *ifp)
296668b8534bSLuigi Rizzo {
296768b8534bSLuigi Rizzo 	struct netmap_adapter *na = NA(ifp);
296868b8534bSLuigi Rizzo 
296968b8534bSLuigi Rizzo 	if (!na)
297068b8534bSLuigi Rizzo 		return;
297168b8534bSLuigi Rizzo 
2972f9790aebSLuigi Rizzo 	NMG_LOCK();
2973*37e3a6d3SLuigi Rizzo 	netmap_set_all_rings(na, NM_KR_LOCKED);
2974*37e3a6d3SLuigi Rizzo 	na->na_flags |= NAF_ZOMBIE;
2975847bf383SLuigi Rizzo 	/*
2976847bf383SLuigi Rizzo 	 * if the netmap adapter is not native, somebody
2977847bf383SLuigi Rizzo 	 * changed it, so we can not release it here.
2978*37e3a6d3SLuigi Rizzo 	 * The NAF_ZOMBIE flag will notify the new owner that
2979847bf383SLuigi Rizzo 	 * the driver is gone.
2980847bf383SLuigi Rizzo 	 */
2981847bf383SLuigi Rizzo 	if (na->na_flags & NAF_NATIVE) {
2982*37e3a6d3SLuigi Rizzo 	        netmap_adapter_put(na);
2983847bf383SLuigi Rizzo 	}
2984*37e3a6d3SLuigi Rizzo 	/* give active users a chance to notice that NAF_ZOMBIE has been
2985*37e3a6d3SLuigi Rizzo 	 * turned on, so that they can stop and return an error to userspace.
2986*37e3a6d3SLuigi Rizzo 	 * Note that this becomes a NOP if there are no active users and,
2987*37e3a6d3SLuigi Rizzo 	 * therefore, the put() above has deleted the na, since now NA(ifp) is
2988*37e3a6d3SLuigi Rizzo 	 * NULL.
2989*37e3a6d3SLuigi Rizzo 	 */
2990f9790aebSLuigi Rizzo 	netmap_enable_all_rings(ifp);
2991f9790aebSLuigi Rizzo 	NMG_UNLOCK();
2992ae10d1afSLuigi Rizzo }
2993f18be576SLuigi Rizzo 
2994f18be576SLuigi Rizzo 
/*
 * Intercept packets from the network stack and pass them
 * to netmap as incoming packets on the 'software' ring.
 *
 * We only store packets in a bounded mbq and then copy them
 * in the relevant rxsync routine.
 *
 * We rely on the OS to make sure that the ifp and na do not go
 * away (typically the caller checks for IFF_DRV_RUNNING or the like).
 * In nm_register() or whenever there is a reinitialization,
 * we make sure to make the mode change visible here.
 *
 * Returns 0 if the mbuf was queued, ENXIO if netmap mode was turned
 * off in the meantime, ENOBUFS if the packet was dropped (too long,
 * needs offloads, or the host ring/queue is full). The mbuf is freed
 * here on every path except the direct-transmit one below.
 */
int
netmap_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_kring *kring, *tx_kring;
	u_int len = MBUF_LEN(m);
	u_int error = ENOBUFS;
	unsigned int txr;
	struct mbq *q;
	int space;

	/* the host rx ring is the extra one past the hardware rx rings */
	kring = &na->rx_rings[na->num_rx_rings];
	// XXX [Linux] we do not need this lock
	// if we follow the down/configure/up protocol -gl
	// mtx_lock(&na->core_lock);

	if (!nm_netmap_on(na)) {
		D("%s not in netmap mode anymore", na->name);
		error = ENXIO;
		goto done;
	}

	/* clamp the tx queue index into the valid range */
	txr = MBUF_TXQ(m);
	if (txr >= na->num_tx_rings) {
		txr %= na->num_tx_rings;
	}
	tx_kring = &NMR(na, NR_TX)[txr];

	if (tx_kring->nr_mode == NKR_NETMAP_OFF) {
		/* the target tx ring is not in netmap mode: hand the
		 * mbuf straight to the driver; netmap no longer owns it,
		 * so we must not fall through to the done: path.
		 */
		return MBUF_TRANSMIT(na, ifp, m);
	}

	q = &kring->rx_queue;

	// XXX reconsider long packets if we handle fragments
	if (len > NETMAP_BUF_SIZE(na)) { /* too long for us */
		D("%s from_host, drop packet size %d > %d", na->name,
			len, NETMAP_BUF_SIZE(na));
		goto done;
	}

	/* mbufs that require checksum/segmentation offloads cannot be
	 * copied verbatim into a netmap buffer, so drop them */
	if (nm_os_mbuf_has_offld(m)) {
		RD(1, "%s drop mbuf requiring offloadings", na->name);
		goto done;
	}

	/* protect against rxsync_from_host(), netmap_sw_to_nic()
	 * and maybe other instances of netmap_transmit (the latter
	 * not possible on Linux).
	 * Also avoid overflowing the queue.
	 */
	mbq_lock(q);

        space = kring->nr_hwtail - kring->nr_hwcur;
        if (space < 0)
                space += kring->nkr_num_slots;
	if (space + mbq_len(q) >= kring->nkr_num_slots - 1) { // XXX
		RD(10, "%s full hwcur %d hwtail %d qlen %d len %d m %p",
			na->name, kring->nr_hwcur, kring->nr_hwtail, mbq_len(q),
			len, m);
	} else {
		mbq_enqueue(q, m);
		ND(10, "%s %d bufs in queue len %d m %p",
			na->name, mbq_len(q), len, m);
		/* notify outside the lock */
		m = NULL;
		error = 0;
	}
	mbq_unlock(q);

done:
	if (m)
		m_freem(m);
	/* unconditionally wake up listeners */
	kring->nm_notify(kring, 0);
	/* this is normally netmap_notify(), but for nics
	 * connected to a bridge it is netmap_bwrap_intr_notify(),
	 * that possibly forwards the frames through the switch
	 */

	return (error);
}
308968b8534bSLuigi Rizzo 
309068b8534bSLuigi Rizzo 
/*
 * netmap_reset() is called by the driver routines when reinitializing
 * a ring. The driver is in charge of locking to protect the kring.
 * If native netmap mode is not set just return NULL.
 * If native netmap mode is set, in particular, we have to set nr_mode to
 * NKR_NETMAP_ON.
 *
 * @na      the adapter being reset
 * @tx      NR_TX or NR_RX, selecting which ring array to use
 * @n       ring index; out-of-range indices return NULL
 * @new_cur the driver's new current position, used to recompute nkr_hwofs
 *
 * Returns the slot array of the reconfigured ring, or NULL if the ring
 * should stay out of netmap mode (pending mode off, bad index, or the
 * adapter is not in native netmap mode).
 */
struct netmap_slot *
netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
	u_int new_cur)
{
	struct netmap_kring *kring;
	int new_hwofs, lim;

	if (!nm_native_on(na)) {
		ND("interface not in native netmap mode");
		return NULL;	/* nothing to reinitialize */
	}

	/* XXX note- in the new scheme, we are not guaranteed to be
	 * under lock (e.g. when called on a device reset).
	 * In this case, we should set a flag and do not trust too
	 * much the values. In practice: TODO
	 * - set a RESET flag somewhere in the kring
	 * - do the processing in a conservative way
	 * - let the *sync() fixup at the end.
	 */
	if (tx == NR_TX) {
		if (n >= na->num_tx_rings)
			return NULL;

		kring = na->tx_rings + n;

		/* a pending request to turn the ring off wins: honor it
		 * now and keep the ring out of netmap mode */
		if (kring->nr_pending_mode == NKR_NETMAP_OFF) {
			kring->nr_mode = NKR_NETMAP_OFF;
			return NULL;
		}

		// XXX check whether we should use hwcur or rcur
		new_hwofs = kring->nr_hwcur - new_cur;
	} else {
		if (n >= na->num_rx_rings)
			return NULL;
		kring = na->rx_rings + n;

		if (kring->nr_pending_mode == NKR_NETMAP_OFF) {
			kring->nr_mode = NKR_NETMAP_OFF;
			return NULL;
		}

		new_hwofs = kring->nr_hwtail - new_cur;
	}
	lim = kring->nkr_num_slots - 1;
	/* wrap the offset into [.. lim] */
	if (new_hwofs > lim)
		new_hwofs -= lim + 1;

	/* Always set the new offset value and realign the ring. */
	if (netmap_verbose)
	    D("%s %s%d hwofs %d -> %d, hwtail %d -> %d",
		na->name,
		tx == NR_TX ? "TX" : "RX", n,
		kring->nkr_hwofs, new_hwofs,
		kring->nr_hwtail,
		tx == NR_TX ? lim : kring->nr_hwtail);
	kring->nkr_hwofs = new_hwofs;
	if (tx == NR_TX) {
		/* after a reset the whole TX ring is available:
		 * hwtail is placed one full ring ahead of hwcur */
		kring->nr_hwtail = kring->nr_hwcur + lim;
		if (kring->nr_hwtail > lim)
			kring->nr_hwtail -= lim + 1;
	}

#if 0 // def linux
	/* XXX check that the mappings are correct */
	/* need ring_nr, adapter->pdev, direction */
	buffer_info->dma = dma_map_single(&pdev->dev, addr, adapter->rx_buffer_len, DMA_FROM_DEVICE);
	if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
		D("error mapping rx netmap buffer %d", i);
		// XXX fix error handling
	}

#endif /* linux */
	/*
	 * Wakeup on the individual and global selwait
	 * We do the wakeup here, but the ring is not yet reconfigured.
	 * However, we are under lock so there are no races.
	 */
	kring->nr_mode = NKR_NETMAP_ON;
	kring->nm_notify(kring, 0);
	return kring->ring->slot;
}
318168b8534bSLuigi Rizzo 
318268b8534bSLuigi Rizzo 
/*
 * Dispatch rx/tx interrupts to the netmap rings.
 *
 * "work_done" is non-null on the RX path, NULL for the TX path.
 * We rely on the OS to make sure that there is only one active
 * instance per queue, and that there is appropriate locking.
 *
 * The 'notify' routine depends on what the ring is attached to.
 * - for a netmap file descriptor, do a selwakeup on the individual
 *   waitqueue, plus one on the global one if needed
 *   (see netmap_notify)
 * - for a nic connected to a switch, call the proper forwarding routine
 *   (see netmap_bwrap_intr_notify)
 *
 * Returns NM_IRQ_PASS when the interrupt should be handled by the
 * regular driver path (not a physical queue, or the ring is not in
 * netmap mode); otherwise returns whatever the kring's notify
 * callback returns.
 */
int
netmap_common_irq(struct netmap_adapter *na, u_int q, u_int *work_done)
{
	struct netmap_kring *kring;
	enum txrx t = (work_done ? NR_RX : NR_TX);

	q &= NETMAP_RING_MASK;

	if (netmap_verbose) {
	        RD(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
	}

	if (q >= nma_get_nrings(na, t))
		return NM_IRQ_PASS; // not a physical queue

	kring = NMR(na, t) + q;

	if (kring->nr_mode == NKR_NETMAP_OFF) {
		return NM_IRQ_PASS;
	}

	if (t == NR_RX) {
		kring->nr_kflags |= NKR_PENDINTR;	// XXX atomic ?
		*work_done = 1; /* do not fire napi again */
	}

	return kring->nm_notify(kring, 0);
}
3225f9790aebSLuigi Rizzo 
322617885a7bSLuigi Rizzo 
3227f9790aebSLuigi Rizzo /*
3228f9790aebSLuigi Rizzo  * Default functions to handle rx/tx interrupts from a physical device.
3229f9790aebSLuigi Rizzo  * "work_done" is non-null on the RX path, NULL for the TX path.
3230f9790aebSLuigi Rizzo  *
3231*37e3a6d3SLuigi Rizzo  * If the card is not in netmap mode, simply return NM_IRQ_PASS,
3232ce3ee1e7SLuigi Rizzo  * so that the caller proceeds with regular processing.
3233*37e3a6d3SLuigi Rizzo  * Otherwise call netmap_common_irq().
3234ce3ee1e7SLuigi Rizzo  *
3235ce3ee1e7SLuigi Rizzo  * If the card is connected to a netmap file descriptor,
3236ce3ee1e7SLuigi Rizzo  * do a selwakeup on the individual queue, plus one on the global one
3237ce3ee1e7SLuigi Rizzo  * if needed (multiqueue card _and_ there are multiqueue listeners),
3238*37e3a6d3SLuigi Rizzo  * and return NR_IRQ_COMPLETED.
3239ce3ee1e7SLuigi Rizzo  *
3240ce3ee1e7SLuigi Rizzo  * Finally, if called on rx from an interface connected to a switch,
3241*37e3a6d3SLuigi Rizzo  * calls the proper forwarding routine.
32421a26580eSLuigi Rizzo  */
3243babc7c12SLuigi Rizzo int
3244ce3ee1e7SLuigi Rizzo netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done)
32451a26580eSLuigi Rizzo {
32464bf50f18SLuigi Rizzo 	struct netmap_adapter *na = NA(ifp);
32474bf50f18SLuigi Rizzo 
32484bf50f18SLuigi Rizzo 	/*
32494bf50f18SLuigi Rizzo 	 * XXX emulated netmap mode sets NAF_SKIP_INTR so
32504bf50f18SLuigi Rizzo 	 * we still use the regular driver even though the previous
32514bf50f18SLuigi Rizzo 	 * check fails. It is unclear whether we should use
32524bf50f18SLuigi Rizzo 	 * nm_native_on() here.
32534bf50f18SLuigi Rizzo 	 */
32544bf50f18SLuigi Rizzo 	if (!nm_netmap_on(na))
3255*37e3a6d3SLuigi Rizzo 		return NM_IRQ_PASS;
3256849bec0eSLuigi Rizzo 
32574bf50f18SLuigi Rizzo 	if (na->na_flags & NAF_SKIP_INTR) {
32588241616dSLuigi Rizzo 		ND("use regular interrupt");
3259*37e3a6d3SLuigi Rizzo 		return NM_IRQ_PASS;
32608241616dSLuigi Rizzo 	}
32618241616dSLuigi Rizzo 
3262*37e3a6d3SLuigi Rizzo 	return netmap_common_irq(na, q, work_done);
32631a26580eSLuigi Rizzo }
32641a26580eSLuigi Rizzo 
326564ae02c3SLuigi Rizzo 
326601c7d25fSLuigi Rizzo /*
3267f9790aebSLuigi Rizzo  * Module loader and unloader
3268f196ce38SLuigi Rizzo  *
3269f9790aebSLuigi Rizzo  * netmap_init() creates the /dev/netmap device and initializes
3270f9790aebSLuigi Rizzo  * all global variables. Returns 0 on success, errno on failure
3271f9790aebSLuigi Rizzo  * (but there is no chance)
3272f9790aebSLuigi Rizzo  *
3273f9790aebSLuigi Rizzo  * netmap_fini() destroys everything.
3274f196ce38SLuigi Rizzo  */
3275babc7c12SLuigi Rizzo 
static struct cdev *netmap_dev; /* /dev/netmap character device. */
extern struct cdevsw netmap_cdevsw; /* device methods, defined elsewhere */
3278babc7c12SLuigi Rizzo 
327917885a7bSLuigi Rizzo 
/*
 * Module unload: destroy the device node and tear down the global
 * state, roughly in the reverse order of netmap_init().
 */
void
netmap_fini(void)
{
	if (netmap_dev)
		destroy_dev(netmap_dev);
	/* we assume that there are no longer netmap users */
	nm_os_ifnet_fini();
	netmap_uninit_bridges();
	netmap_mem_fini();
	NMG_LOCK_DESTROY();
	printf("netmap: unloaded module.\n");
}
329268b8534bSLuigi Rizzo 
329317885a7bSLuigi Rizzo 
3294f9790aebSLuigi Rizzo int
3295f9790aebSLuigi Rizzo netmap_init(void)
329668b8534bSLuigi Rizzo {
3297f9790aebSLuigi Rizzo 	int error;
329868b8534bSLuigi Rizzo 
3299f9790aebSLuigi Rizzo 	NMG_LOCK_INIT();
330068b8534bSLuigi Rizzo 
3301f9790aebSLuigi Rizzo 	error = netmap_mem_init();
3302f9790aebSLuigi Rizzo 	if (error != 0)
3303f9790aebSLuigi Rizzo 		goto fail;
3304c929ca72SLuigi Rizzo 	/*
3305c929ca72SLuigi Rizzo 	 * MAKEDEV_ETERNAL_KLD avoids an expensive check on syscalls
3306c929ca72SLuigi Rizzo 	 * when the module is compiled in.
3307c929ca72SLuigi Rizzo 	 * XXX could use make_dev_credv() to get error number
3308c929ca72SLuigi Rizzo 	 */
33090e73f29aSLuigi Rizzo 	netmap_dev = make_dev_credf(MAKEDEV_ETERNAL_KLD,
331011c0b69cSAdrian Chadd 		&netmap_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0600,
33110e73f29aSLuigi Rizzo 			      "netmap");
3312f9790aebSLuigi Rizzo 	if (!netmap_dev)
3313f9790aebSLuigi Rizzo 		goto fail;
3314f9790aebSLuigi Rizzo 
3315847bf383SLuigi Rizzo 	error = netmap_init_bridges();
3316847bf383SLuigi Rizzo 	if (error)
3317847bf383SLuigi Rizzo 		goto fail;
3318847bf383SLuigi Rizzo 
33194bf50f18SLuigi Rizzo #ifdef __FreeBSD__
3320*37e3a6d3SLuigi Rizzo 	nm_os_vi_init_index();
33214bf50f18SLuigi Rizzo #endif
3322847bf383SLuigi Rizzo 
3323*37e3a6d3SLuigi Rizzo 	error = nm_os_ifnet_init();
3324*37e3a6d3SLuigi Rizzo 	if (error)
3325*37e3a6d3SLuigi Rizzo 		goto fail;
3326*37e3a6d3SLuigi Rizzo 
3327f9790aebSLuigi Rizzo 	printf("netmap: loaded module\n");
3328f9790aebSLuigi Rizzo 	return (0);
3329f9790aebSLuigi Rizzo fail:
333068b8534bSLuigi Rizzo 	netmap_fini();
3331f9790aebSLuigi Rizzo 	return (EINVAL); /* may be incorrect */
333268b8534bSLuigi Rizzo }
3333