/*
 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD$
 *
 * This header contains the definitions of constants and function
 * prototypes used only in kernelspace.
 */

#ifndef _NET_NETMAP_KERN_H_
#define _NET_NETMAP_KERN_H_

#if defined(linux)

#if defined(CONFIG_NETMAP_VALE)
#define WITH_VALE
#endif
#if defined(CONFIG_NETMAP_PIPE)
#define WITH_PIPES
#endif
#if defined(CONFIG_NETMAP_MONITOR)
#define WITH_MONITOR
#endif
#if defined(CONFIG_NETMAP_GENERIC)
#define WITH_GENERIC
#endif
#if defined(CONFIG_NETMAP_V1000)
#define WITH_V1000
#endif

#else /* not linux */

#define WITH_VALE	// comment out to disable VALE support
#define WITH_PIPES
#define WITH_MONITOR
#define WITH_GENERIC

#endif

#if defined(__FreeBSD__)
#include <sys/selinfo.h>

#define likely(x)	__builtin_expect((long)!!(x), 1L)
#define unlikely(x)	__builtin_expect((long)!!(x), 0L)

#define	NM_LOCK_T	struct mtx	/* low level spinlock, used to protect queues */

#define NM_MTX_T	struct sx	/* OS-specific mutex (sleepable) */
#define NM_MTX_INIT(m)		sx_init(&(m), #m)
#define NM_MTX_DESTROY(m)	sx_destroy(&(m))
#define NM_MTX_LOCK(m)		sx_xlock(&(m))
#define NM_MTX_UNLOCK(m)	sx_xunlock(&(m))
#define NM_MTX_ASSERT(m)	sx_assert(&(m), SA_XLOCKED)

#define	NM_SELINFO_T	struct nm_selinfo
#define	MBUF_LEN(m)	((m)->m_pkthdr.len)
#define	MBUF_IFP(m)	((m)->m_pkthdr.rcvif)
#define	NM_SEND_UP(ifp, m)	((NA(ifp))->if_input)(ifp, m)

#define NM_ATOMIC_T	volatile int	// XXX ?
/* atomic operations */
#include <machine/atomic.h>
#define NM_ATOMIC_TEST_AND_SET(p)	(!atomic_cmpset_acq_int((p), 0, 1))
#define NM_ATOMIC_CLEAR(p)		atomic_store_rel_int((p), 0)

#if __FreeBSD_version >= 1100030
#define	WNA(_ifp)	(_ifp)->if_netmap
#else /* older FreeBSD */
#define	WNA(_ifp)	(_ifp)->if_pspare[0]
#endif /* older FreeBSD */

#if __FreeBSD_version >= 1100005
struct netmap_adapter *netmap_getna(if_t ifp);
#endif

#if __FreeBSD_version >= 1100027
#define GET_MBUF_REFCNT(m)	((m)->m_ext.ext_cnt ? *((m)->m_ext.ext_cnt) : -1)
#define SET_MBUF_REFCNT(m, x)	(*((m)->m_ext.ext_cnt) = (x))
#define PNT_MBUF_REFCNT(m)	((m)->m_ext.ext_cnt)
#else
#define GET_MBUF_REFCNT(m)	((m)->m_ext.ref_cnt ? *((m)->m_ext.ref_cnt) : -1)
#define SET_MBUF_REFCNT(m, x)	(*((m)->m_ext.ref_cnt) = (x))
#define PNT_MBUF_REFCNT(m)	((m)->m_ext.ref_cnt)
#endif

MALLOC_DECLARE(M_NETMAP);

struct nm_selinfo {
	struct selinfo si;
	struct mtx m;
};

void freebsd_selwakeup(struct nm_selinfo *si, int pri);

// XXX linux structs, not used in FreeBSD
struct net_device_ops {
};
struct ethtool_ops {
};
struct hrtimer {
};
#define NM_BNS_GET(b)
#define NM_BNS_PUT(b)

#elif defined (linux)

#define	NM_LOCK_T	safe_spinlock_t	// see bsd_glue.h
#define	NM_SELINFO_T	wait_queue_head_t
#define	MBUF_LEN(m)	((m)->len)
#define	MBUF_IFP(m)	((m)->dev)
#define	NM_SEND_UP(ifp, m)  \
			do { \
			    (m)->priority = NM_MAGIC_PRIORITY_RX; \
			    netif_rx(m); \
			} while (0)

#define NM_ATOMIC_T	volatile long unsigned int

#define NM_MTX_T	struct mutex	/* OS-specific sleepable lock */
#define NM_MTX_INIT(m)	mutex_init(&(m))
#define NM_MTX_DESTROY(m)	do { (void)(m); } while (0)
#define NM_MTX_LOCK(m)		mutex_lock(&(m))
#define NM_MTX_UNLOCK(m)	mutex_unlock(&(m))
#define NM_MTX_ASSERT(m)	mutex_is_locked(&(m))

#ifndef DEV_NETMAP
#define DEV_NETMAP
#endif /* DEV_NETMAP */

#elif defined (__APPLE__)

#warning apple support is incomplete.
#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)
#define	NM_LOCK_T	IOLock *
#define	NM_SELINFO_T	struct selinfo
#define	MBUF_LEN(m)	((m)->m_pkthdr.len)
#define	NM_SEND_UP(ifp, m)	((ifp)->if_input)(ifp, m)

#else

#error unsupported platform

#endif /* end - platform-specific code */

#define	NMG_LOCK_T		NM_MTX_T
#define	NMG_LOCK_INIT()		NM_MTX_INIT(netmap_global_lock)
#define	NMG_LOCK_DESTROY()	NM_MTX_DESTROY(netmap_global_lock)
#define	NMG_LOCK()		NM_MTX_LOCK(netmap_global_lock)
#define	NMG_UNLOCK()		NM_MTX_UNLOCK(netmap_global_lock)
#define	NMG_LOCK_ASSERT()	NM_MTX_ASSERT(netmap_global_lock)

#define ND(format, ...)
#define D(format, ...)						\
	do {							\
		struct timeval __xxts;				\
		microtime(&__xxts);				\
		printf("%03d.%06d [%4d] %-25s " format "\n",	\
		(int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec,	\
		__LINE__, __FUNCTION__, ##__VA_ARGS__);		\
	} while (0)

/* rate limited; lps indicates how many messages per second */
#define RD(lps, format, ...)					\
	do {							\
		static int t0, __cnt;				\
		if (t0 != time_second) {			\
			t0 = time_second;			\
			__cnt = 0;				\
		}						\
		if (__cnt++ < lps)				\
			D(format, ##__VA_ARGS__);		\
	} while (0)
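
/*
 * Example (illustrative sketch, not part of the API): typical use of the
 * logging macros above. D() is for rare events, RD() bounds the log volume
 * when called from a per-packet path; 'kring', 'len' and 'na' below are
 * placeholders for variables in the caller's scope.
 *
 *	D("ring %d reinitialized", kring->ring_id);
 *	RD(5, "dropping packet, len %d > %d", len, NETMAP_BUF_SIZE(na));
 */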

struct netmap_adapter;
struct nm_bdg_fwd;
struct nm_bridge;
struct netmap_priv_d;

const char *nm_dump_buf(char *p, int len, int lim, char *dst);

#include "netmap_mbq.h"

extern NMG_LOCK_T	netmap_global_lock;

enum txrx { NR_RX = 0, NR_TX = 1, NR_TXRX };

static __inline const char*
nm_txrx2str(enum txrx t)
{
	return (t == NR_RX ? "RX" : "TX");
}

static __inline enum txrx
nm_txrx_swap(enum txrx t)
{
	return (t == NR_RX ? NR_TX : NR_RX);
}

#define for_rx_tx(t)	for ((t) = 0; (t) < NR_TXRX; (t)++)


/*
 * private, kernel view of a ring. Keeps track of the status of
 * a ring across system calls.
 *
 *	nr_hwcur	index of the next buffer to refill.
 *			It corresponds to ring->head
 *			at the time the system call returns.
 *
 *	nr_hwtail	index of the first buffer owned by the kernel.
 *			On RX, hwcur->hwtail are receive buffers
 *			not yet released. hwcur is advanced following
 *			ring->head, hwtail is advanced on incoming packets,
 *			and a wakeup is generated when hwtail passes ring->cur.
 *			On TX, hwcur->rcur have been filled by the sender
 *			but not sent yet to the NIC; rcur->hwtail are available
 *			for new transmissions, and hwtail->hwcur-1 are pending
 *			transmissions not yet acknowledged.
 *
 * The indexes in the NIC and netmap rings are offset by nkr_hwofs slots.
 * This is so that, on a reset, buffers owned by userspace are not
 * modified by the kernel. In particular:
 * RX rings: the next empty buffer (hwtail + hwofs) coincides with
 *	the next empty buffer as known by the hardware (next_to_check or so).
 * TX rings: hwcur + hwofs coincides with next_to_send
 *
 * For received packets, slot->flags is set to nkr_slot_flags
 * so we can provide a proper initial value (e.g. set NS_FORWARD
 * when operating in 'transparent' mode).
 *
 * The following fields are used to implement lock-free copy of packets
 * from input to output ports in the VALE switch:
 *	nkr_hwlease	buffer after the last one being copied.
 *			A writer in nm_bdg_flush reserves N buffers
 *			from nkr_hwlease, advances it, then does the
 *			copy outside the lock.
 *			In RX rings (used for VALE ports),
 *			nkr_hwtail <= nkr_hwlease < nkr_hwcur+N-1
 *			In TX rings (used for NIC or host stack ports)
 *			nkr_hwcur <= nkr_hwlease < nkr_hwtail
 *	nkr_leases	array of nkr_num_slots where writers can report
 *			completion of their block. NR_NOSLOT (~0) indicates
 *			that the writer has not finished yet
 *	nkr_lease_idx	index of the next free slot in nkr_leases, to be assigned
 *
 * The kring is manipulated by txsync/rxsync and the generic netmap functions.
 *
 * Concurrent rxsync or txsync on the same ring are prevented by
 * nm_kr_(try)lock(), which in turn uses nr_busy. This is all we need
 * for NIC rings, and for TX rings attached to the host stack.
 *
 * RX rings attached to the host stack use an mbq (rx_queue) in both
 * rxsync_from_host() and netmap_transmit(). The mbq is protected
 * by its internal lock.
 *
 * RX rings attached to the VALE switch are accessed by both senders
 * and receivers. They are protected through the q_lock on the RX ring.
 */
struct netmap_kring {
	struct netmap_ring	*ring;

	uint32_t	nr_hwcur;
	uint32_t	nr_hwtail;

	/*
	 * Copies of values in user rings, so we do not need to look
	 * at the ring (which could be modified). These are set in the
	 * *sync_prologue()/finalize() routines.
	 */
	uint32_t	rhead;
	uint32_t	rcur;
	uint32_t	rtail;

	uint32_t	nr_kflags;	/* private driver flags */
#define NKR_PENDINTR	0x1		// Pending interrupt.
#define NKR_EXCLUSIVE	0x2		/* exclusive binding */
	uint32_t	nkr_num_slots;

	/*
	 * On a NIC reset, the NIC ring indexes may be reset but the
	 * indexes in the netmap rings remain the same. nkr_hwofs
	 * keeps track of the offset between the two.
	 */
	int32_t		nkr_hwofs;

	uint16_t	nkr_slot_flags;	/* initial value for flags */

	/* last_reclaim is an opaque marker to help reduce the frequency
	 * of operations such as reclaiming tx buffers. A possible use
	 * is to set it to ticks and do the reclaim only once per tick.
	 */
	uint64_t	last_reclaim;


	NM_SELINFO_T	si;		/* poll/select wait queue */
	NM_LOCK_T	q_lock;		/* protects kring and ring. */
	NM_ATOMIC_T	nr_busy;	/* prevent concurrent syscalls */

	struct netmap_adapter *na;

	/* The following fields are for VALE switch support */
	struct nm_bdg_fwd *nkr_ft;
	uint32_t	*nkr_leases;
#define NR_NOSLOT	((uint32_t)~0)	/* used in nkr_*lease* */
	uint32_t	nkr_hwlease;
	uint32_t	nkr_lease_idx;

	/* while nkr_stopped is set, no new [tr]xsync operations can
	 * be started on this kring.
	 * This is used by netmap_disable_all_rings()
	 * to find a synchronization point where critical data
	 * structures pointed to by the kring can be added or removed.
	 */
	volatile int nkr_stopped;

	/* Support for adapters without native netmap support.
	 * On tx rings we preallocate an array of tx buffers
	 * (same size as the netmap ring), on rx rings we
	 * store incoming mbufs in a queue that is drained by
	 * a rxsync.
	 */
	struct mbuf **tx_pool;
	// u_int nr_ntc;		/* Emulation of a next-to-clean RX ring pointer. */
	struct mbq rx_queue;		/* intercepted rx mbufs. */

	uint32_t	users;		/* existing bindings for this ring */

	uint32_t	ring_id;	/* debugging */
	enum txrx	tx;		/* kind of ring (tx or rx) */
	char name[64];			/* diagnostic */

	/* [tx]sync callback for this kring.
	 * The default nm_kring_create callback (netmap_krings_create)
	 * sets the nm_sync callback of each hardware tx(rx) kring to
	 * the corresponding nm_txsync(nm_rxsync) taken from the
	 * netmap_adapter; moreover, it sets the sync callback
	 * of the host tx(rx) ring to netmap_txsync_to_host
	 * (netmap_rxsync_from_host).
	 *
	 * Overrides: the above configuration is not changed by
	 * any of the nm_krings_create callbacks.
	 */
	int (*nm_sync)(struct netmap_kring *kring, int flags);
	int (*nm_notify)(struct netmap_kring *kring, int flags);

#ifdef WITH_PIPES
	struct netmap_kring *pipe;	/* if this is a pipe ring,
					 * pointer to the other end
					 */
	struct netmap_ring *save_ring;	/* pointer to hidden rings
					 * (see netmap_pipe.c for details)
					 */
#endif /* WITH_PIPES */

#ifdef WITH_VALE
	int (*save_notify)(struct netmap_kring *kring, int flags);
#endif

#ifdef WITH_MONITOR
	/* array of krings that are monitoring this kring */
	struct netmap_kring **monitors;
	uint32_t max_monitors;	/* current size of the monitors array */
	uint32_t n_monitors;	/* next unused entry in the monitors array */
	/*
	 * Monitors work by intercepting the sync and notify callbacks of the
	 * monitored krings. This is implemented by replacing the pointers
	 * above and saving the previous ones in the mon_* pointers below.
	 */
	int (*mon_sync)(struct netmap_kring *kring, int flags);
	int (*mon_notify)(struct netmap_kring *kring, int flags);

	uint32_t mon_tail;	/* last seen slot on rx */
	uint32_t mon_pos;	/* index of this ring in the monitored ring array */
#endif
} __attribute__((__aligned__(64)));


/* return the next index, with wraparound */
static inline uint32_t
nm_next(uint32_t i, uint32_t lim)
{
	return unlikely(i == lim) ? 0 : i + 1;
}


/* return the previous index, with wraparound */
static inline uint32_t
nm_prev(uint32_t i, uint32_t lim)
{
	return unlikely(i == 0) ? lim : i - 1;
}
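
/*
 * Example (sketch): the canonical slot-walking loop used by txsync/rxsync
 * implementations. 'kring' and the per-slot work are placeholders for the
 * caller's own state; only nm_next() and the kring fields are real.
 *
 *	u_int nm_i = kring->nr_hwcur;
 *	u_int head = kring->rhead;
 *	u_int lim = kring->nkr_num_slots - 1;
 *
 *	while (nm_i != head) {
 *		struct netmap_slot *slot = &kring->ring->slot[nm_i];
 *		// ... process slot, e.g. program a NIC descriptor ...
 *		nm_i = nm_next(nm_i, lim);
 *	}
 *	kring->nr_hwcur = head;
 */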


/*
 *
 * Here is the layout for the Rx and Tx rings.

       RxRING                            TxRING

      +-----------------+            +-----------------+
      |                 |            |                 |
      |XXX free slot XXX|            |XXX free slot XXX|
      +-----------------+            +-----------------+
head->| owned by user   |<-hwcur     | not sent to nic |<-hwcur
      |                 |            | yet             |
      +-----------------+            |                 |
 cur->| available to    |            |                 |
      | user, not read  |            +-----------------+
      | yet             |       cur->| (being          |
      |                 |            |  prepared)      |
      |                 |            |                 |
      +-----------------+            +     ------      +
tail->|                 |<-hwtail    |                 |<-hwlease
      | (being          | ...        |                 | ...
      |  prepared)      | ...        |                 | ...
      +-----------------+ ...        |                 | ...
      |                 |<-hwlease   +-----------------+
      |                 |      tail->|                 |<-hwtail
      |                 |            |                 |
      |                 |            |                 |
      |                 |            |                 |
      +-----------------+            +-----------------+

 * The cur/tail (user view) and hwcur/hwtail (kernel view)
 * are used in the normal operation of the card.
 *
 * When a ring is the output of a switch port (Rx ring for
 * a VALE port, Tx ring for the host stack or NIC), slots
 * are reserved in blocks through 'hwlease' which points
 * to the next unused slot.
 * On an Rx ring, hwlease is always after hwtail,
 * and completions cause hwtail to advance.
 * On a Tx ring, hwlease is always between cur and hwtail,
 * and completions cause cur to advance.
 *
 * nm_kr_space() returns the maximum number of slots that
 * can be assigned.
 * nm_kr_lease() reserves the required number of buffers,
 *    advances nkr_hwlease and also returns an entry in
 *    a circular array where completions should be reported.
 */


struct netmap_lut {
	struct lut_entry *lut;
	uint32_t objtotal;	/* max buffer index */
	uint32_t objsize;	/* buffer size */
};

struct netmap_vp_adapter; // forward

/*
 * The "struct netmap_adapter" extends the "struct adapter"
 * (or equivalent) device descriptor.
 * It contains all base fields needed to support netmap operation.
 * There are in fact different types of netmap adapters
 * (native, generic, VALE switch...) so a netmap_adapter is
 * just the first field in the derived type.
 */
struct netmap_adapter {
	/*
	 * On linux we do not have a good way to tell if an interface
	 * is netmap-capable. So we always use the following trick:
	 * NA(ifp) points here, and the first entry (which hopefully
	 * always exists and is at least 32 bits) contains a magic
	 * value which we can use to detect that the interface is good.
	 */
	uint32_t magic;
	uint32_t na_flags;	/* enabled, and other flags */
#define NAF_SKIP_INTR	1	/* use the regular interrupt handler.
				 * useful during initialization
				 */
#define NAF_SW_ONLY	2	/* forward packets only to sw adapter */
#define NAF_BDG_MAYSLEEP 4	/* the bridge is allowed to sleep when
				 * forwarding packets coming from this
				 * interface
				 */
#define NAF_MEM_OWNER	8	/* the adapter uses its own memory area
				 * that cannot be changed
				 */
#define NAF_NATIVE	16	/* the adapter is native.
				 * Virtual ports (non-persistent vale ports,
				 * pipes, monitors...) should never use
				 * this flag.
				 */
#define	NAF_NETMAP_ON	32	/* netmap is active (either native or
				 * emulated). Where possible (e.g. FreeBSD)
				 * IFCAP_NETMAP also mirrors this flag.
				 */
#define NAF_HOST_RINGS	64	/* the adapter supports the host rings */
#define NAF_FORCE_NATIVE 128	/* the adapter is always NATIVE */
#define	NAF_BUSY	(1U<<31) /* the adapter is used internally and
				  * cannot be registered from userspace
				  */
	int active_fds; /* number of user-space descriptors using this
			 interface, which is equal to the number of
			 struct netmap_if objs in the mapped region. */

	u_int num_rx_rings; /* number of adapter receive rings */
	u_int num_tx_rings; /* number of adapter transmit rings */

	u_int num_tx_desc;  /* number of descriptors in each queue */
	u_int num_rx_desc;

	/* tx_rings and rx_rings are private but allocated
	 * as a contiguous chunk of memory. Each array has
	 * N+1 entries, for the adapter queues and for the host queue.
	 */
	struct netmap_kring *tx_rings; /* array of TX rings. */
	struct netmap_kring *rx_rings; /* array of RX rings. */

	void *tailroom;		       /* space below the rings array */
				       /* (used for leases) */


	NM_SELINFO_T si[NR_TXRX];	/* global wait queues */

	/* count users of the global wait queues */
	int si_users[NR_TXRX];

	void *pdev; /* used to store pci device */

	/* copy of the if_transmit pointer, to intercept
	 * packets from the network stack when netmap is active.
	 */
	int	(*if_transmit)(struct ifnet *, struct mbuf *);

	/* copy of if_input for netmap_send_up() */
	void	(*if_input)(struct ifnet *, struct mbuf *);

	/* references to the ifnet and device routines, used by
	 * the generic netmap functions.
	 */
	struct ifnet *ifp; /* adapter is ifp->if_softc */

	/*---- callbacks for this netmap adapter -----*/
	/*
	 * nm_dtor() is the cleanup routine called when destroying
	 *	the adapter.
	 *	Called with NMG_LOCK held.
	 *
	 * nm_register() is called on NIOCREGIF and close() to enter
	 *	or exit netmap mode on the NIC.
	 *	Called with NMG_LOCK held.
	 *
	 * nm_txsync() pushes packets to the underlying hw/switch
	 *
	 * nm_rxsync() collects packets from the underlying hw/switch
	 *
	 * nm_config() returns configuration information from the OS
	 *	Called with NMG_LOCK held.
	 *
	 * nm_krings_create() creates and initializes the tx_rings and
	 *	rx_rings arrays of kring structures. In particular,
	 *	it sets the nm_sync callbacks for each ring.
	 *	There is no need to also allocate the corresponding
	 *	netmap_rings, since netmap_mem_rings_create() will always
	 *	be called to provide the missing ones.
	 *	Called with NMG_LOCK held.
	 *
	 * nm_krings_delete() cleans up and deletes the tx_rings and rx_rings
	 *	arrays.
	 *	Called with NMG_LOCK held.
	 *
	 * nm_notify() is used to act after data have become available
	 *	(or the stopped state of the ring has changed).
	 *	For hw devices this is typically a selwakeup(),
	 *	but for NIC/host ports attached to a switch (or vice-versa)
	 *	we also need to invoke the 'txsync' code downstream.
	 */
	void (*nm_dtor)(struct netmap_adapter *);

	int (*nm_register)(struct netmap_adapter *, int onoff);

	int (*nm_txsync)(struct netmap_kring *kring, int flags);
	int (*nm_rxsync)(struct netmap_kring *kring, int flags);
	int (*nm_notify)(struct netmap_kring *kring, int flags);
#define NAF_FORCE_READ    1
#define NAF_FORCE_RECLAIM 2
	/* return configuration information */
	int (*nm_config)(struct netmap_adapter *,
		u_int *txr, u_int *txd, u_int *rxr, u_int *rxd);
	int (*nm_krings_create)(struct netmap_adapter *);
	void (*nm_krings_delete)(struct netmap_adapter *);
#ifdef WITH_VALE
	/*
	 * nm_bdg_attach() initializes the na_vp field to point
	 *      to an adapter that can be attached to a VALE switch. If the
	 *      current adapter is already a VALE port, na_vp is simply a cast;
	 *      otherwise, na_vp points to a netmap_bwrap_adapter.
	 *      If applicable, this callback also initializes na_hostvp,
	 *      which can be used to connect the adapter host rings to the
	 *      switch.
	 *      Called with NMG_LOCK held.
	 *
	 * nm_bdg_ctl() is called on the actual attach/detach to/from
	 *      the switch, to perform adapter-specific
	 *      initializations.
	 *      Called with NMG_LOCK held.
	 */
	int (*nm_bdg_attach)(const char *bdg_name, struct netmap_adapter *);
	int (*nm_bdg_ctl)(struct netmap_adapter *, struct nmreq *, int);

	/* adapter used to attach this adapter to a VALE switch (if any) */
	struct netmap_vp_adapter *na_vp;
	/* adapter used to attach the host rings of this adapter
	 * to a VALE switch (if any) */
	struct netmap_vp_adapter *na_hostvp;
#endif

	/* standard refcount to control the lifetime of the adapter
	 * (it should be equal to the lifetime of the corresponding ifp)
	 */
	int na_refcount;

	/* memory allocator (opaque)
	 * We also cache a pointer to the lut_entry for translating
	 * buffer addresses, and the total number of buffers.
	 */
	struct netmap_mem_d *nm_mem;
	struct netmap_lut na_lut;

	/* additional information attached to this adapter
	 * by other netmap subsystems. Currently used by
	 * bwrap and LINUX/v1000.
	 */
	void *na_private;

	/* array of pipes that have this adapter as a parent */
	struct netmap_pipe_adapter **na_pipes;
	int na_next_pipe;	/* next free slot in the array */
	int na_max_pipes;	/* size of the array */

	char name[64];
};

static __inline u_int
nma_get_ndesc(struct netmap_adapter *na, enum txrx t)
{
	return (t == NR_TX ? na->num_tx_desc : na->num_rx_desc);
}

static __inline void
nma_set_ndesc(struct netmap_adapter *na, enum txrx t, u_int v)
{
	if (t == NR_TX)
		na->num_tx_desc = v;
	else
		na->num_rx_desc = v;
}

static __inline u_int
nma_get_nrings(struct netmap_adapter *na, enum txrx t)
{
	return (t == NR_TX ? na->num_tx_rings : na->num_rx_rings);
}

static __inline void
nma_set_nrings(struct netmap_adapter *na, enum txrx t, u_int v)
{
	if (t == NR_TX)
		na->num_tx_rings = v;
	else
		na->num_rx_rings = v;
}

static __inline struct netmap_kring*
NMR(struct netmap_adapter *na, enum txrx t)
{
	return (t == NR_TX ? na->tx_rings : na->rx_rings);
}
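
/*
 * Example (sketch): visiting every kring of an adapter in both directions
 * using the accessors above. The per-kring work is a placeholder.
 *
 *	enum txrx t;
 *	u_int i;
 *
 *	for_rx_tx(t) {
 *		for (i = 0; i < nma_get_nrings(na, t); i++) {
 *			struct netmap_kring *kring = &NMR(na, t)[i];
 *			// ... inspect or reconfigure kring ...
 *		}
 *	}
 */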

/*
 * If the NIC is owned by the kernel
 * (i.e., bridge), neither another bridge nor a user can use it;
 * if the NIC is owned by a user, only users can share it.
 * Evaluation must be done under NMG_LOCK().
 */
#define NETMAP_OWNED_BY_KERN(na)	((na)->na_flags & NAF_BUSY)
#define NETMAP_OWNED_BY_ANY(na) \
	(NETMAP_OWNED_BY_KERN(na) || ((na)->active_fds > 0))

/*
 * derived netmap adapters for various types of ports
 */
struct netmap_vp_adapter {	/* VALE software port */
	struct netmap_adapter up;

	/*
	 * Bridge support:
	 *
	 * bdg_port is the port number used in the bridge;
	 * na_bdg points to the bridge this NA is attached to.
	 */
	int bdg_port;
	struct nm_bridge *na_bdg;
	int retry;

	/* Offset of ethernet header for each packet. */
	u_int virt_hdr_len;
	/* Maximum Frame Size, used in bdg_mismatch_datapath() */
	u_int mfs;
	/* Last source MAC on this port */
	uint64_t last_smac;
};


struct netmap_hw_adapter {	/* physical device */
	struct netmap_adapter up;

	struct net_device_ops nm_ndo;	// XXX linux only
	struct ethtool_ops    nm_eto;	// XXX linux only
	const struct ethtool_ops*   save_ethtool;

	int (*nm_hw_register)(struct netmap_adapter *, int onoff);
};

#ifdef WITH_GENERIC
/* Mitigation support. */
struct nm_generic_mit {
	struct hrtimer mit_timer;
	int mit_pending;
	int mit_ring_idx;  /* index of the ring being mitigated */
	struct netmap_adapter *mit_na;  /* backpointer */
};

struct netmap_generic_adapter {	/* emulated device */
	struct netmap_hw_adapter up;

	/* Pointer to a previously used netmap adapter. */
	struct netmap_adapter *prev;

	/* generic netmap adapter support:
	 * a net_device_ops struct overrides ndo_select_queue(),
	 * save_if_input saves the if_input hook (FreeBSD),
	 * mit implements rx interrupt mitigation.
	 */
	struct net_device_ops generic_ndo;
	void (*save_if_input)(struct ifnet *, struct mbuf *);

	struct nm_generic_mit *mit;
#ifdef linux
	netdev_tx_t (*save_start_xmit)(struct mbuf *, struct ifnet *);
#endif
};
#endif  /* WITH_GENERIC */

static __inline int
netmap_real_rings(struct netmap_adapter *na, enum txrx t)
{
	return nma_get_nrings(na, t) + !!(na->na_flags & NAF_HOST_RINGS);
}

#ifdef WITH_VALE

/*
 * Bridge wrapper for non-VALE ports attached to a VALE switch.
 *
 * The real device must already have its own netmap adapter (hwna).
 * The bridge wrapper and the hwna adapter share the same set of
 * netmap rings and buffers, but they have two separate sets of
 * kring descriptors, with tx/rx meanings swapped:
 *
 *                                  netmap
 *           bwrap     krings       rings      krings      hwna
 *         +------+   +------+     +-----+    +------+   +------+
 *         |tx_rings->|      |\   /|     |----|      |<-tx_rings|
 *         |      |   +------+ \ / +-----+    +------+   |      |
 *         |      |             X                        |      |
 *         |      |            / \                       |      |
 *         |      |   +------+/   \+-----+    +------+   |      |
 *         |rx_rings->|      |     |     |----|      |<-rx_rings|
 *         |      |   +------+     +-----+    +------+   |      |
 *         +------+                                      +------+
 *
 * - packets coming from the bridge go to the bwrap rx rings,
 *   which are also the hwna tx rings.  The bwrap notify callback
 *   will then complete the hwna tx (see netmap_bwrap_notify).
 *
 * - packets coming from the outside go to the hwna rx rings,
 *   which are also the bwrap tx rings.  The (overwritten) hwna
 *   notify method will then complete the bridge tx
 *   (see netmap_bwrap_intr_notify).
 *
 *   The bridge wrapper may optionally connect the hwna 'host' rings
 *   to the bridge. This is done by using a second port in the
 *   bridge and connecting it to the 'host' netmap_vp_adapter
 *   contained in the netmap_bwrap_adapter. The bwrap host adapter
 *   cross-links the hwna host rings in the same way as shown above.
 *
 * - packets coming from the bridge and directed to the host stack
 *   are handled by the bwrap host notify callback
 *   (see netmap_bwrap_host_notify)
 *
 * - packets coming from the host stack are still handled by the
 *   overwritten hwna notify callback (netmap_bwrap_intr_notify),
 *   but are diverted to the host adapter depending on the ring number.
 *
 */
struct netmap_bwrap_adapter {
	struct netmap_vp_adapter up;
	struct netmap_vp_adapter host;  /* for host rings */
	struct netmap_adapter *hwna;	/* the underlying device */

	/* backup of the hwna memory allocator */
	struct netmap_mem_d *save_nmd;

	/*
	 * When we attach a physical interface to the bridge, we
	 * allow the controlling process to terminate, so we need
	 * a place to store the netmap_priv_d data structure.
	 * This is only done when physical interfaces
	 * are attached to a bridge.
	 */
	struct netmap_priv_d *na_kpriv;
};
int netmap_bwrap_attach(const char *name, struct netmap_adapter *);


#endif /* WITH_VALE */

#ifdef WITH_PIPES

#define NM_MAXPIPES	64	/* max number of pipes per adapter */

struct netmap_pipe_adapter {
	struct netmap_adapter up;

	u_int id;	/* pipe identifier */
	int role;	/* either NR_REG_PIPE_MASTER or NR_REG_PIPE_SLAVE */

	struct netmap_adapter *parent; /* adapter that owns the memory */
	struct netmap_pipe_adapter *peer; /* the other end of the pipe */
	int peer_ref;		/* 1 iff we are holding a ref to the peer */

	u_int parent_slot; /* index in the parent pipe array */
};

#endif /* WITH_PIPES */


/* return slots reserved to rx clients; used in drivers */
static inline uint32_t
nm_kr_rxspace(struct netmap_kring *k)
{
	int space = k->nr_hwtail - k->nr_hwcur;
	if (space < 0)
		space += k->nkr_num_slots;
	ND("preserving %d rx slots %d -> %d", space, k->nr_hwcur, k->nr_hwtail);

	return space;
}


/* True if there is no space in the tx ring. Only valid after txsync_prologue */
static inline int
nm_kr_txempty(struct netmap_kring *kring)
{
	return kring->rcur == kring->nr_hwtail;
}


/*
 * Protect against multiple threads using the same ring.
 * Also check that the ring has not been stopped.
 * We only care for 0 or !=0 as a return code.
 */
#define NM_KR_BUSY	1
#define NM_KR_STOPPED	2


static __inline void nm_kr_put(struct netmap_kring *kr)
{
	NM_ATOMIC_CLEAR(&kr->nr_busy);
}


static __inline int nm_kr_tryget(struct netmap_kring *kr)
{
	/* check a first time without taking the lock
	 * to avoid starvation for nm_kr_get()
	 */
	if (unlikely(kr->nkr_stopped)) {
		ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
		return NM_KR_STOPPED;
	}
	if (unlikely(NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)))
		return NM_KR_BUSY;
	/* check a second time with the lock held */
	if (unlikely(kr->nkr_stopped)) {
		ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
		nm_kr_put(kr);
		return NM_KR_STOPPED;
	}
	return 0;
}

static __inline void nm_kr_get(struct netmap_kring *kr)
{
	while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy))
		tsleep(kr, 0, "NM_KR_GET", 4);
}
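
/*
 * Example (sketch): the guarded-access pattern used around [tr]xsync.
 * A successful nm_kr_tryget() must be paired with nm_kr_put(); a nonzero
 * return means the ring is busy or stopped and the caller should back off.
 *
 *	if (nm_kr_tryget(kring))
 *		return;			// busy or stopped, retry later
 *	kring->nm_sync(kring, 0);	// exclusive access here
 *	nm_kr_put(kring);
 */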


/*
 * The following functions are used by individual drivers to
 * support netmap operation.
 *
 * netmap_attach() initializes a struct netmap_adapter, allocating the
 *	struct netmap_ring's and the struct selinfo.
 *
 * netmap_detach() frees the memory allocated by netmap_attach().
 *
 * netmap_transmit() replaces the if_transmit routine of the interface,
 *	and is used to intercept packets coming from the stack.
 *
 * netmap_load_map/netmap_reload_map are helper routines to set/reset
 *	the dmamap for a packet buffer.
 *
 * netmap_reset() is a helper routine to be called in the hw driver
 *	when reinitializing a ring. It should not be called by
 *	virtual ports (vale, pipes, monitor).
 */
int netmap_attach(struct netmap_adapter *);
void netmap_detach(struct ifnet *);
int netmap_transmit(struct ifnet *, struct mbuf *);
struct netmap_slot *netmap_reset(struct netmap_adapter *na,
	enum txrx tx, u_int n, u_int new_cur);
int netmap_ring_reinit(struct netmap_kring *);
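
/*
 * Example (sketch, names hypothetical): how a NIC driver typically wires
 * itself to netmap at attach time. 'sc' and the foo_* callbacks stand in
 * for the driver's own softc and routines; only the na fields and
 * netmap_attach()/netmap_detach() are part of the real API.
 *
 *	struct netmap_adapter na;
 *
 *	bzero(&na, sizeof(na));
 *	na.ifp = sc->ifp;
 *	na.num_tx_desc = sc->num_tx_desc;
 *	na.num_rx_desc = sc->num_rx_desc;
 *	na.num_tx_rings = na.num_rx_rings = sc->num_queues;
 *	na.nm_register = foo_netmap_reg;
 *	na.nm_txsync = foo_netmap_txsync;
 *	na.nm_rxsync = foo_netmap_rxsync;
 *	netmap_attach(&na);
 *
 * On detach the driver calls netmap_detach(sc->ifp).
 */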

/* default functions to handle rx/tx interrupts */
int netmap_rx_irq(struct ifnet *, u_int, u_int *);
#define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL)
void netmap_common_irq(struct ifnet *, u_int, u_int *work_done);
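
/*
 * Example (sketch): a driver rx interrupt handler defers to netmap when
 * the interface is in netmap mode. netmap_rx_irq() returns != 0 in that
 * case, so normal stack processing is skipped. 'ring_nr' is a placeholder
 * for the driver's queue index.
 *
 *	u_int work_done;
 *
 *	if (netmap_rx_irq(ifp, ring_nr, &work_done))
 *		return;			// handled by netmap
 *	// ... normal rx processing ...
 */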


#ifdef WITH_VALE
/* functions used by external modules to interface with VALE */
#define netmap_vp_to_ifp(_vp)	((_vp)->up.ifp)
#define netmap_ifp_to_vp(_ifp)	(NA(_ifp)->na_vp)
#define netmap_ifp_to_host_vp(_ifp) (NA(_ifp)->na_hostvp)
#define netmap_bdg_idx(_vp)	((_vp)->bdg_port)
const char *netmap_bdg_name(struct netmap_vp_adapter *);
#else /* !WITH_VALE */
#define netmap_vp_to_ifp(_vp)	NULL
#define netmap_ifp_to_vp(_ifp)	NULL
#define netmap_ifp_to_host_vp(_ifp) NULL
#define netmap_bdg_idx(_vp)	-1
#define netmap_bdg_name(_vp)	NULL
#endif /* WITH_VALE */

static inline int
nm_netmap_on(struct netmap_adapter *na)
{
	return na && na->na_flags & NAF_NETMAP_ON;
}

static inline int
nm_native_on(struct netmap_adapter *na)
{
	return nm_netmap_on(na) && (na->na_flags & NAF_NATIVE);
}

/* set/clear native flags and if_transmit/netdev_ops */
static inline void
nm_set_native_flags(struct netmap_adapter *na)
{
	struct ifnet *ifp = na->ifp;

	na->na_flags |= NAF_NETMAP_ON;
#ifdef IFCAP_NETMAP /* or FreeBSD ? */
	ifp->if_capenable |= IFCAP_NETMAP;
#endif
#ifdef __FreeBSD__
	na->if_transmit = ifp->if_transmit;
	ifp->if_transmit = netmap_transmit;
#else
	na->if_transmit = (void *)ifp->netdev_ops;
	ifp->netdev_ops = &((struct netmap_hw_adapter *)na)->nm_ndo;
	((struct netmap_hw_adapter *)na)->save_ethtool = ifp->ethtool_ops;
	ifp->ethtool_ops = &((struct netmap_hw_adapter*)na)->nm_eto;
#endif
}


static inline void
nm_clear_native_flags(struct netmap_adapter *na)
{
	struct ifnet *ifp = na->ifp;

#ifdef __FreeBSD__
	ifp->if_transmit = na->if_transmit;
#else
	ifp->netdev_ops = (void *)na->if_transmit;
	ifp->ethtool_ops = ((struct netmap_hw_adapter*)na)->save_ethtool;
#endif
	na->na_flags &= ~NAF_NETMAP_ON;
#ifdef IFCAP_NETMAP /* or FreeBSD ? */
	ifp->if_capenable &= ~IFCAP_NETMAP;
#endif
}


/* check/fix address and len in tx rings */
#if 1 /* debug version */
#define	NM_CHECK_ADDR_LEN(_na, _a, _l)	do {				\
	if (_a == NETMAP_BUF_BASE(_na) || _l > NETMAP_BUF_SIZE(_na)) {	\
		RD(5, "bad addr/len ring %d slot %d idx %d len %d",	\
			kring->ring_id, nm_i, slot->buf_idx, len);	\
		if (_l > NETMAP_BUF_SIZE(_na))				\
			_l = NETMAP_BUF_SIZE(_na);			\
	} } while (0)
#else /* no debug version */
#define	NM_CHECK_ADDR_LEN(_na, _a, _l)	do {				\
		if (_l > NETMAP_BUF_SIZE(_na))				\
			_l = NETMAP_BUF_SIZE(_na);			\
	} while (0)
#endif


/*---------------------------------------------------------------*/
/*
 * Support routines used by netmap subsystems
 * (native drivers, VALE, generic, pipes, monitors, ...)
 */


/* common routine for all functions that create a netmap adapter. It performs
 * two main tasks:
 * - if the na points to an ifp, mark the ifp as netmap capable
 *   using na as its native adapter;
 * - provide defaults for the setup callbacks and the memory allocator
 */
int netmap_attach_common(struct netmap_adapter *);
/* common actions to be performed on netmap adapter destruction */
void netmap_detach_common(struct netmap_adapter *);
/* fill priv->np_qfirst[] and priv->np_qlast[] using the ringid and flags
 * information coming from a struct nmreq
 */
int netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags);
/* update the ring parameters (number and size of tx and rx rings).
 * It calls the nm_config callback, if available.
 */
int netmap_update_config(struct netmap_adapter *na);
/* create and initialize the common fields of the krings array,
 * using the information that must already be available in the na.
 * tailroom can be used to request the allocation of additional
 * tailroom bytes after the krings array. This is used by
 * netmap_vp_adapters (i.e., VALE ports) to make room for
 * leasing-related data structures.
 */
int netmap_krings_create(struct netmap_adapter *na, u_int tailroom);
/* deletes the kring array of the adapter. The array must have
 * been created using netmap_krings_create.
 */
void netmap_krings_delete(struct netmap_adapter *na);

/* set the stopped/enabled status of a ring.
 * When stopping, it also waits for all current activity on the ring to
 * terminate. The status change is then notified using the na nm_notify
 * callback.
 */
void netmap_set_ring(struct netmap_adapter *, u_int ring_id, enum txrx, int stopped);
/* set the stopped/enabled status of all rings of the adapter. */
void netmap_set_all_rings(struct netmap_adapter *, int stopped);
/* convenience wrappers for netmap_set_all_rings, used in drivers */
void netmap_disable_all_rings(struct ifnet *);
void netmap_enable_all_rings(struct ifnet *);

int netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
	uint16_t ringid, uint32_t flags);


u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg);
int netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
int netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na);


#ifdef WITH_VALE
/*
 * The following bridge-related functions are used by other
 * kernel modules.
 *
 * VALE only supports unicast or broadcast. The lookup
 * function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports,
 * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 for unknown.
 * XXX in practice "unknown" might be handled the same as broadcast.
 */
typedef u_int (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
		struct netmap_vp_adapter *);
typedef int (*bdg_config_fn_t)(struct nm_ifreq *);
typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *);
struct netmap_bdg_ops {
	bdg_lookup_fn_t lookup;
	bdg_config_fn_t config;
	bdg_dtor_fn_t	dtor;
};

u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
		struct netmap_vp_adapter *);

#define	NM_BDG_MAXPORTS		254	/* up to 254 */
#define	NM_BDG_BROADCAST	NM_BDG_MAXPORTS
#define	NM_BDG_NOPORT		(NM_BDG_MAXPORTS+1)

#define	NM_NAME			"vale"	/* prefix for bridge port name */

/* these are redefined in case of no VALE support */
int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
struct nm_bridge *netmap_init_bridges2(u_int);
void netmap_uninit_bridges2(struct nm_bridge *, u_int);
int netmap_init_bridges(void);
void netmap_uninit_bridges(void);
int netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops);
int netmap_bdg_config(struct nmreq *nmr);

#else /* !WITH_VALE */
#define	netmap_get_bdg_na(_1, _2, _3)	0
#define netmap_init_bridges() 0
#define netmap_uninit_bridges()
#define	netmap_bdg_ctl(_1, _2)	EINVAL
#endif /* !WITH_VALE */

#ifdef WITH_PIPES
/* max number of pipes per device */
#define NM_MAXPIPES	64	/* XXX how many? */
void netmap_pipe_dealloc(struct netmap_adapter *);
int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
#else /* !WITH_PIPES */
#define NM_MAXPIPES	0
#define netmap_pipe_alloc(_1, _2)	0
#define netmap_pipe_dealloc(_1)
#define netmap_get_pipe_na(nmr, _2, _3)	\
	({ int role__ = (nmr)->nr_flags & NR_REG_MASK; \
	   (role__ == NR_REG_PIPE_MASTER ||	       \
	    role__ == NR_REG_PIPE_SLAVE) ? EOPNOTSUPP : 0; })
#endif

#ifdef WITH_MONITOR
int netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
void netmap_monitor_stop(struct netmap_adapter *na);
#else
#define netmap_get_monitor_na(nmr, _2, _3) \
	((nmr)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0)
#endif

#ifdef CONFIG_NET_NS
struct net *netmap_bns_get(void);
void netmap_bns_put(struct net *);
void netmap_bns_getbridges(struct nm_bridge **, u_int *);
#else
#define netmap_bns_get()
#define netmap_bns_put(_1)
#define netmap_bns_getbridges(b, n) \
	do { *b = nm_bridges; *n = NM_BRIDGES; } while (0)
#endif

/* Various prototypes */
int netmap_poll(struct cdev *dev, int events, struct thread *td);
int netmap_init(void);
void netmap_fini(void);
int netmap_get_memory(struct netmap_priv_d* p);
void netmap_dtor(void *data);
int netmap_dtor_locked(struct netmap_priv_d *priv);

int netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td);

/* netmap_adapter creation/destruction */

// #define NM_DEBUG_PUTGET 1

#ifdef NM_DEBUG_PUTGET

#define NM_DBG(f) __##f

void __netmap_adapter_get(struct netmap_adapter *na);

#define netmap_adapter_get(na)				\
	do {						\
		struct netmap_adapter *__na = na;	\
		D("getting %p:%s (%d)", __na, (__na)->name, (__na)->na_refcount);	\
		__netmap_adapter_get(__na);		\
	} while (0)

int __netmap_adapter_put(struct netmap_adapter *na);

#define netmap_adapter_put(na)				\
	({						\
		struct netmap_adapter *__na = na;	\
		D("putting %p:%s (%d)", __na, (__na)->name, (__na)->na_refcount);	\
		__netmap_adapter_put(__na);		\
	})

#else /* !NM_DEBUG_PUTGET */

#define NM_DBG(f) f
void netmap_adapter_get(struct netmap_adapter *na);
int netmap_adapter_put(struct netmap_adapter *na);

#endif /* !NM_DEBUG_PUTGET */


/*
 * module variables
 */
#define NETMAP_BUF_BASE(na)	((na)->na_lut.lut[0].vaddr)
#define NETMAP_BUF_SIZE(na)	((na)->na_lut.objsize)
extern int netmap_mitigate;	// XXX not really used
extern int netmap_no_pendintr;
extern int netmap_verbose;	// XXX debugging
enum {                                  /* verbose flags */
	NM_VERB_ON = 1,                 /* generic verbose */
	NM_VERB_HOST = 0x2,             /* verbose host stack */
	NM_VERB_RXSYNC = 0x10,          /* verbose on rxsync/txsync */
	NM_VERB_TXSYNC = 0x20,
	NM_VERB_RXINTR = 0x100,         /* verbose on rx/tx intr (driver) */
	NM_VERB_TXINTR = 0x200,
	NM_VERB_NIC_RXSYNC = 0x1000,    /* verbose on rx/tx intr (driver) */
	NM_VERB_NIC_TXSYNC = 0x2000,
};

extern int netmap_txsync_retry;
extern int netmap_generic_mit;
extern int netmap_generic_ringsize;
extern int netmap_generic_rings;
extern int netmap_use_count;

/*
 * NA returns a pointer to the struct netmap_adapter from the ifp,
 * WNA is used to write it.
 */
#define	NA(_ifp)	((struct netmap_adapter *)WNA(_ifp))

/*
 * Macros to determine if an interface is netmap capable or netmap enabled.
 * See the magic field in struct netmap_adapter.
 */
#ifdef __FreeBSD__
/*
 * on FreeBSD just use if_capabilities and if_capenable.
 */
#define NETMAP_CAPABLE(ifp)	(NA(ifp) &&		\
	(ifp)->if_capabilities & IFCAP_NETMAP )

#define	NETMAP_SET_CAPABLE(ifp)				\
	(ifp)->if_capabilities |= IFCAP_NETMAP

#else	/* linux */

/*
 * on linux:
 * we check if NA(ifp) is set and its first element has a related
 * magic value. The capenable is within the struct netmap_adapter.
 */
#define	NETMAP_MAGIC	0x52697a7a

#define NETMAP_CAPABLE(ifp)	(NA(ifp) &&		\
	((uint32_t)(uintptr_t)NA(ifp) ^ NA(ifp)->magic) == NETMAP_MAGIC )

#define	NETMAP_SET_CAPABLE(ifp)				\
	NA(ifp)->magic = ((uint32_t)(uintptr_t)NA(ifp)) ^ NETMAP_MAGIC

#endif	/* linux */

#ifdef __FreeBSD__

/* Assigns the device IOMMU domain to an allocator.
 * Returns -ENOMEM in case the domain is different */
#define nm_iommu_group_id(dev) (0)

/* Callback invoked by the dma machinery after a successful dmamap_load */
static void netmap_dmamap_cb(__unused void *arg,
    __unused bus_dma_segment_t * segs, __unused int nseg, __unused int error)
{
}

/* bus_dmamap_load wrapper: call aforementioned function if map != NULL.
 * XXX can we do it without a callback ?
 */
static inline void
netmap_load_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
	if (map)
		bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE(na),
		    netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
}

static inline void
netmap_unload_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map)
{
	if (map)
		bus_dmamap_unload(tag, map);
}

/* update the map when a buffer changes. */
static inline void
netmap_reload_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
	if (map) {
		bus_dmamap_unload(tag, map);
		bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE(na),
		    netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
	}
}

#else /* linux */

int nm_iommu_group_id(bus_dma_tag_t dev);
#include <linux/dma-mapping.h>

static inline void
netmap_load_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
	if (0 && map) {
		*map = dma_map_single(na->pdev, buf, na->na_lut.objsize,
				DMA_BIDIRECTIONAL);
	}
}

static inline void
netmap_unload_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map)
{
	u_int sz = na->na_lut.objsize;

	if (*map) {
		dma_unmap_single(na->pdev, *map, sz,
				DMA_BIDIRECTIONAL);
	}
}

static inline void
netmap_reload_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
	u_int sz = na->na_lut.objsize;

	if (*map) {
		dma_unmap_single(na->pdev, *map, sz,
				DMA_BIDIRECTIONAL);
	}

	*map = dma_map_single(na->pdev, buf, sz,
				DMA_BIDIRECTIONAL);
}

/*
 * XXX How do we redefine these functions:
 *
 * on linux we need
 *	dma_map_single(&pdev->dev, virt_addr, len, direction)
 *	dma_unmap_single(&adapter->pdev->dev, phys_addr, len, direction)
 * The len can be implicit (on netmap it is NETMAP_BUF_SIZE);
 * unfortunately the direction is not, so we need to change
 * something to have a cross API
 */

#if 0
	struct e1000_buffer *buffer_info =  &tx_ring->buffer_info[l];
	/* set time_stamp *before* dma to help avoid a possible race */
	buffer_info->time_stamp = jiffies;
	buffer_info->mapped_as_page = false;
	buffer_info->length = len;
	//buffer_info->next_to_watch = l;
	/* reload dma map */
	dma_unmap_single(&adapter->pdev->dev, buffer_info->dma,
			NETMAP_BUF_SIZE, DMA_TO_DEVICE);
	buffer_info->dma = dma_map_single(&adapter->pdev->dev,
			addr, NETMAP_BUF_SIZE, DMA_TO_DEVICE);

	if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
		D("dma mapping error");
		/* goto dma_error; See e1000_put_txbuf() */
		/* XXX reset */
	}
	tx_desc->buffer_addr = htole64(buffer_info->dma); //XXX

#endif

/*
 * The bus_dmamap_sync() can be one of wmb() or rmb() depending on direction.
 */
#define bus_dmamap_sync(_a, _b, _c)

#endif /* linux */


/*
 * functions to map NIC to KRING indexes (n2k) and vice versa (k2n)
 */
static inline int
netmap_idx_n2k(struct netmap_kring *kr, int idx)
{
	int n = kr->nkr_num_slots;
	idx += kr->nkr_hwofs;
	if (idx < 0)
		return idx + n;
	else if (idx < n)
		return idx;
	else
		return idx - n;
}


static inline int
netmap_idx_k2n(struct netmap_kring *kr, int idx)
{
	int n = kr->nkr_num_slots;
	idx -= kr->nkr_hwofs;
	if (idx < 0)
		return idx + n;
	else if (idx < n)
		return idx;
	else
		return idx - n;
}
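
/*
 * Example (sketch): index translation inside a driver rxsync. The hardware
 * index ('nic_i', e.g. the driver's next-to-check state, hypothetical here)
 * and the netmap slot index ('nm_i') differ by nkr_hwofs after a reset.
 *
 *	nic_i = sc->next_to_check;		// hypothetical driver state
 *	nm_i = netmap_idx_n2k(kring, nic_i);	// matching netmap slot
 *	// ...
 *	nic_i = netmap_idx_k2n(kring, nm_i);	// and back
 */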


/* Entries of the look-up table. */
struct lut_entry {
	void *vaddr;		/* virtual address. */
	vm_paddr_t paddr;	/* physical address. */
};

struct netmap_obj_pool;

/*
 * NMB returns the virtual address of a buffer (buffer 0 on bad index);
 * PNMB also fills in the physical address.
 */
static inline void *
NMB(struct netmap_adapter *na, struct netmap_slot *slot)
{
	struct lut_entry *lut = na->na_lut.lut;
	uint32_t i = slot->buf_idx;
	return (unlikely(i >= na->na_lut.objtotal)) ?
		lut[0].vaddr : lut[i].vaddr;
}

static inline void *
PNMB(struct netmap_adapter *na, struct netmap_slot *slot, uint64_t *pp)
{
	uint32_t i = slot->buf_idx;
	struct lut_entry *lut = na->na_lut.lut;
	void *ret = (i >= na->na_lut.objtotal) ? lut[0].vaddr : lut[i].vaddr;

	*pp = (i >= na->na_lut.objtotal) ? lut[0].paddr : lut[i].paddr;
	return ret;
}

/*
 * Structure associated to each netmap file descriptor.
 * It is created on open and left unbound (np_nifp == NULL).
 * A successful NIOCREGIF will set np_nifp and the first few fields;
 * this is protected by a global lock (NMG_LOCK) due to low contention.
 *
 * np_refs counts the number of references to the structure: one for the fd,
 * plus (on FreeBSD) one for each active mmap which we track ourselves
 * (linux automatically tracks them, but FreeBSD does not).
 * np_refs is protected by NMG_LOCK.
 *
 * Read access to the structure is lock free, because np_nifp once set
 * can only go to 0 when nobody is using the entry anymore. Readers
 * must check that np_nifp != NULL before using the other fields.
 */
struct netmap_priv_d {
	struct netmap_if * volatile np_nifp;	/* netmap if descriptor. */

	struct netmap_adapter	*np_na;
	uint32_t	np_flags;	/* from the ioctl */
	u_int		np_qfirst[NR_TXRX],
			np_qlast[NR_TXRX]; /* range of tx/rx rings to scan */
	uint16_t	np_txpoll;	/* XXX and also np_rxpoll ? */

	int		np_refs;	/* use with NMG_LOCK held */

	/* pointers to the selinfo to be used for selrecord.
	 * Either the local or the global one depending on the
	 * number of rings.
	 */
	NM_SELINFO_T *np_si[NR_TXRX];
	struct thread	*np_td;		/* kqueue, just debugging */
};

#ifdef WITH_MONITOR

struct netmap_monitor_adapter {
	struct netmap_adapter up;

	struct netmap_priv_d priv;
	uint32_t flags;
};

#endif /* WITH_MONITOR */


#ifdef WITH_GENERIC
/*
 * generic netmap emulation for devices that do not have
 * native netmap support.
 */
int generic_netmap_attach(struct ifnet *ifp);

int netmap_catch_rx(struct netmap_generic_adapter *na, int intercept);
void generic_rx_handler(struct ifnet *ifp, struct mbuf *m);
void netmap_catch_tx(struct netmap_generic_adapter *na, int enable);
int generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, void *addr, u_int len, u_int ring_nr);
int generic_find_num_desc(struct ifnet *ifp, u_int *tx, u_int *rx);
void generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq);
static inline struct ifnet*
netmap_generic_getifp(struct netmap_generic_adapter *gna)
{
	if (gna->prev)
		return gna->prev->ifp;

	return gna->up.up.ifp;
}

//#define RATE_GENERIC  /* Enables communication statistics for generic. */
#ifdef RATE_GENERIC
void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi);
#else
#define generic_rate(txp, txs, txi, rxp, rxs, rxi)
#endif

/*
 * netmap_mitigation API. This is used by the generic adapter
 * to reduce the number of interrupt requests/selwakeup calls
 * delivered to clients on incoming packets.
 */
void netmap_mitigation_init(struct nm_generic_mit *mit, int idx,
				struct netmap_adapter *na);
void netmap_mitigation_start(struct nm_generic_mit *mit);
void netmap_mitigation_restart(struct nm_generic_mit *mit);
int netmap_mitigation_active(struct nm_generic_mit *mit);
void netmap_mitigation_cleanup(struct nm_generic_mit *mit);
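
/*
 * Example (simplified sketch, see netmap_generic.c for the actual logic):
 * a plausible use of the mitigation API on the generic rx path. If no
 * mitigation interval is pending, notify immediately and start one;
 * otherwise just record that an event was coalesced.
 *
 *	if (!netmap_mitigation_active(mit)) {
 *		kring->nm_notify(kring, 0);	// notify now ...
 *		netmap_mitigation_start(mit);	// ... then hold back
 *	} else {
 *		mit->mit_pending = 1;		// coalesce this event
 *	}
 */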
#endif /* WITH_GENERIC */



/* Shared declarations for the VALE switch. */

/*
 * Each transmit queue accumulates a batch of packets into
 * a structure before forwarding. Packets to the same
 * destination are put in a list using ft_next as a link field.
 * ft_frags and ft_next are valid only on the first fragment.
 */
struct nm_bdg_fwd {	/* forwarding entry for a bridge */
	void *ft_buf;		/* netmap or indirect buffer */
	uint8_t ft_frags;	/* how many fragments (only on 1st frag) */
	uint8_t _ft_port;	/* dst port (unused) */
	uint16_t ft_flags;	/* flags, e.g. indirect */
	uint16_t ft_len;	/* src fragment len */
	uint16_t ft_next;	/* next packet to same destination */
};

/* struct 'virtio_net_hdr' from linux. */
struct nm_vnet_hdr {
#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1	/* Use csum_start, csum_offset */
#define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */
    uint8_t flags;
#define VIRTIO_NET_HDR_GSO_NONE		0	/* Not a GSO frame */
#define VIRTIO_NET_HDR_GSO_TCPV4	1	/* GSO frame, IPv4 TCP (TSO) */
#define VIRTIO_NET_HDR_GSO_UDP		3	/* GSO frame, IPv4 UDP (UFO) */
#define VIRTIO_NET_HDR_GSO_TCPV6	4	/* GSO frame, IPv6 TCP */
#define VIRTIO_NET_HDR_GSO_ECN		0x80	/* TCP has ECN set */
    uint8_t gso_type;
    uint16_t hdr_len;
    uint16_t gso_size;
    uint16_t csum_start;
    uint16_t csum_offset;
};

#define WORST_CASE_GSO_HEADER	(14+40+60)  /* IPv6 + TCP */

/* Private definitions for IPv4, IPv6, UDP and TCP headers. */

struct nm_iphdr {
	uint8_t		version_ihl;
	uint8_t		tos;
	uint16_t	tot_len;
	uint16_t	id;
	uint16_t	frag_off;
	uint8_t		ttl;
	uint8_t		protocol;
	uint16_t	check;
	uint32_t	saddr;
	uint32_t	daddr;
	/* The options start here. */
};

struct nm_tcphdr {
	uint16_t	source;
	uint16_t	dest;
	uint32_t	seq;
	uint32_t	ack_seq;
	uint8_t		doff;  /* Data offset + Reserved */
	uint8_t		flags;
	uint16_t	window;
	uint16_t	check;
	uint16_t	urg_ptr;
};

struct nm_udphdr {
	uint16_t	source;
	uint16_t	dest;
	uint16_t	len;
	uint16_t	check;
};

struct nm_ipv6hdr {
	uint8_t		priority_version;
	uint8_t		flow_lbl[3];

	uint16_t	payload_len;
	uint8_t		nexthdr;
	uint8_t		hop_limit;

	uint8_t		saddr[16];
	uint8_t		daddr[16];
};

/* Type used to store a checksum (in host byte order) that hasn't been
 * folded yet.
 */
#define rawsum_t uint32_t

rawsum_t nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum);
uint16_t nm_csum_ipv4(struct nm_iphdr *iph);
void nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
		      size_t datalen, uint16_t *check);
void nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
		      size_t datalen, uint16_t *check);
uint16_t nm_csum_fold(rawsum_t cur_sum);
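
/*
 * Example (sketch, pointers and length hypothetical): computing the
 * checksums of an IPv4/TCP packet whose IP header starts at 'iph' and
 * whose TCP header starts at 'tcph', with 'datalen' bytes of TCP
 * header + payload.
 *
 *	iph->check = 0;
 *	iph->check = nm_csum_ipv4(iph);
 *	tcph->check = 0;	// clear before recomputing, to be safe
 *	nm_csum_tcpudp_ipv4(iph, tcph, datalen, &tcph->check);
 */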

void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
			   struct netmap_vp_adapter *dst_na,
			   struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
			   u_int *j, u_int lim, u_int *howmany);

/* persistent virtual port routines */
int nm_vi_persist(const char *, struct ifnet **);
void nm_vi_detach(struct ifnet *);
void nm_vi_init_index(void);

#endif /* _NET_NETMAP_KERN_H_ */