xref: /freebsd/sys/dev/netmap/netmap_vale.c (revision ccfb965433c67f3bda935a3cdf334be2e3c4348d)
1 /*
2  * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *   1. Redistributions of source code must retain the above copyright
8  *      notice, this list of conditions and the following disclaimer.
9  *   2. Redistributions in binary form must reproduce the above copyright
10  *      notice, this list of conditions and the following disclaimer in the
11  *      documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 
27 /*
28  * This module implements the VALE switch for netmap
29 
30 --- VALE SWITCH ---
31 
32 NMG_LOCK() serializes all modifications to switches and ports.
33 A switch cannot be deleted until all ports are gone.
34 
35 For each switch, an SX lock (RWlock on linux) protects
36 deletion of ports. When attaching or detaching a port, the
37 lock is acquired in exclusive mode (after holding NMG_LOCK).
38 When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
39 The lock is held throughout the entire forwarding cycle,
40 during which the thread may incur a page fault.
41 Hence it is important that sleepable shared locks be used.
42 
43 On the rx ring, the per-port lock is grabbed initially to reserve
44 a number of slots in the ring, then the lock is released,
45 packets are copied from source to destination, and then
46 the lock is acquired again and the receive ring is updated.
47 (A similar thing is done on the tx ring for NIC and host stack
48 ports attached to the switch)
49 
50  */
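
/*
 * Illustrative sketch (not part of the build) of the forwarding cycle
 * described above; the helper name is hypothetical, the real logic
 * lives in nm_bdg_preflush() and nm_bdg_flush() below.
 */
#if 0
static void
vale_forward_cycle_sketch(struct nm_bridge *b, struct netmap_kring *rxkring)
{
	u_int first, lease_idx, n = 10;	/* we want room for 10 packets */

	BDG_RLOCK(b);		/* shared, sleepable: page faults are ok */
	mtx_lock(&rxkring->q_lock);
	first = rxkring->nkr_hwlease;		/* first reserved slot */
	lease_idx = nm_kr_lease(rxkring, n, 1);	/* reserve n rx slots */
	mtx_unlock(&rxkring->q_lock);

	/* copy packets into slots first..first+n-1 (mod ring size),
	 * without holding q_lock
	 */

	mtx_lock(&rxkring->q_lock);
	/* report completion in nkr_leases[lease_idx]; the writer whose
	 * lease starts at nr_hwtail advances the ring for everybody
	 */
	mtx_unlock(&rxkring->q_lock);
	BDG_RUNLOCK(b);
}
#endif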
51 
52 /*
53  * OS-specific code that is used only within this file.
54  * Other OS-specific code that must be accessed by drivers
55  * is present in netmap_kern.h
56  */
57 
58 #if defined(__FreeBSD__)
59 #include <sys/cdefs.h> /* prerequisite */
60 __FBSDID("$FreeBSD$");
61 
62 #include <sys/types.h>
63 #include <sys/errno.h>
64 #include <sys/param.h>	/* defines used in kernel.h */
65 #include <sys/kernel.h>	/* types used in module initialization */
66 #include <sys/conf.h>	/* cdevsw struct, UID, GID */
67 #include <sys/sockio.h>
68 #include <sys/socketvar.h>	/* struct socket */
69 #include <sys/malloc.h>
70 #include <sys/poll.h>
71 #include <sys/rwlock.h>
72 #include <sys/socket.h> /* sockaddrs */
73 #include <sys/selinfo.h>
74 #include <sys/sysctl.h>
75 #include <net/if.h>
76 #include <net/if_var.h>
77 #include <net/bpf.h>		/* BIOCIMMEDIATE */
78 #include <machine/bus.h>	/* bus_dmamap_* */
79 #include <sys/endian.h>
80 #include <sys/refcount.h>
81 
82 
83 #define BDG_RWLOCK_T		struct rwlock
84 
85 #define	BDG_RWINIT(b)		\
86 	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
87 #define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
88 #define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)
89 #define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
90 #define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
91 #define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
92 #define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)
93 
94 
95 #elif defined(linux)
96 
97 #include "bsd_glue.h"
98 
99 #elif defined(__APPLE__)
100 
101 #warning OSX support is only partial
102 #include "osx_glue.h"
103 
104 #else
105 
106 #error	Unsupported platform
107 
108 #endif /* unsupported */
109 
110 /*
111  * common headers
112  */
113 
114 #include <net/netmap.h>
115 #include <dev/netmap/netmap_kern.h>
116 #include <dev/netmap/netmap_mem2.h>
117 
118 #ifdef WITH_VALE
119 
120 /*
121  * system parameters (most of them in netmap_kern.h)
122  * NM_NAME	prefix for switch port names, default "vale"
123  * NM_BDG_MAXPORTS	number of ports
124  * NM_BRIDGES	max number of switches in the system.
125  *	XXX should become a sysctl or tunable
126  *
127  * Switch ports are named valeX:Y where X is the switch name and Y
128  * is the port. If Y matches a physical interface name, the port is
129  * connected to a physical device.
130  *
131  * Unlike physical interfaces, switch ports use their own memory region
132  * for rings and buffers.
133  * The virtual interfaces use per-queue locks instead of a core lock.
134  * In the tx loop, we aggregate traffic in batches to make all operations
135  * faster. The batch size is bridge_batch.
136  */
137 #define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
138 #define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
139 #define NM_BRIDGE_RINGSIZE	1024	/* in the device */
140 #define NM_BDG_HASH		1024	/* forwarding table entries */
141 #define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
142 #define NM_MULTISEG		64	/* max size of a chain of bufs */
143 /* actual size of the tables */
144 #define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
145 /* NM_FT_NULL terminates a list of slots in the ft */
146 #define NM_FT_NULL		NM_BDG_BATCH_MAX
147 #define	NM_BRIDGES		8	/* number of bridges */
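
/*
 * Example (userspace, illustrative only): a process attaches to port
 * "p0" of switch "vale0" with the usual NIOCREGIF ioctl; the port is
 * created on the fly if it does not exist. Error handling omitted.
 */
#if 0
	struct nmreq req;
	int fd = open("/dev/netmap", O_RDWR);

	bzero(&req, sizeof(req));
	req.nr_version = NETMAP_API;
	strncpy(req.nr_name, "vale0:p0", sizeof(req.nr_name));
	ioctl(fd, NIOCREGIF, &req);
	/* rings and buffers are then reached via mmap(.., req.nr_memsize, ..) */
#endif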
148 
149 
150 /*
151  * bridge_batch is set via sysctl to the max batch size to be
152  * used in the bridge. The actual value may be larger as the
153  * last packet in the block may overflow the size.
154  */
155 int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
156 SYSCTL_DECL(_dev_netmap);
157 SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0, "");
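/* e.g. from the shell: sysctl dev.netmap.bridge_batch=512 */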
158 
159 
160 static int netmap_vp_create(struct nmreq *, struct ifnet *, struct netmap_vp_adapter **);
161 static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
162 static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
163 
164 /*
165  * For each output interface, nm_bdg_q is used to construct a list.
166  * bq_len is the number of output buffers (we can have coalescing
167  * during the copy).
168  */
169 struct nm_bdg_q {
170 	uint16_t bq_head;
171 	uint16_t bq_tail;
172 	uint32_t bq_len;	/* number of buffers */
173 };
174 
175 /* XXX revise this */
176 struct nm_hash_ent {
177 	uint64_t	mac;	/* the top 2 bytes are the epoch */
178 	uint64_t	ports;
179 };
180 
181 /*
182  * nm_bridge is a descriptor for a VALE switch.
183  * Interfaces for a bridge are all in bdg_ports[].
184  * The array has a fixed size; an empty entry does not terminate
185  * the search, but lookups only occur on attach/detach so we
186  * don't mind if they are slow.
187  *
188  * The bridge is non-blocking on the transmit ports: excess
189  * packets are dropped if there is no room on the output port.
190  *
191  * bdg_lock protects accesses to the bdg_ports array.
192  * This is a rw lock (or equivalent).
193  */
194 struct nm_bridge {
195 	/* XXX what is the proper alignment/layout ? */
196 	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
197 	int		bdg_namelen;
198 	uint32_t	bdg_active_ports; /* 0 means free */
199 	char		bdg_basename[IFNAMSIZ];
200 
201 	/* Indexes of active ports (up to active_ports)
202 	 * and all other remaining ports.
203 	 */
204 	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];
205 
206 	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
207 
208 
209 	/*
210 	 * The function to decide the destination port.
211 	 * It returns either the index of the destination port,
212 	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT
213 	 * to drop this packet.  ring_nr is the source ring index, and the
214 	 * function may overwrite this value to forward this packet to a
215 	 * different ring index.
216 	 * This function must be set by netmap_bdg_ctl().
217 	 */
218 	struct netmap_bdg_ops bdg_ops;
219 
220 	/* the forwarding table, MAC+ports.
221 	 * XXX should be changed to an argument to be passed to
222 	 * the lookup function, and allocated on attach
223 	 */
224 	struct nm_hash_ent ht[NM_BDG_HASH];
225 };
226 
227 const char*
228 netmap_bdg_name(struct netmap_vp_adapter *vp)
229 {
230 	struct nm_bridge *b = vp->na_bdg;
231 	if (b == NULL)
232 		return NULL;
233 	return b->bdg_basename;
234 }
235 
236 
237 /*
238  * XXX in principle nm_bridges could be created dynamically
239  * Right now we have a static array and deletions are protected
240  * by an exclusive lock.
241  */
242 struct nm_bridge nm_bridges[NM_BRIDGES];
243 
244 
245 /*
246  * this is a slightly optimized copy routine which rounds
247  * to a multiple of 64 bytes and is often faster than dealing
248  * with other odd sizes. We assume there is enough room
249  * in the source and destination buffers.
250  *
251  * XXX only for multiples of 64 bytes, non-overlapping buffers.
252  */
253 static inline void
254 pkt_copy(void *_src, void *_dst, int l)
255 {
256         uint64_t *src = _src;
257         uint64_t *dst = _dst;
258         if (unlikely(l >= 1024)) {
259                 memcpy(dst, src, l);
260                 return;
261         }
262         for (; likely(l > 0); l-=64) {
263                 *dst++ = *src++;
264                 *dst++ = *src++;
265                 *dst++ = *src++;
266                 *dst++ = *src++;
267                 *dst++ = *src++;
268                 *dst++ = *src++;
269                 *dst++ = *src++;
270                 *dst++ = *src++;
271         }
272 }
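
/*
 * Example (illustrative): callers round the length up to a multiple of
 * 64 themselves, as nm_bdg_flush() below does before copying:
 *
 *	copy_len = (len + 63) & ~63;
 *	pkt_copy(src, dst, (int)copy_len);
 *
 * so both buffers must be large enough for the rounded size.
 */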
273 
274 
275 /*
276  * locate a bridge among the existing ones.
277  * MUST BE CALLED WITH NMG_LOCK()
278  *
279  * A ':' in the name terminates the bridge name; otherwise the bridge name is just NM_NAME.
280  * We assume that this is called with a name of at least NM_NAME chars.
281  */
282 static struct nm_bridge *
283 nm_find_bridge(const char *name, int create)
284 {
285 	int i, l, namelen;
286 	struct nm_bridge *b = NULL;
287 
288 	NMG_LOCK_ASSERT();
289 
290 	namelen = strlen(NM_NAME);	/* base length */
291 	l = name ? strlen(name) : 0;		/* actual length */
292 	if (l < namelen) {
293 		D("invalid bridge name %s", name ? name : "(null)");
294 		return NULL;
295 	}
296 	for (i = namelen + 1; i < l; i++) {
297 		if (name[i] == ':') {
298 			namelen = i;
299 			break;
300 		}
301 	}
302 	if (namelen >= IFNAMSIZ)
303 		namelen = IFNAMSIZ - 1;	/* leave room for the NUL terminator */
304 	ND("--- prefix is '%.*s' ---", namelen, name);
305 
306 	/* lookup the name, remember empty slot if there is one */
307 	for (i = 0; i < NM_BRIDGES; i++) {
308 		struct nm_bridge *x = nm_bridges + i;
309 
310 		if (x->bdg_active_ports == 0) {
311 			if (create && b == NULL)
312 				b = x;	/* record empty slot */
313 		} else if (x->bdg_namelen != namelen) {
314 			continue;
315 		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
316 			ND("found '%.*s' at %d", namelen, name, i);
317 			b = x;
318 			break;
319 		}
320 	}
321 	if (i == NM_BRIDGES && b) { /* name not found, can create entry */
322 		/* initialize the bridge */
323 		strncpy(b->bdg_basename, name, namelen);
		b->bdg_basename[namelen] = '\0';	/* namelen < IFNAMSIZ, see above */
324 		ND("create new bridge %s with ports %d", b->bdg_basename,
325 			b->bdg_active_ports);
326 		b->bdg_namelen = namelen;
327 		b->bdg_active_ports = 0;
328 		for (i = 0; i < NM_BDG_MAXPORTS; i++)
329 			b->bdg_port_index[i] = i;
330 		/* set the default function */
331 		b->bdg_ops.lookup = netmap_bdg_learning;
332 		/* reset the MAC address table */
333 		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
334 	}
335 	return b;
336 }
337 
338 
339 /*
340  * Free the forwarding tables for rings attached to switch ports.
341  */
342 static void
343 nm_free_bdgfwd(struct netmap_adapter *na)
344 {
345 	int nrings, i;
346 	struct netmap_kring *kring;
347 
348 	NMG_LOCK_ASSERT();
349 	nrings = na->num_tx_rings;
350 	kring = na->tx_rings;
351 	for (i = 0; i < nrings; i++) {
352 		if (kring[i].nkr_ft) {
353 			free(kring[i].nkr_ft, M_DEVBUF);
354 			kring[i].nkr_ft = NULL; /* protect from freeing twice */
355 		}
356 	}
357 }
358 
359 
360 /*
361  * Allocate the forwarding tables for the rings attached to the bridge ports.
362  */
363 static int
364 nm_alloc_bdgfwd(struct netmap_adapter *na)
365 {
366 	int nrings, l, i, num_dstq;
367 	struct netmap_kring *kring;
368 
369 	NMG_LOCK_ASSERT();
370 	/* all port:rings + broadcast */
371 	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
372 	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
373 	l += sizeof(struct nm_bdg_q) * num_dstq;
374 	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
375 
376 	nrings = netmap_real_tx_rings(na);
377 	kring = na->tx_rings;
378 	for (i = 0; i < nrings; i++) {
379 		struct nm_bdg_fwd *ft;
380 		struct nm_bdg_q *dstq;
381 		int j;
382 
383 		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
384 		if (!ft) {
385 			nm_free_bdgfwd(na);
386 			return ENOMEM;
387 		}
388 		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
389 		for (j = 0; j < num_dstq; j++) {
390 			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
391 			dstq[j].bq_len = 0;
392 		}
393 		kring[i].nkr_ft = ft;
394 	}
395 	return 0;
396 }
397 
398 
399 /* remove from bridge b the ports in slots hw and sw
400  * (sw can be -1 if not needed)
401  */
402 static void
403 netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
404 {
405 	int s_hw = hw, s_sw = sw;
406 	int i, lim =b->bdg_active_ports;
407 	uint8_t tmp[NM_BDG_MAXPORTS];
408 
409 	/*
410 	New algorithm:
411 	make a copy of bdg_port_index;
412 	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
413 	in the array of bdg_port_index, replacing them with
414 	entries from the bottom of the array;
415 	decrement bdg_active_ports;
416 	acquire BDG_WLOCK() and copy back the array.
417 	 */
418 
419 	if (netmap_verbose)
420 		D("detach %d and %d (lim %d)", hw, sw, lim);
421 	/* make a copy of the list of active ports, update it,
422 	 * and then copy back within BDG_WLOCK().
423 	 */
424 	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
425 	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
426 		if (hw >= 0 && tmp[i] == hw) {
427 			ND("detach hw %d at %d", hw, i);
428 			lim--; /* point to last active port */
429 			tmp[i] = tmp[lim]; /* swap with i */
430 			tmp[lim] = hw;	/* now this is inactive */
431 			hw = -1;
432 		} else if (sw >= 0 && tmp[i] == sw) {
433 			ND("detach sw %d at %d", sw, i);
434 			lim--;
435 			tmp[i] = tmp[lim];
436 			tmp[lim] = sw;
437 			sw = -1;
438 		} else {
439 			i++;
440 		}
441 	}
442 	if (hw >= 0 || sw >= 0) {
443 		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
444 	}
445 
446 	BDG_WLOCK(b);
447 	if (b->bdg_ops.dtor)
448 		b->bdg_ops.dtor(b->bdg_ports[s_hw]);
449 	b->bdg_ports[s_hw] = NULL;
450 	if (s_sw >= 0) {
451 		b->bdg_ports[s_sw] = NULL;
452 	}
453 	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
454 	b->bdg_active_ports = lim;
455 	BDG_WUNLOCK(b);
456 
457 	ND("now %d active ports", lim);
458 	if (lim == 0) {
459 		ND("marking bridge %s as free", b->bdg_basename);
460 		bzero(&b->bdg_ops, sizeof(b->bdg_ops));
461 	}
462 }
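
/*
 * Worked example of the scheme above: with, for instance,
 *	bdg_port_index = [ 2 0 3 1 ], bdg_active_ports = 4,
 * detaching port 0 swaps it with the last active entry, leaving
 *	bdg_port_index = [ 2 1 3 | 0 ], bdg_active_ports = 3,
 * so the active ports always occupy the first bdg_active_ports entries.
 */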
463 
464 /* nm_bdg_ctl callback for VALE ports */
465 static int
466 netmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
467 {
468 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
469 	struct nm_bridge *b = vpna->na_bdg;
470 
471 	if (attach)
472 		return 0; /* nothing to do */
473 	if (b) {
474 		netmap_set_all_rings(na, 0 /* disable */);
475 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
476 		vpna->na_bdg = NULL;
477 		netmap_set_all_rings(na, 1 /* enable */);
478 	}
479 	/* the reference was taken only for the attach, release it now */
480 	netmap_adapter_put(na);
481 	return 0;
482 }
483 
484 /* nm_dtor callback for ephemeral VALE ports */
485 static void
486 netmap_vp_dtor(struct netmap_adapter *na)
487 {
488 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
489 	struct nm_bridge *b = vpna->na_bdg;
490 
491 	ND("%s has %d references", na->name, na->na_refcount);
492 
493 	if (b) {
494 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
495 	}
496 }
497 
498 /* nm_dtor callback for persistent VALE ports */
499 static void
500 netmap_persist_vp_dtor(struct netmap_adapter *na)
501 {
502 	struct ifnet *ifp = na->ifp;
503 
504 	netmap_vp_dtor(na);
505 	na->ifp = NULL;
506 	nm_vi_detach(ifp);
507 }
508 
509 /* remove a persistent VALE port from the system */
510 static int
511 nm_vi_destroy(const char *name)
512 {
513 	struct ifnet *ifp;
514 	int error;
515 
516 	ifp = ifunit_ref(name);
517 	if (!ifp)
518 		return ENXIO;
519 	NMG_LOCK();
520 	/* make sure this is actually a VALE port */
521 	if (!NETMAP_CAPABLE(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
522 		error = EINVAL;
523 		goto err;
524 	}
525 
526 	if (NA(ifp)->na_refcount > 1) {
527 		error = EBUSY;
528 		goto err;
529 	}
530 	NMG_UNLOCK();
531 
532 	D("destroying a persistent vale interface %s", ifp->if_xname);
533 	/* Linux requires that all references be released
534 	 * before unregistering
535 	 */
536 	if_rele(ifp);
537 	netmap_detach(ifp);
538 	return 0;
539 
540 err:
541 	NMG_UNLOCK();
542 	if_rele(ifp);
543 	return error;
544 }
545 
546 /*
547  * Create a virtual interface registered to the system.
548  * The interface will be attached to a bridge later.
549  */
550 static int
551 nm_vi_create(struct nmreq *nmr)
552 {
553 	struct ifnet *ifp;
554 	struct netmap_vp_adapter *vpna;
555 	int error;
556 
557 	/* persistent port names must not begin with the VALE prefix */
558 	if (!strncmp(nmr->nr_name, NM_NAME, strlen(NM_NAME)))
559 		return EINVAL;
560 	ifp = ifunit_ref(nmr->nr_name);
561 	if (ifp) { /* already exists, cannot create a new one */
562 		if_rele(ifp);
563 		return EEXIST;
564 	}
565 	error = nm_vi_persist(nmr->nr_name, &ifp);
566 	if (error)
567 		return error;
568 
569 	NMG_LOCK();
570 	/* netmap_vp_create creates a struct netmap_vp_adapter */
571 	error = netmap_vp_create(nmr, ifp, &vpna);
572 	if (error) {
573 		D("error %d", error);
		NMG_UNLOCK();	/* do not leak the global lock on this error path */
574 		nm_vi_detach(ifp);
575 		return error;
576 	}
577 	/* persist-specific routines */
578 	vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
579 	vpna->up.nm_dtor = netmap_persist_vp_dtor;
580 	netmap_adapter_get(&vpna->up);
581 	NMG_UNLOCK();
582 	D("created %s", ifp->if_xname);
583 	return 0;
584 }
585 
586 /* Try to get a reference to a netmap adapter attached to a VALE switch.
587  * If the adapter is found (or is created), this function returns 0, a
588  * non NULL pointer is returned into *na, and the caller holds a
589  * reference to the adapter.
590  * If an adapter is not found, then no reference is grabbed and the
591  * function returns an error code, or 0 if there is just a VALE prefix
592  * mismatch. Therefore the caller holds a reference when
593  * (*na != NULL && return == 0).
594  */
595 int
596 netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
597 {
598 	char *nr_name = nmr->nr_name;
599 	const char *ifname;
600 	struct ifnet *ifp;
601 	int error = 0;
602 	struct netmap_vp_adapter *vpna, *hostna = NULL;
603 	struct nm_bridge *b;
604 	int i, j, cand = -1, cand2 = -1;
605 	int needed;
606 
607 	*na = NULL;     /* default return value */
608 
609 	/* first try to see if this is a bridge port. */
610 	NMG_LOCK_ASSERT();
611 	if (strncmp(nr_name, NM_NAME, sizeof(NM_NAME) - 1)) {
612 		return 0;  /* no error, but no VALE prefix */
613 	}
614 
615 	b = nm_find_bridge(nr_name, create);
616 	if (b == NULL) {
617 		D("no bridges available for '%s'", nr_name);
618 		return (create ? ENOMEM : ENXIO);
619 	}
620 	if (strlen(nr_name) < b->bdg_namelen) /* impossible */
621 		panic("bridge name shorter than the matched bridge prefix");
622 
623 	/* Now we are sure that name starts with the bridge's name,
624 	 * lookup the port in the bridge. We need to scan the entire
625 	 * list. It is not important to hold a WLOCK on the bridge
626 	 * during the search because NMG_LOCK already guarantees
627 	 * that there are no other possible writers.
628 	 */
629 
630 	/* lookup in the local list of ports */
631 	for (j = 0; j < b->bdg_active_ports; j++) {
632 		i = b->bdg_port_index[j];
633 		vpna = b->bdg_ports[i];
634 		// KASSERT(na != NULL);
635 		D("checking %s", vpna->up.name);
636 		if (!strcmp(vpna->up.name, nr_name)) {
637 			netmap_adapter_get(&vpna->up);
638 			ND("found existing if %s", nr_name);
639 			*na = &vpna->up;
640 			return 0;
641 		}
642 	}
643 	/* not found, should we create it? */
644 	if (!create)
645 		return ENXIO;
646 	/* yes we should, see if we have space to attach entries */
647 	needed = 2; /* in some cases we only need 1 */
648 	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
649 		D("bridge full %d, cannot create new port", b->bdg_active_ports);
650 		return ENOMEM;
651 	}
652 	/* record the next two ports available, but do not allocate yet */
653 	cand = b->bdg_port_index[b->bdg_active_ports];
654 	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
655 	ND("+++ bridge %s port %s used %d avail %d %d",
656 		b->bdg_basename, nr_name, b->bdg_active_ports, cand, cand2);
657 
658 	/*
659 	 * try to see if there is a matching NIC with this name
660 	 * (after the bridge's name)
661 	 */
662 	ifname = nr_name + b->bdg_namelen + 1;
663 	ifp = ifunit_ref(ifname);
664 	if (!ifp) {
665 		/* Create an ephemeral virtual port
666 		 * This block contains all the ephemeral-specific logics
667 		 */
668 		if (nmr->nr_cmd) {
669 			/* nr_cmd must be 0 for a virtual port */
670 			return EINVAL;
671 		}
672 
673 		/* bdg_netmap_attach creates a struct netmap_adapter */
674 		error = netmap_vp_create(nmr, NULL, &vpna);
675 		if (error) {
676 			D("error %d", error);
677 			/* nothing to free: ifp is NULL in this branch */
678 			return error;
679 		}
680 		/* shortcut - we can skip get_hw_na(),
681 		 * ownership check and nm_bdg_attach()
682 		 */
683 	} else {
684 		struct netmap_adapter *hw;
685 
686 		error = netmap_get_hw_na(ifp, &hw);
687 		if (error || hw == NULL)
688 			goto out;
689 
690 		/* host adapter might not be created */
691 		error = hw->nm_bdg_attach(nr_name, hw);
692 		if (error)
693 			goto out;
694 		vpna = hw->na_vp;
695 		hostna = hw->na_hostvp;
696 		if_rele(ifp);
697 		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
698 			hostna = NULL;
699 	}
700 
701 	BDG_WLOCK(b);
702 	vpna->bdg_port = cand;
703 	ND("NIC  %p to bridge port %d", vpna, cand);
704 	/* bind the port to the bridge (virtual ports are not active) */
705 	b->bdg_ports[cand] = vpna;
706 	vpna->na_bdg = b;
707 	b->bdg_active_ports++;
708 	if (hostna != NULL) {
709 		/* also bind the host stack to the bridge */
710 		b->bdg_ports[cand2] = hostna;
711 		hostna->bdg_port = cand2;
712 		hostna->na_bdg = b;
713 		b->bdg_active_ports++;
714 		ND("host %p to bridge port %d", hostna, cand2);
715 	}
716 	ND("if %s refs %d", ifname, vpna->up.na_refcount);
717 	BDG_WUNLOCK(b);
718 	*na = &vpna->up;
719 	netmap_adapter_get(*na);
720 	return 0;
721 
722 out:
723 	if_rele(ifp);
724 
725 	return error;
726 }
727 
728 
729 /* Process NETMAP_BDG_ATTACH */
730 static int
731 nm_bdg_ctl_attach(struct nmreq *nmr)
732 {
733 	struct netmap_adapter *na;
734 	int error;
735 
736 	NMG_LOCK();
737 
738 	error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */);
739 	if (error) /* no device */
740 		goto unlock_exit;
741 
742 	if (na == NULL) { /* VALE prefix missing */
743 		error = EINVAL;
744 		goto unlock_exit;
745 	}
746 
747 	if (NETMAP_OWNED_BY_ANY(na)) {
748 		error = EBUSY;
749 		goto unref_exit;
750 	}
751 
752 	if (na->nm_bdg_ctl) {
753 		/* nop for VALE ports. The bwrap needs to put the hwna
754 		 * in netmap mode (see netmap_bwrap_bdg_ctl)
755 		 */
756 		error = na->nm_bdg_ctl(na, nmr, 1);
757 		if (error)
758 			goto unref_exit;
759 		ND("registered %s to netmap-mode", na->name);
760 	}
761 	NMG_UNLOCK();
762 	return 0;
763 
764 unref_exit:
765 	netmap_adapter_put(na);
766 unlock_exit:
767 	NMG_UNLOCK();
768 	return error;
769 }
770 
771 
772 /* process NETMAP_BDG_DETACH */
773 static int
774 nm_bdg_ctl_detach(struct nmreq *nmr)
775 {
776 	struct netmap_adapter *na;
777 	int error;
778 
779 	NMG_LOCK();
780 	error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */);
781 	if (error) { /* no device, or another bridge or user owns the device */
782 		goto unlock_exit;
783 	}
784 
785 	if (na == NULL) { /* VALE prefix missing */
786 		error = EINVAL;
787 		goto unlock_exit;
788 	}
789 
790 	if (na->nm_bdg_ctl) {
791 		/* remove the port from bridge. The bwrap
792 		 * also needs to put the hwna in normal mode
793 		 */
794 		error = na->nm_bdg_ctl(na, nmr, 0);
795 	}
796 
797 	netmap_adapter_put(na);
798 unlock_exit:
799 	NMG_UNLOCK();
800 	return error;
801 
802 }
803 
804 
805 /* Called by either user's context (netmap_ioctl())
806  * or external kernel modules (e.g., Openvswitch).
807  * Operation is indicated in nmr->nr_cmd.
808  * NETMAP_BDG_OPS that sets configure/lookup/dtor functions to the bridge
809  * requires bdg_ops argument; the other commands ignore this argument.
810  *
811  * Called without NMG_LOCK.
812  */
813 int
814 netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
815 {
816 	struct nm_bridge *b;
817 	struct netmap_adapter *na;
818 	struct netmap_vp_adapter *vpna;
819 	char *name = nmr->nr_name;
820 	int cmd = nmr->nr_cmd, namelen = strlen(name);
821 	int error = 0, i, j;
822 
823 	switch (cmd) {
824 	case NETMAP_BDG_NEWIF:
825 		error = nm_vi_create(nmr);
826 		break;
827 
828 	case NETMAP_BDG_DELIF:
829 		error = nm_vi_destroy(nmr->nr_name);
830 		break;
831 
832 	case NETMAP_BDG_ATTACH:
833 		error = nm_bdg_ctl_attach(nmr);
834 		break;
835 
836 	case NETMAP_BDG_DETACH:
837 		error = nm_bdg_ctl_detach(nmr);
838 		break;
839 
840 	case NETMAP_BDG_LIST:
841 		/* this is used to enumerate bridges and ports */
842 		if (namelen) { /* look up indexes of bridge and port */
843 			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
844 				error = EINVAL;
845 				break;
846 			}
847 			NMG_LOCK();
848 			b = nm_find_bridge(name, 0 /* don't create */);
849 			if (!b) {
850 				error = ENOENT;
851 				NMG_UNLOCK();
852 				break;
853 			}
854 
855 			name = name + b->bdg_namelen + 1;
856 			error = ENOENT;
857 			for (j = 0; j < b->bdg_active_ports; j++) {
858 				i = b->bdg_port_index[j];
859 				vpna = b->bdg_ports[i];
860 				if (vpna == NULL) {
861 					D("NULL adapter in the active port list");
862 					continue;
863 				}
864 				/* the former and the latter identify a
865 				 * virtual port and a NIC, respectively
866 				 */
867 				if (!strcmp(vpna->up.name, name)) {
868 					/* bridge index */
869 					nmr->nr_arg1 = b - nm_bridges;
870 					nmr->nr_arg2 = i; /* port index */
871 					error = 0;
872 					break;
873 				}
874 			}
875 			NMG_UNLOCK();
876 		} else {
877 			/* return the first non-empty entry starting from
878 			 * bridge nr_arg1 and port nr_arg2.
879 			 *
880 			 * Users can detect the end of the same bridge by
881 			 * seeing the new and old value of nr_arg1, and can
882 			 * detect the end of all the bridges by error != 0
883 			 */
884 			i = nmr->nr_arg1;
885 			j = nmr->nr_arg2;
886 
887 			NMG_LOCK();
888 			for (error = ENOENT; i < NM_BRIDGES; i++) {
889 				b = nm_bridges + i;
890 				if (j >= b->bdg_active_ports) {
891 					j = 0; /* following bridges scan from 0 */
892 					continue;
893 				}
894 				nmr->nr_arg1 = i;
895 				nmr->nr_arg2 = j;
896 				j = b->bdg_port_index[j];
897 				vpna = b->bdg_ports[j];
898 				strncpy(name, vpna->up.name, (size_t)IFNAMSIZ);
899 				error = 0;
900 				break;
901 			}
902 			NMG_UNLOCK();
903 		}
904 		break;
905 
906 	case NETMAP_BDG_REGOPS: /* XXX this should not be available from userspace */
907 		/* register callbacks to the given bridge.
908 		 * nmr->nr_name may be just bridge's name (including ':'
909 		 * if it is not just NM_NAME).
910 		 */
911 		if (!bdg_ops) {
912 			error = EINVAL;
913 			break;
914 		}
915 		NMG_LOCK();
916 		b = nm_find_bridge(name, 0 /* don't create */);
917 		if (!b) {
918 			error = EINVAL;
919 		} else {
920 			b->bdg_ops = *bdg_ops;
921 		}
922 		NMG_UNLOCK();
923 		break;
924 
925 	case NETMAP_BDG_VNET_HDR:
926 		/* Valid lengths for the virtio-net header are 0 (no header),
927 		   10 and 12. */
928 		if (nmr->nr_arg1 != 0 &&
929 			nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) &&
930 				nmr->nr_arg1 != 12) {
931 			error = EINVAL;
932 			break;
933 		}
934 		NMG_LOCK();
935 		error = netmap_get_bdg_na(nmr, &na, 0);
936 		if (na && !error) {
937 			vpna = (struct netmap_vp_adapter *)na;
938 			vpna->virt_hdr_len = nmr->nr_arg1;
939 			if (vpna->virt_hdr_len)
940 				vpna->mfs = NETMAP_BUF_SIZE(na);
941 			D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna);
942 			netmap_adapter_put(na);
943 		}
944 		NMG_UNLOCK();
945 		break;
946 
947 	default:
948 		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
949 		error = EINVAL;
950 		break;
951 	}
952 	return error;
953 }
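
/*
 * Illustrative sketch (not part of the build): how an external kernel
 * module could install its own lookup function on an existing switch
 * through NETMAP_BDG_REGOPS above. The lookup callback has the same
 * signature as netmap_bdg_learning(); my_lookup and my_install are
 * hypothetical names.
 */
#if 0
static u_int
my_lookup(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
	const struct netmap_vp_adapter *na)
{
	*dst_ring = 0;			/* always use ring 0 */
	return NM_BDG_BROADCAST;	/* flood every packet */
}

static int
my_install(void)
{
	struct nmreq nmr;
	struct netmap_bdg_ops ops = { .lookup = my_lookup };

	bzero(&nmr, sizeof(nmr));
	nmr.nr_cmd = NETMAP_BDG_REGOPS;
	strncpy(nmr.nr_name, NM_NAME "0:", sizeof(nmr.nr_name));
	return netmap_bdg_ctl(&nmr, &ops);
}
#endif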
954 
955 int
956 netmap_bdg_config(struct nmreq *nmr)
957 {
958 	struct nm_bridge *b;
959 	int error = EINVAL;
960 
961 	NMG_LOCK();
962 	b = nm_find_bridge(nmr->nr_name, 0);
963 	if (!b) {
964 		NMG_UNLOCK();
965 		return error;
966 	}
967 	NMG_UNLOCK();
968 	/* Don't call config() with NMG_LOCK() held */
969 	BDG_RLOCK(b);
970 	if (b->bdg_ops.config != NULL)
971 		error = b->bdg_ops.config((struct nm_ifreq *)nmr);
972 	BDG_RUNLOCK(b);
973 	return error;
974 }
975 
976 
977 /* nm_krings_create callback for VALE ports.
978  * Calls the standard netmap_krings_create, then adds leases on rx
979  * rings and bdgfwd on tx rings.
980  */
981 static int
982 netmap_vp_krings_create(struct netmap_adapter *na)
983 {
984 	u_int tailroom;
985 	int error, i;
986 	uint32_t *leases;
987 	u_int nrx = netmap_real_rx_rings(na);
988 
989 	/*
990 	 * Leases are attached to RX rings on vale ports
991 	 */
992 	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
993 
994 	error = netmap_krings_create(na, tailroom);
995 	if (error)
996 		return error;
997 
998 	leases = na->tailroom;
999 
1000 	for (i = 0; i < nrx; i++) { /* Receive rings */
1001 		na->rx_rings[i].nkr_leases = leases;
1002 		leases += na->num_rx_desc;
1003 	}
1004 
1005 	error = nm_alloc_bdgfwd(na);
1006 	if (error) {
1007 		netmap_krings_delete(na);
1008 		return error;
1009 	}
1010 
1011 	return 0;
1012 }
1013 
1014 
1015 /* nm_krings_delete callback for VALE ports. */
1016 static void
1017 netmap_vp_krings_delete(struct netmap_adapter *na)
1018 {
1019 	nm_free_bdgfwd(na);
1020 	netmap_krings_delete(na);
1021 }
1022 
1023 
1024 static int
1025 nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
1026 	struct netmap_vp_adapter *na, u_int ring_nr);
1027 
1028 
1029 /*
1030  * main dispatch routine for the bridge.
1031  * Grab packets from a kring, move them into the ft structure
1032  * associated to the tx (input) port. Max one instance per port,
1033  * filtered on input (ioctl, poll or XXX).
1034  * Returns the next position in the ring.
1035  */
1036 static int
1037 nm_bdg_preflush(struct netmap_kring *kring, u_int end)
1038 {
1039 	struct netmap_vp_adapter *na =
1040 		(struct netmap_vp_adapter*)kring->na;
1041 	struct netmap_ring *ring = kring->ring;
1042 	struct nm_bdg_fwd *ft;
1043 	u_int ring_nr = kring->ring_id;
1044 	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
1045 	u_int ft_i = 0;	/* start from 0 */
1046 	u_int frags = 1; /* how many frags ? */
1047 	struct nm_bridge *b = na->na_bdg;
1048 
1049 	/* To protect against modifications to the bridge we acquire a
1050 	 * shared lock, waiting if we can sleep (if the source port is
1051 	 * attached to a user process) or with a trylock otherwise (NICs).
1052 	 */
1053 	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1054 	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
1055 		BDG_RLOCK(b);
1056 	else if (!BDG_RTRYLOCK(b))
1057 		return 0;
1058 	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1059 	ft = kring->nkr_ft;
1060 
1061 	for (; likely(j != end); j = nm_next(j, lim)) {
1062 		struct netmap_slot *slot = &ring->slot[j];
1063 		char *buf;
1064 
1065 		ft[ft_i].ft_len = slot->len;
1066 		ft[ft_i].ft_flags = slot->flags;
1067 
1068 		ND("flags is 0x%x", slot->flags);
1069 		/* this slot goes into a list so initialize the link field */
1070 		ft[ft_i].ft_next = NM_FT_NULL;
1071 		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
1072 			(void *)(uintptr_t)slot->ptr : NMB(&na->up, slot);
1073 		if (unlikely(buf == NULL)) {
1074 			RD(5, "NULL %s buffer pointer from %s slot %d len %d",
1075 				(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
1076 				kring->name, j, ft[ft_i].ft_len);
1077 			buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up);
1078 			ft[ft_i].ft_len = 0;
1079 			ft[ft_i].ft_flags = 0;
1080 		}
1081 		__builtin_prefetch(buf);
1082 		++ft_i;
1083 		if (slot->flags & NS_MOREFRAG) {
1084 			frags++;
1085 			continue;
1086 		}
1087 		if (unlikely(netmap_verbose && frags > 1))
1088 			RD(5, "%d frags at %d", frags, ft_i - frags);
1089 		ft[ft_i - frags].ft_frags = frags;
1090 		frags = 1;
1091 		if (unlikely((int)ft_i >= bridge_batch))
1092 			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1093 	}
1094 	if (frags > 1) {
1095 		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
1096 		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
1097 		ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG;
1098 		ft[ft_i - frags].ft_frags = frags - 1;
1099 	}
1100 	if (ft_i)
1101 		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1102 	BDG_RUNLOCK(b);
1103 	return j;
1104 }
1105 
1106 
1107 /* ----- FreeBSD if_bridge hash function ------- */
1108 
1109 /*
1110  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
1111  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
1112  *
1113  * http://www.burtleburtle.net/bob/hash/spooky.html
1114  */
1115 #define mix(a, b, c)                                                    \
1116 do {                                                                    \
1117         a -= b; a -= c; a ^= (c >> 13);                                 \
1118         b -= c; b -= a; b ^= (a << 8);                                  \
1119         c -= a; c -= b; c ^= (b >> 13);                                 \
1120         a -= b; a -= c; a ^= (c >> 12);                                 \
1121         b -= c; b -= a; b ^= (a << 16);                                 \
1122         c -= a; c -= b; c ^= (b >> 5);                                  \
1123         a -= b; a -= c; a ^= (c >> 3);                                  \
1124         b -= c; b -= a; b ^= (a << 10);                                 \
1125         c -= a; c -= b; c ^= (b >> 15);                                 \
1126 } while (/*CONSTCOND*/0)
1127 
1128 
1129 static __inline uint32_t
1130 nm_bridge_rthash(const uint8_t *addr)
1131 {
1132         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hash key
1133 
1134         b += addr[5] << 8;
1135         b += addr[4];
1136         a += addr[3] << 24;
1137         a += addr[2] << 16;
1138         a += addr[1] << 8;
1139         a += addr[0];
1140 
1141         mix(a, b, c);
1142 #define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
1143         return (c & BRIDGE_RTHASH_MASK);
1144 }
1145 
1146 #undef mix
1147 
1148 
1149 /* nm_register callback for VALE ports */
1150 static int
1151 netmap_vp_reg(struct netmap_adapter *na, int onoff)
1152 {
1153 	struct netmap_vp_adapter *vpna =
1154 		(struct netmap_vp_adapter*)na;
1155 
1156 	/* persistent ports may be put in netmap mode
1157 	 * before being attached to a bridge
1158 	 */
1159 	if (vpna->na_bdg)
1160 		BDG_WLOCK(vpna->na_bdg);
1161 	if (onoff) {
1162 		na->na_flags |= NAF_NETMAP_ON;
1163 		 /* XXX on FreeBSD, persistent VALE ports should also
1164 		 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
1165 		 */
1166 	} else {
1167 		na->na_flags &= ~NAF_NETMAP_ON;
1168 	}
1169 	if (vpna->na_bdg)
1170 		BDG_WUNLOCK(vpna->na_bdg);
1171 	return 0;
1172 }
1173 
1174 
1175 /*
1176  * Lookup function for a learning bridge.
1177  * Update the hash table with the source address,
1178  * and then return the destination port index, and the
1179  * ring in *dst_ring (at the moment, we always use ring 0).
1180  */
1181 u_int
1182 netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
1183 		const struct netmap_vp_adapter *na)
1184 {
1185 	uint8_t *buf = ft->ft_buf;
1186 	u_int buf_len = ft->ft_len;
1187 	struct nm_hash_ent *ht = na->na_bdg->ht;
1188 	uint32_t sh, dh;
1189 	u_int dst, mysrc = na->bdg_port;
1190 	uint64_t smac, dmac;
1191 
1192 	/* safety check, unfortunately we have many cases */
1193 	if (buf_len >= 14 + na->virt_hdr_len) {
1194 		/* virthdr + mac_hdr in the same slot */
1195 		buf += na->virt_hdr_len;
1196 		buf_len -= na->virt_hdr_len;
1197 	} else if (buf_len == na->virt_hdr_len && ft->ft_flags & NS_MOREFRAG) {
1198 		/* only header in first fragment */
1199 		ft++;
1200 		buf = ft->ft_buf;
1201 		buf_len = ft->ft_len;
1202 	} else {
1203 		RD(5, "invalid buf format, length %d", buf_len);
1204 		return NM_BDG_NOPORT;
1205 	}
	/* dst MAC is in bytes 0..5 of the frame, src MAC in bytes 6..11 */
1206 	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1207 	smac = le64toh(*(uint64_t *)(buf + 4));	/* load bytes 4..11 */
1208 	smac >>= 16;		/* drop bytes 4..5, keep the src MAC */
1209 
1210 	/*
1211 	 * The hash is somewhat expensive, there might be some
1212 	 * worthwhile optimizations here.
1213 	 */
1214 	if ((buf[6] & 1) == 0) { /* valid src */
1215 		uint8_t *s = buf+6;
1216 		sh = nm_bridge_rthash(s); // XXX hash of source
1217 		/* update source port forwarding entry */
1218 		ht[sh].mac = smac;	/* XXX expire ? */
1219 		ht[sh].ports = mysrc;
1220 		if (netmap_verbose)
1221 		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1222 			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1223 	}
1224 	dst = NM_BDG_BROADCAST;
1225 	if ((buf[0] & 1) == 0) { /* unicast */
1226 		dh = nm_bridge_rthash(buf); // XXX hash of dst
1227 		if (ht[dh].mac == dmac) {	/* found dst */
1228 			dst = ht[dh].ports;
1229 		}
1230 		/* XXX otherwise return NM_BDG_UNKNOWN ? */
1231 	}
1232 	*dst_ring = 0;
1233 	return dst;
1234 }
1235 
1236 
1237 /*
1238  * Available space in the ring. Only used in VALE code
1239  * and only with is_rx = 1
1240  */
1241 static inline uint32_t
1242 nm_kr_space(struct netmap_kring *k, int is_rx)
1243 {
1244 	int space;
1245 
1246 	if (is_rx) {
1247 		int busy = k->nkr_hwlease - k->nr_hwcur;
1248 		if (busy < 0)
1249 			busy += k->nkr_num_slots;
1250 		space = k->nkr_num_slots - 1 - busy;
1251 	} else {
1252 		/* XXX never used in this branch */
1253 		space = k->nr_hwtail - k->nkr_hwlease;
1254 		if (space < 0)
1255 			space += k->nkr_num_slots;
1256 	}
1257 #if 0
1258 	// sanity check
1259 	if (k->nkr_hwlease >= k->nkr_num_slots ||
1260 		k->nr_hwcur >= k->nkr_num_slots ||
1261 		k->nr_hwtail >= k->nkr_num_slots ||
1262 		busy < 0 ||
1263 		busy >= k->nkr_num_slots) {
1264 		D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",
			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
1265 			k->nkr_lease_idx, k->nkr_num_slots);
1266 	}
1267 #endif
1268 	return space;
1269 }
1270 
1271 
1272 
1273 
1274 /* make a lease on the kring for N positions. return the
1275  * lease index
1276  * XXX only used in VALE code and with is_rx = 1
1277  */
1278 static inline uint32_t
1279 nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
1280 {
1281 	uint32_t lim = k->nkr_num_slots - 1;
1282 	uint32_t lease_idx = k->nkr_lease_idx;
1283 
1284 	k->nkr_leases[lease_idx] = NR_NOSLOT;
1285 	k->nkr_lease_idx = nm_next(lease_idx, lim);
1286 
1287 	if (n > nm_kr_space(k, is_rx)) {
1288 		D("invalid request for %d slots", n);
1289 		panic("not enough slots in the kring");
1290 	}
1291 	/* XXX verify that there are n slots */
1292 	k->nkr_hwlease += n;
1293 	if (k->nkr_hwlease > lim)
1294 		k->nkr_hwlease -= lim + 1;
1295 
1296 	if (k->nkr_hwlease >= k->nkr_num_slots ||
1297 		k->nr_hwcur >= k->nkr_num_slots ||
1298 		k->nr_hwtail >= k->nkr_num_slots ||
1299 		k->nkr_lease_idx >= k->nkr_num_slots) {
1300 		D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
1301 			k->na->name,
1302 			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
1303 			k->nkr_lease_idx, k->nkr_num_slots);
1304 	}
1305 	return lease_idx;
1306 }
1307 
1308 /*
1309  *
1310  * This flush routine supports only unicast and broadcast but a large
1311  * number of ports, and lets us replace the learn and dispatch functions.
1312  */
1313 int
1314 nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1315 		u_int ring_nr)
1316 {
1317 	struct nm_bdg_q *dst_ents, *brddst;
1318 	uint16_t num_dsts = 0, *dsts;
1319 	struct nm_bridge *b = na->na_bdg;
1320 	u_int i, j, me = na->bdg_port;
1321 
1322 	/*
1323 	 * The work area (pointed to by ft) is followed by an array of
1324 	 * pointers to queues, dst_ents; there are NM_BDG_MAXRINGS
1325 	 * queues per port plus one for the broadcast traffic.
1326 	 * Then we have an array of destination indexes.
1327 	 */
1328 	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1329 	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1330 
1331 	/* first pass: find a destination for each packet in the batch */
1332 	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1333 		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1334 		uint16_t dst_port, d_i;
1335 		struct nm_bdg_q *d;
1336 
1337 		ND("slot %d frags %d", i, ft[i].ft_frags);
1338 		/* Drop the packet if the virtio-net header is neither in the first
1339 		   fragment nor at the very beginning of the second. */
1340 		if (unlikely(na->virt_hdr_len > ft[i].ft_len))
1341 			continue;
1342 		dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na);
1343 		if (netmap_verbose > 255)
1344 			RD(5, "slot %d port %d -> %d", i, me, dst_port);
1345 		if (dst_port == NM_BDG_NOPORT)
1346 			continue; /* the lookup function asked to drop this packet */
1347 		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1348 			continue;
1349 		else if (dst_port == NM_BDG_BROADCAST)
1350 			dst_ring = 0; /* broadcasts always go to ring 0 */
1351 		else if (unlikely(dst_port == me ||
1352 		    !b->bdg_ports[dst_port]))
1353 			continue;
1354 
1355 		/* get a position in the scratch pad */
1356 		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1357 		d = dst_ents + d_i;
1358 
1359 		/* append the first fragment to the list */
1360 		if (d->bq_head == NM_FT_NULL) { /* new destination */
1361 			d->bq_head = d->bq_tail = i;
1362 			/* remember this position to be scanned later */
1363 			if (dst_port != NM_BDG_BROADCAST)
1364 				dsts[num_dsts++] = d_i;
1365 		} else {
1366 			ft[d->bq_tail].ft_next = i;
1367 			d->bq_tail = i;
1368 		}
1369 		d->bq_len += ft[i].ft_frags;
1370 	}
1371 
1372 	/*
1373 	 * Broadcast traffic goes to ring 0 on all destinations.
1374 	 * So we need to add these rings to the list of ports to scan.
1375 	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1376 	 * expensive. We should keep a compact list of active destinations
1377 	 * so we could shorten this loop.
1378 	 */
1379 	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1380 	if (brddst->bq_head != NM_FT_NULL) {
1381 		for (j = 0; likely(j < b->bdg_active_ports); j++) {
1382 			uint16_t d_i;
1383 			i = b->bdg_port_index[j];
1384 			if (unlikely(i == me))
1385 				continue;
1386 			d_i = i * NM_BDG_MAXRINGS;
1387 			if (dst_ents[d_i].bq_head == NM_FT_NULL)
1388 				dsts[num_dsts++] = d_i;
1389 		}
1390 	}
1391 
1392 	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
1393 	/* second pass: scan destinations */
1394 	for (i = 0; i < num_dsts; i++) {
1395 		struct netmap_vp_adapter *dst_na;
1396 		struct netmap_kring *kring;
1397 		struct netmap_ring *ring;
1398 		u_int dst_nr, lim, j, d_i, next, brd_next;
1399 		u_int needed, howmany;
1400 		int retry = netmap_txsync_retry;
1401 		struct nm_bdg_q *d;
1402 		uint32_t my_start = 0, lease_idx = 0;
1403 		int nrings;
1404 		int virt_hdr_mismatch = 0;
1405 
1406 		d_i = dsts[i];
1407 		ND("second pass %d port %d", i, d_i);
1408 		d = dst_ents + d_i;
1409 		// XXX fix the division
1410 		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1411 		/* protect from the lookup function returning an inactive
1412 		 * destination port
1413 		 */
1414 		if (unlikely(dst_na == NULL))
1415 			goto cleanup;
1416 		if (dst_na->up.na_flags & NAF_SW_ONLY)
1417 			goto cleanup;
1418 		/*
1419 		 * The interface may be in !netmap mode in two cases:
1420 		 * - when na is attached but not activated yet;
1421 		 * - when na is being deactivated but is still attached.
1422 		 */
1423 		if (unlikely(!nm_netmap_on(&dst_na->up))) {
1424 			ND("not in netmap mode!");
1425 			goto cleanup;
1426 		}
1427 
1428 		/* there is at least one either unicast or broadcast packet */
1429 		brd_next = brddst->bq_head;
1430 		next = d->bq_head;
1431 		/* we need to reserve this many slots. If fewer are
1432 		 * available, some packets will be dropped.
1433 		 * Packets may have multiple fragments, so there is
1434 		 * a chance that we may not use all of the slots we
1435 		 * have claimed, and we will need to handle the leftover
1436 		 * ones when we regain the lock.
1437 		 */
1438 		needed = d->bq_len + brddst->bq_len;
1439 
1440 		if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) {
1441 			RD(3, "virt_hdr_mismatch, src %d dst %d", na->virt_hdr_len, dst_na->virt_hdr_len);
1442 			/* There is a virtio-net header/offloadings mismatch between
1443 			 * source and destination. The slower mismatch datapath will
1444 			 * be used to cope with all the mismatches.
1445 			 */
1446 			virt_hdr_mismatch = 1;
1447 			if (dst_na->mfs < na->mfs) {
1448 				/* We may need to do segmentation offloadings, and so
1449 				 * we may need a number of destination slots greater
1450 				 * than the number of input slots ('needed').
1451 				 * We look for the smallest integer 'x' which satisfies:
1452 				 *	needed * na->mfs + x * H <= x * dst_na->mfs
1453 				 * where 'H' is the length of the longest header that may
1454 				 * be replicated in the segmentation process (e.g. for
1455 				 * TCPv4 we must account for ethernet header, IP header
1456 				 * and TCPv4 header).
1457 				 */
1458 				needed = (needed * na->mfs) /
1459 						(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
1460 				ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
1461 			}
1462 		}
1463 
1464 		ND(5, "pass 2 dst %d is %x", i, d_i);
1466 		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1467 		nrings = dst_na->up.num_rx_rings;
1468 		if (dst_nr >= nrings)
1469 			dst_nr = dst_nr % nrings;
1470 		kring = &dst_na->up.rx_rings[dst_nr];
1471 		ring = kring->ring;
1472 		lim = kring->nkr_num_slots - 1;
1473 
1474 retry:
1475 
1476 		if (dst_na->retry && retry) {
1477 			/* try to get some free slot from the previous run */
1478 			dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1479 			/* actually useful only for bwraps, since there
1480 			 * the notify will trigger a txsync on the hwna. VALE ports
1481 			 * have dst_na->retry == 0
1482 			 */
1483 		}
1484 		/* reserve the buffers in the queue and an entry
1485 		 * to report completion, and drop lock.
1486 		 * XXX this might become a helper function.
1487 		 */
1488 		mtx_lock(&kring->q_lock);
1489 		if (kring->nkr_stopped) {
1490 			mtx_unlock(&kring->q_lock);
1491 			goto cleanup;
1492 		}
1493 		my_start = j = kring->nkr_hwlease;
1494 		howmany = nm_kr_space(kring, 1);
1495 		if (needed < howmany)
1496 			howmany = needed;
1497 		lease_idx = nm_kr_lease(kring, howmany, 1);
1498 		mtx_unlock(&kring->q_lock);
1499 
1500 		/* only retry if we need more than available slots */
1501 		if (retry && needed <= howmany)
1502 			retry = 0;
1503 
1504 		/* copy to the destination queue */
1505 		while (howmany > 0) {
1506 			struct netmap_slot *slot;
1507 			struct nm_bdg_fwd *ft_p, *ft_end;
1508 			u_int cnt;
1509 
1510 			/* find the queue from which we pick next packet.
1511 			 * NM_FT_NULL is always higher than valid indexes
1512 			 * so we never dereference it if the other list
1513 			 * has packets (and if both are empty we never
1514 			 * get here).
1515 			 */
1516 			if (next < brd_next) {
1517 				ft_p = ft + next;
1518 				next = ft_p->ft_next;
1519 			} else { /* insert broadcast */
1520 				ft_p = ft + brd_next;
1521 				brd_next = ft_p->ft_next;
1522 			}
1523 			cnt = ft_p->ft_frags; // cnt > 0
1524 			if (unlikely(cnt > howmany))
1525 			    break; /* no more space */
1526 			if (netmap_verbose && cnt > 1)
1527 				RD(5, "rx %d frags to %d", cnt, j);
1528 			ft_end = ft_p + cnt;
1529 			if (unlikely(virt_hdr_mismatch)) {
1530 				bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
1531 			} else {
1532 				howmany -= cnt;
1533 				do {
1534 					char *dst, *src = ft_p->ft_buf;
1535 					size_t copy_len = ft_p->ft_len, dst_len = copy_len;
1536 
1537 					slot = &ring->slot[j];
1538 					dst = NMB(&dst_na->up, slot);
1539 
1540 					ND("send [%d] %d(%d) bytes at %s:%d",
1541 							i, (int)copy_len, (int)dst_len,
1542 							dst_na->up.name, j);
1543 					/* round to a multiple of 64 */
1544 					copy_len = (copy_len + 63) & ~63;
1545 
1546 					if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) ||
1547 						     copy_len > NETMAP_BUF_SIZE(&na->up))) {
1548 						RD(5, "invalid len %d, down to 64", (int)copy_len);
1549 						copy_len = dst_len = 64; // XXX
1550 					}
1551 					if (ft_p->ft_flags & NS_INDIRECT) {
1552 						if (copyin(src, dst, copy_len)) {
1553 							// invalid user pointer, pretend len is 0
1554 							dst_len = 0;
1555 						}
1556 					} else {
1557 						//memcpy(dst, src, copy_len);
1558 						pkt_copy(src, dst, (int)copy_len);
1559 					}
1560 					slot->len = dst_len;
1561 					slot->flags = (cnt << 8) | NS_MOREFRAG;
1562 					j = nm_next(j, lim);
1563 					needed--;
1564 					ft_p++;
1565 				} while (ft_p != ft_end);
1566 				slot->flags = (cnt << 8); /* clear flag on last entry */
1567 			}
1568 			/* are we done ? */
1569 			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1570 				break;
1571 		}
1572 		{
1573 		    /* current position */
1574 		    uint32_t *p = kring->nkr_leases; /* shorthand */
1575 		    uint32_t update_pos;
1576 		    int still_locked = 1;
1577 
1578 		    mtx_lock(&kring->q_lock);
1579 		    if (unlikely(howmany > 0)) {
1580 			/* we did not use all the bufs. If we are the last
1581 			 * writer we can recover the slots, otherwise we must
1582 			 * fill them with len 0 to mark empty packets.
1583 			 */
1584 			ND("leftover %d bufs", howmany);
1585 			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1586 			    /* yes i am the last one */
1587 			    ND("roll back nkr_hwlease to %d", j);
1588 			    kring->nkr_hwlease = j;
1589 			} else {
1590 			    while (howmany-- > 0) {
1591 				ring->slot[j].len = 0;
1592 				ring->slot[j].flags = 0;
1593 				j = nm_next(j, lim);
1594 			    }
1595 			}
1596 		    }
1597 		    p[lease_idx] = j; /* report I am done */
1598 
1599 		    update_pos = kring->nr_hwtail;
1600 
1601 		    if (my_start == update_pos) {
1602 			/* all slots before my_start have been reported,
1603 			 * so scan subsequent leases to see if other ranges
1604 			 * have been completed, and do a selwakeup or txsync.
1605 			 */
1606 			while (lease_idx != kring->nkr_lease_idx &&
1607 				p[lease_idx] != NR_NOSLOT) {
1608 			    j = p[lease_idx];
1609 			    p[lease_idx] = NR_NOSLOT;
1610 			    lease_idx = nm_next(lease_idx, lim);
1611 			}
1612 			/* j is the new 'write' position. j != my_start
1613 			 * means there are new buffers to report
1614 			 */
1615 			if (likely(j != my_start)) {
1616 				kring->nr_hwtail = j;
1617 				still_locked = 0;
1618 				mtx_unlock(&kring->q_lock);
1619 				dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1620 				/* this is netmap_notify for VALE ports and
1621 				 * netmap_bwrap_notify for bwrap. The latter will
1622 				 * trigger a txsync on the underlying hwna
1623 				 */
1624 				if (dst_na->retry && retry--) {
1625 					/* XXX this is going to call nm_notify again.
1626 					 * Only useful for bwrap in virtual machines
1627 					 */
1628 					goto retry;
1629 				}
1630 			}
1631 		    }
1632 		    if (still_locked)
1633 			mtx_unlock(&kring->q_lock);
1634 		}
1635 cleanup:
1636 		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1637 		d->bq_len = 0;
1638 	}
1639 	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1640 	brddst->bq_len = 0;
1641 	return 0;
1642 }
1643 
1644 /* nm_txsync callback for VALE ports */
1645 static int
1646 netmap_vp_txsync(struct netmap_kring *kring, int flags)
1647 {
1648 	struct netmap_vp_adapter *na =
1649 		(struct netmap_vp_adapter *)kring->na;
1650 	u_int done;
1651 	u_int const lim = kring->nkr_num_slots - 1;
1652 	u_int const cur = kring->rcur;
1653 
1654 	if (bridge_batch <= 0) { /* testing only */
1655 		done = cur; // used all
1656 		goto done;
1657 	}
1658 	if (!na->na_bdg) {
1659 		done = cur;
1660 		goto done;
1661 	}
1662 	if (bridge_batch > NM_BDG_BATCH)
1663 		bridge_batch = NM_BDG_BATCH;
1664 
1665 	done = nm_bdg_preflush(kring, cur);
1666 done:
1667 	if (done != cur)
1668 		D("early break at %d/%d, tail %d", done, cur, kring->nr_hwtail);
1669 	/*
1670 	 * packets between 'done' and 'cur' are left unsent.
1671 	 */
1672 	kring->nr_hwcur = done;
1673 	kring->nr_hwtail = nm_prev(done, lim);
1674 	nm_txsync_finalize(kring);
1675 	if (netmap_verbose)
1676 		D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
1677 	return 0;
1678 }
1679 
1680 
1681 /* rxsync code used by the VALE ports' nm_rxsync callback and also
1682  * internally by the bwrap
1683  */
1684 static int
1685 netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
1686 {
1687 	struct netmap_adapter *na = kring->na;
1688 	struct netmap_ring *ring = kring->ring;
1689 	u_int nm_i, lim = kring->nkr_num_slots - 1;
1690 	u_int head = nm_rxsync_prologue(kring);
1691 	int n;
1692 
1693 	if (head > lim) {
1694 		D("ouch dangerous reset!!!");
1695 		n = netmap_ring_reinit(kring);
1696 		goto done;
1697 	}
1698 
1699 	/* First part, import newly received packets. */
1700 	/* actually nothing to do here, they are already in the kring */
1701 
1702 	/* Second part, skip past packets that userspace has released. */
1703 	nm_i = kring->nr_hwcur;
1704 	if (nm_i != head) {
1705 		/* consistency check, but nothing really important here */
1706 		for (n = 0; likely(nm_i != head); n++) {
1707 			struct netmap_slot *slot = &ring->slot[nm_i];
1708 			void *addr = NMB(na, slot);
1709 
1710 			if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
1711 				D("bad buffer index %d, ignore ?",
1712 					slot->buf_idx);
1713 			}
1714 			slot->flags &= ~NS_BUF_CHANGED;
1715 			nm_i = nm_next(nm_i, lim);
1716 		}
1717 		kring->nr_hwcur = head;
1718 	}
1719 
1720 	/* tell userspace that there are new packets */
1721 	nm_rxsync_finalize(kring);
1722 	n = 0;
1723 done:
1724 	return n;
1725 }
1726 
1727 /*
1728  * nm_rxsync callback for VALE ports
1729  * user process reading from a VALE switch.
1730  * Already protected against concurrent calls from userspace,
1731  * but we must acquire the queue's lock to protect against
1732  * writers on the same queue.
1733  */
1734 static int
1735 netmap_vp_rxsync(struct netmap_kring *kring, int flags)
1736 {
1737 	int n;
1738 
1739 	mtx_lock(&kring->q_lock);
1740 	n = netmap_vp_rxsync_locked(kring, flags);
1741 	mtx_unlock(&kring->q_lock);
1742 	return n;
1743 }
1744 
1745 
1746 /* nm_bdg_attach callback for VALE ports
1747  * The na_vp port is this same netmap_adapter. There is no host port.
1748  */
1749 static int
1750 netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na)
1751 {
1752 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
1753 
1754 	if (vpna->na_bdg)
1755 		return EBUSY;
1756 	na->na_vp = vpna;
1757 	strncpy(na->name, name, sizeof(na->name));
1758 	na->na_hostvp = NULL;
1759 	return 0;
1760 }
1761 
1762 /* create a netmap_vp_adapter that describes a VALE port.
1763  * Only persistent VALE ports have a non-null ifp.
1764  */
1765 static int
1766 netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter **ret)
1767 {
1768 	struct netmap_vp_adapter *vpna;
1769 	struct netmap_adapter *na;
1770 	int error;
1771 	u_int npipes = 0;
1772 
1773 	vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
1774 	if (vpna == NULL)
1775 		return ENOMEM;
1776 
777 	na = &vpna->up;
1778 
1779 	na->ifp = ifp;
1780 	strncpy(na->name, nmr->nr_name, sizeof(na->name));
1781 
1782 	/* bound checking */
1783 	na->num_tx_rings = nmr->nr_tx_rings;
1784 	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1785 	nmr->nr_tx_rings = na->num_tx_rings; // write back
1786 	na->num_rx_rings = nmr->nr_rx_rings;
1787 	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1788 	nmr->nr_rx_rings = na->num_rx_rings; // write back
1789 	nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1790 			1, NM_BDG_MAXSLOTS, NULL);
1791 	na->num_tx_desc = nmr->nr_tx_slots;
1792 	nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1793 			1, NM_BDG_MAXSLOTS, NULL);
1794 	/* validate the number of pipes. We require at least 1,
1795 	 * but a few more are likely to be useful,
1796 	 * so use 2 as the default when 0 is supplied.
1797 	 */
1798 	npipes = nmr->nr_arg1;
1799 	nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
1800 	nmr->nr_arg1 = npipes;	/* write back */
1801 	/* validate extra bufs */
1802 	nm_bound_var(&nmr->nr_arg3, 0, 0,
1803 			128*NM_BDG_MAXSLOTS, NULL);
1804 	na->num_rx_desc = nmr->nr_rx_slots;
1805 	vpna->virt_hdr_len = 0;
1806 	vpna->mfs = 1514;
1807 	/*if (vpna->mfs > netmap_buf_size)  TODO netmap_buf_size is zero??
1808 		vpna->mfs = netmap_buf_size; */
1809 	if (netmap_verbose)
1810 		D("max frame size %u", vpna->mfs);
1811 
1812 	na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
1813 	na->nm_txsync = netmap_vp_txsync;
1814 	na->nm_rxsync = netmap_vp_rxsync;
1815 	na->nm_register = netmap_vp_reg;
1816 	na->nm_krings_create = netmap_vp_krings_create;
1817 	na->nm_krings_delete = netmap_vp_krings_delete;
1818 	na->nm_dtor = netmap_vp_dtor;
1819 	na->nm_mem = netmap_mem_private_new(na->name,
1820 			na->num_tx_rings, na->num_tx_desc,
1821 			na->num_rx_rings, na->num_rx_desc,
1822 			nmr->nr_arg3, npipes, &error);
1823 	if (na->nm_mem == NULL)
1824 		goto err;
1825 	na->nm_bdg_attach = netmap_vp_bdg_attach;
1826 	/* other nmd fields are set in the common routine */
1827 	error = netmap_attach_common(na);
1828 	if (error)
1829 		goto err;
1830 	*ret = vpna;
1831 	return 0;
1832 
1833 err:
1834 	if (na->nm_mem != NULL)
1835 		netmap_mem_private_delete(na->nm_mem);
1836 	free(vpna, M_DEVBUF);
1837 	return error;
1838 }
1839 
1840 /* Bridge wrapper code (bwrap).
1841  * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
1842  * VALE switch.
1843  * The main task is to swap the meaning of tx and rx rings to match the
1844  * expectations of the VALE switch code (see nm_bdg_flush).
1845  *
1846  * The bwrap works by interposing a netmap_bwrap_adapter between the
1847  * rest of the system and the hwna. The netmap_bwrap_adapter looks like
1848  * a netmap_vp_adapter to the rest of the system, but, internally, it
1849  * translates all callbacks to what the hwna expects.
1850  *
1851  * Note that we have to intercept callbacks coming from two sides:
1852  *
1853  *  - callbacks coming from the netmap module are intercepted by
1854  *    passing around the netmap_bwrap_adapter instead of the hwna
1855  *
1856  *  - callbacks coming from outside of the netmap module only know
1857  *    about the hwna. This, however, only happens in interrupt
1858  *    handlers, where only the hwna->nm_notify callback is called.
1859  *    What the bwrap does is to overwrite the hwna->nm_notify callback
1860  *    with its own netmap_bwrap_intr_notify.
1861  *    XXX This assumes that the hwna->nm_notify callback was the
1862  *    standard netmap_notify(), as it is the case for nic adapters.
1863  *    standard netmap_notify(), as is the case for NIC adapters.
1864  *    performed by netmap_bwrap_intr_notify.
1865  *
1866  * Additionally, the bwrap can optionally attach the host ring pair
1867  * of the wrapped adapter to a different port of the switch.
1868  */
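/* A minimal sketch of the notify interception described above
 * (the actual assignments are in netmap_bwrap_register() below):
 *
 *	bna->save_notify = hwna->nm_notify;	    // save the original
 *	hwna->nm_notify = netmap_bwrap_intr_notify; // divert interrupts
 *	...
 *	hwna->nm_notify = bna->save_notify;	    // restored on unregister
 */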
1869 
1870 
1871 static void
1872 netmap_bwrap_dtor(struct netmap_adapter *na)
1873 {
1874 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1875 	struct netmap_adapter *hwna = bna->hwna;
1876 
1877 	ND("na %p", na);
1878 	/* drop reference to hwna->ifp.
1879 	 * If we don't do this, netmap_detach_common(na)
1880 	 * will think it has set NA(na->ifp) to NULL
1881 	 */
1882 	na->ifp = NULL;
1883 	/* for safety, also drop the possible reference
1884 	 * in the hostna
1885 	 */
1886 	bna->host.up.ifp = NULL;
1887 
1888 	hwna->nm_mem = bna->save_nmd;
1889 	hwna->na_private = NULL;
1890 	hwna->na_vp = hwna->na_hostvp = NULL;
1891 	hwna->na_flags &= ~NAF_BUSY;
1892 	netmap_adapter_put(hwna);
1893 
1894 }
1895 
1896 
1897 /*
1898  * Intr callback for NICs connected to a bridge.
1899  * Simply ignore tx interrupts (maybe we could try to recover space ?)
1900  * and pass received packets from nic to the bridge.
1901  *
1902  * XXX TODO check locking: this is called from the interrupt
1903  * handler so we should make sure that the interface is not
1904  * disconnected while passing down an interrupt.
1905  *
1906  * Note, no user process can access this NIC or the host stack.
1907  * The only significant part of the ring is the slots array;
1908  * head/cur/tail are set from the kring as needed
1909  * (part as a receive ring, part as a transmit ring).
1910  *
1911  * This callback overwrites the hwna notify callback.
1912  * Packets come from the outside or from the host stack and are put on an hwna rx ring.
1913  * The bridge wrapper then sends the packets through the bridge.
1914  */
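/* Illustrative receive data path (names as used in this file):
 *
 *	NIC interrupt --> netmap_bwrap_intr_notify()
 *	    --> kring->nm_sync()	// collect packets from the hwna rx ring
 *	    --> netmap_vp_txsync()	// forward them through the switch
 */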
1915 static int
1916 netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
1917 {
1918 	struct netmap_bwrap_adapter *bna = na->na_private;
1919 	struct netmap_vp_adapter *hostna = &bna->host;
1920 	struct netmap_kring *kring, *bkring;
1921 	struct netmap_ring *ring;
1922 	int is_host_ring = ring_nr == na->num_rx_rings;
1923 	struct netmap_vp_adapter *vpna = &bna->up;
1924 	int error = 0;
1925 
1926 	if (netmap_verbose)
1927 	    D("%s %s%d 0x%x", na->name,
1928 		(tx == NR_TX ? "TX" : "RX"), ring_nr, flags);
1929 
1930 	if (flags & NAF_DISABLE_NOTIFY) {
1931 		/* the enabled/disabled state of the ring has changed,
1932 		 * propagate the info to the wrapper (with tx/rx swapped)
1933 		 */
1934 		if (tx == NR_TX) {
1935 			netmap_set_rxring(&vpna->up, ring_nr,
1936 					na->tx_rings[ring_nr].nkr_stopped);
1937 		} else {
1938 			netmap_set_txring(&vpna->up, ring_nr,
1939 					na->rx_rings[ring_nr].nkr_stopped);
1940 		}
1941 		return 0;
1942 	}
1943 
1944 	if (!nm_netmap_on(na))
1945 		return 0;
1946 
1947 	/* we only care about receive interrupts */
1948 	if (tx == NR_TX)
1949 		return 0;
1950 
1951 	kring = &na->rx_rings[ring_nr];
1952 	ring = kring->ring;
1953 
1954 	/* make sure the ring is not disabled */
1955 	if (nm_kr_tryget(kring))
1956 		return 0;
1957 
1958 	if (is_host_ring && hostna->na_bdg == NULL) {
1959 		error = bna->save_notify(na, ring_nr, tx, flags);
1960 		goto put_out;
1961 	}
1962 
1963 	/* Here we expect ring->head = ring->cur = ring->tail
1964 	 * because everything has been released from the previous round.
1965 	 * However the ring is shared and we might have info from
1966 	 * the wrong side (the tx ring). Hence we overwrite with
1967 	 * the info from the rx kring.
1968 	 */
1969 	if (netmap_verbose)
1970 	    D("%s head %d cur %d tail %d (kring %d %d %d)",  na->name,
1971 		ring->head, ring->cur, ring->tail,
1972 		kring->rhead, kring->rcur, kring->rtail);
1973 
1974 	ring->head = kring->rhead;
1975 	ring->cur = kring->rcur;
1976 	ring->tail = kring->rtail;
1977 
1978 	if (is_host_ring) {
1979 		vpna = hostna;
1980 		ring_nr = 0;
1981 	}
1982 	/* simulate a user wakeup on the rx ring */
1983 	/* fetch packets that have arrived.
1984 	 * XXX maybe do this in a loop ?
1985 	 */
1986 	error = kring->nm_sync(kring, 0);
1987 	if (error)
1988 		goto put_out;
1989 	if (kring->nr_hwcur == kring->nr_hwtail) {
1990 		if (netmap_verbose)
1991 			D("how strange, interrupt with no packets on %s", na->name);
1992 		goto put_out;
1993 	}
1994 
1995 	/* new packets are ring->cur to ring->tail, and the bkring
1996 	 * had hwcur == ring->cur. So advance ring->cur to ring->tail
1997 	 * to push all packets out.
1998 	 */
1999 	ring->head = ring->cur = ring->tail;
2000 
2001 	/* also set tail to what the bwrap expects */
2002 	bkring = &vpna->up.tx_rings[ring_nr];
2003 	ring->tail = bkring->nr_hwtail; // rtail too ?
2004 
2005 	/* pass packets to the switch */
2006 	nm_txsync_prologue(bkring); // XXX error checking ?
2007 	netmap_vp_txsync(bkring, flags);
2008 
2009 	/* mark all buffers as released on this ring */
2010 	ring->head = ring->cur = kring->nr_hwtail;
2011 	ring->tail = kring->rtail;
2012 	/* another call to actually release the buffers */
2013 	if (!is_host_ring) {
2014 		error = kring->nm_sync(kring, 0);
2015 	} else {
2016 		/* mark all packets as released, as in the
2017 		 * second part of netmap_rxsync_from_host()
2018 		 */
2019 		kring->nr_hwcur = kring->nr_hwtail;
2020 		nm_rxsync_finalize(kring);
2021 	}
2022 
2023 put_out:
2024 	nm_kr_put(kring);
2025 	return error;
2026 }
2027 
2028 
2029 /* nm_register callback for bwrap */
2030 static int
2031 netmap_bwrap_register(struct netmap_adapter *na, int onoff)
2032 {
2033 	struct netmap_bwrap_adapter *bna =
2034 		(struct netmap_bwrap_adapter *)na;
2035 	struct netmap_adapter *hwna = bna->hwna;
2036 	struct netmap_vp_adapter *hostna = &bna->host;
2037 	int error;
2038 
2039 	ND("%s %s", na->name, onoff ? "on" : "off");
2040 
2041 	if (onoff) {
2042 		int i;
2043 
2044 		/* netmap_do_regif has been called on the bwrap na.
2045 		 * We need to pass the information about the
2046 		 * memory allocator down to the hwna before
2047 		 * putting it in netmap mode
2048 		 */
2049 		hwna->na_lut = na->na_lut;
2050 		hwna->na_lut_objtotal = na->na_lut_objtotal;
2051 		hwna->na_lut_objsize = na->na_lut_objsize;
2052 
2053 		if (hostna->na_bdg) {
2054 			/* if the host rings have been attached to switch,
2055 			 * we need to copy the memory allocator information
2056 			 * in the hostna also
2057 			 */
2058 			hostna->up.na_lut = na->na_lut;
2059 			hostna->up.na_lut_objtotal = na->na_lut_objtotal;
2060 			hostna->up.na_lut_objsize = na->na_lut_objsize;
2061 		}
2062 
2063 		/* cross-link the netmap rings
2064 		 * The original number of rings comes from hwna,
2065 		 * rx rings on one side equal tx rings on the other.
2066 		 * We need to do this now, after the initialization
2067 		 * of the kring->ring pointers
2068 		 */
2069 		for (i = 0; i < na->num_rx_rings + 1; i++) {
2070 			hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
2071 			hwna->tx_rings[i].ring = na->rx_rings[i].ring;
2072 		}
2073 		for (i = 0; i < na->num_tx_rings + 1; i++) {
2074 			hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
2075 			hwna->rx_rings[i].ring = na->tx_rings[i].ring;
2076 		}
2077 	}
2078 
2079 	/* forward the request to the hwna */
2080 	error = hwna->nm_register(hwna, onoff);
2081 	if (error)
2082 		return error;
2083 
2084 	/* impersonate a netmap_vp_adapter */
2085 	netmap_vp_reg(na, onoff);
2086 	if (hostna->na_bdg)
2087 		netmap_vp_reg(&hostna->up, onoff);
2088 
2089 	if (onoff) {
2090 		/* intercept the hwna nm_notify callback */
2091 		bna->save_notify = hwna->nm_notify;
2092 		hwna->nm_notify = netmap_bwrap_intr_notify;
2093 	} else {
2094 		hwna->nm_notify = bna->save_notify;
2095 		hwna->na_lut = NULL;
2096 		hwna->na_lut_objtotal = 0;
2097 		hwna->na_lut_objsize = 0;
2098 	}
2099 
2100 	return 0;
2101 }
2102 
2103 /* nm_config callback for bwrap */
2104 static int
2105 netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
2106 				    u_int *rxr, u_int *rxd)
2107 {
2108 	struct netmap_bwrap_adapter *bna =
2109 		(struct netmap_bwrap_adapter *)na;
2110 	struct netmap_adapter *hwna = bna->hwna;
2111 
2112 	/* forward the request */
2113 	netmap_update_config(hwna);
2114 	/* swap the results */
2115 	*txr = hwna->num_rx_rings;
2116 	*txd = hwna->num_rx_desc;
2117 	*rxr = hwna->num_tx_rings;
2118 	*rxd = hwna->num_tx_desc;
2119 
2120 	return 0;
2121 }
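/* Worked example with hypothetical values: if the hwna has 4 tx rings
 * of 1024 descriptors and 2 rx rings of 512 descriptors, the bwrap
 * reports 2 tx rings of 512 descriptors and 4 rx rings of 1024,
 * because frames received by the NIC are transmitted into the switch
 * and vice versa.
 */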
2122 
2123 
2124 /* nm_krings_create callback for bwrap */
2125 static int
2126 netmap_bwrap_krings_create(struct netmap_adapter *na)
2127 {
2128 	struct netmap_bwrap_adapter *bna =
2129 		(struct netmap_bwrap_adapter *)na;
2130 	struct netmap_adapter *hwna = bna->hwna;
2131 	struct netmap_adapter *hostna = &bna->host.up;
2132 	int error;
2133 
2134 	ND("%s", na->name);
2135 
2136 	/* impersonate a netmap_vp_adapter */
2137 	error = netmap_vp_krings_create(na);
2138 	if (error)
2139 		return error;
2140 
2141 	/* also create the hwna krings */
2142 	error = hwna->nm_krings_create(hwna);
2143 	if (error) {
2144 		netmap_vp_krings_delete(na);
2145 		return error;
2146 	}
2147 	/* the connection between the bwrap krings and the hwna krings
2148 	 * will be performed later, in the nm_register callback, since
2149 	 * the kring->ring pointers have not been initialized yet
2150 	 */
2151 
2152 	if (na->na_flags & NAF_HOST_RINGS) {
2153 		/* the hostna rings are the host rings of the bwrap.
2154 		 * The corresponding krings must point back to the
2155 		 * hostna
2156 		 */
2157 		hostna->tx_rings = na->tx_rings + na->num_tx_rings;
2158 		hostna->tx_rings[0].na = hostna;
2159 		hostna->rx_rings = na->rx_rings + na->num_rx_rings;
2160 		hostna->rx_rings[0].na = hostna;
2161 	}
2162 
2163 	return 0;
2164 }
2165 
2166 
2167 static void
2168 netmap_bwrap_krings_delete(struct netmap_adapter *na)
2169 {
2170 	struct netmap_bwrap_adapter *bna =
2171 		(struct netmap_bwrap_adapter *)na;
2172 	struct netmap_adapter *hwna = bna->hwna;
2173 
2174 	ND("%s", na->name);
2175 
2176 	hwna->nm_krings_delete(hwna);
2177 	netmap_vp_krings_delete(na);
2178 }
2179 
2180 
2181 /* notify method for the bridge-->hwna direction */
2182 static int
2183 netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
2184 {
2185 	struct netmap_bwrap_adapter *bna =
2186 		(struct netmap_bwrap_adapter *)na;
2187 	struct netmap_adapter *hwna = bna->hwna;
2188 	struct netmap_kring *kring, *hw_kring;
2189 	struct netmap_ring *ring;
2190 	u_int lim;
2191 	int error = 0;
2192 
2193 	if (tx == NR_TX)
2194 		return EINVAL;
2195 
2196 	kring = &na->rx_rings[ring_n];
2197 	hw_kring = &hwna->tx_rings[ring_n];
2198 	ring = kring->ring;
2199 	lim = kring->nkr_num_slots - 1;
2200 
2201 	if (!nm_netmap_on(hwna))
2202 		return 0;
2203 	mtx_lock(&kring->q_lock);
2204 	/* first step: simulate a user wakeup on the rx ring */
2205 	netmap_vp_rxsync_locked(kring, flags);
2206 	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
2207 		na->name, ring_n,
2208 		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
2209 		ring->head, ring->cur, ring->tail,
2210 		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
2211 	/* second step: the simulated user consumes all new packets */
2212 	ring->head = ring->cur = ring->tail;
2213 
2214 	/* third step: the new packets are sent on the tx ring
2215 	 * (which is actually the same ring)
2216 	 */
2217 	/* set tail to what the hw expects */
2218 	ring->tail = hw_kring->rtail;
2219 	nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ?
2220 	error = hw_kring->nm_sync(hw_kring, flags);
2221 
2222 	/* fourth step: now we are back on the rx ring */
2223 	/* claim ownership on all hw owned bufs */
2224 	ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */
2225 	ring->tail = kring->rtail; /* restore saved value of tail, for safety */
2226 
2227 	/* fifth step: the user goes to sleep again, causing another rxsync */
2228 	netmap_vp_rxsync_locked(kring, flags);
2229 	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
2230 		na->name, ring_n,
2231 		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
2232 		ring->head, ring->cur, ring->tail,
2233 		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
2234 	mtx_unlock(&kring->q_lock);
2235 	return error;
2236 }
2237 
2238 
2239 /* notify method for the bridge-->host-rings path */
2240 static int
2241 netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
2242 {
2243 	struct netmap_bwrap_adapter *bna = na->na_private;
2244 	struct netmap_adapter *port_na = &bna->up.up;
2245 	if (tx == NR_TX || ring_n != 0)
2246 		return EINVAL;
2247 	return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
2248 }
2249 
2250 
2251 /* nm_bdg_ctl callback for the bwrap.
2252  * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
2253  * On attach, it needs to provide a fake netmap_priv_d structure and
2254  * perform a netmap_do_regif() on the bwrap. This will put both the
2255  * bwrap and the hwna in netmap mode, with the netmap rings shared
2256  * and cross-linked. Moreover, it will start intercepting interrupts
2257  * directed to hwna.
2258  */
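/* Illustrative userspace trigger (interface name hypothetical):
 * "vale-ctl -a vale0:em0" attaches NIC em0 to switch vale0 and reaches
 * this callback with attach != 0; "vale-ctl -d vale0:em0" detaches it
 * (attach == 0); "vale-ctl -h vale0:em0" additionally attaches the
 * host rings.
 */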
2259 static int
2260 netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
2261 {
2262 	struct netmap_priv_d *npriv;
2263 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
2264 	struct netmap_if *nifp;
2265 	int error = 0;
2266 
2267 	if (attach) {
2268 		if (NETMAP_OWNED_BY_ANY(na)) {
2269 			return EBUSY;
2270 		}
2271 		if (bna->na_kpriv) {
2272 			/* nothing to do */
2273 			return 0;
2274 		}
2275 		npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
2276 		if (npriv == NULL)
2277 			return ENOMEM;
2278 		nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags, &error);
2279 		if (!nifp) {
2280 			bzero(npriv, sizeof(*npriv));
2281 			free(npriv, M_DEVBUF);
2282 			return error;
2283 		}
2284 		bna->na_kpriv = npriv;
2285 		na->na_flags |= NAF_BUSY;
2286 	} else {
2287 		int last_instance;
2288 
2289 		if (na->active_fds == 0) /* not registered */
2290 			return EINVAL;
2291 		last_instance = netmap_dtor_locked(bna->na_kpriv);
2292 		if (!last_instance) {
2293 			D("--- error, trying to detach an entry with active mmaps");
2294 			error = EINVAL;
2295 		} else {
2296 			struct nm_bridge *b = bna->up.na_bdg,
2297 				*bh = bna->host.na_bdg;
2298 			npriv = bna->na_kpriv;
2299 			bna->na_kpriv = NULL;
2300 			D("deleting priv");
2301 
2302 			bzero(npriv, sizeof(*npriv));
2303 			free(npriv, M_DEVBUF);
2304 			if (b) {
2305 				/* XXX the bwrap dtor should take care
2306 				 * of this (2014-06-16)
2307 				 */
2308 				netmap_bdg_detach_common(b, bna->up.bdg_port,
2309 				    (bh ? bna->host.bdg_port : -1));
2310 			}
2311 			na->na_flags &= ~NAF_BUSY;
2312 		}
2313 	}
2314 	return error;
2315 
2316 }
2317 
2318 /* attach a bridge wrapper to the 'real' device */
2319 int
2320 netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
2321 {
2322 	struct netmap_bwrap_adapter *bna;
2323 	struct netmap_adapter *na = NULL;
2324 	struct netmap_adapter *hostna = NULL;
2325 	int error = 0;
2326 
2327 	/* make sure the NIC is not already in use */
2328 	if (NETMAP_OWNED_BY_ANY(hwna)) {
2329 		D("NIC %s busy, cannot attach to bridge", hwna->name);
2330 		return EBUSY;
2331 	}
2332 
2333 	bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
2334 	if (bna == NULL) {
2335 		return ENOMEM;
2336 	}
2337 
2338 	na = &bna->up.up;
2339 	strncpy(na->name, nr_name, sizeof(na->name));
2340 	/* fill the ring data for the bwrap adapter with rx/tx meanings
2341 	 * swapped. The real cross-linking will be done during register,
2342 	 * when all the krings will have been created.
2343 	 */
2344 	na->num_rx_rings = hwna->num_tx_rings;
2345 	na->num_tx_rings = hwna->num_rx_rings;
2346 	na->num_tx_desc = hwna->num_rx_desc;
2347 	na->num_rx_desc = hwna->num_tx_desc;
2348 	na->nm_dtor = netmap_bwrap_dtor;
2349 	na->nm_register = netmap_bwrap_register;
2350 	// na->nm_txsync = netmap_bwrap_txsync;
2351 	// na->nm_rxsync = netmap_bwrap_rxsync;
2352 	na->nm_config = netmap_bwrap_config;
2353 	na->nm_krings_create = netmap_bwrap_krings_create;
2354 	na->nm_krings_delete = netmap_bwrap_krings_delete;
2355 	na->nm_notify = netmap_bwrap_notify;
2356 	na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
2357 	na->pdev = hwna->pdev;
2358 	na->nm_mem = netmap_mem_private_new(na->name,
2359 			na->num_tx_rings, na->num_tx_desc,
2360 			na->num_rx_rings, na->num_rx_desc,
2361 			0, 0, &error);
2362 	na->na_flags |= NAF_MEM_OWNER;
2363 	if (na->nm_mem == NULL)
2364 		goto err_put;
2365 	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
2366 
2367 	bna->hwna = hwna;
2368 	netmap_adapter_get(hwna);
2369 	hwna->na_private = bna; /* weak reference */
2370 	hwna->na_vp = &bna->up;
2371 
2372 	if (hwna->na_flags & NAF_HOST_RINGS) {
2373 		if (hwna->na_flags & NAF_SW_ONLY)
2374 			na->na_flags |= NAF_SW_ONLY;
2375 		na->na_flags |= NAF_HOST_RINGS;
2376 		hostna = &bna->host.up;
2377 		snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name);
2378 		hostna->ifp = hwna->ifp;
2379 		hostna->num_tx_rings = 1;
2380 		hostna->num_tx_desc = hwna->num_rx_desc;
2381 		hostna->num_rx_rings = 1;
2382 		hostna->num_rx_desc = hwna->num_tx_desc;
2383 		// hostna->nm_txsync = netmap_bwrap_host_txsync;
2384 		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
2385 		hostna->nm_notify = netmap_bwrap_host_notify;
2386 		hostna->nm_mem = na->nm_mem;
2387 		hostna->na_private = bna;
2388 		hostna->na_vp = &bna->up;
2389 		na->na_hostvp = hwna->na_hostvp =
2390 			hostna->na_hostvp = &bna->host;
2391 		hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
2392 	}
2393 
2394 	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
2395 		na->name, hwna->name,
2396 		na->num_tx_rings, na->num_tx_desc,
2397 		na->num_rx_rings, na->num_rx_desc);
2398 
2399 	error = netmap_attach_common(na);
2400 	if (error) {
2401 		goto err_free;
2402 	}
2403 	/* make bwrap ifp point to the real ifp
2404 	 * NOTE: netmap_attach_common() interprets a non-NULL na->ifp
2405 	 * as a request to make the ifp point to the na. Since we
2406 	 * do not want to change the na already pointed to by hwna->ifp,
2407 	 * the following assignment has to be delayed until now
2408 	 */
2409 	na->ifp = hwna->ifp;
2410 	hwna->na_flags |= NAF_BUSY;
2411 	/* make hwna point to the allocator we are actually using,
2412 	 * so that monitors will be able to find it
2413 	 */
2414 	bna->save_nmd = hwna->nm_mem;
2415 	hwna->nm_mem = na->nm_mem;
2416 	return 0;
2417 
2418 err_free:
2419 	netmap_mem_private_delete(na->nm_mem);
2420 err_put:
2421 	hwna->na_vp = hwna->na_hostvp = NULL;
2422 	netmap_adapter_put(hwna);
2423 	free(bna, M_DEVBUF);
2424 	return error;
2425 
2426 }
2427 
2428 
2429 void
2430 netmap_init_bridges(void)
2431 {
2432 	int i;
2433 	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
2434 	for (i = 0; i < NM_BRIDGES; i++)
2435 		BDG_RWINIT(&nm_bridges[i]);
2436 }
2437 #endif /* WITH_VALE */
2438