xref: /freebsd/sys/dev/netmap/netmap_mem2.c (revision def7fe87e9b28032572ca6f820a260677fd0c2d5)
1 /*
2  * Copyright (C) 2012-2014 Matteo Landi
3  * Copyright (C) 2012-2016 Luigi Rizzo
4  * Copyright (C) 2012-2016 Giuseppe Lettieri
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *   1. Redistributions of source code must retain the above copyright
11  *      notice, this list of conditions and the following disclaimer.
12  *   2. Redistributions in binary form must reproduce the above copyright
13  *      notice, this list of conditions and the following disclaimer in the
14  *      documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #ifdef linux
30 #include "bsd_glue.h"
31 #endif /* linux */
32 
33 #ifdef __APPLE__
34 #include "osx_glue.h"
35 #endif /* __APPLE__ */
36 
37 #ifdef __FreeBSD__
38 #include <sys/cdefs.h> /* prerequisite */
39 __FBSDID("$FreeBSD$");
40 
41 #include <sys/types.h>
42 #include <sys/malloc.h>
43 #include <sys/kernel.h>		/* MALLOC_DEFINE */
44 #include <sys/proc.h>
45 #include <vm/vm.h>	/* vtophys */
46 #include <vm/pmap.h>	/* vtophys */
47 #include <sys/socket.h> /* sockaddrs */
48 #include <sys/selinfo.h>
49 #include <sys/sysctl.h>
50 #include <net/if.h>
51 #include <net/if_var.h>
52 #include <net/vnet.h>
53 #include <machine/bus.h>	/* bus_dmamap_* */
54 
55 /* M_NETMAP only used in here */
56 MALLOC_DECLARE(M_NETMAP);
57 MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");
58 
59 #endif /* __FreeBSD__ */
60 
61 #ifdef _WIN32
62 #include <win_glue.h>
63 #endif
64 
65 #include <net/netmap.h>
66 #include <dev/netmap/netmap_kern.h>
67 #include <net/netmap_virt.h>
68 #include "netmap_mem2.h"
69 
70 #ifdef _WIN32_USE_SMALL_GENERIC_DEVICES_MEMORY
71 #define NETMAP_BUF_MAX_NUM  8*4096      /* if too big takes too much time to allocate */
72 #else
73 #define NETMAP_BUF_MAX_NUM 20*4096*2	/* large machine */
74 #endif
75 
76 #define NETMAP_POOL_MAX_NAMSZ	32
77 
78 
79 enum {
80 	NETMAP_IF_POOL   = 0,
81 	NETMAP_RING_POOL,
82 	NETMAP_BUF_POOL,
83 	NETMAP_POOLS_NR
84 };
85 
86 
87 struct netmap_obj_params {
88 	u_int size;
89 	u_int num;
90 };
91 
92 struct netmap_obj_pool {
93 	char name[NETMAP_POOL_MAX_NAMSZ];	/* name of the allocator */
94 
95 	/* ---------------------------------------------------*/
96 	/* these are only meaningful if the pool is finalized */
97 	/* (see the NETMAP_MEM_FINALIZED flag in netmap_mem_d)       */
98 	u_int objtotal;         /* actual total number of objects. */
99 	u_int memtotal;		/* actual total memory space */
100 	u_int numclusters;	/* actual number of clusters */
101 
102 	u_int objfree;          /* number of free objects. */
103 
104 	struct lut_entry *lut;  /* virt,phys addresses, objtotal entries */
105 	uint32_t *bitmap;       /* one bit per buffer, 1 means free */
106 	uint32_t bitmap_slots;	/* number of uint32 entries in bitmap */
107 	/* ---------------------------------------------------*/
108 
109 	/* limits */
110 	u_int objminsize;	/* minimum object size */
111 	u_int objmaxsize;	/* maximum object size */
112 	u_int nummin;		/* minimum number of objects */
113 	u_int nummax;		/* maximum number of objects */
114 
115 	/* these are changed only by config */
116 	u_int _objtotal;	/* total number of objects */
117 	u_int _objsize;		/* object size */
118 	u_int _clustsize;       /* cluster size */
119 	u_int _clustentries;    /* objects per cluster */
120 	u_int _numclusters;	/* number of clusters */
121 
122 	/* requested values */
123 	u_int r_objtotal;
124 	u_int r_objsize;
125 };
126 
127 #define NMA_LOCK_T		NM_MTX_T
128 
129 
130 struct netmap_mem_ops {
131 	int (*nmd_get_lut)(struct netmap_mem_d *, struct netmap_lut*);
132 	int  (*nmd_get_info)(struct netmap_mem_d *, u_int *size,
133 			u_int *memflags, uint16_t *id);
134 
135 	vm_paddr_t (*nmd_ofstophys)(struct netmap_mem_d *, vm_ooffset_t);
136 	int (*nmd_config)(struct netmap_mem_d *);
137 	int (*nmd_finalize)(struct netmap_mem_d *);
138 	void (*nmd_deref)(struct netmap_mem_d *);
139 	ssize_t  (*nmd_if_offset)(struct netmap_mem_d *, const void *vaddr);
140 	void (*nmd_delete)(struct netmap_mem_d *);
141 
142 	struct netmap_if * (*nmd_if_new)(struct netmap_adapter *);
143 	void (*nmd_if_delete)(struct netmap_adapter *, struct netmap_if *);
144 	int  (*nmd_rings_create)(struct netmap_adapter *);
145 	void (*nmd_rings_delete)(struct netmap_adapter *);
146 };
147 
148 typedef uint16_t nm_memid_t;
149 
150 /*
151  * Shared info for netmap allocator
152  *
153  * Each allocator contains this structure as its first netmap_if.
154  * In this way, we can share the allocator details with the VM.
155  *
156  * Used in ptnetmap.
157  */
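/*
 * Note (derived from netmap_mem_init_shared_info() below): this structure is
 * placed in the first netmap_if slot of the IF pool, i.e. at the very
 * beginning of the shared memory region, so a ptnetmap guest can locate it
 * without any extra bookkeeping.
 */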
158 struct netmap_mem_shared_info {
159 #ifndef _WIN32
160         struct netmap_if up;	/* ends with a 0-sized array, which VSC does not like */
161 #else /* !_WIN32 */
162 	char up[sizeof(struct netmap_if)];
163 #endif /* !_WIN32 */
164         uint64_t features;
165 #define NMS_FEAT_BUF_POOL          0x0001
166 #define NMS_FEAT_MEMSIZE           0x0002
167 
168         uint32_t buf_pool_offset;
169         uint32_t buf_pool_objtotal;
170         uint32_t buf_pool_objsize;
171         uint32_t totalsize;
172 };
173 
174 #define NMS_NAME        "nms_info"
175 #define NMS_VERSION     1
176 static const struct netmap_if nms_if_blueprint = {
177     .ni_name = NMS_NAME,
178     .ni_version = NMS_VERSION,
179     .ni_tx_rings = 0,
180     .ni_rx_rings = 0
181 };
182 
183 struct netmap_mem_d {
184 	NMA_LOCK_T nm_mtx;  /* protect the allocator */
185 	u_int nm_totalsize; /* shorthand */
186 
187 	u_int flags;
188 #define NETMAP_MEM_FINALIZED	0x1	/* preallocation done */
189 	int lasterr;		/* last error for curr config */
190 	int active;		/* active users */
191 	int refcount;
192 	/* the three allocators */
193 	struct netmap_obj_pool pools[NETMAP_POOLS_NR];
194 
195 	nm_memid_t nm_id;	/* allocator identifier */
196 	int nm_grp;	/* iommu group id */
197 
198 	/* list of all existing allocators, sorted by nm_id */
199 	struct netmap_mem_d *prev, *next;
200 
201 	struct netmap_mem_ops *ops;
202 };
203 
204 /*
205  * XXX need to fix the case of t0 == void
206  */
207 #define NMD_DEFCB(t0, name) \
208 t0 \
209 netmap_mem_##name(struct netmap_mem_d *nmd) \
210 { \
211 	return nmd->ops->nmd_##name(nmd); \
212 }
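/*
 * For illustration, NMD_DEFCB(int, config) expands to a thin wrapper that
 * dispatches through the ops vector, roughly:
 *
 *	int
 *	netmap_mem_config(struct netmap_mem_d *nmd)
 *	{
 *		return nmd->ops->nmd_config(nmd);
 *	}
 */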
213 
214 #define NMD_DEFCB1(t0, name, t1) \
215 t0 \
216 netmap_mem_##name(struct netmap_mem_d *nmd, t1 a1) \
217 { \
218 	return nmd->ops->nmd_##name(nmd, a1); \
219 }
220 
221 #define NMD_DEFCB3(t0, name, t1, t2, t3) \
222 t0 \
223 netmap_mem_##name(struct netmap_mem_d *nmd, t1 a1, t2 a2, t3 a3) \
224 { \
225 	return nmd->ops->nmd_##name(nmd, a1, a2, a3); \
226 }
227 
228 #define NMD_DEFNACB(t0, name) \
229 t0 \
230 netmap_mem_##name(struct netmap_adapter *na) \
231 { \
232 	return na->nm_mem->ops->nmd_##name(na); \
233 }
234 
235 #define NMD_DEFNACB1(t0, name, t1) \
236 t0 \
237 netmap_mem_##name(struct netmap_adapter *na, t1 a1) \
238 { \
239 	return na->nm_mem->ops->nmd_##name(na, a1); \
240 }
241 
242 NMD_DEFCB1(int, get_lut, struct netmap_lut *);
243 NMD_DEFCB3(int, get_info, u_int *, u_int *, uint16_t *);
244 NMD_DEFCB1(vm_paddr_t, ofstophys, vm_ooffset_t);
245 static int netmap_mem_config(struct netmap_mem_d *);
246 NMD_DEFCB(int, config);
247 NMD_DEFCB1(ssize_t, if_offset, const void *);
248 NMD_DEFCB(void, delete);
249 
250 NMD_DEFNACB(struct netmap_if *, if_new);
251 NMD_DEFNACB1(void, if_delete, struct netmap_if *);
252 NMD_DEFNACB(int, rings_create);
253 NMD_DEFNACB(void, rings_delete);
254 
255 static int netmap_mem_map(struct netmap_obj_pool *, struct netmap_adapter *);
256 static int netmap_mem_unmap(struct netmap_obj_pool *, struct netmap_adapter *);
257 static int nm_mem_assign_group(struct netmap_mem_d *, struct device *);
258 
259 #define NMA_LOCK_INIT(n)	NM_MTX_INIT((n)->nm_mtx)
260 #define NMA_LOCK_DESTROY(n)	NM_MTX_DESTROY((n)->nm_mtx)
261 #define NMA_LOCK(n)		NM_MTX_LOCK((n)->nm_mtx)
262 #define NMA_UNLOCK(n)		NM_MTX_UNLOCK((n)->nm_mtx)
263 
264 #ifdef NM_DEBUG_MEM_PUTGET
265 #define NM_DBG_REFC(nmd, func, line)	\
266 	printf("%s:%d mem[%d] -> %d\n", func, line, (nmd)->nm_id, (nmd)->refcount);
267 #else
268 #define NM_DBG_REFC(nmd, func, line)
269 #endif
270 
271 #ifdef NM_DEBUG_MEM_PUTGET
272 void __netmap_mem_get(struct netmap_mem_d *nmd, const char *func, int line)
273 #else
274 void netmap_mem_get(struct netmap_mem_d *nmd)
275 #endif
276 {
277 	NMA_LOCK(nmd);
278 	nmd->refcount++;
279 	NM_DBG_REFC(nmd, func, line);
280 	NMA_UNLOCK(nmd);
281 }
282 
283 #ifdef NM_DEBUG_MEM_PUTGET
284 void __netmap_mem_put(struct netmap_mem_d *nmd, const char *func, int line)
285 #else
286 void netmap_mem_put(struct netmap_mem_d *nmd)
287 #endif
288 {
289 	int last;
290 	NMA_LOCK(nmd);
291 	last = (--nmd->refcount == 0);
292 	NM_DBG_REFC(nmd, func, line);
293 	NMA_UNLOCK(nmd);
294 	if (last)
295 		netmap_mem_delete(nmd);
296 }
297 
298 int
299 netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
300 {
301 	if (nm_mem_assign_group(nmd, na->pdev) < 0) {
302 		return ENOMEM;
303 	} else {
304 		NMA_LOCK(nmd);
305 		nmd->lasterr = nmd->ops->nmd_finalize(nmd);
306 		NMA_UNLOCK(nmd);
307 	}
308 
309 	if (!nmd->lasterr && na->pdev)
310 		netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na);
311 
312 	return nmd->lasterr;
313 }
314 
315 static int netmap_mem_init_shared_info(struct netmap_mem_d *nmd);
316 
317 void
318 netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
319 {
320 	NMA_LOCK(nmd);
321 	netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na);
322 	if (nmd->active == 1) {
323 		u_int i;
324 
325 		/*
326 		 * Reset the allocator when it falls out of use so that any
327 		 * pool resources leaked by unclean application exits are
328 		 * reclaimed.
329 		 */
330 		for (i = 0; i < NETMAP_POOLS_NR; i++) {
331 			struct netmap_obj_pool *p;
332 			u_int j;
333 
334 			p = &nmd->pools[i];
335 			p->objfree = p->objtotal;
336 			/*
337 			 * Reproduce the net effect of the M_ZERO malloc()
338 			 * and marking of free entries in the bitmap that
339 			 * occur in finalize_obj_allocator()
340 			 */
341 			memset(p->bitmap,
342 			    '\0',
343 			    sizeof(uint32_t) * ((p->objtotal + 31) / 32));
344 
345 			/*
346 			 * Set all the bits in the bitmap that have
347 			 * corresponding buffers to 1 to indicate they are
348 			 * free.
349 			 */
350 			for (j = 0; j < p->objtotal; j++) {
351 				if (p->lut[j].vaddr != NULL) {
352 					p->bitmap[ (j>>5) ] |=  ( 1 << (j & 31) );
353 				}
354 			}
355 		}
356 
357 		/*
358 		 * Per netmap_mem_finalize_all(),
359 		 * buffers 0 and 1 are reserved
360 		 */
361 		nmd->pools[NETMAP_BUF_POOL].objfree -= 2;
362 		if (nmd->pools[NETMAP_BUF_POOL].bitmap) {
363 			/* XXX This check is a workaround that prevents a
364 			 * NULL pointer crash which currently happens only
365 			 * with ptnetmap guests. Also,
366 			 * netmap_mem_init_shared_info must not be called
367 			 * by ptnetmap guest. */
368 			nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3;
369 
370 			/* expose info to the ptnetmap guest */
371 			netmap_mem_init_shared_info(nmd);
372 		}
373 	}
374 	nmd->ops->nmd_deref(nmd);
375 
376 	NMA_UNLOCK(nmd);
377 }
378 
379 
380 /* accessor functions */
381 static int
382 netmap_mem2_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut)
383 {
384 	lut->lut = nmd->pools[NETMAP_BUF_POOL].lut;
385 	lut->objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal;
386 	lut->objsize = nmd->pools[NETMAP_BUF_POOL]._objsize;
387 
388 	return 0;
389 }
390 
391 static struct netmap_obj_params netmap_params[NETMAP_POOLS_NR] = {
392 	[NETMAP_IF_POOL] = {
393 		.size = 1024,
394 		.num  = 100,
395 	},
396 	[NETMAP_RING_POOL] = {
397 		.size = 9*PAGE_SIZE,
398 		.num  = 200,
399 	},
400 	[NETMAP_BUF_POOL] = {
401 		.size = 2048,
402 		.num  = NETMAP_BUF_MAX_NUM,
403 	},
404 };
405 
406 static struct netmap_obj_params netmap_min_priv_params[NETMAP_POOLS_NR] = {
407 	[NETMAP_IF_POOL] = {
408 		.size = 1024,
409 		.num  = 2,
410 	},
411 	[NETMAP_RING_POOL] = {
412 		.size = 5*PAGE_SIZE,
413 		.num  = 4,
414 	},
415 	[NETMAP_BUF_POOL] = {
416 		.size = 2048,
417 		.num  = 4098,
418 	},
419 };
420 
421 
422 /*
423  * nm_mem is the memory allocator used for all physical interfaces
424  * running in netmap mode.
425  * Virtual (VALE) ports will each have their own allocator.
426  */
427 extern struct netmap_mem_ops netmap_mem_global_ops; /* forward */
428 struct netmap_mem_d nm_mem = {	/* Our memory allocator. */
429 	.pools = {
430 		[NETMAP_IF_POOL] = {
431 			.name 	= "netmap_if",
432 			.objminsize = sizeof(struct netmap_if),
433 			.objmaxsize = 4096,
434 			.nummin     = 10,	/* don't be stingy */
435 			.nummax	    = 10000,	/* XXX very large */
436 		},
437 		[NETMAP_RING_POOL] = {
438 			.name 	= "netmap_ring",
439 			.objminsize = sizeof(struct netmap_ring),
440 			.objmaxsize = 32*PAGE_SIZE,
441 			.nummin     = 2,
442 			.nummax	    = 1024,
443 		},
444 		[NETMAP_BUF_POOL] = {
445 			.name	= "netmap_buf",
446 			.objminsize = 64,
447 			.objmaxsize = 65536,
448 			.nummin     = 4,
449 			.nummax	    = 1000000, /* one million! */
450 		},
451 	},
452 
453 	.nm_id = 1,
454 	.nm_grp = -1,
455 
456 	.prev = &nm_mem,
457 	.next = &nm_mem,
458 
459 	.ops = &netmap_mem_global_ops
460 };
461 
462 
463 static struct netmap_mem_d *netmap_last_mem_d = &nm_mem;
464 
465 /* blueprint for the private memory allocators */
466 extern struct netmap_mem_ops netmap_mem_private_ops; /* forward */
467 /* XXX clang is not happy about using name as a print format */
468 static const struct netmap_mem_d nm_blueprint = {
469 	.pools = {
470 		[NETMAP_IF_POOL] = {
471 			.name 	= "%s_if",
472 			.objminsize = sizeof(struct netmap_if),
473 			.objmaxsize = 4096,
474 			.nummin     = 1,
475 			.nummax	    = 100,
476 		},
477 		[NETMAP_RING_POOL] = {
478 			.name 	= "%s_ring",
479 			.objminsize = sizeof(struct netmap_ring),
480 			.objmaxsize = 32*PAGE_SIZE,
481 			.nummin     = 2,
482 			.nummax	    = 1024,
483 		},
484 		[NETMAP_BUF_POOL] = {
485 			.name	= "%s_buf",
486 			.objminsize = 64,
487 			.objmaxsize = 65536,
488 			.nummin     = 4,
489 			.nummax	    = 1000000, /* one million! */
490 		},
491 	},
492 
493 	.flags = NETMAP_MEM_PRIVATE,
494 
495 	.ops = &netmap_mem_private_ops
496 };
497 
498 /* memory allocator related sysctls */
499 
500 #define STRINGIFY(x) #x
501 
502 
503 #define DECLARE_SYSCTLS(id, name) \
504 	SYSBEGIN(mem2_ ## name); \
505 	SYSCTL_DECL(_dev_netmap); /* leave it here, easier for porting */ \
506 	SYSCTL_INT(_dev_netmap, OID_AUTO, name##_size, \
507 	    CTLFLAG_RW, &netmap_params[id].size, 0, "Requested size of netmap " STRINGIFY(name) "s"); \
508 	SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_size, \
509 	    CTLFLAG_RD, &nm_mem.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \
510 	SYSCTL_INT(_dev_netmap, OID_AUTO, name##_num, \
511 	    CTLFLAG_RW, &netmap_params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \
512 	SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_num, \
513 	    CTLFLAG_RD, &nm_mem.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s"); \
514 	SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_size, \
515 	    CTLFLAG_RW, &netmap_min_priv_params[id].size, 0, \
516 	    "Default size of private netmap " STRINGIFY(name) "s"); \
517 	SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_num, \
518 	    CTLFLAG_RW, &netmap_min_priv_params[id].num, 0, \
519 	    "Default number of private netmap " STRINGIFY(name) "s");	\
520 	SYSEND
521 
522 DECLARE_SYSCTLS(NETMAP_IF_POOL, if);
523 DECLARE_SYSCTLS(NETMAP_RING_POOL, ring);
524 DECLARE_SYSCTLS(NETMAP_BUF_POOL, buf);
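/*
 * On FreeBSD the three expansions above create, for example, the
 * dev.netmap.buf_size, dev.netmap.buf_curr_size, dev.netmap.buf_num,
 * dev.netmap.buf_curr_num, dev.netmap.priv_buf_size and
 * dev.netmap.priv_buf_num sysctls (plus the corresponding "if" and "ring"
 * variants), wired to the netmap_params and netmap_min_priv_params arrays
 * defined above.
 */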
525 
526 /* call with NMA_LOCK(&nm_mem) held */
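/*
 * Allocators are kept in a circular, doubly linked list sorted by nm_id.
 * Starting from the most recently inserted allocator, scan the list for the
 * first free id (ids wrap around, and 0 is reserved as an error value) and
 * insert nmd at that position.
 */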
527 static int
528 nm_mem_assign_id_locked(struct netmap_mem_d *nmd)
529 {
530 	nm_memid_t id;
531 	struct netmap_mem_d *scan = netmap_last_mem_d;
532 	int error = ENOMEM;
533 
534 	do {
535 		/* we rely on unsigned wrap around */
536 		id = scan->nm_id + 1;
537 		if (id == 0) /* reserve 0 as error value */
538 			id = 1;
539 		scan = scan->next;
540 		if (id != scan->nm_id) {
541 			nmd->nm_id = id;
542 			nmd->prev = scan->prev;
543 			nmd->next = scan;
544 			scan->prev->next = nmd;
545 			scan->prev = nmd;
546 			netmap_last_mem_d = nmd;
547 			error = 0;
548 			break;
549 		}
550 	} while (scan != netmap_last_mem_d);
551 
552 	return error;
553 }
554 
555 /* call with NMA_LOCK(&nm_mem) *not* held */
556 static int
557 nm_mem_assign_id(struct netmap_mem_d *nmd)
558 {
559         int ret;
560 
561 	NMA_LOCK(&nm_mem);
562         ret = nm_mem_assign_id_locked(nmd);
563 	NMA_UNLOCK(&nm_mem);
564 
565 	return ret;
566 }
567 
568 static void
569 nm_mem_release_id(struct netmap_mem_d *nmd)
570 {
571 	NMA_LOCK(&nm_mem);
572 
573 	nmd->prev->next = nmd->next;
574 	nmd->next->prev = nmd->prev;
575 
576 	if (netmap_last_mem_d == nmd)
577 		netmap_last_mem_d = nmd->prev;
578 
579 	nmd->prev = nmd->next = NULL;
580 
581 	NMA_UNLOCK(&nm_mem);
582 }
583 
584 static int
585 nm_mem_assign_group(struct netmap_mem_d *nmd, struct device *dev)
586 {
587 	int err = 0, id;
588 	id = nm_iommu_group_id(dev);
589 	if (netmap_verbose)
590 		D("iommu_group %d", id);
591 
592 	NMA_LOCK(nmd);
593 
594 	if (nmd->nm_grp < 0)
595 		nmd->nm_grp = id;
596 
597 	if (nmd->nm_grp != id)
598 		nmd->lasterr = err = ENOMEM;
599 
600 	NMA_UNLOCK(nmd);
601 	return err;
602 }
603 
604 /*
605  * First, find the allocator that contains the requested offset,
606  * then locate the cluster through a lookup table.
607  */
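/*
 * The offset space is the concatenation of the three pools, in the order
 * IF, RING, BUF. E.g. (hypothetical sizes) with IF memtotal = 64KB and
 * RING memtotal = 1MB, an offset of 64KB + 4KB falls in the RING pool,
 * 4KB past its start.
 */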
608 static vm_paddr_t
609 netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
610 {
611 	int i;
612 	vm_ooffset_t o = offset;
613 	vm_paddr_t pa;
614 	struct netmap_obj_pool *p;
615 
616 	NMA_LOCK(nmd);
617 	p = nmd->pools;
618 
619 	for (i = 0; i < NETMAP_POOLS_NR; offset -= p[i].memtotal, i++) {
620 		if (offset >= p[i].memtotal)
621 			continue;
622 		// now lookup the cluster's address
623 #ifndef _WIN32
624 		pa = vtophys(p[i].lut[offset / p[i]._objsize].vaddr) +
625 			offset % p[i]._objsize;
626 #else
627 		pa = vtophys(p[i].lut[offset / p[i]._objsize].vaddr);
628 		pa.QuadPart += offset % p[i]._objsize;
629 #endif
630 		NMA_UNLOCK(nmd);
631 		return pa;
632 	}
633 	/* this is only in case of errors */
634 	D("invalid ofs 0x%x out of 0x%x 0x%x 0x%x", (u_int)o,
635 		p[NETMAP_IF_POOL].memtotal,
636 		p[NETMAP_IF_POOL].memtotal
637 			+ p[NETMAP_RING_POOL].memtotal,
638 		p[NETMAP_IF_POOL].memtotal
639 			+ p[NETMAP_RING_POOL].memtotal
640 			+ p[NETMAP_BUF_POOL].memtotal);
641 	NMA_UNLOCK(nmd);
642 #ifndef _WIN32
643 	return 0;	// XXX bad address
644 #else
645 	vm_paddr_t res;
646 	res.QuadPart = 0;
647 	return res;
648 #endif
649 }
650 
651 #ifdef _WIN32
652 
653 /*
654  * win32_build_virtual_memory_for_userspace
655  *
656  * This function get all the object making part of the pools and maps
657  * a contiguous virtual memory space for the userspace
658  * It works this way
659  * 1 - allocate a Memory Descriptor List wide as the sum
660  *		of the memory needed for the pools
661  * 2 - cycle all the objects in every pool and for every object do
662  *
663  *		2a - cycle all the objects in every pool, get the list
664  *				of the physical address descriptors
665  *		2b - calculate the offset in the array of pages desciptor in the
666  *				main MDL
667  *		2c - copy the descriptors of the object in the main MDL
668  *
669  * 3 - return the resulting MDL that needs to be mapped in userland
670  *
671  * In this way we end up with a single MDL that describes all the memory
672  * used by the objects in every pool
673 */
674 
675 PMDL
676 win32_build_user_vm_map(struct netmap_mem_d* nmd)
677 {
678 	int i, j;
679 	u_int memsize, memflags, ofs = 0;
680 	PMDL mainMdl, tempMdl;
681 
682 	if (netmap_mem_get_info(nmd, &memsize, &memflags, NULL)) {
683 		D("memory not finalised yet");
684 		return NULL;
685 	}
686 
687 	mainMdl = IoAllocateMdl(NULL, memsize, FALSE, FALSE, NULL);
688 	if (mainMdl == NULL) {
689 		D("failed to allocate mdl");
690 		return NULL;
691 	}
692 
693 	NMA_LOCK(nmd);
694 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
695 		struct netmap_obj_pool *p = &nmd->pools[i];
696 		int clsz = p->_clustsize;
697 		int clobjs = p->_clustentries; /* objects per cluster */
698 		int mdl_len = sizeof(PFN_NUMBER) * BYTES_TO_PAGES(clsz);
699 		PPFN_NUMBER pSrc, pDst;
700 
701 		/* each pool has a different cluster size so we need to reallocate */
702 		tempMdl = IoAllocateMdl(p->lut[0].vaddr, clsz, FALSE, FALSE, NULL);
703 		if (tempMdl == NULL) {
704 			NMA_UNLOCK(nmd);
705 			D("fail to allocate tempMdl");
706 			IoFreeMdl(mainMdl);
707 			return NULL;
708 		}
709 		pSrc = MmGetMdlPfnArray(tempMdl);
710 		/* create one entry per cluster, the lut[] has one entry per object */
711 		for (j = 0; j < p->numclusters; j++, ofs += clsz) {
712 			pDst = &MmGetMdlPfnArray(mainMdl)[BYTES_TO_PAGES(ofs)];
713 			MmInitializeMdl(tempMdl, p->lut[j*clobjs].vaddr, clsz);
714 			MmBuildMdlForNonPagedPool(tempMdl); /* compute physical page addresses */
715 			RtlCopyMemory(pDst, pSrc, mdl_len); /* copy the page descriptors */
716 			mainMdl->MdlFlags = tempMdl->MdlFlags; /* XXX what is in here ? */
717 		}
718 		IoFreeMdl(tempMdl);
719 	}
720 	NMA_UNLOCK(nmd);
721 	return mainMdl;
722 }
723 
724 #endif /* _WIN32 */
725 
726 /*
727  * helper function for OS-specific mmap routines (currently only windows).
728  * Given an nmd and a pool index, returns the cluster size and number of clusters.
729  * Returns 0 if memory is finalized and the pool is valid, otherwise 1.
730  * It should be called under NMA_LOCK(nmd), otherwise the underlying info can change.
731  */
732 
733 int
734 netmap_mem2_get_pool_info(struct netmap_mem_d* nmd, u_int pool, u_int *clustsize, u_int *numclusters)
735 {
736 	if (!nmd || !clustsize || !numclusters || pool >= NETMAP_POOLS_NR)
737 		return 1; /* invalid arguments */
738 	// NMA_LOCK_ASSERT(nmd);
739 	if (!(nmd->flags & NETMAP_MEM_FINALIZED)) {
740 		*clustsize = *numclusters = 0;
741 		return 1; /* not ready yet */
742 	}
743 	*clustsize = nmd->pools[pool]._clustsize;
744 	*numclusters = nmd->pools[pool].numclusters;
745 	return 0; /* success */
746 }
747 
748 static int
749 netmap_mem2_get_info(struct netmap_mem_d* nmd, u_int* size, u_int *memflags,
750 	nm_memid_t *id)
751 {
752 	int error = 0;
753 	NMA_LOCK(nmd);
754 	error = netmap_mem_config(nmd);
755 	if (error)
756 		goto out;
757 	if (size) {
758 		if (nmd->flags & NETMAP_MEM_FINALIZED) {
759 			*size = nmd->nm_totalsize;
760 		} else {
761 			int i;
762 			*size = 0;
763 			for (i = 0; i < NETMAP_POOLS_NR; i++) {
764 				struct netmap_obj_pool *p = nmd->pools + i;
765 				*size += (p->_numclusters * p->_clustsize);
766 			}
767 		}
768 	}
769 	if (memflags)
770 		*memflags = nmd->flags;
771 	if (id)
772 		*id = nmd->nm_id;
773 out:
774 	NMA_UNLOCK(nmd);
775 	return error;
776 }
777 
778 /*
779  * we store objects by kernel address, need to find the offset
780  * within the pool to export the value to userspace.
781  * Algorithm: scan until we find the cluster, then add the
782  * actual offset in the cluster
783  */
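/*
 * E.g. (hypothetical numbers) with _clustsize = 4096, _clustentries = 2 and
 * _objsize = 2048, the object at lut[5].vaddr lives in the third cluster,
 * second slot, so the returned offset is 2 * 4096 + 2048 = 10240.
 */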
784 static ssize_t
785 netmap_obj_offset(struct netmap_obj_pool *p, const void *vaddr)
786 {
787 	int i, k = p->_clustentries, n = p->objtotal;
788 	ssize_t ofs = 0;
789 
790 	for (i = 0; i < n; i += k, ofs += p->_clustsize) {
791 		const char *base = p->lut[i].vaddr;
792 		ssize_t relofs = (const char *) vaddr - base;
793 
794 		if (relofs < 0 || relofs >= p->_clustsize)
795 			continue;
796 
797 		ofs = ofs + relofs;
798 		ND("%s: return offset %d (cluster %d) for pointer %p",
799 		    p->name, ofs, i, vaddr);
800 		return ofs;
801 	}
802 	D("address %p is not contained inside any cluster (%s)",
803 	    vaddr, p->name);
804 	return 0; /* An error occurred */
805 }
806 
807 /* Helper functions which convert virtual addresses to offsets */
808 #define netmap_if_offset(n, v)					\
809 	netmap_obj_offset(&(n)->pools[NETMAP_IF_POOL], (v))
810 
811 #define netmap_ring_offset(n, v)				\
812     ((n)->pools[NETMAP_IF_POOL].memtotal + 			\
813 	netmap_obj_offset(&(n)->pools[NETMAP_RING_POOL], (v)))
814 
815 static ssize_t
816 netmap_mem2_if_offset(struct netmap_mem_d *nmd, const void *addr)
817 {
818 	ssize_t v;
819 	NMA_LOCK(nmd);
820 	v = netmap_if_offset(nmd, addr);
821 	NMA_UNLOCK(nmd);
822 	return v;
823 }
824 
825 /*
826  * report the index, and use start position as a hint,
827  * otherwise buffer allocation becomes terribly expensive.
828  */
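/*
 * The bitmap stores one bit per object, 1 meaning free. E.g. if
 * bitmap[i] == 0x5, objects i*32 and i*32 + 2 are free and every other
 * object covered by that word is in use.
 */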
829 static void *
830 netmap_obj_malloc(struct netmap_obj_pool *p, u_int len, uint32_t *start, uint32_t *index)
831 {
832 	uint32_t i = 0;			/* index in the bitmap */
833 	uint32_t mask, j = 0;		/* slot counter */
834 	void *vaddr = NULL;
835 
836 	if (len > p->_objsize) {
837 		D("%s request size %d too large", p->name, len);
838 		// XXX cannot reduce the size
839 		return NULL;
840 	}
841 
842 	if (p->objfree == 0) {
843 		D("no more %s objects", p->name);
844 		return NULL;
845 	}
846 	if (start)
847 		i = *start;
848 
849 	/* termination is guaranteed by p->objfree, but better check bounds on i */
850 	while (vaddr == NULL && i < p->bitmap_slots)  {
851 		uint32_t cur = p->bitmap[i];
852 		if (cur == 0) { /* bitmask is fully used */
853 			i++;
854 			continue;
855 		}
856 		/* locate a slot */
857 		for (j = 0, mask = 1; (cur & mask) == 0; j++, mask <<= 1)
858 			;
859 
860 		p->bitmap[i] &= ~mask; /* mark object as in use */
861 		p->objfree--;
862 
863 		vaddr = p->lut[i * 32 + j].vaddr;
864 		if (index)
865 			*index = i * 32 + j;
866 	}
867 	ND("%s allocator: allocated object @ [%d][%d]: vaddr %p",p->name, i, j, vaddr);
868 
869 	if (start)
870 		*start = i;
871 	return vaddr;
872 }
873 
874 
875 /*
876  * free by index, not by address.
877  * XXX should we also cleanup the content ?
878  */
879 static int
880 netmap_obj_free(struct netmap_obj_pool *p, uint32_t j)
881 {
882 	uint32_t *ptr, mask;
883 
884 	if (j >= p->objtotal) {
885 		D("invalid index %u, max %u", j, p->objtotal);
886 		return 1;
887 	}
888 	ptr = &p->bitmap[j / 32];
889 	mask = (1 << (j % 32));
890 	if (*ptr & mask) {
891 		D("ouch, double free on buffer %d", j);
892 		return 1;
893 	} else {
894 		*ptr |= mask;
895 		p->objfree++;
896 		return 0;
897 	}
898 }
899 
900 /*
901  * free by address. This is slow but is only used for a few
902  * objects (rings, nifp)
903  */
904 static void
905 netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr)
906 {
907 	u_int i, j, n = p->numclusters;
908 
909 	for (i = 0, j = 0; i < n; i++, j += p->_clustentries) {
910 		void *base = p->lut[i * p->_clustentries].vaddr;
911 		ssize_t relofs = (ssize_t) vaddr - (ssize_t) base;
912 
913 		/* The given address is outside the scope of the current cluster. */
914 		if (vaddr < base || relofs >= p->_clustsize)
915 			continue;
916 
917 		j = j + relofs / p->_objsize;
918 		/* KASSERT(j != 0, ("Cannot free object 0")); */
919 		netmap_obj_free(p, j);
920 		return;
921 	}
922 	D("address %p is not contained inside any cluster (%s)",
923 	    vaddr, p->name);
924 }
925 
926 #define netmap_mem_bufsize(n)	\
927 	((n)->pools[NETMAP_BUF_POOL]._objsize)
928 
929 #define netmap_if_malloc(n, len)	netmap_obj_malloc(&(n)->pools[NETMAP_IF_POOL], len, NULL, NULL)
930 #define netmap_if_free(n, v)		netmap_obj_free_va(&(n)->pools[NETMAP_IF_POOL], (v))
931 #define netmap_ring_malloc(n, len)	netmap_obj_malloc(&(n)->pools[NETMAP_RING_POOL], len, NULL, NULL)
932 #define netmap_ring_free(n, v)		netmap_obj_free_va(&(n)->pools[NETMAP_RING_POOL], (v))
933 #define netmap_buf_malloc(n, _pos, _index)			\
934 	netmap_obj_malloc(&(n)->pools[NETMAP_BUF_POOL], netmap_mem_bufsize(n), _pos, _index)
935 
936 
937 #if 0 // XXX unused
938 /* Return the index associated to the given packet buffer */
939 #define netmap_buf_index(n, v)						\
940     (netmap_obj_offset(&(n)->pools[NETMAP_BUF_POOL], (v)) / NETMAP_BDG_BUF_SIZE(n))
941 #endif
942 
943 /*
944  * allocate extra buffers in a linked list.
945  * returns the actual number.
946  */
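/*
 * The list is threaded through the buffers themselves: *head is the index
 * of the first buffer, and the first 4 bytes of each buffer hold the index
 * of the next one, with 0 terminating the list (buffer 0 is reserved).
 */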
947 uint32_t
948 netmap_extra_alloc(struct netmap_adapter *na, uint32_t *head, uint32_t n)
949 {
950 	struct netmap_mem_d *nmd = na->nm_mem;
951 	uint32_t i, pos = 0; /* opaque, scan position in the bitmap */
952 
953 	NMA_LOCK(nmd);
954 
955 	*head = 0;	/* default, 'null' index, i.e. empty list */
956 	for (i = 0 ; i < n; i++) {
957 		uint32_t cur = *head;	/* save current head */
958 		uint32_t *p = netmap_buf_malloc(nmd, &pos, head);
959 		if (p == NULL) {
960 			D("no more buffers after %d of %d", i, n);
961 			*head = cur; /* restore */
962 			break;
963 		}
964 		ND(5, "allocate buffer %d -> %d", *head, cur);
965 		*p = cur; /* link to previous head */
966 	}
967 
968 	NMA_UNLOCK(nmd);
969 
970 	return i;
971 }
972 
973 static void
974 netmap_extra_free(struct netmap_adapter *na, uint32_t head)
975 {
976         struct lut_entry *lut = na->na_lut.lut;
977 	struct netmap_mem_d *nmd = na->nm_mem;
978 	struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
979 	uint32_t i, cur, *buf;
980 
981 	ND("freeing the extra list");
982 	for (i = 0; head >=2 && head < p->objtotal; i++) {
983 		cur = head;
984 		buf = lut[head].vaddr;
985 		head = *buf;
986 		*buf = 0;
987 		if (netmap_obj_free(p, cur))
988 			break;
989 	}
990 	if (head != 0)
991 		D("breaking with head %d", head);
992 	if (netmap_verbose)
993 		D("freed %d buffers", i);
994 }
995 
996 
997 /* Return nonzero on error */
998 static int
999 netmap_new_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n)
1000 {
1001 	struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
1002 	u_int i = 0;	/* slot counter */
1003 	uint32_t pos = 0;	/* slot in p->bitmap */
1004 	uint32_t index = 0;	/* buffer index */
1005 
1006 	for (i = 0; i < n; i++) {
1007 		void *vaddr = netmap_buf_malloc(nmd, &pos, &index);
1008 		if (vaddr == NULL) {
1009 			D("no more buffers after %d of %d", i, n);
1010 			goto cleanup;
1011 		}
1012 		slot[i].buf_idx = index;
1013 		slot[i].len = p->_objsize;
1014 		slot[i].flags = 0;
1015 	}
1016 
1017 	ND("allocated %d buffers, %d available, first at %d", n, p->objfree, pos);
1018 	return (0);
1019 
1020 cleanup:
1021 	while (i > 0) {
1022 		i--;
1023 		netmap_obj_free(p, slot[i].buf_idx);
1024 	}
1025 	bzero(slot, n * sizeof(slot[0]));
1026 	return (ENOMEM);
1027 }
1028 
1029 static void
1030 netmap_mem_set_ring(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n, uint32_t index)
1031 {
1032 	struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
1033 	u_int i;
1034 
1035 	for (i = 0; i < n; i++) {
1036 		slot[i].buf_idx = index;
1037 		slot[i].len = p->_objsize;
1038 		slot[i].flags = 0;
1039 	}
1040 }
1041 
1042 
1043 static void
1044 netmap_free_buf(struct netmap_mem_d *nmd, uint32_t i)
1045 {
1046 	struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
1047 
1048 	if (i < 2 || i >= p->objtotal) {
1049 		D("Cannot free buf#%d: should be in [2, %d[", i, p->objtotal);
1050 		return;
1051 	}
1052 	netmap_obj_free(p, i);
1053 }
1054 
1055 
1056 static void
1057 netmap_free_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n)
1058 {
1059 	u_int i;
1060 
1061 	for (i = 0; i < n; i++) {
1062 		if (slot[i].buf_idx >= 2) /* skip the two reserved buffers */
1063 			netmap_free_buf(nmd, slot[i].buf_idx);
1064 	}
1065 }
1066 
1067 static void
1068 netmap_reset_obj_allocator(struct netmap_obj_pool *p)
1069 {
1070 
1071 	if (p == NULL)
1072 		return;
1073 	if (p->bitmap)
1074 		free(p->bitmap, M_NETMAP);
1075 	p->bitmap = NULL;
1076 	if (p->lut) {
1077 		u_int i;
1078 
1079 		/*
1080 		 * Free each cluster allocated in
1081 		 * netmap_finalize_obj_allocator().  The cluster start
1082 		 * addresses are stored at multiples of p->_clustentries
1083 		 * in the lut.
1084 		 */
1085 		for (i = 0; i < p->objtotal; i += p->_clustentries) {
1086 			if (p->lut[i].vaddr)
1087 				contigfree(p->lut[i].vaddr, p->_clustsize, M_NETMAP);
1088 		}
1089 		bzero(p->lut, sizeof(struct lut_entry) * p->objtotal);
1090 #ifdef linux
1091 		vfree(p->lut);
1092 #else
1093 		free(p->lut, M_NETMAP);
1094 #endif
1095 	}
1096 	p->lut = NULL;
1097 	p->objtotal = 0;
1098 	p->memtotal = 0;
1099 	p->numclusters = 0;
1100 	p->objfree = 0;
1101 }
1102 
1103 /*
1104  * Free all resources related to an allocator.
1105  */
1106 static void
1107 netmap_destroy_obj_allocator(struct netmap_obj_pool *p)
1108 {
1109 	if (p == NULL)
1110 		return;
1111 	netmap_reset_obj_allocator(p);
1112 }
1113 
1114 /*
1115  * We receive a request for objtotal objects, of size objsize each.
1116  * Internally we may round up both numbers, as we allocate objects
1117  * in small clusters multiple of the page size.
1118  * We need to keep track of objtotal and clustentries,
1119  * as they are needed when freeing memory.
1120  *
1121  * XXX note -- userspace needs the buffers to be contiguous,
1122  *	so we cannot afford gaps at the end of a cluster.
1123  */
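/*
 * Worked example (assuming PAGE_SIZE = 4096): a request for 100 objects of
 * 2000 bytes each is first rounded up to objsize = 2048 (64-byte cache-line
 * alignment), then packed two per 4096-byte cluster, giving
 * _numclusters = 50 and _objtotal = 100.
 */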
1124 
1125 
1126 /* call with NMA_LOCK held */
1127 static int
1128 netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int objsize)
1129 {
1130 	int i;
1131 	u_int clustsize;	/* the cluster size, multiple of page size */
1132 	u_int clustentries;	/* how many objects per cluster */
1133 
1134 	/* we store the current request, so we can
1135 	 * detect configuration changes later */
1136 	p->r_objtotal = objtotal;
1137 	p->r_objsize = objsize;
1138 
1139 #define MAX_CLUSTSIZE	(1<<22)		// 4 MB
1140 #define LINE_ROUND	NM_CACHE_ALIGN	// 64
1141 	if (objsize >= MAX_CLUSTSIZE) {
1142 		/* we could do it but there is no point */
1143 		D("unsupported allocation for %d bytes", objsize);
1144 		return EINVAL;
1145 	}
1146 	/* make sure objsize is a multiple of LINE_ROUND */
1147 	i = (objsize & (LINE_ROUND - 1));
1148 	if (i) {
1149 		D("XXX aligning object by %d bytes", LINE_ROUND - i);
1150 		objsize += LINE_ROUND - i;
1151 	}
1152 	if (objsize < p->objminsize || objsize > p->objmaxsize) {
1153 		D("requested objsize %d out of range [%d, %d]",
1154 			objsize, p->objminsize, p->objmaxsize);
1155 		return EINVAL;
1156 	}
1157 	if (objtotal < p->nummin || objtotal > p->nummax) {
1158 		D("requested objtotal %d out of range [%d, %d]",
1159 			objtotal, p->nummin, p->nummax);
1160 		return EINVAL;
1161 	}
1162 	/*
1163 	 * Compute number of objects using a brute-force approach:
1164 	 * given a max cluster size,
1165 	 * we try to fill it with objects keeping track of the
1166 	 * wasted space to the next page boundary.
1167 	 */
1168 	for (clustentries = 0, i = 1;; i++) {
1169 		u_int delta, used = i * objsize;
1170 		if (used > MAX_CLUSTSIZE)
1171 			break;
1172 		delta = used % PAGE_SIZE;
1173 		if (delta == 0) { // exact solution
1174 			clustentries = i;
1175 			break;
1176 		}
1177 	}
1178 	/* exact solution not found */
1179 	if (clustentries == 0) {
1180 		D("unsupported allocation for %d bytes", objsize);
1181 		return EINVAL;
1182 	}
1183 	/* compute clustsize */
1184 	clustsize = clustentries * objsize;
1185 	if (netmap_verbose)
1186 		D("objsize %d clustsize %d objects %d",
1187 			objsize, clustsize, clustentries);
1188 
1189 	/*
1190 	 * The number of clusters is n = ceil(objtotal/clustentries)
1191 	 * objtotal' = n * clustentries
1192 	 */
1193 	p->_clustentries = clustentries;
1194 	p->_clustsize = clustsize;
1195 	p->_numclusters = (objtotal + clustentries - 1) / clustentries;
1196 
1197 	/* actual values (may be larger than requested) */
1198 	p->_objsize = objsize;
1199 	p->_objtotal = p->_numclusters * clustentries;
1200 
1201 	return 0;
1202 }
1203 
1204 static struct lut_entry *
1205 nm_alloc_lut(u_int nobj)
1206 {
1207 	size_t n = sizeof(struct lut_entry) * nobj;
1208 	struct lut_entry *lut;
1209 #ifdef linux
1210 	lut = vmalloc(n);
1211 #else
1212 	lut = malloc(n, M_NETMAP, M_NOWAIT | M_ZERO);
1213 #endif
1214 	return lut;
1215 }
1216 
1217 /* call with NMA_LOCK held */
1218 static int
1219 netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
1220 {
1221 	int i; /* must be signed */
1222 	size_t n;
1223 
1224 	/* optimistically assume we have enough memory */
1225 	p->numclusters = p->_numclusters;
1226 	p->objtotal = p->_objtotal;
1227 
1228 	p->lut = nm_alloc_lut(p->objtotal);
1229 	if (p->lut == NULL) {
1230 		D("Unable to create lookup table for '%s'", p->name);
1231 		goto clean;
1232 	}
1233 
1234 	/* Allocate the bitmap */
1235 	n = (p->objtotal + 31) / 32;
1236 	p->bitmap = malloc(sizeof(uint32_t) * n, M_NETMAP, M_NOWAIT | M_ZERO);
1237 	if (p->bitmap == NULL) {
1238 		D("Unable to create bitmap (%d entries) for allocator '%s'", (int)n,
1239 		    p->name);
1240 		goto clean;
1241 	}
1242 	p->bitmap_slots = n;
1243 
1244 	/*
1245 	 * Allocate clusters, init pointers and bitmap
1246 	 */
1247 
1248 	n = p->_clustsize;
1249 	for (i = 0; i < (int)p->objtotal;) {
1250 		int lim = i + p->_clustentries;
1251 		char *clust;
1252 
1253 		/*
1254 		 * XXX Note, we only need contigmalloc() for buffers attached
1255 		 * to native interfaces. In all other cases (nifp, netmap rings
1256 		 * and even buffers for VALE ports or emulated interfaces) we
1257 		 * can live with standard malloc, because the hardware will not
1258 		 * access the pages directly.
1259 		 */
1260 		clust = contigmalloc(n, M_NETMAP, M_NOWAIT | M_ZERO,
1261 		    (size_t)0, -1UL, PAGE_SIZE, 0);
1262 		if (clust == NULL) {
1263 			/*
1264 			 * If we get here, there is a severe memory shortage,
1265 			 * so halve the allocated memory to reclaim some.
1266 			 */
1267 			D("Unable to create cluster at %d for '%s' allocator",
1268 			    i, p->name);
1269 			if (i < 2) /* nothing to halve */
1270 				goto out;
1271 			lim = i / 2;
1272 			for (i--; i >= lim; i--) {
1273 				p->bitmap[ (i>>5) ] &=  ~( 1 << (i & 31) );
1274 				if (i % p->_clustentries == 0 && p->lut[i].vaddr)
1275 					contigfree(p->lut[i].vaddr,
1276 						n, M_NETMAP);
1277 				p->lut[i].vaddr = NULL;
1278 			}
1279 		out:
1280 			p->objtotal = i;
1281 			/* we may have stopped in the middle of a cluster */
1282 			p->numclusters = (i + p->_clustentries - 1) / p->_clustentries;
1283 			break;
1284 		}
1285 		/*
1286 		 * Set bitmap and lut state for all buffers in the current
1287 		 * cluster.
1288 		 *
1289 		 * [i, lim) is the set of buffer indexes that cover the
1290 		 * current cluster.
1291 		 *
1292 		 * 'clust' is really the address of the current buffer in
1293 		 * the current cluster as we index through it with a stride
1294 		 * of p->_objsize.
1295 		 */
1296 		for (; i < lim; i++, clust += p->_objsize) {
1297 			p->bitmap[ (i>>5) ] |=  ( 1 << (i & 31) );
1298 			p->lut[i].vaddr = clust;
1299 			p->lut[i].paddr = vtophys(clust);
1300 		}
1301 	}
1302 	p->objfree = p->objtotal;
1303 	p->memtotal = p->numclusters * p->_clustsize;
1304 	if (p->objfree == 0)
1305 		goto clean;
1306 	if (netmap_verbose)
1307 		D("Pre-allocated %d clusters (%d/%dKB) for '%s'",
1308 		    p->numclusters, p->_clustsize >> 10,
1309 		    p->memtotal >> 10, p->name);
1310 
1311 	return 0;
1312 
1313 clean:
1314 	netmap_reset_obj_allocator(p);
1315 	return ENOMEM;
1316 }
1317 
1318 /* call with lock held */
1319 static int
1320 netmap_memory_config_changed(struct netmap_mem_d *nmd)
1321 {
1322 	int i;
1323 
1324 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1325 		if (nmd->pools[i].r_objsize != netmap_params[i].size ||
1326 		    nmd->pools[i].r_objtotal != netmap_params[i].num)
1327 		    return 1;
1328 	}
1329 	return 0;
1330 }
1331 
1332 static void
1333 netmap_mem_reset_all(struct netmap_mem_d *nmd)
1334 {
1335 	int i;
1336 
1337 	if (netmap_verbose)
1338 		D("resetting %p", nmd);
1339 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1340 		netmap_reset_obj_allocator(&nmd->pools[i]);
1341 	}
1342 	nmd->flags  &= ~NETMAP_MEM_FINALIZED;
1343 }
1344 
1345 static int
1346 netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na)
1347 {
1348 	int i, lim = p->_objtotal;
1349 
1350 	if (na->pdev == NULL)
1351 		return 0;
1352 
1353 #if defined(__FreeBSD__)
1354 	(void)i;
1355 	(void)lim;
1356 	D("unsupported on FreeBSD");
1357 
1358 #elif defined(_WIN32)
1359 	(void)i;
1360 	(void)lim;
1361 	D("unsupported on Windows");	//XXX_ale, really?
1362 #else /* linux */
1363 	for (i = 2; i < lim; i++) {
1364 		netmap_unload_map(na, (bus_dma_tag_t) na->pdev, &p->lut[i].paddr);
1365 	}
1366 #endif /* linux */
1367 
1368 	return 0;
1369 }
1370 
1371 static int
1372 netmap_mem_map(struct netmap_obj_pool *p, struct netmap_adapter *na)
1373 {
1374 #if defined(__FreeBSD__)
1375 	D("unsupported on FreeBSD");
1376 #elif defined(_WIN32)
1377 	D("unsupported on Windows");	//XXX_ale, really?
1378 #else /* linux */
1379 	int i, lim = p->_objtotal;
1380 
1381 	if (na->pdev == NULL)
1382 		return 0;
1383 
1384 	for (i = 2; i < lim; i++) {
1385 		netmap_load_map(na, (bus_dma_tag_t) na->pdev, &p->lut[i].paddr,
1386 				p->lut[i].vaddr);
1387 	}
1388 #endif /* linux */
1389 
1390 	return 0;
1391 }
1392 
1393 static int
1394 netmap_mem_init_shared_info(struct netmap_mem_d *nmd)
1395 {
1396 	struct netmap_mem_shared_info *nms_info;
1397 	ssize_t base;
1398 
1399         /* Use the first slot in IF_POOL */
1400 	nms_info = netmap_if_malloc(nmd, sizeof(*nms_info));
1401 	if (nms_info == NULL) {
1402 	    return ENOMEM;
1403 	}
1404 
1405 	base = netmap_if_offset(nmd, nms_info);
1406 
1407         memcpy(&nms_info->up, &nms_if_blueprint, sizeof(nms_if_blueprint));
1408 	nms_info->buf_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal + nmd->pools[NETMAP_RING_POOL].memtotal;
1409 	nms_info->buf_pool_objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal;
1410 	nms_info->buf_pool_objsize = nmd->pools[NETMAP_BUF_POOL]._objsize;
1411 	nms_info->totalsize = nmd->nm_totalsize;
1412 	nms_info->features = NMS_FEAT_BUF_POOL | NMS_FEAT_MEMSIZE;
1413 
1414 	return 0;
1415 }
1416 
1417 static int
1418 netmap_mem_finalize_all(struct netmap_mem_d *nmd)
1419 {
1420 	int i;
1421 	if (nmd->flags & NETMAP_MEM_FINALIZED)
1422 		return 0;
1423 	nmd->lasterr = 0;
1424 	nmd->nm_totalsize = 0;
1425 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1426 		nmd->lasterr = netmap_finalize_obj_allocator(&nmd->pools[i]);
1427 		if (nmd->lasterr)
1428 			goto error;
1429 		nmd->nm_totalsize += nmd->pools[i].memtotal;
1430 	}
1431 	/* buffers 0 and 1 are reserved */
1432 	nmd->pools[NETMAP_BUF_POOL].objfree -= 2;
1433 	nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3;
1434 	nmd->flags |= NETMAP_MEM_FINALIZED;
1435 
1436 	/* expose info to the ptnetmap guest */
1437 	nmd->lasterr = netmap_mem_init_shared_info(nmd);
1438 	if (nmd->lasterr)
1439 	        goto error;
1440 
1441 	if (netmap_verbose)
1442 		D("interfaces %d KB, rings %d KB, buffers %d MB",
1443 		    nmd->pools[NETMAP_IF_POOL].memtotal >> 10,
1444 		    nmd->pools[NETMAP_RING_POOL].memtotal >> 10,
1445 		    nmd->pools[NETMAP_BUF_POOL].memtotal >> 20);
1446 
1447 	if (netmap_verbose)
1448 		D("Free buffers: %d", nmd->pools[NETMAP_BUF_POOL].objfree);
1449 
1450 
1451 	return 0;
1452 error:
1453 	netmap_mem_reset_all(nmd);
1454 	return nmd->lasterr;
1455 }
1456 
1457 
1458 
1459 static void
1460 netmap_mem_private_delete(struct netmap_mem_d *nmd)
1461 {
1462 	if (nmd == NULL)
1463 		return;
1464 	if (netmap_verbose)
1465 		D("deleting %p", nmd);
1466 	if (nmd->active > 0)
1467 		D("bug: deleting mem allocator with active=%d!", nmd->active);
1468 	nm_mem_release_id(nmd);
1469 	if (netmap_verbose)
1470 		D("done deleting %p", nmd);
1471 	NMA_LOCK_DESTROY(nmd);
1472 	free(nmd, M_DEVBUF);
1473 }
1474 
1475 static int
1476 netmap_mem_private_config(struct netmap_mem_d *nmd)
1477 {
1478 	/* nothing to do, we are configured on creation
1479  	 * and configuration never changes thereafter
1480  	 */
1481 	return 0;
1482 }
1483 
1484 static int
1485 netmap_mem_private_finalize(struct netmap_mem_d *nmd)
1486 {
1487 	int err;
1488 	err = netmap_mem_finalize_all(nmd);
1489 	if (!err)
1490 		nmd->active++;
1491 	return err;
1492 
1493 }
1494 
1495 static void
1496 netmap_mem_private_deref(struct netmap_mem_d *nmd)
1497 {
1498 	if (--nmd->active <= 0)
1499 		netmap_mem_reset_all(nmd);
1500 }
1501 
1502 
1503 /*
1504  * allocator for private memory
1505  */
1506 struct netmap_mem_d *
1507 netmap_mem_private_new(const char *name, u_int txr, u_int txd,
1508 	u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes, int *perr)
1509 {
1510 	struct netmap_mem_d *d = NULL;
1511 	struct netmap_obj_params p[NETMAP_POOLS_NR];
1512 	int i, err;
1513 	u_int v, maxd;
1514 
1515 	d = malloc(sizeof(struct netmap_mem_d),
1516 		   M_DEVBUF, M_NOWAIT | M_ZERO);
1517 	if (d == NULL) {
1518 		err = ENOMEM;
1519 		goto error;
1520 	}
1521 
1522 	*d = nm_blueprint;
1523 
1524 	err = nm_mem_assign_id(d);
1525 	if (err)
1526 		goto error;
1527 
1528 	/* account for the fake host rings */
1529 	txr++;
1530 	rxr++;
1531 
1532 	/* copy the min values */
1533 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1534 		p[i] = netmap_min_priv_params[i];
1535 	}
1536 
1537 	/* possibly increase them to fit user request */
1538 	v = sizeof(struct netmap_if) + sizeof(ssize_t) * (txr + rxr);
1539 	if (p[NETMAP_IF_POOL].size < v)
1540 		p[NETMAP_IF_POOL].size = v;
1541 	v = 2 + 4 * npipes;
1542 	if (p[NETMAP_IF_POOL].num < v)
1543 		p[NETMAP_IF_POOL].num = v;
1544 	maxd = (txd > rxd) ? txd : rxd;
1545 	v = sizeof(struct netmap_ring) + sizeof(struct netmap_slot) * maxd;
1546 	if (p[NETMAP_RING_POOL].size < v)
1547 		p[NETMAP_RING_POOL].size = v;
1548 	/* each pipe endpoint needs two tx rings (1 normal + 1 host, fake)
1549          * and two rx rings (again, 1 normal and 1 fake host)
1550          */
1551 	v = txr + rxr + 8 * npipes;
1552 	if (p[NETMAP_RING_POOL].num < v)
1553 		p[NETMAP_RING_POOL].num = v;
1554 	/* for each pipe we only need the buffers for the 4 "real" rings.
1555          * On the other hand, the pipe ring size may differ from
1556          * the parent port ring size. As a compromise, we allocate twice the
1557          * space actually needed, assuming the pipe rings are the same size as the parent rings.
1558          */
1559 	v = (4 * npipes + rxr) * rxd + (4 * npipes + txr) * txd + 2 + extra_bufs;
1560 		/* the +2 is for the tx and rx fake buffers (indices 0 and 1) */
1561 	if (p[NETMAP_BUF_POOL].num < v)
1562 		p[NETMAP_BUF_POOL].num = v;
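	/*
	 * Worked example (hypothetical request): txr = rxr = 1, txd = rxd = 1024,
	 * npipes = 2, extra_bufs = 0. After adding the host rings (txr = rxr = 2)
	 * this yields at least 20 rings (2 + 2 + 8*2) and
	 * (8 + 2)*1024 + (8 + 2)*1024 + 2 = 20482 buffers.
	 */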
1563 
1564 	if (netmap_verbose)
1565 		D("req if %d*%d ring %d*%d buf %d*%d",
1566 			p[NETMAP_IF_POOL].num,
1567 			p[NETMAP_IF_POOL].size,
1568 			p[NETMAP_RING_POOL].num,
1569 			p[NETMAP_RING_POOL].size,
1570 			p[NETMAP_BUF_POOL].num,
1571 			p[NETMAP_BUF_POOL].size);
1572 
1573 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1574 		snprintf(d->pools[i].name, NETMAP_POOL_MAX_NAMSZ,
1575 				nm_blueprint.pools[i].name,
1576 				name);
1577 		err = netmap_config_obj_allocator(&d->pools[i],
1578 				p[i].num, p[i].size);
1579 		if (err)
1580 			goto error;
1581 	}
1582 
1583 	d->flags &= ~NETMAP_MEM_FINALIZED;
1584 
1585 	NMA_LOCK_INIT(d);
1586 
1587 	return d;
1588 error:
1589 	netmap_mem_private_delete(d);
1590 	if (perr)
1591 		*perr = err;
1592 	return NULL;
1593 }
1594 
1595 
1596 /* call with lock held */
1597 static int
1598 netmap_mem_global_config(struct netmap_mem_d *nmd)
1599 {
1600 	int i;
1601 
1602 	if (nmd->active)
1603 		/* already in use, we cannot change the configuration */
1604 		goto out;
1605 
1606 	if (!netmap_memory_config_changed(nmd))
1607 		goto out;
1608 
1609 	ND("reconfiguring");
1610 
1611 	if (nmd->flags & NETMAP_MEM_FINALIZED) {
1612 		/* reset previous allocation */
1613 		for (i = 0; i < NETMAP_POOLS_NR; i++) {
1614 			netmap_reset_obj_allocator(&nmd->pools[i]);
1615 		}
1616 		nmd->flags &= ~NETMAP_MEM_FINALIZED;
1617 	}
1618 
1619 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1620 		nmd->lasterr = netmap_config_obj_allocator(&nmd->pools[i],
1621 				netmap_params[i].num, netmap_params[i].size);
1622 		if (nmd->lasterr)
1623 			goto out;
1624 	}
1625 
1626 out:
1627 
1628 	return nmd->lasterr;
1629 }
1630 
1631 static int
1632 netmap_mem_global_finalize(struct netmap_mem_d *nmd)
1633 {
1634 	int err;
1635 
1636 	/* update configuration if changed */
1637 	if (netmap_mem_global_config(nmd))
1638 		return nmd->lasterr;
1639 
1640 	nmd->active++;
1641 
1642 	if (nmd->flags & NETMAP_MEM_FINALIZED) {
1643 		/* may happen if config is not changed */
1644 		ND("nothing to do");
1645 		goto out;
1646 	}
1647 
1648 	if (netmap_mem_finalize_all(nmd))
1649 		goto out;
1650 
1651 	nmd->lasterr = 0;
1652 
1653 out:
1654 	if (nmd->lasterr)
1655 		nmd->active--;
1656 	err = nmd->lasterr;
1657 
1658 	return err;
1659 
1660 }
1661 
1662 static void
1663 netmap_mem_global_delete(struct netmap_mem_d *nmd)
1664 {
1665 	int i;
1666 
1667 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1668 	    netmap_destroy_obj_allocator(&nm_mem.pools[i]);
1669 	}
1670 
1671 	NMA_LOCK_DESTROY(&nm_mem);
1672 }
1673 
1674 int
1675 netmap_mem_init(void)
1676 {
1677 	NMA_LOCK_INIT(&nm_mem);
1678 	netmap_mem_get(&nm_mem);
1679 	return (0);
1680 }
1681 
1682 void
1683 netmap_mem_fini(void)
1684 {
1685 	netmap_mem_put(&nm_mem);
1686 }
1687 
1688 static void
1689 netmap_free_rings(struct netmap_adapter *na)
1690 {
1691 	enum txrx t;
1692 
1693 	for_rx_tx(t) {
1694 		u_int i;
1695 		for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
1696 			struct netmap_kring *kring = &NMR(na, t)[i];
1697 			struct netmap_ring *ring = kring->ring;
1698 
1699 			if (ring == NULL || kring->users > 0 || (kring->nr_kflags & NKR_NEEDRING)) {
1700 				ND("skipping ring %s (ring %p, users %d)",
1701 						kring->name, ring, kring->users);
1702 				continue;
1703 			}
1704 			if (i != nma_get_nrings(na, t) || na->na_flags & NAF_HOST_RINGS)
1705 				netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
1706 			netmap_ring_free(na->nm_mem, ring);
1707 			kring->ring = NULL;
1708 		}
1709 	}
1710 }
1711 
1712 /* call with NMA_LOCK held *
1713  *
1714  * Allocate netmap rings and buffers for this card
1715  * The rings are contiguous, but have variable size.
1716  * The kring array must follow the layout described
1717  * in netmap_krings_create().
1718  */
1719 static int
1720 netmap_mem2_rings_create(struct netmap_adapter *na)
1721 {
1722 	enum txrx t;
1723 
1724 	NMA_LOCK(na->nm_mem);
1725 
1726 	for_rx_tx(t) {
1727 		u_int i;
1728 
1729 		for (i = 0; i <= nma_get_nrings(na, t); i++) {
1730 			struct netmap_kring *kring = &NMR(na, t)[i];
1731 			struct netmap_ring *ring = kring->ring;
1732 			u_int len, ndesc;
1733 
1734 			if (ring || (!kring->users && !(kring->nr_kflags & NKR_NEEDRING))) {
1735 				/* unneeded, or already created by somebody else */
1736 				ND("skipping ring %s", kring->name);
1737 				continue;
1738 			}
1739 			ndesc = kring->nkr_num_slots;
1740 			len = sizeof(struct netmap_ring) +
1741 				  ndesc * sizeof(struct netmap_slot);
1742 			ring = netmap_ring_malloc(na->nm_mem, len);
1743 			if (ring == NULL) {
1744 				D("Cannot allocate %s_ring", nm_txrx2str(t));
1745 				goto cleanup;
1746 			}
1747 			ND("txring at %p", ring);
1748 			kring->ring = ring;
1749 			*(uint32_t *)(uintptr_t)&ring->num_slots = ndesc;
1750 			*(int64_t *)(uintptr_t)&ring->buf_ofs =
1751 			    (na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
1752 				na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
1753 				netmap_ring_offset(na->nm_mem, ring);
1754 
1755 			/* copy values from kring */
1756 			ring->head = kring->rhead;
1757 			ring->cur = kring->rcur;
1758 			ring->tail = kring->rtail;
1759 			*(uint16_t *)(uintptr_t)&ring->nr_buf_size =
1760 				netmap_mem_bufsize(na->nm_mem);
1761 			ND("%s h %d c %d t %d", kring->name,
1762 				ring->head, ring->cur, ring->tail);
1763 			ND("initializing slots for %s_ring", nm_txrx2str(t));
1764 			if (i != nma_get_nrings(na, t) || (na->na_flags & NAF_HOST_RINGS)) {
1765 				/* this is a real ring */
1766 				if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
1767 					D("Cannot allocate buffers for %s_ring", nm_txrx2str(t));
1768 					goto cleanup;
1769 				}
1770 			} else {
1771 				/* this is a fake ring, set all indices to 0 */
1772 				netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0);
1773 			}
1774 		        /* ring info */
1775 		        *(uint16_t *)(uintptr_t)&ring->ringid = kring->ring_id;
1776 		        *(uint16_t *)(uintptr_t)&ring->dir = kring->tx;
1777 		}
1778 	}
1779 
1780 	NMA_UNLOCK(na->nm_mem);
1781 
1782 	return 0;
1783 
1784 cleanup:
1785 	netmap_free_rings(na);
1786 
1787 	NMA_UNLOCK(na->nm_mem);
1788 
1789 	return ENOMEM;
1790 }
1791 
1792 static void
1793 netmap_mem2_rings_delete(struct netmap_adapter *na)
1794 {
1795 	/* last instance, release bufs and rings */
1796 	NMA_LOCK(na->nm_mem);
1797 
1798 	netmap_free_rings(na);
1799 
1800 	NMA_UNLOCK(na->nm_mem);
1801 }
1802 
1803 
1804 /* call with NMA_LOCK held */
1805 /*
1806  * Allocate the per-fd structure netmap_if.
1807  *
1808  * We assume that the configuration stored in na
1809  * (number of tx/rx rings and descs) does not change while
1810  * the interface is in netmap mode.
1811  */
1812 static struct netmap_if *
1813 netmap_mem2_if_new(struct netmap_adapter *na)
1814 {
1815 	struct netmap_if *nifp;
1816 	ssize_t base; /* handy for relative offsets between rings and nifp */
1817 	u_int i, len, n[NR_TXRX], ntot;
1818 	enum txrx t;
1819 
1820 	ntot = 0;
1821 	for_rx_tx(t) {
1822 		/* account for the (possibly fake) host rings */
1823 		n[t] = nma_get_nrings(na, t) + 1;
1824 		ntot += n[t];
1825 	}
1826 	/*
1827 	 * the descriptor is followed inline by an array of offsets
1828 	 * to the tx and rx rings in the shared memory region.
1829 	 */
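	/*
	 * Userspace can then reach ring i as
	 * (struct netmap_ring *)((char *)nifp + nifp->ring_ofs[i]),
	 * since each ring_ofs[] entry is computed below as the ring offset
	 * minus the nifp offset within the shared region.
	 */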
1830 
1831 	NMA_LOCK(na->nm_mem);
1832 
1833 	len = sizeof(struct netmap_if) + (ntot * sizeof(ssize_t));
1834 	nifp = netmap_if_malloc(na->nm_mem, len);
1835 	if (nifp == NULL) {
1836 		NMA_UNLOCK(na->nm_mem);
1837 		return NULL;
1838 	}
1839 
1840 	/* initialize base fields -- override const */
1841 	*(u_int *)(uintptr_t)&nifp->ni_tx_rings = na->num_tx_rings;
1842 	*(u_int *)(uintptr_t)&nifp->ni_rx_rings = na->num_rx_rings;
1843 	strncpy(nifp->ni_name, na->name, (size_t)IFNAMSIZ);
1844 
1845 	/*
1846 	 * fill the slots for the rx and tx rings. They contain the offset
1847 	 * between the ring and nifp, so the information is usable in
1848 	 * userspace to reach the ring from the nifp.
1849 	 */
1850 	base = netmap_if_offset(na->nm_mem, nifp);
1851 	for (i = 0; i < n[NR_TX]; i++) {
1852 		if (na->tx_rings[i].ring == NULL) {
1853 			// XXX maybe use the offset of an error ring,
1854 			// like we do for buffers?
1855 			*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] = 0;
1856 			continue;
1857 		}
1858 		*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] =
1859 			netmap_ring_offset(na->nm_mem, na->tx_rings[i].ring) - base;
1860 	}
1861 	for (i = 0; i < n[NR_RX]; i++) {
1862 		if (na->rx_rings[i].ring == NULL) {
1863 			// XXX maybe use the offset of an error ring,
1864 			// like we do for buffers?
1865 			*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] = 0;
1866 			continue;
1867 		}
1868 		*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] =
1869 			netmap_ring_offset(na->nm_mem, na->rx_rings[i].ring) - base;
1870 	}
1871 
1872 	NMA_UNLOCK(na->nm_mem);
1873 
1874 	return (nifp);
1875 }
1876 
1877 static void
1878 netmap_mem2_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
1879 {
1880 	if (nifp == NULL)
1881 		/* nothing to do */
1882 		return;
1883 	NMA_LOCK(na->nm_mem);
1884 	if (nifp->ni_bufs_head)
1885 		netmap_extra_free(na, nifp->ni_bufs_head);
1886 	netmap_if_free(na->nm_mem, nifp);
1887 
1888 	NMA_UNLOCK(na->nm_mem);
1889 }
1890 
1891 static void
1892 netmap_mem_global_deref(struct netmap_mem_d *nmd)
1893 {
1894 
1895 	nmd->active--;
1896 	if (!nmd->active)
1897 		nmd->nm_grp = -1;
1898 	if (netmap_verbose)
1899 		D("active = %d", nmd->active);
1900 
1901 }
1902 
1903 struct netmap_mem_ops netmap_mem_global_ops = {
1904 	.nmd_get_lut = netmap_mem2_get_lut,
1905 	.nmd_get_info = netmap_mem2_get_info,
1906 	.nmd_ofstophys = netmap_mem2_ofstophys,
1907 	.nmd_config = netmap_mem_global_config,
1908 	.nmd_finalize = netmap_mem_global_finalize,
1909 	.nmd_deref = netmap_mem_global_deref,
1910 	.nmd_delete = netmap_mem_global_delete,
1911 	.nmd_if_offset = netmap_mem2_if_offset,
1912 	.nmd_if_new = netmap_mem2_if_new,
1913 	.nmd_if_delete = netmap_mem2_if_delete,
1914 	.nmd_rings_create = netmap_mem2_rings_create,
1915 	.nmd_rings_delete = netmap_mem2_rings_delete
1916 };
1917 struct netmap_mem_ops netmap_mem_private_ops = {
1918 	.nmd_get_lut = netmap_mem2_get_lut,
1919 	.nmd_get_info = netmap_mem2_get_info,
1920 	.nmd_ofstophys = netmap_mem2_ofstophys,
1921 	.nmd_config = netmap_mem_private_config,
1922 	.nmd_finalize = netmap_mem_private_finalize,
1923 	.nmd_deref = netmap_mem_private_deref,
1924 	.nmd_if_offset = netmap_mem2_if_offset,
1925 	.nmd_delete = netmap_mem_private_delete,
1926 	.nmd_if_new = netmap_mem2_if_new,
1927 	.nmd_if_delete = netmap_mem2_if_delete,
1928 	.nmd_rings_create = netmap_mem2_rings_create,
1929 	.nmd_rings_delete = netmap_mem2_rings_delete
1930 };
1931 
1932 #ifdef WITH_PTNETMAP_GUEST
1933 struct mem_pt_if {
1934 	struct mem_pt_if *next;	/* next interface in the passthrough list */
1935 	struct ifnet *ifp;	/* interface in passthrough mode */
1936 	unsigned int nifp_offset;	/* offset of the netmap_if in the shared region */
1937 	nm_pt_guest_ptctl_t ptctl;	/* ptnetmap control routine */
1938 };
1939 
1940 /* Netmap allocator for ptnetmap guests. */
1941 struct netmap_mem_ptg {
1942 	struct netmap_mem_d up;
1943 
1944 	vm_paddr_t nm_paddr;            /* physical address in the guest */
1945 	void *nm_addr;                  /* virtual address in the guest */
1946 	struct netmap_lut buf_lut;      /* lookup table for BUF pool in the guest */
1947 	nm_memid_t nm_host_id;          /* allocator identifier in the host */
1948 	struct ptnetmap_memdev *ptn_dev;
1949 	struct mem_pt_if *pt_ifs;	/* list of interfaces in passthrough */
1950 };
1951 
1952 /* Link a passthrough interface to a passthrough netmap allocator. */
1953 static int
1954 netmap_mem_pt_guest_ifp_add(struct netmap_mem_d *nmd, struct ifnet *ifp,
1955 			    unsigned int nifp_offset,
1956 			    nm_pt_guest_ptctl_t ptctl)
1957 {
1958 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
1959 	struct mem_pt_if *ptif = malloc(sizeof(*ptif), M_NETMAP,
1960 					M_NOWAIT | M_ZERO);
1961 
1962 	if (!ptif) {
1963 		return ENOMEM;
1964 	}
1965 
1966 	NMA_LOCK(nmd);
1967 
1968 	ptif->ifp = ifp;
1969 	ptif->nifp_offset = nifp_offset;
1970 	ptif->ptctl = ptctl;
1971 
1972 	if (ptnmd->pt_ifs) {
1973 		ptif->next = ptnmd->pt_ifs;
1974 	}
1975 	ptnmd->pt_ifs = ptif;
1976 
1977 	NMA_UNLOCK(nmd);
1978 
1979 	D("added (ifp=%p,nifp_offset=%u)", ptif->ifp, ptif->nifp_offset);
1980 
1981 	return 0;
1982 }
1983 
1984 /* Called with NMA_LOCK(nmd) held. */
1985 static struct mem_pt_if *
1986 netmap_mem_pt_guest_ifp_lookup(struct netmap_mem_d *nmd, struct ifnet *ifp)
1987 {
1988 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
1989 	struct mem_pt_if *curr;
1990 
1991 	for (curr = ptnmd->pt_ifs; curr; curr = curr->next) {
1992 		if (curr->ifp == ifp) {
1993 			return curr;
1994 		}
1995 	}
1996 
1997 	return NULL;
1998 }
1999 
2000 /* Unlink a passthrough interface from a passthrough netmap allocator. */
2001 int
2002 netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *nmd, struct ifnet *ifp)
2003 {
2004 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
2005 	struct mem_pt_if *prev = NULL;
2006 	struct mem_pt_if *curr;
2007 	int ret = -1;
2008 
2009 	NMA_LOCK(nmd);
2010 
2011 	for (curr = ptnmd->pt_ifs; curr; curr = curr->next) {
2012 		if (curr->ifp == ifp) {
2013 			if (prev) {
2014 				prev->next = curr->next;
2015 			} else {
2016 				ptnmd->pt_ifs = curr->next;
2017 			}
2018 			D("removed (ifp=%p,nifp_offset=%u)",
2019 			  curr->ifp, curr->nifp_offset);
2020 			free(curr, M_NETMAP);
2021 			ret = 0;
2022 			break;
2023 		}
2024 		prev = curr;
2025 	}
2026 
2027 	NMA_UNLOCK(nmd);
2028 
2029 	return ret;
2030 }
2031 
2032 /* Read allocator info from the first netmap_if (only on finalize) */
2033 static int
2034 netmap_mem_pt_guest_read_shared_info(struct netmap_mem_d *nmd)
2035 {
2036 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
2037 	struct netmap_mem_shared_info *nms_info;
2038 	uint32_t bufsize;
2039 	uint32_t nbuffers;
2040 	char *vaddr;
2041 	vm_paddr_t paddr;
2042 	int i;
2043 
2044 	nms_info = (struct netmap_mem_shared_info *)ptnmd->nm_addr;
2045 	if (strncmp(nms_info->up.ni_name, NMS_NAME, sizeof(NMS_NAME)) != 0) {
2046 		D("error, the first slot does not contain shared info");
2047 		return EINVAL;
2048 	}
2049 	/* check that the shared info advertises the features we rely on */
2050 	if ((nms_info->features & (NMS_FEAT_BUF_POOL | NMS_FEAT_MEMSIZE)) !=
2051 	    (NMS_FEAT_BUF_POOL | NMS_FEAT_MEMSIZE)) {
2052 		D("error, the shared info does not contain BUF_POOL and MEMSIZE");
2053 		return EINVAL;
2054 	}
2055 
2056 	bufsize = nms_info->buf_pool_objsize;
2057 	nbuffers = nms_info->buf_pool_objtotal;
2058 
2059 	/* allocate the lut */
2060 	if (ptnmd->buf_lut.lut == NULL) {
2061 		D("allocating lut");
2062 		ptnmd->buf_lut.lut = nm_alloc_lut(nbuffers);
2063 		if (ptnmd->buf_lut.lut == NULL) {
2064 			D("lut allocation failed");
2065 			return ENOMEM;
2066 		}
2067 	}
2068 
2069 	/* we have physically contiguous memory mapped through PCI BAR */
2070 	vaddr = (char *)(ptnmd->nm_addr) + nms_info->buf_pool_offset;
2071 	paddr = ptnmd->nm_paddr + nms_info->buf_pool_offset;
2072 
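	/*
	 * Since the BAR is one contiguous region, buffer i simply lives at
	 * buf_pool_offset + i * bufsize from the start of the mapping, both
	 * in the virtual and in the physical address space; the loop below
	 * fills the lut with exactly this arithmetic.
	 */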
2073 	for (i = 0; i < nbuffers; i++) {
2074 		ptnmd->buf_lut.lut[i].vaddr = vaddr;
2075 		ptnmd->buf_lut.lut[i].paddr = paddr;
2076 		vaddr += bufsize;
2077 		paddr += bufsize;
2078 	}
2079 
2080 	ptnmd->buf_lut.objtotal = nbuffers;
2081 	ptnmd->buf_lut.objsize = bufsize;
2082 
2083 	nmd->nm_totalsize = nms_info->totalsize;
2084 
2085 	return 0;
2086 }
2087 
2088 static int
2089 netmap_mem_pt_guest_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut)
2090 {
2091 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
2092 
2093 	if (!(nmd->flags & NETMAP_MEM_FINALIZED)) {
2094 		return EINVAL;
2095 	}
2096 
2097 	*lut = ptnmd->buf_lut;
2098 	return 0;
2099 }
2100 
2101 static int
2102 netmap_mem_pt_guest_get_info(struct netmap_mem_d *nmd, u_int *size,
2103 			     u_int *memflags, uint16_t *id)
2104 {
2105 	int error = 0;
2106 
2107 	NMA_LOCK(nmd);
2108 
2109 	error = nmd->ops->nmd_config(nmd);
2110 	if (error)
2111 		goto out;
2112 
2113 	if (size)
2114 		*size = nmd->nm_totalsize;
2115 	if (memflags)
2116 		*memflags = nmd->flags;
2117 	if (id)
2118 		*id = nmd->nm_id;
2119 
2120 out:
2121 	NMA_UNLOCK(nmd);
2122 
2123 	return error;
2124 }
2125 
2126 static vm_paddr_t
2127 netmap_mem_pt_guest_ofstophys(struct netmap_mem_d *nmd, vm_ooffset_t off)
2128 {
2129 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
2130 	vm_paddr_t paddr;
2131 	/* if the offset is valid, just return nm_paddr (the BAR base) + off */
2132 	paddr = (vm_paddr_t)(ptnmd->nm_paddr + off);
2133 	ND("off %lx paddr %lx", off, (unsigned long)paddr);
2134 	return paddr;
2135 }
2136 
2137 static int
2138 netmap_mem_pt_guest_config(struct netmap_mem_d *nmd)
2139 {
2140 	/* nothing to do, we are configured on creation
2141 	 * and configuration never changes thereafter
2142 	 */
2143 	return 0;
2144 }
2145 
2146 static int
2147 netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd)
2148 {
2149 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
2150 	int error = 0;
2151 
2152 	nmd->active++;
2153 
2154 	if (nmd->flags & NETMAP_MEM_FINALIZED)
2155 		goto out;
2156 
2157 	if (ptnmd->ptn_dev == NULL) {
2158 		D("ptnetmap memdev not attached");
2159 		error = ENOMEM;
2160 		goto err;
2161 	}
2162 	/* map memory through ptnetmap-memdev BAR */
2163 	error = nm_os_pt_memdev_iomap(ptnmd->ptn_dev, &ptnmd->nm_paddr,
2164 				      &ptnmd->nm_addr);
2165 	if (error)
2166 		goto err;
2167 
2168 	/* read allocator info and create the lut */
2169 	error = netmap_mem_pt_guest_read_shared_info(nmd);
2170 	if (error)
2171 		goto err;
2172 
2173 	nmd->flags |= NETMAP_MEM_FINALIZED;
2174 out:
2175 	return 0;
2176 err:
2177 	nmd->active--;
2178 	return error;
2179 }
2180 
2181 static void
2182 netmap_mem_pt_guest_deref(struct netmap_mem_d *nmd)
2183 {
2184 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
2185 
2186 	nmd->active--;
2187 	if (nmd->active <= 0 &&
2188 		(nmd->flags & NETMAP_MEM_FINALIZED)) {
2189 		nmd->flags &= ~NETMAP_MEM_FINALIZED;
2190 		/* unmap ptnetmap-memdev memory */
2191 		if (ptnmd->ptn_dev) {
2192 			nm_os_pt_memdev_iounmap(ptnmd->ptn_dev);
2193 		}
2194 		ptnmd->nm_addr = NULL;
2195 		ptnmd->nm_paddr = 0;
2196 	}
2197 }
2198 
2199 static ssize_t
2200 netmap_mem_pt_guest_if_offset(struct netmap_mem_d *nmd, const void *vaddr)
2201 {
2202 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
2203 
2204 	return (const char *)(vaddr) - (char *)(ptnmd->nm_addr);
2205 }
2206 
2207 static void
2208 netmap_mem_pt_guest_delete(struct netmap_mem_d *nmd)
2209 {
2210 	if (nmd == NULL)
2211 		return;
2212 	if (netmap_verbose)
2213 		D("deleting %p", nmd);
2214 	if (nmd->active > 0)
2215 		D("bug: deleting mem allocator with active=%d!", nmd->active);
2216 	nm_mem_release_id(nmd);
2217 	if (netmap_verbose)
2218 		D("done deleting %p", nmd);
2219 	NMA_LOCK_DESTROY(nmd);
2220 	free(nmd, M_DEVBUF);
2221 }
2222 
2223 static struct netmap_if *
2224 netmap_mem_pt_guest_if_new(struct netmap_adapter *na)
2225 {
2226 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem;
2227 	struct mem_pt_if *ptif;
2228 	struct netmap_if *nifp = NULL;
2229 
2230 	NMA_LOCK(na->nm_mem);
2231 
2232 	ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
2233 	if (ptif == NULL) {
2234 		D("Error: interface %p is not in passthrough", na->ifp);
2235 		goto out;
2236 	}
2237 
2238 	nifp = (struct netmap_if *)((char *)(ptnmd->nm_addr) +
2239 				    ptif->nifp_offset);
2240 out:
2241 	NMA_UNLOCK(na->nm_mem);
2242 	return nifp;
2243 }
2244 
2245 static void
2246 netmap_mem_pt_guest_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
2247 {
2248 	struct mem_pt_if *ptif;
2249 
2250 	NMA_LOCK(na->nm_mem);
2251 
2252 	ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
2253 	if (ptif == NULL) {
2254 		D("Error: interface %p is not in passthrough", na->ifp);
2255 		goto out;
2256 	}
2257 
2258 	ptif->ptctl(na->ifp, PTNETMAP_PTCTL_IFDELETE);
2259 out:
2260 	NMA_UNLOCK(na->nm_mem);
2261 }
2262 
2263 static int
2264 netmap_mem_pt_guest_rings_create(struct netmap_adapter *na)
2265 {
2266 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem;
2267 	struct mem_pt_if *ptif;
2268 	struct netmap_if *nifp;
2269 	int i, error = -1;
2270 
2271 	NMA_LOCK(na->nm_mem);
2272 
2273 	ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
2274 	if (ptif == NULL) {
2275 		D("Error: interface %p is not in passthrough", na->ifp);
2276 		goto out;
2277 	}
2278 
2280 	/* point each kring to the corresponding backend ring */
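	/*
	 * The nifp exported by the host is expected to use the same layout
	 * built by netmap_mem2_if_new(): the num_tx_rings + 1 tx offsets
	 * come first, followed by the num_rx_rings + 1 rx offsets, hence
	 * the i + na->num_tx_rings + 1 index used for the rx rings below.
	 */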
2281 	nifp = (struct netmap_if *)((char *)ptnmd->nm_addr + ptif->nifp_offset);
2282 	for (i = 0; i <= na->num_tx_rings; i++) {
2283 		struct netmap_kring *kring = na->tx_rings + i;
2284 		if (kring->ring)
2285 			continue;
2286 		kring->ring = (struct netmap_ring *)
2287 			((char *)nifp + nifp->ring_ofs[i]);
2288 	}
2289 	for (i = 0; i <= na->num_rx_rings; i++) {
2290 		struct netmap_kring *kring = na->rx_rings + i;
2291 		if (kring->ring)
2292 			continue;
2293 		kring->ring = (struct netmap_ring *)
2294 			((char *)nifp +
2295 			 nifp->ring_ofs[i + na->num_tx_rings + 1]);
2296 	}
2297 
2298 	//error = ptif->ptctl->nm_ptctl(ifp, PTNETMAP_PTCTL_RINGSCREATE);
2299 	error = 0;
2300 out:
2301 	NMA_UNLOCK(na->nm_mem);
2302 
2303 	return error;
2304 }
2305 
2306 static void
2307 netmap_mem_pt_guest_rings_delete(struct netmap_adapter *na)
2308 {
2309 	/* TODO: remove?? */
2310 #if 0
2311 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem;
2312 	struct mem_pt_if *ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem,
2313 								na->ifp);
2314 #endif
2315 }
2316 
2317 static struct netmap_mem_ops netmap_mem_pt_guest_ops = {
2318 	.nmd_get_lut = netmap_mem_pt_guest_get_lut,
2319 	.nmd_get_info = netmap_mem_pt_guest_get_info,
2320 	.nmd_ofstophys = netmap_mem_pt_guest_ofstophys,
2321 	.nmd_config = netmap_mem_pt_guest_config,
2322 	.nmd_finalize = netmap_mem_pt_guest_finalize,
2323 	.nmd_deref = netmap_mem_pt_guest_deref,
2324 	.nmd_if_offset = netmap_mem_pt_guest_if_offset,
2325 	.nmd_delete = netmap_mem_pt_guest_delete,
2326 	.nmd_if_new = netmap_mem_pt_guest_if_new,
2327 	.nmd_if_delete = netmap_mem_pt_guest_if_delete,
2328 	.nmd_rings_create = netmap_mem_pt_guest_rings_create,
2329 	.nmd_rings_delete = netmap_mem_pt_guest_rings_delete
2330 };
2331 
2332 /* Called with NMA_LOCK(&nm_mem) held. */
2333 static struct netmap_mem_d *
2334 netmap_mem_pt_guest_find_hostid(nm_memid_t host_id)
2335 {
2336 	struct netmap_mem_d *mem = NULL;
2337 	struct netmap_mem_d *scan = netmap_last_mem_d;
2338 
2339 	do {
2340 		/* find ptnetmap allocator through host ID */
2341 		if (scan->ops->nmd_deref == netmap_mem_pt_guest_deref &&
2342 			((struct netmap_mem_ptg *)(scan))->nm_host_id == host_id) {
2343 			mem = scan;
2344 			break;
2345 		}
2346 		scan = scan->next;
2347 	} while (scan != netmap_last_mem_d);
2348 
2349 	return mem;
2350 }
2351 
2352 /* Called with NMA_LOCK(&nm_mem) held. */
2353 static struct netmap_mem_d *
2354 netmap_mem_pt_guest_create(nm_memid_t host_id)
2355 {
2356 	struct netmap_mem_ptg *ptnmd;
2357 	int err = 0;
2358 
2359 	ptnmd = malloc(sizeof(struct netmap_mem_ptg),
2360 			M_DEVBUF, M_NOWAIT | M_ZERO);
2361 	if (ptnmd == NULL) {
2362 		err = ENOMEM;
2363 		goto error;
2364 	}
2365 
2366 	ptnmd->up.ops = &netmap_mem_pt_guest_ops;
2367 	ptnmd->nm_host_id = host_id;
2368 	ptnmd->pt_ifs = NULL;
2369 
2370 	/* assign a new id in the guest (we hold the lock) */
2371 	err = nm_mem_assign_id_locked(&ptnmd->up);
2372 	if (err)
2373 		goto error;
2374 
2375 	ptnmd->up.flags &= ~NETMAP_MEM_FINALIZED;
2376 	ptnmd->up.flags |= NETMAP_MEM_IO;
2377 
2378 	NMA_LOCK_INIT(&ptnmd->up);
2379 
2380 	return &ptnmd->up;
2381 error:
2382 	netmap_mem_pt_guest_delete(&ptnmd->up);
2383 	return NULL;
2384 }
2385 
2386 /*
2387  * look up host_id among the existing guest allocators and create
2388  * a new one if it is not there
2389  */
2390 static struct netmap_mem_d *
2391 netmap_mem_pt_guest_get(nm_memid_t host_id)
2392 {
2393 	struct netmap_mem_d *nmd;
2394 
2395 	NMA_LOCK(&nm_mem);
2396 	nmd = netmap_mem_pt_guest_find_hostid(host_id);
2397 	if (nmd == NULL) {
2398 		nmd = netmap_mem_pt_guest_create(host_id);
2399 	}
2400 	NMA_UNLOCK(&nm_mem);
2401 
2402 	return nmd;
2403 }
2404 
2405 /*
2406  * The guest allocator can be created by the ptnetmap_memdev (during the
2407  * device attach) or by the ptnetmap device driver (e1000/virtio) during
2408  * netmap_attach. The order is not important (it differs between Linux and
2409  * FreeBSD): whichever runs first creates the allocator, and the second one
2410  * simply attaches to it.
2411  */
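/*
 * Sketch of the two entry points (both go through netmap_mem_pt_guest_get(),
 * which creates the allocator the first time it sees a given host_id):
 *
 *	ptnetmap_memdev attach      -> netmap_mem_pt_guest_attach(ptn_dev, host_id)
 *	guest driver netmap_attach  -> netmap_mem_pt_guest_new(ifp, nifp_offset, ptctl)
 */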
2412 
2413 /* Called when the ptnetmap_memdev is attaching, to create (or look up) the
2414  * guest allocator and bind the memdev to it */
2415 struct netmap_mem_d *
2416 netmap_mem_pt_guest_attach(struct ptnetmap_memdev *ptn_dev, nm_memid_t host_id)
2417 {
2418 	struct netmap_mem_d *nmd;
2419 	struct netmap_mem_ptg *ptnmd;
2420 
2421 	nmd = netmap_mem_pt_guest_get(host_id);
2422 
2423 	/* assign this device to the guest allocator */
2424 	if (nmd) {
2425 		ptnmd = (struct netmap_mem_ptg *)nmd;
2426 		ptnmd->ptn_dev = ptn_dev;
2427 	}
2428 
2429 	return nmd;
2430 }
2431 
2432 /* Called when ptnetmap device (virtio/e1000) is attaching */
2433 struct netmap_mem_d *
2434 netmap_mem_pt_guest_new(struct ifnet *ifp,
2435 			unsigned int nifp_offset,
2436 			nm_pt_guest_ptctl_t ptctl)
2437 {
2438 	struct netmap_mem_d *nmd;
2439 	nm_memid_t host_id;
2440 
2441 	if (ifp == NULL || ptctl == NULL) {
2442 		return NULL;
2443 	}
2444 
2445 	/* Get the id of the host allocator. */
2446 	host_id = ptctl(ifp, PTNETMAP_PTCTL_HOSTMEMID);
2447 
2448 	nmd = netmap_mem_pt_guest_get(host_id);
2449 
2450 	if (nmd) {
2451 		netmap_mem_pt_guest_ifp_add(nmd, ifp, nifp_offset,
2452 					    ptctl);
2453 	}
2454 
2455 	return nmd;
2456 }
2457 
2458 #endif /* WITH_PTNETMAP_GUEST */
2459