xref: /freebsd/sys/dev/netmap/netmap_mem2.c (revision 5dae51da3da0cc94d17bd67b308fad304ebec7e0)
1 /*
2  * Copyright (C) 2012-2014 Matteo Landi
3  * Copyright (C) 2012-2016 Luigi Rizzo
4  * Copyright (C) 2012-2016 Giuseppe Lettieri
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *   1. Redistributions of source code must retain the above copyright
11  *      notice, this list of conditions and the following disclaimer.
12  *   2. Redistributions in binary form must reproduce the above copyright
13  *      notice, this list of conditions and the following disclaimer in the
14  *      documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #ifdef linux
30 #include "bsd_glue.h"
31 #endif /* linux */
32 
33 #ifdef __APPLE__
34 #include "osx_glue.h"
35 #endif /* __APPLE__ */
36 
37 #ifdef __FreeBSD__
38 #include <sys/cdefs.h> /* prerequisite */
39 __FBSDID("$FreeBSD$");
40 
41 #include <sys/types.h>
42 #include <sys/malloc.h>
43 #include <sys/kernel.h>		/* MALLOC_DEFINE */
44 #include <sys/proc.h>
45 #include <vm/vm.h>	/* vtophys */
46 #include <vm/pmap.h>	/* vtophys */
47 #include <sys/socket.h> /* sockaddrs */
48 #include <sys/selinfo.h>
49 #include <sys/sysctl.h>
50 #include <net/if.h>
51 #include <net/if_var.h>
52 #include <net/vnet.h>
53 #include <machine/bus.h>	/* bus_dmamap_* */
54 
55 /* M_NETMAP only used in here */
56 MALLOC_DECLARE(M_NETMAP);
57 MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");
58 
59 #endif /* __FreeBSD__ */
60 
61 #ifdef _WIN32
62 #include <win_glue.h>
63 #endif
64 
65 #include <net/netmap.h>
66 #include <dev/netmap/netmap_kern.h>
67 #include <net/netmap_virt.h>
68 #include "netmap_mem2.h"
69 
70 #ifdef _WIN32_USE_SMALL_GENERIC_DEVICES_MEMORY
71 #define NETMAP_BUF_MAX_NUM  8*4096      /* if too big takes too much time to allocate */
72 #else
73 #define NETMAP_BUF_MAX_NUM 20*4096*2	/* large machine */
74 #endif
75 
76 #define NETMAP_POOL_MAX_NAMSZ	32
77 
78 
/*
 * Indices into netmap_mem_d.pools[]. Every allocator owns one pool per
 * object kind; the pools are laid out in this order in the exported
 * memory region (see netmap_mem2_ofstophys()).
 */
enum {
	NETMAP_IF_POOL   = 0,	/* pool named "netmap_if" in nm_mem */
	NETMAP_RING_POOL,	/* pool named "netmap_ring" in nm_mem */
	NETMAP_BUF_POOL,	/* pool named "netmap_buf" in nm_mem */
	NETMAP_POOLS_NR		/* number of pools, not a valid index */
};
85 
86 
/*
 * Requested geometry for one object pool. Instances of this struct are
 * exported read/write through the dev.netmap sysctls declared by
 * DECLARE_SYSCTLS().
 */
struct netmap_obj_params {
	u_int size;	/* requested size of each object */
	u_int num;	/* requested number of objects */
};
91 
/*
 * State of a single object pool. Objects are carved out of clusters;
 * each object has an entry in 'lut' and a bit in 'bitmap'.
 */
struct netmap_obj_pool {
	char name[NETMAP_POOL_MAX_NAMSZ];	/* name of the allocator */

	/* ---------------------------------------------------*/
	/* these are only meaningful if the pool is finalized */
	/* (see NETMAP_MEM_FINALIZED in netmap_mem_d.flags)   */
	u_int objtotal;         /* actual total number of objects. */
	u_int memtotal;		/* actual total memory space */
	u_int numclusters;	/* actual number of clusters */

	u_int objfree;          /* number of free objects. */

	struct lut_entry *lut;  /* virt,phys addresses, objtotal entries */
	uint32_t *bitmap;       /* one bit per object, 1 means free */
	uint32_t bitmap_slots;	/* number of uint32 entries in bitmap */
	/* ---------------------------------------------------*/

	/* limits */
	u_int objminsize;	/* minimum object size */
	u_int objmaxsize;	/* maximum object size */
	u_int nummin;		/* minimum number of objects */
	u_int nummax;		/* maximum number of objects */

	/* these are changed only by config */
	u_int _objtotal;	/* total number of objects */
	u_int _objsize;		/* object size */
	u_int _clustsize;       /* cluster size */
	u_int _clustentries;    /* objects per cluster */
	u_int _numclusters;	/* number of clusters */

	/* requested values, stored to detect later configuration changes */
	u_int r_objtotal;
	u_int r_objsize;
};
126 
/* Type of the per-allocator lock stored in netmap_mem_d.nm_mtx. */
#define NMA_LOCK_T		NM_MTX_T
128 
129 
/*
 * Table of allocator-specific methods. The public netmap_mem_*()
 * wrappers generated by the NMD_DEF*() macros dispatch through the
 * 'ops' pointer of the allocator.
 */
struct netmap_mem_ops {
	/* methods that take the allocator itself */
	int (*nmd_get_lut)(struct netmap_mem_d *, struct netmap_lut*);
	int  (*nmd_get_info)(struct netmap_mem_d *, u_int *size,
			u_int *memflags, uint16_t *id);

	vm_paddr_t (*nmd_ofstophys)(struct netmap_mem_d *, vm_ooffset_t);
	int (*nmd_config)(struct netmap_mem_d *);
	int (*nmd_finalize)(struct netmap_mem_d *);
	void (*nmd_deref)(struct netmap_mem_d *);
	ssize_t  (*nmd_if_offset)(struct netmap_mem_d *, const void *vaddr);
	void (*nmd_delete)(struct netmap_mem_d *);

	/* methods that take an adapter; dispatched through na->nm_mem */
	struct netmap_if * (*nmd_if_new)(struct netmap_adapter *);
	void (*nmd_if_delete)(struct netmap_adapter *, struct netmap_if *);
	int  (*nmd_rings_create)(struct netmap_adapter *);
	void (*nmd_rings_delete)(struct netmap_adapter *);
};
147 
/* Allocator identifier; 0 is reserved as an error value. */
typedef uint16_t nm_memid_t;

/*
 * A netmap memory allocator: three object pools plus the bookkeeping
 * needed to share it among adapters.
 */
struct netmap_mem_d {
	NMA_LOCK_T nm_mtx;  /* protect the allocator */
	u_int nm_totalsize; /* shorthand */

	u_int flags;
#define NETMAP_MEM_FINALIZED	0x1	/* preallocation done */
	int lasterr;		/* last error for curr config */
	int active;		/* active users */
	int refcount;		/* see netmap_mem_get()/netmap_mem_put() */
	/* the three allocators */
	struct netmap_obj_pool pools[NETMAP_POOLS_NR];

	nm_memid_t nm_id;	/* allocator identifier */
	int nm_grp;	/* iommu group id, negative while unassigned */

	/* list of all existing allocators, sorted by nm_id */
	struct netmap_mem_d *prev, *next;

	struct netmap_mem_ops *ops;
};
170 
/*
 * Generators for the public netmap_mem_<name>() wrappers. Each wrapper
 * simply forwards to the nmd_<name> method of the allocator's ops table.
 *
 * NMD_DEFCB*   wrappers take the allocator (plus 0, 1 or 3 extra args).
 * NMD_DEFNACB* wrappers take an adapter and dispatch via na->nm_mem.
 *
 * XXX need to fix the case of t0 == void: the generated body then
 * contains 'return <void expression>;'.
 */
#define NMD_DEFCB(t0, name) \
t0 \
netmap_mem_##name(struct netmap_mem_d *nmd) \
{ \
	return nmd->ops->nmd_##name(nmd); \
}

#define NMD_DEFCB1(t0, name, t1) \
t0 \
netmap_mem_##name(struct netmap_mem_d *nmd, t1 a1) \
{ \
	return nmd->ops->nmd_##name(nmd, a1); \
}

#define NMD_DEFCB3(t0, name, t1, t2, t3) \
t0 \
netmap_mem_##name(struct netmap_mem_d *nmd, t1 a1, t2 a2, t3 a3) \
{ \
	return nmd->ops->nmd_##name(nmd, a1, a2, a3); \
}

#define NMD_DEFNACB(t0, name) \
t0 \
netmap_mem_##name(struct netmap_adapter *na) \
{ \
	return na->nm_mem->ops->nmd_##name(na); \
}

#define NMD_DEFNACB1(t0, name, t1) \
t0 \
netmap_mem_##name(struct netmap_adapter *na, t1 a1) \
{ \
	return na->nm_mem->ops->nmd_##name(na, a1); \
}

/* wrappers operating on an allocator */
NMD_DEFCB1(int, get_lut, struct netmap_lut *);
NMD_DEFCB3(int, get_info, u_int *, u_int *, uint16_t *);
NMD_DEFCB1(vm_paddr_t, ofstophys, vm_ooffset_t);
/* netmap_mem_config() is the only wrapper private to this file */
static int netmap_mem_config(struct netmap_mem_d *);
NMD_DEFCB(int, config);
NMD_DEFCB1(ssize_t, if_offset, const void *);
NMD_DEFCB(void, delete);

/* wrappers operating on an adapter */
NMD_DEFNACB(struct netmap_if *, if_new);
NMD_DEFNACB1(void, if_delete, struct netmap_if *);
NMD_DEFNACB(int, rings_create);
NMD_DEFNACB(void, rings_delete);
221 
/* forward declarations of helpers used before their definition */
static int netmap_mem_map(struct netmap_obj_pool *, struct netmap_adapter *);
static int netmap_mem_unmap(struct netmap_obj_pool *, struct netmap_adapter *);
static int nm_mem_assign_group(struct netmap_mem_d *, struct device *);

/* Operations on the per-allocator lock (netmap_mem_d.nm_mtx). */
#define NMA_LOCK_INIT(n)	NM_MTX_INIT((n)->nm_mtx)
#define NMA_LOCK_DESTROY(n)	NM_MTX_DESTROY((n)->nm_mtx)
#define NMA_LOCK(n)		NM_MTX_LOCK((n)->nm_mtx)
#define NMA_UNLOCK(n)		NM_MTX_UNLOCK((n)->nm_mtx)

/*
 * With NM_DEBUG_MEM_PUTGET defined, every get/put prints the caller's
 * function name and line, the allocator id and the resulting refcount.
 * Otherwise the macro expands to nothing.
 */
#ifdef NM_DEBUG_MEM_PUTGET
#define NM_DBG_REFC(nmd, func, line)	\
	printf("%s:%d mem[%d] -> %d\n", func, line, (nmd)->nm_id, (nmd)->refcount);
#else
#define NM_DBG_REFC(nmd, func, line)
#endif
237 
238 #ifdef NM_DEBUG_MEM_PUTGET
239 void __netmap_mem_get(struct netmap_mem_d *nmd, const char *func, int line)
240 #else
241 void netmap_mem_get(struct netmap_mem_d *nmd)
242 #endif
243 {
244 	NMA_LOCK(nmd);
245 	nmd->refcount++;
246 	NM_DBG_REFC(nmd, func, line);
247 	NMA_UNLOCK(nmd);
248 }
249 
250 #ifdef NM_DEBUG_MEM_PUTGET
251 void __netmap_mem_put(struct netmap_mem_d *nmd, const char *func, int line)
252 #else
253 void netmap_mem_put(struct netmap_mem_d *nmd)
254 #endif
255 {
256 	int last;
257 	NMA_LOCK(nmd);
258 	last = (--nmd->refcount == 0);
259 	NM_DBG_REFC(nmd, func, line);
260 	NMA_UNLOCK(nmd);
261 	if (last)
262 		netmap_mem_delete(nmd);
263 }
264 
265 int
266 netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
267 {
268 	if (nm_mem_assign_group(nmd, na->pdev) < 0) {
269 		return ENOMEM;
270 	} else {
271 		NMA_LOCK(nmd);
272 		nmd->lasterr = nmd->ops->nmd_finalize(nmd);
273 		NMA_UNLOCK(nmd);
274 	}
275 
276 	if (!nmd->lasterr && na->pdev)
277 		netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na);
278 
279 	return nmd->lasterr;
280 }
281 
282 void
283 netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
284 {
285 	NMA_LOCK(nmd);
286 	netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na);
287 	if (nmd->active == 1) {
288 		u_int i;
289 
290 		/*
291 		 * Reset the allocator when it falls out of use so that any
292 		 * pool resources leaked by unclean application exits are
293 		 * reclaimed.
294 		 */
295 		for (i = 0; i < NETMAP_POOLS_NR; i++) {
296 			struct netmap_obj_pool *p;
297 			u_int j;
298 
299 			p = &nmd->pools[i];
300 			p->objfree = p->objtotal;
301 			/*
302 			 * Reproduce the net effect of the M_ZERO malloc()
303 			 * and marking of free entries in the bitmap that
304 			 * occur in finalize_obj_allocator()
305 			 */
306 			memset(p->bitmap,
307 			    '\0',
308 			    sizeof(uint32_t) * ((p->objtotal + 31) / 32));
309 
310 			/*
311 			 * Set all the bits in the bitmap that have
312 			 * corresponding buffers to 1 to indicate they are
313 			 * free.
314 			 */
315 			for (j = 0; j < p->objtotal; j++) {
316 				if (p->lut[j].vaddr != NULL) {
317 					p->bitmap[ (j>>5) ] |=  ( 1 << (j & 31) );
318 				}
319 			}
320 		}
321 
322 		/*
323 		 * Per netmap_mem_finalize_all(),
324 		 * buffers 0 and 1 are reserved
325 		 */
326 		nmd->pools[NETMAP_BUF_POOL].objfree -= 2;
327 		if (nmd->pools[NETMAP_BUF_POOL].bitmap) {
328 			/* XXX This check is a workaround that prevents a
329 			 * NULL pointer crash which currently happens only
330 			 * with ptnetmap guests.
331 			 * Removed shared-info --> is the bug still there? */
332 			nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3;
333 		}
334 	}
335 	nmd->ops->nmd_deref(nmd);
336 
337 	NMA_UNLOCK(nmd);
338 }
339 
340 
341 /* accessor functions */
342 static int
343 netmap_mem2_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut)
344 {
345 	lut->lut = nmd->pools[NETMAP_BUF_POOL].lut;
346 	lut->objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal;
347 	lut->objsize = nmd->pools[NETMAP_BUF_POOL]._objsize;
348 
349 	return 0;
350 }
351 
/*
 * Requested object size and count for each pool. The values are
 * exported read/write as the dev.netmap.<pool>_size and
 * dev.netmap.<pool>_num sysctls.
 */
static struct netmap_obj_params netmap_params[NETMAP_POOLS_NR] = {
	[NETMAP_IF_POOL] = {
		.size = 1024,
		.num  = 100,
	},
	[NETMAP_RING_POOL] = {
		.size = 9*PAGE_SIZE,
		.num  = 200,
	},
	[NETMAP_BUF_POOL] = {
		.size = 2048,
		.num  = NETMAP_BUF_MAX_NUM,
	},
};
366 
/*
 * Object size and count used for private allocators. The values are
 * exported read/write as the dev.netmap.priv_<pool>_size and
 * dev.netmap.priv_<pool>_num sysctls.
 */
static struct netmap_obj_params netmap_min_priv_params[NETMAP_POOLS_NR] = {
	[NETMAP_IF_POOL] = {
		.size = 1024,
		.num  = 2,
	},
	[NETMAP_RING_POOL] = {
		.size = 5*PAGE_SIZE,
		.num  = 4,
	},
	[NETMAP_BUF_POOL] = {
		.size = 2048,
		.num  = 4098,
	},
};
381 
382 
383 /*
384  * nm_mem is the memory allocator used for all physical interfaces
385  * running in netmap mode.
386  * Virtual (VALE) ports will have each its own allocator.
387  */
extern struct netmap_mem_ops netmap_mem_global_ops; /* forward */
struct netmap_mem_d nm_mem = {	/* Our memory allocator. */
	/* per-pool limits checked when the pools are configured */
	.pools = {
		[NETMAP_IF_POOL] = {
			.name 	= "netmap_if",
			.objminsize = sizeof(struct netmap_if),
			.objmaxsize = 4096,
			.nummin     = 10,	/* don't be stingy */
			.nummax	    = 10000,	/* XXX very large */
		},
		[NETMAP_RING_POOL] = {
			.name 	= "netmap_ring",
			.objminsize = sizeof(struct netmap_ring),
			.objmaxsize = 32*PAGE_SIZE,
			.nummin     = 2,
			.nummax	    = 1024,
		},
		[NETMAP_BUF_POOL] = {
			.name	= "netmap_buf",
			.objminsize = 64,
			.objmaxsize = 65536,
			.nummin     = 4,
			.nummax	    = 1000000, /* one million! */
		},
	},

	.nm_id = 1,
	.nm_grp = -1,	/* no iommu group assigned yet */

	/* initially the only element of the circular allocator list */
	.prev = &nm_mem,
	.next = &nm_mem,

	.ops = &netmap_mem_global_ops
};
422 
423 
/*
 * Allocator that most recently received an id; the search for a free
 * id in nm_mem_assign_id_locked() starts from here.
 */
static struct netmap_mem_d *netmap_last_mem_d = &nm_mem;

/* blueprint for the private memory allocators */
extern struct netmap_mem_ops netmap_mem_private_ops; /* forward */
/* XXX clang is not happy about using name as a print format */
static const struct netmap_mem_d nm_blueprint = {
	.pools = {
		[NETMAP_IF_POOL] = {
			.name 	= "%s_if",
			.objminsize = sizeof(struct netmap_if),
			.objmaxsize = 4096,
			.nummin     = 1,
			.nummax	    = 100,
		},
		[NETMAP_RING_POOL] = {
			.name 	= "%s_ring",
			.objminsize = sizeof(struct netmap_ring),
			.objmaxsize = 32*PAGE_SIZE,
			.nummin     = 2,
			.nummax	    = 1024,
		},
		[NETMAP_BUF_POOL] = {
			.name	= "%s_buf",
			.objminsize = 64,
			.objmaxsize = 65536,
			.nummin     = 4,
			.nummax	    = 1000000, /* one million! */
		},
	},

	.flags = NETMAP_MEM_PRIVATE,

	.ops = &netmap_mem_private_ops
};
458 
/* memory allocator related sysctls */

#define STRINGIFY(x) #x


/*
 * Declare the six dev.netmap sysctls of one pool:
 *   <name>_size, <name>_num            requested values (read/write)
 *   <name>_curr_size, <name>_curr_num  values in use by nm_mem (read-only)
 *   priv_<name>_size, priv_<name>_num  values for private allocators
 *                                      (read/write)
 * 'id' is the pool index, 'name' the suffix used in the sysctl names.
 */
#define DECLARE_SYSCTLS(id, name) \
	SYSBEGIN(mem2_ ## name); \
	SYSCTL_INT(_dev_netmap, OID_AUTO, name##_size, \
	    CTLFLAG_RW, &netmap_params[id].size, 0, "Requested size of netmap " STRINGIFY(name) "s"); \
	SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_size, \
	    CTLFLAG_RD, &nm_mem.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \
	SYSCTL_INT(_dev_netmap, OID_AUTO, name##_num, \
	    CTLFLAG_RW, &netmap_params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \
	SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_num, \
	    CTLFLAG_RD, &nm_mem.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s"); \
	SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_size, \
	    CTLFLAG_RW, &netmap_min_priv_params[id].size, 0, \
	    "Default size of private netmap " STRINGIFY(name) "s"); \
	SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_num, \
	    CTLFLAG_RW, &netmap_min_priv_params[id].num, 0, \
	    "Default number of private netmap " STRINGIFY(name) "s");	\
	SYSEND

SYSCTL_DECL(_dev_netmap);
DECLARE_SYSCTLS(NETMAP_IF_POOL, if);
DECLARE_SYSCTLS(NETMAP_RING_POOL, ring);
DECLARE_SYSCTLS(NETMAP_BUF_POOL, buf);
486 
487 /* call with NMA_LOCK(&nm_mem) held */
488 static int
489 nm_mem_assign_id_locked(struct netmap_mem_d *nmd)
490 {
491 	nm_memid_t id;
492 	struct netmap_mem_d *scan = netmap_last_mem_d;
493 	int error = ENOMEM;
494 
495 	do {
496 		/* we rely on unsigned wrap around */
497 		id = scan->nm_id + 1;
498 		if (id == 0) /* reserve 0 as error value */
499 			id = 1;
500 		scan = scan->next;
501 		if (id != scan->nm_id) {
502 			nmd->nm_id = id;
503 			nmd->prev = scan->prev;
504 			nmd->next = scan;
505 			scan->prev->next = nmd;
506 			scan->prev = nmd;
507 			netmap_last_mem_d = nmd;
508 			error = 0;
509 			break;
510 		}
511 	} while (scan != netmap_last_mem_d);
512 
513 	return error;
514 }
515 
516 /* call with NMA_LOCK(&nm_mem) *not* held */
517 static int
518 nm_mem_assign_id(struct netmap_mem_d *nmd)
519 {
520         int ret;
521 
522 	NMA_LOCK(&nm_mem);
523         ret = nm_mem_assign_id_locked(nmd);
524 	NMA_UNLOCK(&nm_mem);
525 
526 	return ret;
527 }
528 
529 static void
530 nm_mem_release_id(struct netmap_mem_d *nmd)
531 {
532 	NMA_LOCK(&nm_mem);
533 
534 	nmd->prev->next = nmd->next;
535 	nmd->next->prev = nmd->prev;
536 
537 	if (netmap_last_mem_d == nmd)
538 		netmap_last_mem_d = nmd->prev;
539 
540 	nmd->prev = nmd->next = NULL;
541 
542 	NMA_UNLOCK(&nm_mem);
543 }
544 
545 static int
546 nm_mem_assign_group(struct netmap_mem_d *nmd, struct device *dev)
547 {
548 	int err = 0, id;
549 	id = nm_iommu_group_id(dev);
550 	if (netmap_verbose)
551 		D("iommu_group %d", id);
552 
553 	NMA_LOCK(nmd);
554 
555 	if (nmd->nm_grp < 0)
556 		nmd->nm_grp = id;
557 
558 	if (nmd->nm_grp != id)
559 		nmd->lasterr = err = ENOMEM;
560 
561 	NMA_UNLOCK(nmd);
562 	return err;
563 }
564 
565 /*
566  * First, find the allocator that contains the requested offset,
567  * then locate the cluster through a lookup table.
568  */
569 static vm_paddr_t
570 netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
571 {
572 	int i;
573 	vm_ooffset_t o = offset;
574 	vm_paddr_t pa;
575 	struct netmap_obj_pool *p;
576 
577 	NMA_LOCK(nmd);
578 	p = nmd->pools;
579 
580 	for (i = 0; i < NETMAP_POOLS_NR; offset -= p[i].memtotal, i++) {
581 		if (offset >= p[i].memtotal)
582 			continue;
583 		// now lookup the cluster's address
584 #ifndef _WIN32
585 		pa = vtophys(p[i].lut[offset / p[i]._objsize].vaddr) +
586 			offset % p[i]._objsize;
587 #else
588 		pa = vtophys(p[i].lut[offset / p[i]._objsize].vaddr);
589 		pa.QuadPart += offset % p[i]._objsize;
590 #endif
591 		NMA_UNLOCK(nmd);
592 		return pa;
593 	}
594 	/* this is only in case of errors */
595 	D("invalid ofs 0x%x out of 0x%x 0x%x 0x%x", (u_int)o,
596 		p[NETMAP_IF_POOL].memtotal,
597 		p[NETMAP_IF_POOL].memtotal
598 			+ p[NETMAP_RING_POOL].memtotal,
599 		p[NETMAP_IF_POOL].memtotal
600 			+ p[NETMAP_RING_POOL].memtotal
601 			+ p[NETMAP_BUF_POOL].memtotal);
602 	NMA_UNLOCK(nmd);
603 #ifndef _WIN32
604 	return 0;	// XXX bad address
605 #else
606 	vm_paddr_t res;
607 	res.QuadPart = 0;
608 	return res;
609 #endif
610 }
611 
612 #ifdef _WIN32
613 
/*
 * win32_build_user_vm_map
 *
 * This function takes all the objects that make up the pools and
 * describes them as one contiguous virtual memory space for userspace.
 * It works this way:
 * 1 - allocate a Memory Descriptor List (MDL) as wide as the sum
 *		of the memory needed for the pools
 * 2 - cycle through all the objects in every pool and for every object:
 *
 *		2a - get the list of its physical address descriptors
 *		2b - calculate the offset in the array of page descriptors
 *				of the main MDL
 *		2c - copy the descriptors of the object into the main MDL
 *
 * 3 - return the resulting MDL, which needs to be mapped in userland
 *
 * In this way we obtain a single MDL that describes all the memory of
 * the objects.
 */
635 
/*
 * Build a single MDL describing the memory of all the pools of 'nmd'.
 * Returns the MDL, or NULL if the allocator information cannot be
 * obtained or an MDL allocation fails. The pools are walked under the
 * allocator lock.
 */
PMDL
win32_build_user_vm_map(struct netmap_mem_d* nmd)
{
	int i, j;
	u_int memsize, memflags, ofs = 0;	/* ofs: byte offset in mainMdl */
	PMDL mainMdl, tempMdl;

	if (netmap_mem_get_info(nmd, &memsize, &memflags, NULL)) {
		D("memory not finalised yet");
		return NULL;
	}

	/* MDL large enough for the whole region, filled in below */
	mainMdl = IoAllocateMdl(NULL, memsize, FALSE, FALSE, NULL);
	if (mainMdl == NULL) {
		D("failed to allocate mdl");
		return NULL;
	}

	NMA_LOCK(nmd);
	for (i = 0; i < NETMAP_POOLS_NR; i++) {
		struct netmap_obj_pool *p = &nmd->pools[i];
		int clsz = p->_clustsize;
		int clobjs = p->_clustentries; /* objects per cluster */
		/* bytes of page descriptors needed for one cluster */
		int mdl_len = sizeof(PFN_NUMBER) * BYTES_TO_PAGES(clsz);
		PPFN_NUMBER pSrc, pDst;

		/* each pool has a different cluster size so we need to reallocate */
		tempMdl = IoAllocateMdl(p->lut[0].vaddr, clsz, FALSE, FALSE, NULL);
		if (tempMdl == NULL) {
			NMA_UNLOCK(nmd);
			D("fail to allocate tempMdl");
			IoFreeMdl(mainMdl);
			return NULL;
		}
		pSrc = MmGetMdlPfnArray(tempMdl);
		/* create one entry per cluster, the lut[] has one entry per object */
		for (j = 0; j < p->numclusters; j++, ofs += clsz) {
			pDst = &MmGetMdlPfnArray(mainMdl)[BYTES_TO_PAGES(ofs)];
			/* reuse tempMdl to describe the j-th cluster */
			MmInitializeMdl(tempMdl, p->lut[j*clobjs].vaddr, clsz);
			MmBuildMdlForNonPagedPool(tempMdl); /* compute physical page addresses */
			RtlCopyMemory(pDst, pSrc, mdl_len); /* copy the page descriptors */
			mainMdl->MdlFlags = tempMdl->MdlFlags; /* XXX what is in here ? */
		}
		IoFreeMdl(tempMdl);
	}
	NMA_UNLOCK(nmd);
	return mainMdl;
}
684 
685 #endif /* _WIN32 */
686 
687 /*
688  * helper function for OS-specific mmap routines (currently only windows).
689  * Given an nmd and a pool index, returns the cluster size and number of clusters.
690  * Returns 0 if memory is finalised and the pool is valid, otherwise 1.
691  * It should be called under NMA_LOCK(nmd) otherwise the underlying info can change.
692  */
693 
694 int
695 netmap_mem2_get_pool_info(struct netmap_mem_d* nmd, u_int pool, u_int *clustsize, u_int *numclusters)
696 {
697 	if (!nmd || !clustsize || !numclusters || pool >= NETMAP_POOLS_NR)
698 		return 1; /* invalid arguments */
699 	// NMA_LOCK_ASSERT(nmd);
700 	if (!(nmd->flags & NETMAP_MEM_FINALIZED)) {
701 		*clustsize = *numclusters = 0;
702 		return 1; /* not ready yet */
703 	}
704 	*clustsize = nmd->pools[pool]._clustsize;
705 	*numclusters = nmd->pools[pool].numclusters;
706 	return 0; /* success */
707 }
708 
709 static int
710 netmap_mem2_get_info(struct netmap_mem_d* nmd, u_int* size, u_int *memflags,
711 	nm_memid_t *id)
712 {
713 	int error = 0;
714 	NMA_LOCK(nmd);
715 	error = netmap_mem_config(nmd);
716 	if (error)
717 		goto out;
718 	if (size) {
719 		if (nmd->flags & NETMAP_MEM_FINALIZED) {
720 			*size = nmd->nm_totalsize;
721 		} else {
722 			int i;
723 			*size = 0;
724 			for (i = 0; i < NETMAP_POOLS_NR; i++) {
725 				struct netmap_obj_pool *p = nmd->pools + i;
726 				*size += (p->_numclusters * p->_clustsize);
727 			}
728 		}
729 	}
730 	if (memflags)
731 		*memflags = nmd->flags;
732 	if (id)
733 		*id = nmd->nm_id;
734 out:
735 	NMA_UNLOCK(nmd);
736 	return error;
737 }
738 
739 /*
740  * we store objects by kernel address, need to find the offset
741  * within the pool to export the value to userspace.
742  * Algorithm: scan until we find the cluster, then add the
743  * actual offset in the cluster
744  */
745 static ssize_t
746 netmap_obj_offset(struct netmap_obj_pool *p, const void *vaddr)
747 {
748 	int i, k = p->_clustentries, n = p->objtotal;
749 	ssize_t ofs = 0;
750 
751 	for (i = 0; i < n; i += k, ofs += p->_clustsize) {
752 		const char *base = p->lut[i].vaddr;
753 		ssize_t relofs = (const char *) vaddr - base;
754 
755 		if (relofs < 0 || relofs >= p->_clustsize)
756 			continue;
757 
758 		ofs = ofs + relofs;
759 		ND("%s: return offset %d (cluster %d) for pointer %p",
760 		    p->name, ofs, i, vaddr);
761 		return ofs;
762 	}
763 	D("address %p is not contained inside any cluster (%s)",
764 	    vaddr, p->name);
765 	return 0; /* An error occurred */
766 }
767 
/*
 * Helper macros which convert virtual addresses to offsets.
 * netmap_if_offset: offset of 'v' within the netmap_if pool of 'n'.
 * netmap_ring_offset: offset of 'v' within the ring pool of 'n', plus
 * the total size of the netmap_if pool that precedes it.
 */
#define netmap_if_offset(n, v)					\
	netmap_obj_offset(&(n)->pools[NETMAP_IF_POOL], (v))

#define netmap_ring_offset(n, v)				\
    ((n)->pools[NETMAP_IF_POOL].memtotal + 			\
	netmap_obj_offset(&(n)->pools[NETMAP_RING_POOL], (v)))
775 
776 static ssize_t
777 netmap_mem2_if_offset(struct netmap_mem_d *nmd, const void *addr)
778 {
779 	ssize_t v;
780 	NMA_LOCK(nmd);
781 	v = netmap_if_offset(nmd, addr);
782 	NMA_UNLOCK(nmd);
783 	return v;
784 }
785 
786 /*
787  * report the index, and use start position as a hint,
788  * otherwise buffer allocation becomes terribly expensive.
789  */
790 static void *
791 netmap_obj_malloc(struct netmap_obj_pool *p, u_int len, uint32_t *start, uint32_t *index)
792 {
793 	uint32_t i = 0;			/* index in the bitmap */
794 	uint32_t mask, j = 0;		/* slot counter */
795 	void *vaddr = NULL;
796 
797 	if (len > p->_objsize) {
798 		D("%s request size %d too large", p->name, len);
799 		// XXX cannot reduce the size
800 		return NULL;
801 	}
802 
803 	if (p->objfree == 0) {
804 		D("no more %s objects", p->name);
805 		return NULL;
806 	}
807 	if (start)
808 		i = *start;
809 
810 	/* termination is guaranteed by p->free, but better check bounds on i */
811 	while (vaddr == NULL && i < p->bitmap_slots)  {
812 		uint32_t cur = p->bitmap[i];
813 		if (cur == 0) { /* bitmask is fully used */
814 			i++;
815 			continue;
816 		}
817 		/* locate a slot */
818 		for (j = 0, mask = 1; (cur & mask) == 0; j++, mask <<= 1)
819 			;
820 
821 		p->bitmap[i] &= ~mask; /* mark object as in use */
822 		p->objfree--;
823 
824 		vaddr = p->lut[i * 32 + j].vaddr;
825 		if (index)
826 			*index = i * 32 + j;
827 	}
828 	ND("%s allocator: allocated object @ [%d][%d]: vaddr %p",p->name, i, j, vaddr);
829 
830 	if (start)
831 		*start = i;
832 	return vaddr;
833 }
834 
835 
836 /*
837  * free by index, not by address.
838  * XXX should we also cleanup the content ?
839  */
840 static int
841 netmap_obj_free(struct netmap_obj_pool *p, uint32_t j)
842 {
843 	uint32_t *ptr, mask;
844 
845 	if (j >= p->objtotal) {
846 		D("invalid index %u, max %u", j, p->objtotal);
847 		return 1;
848 	}
849 	ptr = &p->bitmap[j / 32];
850 	mask = (1 << (j % 32));
851 	if (*ptr & mask) {
852 		D("ouch, double free on buffer %d", j);
853 		return 1;
854 	} else {
855 		*ptr |= mask;
856 		p->objfree++;
857 		return 0;
858 	}
859 }
860 
861 /*
862  * free by address. This is slow but is only used for a few
863  * objects (rings, nifp)
864  */
865 static void
866 netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr)
867 {
868 	u_int i, j, n = p->numclusters;
869 
870 	for (i = 0, j = 0; i < n; i++, j += p->_clustentries) {
871 		void *base = p->lut[i * p->_clustentries].vaddr;
872 		ssize_t relofs = (ssize_t) vaddr - (ssize_t) base;
873 
874 		/* Given address, is out of the scope of the current cluster.*/
875 		if (vaddr < base || relofs >= p->_clustsize)
876 			continue;
877 
878 		j = j + relofs / p->_objsize;
879 		/* KASSERT(j != 0, ("Cannot free object 0")); */
880 		netmap_obj_free(p, j);
881 		return;
882 	}
883 	D("address %p is not contained inside any cluster (%s)",
884 	    vaddr, p->name);
885 }
886 
/* Size of the objects in the buffer pool of allocator 'n'. */
#define netmap_mem_bufsize(n)	\
	((n)->pools[NETMAP_BUF_POOL]._objsize)

/*
 * Per-pool shorthands for netmap_obj_malloc() and netmap_obj_free_va().
 * netmap_if and ring objects are allocated without a scan hint and
 * freed by address; buffers are always requested at full buffer size
 * and report both scan position and index.
 */
#define netmap_if_malloc(n, len)	netmap_obj_malloc(&(n)->pools[NETMAP_IF_POOL], len, NULL, NULL)
#define netmap_if_free(n, v)		netmap_obj_free_va(&(n)->pools[NETMAP_IF_POOL], (v))
#define netmap_ring_malloc(n, len)	netmap_obj_malloc(&(n)->pools[NETMAP_RING_POOL], len, NULL, NULL)
#define netmap_ring_free(n, v)		netmap_obj_free_va(&(n)->pools[NETMAP_RING_POOL], (v))
#define netmap_buf_malloc(n, _pos, _index)			\
	netmap_obj_malloc(&(n)->pools[NETMAP_BUF_POOL], netmap_mem_bufsize(n), _pos, _index)


#if 0 // XXX unused
/* Return the index associated to the given packet buffer */
#define netmap_buf_index(n, v)						\
    (netmap_obj_offset(&(n)->pools[NETMAP_BUF_POOL], (v)) / NETMAP_BDG_BUF_SIZE(n))
#endif
903 
904 /*
905  * allocate extra buffers in a linked list.
906  * returns the actual number.
907  */
908 uint32_t
909 netmap_extra_alloc(struct netmap_adapter *na, uint32_t *head, uint32_t n)
910 {
911 	struct netmap_mem_d *nmd = na->nm_mem;
912 	uint32_t i, pos = 0; /* opaque, scan position in the bitmap */
913 
914 	NMA_LOCK(nmd);
915 
916 	*head = 0;	/* default, 'null' index ie empty list */
917 	for (i = 0 ; i < n; i++) {
918 		uint32_t cur = *head;	/* save current head */
919 		uint32_t *p = netmap_buf_malloc(nmd, &pos, head);
920 		if (p == NULL) {
921 			D("no more buffers after %d of %d", i, n);
922 			*head = cur; /* restore */
923 			break;
924 		}
925 		ND(5, "allocate buffer %d -> %d", *head, cur);
926 		*p = cur; /* link to previous head */
927 	}
928 
929 	NMA_UNLOCK(nmd);
930 
931 	return i;
932 }
933 
934 static void
935 netmap_extra_free(struct netmap_adapter *na, uint32_t head)
936 {
937         struct lut_entry *lut = na->na_lut.lut;
938 	struct netmap_mem_d *nmd = na->nm_mem;
939 	struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
940 	uint32_t i, cur, *buf;
941 
942 	ND("freeing the extra list");
943 	for (i = 0; head >=2 && head < p->objtotal; i++) {
944 		cur = head;
945 		buf = lut[head].vaddr;
946 		head = *buf;
947 		*buf = 0;
948 		if (netmap_obj_free(p, cur))
949 			break;
950 	}
951 	if (head != 0)
952 		D("breaking with head %d", head);
953 	if (netmap_verbose)
954 		D("freed %d buffers", i);
955 }
956 
957 
958 /* Return nonzero on error */
959 static int
960 netmap_new_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n)
961 {
962 	struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
963 	u_int i = 0;	/* slot counter */
964 	uint32_t pos = 0;	/* slot in p->bitmap */
965 	uint32_t index = 0;	/* buffer index */
966 
967 	for (i = 0; i < n; i++) {
968 		void *vaddr = netmap_buf_malloc(nmd, &pos, &index);
969 		if (vaddr == NULL) {
970 			D("no more buffers after %d of %d", i, n);
971 			goto cleanup;
972 		}
973 		slot[i].buf_idx = index;
974 		slot[i].len = p->_objsize;
975 		slot[i].flags = 0;
976 	}
977 
978 	ND("allocated %d buffers, %d available, first at %d", n, p->objfree, pos);
979 	return (0);
980 
981 cleanup:
982 	while (i > 0) {
983 		i--;
984 		netmap_obj_free(p, slot[i].buf_idx);
985 	}
986 	bzero(slot, n * sizeof(slot[0]));
987 	return (ENOMEM);
988 }
989 
990 static void
991 netmap_mem_set_ring(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n, uint32_t index)
992 {
993 	struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
994 	u_int i;
995 
996 	for (i = 0; i < n; i++) {
997 		slot[i].buf_idx = index;
998 		slot[i].len = p->_objsize;
999 		slot[i].flags = 0;
1000 	}
1001 }
1002 
1003 
1004 static void
1005 netmap_free_buf(struct netmap_mem_d *nmd, uint32_t i)
1006 {
1007 	struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
1008 
1009 	if (i < 2 || i >= p->objtotal) {
1010 		D("Cannot free buf#%d: should be in [2, %d[", i, p->objtotal);
1011 		return;
1012 	}
1013 	netmap_obj_free(p, i);
1014 }
1015 
1016 
1017 static void
1018 netmap_free_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n)
1019 {
1020 	u_int i;
1021 
1022 	for (i = 0; i < n; i++) {
1023 		if (slot[i].buf_idx > 2)
1024 			netmap_free_buf(nmd, slot[i].buf_idx);
1025 	}
1026 }
1027 
1028 static void
1029 netmap_reset_obj_allocator(struct netmap_obj_pool *p)
1030 {
1031 
1032 	if (p == NULL)
1033 		return;
1034 	if (p->bitmap)
1035 		free(p->bitmap, M_NETMAP);
1036 	p->bitmap = NULL;
1037 	if (p->lut) {
1038 		u_int i;
1039 
1040 		/*
1041 		 * Free each cluster allocated in
1042 		 * netmap_finalize_obj_allocator().  The cluster start
1043 		 * addresses are stored at multiples of p->_clusterentries
1044 		 * in the lut.
1045 		 */
1046 		for (i = 0; i < p->objtotal; i += p->_clustentries) {
1047 			if (p->lut[i].vaddr)
1048 				contigfree(p->lut[i].vaddr, p->_clustsize, M_NETMAP);
1049 		}
1050 		bzero(p->lut, sizeof(struct lut_entry) * p->objtotal);
1051 #ifdef linux
1052 		vfree(p->lut);
1053 #else
1054 		free(p->lut, M_NETMAP);
1055 #endif
1056 	}
1057 	p->lut = NULL;
1058 	p->objtotal = 0;
1059 	p->memtotal = 0;
1060 	p->numclusters = 0;
1061 	p->objfree = 0;
1062 }
1063 
1064 /*
1065  * Free all resources related to an allocator.
1066  */
1067 static void
1068 netmap_destroy_obj_allocator(struct netmap_obj_pool *p)
1069 {
1070 	if (p == NULL)
1071 		return;
1072 	netmap_reset_obj_allocator(p);
1073 }
1074 
1075 /*
1076  * We receive a request for objtotal objects, of size objsize each.
1077  * Internally we may round up both numbers, as we allocate objects
1078  * in small clusters multiple of the page size.
1079  * We need to keep track of objtotal and clustentries,
1080  * as they are needed when freeing memory.
1081  *
1082  * XXX note -- userspace needs the buffers to be contiguous,
1083  *	so we cannot afford gaps at the end of a cluster.
1084  */
1085 
1086 
1087 /* call with NMA_LOCK held */
1088 static int
1089 netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int objsize)
1090 {
1091 	int i;
1092 	u_int clustsize;	/* the cluster size, multiple of page size */
1093 	u_int clustentries;	/* how many objects per entry */
1094 
1095 	/* we store the current request, so we can
1096 	 * detect configuration changes later */
1097 	p->r_objtotal = objtotal;
1098 	p->r_objsize = objsize;
1099 
1100 #define MAX_CLUSTSIZE	(1<<22)		// 4 MB
1101 #define LINE_ROUND	NM_CACHE_ALIGN	// 64
1102 	if (objsize >= MAX_CLUSTSIZE) {
1103 		/* we could do it but there is no point */
1104 		D("unsupported allocation for %d bytes", objsize);
1105 		return EINVAL;
1106 	}
1107 	/* make sure objsize is a multiple of LINE_ROUND */
1108 	i = (objsize & (LINE_ROUND - 1));
1109 	if (i) {
1110 		D("XXX aligning object by %d bytes", LINE_ROUND - i);
1111 		objsize += LINE_ROUND - i;
1112 	}
1113 	if (objsize < p->objminsize || objsize > p->objmaxsize) {
1114 		D("requested objsize %d out of range [%d, %d]",
1115 			objsize, p->objminsize, p->objmaxsize);
1116 		return EINVAL;
1117 	}
1118 	if (objtotal < p->nummin || objtotal > p->nummax) {
1119 		D("requested objtotal %d out of range [%d, %d]",
1120 			objtotal, p->nummin, p->nummax);
1121 		return EINVAL;
1122 	}
1123 	/*
1124 	 * Compute number of objects using a brute-force approach:
1125 	 * given a max cluster size,
1126 	 * we try to fill it with objects keeping track of the
1127 	 * wasted space to the next page boundary.
1128 	 */
1129 	for (clustentries = 0, i = 1;; i++) {
1130 		u_int delta, used = i * objsize;
1131 		if (used > MAX_CLUSTSIZE)
1132 			break;
1133 		delta = used % PAGE_SIZE;
1134 		if (delta == 0) { // exact solution
1135 			clustentries = i;
1136 			break;
1137 		}
1138 	}
1139 	/* exact solution not found */
1140 	if (clustentries == 0) {
1141 		D("unsupported allocation for %d bytes", objsize);
1142 		return EINVAL;
1143 	}
1144 	/* compute clustsize */
1145 	clustsize = clustentries * objsize;
1146 	if (netmap_verbose)
1147 		D("objsize %d clustsize %d objects %d",
1148 			objsize, clustsize, clustentries);
1149 
1150 	/*
1151 	 * The number of clusters is n = ceil(objtotal/clustentries)
1152 	 * objtotal' = n * clustentries
1153 	 */
1154 	p->_clustentries = clustentries;
1155 	p->_clustsize = clustsize;
1156 	p->_numclusters = (objtotal + clustentries - 1) / clustentries;
1157 
1158 	/* actual values (may be larger than requested) */
1159 	p->_objsize = objsize;
1160 	p->_objtotal = p->_numclusters * clustentries;
1161 
1162 	return 0;
1163 }
1164 
1165 static struct lut_entry *
1166 nm_alloc_lut(u_int nobj)
1167 {
1168 	size_t n = sizeof(struct lut_entry) * nobj;
1169 	struct lut_entry *lut;
1170 #ifdef linux
1171 	lut = vmalloc(n);
1172 #else
1173 	lut = malloc(n, M_NETMAP, M_NOWAIT | M_ZERO);
1174 #endif
1175 	return lut;
1176 }
1177 
1178 /* call with NMA_LOCK held */
1179 static int
1180 netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
1181 {
1182 	int i; /* must be signed */
1183 	size_t n;
1184 
1185 	/* optimistically assume we have enough memory */
1186 	p->numclusters = p->_numclusters;
1187 	p->objtotal = p->_objtotal;
1188 
1189 	p->lut = nm_alloc_lut(p->objtotal);
1190 	if (p->lut == NULL) {
1191 		D("Unable to create lookup table for '%s'", p->name);
1192 		goto clean;
1193 	}
1194 
1195 	/* Allocate the bitmap */
1196 	n = (p->objtotal + 31) / 32;
1197 	p->bitmap = malloc(sizeof(uint32_t) * n, M_NETMAP, M_NOWAIT | M_ZERO);
1198 	if (p->bitmap == NULL) {
1199 		D("Unable to create bitmap (%d entries) for allocator '%s'", (int)n,
1200 		    p->name);
1201 		goto clean;
1202 	}
1203 	p->bitmap_slots = n;
1204 
1205 	/*
1206 	 * Allocate clusters, init pointers and bitmap
1207 	 */
1208 
1209 	n = p->_clustsize;
1210 	for (i = 0; i < (int)p->objtotal;) {
1211 		int lim = i + p->_clustentries;
1212 		char *clust;
1213 
1214 		/*
1215 		 * XXX Note, we only need contigmalloc() for buffers attached
1216 		 * to native interfaces. In all other cases (nifp, netmap rings
1217 		 * and even buffers for VALE ports or emulated interfaces) we
1218 		 * can live with standard malloc, because the hardware will not
1219 		 * access the pages directly.
1220 		 */
1221 		clust = contigmalloc(n, M_NETMAP, M_NOWAIT | M_ZERO,
1222 		    (size_t)0, -1UL, PAGE_SIZE, 0);
1223 		if (clust == NULL) {
1224 			/*
1225 			 * If we get here, there is a severe memory shortage,
1226 			 * so halve the allocated memory to reclaim some.
1227 			 */
1228 			D("Unable to create cluster at %d for '%s' allocator",
1229 			    i, p->name);
1230 			if (i < 2) /* nothing to halve */
1231 				goto out;
1232 			lim = i / 2;
1233 			for (i--; i >= lim; i--) {
1234 				p->bitmap[ (i>>5) ] &=  ~( 1 << (i & 31) );
1235 				if (i % p->_clustentries == 0 && p->lut[i].vaddr)
1236 					contigfree(p->lut[i].vaddr,
1237 						n, M_NETMAP);
1238 				p->lut[i].vaddr = NULL;
1239 			}
1240 		out:
1241 			p->objtotal = i;
1242 			/* we may have stopped in the middle of a cluster */
1243 			p->numclusters = (i + p->_clustentries - 1) / p->_clustentries;
1244 			break;
1245 		}
1246 		/*
1247 		 * Set bitmap and lut state for all buffers in the current
1248 		 * cluster.
1249 		 *
1250 		 * [i, lim) is the set of buffer indexes that cover the
1251 		 * current cluster.
1252 		 *
1253 		 * 'clust' is really the address of the current buffer in
1254 		 * the current cluster as we index through it with a stride
1255 		 * of p->_objsize.
1256 		 */
1257 		for (; i < lim; i++, clust += p->_objsize) {
1258 			p->bitmap[ (i>>5) ] |=  ( 1 << (i & 31) );
1259 			p->lut[i].vaddr = clust;
1260 			p->lut[i].paddr = vtophys(clust);
1261 		}
1262 	}
1263 	p->objfree = p->objtotal;
1264 	p->memtotal = p->numclusters * p->_clustsize;
1265 	if (p->objfree == 0)
1266 		goto clean;
1267 	if (netmap_verbose)
1268 		D("Pre-allocated %d clusters (%d/%dKB) for '%s'",
1269 		    p->numclusters, p->_clustsize >> 10,
1270 		    p->memtotal >> 10, p->name);
1271 
1272 	return 0;
1273 
1274 clean:
1275 	netmap_reset_obj_allocator(p);
1276 	return ENOMEM;
1277 }
1278 
1279 /* call with lock held */
1280 static int
1281 netmap_memory_config_changed(struct netmap_mem_d *nmd)
1282 {
1283 	int i;
1284 
1285 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1286 		if (nmd->pools[i].r_objsize != netmap_params[i].size ||
1287 		    nmd->pools[i].r_objtotal != netmap_params[i].num)
1288 		    return 1;
1289 	}
1290 	return 0;
1291 }
1292 
1293 static void
1294 netmap_mem_reset_all(struct netmap_mem_d *nmd)
1295 {
1296 	int i;
1297 
1298 	if (netmap_verbose)
1299 		D("resetting %p", nmd);
1300 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1301 		netmap_reset_obj_allocator(&nmd->pools[i]);
1302 	}
1303 	nmd->flags  &= ~NETMAP_MEM_FINALIZED;
1304 }
1305 
1306 static int
1307 netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na)
1308 {
1309 	int i, lim = p->_objtotal;
1310 
1311 	if (na->pdev == NULL)
1312 		return 0;
1313 
1314 #if defined(__FreeBSD__)
1315 	(void)i;
1316 	(void)lim;
1317 	D("unsupported on FreeBSD");
1318 
1319 #elif defined(_WIN32)
1320 	(void)i;
1321 	(void)lim;
1322 	D("unsupported on Windows");	//XXX_ale, really?
1323 #else /* linux */
1324 	for (i = 2; i < lim; i++) {
1325 		netmap_unload_map(na, (bus_dma_tag_t) na->pdev, &p->lut[i].paddr);
1326 	}
1327 #endif /* linux */
1328 
1329 	return 0;
1330 }
1331 
1332 static int
1333 netmap_mem_map(struct netmap_obj_pool *p, struct netmap_adapter *na)
1334 {
1335 #if defined(__FreeBSD__)
1336 	D("unsupported on FreeBSD");
1337 #elif defined(_WIN32)
1338 	D("unsupported on Windows");	//XXX_ale, really?
1339 #else /* linux */
1340 	int i, lim = p->_objtotal;
1341 
1342 	if (na->pdev == NULL)
1343 		return 0;
1344 
1345 	for (i = 2; i < lim; i++) {
1346 		netmap_load_map(na, (bus_dma_tag_t) na->pdev, &p->lut[i].paddr,
1347 				p->lut[i].vaddr);
1348 	}
1349 #endif /* linux */
1350 
1351 	return 0;
1352 }
1353 
1354 static int
1355 netmap_mem_finalize_all(struct netmap_mem_d *nmd)
1356 {
1357 	int i;
1358 	if (nmd->flags & NETMAP_MEM_FINALIZED)
1359 		return 0;
1360 	nmd->lasterr = 0;
1361 	nmd->nm_totalsize = 0;
1362 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1363 		nmd->lasterr = netmap_finalize_obj_allocator(&nmd->pools[i]);
1364 		if (nmd->lasterr)
1365 			goto error;
1366 		nmd->nm_totalsize += nmd->pools[i].memtotal;
1367 	}
1368 	/* buffers 0 and 1 are reserved */
1369 	nmd->pools[NETMAP_BUF_POOL].objfree -= 2;
1370 	nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3;
1371 	nmd->flags |= NETMAP_MEM_FINALIZED;
1372 
1373 	if (netmap_verbose)
1374 		D("interfaces %d KB, rings %d KB, buffers %d MB",
1375 		    nmd->pools[NETMAP_IF_POOL].memtotal >> 10,
1376 		    nmd->pools[NETMAP_RING_POOL].memtotal >> 10,
1377 		    nmd->pools[NETMAP_BUF_POOL].memtotal >> 20);
1378 
1379 	if (netmap_verbose)
1380 		D("Free buffers: %d", nmd->pools[NETMAP_BUF_POOL].objfree);
1381 
1382 
1383 	return 0;
1384 error:
1385 	netmap_mem_reset_all(nmd);
1386 	return nmd->lasterr;
1387 }
1388 
1389 
1390 
1391 static void
1392 netmap_mem_private_delete(struct netmap_mem_d *nmd)
1393 {
1394 	if (nmd == NULL)
1395 		return;
1396 	if (netmap_verbose)
1397 		D("deleting %p", nmd);
1398 	if (nmd->active > 0)
1399 		D("bug: deleting mem allocator with active=%d!", nmd->active);
1400 	nm_mem_release_id(nmd);
1401 	if (netmap_verbose)
1402 		D("done deleting %p", nmd);
1403 	NMA_LOCK_DESTROY(nmd);
1404 	free(nmd, M_DEVBUF);
1405 }
1406 
/*
 * Configuration hook for private allocators.
 *
 * A private allocator is configured once, when it is created, and its
 * configuration never changes afterwards: there is nothing to do here.
 */
static int
netmap_mem_private_config(struct netmap_mem_d *nmd)
{
	(void)nmd;	/* unused */

	return 0;
}
1415 
1416 static int
1417 netmap_mem_private_finalize(struct netmap_mem_d *nmd)
1418 {
1419 	int err;
1420 	err = netmap_mem_finalize_all(nmd);
1421 	if (!err)
1422 		nmd->active++;
1423 	return err;
1424 
1425 }
1426 
1427 static void
1428 netmap_mem_private_deref(struct netmap_mem_d *nmd)
1429 {
1430 	if (--nmd->active <= 0)
1431 		netmap_mem_reset_all(nmd);
1432 }
1433 
1434 
1435 /*
1436  * allocator for private memory
1437  */
1438 struct netmap_mem_d *
1439 netmap_mem_private_new(const char *name, u_int txr, u_int txd,
1440 	u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes, int *perr)
1441 {
1442 	struct netmap_mem_d *d = NULL;
1443 	struct netmap_obj_params p[NETMAP_POOLS_NR];
1444 	int i, err;
1445 	u_int v, maxd;
1446 
1447 	d = malloc(sizeof(struct netmap_mem_d),
1448 		   M_DEVBUF, M_NOWAIT | M_ZERO);
1449 	if (d == NULL) {
1450 		err = ENOMEM;
1451 		goto error;
1452 	}
1453 
1454 	*d = nm_blueprint;
1455 
1456 	err = nm_mem_assign_id(d);
1457 	if (err)
1458 		goto error;
1459 
1460 	/* account for the fake host rings */
1461 	txr++;
1462 	rxr++;
1463 
1464 	/* copy the min values */
1465 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1466 		p[i] = netmap_min_priv_params[i];
1467 	}
1468 
1469 	/* possibly increase them to fit user request */
1470 	v = sizeof(struct netmap_if) + sizeof(ssize_t) * (txr + rxr);
1471 	if (p[NETMAP_IF_POOL].size < v)
1472 		p[NETMAP_IF_POOL].size = v;
1473 	v = 2 + 4 * npipes;
1474 	if (p[NETMAP_IF_POOL].num < v)
1475 		p[NETMAP_IF_POOL].num = v;
1476 	maxd = (txd > rxd) ? txd : rxd;
1477 	v = sizeof(struct netmap_ring) + sizeof(struct netmap_slot) * maxd;
1478 	if (p[NETMAP_RING_POOL].size < v)
1479 		p[NETMAP_RING_POOL].size = v;
1480 	/* each pipe endpoint needs two tx rings (1 normal + 1 host, fake)
1481          * and two rx rings (again, 1 normal and 1 fake host)
1482          */
1483 	v = txr + rxr + 8 * npipes;
1484 	if (p[NETMAP_RING_POOL].num < v)
1485 		p[NETMAP_RING_POOL].num = v;
1486 	/* for each pipe we only need the buffers for the 4 "real" rings.
1487          * On the other end, the pipe ring dimension may be different from
1488          * the parent port ring dimension. As a compromise, we allocate twice the
1489          * space actually needed if the pipe rings were the same size as the parent rings
1490          */
1491 	v = (4 * npipes + rxr) * rxd + (4 * npipes + txr) * txd + 2 + extra_bufs;
1492 		/* the +2 is for the tx and rx fake buffers (indices 0 and 1) */
1493 	if (p[NETMAP_BUF_POOL].num < v)
1494 		p[NETMAP_BUF_POOL].num = v;
1495 
1496 	if (netmap_verbose)
1497 		D("req if %d*%d ring %d*%d buf %d*%d",
1498 			p[NETMAP_IF_POOL].num,
1499 			p[NETMAP_IF_POOL].size,
1500 			p[NETMAP_RING_POOL].num,
1501 			p[NETMAP_RING_POOL].size,
1502 			p[NETMAP_BUF_POOL].num,
1503 			p[NETMAP_BUF_POOL].size);
1504 
1505 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1506 		snprintf(d->pools[i].name, NETMAP_POOL_MAX_NAMSZ,
1507 				nm_blueprint.pools[i].name,
1508 				name);
1509 		err = netmap_config_obj_allocator(&d->pools[i],
1510 				p[i].num, p[i].size);
1511 		if (err)
1512 			goto error;
1513 	}
1514 
1515 	d->flags &= ~NETMAP_MEM_FINALIZED;
1516 
1517 	NMA_LOCK_INIT(d);
1518 
1519 	return d;
1520 error:
1521 	netmap_mem_private_delete(d);
1522 	if (perr)
1523 		*perr = err;
1524 	return NULL;
1525 }
1526 
1527 
1528 /* call with lock held */
1529 static int
1530 netmap_mem_global_config(struct netmap_mem_d *nmd)
1531 {
1532 	int i;
1533 
1534 	if (nmd->active)
1535 		/* already in use, we cannot change the configuration */
1536 		goto out;
1537 
1538 	if (!netmap_memory_config_changed(nmd))
1539 		goto out;
1540 
1541 	ND("reconfiguring");
1542 
1543 	if (nmd->flags & NETMAP_MEM_FINALIZED) {
1544 		/* reset previous allocation */
1545 		for (i = 0; i < NETMAP_POOLS_NR; i++) {
1546 			netmap_reset_obj_allocator(&nmd->pools[i]);
1547 		}
1548 		nmd->flags &= ~NETMAP_MEM_FINALIZED;
1549 	}
1550 
1551 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1552 		nmd->lasterr = netmap_config_obj_allocator(&nmd->pools[i],
1553 				netmap_params[i].num, netmap_params[i].size);
1554 		if (nmd->lasterr)
1555 			goto out;
1556 	}
1557 
1558 out:
1559 
1560 	return nmd->lasterr;
1561 }
1562 
1563 static int
1564 netmap_mem_global_finalize(struct netmap_mem_d *nmd)
1565 {
1566 	int err;
1567 
1568 	/* update configuration if changed */
1569 	if (netmap_mem_global_config(nmd))
1570 		return nmd->lasterr;
1571 
1572 	nmd->active++;
1573 
1574 	if (nmd->flags & NETMAP_MEM_FINALIZED) {
1575 		/* may happen if config is not changed */
1576 		ND("nothing to do");
1577 		goto out;
1578 	}
1579 
1580 	if (netmap_mem_finalize_all(nmd))
1581 		goto out;
1582 
1583 	nmd->lasterr = 0;
1584 
1585 out:
1586 	if (nmd->lasterr)
1587 		nmd->active--;
1588 	err = nmd->lasterr;
1589 
1590 	return err;
1591 
1592 }
1593 
1594 static void
1595 netmap_mem_global_delete(struct netmap_mem_d *nmd)
1596 {
1597 	int i;
1598 
1599 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1600 	    netmap_destroy_obj_allocator(&nm_mem.pools[i]);
1601 	}
1602 
1603 	NMA_LOCK_DESTROY(&nm_mem);
1604 }
1605 
1606 int
1607 netmap_mem_init(void)
1608 {
1609 	NMA_LOCK_INIT(&nm_mem);
1610 	netmap_mem_get(&nm_mem);
1611 	return (0);
1612 }
1613 
1614 void
1615 netmap_mem_fini(void)
1616 {
1617 	netmap_mem_put(&nm_mem);
1618 }
1619 
1620 static void
1621 netmap_free_rings(struct netmap_adapter *na)
1622 {
1623 	enum txrx t;
1624 
1625 	for_rx_tx(t) {
1626 		u_int i;
1627 		for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
1628 			struct netmap_kring *kring = &NMR(na, t)[i];
1629 			struct netmap_ring *ring = kring->ring;
1630 
1631 			if (ring == NULL || kring->users > 0 || (kring->nr_kflags & NKR_NEEDRING)) {
1632 				ND("skipping ring %s (ring %p, users %d)",
1633 						kring->name, ring, kring->users);
1634 				continue;
1635 			}
1636 			if (i != nma_get_nrings(na, t) || na->na_flags & NAF_HOST_RINGS)
1637 				netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
1638 			netmap_ring_free(na->nm_mem, ring);
1639 			kring->ring = NULL;
1640 		}
1641 	}
1642 }
1643 
1644 /* call with NMA_LOCK held *
1645  *
1646  * Allocate netmap rings and buffers for this card
1647  * The rings are contiguous, but have variable size.
1648  * The kring array must follow the layout described
1649  * in netmap_krings_create().
1650  */
1651 static int
1652 netmap_mem2_rings_create(struct netmap_adapter *na)
1653 {
1654 	enum txrx t;
1655 
1656 	NMA_LOCK(na->nm_mem);
1657 
1658 	for_rx_tx(t) {
1659 		u_int i;
1660 
1661 		for (i = 0; i <= nma_get_nrings(na, t); i++) {
1662 			struct netmap_kring *kring = &NMR(na, t)[i];
1663 			struct netmap_ring *ring = kring->ring;
1664 			u_int len, ndesc;
1665 
1666 			if (ring || (!kring->users && !(kring->nr_kflags & NKR_NEEDRING))) {
1667 				/* uneeded, or already created by somebody else */
1668 				ND("skipping ring %s", kring->name);
1669 				continue;
1670 			}
1671 			ndesc = kring->nkr_num_slots;
1672 			len = sizeof(struct netmap_ring) +
1673 				  ndesc * sizeof(struct netmap_slot);
1674 			ring = netmap_ring_malloc(na->nm_mem, len);
1675 			if (ring == NULL) {
1676 				D("Cannot allocate %s_ring", nm_txrx2str(t));
1677 				goto cleanup;
1678 			}
1679 			ND("txring at %p", ring);
1680 			kring->ring = ring;
1681 			*(uint32_t *)(uintptr_t)&ring->num_slots = ndesc;
1682 			*(int64_t *)(uintptr_t)&ring->buf_ofs =
1683 			    (na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
1684 				na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
1685 				netmap_ring_offset(na->nm_mem, ring);
1686 
1687 			/* copy values from kring */
1688 			ring->head = kring->rhead;
1689 			ring->cur = kring->rcur;
1690 			ring->tail = kring->rtail;
1691 			*(uint16_t *)(uintptr_t)&ring->nr_buf_size =
1692 				netmap_mem_bufsize(na->nm_mem);
1693 			ND("%s h %d c %d t %d", kring->name,
1694 				ring->head, ring->cur, ring->tail);
1695 			ND("initializing slots for %s_ring", nm_txrx2str(txrx));
1696 			if (i != nma_get_nrings(na, t) || (na->na_flags & NAF_HOST_RINGS)) {
1697 				/* this is a real ring */
1698 				if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
1699 					D("Cannot allocate buffers for %s_ring", nm_txrx2str(t));
1700 					goto cleanup;
1701 				}
1702 			} else {
1703 				/* this is a fake ring, set all indices to 0 */
1704 				netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0);
1705 			}
1706 		        /* ring info */
1707 		        *(uint16_t *)(uintptr_t)&ring->ringid = kring->ring_id;
1708 		        *(uint16_t *)(uintptr_t)&ring->dir = kring->tx;
1709 		}
1710 	}
1711 
1712 	NMA_UNLOCK(na->nm_mem);
1713 
1714 	return 0;
1715 
1716 cleanup:
1717 	netmap_free_rings(na);
1718 
1719 	NMA_UNLOCK(na->nm_mem);
1720 
1721 	return ENOMEM;
1722 }
1723 
1724 static void
1725 netmap_mem2_rings_delete(struct netmap_adapter *na)
1726 {
1727 	/* last instance, release bufs and rings */
1728 	NMA_LOCK(na->nm_mem);
1729 
1730 	netmap_free_rings(na);
1731 
1732 	NMA_UNLOCK(na->nm_mem);
1733 }
1734 
1735 
1736 /* call with NMA_LOCK held */
1737 /*
1738  * Allocate the per-fd structure netmap_if.
1739  *
1740  * We assume that the configuration stored in na
1741  * (number of tx/rx rings and descs) does not change while
1742  * the interface is in netmap mode.
1743  */
1744 static struct netmap_if *
1745 netmap_mem2_if_new(struct netmap_adapter *na)
1746 {
1747 	struct netmap_if *nifp;
1748 	ssize_t base; /* handy for relative offsets between rings and nifp */
1749 	u_int i, len, n[NR_TXRX], ntot;
1750 	enum txrx t;
1751 
1752 	ntot = 0;
1753 	for_rx_tx(t) {
1754 		/* account for the (eventually fake) host rings */
1755 		n[t] = nma_get_nrings(na, t) + 1;
1756 		ntot += n[t];
1757 	}
1758 	/*
1759 	 * the descriptor is followed inline by an array of offsets
1760 	 * to the tx and rx rings in the shared memory region.
1761 	 */
1762 
1763 	NMA_LOCK(na->nm_mem);
1764 
1765 	len = sizeof(struct netmap_if) + (ntot * sizeof(ssize_t));
1766 	nifp = netmap_if_malloc(na->nm_mem, len);
1767 	if (nifp == NULL) {
1768 		NMA_UNLOCK(na->nm_mem);
1769 		return NULL;
1770 	}
1771 
1772 	/* initialize base fields -- override const */
1773 	*(u_int *)(uintptr_t)&nifp->ni_tx_rings = na->num_tx_rings;
1774 	*(u_int *)(uintptr_t)&nifp->ni_rx_rings = na->num_rx_rings;
1775 	strncpy(nifp->ni_name, na->name, (size_t)IFNAMSIZ);
1776 
1777 	/*
1778 	 * fill the slots for the rx and tx rings. They contain the offset
1779 	 * between the ring and nifp, so the information is usable in
1780 	 * userspace to reach the ring from the nifp.
1781 	 */
1782 	base = netmap_if_offset(na->nm_mem, nifp);
1783 	for (i = 0; i < n[NR_TX]; i++) {
1784 		if (na->tx_rings[i].ring == NULL) {
1785 			// XXX maybe use the offset of an error ring,
1786 			// like we do for buffers?
1787 			*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] = 0;
1788 			continue;
1789 		}
1790 		*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] =
1791 			netmap_ring_offset(na->nm_mem, na->tx_rings[i].ring) - base;
1792 	}
1793 	for (i = 0; i < n[NR_RX]; i++) {
1794 		if (na->rx_rings[i].ring == NULL) {
1795 			// XXX maybe use the offset of an error ring,
1796 			// like we do for buffers?
1797 			*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] = 0;
1798 			continue;
1799 		}
1800 		*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] =
1801 			netmap_ring_offset(na->nm_mem, na->rx_rings[i].ring) - base;
1802 	}
1803 
1804 	NMA_UNLOCK(na->nm_mem);
1805 
1806 	return (nifp);
1807 }
1808 
1809 static void
1810 netmap_mem2_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
1811 {
1812 	if (nifp == NULL)
1813 		/* nothing to do */
1814 		return;
1815 	NMA_LOCK(na->nm_mem);
1816 	if (nifp->ni_bufs_head)
1817 		netmap_extra_free(na, nifp->ni_bufs_head);
1818 	netmap_if_free(na->nm_mem, nifp);
1819 
1820 	NMA_UNLOCK(na->nm_mem);
1821 }
1822 
1823 static void
1824 netmap_mem_global_deref(struct netmap_mem_d *nmd)
1825 {
1826 
1827 	nmd->active--;
1828 	if (!nmd->active)
1829 		nmd->nm_grp = -1;
1830 	if (netmap_verbose)
1831 		D("active = %d", nmd->active);
1832 
1833 }
1834 
1835 struct netmap_mem_ops netmap_mem_global_ops = {
1836 	.nmd_get_lut = netmap_mem2_get_lut,
1837 	.nmd_get_info = netmap_mem2_get_info,
1838 	.nmd_ofstophys = netmap_mem2_ofstophys,
1839 	.nmd_config = netmap_mem_global_config,
1840 	.nmd_finalize = netmap_mem_global_finalize,
1841 	.nmd_deref = netmap_mem_global_deref,
1842 	.nmd_delete = netmap_mem_global_delete,
1843 	.nmd_if_offset = netmap_mem2_if_offset,
1844 	.nmd_if_new = netmap_mem2_if_new,
1845 	.nmd_if_delete = netmap_mem2_if_delete,
1846 	.nmd_rings_create = netmap_mem2_rings_create,
1847 	.nmd_rings_delete = netmap_mem2_rings_delete
1848 };
1849 struct netmap_mem_ops netmap_mem_private_ops = {
1850 	.nmd_get_lut = netmap_mem2_get_lut,
1851 	.nmd_get_info = netmap_mem2_get_info,
1852 	.nmd_ofstophys = netmap_mem2_ofstophys,
1853 	.nmd_config = netmap_mem_private_config,
1854 	.nmd_finalize = netmap_mem_private_finalize,
1855 	.nmd_deref = netmap_mem_private_deref,
1856 	.nmd_if_offset = netmap_mem2_if_offset,
1857 	.nmd_delete = netmap_mem_private_delete,
1858 	.nmd_if_new = netmap_mem2_if_new,
1859 	.nmd_if_delete = netmap_mem2_if_delete,
1860 	.nmd_rings_create = netmap_mem2_rings_create,
1861 	.nmd_rings_delete = netmap_mem2_rings_delete
1862 };
1863 
1864 int
1865 netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_adapter *na)
1866 {
1867 	uintptr_t *pp = (uintptr_t *)&nmr->nr_arg1;
1868 	struct netmap_pools_info *upi = (struct netmap_pools_info *)(*pp);
1869 	struct netmap_mem_d *nmd = na->nm_mem;
1870 	struct netmap_pools_info pi;
1871 	unsigned int memsize;
1872 	uint16_t memid;
1873 	int ret;
1874 
1875 	if (!nmd) {
1876 		return -1;
1877 	}
1878 
1879 	ret = netmap_mem_get_info(nmd, &memsize, NULL, &memid);
1880 	if (ret) {
1881 		return ret;
1882 	}
1883 
1884 	pi.memsize = memsize;
1885 	pi.memid = memid;
1886 	pi.if_pool_offset = 0;
1887 	pi.if_pool_objtotal = nmd->pools[NETMAP_IF_POOL].objtotal;
1888 	pi.if_pool_objsize = nmd->pools[NETMAP_IF_POOL]._objsize;
1889 
1890 	pi.ring_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal;
1891 	pi.ring_pool_objtotal = nmd->pools[NETMAP_RING_POOL].objtotal;
1892 	pi.ring_pool_objsize = nmd->pools[NETMAP_RING_POOL]._objsize;
1893 
1894 	pi.buf_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal +
1895 			     nmd->pools[NETMAP_RING_POOL].memtotal;
1896 	pi.buf_pool_objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal;
1897 	pi.buf_pool_objsize = nmd->pools[NETMAP_BUF_POOL]._objsize;
1898 
1899 	ret = copyout(&pi, upi, sizeof(pi));
1900 	if (ret) {
1901 		return ret;
1902 	}
1903 
1904 	return 0;
1905 }
1906 
1907 #ifdef WITH_PTNETMAP_GUEST
/*
 * Node of the singly-linked list of passthrough interfaces attached
 * to a ptnetmap guest allocator.
 */
struct mem_pt_if {
	struct mem_pt_if *next;		/* next node, NULL at the end */
	struct ifnet *ifp;		/* the interface (lookup key) */
	unsigned int nifp_offset;	/* offset of its netmap_if from
					 * the start of the mapped memory */
};
1913 
1914 /* Netmap allocator for ptnetmap guests. */
1915 struct netmap_mem_ptg {
1916 	struct netmap_mem_d up;
1917 
1918 	vm_paddr_t nm_paddr;            /* physical address in the guest */
1919 	void *nm_addr;                  /* virtual address in the guest */
1920 	struct netmap_lut buf_lut;      /* lookup table for BUF pool in the guest */
1921 	nm_memid_t host_mem_id;         /* allocator identifier in the host */
1922 	struct ptnetmap_memdev *ptn_dev;/* ptnetmap memdev */
1923 	struct mem_pt_if *pt_ifs;	/* list of interfaces in passthrough */
1924 };
1925 
1926 /* Link a passthrough interface to a passthrough netmap allocator. */
1927 static int
1928 netmap_mem_pt_guest_ifp_add(struct netmap_mem_d *nmd, struct ifnet *ifp,
1929 			    unsigned int nifp_offset)
1930 {
1931 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
1932 	struct mem_pt_if *ptif = malloc(sizeof(*ptif), M_NETMAP,
1933 					M_NOWAIT | M_ZERO);
1934 
1935 	if (!ptif) {
1936 		return ENOMEM;
1937 	}
1938 
1939 	NMA_LOCK(nmd);
1940 
1941 	ptif->ifp = ifp;
1942 	ptif->nifp_offset = nifp_offset;
1943 
1944 	if (ptnmd->pt_ifs) {
1945 		ptif->next = ptnmd->pt_ifs;
1946 	}
1947 	ptnmd->pt_ifs = ptif;
1948 
1949 	NMA_UNLOCK(nmd);
1950 
1951 	D("added (ifp=%p,nifp_offset=%u)", ptif->ifp, ptif->nifp_offset);
1952 
1953 	return 0;
1954 }
1955 
1956 /* Called with NMA_LOCK(nmd) held. */
1957 static struct mem_pt_if *
1958 netmap_mem_pt_guest_ifp_lookup(struct netmap_mem_d *nmd, struct ifnet *ifp)
1959 {
1960 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
1961 	struct mem_pt_if *curr;
1962 
1963 	for (curr = ptnmd->pt_ifs; curr; curr = curr->next) {
1964 		if (curr->ifp == ifp) {
1965 			return curr;
1966 		}
1967 	}
1968 
1969 	return NULL;
1970 }
1971 
1972 /* Unlink a passthrough interface from a passthrough netmap allocator. */
1973 int
1974 netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *nmd, struct ifnet *ifp)
1975 {
1976 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
1977 	struct mem_pt_if *prev = NULL;
1978 	struct mem_pt_if *curr;
1979 	int ret = -1;
1980 
1981 	NMA_LOCK(nmd);
1982 
1983 	for (curr = ptnmd->pt_ifs; curr; curr = curr->next) {
1984 		if (curr->ifp == ifp) {
1985 			if (prev) {
1986 				prev->next = curr->next;
1987 			} else {
1988 				ptnmd->pt_ifs = curr->next;
1989 			}
1990 			D("removed (ifp=%p,nifp_offset=%u)",
1991 			  curr->ifp, curr->nifp_offset);
1992 			free(curr, M_NETMAP);
1993 			ret = 0;
1994 			break;
1995 		}
1996 		prev = curr;
1997 	}
1998 
1999 	NMA_UNLOCK(nmd);
2000 
2001 	return ret;
2002 }
2003 
2004 static int
2005 netmap_mem_pt_guest_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut)
2006 {
2007 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
2008 
2009 	if (!(nmd->flags & NETMAP_MEM_FINALIZED)) {
2010 		return EINVAL;
2011 	}
2012 
2013 	*lut = ptnmd->buf_lut;
2014 	return 0;
2015 }
2016 
2017 static int
2018 netmap_mem_pt_guest_get_info(struct netmap_mem_d *nmd, u_int *size,
2019 			     u_int *memflags, uint16_t *id)
2020 {
2021 	int error = 0;
2022 
2023 	NMA_LOCK(nmd);
2024 
2025 	error = nmd->ops->nmd_config(nmd);
2026 	if (error)
2027 		goto out;
2028 
2029 	if (size)
2030 		*size = nmd->nm_totalsize;
2031 	if (memflags)
2032 		*memflags = nmd->flags;
2033 	if (id)
2034 		*id = nmd->nm_id;
2035 
2036 out:
2037 	NMA_UNLOCK(nmd);
2038 
2039 	return error;
2040 }
2041 
2042 static vm_paddr_t
2043 netmap_mem_pt_guest_ofstophys(struct netmap_mem_d *nmd, vm_ooffset_t off)
2044 {
2045 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
2046 	vm_paddr_t paddr;
2047 	/* if the offset is valid, just return csb->base_addr + off */
2048 	paddr = (vm_paddr_t)(ptnmd->nm_paddr + off);
2049 	ND("off %lx padr %lx", off, (unsigned long)paddr);
2050 	return paddr;
2051 }
2052 
/*
 * Configuration hook for passthrough allocators.
 *
 * The allocator is configured once, when it is created, and its
 * configuration never changes afterwards: there is nothing to do here.
 */
static int
netmap_mem_pt_guest_config(struct netmap_mem_d *nmd)
{
	(void)nmd;	/* unused */

	return 0;
}
2061 
2062 static int
2063 netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd)
2064 {
2065 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
2066 	uint64_t mem_size;
2067 	uint32_t bufsize;
2068 	uint32_t nbuffers;
2069 	uint32_t poolofs;
2070 	vm_paddr_t paddr;
2071 	char *vaddr;
2072 	int i;
2073 	int error = 0;
2074 
2075 	nmd->active++;
2076 
2077 	if (nmd->flags & NETMAP_MEM_FINALIZED)
2078 		goto out;
2079 
2080 	if (ptnmd->ptn_dev == NULL) {
2081 		D("ptnetmap memdev not attached");
2082 		error = ENOMEM;
2083 		goto err;
2084 	}
2085 	/* Map memory through ptnetmap-memdev BAR. */
2086 	error = nm_os_pt_memdev_iomap(ptnmd->ptn_dev, &ptnmd->nm_paddr,
2087 				      &ptnmd->nm_addr, &mem_size);
2088 	if (error)
2089 		goto err;
2090 
2091         /* Initialize the lut using the information contained in the
2092 	 * ptnetmap memory device. */
2093         bufsize = nm_os_pt_memdev_ioread(ptnmd->ptn_dev,
2094 					 PTNET_MDEV_IO_BUF_POOL_OBJSZ);
2095         nbuffers = nm_os_pt_memdev_ioread(ptnmd->ptn_dev,
2096 					 PTNET_MDEV_IO_BUF_POOL_OBJNUM);
2097 
2098 	/* allocate the lut */
2099 	if (ptnmd->buf_lut.lut == NULL) {
2100 		D("allocating lut");
2101 		ptnmd->buf_lut.lut = nm_alloc_lut(nbuffers);
2102 		if (ptnmd->buf_lut.lut == NULL) {
2103 			D("lut allocation failed");
2104 			return ENOMEM;
2105 		}
2106 	}
2107 
2108 	/* we have physically contiguous memory mapped through PCI BAR */
2109 	poolofs = nm_os_pt_memdev_ioread(ptnmd->ptn_dev,
2110 					 PTNET_MDEV_IO_BUF_POOL_OFS);
2111 	vaddr = (char *)(ptnmd->nm_addr) + poolofs;
2112 	paddr = ptnmd->nm_paddr + poolofs;
2113 
2114 	for (i = 0; i < nbuffers; i++) {
2115 		ptnmd->buf_lut.lut[i].vaddr = vaddr;
2116 		ptnmd->buf_lut.lut[i].paddr = paddr;
2117 		vaddr += bufsize;
2118 		paddr += bufsize;
2119 	}
2120 
2121 	ptnmd->buf_lut.objtotal = nbuffers;
2122 	ptnmd->buf_lut.objsize = bufsize;
2123 	nmd->nm_totalsize = (unsigned int)mem_size;
2124 
2125 	nmd->flags |= NETMAP_MEM_FINALIZED;
2126 out:
2127 	return 0;
2128 err:
2129 	nmd->active--;
2130 	return error;
2131 }
2132 
2133 static void
2134 netmap_mem_pt_guest_deref(struct netmap_mem_d *nmd)
2135 {
2136 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
2137 
2138 	nmd->active--;
2139 	if (nmd->active <= 0 &&
2140 		(nmd->flags & NETMAP_MEM_FINALIZED)) {
2141 	    nmd->flags  &= ~NETMAP_MEM_FINALIZED;
2142 	    /* unmap ptnetmap-memdev memory */
2143 	    if (ptnmd->ptn_dev) {
2144 		nm_os_pt_memdev_iounmap(ptnmd->ptn_dev);
2145 	    }
2146 	    ptnmd->nm_addr = 0;
2147 	    ptnmd->nm_paddr = 0;
2148 	}
2149 }
2150 
2151 static ssize_t
2152 netmap_mem_pt_guest_if_offset(struct netmap_mem_d *nmd, const void *vaddr)
2153 {
2154 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
2155 
2156 	return (const char *)(vaddr) - (char *)(ptnmd->nm_addr);
2157 }
2158 
2159 static void
2160 netmap_mem_pt_guest_delete(struct netmap_mem_d *nmd)
2161 {
2162 	if (nmd == NULL)
2163 		return;
2164 	if (netmap_verbose)
2165 		D("deleting %p", nmd);
2166 	if (nmd->active > 0)
2167 		D("bug: deleting mem allocator with active=%d!", nmd->active);
2168 	nm_mem_release_id(nmd);
2169 	if (netmap_verbose)
2170 		D("done deleting %p", nmd);
2171 	NMA_LOCK_DESTROY(nmd);
2172 	free(nmd, M_DEVBUF);
2173 }
2174 
2175 static struct netmap_if *
2176 netmap_mem_pt_guest_if_new(struct netmap_adapter *na)
2177 {
2178 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem;
2179 	struct mem_pt_if *ptif;
2180 	struct netmap_if *nifp = NULL;
2181 
2182 	NMA_LOCK(na->nm_mem);
2183 
2184 	ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
2185 	if (ptif == NULL) {
2186 		D("Error: interface %p is not in passthrough", na->ifp);
2187 		goto out;
2188 	}
2189 
2190 	nifp = (struct netmap_if *)((char *)(ptnmd->nm_addr) +
2191 				    ptif->nifp_offset);
2192 	NMA_UNLOCK(na->nm_mem);
2193 out:
2194 	return nifp;
2195 }
2196 
2197 static void
2198 netmap_mem_pt_guest_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
2199 {
2200 	struct mem_pt_if *ptif;
2201 
2202 	NMA_LOCK(na->nm_mem);
2203 	ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
2204 	if (ptif == NULL) {
2205 		D("Error: interface %p is not in passthrough", na->ifp);
2206 	}
2207 	NMA_UNLOCK(na->nm_mem);
2208 }
2209 
2210 static int
2211 netmap_mem_pt_guest_rings_create(struct netmap_adapter *na)
2212 {
2213 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem;
2214 	struct mem_pt_if *ptif;
2215 	struct netmap_if *nifp;
2216 	int i, error = -1;
2217 
2218 	NMA_LOCK(na->nm_mem);
2219 
2220 	ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
2221 	if (ptif == NULL) {
2222 		D("Error: interface %p is not in passthrough", na->ifp);
2223 		goto out;
2224 	}
2225 
2226 
2227 	/* point each kring to the corresponding backend ring */
2228 	nifp = (struct netmap_if *)((char *)ptnmd->nm_addr + ptif->nifp_offset);
2229 	for (i = 0; i <= na->num_tx_rings; i++) {
2230 		struct netmap_kring *kring = na->tx_rings + i;
2231 		if (kring->ring)
2232 			continue;
2233 		kring->ring = (struct netmap_ring *)
2234 			((char *)nifp + nifp->ring_ofs[i]);
2235 	}
2236 	for (i = 0; i <= na->num_rx_rings; i++) {
2237 		struct netmap_kring *kring = na->rx_rings + i;
2238 		if (kring->ring)
2239 			continue;
2240 		kring->ring = (struct netmap_ring *)
2241 			((char *)nifp +
2242 			 nifp->ring_ofs[i + na->num_tx_rings + 1]);
2243 	}
2244 
2245 	error = 0;
2246 out:
2247 	NMA_UNLOCK(na->nm_mem);
2248 
2249 	return error;
2250 }
2251 
/*
 * Counterpart of netmap_mem_pt_guest_rings_create().
 * Currently a no-op.
 */
static void
netmap_mem_pt_guest_rings_delete(struct netmap_adapter *na)
{
	/* TODO: remove?? */
}
2262 
2263 static struct netmap_mem_ops netmap_mem_pt_guest_ops = {
2264 	.nmd_get_lut = netmap_mem_pt_guest_get_lut,
2265 	.nmd_get_info = netmap_mem_pt_guest_get_info,
2266 	.nmd_ofstophys = netmap_mem_pt_guest_ofstophys,
2267 	.nmd_config = netmap_mem_pt_guest_config,
2268 	.nmd_finalize = netmap_mem_pt_guest_finalize,
2269 	.nmd_deref = netmap_mem_pt_guest_deref,
2270 	.nmd_if_offset = netmap_mem_pt_guest_if_offset,
2271 	.nmd_delete = netmap_mem_pt_guest_delete,
2272 	.nmd_if_new = netmap_mem_pt_guest_if_new,
2273 	.nmd_if_delete = netmap_mem_pt_guest_if_delete,
2274 	.nmd_rings_create = netmap_mem_pt_guest_rings_create,
2275 	.nmd_rings_delete = netmap_mem_pt_guest_rings_delete
2276 };
2277 
2278 /* Called with NMA_LOCK(&nm_mem) held. */
2279 static struct netmap_mem_d *
2280 netmap_mem_pt_guest_find_memid(nm_memid_t mem_id)
2281 {
2282 	struct netmap_mem_d *mem = NULL;
2283 	struct netmap_mem_d *scan = netmap_last_mem_d;
2284 
2285 	do {
2286 		/* find ptnetmap allocator through host ID */
2287 		if (scan->ops->nmd_deref == netmap_mem_pt_guest_deref &&
2288 			((struct netmap_mem_ptg *)(scan))->host_mem_id == mem_id) {
2289 			mem = scan;
2290 			break;
2291 		}
2292 		scan = scan->next;
2293 	} while (scan != netmap_last_mem_d);
2294 
2295 	return mem;
2296 }
2297 
2298 /* Called with NMA_LOCK(&nm_mem) held. */
2299 static struct netmap_mem_d *
2300 netmap_mem_pt_guest_create(nm_memid_t mem_id)
2301 {
2302 	struct netmap_mem_ptg *ptnmd;
2303 	int err = 0;
2304 
2305 	ptnmd = malloc(sizeof(struct netmap_mem_ptg),
2306 			M_DEVBUF, M_NOWAIT | M_ZERO);
2307 	if (ptnmd == NULL) {
2308 		err = ENOMEM;
2309 		goto error;
2310 	}
2311 
2312 	ptnmd->up.ops = &netmap_mem_pt_guest_ops;
2313 	ptnmd->host_mem_id = mem_id;
2314 	ptnmd->pt_ifs = NULL;
2315 
2316         /* Assign new id in the guest (We have the lock) */
2317 	err = nm_mem_assign_id_locked(&ptnmd->up);
2318 	if (err)
2319 		goto error;
2320 
2321 	ptnmd->up.flags &= ~NETMAP_MEM_FINALIZED;
2322 	ptnmd->up.flags |= NETMAP_MEM_IO;
2323 
2324 	NMA_LOCK_INIT(&ptnmd->up);
2325 
2326 	return &ptnmd->up;
2327 error:
2328 	netmap_mem_pt_guest_delete(&ptnmd->up);
2329 	return NULL;
2330 }
2331 
2332 /*
2333  * find host id in guest allocators and create guest allocator
2334  * if it is not there
2335  */
2336 static struct netmap_mem_d *
2337 netmap_mem_pt_guest_get(nm_memid_t mem_id)
2338 {
2339 	struct netmap_mem_d *nmd;
2340 
2341 	NMA_LOCK(&nm_mem);
2342 	nmd = netmap_mem_pt_guest_find_memid(mem_id);
2343 	if (nmd == NULL) {
2344 		nmd = netmap_mem_pt_guest_create(mem_id);
2345 	}
2346 	NMA_UNLOCK(&nm_mem);
2347 
2348 	return nmd;
2349 }
2350 
2351 /*
2352  * The guest allocator can be created by ptnetmap_memdev (during the device
2353  * attach) or by ptnetmap device (ptnet), during the netmap_attach.
2354  *
2355  * The order is not important (we have different order in LINUX and FreeBSD).
2356  * The first one, creates the device, and the second one simply attaches it.
2357  */
2358 
2359 /* Called when ptnetmap_memdev is attaching, to attach a new allocator in
2360  * the guest */
2361 struct netmap_mem_d *
2362 netmap_mem_pt_guest_attach(struct ptnetmap_memdev *ptn_dev, nm_memid_t mem_id)
2363 {
2364 	struct netmap_mem_d *nmd;
2365 	struct netmap_mem_ptg *ptnmd;
2366 
2367 	nmd = netmap_mem_pt_guest_get(mem_id);
2368 
2369 	/* assign this device to the guest allocator */
2370 	if (nmd) {
2371 		ptnmd = (struct netmap_mem_ptg *)nmd;
2372 		ptnmd->ptn_dev = ptn_dev;
2373 	}
2374 
2375 	return nmd;
2376 }
2377 
2378 /* Called when ptnet device is attaching */
2379 struct netmap_mem_d *
2380 netmap_mem_pt_guest_new(struct ifnet *ifp,
2381 			unsigned int nifp_offset,
2382 			unsigned int memid)
2383 {
2384 	struct netmap_mem_d *nmd;
2385 
2386 	if (ifp == NULL) {
2387 		return NULL;
2388 	}
2389 
2390 	nmd = netmap_mem_pt_guest_get((nm_memid_t)memid);
2391 
2392 	if (nmd) {
2393 		netmap_mem_pt_guest_ifp_add(nmd, ifp, nifp_offset);
2394 	}
2395 
2396 	return nmd;
2397 }
2398 
2399 #endif /* WITH_PTNETMAP_GUEST */
2400