xref: /freebsd/sys/dev/netmap/netmap_mem2.c (revision 5bf5ca772c6de2d53344a78cf461447cc322ccea)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (C) 2012-2014 Matteo Landi
5  * Copyright (C) 2012-2016 Luigi Rizzo
6  * Copyright (C) 2012-2016 Giuseppe Lettieri
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *   1. Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *   2. Redistributions in binary form must reproduce the above copyright
15  *      notice, this list of conditions and the following disclaimer in the
16  *      documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #ifdef linux
32 #include "bsd_glue.h"
33 #endif /* linux */
34 
35 #ifdef __APPLE__
36 #include "osx_glue.h"
37 #endif /* __APPLE__ */
38 
39 #ifdef __FreeBSD__
40 #include <sys/cdefs.h> /* prerequisite */
41 __FBSDID("$FreeBSD$");
42 
43 #include <sys/types.h>
44 #include <sys/malloc.h>
45 #include <sys/kernel.h>		/* MALLOC_DEFINE */
46 #include <sys/proc.h>
47 #include <vm/vm.h>	/* vtophys */
48 #include <vm/pmap.h>	/* vtophys */
49 #include <sys/socket.h> /* sockaddrs */
50 #include <sys/selinfo.h>
51 #include <sys/sysctl.h>
52 #include <net/if.h>
53 #include <net/if_var.h>
54 #include <net/vnet.h>
55 #include <machine/bus.h>	/* bus_dmamap_* */
56 
57 /* M_NETMAP only used in here */
58 MALLOC_DECLARE(M_NETMAP);
59 MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");
60 
61 #endif /* __FreeBSD__ */
62 
63 #ifdef _WIN32
64 #include <win_glue.h>
65 #endif
66 
67 #include <net/netmap.h>
68 #include <dev/netmap/netmap_kern.h>
69 #include <net/netmap_virt.h>
70 #include "netmap_mem2.h"
71 
/*
 * Default number of buffers requested for the buffer pool of the global
 * allocator (it initializes nm_mem.params[NETMAP_BUF_POOL].num).
 * The expansions are parenthesized so that the macro behaves as a single
 * value when used inside a larger expression (e.g. x / NETMAP_BUF_MAX_NUM).
 */
#ifdef _WIN32_USE_SMALL_GENERIC_DEVICES_MEMORY
#define NETMAP_BUF_MAX_NUM  (8*4096)      /* if too big takes too much time to allocate */
#else
#define NETMAP_BUF_MAX_NUM (20*4096*2)	/* large machine */
#endif

/* size of the name[] buffer embedded in every struct netmap_obj_pool */
#define NETMAP_POOL_MAX_NAMSZ	32
79 
80 
/*
 * Indices of the object pools of an allocator: they select the entries
 * of the pools[] and params[] arrays in struct netmap_mem_d.
 * NETMAP_POOLS_NR is not a pool: it is the number of pools and is used
 * as the size of those arrays.
 */
enum {
	NETMAP_IF_POOL   = 0,
	NETMAP_RING_POOL,
	NETMAP_BUF_POOL,
	NETMAP_POOLS_NR
};
87 
88 
/*
 * Configuration request for one object pool. The 'size' and 'num'
 * fields of the global allocator are exported read-write through the
 * dev.netmap sysctls declared later in this file.
 */
struct netmap_obj_params {
	u_int size;	/* requested object size */
	u_int num;	/* requested number of objects */

	/*
	 * NOTE(review): presumably the values seen at the previous
	 * (re)configuration, kept to detect changes; the code using them
	 * is not visible here -- confirm.
	 */
	u_int last_size;
	u_int last_num;
};
96 
/*
 * One pool of equally sized objects. Objects are grouped in clusters
 * and located through 'lut' (one entry per object); 'bitmap' has one
 * bit per object, with 1 meaning that the object is free.
 * An allocator (struct netmap_mem_d) embeds NETMAP_POOLS_NR of these.
 */
struct netmap_obj_pool {
	char name[NETMAP_POOL_MAX_NAMSZ];	/* name of the allocator */

	/* ---------------------------------------------------*/
	/* these are only meaningful if the pool is finalized */
	/* (see 'finalized' field in netmap_mem_d)            */
	u_int objtotal;         /* actual total number of objects. */
	u_int memtotal;		/* actual total memory space */
	u_int numclusters;	/* actual number of clusters */

	u_int objfree;          /* number of free objects. */

	struct lut_entry *lut;  /* virt,phys addresses, objtotal entries */
	uint32_t *bitmap;       /* one bit per buffer, 1 means free */
	uint32_t bitmap_slots;	/* number of uint32 entries in bitmap */
	/* ---------------------------------------------------*/

	/* limits */
	u_int objminsize;	/* minimum object size */
	u_int objmaxsize;	/* maximum object size */
	u_int nummin;		/* minimum number of objects */
	u_int nummax;		/* maximum number of objects */

	/* these are changed only by config */
	u_int _objtotal;	/* total number of objects */
	u_int _objsize;		/* object size */
	u_int _clustsize;       /* cluster size */
	u_int _clustentries;    /* objects per cluster */
	u_int _numclusters;	/* number of clusters */

	/* requested values */
	u_int r_objtotal;
	u_int r_objsize;
};
131 
/* type of the per-allocator lock (the nm_mtx field of struct netmap_mem_d) */
#define NMA_LOCK_T		NM_MTX_T


/*
 * Method table of an allocator. The public netmap_mem_*() functions in
 * this file dispatch through the 'ops' pointer of the allocator, so each
 * kind of allocator can supply its own implementation.
 */
struct netmap_mem_ops {
	/* methods that operate on the allocator itself */
	int (*nmd_get_lut)(struct netmap_mem_d *, struct netmap_lut*);
	int  (*nmd_get_info)(struct netmap_mem_d *, u_int *size,
			u_int *memflags, uint16_t *id);

	vm_paddr_t (*nmd_ofstophys)(struct netmap_mem_d *, vm_ooffset_t);
	int (*nmd_config)(struct netmap_mem_d *);
	int (*nmd_finalize)(struct netmap_mem_d *);
	void (*nmd_deref)(struct netmap_mem_d *);
	ssize_t  (*nmd_if_offset)(struct netmap_mem_d *, const void *vaddr);
	void (*nmd_delete)(struct netmap_mem_d *);

	/* methods that take an adapter (they are reached via na->nm_mem) */
	struct netmap_if * (*nmd_if_new)(struct netmap_adapter *,
					 struct netmap_priv_d *);
	void (*nmd_if_delete)(struct netmap_adapter *, struct netmap_if *);
	int  (*nmd_rings_create)(struct netmap_adapter *);
	void (*nmd_rings_delete)(struct netmap_adapter *);
};
153 
/*
 * A netmap memory allocator: three object pools (netmap_if, rings,
 * buffers) plus the bookkeeping needed to share it among users.
 * Allocators are linked in a circular list through prev/next.
 */
struct netmap_mem_d {
	NMA_LOCK_T nm_mtx;  /* protect the allocator */
	u_int nm_totalsize; /* shorthand */

	u_int flags;
#define NETMAP_MEM_FINALIZED	0x1	/* preallocation done */
#define NETMAP_MEM_HIDDEN	0x8	/* being prepared; skipped by netmap_mem_find() */
	int lasterr;		/* last error for curr config */
	int active;		/* active users */
	int refcount;		/* references; updated under nm_mem_list_lock */
	/* the three allocators */
	struct netmap_obj_pool pools[NETMAP_POOLS_NR];

	nm_memid_t nm_id;	/* allocator identifier */
	int nm_grp;	/* iommu group id; negative until assigned */

	/* list of all existing allocators, sorted by nm_id */
	struct netmap_mem_d *prev, *next;

	struct netmap_mem_ops *ops;	/* method table */

	/* requested configuration, one entry per pool */
	struct netmap_obj_params params[NETMAP_POOLS_NR];

#define NM_MEM_NAMESZ	16
	char name[NM_MEM_NAMESZ];
};
180 
181 /*
182  * XXX need to fix the case of t0 == void
183  */
184 #define NMD_DEFCB(t0, name) \
185 t0 \
186 netmap_mem_##name(struct netmap_mem_d *nmd) \
187 { \
188 	return nmd->ops->nmd_##name(nmd); \
189 }
190 
191 #define NMD_DEFCB1(t0, name, t1) \
192 t0 \
193 netmap_mem_##name(struct netmap_mem_d *nmd, t1 a1) \
194 { \
195 	return nmd->ops->nmd_##name(nmd, a1); \
196 }
197 
198 #define NMD_DEFCB3(t0, name, t1, t2, t3) \
199 t0 \
200 netmap_mem_##name(struct netmap_mem_d *nmd, t1 a1, t2 a2, t3 a3) \
201 { \
202 	return nmd->ops->nmd_##name(nmd, a1, a2, a3); \
203 }
204 
205 #define NMD_DEFNACB(t0, name) \
206 t0 \
207 netmap_mem_##name(struct netmap_adapter *na) \
208 { \
209 	return na->nm_mem->ops->nmd_##name(na); \
210 }
211 
212 #define NMD_DEFNACB1(t0, name, t1) \
213 t0 \
214 netmap_mem_##name(struct netmap_adapter *na, t1 a1) \
215 { \
216 	return na->nm_mem->ops->nmd_##name(na, a1); \
217 }
218 
219 NMD_DEFCB1(int, get_lut, struct netmap_lut *);
220 NMD_DEFCB3(int, get_info, u_int *, u_int *, uint16_t *);
221 NMD_DEFCB1(vm_paddr_t, ofstophys, vm_ooffset_t);
222 static int netmap_mem_config(struct netmap_mem_d *);
223 NMD_DEFCB(int, config);
224 NMD_DEFCB1(ssize_t, if_offset, const void *);
225 NMD_DEFCB(void, delete);
226 
227 NMD_DEFNACB1(struct netmap_if *, if_new, struct netmap_priv_d *);
228 NMD_DEFNACB1(void, if_delete, struct netmap_if *);
229 NMD_DEFNACB(int, rings_create);
230 NMD_DEFNACB(void, rings_delete);
231 
/* forward declarations of helpers that are used before their definition */
static int netmap_mem_map(struct netmap_obj_pool *, struct netmap_adapter *);
static int netmap_mem_unmap(struct netmap_obj_pool *, struct netmap_adapter *);
static int nm_mem_assign_group(struct netmap_mem_d *, struct device *);
static void nm_mem_release_id(struct netmap_mem_d *);
236 
237 nm_memid_t
238 netmap_mem_get_id(struct netmap_mem_d *nmd)
239 {
240 	return nmd->nm_id;
241 }
242 
/* wrappers around the per-allocator lock (the nm_mtx field of 'n') */
#define NMA_LOCK_INIT(n)	NM_MTX_INIT((n)->nm_mtx)
#define NMA_LOCK_DESTROY(n)	NM_MTX_DESTROY((n)->nm_mtx)
#define NMA_LOCK(n)		NM_MTX_LOCK((n)->nm_mtx)
#define NMA_UNLOCK(n)		NM_MTX_UNLOCK((n)->nm_mtx)

/*
 * Optional tracing of reference count changes: when NM_DEBUG_MEM_PUTGET
 * is defined, print the caller, the allocator id and the current
 * refcount; otherwise expand to nothing.
 */
#ifdef NM_DEBUG_MEM_PUTGET
#define NM_DBG_REFC(nmd, func, line)	\
	nm_prinf("%s:%d mem[%d] -> %d\n", func, line, (nmd)->nm_id, (nmd)->refcount);
#else
#define NM_DBG_REFC(nmd, func, line)
#endif

/*
 * circular list of all existing allocators: netmap_last_mem_d is the
 * entry point used to scan it (initially the global allocator).
 * nm_mem_list_lock is taken around the list and refcount updates below.
 */
static struct netmap_mem_d *netmap_last_mem_d = &nm_mem;
NM_MTX_T nm_mem_list_lock;
258 
259 struct netmap_mem_d *
260 __netmap_mem_get(struct netmap_mem_d *nmd, const char *func, int line)
261 {
262 	NM_MTX_LOCK(nm_mem_list_lock);
263 	nmd->refcount++;
264 	NM_DBG_REFC(nmd, func, line);
265 	NM_MTX_UNLOCK(nm_mem_list_lock);
266 	return nmd;
267 }
268 
269 void
270 __netmap_mem_put(struct netmap_mem_d *nmd, const char *func, int line)
271 {
272 	int last;
273 	NM_MTX_LOCK(nm_mem_list_lock);
274 	last = (--nmd->refcount == 0);
275 	if (last)
276 		nm_mem_release_id(nmd);
277 	NM_DBG_REFC(nmd, func, line);
278 	NM_MTX_UNLOCK(nm_mem_list_lock);
279 	if (last)
280 		netmap_mem_delete(nmd);
281 }
282 
283 int
284 netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
285 {
286 	if (nm_mem_assign_group(nmd, na->pdev) < 0) {
287 		return ENOMEM;
288 	} else {
289 		NMA_LOCK(nmd);
290 		nmd->lasterr = nmd->ops->nmd_finalize(nmd);
291 		NMA_UNLOCK(nmd);
292 	}
293 
294 	if (!nmd->lasterr && na->pdev)
295 		netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na);
296 
297 	return nmd->lasterr;
298 }
299 
300 void
301 netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
302 {
303 	NMA_LOCK(nmd);
304 	netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na);
305 	if (nmd->active == 1) {
306 		u_int i;
307 
308 		/*
309 		 * Reset the allocator when it falls out of use so that any
310 		 * pool resources leaked by unclean application exits are
311 		 * reclaimed.
312 		 */
313 		for (i = 0; i < NETMAP_POOLS_NR; i++) {
314 			struct netmap_obj_pool *p;
315 			u_int j;
316 
317 			p = &nmd->pools[i];
318 			p->objfree = p->objtotal;
319 			/*
320 			 * Reproduce the net effect of the M_ZERO malloc()
321 			 * and marking of free entries in the bitmap that
322 			 * occur in finalize_obj_allocator()
323 			 */
324 			memset(p->bitmap,
325 			    '\0',
326 			    sizeof(uint32_t) * ((p->objtotal + 31) / 32));
327 
328 			/*
329 			 * Set all the bits in the bitmap that have
330 			 * corresponding buffers to 1 to indicate they are
331 			 * free.
332 			 */
333 			for (j = 0; j < p->objtotal; j++) {
334 				if (p->lut[j].vaddr != NULL) {
335 					p->bitmap[ (j>>5) ] |=  ( 1 << (j & 31) );
336 				}
337 			}
338 		}
339 
340 		/*
341 		 * Per netmap_mem_finalize_all(),
342 		 * buffers 0 and 1 are reserved
343 		 */
344 		nmd->pools[NETMAP_BUF_POOL].objfree -= 2;
345 		if (nmd->pools[NETMAP_BUF_POOL].bitmap) {
346 			/* XXX This check is a workaround that prevents a
347 			 * NULL pointer crash which currently happens only
348 			 * with ptnetmap guests.
349 			 * Removed shared-info --> is the bug still there? */
350 			nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3;
351 		}
352 	}
353 	nmd->ops->nmd_deref(nmd);
354 
355 	NMA_UNLOCK(nmd);
356 }
357 
358 
359 /* accessor functions */
360 static int
361 netmap_mem2_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut)
362 {
363 	lut->lut = nmd->pools[NETMAP_BUF_POOL].lut;
364 	lut->objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal;
365 	lut->objsize = nmd->pools[NETMAP_BUF_POOL]._objsize;
366 
367 	return 0;
368 }
369 
/*
 * Per-pool size/number parameters for private allocators. They are
 * exported read-write as the dev.netmap.priv_* sysctls, which describe
 * them as the default size and number of private netmap objects.
 */
static struct netmap_obj_params netmap_min_priv_params[NETMAP_POOLS_NR] = {
	[NETMAP_IF_POOL] = {
		.size = 1024,
		.num  = 2,
	},
	[NETMAP_RING_POOL] = {
		.size = 5*PAGE_SIZE,
		.num  = 4,
	},
	[NETMAP_BUF_POOL] = {
		.size = 2048,
		.num  = 4098,
	},
};
384 
385 
/*
 * nm_mem is the memory allocator used for all physical interfaces
 * running in netmap mode.
 * Virtual (VALE) ports will have each its own allocator.
 *
 * It is statically initialized with id 1, no iommu group assigned yet
 * (nm_grp < 0) and prev/next pointing to itself, i.e. it is the only
 * element of the circular list of allocators.
 */
extern struct netmap_mem_ops netmap_mem_global_ops; /* forward */
struct netmap_mem_d nm_mem = {	/* Our memory allocator. */
	/* per-pool names and limits on object size and number */
	.pools = {
		[NETMAP_IF_POOL] = {
			.name 	= "netmap_if",
			.objminsize = sizeof(struct netmap_if),
			.objmaxsize = 4096,
			.nummin     = 10,	/* don't be stingy */
			.nummax	    = 10000,	/* XXX very large */
		},
		[NETMAP_RING_POOL] = {
			.name 	= "netmap_ring",
			.objminsize = sizeof(struct netmap_ring),
			.objmaxsize = 32*PAGE_SIZE,
			.nummin     = 2,
			.nummax	    = 1024,
		},
		[NETMAP_BUF_POOL] = {
			.name	= "netmap_buf",
			.objminsize = 64,
			.objmaxsize = 65536,
			.nummin     = 4,
			.nummax	    = 1000000, /* one million! */
		},
	},

	/* initial requests; tunable through the dev.netmap sysctls */
	.params = {
		[NETMAP_IF_POOL] = {
			.size = 1024,
			.num  = 100,
		},
		[NETMAP_RING_POOL] = {
			.size = 9*PAGE_SIZE,
			.num  = 200,
		},
		[NETMAP_BUF_POOL] = {
			.size = 2048,
			.num  = NETMAP_BUF_MAX_NUM,
		},
	},

	.nm_id = 1,
	.nm_grp = -1,

	.prev = &nm_mem,
	.next = &nm_mem,

	.ops = &netmap_mem_global_ops,

	.name = "1"
};
442 
443 
/*
 * blueprint for the private memory allocators.
 * The pool names are printf-style templates: the "%s" is a placeholder
 * to be filled in when an allocator is created from the blueprint.
 * NOTE(review): netmap_mem_private_ops is declared just below, but the
 * blueprint installs netmap_mem_global_ops -- confirm this is intended.
 */
extern struct netmap_mem_ops netmap_mem_private_ops; /* forward */
/* XXX clang is not happy about using name as a print format */
static const struct netmap_mem_d nm_blueprint = {
	.pools = {
		[NETMAP_IF_POOL] = {
			.name 	= "%s_if",
			.objminsize = sizeof(struct netmap_if),
			.objmaxsize = 4096,
			.nummin     = 1,
			.nummax	    = 100,
		},
		[NETMAP_RING_POOL] = {
			.name 	= "%s_ring",
			.objminsize = sizeof(struct netmap_ring),
			.objmaxsize = 32*PAGE_SIZE,
			.nummin     = 2,
			.nummax	    = 1024,
		},
		[NETMAP_BUF_POOL] = {
			.name	= "%s_buf",
			.objminsize = 64,
			.objmaxsize = 65536,
			.nummin     = 4,
			.nummax	    = 1000000, /* one million! */
		},
	},

	.nm_grp = -1,	/* no iommu group assigned yet */

	.flags = NETMAP_MEM_PRIVATE,

	.ops = &netmap_mem_global_ops,
};
478 
/* memory allocator related sysctls */

/* turn the macro argument into a string literal (no prior expansion) */
#define STRINGIFY(x) #x


/*
 * Declare the six dev.netmap sysctls of one pool of the global allocator.
 * 'id' is the pool index, 'name' the prefix used in the sysctl names:
 *   <name>_size, <name>_num            read-write, requested values
 *   <name>_curr_size, <name>_curr_num  read-only, values currently in use
 *   priv_<name>_size, priv_<name>_num  read-write, values for private
 *                                      allocators (netmap_min_priv_params)
 */
#define DECLARE_SYSCTLS(id, name) \
	SYSBEGIN(mem2_ ## name); \
	SYSCTL_INT(_dev_netmap, OID_AUTO, name##_size, \
	    CTLFLAG_RW, &nm_mem.params[id].size, 0, "Requested size of netmap " STRINGIFY(name) "s"); \
	SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_size, \
	    CTLFLAG_RD, &nm_mem.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \
	SYSCTL_INT(_dev_netmap, OID_AUTO, name##_num, \
	    CTLFLAG_RW, &nm_mem.params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \
	SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_num, \
	    CTLFLAG_RD, &nm_mem.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s"); \
	SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_size, \
	    CTLFLAG_RW, &netmap_min_priv_params[id].size, 0, \
	    "Default size of private netmap " STRINGIFY(name) "s"); \
	SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_num, \
	    CTLFLAG_RW, &netmap_min_priv_params[id].num, 0, \
	    "Default number of private netmap " STRINGIFY(name) "s");	\
	SYSEND

SYSCTL_DECL(_dev_netmap);
/* one set of sysctls per pool */
DECLARE_SYSCTLS(NETMAP_IF_POOL, if);
DECLARE_SYSCTLS(NETMAP_RING_POOL, ring);
DECLARE_SYSCTLS(NETMAP_BUF_POOL, buf);
506 
507 /* call with nm_mem_list_lock held */
508 static int
509 nm_mem_assign_id_locked(struct netmap_mem_d *nmd)
510 {
511 	nm_memid_t id;
512 	struct netmap_mem_d *scan = netmap_last_mem_d;
513 	int error = ENOMEM;
514 
515 	do {
516 		/* we rely on unsigned wrap around */
517 		id = scan->nm_id + 1;
518 		if (id == 0) /* reserve 0 as error value */
519 			id = 1;
520 		scan = scan->next;
521 		if (id != scan->nm_id) {
522 			nmd->nm_id = id;
523 			nmd->prev = scan->prev;
524 			nmd->next = scan;
525 			scan->prev->next = nmd;
526 			scan->prev = nmd;
527 			netmap_last_mem_d = nmd;
528 			nmd->refcount = 1;
529 			NM_DBG_REFC(nmd, __FUNCTION__, __LINE__);
530 			error = 0;
531 			break;
532 		}
533 	} while (scan != netmap_last_mem_d);
534 
535 	return error;
536 }
537 
538 /* call with nm_mem_list_lock *not* held */
539 static int
540 nm_mem_assign_id(struct netmap_mem_d *nmd)
541 {
542         int ret;
543 
544 	NM_MTX_LOCK(nm_mem_list_lock);
545         ret = nm_mem_assign_id_locked(nmd);
546 	NM_MTX_UNLOCK(nm_mem_list_lock);
547 
548 	return ret;
549 }
550 
551 /* call with nm_mem_list_lock held */
552 static void
553 nm_mem_release_id(struct netmap_mem_d *nmd)
554 {
555 	nmd->prev->next = nmd->next;
556 	nmd->next->prev = nmd->prev;
557 
558 	if (netmap_last_mem_d == nmd)
559 		netmap_last_mem_d = nmd->prev;
560 
561 	nmd->prev = nmd->next = NULL;
562 }
563 
564 struct netmap_mem_d *
565 netmap_mem_find(nm_memid_t id)
566 {
567 	struct netmap_mem_d *nmd;
568 
569 	NM_MTX_LOCK(nm_mem_list_lock);
570 	nmd = netmap_last_mem_d;
571 	do {
572 		if (!(nmd->flags & NETMAP_MEM_HIDDEN) && nmd->nm_id == id) {
573 			nmd->refcount++;
574 			NM_DBG_REFC(nmd, __FUNCTION__, __LINE__);
575 			NM_MTX_UNLOCK(nm_mem_list_lock);
576 			return nmd;
577 		}
578 		nmd = nmd->next;
579 	} while (nmd != netmap_last_mem_d);
580 	NM_MTX_UNLOCK(nm_mem_list_lock);
581 	return NULL;
582 }
583 
584 static int
585 nm_mem_assign_group(struct netmap_mem_d *nmd, struct device *dev)
586 {
587 	int err = 0, id;
588 	id = nm_iommu_group_id(dev);
589 	if (netmap_verbose)
590 		D("iommu_group %d", id);
591 
592 	NMA_LOCK(nmd);
593 
594 	if (nmd->nm_grp < 0)
595 		nmd->nm_grp = id;
596 
597 	if (nmd->nm_grp != id)
598 		nmd->lasterr = err = ENOMEM;
599 
600 	NMA_UNLOCK(nmd);
601 	return err;
602 }
603 
604 /*
605  * First, find the allocator that contains the requested offset,
606  * then locate the cluster through a lookup table.
607  */
608 static vm_paddr_t
609 netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
610 {
611 	int i;
612 	vm_ooffset_t o = offset;
613 	vm_paddr_t pa;
614 	struct netmap_obj_pool *p;
615 
616 	NMA_LOCK(nmd);
617 	p = nmd->pools;
618 
619 	for (i = 0; i < NETMAP_POOLS_NR; offset -= p[i].memtotal, i++) {
620 		if (offset >= p[i].memtotal)
621 			continue;
622 		// now lookup the cluster's address
623 #ifndef _WIN32
624 		pa = vtophys(p[i].lut[offset / p[i]._objsize].vaddr) +
625 			offset % p[i]._objsize;
626 #else
627 		pa = vtophys(p[i].lut[offset / p[i]._objsize].vaddr);
628 		pa.QuadPart += offset % p[i]._objsize;
629 #endif
630 		NMA_UNLOCK(nmd);
631 		return pa;
632 	}
633 	/* this is only in case of errors */
634 	D("invalid ofs 0x%x out of 0x%x 0x%x 0x%x", (u_int)o,
635 		p[NETMAP_IF_POOL].memtotal,
636 		p[NETMAP_IF_POOL].memtotal
637 			+ p[NETMAP_RING_POOL].memtotal,
638 		p[NETMAP_IF_POOL].memtotal
639 			+ p[NETMAP_RING_POOL].memtotal
640 			+ p[NETMAP_BUF_POOL].memtotal);
641 	NMA_UNLOCK(nmd);
642 #ifndef _WIN32
643 	return 0;	// XXX bad address
644 #else
645 	vm_paddr_t res;
646 	res.QuadPart = 0;
647 	return res;
648 #endif
649 }
650 
651 #ifdef _WIN32
652 
/*
 * win32_build_user_vm_map
 *
 * This function takes all the objects that make up the pools and builds
 * the description of a contiguous virtual memory region for userspace.
 * It works this way:
 * 1 - allocate a Memory Descriptor List (MDL) as wide as the sum
 *		of the memory needed for the pools
 * 2 - for every pool, and for every cluster in the pool:
 *
 *		2a - get the list of the physical page descriptors
 *				of the cluster
 *		2b - calculate the offset in the array of page descriptors
 *				of the main MDL
 *		2c - copy the descriptors of the cluster into the main MDL
 *
 * 3 - return the resulting MDL that needs to be mapped in userland
 *
 * In this way we will have an MDL that describes all the memory for the
 * objects in a single object.
 *
 * Returns NULL if the allocator information cannot be retrieved or if
 * an MDL cannot be allocated.
 */

PMDL
win32_build_user_vm_map(struct netmap_mem_d* nmd)
{
	int i, j;
	u_int memsize, memflags, ofs = 0;	/* ofs: byte offset in the main MDL */
	PMDL mainMdl, tempMdl;

	if (netmap_mem_get_info(nmd, &memsize, &memflags, NULL)) {
		D("memory not finalised yet");
		return NULL;
	}

	mainMdl = IoAllocateMdl(NULL, memsize, FALSE, FALSE, NULL);
	if (mainMdl == NULL) {
		D("failed to allocate mdl");
		return NULL;
	}

	NMA_LOCK(nmd);
	for (i = 0; i < NETMAP_POOLS_NR; i++) {
		struct netmap_obj_pool *p = &nmd->pools[i];
		int clsz = p->_clustsize;
		int clobjs = p->_clustentries; /* objects per cluster */
		/* bytes taken by the page descriptors of one cluster */
		int mdl_len = sizeof(PFN_NUMBER) * BYTES_TO_PAGES(clsz);
		PPFN_NUMBER pSrc, pDst;

		/* each pool has a different cluster size so we need to reallocate */
		tempMdl = IoAllocateMdl(p->lut[0].vaddr, clsz, FALSE, FALSE, NULL);
		if (tempMdl == NULL) {
			/* undo everything done so far */
			NMA_UNLOCK(nmd);
			D("fail to allocate tempMdl");
			IoFreeMdl(mainMdl);
			return NULL;
		}
		pSrc = MmGetMdlPfnArray(tempMdl);
		/* create one entry per cluster, the lut[] has one entry per object */
		for (j = 0; j < p->numclusters; j++, ofs += clsz) {
			pDst = &MmGetMdlPfnArray(mainMdl)[BYTES_TO_PAGES(ofs)];
			/* tempMdl is reused: point it to the current cluster */
			MmInitializeMdl(tempMdl, p->lut[j*clobjs].vaddr, clsz);
			MmBuildMdlForNonPagedPool(tempMdl); /* compute physical page addresses */
			RtlCopyMemory(pDst, pSrc, mdl_len); /* copy the page descriptors */
			mainMdl->MdlFlags = tempMdl->MdlFlags; /* XXX what is in here ? */
		}
		IoFreeMdl(tempMdl);
	}
	NMA_UNLOCK(nmd);
	return mainMdl;
}
723 
724 #endif /* _WIN32 */
725 
726 /*
727  * helper function for OS-specific mmap routines (currently only windows).
728  * Given an nmd and a pool index, returns the cluster size and number of clusters.
729  * Returns 0 if memory is finalised and the pool is valid, otherwise 1.
730  * It should be called under NMA_LOCK(nmd) otherwise the underlying info can change.
731  */
732 
733 int
734 netmap_mem2_get_pool_info(struct netmap_mem_d* nmd, u_int pool, u_int *clustsize, u_int *numclusters)
735 {
736 	if (!nmd || !clustsize || !numclusters || pool >= NETMAP_POOLS_NR)
737 		return 1; /* invalid arguments */
738 	// NMA_LOCK_ASSERT(nmd);
739 	if (!(nmd->flags & NETMAP_MEM_FINALIZED)) {
740 		*clustsize = *numclusters = 0;
741 		return 1; /* not ready yet */
742 	}
743 	*clustsize = nmd->pools[pool]._clustsize;
744 	*numclusters = nmd->pools[pool].numclusters;
745 	return 0; /* success */
746 }
747 
748 static int
749 netmap_mem2_get_info(struct netmap_mem_d* nmd, u_int* size, u_int *memflags,
750 	nm_memid_t *id)
751 {
752 	int error = 0;
753 	NMA_LOCK(nmd);
754 	error = netmap_mem_config(nmd);
755 	if (error)
756 		goto out;
757 	if (size) {
758 		if (nmd->flags & NETMAP_MEM_FINALIZED) {
759 			*size = nmd->nm_totalsize;
760 		} else {
761 			int i;
762 			*size = 0;
763 			for (i = 0; i < NETMAP_POOLS_NR; i++) {
764 				struct netmap_obj_pool *p = nmd->pools + i;
765 				*size += (p->_numclusters * p->_clustsize);
766 			}
767 		}
768 	}
769 	if (memflags)
770 		*memflags = nmd->flags;
771 	if (id)
772 		*id = nmd->nm_id;
773 out:
774 	NMA_UNLOCK(nmd);
775 	return error;
776 }
777 
778 /*
779  * we store objects by kernel address, need to find the offset
780  * within the pool to export the value to userspace.
781  * Algorithm: scan until we find the cluster, then add the
782  * actual offset in the cluster
783  */
784 static ssize_t
785 netmap_obj_offset(struct netmap_obj_pool *p, const void *vaddr)
786 {
787 	int i, k = p->_clustentries, n = p->objtotal;
788 	ssize_t ofs = 0;
789 
790 	for (i = 0; i < n; i += k, ofs += p->_clustsize) {
791 		const char *base = p->lut[i].vaddr;
792 		ssize_t relofs = (const char *) vaddr - base;
793 
794 		if (relofs < 0 || relofs >= p->_clustsize)
795 			continue;
796 
797 		ofs = ofs + relofs;
798 		ND("%s: return offset %d (cluster %d) for pointer %p",
799 		    p->name, ofs, i, vaddr);
800 		return ofs;
801 	}
802 	D("address %p is not contained inside any cluster (%s)",
803 	    vaddr, p->name);
804 	return 0; /* An error occurred */
805 }
806 
/*
 * Helper functions which convert virtual addresses to offsets.
 * 'n' is the allocator, 'v' the kernel virtual address of the object.
 * The netmap_if offset is relative to the netmap_if pool; the ring
 * offset adds the total size of the netmap_if pool to the offset of
 * the ring inside the ring pool.
 */
#define netmap_if_offset(n, v)					\
	netmap_obj_offset(&(n)->pools[NETMAP_IF_POOL], (v))

#define netmap_ring_offset(n, v)				\
    ((n)->pools[NETMAP_IF_POOL].memtotal + 			\
	netmap_obj_offset(&(n)->pools[NETMAP_RING_POOL], (v)))
814 
815 static ssize_t
816 netmap_mem2_if_offset(struct netmap_mem_d *nmd, const void *addr)
817 {
818 	ssize_t v;
819 	NMA_LOCK(nmd);
820 	v = netmap_if_offset(nmd, addr);
821 	NMA_UNLOCK(nmd);
822 	return v;
823 }
824 
825 /*
826  * report the index, and use start position as a hint,
827  * otherwise buffer allocation becomes terribly expensive.
828  */
829 static void *
830 netmap_obj_malloc(struct netmap_obj_pool *p, u_int len, uint32_t *start, uint32_t *index)
831 {
832 	uint32_t i = 0;			/* index in the bitmap */
833 	uint32_t mask, j = 0;		/* slot counter */
834 	void *vaddr = NULL;
835 
836 	if (len > p->_objsize) {
837 		D("%s request size %d too large", p->name, len);
838 		// XXX cannot reduce the size
839 		return NULL;
840 	}
841 
842 	if (p->objfree == 0) {
843 		D("no more %s objects", p->name);
844 		return NULL;
845 	}
846 	if (start)
847 		i = *start;
848 
849 	/* termination is guaranteed by p->free, but better check bounds on i */
850 	while (vaddr == NULL && i < p->bitmap_slots)  {
851 		uint32_t cur = p->bitmap[i];
852 		if (cur == 0) { /* bitmask is fully used */
853 			i++;
854 			continue;
855 		}
856 		/* locate a slot */
857 		for (j = 0, mask = 1; (cur & mask) == 0; j++, mask <<= 1)
858 			;
859 
860 		p->bitmap[i] &= ~mask; /* mark object as in use */
861 		p->objfree--;
862 
863 		vaddr = p->lut[i * 32 + j].vaddr;
864 		if (index)
865 			*index = i * 32 + j;
866 	}
867 	ND("%s allocator: allocated object @ [%d][%d]: vaddr %p",p->name, i, j, vaddr);
868 
869 	if (start)
870 		*start = i;
871 	return vaddr;
872 }
873 
874 
875 /*
876  * free by index, not by address.
877  * XXX should we also cleanup the content ?
878  */
879 static int
880 netmap_obj_free(struct netmap_obj_pool *p, uint32_t j)
881 {
882 	uint32_t *ptr, mask;
883 
884 	if (j >= p->objtotal) {
885 		D("invalid index %u, max %u", j, p->objtotal);
886 		return 1;
887 	}
888 	ptr = &p->bitmap[j / 32];
889 	mask = (1 << (j % 32));
890 	if (*ptr & mask) {
891 		D("ouch, double free on buffer %d", j);
892 		return 1;
893 	} else {
894 		*ptr |= mask;
895 		p->objfree++;
896 		return 0;
897 	}
898 }
899 
900 /*
901  * free by address. This is slow but is only used for a few
902  * objects (rings, nifp)
903  */
904 static void
905 netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr)
906 {
907 	u_int i, j, n = p->numclusters;
908 
909 	for (i = 0, j = 0; i < n; i++, j += p->_clustentries) {
910 		void *base = p->lut[i * p->_clustentries].vaddr;
911 		ssize_t relofs = (ssize_t) vaddr - (ssize_t) base;
912 
913 		/* Given address, is out of the scope of the current cluster.*/
914 		if (vaddr < base || relofs >= p->_clustsize)
915 			continue;
916 
917 		j = j + relofs / p->_objsize;
918 		/* KASSERT(j != 0, ("Cannot free object 0")); */
919 		netmap_obj_free(p, j);
920 		return;
921 	}
922 	D("address %p is not contained inside any cluster (%s)",
923 	    vaddr, p->name);
924 }
925 
/* size of the objects in the buffer pool of allocator 'n' */
#define netmap_mem_bufsize(n)	\
	((n)->pools[NETMAP_BUF_POOL]._objsize)

/*
 * Per-pool shorthands for the generic object allocator:
 * netmap_if and rings are allocated by size and freed by address,
 * buffers are always allocated with the full buffer size and take a
 * scan position (_pos) and an output index (_index).
 */
#define netmap_if_malloc(n, len)	netmap_obj_malloc(&(n)->pools[NETMAP_IF_POOL], len, NULL, NULL)
#define netmap_if_free(n, v)		netmap_obj_free_va(&(n)->pools[NETMAP_IF_POOL], (v))
#define netmap_ring_malloc(n, len)	netmap_obj_malloc(&(n)->pools[NETMAP_RING_POOL], len, NULL, NULL)
#define netmap_ring_free(n, v)		netmap_obj_free_va(&(n)->pools[NETMAP_RING_POOL], (v))
#define netmap_buf_malloc(n, _pos, _index)			\
	netmap_obj_malloc(&(n)->pools[NETMAP_BUF_POOL], netmap_mem_bufsize(n), _pos, _index)


#if 0 // XXX unused
/* Return the index associated to the given packet buffer */
#define netmap_buf_index(n, v)						\
    (netmap_obj_offset(&(n)->pools[NETMAP_BUF_POOL], (v)) / NETMAP_BDG_BUF_SIZE(n))
#endif
942 
943 /*
944  * allocate extra buffers in a linked list.
945  * returns the actual number.
946  */
947 uint32_t
948 netmap_extra_alloc(struct netmap_adapter *na, uint32_t *head, uint32_t n)
949 {
950 	struct netmap_mem_d *nmd = na->nm_mem;
951 	uint32_t i, pos = 0; /* opaque, scan position in the bitmap */
952 
953 	NMA_LOCK(nmd);
954 
955 	*head = 0;	/* default, 'null' index ie empty list */
956 	for (i = 0 ; i < n; i++) {
957 		uint32_t cur = *head;	/* save current head */
958 		uint32_t *p = netmap_buf_malloc(nmd, &pos, head);
959 		if (p == NULL) {
960 			D("no more buffers after %d of %d", i, n);
961 			*head = cur; /* restore */
962 			break;
963 		}
964 		ND(5, "allocate buffer %d -> %d", *head, cur);
965 		*p = cur; /* link to previous head */
966 	}
967 
968 	NMA_UNLOCK(nmd);
969 
970 	return i;
971 }
972 
973 static void
974 netmap_extra_free(struct netmap_adapter *na, uint32_t head)
975 {
976         struct lut_entry *lut = na->na_lut.lut;
977 	struct netmap_mem_d *nmd = na->nm_mem;
978 	struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
979 	uint32_t i, cur, *buf;
980 
981 	ND("freeing the extra list");
982 	for (i = 0; head >=2 && head < p->objtotal; i++) {
983 		cur = head;
984 		buf = lut[head].vaddr;
985 		head = *buf;
986 		*buf = 0;
987 		if (netmap_obj_free(p, cur))
988 			break;
989 	}
990 	if (head != 0)
991 		D("breaking with head %d", head);
992 	if (netmap_verbose)
993 		D("freed %d buffers", i);
994 }
995 
996 
997 /* Return nonzero on error */
998 static int
999 netmap_new_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n)
1000 {
1001 	struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
1002 	u_int i = 0;	/* slot counter */
1003 	uint32_t pos = 0;	/* slot in p->bitmap */
1004 	uint32_t index = 0;	/* buffer index */
1005 
1006 	for (i = 0; i < n; i++) {
1007 		void *vaddr = netmap_buf_malloc(nmd, &pos, &index);
1008 		if (vaddr == NULL) {
1009 			D("no more buffers after %d of %d", i, n);
1010 			goto cleanup;
1011 		}
1012 		slot[i].buf_idx = index;
1013 		slot[i].len = p->_objsize;
1014 		slot[i].flags = 0;
1015 	}
1016 
1017 	ND("allocated %d buffers, %d available, first at %d", n, p->objfree, pos);
1018 	return (0);
1019 
1020 cleanup:
1021 	while (i > 0) {
1022 		i--;
1023 		netmap_obj_free(p, slot[i].buf_idx);
1024 	}
1025 	bzero(slot, n * sizeof(slot[0]));
1026 	return (ENOMEM);
1027 }
1028 
1029 static void
1030 netmap_mem_set_ring(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n, uint32_t index)
1031 {
1032 	struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
1033 	u_int i;
1034 
1035 	for (i = 0; i < n; i++) {
1036 		slot[i].buf_idx = index;
1037 		slot[i].len = p->_objsize;
1038 		slot[i].flags = 0;
1039 	}
1040 }
1041 
1042 
1043 static void
1044 netmap_free_buf(struct netmap_mem_d *nmd, uint32_t i)
1045 {
1046 	struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
1047 
1048 	if (i < 2 || i >= p->objtotal) {
1049 		D("Cannot free buf#%d: should be in [2, %d[", i, p->objtotal);
1050 		return;
1051 	}
1052 	netmap_obj_free(p, i);
1053 }
1054 
1055 
1056 static void
1057 netmap_free_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n)
1058 {
1059 	u_int i;
1060 
1061 	for (i = 0; i < n; i++) {
1062 		if (slot[i].buf_idx > 2)
1063 			netmap_free_buf(nmd, slot[i].buf_idx);
1064 	}
1065 }
1066 
1067 static void
1068 netmap_reset_obj_allocator(struct netmap_obj_pool *p)
1069 {
1070 
1071 	if (p == NULL)
1072 		return;
1073 	if (p->bitmap)
1074 		nm_os_free(p->bitmap);
1075 	p->bitmap = NULL;
1076 	if (p->lut) {
1077 		u_int i;
1078 
1079 		/*
1080 		 * Free each cluster allocated in
1081 		 * netmap_finalize_obj_allocator().  The cluster start
1082 		 * addresses are stored at multiples of p->_clusterentries
1083 		 * in the lut.
1084 		 */
1085 		for (i = 0; i < p->objtotal; i += p->_clustentries) {
1086 			if (p->lut[i].vaddr)
1087 				contigfree(p->lut[i].vaddr, p->_clustsize, M_NETMAP);
1088 		}
1089 		bzero(p->lut, sizeof(struct lut_entry) * p->objtotal);
1090 #ifdef linux
1091 		vfree(p->lut);
1092 #else
1093 		nm_os_free(p->lut);
1094 #endif
1095 	}
1096 	p->lut = NULL;
1097 	p->objtotal = 0;
1098 	p->memtotal = 0;
1099 	p->numclusters = 0;
1100 	p->objfree = 0;
1101 }
1102 
1103 /*
1104  * Free all resources related to an allocator.
1105  */
1106 static void
1107 netmap_destroy_obj_allocator(struct netmap_obj_pool *p)
1108 {
1109 	if (p == NULL)
1110 		return;
1111 	netmap_reset_obj_allocator(p);
1112 }
1113 
1114 /*
1115  * We receive a request for objtotal objects, of size objsize each.
1116  * Internally we may round up both numbers, as we allocate objects
1117  * in small clusters multiple of the page size.
1118  * We need to keep track of objtotal and clustentries,
1119  * as they are needed when freeing memory.
1120  *
1121  * XXX note -- userspace needs the buffers to be contiguous,
1122  *	so we cannot afford gaps at the end of a cluster.
1123  */
1124 
1125 
1126 /* call with NMA_LOCK held */
1127 static int
1128 netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int objsize)
1129 {
1130 	int i;
1131 	u_int clustsize;	/* the cluster size, multiple of page size */
1132 	u_int clustentries;	/* how many objects per entry */
1133 
1134 	/* we store the current request, so we can
1135 	 * detect configuration changes later */
1136 	p->r_objtotal = objtotal;
1137 	p->r_objsize = objsize;
1138 
1139 #define MAX_CLUSTSIZE	(1<<22)		// 4 MB
1140 #define LINE_ROUND	NM_CACHE_ALIGN	// 64
1141 	if (objsize >= MAX_CLUSTSIZE) {
1142 		/* we could do it but there is no point */
1143 		D("unsupported allocation for %d bytes", objsize);
1144 		return EINVAL;
1145 	}
1146 	/* make sure objsize is a multiple of LINE_ROUND */
1147 	i = (objsize & (LINE_ROUND - 1));
1148 	if (i) {
1149 		D("XXX aligning object by %d bytes", LINE_ROUND - i);
1150 		objsize += LINE_ROUND - i;
1151 	}
1152 	if (objsize < p->objminsize || objsize > p->objmaxsize) {
1153 		D("requested objsize %d out of range [%d, %d]",
1154 			objsize, p->objminsize, p->objmaxsize);
1155 		return EINVAL;
1156 	}
1157 	if (objtotal < p->nummin || objtotal > p->nummax) {
1158 		D("requested objtotal %d out of range [%d, %d]",
1159 			objtotal, p->nummin, p->nummax);
1160 		return EINVAL;
1161 	}
1162 	/*
1163 	 * Compute number of objects using a brute-force approach:
1164 	 * given a max cluster size,
1165 	 * we try to fill it with objects keeping track of the
1166 	 * wasted space to the next page boundary.
1167 	 */
1168 	for (clustentries = 0, i = 1;; i++) {
1169 		u_int delta, used = i * objsize;
1170 		if (used > MAX_CLUSTSIZE)
1171 			break;
1172 		delta = used % PAGE_SIZE;
1173 		if (delta == 0) { // exact solution
1174 			clustentries = i;
1175 			break;
1176 		}
1177 	}
1178 	/* exact solution not found */
1179 	if (clustentries == 0) {
1180 		D("unsupported allocation for %d bytes", objsize);
1181 		return EINVAL;
1182 	}
1183 	/* compute clustsize */
1184 	clustsize = clustentries * objsize;
1185 	if (netmap_verbose)
1186 		D("objsize %d clustsize %d objects %d",
1187 			objsize, clustsize, clustentries);
1188 
1189 	/*
1190 	 * The number of clusters is n = ceil(objtotal/clustentries)
1191 	 * objtotal' = n * clustentries
1192 	 */
1193 	p->_clustentries = clustentries;
1194 	p->_clustsize = clustsize;
1195 	p->_numclusters = (objtotal + clustentries - 1) / clustentries;
1196 
1197 	/* actual values (may be larger than requested) */
1198 	p->_objsize = objsize;
1199 	p->_objtotal = p->_numclusters * clustentries;
1200 
1201 	return 0;
1202 }
1203 
1204 static struct lut_entry *
1205 nm_alloc_lut(u_int nobj)
1206 {
1207 	size_t n = sizeof(struct lut_entry) * nobj;
1208 	struct lut_entry *lut;
1209 #ifdef linux
1210 	lut = vmalloc(n);
1211 #else
1212 	lut = nm_os_malloc(n);
1213 #endif
1214 	return lut;
1215 }
1216 
1217 /* call with NMA_LOCK held */
1218 static int
1219 netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
1220 {
1221 	int i; /* must be signed */
1222 	size_t n;
1223 
1224 	/* optimistically assume we have enough memory */
1225 	p->numclusters = p->_numclusters;
1226 	p->objtotal = p->_objtotal;
1227 
1228 	p->lut = nm_alloc_lut(p->objtotal);
1229 	if (p->lut == NULL) {
1230 		D("Unable to create lookup table for '%s'", p->name);
1231 		goto clean;
1232 	}
1233 
1234 	/* Allocate the bitmap */
1235 	n = (p->objtotal + 31) / 32;
1236 	p->bitmap = nm_os_malloc(sizeof(uint32_t) * n);
1237 	if (p->bitmap == NULL) {
1238 		D("Unable to create bitmap (%d entries) for allocator '%s'", (int)n,
1239 		    p->name);
1240 		goto clean;
1241 	}
1242 	p->bitmap_slots = n;
1243 
1244 	/*
1245 	 * Allocate clusters, init pointers and bitmap
1246 	 */
1247 
1248 	n = p->_clustsize;
1249 	for (i = 0; i < (int)p->objtotal;) {
1250 		int lim = i + p->_clustentries;
1251 		char *clust;
1252 
1253 		/*
1254 		 * XXX Note, we only need contigmalloc() for buffers attached
1255 		 * to native interfaces. In all other cases (nifp, netmap rings
1256 		 * and even buffers for VALE ports or emulated interfaces) we
1257 		 * can live with standard malloc, because the hardware will not
1258 		 * access the pages directly.
1259 		 */
1260 		clust = contigmalloc(n, M_NETMAP, M_NOWAIT | M_ZERO,
1261 		    (size_t)0, -1UL, PAGE_SIZE, 0);
1262 		if (clust == NULL) {
1263 			/*
1264 			 * If we get here, there is a severe memory shortage,
1265 			 * so halve the allocated memory to reclaim some.
1266 			 */
1267 			D("Unable to create cluster at %d for '%s' allocator",
1268 			    i, p->name);
1269 			if (i < 2) /* nothing to halve */
1270 				goto out;
1271 			lim = i / 2;
1272 			for (i--; i >= lim; i--) {
1273 				p->bitmap[ (i>>5) ] &=  ~( 1 << (i & 31) );
1274 				if (i % p->_clustentries == 0 && p->lut[i].vaddr)
1275 					contigfree(p->lut[i].vaddr,
1276 						n, M_NETMAP);
1277 				p->lut[i].vaddr = NULL;
1278 			}
1279 		out:
1280 			p->objtotal = i;
1281 			/* we may have stopped in the middle of a cluster */
1282 			p->numclusters = (i + p->_clustentries - 1) / p->_clustentries;
1283 			break;
1284 		}
1285 		/*
1286 		 * Set bitmap and lut state for all buffers in the current
1287 		 * cluster.
1288 		 *
1289 		 * [i, lim) is the set of buffer indexes that cover the
1290 		 * current cluster.
1291 		 *
1292 		 * 'clust' is really the address of the current buffer in
1293 		 * the current cluster as we index through it with a stride
1294 		 * of p->_objsize.
1295 		 */
1296 		for (; i < lim; i++, clust += p->_objsize) {
1297 			p->bitmap[ (i>>5) ] |=  ( 1 << (i & 31) );
1298 			p->lut[i].vaddr = clust;
1299 			p->lut[i].paddr = vtophys(clust);
1300 		}
1301 	}
1302 	p->objfree = p->objtotal;
1303 	p->memtotal = p->numclusters * p->_clustsize;
1304 	if (p->objfree == 0)
1305 		goto clean;
1306 	if (netmap_verbose)
1307 		D("Pre-allocated %d clusters (%d/%dKB) for '%s'",
1308 		    p->numclusters, p->_clustsize >> 10,
1309 		    p->memtotal >> 10, p->name);
1310 
1311 	return 0;
1312 
1313 clean:
1314 	netmap_reset_obj_allocator(p);
1315 	return ENOMEM;
1316 }
1317 
1318 /* call with lock held */
1319 static int
1320 netmap_mem_params_changed(struct netmap_obj_params* p)
1321 {
1322 	int i, rv = 0;
1323 
1324 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1325 		if (p[i].last_size != p[i].size || p[i].last_num != p[i].num) {
1326 			p[i].last_size = p[i].size;
1327 			p[i].last_num = p[i].num;
1328 			rv = 1;
1329 		}
1330 	}
1331 	return rv;
1332 }
1333 
1334 static void
1335 netmap_mem_reset_all(struct netmap_mem_d *nmd)
1336 {
1337 	int i;
1338 
1339 	if (netmap_verbose)
1340 		D("resetting %p", nmd);
1341 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1342 		netmap_reset_obj_allocator(&nmd->pools[i]);
1343 	}
1344 	nmd->flags  &= ~NETMAP_MEM_FINALIZED;
1345 }
1346 
1347 static int
1348 netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na)
1349 {
1350 	int i, lim = p->_objtotal;
1351 
1352 	if (na == NULL || na->pdev == NULL)
1353 		return 0;
1354 
1355 #if defined(__FreeBSD__)
1356 	(void)i;
1357 	(void)lim;
1358 	D("unsupported on FreeBSD");
1359 
1360 #elif defined(_WIN32)
1361 	(void)i;
1362 	(void)lim;
1363 	D("unsupported on Windows");	//XXX_ale, really?
1364 #else /* linux */
1365 	for (i = 2; i < lim; i++) {
1366 		netmap_unload_map(na, (bus_dma_tag_t) na->pdev, &p->lut[i].paddr);
1367 	}
1368 #endif /* linux */
1369 
1370 	return 0;
1371 }
1372 
1373 static int
1374 netmap_mem_map(struct netmap_obj_pool *p, struct netmap_adapter *na)
1375 {
1376 #if defined(__FreeBSD__)
1377 	D("unsupported on FreeBSD");
1378 #elif defined(_WIN32)
1379 	D("unsupported on Windows");	//XXX_ale, really?
1380 #else /* linux */
1381 	int i, lim = p->_objtotal;
1382 
1383 	if (na->pdev == NULL)
1384 		return 0;
1385 
1386 	for (i = 2; i < lim; i++) {
1387 		netmap_load_map(na, (bus_dma_tag_t) na->pdev, &p->lut[i].paddr,
1388 				p->lut[i].vaddr);
1389 	}
1390 #endif /* linux */
1391 
1392 	return 0;
1393 }
1394 
1395 static int
1396 netmap_mem_finalize_all(struct netmap_mem_d *nmd)
1397 {
1398 	int i;
1399 	if (nmd->flags & NETMAP_MEM_FINALIZED)
1400 		return 0;
1401 	nmd->lasterr = 0;
1402 	nmd->nm_totalsize = 0;
1403 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1404 		nmd->lasterr = netmap_finalize_obj_allocator(&nmd->pools[i]);
1405 		if (nmd->lasterr)
1406 			goto error;
1407 		nmd->nm_totalsize += nmd->pools[i].memtotal;
1408 	}
1409 	/* buffers 0 and 1 are reserved */
1410 	nmd->pools[NETMAP_BUF_POOL].objfree -= 2;
1411 	nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3;
1412 	nmd->flags |= NETMAP_MEM_FINALIZED;
1413 
1414 	if (netmap_verbose)
1415 		D("interfaces %d KB, rings %d KB, buffers %d MB",
1416 		    nmd->pools[NETMAP_IF_POOL].memtotal >> 10,
1417 		    nmd->pools[NETMAP_RING_POOL].memtotal >> 10,
1418 		    nmd->pools[NETMAP_BUF_POOL].memtotal >> 20);
1419 
1420 	if (netmap_verbose)
1421 		D("Free buffers: %d", nmd->pools[NETMAP_BUF_POOL].objfree);
1422 
1423 
1424 	return 0;
1425 error:
1426 	netmap_mem_reset_all(nmd);
1427 	return nmd->lasterr;
1428 }
1429 
1430 /*
1431  * allocator for private memory
1432  */
1433 static struct netmap_mem_d *
1434 _netmap_mem_private_new(struct netmap_obj_params *p, int *perr)
1435 {
1436 	struct netmap_mem_d *d = NULL;
1437 	int i, err = 0;
1438 
1439 	d = nm_os_malloc(sizeof(struct netmap_mem_d));
1440 	if (d == NULL) {
1441 		err = ENOMEM;
1442 		goto error;
1443 	}
1444 
1445 	*d = nm_blueprint;
1446 
1447 	err = nm_mem_assign_id(d);
1448 	if (err)
1449 		goto error;
1450 	snprintf(d->name, NM_MEM_NAMESZ, "%d", d->nm_id);
1451 
1452 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1453 		snprintf(d->pools[i].name, NETMAP_POOL_MAX_NAMSZ,
1454 				nm_blueprint.pools[i].name,
1455 				d->name);
1456 		d->params[i].num = p[i].num;
1457 		d->params[i].size = p[i].size;
1458 	}
1459 
1460 	NMA_LOCK_INIT(d);
1461 
1462 	err = netmap_mem_config(d);
1463 	if (err)
1464 		goto error;
1465 
1466 	d->flags &= ~NETMAP_MEM_FINALIZED;
1467 
1468 	return d;
1469 
1470 error:
1471 	netmap_mem_delete(d);
1472 	if (perr)
1473 		*perr = err;
1474 	return NULL;
1475 }
1476 
1477 struct netmap_mem_d *
1478 netmap_mem_private_new(u_int txr, u_int txd, u_int rxr, u_int rxd,
1479 		u_int extra_bufs, u_int npipes, int *perr)
1480 {
1481 	struct netmap_mem_d *d = NULL;
1482 	struct netmap_obj_params p[NETMAP_POOLS_NR];
1483 	int i, err = 0;
1484 	u_int v, maxd;
1485 	/* account for the fake host rings */
1486 	txr++;
1487 	rxr++;
1488 
1489 	/* copy the min values */
1490 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1491 		p[i] = netmap_min_priv_params[i];
1492 	}
1493 
1494 	/* possibly increase them to fit user request */
1495 	v = sizeof(struct netmap_if) + sizeof(ssize_t) * (txr + rxr);
1496 	if (p[NETMAP_IF_POOL].size < v)
1497 		p[NETMAP_IF_POOL].size = v;
1498 	v = 2 + 4 * npipes;
1499 	if (p[NETMAP_IF_POOL].num < v)
1500 		p[NETMAP_IF_POOL].num = v;
1501 	maxd = (txd > rxd) ? txd : rxd;
1502 	v = sizeof(struct netmap_ring) + sizeof(struct netmap_slot) * maxd;
1503 	if (p[NETMAP_RING_POOL].size < v)
1504 		p[NETMAP_RING_POOL].size = v;
1505 	/* each pipe endpoint needs two tx rings (1 normal + 1 host, fake)
1506          * and two rx rings (again, 1 normal and 1 fake host)
1507          */
1508 	v = txr + rxr + 8 * npipes;
1509 	if (p[NETMAP_RING_POOL].num < v)
1510 		p[NETMAP_RING_POOL].num = v;
1511 	/* for each pipe we only need the buffers for the 4 "real" rings.
1512          * On the other end, the pipe ring dimension may be different from
1513          * the parent port ring dimension. As a compromise, we allocate twice the
1514          * space actually needed if the pipe rings were the same size as the parent rings
1515          */
1516 	v = (4 * npipes + rxr) * rxd + (4 * npipes + txr) * txd + 2 + extra_bufs;
1517 		/* the +2 is for the tx and rx fake buffers (indices 0 and 1) */
1518 	if (p[NETMAP_BUF_POOL].num < v)
1519 		p[NETMAP_BUF_POOL].num = v;
1520 
1521 	if (netmap_verbose)
1522 		D("req if %d*%d ring %d*%d buf %d*%d",
1523 			p[NETMAP_IF_POOL].num,
1524 			p[NETMAP_IF_POOL].size,
1525 			p[NETMAP_RING_POOL].num,
1526 			p[NETMAP_RING_POOL].size,
1527 			p[NETMAP_BUF_POOL].num,
1528 			p[NETMAP_BUF_POOL].size);
1529 
1530 	d = _netmap_mem_private_new(p, perr);
1531 	if (d == NULL)
1532 		goto error;
1533 
1534 	return d;
1535 error:
1536 	netmap_mem_delete(d);
1537 	if (perr)
1538 		*perr = err;
1539 	return NULL;
1540 }
1541 
1542 
1543 /* call with lock held */
1544 static int
1545 netmap_mem2_config(struct netmap_mem_d *nmd)
1546 {
1547 	int i;
1548 
1549 	if (nmd->active)
1550 		/* already in use, we cannot change the configuration */
1551 		goto out;
1552 
1553 	if (!netmap_mem_params_changed(nmd->params))
1554 		goto out;
1555 
1556 	ND("reconfiguring");
1557 
1558 	if (nmd->flags & NETMAP_MEM_FINALIZED) {
1559 		/* reset previous allocation */
1560 		for (i = 0; i < NETMAP_POOLS_NR; i++) {
1561 			netmap_reset_obj_allocator(&nmd->pools[i]);
1562 		}
1563 		nmd->flags &= ~NETMAP_MEM_FINALIZED;
1564 	}
1565 
1566 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1567 		nmd->lasterr = netmap_config_obj_allocator(&nmd->pools[i],
1568 				nmd->params[i].num, nmd->params[i].size);
1569 		if (nmd->lasterr)
1570 			goto out;
1571 	}
1572 
1573 out:
1574 
1575 	return nmd->lasterr;
1576 }
1577 
1578 static int
1579 netmap_mem2_finalize(struct netmap_mem_d *nmd)
1580 {
1581 	int err;
1582 
1583 	/* update configuration if changed */
1584 	if (netmap_mem2_config(nmd))
1585 		goto out1;
1586 
1587 	nmd->active++;
1588 
1589 	if (nmd->flags & NETMAP_MEM_FINALIZED) {
1590 		/* may happen if config is not changed */
1591 		ND("nothing to do");
1592 		goto out;
1593 	}
1594 
1595 	if (netmap_mem_finalize_all(nmd))
1596 		goto out;
1597 
1598 	nmd->lasterr = 0;
1599 
1600 out:
1601 	if (nmd->lasterr)
1602 		nmd->active--;
1603 out1:
1604 	err = nmd->lasterr;
1605 
1606 	return err;
1607 
1608 }
1609 
1610 static void
1611 netmap_mem2_delete(struct netmap_mem_d *nmd)
1612 {
1613 	int i;
1614 
1615 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
1616 	    netmap_destroy_obj_allocator(&nmd->pools[i]);
1617 	}
1618 
1619 	NMA_LOCK_DESTROY(nmd);
1620 	if (nmd != &nm_mem)
1621 		nm_os_free(nmd);
1622 }
1623 
1624 int
1625 netmap_mem_init(void)
1626 {
1627 	NM_MTX_INIT(nm_mem_list_lock);
1628 	NMA_LOCK_INIT(&nm_mem);
1629 	netmap_mem_get(&nm_mem);
1630 	return (0);
1631 }
1632 
1633 void
1634 netmap_mem_fini(void)
1635 {
1636 	netmap_mem_put(&nm_mem);
1637 }
1638 
1639 static void
1640 netmap_free_rings(struct netmap_adapter *na)
1641 {
1642 	enum txrx t;
1643 
1644 	for_rx_tx(t) {
1645 		u_int i;
1646 		for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
1647 			struct netmap_kring *kring = &NMR(na, t)[i];
1648 			struct netmap_ring *ring = kring->ring;
1649 
1650 			if (ring == NULL || kring->users > 0 || (kring->nr_kflags & NKR_NEEDRING)) {
1651 				ND("skipping ring %s (ring %p, users %d)",
1652 						kring->name, ring, kring->users);
1653 				continue;
1654 			}
1655 			if (i != nma_get_nrings(na, t) || na->na_flags & NAF_HOST_RINGS)
1656 				netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
1657 			netmap_ring_free(na->nm_mem, ring);
1658 			kring->ring = NULL;
1659 		}
1660 	}
1661 }
1662 
1663 /* call with NMA_LOCK held *
1664  *
1665  * Allocate netmap rings and buffers for this card
1666  * The rings are contiguous, but have variable size.
1667  * The kring array must follow the layout described
1668  * in netmap_krings_create().
1669  */
1670 static int
1671 netmap_mem2_rings_create(struct netmap_adapter *na)
1672 {
1673 	enum txrx t;
1674 
1675 	NMA_LOCK(na->nm_mem);
1676 
1677 	for_rx_tx(t) {
1678 		u_int i;
1679 
1680 		for (i = 0; i <= nma_get_nrings(na, t); i++) {
1681 			struct netmap_kring *kring = &NMR(na, t)[i];
1682 			struct netmap_ring *ring = kring->ring;
1683 			u_int len, ndesc;
1684 
1685 			if (ring || (!kring->users && !(kring->nr_kflags & NKR_NEEDRING))) {
1686 				/* uneeded, or already created by somebody else */
1687 				ND("skipping ring %s", kring->name);
1688 				continue;
1689 			}
1690 			ndesc = kring->nkr_num_slots;
1691 			len = sizeof(struct netmap_ring) +
1692 				  ndesc * sizeof(struct netmap_slot);
1693 			ring = netmap_ring_malloc(na->nm_mem, len);
1694 			if (ring == NULL) {
1695 				D("Cannot allocate %s_ring", nm_txrx2str(t));
1696 				goto cleanup;
1697 			}
1698 			ND("txring at %p", ring);
1699 			kring->ring = ring;
1700 			*(uint32_t *)(uintptr_t)&ring->num_slots = ndesc;
1701 			*(int64_t *)(uintptr_t)&ring->buf_ofs =
1702 			    (na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
1703 				na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
1704 				netmap_ring_offset(na->nm_mem, ring);
1705 
1706 			/* copy values from kring */
1707 			ring->head = kring->rhead;
1708 			ring->cur = kring->rcur;
1709 			ring->tail = kring->rtail;
1710 			*(uint16_t *)(uintptr_t)&ring->nr_buf_size =
1711 				netmap_mem_bufsize(na->nm_mem);
1712 			ND("%s h %d c %d t %d", kring->name,
1713 				ring->head, ring->cur, ring->tail);
1714 			ND("initializing slots for %s_ring", nm_txrx2str(txrx));
1715 			if (i != nma_get_nrings(na, t) || (na->na_flags & NAF_HOST_RINGS)) {
1716 				/* this is a real ring */
1717 				if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
1718 					D("Cannot allocate buffers for %s_ring", nm_txrx2str(t));
1719 					goto cleanup;
1720 				}
1721 			} else {
1722 				/* this is a fake ring, set all indices to 0 */
1723 				netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0);
1724 			}
1725 		        /* ring info */
1726 		        *(uint16_t *)(uintptr_t)&ring->ringid = kring->ring_id;
1727 		        *(uint16_t *)(uintptr_t)&ring->dir = kring->tx;
1728 		}
1729 	}
1730 
1731 	NMA_UNLOCK(na->nm_mem);
1732 
1733 	return 0;
1734 
1735 cleanup:
1736 	netmap_free_rings(na);
1737 
1738 	NMA_UNLOCK(na->nm_mem);
1739 
1740 	return ENOMEM;
1741 }
1742 
1743 static void
1744 netmap_mem2_rings_delete(struct netmap_adapter *na)
1745 {
1746 	/* last instance, release bufs and rings */
1747 	NMA_LOCK(na->nm_mem);
1748 
1749 	netmap_free_rings(na);
1750 
1751 	NMA_UNLOCK(na->nm_mem);
1752 }
1753 
1754 
1755 /* call with NMA_LOCK held */
1756 /*
1757  * Allocate the per-fd structure netmap_if.
1758  *
1759  * We assume that the configuration stored in na
1760  * (number of tx/rx rings and descs) does not change while
1761  * the interface is in netmap mode.
1762  */
1763 static struct netmap_if *
1764 netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
1765 {
1766 	struct netmap_if *nifp;
1767 	ssize_t base; /* handy for relative offsets between rings and nifp */
1768 	u_int i, len, n[NR_TXRX], ntot;
1769 	enum txrx t;
1770 
1771 	ntot = 0;
1772 	for_rx_tx(t) {
1773 		/* account for the (eventually fake) host rings */
1774 		n[t] = nma_get_nrings(na, t) + 1;
1775 		ntot += n[t];
1776 	}
1777 	/*
1778 	 * the descriptor is followed inline by an array of offsets
1779 	 * to the tx and rx rings in the shared memory region.
1780 	 */
1781 
1782 	NMA_LOCK(na->nm_mem);
1783 
1784 	len = sizeof(struct netmap_if) + (ntot * sizeof(ssize_t));
1785 	nifp = netmap_if_malloc(na->nm_mem, len);
1786 	if (nifp == NULL) {
1787 		NMA_UNLOCK(na->nm_mem);
1788 		return NULL;
1789 	}
1790 
1791 	/* initialize base fields -- override const */
1792 	*(u_int *)(uintptr_t)&nifp->ni_tx_rings = na->num_tx_rings;
1793 	*(u_int *)(uintptr_t)&nifp->ni_rx_rings = na->num_rx_rings;
1794 	strncpy(nifp->ni_name, na->name, (size_t)IFNAMSIZ);
1795 
1796 	/*
1797 	 * fill the slots for the rx and tx rings. They contain the offset
1798 	 * between the ring and nifp, so the information is usable in
1799 	 * userspace to reach the ring from the nifp.
1800 	 */
1801 	base = netmap_if_offset(na->nm_mem, nifp);
1802 	for (i = 0; i < n[NR_TX]; i++) {
1803 		/* XXX instead of ofs == 0 maybe use the offset of an error
1804 		 * ring, like we do for buffers? */
1805 		ssize_t ofs = 0;
1806 
1807 		if (na->tx_rings[i].ring != NULL && i >= priv->np_qfirst[NR_TX]
1808 				&& i < priv->np_qlast[NR_TX]) {
1809 			ofs = netmap_ring_offset(na->nm_mem,
1810 						 na->tx_rings[i].ring) - base;
1811 		}
1812 		*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] = ofs;
1813 	}
1814 	for (i = 0; i < n[NR_RX]; i++) {
1815 		/* XXX instead of ofs == 0 maybe use the offset of an error
1816 		 * ring, like we do for buffers? */
1817 		ssize_t ofs = 0;
1818 
1819 		if (na->rx_rings[i].ring != NULL && i >= priv->np_qfirst[NR_RX]
1820 				&& i < priv->np_qlast[NR_RX]) {
1821 			ofs = netmap_ring_offset(na->nm_mem,
1822 						 na->rx_rings[i].ring) - base;
1823 		}
1824 		*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] = ofs;
1825 	}
1826 
1827 	NMA_UNLOCK(na->nm_mem);
1828 
1829 	return (nifp);
1830 }
1831 
1832 static void
1833 netmap_mem2_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
1834 {
1835 	if (nifp == NULL)
1836 		/* nothing to do */
1837 		return;
1838 	NMA_LOCK(na->nm_mem);
1839 	if (nifp->ni_bufs_head)
1840 		netmap_extra_free(na, nifp->ni_bufs_head);
1841 	netmap_if_free(na->nm_mem, nifp);
1842 
1843 	NMA_UNLOCK(na->nm_mem);
1844 }
1845 
1846 static void
1847 netmap_mem2_deref(struct netmap_mem_d *nmd)
1848 {
1849 
1850 	nmd->active--;
1851 	if (!nmd->active)
1852 		nmd->nm_grp = -1;
1853 	if (netmap_verbose)
1854 		D("active = %d", nmd->active);
1855 
1856 }
1857 
1858 struct netmap_mem_ops netmap_mem_global_ops = {
1859 	.nmd_get_lut = netmap_mem2_get_lut,
1860 	.nmd_get_info = netmap_mem2_get_info,
1861 	.nmd_ofstophys = netmap_mem2_ofstophys,
1862 	.nmd_config = netmap_mem2_config,
1863 	.nmd_finalize = netmap_mem2_finalize,
1864 	.nmd_deref = netmap_mem2_deref,
1865 	.nmd_delete = netmap_mem2_delete,
1866 	.nmd_if_offset = netmap_mem2_if_offset,
1867 	.nmd_if_new = netmap_mem2_if_new,
1868 	.nmd_if_delete = netmap_mem2_if_delete,
1869 	.nmd_rings_create = netmap_mem2_rings_create,
1870 	.nmd_rings_delete = netmap_mem2_rings_delete
1871 };
1872 
1873 int
1874 netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_mem_d *nmd)
1875 {
1876 	uintptr_t *pp = (uintptr_t *)&nmr->nr_arg1;
1877 	struct netmap_pools_info *upi = (struct netmap_pools_info *)(*pp);
1878 	struct netmap_pools_info pi;
1879 	unsigned int memsize;
1880 	uint16_t memid;
1881 	int ret;
1882 
1883 	ret = netmap_mem_get_info(nmd, &memsize, NULL, &memid);
1884 	if (ret) {
1885 		return ret;
1886 	}
1887 
1888 	pi.memsize = memsize;
1889 	pi.memid = memid;
1890 	NMA_LOCK(nmd);
1891 	pi.if_pool_offset = 0;
1892 	pi.if_pool_objtotal = nmd->pools[NETMAP_IF_POOL].objtotal;
1893 	pi.if_pool_objsize = nmd->pools[NETMAP_IF_POOL]._objsize;
1894 
1895 	pi.ring_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal;
1896 	pi.ring_pool_objtotal = nmd->pools[NETMAP_RING_POOL].objtotal;
1897 	pi.ring_pool_objsize = nmd->pools[NETMAP_RING_POOL]._objsize;
1898 
1899 	pi.buf_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal +
1900 			     nmd->pools[NETMAP_RING_POOL].memtotal;
1901 	pi.buf_pool_objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal;
1902 	pi.buf_pool_objsize = nmd->pools[NETMAP_BUF_POOL]._objsize;
1903 	NMA_UNLOCK(nmd);
1904 
1905 	ret = copyout(&pi, upi, sizeof(pi));
1906 	if (ret) {
1907 		return ret;
1908 	}
1909 
1910 	return 0;
1911 }
1912 
1913 #ifdef WITH_PTNETMAP_GUEST
/*
 * Element of the singly linked list of passthrough interfaces kept in
 * struct netmap_mem_ptg (field pt_ifs).
 */
struct mem_pt_if {
	/* next element in the list, NULL at the end */
	struct mem_pt_if *next;
	/* interface this entry describes; used as the lookup key */
	struct ifnet *ifp;
	/* byte offset of the netmap_if of ifp from the start of the
	 * mapped memory (it is added to nm_addr to locate it) */
	unsigned int nifp_offset;
};
1919 
1920 /* Netmap allocator for ptnetmap guests. */
1921 struct netmap_mem_ptg {
1922 	struct netmap_mem_d up;
1923 
1924 	vm_paddr_t nm_paddr;            /* physical address in the guest */
1925 	void *nm_addr;                  /* virtual address in the guest */
1926 	struct netmap_lut buf_lut;      /* lookup table for BUF pool in the guest */
1927 	nm_memid_t host_mem_id;         /* allocator identifier in the host */
1928 	struct ptnetmap_memdev *ptn_dev;/* ptnetmap memdev */
1929 	struct mem_pt_if *pt_ifs;	/* list of interfaces in passthrough */
1930 };
1931 
1932 /* Link a passthrough interface to a passthrough netmap allocator. */
1933 static int
1934 netmap_mem_pt_guest_ifp_add(struct netmap_mem_d *nmd, struct ifnet *ifp,
1935 			    unsigned int nifp_offset)
1936 {
1937 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
1938 	struct mem_pt_if *ptif = nm_os_malloc(sizeof(*ptif));
1939 
1940 	if (!ptif) {
1941 		return ENOMEM;
1942 	}
1943 
1944 	NMA_LOCK(nmd);
1945 
1946 	ptif->ifp = ifp;
1947 	ptif->nifp_offset = nifp_offset;
1948 
1949 	if (ptnmd->pt_ifs) {
1950 		ptif->next = ptnmd->pt_ifs;
1951 	}
1952 	ptnmd->pt_ifs = ptif;
1953 
1954 	NMA_UNLOCK(nmd);
1955 
1956 	D("added (ifp=%p,nifp_offset=%u)", ptif->ifp, ptif->nifp_offset);
1957 
1958 	return 0;
1959 }
1960 
1961 /* Called with NMA_LOCK(nmd) held. */
1962 static struct mem_pt_if *
1963 netmap_mem_pt_guest_ifp_lookup(struct netmap_mem_d *nmd, struct ifnet *ifp)
1964 {
1965 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
1966 	struct mem_pt_if *curr;
1967 
1968 	for (curr = ptnmd->pt_ifs; curr; curr = curr->next) {
1969 		if (curr->ifp == ifp) {
1970 			return curr;
1971 		}
1972 	}
1973 
1974 	return NULL;
1975 }
1976 
1977 /* Unlink a passthrough interface from a passthrough netmap allocator. */
1978 int
1979 netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *nmd, struct ifnet *ifp)
1980 {
1981 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
1982 	struct mem_pt_if *prev = NULL;
1983 	struct mem_pt_if *curr;
1984 	int ret = -1;
1985 
1986 	NMA_LOCK(nmd);
1987 
1988 	for (curr = ptnmd->pt_ifs; curr; curr = curr->next) {
1989 		if (curr->ifp == ifp) {
1990 			if (prev) {
1991 				prev->next = curr->next;
1992 			} else {
1993 				ptnmd->pt_ifs = curr->next;
1994 			}
1995 			D("removed (ifp=%p,nifp_offset=%u)",
1996 			  curr->ifp, curr->nifp_offset);
1997 			nm_os_free(curr);
1998 			ret = 0;
1999 			break;
2000 		}
2001 		prev = curr;
2002 	}
2003 
2004 	NMA_UNLOCK(nmd);
2005 
2006 	return ret;
2007 }
2008 
2009 static int
2010 netmap_mem_pt_guest_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut)
2011 {
2012 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
2013 
2014 	if (!(nmd->flags & NETMAP_MEM_FINALIZED)) {
2015 		return EINVAL;
2016 	}
2017 
2018 	*lut = ptnmd->buf_lut;
2019 	return 0;
2020 }
2021 
2022 static int
2023 netmap_mem_pt_guest_get_info(struct netmap_mem_d *nmd, u_int *size,
2024 			     u_int *memflags, uint16_t *id)
2025 {
2026 	int error = 0;
2027 
2028 	NMA_LOCK(nmd);
2029 
2030 	error = nmd->ops->nmd_config(nmd);
2031 	if (error)
2032 		goto out;
2033 
2034 	if (size)
2035 		*size = nmd->nm_totalsize;
2036 	if (memflags)
2037 		*memflags = nmd->flags;
2038 	if (id)
2039 		*id = nmd->nm_id;
2040 
2041 out:
2042 	NMA_UNLOCK(nmd);
2043 
2044 	return error;
2045 }
2046 
2047 static vm_paddr_t
2048 netmap_mem_pt_guest_ofstophys(struct netmap_mem_d *nmd, vm_ooffset_t off)
2049 {
2050 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
2051 	vm_paddr_t paddr;
2052 	/* if the offset is valid, just return csb->base_addr + off */
2053 	paddr = (vm_paddr_t)(ptnmd->nm_paddr + off);
2054 	ND("off %lx padr %lx", off, (unsigned long)paddr);
2055 	return paddr;
2056 }
2057 
/*
 * Config method for guest allocators: a no-op that always returns 0.
 */
static int
netmap_mem_pt_guest_config(struct netmap_mem_d *nmd)
{
	/*
	 * We are configured on creation and the configuration
	 * never changes thereafter: nothing to do.
	 */
	return 0;
}
2066 
2067 static int
2068 netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd)
2069 {
2070 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
2071 	uint64_t mem_size;
2072 	uint32_t bufsize;
2073 	uint32_t nbuffers;
2074 	uint32_t poolofs;
2075 	vm_paddr_t paddr;
2076 	char *vaddr;
2077 	int i;
2078 	int error = 0;
2079 
2080 	nmd->active++;
2081 
2082 	if (nmd->flags & NETMAP_MEM_FINALIZED)
2083 		goto out;
2084 
2085 	if (ptnmd->ptn_dev == NULL) {
2086 		D("ptnetmap memdev not attached");
2087 		error = ENOMEM;
2088 		goto err;
2089 	}
2090 	/* Map memory through ptnetmap-memdev BAR. */
2091 	error = nm_os_pt_memdev_iomap(ptnmd->ptn_dev, &ptnmd->nm_paddr,
2092 				      &ptnmd->nm_addr, &mem_size);
2093 	if (error)
2094 		goto err;
2095 
2096         /* Initialize the lut using the information contained in the
2097 	 * ptnetmap memory device. */
2098         bufsize = nm_os_pt_memdev_ioread(ptnmd->ptn_dev,
2099 					 PTNET_MDEV_IO_BUF_POOL_OBJSZ);
2100         nbuffers = nm_os_pt_memdev_ioread(ptnmd->ptn_dev,
2101 					 PTNET_MDEV_IO_BUF_POOL_OBJNUM);
2102 
2103 	/* allocate the lut */
2104 	if (ptnmd->buf_lut.lut == NULL) {
2105 		D("allocating lut");
2106 		ptnmd->buf_lut.lut = nm_alloc_lut(nbuffers);
2107 		if (ptnmd->buf_lut.lut == NULL) {
2108 			D("lut allocation failed");
2109 			return ENOMEM;
2110 		}
2111 	}
2112 
2113 	/* we have physically contiguous memory mapped through PCI BAR */
2114 	poolofs = nm_os_pt_memdev_ioread(ptnmd->ptn_dev,
2115 					 PTNET_MDEV_IO_BUF_POOL_OFS);
2116 	vaddr = (char *)(ptnmd->nm_addr) + poolofs;
2117 	paddr = ptnmd->nm_paddr + poolofs;
2118 
2119 	for (i = 0; i < nbuffers; i++) {
2120 		ptnmd->buf_lut.lut[i].vaddr = vaddr;
2121 		ptnmd->buf_lut.lut[i].paddr = paddr;
2122 		vaddr += bufsize;
2123 		paddr += bufsize;
2124 	}
2125 
2126 	ptnmd->buf_lut.objtotal = nbuffers;
2127 	ptnmd->buf_lut.objsize = bufsize;
2128 	nmd->nm_totalsize = (unsigned int)mem_size;
2129 
2130 	nmd->flags |= NETMAP_MEM_FINALIZED;
2131 out:
2132 	return 0;
2133 err:
2134 	nmd->active--;
2135 	return error;
2136 }
2137 
2138 static void
2139 netmap_mem_pt_guest_deref(struct netmap_mem_d *nmd)
2140 {
2141 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
2142 
2143 	nmd->active--;
2144 	if (nmd->active <= 0 &&
2145 		(nmd->flags & NETMAP_MEM_FINALIZED)) {
2146 	    nmd->flags  &= ~NETMAP_MEM_FINALIZED;
2147 	    /* unmap ptnetmap-memdev memory */
2148 	    if (ptnmd->ptn_dev) {
2149 		nm_os_pt_memdev_iounmap(ptnmd->ptn_dev);
2150 	    }
2151 	    ptnmd->nm_addr = NULL;
2152 	    ptnmd->nm_paddr = 0;
2153 	}
2154 }
2155 
2156 static ssize_t
2157 netmap_mem_pt_guest_if_offset(struct netmap_mem_d *nmd, const void *vaddr)
2158 {
2159 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
2160 
2161 	return (const char *)(vaddr) - (char *)(ptnmd->nm_addr);
2162 }
2163 
2164 static void
2165 netmap_mem_pt_guest_delete(struct netmap_mem_d *nmd)
2166 {
2167 	if (nmd == NULL)
2168 		return;
2169 	if (netmap_verbose)
2170 		D("deleting %p", nmd);
2171 	if (nmd->active > 0)
2172 		D("bug: deleting mem allocator with active=%d!", nmd->active);
2173 	if (netmap_verbose)
2174 		D("done deleting %p", nmd);
2175 	NMA_LOCK_DESTROY(nmd);
2176 	nm_os_free(nmd);
2177 }
2178 
2179 static struct netmap_if *
2180 netmap_mem_pt_guest_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
2181 {
2182 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem;
2183 	struct mem_pt_if *ptif;
2184 	struct netmap_if *nifp = NULL;
2185 
2186 	NMA_LOCK(na->nm_mem);
2187 
2188 	ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
2189 	if (ptif == NULL) {
2190 		D("Error: interface %p is not in passthrough", na->ifp);
2191 		goto out;
2192 	}
2193 
2194 	nifp = (struct netmap_if *)((char *)(ptnmd->nm_addr) +
2195 				    ptif->nifp_offset);
2196 	NMA_UNLOCK(na->nm_mem);
2197 out:
2198 	return nifp;
2199 }
2200 
2201 static void
2202 netmap_mem_pt_guest_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
2203 {
2204 	struct mem_pt_if *ptif;
2205 
2206 	NMA_LOCK(na->nm_mem);
2207 	ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
2208 	if (ptif == NULL) {
2209 		D("Error: interface %p is not in passthrough", na->ifp);
2210 	}
2211 	NMA_UNLOCK(na->nm_mem);
2212 }
2213 
2214 static int
2215 netmap_mem_pt_guest_rings_create(struct netmap_adapter *na)
2216 {
2217 	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem;
2218 	struct mem_pt_if *ptif;
2219 	struct netmap_if *nifp;
2220 	int i, error = -1;
2221 
2222 	NMA_LOCK(na->nm_mem);
2223 
2224 	ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
2225 	if (ptif == NULL) {
2226 		D("Error: interface %p is not in passthrough", na->ifp);
2227 		goto out;
2228 	}
2229 
2230 
2231 	/* point each kring to the corresponding backend ring */
2232 	nifp = (struct netmap_if *)((char *)ptnmd->nm_addr + ptif->nifp_offset);
2233 	for (i = 0; i <= na->num_tx_rings; i++) {
2234 		struct netmap_kring *kring = na->tx_rings + i;
2235 		if (kring->ring)
2236 			continue;
2237 		kring->ring = (struct netmap_ring *)
2238 			((char *)nifp + nifp->ring_ofs[i]);
2239 	}
2240 	for (i = 0; i <= na->num_rx_rings; i++) {
2241 		struct netmap_kring *kring = na->rx_rings + i;
2242 		if (kring->ring)
2243 			continue;
2244 		kring->ring = (struct netmap_ring *)
2245 			((char *)nifp +
2246 			 nifp->ring_ofs[i + na->num_tx_rings + 1]);
2247 	}
2248 
2249 	error = 0;
2250 out:
2251 	NMA_UNLOCK(na->nm_mem);
2252 
2253 	return error;
2254 }
2255 
/*
 * Counterpart of netmap_mem_pt_guest_rings_create(). This implementation
 * performs no work.
 */
static void
netmap_mem_pt_guest_rings_delete(struct netmap_adapter *na)
{
	/* TODO: remove?? */
	(void)na;
}
2266 
2267 static struct netmap_mem_ops netmap_mem_pt_guest_ops = {
2268 	.nmd_get_lut = netmap_mem_pt_guest_get_lut,
2269 	.nmd_get_info = netmap_mem_pt_guest_get_info,
2270 	.nmd_ofstophys = netmap_mem_pt_guest_ofstophys,
2271 	.nmd_config = netmap_mem_pt_guest_config,
2272 	.nmd_finalize = netmap_mem_pt_guest_finalize,
2273 	.nmd_deref = netmap_mem_pt_guest_deref,
2274 	.nmd_if_offset = netmap_mem_pt_guest_if_offset,
2275 	.nmd_delete = netmap_mem_pt_guest_delete,
2276 	.nmd_if_new = netmap_mem_pt_guest_if_new,
2277 	.nmd_if_delete = netmap_mem_pt_guest_if_delete,
2278 	.nmd_rings_create = netmap_mem_pt_guest_rings_create,
2279 	.nmd_rings_delete = netmap_mem_pt_guest_rings_delete
2280 };
2281 
2282 /* Called with nm_mem_list_lock held. */
2283 static struct netmap_mem_d *
2284 netmap_mem_pt_guest_find_memid(nm_memid_t mem_id)
2285 {
2286 	struct netmap_mem_d *mem = NULL;
2287 	struct netmap_mem_d *scan = netmap_last_mem_d;
2288 
2289 	do {
2290 		/* find ptnetmap allocator through host ID */
2291 		if (scan->ops->nmd_deref == netmap_mem_pt_guest_deref &&
2292 			((struct netmap_mem_ptg *)(scan))->host_mem_id == mem_id) {
2293 			mem = scan;
2294 			mem->refcount++;
2295 			NM_DBG_REFC(mem, __FUNCTION__, __LINE__);
2296 			break;
2297 		}
2298 		scan = scan->next;
2299 	} while (scan != netmap_last_mem_d);
2300 
2301 	return mem;
2302 }
2303 
2304 /* Called with nm_mem_list_lock held. */
2305 static struct netmap_mem_d *
2306 netmap_mem_pt_guest_create(nm_memid_t mem_id)
2307 {
2308 	struct netmap_mem_ptg *ptnmd;
2309 	int err = 0;
2310 
2311 	ptnmd = nm_os_malloc(sizeof(struct netmap_mem_ptg));
2312 	if (ptnmd == NULL) {
2313 		err = ENOMEM;
2314 		goto error;
2315 	}
2316 
2317 	ptnmd->up.ops = &netmap_mem_pt_guest_ops;
2318 	ptnmd->host_mem_id = mem_id;
2319 	ptnmd->pt_ifs = NULL;
2320 
2321         /* Assign new id in the guest (We have the lock) */
2322 	err = nm_mem_assign_id_locked(&ptnmd->up);
2323 	if (err)
2324 		goto error;
2325 
2326 	ptnmd->up.flags &= ~NETMAP_MEM_FINALIZED;
2327 	ptnmd->up.flags |= NETMAP_MEM_IO;
2328 
2329 	NMA_LOCK_INIT(&ptnmd->up);
2330 
2331 	snprintf(ptnmd->up.name, NM_MEM_NAMESZ, "%d", ptnmd->up.nm_id);
2332 
2333 
2334 	return &ptnmd->up;
2335 error:
2336 	netmap_mem_pt_guest_delete(&ptnmd->up);
2337 	return NULL;
2338 }
2339 
2340 /*
2341  * find host id in guest allocators and create guest allocator
2342  * if it is not there
2343  */
2344 static struct netmap_mem_d *
2345 netmap_mem_pt_guest_get(nm_memid_t mem_id)
2346 {
2347 	struct netmap_mem_d *nmd;
2348 
2349 	NM_MTX_LOCK(nm_mem_list_lock);
2350 	nmd = netmap_mem_pt_guest_find_memid(mem_id);
2351 	if (nmd == NULL) {
2352 		nmd = netmap_mem_pt_guest_create(mem_id);
2353 	}
2354 	NM_MTX_UNLOCK(nm_mem_list_lock);
2355 
2356 	return nmd;
2357 }
2358 
2359 /*
2360  * The guest allocator can be created by ptnetmap_memdev (during the device
2361  * attach) or by ptnetmap device (ptnet), during the netmap_attach.
2362  *
2363  * The order is not important (we have different order in LINUX and FreeBSD).
2364  * The first one, creates the device, and the second one simply attaches it.
2365  */
2366 
2367 /* Called when ptnetmap_memdev is attaching, to attach a new allocator in
2368  * the guest */
2369 struct netmap_mem_d *
2370 netmap_mem_pt_guest_attach(struct ptnetmap_memdev *ptn_dev, nm_memid_t mem_id)
2371 {
2372 	struct netmap_mem_d *nmd;
2373 	struct netmap_mem_ptg *ptnmd;
2374 
2375 	nmd = netmap_mem_pt_guest_get(mem_id);
2376 
2377 	/* assign this device to the guest allocator */
2378 	if (nmd) {
2379 		ptnmd = (struct netmap_mem_ptg *)nmd;
2380 		ptnmd->ptn_dev = ptn_dev;
2381 	}
2382 
2383 	return nmd;
2384 }
2385 
2386 /* Called when ptnet device is attaching */
2387 struct netmap_mem_d *
2388 netmap_mem_pt_guest_new(struct ifnet *ifp,
2389 			unsigned int nifp_offset,
2390 			unsigned int memid)
2391 {
2392 	struct netmap_mem_d *nmd;
2393 
2394 	if (ifp == NULL) {
2395 		return NULL;
2396 	}
2397 
2398 	nmd = netmap_mem_pt_guest_get((nm_memid_t)memid);
2399 
2400 	if (nmd) {
2401 		netmap_mem_pt_guest_ifp_add(nmd, ifp, nifp_offset);
2402 	}
2403 
2404 	return nmd;
2405 }
2406 
2407 #endif /* WITH_PTNETMAP_GUEST */
2408