1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi,
5 * Copyright (c) 2013 EMC Corp.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 /*
31 * From:
32 * $NetBSD: vmem_impl.h,v 1.2 2013/01/29 21:26:24 para Exp $
33 * $NetBSD: subr_vmem.c,v 1.83 2013/03/06 11:20:10 yamt Exp $
34 */
35
36 /*
37 * reference:
38 * - Magazines and Vmem: Extending the Slab Allocator
39 * to Many CPUs and Arbitrary Resources
40 * http://www.usenix.org/event/usenix01/bonwick.html
41 */
42
43 #include <sys/cdefs.h>
44
45 #ifdef _KERNEL
46
47 #include "opt_ddb.h"
48
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/kernel.h>
52 #include <sys/queue.h>
53 #include <sys/callout.h>
54 #include <sys/hash.h>
55 #include <sys/lock.h>
56 #include <sys/malloc.h>
57 #include <sys/mutex.h>
58 #include <sys/smp.h>
59 #include <sys/condvar.h>
60 #include <sys/sysctl.h>
61 #include <sys/taskqueue.h>
62 #include <sys/vmem.h>
63 #include <sys/vmmeter.h>
64
65 #include "opt_vm.h"
66
67 #include <vm/uma.h>
68 #include <vm/vm.h>
69 #include <vm/pmap.h>
70 #include <vm/vm_map.h>
71 #include <vm/vm_object.h>
72 #include <vm/vm_kern.h>
73 #include <vm/vm_extern.h>
74 #include <vm/vm_param.h>
75 #include <vm/vm_page.h>
76 #include <vm/vm_pageout.h>
77 #include <vm/vm_phys.h>
78 #include <vm/vm_pagequeue.h>
79 #include <vm/uma_int.h>
80
81 #else /* _KERNEL */
82
83 #include <sys/types.h>
84 #include <sys/queue.h>
85 #include <sys/hash.h>
86 #include <sys/vmem.h>
87 #include <assert.h>
88 #include <errno.h>
89 #include <pthread.h>
90 #include <pthread_np.h>
91 #include <stdbool.h>
92 #include <stdlib.h>
93 #include <string.h>
94 #include <strings.h>
95
96 #define KASSERT(a, b)
97 #define MPASS(a)
98 #define WITNESS_WARN(a, b, c)
99 #define panic(...) assert(0)
100
101 #endif /* _KERNEL */
102
103 #define VMEM_OPTORDER 5
104 #define VMEM_OPTVALUE (1 << VMEM_OPTORDER)
105 #define VMEM_MAXORDER \
106 (VMEM_OPTVALUE - 1 + sizeof(vmem_size_t) * NBBY - VMEM_OPTORDER)
107
108 #define VMEM_HASHSIZE_MIN 16
109 #define VMEM_HASHSIZE_MAX 131072
110
111 #define VMEM_QCACHE_IDX_MAX 16
112
113 #define VMEM_FITMASK (M_BESTFIT | M_FIRSTFIT | M_NEXTFIT)
114
115 #define QC_NAME_MAX 16
116
117 /*
118 * Data structures private to vmem.
119 */
120 #ifdef _KERNEL
121
122 #define VMEM_FLAGS (M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM | \
123 M_BESTFIT | M_FIRSTFIT | M_NEXTFIT)
124
125 #define BT_FLAGS (M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM)
126
127 MALLOC_DEFINE(M_VMEM, "vmem", "vmem internal structures");
128
129 #else /* _KERNEL */
130
131 /* bit-compat with kernel */
132 #define M_ZERO 0
133 #define M_NOVM 0
134 #define M_USE_RESERVE 0
135
136 #define VMEM_FLAGS (M_NOWAIT | M_BESTFIT | M_FIRSTFIT | M_NEXTFIT)
137
138 #define BT_FLAGS 0
139
140 #endif /* _KERNEL */
141
142 typedef struct vmem_btag bt_t;
143
144 TAILQ_HEAD(vmem_seglist, vmem_btag);
145 LIST_HEAD(vmem_freelist, vmem_btag);
146 LIST_HEAD(vmem_hashlist, vmem_btag);
147
148 #ifdef _KERNEL
149 struct qcache {
150 uma_zone_t qc_cache;
151 vmem_t *qc_vmem;
152 vmem_size_t qc_size;
153 char qc_name[QC_NAME_MAX];
154 };
155 typedef struct qcache qcache_t;
156 #define QC_POOL_TO_QCACHE(pool) ((qcache_t *)(pool->pr_qcache))
157 #endif
158
159 #define VMEM_NAME_MAX 16
160
161 /* boundary tag */
162 struct vmem_btag {
163 TAILQ_ENTRY(vmem_btag) bt_seglist;
164 union {
165 LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
166 LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
167 } bt_u;
168 #define bt_hashlist bt_u.u_hashlist
169 #define bt_freelist bt_u.u_freelist
170 vmem_addr_t bt_start;
171 vmem_size_t bt_size;
172 int bt_type;
173 };
174
175 /* vmem arena */
176 struct vmem {
177 #ifdef _KERNEL
178 struct mtx_padalign vm_lock;
179 struct cv vm_cv;
180 #else
181 pthread_mutex_t vm_lock;
182 pthread_cond_t vm_cv;
183 #endif
184 char vm_name[VMEM_NAME_MAX+1];
185 LIST_ENTRY(vmem) vm_alllist;
186 struct vmem_hashlist vm_hash0[VMEM_HASHSIZE_MIN];
187 struct vmem_freelist vm_freelist[VMEM_MAXORDER];
188 struct vmem_seglist vm_seglist;
189 struct vmem_hashlist *vm_hashlist;
190 vmem_size_t vm_hashsize;
191
192 /* Constant after init */
193 vmem_size_t vm_qcache_max;
194 vmem_size_t vm_quantum_mask;
195 vmem_size_t vm_import_quantum;
196 int vm_quantum_shift;
197
198 /* Written on alloc/free */
199 LIST_HEAD(, vmem_btag) vm_freetags;
200 int vm_nfreetags;
201 int vm_nbusytag;
202 vmem_size_t vm_inuse;
203 vmem_size_t vm_size;
204 vmem_size_t vm_limit;
205 struct vmem_btag vm_cursor;
206
207 /* Used on import. */
208 vmem_import_t *vm_importfn;
209 vmem_release_t *vm_releasefn;
210 void *vm_arg;
211
212 /* Space exhaustion callback. */
213 vmem_reclaim_t *vm_reclaimfn;
214
215 #ifdef _KERNEL
216 /* quantum cache */
217 qcache_t vm_qcache[VMEM_QCACHE_IDX_MAX];
218 #endif
219 };
220
221 #define BT_TYPE_SPAN 1 /* Allocated from importfn */
222 #define BT_TYPE_SPAN_STATIC 2 /* vmem_add() or create. */
223 #define BT_TYPE_FREE 3 /* Available space. */
224 #define BT_TYPE_BUSY 4 /* Used space. */
225 #define BT_TYPE_CURSOR 5 /* Cursor for nextfit allocations. */
226 #define BT_ISSPAN_P(bt) ((bt)->bt_type <= BT_TYPE_SPAN_STATIC)
227
228 #define BT_END(bt) ((bt)->bt_start + (bt)->bt_size - 1)
229
230 #ifdef _KERNEL
231 #if defined(DIAGNOSTIC)
232 static int enable_vmem_check = 0;
233 SYSCTL_INT(_debug, OID_AUTO, vmem_check, CTLFLAG_RWTUN,
234 &enable_vmem_check, 0, "Enable vmem check");
235 static void vmem_check(vmem_t *);
236 #endif
237
238 static struct callout vmem_periodic_ch;
239 static int vmem_periodic_interval;
240 static struct task vmem_periodic_wk;
241
242 static struct mtx_padalign __exclusive_cache_line vmem_list_lock;
243 static uma_zone_t vmem_zone;
244
245 #else /* _KERNEL */
246 static pthread_mutex_t vmem_list_lock = PTHREAD_MUTEX_INITIALIZER;
247
248 #endif /* _KERNEL */
249
250 static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);
251
252 /* ---- misc */
253 #ifdef _KERNEL
254 #define VMEM_LIST_LOCK() mtx_lock(&vmem_list_lock)
255 #define VMEM_LIST_UNLOCK() mtx_unlock(&vmem_list_lock)
256
257 #define VMEM_CONDVAR_INIT(vm, wchan) cv_init(&vm->vm_cv, wchan)
258 #define VMEM_CONDVAR_DESTROY(vm) cv_destroy(&vm->vm_cv)
259 #define VMEM_CONDVAR_WAIT(vm) cv_wait(&vm->vm_cv, &vm->vm_lock)
260 #define VMEM_CONDVAR_BROADCAST(vm) cv_broadcast(&vm->vm_cv)
261
262 #define VMEM_LOCK(vm) mtx_lock(&vm->vm_lock)
263 #define VMEM_UNLOCK(vm) mtx_unlock(&vm->vm_lock)
264 #define VMEM_LOCK_INIT(vm, name) mtx_init(&vm->vm_lock, (name), NULL, MTX_DEF)
265 #define VMEM_LOCK_DESTROY(vm) mtx_destroy(&vm->vm_lock)
266 #define VMEM_ASSERT_LOCKED(vm) mtx_assert(&vm->vm_lock, MA_OWNED);
267 #else /* _KERNEL */
268 #define VMEM_LIST_LOCK() pthread_mutex_lock(&vmem_list_lock)
269 #define VMEM_LIST_UNLOCK() pthread_mutex_unlock(&vmem_list_lock)
270
271 #define VMEM_CONDVAR_INIT(vm, wchan) pthread_cond_init(&vm->vm_cv, NULL)
272 #define VMEM_CONDVAR_DESTROY(vm) pthread_cond_destroy(&vm->vm_cv)
273 #define VMEM_CONDVAR_WAIT(vm) pthread_cond_wait(&vm->vm_cv, &vm->vm_lock)
274 #define VMEM_CONDVAR_BROADCAST(vm) pthread_cond_broadcast(&vm->vm_cv)
275
276 #define VMEM_LOCK(vm) pthread_mutex_lock(&vm->vm_lock)
277 #define VMEM_UNLOCK(vm) pthread_mutex_unlock(&vm->vm_lock)
278 #define VMEM_LOCK_INIT(vm, name) pthread_mutex_init(&vm->vm_lock, NULL)
279 #define VMEM_LOCK_DESTROY(vm) pthread_mutex_destroy(&vm->vm_lock)
280 #define VMEM_ASSERT_LOCKED(vm) pthread_mutex_isowned_np(&vm->vm_lock)
281 #endif /* _KERNEL */
282
283 #define VMEM_ALIGNUP(addr, align) (-(-(addr) & -(align)))
284
285 #define VMEM_CROSS_P(addr1, addr2, boundary) \
286 ((((addr1) ^ (addr2)) & -(boundary)) != 0)
287
288 #define ORDER2SIZE(order) ((order) < VMEM_OPTVALUE ? \
289 (vmem_size_t)((order) + 1) : \
290 (vmem_size_t)1 << ((order) - (VMEM_OPTVALUE - VMEM_OPTORDER - 1)))
291 #define SIZE2ORDER(size) ((size) <= VMEM_OPTVALUE ? \
292 (int)((size) - 1) : \
293 (flsl(size) + (VMEM_OPTVALUE - VMEM_OPTORDER - 2)))
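
/*
 * Worked example (illustrative only): with VMEM_OPTORDER == 5
 * (VMEM_OPTVALUE == 32), sizes of 1..32 quanta map directly to orders
 * 0..31 (order == size - 1), so SIZE2ORDER(3) == 2 and ORDER2SIZE(2) == 3.
 * Larger sizes fall into power-of-two buckets: SIZE2ORDER(33) ==
 * flsl(33) + 25 == 31, while order 32 begins at ORDER2SIZE(32) ==
 * 1 << (32 - 26) == 64.
 */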
294
295 /*
296 * Maximum number of boundary tags that may be required to satisfy an
297 * allocation. Two may be required to import. Another two may be
298 * required to clip edges.
299 */
300 #define BT_MAXALLOC 4
301
302 /*
303 * Max free limits the number of locally cached boundary tags. We
304 * just want to avoid hitting the zone allocator for every call.
305 */
306 #define BT_MAXFREE (BT_MAXALLOC * 8)
307
308 #ifdef _KERNEL
309 /* Allocator for boundary tags. */
310 static uma_zone_t vmem_bt_zone;
311
312 /* boot time arena storage. */
313 static struct vmem kernel_arena_storage;
314 static struct vmem buffer_arena_storage;
315 static struct vmem transient_arena_storage;
316 vmem_t *kernel_arena = &kernel_arena_storage;
317 vmem_t *buffer_arena = &buffer_arena_storage;
318 vmem_t *transient_arena = &transient_arena_storage;
319
320 #ifdef DEBUG_MEMGUARD
321 static struct vmem memguard_arena_storage;
322 vmem_t *memguard_arena = &memguard_arena_storage;
323 #endif /* DEBUG_MEMGUARD */
324 #endif /* _KERNEL */
325
326 static bool
327 bt_isbusy(bt_t *bt)
328 {
329 return (bt->bt_type == BT_TYPE_BUSY);
330 }
331
332 static bool
333 bt_isfree(bt_t *bt)
334 {
335 return (bt->bt_type == BT_TYPE_FREE);
336 }
337
338 /*
339 * Fill the vmem's boundary tag cache. We guarantee that boundary tag
340 * allocation will not fail once bt_fill() passes. To do so we cache
341 * at least the maximum possible tag allocations in the arena.
342 */
343 static __noinline int
344 _bt_fill(vmem_t *vm, int flags __unused)
345 {
346 bt_t *bt;
347
348 VMEM_ASSERT_LOCKED(vm);
349
350 #ifdef _KERNEL
351 /*
352 * Only allow the kernel arena and arenas derived from kernel arena to
353 * dip into reserve tags. They are where new tags come from.
354 */
355 flags &= BT_FLAGS;
356 if (vm != kernel_arena && vm->vm_arg != kernel_arena)
357 flags &= ~M_USE_RESERVE;
358 #endif
359
360 /*
361 * Loop until we meet the reserve. To minimize the lock shuffle
362 * and prevent simultaneous fills we first try a NOWAIT regardless
363 * of the caller's flags. Specify M_NOVM so we don't recurse while
364 * holding a vmem lock.
365 */
366 while (vm->vm_nfreetags < BT_MAXALLOC) {
367 #ifdef _KERNEL
368 bt = uma_zalloc(vmem_bt_zone,
369 (flags & M_USE_RESERVE) | M_NOWAIT | M_NOVM);
370 #else
371 bt = malloc(sizeof(struct vmem_btag));
372 #endif
373 if (bt == NULL) {
374 #ifdef _KERNEL
375 VMEM_UNLOCK(vm);
376 bt = uma_zalloc(vmem_bt_zone, flags);
377 VMEM_LOCK(vm);
378 #endif
379 if (bt == NULL)
380 break;
381 }
382 LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
383 vm->vm_nfreetags++;
384 }
385
386 if (vm->vm_nfreetags < BT_MAXALLOC)
387 return ENOMEM;
388
389 return 0;
390 }
391
392 static inline int
393 bt_fill(vmem_t *vm, int flags)
394 {
395 if (vm->vm_nfreetags >= BT_MAXALLOC)
396 return (0);
397 return (_bt_fill(vm, flags));
398 }
399
400 /*
401 * Pop a tag off of the freetag stack.
402 */
403 static bt_t *
404 bt_alloc(vmem_t *vm)
405 {
406 bt_t *bt;
407
408 VMEM_ASSERT_LOCKED(vm);
409 bt = LIST_FIRST(&vm->vm_freetags);
410 MPASS(bt != NULL);
411 LIST_REMOVE(bt, bt_freelist);
412 vm->vm_nfreetags--;
413
414 return bt;
415 }
416
417 /*
418 * Trim the per-vmem free list. Returns with the lock released to
419 * avoid allocator recursions.
420 */
421 static void
422 bt_freetrim(vmem_t *vm, int freelimit)
423 {
424 LIST_HEAD(, vmem_btag) freetags;
425 bt_t *bt;
426
427 LIST_INIT(&freetags);
428 VMEM_ASSERT_LOCKED(vm);
429 while (vm->vm_nfreetags > freelimit) {
430 bt = LIST_FIRST(&vm->vm_freetags);
431 LIST_REMOVE(bt, bt_freelist);
432 vm->vm_nfreetags--;
433 LIST_INSERT_HEAD(&freetags, bt, bt_freelist);
434 }
435 VMEM_UNLOCK(vm);
436 while ((bt = LIST_FIRST(&freetags)) != NULL) {
437 LIST_REMOVE(bt, bt_freelist);
438 #ifdef _KERNEL
439 uma_zfree(vmem_bt_zone, bt);
440 #else
441 free(bt);
442 #endif
443 }
444 }
445
446 static inline void
447 bt_free(vmem_t *vm, bt_t *bt)
448 {
449
450 VMEM_ASSERT_LOCKED(vm);
451 MPASS(LIST_FIRST(&vm->vm_freetags) != bt);
452 LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
453 vm->vm_nfreetags++;
454 }
455
456 /*
457 * Hide MAXALLOC tags before dropping the arena lock to ensure that a
458 * concurrent allocation attempt does not grab them.
459 */
460 static void
461 bt_save(vmem_t *vm)
462 {
463 KASSERT(vm->vm_nfreetags >= BT_MAXALLOC,
464 ("%s: insufficient free tags %d", __func__, vm->vm_nfreetags));
465 vm->vm_nfreetags -= BT_MAXALLOC;
466 }
467
468 static void
469 bt_restore(vmem_t *vm)
470 {
471 vm->vm_nfreetags += BT_MAXALLOC;
472 }
473
474 /*
475 * freelist[0] ... [1, 1]
476 * freelist[1] ... [2, 2]
477 * :
478 * freelist[29] ... [30, 30]
479 * freelist[30] ... [31, 31]
480 * freelist[31] ... [32, 63]
481 * freelist[32] ... [64, 127]
482 * :
483 * freelist[n] ... [(1 << (n - 26)), (1 << (n - 25)) - 1]
484 * :
485 */
486
487 static struct vmem_freelist *
488 bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
489 {
490 const vmem_size_t qsize = size >> vm->vm_quantum_shift;
491 const int idx = SIZE2ORDER(qsize);
492
493 MPASS(size != 0 && qsize != 0);
494 MPASS((size & vm->vm_quantum_mask) == 0);
495 MPASS(idx >= 0);
496 MPASS(idx < VMEM_MAXORDER);
497
498 return &vm->vm_freelist[idx];
499 }
500
501 /*
502 * bt_freehead_toalloc: return the freelist for the given size and allocation
503 * strategy.
504 *
505 * For M_FIRSTFIT, return the list on which any block is guaranteed to be
506 * large enough for the requested size. Otherwise, return the list that may
507 * contain blocks large enough for the requested size.
508 */
509 static struct vmem_freelist *
510 bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, int strat)
511 {
512 const vmem_size_t qsize = size >> vm->vm_quantum_shift;
513 int idx = SIZE2ORDER(qsize);
514
515 MPASS(size != 0 && qsize != 0);
516 MPASS((size & vm->vm_quantum_mask) == 0);
517
518 if (strat == M_FIRSTFIT && ORDER2SIZE(idx) != qsize) {
519 idx++;
520 /* check too large request? */
521 }
522 MPASS(idx >= 0);
523 MPASS(idx < VMEM_MAXORDER);
524
525 return &vm->vm_freelist[idx];
526 }
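
/*
 * Illustrative example (not part of the original code): a request of 33
 * quanta has SIZE2ORDER(33) == 31, whose list holds free segments of
 * 32..63 quanta, so an M_BESTFIT scan starts there and may walk upward.
 * M_FIRSTFIT bumps the index to 32, whose list holds only segments of at
 * least 64 quanta, so the first segment inspected is guaranteed to fit.
 */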
527
528 /* ---- boundary tag hash */
529
530 static struct vmem_hashlist *
531 bt_hashhead(vmem_t *vm, vmem_addr_t addr)
532 {
533 struct vmem_hashlist *list;
534 unsigned int hash;
535
536 hash = hash32_buf(&addr, sizeof(addr), 0);
537 list = &vm->vm_hashlist[hash % vm->vm_hashsize];
538
539 return list;
540 }
541
542 static bt_t *
543 bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
544 {
545 struct vmem_hashlist *list;
546 bt_t *bt;
547
548 VMEM_ASSERT_LOCKED(vm);
549 list = bt_hashhead(vm, addr);
550 LIST_FOREACH(bt, list, bt_hashlist) {
551 if (bt->bt_start == addr) {
552 break;
553 }
554 }
555
556 return bt;
557 }
558
559 static void
560 bt_rembusy(vmem_t *vm, bt_t *bt)
561 {
562
563 VMEM_ASSERT_LOCKED(vm);
564 MPASS(vm->vm_nbusytag > 0);
565 vm->vm_inuse -= bt->bt_size;
566 vm->vm_nbusytag--;
567 LIST_REMOVE(bt, bt_hashlist);
568 }
569
570 static void
571 bt_insbusy(vmem_t *vm, bt_t *bt)
572 {
573 struct vmem_hashlist *list;
574
575 VMEM_ASSERT_LOCKED(vm);
576 MPASS(bt->bt_type == BT_TYPE_BUSY);
577
578 list = bt_hashhead(vm, bt->bt_start);
579 LIST_INSERT_HEAD(list, bt, bt_hashlist);
580 vm->vm_nbusytag++;
581 vm->vm_inuse += bt->bt_size;
582 }
583
584 /* ---- boundary tag list */
585
586 static void
587 bt_remseg(vmem_t *vm, bt_t *bt)
588 {
589
590 MPASS(bt->bt_type != BT_TYPE_CURSOR);
591 TAILQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
592 bt_free(vm, bt);
593 }
594
595 static void
596 bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
597 {
598
599 TAILQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
600 }
601
602 static void
603 bt_insseg_tail(vmem_t *vm, bt_t *bt)
604 {
605
606 TAILQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
607 }
608
609 static void
610 bt_remfree(vmem_t *vm __unused, bt_t *bt)
611 {
612
613 MPASS(bt->bt_type == BT_TYPE_FREE);
614
615 LIST_REMOVE(bt, bt_freelist);
616 }
617
618 static void
619 bt_insfree(vmem_t *vm, bt_t *bt)
620 {
621 struct vmem_freelist *list;
622
623 list = bt_freehead_tofree(vm, bt->bt_size);
624 LIST_INSERT_HEAD(list, bt, bt_freelist);
625 }
626
627 /* ---- vmem internal functions */
628
629 #ifdef _KERNEL
630 /*
631 * Import from the arena into the quantum cache in UMA.
632 *
633 * We use VMEM_ADDR_QCACHE_MIN instead of 0: uma_zalloc() returns 0 to indicate
634 * failure, so UMA can't be used to cache a resource with value 0.
635 */
636 static int
637 qc_import(void *arg, void **store, int cnt, int domain, int flags)
638 {
639 qcache_t *qc;
640 vmem_addr_t addr;
641 int i;
642
643 KASSERT((flags & M_WAITOK) == 0, ("blocking allocation"));
644
645 qc = arg;
646 for (i = 0; i < cnt; i++) {
647 if (vmem_xalloc(qc->qc_vmem, qc->qc_size, 0, 0, 0,
648 VMEM_ADDR_QCACHE_MIN, VMEM_ADDR_MAX, flags, &addr) != 0)
649 break;
650 store[i] = (void *)addr;
651 }
652 return (i);
653 }
654
655 /*
656 * Release memory from the UMA cache to the arena.
657 */
658 static void
659 qc_release(void *arg, void **store, int cnt)
660 {
661 qcache_t *qc;
662 int i;
663
664 qc = arg;
665 for (i = 0; i < cnt; i++)
666 vmem_xfree(qc->qc_vmem, (vmem_addr_t)store[i], qc->qc_size);
667 }
668
669 static void
670 qc_init(vmem_t *vm, vmem_size_t qcache_max)
671 {
672 qcache_t *qc;
673 vmem_size_t size;
674 int qcache_idx_max;
675 int i;
676
677 MPASS((qcache_max & vm->vm_quantum_mask) == 0);
678 qcache_idx_max = MIN(qcache_max >> vm->vm_quantum_shift,
679 VMEM_QCACHE_IDX_MAX);
680 vm->vm_qcache_max = qcache_idx_max << vm->vm_quantum_shift;
681 for (i = 0; i < qcache_idx_max; i++) {
682 qc = &vm->vm_qcache[i];
683 size = (i + 1) << vm->vm_quantum_shift;
684 snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
685 vm->vm_name, size);
686 qc->qc_vmem = vm;
687 qc->qc_size = size;
688 qc->qc_cache = uma_zcache_create(qc->qc_name, size,
689 NULL, NULL, NULL, NULL, qc_import, qc_release, qc, 0);
690 MPASS(qc->qc_cache);
691 }
692 }
693
694 static void
695 qc_destroy(vmem_t *vm)
696 {
697 int qcache_idx_max;
698 int i;
699
700 qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
701 for (i = 0; i < qcache_idx_max; i++)
702 uma_zdestroy(vm->vm_qcache[i].qc_cache);
703 }
704
705 static void
706 qc_drain(vmem_t *vm)
707 {
708 int qcache_idx_max;
709 int i;
710
711 qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
712 for (i = 0; i < qcache_idx_max; i++)
713 uma_zone_reclaim(vm->vm_qcache[i].qc_cache, UMA_RECLAIM_DRAIN);
714 }
715
716 #ifndef UMA_USE_DMAP
717
718 static struct mtx_padalign __exclusive_cache_line vmem_bt_lock;
719
720 /*
721 * vmem_bt_alloc: Allocate a new page of boundary tags.
722 *
723 * On architectures with UMA_USE_DMAP there is no recursion; no address
724 * space need be allocated to allocate boundary tags. For the others, we
725 * must handle recursion. Boundary tags are necessary to allocate new
726 * boundary tags.
727 *
728 * UMA guarantees that enough tags are held in reserve to allocate a new
729 * page of kva. We dip into this reserve by specifying M_USE_RESERVE only
730 * when allocating the page to hold new boundary tags. In this way the
731 * reserve is automatically filled by the allocation that uses the reserve.
732 *
733 * We still have to guarantee that the new tags are allocated atomically since
734 * many threads may try concurrently. The bt_lock provides this guarantee.
735 * We convert WAITOK allocations to NOWAIT and then handle the blocking here
736 * on failure. It's ok to return NULL for a WAITOK allocation as UMA will
737 * loop again after checking to see if we lost the race to allocate.
738 *
739 * There is a small race between vmem_bt_alloc() returning the page and the
740 * zone lock being acquired to add the page to the zone. For WAITOK
741 * allocations we just pause briefly. NOWAIT may experience a transient
742 * failure. To alleviate this we permit a small number of simultaneous
743 * fills to proceed concurrently so NOWAIT is less likely to fail unless
744 * we are really out of KVA.
745 */
746 static void *
747 vmem_bt_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
748 int wait)
749 {
750 vmem_addr_t addr;
751
752 *pflag = UMA_SLAB_KERNEL;
753
754 /*
755 * Single thread boundary tag allocation so that the address space
756 * and memory are added in one atomic operation.
757 */
758 mtx_lock(&vmem_bt_lock);
759 if (vmem_xalloc(vm_dom[domain].vmd_kernel_arena, bytes, 0, 0, 0,
760 VMEM_ADDR_MIN, VMEM_ADDR_MAX,
761 M_NOWAIT | M_NOVM | M_USE_RESERVE | M_BESTFIT, &addr) == 0) {
762 if (kmem_back_domain(domain, kernel_object, addr, bytes,
763 M_NOWAIT | M_USE_RESERVE) == 0) {
764 mtx_unlock(&vmem_bt_lock);
765 return ((void *)addr);
766 }
767 vmem_xfree(vm_dom[domain].vmd_kernel_arena, addr, bytes);
768 mtx_unlock(&vmem_bt_lock);
769 /*
770 * Out of memory, not address space. This may not even be
771 * possible due to M_USE_RESERVE page allocation.
772 */
773 if (wait & M_WAITOK)
774 vm_wait_domain(domain);
775 return (NULL);
776 }
777 mtx_unlock(&vmem_bt_lock);
778 /*
779 * We're either out of address space or lost a fill race.
780 */
781 if (wait & M_WAITOK)
782 pause("btalloc", 1);
783
784 return (NULL);
785 }
786 #endif
787
788 void
789 vmem_startup(void)
790 {
791
792 mtx_init(&vmem_list_lock, "vmem list lock", NULL, MTX_DEF);
793 vmem_zone = uma_zcreate("vmem",
794 sizeof(struct vmem), NULL, NULL, NULL, NULL,
795 UMA_ALIGN_PTR, 0);
796 vmem_bt_zone = uma_zcreate("vmem btag",
797 sizeof(struct vmem_btag), NULL, NULL, NULL, NULL,
798 UMA_ALIGN_PTR, UMA_ZONE_VM);
799 #ifndef UMA_USE_DMAP
800 mtx_init(&vmem_bt_lock, "btag lock", NULL, MTX_DEF);
801 uma_prealloc(vmem_bt_zone, BT_MAXALLOC);
802 /*
803 * Reserve enough tags to allocate new tags. We allow multiple
804 * CPUs to attempt to allocate new tags concurrently to limit
805 * false restarts in UMA. vmem_bt_alloc() allocates from a per-domain
806 * arena, which may involve importing a range from the kernel arena,
807 * so we need to keep at least 2 * BT_MAXALLOC tags reserved.
808 */
809 uma_zone_reserve(vmem_bt_zone, 2 * BT_MAXALLOC * mp_ncpus);
810 uma_zone_set_allocf(vmem_bt_zone, vmem_bt_alloc);
811 #endif
812 }
813
814 static int
815 vmem_rehash(vmem_t *vm, vmem_size_t newhashsize)
816 {
817 bt_t *bt;
818 struct vmem_hashlist *newhashlist;
819 struct vmem_hashlist *oldhashlist;
820 vmem_size_t i, oldhashsize;
821
822 MPASS(newhashsize > 0);
823
824 newhashlist = malloc(sizeof(struct vmem_hashlist) * newhashsize,
825 M_VMEM, M_NOWAIT);
826 if (newhashlist == NULL)
827 return ENOMEM;
828 for (i = 0; i < newhashsize; i++) {
829 LIST_INIT(&newhashlist[i]);
830 }
831
832 VMEM_LOCK(vm);
833 oldhashlist = vm->vm_hashlist;
834 oldhashsize = vm->vm_hashsize;
835 vm->vm_hashlist = newhashlist;
836 vm->vm_hashsize = newhashsize;
837 if (oldhashlist == NULL) {
838 VMEM_UNLOCK(vm);
839 return 0;
840 }
841 for (i = 0; i < oldhashsize; i++) {
842 while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
843 bt_rembusy(vm, bt);
844 bt_insbusy(vm, bt);
845 }
846 }
847 VMEM_UNLOCK(vm);
848
849 if (oldhashlist != vm->vm_hash0)
850 free(oldhashlist, M_VMEM);
851
852 return 0;
853 }
854
855 static void
856 vmem_periodic_kick(void *dummy)
857 {
858
859 taskqueue_enqueue(taskqueue_thread, &vmem_periodic_wk);
860 }
861
862 static void
863 vmem_periodic(void *unused, int pending)
864 {
865 vmem_t *vm;
866 vmem_size_t desired;
867 vmem_size_t current;
868
869 VMEM_LIST_LOCK();
870 LIST_FOREACH(vm, &vmem_list, vm_alllist) {
871 #ifdef DIAGNOSTIC
872 /* Convenient time to verify vmem state. */
873 if (enable_vmem_check == 1) {
874 VMEM_LOCK(vm);
875 vmem_check(vm);
876 VMEM_UNLOCK(vm);
877 }
878 #endif
879 desired = 1 << flsl(vm->vm_nbusytag);
880 desired = MIN(MAX(desired, VMEM_HASHSIZE_MIN),
881 VMEM_HASHSIZE_MAX);
882 current = vm->vm_hashsize;
883
884 /* Grow in powers of two. Shrink less aggressively. */
885 if (desired >= current * 2 || desired * 4 <= current)
886 vmem_rehash(vm, desired);
887
888 /*
889 * Periodically wake up threads waiting for resources,
890 * so that they can ask for reclamation again.
891 */
892 VMEM_CONDVAR_BROADCAST(vm);
893 }
894 VMEM_LIST_UNLOCK();
895
896 callout_reset(&vmem_periodic_ch, vmem_periodic_interval,
897 vmem_periodic_kick, NULL);
898 }
899
900 static void
901 vmem_start_callout(void *unused)
902 {
903
904 TASK_INIT(&vmem_periodic_wk, 0, vmem_periodic, NULL);
905 vmem_periodic_interval = hz * 10;
906 callout_init(&vmem_periodic_ch, 1);
907 callout_reset(&vmem_periodic_ch, vmem_periodic_interval,
908 vmem_periodic_kick, NULL);
909 }
910 SYSINIT(vfs, SI_SUB_CONFIGURE, SI_ORDER_ANY, vmem_start_callout, NULL);
911 #endif /* _KERNEL */
912
913 static void
914 vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, int type)
915 {
916 bt_t *btfree, *btprev, *btspan;
917
918 VMEM_ASSERT_LOCKED(vm);
919 MPASS(type == BT_TYPE_SPAN || type == BT_TYPE_SPAN_STATIC);
920 MPASS((size & vm->vm_quantum_mask) == 0);
921
922 if (vm->vm_releasefn == NULL) {
923 /*
924 * The new segment will never be released, so see if it is
925 * contiguous with respect to an existing segment. In this case
926 * a span tag is not needed, and it may be possible now or in
927 * the future to coalesce the new segment with an existing free
928 * segment.
929 */
930 btprev = TAILQ_LAST(&vm->vm_seglist, vmem_seglist);
931 if ((!bt_isbusy(btprev) && !bt_isfree(btprev)) ||
932 btprev->bt_start + btprev->bt_size != addr)
933 btprev = NULL;
934 } else {
935 btprev = NULL;
936 }
937
938 if (btprev == NULL || bt_isbusy(btprev)) {
939 if (btprev == NULL) {
940 btspan = bt_alloc(vm);
941 btspan->bt_type = type;
942 btspan->bt_start = addr;
943 btspan->bt_size = size;
944 bt_insseg_tail(vm, btspan);
945 }
946
947 btfree = bt_alloc(vm);
948 btfree->bt_type = BT_TYPE_FREE;
949 btfree->bt_start = addr;
950 btfree->bt_size = size;
951 bt_insseg_tail(vm, btfree);
952 bt_insfree(vm, btfree);
953 } else {
954 bt_remfree(vm, btprev);
955 btprev->bt_size += size;
956 bt_insfree(vm, btprev);
957 }
958
959 vm->vm_size += size;
960 }
961
962 static void
963 vmem_destroy1(vmem_t *vm)
964 {
965 bt_t *bt;
966
967 #ifdef _KERNEL
968 /*
969 * Drain per-cpu quantum caches.
970 */
971 qc_destroy(vm);
972 #endif
973
974 /*
975 * The vmem should now only contain empty segments.
976 */
977 VMEM_LOCK(vm);
978 MPASS(vm->vm_nbusytag == 0);
979
980 TAILQ_REMOVE(&vm->vm_seglist, &vm->vm_cursor, bt_seglist);
981 while ((bt = TAILQ_FIRST(&vm->vm_seglist)) != NULL)
982 bt_remseg(vm, bt);
983
984 if (vm->vm_hashlist != NULL && vm->vm_hashlist != vm->vm_hash0) {
985 #ifdef _KERNEL
986 free(vm->vm_hashlist, M_VMEM);
987 #else
988 free(vm->vm_hashlist);
989 #endif
990 }
991
992 bt_freetrim(vm, 0);
993
994 VMEM_CONDVAR_DESTROY(vm);
995 VMEM_LOCK_DESTROY(vm);
996 #ifdef _KERNEL
997 uma_zfree(vmem_zone, vm);
998 #else
999 free(vm);
1000 #endif
1001 }
1002
1003 static int
1004 vmem_import(vmem_t *vm, vmem_size_t size, vmem_size_t align, int flags)
1005 {
1006 vmem_addr_t addr;
1007 int error;
1008
1009 if (vm->vm_importfn == NULL)
1010 return (EINVAL);
1011
1012 /*
1013 * To make sure we get a span that meets the alignment we double it
1014 * and add the size to the tail. This slightly overestimates.
1015 */
1016 if (align != vm->vm_quantum_mask + 1)
1017 size = (align * 2) + size;
1018 size = roundup(size, vm->vm_import_quantum);
1019
1020 if (vm->vm_limit != 0 && vm->vm_limit < vm->vm_size + size)
1021 return (ENOMEM);
1022
1023 bt_save(vm);
1024 VMEM_UNLOCK(vm);
1025 error = (vm->vm_importfn)(vm->vm_arg, size, flags, &addr);
1026 VMEM_LOCK(vm);
1027 bt_restore(vm);
1028 if (error)
1029 return (ENOMEM);
1030
1031 vmem_add1(vm, addr, size, BT_TYPE_SPAN);
1032
1033 return 0;
1034 }
1035
1036 /*
1037 * vmem_fit: check if a bt can satisfy the given restrictions.
1038 *
1039 * It is the caller's responsibility to ensure that the region is big
1040 * enough before calling us.
1041 */
1042 static int
1043 vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align,
1044 vmem_size_t phase, vmem_size_t nocross, vmem_addr_t minaddr,
1045 vmem_addr_t maxaddr, vmem_addr_t *addrp)
1046 {
1047 vmem_addr_t start;
1048 vmem_addr_t end;
1049
1050 MPASS(size > 0);
1051 MPASS(bt->bt_size >= size); /* caller's responsibility */
1052
1053 /*
1054 * XXX assumption: vmem_addr_t and vmem_size_t are
1055 * unsigned integer of the same size.
1056 */
1057
1058 start = bt->bt_start;
1059 if (start < minaddr) {
1060 start = minaddr;
1061 }
1062 end = BT_END(bt);
1063 if (end > maxaddr)
1064 end = maxaddr;
1065 if (start > end)
1066 return (ENOMEM);
1067
1068 start = VMEM_ALIGNUP(start - phase, align) + phase;
1069 if (start < bt->bt_start)
1070 start += align;
1071 if (VMEM_CROSS_P(start, start + size - 1, nocross)) {
1072 MPASS(align < nocross);
1073 start = VMEM_ALIGNUP(start - phase, nocross) + phase;
1074 }
1075 if (start <= end && end - start >= size - 1) {
1076 MPASS((start & (align - 1)) == phase);
1077 MPASS(!VMEM_CROSS_P(start, start + size - 1, nocross));
1078 MPASS(minaddr <= start);
1079 MPASS(maxaddr == 0 || start + size - 1 <= maxaddr);
1080 MPASS(bt->bt_start <= start);
1081 MPASS(BT_END(bt) - start >= size - 1);
1082 *addrp = start;
1083
1084 return (0);
1085 }
1086 return (ENOMEM);
1087 }
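
/*
 * Illustrative example with hypothetical values: for a free tag covering
 * [0x2000, 0x5fff] and a request with size == 0x1000, align == 0x800,
 * phase == 0x100, nocross == 0 and minaddr == 0x2400, the candidate start
 * is clamped to 0x2400 and rounded to
 * VMEM_ALIGNUP(0x2400 - 0x100, 0x800) + 0x100 == 0x2900, which satisfies
 * (0x2900 & 0x7ff) == 0x100 and keeps [0x2900, 0x38ff] inside the tag, so
 * *addrp is set to 0x2900.
 */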
1088
1089 /*
1090 * vmem_clip: Trim the boundary tag edges to the requested start and size.
1091 */
1092 static void
1093 vmem_clip(vmem_t *vm, bt_t *bt, vmem_addr_t start, vmem_size_t size)
1094 {
1095 bt_t *btnew;
1096 bt_t *btprev;
1097
1098 VMEM_ASSERT_LOCKED(vm);
1099 MPASS(bt->bt_type == BT_TYPE_FREE);
1100 MPASS(bt->bt_size >= size);
1101 bt_remfree(vm, bt);
1102 if (bt->bt_start != start) {
1103 btprev = bt_alloc(vm);
1104 btprev->bt_type = BT_TYPE_FREE;
1105 btprev->bt_start = bt->bt_start;
1106 btprev->bt_size = start - bt->bt_start;
1107 bt->bt_start = start;
1108 bt->bt_size -= btprev->bt_size;
1109 bt_insfree(vm, btprev);
1110 bt_insseg(vm, btprev,
1111 TAILQ_PREV(bt, vmem_seglist, bt_seglist));
1112 }
1113 MPASS(bt->bt_start == start);
1114 if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
1115 /* split */
1116 btnew = bt_alloc(vm);
1117 btnew->bt_type = BT_TYPE_BUSY;
1118 btnew->bt_start = bt->bt_start;
1119 btnew->bt_size = size;
1120 bt->bt_start = bt->bt_start + size;
1121 bt->bt_size -= size;
1122 bt_insfree(vm, bt);
1123 bt_insseg(vm, btnew,
1124 TAILQ_PREV(bt, vmem_seglist, bt_seglist));
1125 bt_insbusy(vm, btnew);
1126 bt = btnew;
1127 } else {
1128 bt->bt_type = BT_TYPE_BUSY;
1129 bt_insbusy(vm, bt);
1130 }
1131 MPASS(bt->bt_size >= size);
1132 }
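
/*
 * Illustrative example with hypothetical values: clipping a free tag
 * covering [0x1000, 0x4fff] to start == 0x2000 and size == 0x1000 (with a
 * 0x1000 quantum) leaves a free tag [0x1000, 0x1fff] in front, a busy tag
 * [0x2000, 0x2fff] for the allocation, and a free tag [0x3000, 0x4fff]
 * behind it.
 */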
1133
1134 static int
1135 vmem_try_fetch(vmem_t *vm, const vmem_size_t size, vmem_size_t align, int flags)
1136 {
1137 vmem_size_t avail;
1138
1139 VMEM_ASSERT_LOCKED(vm);
1140
1141 /*
1142 * XXX it is possible to fail to meet xalloc constraints with the
1143 * imported region. It is up to the user to specify the
1144 * import quantum such that it can satisfy any allocation.
1145 */
1146 if (vmem_import(vm, size, align, flags) == 0)
1147 return (1);
1148
1149 /*
1150 * Try to free some space from the quantum cache or reclaim
1151 * functions if available.
1152 */
1153 if (vm->vm_qcache_max != 0 || vm->vm_reclaimfn != NULL) {
1154 avail = vm->vm_size - vm->vm_inuse;
1155 bt_save(vm);
1156 VMEM_UNLOCK(vm);
1157 #ifdef _KERNEL
1158 if (vm->vm_qcache_max != 0)
1159 qc_drain(vm);
1160 #endif
1161 if (vm->vm_reclaimfn != NULL)
1162 vm->vm_reclaimfn(vm, flags);
1163 VMEM_LOCK(vm);
1164 bt_restore(vm);
1165 /* If we were successful retry even NOWAIT. */
1166 if (vm->vm_size - vm->vm_inuse > avail)
1167 return (1);
1168 }
1169 if ((flags & M_NOWAIT) != 0)
1170 return (0);
1171 bt_save(vm);
1172 VMEM_CONDVAR_WAIT(vm);
1173 bt_restore(vm);
1174 return (1);
1175 }
1176
1177 static int
1178 vmem_try_release(vmem_t *vm, struct vmem_btag *bt, const bool remfree)
1179 {
1180 struct vmem_btag *prev;
1181
1182 MPASS(bt->bt_type == BT_TYPE_FREE);
1183
1184 if (vm->vm_releasefn == NULL)
1185 return (0);
1186
1187 prev = TAILQ_PREV(bt, vmem_seglist, bt_seglist);
1188 MPASS(prev != NULL);
1189 MPASS(prev->bt_type != BT_TYPE_FREE);
1190
1191 if (prev->bt_type == BT_TYPE_SPAN && prev->bt_size == bt->bt_size) {
1192 vmem_addr_t spanaddr;
1193 vmem_size_t spansize;
1194
1195 MPASS(prev->bt_start == bt->bt_start);
1196 spanaddr = prev->bt_start;
1197 spansize = prev->bt_size;
1198 if (remfree)
1199 bt_remfree(vm, bt);
1200 bt_remseg(vm, bt);
1201 bt_remseg(vm, prev);
1202 vm->vm_size -= spansize;
1203 VMEM_CONDVAR_BROADCAST(vm);
1204 bt_freetrim(vm, BT_MAXFREE);
1205 vm->vm_releasefn(vm->vm_arg, spanaddr, spansize);
1206 return (1);
1207 }
1208 return (0);
1209 }
1210
1211 static int
1212 vmem_xalloc_nextfit(vmem_t *vm, const vmem_size_t size, vmem_size_t align,
1213 const vmem_size_t phase, const vmem_size_t nocross, int flags,
1214 vmem_addr_t *addrp)
1215 {
1216 struct vmem_btag *bt, *cursor, *next, *prev;
1217 int error;
1218
1219 error = ENOMEM;
1220 VMEM_LOCK(vm);
1221
1222 /*
1223 * Make sure we have enough tags to complete the operation.
1224 */
1225 if (bt_fill(vm, flags) != 0)
1226 goto out;
1227
1228 retry:
1229 /*
1230 * Find the next free tag meeting our constraints. If one is found,
1231 * perform the allocation.
1232 */
1233 for (cursor = &vm->vm_cursor, bt = TAILQ_NEXT(cursor, bt_seglist);
1234 bt != cursor; bt = TAILQ_NEXT(bt, bt_seglist)) {
1235 if (bt == NULL)
1236 bt = TAILQ_FIRST(&vm->vm_seglist);
1237 if (bt->bt_type == BT_TYPE_FREE && bt->bt_size >= size &&
1238 (error = vmem_fit(bt, size, align, phase, nocross,
1239 VMEM_ADDR_MIN, VMEM_ADDR_MAX, addrp)) == 0) {
1240 vmem_clip(vm, bt, *addrp, size);
1241 break;
1242 }
1243 }
1244
1245 /*
1246 * Try to coalesce free segments around the cursor. If we succeed, and
1247 * have not yet satisfied the allocation request, try again with the
1248 * newly coalesced segment.
1249 */
1250 if ((next = TAILQ_NEXT(cursor, bt_seglist)) != NULL &&
1251 (prev = TAILQ_PREV(cursor, vmem_seglist, bt_seglist)) != NULL &&
1252 next->bt_type == BT_TYPE_FREE && prev->bt_type == BT_TYPE_FREE &&
1253 prev->bt_start + prev->bt_size == next->bt_start) {
1254 prev->bt_size += next->bt_size;
1255 bt_remfree(vm, next);
1256 bt_remseg(vm, next);
1257
1258 /*
1259 * The coalesced segment might be able to satisfy our request.
1260 * If not, we might need to release it from the arena.
1261 */
1262 if (error == ENOMEM && prev->bt_size >= size &&
1263 (error = vmem_fit(prev, size, align, phase, nocross,
1264 VMEM_ADDR_MIN, VMEM_ADDR_MAX, addrp)) == 0) {
1265 vmem_clip(vm, prev, *addrp, size);
1266 bt = prev;
1267 } else
1268 (void)vmem_try_release(vm, prev, true);
1269 }
1270
1271 /*
1272 * If the allocation was successful, advance the cursor.
1273 */
1274 if (error == 0) {
1275 TAILQ_REMOVE(&vm->vm_seglist, cursor, bt_seglist);
1276 for (; bt != NULL && bt->bt_start < *addrp + size;
1277 bt = TAILQ_NEXT(bt, bt_seglist))
1278 ;
1279 if (bt != NULL)
1280 TAILQ_INSERT_BEFORE(bt, cursor, bt_seglist);
1281 else
1282 TAILQ_INSERT_HEAD(&vm->vm_seglist, cursor, bt_seglist);
1283 }
1284
1285 /*
1286 * Attempt to bring additional resources into the arena. If that fails
1287 * and M_WAITOK is specified, sleep waiting for resources to be freed.
1288 */
1289 if (error == ENOMEM && vmem_try_fetch(vm, size, align, flags))
1290 goto retry;
1291
1292 out:
1293 VMEM_UNLOCK(vm);
1294 return (error);
1295 }
1296
1297 /* ---- vmem API */
1298
1299 void
1300 vmem_set_import(vmem_t *vm, vmem_import_t *importfn,
1301 vmem_release_t *releasefn, void *arg, vmem_size_t import_quantum)
1302 {
1303
1304 VMEM_LOCK(vm);
1305 KASSERT(vm->vm_size == 0, ("%s: arena is non-empty", __func__));
1306 vm->vm_importfn = importfn;
1307 vm->vm_releasefn = releasefn;
1308 vm->vm_arg = arg;
1309 vm->vm_import_quantum = import_quantum;
1310 VMEM_UNLOCK(vm);
1311 }
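
/*
 * Illustrative sketch with hypothetical names (not part of the original):
 * a child arena can import address space from a parent arena in
 * import_quantum-sized spans by wrapping vmem_xalloc()/vmem_xfree().  The
 * callback signatures below follow how vm_importfn and vm_releasefn are
 * invoked by vmem_import() and vmem_try_release() above.
 *
 *	static int
 *	child_import(void *arg, vmem_size_t size, int flags, vmem_addr_t *ap)
 *	{
 *		return (vmem_xalloc((vmem_t *)arg, size, 0, 0, 0,
 *		    VMEM_ADDR_MIN, VMEM_ADDR_MAX, flags, ap));
 *	}
 *
 *	static void
 *	child_release(void *arg, vmem_addr_t addr, vmem_size_t size)
 *	{
 *		vmem_xfree((vmem_t *)arg, addr, size);
 *	}
 *
 *	vmem_set_import(child, child_import, child_release, parent,
 *	    16 * PAGE_SIZE);
 */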
1312
1313 void
1314 vmem_set_limit(vmem_t *vm, vmem_size_t limit)
1315 {
1316
1317 VMEM_LOCK(vm);
1318 vm->vm_limit = limit;
1319 VMEM_UNLOCK(vm);
1320 }
1321
1322 void
1323 vmem_set_reclaim(vmem_t *vm, vmem_reclaim_t *reclaimfn)
1324 {
1325
1326 VMEM_LOCK(vm);
1327 vm->vm_reclaimfn = reclaimfn;
1328 VMEM_UNLOCK(vm);
1329 }
1330
1331 /*
1332 * vmem_init: initialize a vmem arena.
1333 */
1334 vmem_t *
1335 vmem_init(vmem_t *vm, const char *name, vmem_addr_t base, vmem_size_t size,
1336 vmem_size_t quantum, vmem_size_t qcache_max, int flags)
1337 {
1338 vmem_size_t i;
1339
1340 #ifdef _KERNEL
1341 MPASS(quantum > 0);
1342 MPASS((quantum & (quantum - 1)) == 0);
1343 #else
1344 assert(quantum == 0);
1345 assert(qcache_max == 0);
1346 quantum = 1;
1347 #endif
1348
1349 bzero(vm, sizeof(*vm));
1350
1351 VMEM_CONDVAR_INIT(vm, name);
1352 VMEM_LOCK_INIT(vm, name);
1353 vm->vm_nfreetags = 0;
1354 LIST_INIT(&vm->vm_freetags);
1355 strlcpy(vm->vm_name, name, sizeof(vm->vm_name));
1356 vm->vm_quantum_mask = quantum - 1;
1357 vm->vm_quantum_shift = flsl(quantum) - 1;
1358 vm->vm_nbusytag = 0;
1359 vm->vm_size = 0;
1360 vm->vm_limit = 0;
1361 vm->vm_inuse = 0;
1362 #ifdef _KERNEL
1363 qc_init(vm, qcache_max);
1364 #else
1365 (void)qcache_max;
1366 #endif
1367
1368 TAILQ_INIT(&vm->vm_seglist);
1369 vm->vm_cursor.bt_start = vm->vm_cursor.bt_size = 0;
1370 vm->vm_cursor.bt_type = BT_TYPE_CURSOR;
1371 TAILQ_INSERT_TAIL(&vm->vm_seglist, &vm->vm_cursor, bt_seglist);
1372
1373 for (i = 0; i < VMEM_MAXORDER; i++)
1374 LIST_INIT(&vm->vm_freelist[i]);
1375
1376 memset(&vm->vm_hash0, 0, sizeof(vm->vm_hash0));
1377 vm->vm_hashsize = VMEM_HASHSIZE_MIN;
1378 vm->vm_hashlist = vm->vm_hash0;
1379
1380 if (size != 0) {
1381 if (vmem_add(vm, base, size, flags) != 0) {
1382 vmem_destroy1(vm);
1383 return NULL;
1384 }
1385 }
1386
1387 VMEM_LIST_LOCK();
1388 LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist);
1389 VMEM_LIST_UNLOCK();
1390
1391 return vm;
1392 }
1393
1394 /*
1395 * vmem_create: create an arena.
1396 */
1397 vmem_t *
1398 vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
1399 vmem_size_t quantum, vmem_size_t qcache_max, int flags)
1400 {
1401
1402 vmem_t *vm;
1403
1404 #ifdef _KERNEL
1405 vm = uma_zalloc(vmem_zone, flags & (M_WAITOK|M_NOWAIT));
1406 #else
1407 assert(quantum == 0);
1408 assert(qcache_max == 0);
1409 vm = malloc(sizeof(vmem_t));
1410 #endif
1411 if (vm == NULL)
1412 return (NULL);
1413 if (vmem_init(vm, name, base, size, quantum, qcache_max,
1414 flags) == NULL)
1415 return (NULL);
1416 return (vm);
1417 }
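
/*
 * Illustrative usage sketch (hypothetical caller, kernel build; base and
 * size are arbitrary): create a page-quantum arena over a fixed range,
 * allocate and free a few pages, then tear it down.
 *
 *	vmem_t *arena;
 *	vmem_addr_t addr;
 *
 *	arena = vmem_create("example", 0x100000, 0x400000, PAGE_SIZE, 0,
 *	    M_WAITOK);
 *	if (vmem_alloc(arena, 3 * PAGE_SIZE, M_BESTFIT | M_WAITOK,
 *	    &addr) == 0)
 *		vmem_free(arena, addr, 3 * PAGE_SIZE);
 *	vmem_destroy(arena);
 */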
1418
1419 void
1420 vmem_destroy(vmem_t *vm)
1421 {
1422 VMEM_LIST_LOCK();
1423 LIST_REMOVE(vm, vm_alllist);
1424 VMEM_LIST_UNLOCK();
1425
1426 vmem_destroy1(vm);
1427 }
1428
1429 vmem_size_t
1430 vmem_roundup_size(vmem_t *vm, vmem_size_t size)
1431 {
1432
1433 return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
1434 }
1435
1436 /*
1437 * vmem_alloc: allocate resource from the arena.
1438 */
1439 int
1440 vmem_alloc(vmem_t *vm, vmem_size_t size, int flags, vmem_addr_t *addrp)
1441 {
1442 const int strat __unused = flags & VMEM_FITMASK;
1443
1444 flags &= VMEM_FLAGS;
1445 MPASS(size > 0);
1446 MPASS(strat == M_BESTFIT || strat == M_FIRSTFIT || strat == M_NEXTFIT);
1447 if ((flags & M_NOWAIT) == 0)
1448 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "vmem_alloc");
1449
1450 #ifdef _KERNEL
1451 if (size <= vm->vm_qcache_max) {
1452 qcache_t *qc;
1453
1454 /*
1455 * Resource 0 cannot be cached, so avoid a blocking allocation
1456 * in qc_import() and give the vmem_xalloc() call below a chance
1457 * to return 0.
1458 */
1459 qc = &vm->vm_qcache[(size - 1) >> vm->vm_quantum_shift];
1460 *addrp = (vmem_addr_t)uma_zalloc(qc->qc_cache,
1461 (flags & ~M_WAITOK) | M_NOWAIT);
1462 if (__predict_true(*addrp != 0))
1463 return (0);
1464 }
1465 #endif
1466
1467 return (vmem_xalloc(vm, size, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX,
1468 flags, addrp));
1469 }
1470
1471 int
1472 vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_size_t align,
1473 const vmem_size_t phase, const vmem_size_t nocross,
1474 const vmem_addr_t minaddr, const vmem_addr_t maxaddr, int flags,
1475 vmem_addr_t *addrp)
1476 {
1477 const vmem_size_t size = vmem_roundup_size(vm, size0);
1478 struct vmem_freelist *list;
1479 struct vmem_freelist *first;
1480 struct vmem_freelist *end;
1481 bt_t *bt;
1482 int error;
1483 int strat;
1484
1485 flags &= VMEM_FLAGS;
1486 strat = flags & VMEM_FITMASK;
1487 MPASS(size0 > 0);
1488 MPASS(size > 0);
1489 MPASS(strat == M_BESTFIT || strat == M_FIRSTFIT || strat == M_NEXTFIT);
1490 MPASS((flags & (M_NOWAIT|M_WAITOK)) != (M_NOWAIT|M_WAITOK));
1491 if ((flags & M_NOWAIT) == 0)
1492 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "vmem_xalloc");
1493 MPASS((align & vm->vm_quantum_mask) == 0);
1494 MPASS((align & (align - 1)) == 0);
1495 MPASS((phase & vm->vm_quantum_mask) == 0);
1496 MPASS((nocross & vm->vm_quantum_mask) == 0);
1497 MPASS((nocross & (nocross - 1)) == 0);
1498 MPASS((align == 0 && phase == 0) || phase < align);
1499 MPASS(nocross == 0 || nocross >= size);
1500 MPASS(minaddr <= maxaddr);
1501 MPASS(!VMEM_CROSS_P(phase, phase + size - 1, nocross));
1502 if (strat == M_NEXTFIT)
1503 MPASS(minaddr == VMEM_ADDR_MIN && maxaddr == VMEM_ADDR_MAX);
1504
1505 if (align == 0)
1506 align = vm->vm_quantum_mask + 1;
1507 *addrp = 0;
1508
1509 /*
1510 * Next-fit allocations don't use the freelists.
1511 */
1512 if (strat == M_NEXTFIT)
1513 return (vmem_xalloc_nextfit(vm, size0, align, phase, nocross,
1514 flags, addrp));
1515
1516 end = &vm->vm_freelist[VMEM_MAXORDER];
1517 /*
1518 * choose a free block from which we allocate.
1519 */
1520 first = bt_freehead_toalloc(vm, size, strat);
1521 VMEM_LOCK(vm);
1522
1523 /*
1524 * Make sure we have enough tags to complete the operation.
1525 */
1526 error = bt_fill(vm, flags);
1527 if (error != 0)
1528 goto out;
1529 for (;;) {
1530 /*
1531 * Scan freelists looking for a tag that satisfies the
1532 * allocation. If we're doing BESTFIT we may encounter
1533 * sizes below the request. If we're doing FIRSTFIT we
1534 * inspect only the first element from each list.
1535 */
1536 for (list = first; list < end; list++) {
1537 LIST_FOREACH(bt, list, bt_freelist) {
1538 if (bt->bt_size >= size) {
1539 error = vmem_fit(bt, size, align, phase,
1540 nocross, minaddr, maxaddr, addrp);
1541 if (error == 0) {
1542 vmem_clip(vm, bt, *addrp, size);
1543 goto out;
1544 }
1545 }
1546 /* FIRST skips to the next list. */
1547 if (strat == M_FIRSTFIT)
1548 break;
1549 }
1550 }
1551
1552 /*
1553 * Retry if the fast algorithm failed.
1554 */
1555 if (strat == M_FIRSTFIT) {
1556 strat = M_BESTFIT;
1557 first = bt_freehead_toalloc(vm, size, strat);
1558 continue;
1559 }
1560
1561 /*
1562 * Try a few measures to bring additional resources into the
1563 * arena. If all else fails, we will sleep waiting for
1564 * resources to be freed.
1565 */
1566 if (!vmem_try_fetch(vm, size, align, flags)) {
1567 error = ENOMEM;
1568 break;
1569 }
1570 }
1571 out:
1572 VMEM_UNLOCK(vm);
1573 if (error != 0 && (flags & M_NOWAIT) == 0)
1574 panic("failed to allocate waiting allocation\n");
1575
1576 return (error);
1577 }
1578
1579 /*
1580 * vmem_free: free the resource to the arena.
1581 */
1582 void
1583 vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
1584 {
1585 MPASS(size > 0);
1586
1587 #ifdef _KERNEL
1588 if (size <= vm->vm_qcache_max &&
1589 __predict_true(addr >= VMEM_ADDR_QCACHE_MIN)) {
1590 qcache_t *qc;
1591
1592 qc = &vm->vm_qcache[(size - 1) >> vm->vm_quantum_shift];
1593 uma_zfree(qc->qc_cache, (void *)addr);
1594 } else
1595 #endif
1596 vmem_xfree(vm, addr, size);
1597 }
1598
1599 void
1600 vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size __unused)
1601 {
1602 bt_t *bt;
1603 bt_t *t;
1604
1605 MPASS(size > 0);
1606
1607 VMEM_LOCK(vm);
1608 bt = bt_lookupbusy(vm, addr);
1609 MPASS(bt != NULL);
1610 MPASS(bt->bt_start == addr);
1611 MPASS(bt->bt_size == vmem_roundup_size(vm, size) ||
1612 bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
1613 MPASS(bt->bt_type == BT_TYPE_BUSY);
1614 bt_rembusy(vm, bt);
1615 bt->bt_type = BT_TYPE_FREE;
1616
1617 /* coalesce */
1618 t = TAILQ_NEXT(bt, bt_seglist);
1619 if (t != NULL && t->bt_type == BT_TYPE_FREE) {
1620 MPASS(BT_END(bt) < t->bt_start); /* YYY */
1621 bt->bt_size += t->bt_size;
1622 bt_remfree(vm, t);
1623 bt_remseg(vm, t);
1624 }
1625 t = TAILQ_PREV(bt, vmem_seglist, bt_seglist);
1626 if (t != NULL && t->bt_type == BT_TYPE_FREE) {
1627 MPASS(BT_END(t) < bt->bt_start); /* YYY */
1628 bt->bt_size += t->bt_size;
1629 bt->bt_start = t->bt_start;
1630 bt_remfree(vm, t);
1631 bt_remseg(vm, t);
1632 }
1633
1634 if (!vmem_try_release(vm, bt, false)) {
1635 bt_insfree(vm, bt);
1636 VMEM_CONDVAR_BROADCAST(vm);
1637 bt_freetrim(vm, BT_MAXFREE);
1638 }
1639 }
1640
1641 /*
1642 * vmem_add: add a span of memory to the arena.
1643 *
1644 */
1645 int
1646 vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, int flags)
1647 {
1648 int error;
1649
1650 flags &= VMEM_FLAGS;
1651
1652 VMEM_LOCK(vm);
1653 error = bt_fill(vm, flags);
1654 if (error == 0)
1655 vmem_add1(vm, addr, size, BT_TYPE_SPAN_STATIC);
1656 VMEM_UNLOCK(vm);
1657
1658 return (error);
1659 }
1660
1661 /*
1662 * vmem_size: report information about the arena's size.
1663 */
1664 vmem_size_t
1665 vmem_size(vmem_t *vm, int typemask)
1666 {
1667 int i;
1668
1669 switch (typemask) {
1670 case VMEM_ALLOC:
1671 return vm->vm_inuse;
1672 case VMEM_FREE:
1673 return vm->vm_size - vm->vm_inuse;
1674 case VMEM_FREE|VMEM_ALLOC:
1675 return vm->vm_size;
1676 case VMEM_MAXFREE:
1677 VMEM_LOCK(vm);
1678 for (i = VMEM_MAXORDER - 1; i >= 0; i--) {
1679 if (LIST_EMPTY(&vm->vm_freelist[i]))
1680 continue;
1681 VMEM_UNLOCK(vm);
1682 return ((vmem_size_t)ORDER2SIZE(i) <<
1683 vm->vm_quantum_shift);
1684 }
1685 VMEM_UNLOCK(vm);
1686 return (0);
1687 default:
1688 panic("vmem_size");
1689 return (0);
1690 }
1691 }
1692
1693 /* ---- debug */
1694
1695 #ifdef _KERNEL
1696 #if defined(DDB) || defined(DIAGNOSTIC)
1697
1698 static void bt_dump(const bt_t *, int (*)(const char *, ...)
1699 __printflike(1, 2));
1700
1701 static const char *
1702 bt_type_string(int type)
1703 {
1704
1705 switch (type) {
1706 case BT_TYPE_BUSY:
1707 return "busy";
1708 case BT_TYPE_FREE:
1709 return "free";
1710 case BT_TYPE_SPAN:
1711 return "span";
1712 case BT_TYPE_SPAN_STATIC:
1713 return "static span";
1714 case BT_TYPE_CURSOR:
1715 return "cursor";
1716 default:
1717 break;
1718 }
1719 return "BOGUS";
1720 }
1721
1722 static void
1723 bt_dump(const bt_t *bt, int (*pr)(const char *, ...))
1724 {
1725
1726 (*pr)("\t%p: %jx %jx, %d(%s)\n",
1727 bt, (intmax_t)bt->bt_start, (intmax_t)bt->bt_size,
1728 bt->bt_type, bt_type_string(bt->bt_type));
1729 }
1730
1731 static void
1732 vmem_dump(const vmem_t *vm, int (*pr)(const char *, ...) __printflike(1, 2))
1733 {
1734 const bt_t *bt;
1735 int i;
1736
1737 (*pr)("vmem %p '%s'\n", vm, vm->vm_name);
1738 TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1739 bt_dump(bt, pr);
1740 }
1741
1742 for (i = 0; i < VMEM_MAXORDER; i++) {
1743 const struct vmem_freelist *fl = &vm->vm_freelist[i];
1744
1745 if (LIST_EMPTY(fl)) {
1746 continue;
1747 }
1748
1749 (*pr)("freelist[%d]\n", i);
1750 LIST_FOREACH(bt, fl, bt_freelist) {
1751 bt_dump(bt, pr);
1752 }
1753 }
1754 }
1755
1756 #endif /* defined(DDB) || defined(DIAGNOSTIC) */
1757
1758 #if defined(DDB)
1759 #include <ddb/ddb.h>
1760
1761 static bt_t *
1762 vmem_whatis_lookup(vmem_t *vm, vmem_addr_t addr)
1763 {
1764 bt_t *bt;
1765
1766 TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1767 if (BT_ISSPAN_P(bt)) {
1768 continue;
1769 }
1770 if (bt->bt_start <= addr && addr <= BT_END(bt)) {
1771 return bt;
1772 }
1773 }
1774
1775 return NULL;
1776 }
1777
1778 void
1779 vmem_whatis(vmem_addr_t addr, int (*pr)(const char *, ...))
1780 {
1781 vmem_t *vm;
1782
1783 LIST_FOREACH(vm, &vmem_list, vm_alllist) {
1784 bt_t *bt;
1785
1786 bt = vmem_whatis_lookup(vm, addr);
1787 if (bt == NULL) {
1788 continue;
1789 }
1790 (*pr)("%p is %p+%zu in VMEM '%s' (%s)\n",
1791 (void *)addr, (void *)bt->bt_start,
1792 (vmem_size_t)(addr - bt->bt_start), vm->vm_name,
1793 (bt->bt_type == BT_TYPE_BUSY) ? "allocated" : "free");
1794 }
1795 }
1796
1797 void
1798 vmem_printall(const char *modif, int (*pr)(const char *, ...))
1799 {
1800 const vmem_t *vm;
1801
1802 LIST_FOREACH(vm, &vmem_list, vm_alllist) {
1803 vmem_dump(vm, pr);
1804 }
1805 }
1806
1807 void
1808 vmem_print(vmem_addr_t addr, const char *modif, int (*pr)(const char *, ...))
1809 {
1810 const vmem_t *vm = (const void *)addr;
1811
1812 vmem_dump(vm, pr);
1813 }
1814
1815 DB_SHOW_COMMAND(vmemdump, vmemdump)
1816 {
1817
1818 if (!have_addr) {
1819 db_printf("usage: show vmemdump <addr>\n");
1820 return;
1821 }
1822
1823 vmem_dump((const vmem_t *)addr, db_printf);
1824 }
1825
1826 DB_SHOW_ALL_COMMAND(vmemdump, vmemdumpall)
1827 {
1828 const vmem_t *vm;
1829
1830 LIST_FOREACH(vm, &vmem_list, vm_alllist)
1831 vmem_dump(vm, db_printf);
1832 }
1833
1834 DB_SHOW_COMMAND(vmem, vmem_summ)
1835 {
1836 const vmem_t *vm = (const void *)addr;
1837 const bt_t *bt;
1838 size_t ft[VMEM_MAXORDER], ut[VMEM_MAXORDER];
1839 size_t fs[VMEM_MAXORDER], us[VMEM_MAXORDER];
1840 int ord;
1841
1842 if (!have_addr) {
1843 db_printf("usage: show vmem <addr>\n");
1844 return;
1845 }
1846
1847 db_printf("vmem %p '%s'\n", vm, vm->vm_name);
1848 db_printf("\tquantum:\t%zu\n", vm->vm_quantum_mask + 1);
1849 db_printf("\tsize:\t%zu\n", vm->vm_size);
1850 db_printf("\tinuse:\t%zu\n", vm->vm_inuse);
1851 db_printf("\tfree:\t%zu\n", vm->vm_size - vm->vm_inuse);
1852 db_printf("\tbusy tags:\t%d\n", vm->vm_nbusytag);
1853 db_printf("\tfree tags:\t%d\n", vm->vm_nfreetags);
1854
1855 memset(&ft, 0, sizeof(ft));
1856 memset(&ut, 0, sizeof(ut));
1857 memset(&fs, 0, sizeof(fs));
1858 memset(&us, 0, sizeof(us));
1859 TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1860 ord = SIZE2ORDER(bt->bt_size >> vm->vm_quantum_shift);
1861 if (bt->bt_type == BT_TYPE_BUSY) {
1862 ut[ord]++;
1863 us[ord] += bt->bt_size;
1864 } else if (bt->bt_type == BT_TYPE_FREE) {
1865 ft[ord]++;
1866 fs[ord] += bt->bt_size;
1867 }
1868 }
1869 db_printf("\t\t\tinuse\tsize\t\tfree\tsize\n");
1870 for (ord = 0; ord < VMEM_MAXORDER; ord++) {
1871 if (ut[ord] == 0 && ft[ord] == 0)
1872 continue;
1873 db_printf("\t%-15zu %zu\t%-15zu %zu\t%-16zu\n",
1874 ORDER2SIZE(ord) << vm->vm_quantum_shift,
1875 ut[ord], us[ord], ft[ord], fs[ord]);
1876 }
1877 }
1878
1879 DB_SHOW_ALL_COMMAND(vmem, vmem_summall)
1880 {
1881 const vmem_t *vm;
1882
1883 LIST_FOREACH(vm, &vmem_list, vm_alllist)
1884 vmem_summ((db_expr_t)vm, TRUE, count, modif);
1885 }
1886 #endif /* defined(DDB) */
1887
1888 #define vmem_printf printf
1889
1890 #if defined(DIAGNOSTIC)
1891
1892 static bool
1893 vmem_check_sanity(vmem_t *vm)
1894 {
1895 const bt_t *bt, *bt2;
1896
1897 MPASS(vm != NULL);
1898
1899 TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1900 if (bt->bt_start > BT_END(bt)) {
1901 printf("corrupted tag\n");
1902 bt_dump(bt, vmem_printf);
1903 return false;
1904 }
1905 }
1906 TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1907 if (bt->bt_type == BT_TYPE_CURSOR) {
1908 if (bt->bt_start != 0 || bt->bt_size != 0) {
1909 printf("corrupted cursor\n");
1910 return false;
1911 }
1912 continue;
1913 }
1914 TAILQ_FOREACH(bt2, &vm->vm_seglist, bt_seglist) {
1915 if (bt == bt2) {
1916 continue;
1917 }
1918 if (bt2->bt_type == BT_TYPE_CURSOR) {
1919 continue;
1920 }
1921 if (BT_ISSPAN_P(bt) != BT_ISSPAN_P(bt2)) {
1922 continue;
1923 }
1924 if (bt->bt_start <= BT_END(bt2) &&
1925 bt2->bt_start <= BT_END(bt)) {
1926 printf("overlapping tags\n");
1927 bt_dump(bt, vmem_printf);
1928 bt_dump(bt2, vmem_printf);
1929 return false;
1930 }
1931 }
1932 }
1933
1934 return true;
1935 }
1936
1937 static void
1938 vmem_check(vmem_t *vm)
1939 {
1940
1941 if (!vmem_check_sanity(vm)) {
1942 panic("insanity vmem %p", vm);
1943 }
1944 }
1945
1946 #endif /* defined(DIAGNOSTIC) */
1947 #endif /* _KERNEL */
1948