/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi,
 * Copyright (c) 2013 EMC Corp.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * From:
 *	$NetBSD: vmem_impl.h,v 1.2 2013/01/29 21:26:24 para Exp $
 *	$NetBSD: subr_vmem.c,v 1.83 2013/03/06 11:20:10 yamt Exp $
 */

/*
 * reference:
 * -	Magazines and Vmem: Extending the Slab Allocator
 *	to Many CPUs and Arbitrary Resources
 *	http://www.usenix.org/event/usenix01/bonwick.html
 */

#include <sys/cdefs.h>
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/callout.h>
#include <sys/hash.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/smp.h>
#include <sys/condvar.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/vmem.h>
#include <sys/vmmeter.h>

#include "opt_vm.h"

#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>
#include <vm/uma_int.h>

#define	VMEM_OPTORDER		5
#define	VMEM_OPTVALUE		(1 << VMEM_OPTORDER)
#define	VMEM_MAXORDER						\
    (VMEM_OPTVALUE - 1 + sizeof(vmem_size_t) * NBBY - VMEM_OPTORDER)

#define	VMEM_HASHSIZE_MIN	16
#define	VMEM_HASHSIZE_MAX	131072

#define	VMEM_QCACHE_IDX_MAX	16

#define	VMEM_FITMASK	(M_BESTFIT | M_FIRSTFIT | M_NEXTFIT)

#define	VMEM_FLAGS	(M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM | \
    M_BESTFIT | M_FIRSTFIT | M_NEXTFIT)

#define	BT_FLAGS	(M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM)

#define	QC_NAME_MAX	16
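
/*
 * Illustrative arithmetic for the constants above: with VMEM_OPTORDER == 5
 * and a 64-bit vmem_size_t, VMEM_OPTVALUE is 32 and VMEM_MAXORDER is
 * 32 - 1 + 64 - 5 == 90, so each arena keeps 90 free lists.  Orders 0..30
 * hold exact sizes of 1..31 quanta; higher orders hold power-of-two ranges,
 * e.g. freelist[31] holds segments of 32..63 quanta.
 */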
/*
 * Data structures private to vmem.
 */
MALLOC_DEFINE(M_VMEM, "vmem", "vmem internal structures");

typedef struct vmem_btag bt_t;

TAILQ_HEAD(vmem_seglist, vmem_btag);
LIST_HEAD(vmem_freelist, vmem_btag);
LIST_HEAD(vmem_hashlist, vmem_btag);

struct qcache {
	uma_zone_t	qc_cache;
	vmem_t		*qc_vmem;
	vmem_size_t	qc_size;
	char		qc_name[QC_NAME_MAX];
};
typedef struct qcache qcache_t;
#define	QC_POOL_TO_QCACHE(pool)	((qcache_t *)(pool->pr_qcache))

#define	VMEM_NAME_MAX	16

/* boundary tag */
struct vmem_btag {
	TAILQ_ENTRY(vmem_btag) bt_seglist;
	union {
		LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
		LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
	} bt_u;
#define	bt_hashlist	bt_u.u_hashlist
#define	bt_freelist	bt_u.u_freelist
	vmem_addr_t	bt_start;
	vmem_size_t	bt_size;
	int		bt_type;
};

/* vmem arena */
struct vmem {
	struct mtx_padalign	vm_lock;
	struct cv		vm_cv;
	char			vm_name[VMEM_NAME_MAX+1];
	LIST_ENTRY(vmem)	vm_alllist;
	struct vmem_hashlist	vm_hash0[VMEM_HASHSIZE_MIN];
	struct vmem_freelist	vm_freelist[VMEM_MAXORDER];
	struct vmem_seglist	vm_seglist;
	struct vmem_hashlist	*vm_hashlist;
	vmem_size_t		vm_hashsize;

	/* Constant after init */
	vmem_size_t		vm_qcache_max;
	vmem_size_t		vm_quantum_mask;
	vmem_size_t		vm_import_quantum;
	int			vm_quantum_shift;

	/* Written on alloc/free */
	LIST_HEAD(, vmem_btag)	vm_freetags;
	int			vm_nfreetags;
	int			vm_nbusytag;
	vmem_size_t		vm_inuse;
	vmem_size_t		vm_size;
	vmem_size_t		vm_limit;
	struct vmem_btag	vm_cursor;

	/* Used on import. */
	vmem_import_t		*vm_importfn;
	vmem_release_t		*vm_releasefn;
	void			*vm_arg;

	/* Space exhaustion callback. */
	vmem_reclaim_t		*vm_reclaimfn;

	/* quantum cache */
	qcache_t		vm_qcache[VMEM_QCACHE_IDX_MAX];
};

#define	BT_TYPE_SPAN		1	/* Allocated from importfn */
#define	BT_TYPE_SPAN_STATIC	2	/* vmem_add() or create. */
#define	BT_TYPE_FREE		3	/* Available space. */
#define	BT_TYPE_BUSY		4	/* Used space. */
#define	BT_TYPE_CURSOR		5	/* Cursor for nextfit allocations. */
#define	BT_ISSPAN_P(bt)	((bt)->bt_type <= BT_TYPE_SPAN_STATIC)

#define	BT_END(bt)	((bt)->bt_start + (bt)->bt_size - 1)

#if defined(DIAGNOSTIC)
static int enable_vmem_check = 0;
SYSCTL_INT(_debug, OID_AUTO, vmem_check, CTLFLAG_RWTUN,
    &enable_vmem_check, 0, "Enable vmem check");
static void vmem_check(vmem_t *);
#endif

static struct callout	vmem_periodic_ch;
static int		vmem_periodic_interval;
static struct task	vmem_periodic_wk;

static struct mtx_padalign __exclusive_cache_line vmem_list_lock;
static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);
static uma_zone_t vmem_zone;

/* ---- misc */
#define	VMEM_CONDVAR_INIT(vm, wchan)	cv_init(&vm->vm_cv, wchan)
#define	VMEM_CONDVAR_DESTROY(vm)	cv_destroy(&vm->vm_cv)
#define	VMEM_CONDVAR_WAIT(vm)		cv_wait(&vm->vm_cv, &vm->vm_lock)
#define	VMEM_CONDVAR_BROADCAST(vm)	cv_broadcast(&vm->vm_cv)

#define	VMEM_LOCK(vm)		mtx_lock(&vm->vm_lock)
#define	VMEM_TRYLOCK(vm)	mtx_trylock(&vm->vm_lock)
#define	VMEM_UNLOCK(vm)		mtx_unlock(&vm->vm_lock)
#define	VMEM_LOCK_INIT(vm, name) mtx_init(&vm->vm_lock, (name), NULL, MTX_DEF)
#define	VMEM_LOCK_DESTROY(vm)	mtx_destroy(&vm->vm_lock)
#define	VMEM_ASSERT_LOCKED(vm)	mtx_assert(&vm->vm_lock, MA_OWNED);

#define	VMEM_ALIGNUP(addr, align)	(-(-(addr) & -(align)))

#define	VMEM_CROSS_P(addr1, addr2, boundary) \
	((((addr1) ^ (addr2)) & -(boundary)) != 0)

#define	ORDER2SIZE(order)	((order) < VMEM_OPTVALUE ? ((order) + 1) : \
    (vmem_size_t)1 << ((order) - (VMEM_OPTVALUE - VMEM_OPTORDER - 1)))
#define	SIZE2ORDER(size)	((size) <= VMEM_OPTVALUE ? ((size) - 1) : \
    (flsl(size) + (VMEM_OPTVALUE - VMEM_OPTORDER - 2)))
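
/*
 * Worked example: a 40-quantum size has flsl(40) == 6, so SIZE2ORDER(40) ==
 * 6 + (32 - 5 - 2) == 31, while ORDER2SIZE(31) == 32 and ORDER2SIZE(32) ==
 * 1 << (32 - 26) == 64; order 31 therefore spans sizes 32..63.  Similarly,
 * VMEM_ALIGNUP(0xe00, 0x1000) == -(-0xe00 & -0x1000) == 0x1000: the
 * negate-mask-negate sequence rounds up to a power-of-two boundary without
 * a branch.
 */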
/*
 * Maximum number of boundary tags that may be required to satisfy an
 * allocation.  Two may be required to import.  Another two may be
 * required to clip edges.
 */
#define	BT_MAXALLOC	4

/*
 * Max free limits the number of locally cached boundary tags.  We
 * just want to avoid hitting the zone allocator for every call.
 */
#define	BT_MAXFREE	(BT_MAXALLOC * 8)

/* Allocator for boundary tags. */
static uma_zone_t vmem_bt_zone;

/* boot time arena storage. */
static struct vmem kernel_arena_storage;
static struct vmem buffer_arena_storage;
static struct vmem transient_arena_storage;
vmem_t *kernel_arena = &kernel_arena_storage;
vmem_t *buffer_arena = &buffer_arena_storage;
vmem_t *transient_arena = &transient_arena_storage;

#ifdef DEBUG_MEMGUARD
static struct vmem memguard_arena_storage;
vmem_t *memguard_arena = &memguard_arena_storage;
#endif

static bool
bt_isbusy(bt_t *bt)
{
	return (bt->bt_type == BT_TYPE_BUSY);
}

static bool
bt_isfree(bt_t *bt)
{
	return (bt->bt_type == BT_TYPE_FREE);
}

/*
 * Fill the vmem's boundary tag cache.  We guarantee that boundary tag
 * allocation will not fail once bt_fill() passes.  To do so we cache
 * at least the maximum possible tag allocations in the arena.
 */
static __noinline int
_bt_fill(vmem_t *vm, int flags)
{
	bt_t *bt;

	VMEM_ASSERT_LOCKED(vm);

	/*
	 * Only allow the kernel arena and arenas derived from kernel arena to
	 * dip into reserve tags.  They are where new tags come from.
	 */
	flags &= BT_FLAGS;
	if (vm != kernel_arena && vm->vm_arg != kernel_arena)
		flags &= ~M_USE_RESERVE;

	/*
	 * Loop until we meet the reserve.  To minimize the lock shuffle
	 * and prevent simultaneous fills we first try a NOWAIT regardless
	 * of the caller's flags.  Specify M_NOVM so we don't recurse while
	 * holding a vmem lock.
	 */
	while (vm->vm_nfreetags < BT_MAXALLOC) {
		bt = uma_zalloc(vmem_bt_zone,
		    (flags & M_USE_RESERVE) | M_NOWAIT | M_NOVM);
		if (bt == NULL) {
			VMEM_UNLOCK(vm);
			bt = uma_zalloc(vmem_bt_zone, flags);
			VMEM_LOCK(vm);
			if (bt == NULL)
				break;
		}
		LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
		vm->vm_nfreetags++;
	}

	if (vm->vm_nfreetags < BT_MAXALLOC)
		return ENOMEM;

	return 0;
}

static inline int
bt_fill(vmem_t *vm, int flags)
{
	if (vm->vm_nfreetags >= BT_MAXALLOC)
		return (0);
	return (_bt_fill(vm, flags));
}

/*
 * Pop a tag off of the freetag stack.
 */
static bt_t *
bt_alloc(vmem_t *vm)
{
	bt_t *bt;

	VMEM_ASSERT_LOCKED(vm);
	bt = LIST_FIRST(&vm->vm_freetags);
	MPASS(bt != NULL);
	LIST_REMOVE(bt, bt_freelist);
	vm->vm_nfreetags--;

	return bt;
}
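
/*
 * Sketch of the intended calling pattern (illustrative only): with the
 * arena locked, a caller first tops up the tag cache and may then take
 * up to BT_MAXALLOC tags without risk of failure:
 *
 *	VMEM_LOCK(vm);
 *	if (bt_fill(vm, flags) != 0)
 *		return (ENOMEM);
 *	bt = bt_alloc(vm);	(guaranteed to succeed at this point)
 */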
/*
 * Trim the per-vmem free list.  Returns with the lock released to
 * avoid allocator recursions.
 */
static void
bt_freetrim(vmem_t *vm, int freelimit)
{
	LIST_HEAD(, vmem_btag) freetags;
	bt_t *bt;

	LIST_INIT(&freetags);
	VMEM_ASSERT_LOCKED(vm);
	while (vm->vm_nfreetags > freelimit) {
		bt = LIST_FIRST(&vm->vm_freetags);
		LIST_REMOVE(bt, bt_freelist);
		vm->vm_nfreetags--;
		LIST_INSERT_HEAD(&freetags, bt, bt_freelist);
	}
	VMEM_UNLOCK(vm);
	while ((bt = LIST_FIRST(&freetags)) != NULL) {
		LIST_REMOVE(bt, bt_freelist);
		uma_zfree(vmem_bt_zone, bt);
	}
}

static inline void
bt_free(vmem_t *vm, bt_t *bt)
{

	VMEM_ASSERT_LOCKED(vm);
	MPASS(LIST_FIRST(&vm->vm_freetags) != bt);
	LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
	vm->vm_nfreetags++;
}

/*
 * Hide MAXALLOC tags before dropping the arena lock to ensure that a
 * concurrent allocation attempt does not grab them.
 */
static void
bt_save(vmem_t *vm)
{
	KASSERT(vm->vm_nfreetags >= BT_MAXALLOC,
	    ("%s: insufficient free tags %d", __func__, vm->vm_nfreetags));
	vm->vm_nfreetags -= BT_MAXALLOC;
}

static void
bt_restore(vmem_t *vm)
{
	vm->vm_nfreetags += BT_MAXALLOC;
}

/*
 * freelist[0] ... [1, 1]
 * freelist[1] ... [2, 2]
 *  :
 * freelist[29] ... [30, 30]
 * freelist[30] ... [31, 31]
 * freelist[31] ... [32, 63]
 * freelist[32] ... [64, 127]
 *  :
 * freelist[n] ... [(1 << (n - 26)), (1 << (n - 25)) - 1]
 *  :
 */

static struct vmem_freelist *
bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	const int idx = SIZE2ORDER(qsize);

	MPASS(size != 0 && qsize != 0);
	MPASS((size & vm->vm_quantum_mask) == 0);
	MPASS(idx >= 0);
	MPASS(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}
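
/*
 * For example, a free segment of 40 quanta is stored on freelist[31]
 * (the [32, 63] bucket) by bt_freehead_tofree() above, while an
 * M_FIRSTFIT allocation of 40 quanta searches freelist[32] and up (see
 * bt_freehead_toalloc() below), so that any segment found there is
 * already known to be large enough.
 */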
/*
 * bt_freehead_toalloc: return the freelist for the given size and allocation
 * strategy.
 *
 * For M_FIRSTFIT, return the list in which any blocks are large enough
 * for the requested size.  Otherwise, return the list which can have blocks
 * large enough for the requested size.
 */
static struct vmem_freelist *
bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, int strat)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	int idx = SIZE2ORDER(qsize);

	MPASS(size != 0 && qsize != 0);
	MPASS((size & vm->vm_quantum_mask) == 0);

	if (strat == M_FIRSTFIT && ORDER2SIZE(idx) != qsize) {
		idx++;
		/* check too large request? */
	}
	MPASS(idx >= 0);
	MPASS(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}

/* ---- boundary tag hash */

static struct vmem_hashlist *
bt_hashhead(vmem_t *vm, vmem_addr_t addr)
{
	struct vmem_hashlist *list;
	unsigned int hash;

	hash = hash32_buf(&addr, sizeof(addr), 0);
	list = &vm->vm_hashlist[hash % vm->vm_hashsize];

	return list;
}

static bt_t *
bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
{
	struct vmem_hashlist *list;
	bt_t *bt;

	VMEM_ASSERT_LOCKED(vm);
	list = bt_hashhead(vm, addr);
	LIST_FOREACH(bt, list, bt_hashlist) {
		if (bt->bt_start == addr) {
			break;
		}
	}

	return bt;
}

static void
bt_rembusy(vmem_t *vm, bt_t *bt)
{

	VMEM_ASSERT_LOCKED(vm);
	MPASS(vm->vm_nbusytag > 0);
	vm->vm_inuse -= bt->bt_size;
	vm->vm_nbusytag--;
	LIST_REMOVE(bt, bt_hashlist);
}

static void
bt_insbusy(vmem_t *vm, bt_t *bt)
{
	struct vmem_hashlist *list;

	VMEM_ASSERT_LOCKED(vm);
	MPASS(bt->bt_type == BT_TYPE_BUSY);

	list = bt_hashhead(vm, bt->bt_start);
	LIST_INSERT_HEAD(list, bt, bt_hashlist);
	vm->vm_nbusytag++;
	vm->vm_inuse += bt->bt_size;
}

/* ---- boundary tag list */

static void
bt_remseg(vmem_t *vm, bt_t *bt)
{

	MPASS(bt->bt_type != BT_TYPE_CURSOR);
	TAILQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
	bt_free(vm, bt);
}

static void
bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
{

	TAILQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
}

static void
bt_insseg_tail(vmem_t *vm, bt_t *bt)
{

	TAILQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
}

static void
bt_remfree(vmem_t *vm __unused, bt_t *bt)
{

	MPASS(bt->bt_type == BT_TYPE_FREE);

	LIST_REMOVE(bt, bt_freelist);
}

static void
bt_insfree(vmem_t *vm, bt_t *bt)
{
	struct vmem_freelist *list;

	list = bt_freehead_tofree(vm, bt->bt_size);
	LIST_INSERT_HEAD(list, bt, bt_freelist);
}

/* ---- vmem internal functions */

/*
 * Import from the arena into the quantum cache in UMA.
 *
 * We use VMEM_ADDR_QCACHE_MIN instead of 0: uma_zalloc() returns 0 to indicate
 * failure, so UMA can't be used to cache a resource with value 0.
 */
static int
qc_import(void *arg, void **store, int cnt, int domain, int flags)
{
	qcache_t *qc;
	vmem_addr_t addr;
	int i;

	KASSERT((flags & M_WAITOK) == 0, ("blocking allocation"));

	qc = arg;
	for (i = 0; i < cnt; i++) {
		if (vmem_xalloc(qc->qc_vmem, qc->qc_size, 0, 0, 0,
		    VMEM_ADDR_QCACHE_MIN, VMEM_ADDR_MAX, flags, &addr) != 0)
			break;
		store[i] = (void *)addr;
	}
	return (i);
}
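
/*
 * Illustration: an arena with a 4 KB quantum and a 64 KB qcache_max gets
 * 16 UMA caches covering requests of 1..16 quanta (named, e.g.,
 * "foo-4096" ... "foo-65536" for a hypothetical arena "foo"); qc_import()
 * above refills those caches from the arena only when the cached objects
 * run out.
 */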
/*
 * Release memory from the UMA cache to the arena.
 */
static void
qc_release(void *arg, void **store, int cnt)
{
	qcache_t *qc;
	int i;

	qc = arg;
	for (i = 0; i < cnt; i++)
		vmem_xfree(qc->qc_vmem, (vmem_addr_t)store[i], qc->qc_size);
}

static void
qc_init(vmem_t *vm, vmem_size_t qcache_max)
{
	qcache_t *qc;
	vmem_size_t size;
	int qcache_idx_max;
	int i;

	MPASS((qcache_max & vm->vm_quantum_mask) == 0);
	qcache_idx_max = MIN(qcache_max >> vm->vm_quantum_shift,
	    VMEM_QCACHE_IDX_MAX);
	vm->vm_qcache_max = qcache_idx_max << vm->vm_quantum_shift;
	for (i = 0; i < qcache_idx_max; i++) {
		qc = &vm->vm_qcache[i];
		size = (i + 1) << vm->vm_quantum_shift;
		snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
		    vm->vm_name, size);
		qc->qc_vmem = vm;
		qc->qc_size = size;
		qc->qc_cache = uma_zcache_create(qc->qc_name, size,
		    NULL, NULL, NULL, NULL, qc_import, qc_release, qc, 0);
		MPASS(qc->qc_cache);
	}
}

static void
qc_destroy(vmem_t *vm)
{
	int qcache_idx_max;
	int i;

	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
	for (i = 0; i < qcache_idx_max; i++)
		uma_zdestroy(vm->vm_qcache[i].qc_cache);
}

static void
qc_drain(vmem_t *vm)
{
	int qcache_idx_max;
	int i;

	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
	for (i = 0; i < qcache_idx_max; i++)
		uma_zone_reclaim(vm->vm_qcache[i].qc_cache, UMA_RECLAIM_DRAIN);
}

#ifndef UMA_USE_DMAP

static struct mtx_padalign __exclusive_cache_line vmem_bt_lock;

/*
 * vmem_bt_alloc:  Allocate a new page of boundary tags.
 *
 * On architectures with UMA_USE_DMAP there is no recursion; no address
 * space need be allocated to allocate boundary tags.  For the others, we
 * must handle recursion.  Boundary tags are necessary to allocate new
 * boundary tags.
 *
 * UMA guarantees that enough tags are held in reserve to allocate a new
 * page of kva.  We dip into this reserve by specifying M_USE_RESERVE only
 * when allocating the page to hold new boundary tags.  In this way the
 * reserve is automatically filled by the allocation that uses the reserve.
 *
 * We still have to guarantee that the new tags are allocated atomically since
 * many threads may try concurrently.  The bt_lock provides this guarantee.
 * We convert WAITOK allocations to NOWAIT and then handle the blocking here
 * on failure.  It's ok to return NULL for a WAITOK allocation as UMA will
 * loop again after checking to see if we lost the race to allocate.
 *
 * There is a small race between vmem_bt_alloc() returning the page and the
 * zone lock being acquired to add the page to the zone.  For WAITOK
 * allocations we just pause briefly.  NOWAIT may experience a transient
 * failure.  To alleviate this we permit a small number of simultaneous
 * fills to proceed concurrently so NOWAIT is less likely to fail unless
 * we are really out of KVA.
 */
static void *
vmem_bt_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
    int wait)
{
	vmem_addr_t addr;

	*pflag = UMA_SLAB_KERNEL;

	/*
	 * Single thread boundary tag allocation so that the address space
	 * and memory are added in one atomic operation.
	 */
	mtx_lock(&vmem_bt_lock);
	if (vmem_xalloc(vm_dom[domain].vmd_kernel_arena, bytes, 0, 0, 0,
	    VMEM_ADDR_MIN, VMEM_ADDR_MAX,
	    M_NOWAIT | M_NOVM | M_USE_RESERVE | M_BESTFIT, &addr) == 0) {
		if (kmem_back_domain(domain, kernel_object, addr, bytes,
		    M_NOWAIT | M_USE_RESERVE) == 0) {
			mtx_unlock(&vmem_bt_lock);
			return ((void *)addr);
		}
		vmem_xfree(vm_dom[domain].vmd_kernel_arena, addr, bytes);
		mtx_unlock(&vmem_bt_lock);
		/*
		 * Out of memory, not address space.  This may not even be
		 * possible due to M_USE_RESERVE page allocation.
		 */
		if (wait & M_WAITOK)
			vm_wait_domain(domain);
		return (NULL);
	}
	mtx_unlock(&vmem_bt_lock);
	/*
	 * We're either out of address space or lost a fill race.
	 */
	if (wait & M_WAITOK)
		pause("btalloc", 1);

	return (NULL);
}
#endif

void
vmem_startup(void)
{

	mtx_init(&vmem_list_lock, "vmem list lock", NULL, MTX_DEF);
	vmem_zone = uma_zcreate("vmem",
	    sizeof(struct vmem), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	vmem_bt_zone = uma_zcreate("vmem btag",
	    sizeof(struct vmem_btag), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_VM);
#ifndef UMA_USE_DMAP
	mtx_init(&vmem_bt_lock, "btag lock", NULL, MTX_DEF);
	uma_prealloc(vmem_bt_zone, BT_MAXALLOC);
	/*
	 * Reserve enough tags to allocate new tags.  We allow multiple
	 * CPUs to attempt to allocate new tags concurrently to limit
	 * false restarts in UMA.  vmem_bt_alloc() allocates from a per-domain
	 * arena, which may involve importing a range from the kernel arena,
	 * so we need to keep at least 2 * BT_MAXALLOC tags reserved.
	 */
	uma_zone_reserve(vmem_bt_zone, 2 * BT_MAXALLOC * mp_ncpus);
	uma_zone_set_allocf(vmem_bt_zone, vmem_bt_alloc);
#endif
}
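
/*
 * Illustrative sizing: with BT_MAXALLOC == 4, a 16-CPU machine reserves
 * 2 * 4 * 16 == 128 boundary tags above, enough for each CPU to perform
 * one allocation plus one nested kernel-arena import concurrently.
 */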
/* ---- rehash */

static int
vmem_rehash(vmem_t *vm, vmem_size_t newhashsize)
{
	bt_t *bt;
	struct vmem_hashlist *newhashlist;
	struct vmem_hashlist *oldhashlist;
	vmem_size_t i, oldhashsize;

	MPASS(newhashsize > 0);

	newhashlist = malloc(sizeof(struct vmem_hashlist) * newhashsize,
	    M_VMEM, M_NOWAIT);
	if (newhashlist == NULL)
		return ENOMEM;
	for (i = 0; i < newhashsize; i++) {
		LIST_INIT(&newhashlist[i]);
	}

	VMEM_LOCK(vm);
	oldhashlist = vm->vm_hashlist;
	oldhashsize = vm->vm_hashsize;
	vm->vm_hashlist = newhashlist;
	vm->vm_hashsize = newhashsize;
	if (oldhashlist == NULL) {
		VMEM_UNLOCK(vm);
		return 0;
	}
	for (i = 0; i < oldhashsize; i++) {
		while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
			bt_rembusy(vm, bt);
			bt_insbusy(vm, bt);
		}
	}
	VMEM_UNLOCK(vm);

	if (oldhashlist != vm->vm_hash0)
		free(oldhashlist, M_VMEM);

	return 0;
}

static void
vmem_periodic_kick(void *dummy)
{

	taskqueue_enqueue(taskqueue_thread, &vmem_periodic_wk);
}

static void
vmem_periodic(void *unused, int pending)
{
	vmem_t *vm;
	vmem_size_t desired;
	vmem_size_t current;

	mtx_lock(&vmem_list_lock);
	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
#ifdef DIAGNOSTIC
		/* Convenient time to verify vmem state. */
		if (enable_vmem_check == 1) {
			VMEM_LOCK(vm);
			vmem_check(vm);
			VMEM_UNLOCK(vm);
		}
#endif
		desired = 1 << flsl(vm->vm_nbusytag);
		desired = MIN(MAX(desired, VMEM_HASHSIZE_MIN),
		    VMEM_HASHSIZE_MAX);
		current = vm->vm_hashsize;

		/* Grow in powers of two.  Shrink less aggressively. */
		if (desired >= current * 2 || desired * 4 <= current)
			vmem_rehash(vm, desired);

		/*
		 * Periodically wake up threads waiting for resources,
		 * so they can ask for reclamation again.
80243329ffcSAlexander Motin */ 80343329ffcSAlexander Motin VMEM_CONDVAR_BROADCAST(vm); 8045f518366SJeff Roberson } 8055f518366SJeff Roberson mtx_unlock(&vmem_list_lock); 8065f518366SJeff Roberson 8075f518366SJeff Roberson callout_reset(&vmem_periodic_ch, vmem_periodic_interval, 8085f518366SJeff Roberson vmem_periodic_kick, NULL); 8095f518366SJeff Roberson } 8105f518366SJeff Roberson 8115f518366SJeff Roberson static void 8125f518366SJeff Roberson vmem_start_callout(void *unused) 8135f518366SJeff Roberson { 8145f518366SJeff Roberson 8155f518366SJeff Roberson TASK_INIT(&vmem_periodic_wk, 0, vmem_periodic, NULL); 8165f518366SJeff Roberson vmem_periodic_interval = hz * 10; 817fd90e2edSJung-uk Kim callout_init(&vmem_periodic_ch, 1); 8185f518366SJeff Roberson callout_reset(&vmem_periodic_ch, vmem_periodic_interval, 8195f518366SJeff Roberson vmem_periodic_kick, NULL); 8205f518366SJeff Roberson } 8215f518366SJeff Roberson SYSINIT(vfs, SI_SUB_CONFIGURE, SI_ORDER_ANY, vmem_start_callout, NULL); 8225f518366SJeff Roberson 8235f518366SJeff Roberson static void 8249e3cc176SGleb Smirnoff vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, int type) 8255f518366SJeff Roberson { 82641c68387SMark Johnston bt_t *btfree, *btprev, *btspan; 8275f518366SJeff Roberson 82841c68387SMark Johnston VMEM_ASSERT_LOCKED(vm); 8295f518366SJeff Roberson MPASS(type == BT_TYPE_SPAN || type == BT_TYPE_SPAN_STATIC); 830d91722fbSJeff Roberson MPASS((size & vm->vm_quantum_mask) == 0); 8315f518366SJeff Roberson 83241c68387SMark Johnston if (vm->vm_releasefn == NULL) { 83341c68387SMark Johnston /* 83441c68387SMark Johnston * The new segment will never be released, so see if it is 83541c68387SMark Johnston * contiguous with respect to an existing segment. In this case 83641c68387SMark Johnston * a span tag is not needed, and it may be possible now or in 83741c68387SMark Johnston * the future to coalesce the new segment with an existing free 83841c68387SMark Johnston * segment. 
static void
vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, int type)
{
	bt_t *btfree, *btprev, *btspan;

	VMEM_ASSERT_LOCKED(vm);
	MPASS(type == BT_TYPE_SPAN || type == BT_TYPE_SPAN_STATIC);
	MPASS((size & vm->vm_quantum_mask) == 0);

	if (vm->vm_releasefn == NULL) {
		/*
		 * The new segment will never be released, so see if it is
		 * contiguous with respect to an existing segment.  In this case
		 * a span tag is not needed, and it may be possible now or in
		 * the future to coalesce the new segment with an existing free
		 * segment.
		 */
		btprev = TAILQ_LAST(&vm->vm_seglist, vmem_seglist);
		if ((!bt_isbusy(btprev) && !bt_isfree(btprev)) ||
		    btprev->bt_start + btprev->bt_size != addr)
			btprev = NULL;
	} else {
		btprev = NULL;
	}

	if (btprev == NULL || bt_isbusy(btprev)) {
		if (btprev == NULL) {
			btspan = bt_alloc(vm);
			btspan->bt_type = type;
			btspan->bt_start = addr;
			btspan->bt_size = size;
			bt_insseg_tail(vm, btspan);
		}

		btfree = bt_alloc(vm);
		btfree->bt_type = BT_TYPE_FREE;
		btfree->bt_start = addr;
		btfree->bt_size = size;
		bt_insseg_tail(vm, btfree);
		bt_insfree(vm, btfree);
	} else {
		bt_remfree(vm, btprev);
		btprev->bt_size += size;
		bt_insfree(vm, btprev);
	}

	vm->vm_size += size;
}

static void
vmem_destroy1(vmem_t *vm)
{
	bt_t *bt;

	/*
	 * Drain per-cpu quantum caches.
	 */
	qc_destroy(vm);

	/*
	 * The vmem should now only contain empty segments.
	 */
	VMEM_LOCK(vm);
	MPASS(vm->vm_nbusytag == 0);

	TAILQ_REMOVE(&vm->vm_seglist, &vm->vm_cursor, bt_seglist);
	while ((bt = TAILQ_FIRST(&vm->vm_seglist)) != NULL)
		bt_remseg(vm, bt);

	if (vm->vm_hashlist != NULL && vm->vm_hashlist != vm->vm_hash0)
		free(vm->vm_hashlist, M_VMEM);

	bt_freetrim(vm, 0);

	VMEM_CONDVAR_DESTROY(vm);
	VMEM_LOCK_DESTROY(vm);
	uma_zfree(vmem_zone, vm);
}

static int
vmem_import(vmem_t *vm, vmem_size_t size, vmem_size_t align, int flags)
{
	vmem_addr_t addr;
	int error;

	if (vm->vm_importfn == NULL)
		return (EINVAL);

	/*
	 * To make sure we get a span that meets the alignment we double it
	 * and add the size to the tail.  This slightly overestimates.
	 */
	if (align != vm->vm_quantum_mask + 1)
		size = (align * 2) + size;
	size = roundup(size, vm->vm_import_quantum);

	if (vm->vm_limit != 0 && vm->vm_limit < vm->vm_size + size)
		return (ENOMEM);

	bt_save(vm);
	VMEM_UNLOCK(vm);
	error = (vm->vm_importfn)(vm->vm_arg, size, flags, &addr);
	VMEM_LOCK(vm);
	bt_restore(vm);
	if (error)
		return (ENOMEM);

	vmem_add1(vm, addr, size, BT_TYPE_SPAN);

	return 0;
}
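
/*
 * Worked example: importing 8 KB at 8 KB alignment into an arena with a
 * 4 KB quantum requests 2 * 8 KB + 8 KB == 24 KB (before rounding to
 * vm_import_quantum); any 24 KB span contains an 8 KB-aligned 8 KB run
 * regardless of where the span itself starts.
 */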
/*
 * vmem_fit: check if a bt can satisfy the given restrictions.
 *
 * It is the caller's responsibility to ensure the region is big enough
 * before calling us.
 */
static int
vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align,
    vmem_size_t phase, vmem_size_t nocross, vmem_addr_t minaddr,
    vmem_addr_t maxaddr, vmem_addr_t *addrp)
{
	vmem_addr_t start;
	vmem_addr_t end;

	MPASS(size > 0);
	MPASS(bt->bt_size >= size);	/* caller's responsibility */

	/*
	 * XXX assumption: vmem_addr_t and vmem_size_t are
	 * unsigned integers of the same size.
	 */

	start = bt->bt_start;
	if (start < minaddr) {
		start = minaddr;
	}
	end = BT_END(bt);
	if (end > maxaddr)
		end = maxaddr;
	if (start > end)
		return (ENOMEM);

	start = VMEM_ALIGNUP(start - phase, align) + phase;
	if (start < bt->bt_start)
		start += align;
	if (VMEM_CROSS_P(start, start + size - 1, nocross)) {
		MPASS(align < nocross);
		start = VMEM_ALIGNUP(start - phase, nocross) + phase;
	}
	if (start <= end && end - start >= size - 1) {
		MPASS((start & (align - 1)) == phase);
		MPASS(!VMEM_CROSS_P(start, start + size - 1, nocross));
		MPASS(minaddr <= start);
		MPASS(maxaddr == 0 || start + size - 1 <= maxaddr);
		MPASS(bt->bt_start <= start);
		MPASS(BT_END(bt) - start >= size - 1);
		*addrp = start;

		return (0);
	}
	return (ENOMEM);
}
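
/*
 * Worked example (illustrative numbers): fitting size 0x800 with align
 * 0x1000 and phase 0x200 into a free tag spanning [0x1000, 0x4fff]
 * computes start = VMEM_ALIGNUP(0x1000 - 0x200, 0x1000) + 0x200 == 0x1200,
 * which satisfies (start & (align - 1)) == phase and leaves
 * end - start >= size - 1, so 0x1200 is returned in *addrp.
 */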
/*
 * vmem_clip:  Trim the boundary tag edges to the requested start and size.
 */
static void
vmem_clip(vmem_t *vm, bt_t *bt, vmem_addr_t start, vmem_size_t size)
{
	bt_t *btnew;
	bt_t *btprev;

	VMEM_ASSERT_LOCKED(vm);
	MPASS(bt->bt_type == BT_TYPE_FREE);
	MPASS(bt->bt_size >= size);
	bt_remfree(vm, bt);
	if (bt->bt_start != start) {
		btprev = bt_alloc(vm);
		btprev->bt_type = BT_TYPE_FREE;
		btprev->bt_start = bt->bt_start;
		btprev->bt_size = start - bt->bt_start;
		bt->bt_start = start;
		bt->bt_size -= btprev->bt_size;
		bt_insfree(vm, btprev);
		bt_insseg(vm, btprev,
		    TAILQ_PREV(bt, vmem_seglist, bt_seglist));
	}
	MPASS(bt->bt_start == start);
	if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
		/* split */
		btnew = bt_alloc(vm);
		btnew->bt_type = BT_TYPE_BUSY;
		btnew->bt_start = bt->bt_start;
		btnew->bt_size = size;
		bt->bt_start = bt->bt_start + size;
		bt->bt_size -= size;
		bt_insfree(vm, bt);
		bt_insseg(vm, btnew,
		    TAILQ_PREV(bt, vmem_seglist, bt_seglist));
		bt_insbusy(vm, btnew);
		bt = btnew;
	} else {
		bt->bt_type = BT_TYPE_BUSY;
		bt_insbusy(vm, bt);
	}
	MPASS(bt->bt_size >= size);
}

static int
vmem_try_fetch(vmem_t *vm, const vmem_size_t size, vmem_size_t align, int flags)
{
	vmem_size_t avail;

	VMEM_ASSERT_LOCKED(vm);

	/*
	 * XXX it is possible to fail to meet xalloc constraints with the
	 * imported region.  It is up to the user to specify the
	 * import quantum such that it can satisfy any allocation.
	 */
	if (vmem_import(vm, size, align, flags) == 0)
		return (1);

	/*
	 * Try to free some space from the quantum cache or reclaim
	 * functions if available.
	 */
	if (vm->vm_qcache_max != 0 || vm->vm_reclaimfn != NULL) {
		avail = vm->vm_size - vm->vm_inuse;
		bt_save(vm);
		VMEM_UNLOCK(vm);
		if (vm->vm_qcache_max != 0)
			qc_drain(vm);
		if (vm->vm_reclaimfn != NULL)
			vm->vm_reclaimfn(vm, flags);
		VMEM_LOCK(vm);
		bt_restore(vm);
		/* If we were successful, retry even NOWAIT. */
		if (vm->vm_size - vm->vm_inuse > avail)
			return (1);
	}
	if ((flags & M_NOWAIT) != 0)
		return (0);
	bt_save(vm);
	VMEM_CONDVAR_WAIT(vm);
	bt_restore(vm);
	return (1);
}

static int
vmem_try_release(vmem_t *vm, struct vmem_btag *bt, const bool remfree)
{
	struct vmem_btag *prev;

	MPASS(bt->bt_type == BT_TYPE_FREE);

	if (vm->vm_releasefn == NULL)
		return (0);

	prev = TAILQ_PREV(bt, vmem_seglist, bt_seglist);
	MPASS(prev != NULL);
	MPASS(prev->bt_type != BT_TYPE_FREE);

	if (prev->bt_type == BT_TYPE_SPAN && prev->bt_size == bt->bt_size) {
		vmem_addr_t spanaddr;
		vmem_size_t spansize;

		MPASS(prev->bt_start == bt->bt_start);
		spanaddr = prev->bt_start;
		spansize = prev->bt_size;
		if (remfree)
			bt_remfree(vm, bt);
		bt_remseg(vm, bt);
		bt_remseg(vm, prev);
		vm->vm_size -= spansize;
		VMEM_CONDVAR_BROADCAST(vm);
		bt_freetrim(vm, BT_MAXFREE);
		vm->vm_releasefn(vm->vm_arg, spanaddr, spansize);
		return (1);
	}
	return (0);
}
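
/*
 * Sketch of the M_NEXTFIT walk below (illustrative): the arena threads a
 * BT_TYPE_CURSOR tag onto the segment list, scans forward from it
 * (wrapping at the tail), and reinserts it just past the block it hands
 * out, e.g.:
 *
 *	[span][busy][CURSOR][free 64K][busy]	allocate 4K ==>
 *	[span][busy][busy 4K][CURSOR][free 60K][busy]
 */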
static int
vmem_xalloc_nextfit(vmem_t *vm, const vmem_size_t size, vmem_size_t align,
    const vmem_size_t phase, const vmem_size_t nocross, int flags,
    vmem_addr_t *addrp)
{
	struct vmem_btag *bt, *cursor, *next, *prev;
	int error;

	error = ENOMEM;
	VMEM_LOCK(vm);

	/*
	 * Make sure we have enough tags to complete the operation.
	 */
	if (bt_fill(vm, flags) != 0)
		goto out;

retry:
	/*
	 * Find the next free tag meeting our constraints.  If one is found,
	 * perform the allocation.
	 */
	for (cursor = &vm->vm_cursor, bt = TAILQ_NEXT(cursor, bt_seglist);
	    bt != cursor; bt = TAILQ_NEXT(bt, bt_seglist)) {
		if (bt == NULL)
			bt = TAILQ_FIRST(&vm->vm_seglist);
		if (bt->bt_type == BT_TYPE_FREE && bt->bt_size >= size &&
		    (error = vmem_fit(bt, size, align, phase, nocross,
		    VMEM_ADDR_MIN, VMEM_ADDR_MAX, addrp)) == 0) {
			vmem_clip(vm, bt, *addrp, size);
			break;
		}
	}

	/*
	 * Try to coalesce free segments around the cursor.  If we succeed, and
	 * have not yet satisfied the allocation request, try again with the
	 * newly coalesced segment.
	 */
	if ((next = TAILQ_NEXT(cursor, bt_seglist)) != NULL &&
	    (prev = TAILQ_PREV(cursor, vmem_seglist, bt_seglist)) != NULL &&
	    next->bt_type == BT_TYPE_FREE && prev->bt_type == BT_TYPE_FREE &&
	    prev->bt_start + prev->bt_size == next->bt_start) {
		prev->bt_size += next->bt_size;
		bt_remfree(vm, next);
		bt_remseg(vm, next);

		/*
		 * The coalesced segment might be able to satisfy our request.
		 * If not, we might need to release it from the arena.
		 */
		if (error == ENOMEM && prev->bt_size >= size &&
		    (error = vmem_fit(prev, size, align, phase, nocross,
		    VMEM_ADDR_MIN, VMEM_ADDR_MAX, addrp)) == 0) {
			vmem_clip(vm, prev, *addrp, size);
			bt = prev;
		} else
			(void)vmem_try_release(vm, prev, true);
	}

	/*
	 * If the allocation was successful, advance the cursor.
	 */
	if (error == 0) {
		TAILQ_REMOVE(&vm->vm_seglist, cursor, bt_seglist);
		for (; bt != NULL && bt->bt_start < *addrp + size;
		    bt = TAILQ_NEXT(bt, bt_seglist))
			;
		if (bt != NULL)
			TAILQ_INSERT_BEFORE(bt, cursor, bt_seglist);
		else
			TAILQ_INSERT_HEAD(&vm->vm_seglist, cursor, bt_seglist);
	}

	/*
	 * Attempt to bring additional resources into the arena.  If that fails
	 * and M_WAITOK is specified, sleep waiting for resources to be freed.
/* ---- vmem API */

void
vmem_set_import(vmem_t *vm, vmem_import_t *importfn,
    vmem_release_t *releasefn, void *arg, vmem_size_t import_quantum)
{

        VMEM_LOCK(vm);
        KASSERT(vm->vm_size == 0, ("%s: arena is non-empty", __func__));
        vm->vm_importfn = importfn;
        vm->vm_releasefn = releasefn;
        vm->vm_arg = arg;
        vm->vm_import_quantum = import_quantum;
        VMEM_UNLOCK(vm);
}

void
vmem_set_limit(vmem_t *vm, vmem_size_t limit)
{

        VMEM_LOCK(vm);
        vm->vm_limit = limit;
        VMEM_UNLOCK(vm);
}

void
vmem_set_reclaim(vmem_t *vm, vmem_reclaim_t *reclaimfn)
{

        VMEM_LOCK(vm);
        vm->vm_reclaimfn = reclaimfn;
        VMEM_UNLOCK(vm);
}
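
/*
 * Illustrative sketch (added for exposition, not from the original file):
 * wiring a child arena to a parent with the hypothetical example_import and
 * example_release pair sketched earlier.  Importing 64 quanta at a time is
 * an arbitrary choice; "child", "parent", and "quantum" are assumptions.
 */
#if 0
        vmem_set_import(child, example_import, example_release, parent,
            64 * quantum);
#endif
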
/*
 * vmem_init: initialize an arena.
 */
vmem_t *
vmem_init(vmem_t *vm, const char *name, vmem_addr_t base, vmem_size_t size,
    vmem_size_t quantum, vmem_size_t qcache_max, int flags)
{
        vmem_size_t i;

        MPASS(quantum > 0);
        MPASS((quantum & (quantum - 1)) == 0);

        bzero(vm, sizeof(*vm));

        VMEM_CONDVAR_INIT(vm, name);
        VMEM_LOCK_INIT(vm, name);
        vm->vm_nfreetags = 0;
        LIST_INIT(&vm->vm_freetags);
        strlcpy(vm->vm_name, name, sizeof(vm->vm_name));
        vm->vm_quantum_mask = quantum - 1;
        vm->vm_quantum_shift = flsl(quantum) - 1;
        vm->vm_nbusytag = 0;
        vm->vm_size = 0;
        vm->vm_limit = 0;
        vm->vm_inuse = 0;
        qc_init(vm, qcache_max);

        TAILQ_INIT(&vm->vm_seglist);
        vm->vm_cursor.bt_start = vm->vm_cursor.bt_size = 0;
        vm->vm_cursor.bt_type = BT_TYPE_CURSOR;
        TAILQ_INSERT_TAIL(&vm->vm_seglist, &vm->vm_cursor, bt_seglist);

        for (i = 0; i < VMEM_MAXORDER; i++)
                LIST_INIT(&vm->vm_freelist[i]);

        memset(&vm->vm_hash0, 0, sizeof(vm->vm_hash0));
        vm->vm_hashsize = VMEM_HASHSIZE_MIN;
        vm->vm_hashlist = vm->vm_hash0;

        if (size != 0) {
                if (vmem_add(vm, base, size, flags) != 0) {
                        vmem_destroy1(vm);
                        return NULL;
                }
        }

        mtx_lock(&vmem_list_lock);
        LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist);
        mtx_unlock(&vmem_list_lock);

        return vm;
}

/*
 * vmem_create: create an arena.
 */
vmem_t *
vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
    vmem_size_t quantum, vmem_size_t qcache_max, int flags)
{
        vmem_t *vm;

        vm = uma_zalloc(vmem_zone, flags & (M_WAITOK|M_NOWAIT));
        if (vm == NULL)
                return (NULL);
        if (vmem_init(vm, name, base, size, quantum, qcache_max,
            flags) == NULL)
                return (NULL);
        return (vm);
}
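
/*
 * Illustrative sketch (added for exposition, not from the original file):
 * creating and destroying a page-granular arena with no quantum cache.
 * The base and size values are arbitrary assumptions.
 */
#if 0
        vmem_t *arena;

        arena = vmem_create("example", 0x100000, 0x400000, PAGE_SIZE, 0,
            M_WAITOK);
        /* ... vmem_alloc()/vmem_free() against "arena" ... */
        vmem_destroy(arena);
#endif
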
void
vmem_destroy(vmem_t *vm)
{

        mtx_lock(&vmem_list_lock);
        LIST_REMOVE(vm, vm_alllist);
        mtx_unlock(&vmem_list_lock);

        vmem_destroy1(vm);
}

vmem_size_t
vmem_roundup_size(vmem_t *vm, vmem_size_t size)
{

        return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
}
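
/*
 * For example (illustrative, not in the original source): with a quantum of
 * 16 the mask is 0xf, so a request for 40 bytes rounds up to
 * (40 + 0xf) & ~0xf == 48.
 */
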
/*
 * vmem_alloc: allocate resource from the arena.
 */
int
vmem_alloc(vmem_t *vm, vmem_size_t size, int flags, vmem_addr_t *addrp)
{
        const int strat __unused = flags & VMEM_FITMASK;
        qcache_t *qc;

        flags &= VMEM_FLAGS;
        MPASS(size > 0);
        MPASS(strat == M_BESTFIT || strat == M_FIRSTFIT || strat == M_NEXTFIT);
        if ((flags & M_NOWAIT) == 0)
                WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "vmem_alloc");

        if (size <= vm->vm_qcache_max) {
                /*
                 * Resource 0 cannot be cached, so avoid a blocking allocation
                 * in qc_import() and give the vmem_xalloc() call below a
                 * chance to return 0.
                 */
                qc = &vm->vm_qcache[(size - 1) >> vm->vm_quantum_shift];
                *addrp = (vmem_addr_t)uma_zalloc(qc->qc_cache,
                    (flags & ~M_WAITOK) | M_NOWAIT);
                if (__predict_true(*addrp != 0))
                        return (0);
        }

        return (vmem_xalloc(vm, size, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX,
            flags, addrp));
}
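
/*
 * Illustrative sketch (added for exposition, not from the original file):
 * a small request is served from the per-size UMA quantum cache when one
 * exists, falling back to vmem_xalloc() otherwise.  The arena "arena" is
 * hypothetical.
 */
#if 0
        vmem_addr_t addr;

        if (vmem_alloc(arena, 64, M_BESTFIT | M_NOWAIT, &addr) == 0) {
                /* ... use [addr, addr + 64) ... */
                vmem_free(arena, addr, 64);
        }
#endif
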
int
vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_size_t align,
    const vmem_size_t phase, const vmem_size_t nocross,
    const vmem_addr_t minaddr, const vmem_addr_t maxaddr, int flags,
    vmem_addr_t *addrp)
{
        const vmem_size_t size = vmem_roundup_size(vm, size0);
        struct vmem_freelist *list;
        struct vmem_freelist *first;
        struct vmem_freelist *end;
        bt_t *bt;
        int error;
        int strat;

        flags &= VMEM_FLAGS;
        strat = flags & VMEM_FITMASK;
        MPASS(size0 > 0);
        MPASS(size > 0);
        MPASS(strat == M_BESTFIT || strat == M_FIRSTFIT || strat == M_NEXTFIT);
        MPASS((flags & (M_NOWAIT|M_WAITOK)) != (M_NOWAIT|M_WAITOK));
        if ((flags & M_NOWAIT) == 0)
                WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "vmem_xalloc");
        MPASS((align & vm->vm_quantum_mask) == 0);
        MPASS((align & (align - 1)) == 0);
        MPASS((phase & vm->vm_quantum_mask) == 0);
        MPASS((nocross & vm->vm_quantum_mask) == 0);
        MPASS((nocross & (nocross - 1)) == 0);
        MPASS((align == 0 && phase == 0) || phase < align);
        MPASS(nocross == 0 || nocross >= size);
        MPASS(minaddr <= maxaddr);
        MPASS(!VMEM_CROSS_P(phase, phase + size - 1, nocross));
        if (strat == M_NEXTFIT)
                MPASS(minaddr == VMEM_ADDR_MIN && maxaddr == VMEM_ADDR_MAX);

        if (align == 0)
                align = vm->vm_quantum_mask + 1;
        *addrp = 0;

        /*
         * Next-fit allocations don't use the freelists.
         */
        if (strat == M_NEXTFIT)
                return (vmem_xalloc_nextfit(vm, size0, align, phase, nocross,
                    flags, addrp));

        end = &vm->vm_freelist[VMEM_MAXORDER];
        /*
         * choose a free block from which we allocate.
         */
        first = bt_freehead_toalloc(vm, size, strat);
        VMEM_LOCK(vm);

        /*
         * Make sure we have enough tags to complete the operation.
         */
        error = bt_fill(vm, flags);
        if (error != 0)
                goto out;
        for (;;) {
                /*
                 * Scan freelists looking for a tag that satisfies the
                 * allocation.  If we're doing BESTFIT we may encounter
                 * sizes below the request.  If we're doing FIRSTFIT we
                 * inspect only the first element from each list.
                 */
                for (list = first; list < end; list++) {
                        LIST_FOREACH(bt, list, bt_freelist) {
                                if (bt->bt_size >= size) {
                                        error = vmem_fit(bt, size, align,
                                            phase, nocross, minaddr, maxaddr,
                                            addrp);
                                        if (error == 0) {
                                                vmem_clip(vm, bt, *addrp,
                                                    size);
                                                goto out;
                                        }
                                }
                                /* FIRST skips to the next list. */
                                if (strat == M_FIRSTFIT)
                                        break;
                        }
                }

                /*
                 * Retry if the fast algorithm failed.
                 */
                if (strat == M_FIRSTFIT) {
                        strat = M_BESTFIT;
                        first = bt_freehead_toalloc(vm, size, strat);
                        continue;
                }

                /*
                 * Try a few measures to bring additional resources into the
                 * arena.  If all else fails, we will sleep waiting for
                 * resources to be freed.
                 */
                if (!vmem_try_fetch(vm, size, align, flags)) {
                        error = ENOMEM;
                        break;
                }
        }
out:
        VMEM_UNLOCK(vm);
        if (error != 0 && (flags & M_NOWAIT) == 0)
                panic("vmem_xalloc: failed to satisfy M_WAITOK allocation");

        return (error);
}
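
/*
 * Illustrative sketch (added for exposition, not from the original file):
 * a constrained allocation of 8192 bytes, aligned to 4096, that must not
 * cross a 65536-byte boundary.  The arena "arena" and the constraint values
 * are assumptions.
 */
#if 0
        vmem_addr_t addr;
        int error;

        error = vmem_xalloc(arena, 8192, 4096, 0, 65536, VMEM_ADDR_MIN,
            VMEM_ADDR_MAX, M_BESTFIT | M_NOWAIT, &addr);
#endif
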
/*
 * vmem_free: free the resource to the arena.
 */
void
vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{
        qcache_t *qc;

        MPASS(size > 0);

        if (size <= vm->vm_qcache_max &&
            __predict_true(addr >= VMEM_ADDR_QCACHE_MIN)) {
                qc = &vm->vm_qcache[(size - 1) >> vm->vm_quantum_shift];
                uma_zfree(qc->qc_cache, (void *)addr);
        } else
                vmem_xfree(vm, addr, size);
}

void
vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size __unused)
{
        bt_t *bt;
        bt_t *t;

        MPASS(size > 0);

        VMEM_LOCK(vm);
        bt = bt_lookupbusy(vm, addr);
        MPASS(bt != NULL);
        MPASS(bt->bt_start == addr);
        MPASS(bt->bt_size == vmem_roundup_size(vm, size) ||
            bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
        MPASS(bt->bt_type == BT_TYPE_BUSY);
        bt_rembusy(vm, bt);
        bt->bt_type = BT_TYPE_FREE;

        /* coalesce */
        t = TAILQ_NEXT(bt, bt_seglist);
        if (t != NULL && t->bt_type == BT_TYPE_FREE) {
                MPASS(BT_END(bt) < t->bt_start);        /* YYY */
                bt->bt_size += t->bt_size;
                bt_remfree(vm, t);
                bt_remseg(vm, t);
        }
        t = TAILQ_PREV(bt, vmem_seglist, bt_seglist);
        if (t != NULL && t->bt_type == BT_TYPE_FREE) {
                MPASS(BT_END(t) < bt->bt_start);        /* YYY */
                bt->bt_size += t->bt_size;
                bt->bt_start = t->bt_start;
                bt_remfree(vm, t);
                bt_remseg(vm, t);
        }

        if (!vmem_try_release(vm, bt, false)) {
                bt_insfree(vm, bt);
                VMEM_CONDVAR_BROADCAST(vm);
                bt_freetrim(vm, BT_MAXFREE);
        }
}
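
/*
 * Worked example (illustrative, not in the original source): freeing
 * [0x2000, 0x3000) while [0x1000, 0x2000) and [0x3000, 0x4000) are already
 * free merges the three tags into one free tag covering [0x1000, 0x4000),
 * which vmem_try_release() may then hand back to the backing store as a
 * complete span.
 */
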
/*
 * vmem_add: add a static span to the arena.
 */
int
vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, int flags)
{
        int error;

        flags &= VMEM_FLAGS;

        VMEM_LOCK(vm);
        error = bt_fill(vm, flags);
        if (error == 0)
                vmem_add1(vm, addr, size, BT_TYPE_SPAN_STATIC);
        VMEM_UNLOCK(vm);

        return (error);
}

/*
 * vmem_size: report information about the arena's size.
 */
vmem_size_t
vmem_size(vmem_t *vm, int typemask)
{
        int i;

        switch (typemask) {
        case VMEM_ALLOC:
                return vm->vm_inuse;
        case VMEM_FREE:
                return vm->vm_size - vm->vm_inuse;
        case VMEM_FREE|VMEM_ALLOC:
                return vm->vm_size;
        case VMEM_MAXFREE:
                VMEM_LOCK(vm);
                for (i = VMEM_MAXORDER - 1; i >= 0; i--) {
                        if (LIST_EMPTY(&vm->vm_freelist[i]))
                                continue;
                        VMEM_UNLOCK(vm);
                        return ((vmem_size_t)ORDER2SIZE(i) <<
                            vm->vm_quantum_shift);
                }
                VMEM_UNLOCK(vm);
                return (0);
        default:
                panic("vmem_size");
        }
}
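
/*
 * Illustrative sketch (added for exposition, not from the original file):
 * querying an arena's counters; "arena" is hypothetical.
 */
#if 0
        vmem_size_t inuse, nfree, largest;

        inuse = vmem_size(arena, VMEM_ALLOC);
        nfree = vmem_size(arena, VMEM_FREE);
        largest = vmem_size(arena, VMEM_MAXFREE);
#endif
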
/* ---- debug */

#if defined(DDB) || defined(DIAGNOSTIC)

static void bt_dump(const bt_t *, int (*)(const char *, ...)
    __printflike(1, 2));

static const char *
bt_type_string(int type)
{

        switch (type) {
        case BT_TYPE_BUSY:
                return "busy";
        case BT_TYPE_FREE:
                return "free";
        case BT_TYPE_SPAN:
                return "span";
        case BT_TYPE_SPAN_STATIC:
                return "static span";
        case BT_TYPE_CURSOR:
                return "cursor";
        default:
                break;
        }
        return "BOGUS";
}

static void
bt_dump(const bt_t *bt, int (*pr)(const char *, ...))
{

        (*pr)("\t%p: %jx %jx, %d(%s)\n",
            bt, (intmax_t)bt->bt_start, (intmax_t)bt->bt_size,
            bt->bt_type, bt_type_string(bt->bt_type));
}

static void
vmem_dump(const vmem_t *vm, int (*pr)(const char *, ...) __printflike(1, 2))
{
        const bt_t *bt;
        int i;

        (*pr)("vmem %p '%s'\n", vm, vm->vm_name);
        TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
                bt_dump(bt, pr);
        }

        for (i = 0; i < VMEM_MAXORDER; i++) {
                const struct vmem_freelist *fl = &vm->vm_freelist[i];

                if (LIST_EMPTY(fl)) {
                        continue;
                }

                (*pr)("freelist[%d]\n", i);
                LIST_FOREACH(bt, fl, bt_freelist) {
                        bt_dump(bt, pr);
                }
        }
}

#endif /* defined(DDB) || defined(DIAGNOSTIC) */

#if defined(DDB)
#include <ddb/ddb.h>

static bt_t *
vmem_whatis_lookup(vmem_t *vm, vmem_addr_t addr)
{
        bt_t *bt;

        TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
                if (BT_ISSPAN_P(bt)) {
                        continue;
                }
                if (bt->bt_start <= addr && addr <= BT_END(bt)) {
                        return bt;
                }
        }

        return NULL;
}

"allocated" : "free"); 16685f518366SJeff Roberson } 16695f518366SJeff Roberson } 16705f518366SJeff Roberson 16715f518366SJeff Roberson void 16725f518366SJeff Roberson vmem_printall(const char *modif, int (*pr)(const char *, ...)) 16735f518366SJeff Roberson { 16745f518366SJeff Roberson const vmem_t *vm; 16755f518366SJeff Roberson 16765f518366SJeff Roberson LIST_FOREACH(vm, &vmem_list, vm_alllist) { 16775f518366SJeff Roberson vmem_dump(vm, pr); 16785f518366SJeff Roberson } 16795f518366SJeff Roberson } 16805f518366SJeff Roberson 16815f518366SJeff Roberson void 16825f518366SJeff Roberson vmem_print(vmem_addr_t addr, const char *modif, int (*pr)(const char *, ...)) 16835f518366SJeff Roberson { 16845f518366SJeff Roberson const vmem_t *vm = (const void *)addr; 16855f518366SJeff Roberson 16865f518366SJeff Roberson vmem_dump(vm, pr); 16875f518366SJeff Roberson } 1688b308aaedSAlexander Motin 1689b308aaedSAlexander Motin DB_SHOW_COMMAND(vmemdump, vmemdump) 1690b308aaedSAlexander Motin { 1691b308aaedSAlexander Motin 1692b308aaedSAlexander Motin if (!have_addr) { 1693b308aaedSAlexander Motin db_printf("usage: show vmemdump <addr>\n"); 1694b308aaedSAlexander Motin return; 1695b308aaedSAlexander Motin } 1696b308aaedSAlexander Motin 1697b308aaedSAlexander Motin vmem_dump((const vmem_t *)addr, db_printf); 1698b308aaedSAlexander Motin } 1699b308aaedSAlexander Motin 1700b308aaedSAlexander Motin DB_SHOW_ALL_COMMAND(vmemdump, vmemdumpall) 1701b308aaedSAlexander Motin { 1702b308aaedSAlexander Motin const vmem_t *vm; 1703b308aaedSAlexander Motin 1704b308aaedSAlexander Motin LIST_FOREACH(vm, &vmem_list, vm_alllist) 1705b308aaedSAlexander Motin vmem_dump(vm, db_printf); 1706b308aaedSAlexander Motin } 1707b308aaedSAlexander Motin 1708b308aaedSAlexander Motin DB_SHOW_COMMAND(vmem, vmem_summ) 1709b308aaedSAlexander Motin { 1710b308aaedSAlexander Motin const vmem_t *vm = (const void *)addr; 1711b308aaedSAlexander Motin const bt_t *bt; 1712b308aaedSAlexander Motin size_t ft[VMEM_MAXORDER], ut[VMEM_MAXORDER]; 1713b308aaedSAlexander Motin size_t fs[VMEM_MAXORDER], us[VMEM_MAXORDER]; 1714b308aaedSAlexander Motin int ord; 1715b308aaedSAlexander Motin 1716b308aaedSAlexander Motin if (!have_addr) { 1717b308aaedSAlexander Motin db_printf("usage: show vmem <addr>\n"); 1718b308aaedSAlexander Motin return; 1719b308aaedSAlexander Motin } 1720b308aaedSAlexander Motin 1721b308aaedSAlexander Motin db_printf("vmem %p '%s'\n", vm, vm->vm_name); 1722b308aaedSAlexander Motin db_printf("\tquantum:\t%zu\n", vm->vm_quantum_mask + 1); 1723b308aaedSAlexander Motin db_printf("\tsize:\t%zu\n", vm->vm_size); 1724b308aaedSAlexander Motin db_printf("\tinuse:\t%zu\n", vm->vm_inuse); 1725b308aaedSAlexander Motin db_printf("\tfree:\t%zu\n", vm->vm_size - vm->vm_inuse); 1726b308aaedSAlexander Motin db_printf("\tbusy tags:\t%d\n", vm->vm_nbusytag); 1727b308aaedSAlexander Motin db_printf("\tfree tags:\t%d\n", vm->vm_nfreetags); 1728b308aaedSAlexander Motin 1729b308aaedSAlexander Motin memset(&ft, 0, sizeof(ft)); 1730b308aaedSAlexander Motin memset(&ut, 0, sizeof(ut)); 1731b308aaedSAlexander Motin memset(&fs, 0, sizeof(fs)); 1732b308aaedSAlexander Motin memset(&us, 0, sizeof(us)); 1733b308aaedSAlexander Motin TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1734b308aaedSAlexander Motin ord = SIZE2ORDER(bt->bt_size >> vm->vm_quantum_shift); 1735b308aaedSAlexander Motin if (bt->bt_type == BT_TYPE_BUSY) { 1736b308aaedSAlexander Motin ut[ord]++; 1737b308aaedSAlexander Motin us[ord] += bt->bt_size; 1738b308aaedSAlexander Motin } else if (bt->bt_type == 
DB_SHOW_COMMAND(vmem, vmem_summ)
{
        const vmem_t *vm = (const void *)addr;
        const bt_t *bt;
        size_t ft[VMEM_MAXORDER], ut[VMEM_MAXORDER];
        size_t fs[VMEM_MAXORDER], us[VMEM_MAXORDER];
        int ord;

        if (!have_addr) {
                db_printf("usage: show vmem <addr>\n");
                return;
        }

        db_printf("vmem %p '%s'\n", vm, vm->vm_name);
        db_printf("\tquantum:\t%zu\n", vm->vm_quantum_mask + 1);
        db_printf("\tsize:\t%zu\n", vm->vm_size);
        db_printf("\tinuse:\t%zu\n", vm->vm_inuse);
        db_printf("\tfree:\t%zu\n", vm->vm_size - vm->vm_inuse);
        db_printf("\tbusy tags:\t%d\n", vm->vm_nbusytag);
        db_printf("\tfree tags:\t%d\n", vm->vm_nfreetags);

        memset(&ft, 0, sizeof(ft));
        memset(&ut, 0, sizeof(ut));
        memset(&fs, 0, sizeof(fs));
        memset(&us, 0, sizeof(us));
        TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
                ord = SIZE2ORDER(bt->bt_size >> vm->vm_quantum_shift);
                if (bt->bt_type == BT_TYPE_BUSY) {
                        ut[ord]++;
                        us[ord] += bt->bt_size;
                } else if (bt->bt_type == BT_TYPE_FREE) {
                        ft[ord]++;
                        fs[ord] += bt->bt_size;
                }
        }
        db_printf("\t\t\tinuse\tsize\t\tfree\tsize\n");
        for (ord = 0; ord < VMEM_MAXORDER; ord++) {
                if (ut[ord] == 0 && ft[ord] == 0)
                        continue;
                db_printf("\t%-15zu %zu\t%-15zu %zu\t%-16zu\n",
                    ORDER2SIZE(ord) << vm->vm_quantum_shift,
                    ut[ord], us[ord], ft[ord], fs[ord]);
        }
}

DB_SHOW_ALL_COMMAND(vmem, vmem_summall)
{
        const vmem_t *vm;

        LIST_FOREACH(vm, &vmem_list, vm_alllist)
                vmem_summ((db_expr_t)vm, TRUE, count, modif);
}
#endif /* defined(DDB) */

#define vmem_printf printf

#if defined(DIAGNOSTIC)

static bool
vmem_check_sanity(vmem_t *vm)
{
        const bt_t *bt, *bt2;

        MPASS(vm != NULL);

        TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
                if (bt->bt_start > BT_END(bt)) {
                        printf("corrupted tag\n");
                        bt_dump(bt, vmem_printf);
                        return false;
                }
        }
        TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
                if (bt->bt_type == BT_TYPE_CURSOR) {
                        if (bt->bt_start != 0 || bt->bt_size != 0) {
                                printf("corrupted cursor\n");
                                return false;
                        }
                        continue;
                }
                TAILQ_FOREACH(bt2, &vm->vm_seglist, bt_seglist) {
                        if (bt == bt2) {
                                continue;
                        }
                        if (bt2->bt_type == BT_TYPE_CURSOR) {
                                continue;
                        }
                        if (BT_ISSPAN_P(bt) != BT_ISSPAN_P(bt2)) {
                                continue;
                        }
                        if (bt->bt_start <= BT_END(bt2) &&
                            bt2->bt_start <= BT_END(bt)) {
                                printf("overlapping tags\n");
                                bt_dump(bt, vmem_printf);
                                bt_dump(bt2, vmem_printf);
                                return false;
                        }
                }
        }

        return true;
}

static void
vmem_check(vmem_t *vm)
{

        if (!vmem_check_sanity(vm)) {
                panic("insanity vmem %p", vm);
        }
}

#endif /* defined(DIAGNOSTIC) */