/*-
 * Copyright (c) 1998 Matthew Dillon.  All Rights Reserved.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * BLIST.C -	Bitmap allocator/deallocator, using a radix tree with hinting
 *
 *	This module implements a general bitmap allocator/deallocator.  The
 *	allocator eats around 2 bits per 'block'.  The module does not
 *	try to interpret the meaning of a 'block' other than to return
 *	SWAPBLK_NONE on an allocation failure.
 *
 *	A radix tree is used to maintain the bitmap.  Two radix constants are
 *	involved: one for the bitmaps contained in the leaf nodes (typically
 *	64), and one for the meta nodes (typically 16).  Both meta and leaf
 *	nodes have a hint field.  This field gives us a hint as to the largest
 *	free contiguous range of blocks under the node.  It may contain a
 *	value that is too high, but will never contain a value that is too
 *	low.  When the radix tree is searched, allocation failures in subtrees
 *	update the hint.
 *
 *	The radix tree also implements two collapsed states for meta nodes:
 *	the ALL-ALLOCATED state and the ALL-FREE state.  If a meta node is
 *	in either of these two states, all information contained underneath
 *	the node is considered stale.  These states are used to optimize
 *	allocation and freeing operations.
 *
 *	The hinting greatly increases code efficiency for allocations while
 *	the general radix structure optimizes both allocations and frees.  The
 *	radix tree should be able to operate well no matter how much
 *	fragmentation there is and no matter how large a bitmap is used.
 *
 *	The blist code wires all necessary memory at creation time.  Neither
 *	allocations nor frees require interaction with the memory subsystem.
 *	The non-blocking features of the blist code are used in the swap code
 *	(vm/swap_pager.c).
 *
 *	LAYOUT: The radix tree is laid out recursively using a linear array.
 *	Each meta node is immediately followed (laid out sequentially in
 *	memory) by BLIST_META_RADIX lower-level nodes.  This is a recursive
 *	structure, but one that can be easily scanned through a very simple
 *	'skip' calculation.  In order to support large radixes, portions of
 *	the tree may reside outside our memory allocation.  We handle this
 *	with an early-termination optimization (when bighint is set to -1) on
 *	the scan.  The memory allocation is only large enough to cover the
 *	number of blocks requested at creation time, even if it must be
 *	encompassed by a larger root-node radix.
 *
 *	NOTE: The allocator cannot currently allocate more than
 *	BLIST_BMAP_RADIX blocks per call.  It will panic with 'allocation too
 *	large' if you try.  This is an area that could use improvement.  The
 *	radix is large enough that this restriction does not affect the swap
 *	system, though.  Currently only the allocation code is affected by
 *	this algorithmic unfeature.  The freeing code can handle arbitrary
 *	ranges.
 *
 *	This code can be compiled stand-alone for debugging.
 */
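
/*
 * Example usage (a minimal sketch, mirroring the stand-alone test driver at
 * the end of this file).  A freshly created blist starts with every block
 * allocated, so the creator frees the managed range before the first
 * allocation:
 *
 *	blist_t bl = blist_create(1024, M_WAITOK);
 *	blist_free(bl, 0, 1024);		(mark all 1024 blocks free)
 *	daddr_t blk = blist_alloc(bl, 16);	(reserve 16 contiguous blocks)
 *	if (blk != SWAPBLK_NONE)
 *		blist_free(bl, blk, 16);	(release them again)
 *	blist_destroy(bl);
 */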

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#ifdef _KERNEL

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/blist.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/mutex.h>

#else

#ifndef BLIST_NO_DEBUG
#define BLIST_DEBUG
#endif

#include <sys/types.h>
#include <sys/malloc.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stdbool.h>

#define	bitcount64(x)	__bitcount64((uint64_t)(x))
#define	malloc(a,b,c)	calloc(a, 1)
#define	free(a,b)	free(a)
#define	CTASSERT(expr)

#include <sys/blist.h>

void panic(const char *ctl, ...);

#endif

/*
 * static support functions
 */
static daddr_t	blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count,
		    daddr_t cursor);
static daddr_t	blst_meta_alloc(blmeta_t *scan, daddr_t blk, daddr_t count,
		    daddr_t radix, daddr_t cursor);
static void	blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
static void	blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
		    daddr_t radix, daddr_t blk);
static void	blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
		    blist_t dest, daddr_t count);
static daddr_t	blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count);
static daddr_t	blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count,
		    daddr_t radix, daddr_t blk);
static daddr_t	blst_radix_init(blmeta_t *scan, daddr_t radix, daddr_t count);
#ifndef _KERNEL
static void	blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix,
		    int tab);
#endif

#ifdef _KERNEL
static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
#endif

CTASSERT(BLIST_BMAP_RADIX % BLIST_META_RADIX == 0);

/*
 * For a subtree that can represent the state of up to 'radix' blocks, the
 * number of leaf nodes of the subtree is L=radix/BLIST_BMAP_RADIX.  If 'm'
 * is short for BLIST_META_RADIX, then for a tree of height h with L=m**h
 * leaf nodes, the total number of tree nodes is 1 + m + m**2 + ... + m**h,
 * or, equivalently, (m**(h+1)-1)/(m-1).  This quantity is called 'skip'
 * in the 'meta' functions that process subtrees.  Since integer division
 * discards remainders, we can express this computation as
 * skip = (m * m**h) / (m - 1)
 * skip = (m * (radix / BLIST_BMAP_RADIX)) / (m - 1)
 * and since m divides BLIST_BMAP_RADIX, we can simplify further to
 * skip = (radix / (BLIST_BMAP_RADIX / m)) / (m - 1)
 * skip = radix / ((BLIST_BMAP_RADIX / m) * (m - 1))
 * so that simple integer division by a constant can safely be used for the
 * calculation.
 */
static inline daddr_t
radix_to_skip(daddr_t radix)
{

	return (radix /
	    ((BLIST_BMAP_RADIX / BLIST_META_RADIX) * (BLIST_META_RADIX - 1)));
}
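
/*
 * For example, with the typical radix constants mentioned above
 * (BLIST_BMAP_RADIX == 64, BLIST_META_RADIX == 16), a subtree with
 * radix == 1024 covers 1024/64 == 16 leaves under a single meta node, and
 *
 *	skip = 1024 / ((64 / 16) * 15) = 1024 / 60 = 17
 *
 * which is indeed the meta node itself plus its 16 children.
 */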

/*
 * blist_create() - create a blist capable of handling up to the specified
 *		    number of blocks
 *
 *	blocks - must be greater than 0
 *	flags  - malloc flags
 *
 *	The smallest blist consists of a single leaf node capable of
 *	managing BLIST_BMAP_RADIX blocks.
 */
blist_t
blist_create(daddr_t blocks, int flags)
{
	blist_t bl;
	daddr_t nodes, radix;

	/*
	 * Calculate the radix field used for scanning.
	 */
	radix = BLIST_BMAP_RADIX;
	while (radix < blocks) {
		radix *= BLIST_META_RADIX;
	}
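	/*
	 * For example, with the typical radix constants, a request for
	 * 100000 blocks grows the radix through 64, 1024 and 16384 to
	 * 262144: the first value of the form
	 * BLIST_BMAP_RADIX * BLIST_META_RADIX**k that covers the request.
	 */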
	nodes = 1 + blst_radix_init(NULL, radix, blocks);

	bl = malloc(sizeof(struct blist), M_SWAP, flags);
	if (bl == NULL)
		return (NULL);

	bl->bl_blocks = blocks;
	bl->bl_radix = radix;
	bl->bl_cursor = 0;
	bl->bl_root = malloc(nodes * sizeof(blmeta_t), M_SWAP, flags);
	if (bl->bl_root == NULL) {
		free(bl, M_SWAP);
		return (NULL);
	}
	blst_radix_init(bl->bl_root, radix, blocks);

#if defined(BLIST_DEBUG)
	printf(
		"BLIST representing %lld blocks (%lld MB of swap)"
		", requiring %lldK of ram\n",
		(long long)bl->bl_blocks,
		(long long)bl->bl_blocks * 4 / 1024,
		(long long)(nodes * sizeof(blmeta_t) + 1023) / 1024
	);
	printf("BLIST raw radix tree contains %lld records\n",
	    (long long)nodes);
#endif

	return (bl);
}

void
blist_destroy(blist_t bl)
{
	free(bl->bl_root, M_SWAP);
	free(bl, M_SWAP);
}

/*
 * blist_alloc() -   reserve space in the block bitmap.  Return the base
 *		     of a contiguous region or SWAPBLK_NONE if space could
 *		     not be allocated.
 */
daddr_t
blist_alloc(blist_t bl, daddr_t count)
{
	daddr_t blk;

	/*
	 * This loop iterates at most twice.  An allocation failure in the
	 * first iteration leads to a second iteration only if the cursor was
	 * non-zero.  When the cursor is zero, an allocation failure will
	 * reduce the hint, stopping further iterations.
	 */
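	/*
	 * For example, with bl_cursor == 1000, a failed search of
	 * [1000, bl_blocks) triggers a second search from block 0; if that
	 * also fails, the root hint drops below 'count', so later requests
	 * of the same or larger size return immediately.
	 */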
	while (count <= bl->bl_root->bm_bighint) {
		blk = blst_meta_alloc(bl->bl_root, 0, count, bl->bl_radix,
		    bl->bl_cursor);
		if (blk != SWAPBLK_NONE) {
			bl->bl_cursor = blk + count;
			return (blk);
		} else if (bl->bl_cursor != 0)
			bl->bl_cursor = 0;
	}
	return (SWAPBLK_NONE);
}

/*
 * blist_avail() -	return the number of free blocks.
 */
daddr_t
blist_avail(blist_t bl)
{

	if (bl->bl_radix == BLIST_BMAP_RADIX)
		return (bitcount64(bl->bl_root->u.bmu_bitmap));
	else
		return (bl->bl_root->u.bmu_avail);
}

/*
 * blist_free() -	free up space in the block bitmap.  Return the base
 *			of a contiguous region.  Panic if an inconsistency is
 *			found.
 */
void
blist_free(blist_t bl, daddr_t blkno, daddr_t count)
{

	blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix, 0);
}

/*
 * blist_fill() -	mark a region in the block bitmap as off-limits
 *			to the allocator (i.e. allocate it), ignoring any
 *			existing allocations.  Return the number of blocks
 *			actually filled that were free before the call.
 */
daddr_t
blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
{

	return (blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix, 0));
}

/*
 * blist_resize() -	resize an existing radix tree to handle the
 *			specified number of blocks.  This will reallocate
 *			the tree and transfer the previous bitmap to the new
 *			one.  When extending the tree you can specify whether
 *			the new blocks are to be left allocated or freed.
 */
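/*
 * For example, growing a 1024-block blist to 2048 blocks creates a new tree,
 * copies the old state of blocks 0-1023 into it and, if 'freenew' is
 * non-zero, frees blocks 1024-2047; otherwise the new blocks stay allocated,
 * which is the state every block starts in after blist_create().
 */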
void
blist_resize(blist_t *pbl, daddr_t count, int freenew, int flags)
{
	blist_t newbl = blist_create(count, flags);
	blist_t save = *pbl;

	*pbl = newbl;
	if (count > save->bl_blocks)
		count = save->bl_blocks;
	blst_copy(save->bl_root, 0, save->bl_radix, newbl, count);

	/*
	 * If resizing upwards, should we free the new space or not?
	 */
	if (freenew && count < newbl->bl_blocks) {
		blist_free(newbl, count, newbl->bl_blocks - count);
	}
	blist_destroy(save);
}

#ifdef BLIST_DEBUG

/*
 * blist_print()    - dump radix tree
 */
void
blist_print(blist_t bl)
{
	printf("BLIST cursor = %08jx {\n", (uintmax_t)bl->bl_cursor);
	blst_radix_print(bl->bl_root, 0, bl->bl_radix, 4);
	printf("}\n");
}

#endif

/************************************************************************
 *			ALLOCATION SUPPORT FUNCTIONS			*
 ************************************************************************
 *
 *	These support functions do all the actual work.  They may seem
 *	rather longish, but that's because I've commented them up.  The
 *	actual code is straightforward.
 *
 */

/*
 * blist_leaf_alloc() -	allocate at a leaf in the radix tree (a bitmap).
 *
 *	This is the core of the allocator and is optimized for the
 *	BLIST_BMAP_RADIX block allocation case.  Otherwise, execution
 *	time is proportional to log2(count) + log2(BLIST_BMAP_RADIX).
 */
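/*
 * For illustration, consider a request for 3 blocks against a leaf whose
 * low-order bitmap bits are 00110111 (blocks 0-2 and 4-5 free, higher bits
 * zero).  Then count1 == 2 and fls(count1) == 2, so the mask is folded
 * twice, each time by one bit position:
 *
 *	mask			00110111
 *	mask &= mask >> 1  ->	00010011  (starts of runs of >= 2 free blocks)
 *	mask &= mask >> 1  ->	00000001  (starts of runs of >= 3 free blocks)
 *
 * The least significant bit that survives (bit 0) is the start of the first
 * run long enough to satisfy the request.
 */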
static daddr_t
blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count, daddr_t cursor)
{
	u_daddr_t mask;
	int count1, hi, lo, mid, num_shifts, range1, range_ext;

	if (count == BLIST_BMAP_RADIX) {
		/*
		 * Optimize allocation of BLIST_BMAP_RADIX bits.  If this wasn't
		 * a special case, then forming the final value of 'mask' below
		 * would require special handling to avoid an invalid left shift
		 * when count equals the number of bits in mask.
		 */
		if (~scan->u.bmu_bitmap != 0) {
			scan->bm_bighint = BLIST_BMAP_RADIX - 1;
			return (SWAPBLK_NONE);
		}
		if (cursor != blk)
			return (SWAPBLK_NONE);
		scan->u.bmu_bitmap = 0;
		scan->bm_bighint = 0;
		return (blk);
	}
	range1 = 0;
	count1 = count - 1;
	num_shifts = fls(count1);
	mask = scan->u.bmu_bitmap;
	while (mask != 0 && num_shifts > 0) {
		/*
		 * If bit i is set in mask, then bits in [i, i + range1] are
		 * set in scan->u.bmu_bitmap.  The value of range1 is equal to
		 * count1 >> num_shifts.  Grow range and reduce num_shifts to
		 * 0, while preserving these invariants.  The updates to mask
		 * leave fewer bits set, but each bit that remains set
		 * represents a longer string of consecutive bits set in
		 * scan->u.bmu_bitmap.
		 */
		num_shifts--;
		range_ext = range1 + ((count1 >> num_shifts) & 1);
		mask &= mask >> range_ext;
		range1 += range_ext;
	}
	if (mask == 0) {
		/*
		 * Update bighint.  There is no allocation bigger than range1
		 * available in this leaf.
		 */
		scan->bm_bighint = range1;
		return (SWAPBLK_NONE);
	}

	/*
	 * Discard any candidates that appear before the cursor.
	 */
	lo = cursor - blk;
	mask &= ~(u_daddr_t)0 << lo;

	if (mask == 0)
		return (SWAPBLK_NONE);

	/*
	 * The least significant set bit in mask marks the start of the first
	 * available range of sufficient size.  Clear all the bits but that
	 * one, and then perform a binary search to find its position.
	 */
	mask &= -mask;
	hi = BLIST_BMAP_RADIX - count1;
	while (lo + 1 < hi) {
		mid = (lo + hi) >> 1;
		if ((mask >> mid) != 0)
			lo = mid;
		else
			hi = mid;
	}

	/*
	 * Set in mask exactly the bits being allocated, and clear them from
	 * the set of available bits.
	 */
	mask = (mask << count) - mask;
	scan->u.bmu_bitmap &= ~mask;
	return (blk + lo);
}

/*
 * blist_meta_alloc() -	allocate at a meta in the radix tree.
 *
 *	Attempt to allocate at a meta node.  If we can't, we update
 *	bighint and return a failure.  Updating bighint optimizes future
 *	calls that hit this node.  We have to check for our collapse cases
 *	and we have a few optimizations strewn in as well.
 */
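/*
 * For example, with the typical radix constants, a meta node whose subtree
 * has radix == 16384 has skip == 273 and next_skip == 17, so its sixteen
 * children (each a radix-1024 subtree occupying 17 nodes) begin at array
 * indices 1, 18, 35, ..., 256, and a cursor 'c' within the subtree selects
 * child (c - blk) / 1024.
 */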
static daddr_t
blst_meta_alloc(blmeta_t *scan, daddr_t blk, daddr_t count, daddr_t radix,
    daddr_t cursor)
{
	daddr_t i, next_skip, r, skip;
	int child;
	bool scan_from_start;

	if (radix == BLIST_BMAP_RADIX)
		return (blst_leaf_alloc(scan, blk, count, cursor));
	if (scan->u.bmu_avail < count) {
		/*
		 * The meta node's hint must be too large if the allocation
		 * exceeds the number of free blocks.  Reduce the hint, and
		 * return failure.
		 */
		scan->bm_bighint = scan->u.bmu_avail;
		return (SWAPBLK_NONE);
	}
	skip = radix_to_skip(radix);
	next_skip = skip / BLIST_META_RADIX;

	/*
	 * An ALL-FREE meta node requires special handling before allocating
	 * any of its blocks.
	 */
	if (scan->u.bmu_avail == radix) {
		radix /= BLIST_META_RADIX;

		/*
		 * Reinitialize each of the meta node's children.  An ALL-FREE
		 * meta node cannot have a terminator in any subtree.
		 */
		for (i = 1; i < skip; i += next_skip) {
			if (next_skip == 1)
				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
			else
				scan[i].u.bmu_avail = radix;
			scan[i].bm_bighint = radix;
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	if (count > radix) {
		/*
		 * The allocation exceeds the number of blocks that are
		 * managed by a subtree of this meta node.
		 */
		panic("allocation too large");
	}
	scan_from_start = cursor == blk;
	child = (cursor - blk) / radix;
	blk += child * radix;
	for (i = 1 + child * next_skip; i < skip; i += next_skip) {
		if (count <= scan[i].bm_bighint) {
			/*
			 * The allocation might fit in the i'th subtree.
			 */
			r = blst_meta_alloc(&scan[i], blk, count, radix,
			    cursor > blk ? cursor : blk);
			if (r != SWAPBLK_NONE) {
				scan->u.bmu_avail -= count;
				return (r);
			}
		} else if (scan[i].bm_bighint == (daddr_t)-1) {
			/*
			 * Terminator
			 */
			break;
		}
		blk += radix;
	}

	/*
	 * We couldn't allocate count in this subtree, update bighint.
	 */
	if (scan_from_start && scan->bm_bighint >= count)
		scan->bm_bighint = count - 1;

	return (SWAPBLK_NONE);
}

/*
 * BLST_LEAF_FREE() -	free allocated block from leaf bitmap
 *
 */
static void
blst_leaf_free(blmeta_t *scan, daddr_t blk, int count)
{
	/*
	 * free some data in this bitmap
	 *
	 * e.g.
	 *	0000111111111110000
	 *	    \_________/\__/
	 *		v	 n
	 */
	int n = blk & (BLIST_BMAP_RADIX - 1);
	u_daddr_t mask;

	mask = ((u_daddr_t)-1 << n) &
	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));
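	/*
	 * For example, with BLIST_BMAP_RADIX == 64, n == 4 and count == 3,
	 * the first term sets bits 4-63, the second sets bits 0-6, and the
	 * intersection leaves exactly bits 4-6 set: the blocks being freed.
	 */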

	if (scan->u.bmu_bitmap & mask)
		panic("blst_radix_free: freeing free block");
	scan->u.bmu_bitmap |= mask;

	/*
	 * We could probably do a better job here.  We are required to make
	 * bighint at least as large as the biggest contiguous block of
	 * data.  If we just shoehorn it, a little extra overhead will
	 * be incurred on the next allocation (but only that one typically).
	 */
	scan->bm_bighint = BLIST_BMAP_RADIX;
}

/*
 * BLST_META_FREE() - free allocated blocks from radix tree meta info
 *
 *	This support routine frees a range of blocks from the bitmap.
 *	The range must be entirely enclosed by this radix node.  If a
 *	meta node, we break the range down recursively to free blocks
 *	in subnodes (which means that this code can free an arbitrary
 *	range whereas the allocation code cannot allocate an arbitrary
 *	range).
 */
static void
blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, daddr_t radix,
    daddr_t blk)
{
	daddr_t i, next_skip, skip, v;
	int child;

	if (scan->bm_bighint == (daddr_t)-1)
		panic("freeing invalid range");
	if (radix == BLIST_BMAP_RADIX)
		return (blst_leaf_free(scan, freeBlk, count));
	skip = radix_to_skip(radix);
	next_skip = skip / BLIST_META_RADIX;

	if (scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case, with possible
		 * shortcut to ALL-FREE special case.
		 */
		scan->u.bmu_avail = count;
		scan->bm_bighint = count;

		if (count != radix) {
			for (i = 1; i < skip; i += next_skip) {
				if (scan[i].bm_bighint == (daddr_t)-1)
					break;
				scan[i].bm_bighint = 0;
				if (next_skip == 1) {
					scan[i].u.bmu_bitmap = 0;
				} else {
					scan[i].u.bmu_avail = 0;
				}
			}
			/* fall through */
		}
	} else {
		scan->u.bmu_avail += count;
		/* scan->bm_bighint = radix; */
	}

	/*
	 * ALL-FREE special case.
	 */

	if (scan->u.bmu_avail == radix)
		return;
	if (scan->u.bmu_avail > radix)
		panic("blst_meta_free: freeing already free blocks (%lld) %lld/%lld",
		    (long long)count, (long long)scan->u.bmu_avail,
		    (long long)radix);

	/*
	 * Break the free down into its components
	 */

	radix /= BLIST_META_RADIX;

	child = (freeBlk - blk) / radix;
	blk += child * radix;
	i = 1 + child * next_skip;
	while (i < skip && blk < freeBlk + count) {
		v = blk + radix - freeBlk;
		if (v > count)
			v = count;
		blst_meta_free(&scan[i], freeBlk, v, radix, blk);
		if (scan->bm_bighint < scan[i].bm_bighint)
			scan->bm_bighint = scan[i].bm_bighint;
		count -= v;
		freeBlk += v;
		blk += radix;
		i += next_skip;
	}
}

/*
 * BLIST_RADIX_COPY() - copy one radix tree to another
 *
 *	Locates free space in the source tree and frees it in the destination
 *	tree.  The space may not already be free in the destination.
 */
static void
blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest,
    daddr_t count)
{
	daddr_t i, next_skip, skip;

	/*
	 * Leaf node
	 */

	if (radix == BLIST_BMAP_RADIX) {
		u_daddr_t v = scan->u.bmu_bitmap;

		if (v == (u_daddr_t)-1) {
			blist_free(dest, blk, count);
		} else if (v != 0) {
			int i;

			for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) {
				if (v & ((u_daddr_t)1 << i))
					blist_free(dest, blk + i, 1);
			}
		}
		return;
	}

	/*
	 * Meta node
	 */

	if (scan->u.bmu_avail == 0) {
		/*
		 * Source all allocated, leave dest allocated
		 */
		return;
	}
	if (scan->u.bmu_avail == radix) {
		/*
		 * Source all free, free entire dest
		 */
		if (count < radix)
			blist_free(dest, blk, count);
		else
			blist_free(dest, blk, radix);
		return;
	}

	skip = radix_to_skip(radix);
	next_skip = skip / BLIST_META_RADIX;
	radix /= BLIST_META_RADIX;

	for (i = 1; count && i < skip; i += next_skip) {
		if (scan[i].bm_bighint == (daddr_t)-1)
			break;

		if (count >= radix) {
			blst_copy(&scan[i], blk, radix, dest, radix);
			count -= radix;
		} else {
			if (count) {
				blst_copy(&scan[i], blk, radix, dest, count);
			}
			count = 0;
		}
		blk += radix;
	}
}

/*
 * BLST_LEAF_FILL() -	allocate specific blocks in leaf bitmap
 *
 *	This routine allocates all blocks in the specified range
 *	regardless of any existing allocations in that range.  Returns
 *	the number of blocks allocated by the call.
 */
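/*
 * For example, if only blocks 4 and 5 are free in a leaf and the caller
 * fills the four blocks 4-7, the mask covers bits 4-7, bitcount64() counts
 * the two bits that were still set, all four bits are cleared, and 2 is
 * returned.
 */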
static daddr_t
blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
{
	int n = blk & (BLIST_BMAP_RADIX - 1);
	daddr_t nblks;
	u_daddr_t mask;

	mask = ((u_daddr_t)-1 << n) &
	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));

	/* Count the number of blocks that we are allocating. */
	nblks = bitcount64(scan->u.bmu_bitmap & mask);

	scan->u.bmu_bitmap &= ~mask;
	return (nblks);
}

/*
 * BLIST_META_FILL() -	allocate specific blocks at a meta node
 *
 *	This routine allocates the specified range of blocks,
 *	regardless of any existing allocations in the range.  The
 *	range must be within the extent of this node.  Returns the
 *	number of blocks allocated by the call.
 */
static daddr_t
blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, daddr_t radix,
    daddr_t blk)
{
	daddr_t i, nblks, next_skip, skip, v;
	int child;

	if (scan->bm_bighint == (daddr_t)-1)
		panic("filling invalid range");
	if (count > radix) {
		/*
		 * The allocation exceeds the number of blocks that are
		 * managed by this node.
		 */
		panic("fill too large");
	}
	if (radix == BLIST_BMAP_RADIX)
		return (blst_leaf_fill(scan, allocBlk, count));
	if (count == radix || scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case
		 */
		nblks = scan->u.bmu_avail;
		scan->u.bmu_avail = 0;
		scan->bm_bighint = 0;
		return (nblks);
	}
	skip = radix_to_skip(radix);
	next_skip = skip / BLIST_META_RADIX;

	/*
	 * An ALL-FREE meta node requires special handling before allocating
	 * any of its blocks.
	 */
	if (scan->u.bmu_avail == radix) {
		radix /= BLIST_META_RADIX;

		/*
		 * Reinitialize each of the meta node's children.  An ALL-FREE
		 * meta node cannot have a terminator in any subtree.
		 */
		for (i = 1; i < skip; i += next_skip) {
			if (next_skip == 1)
				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
			else
				scan[i].u.bmu_avail = radix;
			scan[i].bm_bighint = radix;
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	nblks = 0;
	child = (allocBlk - blk) / radix;
	blk += child * radix;
	i = 1 + child * next_skip;
	while (i < skip && blk < allocBlk + count) {
		v = blk + radix - allocBlk;
		if (v > count)
			v = count;
		nblks += blst_meta_fill(&scan[i], allocBlk, v, radix, blk);
		count -= v;
		allocBlk += v;
		blk += radix;
		i += next_skip;
	}
	scan->u.bmu_avail -= nblks;
	return (nblks);
}

/*
 * BLST_RADIX_INIT() - initialize radix tree
 *
 *	Initialize our meta structures and bitmaps and calculate the exact
 *	amount of space required to manage 'count' blocks - this space may
 *	be considerably less than what the calculated radix would otherwise
 *	require, due to the large RADIX values we use.
 */
static daddr_t
blst_radix_init(blmeta_t *scan, daddr_t radix, daddr_t count)
{
	daddr_t i, memindex, next_skip, skip;

	memindex = 0;

	/*
	 * Leaf node
	 */

	if (radix == BLIST_BMAP_RADIX) {
		if (scan) {
			scan->bm_bighint = 0;
			scan->u.bmu_bitmap = 0;
		}
		return (memindex);
	}

	/*
	 * Meta node.  If allocating the entire object we can special
	 * case it.  However, we need to figure out how much memory
	 * is required to manage 'count' blocks, so we continue on anyway.
	 */
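
	/*
	 * For example, with the typical radix constants, a blist created for
	 * 2048 blocks gets a root radix of 16384, but only 36 blmeta_t slots
	 * are allocated rather than the 273 nodes of a complete radix-16384
	 * tree: the root, two fully populated radix-1024 subtrees of 17
	 * nodes each, and a terminator slot.
	 */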

	if (scan) {
		scan->bm_bighint = 0;
		scan->u.bmu_avail = 0;
	}

	skip = radix_to_skip(radix);
	next_skip = skip / BLIST_META_RADIX;
	radix /= BLIST_META_RADIX;

	for (i = 1; i < skip; i += next_skip) {
		if (count >= radix) {
			/*
			 * Allocate the entire object
			 */
			memindex = i +
			    blst_radix_init(((scan) ? &scan[i] : NULL), radix,
			    radix);
			count -= radix;
		} else if (count > 0) {
			/*
			 * Allocate a partial object
			 */
			memindex = i +
			    blst_radix_init(((scan) ? &scan[i] : NULL), radix,
			    count);
			count = 0;
		} else {
			/*
			 * Add terminator and break out
			 */
			if (scan)
				scan[i].bm_bighint = (daddr_t)-1;
			break;
		}
	}
	if (memindex < i)
		memindex = i;
	return (memindex);
}

#ifdef BLIST_DEBUG

static void
blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab)
{
	daddr_t i, next_skip, skip;

	if (radix == BLIST_BMAP_RADIX) {
		printf(
		    "%*.*s(%08llx,%lld): bitmap %016llx big=%lld\n",
		    tab, tab, "",
		    (long long)blk, (long long)radix,
		    (long long)scan->u.bmu_bitmap,
		    (long long)scan->bm_bighint
		);
		return;
	}

	if (scan->u.bmu_avail == 0) {
		printf(
		    "%*.*s(%08llx,%lld) ALL ALLOCATED\n",
		    tab, tab, "",
		    (long long)blk,
		    (long long)radix
		);
		return;
	}
	if (scan->u.bmu_avail == radix) {
		printf(
		    "%*.*s(%08llx,%lld) ALL FREE\n",
		    tab, tab, "",
		    (long long)blk,
		    (long long)radix
		);
		return;
	}

	printf(
	    "%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n",
	    tab, tab, "",
	    (long long)blk, (long long)radix,
	    (long long)scan->u.bmu_avail,
	    (long long)radix,
	    (long long)scan->bm_bighint
	);

	skip = radix_to_skip(radix);
	next_skip = skip / BLIST_META_RADIX;
	radix /= BLIST_META_RADIX;
	tab += 4;

	for (i = 1; i < skip; i += next_skip) {
		if (scan[i].bm_bighint == (daddr_t)-1) {
			printf(
			    "%*.*s(%08llx,%lld): Terminator\n",
			    tab, tab, "",
			    (long long)blk, (long long)radix
			);
			break;
		}
		blst_radix_print(&scan[i], blk, radix, tab);
		blk += radix;
	}
	tab -= 4;

	printf(
	    "%*.*s}\n",
	    tab, tab, ""
	);
}

#endif

#ifdef BLIST_DEBUG

int
main(int ac, char **av)
{
	int size = 1024;
	int i;
	blist_t bl;

	for (i = 1; i < ac; ++i) {
		const char *ptr = av[i];
		if (*ptr != '-') {
			size = strtol(ptr, NULL, 0);
			continue;
		}
		ptr += 2;
		fprintf(stderr, "Bad option: %s\n", ptr - 2);
		exit(1);
	}
	bl = blist_create(size, M_WAITOK);
	blist_free(bl, 0, size);

	for (;;) {
		char buf[1024];
		long long da = 0;
		long long count = 0;

		printf("%lld/%lld/%lld> ", (long long)blist_avail(bl),
		    (long long)size, (long long)bl->bl_radix);
		fflush(stdout);
		if (fgets(buf, sizeof(buf), stdin) == NULL)
			break;
		switch(buf[0]) {
		case 'r':
			if (sscanf(buf + 1, "%lld", &count) == 1) {
				blist_resize(&bl, count, 1, M_WAITOK);
			} else {
				printf("?\n");
			}
		case 'p':
			blist_print(bl);
			break;
		case 'a':
			if (sscanf(buf + 1, "%lld", &count) == 1) {
				daddr_t blk = blist_alloc(bl, count);
				printf("    R=%08llx\n", (long long)blk);
			} else {
				printf("?\n");
			}
			break;
		case 'f':
			if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
				blist_free(bl, da, count);
			} else {
				printf("?\n");
			}
			break;
		case 'l':
			if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
				printf("    n=%jd\n",
				    (intmax_t)blist_fill(bl, da, count));
			} else {
				printf("?\n");
			}
			break;
		case '?':
		case 'h':
			puts(
			    "p          -print\n"
			    "a %d       -allocate\n"
			    "f %x %d    -free\n"
			    "l %x %d    -fill\n"
			    "r %d       -resize\n"
			    "h/?        -help"
			);
			break;
		default:
			printf("?\n");
			break;
		}
	}
	return (0);
}

void
panic(const char *ctl, ...)
{
	va_list va;

	va_start(va, ctl);
	vfprintf(stderr, ctl, va);
	fprintf(stderr, "\n");
	va_end(va);
	exit(1);
}

#endif