106b4bf3eSWarner Losh /*- 206b4bf3eSWarner Losh * Copyright (c) 1998 Matthew Dillon. All Rights Reserved. 306b4bf3eSWarner Losh * Redistribution and use in source and binary forms, with or without 406b4bf3eSWarner Losh * modification, are permitted provided that the following conditions 506b4bf3eSWarner Losh * are met: 606b4bf3eSWarner Losh * 1. Redistributions of source code must retain the above copyright 706b4bf3eSWarner Losh * notice, this list of conditions and the following disclaimer. 806b4bf3eSWarner Losh * 2. Redistributions in binary form must reproduce the above copyright 906b4bf3eSWarner Losh * notice, this list of conditions and the following disclaimer in the 1006b4bf3eSWarner Losh * documentation and/or other materials provided with the distribution. 1169a28758SEd Maste * 3. Neither the name of the University nor the names of its contributors 1206b4bf3eSWarner Losh * may be used to endorse or promote products derived from this software 1306b4bf3eSWarner Losh * without specific prior written permission. 1406b4bf3eSWarner Losh * 1506b4bf3eSWarner Losh * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 1606b4bf3eSWarner Losh * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 1706b4bf3eSWarner Losh * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1806b4bf3eSWarner Losh * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 1906b4bf3eSWarner Losh * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2006b4bf3eSWarner Losh * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 2106b4bf3eSWarner Losh * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 2206b4bf3eSWarner Losh * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 2306b4bf3eSWarner Losh * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 2406b4bf3eSWarner Losh * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 2506b4bf3eSWarner Losh * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 2606b4bf3eSWarner Losh */ 277090df5aSMatthew Dillon /* 287090df5aSMatthew Dillon * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting 297090df5aSMatthew Dillon * 307090df5aSMatthew Dillon * This module implements a general bitmap allocator/deallocator. The 317090df5aSMatthew Dillon * allocator eats around 2 bits per 'block'. The module does not 32f565f395SEitan Adler * try to interpret the meaning of a 'block' other than to return 337090df5aSMatthew Dillon * SWAPBLK_NONE on an allocation failure. 347090df5aSMatthew Dillon * 357090df5aSMatthew Dillon * A radix tree is used to maintain the bitmap. Two radix constants are 367090df5aSMatthew Dillon * involved: One for the bitmaps contained in the leaf nodes (typically 377090df5aSMatthew Dillon * 32), and one for the meta nodes (typically 16). Both meta and leaf 387090df5aSMatthew Dillon * nodes have a hint field. This field gives us a hint as to the largest 397090df5aSMatthew Dillon * free contiguous range of blocks under the node. It may contain a 407090df5aSMatthew Dillon * value that is too high, but will never contain a value that is too 417090df5aSMatthew Dillon * low. When the radix tree is searched, allocation failures in subtrees 427090df5aSMatthew Dillon * update the hint. 437090df5aSMatthew Dillon * 447090df5aSMatthew Dillon * The radix tree also implements two collapsed states for meta nodes: 457090df5aSMatthew Dillon * the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is 467090df5aSMatthew Dillon * in either of these two states, all information contained underneath 477090df5aSMatthew Dillon * the node is considered stale. These states are used to optimize 487090df5aSMatthew Dillon * allocation and freeing operations. 497090df5aSMatthew Dillon * 507090df5aSMatthew Dillon * The hinting greatly increases code efficiency for allocations while 517090df5aSMatthew Dillon * the general radix structure optimizes both allocations and frees. The 527090df5aSMatthew Dillon * radix tree should be able to operate well no matter how much 537090df5aSMatthew Dillon * fragmentation there is and no matter how large a bitmap is used. 547090df5aSMatthew Dillon * 5551dc4feaSSergey Kandaurov * The blist code wires all necessary memory at creation time. Neither 5651dc4feaSSergey Kandaurov * allocations nor frees require interaction with the memory subsystem. 5751dc4feaSSergey Kandaurov * The non-blocking features of the blist code are used in the swap code 5851dc4feaSSergey Kandaurov * (vm/swap_pager.c). 597090df5aSMatthew Dillon * 60e3043798SPedro F. Giffuni * LAYOUT: The radix tree is laid out recursively using a 61e3043798SPedro F. Giffuni * linear array. Each meta node is immediately followed (laid out 627090df5aSMatthew Dillon * sequentially in memory) by BLIST_META_RADIX lower level nodes. This 637090df5aSMatthew Dillon * is a recursive structure but one that can be easily scanned through 647090df5aSMatthew Dillon * a very simple 'skip' calculation. In order to support large radixes, 657090df5aSMatthew Dillon * portions of the tree may reside outside our memory allocation. We 667090df5aSMatthew Dillon * handle this with an early-termination optimization (when bighint is 677090df5aSMatthew Dillon * set to -1) on the scan. The memory allocation is only large enough 687090df5aSMatthew Dillon * to cover the number of blocks requested at creation time even if it 697090df5aSMatthew Dillon * must be encompassed in larger root-node radix. 707090df5aSMatthew Dillon * 71f565f395SEitan Adler * NOTE: the allocator cannot currently allocate more than 727090df5aSMatthew Dillon * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too 737090df5aSMatthew Dillon * large' if you try. This is an area that could use improvement. The 747090df5aSMatthew Dillon * radix is large enough that this restriction does not effect the swap 757090df5aSMatthew Dillon * system, though. Currently only the allocation code is effected by 767090df5aSMatthew Dillon * this algorithmic unfeature. The freeing code can handle arbitrary 777090df5aSMatthew Dillon * ranges. 787090df5aSMatthew Dillon * 797090df5aSMatthew Dillon * This code can be compiled stand-alone for debugging. 807090df5aSMatthew Dillon */ 817090df5aSMatthew Dillon 82677b542eSDavid E. O'Brien #include <sys/cdefs.h> 83677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 84677b542eSDavid E. O'Brien 85c4473420SPeter Wemm #ifdef _KERNEL 867090df5aSMatthew Dillon 877090df5aSMatthew Dillon #include <sys/param.h> 887090df5aSMatthew Dillon #include <sys/systm.h> 897090df5aSMatthew Dillon #include <sys/lock.h> 907090df5aSMatthew Dillon #include <sys/kernel.h> 917090df5aSMatthew Dillon #include <sys/blist.h> 927090df5aSMatthew Dillon #include <sys/malloc.h> 930cddd8f0SMatthew Dillon #include <sys/proc.h> 9423955314SAlfred Perlstein #include <sys/mutex.h> 957090df5aSMatthew Dillon 967090df5aSMatthew Dillon #else 977090df5aSMatthew Dillon 987090df5aSMatthew Dillon #ifndef BLIST_NO_DEBUG 997090df5aSMatthew Dillon #define BLIST_DEBUG 1007090df5aSMatthew Dillon #endif 1017090df5aSMatthew Dillon 1027090df5aSMatthew Dillon #include <sys/types.h> 103d90bf7d8SAlan Cox #include <sys/malloc.h> 1047090df5aSMatthew Dillon #include <stdio.h> 1057090df5aSMatthew Dillon #include <string.h> 1067090df5aSMatthew Dillon #include <stdlib.h> 1077090df5aSMatthew Dillon #include <stdarg.h> 1087090df5aSMatthew Dillon 109*4be4fd5dSAlan Cox #define bitcount64(x) __bitcount64((uint64_t)(x)) 11092da00bbSMatthew Dillon #define malloc(a,b,c) calloc(a, 1) 1117090df5aSMatthew Dillon #define free(a,b) free(a) 1127090df5aSMatthew Dillon 1137090df5aSMatthew Dillon #include <sys/blist.h> 1147090df5aSMatthew Dillon 1157090df5aSMatthew Dillon void panic(const char *ctl, ...); 1167090df5aSMatthew Dillon 1177090df5aSMatthew Dillon #endif 1187090df5aSMatthew Dillon 1197090df5aSMatthew Dillon /* 1207090df5aSMatthew Dillon * static support functions 1217090df5aSMatthew Dillon */ 1227090df5aSMatthew Dillon 1237090df5aSMatthew Dillon static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count); 1247090df5aSMatthew Dillon static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t blk, 1257090df5aSMatthew Dillon daddr_t count, daddr_t radix, int skip); 1267090df5aSMatthew Dillon static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count); 1277090df5aSMatthew Dillon static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, 1287090df5aSMatthew Dillon daddr_t radix, int skip, daddr_t blk); 1297090df5aSMatthew Dillon static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, 1307090df5aSMatthew Dillon daddr_t skip, blist_t dest, daddr_t count); 131a7249a6cSAlan Cox static daddr_t blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count); 132a7249a6cSAlan Cox static daddr_t blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, 13392da00bbSMatthew Dillon daddr_t radix, int skip, daddr_t blk); 1347090df5aSMatthew Dillon static daddr_t blst_radix_init(blmeta_t *scan, daddr_t radix, 1357090df5aSMatthew Dillon int skip, daddr_t count); 136c4473420SPeter Wemm #ifndef _KERNEL 1377090df5aSMatthew Dillon static void blst_radix_print(blmeta_t *scan, daddr_t blk, 1387090df5aSMatthew Dillon daddr_t radix, int skip, int tab); 1397090df5aSMatthew Dillon #endif 1407090df5aSMatthew Dillon 141c4473420SPeter Wemm #ifdef _KERNEL 1427090df5aSMatthew Dillon static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space"); 1437090df5aSMatthew Dillon #endif 1447090df5aSMatthew Dillon 1457090df5aSMatthew Dillon /* 1467090df5aSMatthew Dillon * blist_create() - create a blist capable of handling up to the specified 1477090df5aSMatthew Dillon * number of blocks 1487090df5aSMatthew Dillon * 149f565f395SEitan Adler * blocks - must be greater than 0 150c8c7ad92SKip Macy * flags - malloc flags 1517090df5aSMatthew Dillon * 1527090df5aSMatthew Dillon * The smallest blist consists of a single leaf node capable of 1537090df5aSMatthew Dillon * managing BLIST_BMAP_RADIX blocks. 1547090df5aSMatthew Dillon */ 1557090df5aSMatthew Dillon 1567090df5aSMatthew Dillon blist_t 157c8c7ad92SKip Macy blist_create(daddr_t blocks, int flags) 1587090df5aSMatthew Dillon { 1597090df5aSMatthew Dillon blist_t bl; 160015d7db6SAlan Cox daddr_t nodes, radix; 1617090df5aSMatthew Dillon int skip = 0; 1627090df5aSMatthew Dillon 1637090df5aSMatthew Dillon /* 1647090df5aSMatthew Dillon * Calculate radix and skip field used for scanning. 1657090df5aSMatthew Dillon */ 1667090df5aSMatthew Dillon radix = BLIST_BMAP_RADIX; 1677090df5aSMatthew Dillon 1687090df5aSMatthew Dillon while (radix < blocks) { 16957e6d29bSMatthew Dillon radix *= BLIST_META_RADIX; 17057e6d29bSMatthew Dillon skip = (skip + 1) * BLIST_META_RADIX; 1717090df5aSMatthew Dillon } 1727090df5aSMatthew Dillon 173*4be4fd5dSAlan Cox bl = malloc(sizeof(struct blist), M_SWAP, flags); 174015d7db6SAlan Cox if (bl == NULL) 175015d7db6SAlan Cox return (NULL); 1767090df5aSMatthew Dillon 1777090df5aSMatthew Dillon bl->bl_blocks = blocks; 1787090df5aSMatthew Dillon bl->bl_radix = radix; 1797090df5aSMatthew Dillon bl->bl_skip = skip; 180015d7db6SAlan Cox nodes = 1 + blst_radix_init(NULL, radix, bl->bl_skip, blocks); 181015d7db6SAlan Cox bl->bl_root = malloc(nodes * sizeof(blmeta_t), M_SWAP, flags); 182015d7db6SAlan Cox if (bl->bl_root == NULL) { 183015d7db6SAlan Cox free(bl, M_SWAP); 184015d7db6SAlan Cox return (NULL); 185015d7db6SAlan Cox } 186015d7db6SAlan Cox blst_radix_init(bl->bl_root, radix, bl->bl_skip, blocks); 1877090df5aSMatthew Dillon 1887090df5aSMatthew Dillon #if defined(BLIST_DEBUG) 1897090df5aSMatthew Dillon printf( 19092da00bbSMatthew Dillon "BLIST representing %lld blocks (%lld MB of swap)" 19192da00bbSMatthew Dillon ", requiring %lldK of ram\n", 19292da00bbSMatthew Dillon (long long)bl->bl_blocks, 19392da00bbSMatthew Dillon (long long)bl->bl_blocks * 4 / 1024, 194015d7db6SAlan Cox (long long)(nodes * sizeof(blmeta_t) + 1023) / 1024 1957090df5aSMatthew Dillon ); 19692da00bbSMatthew Dillon printf("BLIST raw radix tree contains %lld records\n", 197015d7db6SAlan Cox (long long)nodes); 1987090df5aSMatthew Dillon #endif 1997090df5aSMatthew Dillon 2007090df5aSMatthew Dillon return (bl); 2017090df5aSMatthew Dillon } 2027090df5aSMatthew Dillon 2037090df5aSMatthew Dillon void 2047090df5aSMatthew Dillon blist_destroy(blist_t bl) 2057090df5aSMatthew Dillon { 2067090df5aSMatthew Dillon free(bl->bl_root, M_SWAP); 2077090df5aSMatthew Dillon free(bl, M_SWAP); 2087090df5aSMatthew Dillon } 2097090df5aSMatthew Dillon 2107090df5aSMatthew Dillon /* 2117090df5aSMatthew Dillon * blist_alloc() - reserve space in the block bitmap. Return the base 2127090df5aSMatthew Dillon * of a contiguous region or SWAPBLK_NONE if space could 2137090df5aSMatthew Dillon * not be allocated. 2147090df5aSMatthew Dillon */ 2157090df5aSMatthew Dillon 2167090df5aSMatthew Dillon daddr_t 2177090df5aSMatthew Dillon blist_alloc(blist_t bl, daddr_t count) 2187090df5aSMatthew Dillon { 219*4be4fd5dSAlan Cox daddr_t blk; 2207090df5aSMatthew Dillon 221*4be4fd5dSAlan Cox if (bl != NULL && count <= bl->bl_root->bm_bighint) { 2227090df5aSMatthew Dillon if (bl->bl_radix == BLIST_BMAP_RADIX) 2237090df5aSMatthew Dillon blk = blst_leaf_alloc(bl->bl_root, 0, count); 2247090df5aSMatthew Dillon else 225*4be4fd5dSAlan Cox blk = blst_meta_alloc(bl->bl_root, 0, count, 226*4be4fd5dSAlan Cox bl->bl_radix, bl->bl_skip); 2277090df5aSMatthew Dillon return (blk); 2287090df5aSMatthew Dillon } 229*4be4fd5dSAlan Cox return (SWAPBLK_NONE); 230*4be4fd5dSAlan Cox } 231*4be4fd5dSAlan Cox 232*4be4fd5dSAlan Cox /* 233*4be4fd5dSAlan Cox * blist_avail() - return the number of free blocks. 234*4be4fd5dSAlan Cox */ 235*4be4fd5dSAlan Cox 236*4be4fd5dSAlan Cox daddr_t 237*4be4fd5dSAlan Cox blist_avail(blist_t bl) 238*4be4fd5dSAlan Cox { 239*4be4fd5dSAlan Cox 240*4be4fd5dSAlan Cox if (bl->bl_radix == BLIST_BMAP_RADIX) 241*4be4fd5dSAlan Cox return (bitcount64(bl->bl_root->u.bmu_bitmap)); 242*4be4fd5dSAlan Cox else 243*4be4fd5dSAlan Cox return (bl->bl_root->u.bmu_avail); 244*4be4fd5dSAlan Cox } 2457090df5aSMatthew Dillon 2467090df5aSMatthew Dillon /* 2477090df5aSMatthew Dillon * blist_free() - free up space in the block bitmap. Return the base 2487090df5aSMatthew Dillon * of a contiguous region. Panic if an inconsistancy is 2497090df5aSMatthew Dillon * found. 2507090df5aSMatthew Dillon */ 2517090df5aSMatthew Dillon 2527090df5aSMatthew Dillon void 2537090df5aSMatthew Dillon blist_free(blist_t bl, daddr_t blkno, daddr_t count) 2547090df5aSMatthew Dillon { 2557090df5aSMatthew Dillon if (bl) { 2567090df5aSMatthew Dillon if (bl->bl_radix == BLIST_BMAP_RADIX) 2577090df5aSMatthew Dillon blst_leaf_free(bl->bl_root, blkno, count); 2587090df5aSMatthew Dillon else 259*4be4fd5dSAlan Cox blst_meta_free(bl->bl_root, blkno, count, 260*4be4fd5dSAlan Cox bl->bl_radix, bl->bl_skip, 0); 2617090df5aSMatthew Dillon } 2627090df5aSMatthew Dillon } 2637090df5aSMatthew Dillon 2647090df5aSMatthew Dillon /* 26592da00bbSMatthew Dillon * blist_fill() - mark a region in the block bitmap as off-limits 26692da00bbSMatthew Dillon * to the allocator (i.e. allocate it), ignoring any 26792da00bbSMatthew Dillon * existing allocations. Return the number of blocks 26892da00bbSMatthew Dillon * actually filled that were free before the call. 26992da00bbSMatthew Dillon */ 27092da00bbSMatthew Dillon 271a7249a6cSAlan Cox daddr_t 27292da00bbSMatthew Dillon blist_fill(blist_t bl, daddr_t blkno, daddr_t count) 27392da00bbSMatthew Dillon { 274a7249a6cSAlan Cox daddr_t filled; 27592da00bbSMatthew Dillon 27692da00bbSMatthew Dillon if (bl) { 27792da00bbSMatthew Dillon if (bl->bl_radix == BLIST_BMAP_RADIX) 27892da00bbSMatthew Dillon filled = blst_leaf_fill(bl->bl_root, blkno, count); 27992da00bbSMatthew Dillon else 28092da00bbSMatthew Dillon filled = blst_meta_fill(bl->bl_root, blkno, count, 28192da00bbSMatthew Dillon bl->bl_radix, bl->bl_skip, 0); 282*4be4fd5dSAlan Cox return (filled); 283*4be4fd5dSAlan Cox } 284*4be4fd5dSAlan Cox return (0); 28592da00bbSMatthew Dillon } 28692da00bbSMatthew Dillon 28792da00bbSMatthew Dillon /* 2887090df5aSMatthew Dillon * blist_resize() - resize an existing radix tree to handle the 2897090df5aSMatthew Dillon * specified number of blocks. This will reallocate 2907090df5aSMatthew Dillon * the tree and transfer the previous bitmap to the new 2917090df5aSMatthew Dillon * one. When extending the tree you can specify whether 2927090df5aSMatthew Dillon * the new blocks are to left allocated or freed. 2937090df5aSMatthew Dillon */ 2947090df5aSMatthew Dillon 2957090df5aSMatthew Dillon void 296c8c7ad92SKip Macy blist_resize(blist_t *pbl, daddr_t count, int freenew, int flags) 2977090df5aSMatthew Dillon { 298c8c7ad92SKip Macy blist_t newbl = blist_create(count, flags); 2997090df5aSMatthew Dillon blist_t save = *pbl; 3007090df5aSMatthew Dillon 3017090df5aSMatthew Dillon *pbl = newbl; 3027090df5aSMatthew Dillon if (count > save->bl_blocks) 3037090df5aSMatthew Dillon count = save->bl_blocks; 3047090df5aSMatthew Dillon blst_copy(save->bl_root, 0, save->bl_radix, save->bl_skip, newbl, count); 3057090df5aSMatthew Dillon 3067090df5aSMatthew Dillon /* 3077090df5aSMatthew Dillon * If resizing upwards, should we free the new space or not? 3087090df5aSMatthew Dillon */ 3097090df5aSMatthew Dillon if (freenew && count < newbl->bl_blocks) { 3107090df5aSMatthew Dillon blist_free(newbl, count, newbl->bl_blocks - count); 3117090df5aSMatthew Dillon } 3127090df5aSMatthew Dillon blist_destroy(save); 3137090df5aSMatthew Dillon } 3147090df5aSMatthew Dillon 3157090df5aSMatthew Dillon #ifdef BLIST_DEBUG 3167090df5aSMatthew Dillon 3177090df5aSMatthew Dillon /* 3187090df5aSMatthew Dillon * blist_print() - dump radix tree 3197090df5aSMatthew Dillon */ 3207090df5aSMatthew Dillon 3217090df5aSMatthew Dillon void 3227090df5aSMatthew Dillon blist_print(blist_t bl) 3237090df5aSMatthew Dillon { 3247090df5aSMatthew Dillon printf("BLIST {\n"); 3257090df5aSMatthew Dillon blst_radix_print(bl->bl_root, 0, bl->bl_radix, bl->bl_skip, 4); 3267090df5aSMatthew Dillon printf("}\n"); 3277090df5aSMatthew Dillon } 3287090df5aSMatthew Dillon 3297090df5aSMatthew Dillon #endif 3307090df5aSMatthew Dillon 3317090df5aSMatthew Dillon /************************************************************************ 3327090df5aSMatthew Dillon * ALLOCATION SUPPORT FUNCTIONS * 3337090df5aSMatthew Dillon ************************************************************************ 3347090df5aSMatthew Dillon * 3357090df5aSMatthew Dillon * These support functions do all the actual work. They may seem 3367090df5aSMatthew Dillon * rather longish, but that's because I've commented them up. The 3377090df5aSMatthew Dillon * actual code is straight forward. 3387090df5aSMatthew Dillon * 3397090df5aSMatthew Dillon */ 3407090df5aSMatthew Dillon 3417090df5aSMatthew Dillon /* 3427090df5aSMatthew Dillon * blist_leaf_alloc() - allocate at a leaf in the radix tree (a bitmap). 3437090df5aSMatthew Dillon * 3447090df5aSMatthew Dillon * This is the core of the allocator and is optimized for the 1 block 3457090df5aSMatthew Dillon * and the BLIST_BMAP_RADIX block allocation cases. Other cases are 3467090df5aSMatthew Dillon * somewhat slower. The 1 block allocation case is log2 and extremely 3477090df5aSMatthew Dillon * quick. 3487090df5aSMatthew Dillon */ 3497090df5aSMatthew Dillon 3507090df5aSMatthew Dillon static daddr_t 3517090df5aSMatthew Dillon blst_leaf_alloc( 3527090df5aSMatthew Dillon blmeta_t *scan, 3537090df5aSMatthew Dillon daddr_t blk, 3547090df5aSMatthew Dillon int count 3557090df5aSMatthew Dillon ) { 3567090df5aSMatthew Dillon u_daddr_t orig = scan->u.bmu_bitmap; 3577090df5aSMatthew Dillon 3587090df5aSMatthew Dillon if (orig == 0) { 3597090df5aSMatthew Dillon /* 3607090df5aSMatthew Dillon * Optimize bitmap all-allocated case. Also, count = 1 3617090df5aSMatthew Dillon * case assumes at least 1 bit is free in the bitmap, so 3627090df5aSMatthew Dillon * we have to take care of this case here. 3637090df5aSMatthew Dillon */ 3647090df5aSMatthew Dillon scan->bm_bighint = 0; 3657090df5aSMatthew Dillon return(SWAPBLK_NONE); 3667090df5aSMatthew Dillon } 3677090df5aSMatthew Dillon if (count == 1) { 3687090df5aSMatthew Dillon /* 3697090df5aSMatthew Dillon * Optimized code to allocate one bit out of the bitmap 3707090df5aSMatthew Dillon */ 3717090df5aSMatthew Dillon u_daddr_t mask; 3727090df5aSMatthew Dillon int j = BLIST_BMAP_RADIX/2; 3737090df5aSMatthew Dillon int r = 0; 3747090df5aSMatthew Dillon 3757090df5aSMatthew Dillon mask = (u_daddr_t)-1 >> (BLIST_BMAP_RADIX/2); 3767090df5aSMatthew Dillon 3777090df5aSMatthew Dillon while (j) { 3787090df5aSMatthew Dillon if ((orig & mask) == 0) { 3797090df5aSMatthew Dillon r += j; 3807090df5aSMatthew Dillon orig >>= j; 3817090df5aSMatthew Dillon } 3827090df5aSMatthew Dillon j >>= 1; 3837090df5aSMatthew Dillon mask >>= j; 3847090df5aSMatthew Dillon } 385064650c1SAlan Cox scan->u.bmu_bitmap &= ~((u_daddr_t)1 << r); 3867090df5aSMatthew Dillon return(blk + r); 3877090df5aSMatthew Dillon } 3887090df5aSMatthew Dillon if (count <= BLIST_BMAP_RADIX) { 3897090df5aSMatthew Dillon /* 3907090df5aSMatthew Dillon * non-optimized code to allocate N bits out of the bitmap. 3917090df5aSMatthew Dillon * The more bits, the faster the code runs. It will run 3927090df5aSMatthew Dillon * the slowest allocating 2 bits, but since there aren't any 3937090df5aSMatthew Dillon * memory ops in the core loop (or shouldn't be, anyway), 3947090df5aSMatthew Dillon * you probably won't notice the difference. 3957090df5aSMatthew Dillon */ 3967090df5aSMatthew Dillon int j; 3977090df5aSMatthew Dillon int n = BLIST_BMAP_RADIX - count; 3987090df5aSMatthew Dillon u_daddr_t mask; 3997090df5aSMatthew Dillon 4007090df5aSMatthew Dillon mask = (u_daddr_t)-1 >> n; 4017090df5aSMatthew Dillon 4027090df5aSMatthew Dillon for (j = 0; j <= n; ++j) { 4037090df5aSMatthew Dillon if ((orig & mask) == mask) { 4047090df5aSMatthew Dillon scan->u.bmu_bitmap &= ~mask; 4057090df5aSMatthew Dillon return(blk + j); 4067090df5aSMatthew Dillon } 4077090df5aSMatthew Dillon mask = (mask << 1); 4087090df5aSMatthew Dillon } 4097090df5aSMatthew Dillon } 4107090df5aSMatthew Dillon /* 4117090df5aSMatthew Dillon * We couldn't allocate count in this subtree, update bighint. 4127090df5aSMatthew Dillon */ 4137090df5aSMatthew Dillon scan->bm_bighint = count - 1; 4147090df5aSMatthew Dillon return(SWAPBLK_NONE); 4157090df5aSMatthew Dillon } 4167090df5aSMatthew Dillon 4177090df5aSMatthew Dillon /* 4187090df5aSMatthew Dillon * blist_meta_alloc() - allocate at a meta in the radix tree. 4197090df5aSMatthew Dillon * 4207090df5aSMatthew Dillon * Attempt to allocate at a meta node. If we can't, we update 4217090df5aSMatthew Dillon * bighint and return a failure. Updating bighint optimize future 4227090df5aSMatthew Dillon * calls that hit this node. We have to check for our collapse cases 4237090df5aSMatthew Dillon * and we have a few optimizations strewn in as well. 4247090df5aSMatthew Dillon */ 4257090df5aSMatthew Dillon 4267090df5aSMatthew Dillon static daddr_t 4277090df5aSMatthew Dillon blst_meta_alloc( 4287090df5aSMatthew Dillon blmeta_t *scan, 4297090df5aSMatthew Dillon daddr_t blk, 4307090df5aSMatthew Dillon daddr_t count, 4317090df5aSMatthew Dillon daddr_t radix, 4327090df5aSMatthew Dillon int skip 4337090df5aSMatthew Dillon ) { 434*4be4fd5dSAlan Cox daddr_t r; 4357090df5aSMatthew Dillon int i; 43657e6d29bSMatthew Dillon int next_skip = ((u_int)skip / BLIST_META_RADIX); 4377090df5aSMatthew Dillon 438*4be4fd5dSAlan Cox if (scan->u.bmu_avail < count) { 4397090df5aSMatthew Dillon /* 440*4be4fd5dSAlan Cox * The meta node's hint must be too large if the allocation 441*4be4fd5dSAlan Cox * exceeds the number of free blocks. Reduce the hint, and 442*4be4fd5dSAlan Cox * return failure. 4437090df5aSMatthew Dillon */ 444*4be4fd5dSAlan Cox scan->bm_bighint = scan->u.bmu_avail; 4457090df5aSMatthew Dillon return (SWAPBLK_NONE); 4467090df5aSMatthew Dillon } 4477090df5aSMatthew Dillon 448*4be4fd5dSAlan Cox /* 449*4be4fd5dSAlan Cox * An ALL-FREE meta node requires special handling before allocating 450*4be4fd5dSAlan Cox * any of its blocks. 451*4be4fd5dSAlan Cox */ 4527090df5aSMatthew Dillon if (scan->u.bmu_avail == radix) { 45357e6d29bSMatthew Dillon radix /= BLIST_META_RADIX; 4547090df5aSMatthew Dillon 4557090df5aSMatthew Dillon /* 456*4be4fd5dSAlan Cox * Reinitialize each of the meta node's children. An ALL-FREE 457*4be4fd5dSAlan Cox * meta node cannot have a terminator in any subtree. 4587090df5aSMatthew Dillon */ 4597090df5aSMatthew Dillon for (i = 1; i <= skip; i += next_skip) { 4607090df5aSMatthew Dillon if (next_skip == 1) { 4617090df5aSMatthew Dillon scan[i].u.bmu_bitmap = (u_daddr_t)-1; 4627090df5aSMatthew Dillon scan[i].bm_bighint = BLIST_BMAP_RADIX; 4637090df5aSMatthew Dillon } else { 4647090df5aSMatthew Dillon scan[i].bm_bighint = radix; 4657090df5aSMatthew Dillon scan[i].u.bmu_avail = radix; 4667090df5aSMatthew Dillon } 4677090df5aSMatthew Dillon } 4687090df5aSMatthew Dillon } else { 46957e6d29bSMatthew Dillon radix /= BLIST_META_RADIX; 4707090df5aSMatthew Dillon } 4717090df5aSMatthew Dillon 472*4be4fd5dSAlan Cox if (count > radix) { 473*4be4fd5dSAlan Cox /* 474*4be4fd5dSAlan Cox * The allocation exceeds the number of blocks that are 475*4be4fd5dSAlan Cox * managed by a subtree of this meta node. 476*4be4fd5dSAlan Cox */ 477*4be4fd5dSAlan Cox panic("allocation too large"); 478*4be4fd5dSAlan Cox } 4797090df5aSMatthew Dillon for (i = 1; i <= skip; i += next_skip) { 4807090df5aSMatthew Dillon if (count <= scan[i].bm_bighint) { 4817090df5aSMatthew Dillon /* 482*4be4fd5dSAlan Cox * The allocation might fit in the i'th subtree. 4837090df5aSMatthew Dillon */ 4847090df5aSMatthew Dillon if (next_skip == 1) { 4857090df5aSMatthew Dillon r = blst_leaf_alloc(&scan[i], blk, count); 4867090df5aSMatthew Dillon } else { 487*4be4fd5dSAlan Cox r = blst_meta_alloc(&scan[i], blk, count, 488*4be4fd5dSAlan Cox radix, next_skip - 1); 4897090df5aSMatthew Dillon } 4907090df5aSMatthew Dillon if (r != SWAPBLK_NONE) { 4917090df5aSMatthew Dillon scan->u.bmu_avail -= count; 4927090df5aSMatthew Dillon return (r); 4937090df5aSMatthew Dillon } 4947090df5aSMatthew Dillon } else if (scan[i].bm_bighint == (daddr_t)-1) { 4957090df5aSMatthew Dillon /* 4967090df5aSMatthew Dillon * Terminator 4977090df5aSMatthew Dillon */ 4987090df5aSMatthew Dillon break; 4997090df5aSMatthew Dillon } 5007090df5aSMatthew Dillon blk += radix; 5017090df5aSMatthew Dillon } 5027090df5aSMatthew Dillon 5037090df5aSMatthew Dillon /* 5047090df5aSMatthew Dillon * We couldn't allocate count in this subtree, update bighint. 5057090df5aSMatthew Dillon */ 5067090df5aSMatthew Dillon if (scan->bm_bighint >= count) 5077090df5aSMatthew Dillon scan->bm_bighint = count - 1; 5087090df5aSMatthew Dillon return(SWAPBLK_NONE); 5097090df5aSMatthew Dillon } 5107090df5aSMatthew Dillon 5117090df5aSMatthew Dillon /* 5127090df5aSMatthew Dillon * BLST_LEAF_FREE() - free allocated block from leaf bitmap 5137090df5aSMatthew Dillon * 5147090df5aSMatthew Dillon */ 5157090df5aSMatthew Dillon 5167090df5aSMatthew Dillon static void 5177090df5aSMatthew Dillon blst_leaf_free( 5187090df5aSMatthew Dillon blmeta_t *scan, 5197090df5aSMatthew Dillon daddr_t blk, 5207090df5aSMatthew Dillon int count 5217090df5aSMatthew Dillon ) { 5227090df5aSMatthew Dillon /* 5237090df5aSMatthew Dillon * free some data in this bitmap 5247090df5aSMatthew Dillon * 5257090df5aSMatthew Dillon * e.g. 5267090df5aSMatthew Dillon * 0000111111111110000 5277090df5aSMatthew Dillon * \_________/\__/ 5287090df5aSMatthew Dillon * v n 5297090df5aSMatthew Dillon */ 5307090df5aSMatthew Dillon int n = blk & (BLIST_BMAP_RADIX - 1); 5317090df5aSMatthew Dillon u_daddr_t mask; 5327090df5aSMatthew Dillon 5337090df5aSMatthew Dillon mask = ((u_daddr_t)-1 << n) & 5347090df5aSMatthew Dillon ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n)); 5357090df5aSMatthew Dillon 5367090df5aSMatthew Dillon if (scan->u.bmu_bitmap & mask) 5377090df5aSMatthew Dillon panic("blst_radix_free: freeing free block"); 5387090df5aSMatthew Dillon scan->u.bmu_bitmap |= mask; 5397090df5aSMatthew Dillon 5407090df5aSMatthew Dillon /* 5417090df5aSMatthew Dillon * We could probably do a better job here. We are required to make 5427090df5aSMatthew Dillon * bighint at least as large as the biggest contiguous block of 5437090df5aSMatthew Dillon * data. If we just shoehorn it, a little extra overhead will 5447090df5aSMatthew Dillon * be incured on the next allocation (but only that one typically). 5457090df5aSMatthew Dillon */ 5467090df5aSMatthew Dillon scan->bm_bighint = BLIST_BMAP_RADIX; 5477090df5aSMatthew Dillon } 5487090df5aSMatthew Dillon 5497090df5aSMatthew Dillon /* 5507090df5aSMatthew Dillon * BLST_META_FREE() - free allocated blocks from radix tree meta info 5517090df5aSMatthew Dillon * 5527090df5aSMatthew Dillon * This support routine frees a range of blocks from the bitmap. 5537090df5aSMatthew Dillon * The range must be entirely enclosed by this radix node. If a 5547090df5aSMatthew Dillon * meta node, we break the range down recursively to free blocks 5557090df5aSMatthew Dillon * in subnodes (which means that this code can free an arbitrary 5567090df5aSMatthew Dillon * range whereas the allocation code cannot allocate an arbitrary 5577090df5aSMatthew Dillon * range). 5587090df5aSMatthew Dillon */ 5597090df5aSMatthew Dillon 5607090df5aSMatthew Dillon static void 5617090df5aSMatthew Dillon blst_meta_free( 5627090df5aSMatthew Dillon blmeta_t *scan, 5637090df5aSMatthew Dillon daddr_t freeBlk, 5647090df5aSMatthew Dillon daddr_t count, 5657090df5aSMatthew Dillon daddr_t radix, 5667090df5aSMatthew Dillon int skip, 5677090df5aSMatthew Dillon daddr_t blk 5687090df5aSMatthew Dillon ) { 5697090df5aSMatthew Dillon int i; 57057e6d29bSMatthew Dillon int next_skip = ((u_int)skip / BLIST_META_RADIX); 5717090df5aSMatthew Dillon 5727090df5aSMatthew Dillon #if 0 5731ede983cSDag-Erling Smørgrav printf("free (%llx,%lld) FROM (%llx,%lld)\n", 57492da00bbSMatthew Dillon (long long)freeBlk, (long long)count, 57592da00bbSMatthew Dillon (long long)blk, (long long)radix 5767090df5aSMatthew Dillon ); 5777090df5aSMatthew Dillon #endif 5787090df5aSMatthew Dillon 5797090df5aSMatthew Dillon if (scan->u.bmu_avail == 0) { 5807090df5aSMatthew Dillon /* 5817090df5aSMatthew Dillon * ALL-ALLOCATED special case, with possible 5827090df5aSMatthew Dillon * shortcut to ALL-FREE special case. 5837090df5aSMatthew Dillon */ 5847090df5aSMatthew Dillon scan->u.bmu_avail = count; 5857090df5aSMatthew Dillon scan->bm_bighint = count; 5867090df5aSMatthew Dillon 5877090df5aSMatthew Dillon if (count != radix) { 5887090df5aSMatthew Dillon for (i = 1; i <= skip; i += next_skip) { 5897090df5aSMatthew Dillon if (scan[i].bm_bighint == (daddr_t)-1) 5907090df5aSMatthew Dillon break; 5917090df5aSMatthew Dillon scan[i].bm_bighint = 0; 5927090df5aSMatthew Dillon if (next_skip == 1) { 5937090df5aSMatthew Dillon scan[i].u.bmu_bitmap = 0; 5947090df5aSMatthew Dillon } else { 5957090df5aSMatthew Dillon scan[i].u.bmu_avail = 0; 5967090df5aSMatthew Dillon } 5977090df5aSMatthew Dillon } 5987090df5aSMatthew Dillon /* fall through */ 5997090df5aSMatthew Dillon } 6007090df5aSMatthew Dillon } else { 6017090df5aSMatthew Dillon scan->u.bmu_avail += count; 6027090df5aSMatthew Dillon /* scan->bm_bighint = radix; */ 6037090df5aSMatthew Dillon } 6047090df5aSMatthew Dillon 6057090df5aSMatthew Dillon /* 6067090df5aSMatthew Dillon * ALL-FREE special case. 6077090df5aSMatthew Dillon */ 6087090df5aSMatthew Dillon 6097090df5aSMatthew Dillon if (scan->u.bmu_avail == radix) 6107090df5aSMatthew Dillon return; 6117090df5aSMatthew Dillon if (scan->u.bmu_avail > radix) 612bdc9a8d0SJohn Baldwin panic("blst_meta_free: freeing already free blocks (%lld) %lld/%lld", 613bdc9a8d0SJohn Baldwin (long long)count, (long long)scan->u.bmu_avail, 614bdc9a8d0SJohn Baldwin (long long)radix); 6157090df5aSMatthew Dillon 6167090df5aSMatthew Dillon /* 6177090df5aSMatthew Dillon * Break the free down into its components 6187090df5aSMatthew Dillon */ 6197090df5aSMatthew Dillon 62057e6d29bSMatthew Dillon radix /= BLIST_META_RADIX; 6217090df5aSMatthew Dillon 6227090df5aSMatthew Dillon i = (freeBlk - blk) / radix; 6237090df5aSMatthew Dillon blk += i * radix; 6247090df5aSMatthew Dillon i = i * next_skip + 1; 6257090df5aSMatthew Dillon 6267090df5aSMatthew Dillon while (i <= skip && blk < freeBlk + count) { 6277090df5aSMatthew Dillon daddr_t v; 6287090df5aSMatthew Dillon 6297090df5aSMatthew Dillon v = blk + radix - freeBlk; 6307090df5aSMatthew Dillon if (v > count) 6317090df5aSMatthew Dillon v = count; 6327090df5aSMatthew Dillon 6337090df5aSMatthew Dillon if (scan->bm_bighint == (daddr_t)-1) 6347090df5aSMatthew Dillon panic("blst_meta_free: freeing unexpected range"); 6357090df5aSMatthew Dillon 6367090df5aSMatthew Dillon if (next_skip == 1) { 6377090df5aSMatthew Dillon blst_leaf_free(&scan[i], freeBlk, v); 6387090df5aSMatthew Dillon } else { 6397090df5aSMatthew Dillon blst_meta_free(&scan[i], freeBlk, v, radix, next_skip - 1, blk); 6407090df5aSMatthew Dillon } 6417090df5aSMatthew Dillon if (scan->bm_bighint < scan[i].bm_bighint) 6427090df5aSMatthew Dillon scan->bm_bighint = scan[i].bm_bighint; 6437090df5aSMatthew Dillon count -= v; 6447090df5aSMatthew Dillon freeBlk += v; 6457090df5aSMatthew Dillon blk += radix; 6467090df5aSMatthew Dillon i += next_skip; 6477090df5aSMatthew Dillon } 6487090df5aSMatthew Dillon } 6497090df5aSMatthew Dillon 6507090df5aSMatthew Dillon /* 6517090df5aSMatthew Dillon * BLIST_RADIX_COPY() - copy one radix tree to another 6527090df5aSMatthew Dillon * 6537090df5aSMatthew Dillon * Locates free space in the source tree and frees it in the destination 6547090df5aSMatthew Dillon * tree. The space may not already be free in the destination. 6557090df5aSMatthew Dillon */ 6567090df5aSMatthew Dillon 6577090df5aSMatthew Dillon static void blst_copy( 6587090df5aSMatthew Dillon blmeta_t *scan, 6597090df5aSMatthew Dillon daddr_t blk, 6607090df5aSMatthew Dillon daddr_t radix, 6617090df5aSMatthew Dillon daddr_t skip, 6627090df5aSMatthew Dillon blist_t dest, 6637090df5aSMatthew Dillon daddr_t count 6647090df5aSMatthew Dillon ) { 6657090df5aSMatthew Dillon int next_skip; 6667090df5aSMatthew Dillon int i; 6677090df5aSMatthew Dillon 6687090df5aSMatthew Dillon /* 6697090df5aSMatthew Dillon * Leaf node 6707090df5aSMatthew Dillon */ 6717090df5aSMatthew Dillon 6727090df5aSMatthew Dillon if (radix == BLIST_BMAP_RADIX) { 6737090df5aSMatthew Dillon u_daddr_t v = scan->u.bmu_bitmap; 6747090df5aSMatthew Dillon 6757090df5aSMatthew Dillon if (v == (u_daddr_t)-1) { 6767090df5aSMatthew Dillon blist_free(dest, blk, count); 6777090df5aSMatthew Dillon } else if (v != 0) { 6787090df5aSMatthew Dillon int i; 6797090df5aSMatthew Dillon 6807090df5aSMatthew Dillon for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) { 681064650c1SAlan Cox if (v & ((u_daddr_t)1 << i)) 6827090df5aSMatthew Dillon blist_free(dest, blk + i, 1); 6837090df5aSMatthew Dillon } 6847090df5aSMatthew Dillon } 6857090df5aSMatthew Dillon return; 6867090df5aSMatthew Dillon } 6877090df5aSMatthew Dillon 6887090df5aSMatthew Dillon /* 6897090df5aSMatthew Dillon * Meta node 6907090df5aSMatthew Dillon */ 6917090df5aSMatthew Dillon 6927090df5aSMatthew Dillon if (scan->u.bmu_avail == 0) { 6937090df5aSMatthew Dillon /* 6947090df5aSMatthew Dillon * Source all allocated, leave dest allocated 6957090df5aSMatthew Dillon */ 6967090df5aSMatthew Dillon return; 6977090df5aSMatthew Dillon } 6987090df5aSMatthew Dillon if (scan->u.bmu_avail == radix) { 6997090df5aSMatthew Dillon /* 7007090df5aSMatthew Dillon * Source all free, free entire dest 7017090df5aSMatthew Dillon */ 7027090df5aSMatthew Dillon if (count < radix) 7037090df5aSMatthew Dillon blist_free(dest, blk, count); 7047090df5aSMatthew Dillon else 7057090df5aSMatthew Dillon blist_free(dest, blk, radix); 7067090df5aSMatthew Dillon return; 7077090df5aSMatthew Dillon } 7087090df5aSMatthew Dillon 7097090df5aSMatthew Dillon 71057e6d29bSMatthew Dillon radix /= BLIST_META_RADIX; 71157e6d29bSMatthew Dillon next_skip = ((u_int)skip / BLIST_META_RADIX); 7127090df5aSMatthew Dillon 7137090df5aSMatthew Dillon for (i = 1; count && i <= skip; i += next_skip) { 7147090df5aSMatthew Dillon if (scan[i].bm_bighint == (daddr_t)-1) 7157090df5aSMatthew Dillon break; 7167090df5aSMatthew Dillon 7177090df5aSMatthew Dillon if (count >= radix) { 7187090df5aSMatthew Dillon blst_copy( 7197090df5aSMatthew Dillon &scan[i], 7207090df5aSMatthew Dillon blk, 7217090df5aSMatthew Dillon radix, 7227090df5aSMatthew Dillon next_skip - 1, 7237090df5aSMatthew Dillon dest, 7247090df5aSMatthew Dillon radix 7257090df5aSMatthew Dillon ); 7267090df5aSMatthew Dillon count -= radix; 7277090df5aSMatthew Dillon } else { 7287090df5aSMatthew Dillon if (count) { 7297090df5aSMatthew Dillon blst_copy( 7307090df5aSMatthew Dillon &scan[i], 7317090df5aSMatthew Dillon blk, 7327090df5aSMatthew Dillon radix, 7337090df5aSMatthew Dillon next_skip - 1, 7347090df5aSMatthew Dillon dest, 7357090df5aSMatthew Dillon count 7367090df5aSMatthew Dillon ); 7377090df5aSMatthew Dillon } 7387090df5aSMatthew Dillon count = 0; 7397090df5aSMatthew Dillon } 7407090df5aSMatthew Dillon blk += radix; 7417090df5aSMatthew Dillon } 7427090df5aSMatthew Dillon } 7437090df5aSMatthew Dillon 7447090df5aSMatthew Dillon /* 74592da00bbSMatthew Dillon * BLST_LEAF_FILL() - allocate specific blocks in leaf bitmap 74692da00bbSMatthew Dillon * 74792da00bbSMatthew Dillon * This routine allocates all blocks in the specified range 74892da00bbSMatthew Dillon * regardless of any existing allocations in that range. Returns 74992da00bbSMatthew Dillon * the number of blocks allocated by the call. 75092da00bbSMatthew Dillon */ 75192da00bbSMatthew Dillon 752a7249a6cSAlan Cox static daddr_t 75392da00bbSMatthew Dillon blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count) 75492da00bbSMatthew Dillon { 75592da00bbSMatthew Dillon int n = blk & (BLIST_BMAP_RADIX - 1); 756a7249a6cSAlan Cox daddr_t nblks; 757*4be4fd5dSAlan Cox u_daddr_t mask; 75892da00bbSMatthew Dillon 75992da00bbSMatthew Dillon mask = ((u_daddr_t)-1 << n) & 76092da00bbSMatthew Dillon ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n)); 76192da00bbSMatthew Dillon 762*4be4fd5dSAlan Cox /* Count the number of blocks that we are allocating. */ 763*4be4fd5dSAlan Cox nblks = bitcount64(scan->u.bmu_bitmap & mask); 76492da00bbSMatthew Dillon 76592da00bbSMatthew Dillon scan->u.bmu_bitmap &= ~mask; 766*4be4fd5dSAlan Cox return (nblks); 76792da00bbSMatthew Dillon } 76892da00bbSMatthew Dillon 76992da00bbSMatthew Dillon /* 77092da00bbSMatthew Dillon * BLIST_META_FILL() - allocate specific blocks at a meta node 77192da00bbSMatthew Dillon * 77292da00bbSMatthew Dillon * This routine allocates the specified range of blocks, 77392da00bbSMatthew Dillon * regardless of any existing allocations in the range. The 77492da00bbSMatthew Dillon * range must be within the extent of this node. Returns the 77592da00bbSMatthew Dillon * number of blocks allocated by the call. 77692da00bbSMatthew Dillon */ 777a7249a6cSAlan Cox static daddr_t 77892da00bbSMatthew Dillon blst_meta_fill( 77992da00bbSMatthew Dillon blmeta_t *scan, 78092da00bbSMatthew Dillon daddr_t allocBlk, 78192da00bbSMatthew Dillon daddr_t count, 78292da00bbSMatthew Dillon daddr_t radix, 78392da00bbSMatthew Dillon int skip, 78492da00bbSMatthew Dillon daddr_t blk 78592da00bbSMatthew Dillon ) { 78692da00bbSMatthew Dillon int i; 78757e6d29bSMatthew Dillon int next_skip = ((u_int)skip / BLIST_META_RADIX); 788a7249a6cSAlan Cox daddr_t nblks = 0; 78992da00bbSMatthew Dillon 790*4be4fd5dSAlan Cox if (count > radix) { 791*4be4fd5dSAlan Cox /* 792*4be4fd5dSAlan Cox * The allocation exceeds the number of blocks that are 793*4be4fd5dSAlan Cox * managed by this meta node. 794*4be4fd5dSAlan Cox */ 795*4be4fd5dSAlan Cox panic("allocation too large"); 796*4be4fd5dSAlan Cox } 79792da00bbSMatthew Dillon if (count == radix || scan->u.bmu_avail == 0) { 79892da00bbSMatthew Dillon /* 79992da00bbSMatthew Dillon * ALL-ALLOCATED special case 80092da00bbSMatthew Dillon */ 80192da00bbSMatthew Dillon nblks = scan->u.bmu_avail; 80292da00bbSMatthew Dillon scan->u.bmu_avail = 0; 80386dd278fSAlan Cox scan->bm_bighint = 0; 80492da00bbSMatthew Dillon return nblks; 80592da00bbSMatthew Dillon } 80692da00bbSMatthew Dillon 807*4be4fd5dSAlan Cox /* 808*4be4fd5dSAlan Cox * An ALL-FREE meta node requires special handling before allocating 809*4be4fd5dSAlan Cox * any of its blocks. 810*4be4fd5dSAlan Cox */ 81192da00bbSMatthew Dillon if (scan->u.bmu_avail == radix) { 81257e6d29bSMatthew Dillon radix /= BLIST_META_RADIX; 81392da00bbSMatthew Dillon 81492da00bbSMatthew Dillon /* 815*4be4fd5dSAlan Cox * Reinitialize each of the meta node's children. An ALL-FREE 816*4be4fd5dSAlan Cox * meta node cannot have a terminator in any subtree. 81792da00bbSMatthew Dillon */ 81892da00bbSMatthew Dillon for (i = 1; i <= skip; i += next_skip) { 81992da00bbSMatthew Dillon if (next_skip == 1) { 82092da00bbSMatthew Dillon scan[i].u.bmu_bitmap = (u_daddr_t)-1; 82192da00bbSMatthew Dillon scan[i].bm_bighint = BLIST_BMAP_RADIX; 82292da00bbSMatthew Dillon } else { 82392da00bbSMatthew Dillon scan[i].bm_bighint = radix; 82492da00bbSMatthew Dillon scan[i].u.bmu_avail = radix; 82592da00bbSMatthew Dillon } 82692da00bbSMatthew Dillon } 82792da00bbSMatthew Dillon } else { 82857e6d29bSMatthew Dillon radix /= BLIST_META_RADIX; 82992da00bbSMatthew Dillon } 83092da00bbSMatthew Dillon 83192da00bbSMatthew Dillon i = (allocBlk - blk) / radix; 83292da00bbSMatthew Dillon blk += i * radix; 83392da00bbSMatthew Dillon i = i * next_skip + 1; 83492da00bbSMatthew Dillon 83592da00bbSMatthew Dillon while (i <= skip && blk < allocBlk + count) { 83692da00bbSMatthew Dillon daddr_t v; 83792da00bbSMatthew Dillon 83892da00bbSMatthew Dillon v = blk + radix - allocBlk; 83992da00bbSMatthew Dillon if (v > count) 84092da00bbSMatthew Dillon v = count; 84192da00bbSMatthew Dillon 84292da00bbSMatthew Dillon if (scan->bm_bighint == (daddr_t)-1) 84392da00bbSMatthew Dillon panic("blst_meta_fill: filling unexpected range"); 84492da00bbSMatthew Dillon 84592da00bbSMatthew Dillon if (next_skip == 1) { 84692da00bbSMatthew Dillon nblks += blst_leaf_fill(&scan[i], allocBlk, v); 84792da00bbSMatthew Dillon } else { 84892da00bbSMatthew Dillon nblks += blst_meta_fill(&scan[i], allocBlk, v, 84992da00bbSMatthew Dillon radix, next_skip - 1, blk); 85092da00bbSMatthew Dillon } 85192da00bbSMatthew Dillon count -= v; 85292da00bbSMatthew Dillon allocBlk += v; 85392da00bbSMatthew Dillon blk += radix; 85492da00bbSMatthew Dillon i += next_skip; 85592da00bbSMatthew Dillon } 85692da00bbSMatthew Dillon scan->u.bmu_avail -= nblks; 85792da00bbSMatthew Dillon return nblks; 85892da00bbSMatthew Dillon } 85992da00bbSMatthew Dillon 86092da00bbSMatthew Dillon /* 8617090df5aSMatthew Dillon * BLST_RADIX_INIT() - initialize radix tree 8627090df5aSMatthew Dillon * 8637090df5aSMatthew Dillon * Initialize our meta structures and bitmaps and calculate the exact 8647090df5aSMatthew Dillon * amount of space required to manage 'count' blocks - this space may 865f565f395SEitan Adler * be considerably less than the calculated radix due to the large 8667090df5aSMatthew Dillon * RADIX values we use. 8677090df5aSMatthew Dillon */ 8687090df5aSMatthew Dillon 8697090df5aSMatthew Dillon static daddr_t 8707090df5aSMatthew Dillon blst_radix_init(blmeta_t *scan, daddr_t radix, int skip, daddr_t count) 8717090df5aSMatthew Dillon { 8727090df5aSMatthew Dillon int i; 8737090df5aSMatthew Dillon int next_skip; 8747090df5aSMatthew Dillon daddr_t memindex = 0; 8757090df5aSMatthew Dillon 8767090df5aSMatthew Dillon /* 8777090df5aSMatthew Dillon * Leaf node 8787090df5aSMatthew Dillon */ 8797090df5aSMatthew Dillon 8807090df5aSMatthew Dillon if (radix == BLIST_BMAP_RADIX) { 8817090df5aSMatthew Dillon if (scan) { 8827090df5aSMatthew Dillon scan->bm_bighint = 0; 8837090df5aSMatthew Dillon scan->u.bmu_bitmap = 0; 8847090df5aSMatthew Dillon } 8857090df5aSMatthew Dillon return(memindex); 8867090df5aSMatthew Dillon } 8877090df5aSMatthew Dillon 8887090df5aSMatthew Dillon /* 8897090df5aSMatthew Dillon * Meta node. If allocating the entire object we can special 8907090df5aSMatthew Dillon * case it. However, we need to figure out how much memory 8917090df5aSMatthew Dillon * is required to manage 'count' blocks, so we continue on anyway. 8927090df5aSMatthew Dillon */ 8937090df5aSMatthew Dillon 8947090df5aSMatthew Dillon if (scan) { 8957090df5aSMatthew Dillon scan->bm_bighint = 0; 8967090df5aSMatthew Dillon scan->u.bmu_avail = 0; 8977090df5aSMatthew Dillon } 8987090df5aSMatthew Dillon 89957e6d29bSMatthew Dillon radix /= BLIST_META_RADIX; 90057e6d29bSMatthew Dillon next_skip = ((u_int)skip / BLIST_META_RADIX); 9017090df5aSMatthew Dillon 9027090df5aSMatthew Dillon for (i = 1; i <= skip; i += next_skip) { 9037090df5aSMatthew Dillon if (count >= radix) { 9047090df5aSMatthew Dillon /* 9057090df5aSMatthew Dillon * Allocate the entire object 9067090df5aSMatthew Dillon */ 9077090df5aSMatthew Dillon memindex = i + blst_radix_init( 9087090df5aSMatthew Dillon ((scan) ? &scan[i] : NULL), 9097090df5aSMatthew Dillon radix, 9107090df5aSMatthew Dillon next_skip - 1, 9117090df5aSMatthew Dillon radix 9127090df5aSMatthew Dillon ); 9137090df5aSMatthew Dillon count -= radix; 9147090df5aSMatthew Dillon } else if (count > 0) { 9157090df5aSMatthew Dillon /* 9167090df5aSMatthew Dillon * Allocate a partial object 9177090df5aSMatthew Dillon */ 9187090df5aSMatthew Dillon memindex = i + blst_radix_init( 9197090df5aSMatthew Dillon ((scan) ? &scan[i] : NULL), 9207090df5aSMatthew Dillon radix, 9217090df5aSMatthew Dillon next_skip - 1, 9227090df5aSMatthew Dillon count 9237090df5aSMatthew Dillon ); 9247090df5aSMatthew Dillon count = 0; 9257090df5aSMatthew Dillon } else { 9267090df5aSMatthew Dillon /* 9277090df5aSMatthew Dillon * Add terminator and break out 9287090df5aSMatthew Dillon */ 9297090df5aSMatthew Dillon if (scan) 9307090df5aSMatthew Dillon scan[i].bm_bighint = (daddr_t)-1; 9317090df5aSMatthew Dillon break; 9327090df5aSMatthew Dillon } 9337090df5aSMatthew Dillon } 9347090df5aSMatthew Dillon if (memindex < i) 9357090df5aSMatthew Dillon memindex = i; 9367090df5aSMatthew Dillon return(memindex); 9377090df5aSMatthew Dillon } 9387090df5aSMatthew Dillon 9397090df5aSMatthew Dillon #ifdef BLIST_DEBUG 9407090df5aSMatthew Dillon 9417090df5aSMatthew Dillon static void 9427090df5aSMatthew Dillon blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab) 9437090df5aSMatthew Dillon { 9447090df5aSMatthew Dillon int i; 9457090df5aSMatthew Dillon int next_skip; 9467090df5aSMatthew Dillon int lastState = 0; 9477090df5aSMatthew Dillon 9487090df5aSMatthew Dillon if (radix == BLIST_BMAP_RADIX) { 9497090df5aSMatthew Dillon printf( 950d90bf7d8SAlan Cox "%*.*s(%08llx,%lld): bitmap %016llx big=%lld\n", 9517090df5aSMatthew Dillon tab, tab, "", 95292da00bbSMatthew Dillon (long long)blk, (long long)radix, 95392da00bbSMatthew Dillon (long long)scan->u.bmu_bitmap, 95492da00bbSMatthew Dillon (long long)scan->bm_bighint 9557090df5aSMatthew Dillon ); 9567090df5aSMatthew Dillon return; 9577090df5aSMatthew Dillon } 9587090df5aSMatthew Dillon 9597090df5aSMatthew Dillon if (scan->u.bmu_avail == 0) { 9607090df5aSMatthew Dillon printf( 96192da00bbSMatthew Dillon "%*.*s(%08llx,%lld) ALL ALLOCATED\n", 9627090df5aSMatthew Dillon tab, tab, "", 96392da00bbSMatthew Dillon (long long)blk, 96492da00bbSMatthew Dillon (long long)radix 9657090df5aSMatthew Dillon ); 9667090df5aSMatthew Dillon return; 9677090df5aSMatthew Dillon } 9687090df5aSMatthew Dillon if (scan->u.bmu_avail == radix) { 9697090df5aSMatthew Dillon printf( 97092da00bbSMatthew Dillon "%*.*s(%08llx,%lld) ALL FREE\n", 9717090df5aSMatthew Dillon tab, tab, "", 97292da00bbSMatthew Dillon (long long)blk, 97392da00bbSMatthew Dillon (long long)radix 9747090df5aSMatthew Dillon ); 9757090df5aSMatthew Dillon return; 9767090df5aSMatthew Dillon } 9777090df5aSMatthew Dillon 9787090df5aSMatthew Dillon printf( 97992da00bbSMatthew Dillon "%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n", 9807090df5aSMatthew Dillon tab, tab, "", 98192da00bbSMatthew Dillon (long long)blk, (long long)radix, 98292da00bbSMatthew Dillon (long long)scan->u.bmu_avail, 98392da00bbSMatthew Dillon (long long)radix, 98492da00bbSMatthew Dillon (long long)scan->bm_bighint 9857090df5aSMatthew Dillon ); 9867090df5aSMatthew Dillon 98757e6d29bSMatthew Dillon radix /= BLIST_META_RADIX; 98857e6d29bSMatthew Dillon next_skip = ((u_int)skip / BLIST_META_RADIX); 9897090df5aSMatthew Dillon tab += 4; 9907090df5aSMatthew Dillon 9917090df5aSMatthew Dillon for (i = 1; i <= skip; i += next_skip) { 9927090df5aSMatthew Dillon if (scan[i].bm_bighint == (daddr_t)-1) { 9937090df5aSMatthew Dillon printf( 99492da00bbSMatthew Dillon "%*.*s(%08llx,%lld): Terminator\n", 9957090df5aSMatthew Dillon tab, tab, "", 99692da00bbSMatthew Dillon (long long)blk, (long long)radix 9977090df5aSMatthew Dillon ); 9987090df5aSMatthew Dillon lastState = 0; 9997090df5aSMatthew Dillon break; 10007090df5aSMatthew Dillon } 10017090df5aSMatthew Dillon blst_radix_print( 10027090df5aSMatthew Dillon &scan[i], 10037090df5aSMatthew Dillon blk, 10047090df5aSMatthew Dillon radix, 10057090df5aSMatthew Dillon next_skip - 1, 10067090df5aSMatthew Dillon tab 10077090df5aSMatthew Dillon ); 10087090df5aSMatthew Dillon blk += radix; 10097090df5aSMatthew Dillon } 10107090df5aSMatthew Dillon tab -= 4; 10117090df5aSMatthew Dillon 10127090df5aSMatthew Dillon printf( 10137090df5aSMatthew Dillon "%*.*s}\n", 10147090df5aSMatthew Dillon tab, tab, "" 10157090df5aSMatthew Dillon ); 10167090df5aSMatthew Dillon } 10177090df5aSMatthew Dillon 10187090df5aSMatthew Dillon #endif 10197090df5aSMatthew Dillon 10207090df5aSMatthew Dillon #ifdef BLIST_DEBUG 10217090df5aSMatthew Dillon 10227090df5aSMatthew Dillon int 10237090df5aSMatthew Dillon main(int ac, char **av) 10247090df5aSMatthew Dillon { 10257090df5aSMatthew Dillon int size = 1024; 10267090df5aSMatthew Dillon int i; 10277090df5aSMatthew Dillon blist_t bl; 10287090df5aSMatthew Dillon 10297090df5aSMatthew Dillon for (i = 1; i < ac; ++i) { 10307090df5aSMatthew Dillon const char *ptr = av[i]; 10317090df5aSMatthew Dillon if (*ptr != '-') { 10327090df5aSMatthew Dillon size = strtol(ptr, NULL, 0); 10337090df5aSMatthew Dillon continue; 10347090df5aSMatthew Dillon } 10357090df5aSMatthew Dillon ptr += 2; 10367090df5aSMatthew Dillon fprintf(stderr, "Bad option: %s\n", ptr - 2); 10377090df5aSMatthew Dillon exit(1); 10387090df5aSMatthew Dillon } 1039c8c7ad92SKip Macy bl = blist_create(size, M_WAITOK); 10407090df5aSMatthew Dillon blist_free(bl, 0, size); 10417090df5aSMatthew Dillon 10427090df5aSMatthew Dillon for (;;) { 10437090df5aSMatthew Dillon char buf[1024]; 1044d90bf7d8SAlan Cox long long da = 0; 1045d90bf7d8SAlan Cox long long count = 0; 10467090df5aSMatthew Dillon 1047*4be4fd5dSAlan Cox printf("%lld/%lld/%lld> ", (long long)blist_avail(bl), 104892da00bbSMatthew Dillon (long long)size, (long long)bl->bl_radix); 10497090df5aSMatthew Dillon fflush(stdout); 10507090df5aSMatthew Dillon if (fgets(buf, sizeof(buf), stdin) == NULL) 10517090df5aSMatthew Dillon break; 10527090df5aSMatthew Dillon switch(buf[0]) { 10537090df5aSMatthew Dillon case 'r': 105492da00bbSMatthew Dillon if (sscanf(buf + 1, "%lld", &count) == 1) { 1055d90bf7d8SAlan Cox blist_resize(&bl, count, 1, M_WAITOK); 10567090df5aSMatthew Dillon } else { 10577090df5aSMatthew Dillon printf("?\n"); 10587090df5aSMatthew Dillon } 10597090df5aSMatthew Dillon case 'p': 10607090df5aSMatthew Dillon blist_print(bl); 10617090df5aSMatthew Dillon break; 10627090df5aSMatthew Dillon case 'a': 106392da00bbSMatthew Dillon if (sscanf(buf + 1, "%lld", &count) == 1) { 10647090df5aSMatthew Dillon daddr_t blk = blist_alloc(bl, count); 106592da00bbSMatthew Dillon printf(" R=%08llx\n", (long long)blk); 10667090df5aSMatthew Dillon } else { 10677090df5aSMatthew Dillon printf("?\n"); 10687090df5aSMatthew Dillon } 10697090df5aSMatthew Dillon break; 10707090df5aSMatthew Dillon case 'f': 1071d90bf7d8SAlan Cox if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) { 10727090df5aSMatthew Dillon blist_free(bl, da, count); 10737090df5aSMatthew Dillon } else { 10747090df5aSMatthew Dillon printf("?\n"); 10757090df5aSMatthew Dillon } 10767090df5aSMatthew Dillon break; 107792da00bbSMatthew Dillon case 'l': 1078d90bf7d8SAlan Cox if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) { 1079a7249a6cSAlan Cox printf(" n=%jd\n", 1080a7249a6cSAlan Cox (intmax_t)blist_fill(bl, da, count)); 108192da00bbSMatthew Dillon } else { 108292da00bbSMatthew Dillon printf("?\n"); 108392da00bbSMatthew Dillon } 108492da00bbSMatthew Dillon break; 10857090df5aSMatthew Dillon case '?': 10867090df5aSMatthew Dillon case 'h': 10877090df5aSMatthew Dillon puts( 10887090df5aSMatthew Dillon "p -print\n" 10897090df5aSMatthew Dillon "a %d -allocate\n" 10907090df5aSMatthew Dillon "f %x %d -free\n" 109192da00bbSMatthew Dillon "l %x %d -fill\n" 10927090df5aSMatthew Dillon "r %d -resize\n" 10937090df5aSMatthew Dillon "h/? -help" 10947090df5aSMatthew Dillon ); 10957090df5aSMatthew Dillon break; 10967090df5aSMatthew Dillon default: 10977090df5aSMatthew Dillon printf("?\n"); 10987090df5aSMatthew Dillon break; 10997090df5aSMatthew Dillon } 11007090df5aSMatthew Dillon } 11017090df5aSMatthew Dillon return(0); 11027090df5aSMatthew Dillon } 11037090df5aSMatthew Dillon 11047090df5aSMatthew Dillon void 11057090df5aSMatthew Dillon panic(const char *ctl, ...) 11067090df5aSMatthew Dillon { 11077090df5aSMatthew Dillon va_list va; 11087090df5aSMatthew Dillon 11097090df5aSMatthew Dillon va_start(va, ctl); 11107090df5aSMatthew Dillon vfprintf(stderr, ctl, va); 11117090df5aSMatthew Dillon fprintf(stderr, "\n"); 11127090df5aSMatthew Dillon va_end(va); 11137090df5aSMatthew Dillon exit(1); 11147090df5aSMatthew Dillon } 11157090df5aSMatthew Dillon 11167090df5aSMatthew Dillon #endif 11177090df5aSMatthew Dillon 1118