xref: /freebsd/sys/kern/subr_blist.c (revision 51369649b03ece2aed3eb61b0c8214b9aa5b2fa2)
106b4bf3eSWarner Losh /*-
2*51369649SPedro F. Giffuni  * SPDX-License-Identifier: BSD-3-Clause
3*51369649SPedro F. Giffuni  *
406b4bf3eSWarner Losh  * Copyright (c) 1998 Matthew Dillon.  All Rights Reserved.
506b4bf3eSWarner Losh  * Redistribution and use in source and binary forms, with or without
606b4bf3eSWarner Losh  * modification, are permitted provided that the following conditions
706b4bf3eSWarner Losh  * are met:
806b4bf3eSWarner Losh  * 1. Redistributions of source code must retain the above copyright
906b4bf3eSWarner Losh  *    notice, this list of conditions and the following disclaimer.
1006b4bf3eSWarner Losh  * 2. Redistributions in binary form must reproduce the above copyright
1106b4bf3eSWarner Losh  *    notice, this list of conditions and the following disclaimer in the
1206b4bf3eSWarner Losh  *    documentation and/or other materials provided with the distribution.
1369a28758SEd Maste  * 3. Neither the name of the University nor the names of its contributors
1406b4bf3eSWarner Losh  *    may be used to endorse or promote products derived from this software
1506b4bf3eSWarner Losh  *    without specific prior written permission.
1606b4bf3eSWarner Losh  *
1706b4bf3eSWarner Losh  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
1806b4bf3eSWarner Losh  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1906b4bf3eSWarner Losh  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2006b4bf3eSWarner Losh  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
2106b4bf3eSWarner Losh  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2206b4bf3eSWarner Losh  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
2306b4bf3eSWarner Losh  * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2406b4bf3eSWarner Losh  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
2506b4bf3eSWarner Losh  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
2606b4bf3eSWarner Losh  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
2706b4bf3eSWarner Losh  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2806b4bf3eSWarner Losh  */
297090df5aSMatthew Dillon /*
307090df5aSMatthew Dillon  * BLIST.C -	Bitmap allocator/deallocator, using a radix tree with hinting
317090df5aSMatthew Dillon  *
327090df5aSMatthew Dillon  *	This module implements a general bitmap allocator/deallocator.  The
337090df5aSMatthew Dillon  *	allocator eats around 2 bits per 'block'.  The module does not
34f565f395SEitan Adler  *	try to interpret the meaning of a 'block' other than to return
357090df5aSMatthew Dillon  *	SWAPBLK_NONE on an allocation failure.
367090df5aSMatthew Dillon  *
37ec371b57SAlan Cox  *	A radix tree controls access to pieces of the bitmap, and includes
38ec371b57SAlan Cox  *	auxiliary information at each interior node about the availability of
39ec371b57SAlan Cox  *	contiguous free blocks in the subtree rooted at that node.  Two radix
40ec371b57SAlan Cox  *	constants are involved: one for the size of the bitmaps contained in the
41ec371b57SAlan Cox  *	leaf nodes (BLIST_BMAP_RADIX), and one for the number of descendants of
42ec371b57SAlan Cox  *	each of the meta (interior) nodes (BLIST_META_RADIX).  Each subtree is
43ec371b57SAlan Cox  *	associated with a range of blocks.  The root of any subtree stores a
44ec371b57SAlan Cox  *	hint field that defines an upper bound on the size of the largest
45ec371b57SAlan Cox  *	allocation that can begin in the associated block range.  A hint is an
46ec371b57SAlan Cox  *	upper bound on a potential allocation, but not necessarily a tight upper
47ec371b57SAlan Cox  *	bound.
487090df5aSMatthew Dillon  *
497090df5aSMatthew Dillon  *	The radix tree also implements two collapsed states for meta nodes:
507090df5aSMatthew Dillon  *	the ALL-ALLOCATED state and the ALL-FREE state.  If a meta node is
517090df5aSMatthew Dillon  *	in either of these two states, all information contained underneath
527090df5aSMatthew Dillon  *	the node is considered stale.  These states are used to optimize
537090df5aSMatthew Dillon  *	allocation and freeing operations.
547090df5aSMatthew Dillon  *
557090df5aSMatthew Dillon  * 	The hinting greatly increases code efficiency for allocations while
567090df5aSMatthew Dillon  *	the general radix structure optimizes both allocations and frees.  The
577090df5aSMatthew Dillon  *	radix tree should be able to operate well no matter how much
587090df5aSMatthew Dillon  *	fragmentation there is and no matter how large a bitmap is used.
597090df5aSMatthew Dillon  *
6051dc4feaSSergey Kandaurov  *	The blist code wires all necessary memory at creation time.  Neither
6151dc4feaSSergey Kandaurov  *	allocations nor frees require interaction with the memory subsystem.
6251dc4feaSSergey Kandaurov  *	The non-blocking features of the blist code are used in the swap code
6351dc4feaSSergey Kandaurov  *	(vm/swap_pager.c).
647090df5aSMatthew Dillon  *
65e3043798SPedro F. Giffuni  *	LAYOUT: The radix tree is laid out recursively using a
66e3043798SPedro F. Giffuni  *	linear array.  Each meta node is immediately followed (laid out
677090df5aSMatthew Dillon  *	sequentially in memory) by BLIST_META_RADIX lower level nodes.  This
687090df5aSMatthew Dillon  *	is a recursive structure but one that can be easily scanned through
697090df5aSMatthew Dillon  *	a very simple 'skip' calculation.  In order to support large radixes,
707090df5aSMatthew Dillon  *	portions of the tree may reside outside our memory allocation.  We
717090df5aSMatthew Dillon  *	handle this with an early-termination optimization (when bighint is
727090df5aSMatthew Dillon  *	set to -1) on the scan.  The memory allocation is only large enough
737090df5aSMatthew Dillon  *	to cover the number of blocks requested at creation time even if it
747090df5aSMatthew Dillon  *	must be encompassed by a larger root-node radix.
757090df5aSMatthew Dillon  *
76f565f395SEitan Adler  *	NOTE: the allocator cannot currently allocate more than
777090df5aSMatthew Dillon  *	BLIST_BMAP_RADIX blocks per call.  It will panic with 'allocation too
787090df5aSMatthew Dillon  *	large' if you try.  This is an area that could use improvement.  The
797090df5aSMatthew Dillon  *	radix is large enough that this restriction does not affect the swap
80b411efa4SAlan Cox  *	system, though.  Currently only the allocation code is affected by
817090df5aSMatthew Dillon  *	this algorithmic unfeature.  The freeing code can handle arbitrary
827090df5aSMatthew Dillon  *	ranges.
837090df5aSMatthew Dillon  *
847090df5aSMatthew Dillon  *	This code can be compiled stand-alone for debugging.
857090df5aSMatthew Dillon  */
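/*
 *	LAYOUT EXAMPLE (added for illustration, assuming the usual constants
 *	BLIST_BMAP_RADIX == 64 and BLIST_META_RADIX == 16): a radix-1024
 *	subtree is stored as one meta node immediately followed by its 16
 *	leaves, 17 nodes in all.  A radix-16384 subtree is one meta node
 *	followed by 16 such 17-node subtrees, so child k's meta node sits at
 *	offset 1 + k * 17 from its parent; that stride is the 'next_skip'
 *	used throughout the meta-node routines below.
 */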
867090df5aSMatthew Dillon 
87677b542eSDavid E. O'Brien #include <sys/cdefs.h>
88677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$");
89677b542eSDavid E. O'Brien 
90c4473420SPeter Wemm #ifdef _KERNEL
917090df5aSMatthew Dillon 
927090df5aSMatthew Dillon #include <sys/param.h>
937090df5aSMatthew Dillon #include <sys/systm.h>
947090df5aSMatthew Dillon #include <sys/lock.h>
957090df5aSMatthew Dillon #include <sys/kernel.h>
967090df5aSMatthew Dillon #include <sys/blist.h>
977090df5aSMatthew Dillon #include <sys/malloc.h>
98d027ed2eSAlan Cox #include <sys/sbuf.h>
990cddd8f0SMatthew Dillon #include <sys/proc.h>
10023955314SAlfred Perlstein #include <sys/mutex.h>
1017090df5aSMatthew Dillon 
1027090df5aSMatthew Dillon #else
1037090df5aSMatthew Dillon 
1047090df5aSMatthew Dillon #ifndef BLIST_NO_DEBUG
1057090df5aSMatthew Dillon #define BLIST_DEBUG
1067090df5aSMatthew Dillon #endif
1077090df5aSMatthew Dillon 
1087090df5aSMatthew Dillon #include <sys/types.h>
109d90bf7d8SAlan Cox #include <sys/malloc.h>
110d027ed2eSAlan Cox #include <sys/sbuf.h>
1117090df5aSMatthew Dillon #include <stdio.h>
1127090df5aSMatthew Dillon #include <string.h>
1138eefcd40SAlan Cox #include <stddef.h>
1147090df5aSMatthew Dillon #include <stdlib.h>
1157090df5aSMatthew Dillon #include <stdarg.h>
116d3783724SAlan Cox #include <stdbool.h>
1177090df5aSMatthew Dillon 
1184be4fd5dSAlan Cox #define	bitcount64(x)	__bitcount64((uint64_t)(x))
11992da00bbSMatthew Dillon #define malloc(a,b,c)	calloc(a, 1)
1207090df5aSMatthew Dillon #define free(a,b)	free(a)
121ec371b57SAlan Cox static __inline int imax(int a, int b) { return (a > b ? a : b); }
1227090df5aSMatthew Dillon 
1237090df5aSMatthew Dillon #include <sys/blist.h>
1247090df5aSMatthew Dillon 
1257090df5aSMatthew Dillon void panic(const char *ctl, ...);
1267090df5aSMatthew Dillon 
1277090df5aSMatthew Dillon #endif
1287090df5aSMatthew Dillon 
1297090df5aSMatthew Dillon /*
1307090df5aSMatthew Dillon  * static support functions
1317090df5aSMatthew Dillon  */
132ec371b57SAlan Cox static daddr_t	blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count);
133bee93d3cSAlan Cox static daddr_t	blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count,
134bee93d3cSAlan Cox 		    u_daddr_t radix);
1357090df5aSMatthew Dillon static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
1367090df5aSMatthew Dillon static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
137bee93d3cSAlan Cox 		    u_daddr_t radix);
1387090df5aSMatthew Dillon static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
1392ac0c7c3SAlan Cox 		    blist_t dest, daddr_t count);
140a7249a6cSAlan Cox static daddr_t blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count);
141a7249a6cSAlan Cox static daddr_t blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count,
142bee93d3cSAlan Cox 		    u_daddr_t radix);
143c4473420SPeter Wemm #ifndef _KERNEL
144d3783724SAlan Cox static void	blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix,
1452ac0c7c3SAlan Cox 		    int tab);
1467090df5aSMatthew Dillon #endif
1477090df5aSMatthew Dillon 
148c4473420SPeter Wemm #ifdef _KERNEL
1497090df5aSMatthew Dillon static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
1507090df5aSMatthew Dillon #endif
1517090df5aSMatthew Dillon 
152ec371b57SAlan Cox _Static_assert(BLIST_BMAP_RADIX % BLIST_META_RADIX == 0,
153ec371b57SAlan Cox     "radix divisibility error");
154ec371b57SAlan Cox #define	BLIST_BMAP_MASK	(BLIST_BMAP_RADIX - 1)
155d027ed2eSAlan Cox #define	BLIST_META_MASK	(BLIST_META_RADIX - 1)
156ba98e6a2SAlan Cox 
1577090df5aSMatthew Dillon /*
1582ac0c7c3SAlan Cox  * For a subtree that can represent the state of up to 'radix' blocks, the
1592ac0c7c3SAlan Cox  * number of leaf nodes of the subtree is L=radix/BLIST_BMAP_RADIX.  If 'm'
1602ac0c7c3SAlan Cox  * is short for BLIST_META_RADIX, then for a tree of height h with L=m**h
1612ac0c7c3SAlan Cox  * leaf nodes, the total number of tree nodes is 1 + m + m**2 + ... + m**h,
1622ac0c7c3SAlan Cox  * or, equivalently, (m**(h+1)-1)/(m-1).  This quantity is called 'skip'
1632ac0c7c3SAlan Cox  * in the 'meta' functions that process subtrees.  Since integer division
1642ac0c7c3SAlan Cox  * discards remainders, we can express this computation as
1652ac0c7c3SAlan Cox  * skip = (m * m**h) / (m - 1)
166ba98e6a2SAlan Cox  * skip = (m * (radix / BLIST_BMAP_RADIX)) / (m - 1)
167ba98e6a2SAlan Cox  * and since m divides BLIST_BMAP_RADIX, we can simplify further to
168ba98e6a2SAlan Cox  * skip = (radix / (BLIST_BMAP_RADIX / m)) / (m - 1)
169ba98e6a2SAlan Cox  * skip = radix / ((BLIST_BMAP_RADIX / m) * (m - 1))
170ba98e6a2SAlan Cox  * so that simple integer division by a constant can safely be used for the
171ba98e6a2SAlan Cox  * calculation.
1722ac0c7c3SAlan Cox  */
1732ac0c7c3SAlan Cox static inline daddr_t
1742ac0c7c3SAlan Cox radix_to_skip(daddr_t radix)
1752ac0c7c3SAlan Cox {
1762ac0c7c3SAlan Cox 
1772ac0c7c3SAlan Cox 	return (radix /
178d027ed2eSAlan Cox 	    ((BLIST_BMAP_RADIX / BLIST_META_RADIX) * BLIST_META_MASK));
179d027ed2eSAlan Cox }
180d027ed2eSAlan Cox 
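/*
 * Example (illustrative, assuming BLIST_BMAP_RADIX == 64 and
 * BLIST_META_RADIX == 16): a radix-1024 subtree holds 1 meta node and 16
 * leaves, and 1024 / ((64 / 16) * 15) = 1024 / 60 = 17.  A radix-16384
 * subtree holds 1 + 16 + 256 = 273 nodes, and 16384 / 60 = 273.  The
 * truncation in the final division is always harmless because m**(h+1) is
 * one more than a multiple of m - 1.
 */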
181d027ed2eSAlan Cox /*
182d027ed2eSAlan Cox  * Use binary search, or a faster method, to find the 1 bit in a u_daddr_t.
183d027ed2eSAlan Cox  * Assumes that the argument has only one bit set.
184d027ed2eSAlan Cox  */
185d027ed2eSAlan Cox static inline int
186d027ed2eSAlan Cox bitpos(u_daddr_t mask)
187d027ed2eSAlan Cox {
188d027ed2eSAlan Cox 	int hi, lo, mid;
189d027ed2eSAlan Cox 
190d027ed2eSAlan Cox 	switch (sizeof(mask)) {
191d027ed2eSAlan Cox #ifdef HAVE_INLINE_FFSLL
192d027ed2eSAlan Cox 	case sizeof(long long):
193d027ed2eSAlan Cox 		return (ffsll(mask) - 1);
194d027ed2eSAlan Cox #endif
195d027ed2eSAlan Cox 	default:
196d027ed2eSAlan Cox 		lo = 0;
197d027ed2eSAlan Cox 		hi = BLIST_BMAP_RADIX;
198d027ed2eSAlan Cox 		while (lo + 1 < hi) {
199d027ed2eSAlan Cox 			mid = (lo + hi) >> 1;
200d027ed2eSAlan Cox 			if ((mask >> mid) != 0)
201d027ed2eSAlan Cox 				lo = mid;
202d027ed2eSAlan Cox 			else
203d027ed2eSAlan Cox 				hi = mid;
204d027ed2eSAlan Cox 		}
205d027ed2eSAlan Cox 		return (lo);
206d027ed2eSAlan Cox 	}
2072ac0c7c3SAlan Cox }
2082ac0c7c3SAlan Cox 
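/*
 * For example (illustrative), bitpos((u_daddr_t)1 << 25) returns 25,
 * whether the ffsll() shortcut or the binary search above is taken.
 */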
2092ac0c7c3SAlan Cox /*
2107090df5aSMatthew Dillon  * blist_create() - create a blist capable of handling up to the specified
2117090df5aSMatthew Dillon  *		    number of blocks
2127090df5aSMatthew Dillon  *
213f565f395SEitan Adler  *	blocks - must be greater than 0
214c8c7ad92SKip Macy  * 	flags  - malloc flags
2157090df5aSMatthew Dillon  *
2167090df5aSMatthew Dillon  *	The smallest blist consists of a single leaf node capable of
2177090df5aSMatthew Dillon  *	managing BLIST_BMAP_RADIX blocks.
2187090df5aSMatthew Dillon  */
2197090df5aSMatthew Dillon blist_t
220c8c7ad92SKip Macy blist_create(daddr_t blocks, int flags)
2217090df5aSMatthew Dillon {
2227090df5aSMatthew Dillon 	blist_t bl;
2238eefcd40SAlan Cox 	daddr_t i, last_block;
2248eefcd40SAlan Cox 	u_daddr_t nodes, radix, skip;
2258eefcd40SAlan Cox 	int digit;
2267090df5aSMatthew Dillon 
2277090df5aSMatthew Dillon 	/*
2288eefcd40SAlan Cox 	 * Calculate the radix and node count used for scanning.  Find the last
2298eefcd40SAlan Cox 	 * block that is followed by a terminator.
2307090df5aSMatthew Dillon 	 */
2318eefcd40SAlan Cox 	last_block = blocks - 1;
2327090df5aSMatthew Dillon 	radix = BLIST_BMAP_RADIX;
2337090df5aSMatthew Dillon 	while (radix < blocks) {
2348eefcd40SAlan Cox 		if (((last_block / radix + 1) & BLIST_META_MASK) != 0)
2358eefcd40SAlan Cox 			/*
2368eefcd40SAlan Cox 			 * A terminator will be added.  Update last_block to the
2378eefcd40SAlan Cox 			 * position just before that terminator.
2388eefcd40SAlan Cox 			 */
2398eefcd40SAlan Cox 			last_block |= radix - 1;
24057e6d29bSMatthew Dillon 		radix *= BLIST_META_RADIX;
2417090df5aSMatthew Dillon 	}
2427090df5aSMatthew Dillon 
2438eefcd40SAlan Cox 	/*
2448eefcd40SAlan Cox 	 * Count the meta-nodes in the expanded tree, including the final
2458eefcd40SAlan Cox 	 * terminator, from the bottom level up to the root.
2468eefcd40SAlan Cox 	 */
2478eefcd40SAlan Cox 	nodes = (last_block >= blocks) ? 2 : 1;
2488eefcd40SAlan Cox 	last_block /= BLIST_BMAP_RADIX;
2498eefcd40SAlan Cox 	while (last_block > 0) {
2508eefcd40SAlan Cox 		nodes += last_block + 1;
2518eefcd40SAlan Cox 		last_block /= BLIST_META_RADIX;
2528eefcd40SAlan Cox 	}
25303ca2137SAlan Cox 	bl = malloc(offsetof(struct blist, bl_root[nodes]), M_SWAP, flags |
25403ca2137SAlan Cox 	    M_ZERO);
255015d7db6SAlan Cox 	if (bl == NULL)
256015d7db6SAlan Cox 		return (NULL);
2577090df5aSMatthew Dillon 
2587090df5aSMatthew Dillon 	bl->bl_blocks = blocks;
2597090df5aSMatthew Dillon 	bl->bl_radix = radix;
260d4e3484bSAlan Cox 	bl->bl_cursor = 0;
2618eefcd40SAlan Cox 
2628eefcd40SAlan Cox 	/*
2638eefcd40SAlan Cox 	 * Initialize the empty tree by filling in root values, then initialize
2648eefcd40SAlan Cox 	 * just the terminators in the rest of the tree.
2658eefcd40SAlan Cox 	 */
2668eefcd40SAlan Cox 	bl->bl_root[0].bm_bighint = 0;
2678eefcd40SAlan Cox 	if (radix == BLIST_BMAP_RADIX)
2688eefcd40SAlan Cox 		bl->bl_root[0].u.bmu_bitmap = 0;
2698eefcd40SAlan Cox 	else
2708eefcd40SAlan Cox 		bl->bl_root[0].u.bmu_avail = 0;
2718eefcd40SAlan Cox 	last_block = blocks - 1;
2728eefcd40SAlan Cox 	i = 0;
2738eefcd40SAlan Cox 	while (radix > BLIST_BMAP_RADIX) {
2748eefcd40SAlan Cox 		radix /= BLIST_META_RADIX;
2758eefcd40SAlan Cox 		skip = radix_to_skip(radix);
2768eefcd40SAlan Cox 		digit = last_block / radix;
2778eefcd40SAlan Cox 		i += 1 + digit * skip;
2788eefcd40SAlan Cox 		if (digit != BLIST_META_MASK) {
2798eefcd40SAlan Cox 			/*
2808eefcd40SAlan Cox 			 * Add a terminator.
2818eefcd40SAlan Cox 			 */
2828eefcd40SAlan Cox 			bl->bl_root[i + skip].bm_bighint = (daddr_t)-1;
2838eefcd40SAlan Cox 			bl->bl_root[i + skip].u.bmu_bitmap = 0;
284015d7db6SAlan Cox 		}
2858eefcd40SAlan Cox 		last_block %= radix;
2868eefcd40SAlan Cox 	}
2877090df5aSMatthew Dillon 
2887090df5aSMatthew Dillon #if defined(BLIST_DEBUG)
2897090df5aSMatthew Dillon 	printf(
29092da00bbSMatthew Dillon 		"BLIST representing %lld blocks (%lld MB of swap)"
29192da00bbSMatthew Dillon 		", requiring %lldK of ram\n",
29292da00bbSMatthew Dillon 		(long long)bl->bl_blocks,
29392da00bbSMatthew Dillon 		(long long)bl->bl_blocks * 4 / 1024,
294015d7db6SAlan Cox 		(long long)(nodes * sizeof(blmeta_t) + 1023) / 1024
2957090df5aSMatthew Dillon 	);
29692da00bbSMatthew Dillon 	printf("BLIST raw radix tree contains %lld records\n",
297015d7db6SAlan Cox 	    (long long)nodes);
2987090df5aSMatthew Dillon #endif
2997090df5aSMatthew Dillon 
3007090df5aSMatthew Dillon 	return (bl);
3017090df5aSMatthew Dillon }
3027090df5aSMatthew Dillon 
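/*
 * Worked sizing example (illustrative, assuming BLIST_BMAP_RADIX == 64 and
 * BLIST_META_RADIX == 16): blist_create(65, ...) settles on radix 1024 and
 * bumps last_block from 64 to 127, so nodes = 2 + (127 / 64 + 1) = 4.  The
 * array then holds the root meta node at bl_root[0], leaves at bl_root[1]
 * and bl_root[2], and a terminator at bl_root[3].
 */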
3037090df5aSMatthew Dillon void
3047090df5aSMatthew Dillon blist_destroy(blist_t bl)
3057090df5aSMatthew Dillon {
3068eefcd40SAlan Cox 
3077090df5aSMatthew Dillon 	free(bl, M_SWAP);
3087090df5aSMatthew Dillon }
3097090df5aSMatthew Dillon 
3107090df5aSMatthew Dillon /*
3117090df5aSMatthew Dillon  * blist_alloc() -   reserve space in the block bitmap.  Return the base
3127090df5aSMatthew Dillon  *		     of a contiguous region or SWAPBLK_NONE if space could
3137090df5aSMatthew Dillon  *		     not be allocated.
3147090df5aSMatthew Dillon  */
3157090df5aSMatthew Dillon daddr_t
3167090df5aSMatthew Dillon blist_alloc(blist_t bl, daddr_t count)
3177090df5aSMatthew Dillon {
3184be4fd5dSAlan Cox 	daddr_t blk;
3197090df5aSMatthew Dillon 
320d4e3484bSAlan Cox 	/*
321d4e3484bSAlan Cox 	 * This loop iterates at most twice.  An allocation failure in the
322d4e3484bSAlan Cox 	 * first iteration leads to a second iteration only if the cursor was
323d4e3484bSAlan Cox 	 * non-zero.  When the cursor is zero, an allocation failure will
324d4e3484bSAlan Cox 	 * reduce the hint, stopping further iterations.
325d4e3484bSAlan Cox 	 */
326d4e3484bSAlan Cox 	while (count <= bl->bl_root->bm_bighint) {
327bee93d3cSAlan Cox 		blk = blst_meta_alloc(bl->bl_root, bl->bl_cursor, count,
328bee93d3cSAlan Cox 		    bl->bl_radix);
329d4e3484bSAlan Cox 		if (blk != SWAPBLK_NONE) {
330d4e3484bSAlan Cox 			bl->bl_cursor = blk + count;
331a0ae476bSAlan Cox 			if (bl->bl_cursor == bl->bl_blocks)
332a0ae476bSAlan Cox 				bl->bl_cursor = 0;
3337090df5aSMatthew Dillon 			return (blk);
334d4e3484bSAlan Cox 		} else if (bl->bl_cursor != 0)
335d4e3484bSAlan Cox 			bl->bl_cursor = 0;
3367090df5aSMatthew Dillon 	}
3374be4fd5dSAlan Cox 	return (SWAPBLK_NONE);
3384be4fd5dSAlan Cox }
3394be4fd5dSAlan Cox 
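/*
 * Typical usage sketch (illustrative only; the real in-kernel consumer is
 * the swap pager in vm/swap_pager.c):
 *
 *	blist_t bl = blist_create(2048, M_WAITOK);
 *	daddr_t blk = blist_alloc(bl, 16);
 *	if (blk != SWAPBLK_NONE)
 *		blist_free(bl, blk, 16);
 *	blist_destroy(bl);
 */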
3404be4fd5dSAlan Cox /*
3414be4fd5dSAlan Cox  * blist_avail() -	return the number of free blocks.
3424be4fd5dSAlan Cox  */
3434be4fd5dSAlan Cox daddr_t
3444be4fd5dSAlan Cox blist_avail(blist_t bl)
3454be4fd5dSAlan Cox {
3464be4fd5dSAlan Cox 
3474be4fd5dSAlan Cox 	if (bl->bl_radix == BLIST_BMAP_RADIX)
3484be4fd5dSAlan Cox 		return (bitcount64(bl->bl_root->u.bmu_bitmap));
3494be4fd5dSAlan Cox 	else
3504be4fd5dSAlan Cox 		return (bl->bl_root->u.bmu_avail);
3514be4fd5dSAlan Cox }
3527090df5aSMatthew Dillon 
3537090df5aSMatthew Dillon /*
3547090df5aSMatthew Dillon  * blist_free() -	free up space in the block bitmap.  The specified
3557090df5aSMatthew Dillon  *		     	range must have been allocated.  Panic if an
3567090df5aSMatthew Dillon  *			inconsistency is found.
3577090df5aSMatthew Dillon  */
3587090df5aSMatthew Dillon void
3597090df5aSMatthew Dillon blist_free(blist_t bl, daddr_t blkno, daddr_t count)
3607090df5aSMatthew Dillon {
361a396c83aSAlan Cox 
362bee93d3cSAlan Cox 	blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix);
3637090df5aSMatthew Dillon }
3647090df5aSMatthew Dillon 
3657090df5aSMatthew Dillon /*
36692da00bbSMatthew Dillon  * blist_fill() -	mark a region in the block bitmap as off-limits
36792da00bbSMatthew Dillon  *			to the allocator (i.e. allocate it), ignoring any
36892da00bbSMatthew Dillon  *			existing allocations.  Return the number of blocks
36992da00bbSMatthew Dillon  *			actually filled that were free before the call.
37092da00bbSMatthew Dillon  */
371a7249a6cSAlan Cox daddr_t
37292da00bbSMatthew Dillon blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
37392da00bbSMatthew Dillon {
37492da00bbSMatthew Dillon 
375bee93d3cSAlan Cox 	return (blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix));
37692da00bbSMatthew Dillon }
37792da00bbSMatthew Dillon 
37892da00bbSMatthew Dillon /*
3797090df5aSMatthew Dillon  * blist_resize() -	resize an existing radix tree to handle the
3807090df5aSMatthew Dillon  *			specified number of blocks.  This will reallocate
3817090df5aSMatthew Dillon  *			the tree and transfer the previous bitmap to the new
3827090df5aSMatthew Dillon  *			one.  When extending the tree you can specify whether
3837090df5aSMatthew Dillon  *			the new blocks are to left allocated or freed.
3847090df5aSMatthew Dillon  *			the new blocks are to be left allocated or freed.
3857090df5aSMatthew Dillon void
386c8c7ad92SKip Macy blist_resize(blist_t *pbl, daddr_t count, int freenew, int flags)
3877090df5aSMatthew Dillon {
388c8c7ad92SKip Macy     blist_t newbl = blist_create(count, flags);
3897090df5aSMatthew Dillon     blist_t save = *pbl;
3907090df5aSMatthew Dillon 
3917090df5aSMatthew Dillon     *pbl = newbl;
3927090df5aSMatthew Dillon     if (count > save->bl_blocks)
3937090df5aSMatthew Dillon 	    count = save->bl_blocks;
3942ac0c7c3SAlan Cox     blst_copy(save->bl_root, 0, save->bl_radix, newbl, count);
3957090df5aSMatthew Dillon 
3967090df5aSMatthew Dillon     /*
3977090df5aSMatthew Dillon      * If resizing upwards, should we free the new space or not?
3987090df5aSMatthew Dillon      */
3997090df5aSMatthew Dillon     if (freenew && count < newbl->bl_blocks) {
4007090df5aSMatthew Dillon 	    blist_free(newbl, count, newbl->bl_blocks - count);
4017090df5aSMatthew Dillon     }
4027090df5aSMatthew Dillon     blist_destroy(save);
4037090df5aSMatthew Dillon }
4047090df5aSMatthew Dillon 
4057090df5aSMatthew Dillon #ifdef BLIST_DEBUG
4067090df5aSMatthew Dillon 
4077090df5aSMatthew Dillon /*
4087090df5aSMatthew Dillon  * blist_print()    - dump radix tree
4097090df5aSMatthew Dillon  */
4107090df5aSMatthew Dillon void
4117090df5aSMatthew Dillon blist_print(blist_t bl)
4127090df5aSMatthew Dillon {
4132ac0c7c3SAlan Cox 	printf("BLIST cursor = %08jx {\n", (uintmax_t)bl->bl_cursor);
4142ac0c7c3SAlan Cox 	blst_radix_print(bl->bl_root, 0, bl->bl_radix, 4);
4157090df5aSMatthew Dillon 	printf("}\n");
4167090df5aSMatthew Dillon }
4177090df5aSMatthew Dillon 
4187090df5aSMatthew Dillon #endif
4197090df5aSMatthew Dillon 
420d027ed2eSAlan Cox static const u_daddr_t fib[] = {
421d027ed2eSAlan Cox 	1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584,
422d027ed2eSAlan Cox 	4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418, 317811,
423d027ed2eSAlan Cox 	514229, 832040, 1346269, 2178309, 3524578,
424d027ed2eSAlan Cox };
425d027ed2eSAlan Cox 
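/*
 * Illustrative note: histogram bucket i below counts gaps whose size lies
 * in [fib[i], fib[i + 1] - 1] (the last bucket is open-ended); e.g., a
 * 10-block gap lands in the bucket covering sizes 8 through 12.
 */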
426d027ed2eSAlan Cox /*
427d027ed2eSAlan Cox  * Use 'gap' to describe a maximal range of unallocated blocks/bits.
428d027ed2eSAlan Cox  */
429d027ed2eSAlan Cox struct gap_stats {
430d027ed2eSAlan Cox 	daddr_t	start;		/* current gap start, or SWAPBLK_NONE */
431d027ed2eSAlan Cox 	daddr_t	num;		/* number of gaps observed */
432d027ed2eSAlan Cox 	daddr_t	max;		/* largest gap size */
433d027ed2eSAlan Cox 	daddr_t	avg;		/* average gap size */
434d027ed2eSAlan Cox 	daddr_t	err;		/* sum - num * avg */
435d027ed2eSAlan Cox 	daddr_t	histo[nitems(fib)]; /* # gaps in each size range */
436d027ed2eSAlan Cox 	int	max_bucket;	/* last histo elt with nonzero val */
437d027ed2eSAlan Cox };
438d027ed2eSAlan Cox 
439d027ed2eSAlan Cox /*
440d027ed2eSAlan Cox  * gap_stats_counting()    - is the state 'counting 1 bits'?
441d027ed2eSAlan Cox  *                           or 'skipping 0 bits'?
442d027ed2eSAlan Cox  */
443d027ed2eSAlan Cox static inline bool
444d027ed2eSAlan Cox gap_stats_counting(const struct gap_stats *stats)
445d027ed2eSAlan Cox {
446d027ed2eSAlan Cox 
447d027ed2eSAlan Cox 	return (stats->start != SWAPBLK_NONE);
448d027ed2eSAlan Cox }
449d027ed2eSAlan Cox 
450d027ed2eSAlan Cox /*
451d027ed2eSAlan Cox  * init_gap_stats()    - initialize stats on gap sizes
452d027ed2eSAlan Cox  */
453d027ed2eSAlan Cox static inline void
454d027ed2eSAlan Cox init_gap_stats(struct gap_stats *stats)
455d027ed2eSAlan Cox {
456d027ed2eSAlan Cox 
457d027ed2eSAlan Cox 	bzero(stats, sizeof(*stats));
458d027ed2eSAlan Cox 	stats->start = SWAPBLK_NONE;
459d027ed2eSAlan Cox }
460d027ed2eSAlan Cox 
461d027ed2eSAlan Cox /*
462d027ed2eSAlan Cox  * update_gap_stats()    - update stats on gap sizes
463d027ed2eSAlan Cox  */
464d027ed2eSAlan Cox static void
465d027ed2eSAlan Cox update_gap_stats(struct gap_stats *stats, daddr_t posn)
466d027ed2eSAlan Cox {
467d027ed2eSAlan Cox 	daddr_t size;
468d027ed2eSAlan Cox 	int hi, lo, mid;
469d027ed2eSAlan Cox 
470d027ed2eSAlan Cox 	if (!gap_stats_counting(stats)) {
471d027ed2eSAlan Cox 		stats->start = posn;
472d027ed2eSAlan Cox 		return;
473d027ed2eSAlan Cox 	}
474d027ed2eSAlan Cox 	size = posn - stats->start;
475d027ed2eSAlan Cox 	stats->start = SWAPBLK_NONE;
476d027ed2eSAlan Cox 	if (size > stats->max)
477d027ed2eSAlan Cox 		stats->max = size;
478d027ed2eSAlan Cox 
479d027ed2eSAlan Cox 	/*
480d027ed2eSAlan Cox 	 * Find the fibonacci range that contains size,
481d027ed2eSAlan Cox 	 * expecting to find it in an early range.
482d027ed2eSAlan Cox 	 */
483d027ed2eSAlan Cox 	lo = 0;
484d027ed2eSAlan Cox 	hi = 1;
485d027ed2eSAlan Cox 	while (hi < nitems(fib) && fib[hi] <= size) {
486d027ed2eSAlan Cox 		lo = hi;
487d027ed2eSAlan Cox 		hi *= 2;
488d027ed2eSAlan Cox 	}
489d027ed2eSAlan Cox 	if (hi >= nitems(fib))
490d027ed2eSAlan Cox 		hi = nitems(fib);
491d027ed2eSAlan Cox 	while (lo + 1 != hi) {
492d027ed2eSAlan Cox 		mid = (lo + hi) >> 1;
493d027ed2eSAlan Cox 		if (fib[mid] <= size)
494d027ed2eSAlan Cox 			lo = mid;
495d027ed2eSAlan Cox 		else
496d027ed2eSAlan Cox 			hi = mid;
497d027ed2eSAlan Cox 	}
498d027ed2eSAlan Cox 	stats->histo[lo]++;
499d027ed2eSAlan Cox 	if (lo > stats->max_bucket)
500d027ed2eSAlan Cox 		stats->max_bucket = lo;
501d027ed2eSAlan Cox 	stats->err += size - stats->avg;
502d027ed2eSAlan Cox 	stats->num++;
503d027ed2eSAlan Cox 	stats->avg += stats->err / stats->num;
504d027ed2eSAlan Cox 	stats->err %= stats->num;
505d027ed2eSAlan Cox }
506d027ed2eSAlan Cox 
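/*
 * The avg/err update above maintains an exact integer running mean: the
 * invariant sum-of-sizes == num * avg + err holds after every call, with
 * |err| < num.  For example (illustrative), recording gaps of 3 and then 8
 * blocks leaves avg == 3, err == 0 and then avg == 5, err == 1, and
 * 2 * 5 + 1 == 11 is indeed the running sum.
 */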
507d027ed2eSAlan Cox /*
508d027ed2eSAlan Cox  * dump_gap_stats()    - print stats on gap sizes
509d027ed2eSAlan Cox  */
510d027ed2eSAlan Cox static inline void
511d027ed2eSAlan Cox dump_gap_stats(const struct gap_stats *stats, struct sbuf *s)
512d027ed2eSAlan Cox {
513d027ed2eSAlan Cox 	int i;
514d027ed2eSAlan Cox 
515d027ed2eSAlan Cox 	sbuf_printf(s, "number of maximal free ranges: %jd\n",
516d027ed2eSAlan Cox 	    (intmax_t)stats->num);
517d027ed2eSAlan Cox 	sbuf_printf(s, "largest free range: %jd\n", (intmax_t)stats->max);
518d027ed2eSAlan Cox 	sbuf_printf(s, "average maximal free range size: %jd\n",
519d027ed2eSAlan Cox 	    (intmax_t)stats->avg);
520d027ed2eSAlan Cox 	sbuf_printf(s, "number of maximal free ranges of different sizes:\n");
521d027ed2eSAlan Cox 	sbuf_printf(s, "               count  |  size range\n");
522d027ed2eSAlan Cox 	sbuf_printf(s, "               -----  |  ----------\n");
523d027ed2eSAlan Cox 	for (i = 0; i < stats->max_bucket; i++) {
524d027ed2eSAlan Cox 		if (stats->histo[i] != 0) {
525d027ed2eSAlan Cox 			sbuf_printf(s, "%20jd  |  ",
526d027ed2eSAlan Cox 			    (intmax_t)stats->histo[i]);
527d027ed2eSAlan Cox 			if (fib[i] != fib[i + 1] - 1)
528d027ed2eSAlan Cox 				sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
529d027ed2eSAlan Cox 				    (intmax_t)fib[i + 1] - 1);
530d027ed2eSAlan Cox 			else
531d027ed2eSAlan Cox 				sbuf_printf(s, "%jd\n", (intmax_t)fib[i]);
532d027ed2eSAlan Cox 		}
533d027ed2eSAlan Cox 	}
534d027ed2eSAlan Cox 	sbuf_printf(s, "%20jd  |  ", (intmax_t)stats->histo[i]);
535d027ed2eSAlan Cox 	if (stats->histo[i] > 1)
536d027ed2eSAlan Cox 		sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
537d027ed2eSAlan Cox 		    (intmax_t)stats->max);
538d027ed2eSAlan Cox 	else
539d027ed2eSAlan Cox 		sbuf_printf(s, "%jd\n", (intmax_t)stats->max);
540d027ed2eSAlan Cox }
541d027ed2eSAlan Cox 
542d027ed2eSAlan Cox /*
543d027ed2eSAlan Cox  * blist_stats()    - dump radix tree stats
544d027ed2eSAlan Cox  */
545d027ed2eSAlan Cox void
546d027ed2eSAlan Cox blist_stats(blist_t bl, struct sbuf *s)
547d027ed2eSAlan Cox {
548d027ed2eSAlan Cox 	struct gap_stats gstats;
549d027ed2eSAlan Cox 	struct gap_stats *stats = &gstats;
550d027ed2eSAlan Cox 	daddr_t i, nodes, radix;
551d027ed2eSAlan Cox 	u_daddr_t bit, diff, mask;
552d027ed2eSAlan Cox 
553d027ed2eSAlan Cox 	init_gap_stats(stats);
554d027ed2eSAlan Cox 	nodes = 0;
555d027ed2eSAlan Cox 	i = bl->bl_radix;
556d027ed2eSAlan Cox 	while (i < bl->bl_radix + bl->bl_blocks) {
557d027ed2eSAlan Cox 		/*
558d027ed2eSAlan Cox 		 * Find max size subtree starting at i.
559d027ed2eSAlan Cox 		 */
560d027ed2eSAlan Cox 		radix = BLIST_BMAP_RADIX;
561d027ed2eSAlan Cox 		while (((i / radix) & BLIST_META_MASK) == 0)
562d027ed2eSAlan Cox 			radix *= BLIST_META_RADIX;
563d027ed2eSAlan Cox 
564d027ed2eSAlan Cox 		/*
565d027ed2eSAlan Cox 		 * Check for skippable subtrees starting at i.
566d027ed2eSAlan Cox 		 */
567d027ed2eSAlan Cox 		while (radix > BLIST_BMAP_RADIX) {
568d027ed2eSAlan Cox 			if (bl->bl_root[nodes].u.bmu_avail == 0) {
569d027ed2eSAlan Cox 				if (gap_stats_counting(stats))
570d027ed2eSAlan Cox 					update_gap_stats(stats, i);
571d027ed2eSAlan Cox 				break;
572d027ed2eSAlan Cox 			}
573d027ed2eSAlan Cox 			if (bl->bl_root[nodes].u.bmu_avail == radix) {
574d027ed2eSAlan Cox 				if (!gap_stats_counting(stats))
575d027ed2eSAlan Cox 					update_gap_stats(stats, i);
576d027ed2eSAlan Cox 				break;
577d027ed2eSAlan Cox 			}
578d027ed2eSAlan Cox 
579d027ed2eSAlan Cox 			/*
580d027ed2eSAlan Cox 			 * Skip subtree root.
581d027ed2eSAlan Cox 			 */
582d027ed2eSAlan Cox 			nodes++;
583d027ed2eSAlan Cox 			radix /= BLIST_META_RADIX;
584d027ed2eSAlan Cox 		}
585d027ed2eSAlan Cox 		if (radix == BLIST_BMAP_RADIX) {
586d027ed2eSAlan Cox 			/*
587d027ed2eSAlan Cox 			 * Scan leaf.
588d027ed2eSAlan Cox 			 */
589d027ed2eSAlan Cox 			mask = bl->bl_root[nodes].u.bmu_bitmap;
590d027ed2eSAlan Cox 			diff = mask ^ (mask << 1);
591d027ed2eSAlan Cox 			if (gap_stats_counting(stats))
592d027ed2eSAlan Cox 				diff ^= 1;
593d027ed2eSAlan Cox 			while (diff != 0) {
594d027ed2eSAlan Cox 				bit = diff & -diff;
595d027ed2eSAlan Cox 				update_gap_stats(stats, i + bitpos(bit));
596d027ed2eSAlan Cox 				diff ^= bit;
597d027ed2eSAlan Cox 			}
598d027ed2eSAlan Cox 		}
599d027ed2eSAlan Cox 		nodes += radix_to_skip(radix);
600d027ed2eSAlan Cox 		i += radix;
601d027ed2eSAlan Cox 	}
602d027ed2eSAlan Cox 	update_gap_stats(stats, i);
603d027ed2eSAlan Cox 	dump_gap_stats(stats, s);
604d027ed2eSAlan Cox }
605d027ed2eSAlan Cox 
6067090df5aSMatthew Dillon /************************************************************************
6077090df5aSMatthew Dillon  *			  ALLOCATION SUPPORT FUNCTIONS			*
6087090df5aSMatthew Dillon  ************************************************************************
6097090df5aSMatthew Dillon  *
6107090df5aSMatthew Dillon  *	These support functions do all the actual work.  They may seem
6117090df5aSMatthew Dillon  *	rather longish, but that's because I've commented them up.  The
6127090df5aSMatthew Dillon  *	actual code is straightforward.
6137090df5aSMatthew Dillon  *
6147090df5aSMatthew Dillon  */
6157090df5aSMatthew Dillon 
6167090df5aSMatthew Dillon /*
6177090df5aSMatthew Dillon  * blst_leaf_alloc() -	allocate at a leaf in the radix tree (a bitmap).
6187090df5aSMatthew Dillon  *
61954541421SAlan Cox  *	This is the core of the allocator and is optimized for the
62054541421SAlan Cox  *	BLIST_BMAP_RADIX block allocation case.  Otherwise, execution
621d027ed2eSAlan Cox  *	time is proportional to log2(count) + bitpos time.
6227090df5aSMatthew Dillon  */
6237090df5aSMatthew Dillon static daddr_t
624ec371b57SAlan Cox blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
62554541421SAlan Cox {
62654541421SAlan Cox 	u_daddr_t mask;
627ec371b57SAlan Cox 	int count1, hi, lo, num_shifts, range1, range_ext;
6287090df5aSMatthew Dillon 
62954541421SAlan Cox 	range1 = 0;
63054541421SAlan Cox 	count1 = count - 1;
63154541421SAlan Cox 	num_shifts = fls(count1);
63254541421SAlan Cox 	mask = scan->u.bmu_bitmap;
633ec371b57SAlan Cox 	while ((-mask & ~mask) != 0 && num_shifts > 0) {
63454541421SAlan Cox 		/*
63554541421SAlan Cox 		 * If bit i is set in mask, then bits in [i, i+range1] are set
63654541421SAlan Cox 		 * in scan->u.bmu_bitmap.  The value of range1 is equal to
63754541421SAlan Cox 		 * count1 >> num_shifts.  Grow range and reduce num_shifts to 0,
63854541421SAlan Cox 		 * while preserving these invariants.  The updates to mask leave
63954541421SAlan Cox 		 * fewer bits set, but each bit that remains set represents a
64054541421SAlan Cox 		 * longer string of consecutive bits set in scan->u.bmu_bitmap.
641ec371b57SAlan Cox 		 * If more updates to mask cannot clear more bits, because mask
642ec371b57SAlan Cox 		 * is partitioned with all 0 bits preceding all 1 bits, the loop
643ec371b57SAlan Cox 		 * terminates immediately.
64454541421SAlan Cox 		 */
64554541421SAlan Cox 		num_shifts--;
64654541421SAlan Cox 		range_ext = range1 + ((count1 >> num_shifts) & 1);
647ec371b57SAlan Cox 		/*
648ec371b57SAlan Cox 		 * mask is a signed quantity for the shift because when it is
649ec371b57SAlan Cox 		 * shifted right, the sign bit should be copied; when the last
650ec371b57SAlan Cox 		 * block of the leaf is free, pretend, for a while, that all the
651ec371b57SAlan Cox 		 * blocks that follow it are also free.
652ec371b57SAlan Cox 		 */
653ec371b57SAlan Cox 		mask &= (daddr_t)mask >> range_ext;
65454541421SAlan Cox 		range1 += range_ext;
65554541421SAlan Cox 	}
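	/*
	 * Illustrative example (not part of the original comments): for
	 * count == 4 the loop performs mask &= mask >> 1 and then
	 * mask &= mask >> 2, so away from the top of the leaf a bit
	 * survives only if it begins a run of at least 4 set bits.  With
	 * an initial mask of 0x00f0, the two steps leave 0x0070 and then
	 * 0x0010: only bit 4 starts a long-enough free run.
	 */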
65654541421SAlan Cox 	if (mask == 0) {
65754541421SAlan Cox 		/*
65854541421SAlan Cox 		 * Update bighint.  There is no allocation bigger than range1
659ec371b57SAlan Cox 		 * starting in this leaf.
66054541421SAlan Cox 		 */
66154541421SAlan Cox 		scan->bm_bighint = range1;
6627090df5aSMatthew Dillon 		return (SWAPBLK_NONE);
6637090df5aSMatthew Dillon 	}
66454541421SAlan Cox 
665ec371b57SAlan Cox 	/* Discard any candidates that appear before blk. */
666ec371b57SAlan Cox 	mask &= (u_daddr_t)-1 << (blk & BLIST_BMAP_MASK);
66754541421SAlan Cox 	if (mask == 0)
66854541421SAlan Cox 		return (SWAPBLK_NONE);
6697090df5aSMatthew Dillon 
6707090df5aSMatthew Dillon 	/*
67154541421SAlan Cox 	 * The least significant set bit in mask marks the start of the first
67254541421SAlan Cox 	 * available range of sufficient size.  Clear all the bits but that one,
673d027ed2eSAlan Cox 	 * and then find its position.
6747090df5aSMatthew Dillon 	 */
67554541421SAlan Cox 	mask &= -mask;
676d027ed2eSAlan Cox 	lo = bitpos(mask);
6777090df5aSMatthew Dillon 
678ec371b57SAlan Cox 	hi = lo + count;
679ec371b57SAlan Cox 	if (hi > BLIST_BMAP_RADIX) {
68054541421SAlan Cox 		/*
681ec371b57SAlan Cox 		 * An allocation within this leaf is impossible, so a successful
682ec371b57SAlan Cox 		 * allocation depends on the next leaf providing some of the blocks.
68354541421SAlan Cox 		 */
684ec371b57SAlan Cox 		if (((blk / BLIST_BMAP_RADIX + 1) & BLIST_META_MASK) == 0) {
685ec371b57SAlan Cox 			/*
686ec371b57SAlan Cox 			 * The next leaf has a different meta-node parent, so it
687ec371b57SAlan Cox 			 * is not necessarily initialized.  Update bighint,
688ec371b57SAlan Cox 			 * comparing the range found at the end of mask to the
689ec371b57SAlan Cox 			 * largest earlier range that could have been made to
690ec371b57SAlan Cox 			 * vanish in the initial processing of mask.
691ec371b57SAlan Cox 			 */
692ec371b57SAlan Cox 			scan->bm_bighint = imax(BLIST_BMAP_RADIX - lo, range1);
693ec371b57SAlan Cox 			return (SWAPBLK_NONE);
694ec371b57SAlan Cox 		}
695ec371b57SAlan Cox 		hi -= BLIST_BMAP_RADIX;
696ec371b57SAlan Cox 		if (((scan[1].u.bmu_bitmap + 1) & ~((u_daddr_t)-1 << hi)) != 0) {
697ec371b57SAlan Cox 			/*
698ec371b57SAlan Cox 			 * The next leaf doesn't have enough free blocks at the
699ec371b57SAlan Cox 			 * beginning to complete the spanning allocation.  The
700ec371b57SAlan Cox 			 * hint cannot be updated, because the same allocation
701ec371b57SAlan Cox 			 * request could be satisfied later, by this leaf, if
702ec371b57SAlan Cox 			 * the state of the next leaf changes, and without any
703ec371b57SAlan Cox 			 * changes to this leaf.
704ec371b57SAlan Cox 			 */
705ec371b57SAlan Cox 			return (SWAPBLK_NONE);
706ec371b57SAlan Cox 		}
707ec371b57SAlan Cox 		/* Clear the first 'hi' bits in the next leaf, allocating them. */
708ec371b57SAlan Cox 		scan[1].u.bmu_bitmap &= (u_daddr_t)-1 << hi;
709ec371b57SAlan Cox 		hi = BLIST_BMAP_RADIX;
710ec371b57SAlan Cox 	}
711ec371b57SAlan Cox 
712ec371b57SAlan Cox 	/* Set the bits of mask at position 'lo' and higher. */
713ec371b57SAlan Cox 	mask = -mask;
714ec371b57SAlan Cox 	if (hi == BLIST_BMAP_RADIX) {
715ec371b57SAlan Cox 		/*
716ec371b57SAlan Cox 		 * Update bighint.  There is no allocation bigger than range1
717ec371b57SAlan Cox 		 * available in this leaf after this allocation completes.
718ec371b57SAlan Cox 		 */
719ec371b57SAlan Cox 		scan->bm_bighint = range1;
720ec371b57SAlan Cox 	} else {
721ec371b57SAlan Cox 		/* Clear the bits of mask at position 'hi' and higher. */
722ec371b57SAlan Cox 		mask &= (u_daddr_t)-1 >> (BLIST_BMAP_RADIX - hi);
723ec371b57SAlan Cox 		/* If this allocation uses all the bits, clear the hint. */
724ec371b57SAlan Cox 		if (mask == scan->u.bmu_bitmap)
725ec371b57SAlan Cox 			scan->bm_bighint = 0;
726ec371b57SAlan Cox 	}
727ec371b57SAlan Cox 	/* Clear the allocated bits from this leaf. */
7287090df5aSMatthew Dillon 	scan->u.bmu_bitmap &= ~mask;
729ec371b57SAlan Cox 	return ((blk & ~BLIST_BMAP_MASK) + lo);
7307090df5aSMatthew Dillon }
7317090df5aSMatthew Dillon 
7327090df5aSMatthew Dillon /*
7337090df5aSMatthew Dillon  * blst_meta_alloc() -	allocate at a meta node in the radix tree.
7347090df5aSMatthew Dillon  *
7357090df5aSMatthew Dillon  *	Attempt to allocate at a meta node.  If we can't, we update
7367090df5aSMatthew Dillon  *	bighint and return a failure.  Updating bighint optimizes future
7377090df5aSMatthew Dillon  *	calls that hit this node.  We have to check for our collapse cases
7387090df5aSMatthew Dillon  *	and we have a few optimizations strewn in as well.
7397090df5aSMatthew Dillon  */
7407090df5aSMatthew Dillon static daddr_t
741bee93d3cSAlan Cox blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix)
742d4e3484bSAlan Cox {
743bee93d3cSAlan Cox 	daddr_t blk, i, next_skip, r, skip;
744d4e3484bSAlan Cox 	int child;
745d4e3484bSAlan Cox 	bool scan_from_start;
7467090df5aSMatthew Dillon 
747a396c83aSAlan Cox 	if (radix == BLIST_BMAP_RADIX)
748ec371b57SAlan Cox 		return (blst_leaf_alloc(scan, cursor, count));
7494be4fd5dSAlan Cox 	if (scan->u.bmu_avail < count) {
7507090df5aSMatthew Dillon 		/*
7514be4fd5dSAlan Cox 		 * The meta node's hint must be too large if the allocation
7524be4fd5dSAlan Cox 		 * exceeds the number of free blocks.  Reduce the hint, and
7534be4fd5dSAlan Cox 		 * return failure.
7547090df5aSMatthew Dillon 		 */
7554be4fd5dSAlan Cox 		scan->bm_bighint = scan->u.bmu_avail;
7567090df5aSMatthew Dillon 		return (SWAPBLK_NONE);
7577090df5aSMatthew Dillon 	}
758ec371b57SAlan Cox 	blk = cursor & -radix;
7592ac0c7c3SAlan Cox 	skip = radix_to_skip(radix);
760d4e3484bSAlan Cox 	next_skip = skip / BLIST_META_RADIX;
7617090df5aSMatthew Dillon 
7624be4fd5dSAlan Cox 	/*
7634be4fd5dSAlan Cox 	 * An ALL-FREE meta node requires special handling before allocating
7644be4fd5dSAlan Cox 	 * any of its blocks.
7654be4fd5dSAlan Cox 	 */
7667090df5aSMatthew Dillon 	if (scan->u.bmu_avail == radix) {
76757e6d29bSMatthew Dillon 		radix /= BLIST_META_RADIX;
7687090df5aSMatthew Dillon 
7697090df5aSMatthew Dillon 		/*
7704be4fd5dSAlan Cox 		 * Reinitialize each of the meta node's children.  An ALL-FREE
7714be4fd5dSAlan Cox 		 * meta node cannot have a terminator in any subtree.
7727090df5aSMatthew Dillon 		 */
7732ac0c7c3SAlan Cox 		for (i = 1; i < skip; i += next_skip) {
774d4e3484bSAlan Cox 			if (next_skip == 1)
7757090df5aSMatthew Dillon 				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
776d4e3484bSAlan Cox 			else
7777090df5aSMatthew Dillon 				scan[i].u.bmu_avail = radix;
778d4e3484bSAlan Cox 			scan[i].bm_bighint = radix;
7797090df5aSMatthew Dillon 		}
7807090df5aSMatthew Dillon 	} else {
78157e6d29bSMatthew Dillon 		radix /= BLIST_META_RADIX;
7827090df5aSMatthew Dillon 	}
7837090df5aSMatthew Dillon 
7844be4fd5dSAlan Cox 	if (count > radix) {
7854be4fd5dSAlan Cox 		/*
7864be4fd5dSAlan Cox 		 * The allocation exceeds the number of blocks that are
7874be4fd5dSAlan Cox 		 * managed by a subtree of this meta node.
7884be4fd5dSAlan Cox 		 */
7894be4fd5dSAlan Cox 		panic("allocation too large");
7904be4fd5dSAlan Cox 	}
791d4e3484bSAlan Cox 	scan_from_start = cursor == blk;
792d4e3484bSAlan Cox 	child = (cursor - blk) / radix;
793d4e3484bSAlan Cox 	blk += child * radix;
7942ac0c7c3SAlan Cox 	for (i = 1 + child * next_skip; i < skip; i += next_skip) {
7957090df5aSMatthew Dillon 		if (count <= scan[i].bm_bighint) {
7967090df5aSMatthew Dillon 			/*
797ec371b57SAlan Cox 			 * The allocation might fit beginning in the i'th subtree.
7987090df5aSMatthew Dillon 			 */
799bee93d3cSAlan Cox 			r = blst_meta_alloc(&scan[i],
800bee93d3cSAlan Cox 			    cursor > blk ? cursor : blk, count, radix);
8017090df5aSMatthew Dillon 			if (r != SWAPBLK_NONE) {
8027090df5aSMatthew Dillon 				scan->u.bmu_avail -= count;
8037090df5aSMatthew Dillon 				return (r);
8047090df5aSMatthew Dillon 			}
8057090df5aSMatthew Dillon 		} else if (scan[i].bm_bighint == (daddr_t)-1) {
8067090df5aSMatthew Dillon 			/*
8077090df5aSMatthew Dillon 			 * Terminator
8087090df5aSMatthew Dillon 			 */
8097090df5aSMatthew Dillon 			break;
8107090df5aSMatthew Dillon 		}
8117090df5aSMatthew Dillon 		blk += radix;
8127090df5aSMatthew Dillon 	}
8137090df5aSMatthew Dillon 
8147090df5aSMatthew Dillon 	/*
8157090df5aSMatthew Dillon 	 * We couldn't allocate count in this subtree, update bighint.
8167090df5aSMatthew Dillon 	 */
817d4e3484bSAlan Cox 	if (scan_from_start && scan->bm_bighint >= count)
8187090df5aSMatthew Dillon 		scan->bm_bighint = count - 1;
819d4e3484bSAlan Cox 
8207090df5aSMatthew Dillon 	return (SWAPBLK_NONE);
8217090df5aSMatthew Dillon }
8227090df5aSMatthew Dillon 
8237090df5aSMatthew Dillon /*
8247090df5aSMatthew Dillon  * BLST_LEAF_FREE() -	free allocated block from leaf bitmap
8257090df5aSMatthew Dillon  *
8267090df5aSMatthew Dillon  */
8277090df5aSMatthew Dillon static void
828b411efa4SAlan Cox blst_leaf_free(blmeta_t *scan, daddr_t blk, int count)
829b411efa4SAlan Cox {
830ec371b57SAlan Cox 	u_daddr_t mask;
831ec371b57SAlan Cox 	int n;
832ec371b57SAlan Cox 
8337090df5aSMatthew Dillon 	/*
8347090df5aSMatthew Dillon 	 * free some data in this bitmap
835ec371b57SAlan Cox 	 * mask=0000111111111110000
8367090df5aSMatthew Dillon 	 *          \_________/\__/
837ec371b57SAlan Cox 	 *		count   n
8387090df5aSMatthew Dillon 	 */
839ec371b57SAlan Cox 	n = blk & BLIST_BMAP_MASK;
8407090df5aSMatthew Dillon 	mask = ((u_daddr_t)-1 << n) &
8417090df5aSMatthew Dillon 	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));
8427090df5aSMatthew Dillon 	if (scan->u.bmu_bitmap & mask)
843ec371b57SAlan Cox 		panic("freeing free block");
8447090df5aSMatthew Dillon 	scan->u.bmu_bitmap |= mask;
8457090df5aSMatthew Dillon 
8467090df5aSMatthew Dillon 	/*
8477090df5aSMatthew Dillon 	 * We could probably do a better job here.  We are required to make
8487090df5aSMatthew Dillon 	 * bighint at least as large as the biggest contiguous block of
8497090df5aSMatthew Dillon 	 * data.  If we just shoehorn it, a little extra overhead will
8507090df5aSMatthew Dillon 	 * be incurred on the next allocation (but only that one typically).
8517090df5aSMatthew Dillon 	 */
8527090df5aSMatthew Dillon 	scan->bm_bighint = BLIST_BMAP_RADIX;
8537090df5aSMatthew Dillon }
8547090df5aSMatthew Dillon 
8557090df5aSMatthew Dillon /*
8567090df5aSMatthew Dillon  * BLST_META_FREE() - free allocated blocks from radix tree meta info
8577090df5aSMatthew Dillon  *
8587090df5aSMatthew Dillon  *	This support routine frees a range of blocks from the bitmap.
8597090df5aSMatthew Dillon  *	The range must be entirely enclosed by this radix node.  If a
8607090df5aSMatthew Dillon  *	meta node, we break the range down recursively to free blocks
8617090df5aSMatthew Dillon  *	in subnodes (which means that this code can free an arbitrary
8627090df5aSMatthew Dillon  *	range whereas the allocation code cannot allocate an arbitrary
8637090df5aSMatthew Dillon  *	range).
8647090df5aSMatthew Dillon  */
8657090df5aSMatthew Dillon static void
866bee93d3cSAlan Cox blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, u_daddr_t radix)
867d3783724SAlan Cox {
868bee93d3cSAlan Cox 	daddr_t blk, i, next_skip, skip, v;
869d3783724SAlan Cox 	int child;
8707090df5aSMatthew Dillon 
871a396c83aSAlan Cox 	if (scan->bm_bighint == (daddr_t)-1)
872a396c83aSAlan Cox 		panic("freeing invalid range");
873a396c83aSAlan Cox 	if (radix == BLIST_BMAP_RADIX)
874a396c83aSAlan Cox 		return (blst_leaf_free(scan, freeBlk, count));
8752ac0c7c3SAlan Cox 	skip = radix_to_skip(radix);
876d3783724SAlan Cox 	next_skip = skip / BLIST_META_RADIX;
8777090df5aSMatthew Dillon 
8787090df5aSMatthew Dillon 	if (scan->u.bmu_avail == 0) {
8797090df5aSMatthew Dillon 		/*
8807090df5aSMatthew Dillon 		 * ALL-ALLOCATED special case, with possible
8817090df5aSMatthew Dillon 		 * shortcut to ALL-FREE special case.
8827090df5aSMatthew Dillon 		 */
8837090df5aSMatthew Dillon 		scan->u.bmu_avail = count;
8847090df5aSMatthew Dillon 		scan->bm_bighint = count;
8857090df5aSMatthew Dillon 
8867090df5aSMatthew Dillon 		if (count != radix)  {
8872ac0c7c3SAlan Cox 			for (i = 1; i < skip; i += next_skip) {
8887090df5aSMatthew Dillon 				if (scan[i].bm_bighint == (daddr_t)-1)
8897090df5aSMatthew Dillon 					break;
8907090df5aSMatthew Dillon 				scan[i].bm_bighint = 0;
8917090df5aSMatthew Dillon 				if (next_skip == 1) {
8927090df5aSMatthew Dillon 					scan[i].u.bmu_bitmap = 0;
8937090df5aSMatthew Dillon 				} else {
8947090df5aSMatthew Dillon 					scan[i].u.bmu_avail = 0;
8957090df5aSMatthew Dillon 				}
8967090df5aSMatthew Dillon 			}
8977090df5aSMatthew Dillon 			/* fall through */
8987090df5aSMatthew Dillon 		}
8997090df5aSMatthew Dillon 	} else {
9007090df5aSMatthew Dillon 		scan->u.bmu_avail += count;
9017090df5aSMatthew Dillon 		/* scan->bm_bighint = radix; */
9027090df5aSMatthew Dillon 	}
9037090df5aSMatthew Dillon 
9047090df5aSMatthew Dillon 	/*
9057090df5aSMatthew Dillon 	 * ALL-FREE special case.
9067090df5aSMatthew Dillon 	 */
9077090df5aSMatthew Dillon 
9087090df5aSMatthew Dillon 	if (scan->u.bmu_avail == radix)
9097090df5aSMatthew Dillon 		return;
9107090df5aSMatthew Dillon 	if (scan->u.bmu_avail > radix)
911bdc9a8d0SJohn Baldwin 		panic("blst_meta_free: freeing already free blocks (%lld) %lld/%lld",
912bdc9a8d0SJohn Baldwin 		    (long long)count, (long long)scan->u.bmu_avail,
913bdc9a8d0SJohn Baldwin 		    (long long)radix);
9147090df5aSMatthew Dillon 
9157090df5aSMatthew Dillon 	/*
9167090df5aSMatthew Dillon 	 * Break the free down into its components
9177090df5aSMatthew Dillon 	 */
9187090df5aSMatthew Dillon 
919bee93d3cSAlan Cox 	blk = freeBlk & -radix;
92057e6d29bSMatthew Dillon 	radix /= BLIST_META_RADIX;
9217090df5aSMatthew Dillon 
922d3783724SAlan Cox 	child = (freeBlk - blk) / radix;
923d3783724SAlan Cox 	blk += child * radix;
924d3783724SAlan Cox 	i = 1 + child * next_skip;
9252ac0c7c3SAlan Cox 	while (i < skip && blk < freeBlk + count) {
9267090df5aSMatthew Dillon 		v = blk + radix - freeBlk;
9277090df5aSMatthew Dillon 		if (v > count)
9287090df5aSMatthew Dillon 			v = count;
929bee93d3cSAlan Cox 		blst_meta_free(&scan[i], freeBlk, v, radix);
9307090df5aSMatthew Dillon 		if (scan->bm_bighint < scan[i].bm_bighint)
9317090df5aSMatthew Dillon 			scan->bm_bighint = scan[i].bm_bighint;
9327090df5aSMatthew Dillon 		count -= v;
9337090df5aSMatthew Dillon 		freeBlk += v;
9347090df5aSMatthew Dillon 		blk += radix;
9357090df5aSMatthew Dillon 		i += next_skip;
9367090df5aSMatthew Dillon 	}
9377090df5aSMatthew Dillon }
9387090df5aSMatthew Dillon 
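/*
 * Indexing example (illustrative, assuming BLIST_BMAP_RADIX == 64 and
 * BLIST_META_RADIX == 16): freeing at freeBlk == 1300 under a radix-1024
 * meta node gives blk == 1024 and child == (1300 - 1024) / 64 == 4, so the
 * scan starts at scan[1 + 4 * next_skip] == scan[5], the leaf covering
 * blocks 1280-1343.
 */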
9397090df5aSMatthew Dillon /*
9407090df5aSMatthew Dillon  * BLST_COPY() - copy one radix tree to another
9417090df5aSMatthew Dillon  *
9427090df5aSMatthew Dillon  *	Locates free space in the source tree and frees it in the destination
9437090df5aSMatthew Dillon  *	tree.  The space may not already be free in the destination.
9447090df5aSMatthew Dillon  */
945b411efa4SAlan Cox static void
9462ac0c7c3SAlan Cox blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest,
9472ac0c7c3SAlan Cox     daddr_t count)
948b411efa4SAlan Cox {
9492ac0c7c3SAlan Cox 	daddr_t i, next_skip, skip;
9507090df5aSMatthew Dillon 
9517090df5aSMatthew Dillon 	/*
9527090df5aSMatthew Dillon 	 * Leaf node
9537090df5aSMatthew Dillon 	 */
9547090df5aSMatthew Dillon 
9557090df5aSMatthew Dillon 	if (radix == BLIST_BMAP_RADIX) {
9567090df5aSMatthew Dillon 		u_daddr_t v = scan->u.bmu_bitmap;
9577090df5aSMatthew Dillon 
9587090df5aSMatthew Dillon 		if (v == (u_daddr_t)-1) {
9597090df5aSMatthew Dillon 			blist_free(dest, blk, count);
9607090df5aSMatthew Dillon 		} else if (v != 0) {
9617090df5aSMatthew Dillon 			int i;
9627090df5aSMatthew Dillon 
9637090df5aSMatthew Dillon 			for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) {
964064650c1SAlan Cox 				if (v & ((u_daddr_t)1 << i))
9657090df5aSMatthew Dillon 					blist_free(dest, blk + i, 1);
9667090df5aSMatthew Dillon 			}
9677090df5aSMatthew Dillon 		}
9687090df5aSMatthew Dillon 		return;
9697090df5aSMatthew Dillon 	}
9707090df5aSMatthew Dillon 
9717090df5aSMatthew Dillon 	/*
9727090df5aSMatthew Dillon 	 * Meta node
9737090df5aSMatthew Dillon 	 */
9747090df5aSMatthew Dillon 
9757090df5aSMatthew Dillon 	if (scan->u.bmu_avail == 0) {
9767090df5aSMatthew Dillon 		/*
9777090df5aSMatthew Dillon 		 * Source all allocated, leave dest allocated
9787090df5aSMatthew Dillon 		 */
9797090df5aSMatthew Dillon 		return;
9807090df5aSMatthew Dillon 	}
9817090df5aSMatthew Dillon 	if (scan->u.bmu_avail == radix) {
9827090df5aSMatthew Dillon 		/*
9837090df5aSMatthew Dillon 		 * Source all free, free entire dest
9847090df5aSMatthew Dillon 		 */
9857090df5aSMatthew Dillon 		if (count < radix)
9867090df5aSMatthew Dillon 			blist_free(dest, blk, count);
9877090df5aSMatthew Dillon 		else
9887090df5aSMatthew Dillon 			blist_free(dest, blk, radix);
9897090df5aSMatthew Dillon 		return;
9907090df5aSMatthew Dillon 	}
9917090df5aSMatthew Dillon 
9927090df5aSMatthew Dillon 
9932ac0c7c3SAlan Cox 	skip = radix_to_skip(radix);
994d3783724SAlan Cox 	next_skip = skip / BLIST_META_RADIX;
9952ac0c7c3SAlan Cox 	radix /= BLIST_META_RADIX;
9967090df5aSMatthew Dillon 
9972ac0c7c3SAlan Cox 	for (i = 1; count && i < skip; i += next_skip) {
9987090df5aSMatthew Dillon 		if (scan[i].bm_bighint == (daddr_t)-1)
9997090df5aSMatthew Dillon 			break;
10007090df5aSMatthew Dillon 
10017090df5aSMatthew Dillon 		if (count >= radix) {
10022ac0c7c3SAlan Cox 			blst_copy(&scan[i], blk, radix, dest, radix);
10037090df5aSMatthew Dillon 			count -= radix;
10047090df5aSMatthew Dillon 		} else {
10057090df5aSMatthew Dillon 			if (count) {
10062ac0c7c3SAlan Cox 				blst_copy(&scan[i], blk, radix, dest, count);
10077090df5aSMatthew Dillon 			}
10087090df5aSMatthew Dillon 			count = 0;
10097090df5aSMatthew Dillon 		}
10107090df5aSMatthew Dillon 		blk += radix;
10117090df5aSMatthew Dillon 	}
10127090df5aSMatthew Dillon }
10137090df5aSMatthew Dillon 
10147090df5aSMatthew Dillon /*
101592da00bbSMatthew Dillon  * BLST_LEAF_FILL() -	allocate specific blocks in leaf bitmap
101692da00bbSMatthew Dillon  *
101792da00bbSMatthew Dillon  *	This routine allocates all blocks in the specified range
101892da00bbSMatthew Dillon  *	regardless of any existing allocations in that range.  Returns
101992da00bbSMatthew Dillon  *	the number of blocks allocated by the call.
102092da00bbSMatthew Dillon  */
1021a7249a6cSAlan Cox static daddr_t
102292da00bbSMatthew Dillon blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
102392da00bbSMatthew Dillon {
1024a7249a6cSAlan Cox 	daddr_t nblks;
10254be4fd5dSAlan Cox 	u_daddr_t mask;
1026ec371b57SAlan Cox 	int n;
102792da00bbSMatthew Dillon 
1028ec371b57SAlan Cox 	n = blk & BLIST_BMAP_MASK;
102992da00bbSMatthew Dillon 	mask = ((u_daddr_t)-1 << n) &
103092da00bbSMatthew Dillon 	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));
103192da00bbSMatthew Dillon 
10324be4fd5dSAlan Cox 	/* Count the number of blocks that we are allocating. */
10334be4fd5dSAlan Cox 	nblks = bitcount64(scan->u.bmu_bitmap & mask);
103492da00bbSMatthew Dillon 
103592da00bbSMatthew Dillon 	scan->u.bmu_bitmap &= ~mask;
10364be4fd5dSAlan Cox 	return (nblks);
103792da00bbSMatthew Dillon }
103892da00bbSMatthew Dillon 
103992da00bbSMatthew Dillon /*
104092da00bbSMatthew Dillon  * BLST_META_FILL() -	allocate specific blocks at a meta node
104192da00bbSMatthew Dillon  *
104292da00bbSMatthew Dillon  *	This routine allocates the specified range of blocks,
104392da00bbSMatthew Dillon  *	regardless of any existing allocations in the range.  The
104492da00bbSMatthew Dillon  *	range must be within the extent of this node.  Returns the
104592da00bbSMatthew Dillon  *	number of blocks allocated by the call.
104692da00bbSMatthew Dillon  */
1047a7249a6cSAlan Cox static daddr_t
1048bee93d3cSAlan Cox blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, u_daddr_t radix)
1049d3783724SAlan Cox {
1050bee93d3cSAlan Cox 	daddr_t blk, i, nblks, next_skip, skip, v;
1051d3783724SAlan Cox 	int child;
105292da00bbSMatthew Dillon 
1053a396c83aSAlan Cox 	if (scan->bm_bighint == (daddr_t)-1)
1054a396c83aSAlan Cox 		panic("filling invalid range");
10554be4fd5dSAlan Cox 	if (count > radix) {
10564be4fd5dSAlan Cox 		/*
10574be4fd5dSAlan Cox 		 * The allocation exceeds the number of blocks that are
1058a396c83aSAlan Cox 		 * managed by this node.
10594be4fd5dSAlan Cox 		 */
1060a396c83aSAlan Cox 		panic("fill too large");
10614be4fd5dSAlan Cox 	}
1062a396c83aSAlan Cox 	if (radix == BLIST_BMAP_RADIX)
1063a396c83aSAlan Cox 		return (blst_leaf_fill(scan, allocBlk, count));
106492da00bbSMatthew Dillon 	if (count == radix || scan->u.bmu_avail == 0)  {
106592da00bbSMatthew Dillon 		/*
106692da00bbSMatthew Dillon 		 * ALL-ALLOCATED special case
106792da00bbSMatthew Dillon 		 */
106892da00bbSMatthew Dillon 		nblks = scan->u.bmu_avail;
106992da00bbSMatthew Dillon 		scan->u.bmu_avail = 0;
107086dd278fSAlan Cox 		scan->bm_bighint = 0;
1071a396c83aSAlan Cox 		return (nblks);
107292da00bbSMatthew Dillon 	}
10732ac0c7c3SAlan Cox 	skip = radix_to_skip(radix);
1074d3783724SAlan Cox 	next_skip = skip / BLIST_META_RADIX;
1075bee93d3cSAlan Cox 	blk = allocBlk & -radix;
107692da00bbSMatthew Dillon 
10774be4fd5dSAlan Cox 	/*
10784be4fd5dSAlan Cox 	 * An ALL-FREE meta node requires special handling before allocating
10794be4fd5dSAlan Cox 	 * any of its blocks.
10804be4fd5dSAlan Cox 	 */
108192da00bbSMatthew Dillon 	if (scan->u.bmu_avail == radix) {
108257e6d29bSMatthew Dillon 		radix /= BLIST_META_RADIX;
108392da00bbSMatthew Dillon 
108492da00bbSMatthew Dillon 		/*
10854be4fd5dSAlan Cox 		 * Reinitialize each of the meta node's children.  An ALL-FREE
10864be4fd5dSAlan Cox 		 * meta node cannot have a terminator in any subtree.
108792da00bbSMatthew Dillon 		 */
10882ac0c7c3SAlan Cox 		for (i = 1; i < skip; i += next_skip) {
1089a396c83aSAlan Cox 			if (next_skip == 1)
109092da00bbSMatthew Dillon 				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
1091a396c83aSAlan Cox 			else
109292da00bbSMatthew Dillon 				scan[i].u.bmu_avail = radix;
1093a396c83aSAlan Cox 			scan[i].bm_bighint = radix;
109492da00bbSMatthew Dillon 		}
109592da00bbSMatthew Dillon 	} else {
109657e6d29bSMatthew Dillon 		radix /= BLIST_META_RADIX;
109792da00bbSMatthew Dillon 	}
109892da00bbSMatthew Dillon 
1099d3783724SAlan Cox 	nblks = 0;
1100d3783724SAlan Cox 	child = (allocBlk - blk) / radix;
1101d3783724SAlan Cox 	blk += child * radix;
1102d3783724SAlan Cox 	i = 1 + child * next_skip;
11032ac0c7c3SAlan Cox 	while (i < skip && blk < allocBlk + count) {
110492da00bbSMatthew Dillon 		v = blk + radix - allocBlk;
110592da00bbSMatthew Dillon 		if (v > count)
110692da00bbSMatthew Dillon 			v = count;
1107bee93d3cSAlan Cox 		nblks += blst_meta_fill(&scan[i], allocBlk, v, radix);
110892da00bbSMatthew Dillon 		count -= v;
110992da00bbSMatthew Dillon 		allocBlk += v;
111092da00bbSMatthew Dillon 		blk += radix;
111192da00bbSMatthew Dillon 		i += next_skip;
111292da00bbSMatthew Dillon 	}
111392da00bbSMatthew Dillon 	scan->u.bmu_avail -= nblks;
1114a396c83aSAlan Cox 	return (nblks);
111592da00bbSMatthew Dillon }
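
/*
 * Illustrative sketch only, not part of the original source: one way the
 * public blist_fill() interface built on blst_meta_fill() might be used to
 * reserve a known block range up front.  blist_fill_example() and the
 * BLIST_FILL_EXAMPLE guard are hypothetical, so this never compiles into
 * the kernel.
 */
#ifdef BLIST_FILL_EXAMPLE
static void
blist_fill_example(void)
{
	blist_t bl;
	daddr_t filled;

	bl = blist_create(2048, M_WAITOK);	/* new maps start fully allocated */
	blist_free(bl, 0, 2048);		/* make every block available */

	/*
	 * Force blocks [128, 160) into the allocated state regardless of
	 * their current state; the return value is how many of them were
	 * free beforehand.
	 */
	filled = blist_fill(bl, 128, 32);
	KASSERT(filled == 32, ("expected 32 newly filled blocks"));

	blist_destroy(bl);
}
#endif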
111692da00bbSMatthew Dillon 
11177090df5aSMatthew Dillon #ifdef BLIST_DEBUG
11187090df5aSMatthew Dillon 
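/*
 * BLST_RADIX_PRINT() - dump the radix tree (debugging aid)
 *
 *	Recursively prints each node's block range, availability, and
 *	bighint, indenting four spaces per level of the tree.
 */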
11197090df5aSMatthew Dillon static void
11202ac0c7c3SAlan Cox blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab)
11217090df5aSMatthew Dillon {
11222ac0c7c3SAlan Cox 	daddr_t i, next_skip, skip;
11237090df5aSMatthew Dillon 
11247090df5aSMatthew Dillon 	if (radix == BLIST_BMAP_RADIX) {
11257090df5aSMatthew Dillon 		printf(
1126d90bf7d8SAlan Cox 		    "%*.*s(%08llx,%lld): bitmap %016llx big=%lld\n",
11277090df5aSMatthew Dillon 		    tab, tab, "",
112892da00bbSMatthew Dillon 		    (long long)blk, (long long)radix,
112992da00bbSMatthew Dillon 		    (long long)scan->u.bmu_bitmap,
113092da00bbSMatthew Dillon 		    (long long)scan->bm_bighint
11317090df5aSMatthew Dillon 		);
11327090df5aSMatthew Dillon 		return;
11337090df5aSMatthew Dillon 	}
11347090df5aSMatthew Dillon 
11357090df5aSMatthew Dillon 	if (scan->u.bmu_avail == 0) {
11367090df5aSMatthew Dillon 		printf(
113792da00bbSMatthew Dillon 		    "%*.*s(%08llx,%lld) ALL ALLOCATED\n",
11387090df5aSMatthew Dillon 		    tab, tab, "",
113992da00bbSMatthew Dillon 		    (long long)blk,
114092da00bbSMatthew Dillon 		    (long long)radix
11417090df5aSMatthew Dillon 		);
11427090df5aSMatthew Dillon 		return;
11437090df5aSMatthew Dillon 	}
11447090df5aSMatthew Dillon 	if (scan->u.bmu_avail == radix) {
11457090df5aSMatthew Dillon 		printf(
114692da00bbSMatthew Dillon 		    "%*.*s(%08llx,%lld) ALL FREE\n",
11477090df5aSMatthew Dillon 		    tab, tab, "",
114892da00bbSMatthew Dillon 		    (long long)blk,
114992da00bbSMatthew Dillon 		    (long long)radix
11507090df5aSMatthew Dillon 		);
11517090df5aSMatthew Dillon 		return;
11527090df5aSMatthew Dillon 	}
11537090df5aSMatthew Dillon 
11547090df5aSMatthew Dillon 	printf(
115592da00bbSMatthew Dillon 	    "%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n",
11567090df5aSMatthew Dillon 	    tab, tab, "",
115792da00bbSMatthew Dillon 	    (long long)blk, (long long)radix,
115892da00bbSMatthew Dillon 	    (long long)scan->u.bmu_avail,
115992da00bbSMatthew Dillon 	    (long long)radix,
116092da00bbSMatthew Dillon 	    (long long)scan->bm_bighint
11617090df5aSMatthew Dillon 	);
11627090df5aSMatthew Dillon 
11632ac0c7c3SAlan Cox 	skip = radix_to_skip(radix);
1164d3783724SAlan Cox 	next_skip = skip / BLIST_META_RADIX;
11652ac0c7c3SAlan Cox 	radix /= BLIST_META_RADIX;
11667090df5aSMatthew Dillon 	tab += 4;
11677090df5aSMatthew Dillon 
11682ac0c7c3SAlan Cox 	for (i = 1; i < skip; i += next_skip) {
11697090df5aSMatthew Dillon 		if (scan[i].bm_bighint == (daddr_t)-1) {
11707090df5aSMatthew Dillon 			printf(
117192da00bbSMatthew Dillon 			    "%*.*s(%08llx,%lld): Terminator\n",
11727090df5aSMatthew Dillon 			    tab, tab, "",
117392da00bbSMatthew Dillon 			    (long long)blk, (long long)radix
11747090df5aSMatthew Dillon 			);
11757090df5aSMatthew Dillon 			break;
11767090df5aSMatthew Dillon 		}
11772ac0c7c3SAlan Cox 		blst_radix_print(&scan[i], blk, radix, tab);
11787090df5aSMatthew Dillon 		blk += radix;
11797090df5aSMatthew Dillon 	}
11807090df5aSMatthew Dillon 	tab -= 4;
11817090df5aSMatthew Dillon 
11827090df5aSMatthew Dillon 	printf(
11837090df5aSMatthew Dillon 	    "%*.*s}\n",
11847090df5aSMatthew Dillon 	    tab, tab, ""
11857090df5aSMatthew Dillon 	);
11867090df5aSMatthew Dillon }
11877090df5aSMatthew Dillon 
11887090df5aSMatthew Dillon #endif
11897090df5aSMatthew Dillon 
11907090df5aSMatthew Dillon #ifdef BLIST_DEBUG
11917090df5aSMatthew Dillon 
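/*
 * MAIN() - interactive userland test harness
 *
 *	Built only under BLIST_DEBUG.  Reads one-letter commands from stdin
 *	(see the 'h' command) to exercise the allocator interactively.
 */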
11927090df5aSMatthew Dillon int
11937090df5aSMatthew Dillon main(int ac, char **av)
11947090df5aSMatthew Dillon {
11957090df5aSMatthew Dillon 	int size = 1024;
11967090df5aSMatthew Dillon 	int i;
11977090df5aSMatthew Dillon 	blist_t bl;
1198d027ed2eSAlan Cox 	struct sbuf *s;
11997090df5aSMatthew Dillon 
12007090df5aSMatthew Dillon 	for (i = 1; i < ac; ++i) {
12017090df5aSMatthew Dillon 		const char *ptr = av[i];
12027090df5aSMatthew Dillon 		if (*ptr != '-') {
12037090df5aSMatthew Dillon 			size = strtol(ptr, NULL, 0);
12047090df5aSMatthew Dillon 			continue;
12057090df5aSMatthew Dillon 		}
12067090df5aSMatthew Dillon 		ptr += 2;
12077090df5aSMatthew Dillon 		fprintf(stderr, "Bad option: %s\n", ptr - 2);
12087090df5aSMatthew Dillon 		exit(1);
12097090df5aSMatthew Dillon 	}
1210c8c7ad92SKip Macy 	bl = blist_create(size, M_WAITOK);
12117090df5aSMatthew Dillon 	blist_free(bl, 0, size);
12127090df5aSMatthew Dillon 
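	/* The prompt shows free blocks / total size / radix. */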
12137090df5aSMatthew Dillon 	for (;;) {
12147090df5aSMatthew Dillon 		char buf[1024];
1215d90bf7d8SAlan Cox 		long long da = 0;
1216d90bf7d8SAlan Cox 		long long count = 0;
12177090df5aSMatthew Dillon 
12184be4fd5dSAlan Cox 		printf("%lld/%lld/%lld> ", (long long)blist_avail(bl),
121992da00bbSMatthew Dillon 		    (long long)size, (long long)bl->bl_radix);
12207090df5aSMatthew Dillon 		fflush(stdout);
12217090df5aSMatthew Dillon 		if (fgets(buf, sizeof(buf), stdin) == NULL)
12227090df5aSMatthew Dillon 			break;
12237090df5aSMatthew Dillon 		switch (buf[0]) {
12247090df5aSMatthew Dillon 		case 'r':
122592da00bbSMatthew Dillon 			if (sscanf(buf + 1, "%lld", &count) == 1) {
1226d90bf7d8SAlan Cox 				blist_resize(&bl, count, 1, M_WAITOK);
12277090df5aSMatthew Dillon 			} else {
12287090df5aSMatthew Dillon 				printf("?\n");
12297090df5aSMatthew Dillon 			}
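			/* FALLTHROUGH -- print the blist after a resize. */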
12307090df5aSMatthew Dillon 		case 'p':
12317090df5aSMatthew Dillon 			blist_print(bl);
12327090df5aSMatthew Dillon 			break;
1233d027ed2eSAlan Cox 		case 's':
1234d027ed2eSAlan Cox 			s = sbuf_new_auto();
1235d027ed2eSAlan Cox 			blist_stats(bl, s);
1236d027ed2eSAlan Cox 			sbuf_finish(s);
1237d027ed2eSAlan Cox 			printf("%s", sbuf_data(s));
1238d027ed2eSAlan Cox 			sbuf_delete(s);
1239d027ed2eSAlan Cox 			break;
12407090df5aSMatthew Dillon 		case 'a':
124192da00bbSMatthew Dillon 			if (sscanf(buf + 1, "%lld", &count) == 1) {
12427090df5aSMatthew Dillon 				daddr_t blk = blist_alloc(bl, count);
124392da00bbSMatthew Dillon 				printf("    R=%08llx\n", (long long)blk);
12447090df5aSMatthew Dillon 			} else {
12457090df5aSMatthew Dillon 				printf("?\n");
12467090df5aSMatthew Dillon 			}
12477090df5aSMatthew Dillon 			break;
12487090df5aSMatthew Dillon 		case 'f':
1249d90bf7d8SAlan Cox 			if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
12507090df5aSMatthew Dillon 				blist_free(bl, da, count);
12517090df5aSMatthew Dillon 			} else {
12527090df5aSMatthew Dillon 				printf("?\n");
12537090df5aSMatthew Dillon 			}
12547090df5aSMatthew Dillon 			break;
125592da00bbSMatthew Dillon 		case 'l':
1256d90bf7d8SAlan Cox 			if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
1257a7249a6cSAlan Cox 				printf("    n=%jd\n",
1258a7249a6cSAlan Cox 				    (intmax_t)blist_fill(bl, da, count));
125992da00bbSMatthew Dillon 			} else {
126092da00bbSMatthew Dillon 				printf("?\n");
126192da00bbSMatthew Dillon 			}
126292da00bbSMatthew Dillon 			break;
12637090df5aSMatthew Dillon 		case '?':
12647090df5aSMatthew Dillon 		case 'h':
12657090df5aSMatthew Dillon 			puts(
12667090df5aSMatthew Dillon 			    "p          -print\n"
1267d027ed2eSAlan Cox 			    "s          -stats\n"
12687090df5aSMatthew Dillon 			    "a %d       -allocate\n"
12697090df5aSMatthew Dillon 			    "f %x %d    -free\n"
127092da00bbSMatthew Dillon 			    "l %x %d    -fill\n"
12717090df5aSMatthew Dillon 			    "r %d       -resize\n"
12727090df5aSMatthew Dillon 			    "h/?        -help"
12737090df5aSMatthew Dillon 			);
12747090df5aSMatthew Dillon 			break;
12757090df5aSMatthew Dillon 		default:
12767090df5aSMatthew Dillon 			printf("?\n");
12777090df5aSMatthew Dillon 			break;
12787090df5aSMatthew Dillon 		}
12797090df5aSMatthew Dillon 	}
12807090df5aSMatthew Dillon 	return (0);
12817090df5aSMatthew Dillon }
12827090df5aSMatthew Dillon 
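/*
 * PANIC() - userland stand-in for the kernel panic()
 *
 *	Lets the test harness link outside the kernel; prints the message
 *	and exits.
 */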
12837090df5aSMatthew Dillon void
12847090df5aSMatthew Dillon panic(const char *ctl, ...)
12857090df5aSMatthew Dillon {
12867090df5aSMatthew Dillon 	va_list va;
12877090df5aSMatthew Dillon 
12887090df5aSMatthew Dillon 	va_start(va, ctl);
12897090df5aSMatthew Dillon 	vfprintf(stderr, ctl, va);
12907090df5aSMatthew Dillon 	fprintf(stderr, "\n");
12917090df5aSMatthew Dillon 	va_end(va);
12927090df5aSMatthew Dillon 	exit(1);
12937090df5aSMatthew Dillon }
12947090df5aSMatthew Dillon 
12957090df5aSMatthew Dillon #endif
1296