xref: /freebsd/sys/kern/subr_blist.c (revision 8eefcd407b8f1d44c3062cfefdeed166d3b8154f)
106b4bf3eSWarner Losh /*-
206b4bf3eSWarner Losh  * Copyright (c) 1998 Matthew Dillon.  All Rights Reserved.
306b4bf3eSWarner Losh  * Redistribution and use in source and binary forms, with or without
406b4bf3eSWarner Losh  * modification, are permitted provided that the following conditions
506b4bf3eSWarner Losh  * are met:
606b4bf3eSWarner Losh  * 1. Redistributions of source code must retain the above copyright
706b4bf3eSWarner Losh  *    notice, this list of conditions and the following disclaimer.
806b4bf3eSWarner Losh  * 2. Redistributions in binary form must reproduce the above copyright
906b4bf3eSWarner Losh  *    notice, this list of conditions and the following disclaimer in the
1006b4bf3eSWarner Losh  *    documentation and/or other materials provided with the distribution.
1169a28758SEd Maste  * 3. Neither the name of the University nor the names of its contributors
1206b4bf3eSWarner Losh  *    may be used to endorse or promote products derived from this software
1306b4bf3eSWarner Losh  *    without specific prior written permission.
1406b4bf3eSWarner Losh  *
1506b4bf3eSWarner Losh  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
1606b4bf3eSWarner Losh  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1706b4bf3eSWarner Losh  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1806b4bf3eSWarner Losh  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
1906b4bf3eSWarner Losh  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2006b4bf3eSWarner Losh  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
2106b4bf3eSWarner Losh  * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2206b4bf3eSWarner Losh  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
2306b4bf3eSWarner Losh  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
2406b4bf3eSWarner Losh  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
2506b4bf3eSWarner Losh  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2606b4bf3eSWarner Losh  */
277090df5aSMatthew Dillon /*
287090df5aSMatthew Dillon  * BLIST.C -	Bitmap allocator/deallocator, using a radix tree with hinting
297090df5aSMatthew Dillon  *
307090df5aSMatthew Dillon  *	This module implements a general bitmap allocator/deallocator.  The
317090df5aSMatthew Dillon  *	allocator eats around 2 bits per 'block'.  The module does not
32f565f395SEitan Adler  *	try to interpret the meaning of a 'block' other than to return
337090df5aSMatthew Dillon  *	SWAPBLK_NONE on an allocation failure.
347090df5aSMatthew Dillon  *
35ec371b57SAlan Cox  *	A radix tree controls access to pieces of the bitmap, and includes
36ec371b57SAlan Cox  *	auxiliary information at each interior node about the availability of
37ec371b57SAlan Cox  *	contiguous free blocks in the subtree rooted at that node.  Two radix
38ec371b57SAlan Cox  *	constants are involved: one for the size of the bitmaps contained in the
39ec371b57SAlan Cox  *	leaf nodes (BLIST_BMAP_RADIX), and one for the number of descendants of
40ec371b57SAlan Cox  *	each of the meta (interior) nodes (BLIST_META_RADIX).  Each subtree is
41ec371b57SAlan Cox  *	associated with a range of blocks.  The root of any subtree stores a
42ec371b57SAlan Cox  *	hint field that defines an upper bound on the size of the largest
43ec371b57SAlan Cox  *	allocation that can begin in the associated block range.  A hint is an
44ec371b57SAlan Cox  *	upper bound on a potential allocation, but not necessarily a tight upper
45ec371b57SAlan Cox  *	bound.
467090df5aSMatthew Dillon  *
477090df5aSMatthew Dillon  *	The radix tree also implements two collapsed states for meta nodes:
487090df5aSMatthew Dillon  *	the ALL-ALLOCATED state and the ALL-FREE state.  If a meta node is
497090df5aSMatthew Dillon  *	in either of these two states, all information contained underneath
507090df5aSMatthew Dillon  *	the node is considered stale.  These states are used to optimize
517090df5aSMatthew Dillon  *	allocation and freeing operations.
527090df5aSMatthew Dillon  *
537090df5aSMatthew Dillon  * 	The hinting greatly increases code efficiency for allocations while
547090df5aSMatthew Dillon  *	the general radix structure optimizes both allocations and frees.  The
557090df5aSMatthew Dillon  *	radix tree should be able to operate well no matter how much
567090df5aSMatthew Dillon  *	fragmentation there is and no matter how large a bitmap is used.
577090df5aSMatthew Dillon  *
5851dc4feaSSergey Kandaurov  *	The blist code wires all necessary memory at creation time.  Neither
5951dc4feaSSergey Kandaurov  *	allocations nor frees require interaction with the memory subsystem.
6051dc4feaSSergey Kandaurov  *	The non-blocking features of the blist code are used in the swap code
6151dc4feaSSergey Kandaurov  *	(vm/swap_pager.c).
627090df5aSMatthew Dillon  *
63e3043798SPedro F. Giffuni  *	LAYOUT: The radix tree is laid out recursively using a
64e3043798SPedro F. Giffuni  *	linear array.  Each meta node is immediately followed (laid out
657090df5aSMatthew Dillon  *	sequentially in memory) by BLIST_META_RADIX lower level nodes.  This
667090df5aSMatthew Dillon  *	is a recursive structure but one that can be easily scanned through
677090df5aSMatthew Dillon  *	a very simple 'skip' calculation.  In order to support large radixes,
687090df5aSMatthew Dillon  *	portions of the tree may reside outside our memory allocation.  We
697090df5aSMatthew Dillon  *	handle this with an early-termination optimization (when bighint is
707090df5aSMatthew Dillon  *	set to -1) on the scan.  The memory allocation is only large enough
717090df5aSMatthew Dillon  *	to cover the number of blocks requested at creation time even if it
727090df5aSMatthew Dillon  *	must be encompassed in larger root-node radix.
727090df5aSMatthew Dillon  *	must be encompassed in a larger root-node radix.
74f565f395SEitan Adler  *	NOTE: the allocator cannot currently allocate more than
757090df5aSMatthew Dillon  *	BLIST_BMAP_RADIX blocks per call.  It will panic with 'allocation too
767090df5aSMatthew Dillon  *	large' if you try.  This is an area that could use improvement.  The
777090df5aSMatthew Dillon  *	radix is large enough that this restriction does not affect the swap
78b411efa4SAlan Cox  *	system, though.  Currently only the allocation code is affected by
797090df5aSMatthew Dillon  *	this algorithmic unfeature.  The freeing code can handle arbitrary
807090df5aSMatthew Dillon  *	ranges.
817090df5aSMatthew Dillon  *
827090df5aSMatthew Dillon  *	This code can be compiled stand-alone for debugging.
837090df5aSMatthew Dillon  */
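/*
 *	Illustration (added for exposition; the real constants live in
 *	sys/blist.h and are assumed here to be BLIST_BMAP_RADIX = 64 and
 *	BLIST_META_RADIX = 16): a two-meta-level tree covering 16384 blocks
 *	is laid out in the node array as
 *
 *	    index:   0    1    2 .. 17   18   19 .. 34   35 ...
 *	    node:    M    M    L .. L    M    L .. L     M  ...
 *	             |    |    |         |
 *	             |    |    |         +-- 2nd child meta (blocks 1024-2047)
 *	             |    |    +------------ 16 leaves of the 1st child
 *	             |    +----------------- 1st child meta (blocks 0-1023)
 *	             +---------------------- root meta (blocks 0-16383)
 *
 *	so a meta node's children are spaced next_skip = skip / BLIST_META_RADIX
 *	array slots apart (17 slots in this example).
 */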
847090df5aSMatthew Dillon 
85677b542eSDavid E. O'Brien #include <sys/cdefs.h>
86677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$");
87677b542eSDavid E. O'Brien 
88c4473420SPeter Wemm #ifdef _KERNEL
897090df5aSMatthew Dillon 
907090df5aSMatthew Dillon #include <sys/param.h>
917090df5aSMatthew Dillon #include <sys/systm.h>
927090df5aSMatthew Dillon #include <sys/lock.h>
937090df5aSMatthew Dillon #include <sys/kernel.h>
947090df5aSMatthew Dillon #include <sys/blist.h>
957090df5aSMatthew Dillon #include <sys/malloc.h>
96d027ed2eSAlan Cox #include <sys/sbuf.h>
970cddd8f0SMatthew Dillon #include <sys/proc.h>
9823955314SAlfred Perlstein #include <sys/mutex.h>
997090df5aSMatthew Dillon 
1007090df5aSMatthew Dillon #else
1017090df5aSMatthew Dillon 
1027090df5aSMatthew Dillon #ifndef BLIST_NO_DEBUG
1037090df5aSMatthew Dillon #define BLIST_DEBUG
1047090df5aSMatthew Dillon #endif
1057090df5aSMatthew Dillon 
1067090df5aSMatthew Dillon #include <sys/types.h>
107d90bf7d8SAlan Cox #include <sys/malloc.h>
108d027ed2eSAlan Cox #include <sys/sbuf.h>
1097090df5aSMatthew Dillon #include <stdio.h>
1107090df5aSMatthew Dillon #include <string.h>
111*8eefcd40SAlan Cox #include <stddef.h>
1127090df5aSMatthew Dillon #include <stdlib.h>
1137090df5aSMatthew Dillon #include <stdarg.h>
114d3783724SAlan Cox #include <stdbool.h>
1157090df5aSMatthew Dillon 
1164be4fd5dSAlan Cox #define	bitcount64(x)	__bitcount64((uint64_t)(x))
11792da00bbSMatthew Dillon #define malloc(a,b,c)	calloc(a, 1)
1187090df5aSMatthew Dillon #define free(a,b)	free(a)
119ec371b57SAlan Cox static __inline int imax(int a, int b) { return (a > b ? a : b); }
1207090df5aSMatthew Dillon 
1217090df5aSMatthew Dillon #include <sys/blist.h>
1227090df5aSMatthew Dillon 
1237090df5aSMatthew Dillon void panic(const char *ctl, ...);
1247090df5aSMatthew Dillon 
1257090df5aSMatthew Dillon #endif
1267090df5aSMatthew Dillon 
1277090df5aSMatthew Dillon /*
1287090df5aSMatthew Dillon  * static support functions
1297090df5aSMatthew Dillon  */
130ec371b57SAlan Cox static daddr_t	blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count);
131bee93d3cSAlan Cox static daddr_t	blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count,
132bee93d3cSAlan Cox 		    u_daddr_t radix);
1337090df5aSMatthew Dillon static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
1347090df5aSMatthew Dillon static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
135bee93d3cSAlan Cox 		    u_daddr_t radix);
1367090df5aSMatthew Dillon static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
1372ac0c7c3SAlan Cox 		    blist_t dest, daddr_t count);
138a7249a6cSAlan Cox static daddr_t blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count);
139a7249a6cSAlan Cox static daddr_t blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count,
140bee93d3cSAlan Cox 		    u_daddr_t radix);
141c4473420SPeter Wemm #ifndef _KERNEL
142d3783724SAlan Cox static void	blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix,
1432ac0c7c3SAlan Cox 		    int tab);
1447090df5aSMatthew Dillon #endif
1457090df5aSMatthew Dillon 
146c4473420SPeter Wemm #ifdef _KERNEL
1477090df5aSMatthew Dillon static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
1487090df5aSMatthew Dillon #endif
1497090df5aSMatthew Dillon 
150ec371b57SAlan Cox _Static_assert(BLIST_BMAP_RADIX % BLIST_META_RADIX == 0,
151ec371b57SAlan Cox     "radix divisibility error");
152ec371b57SAlan Cox #define	BLIST_BMAP_MASK	(BLIST_BMAP_RADIX - 1)
153d027ed2eSAlan Cox #define	BLIST_META_MASK	(BLIST_META_RADIX - 1)
154ba98e6a2SAlan Cox 
1557090df5aSMatthew Dillon /*
1562ac0c7c3SAlan Cox  * For a subtree that can represent the state of up to 'radix' blocks, the
1572ac0c7c3SAlan Cox  * number of leaf nodes of the subtree is L=radix/BLIST_BMAP_RADIX.  If 'm'
1582ac0c7c3SAlan Cox  * is short for BLIST_META_RADIX, then for a tree of height h with L=m**h
1592ac0c7c3SAlan Cox  * leaf nodes, the total number of tree nodes is 1 + m + m**2 + ... + m**h,
1602ac0c7c3SAlan Cox  * or, equivalently, (m**(h+1)-1)/(m-1).  This quantity is called 'skip'
1612ac0c7c3SAlan Cox  * in the 'meta' functions that process subtrees.  Since integer division
1622ac0c7c3SAlan Cox  * discards remainders, we can express this computation as
1632ac0c7c3SAlan Cox  * skip = (m * m**h) / (m - 1)
164ba98e6a2SAlan Cox  * skip = (m * (radix / BLIST_BMAP_RADIX)) / (m - 1)
165ba98e6a2SAlan Cox  * and since m divides BLIST_BMAP_RADIX, we can simplify further to
166ba98e6a2SAlan Cox  * skip = (radix / (BLIST_BMAP_RADIX / m)) / (m - 1)
167ba98e6a2SAlan Cox  * skip = radix / ((BLIST_BMAP_RADIX / m) * (m - 1))
168ba98e6a2SAlan Cox  * so that simple integer division by a constant can safely be used for the
169ba98e6a2SAlan Cox  * calculation.
1702ac0c7c3SAlan Cox  */
1712ac0c7c3SAlan Cox static inline daddr_t
1722ac0c7c3SAlan Cox radix_to_skip(daddr_t radix)
1732ac0c7c3SAlan Cox {
1742ac0c7c3SAlan Cox 
1752ac0c7c3SAlan Cox 	return (radix /
176d027ed2eSAlan Cox 	    ((BLIST_BMAP_RADIX / BLIST_META_RADIX) * BLIST_META_MASK));
177d027ed2eSAlan Cox }
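/*
 * A quick numeric check of the skip formula (assuming BLIST_BMAP_RADIX = 64
 * and BLIST_META_RADIX = 16, so BLIST_META_MASK = 15): for a subtree with one
 * level of meta nodes, radix = 64 * 16 = 1024 and
 *
 *	radix_to_skip(1024) = 1024 / ((64 / 16) * 15) = 1024 / 60 = 17
 *
 * which is the meta node plus its 16 leaves.  For two meta levels,
 * radix_to_skip(16384) = 16384 / 60 = 273 = 1 + 16 + 256.
 */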
178d027ed2eSAlan Cox 
179d027ed2eSAlan Cox /*
180d027ed2eSAlan Cox  * Use binary search, or a faster method, to find the 1 bit in a u_daddr_t.
181d027ed2eSAlan Cox  * Assumes that the argument has only one bit set.
182d027ed2eSAlan Cox  */
183d027ed2eSAlan Cox static inline int
184d027ed2eSAlan Cox bitpos(u_daddr_t mask)
185d027ed2eSAlan Cox {
186d027ed2eSAlan Cox 	int hi, lo, mid;
187d027ed2eSAlan Cox 
188d027ed2eSAlan Cox 	switch (sizeof(mask)) {
189d027ed2eSAlan Cox #ifdef HAVE_INLINE_FFSLL
190d027ed2eSAlan Cox 	case sizeof(long long):
191d027ed2eSAlan Cox 		return (ffsll(mask) - 1);
192d027ed2eSAlan Cox #endif
193d027ed2eSAlan Cox 	default:
194d027ed2eSAlan Cox 		lo = 0;
195d027ed2eSAlan Cox 		hi = BLIST_BMAP_RADIX;
196d027ed2eSAlan Cox 		while (lo + 1 < hi) {
197d027ed2eSAlan Cox 			mid = (lo + hi) >> 1;
198d027ed2eSAlan Cox 			if ((mask >> mid) != 0)
199d027ed2eSAlan Cox 				lo = mid;
200d027ed2eSAlan Cox 			else
201d027ed2eSAlan Cox 				hi = mid;
202d027ed2eSAlan Cox 		}
203d027ed2eSAlan Cox 		return (lo);
204d027ed2eSAlan Cox 	}
2052ac0c7c3SAlan Cox }
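/*
 * Example: bitpos(0x10) == 4.  The ffsll() shortcut above, when available,
 * yields the same answer, since ffsll() numbers bits from 1.
 */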
2062ac0c7c3SAlan Cox 
2072ac0c7c3SAlan Cox /*
2087090df5aSMatthew Dillon  * blist_create() - create a blist capable of handling up to the specified
2097090df5aSMatthew Dillon  *		    number of blocks
2107090df5aSMatthew Dillon  *
211f565f395SEitan Adler  *	blocks - must be greater than 0
212c8c7ad92SKip Macy  * 	flags  - malloc flags
2137090df5aSMatthew Dillon  *
2147090df5aSMatthew Dillon  *	The smallest blist consists of a single leaf node capable of
2157090df5aSMatthew Dillon  *	managing BLIST_BMAP_RADIX blocks.
2167090df5aSMatthew Dillon  */
2177090df5aSMatthew Dillon blist_t
218c8c7ad92SKip Macy blist_create(daddr_t blocks, int flags)
2197090df5aSMatthew Dillon {
2207090df5aSMatthew Dillon 	blist_t bl;
221*8eefcd40SAlan Cox 	daddr_t i, last_block;
222*8eefcd40SAlan Cox 	u_daddr_t nodes, radix, skip;
223*8eefcd40SAlan Cox 	int digit;
2247090df5aSMatthew Dillon 
2257090df5aSMatthew Dillon 	/*
226*8eefcd40SAlan Cox 	 * Calculate the radix and node count used for scanning.  Find the last
227*8eefcd40SAlan Cox 	 * block that is followed by a terminator.
2287090df5aSMatthew Dillon 	 */
229*8eefcd40SAlan Cox 	last_block = blocks - 1;
2307090df5aSMatthew Dillon 	radix = BLIST_BMAP_RADIX;
2317090df5aSMatthew Dillon 	while (radix < blocks) {
232*8eefcd40SAlan Cox 		if (((last_block / radix + 1) & BLIST_META_MASK) != 0)
233*8eefcd40SAlan Cox 			/*
234*8eefcd40SAlan Cox 			 * A terminator will be added.  Update last_block to the
235*8eefcd40SAlan Cox 			 * position just before that terminator.
236*8eefcd40SAlan Cox 			 */
237*8eefcd40SAlan Cox 			last_block |= radix - 1;
23857e6d29bSMatthew Dillon 		radix *= BLIST_META_RADIX;
2397090df5aSMatthew Dillon 	}
2407090df5aSMatthew Dillon 
241*8eefcd40SAlan Cox 	/*
242*8eefcd40SAlan Cox 	 * Count the meta-nodes in the expanded tree, including the final
243*8eefcd40SAlan Cox 	 * terminator, from the bottom level up to the root.
244*8eefcd40SAlan Cox 	 */
245*8eefcd40SAlan Cox 	nodes = (last_block >= blocks) ? 2 : 1;
246*8eefcd40SAlan Cox 	last_block /= BLIST_BMAP_RADIX;
247*8eefcd40SAlan Cox 	while (last_block > 0) {
248*8eefcd40SAlan Cox 		nodes += last_block + 1;
249*8eefcd40SAlan Cox 		last_block /= BLIST_META_RADIX;
250*8eefcd40SAlan Cox 	}
251*8eefcd40SAlan Cox 	bl = malloc(offsetof(struct blist, bl_root[nodes]), M_SWAP, flags);
252015d7db6SAlan Cox 	if (bl == NULL)
253015d7db6SAlan Cox 		return (NULL);
2547090df5aSMatthew Dillon 
2557090df5aSMatthew Dillon 	bl->bl_blocks = blocks;
2567090df5aSMatthew Dillon 	bl->bl_radix = radix;
257d4e3484bSAlan Cox 	bl->bl_cursor = 0;
258*8eefcd40SAlan Cox 
259*8eefcd40SAlan Cox 	/*
260*8eefcd40SAlan Cox 	 * Initialize the empty tree by filling in root values, then initialize
261*8eefcd40SAlan Cox 	 * just the terminators in the rest of the tree.
262*8eefcd40SAlan Cox 	 */
263*8eefcd40SAlan Cox 	bl->bl_root[0].bm_bighint = 0;
264*8eefcd40SAlan Cox 	if (radix == BLIST_BMAP_RADIX)
265*8eefcd40SAlan Cox 		bl->bl_root[0].u.bmu_bitmap = 0;
266*8eefcd40SAlan Cox 	else
267*8eefcd40SAlan Cox 		bl->bl_root[0].u.bmu_avail = 0;
268*8eefcd40SAlan Cox 	last_block = blocks - 1;
269*8eefcd40SAlan Cox 	i = 0;
270*8eefcd40SAlan Cox 	while (radix > BLIST_BMAP_RADIX) {
271*8eefcd40SAlan Cox 		radix /= BLIST_META_RADIX;
272*8eefcd40SAlan Cox 		skip = radix_to_skip(radix);
273*8eefcd40SAlan Cox 		digit = last_block / radix;
274*8eefcd40SAlan Cox 		i += 1 + digit * skip;
275*8eefcd40SAlan Cox 		if (digit != BLIST_META_MASK) {
276*8eefcd40SAlan Cox 			/*
277*8eefcd40SAlan Cox 			 * Add a terminator.
278*8eefcd40SAlan Cox 			 */
279*8eefcd40SAlan Cox 			bl->bl_root[i + skip].bm_bighint = (daddr_t)-1;
280*8eefcd40SAlan Cox 			bl->bl_root[i + skip].u.bmu_bitmap = 0;
281015d7db6SAlan Cox 		}
282*8eefcd40SAlan Cox 		last_block %= radix;
283*8eefcd40SAlan Cox 	}
2847090df5aSMatthew Dillon 
2857090df5aSMatthew Dillon #if defined(BLIST_DEBUG)
2867090df5aSMatthew Dillon 	printf(
28792da00bbSMatthew Dillon 		"BLIST representing %lld blocks (%lld MB of swap)"
28892da00bbSMatthew Dillon 		", requiring %lldK of ram\n",
28992da00bbSMatthew Dillon 		(long long)bl->bl_blocks,
29092da00bbSMatthew Dillon 		(long long)bl->bl_blocks * 4 / 1024,
291015d7db6SAlan Cox 		(long long)(nodes * sizeof(blmeta_t) + 1023) / 1024
2927090df5aSMatthew Dillon 	);
29392da00bbSMatthew Dillon 	printf("BLIST raw radix tree contains %lld records\n",
294015d7db6SAlan Cox 	    (long long)nodes);
2957090df5aSMatthew Dillon #endif
2967090df5aSMatthew Dillon 
2977090df5aSMatthew Dillon 	return (bl);
2987090df5aSMatthew Dillon }
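#if 0
/*
 * Illustrative usage sketch only (kept out of the build): how a consumer
 * such as the swap pager typically drives the blist API.  The block count
 * and allocation size are arbitrary example values, and M_WAITOK is the
 * usual kernel malloc flag (the standalone build ignores the flags).
 */
static void
blist_example(void)
{
	blist_t bl;
	daddr_t blk;

	/* Every block in a freshly created blist starts out allocated. */
	bl = blist_create(1024, M_WAITOK);
	blist_free(bl, 0, 1024);		/* make all blocks available */

	blk = blist_alloc(bl, 16);		/* reserve 16 contiguous blocks */
	if (blk != SWAPBLK_NONE)
		blist_free(bl, blk, 16);	/* and release them again */

	blist_destroy(bl);
}
#endif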
2997090df5aSMatthew Dillon 
3007090df5aSMatthew Dillon void
3017090df5aSMatthew Dillon blist_destroy(blist_t bl)
3027090df5aSMatthew Dillon {
303*8eefcd40SAlan Cox 
3047090df5aSMatthew Dillon 	free(bl, M_SWAP);
3057090df5aSMatthew Dillon }
3067090df5aSMatthew Dillon 
3077090df5aSMatthew Dillon /*
3087090df5aSMatthew Dillon  * blist_alloc() -   reserve space in the block bitmap.  Return the base
3097090df5aSMatthew Dillon  *		     of a contiguous region or SWAPBLK_NONE if space could
3107090df5aSMatthew Dillon  *		     not be allocated.
3117090df5aSMatthew Dillon  */
3127090df5aSMatthew Dillon daddr_t
3137090df5aSMatthew Dillon blist_alloc(blist_t bl, daddr_t count)
3147090df5aSMatthew Dillon {
3154be4fd5dSAlan Cox 	daddr_t blk;
3167090df5aSMatthew Dillon 
317d4e3484bSAlan Cox 	/*
318d4e3484bSAlan Cox 	 * This loop iterates at most twice.  An allocation failure in the
319d4e3484bSAlan Cox 	 * first iteration leads to a second iteration only if the cursor was
320d4e3484bSAlan Cox 	 * non-zero.  When the cursor is zero, an allocation failure will
321d4e3484bSAlan Cox 	 * reduce the hint, stopping further iterations.
322d4e3484bSAlan Cox 	 */
323d4e3484bSAlan Cox 	while (count <= bl->bl_root->bm_bighint) {
324bee93d3cSAlan Cox 		blk = blst_meta_alloc(bl->bl_root, bl->bl_cursor, count,
325bee93d3cSAlan Cox 		    bl->bl_radix);
326d4e3484bSAlan Cox 		if (blk != SWAPBLK_NONE) {
327d4e3484bSAlan Cox 			bl->bl_cursor = blk + count;
328a0ae476bSAlan Cox 			if (bl->bl_cursor == bl->bl_blocks)
329a0ae476bSAlan Cox 				bl->bl_cursor = 0;
3307090df5aSMatthew Dillon 			return (blk);
331d4e3484bSAlan Cox 		} else if (bl->bl_cursor != 0)
332d4e3484bSAlan Cox 			bl->bl_cursor = 0;
3337090df5aSMatthew Dillon 	}
3344be4fd5dSAlan Cox 	return (SWAPBLK_NONE);
3354be4fd5dSAlan Cox }
3364be4fd5dSAlan Cox 
3374be4fd5dSAlan Cox /*
3384be4fd5dSAlan Cox  * blist_avail() -	return the number of free blocks.
3394be4fd5dSAlan Cox  */
3404be4fd5dSAlan Cox daddr_t
3414be4fd5dSAlan Cox blist_avail(blist_t bl)
3424be4fd5dSAlan Cox {
3434be4fd5dSAlan Cox 
3444be4fd5dSAlan Cox 	if (bl->bl_radix == BLIST_BMAP_RADIX)
3454be4fd5dSAlan Cox 		return (bitcount64(bl->bl_root->u.bmu_bitmap));
3464be4fd5dSAlan Cox 	else
3474be4fd5dSAlan Cox 		return (bl->bl_root->u.bmu_avail);
3484be4fd5dSAlan Cox }
3497090df5aSMatthew Dillon 
3507090df5aSMatthew Dillon /*
3517090df5aSMatthew Dillon  * blist_free() -	free up space in the block bitmap.  The freed range
3527090df5aSMatthew Dillon  *		     	must have been allocated previously.  Panic if an
3537090df5aSMatthew Dillon  *			inconsistency is found.
3547090df5aSMatthew Dillon  */
3557090df5aSMatthew Dillon void
3567090df5aSMatthew Dillon blist_free(blist_t bl, daddr_t blkno, daddr_t count)
3577090df5aSMatthew Dillon {
358a396c83aSAlan Cox 
359bee93d3cSAlan Cox 	blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix);
3607090df5aSMatthew Dillon }
3617090df5aSMatthew Dillon 
3627090df5aSMatthew Dillon /*
36392da00bbSMatthew Dillon  * blist_fill() -	mark a region in the block bitmap as off-limits
36492da00bbSMatthew Dillon  *			to the allocator (i.e. allocate it), ignoring any
36592da00bbSMatthew Dillon  *			existing allocations.  Return the number of blocks
36692da00bbSMatthew Dillon  *			actually filled that were free before the call.
36792da00bbSMatthew Dillon  */
368a7249a6cSAlan Cox daddr_t
36992da00bbSMatthew Dillon blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
37092da00bbSMatthew Dillon {
37192da00bbSMatthew Dillon 
372bee93d3cSAlan Cox 	return (blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix));
37392da00bbSMatthew Dillon }
37492da00bbSMatthew Dillon 
37592da00bbSMatthew Dillon /*
3767090df5aSMatthew Dillon  * blist_resize() -	resize an existing radix tree to handle the
3777090df5aSMatthew Dillon  *			specified number of blocks.  This will reallocate
3787090df5aSMatthew Dillon  *			the tree and transfer the previous bitmap to the new
3797090df5aSMatthew Dillon  *			one.  When extending the tree you can specify whether
3807090df5aSMatthew Dillon  *			the new blocks are to be left allocated or freed.
3817090df5aSMatthew Dillon  */
3827090df5aSMatthew Dillon void
383c8c7ad92SKip Macy blist_resize(blist_t *pbl, daddr_t count, int freenew, int flags)
3847090df5aSMatthew Dillon {
385c8c7ad92SKip Macy     blist_t newbl = blist_create(count, flags);
3867090df5aSMatthew Dillon     blist_t save = *pbl;
3877090df5aSMatthew Dillon 
3887090df5aSMatthew Dillon     *pbl = newbl;
3897090df5aSMatthew Dillon     if (count > save->bl_blocks)
3907090df5aSMatthew Dillon 	    count = save->bl_blocks;
3912ac0c7c3SAlan Cox     blst_copy(save->bl_root, 0, save->bl_radix, newbl, count);
3927090df5aSMatthew Dillon 
3937090df5aSMatthew Dillon     /*
3947090df5aSMatthew Dillon      * If resizing upwards, should we free the new space or not?
3957090df5aSMatthew Dillon      */
3967090df5aSMatthew Dillon     if (freenew && count < newbl->bl_blocks) {
3977090df5aSMatthew Dillon 	    blist_free(newbl, count, newbl->bl_blocks - count);
3987090df5aSMatthew Dillon     }
3997090df5aSMatthew Dillon     blist_destroy(save);
4007090df5aSMatthew Dillon }
4017090df5aSMatthew Dillon 
4027090df5aSMatthew Dillon #ifdef BLIST_DEBUG
4037090df5aSMatthew Dillon 
4047090df5aSMatthew Dillon /*
4057090df5aSMatthew Dillon  * blist_print()    - dump radix tree
4067090df5aSMatthew Dillon  */
4077090df5aSMatthew Dillon void
4087090df5aSMatthew Dillon blist_print(blist_t bl)
4097090df5aSMatthew Dillon {
4102ac0c7c3SAlan Cox 	printf("BLIST cursor = %08jx {\n", (uintmax_t)bl->bl_cursor);
4112ac0c7c3SAlan Cox 	blst_radix_print(bl->bl_root, 0, bl->bl_radix, 4);
4127090df5aSMatthew Dillon 	printf("}\n");
4137090df5aSMatthew Dillon }
4147090df5aSMatthew Dillon 
4157090df5aSMatthew Dillon #endif
4167090df5aSMatthew Dillon 
417d027ed2eSAlan Cox static const u_daddr_t fib[] = {
418d027ed2eSAlan Cox 	1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584,
419d027ed2eSAlan Cox 	4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418, 317811,
420d027ed2eSAlan Cox 	514229, 832040, 1346269, 2178309, 3524578,
421d027ed2eSAlan Cox };
422d027ed2eSAlan Cox 
423d027ed2eSAlan Cox /*
424d027ed2eSAlan Cox  * Use 'gap' to describe a maximal range of unallocated blocks/bits.
425d027ed2eSAlan Cox  */
426d027ed2eSAlan Cox struct gap_stats {
427d027ed2eSAlan Cox 	daddr_t	start;		/* current gap start, or SWAPBLK_NONE */
428d027ed2eSAlan Cox 	daddr_t	num;		/* number of gaps observed */
429d027ed2eSAlan Cox 	daddr_t	max;		/* largest gap size */
430d027ed2eSAlan Cox 	daddr_t	avg;		/* average gap size */
431d027ed2eSAlan Cox 	daddr_t	err;		/* sum - num * avg */
432d027ed2eSAlan Cox 	daddr_t	histo[nitems(fib)]; /* # gaps in each size range */
433d027ed2eSAlan Cox 	int	max_bucket;	/* last histo elt with nonzero val */
434d027ed2eSAlan Cox };
435d027ed2eSAlan Cox 
436d027ed2eSAlan Cox /*
437d027ed2eSAlan Cox  * gap_stats_counting()    - is the state 'counting 1 bits'?
438d027ed2eSAlan Cox  *                           or 'skipping 0 bits'?
439d027ed2eSAlan Cox  */
440d027ed2eSAlan Cox static inline bool
441d027ed2eSAlan Cox gap_stats_counting(const struct gap_stats *stats)
442d027ed2eSAlan Cox {
443d027ed2eSAlan Cox 
444d027ed2eSAlan Cox 	return (stats->start != SWAPBLK_NONE);
445d027ed2eSAlan Cox }
446d027ed2eSAlan Cox 
447d027ed2eSAlan Cox /*
448d027ed2eSAlan Cox  * init_gap_stats()    - initialize stats on gap sizes
449d027ed2eSAlan Cox  */
450d027ed2eSAlan Cox static inline void
451d027ed2eSAlan Cox init_gap_stats(struct gap_stats *stats)
452d027ed2eSAlan Cox {
453d027ed2eSAlan Cox 
454d027ed2eSAlan Cox 	bzero(stats, sizeof(*stats));
455d027ed2eSAlan Cox 	stats->start = SWAPBLK_NONE;
456d027ed2eSAlan Cox }
457d027ed2eSAlan Cox 
458d027ed2eSAlan Cox /*
459d027ed2eSAlan Cox  * update_gap_stats()    - update stats on gap sizes
460d027ed2eSAlan Cox  */
461d027ed2eSAlan Cox static void
462d027ed2eSAlan Cox update_gap_stats(struct gap_stats *stats, daddr_t posn)
463d027ed2eSAlan Cox {
464d027ed2eSAlan Cox 	daddr_t size;
465d027ed2eSAlan Cox 	int hi, lo, mid;
466d027ed2eSAlan Cox 
467d027ed2eSAlan Cox 	if (!gap_stats_counting(stats)) {
468d027ed2eSAlan Cox 		stats->start = posn;
469d027ed2eSAlan Cox 		return;
470d027ed2eSAlan Cox 	}
471d027ed2eSAlan Cox 	size = posn - stats->start;
472d027ed2eSAlan Cox 	stats->start = SWAPBLK_NONE;
473d027ed2eSAlan Cox 	if (size > stats->max)
474d027ed2eSAlan Cox 		stats->max = size;
475d027ed2eSAlan Cox 
476d027ed2eSAlan Cox 	/*
477d027ed2eSAlan Cox 	 * Find the Fibonacci range that contains size,
478d027ed2eSAlan Cox 	 * expecting to find it in an early range.
479d027ed2eSAlan Cox 	 */
480d027ed2eSAlan Cox 	lo = 0;
481d027ed2eSAlan Cox 	hi = 1;
482d027ed2eSAlan Cox 	while (hi < nitems(fib) && fib[hi] <= size) {
483d027ed2eSAlan Cox 		lo = hi;
484d027ed2eSAlan Cox 		hi *= 2;
485d027ed2eSAlan Cox 	}
486d027ed2eSAlan Cox 	if (hi >= nitems(fib))
487d027ed2eSAlan Cox 		hi = nitems(fib);
488d027ed2eSAlan Cox 	while (lo + 1 != hi) {
489d027ed2eSAlan Cox 		mid = (lo + hi) >> 1;
490d027ed2eSAlan Cox 		if (fib[mid] <= size)
491d027ed2eSAlan Cox 			lo = mid;
492d027ed2eSAlan Cox 		else
493d027ed2eSAlan Cox 			hi = mid;
494d027ed2eSAlan Cox 	}
495d027ed2eSAlan Cox 	stats->histo[lo]++;
496d027ed2eSAlan Cox 	if (lo > stats->max_bucket)
497d027ed2eSAlan Cox 		stats->max_bucket = lo;
498d027ed2eSAlan Cox 	stats->err += size - stats->avg;
499d027ed2eSAlan Cox 	stats->num++;
500d027ed2eSAlan Cox 	stats->avg += stats->err / stats->num;
501d027ed2eSAlan Cox 	stats->err %= stats->num;
502d027ed2eSAlan Cox }
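/*
 * The avg/err pair above is an exact integer running mean: the invariant
 * sum(sizes) == num * avg + err holds after every update.  For example
 * (hypothetical gap sizes 3 and then 5):
 *
 *	1st: err = 3 - 0 = 3, num = 1, avg = 0 + 3/1 = 3, err = 3 % 1 = 0
 *	2nd: err = 0 + (5 - 3) = 2, num = 2, avg = 3 + 2/2 = 4, err = 2 % 2 = 0
 *
 * so no rounding error accumulates across a long sequence of updates.
 */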
503d027ed2eSAlan Cox 
504d027ed2eSAlan Cox /*
505d027ed2eSAlan Cox  * dump_gap_stats()    - print stats on gap sizes
506d027ed2eSAlan Cox  */
507d027ed2eSAlan Cox static inline void
508d027ed2eSAlan Cox dump_gap_stats(const struct gap_stats *stats, struct sbuf *s)
509d027ed2eSAlan Cox {
510d027ed2eSAlan Cox 	int i;
511d027ed2eSAlan Cox 
512d027ed2eSAlan Cox 	sbuf_printf(s, "number of maximal free ranges: %jd\n",
513d027ed2eSAlan Cox 	    (intmax_t)stats->num);
514d027ed2eSAlan Cox 	sbuf_printf(s, "largest free range: %jd\n", (intmax_t)stats->max);
515d027ed2eSAlan Cox 	sbuf_printf(s, "average maximal free range size: %jd\n",
516d027ed2eSAlan Cox 	    (intmax_t)stats->avg);
517d027ed2eSAlan Cox 	sbuf_printf(s, "number of maximal free ranges of different sizes:\n");
518d027ed2eSAlan Cox 	sbuf_printf(s, "               count  |  size range\n");
519d027ed2eSAlan Cox 	sbuf_printf(s, "               -----  |  ----------\n");
520d027ed2eSAlan Cox 	for (i = 0; i < stats->max_bucket; i++) {
521d027ed2eSAlan Cox 		if (stats->histo[i] != 0) {
522d027ed2eSAlan Cox 			sbuf_printf(s, "%20jd  |  ",
523d027ed2eSAlan Cox 			    (intmax_t)stats->histo[i]);
524d027ed2eSAlan Cox 			if (fib[i] != fib[i + 1] - 1)
525d027ed2eSAlan Cox 				sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
526d027ed2eSAlan Cox 				    (intmax_t)fib[i + 1] - 1);
527d027ed2eSAlan Cox 			else
528d027ed2eSAlan Cox 				sbuf_printf(s, "%jd\n", (intmax_t)fib[i]);
529d027ed2eSAlan Cox 		}
530d027ed2eSAlan Cox 	}
531d027ed2eSAlan Cox 	sbuf_printf(s, "%20jd  |  ", (intmax_t)stats->histo[i]);
532d027ed2eSAlan Cox 	if (stats->histo[i] > 1)
533d027ed2eSAlan Cox 		sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
534d027ed2eSAlan Cox 		    (intmax_t)stats->max);
535d027ed2eSAlan Cox 	else
536d027ed2eSAlan Cox 		sbuf_printf(s, "%jd\n", (intmax_t)stats->max);
537d027ed2eSAlan Cox }
538d027ed2eSAlan Cox 
539d027ed2eSAlan Cox /*
540d027ed2eSAlan Cox  * blist_stats()    - dump radix tree stats
541d027ed2eSAlan Cox  */
542d027ed2eSAlan Cox void
543d027ed2eSAlan Cox blist_stats(blist_t bl, struct sbuf *s)
544d027ed2eSAlan Cox {
545d027ed2eSAlan Cox 	struct gap_stats gstats;
546d027ed2eSAlan Cox 	struct gap_stats *stats = &gstats;
547d027ed2eSAlan Cox 	daddr_t i, nodes, radix;
548d027ed2eSAlan Cox 	u_daddr_t bit, diff, mask;
549d027ed2eSAlan Cox 
550d027ed2eSAlan Cox 	init_gap_stats(stats);
551d027ed2eSAlan Cox 	nodes = 0;
552d027ed2eSAlan Cox 	i = bl->bl_radix;
553d027ed2eSAlan Cox 	while (i < bl->bl_radix + bl->bl_blocks) {
554d027ed2eSAlan Cox 		/*
555d027ed2eSAlan Cox 		 * Find max size subtree starting at i.
556d027ed2eSAlan Cox 		 */
557d027ed2eSAlan Cox 		radix = BLIST_BMAP_RADIX;
558d027ed2eSAlan Cox 		while (((i / radix) & BLIST_META_MASK) == 0)
559d027ed2eSAlan Cox 			radix *= BLIST_META_RADIX;
560d027ed2eSAlan Cox 
561d027ed2eSAlan Cox 		/*
562d027ed2eSAlan Cox 		 * Check for skippable subtrees starting at i.
563d027ed2eSAlan Cox 		 */
564d027ed2eSAlan Cox 		while (radix > BLIST_BMAP_RADIX) {
565d027ed2eSAlan Cox 			if (bl->bl_root[nodes].u.bmu_avail == 0) {
566d027ed2eSAlan Cox 				if (gap_stats_counting(stats))
567d027ed2eSAlan Cox 					update_gap_stats(stats, i);
568d027ed2eSAlan Cox 				break;
569d027ed2eSAlan Cox 			}
570d027ed2eSAlan Cox 			if (bl->bl_root[nodes].u.bmu_avail == radix) {
571d027ed2eSAlan Cox 				if (!gap_stats_counting(stats))
572d027ed2eSAlan Cox 					update_gap_stats(stats, i);
573d027ed2eSAlan Cox 				break;
574d027ed2eSAlan Cox 			}
575d027ed2eSAlan Cox 
576d027ed2eSAlan Cox 			/*
577d027ed2eSAlan Cox 			 * Skip subtree root.
578d027ed2eSAlan Cox 			 */
579d027ed2eSAlan Cox 			nodes++;
580d027ed2eSAlan Cox 			radix /= BLIST_META_RADIX;
581d027ed2eSAlan Cox 		}
582d027ed2eSAlan Cox 		if (radix == BLIST_BMAP_RADIX) {
583d027ed2eSAlan Cox 			/*
584d027ed2eSAlan Cox 			 * Scan leaf.
585d027ed2eSAlan Cox 			 */
586d027ed2eSAlan Cox 			mask = bl->bl_root[nodes].u.bmu_bitmap;
587d027ed2eSAlan Cox 			diff = mask ^ (mask << 1);
588d027ed2eSAlan Cox 			if (gap_stats_counting(stats))
589d027ed2eSAlan Cox 				diff ^= 1;
590d027ed2eSAlan Cox 			while (diff != 0) {
591d027ed2eSAlan Cox 				bit = diff & -diff;
592d027ed2eSAlan Cox 				update_gap_stats(stats, i + bitpos(bit));
593d027ed2eSAlan Cox 				diff ^= bit;
594d027ed2eSAlan Cox 			}
595d027ed2eSAlan Cox 		}
596d027ed2eSAlan Cox 		nodes += radix_to_skip(radix);
597d027ed2eSAlan Cox 		i += radix;
598d027ed2eSAlan Cox 	}
599d027ed2eSAlan Cox 	update_gap_stats(stats, i);
600d027ed2eSAlan Cox 	dump_gap_stats(stats, s);
601d027ed2eSAlan Cox }
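/*
 * A note on the leaf scan above, using a deliberately tiny 8-bit example
 * (real leaves are BLIST_BMAP_RADIX bits wide): for mask = 00111100, i.e.
 * blocks 2-5 free,
 *
 *	diff = mask ^ (mask << 1) = 00111100 ^ 01111000 = 01000100
 *
 * so bits 2 and 6 mark the two transitions, the start of the free run and
 * the first allocated block after it, which are exactly the positions passed
 * to update_gap_stats().  The 'diff ^= 1' fixup handles a gap that is already
 * open when the leaf begins: a free bit 0 is then not a new transition, and
 * an allocated bit 0 is one.
 */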
602d027ed2eSAlan Cox 
6037090df5aSMatthew Dillon /************************************************************************
6047090df5aSMatthew Dillon  *			  ALLOCATION SUPPORT FUNCTIONS			*
6057090df5aSMatthew Dillon  ************************************************************************
6067090df5aSMatthew Dillon  *
6077090df5aSMatthew Dillon  *	These support functions do all the actual work.  They may seem
6087090df5aSMatthew Dillon  *	rather longish, but that's because I've commented them up.  The
6097090df5aSMatthew Dillon  *	actual code is straightforward.
6107090df5aSMatthew Dillon  *
6117090df5aSMatthew Dillon  */
6127090df5aSMatthew Dillon 
6137090df5aSMatthew Dillon /*
6147090df5aSMatthew Dillon  * blst_leaf_alloc() -	allocate at a leaf in the radix tree (a bitmap).
6157090df5aSMatthew Dillon  *
61654541421SAlan Cox  *	This is the core of the allocator and is optimized for the
61754541421SAlan Cox  *	BLIST_BMAP_RADIX block allocation case.  Otherwise, execution
618d027ed2eSAlan Cox  *	time is proportional to log2(count) + bitpos time.
6197090df5aSMatthew Dillon  */
6207090df5aSMatthew Dillon static daddr_t
621ec371b57SAlan Cox blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
62254541421SAlan Cox {
62354541421SAlan Cox 	u_daddr_t mask;
624ec371b57SAlan Cox 	int count1, hi, lo, num_shifts, range1, range_ext;
6257090df5aSMatthew Dillon 
62654541421SAlan Cox 	range1 = 0;
62754541421SAlan Cox 	count1 = count - 1;
62854541421SAlan Cox 	num_shifts = fls(count1);
62954541421SAlan Cox 	mask = scan->u.bmu_bitmap;
630ec371b57SAlan Cox 	while ((-mask & ~mask) != 0 && num_shifts > 0) {
63154541421SAlan Cox 		/*
63254541421SAlan Cox 		 * If bit i is set in mask, then bits in [i, i+range1] are set
63354541421SAlan Cox 		 * in scan->u.bmu_bitmap.  The value of range1 is equal to
63454541421SAlan Cox 		 * count1 >> num_shifts.  Grow range and reduce num_shifts to 0,
63554541421SAlan Cox 		 * while preserving these invariants.  The updates to mask leave
63654541421SAlan Cox 		 * fewer bits set, but each bit that remains set represents a
63754541421SAlan Cox 		 * longer string of consecutive bits set in scan->u.bmu_bitmap.
638ec371b57SAlan Cox 		 * If more updates to mask cannot clear more bits, because mask
639ec371b57SAlan Cox 		 * is partitioned with all 0 bits preceding all 1 bits, the loop
640ec371b57SAlan Cox 		 * terminates immediately.
64154541421SAlan Cox 		 */
64254541421SAlan Cox 		num_shifts--;
64354541421SAlan Cox 		range_ext = range1 + ((count1 >> num_shifts) & 1);
644ec371b57SAlan Cox 		/*
645ec371b57SAlan Cox 		 * mask is a signed quantity for the shift because when it is
645ec371b57SAlan Cox 		 * shifted right, the sign bit should be copied; when the last
647ec371b57SAlan Cox 		 * block of the leaf is free, pretend, for a while, that all the
648ec371b57SAlan Cox 		 * blocks that follow it are also free.
649ec371b57SAlan Cox 		 */
650ec371b57SAlan Cox 		mask &= (daddr_t)mask >> range_ext;
65154541421SAlan Cox 		range1 += range_ext;
65254541421SAlan Cox 	}
65354541421SAlan Cox 	if (mask == 0) {
65454541421SAlan Cox 		/*
65554541421SAlan Cox 		 * Update bighint.  There is no allocation bigger than range1
656ec371b57SAlan Cox 		 * starting in this leaf.
65754541421SAlan Cox 		 */
65854541421SAlan Cox 		scan->bm_bighint = range1;
6597090df5aSMatthew Dillon 		return (SWAPBLK_NONE);
6607090df5aSMatthew Dillon 	}
66154541421SAlan Cox 
662ec371b57SAlan Cox 	/* Discard any candidates that appear before blk. */
663ec371b57SAlan Cox 	mask &= (u_daddr_t)-1 << (blk & BLIST_BMAP_MASK);
66454541421SAlan Cox 	if (mask == 0)
66554541421SAlan Cox 		return (SWAPBLK_NONE);
6667090df5aSMatthew Dillon 
6677090df5aSMatthew Dillon 	/*
66854541421SAlan Cox 	 * The least significant set bit in mask marks the start of the first
66954541421SAlan Cox 	 * available range of sufficient size.  Clear all the bits but that one,
670d027ed2eSAlan Cox 	 * and then find its position.
6717090df5aSMatthew Dillon 	 */
67254541421SAlan Cox 	mask &= -mask;
673d027ed2eSAlan Cox 	lo = bitpos(mask);
6747090df5aSMatthew Dillon 
675ec371b57SAlan Cox 	hi = lo + count;
676ec371b57SAlan Cox 	if (hi > BLIST_BMAP_RADIX) {
67754541421SAlan Cox 		/*
678ec371b57SAlan Cox 		 * An allocation within this leaf is impossible, so a successful
679ec371b57SAlan Cox 		 * allocation depends on the next leaf providing some of the blocks.
68054541421SAlan Cox 		 */
681ec371b57SAlan Cox 		if (((blk / BLIST_BMAP_RADIX + 1) & BLIST_META_MASK) == 0) {
682ec371b57SAlan Cox 			/*
683ec371b57SAlan Cox 			 * The next leaf has a different meta-node parent, so it
684ec371b57SAlan Cox 			 * is not necessarily initialized.  Update bighint,
685ec371b57SAlan Cox 			 * comparing the range found at the end of mask to the
686ec371b57SAlan Cox 			 * largest earlier range that could have been made to
687ec371b57SAlan Cox 			 * vanish in the initial processing of mask.
688ec371b57SAlan Cox 			 */
689ec371b57SAlan Cox 			scan->bm_bighint = imax(BLIST_BMAP_RADIX - lo, range1);
690ec371b57SAlan Cox 			return (SWAPBLK_NONE);
691ec371b57SAlan Cox 		}
692ec371b57SAlan Cox 		hi -= BLIST_BMAP_RADIX;
693ec371b57SAlan Cox 		if (((scan[1].u.bmu_bitmap + 1) & ~((u_daddr_t)-1 << hi)) != 0) {
694ec371b57SAlan Cox 			/*
695ec371b57SAlan Cox 			 * The next leaf doesn't have enough free blocks at the
696ec371b57SAlan Cox 			 * beginning to complete the spanning allocation.  The
697ec371b57SAlan Cox 			 * hint cannot be updated, because the same allocation
698ec371b57SAlan Cox 			 * request could be satisfied later, by this leaf, if
699ec371b57SAlan Cox 			 * the state of the next leaf changes, and without any
700ec371b57SAlan Cox 			 * changes to this leaf.
701ec371b57SAlan Cox 			 */
702ec371b57SAlan Cox 			return (SWAPBLK_NONE);
703ec371b57SAlan Cox 		}
704ec371b57SAlan Cox 		/* Clear the first 'hi' bits in the next leaf, allocating them. */
705ec371b57SAlan Cox 		scan[1].u.bmu_bitmap &= (u_daddr_t)-1 << hi;
706ec371b57SAlan Cox 		hi = BLIST_BMAP_RADIX;
707ec371b57SAlan Cox 	}
708ec371b57SAlan Cox 
709ec371b57SAlan Cox 	/* Set the bits of mask at position 'lo' and higher. */
710ec371b57SAlan Cox 	mask = -mask;
711ec371b57SAlan Cox 	if (hi == BLIST_BMAP_RADIX) {
712ec371b57SAlan Cox 		/*
713ec371b57SAlan Cox 		 * Update bighint.  There is no allocation bigger than range1
714ec371b57SAlan Cox 		 * available in this leaf after this allocation completes.
715ec371b57SAlan Cox 		 */
716ec371b57SAlan Cox 		scan->bm_bighint = range1;
717ec371b57SAlan Cox 	} else {
718ec371b57SAlan Cox 		/* Clear the bits of mask at position 'hi' and higher. */
719ec371b57SAlan Cox 		mask &= (u_daddr_t)-1 >> (BLIST_BMAP_RADIX - hi);
720ec371b57SAlan Cox 		/* If this allocation uses all the bits, clear the hint. */
721ec371b57SAlan Cox 		if (mask == scan->u.bmu_bitmap)
722ec371b57SAlan Cox 			scan->bm_bighint = 0;
723ec371b57SAlan Cox 	}
724ec371b57SAlan Cox 	/* Clear the allocated bits from this leaf. */
7257090df5aSMatthew Dillon 	scan->u.bmu_bitmap &= ~mask;
726ec371b57SAlan Cox 	return ((blk & ~BLIST_BMAP_MASK) + lo);
7277090df5aSMatthew Dillon }
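/*
 * Worked example of the mask folding above, shrunk to a hypothetical 8-bit
 * leaf for readability: allocate count = 3 (count1 = 2, num_shifts = 2) from
 * a bitmap of 01110110, where a 1 bit means the block is free.
 *
 *	pass 1: range_ext = 1, mask = 01110110 & (01110110 >> 1) = 00110010
 *		(each set bit now stands for 2 consecutive free blocks)
 *	pass 2: range_ext = 1, mask = 00110010 & (00110010 >> 1) = 00010000
 *		(each set bit now stands for 3 consecutive free blocks)
 *
 * Only bit 4 survives, so with a cursor at the start of the leaf the
 * allocation is placed at offset 4, claiming blocks 4-6.  The arithmetic
 * (sign-copying) right shift matters only when the leaf's last block is
 * free; it lets a run at the end of the leaf tentatively extend into the
 * next leaf, which the spanning-allocation path above then verifies.
 */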
7287090df5aSMatthew Dillon 
7297090df5aSMatthew Dillon /*
7307090df5aSMatthew Dillon  * blst_meta_alloc() -	allocate at a meta node in the radix tree.
7317090df5aSMatthew Dillon  *
7327090df5aSMatthew Dillon  *	Attempt to allocate at a meta node.  If we can't, we update
7337090df5aSMatthew Dillon  *	bighint and return a failure.  Updating bighint optimizes future
7347090df5aSMatthew Dillon  *	calls that hit this node.  We have to check for our collapse cases
7357090df5aSMatthew Dillon  *	and we have a few optimizations strewn in as well.
7367090df5aSMatthew Dillon  */
7377090df5aSMatthew Dillon static daddr_t
738bee93d3cSAlan Cox blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix)
739d4e3484bSAlan Cox {
740bee93d3cSAlan Cox 	daddr_t blk, i, next_skip, r, skip;
741d4e3484bSAlan Cox 	int child;
742d4e3484bSAlan Cox 	bool scan_from_start;
7437090df5aSMatthew Dillon 
744a396c83aSAlan Cox 	if (radix == BLIST_BMAP_RADIX)
745ec371b57SAlan Cox 		return (blst_leaf_alloc(scan, cursor, count));
7464be4fd5dSAlan Cox 	if (scan->u.bmu_avail < count) {
7477090df5aSMatthew Dillon 		/*
7484be4fd5dSAlan Cox 		 * The meta node's hint must be too large if the allocation
7494be4fd5dSAlan Cox 		 * exceeds the number of free blocks.  Reduce the hint, and
7504be4fd5dSAlan Cox 		 * return failure.
7517090df5aSMatthew Dillon 		 */
7524be4fd5dSAlan Cox 		scan->bm_bighint = scan->u.bmu_avail;
7537090df5aSMatthew Dillon 		return (SWAPBLK_NONE);
7547090df5aSMatthew Dillon 	}
755ec371b57SAlan Cox 	blk = cursor & -radix;
7562ac0c7c3SAlan Cox 	skip = radix_to_skip(radix);
757d4e3484bSAlan Cox 	next_skip = skip / BLIST_META_RADIX;
7587090df5aSMatthew Dillon 
7594be4fd5dSAlan Cox 	/*
7604be4fd5dSAlan Cox 	 * An ALL-FREE meta node requires special handling before allocating
7614be4fd5dSAlan Cox 	 * any of its blocks.
7624be4fd5dSAlan Cox 	 */
7637090df5aSMatthew Dillon 	if (scan->u.bmu_avail == radix) {
76457e6d29bSMatthew Dillon 		radix /= BLIST_META_RADIX;
7657090df5aSMatthew Dillon 
7667090df5aSMatthew Dillon 		/*
7674be4fd5dSAlan Cox 		 * Reinitialize each of the meta node's children.  An ALL-FREE
7684be4fd5dSAlan Cox 		 * meta node cannot have a terminator in any subtree.
7697090df5aSMatthew Dillon 		 */
7702ac0c7c3SAlan Cox 		for (i = 1; i < skip; i += next_skip) {
771d4e3484bSAlan Cox 			if (next_skip == 1)
7727090df5aSMatthew Dillon 				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
773d4e3484bSAlan Cox 			else
7747090df5aSMatthew Dillon 				scan[i].u.bmu_avail = radix;
775d4e3484bSAlan Cox 			scan[i].bm_bighint = radix;
7767090df5aSMatthew Dillon 		}
7777090df5aSMatthew Dillon 	} else {
77857e6d29bSMatthew Dillon 		radix /= BLIST_META_RADIX;
7797090df5aSMatthew Dillon 	}
7807090df5aSMatthew Dillon 
7814be4fd5dSAlan Cox 	if (count > radix) {
7824be4fd5dSAlan Cox 		/*
7834be4fd5dSAlan Cox 		 * The allocation exceeds the number of blocks that are
7844be4fd5dSAlan Cox 		 * managed by a subtree of this meta node.
7854be4fd5dSAlan Cox 		 */
7864be4fd5dSAlan Cox 		panic("allocation too large");
7874be4fd5dSAlan Cox 	}
788d4e3484bSAlan Cox 	scan_from_start = cursor == blk;
789d4e3484bSAlan Cox 	child = (cursor - blk) / radix;
790d4e3484bSAlan Cox 	blk += child * radix;
7912ac0c7c3SAlan Cox 	for (i = 1 + child * next_skip; i < skip; i += next_skip) {
7927090df5aSMatthew Dillon 		if (count <= scan[i].bm_bighint) {
7937090df5aSMatthew Dillon 			/*
794ec371b57SAlan Cox 			 * The allocation might fit beginning in the i'th subtree.
7957090df5aSMatthew Dillon 			 */
796bee93d3cSAlan Cox 			r = blst_meta_alloc(&scan[i],
797bee93d3cSAlan Cox 			    cursor > blk ? cursor : blk, count, radix);
7987090df5aSMatthew Dillon 			if (r != SWAPBLK_NONE) {
7997090df5aSMatthew Dillon 				scan->u.bmu_avail -= count;
8007090df5aSMatthew Dillon 				return (r);
8017090df5aSMatthew Dillon 			}
8027090df5aSMatthew Dillon 		} else if (scan[i].bm_bighint == (daddr_t)-1) {
8037090df5aSMatthew Dillon 			/*
8047090df5aSMatthew Dillon 			 * Terminator
8057090df5aSMatthew Dillon 			 */
8067090df5aSMatthew Dillon 			break;
8077090df5aSMatthew Dillon 		}
8087090df5aSMatthew Dillon 		blk += radix;
8097090df5aSMatthew Dillon 	}
8107090df5aSMatthew Dillon 
8117090df5aSMatthew Dillon 	/*
8127090df5aSMatthew Dillon 	 * We couldn't allocate count in this subtree, update bighint.
8137090df5aSMatthew Dillon 	 */
814d4e3484bSAlan Cox 	if (scan_from_start && scan->bm_bighint >= count)
8157090df5aSMatthew Dillon 		scan->bm_bighint = count - 1;
816d4e3484bSAlan Cox 
8177090df5aSMatthew Dillon 	return (SWAPBLK_NONE);
8187090df5aSMatthew Dillon }
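/*
 * Child indexing used above, illustrated with BLIST_META_RADIX = 16 assumed:
 * a meta node at scan[0] whose subtree has skip = 273 has next_skip = 17, so
 * its children sit at scan[1], scan[18], scan[35], ..., and child 'c' covers
 * the block range [blk + c * radix, blk + (c + 1) * radix) once radix has
 * been divided down by BLIST_META_RADIX.
 */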
8197090df5aSMatthew Dillon 
8207090df5aSMatthew Dillon /*
8217090df5aSMatthew Dillon  * BLST_LEAF_FREE() -	free allocated block from leaf bitmap
8227090df5aSMatthew Dillon  *
8237090df5aSMatthew Dillon  */
8247090df5aSMatthew Dillon static void
825b411efa4SAlan Cox blst_leaf_free(blmeta_t *scan, daddr_t blk, int count)
826b411efa4SAlan Cox {
827ec371b57SAlan Cox 	u_daddr_t mask;
828ec371b57SAlan Cox 	int n;
829ec371b57SAlan Cox 
8307090df5aSMatthew Dillon 	/*
8317090df5aSMatthew Dillon 	 * free some data in this bitmap
832ec371b57SAlan Cox 	 * mask=0000111111111110000
8337090df5aSMatthew Dillon 	 *          \_________/\__/
834ec371b57SAlan Cox 	 *		count   n
8357090df5aSMatthew Dillon 	 */
836ec371b57SAlan Cox 	n = blk & BLIST_BMAP_MASK;
8377090df5aSMatthew Dillon 	mask = ((u_daddr_t)-1 << n) &
8387090df5aSMatthew Dillon 	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));
8397090df5aSMatthew Dillon 	if (scan->u.bmu_bitmap & mask)
840ec371b57SAlan Cox 		panic("freeing free block");
8417090df5aSMatthew Dillon 	scan->u.bmu_bitmap |= mask;
8427090df5aSMatthew Dillon 
8437090df5aSMatthew Dillon 	/*
8447090df5aSMatthew Dillon 	 * We could probably do a better job here.  We are required to make
8457090df5aSMatthew Dillon 	 * bighint at least as large as the biggest contiguous block of
8467090df5aSMatthew Dillon 	 * data.  If we just shoehorn it, a little extra overhead will
8477090df5aSMatthew Dillon  *	be incurred on the next allocation (but only that one typically).
8487090df5aSMatthew Dillon 	 */
8497090df5aSMatthew Dillon 	scan->bm_bighint = BLIST_BMAP_RADIX;
8507090df5aSMatthew Dillon }
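/*
 * Mask construction example (with BLIST_BMAP_RADIX = 64 assumed): freeing
 * count = 3 blocks at leaf offset n = 4 builds
 *
 *	mask = (~0 << 4) & (~0 >> (64 - 3 - 4)) = 0x70
 *
 * i.e. bits 4-6.  Those bits must currently read 0 (allocated); setting them
 * returns the blocks to the free state.  blst_leaf_fill() below builds the
 * same mask but clears those bits instead.
 */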
8517090df5aSMatthew Dillon 
8527090df5aSMatthew Dillon /*
8537090df5aSMatthew Dillon  * BLST_META_FREE() - free allocated blocks from radix tree meta info
8547090df5aSMatthew Dillon  *
8557090df5aSMatthew Dillon  *	This support routine frees a range of blocks from the bitmap.
8567090df5aSMatthew Dillon  *	The range must be entirely enclosed by this radix node.  If a
8577090df5aSMatthew Dillon  *	meta node, we break the range down recursively to free blocks
8587090df5aSMatthew Dillon  *	in subnodes (which means that this code can free an arbitrary
8597090df5aSMatthew Dillon  *	range whereas the allocation code cannot allocate an arbitrary
8607090df5aSMatthew Dillon  *	range).
8617090df5aSMatthew Dillon  */
8627090df5aSMatthew Dillon static void
863bee93d3cSAlan Cox blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, u_daddr_t radix)
864d3783724SAlan Cox {
865bee93d3cSAlan Cox 	daddr_t blk, i, next_skip, skip, v;
866d3783724SAlan Cox 	int child;
8677090df5aSMatthew Dillon 
868a396c83aSAlan Cox 	if (scan->bm_bighint == (daddr_t)-1)
869a396c83aSAlan Cox 		panic("freeing invalid range");
870a396c83aSAlan Cox 	if (radix == BLIST_BMAP_RADIX)
871a396c83aSAlan Cox 		return (blst_leaf_free(scan, freeBlk, count));
8722ac0c7c3SAlan Cox 	skip = radix_to_skip(radix);
873d3783724SAlan Cox 	next_skip = skip / BLIST_META_RADIX;
8747090df5aSMatthew Dillon 
8757090df5aSMatthew Dillon 	if (scan->u.bmu_avail == 0) {
8767090df5aSMatthew Dillon 		/*
8777090df5aSMatthew Dillon 		 * ALL-ALLOCATED special case, with possible
8787090df5aSMatthew Dillon 		 * shortcut to ALL-FREE special case.
8797090df5aSMatthew Dillon 		 */
8807090df5aSMatthew Dillon 		scan->u.bmu_avail = count;
8817090df5aSMatthew Dillon 		scan->bm_bighint = count;
8827090df5aSMatthew Dillon 
8837090df5aSMatthew Dillon 		if (count != radix)  {
8842ac0c7c3SAlan Cox 			for (i = 1; i < skip; i += next_skip) {
8857090df5aSMatthew Dillon 				if (scan[i].bm_bighint == (daddr_t)-1)
8867090df5aSMatthew Dillon 					break;
8877090df5aSMatthew Dillon 				scan[i].bm_bighint = 0;
8887090df5aSMatthew Dillon 				if (next_skip == 1) {
8897090df5aSMatthew Dillon 					scan[i].u.bmu_bitmap = 0;
8907090df5aSMatthew Dillon 				} else {
8917090df5aSMatthew Dillon 					scan[i].u.bmu_avail = 0;
8927090df5aSMatthew Dillon 				}
8937090df5aSMatthew Dillon 			}
8947090df5aSMatthew Dillon 			/* fall through */
8957090df5aSMatthew Dillon 		}
8967090df5aSMatthew Dillon 	} else {
8977090df5aSMatthew Dillon 		scan->u.bmu_avail += count;
8987090df5aSMatthew Dillon 		/* scan->bm_bighint = radix; */
8997090df5aSMatthew Dillon 	}
9007090df5aSMatthew Dillon 
9017090df5aSMatthew Dillon 	/*
9027090df5aSMatthew Dillon 	 * ALL-FREE special case.
9037090df5aSMatthew Dillon 	 */
9047090df5aSMatthew Dillon 
9057090df5aSMatthew Dillon 	if (scan->u.bmu_avail == radix)
9067090df5aSMatthew Dillon 		return;
9077090df5aSMatthew Dillon 	if (scan->u.bmu_avail > radix)
908bdc9a8d0SJohn Baldwin 		panic("blst_meta_free: freeing already free blocks (%lld) %lld/%lld",
909bdc9a8d0SJohn Baldwin 		    (long long)count, (long long)scan->u.bmu_avail,
910bdc9a8d0SJohn Baldwin 		    (long long)radix);
9117090df5aSMatthew Dillon 
9127090df5aSMatthew Dillon 	/*
9137090df5aSMatthew Dillon 	 * Break the free down into its components
9147090df5aSMatthew Dillon 	 */
9157090df5aSMatthew Dillon 
916bee93d3cSAlan Cox 	blk = freeBlk & -radix;
91757e6d29bSMatthew Dillon 	radix /= BLIST_META_RADIX;
9187090df5aSMatthew Dillon 
919d3783724SAlan Cox 	child = (freeBlk - blk) / radix;
920d3783724SAlan Cox 	blk += child * radix;
921d3783724SAlan Cox 	i = 1 + child * next_skip;
9222ac0c7c3SAlan Cox 	while (i < skip && blk < freeBlk + count) {
9237090df5aSMatthew Dillon 		v = blk + radix - freeBlk;
9247090df5aSMatthew Dillon 		if (v > count)
9257090df5aSMatthew Dillon 			v = count;
926bee93d3cSAlan Cox 		blst_meta_free(&scan[i], freeBlk, v, radix);
9277090df5aSMatthew Dillon 		if (scan->bm_bighint < scan[i].bm_bighint)
9287090df5aSMatthew Dillon 			scan->bm_bighint = scan[i].bm_bighint;
9297090df5aSMatthew Dillon 		count -= v;
9307090df5aSMatthew Dillon 		freeBlk += v;
9317090df5aSMatthew Dillon 		blk += radix;
9327090df5aSMatthew Dillon 		i += next_skip;
9337090df5aSMatthew Dillon 	}
9347090df5aSMatthew Dillon }
9357090df5aSMatthew Dillon 
9367090df5aSMatthew Dillon /*
9377090df5aSMatthew Dillon  * BLST_COPY() - copy one radix tree to another
9387090df5aSMatthew Dillon  *
9397090df5aSMatthew Dillon  *	Locates free space in the source tree and frees it in the destination
9407090df5aSMatthew Dillon  *	tree.  The space may not already be free in the destination.
9417090df5aSMatthew Dillon  */
942b411efa4SAlan Cox static void
9432ac0c7c3SAlan Cox blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest,
9442ac0c7c3SAlan Cox     daddr_t count)
945b411efa4SAlan Cox {
9462ac0c7c3SAlan Cox 	daddr_t i, next_skip, skip;
9477090df5aSMatthew Dillon 
9487090df5aSMatthew Dillon 	/*
9497090df5aSMatthew Dillon 	 * Leaf node
9507090df5aSMatthew Dillon 	 */
9517090df5aSMatthew Dillon 
9527090df5aSMatthew Dillon 	if (radix == BLIST_BMAP_RADIX) {
9537090df5aSMatthew Dillon 		u_daddr_t v = scan->u.bmu_bitmap;
9547090df5aSMatthew Dillon 
9557090df5aSMatthew Dillon 		if (v == (u_daddr_t)-1) {
9567090df5aSMatthew Dillon 			blist_free(dest, blk, count);
9577090df5aSMatthew Dillon 		} else if (v != 0) {
9587090df5aSMatthew Dillon 			int i;
9597090df5aSMatthew Dillon 
9607090df5aSMatthew Dillon 			for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) {
961064650c1SAlan Cox 				if (v & ((u_daddr_t)1 << i))
9627090df5aSMatthew Dillon 					blist_free(dest, blk + i, 1);
9637090df5aSMatthew Dillon 			}
9647090df5aSMatthew Dillon 		}
9657090df5aSMatthew Dillon 		return;
9667090df5aSMatthew Dillon 	}
9677090df5aSMatthew Dillon 
9687090df5aSMatthew Dillon 	/*
9697090df5aSMatthew Dillon 	 * Meta node
9707090df5aSMatthew Dillon 	 */
9717090df5aSMatthew Dillon 
9727090df5aSMatthew Dillon 	if (scan->u.bmu_avail == 0) {
9737090df5aSMatthew Dillon 		/*
9747090df5aSMatthew Dillon 		 * Source all allocated, leave dest allocated
9757090df5aSMatthew Dillon 		 */
9767090df5aSMatthew Dillon 		return;
9777090df5aSMatthew Dillon 	}
9787090df5aSMatthew Dillon 	if (scan->u.bmu_avail == radix) {
9797090df5aSMatthew Dillon 		/*
9807090df5aSMatthew Dillon 		 * Source all free, free entire dest
9817090df5aSMatthew Dillon 		 */
9827090df5aSMatthew Dillon 		if (count < radix)
9837090df5aSMatthew Dillon 			blist_free(dest, blk, count);
9847090df5aSMatthew Dillon 		else
9857090df5aSMatthew Dillon 			blist_free(dest, blk, radix);
9867090df5aSMatthew Dillon 		return;
9877090df5aSMatthew Dillon 	}
9887090df5aSMatthew Dillon 
9897090df5aSMatthew Dillon 
9902ac0c7c3SAlan Cox 	skip = radix_to_skip(radix);
991d3783724SAlan Cox 	next_skip = skip / BLIST_META_RADIX;
9922ac0c7c3SAlan Cox 	radix /= BLIST_META_RADIX;
9937090df5aSMatthew Dillon 
9942ac0c7c3SAlan Cox 	for (i = 1; count && i < skip; i += next_skip) {
9957090df5aSMatthew Dillon 		if (scan[i].bm_bighint == (daddr_t)-1)
9967090df5aSMatthew Dillon 			break;
9977090df5aSMatthew Dillon 
9987090df5aSMatthew Dillon 		if (count >= radix) {
9992ac0c7c3SAlan Cox 			blst_copy(&scan[i], blk, radix, dest, radix);
10007090df5aSMatthew Dillon 			count -= radix;
10017090df5aSMatthew Dillon 		} else {
10027090df5aSMatthew Dillon 			if (count) {
10032ac0c7c3SAlan Cox 				blst_copy(&scan[i], blk, radix, dest, count);
10047090df5aSMatthew Dillon 			}
10057090df5aSMatthew Dillon 			count = 0;
10067090df5aSMatthew Dillon 		}
10077090df5aSMatthew Dillon 		blk += radix;
10087090df5aSMatthew Dillon 	}
10097090df5aSMatthew Dillon }
10107090df5aSMatthew Dillon 
10117090df5aSMatthew Dillon /*
101292da00bbSMatthew Dillon  * BLST_LEAF_FILL() -	allocate specific blocks in leaf bitmap
101392da00bbSMatthew Dillon  *
101492da00bbSMatthew Dillon  *	This routine allocates all blocks in the specified range
101592da00bbSMatthew Dillon  *	regardless of any existing allocations in that range.  Returns
101692da00bbSMatthew Dillon  *	the number of blocks allocated by the call.
101792da00bbSMatthew Dillon  */
1018a7249a6cSAlan Cox static daddr_t
101992da00bbSMatthew Dillon blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
102092da00bbSMatthew Dillon {
1021a7249a6cSAlan Cox 	daddr_t nblks;
10224be4fd5dSAlan Cox 	u_daddr_t mask;
1023ec371b57SAlan Cox 	int n;
102492da00bbSMatthew Dillon 
1025ec371b57SAlan Cox 	n = blk & BLIST_BMAP_MASK;
102692da00bbSMatthew Dillon 	mask = ((u_daddr_t)-1 << n) &
102792da00bbSMatthew Dillon 	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));
102892da00bbSMatthew Dillon 
10294be4fd5dSAlan Cox 	/* Count the number of blocks that we are allocating. */
10304be4fd5dSAlan Cox 	nblks = bitcount64(scan->u.bmu_bitmap & mask);
103192da00bbSMatthew Dillon 
103292da00bbSMatthew Dillon 	scan->u.bmu_bitmap &= ~mask;
10334be4fd5dSAlan Cox 	return (nblks);
103492da00bbSMatthew Dillon }
103592da00bbSMatthew Dillon 
103692da00bbSMatthew Dillon /*
103792da00bbSMatthew Dillon  * BLST_META_FILL() -	allocate specific blocks at a meta node
103892da00bbSMatthew Dillon  *
103992da00bbSMatthew Dillon  *	This routine allocates the specified range of blocks,
104092da00bbSMatthew Dillon  *	regardless of any existing allocations in the range.  The
104192da00bbSMatthew Dillon  *	range must be within the extent of this node.  Returns the
104292da00bbSMatthew Dillon  *	number of blocks allocated by the call.
104392da00bbSMatthew Dillon  */
1044a7249a6cSAlan Cox static daddr_t
1045bee93d3cSAlan Cox blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, u_daddr_t radix)
1046d3783724SAlan Cox {
1047bee93d3cSAlan Cox 	daddr_t blk, i, nblks, next_skip, skip, v;
1048d3783724SAlan Cox 	int child;
104992da00bbSMatthew Dillon 
1050a396c83aSAlan Cox 	if (scan->bm_bighint == (daddr_t)-1)
1051a396c83aSAlan Cox 		panic("filling invalid range");
10524be4fd5dSAlan Cox 	if (count > radix) {
10534be4fd5dSAlan Cox 		/*
10544be4fd5dSAlan Cox 		 * The allocation exceeds the number of blocks that are
1055a396c83aSAlan Cox 		 * managed by this node.
10564be4fd5dSAlan Cox 		 */
1057a396c83aSAlan Cox 		panic("fill too large");
10584be4fd5dSAlan Cox 	}
1059a396c83aSAlan Cox 	if (radix == BLIST_BMAP_RADIX)
1060a396c83aSAlan Cox 		return (blst_leaf_fill(scan, allocBlk, count));
106192da00bbSMatthew Dillon 	if (count == radix || scan->u.bmu_avail == 0)  {
106292da00bbSMatthew Dillon 		/*
106392da00bbSMatthew Dillon 		 * ALL-ALLOCATED special case
106492da00bbSMatthew Dillon 		 */
106592da00bbSMatthew Dillon 		nblks = scan->u.bmu_avail;
106692da00bbSMatthew Dillon 		scan->u.bmu_avail = 0;
106786dd278fSAlan Cox 		scan->bm_bighint = 0;
1068a396c83aSAlan Cox 		return (nblks);
106992da00bbSMatthew Dillon 	}
10702ac0c7c3SAlan Cox 	skip = radix_to_skip(radix);
1071d3783724SAlan Cox 	next_skip = skip / BLIST_META_RADIX;
1072bee93d3cSAlan Cox 	blk = allocBlk & -radix;
107392da00bbSMatthew Dillon 
10744be4fd5dSAlan Cox 	/*
10754be4fd5dSAlan Cox 	 * An ALL-FREE meta node requires special handling before allocating
10764be4fd5dSAlan Cox 	 * any of its blocks.
10774be4fd5dSAlan Cox 	 */
107892da00bbSMatthew Dillon 	if (scan->u.bmu_avail == radix) {
107957e6d29bSMatthew Dillon 		radix /= BLIST_META_RADIX;
108092da00bbSMatthew Dillon 
108192da00bbSMatthew Dillon 		/*
10824be4fd5dSAlan Cox 		 * Reinitialize each of the meta node's children.  An ALL-FREE
10834be4fd5dSAlan Cox 		 * meta node cannot have a terminator in any subtree.
108492da00bbSMatthew Dillon 		 */
10852ac0c7c3SAlan Cox 		for (i = 1; i < skip; i += next_skip) {
1086a396c83aSAlan Cox 			if (next_skip == 1)
108792da00bbSMatthew Dillon 				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
1088a396c83aSAlan Cox 			else
108992da00bbSMatthew Dillon 				scan[i].u.bmu_avail = radix;
1090a396c83aSAlan Cox 			scan[i].bm_bighint = radix;
109192da00bbSMatthew Dillon 		}
109292da00bbSMatthew Dillon 	} else {
109357e6d29bSMatthew Dillon 		radix /= BLIST_META_RADIX;
109492da00bbSMatthew Dillon 	}
109592da00bbSMatthew Dillon 
1096d3783724SAlan Cox 	nblks = 0;
1097d3783724SAlan Cox 	child = (allocBlk - blk) / radix;
1098d3783724SAlan Cox 	blk += child * radix;
1099d3783724SAlan Cox 	i = 1 + child * next_skip;
11002ac0c7c3SAlan Cox 	while (i < skip && blk < allocBlk + count) {
110192da00bbSMatthew Dillon 		v = blk + radix - allocBlk;
110292da00bbSMatthew Dillon 		if (v > count)
110392da00bbSMatthew Dillon 			v = count;
1104bee93d3cSAlan Cox 		nblks += blst_meta_fill(&scan[i], allocBlk, v, radix);
110592da00bbSMatthew Dillon 		count -= v;
110692da00bbSMatthew Dillon 		allocBlk += v;
110792da00bbSMatthew Dillon 		blk += radix;
110892da00bbSMatthew Dillon 		i += next_skip;
110992da00bbSMatthew Dillon 	}
111092da00bbSMatthew Dillon 	scan->u.bmu_avail -= nblks;
1111a396c83aSAlan Cox 	return (nblks);
111292da00bbSMatthew Dillon }
111392da00bbSMatthew Dillon 
11147090df5aSMatthew Dillon #ifdef BLIST_DEBUG
11157090df5aSMatthew Dillon 
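/*
 * BLST_RADIX_PRINT() - dump the radix tree rooted at scan (debug only)
 *
 *	Prints one line per node, indented by depth: leaves show their raw
 *	bitmaps, meta nodes show available/total blocks, and a terminator
 *	child ends the walk of a partially populated meta node.
 */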
11167090df5aSMatthew Dillon static void
11172ac0c7c3SAlan Cox blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab)
11187090df5aSMatthew Dillon {
11192ac0c7c3SAlan Cox 	daddr_t i, next_skip, skip;
11207090df5aSMatthew Dillon 
11217090df5aSMatthew Dillon 	if (radix == BLIST_BMAP_RADIX) {
11227090df5aSMatthew Dillon 		printf(
1123d90bf7d8SAlan Cox 		    "%*.*s(%08llx,%lld): bitmap %016llx big=%lld\n",
11247090df5aSMatthew Dillon 		    tab, tab, "",
112592da00bbSMatthew Dillon 		    (long long)blk, (long long)radix,
112692da00bbSMatthew Dillon 		    (long long)scan->u.bmu_bitmap,
112792da00bbSMatthew Dillon 		    (long long)scan->bm_bighint
11287090df5aSMatthew Dillon 		);
11297090df5aSMatthew Dillon 		return;
11307090df5aSMatthew Dillon 	}
11317090df5aSMatthew Dillon 
11327090df5aSMatthew Dillon 	if (scan->u.bmu_avail == 0) {
11337090df5aSMatthew Dillon 		printf(
113492da00bbSMatthew Dillon 		    "%*.*s(%08llx,%lld) ALL ALLOCATED\n",
11357090df5aSMatthew Dillon 		    tab, tab, "",
113692da00bbSMatthew Dillon 		    (long long)blk,
113792da00bbSMatthew Dillon 		    (long long)radix
11387090df5aSMatthew Dillon 		);
11397090df5aSMatthew Dillon 		return;
11407090df5aSMatthew Dillon 	}
11417090df5aSMatthew Dillon 	if (scan->u.bmu_avail == radix) {
11427090df5aSMatthew Dillon 		printf(
114392da00bbSMatthew Dillon 		    "%*.*s(%08llx,%lld) ALL FREE\n",
11447090df5aSMatthew Dillon 		    tab, tab, "",
114592da00bbSMatthew Dillon 		    (long long)blk,
114692da00bbSMatthew Dillon 		    (long long)radix
11477090df5aSMatthew Dillon 		);
11487090df5aSMatthew Dillon 		return;
11497090df5aSMatthew Dillon 	}
11507090df5aSMatthew Dillon 
11517090df5aSMatthew Dillon 	printf(
115292da00bbSMatthew Dillon 	    "%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n",
11537090df5aSMatthew Dillon 	    tab, tab, "",
115492da00bbSMatthew Dillon 	    (long long)blk, (long long)radix,
115592da00bbSMatthew Dillon 	    (long long)scan->u.bmu_avail,
115692da00bbSMatthew Dillon 	    (long long)radix,
115792da00bbSMatthew Dillon 	    (long long)scan->bm_bighint
11587090df5aSMatthew Dillon 	);
11597090df5aSMatthew Dillon 
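	/*
	 * Recurse into the children: each child subtree occupies next_skip
	 * array entries and covers radix / BLIST_META_RADIX blocks.
	 */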
11602ac0c7c3SAlan Cox 	skip = radix_to_skip(radix);
1161d3783724SAlan Cox 	next_skip = skip / BLIST_META_RADIX;
11622ac0c7c3SAlan Cox 	radix /= BLIST_META_RADIX;
11637090df5aSMatthew Dillon 	tab += 4;
11647090df5aSMatthew Dillon 
11652ac0c7c3SAlan Cox 	for (i = 1; i < skip; i += next_skip) {
11667090df5aSMatthew Dillon 		if (scan[i].bm_bighint == (daddr_t)-1) {
11677090df5aSMatthew Dillon 			printf(
116892da00bbSMatthew Dillon 			    "%*.*s(%08llx,%lld): Terminator\n",
11697090df5aSMatthew Dillon 			    tab, tab, "",
117092da00bbSMatthew Dillon 			    (long long)blk, (long long)radix
11717090df5aSMatthew Dillon 			);
11727090df5aSMatthew Dillon 			break;
11737090df5aSMatthew Dillon 		}
11742ac0c7c3SAlan Cox 		blst_radix_print(&scan[i], blk, radix, tab);
11757090df5aSMatthew Dillon 		blk += radix;
11767090df5aSMatthew Dillon 	}
11777090df5aSMatthew Dillon 	tab -= 4;
11787090df5aSMatthew Dillon 
11797090df5aSMatthew Dillon 	printf(
11807090df5aSMatthew Dillon 	    "%*.*s}\n",
11817090df5aSMatthew Dillon 	    tab, tab, ""
11827090df5aSMatthew Dillon 	);
11837090df5aSMatthew Dillon }
11847090df5aSMatthew Dillon 
11857090df5aSMatthew Dillon #endif
11867090df5aSMatthew Dillon 
11877090df5aSMatthew Dillon #ifdef BLIST_DEBUG
11887090df5aSMatthew Dillon 
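/*
 * Interactive test harness, used only when this file is built as a
 * stand-alone userland program with BLIST_DEBUG defined.  It creates a
 * blist covering 'size' blocks (1024 by default, overridable by a numeric
 * command-line argument), frees the whole range, and then reads
 * single-letter commands from stdin; 'h' or '?' prints the command
 * summary.  The prompt shows free blocks / total blocks / tree radix.
 * An illustrative session (output approximate) with the default size:
 *
 *	1024/1024/1024> a 16
 *	    R=00000000
 *	1008/1024/1024> f 0 16
 *	1024/1024/1024>
 */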
11897090df5aSMatthew Dillon int
11907090df5aSMatthew Dillon main(int ac, char **av)
11917090df5aSMatthew Dillon {
11927090df5aSMatthew Dillon 	int size = 1024;
11937090df5aSMatthew Dillon 	int i;
11947090df5aSMatthew Dillon 	blist_t bl;
1195d027ed2eSAlan Cox 	struct sbuf *s;
11967090df5aSMatthew Dillon 
11977090df5aSMatthew Dillon 	for (i = 1; i < ac; ++i) {
11987090df5aSMatthew Dillon 		const char *ptr = av[i];
11997090df5aSMatthew Dillon 		if (*ptr != '-') {
12007090df5aSMatthew Dillon 			size = strtol(ptr, NULL, 0);
12017090df5aSMatthew Dillon 			continue;
12027090df5aSMatthew Dillon 		}
12037090df5aSMatthew Dillon 		ptr += 2;
12047090df5aSMatthew Dillon 		fprintf(stderr, "Bad option: %s\n", ptr - 2);
12057090df5aSMatthew Dillon 		exit(1);
12067090df5aSMatthew Dillon 	}
1207c8c7ad92SKip Macy 	bl = blist_create(size, M_WAITOK);
12087090df5aSMatthew Dillon 	blist_free(bl, 0, size);
12097090df5aSMatthew Dillon 
12107090df5aSMatthew Dillon 	for (;;) {
12117090df5aSMatthew Dillon 		char buf[1024];
1212d90bf7d8SAlan Cox 		long long da = 0;
1213d90bf7d8SAlan Cox 		long long count = 0;
12147090df5aSMatthew Dillon 
12154be4fd5dSAlan Cox 		printf("%lld/%lld/%lld> ", (long long)blist_avail(bl),
121692da00bbSMatthew Dillon 		    (long long)size, (long long)bl->bl_radix);
12177090df5aSMatthew Dillon 		fflush(stdout);
12187090df5aSMatthew Dillon 		if (fgets(buf, sizeof(buf), stdin) == NULL)
12197090df5aSMatthew Dillon 			break;
12207090df5aSMatthew Dillon 		switch (buf[0]) {
12217090df5aSMatthew Dillon 		case 'r':
122292da00bbSMatthew Dillon 			if (sscanf(buf + 1, "%lld", &count) == 1) {
1223d90bf7d8SAlan Cox 				blist_resize(&bl, count, 1, M_WAITOK);
12247090df5aSMatthew Dillon 			} else {
12257090df5aSMatthew Dillon 				printf("?\n");
12267090df5aSMatthew Dillon 			}
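			/* FALLTHROUGH: print the list after resizing. */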
12277090df5aSMatthew Dillon 		case 'p':
12287090df5aSMatthew Dillon 			blist_print(bl);
12297090df5aSMatthew Dillon 			break;
1230d027ed2eSAlan Cox 		case 's':
1231d027ed2eSAlan Cox 			s = sbuf_new_auto();
1232d027ed2eSAlan Cox 			blist_stats(bl, s);
1233d027ed2eSAlan Cox 			sbuf_finish(s);
1234d027ed2eSAlan Cox 			printf("%s", sbuf_data(s));
1235d027ed2eSAlan Cox 			sbuf_delete(s);
1236d027ed2eSAlan Cox 			break;
12377090df5aSMatthew Dillon 		case 'a':
123892da00bbSMatthew Dillon 			if (sscanf(buf + 1, "%lld", &count) == 1) {
12397090df5aSMatthew Dillon 				daddr_t blk = blist_alloc(bl, count);
124092da00bbSMatthew Dillon 				printf("    R=%08llx\n", (long long)blk);
12417090df5aSMatthew Dillon 			} else {
12427090df5aSMatthew Dillon 				printf("?\n");
12437090df5aSMatthew Dillon 			}
12447090df5aSMatthew Dillon 			break;
12457090df5aSMatthew Dillon 		case 'f':
1246d90bf7d8SAlan Cox 			if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
12477090df5aSMatthew Dillon 				blist_free(bl, da, count);
12487090df5aSMatthew Dillon 			} else {
12497090df5aSMatthew Dillon 				printf("?\n");
12507090df5aSMatthew Dillon 			}
12517090df5aSMatthew Dillon 			break;
125292da00bbSMatthew Dillon 		case 'l':
1253d90bf7d8SAlan Cox 			if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
1254a7249a6cSAlan Cox 				printf("    n=%jd\n",
1255a7249a6cSAlan Cox 				    (intmax_t)blist_fill(bl, da, count));
125692da00bbSMatthew Dillon 			} else {
125792da00bbSMatthew Dillon 				printf("?\n");
125892da00bbSMatthew Dillon 			}
125992da00bbSMatthew Dillon 			break;
12607090df5aSMatthew Dillon 		case '?':
12617090df5aSMatthew Dillon 		case 'h':
12627090df5aSMatthew Dillon 			puts(
12637090df5aSMatthew Dillon 			    "p          -print\n"
1264d027ed2eSAlan Cox 			    "s          -stats\n"
12657090df5aSMatthew Dillon 			    "a %d       -allocate\n"
12667090df5aSMatthew Dillon 			    "f %x %d    -free\n"
126792da00bbSMatthew Dillon 			    "l %x %d    -fill\n"
12687090df5aSMatthew Dillon 			    "r %d       -resize\n"
12697090df5aSMatthew Dillon 			    "h/?        -help"
12707090df5aSMatthew Dillon 			);
12717090df5aSMatthew Dillon 			break;
12727090df5aSMatthew Dillon 		default:
12737090df5aSMatthew Dillon 			printf("?\n");
12747090df5aSMatthew Dillon 			break;
12757090df5aSMatthew Dillon 		}
12767090df5aSMatthew Dillon 	}
12777090df5aSMatthew Dillon 	return (0);
12787090df5aSMatthew Dillon }
12797090df5aSMatthew Dillon 
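/*
 * Minimal userland stand-in for the kernel's panic(9): print the message
 * to stderr and exit, so that the consistency checks above also work in
 * the stand-alone test program.
 */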
12807090df5aSMatthew Dillon void
12817090df5aSMatthew Dillon panic(const char *ctl, ...)
12827090df5aSMatthew Dillon {
12837090df5aSMatthew Dillon 	va_list va;
12847090df5aSMatthew Dillon 
12857090df5aSMatthew Dillon 	va_start(va, ctl);
12867090df5aSMatthew Dillon 	vfprintf(stderr, ctl, va);
12877090df5aSMatthew Dillon 	fprintf(stderr, "\n");
12887090df5aSMatthew Dillon 	va_end(va);
12897090df5aSMatthew Dillon 	exit(1);
12907090df5aSMatthew Dillon }
12917090df5aSMatthew Dillon 
12927090df5aSMatthew Dillon #endif
1293