1 /*- 2 * Copyright (c) 1998 Matthew Dillon. All Rights Reserved. 3 * Redistribution and use in source and binary forms, with or without 4 * modification, are permitted provided that the following conditions 5 * are met: 6 * 1. Redistributions of source code must retain the above copyright 7 * notice, this list of conditions and the following disclaimer. 8 * 2. Redistributions in binary form must reproduce the above copyright 9 * notice, this list of conditions and the following disclaimer in the 10 * documentation and/or other materials provided with the distribution. 11 * 3. Neither the name of the University nor the names of its contributors 12 * may be used to endorse or promote products derived from this software 13 * without specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 19 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 21 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 24 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 /* 28 * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting 29 * 30 * This module implements a general bitmap allocator/deallocator. The 31 * allocator eats around 2 bits per 'block'. The module does not 32 * try to interpret the meaning of a 'block' other than to return 33 * SWAPBLK_NONE on an allocation failure. 34 * 35 * A radix tree is used to maintain the bitmap. Two radix constants are 36 * involved: One for the bitmaps contained in the leaf nodes (typically 37 * 64), and one for the meta nodes (typically 16). Both meta and leaf 38 * nodes have a hint field. This field gives us a hint as to the largest 39 * free contiguous range of blocks under the node. It may contain a 40 * value that is too high, but will never contain a value that is too 41 * low. When the radix tree is searched, allocation failures in subtrees 42 * update the hint. 43 * 44 * The radix tree also implements two collapsed states for meta nodes: 45 * the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is 46 * in either of these two states, all information contained underneath 47 * the node is considered stale. These states are used to optimize 48 * allocation and freeing operations. 49 * 50 * The hinting greatly increases code efficiency for allocations while 51 * the general radix structure optimizes both allocations and frees. The 52 * radix tree should be able to operate well no matter how much 53 * fragmentation there is and no matter how large a bitmap is used. 54 * 55 * The blist code wires all necessary memory at creation time. Neither 56 * allocations nor frees require interaction with the memory subsystem. 57 * The non-blocking features of the blist code are used in the swap code 58 * (vm/swap_pager.c). 59 * 60 * LAYOUT: The radix tree is laid out recursively using a 61 * linear array. Each meta node is immediately followed (laid out 62 * sequentially in memory) by BLIST_META_RADIX lower level nodes. This 63 * is a recursive structure but one that can be easily scanned through 64 * a very simple 'skip' calculation. In order to support large radixes, 65 * portions of the tree may reside outside our memory allocation. We 66 * handle this with an early-termination optimization (when bighint is 67 * set to -1) on the scan. The memory allocation is only large enough 68 * to cover the number of blocks requested at creation time even if it 69 * must be encompassed in larger root-node radix. 70 * 71 * NOTE: the allocator cannot currently allocate more than 72 * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too 73 * large' if you try. This is an area that could use improvement. The 74 * radix is large enough that this restriction does not effect the swap 75 * system, though. Currently only the allocation code is affected by 76 * this algorithmic unfeature. The freeing code can handle arbitrary 77 * ranges. 78 * 79 * This code can be compiled stand-alone for debugging. 80 */ 81 82 #include <sys/cdefs.h> 83 __FBSDID("$FreeBSD$"); 84 85 #ifdef _KERNEL 86 87 #include <sys/param.h> 88 #include <sys/systm.h> 89 #include <sys/lock.h> 90 #include <sys/kernel.h> 91 #include <sys/blist.h> 92 #include <sys/malloc.h> 93 #include <sys/proc.h> 94 #include <sys/mutex.h> 95 96 #else 97 98 #ifndef BLIST_NO_DEBUG 99 #define BLIST_DEBUG 100 #endif 101 102 #include <sys/types.h> 103 #include <sys/malloc.h> 104 #include <stdio.h> 105 #include <string.h> 106 #include <stdlib.h> 107 #include <stdarg.h> 108 #include <stdbool.h> 109 110 #define bitcount64(x) __bitcount64((uint64_t)(x)) 111 #define malloc(a,b,c) calloc(a, 1) 112 #define free(a,b) free(a) 113 114 #include <sys/blist.h> 115 116 void panic(const char *ctl, ...); 117 118 #endif 119 120 /* 121 * static support functions 122 */ 123 static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count, 124 daddr_t cursor); 125 static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t blk, daddr_t count, 126 daddr_t radix, daddr_t cursor); 127 static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count); 128 static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, 129 daddr_t radix, daddr_t blk); 130 static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, 131 blist_t dest, daddr_t count); 132 static daddr_t blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count); 133 static daddr_t blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, 134 daddr_t radix, daddr_t blk); 135 static daddr_t blst_radix_init(blmeta_t *scan, daddr_t radix, daddr_t count); 136 #ifndef _KERNEL 137 static void blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, 138 int tab); 139 #endif 140 141 #ifdef _KERNEL 142 static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space"); 143 #endif 144 145 /* 146 * For a subtree that can represent the state of up to 'radix' blocks, the 147 * number of leaf nodes of the subtree is L=radix/BLIST_BMAP_RADIX. If 'm' 148 * is short for BLIST_META_RADIX, then for a tree of height h with L=m**h 149 * leaf nodes, the total number of tree nodes is 1 + m + m**2 + ... + m**h, 150 * or, equivalently, (m**(h+1)-1)/(m-1). This quantity is called 'skip' 151 * in the 'meta' functions that process subtrees. Since integer division 152 * discards remainders, we can express this computation as 153 * skip = (m * m**h) / (m - 1) 154 * skip = (m * radix / BLIST_BMAP_RADIX) / (m - 1) 155 * and if m divides BLIST_BMAP_RADIX, we can simplify further to 156 * skip = radix / (BLIST_BMAP_RADIX / m * (m - 1)) 157 * so that a simple integer division is enough for the calculation. 158 */ 159 static inline daddr_t 160 radix_to_skip(daddr_t radix) 161 { 162 163 return (radix / 164 (BLIST_BMAP_RADIX / BLIST_META_RADIX * (BLIST_META_RADIX - 1))); 165 } 166 167 /* 168 * blist_create() - create a blist capable of handling up to the specified 169 * number of blocks 170 * 171 * blocks - must be greater than 0 172 * flags - malloc flags 173 * 174 * The smallest blist consists of a single leaf node capable of 175 * managing BLIST_BMAP_RADIX blocks. 176 */ 177 blist_t 178 blist_create(daddr_t blocks, int flags) 179 { 180 blist_t bl; 181 daddr_t nodes, radix; 182 183 /* 184 * Calculate the radix field used for scanning. 185 */ 186 radix = BLIST_BMAP_RADIX; 187 while (radix < blocks) { 188 radix *= BLIST_META_RADIX; 189 } 190 nodes = 1 + blst_radix_init(NULL, radix, blocks); 191 192 bl = malloc(sizeof(struct blist), M_SWAP, flags); 193 if (bl == NULL) 194 return (NULL); 195 196 bl->bl_blocks = blocks; 197 bl->bl_radix = radix; 198 bl->bl_cursor = 0; 199 bl->bl_root = malloc(nodes * sizeof(blmeta_t), M_SWAP, flags); 200 if (bl->bl_root == NULL) { 201 free(bl, M_SWAP); 202 return (NULL); 203 } 204 blst_radix_init(bl->bl_root, radix, blocks); 205 206 #if defined(BLIST_DEBUG) 207 printf( 208 "BLIST representing %lld blocks (%lld MB of swap)" 209 ", requiring %lldK of ram\n", 210 (long long)bl->bl_blocks, 211 (long long)bl->bl_blocks * 4 / 1024, 212 (long long)(nodes * sizeof(blmeta_t) + 1023) / 1024 213 ); 214 printf("BLIST raw radix tree contains %lld records\n", 215 (long long)nodes); 216 #endif 217 218 return (bl); 219 } 220 221 void 222 blist_destroy(blist_t bl) 223 { 224 free(bl->bl_root, M_SWAP); 225 free(bl, M_SWAP); 226 } 227 228 /* 229 * blist_alloc() - reserve space in the block bitmap. Return the base 230 * of a contiguous region or SWAPBLK_NONE if space could 231 * not be allocated. 232 */ 233 daddr_t 234 blist_alloc(blist_t bl, daddr_t count) 235 { 236 daddr_t blk; 237 238 /* 239 * This loop iterates at most twice. An allocation failure in the 240 * first iteration leads to a second iteration only if the cursor was 241 * non-zero. When the cursor is zero, an allocation failure will 242 * reduce the hint, stopping further iterations. 243 */ 244 while (count <= bl->bl_root->bm_bighint) { 245 blk = blst_meta_alloc(bl->bl_root, 0, count, bl->bl_radix, 246 bl->bl_cursor); 247 if (blk != SWAPBLK_NONE) { 248 bl->bl_cursor = blk + count; 249 return (blk); 250 } else if (bl->bl_cursor != 0) 251 bl->bl_cursor = 0; 252 } 253 return (SWAPBLK_NONE); 254 } 255 256 /* 257 * blist_avail() - return the number of free blocks. 258 */ 259 daddr_t 260 blist_avail(blist_t bl) 261 { 262 263 if (bl->bl_radix == BLIST_BMAP_RADIX) 264 return (bitcount64(bl->bl_root->u.bmu_bitmap)); 265 else 266 return (bl->bl_root->u.bmu_avail); 267 } 268 269 /* 270 * blist_free() - free up space in the block bitmap. Return the base 271 * of a contiguous region. Panic if an inconsistancy is 272 * found. 273 */ 274 void 275 blist_free(blist_t bl, daddr_t blkno, daddr_t count) 276 { 277 278 blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix, 0); 279 } 280 281 /* 282 * blist_fill() - mark a region in the block bitmap as off-limits 283 * to the allocator (i.e. allocate it), ignoring any 284 * existing allocations. Return the number of blocks 285 * actually filled that were free before the call. 286 */ 287 daddr_t 288 blist_fill(blist_t bl, daddr_t blkno, daddr_t count) 289 { 290 291 return (blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix, 0)); 292 } 293 294 /* 295 * blist_resize() - resize an existing radix tree to handle the 296 * specified number of blocks. This will reallocate 297 * the tree and transfer the previous bitmap to the new 298 * one. When extending the tree you can specify whether 299 * the new blocks are to left allocated or freed. 300 */ 301 void 302 blist_resize(blist_t *pbl, daddr_t count, int freenew, int flags) 303 { 304 blist_t newbl = blist_create(count, flags); 305 blist_t save = *pbl; 306 307 *pbl = newbl; 308 if (count > save->bl_blocks) 309 count = save->bl_blocks; 310 blst_copy(save->bl_root, 0, save->bl_radix, newbl, count); 311 312 /* 313 * If resizing upwards, should we free the new space or not? 314 */ 315 if (freenew && count < newbl->bl_blocks) { 316 blist_free(newbl, count, newbl->bl_blocks - count); 317 } 318 blist_destroy(save); 319 } 320 321 #ifdef BLIST_DEBUG 322 323 /* 324 * blist_print() - dump radix tree 325 */ 326 void 327 blist_print(blist_t bl) 328 { 329 printf("BLIST cursor = %08jx {\n", (uintmax_t)bl->bl_cursor); 330 blst_radix_print(bl->bl_root, 0, bl->bl_radix, 4); 331 printf("}\n"); 332 } 333 334 #endif 335 336 /************************************************************************ 337 * ALLOCATION SUPPORT FUNCTIONS * 338 ************************************************************************ 339 * 340 * These support functions do all the actual work. They may seem 341 * rather longish, but that's because I've commented them up. The 342 * actual code is straight forward. 343 * 344 */ 345 346 /* 347 * blist_leaf_alloc() - allocate at a leaf in the radix tree (a bitmap). 348 * 349 * This is the core of the allocator and is optimized for the 350 * BLIST_BMAP_RADIX block allocation case. Otherwise, execution 351 * time is proportional to log2(count) + log2(BLIST_BMAP_RADIX). 352 */ 353 static daddr_t 354 blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count, daddr_t cursor) 355 { 356 u_daddr_t mask; 357 int count1, hi, lo, mid, num_shifts, range1, range_ext; 358 359 if (count == BLIST_BMAP_RADIX) { 360 /* 361 * Optimize allocation of BLIST_BMAP_RADIX bits. If this wasn't 362 * a special case, then forming the final value of 'mask' below 363 * would require special handling to avoid an invalid left shift 364 * when count equals the number of bits in mask. 365 */ 366 if (~scan->u.bmu_bitmap != 0) { 367 scan->bm_bighint = BLIST_BMAP_RADIX - 1; 368 return (SWAPBLK_NONE); 369 } 370 if (cursor != blk) 371 return (SWAPBLK_NONE); 372 scan->u.bmu_bitmap = 0; 373 scan->bm_bighint = 0; 374 return (blk); 375 } 376 range1 = 0; 377 count1 = count - 1; 378 num_shifts = fls(count1); 379 mask = scan->u.bmu_bitmap; 380 while (mask != 0 && num_shifts > 0) { 381 /* 382 * If bit i is set in mask, then bits in [i, i+range1] are set 383 * in scan->u.bmu_bitmap. The value of range1 is equal to 384 * count1 >> num_shifts. Grow range and reduce num_shifts to 0, 385 * while preserving these invariants. The updates to mask leave 386 * fewer bits set, but each bit that remains set represents a 387 * longer string of consecutive bits set in scan->u.bmu_bitmap. 388 */ 389 num_shifts--; 390 range_ext = range1 + ((count1 >> num_shifts) & 1); 391 mask &= mask >> range_ext; 392 range1 += range_ext; 393 } 394 if (mask == 0) { 395 /* 396 * Update bighint. There is no allocation bigger than range1 397 * available in this leaf. 398 */ 399 scan->bm_bighint = range1; 400 return (SWAPBLK_NONE); 401 } 402 403 /* 404 * Discard any candidates that appear before the cursor. 405 */ 406 lo = cursor - blk; 407 mask &= ~(u_daddr_t)0 << lo; 408 409 if (mask == 0) 410 return (SWAPBLK_NONE); 411 412 /* 413 * The least significant set bit in mask marks the start of the first 414 * available range of sufficient size. Clear all the bits but that one, 415 * and then perform a binary search to find its position. 416 */ 417 mask &= -mask; 418 hi = BLIST_BMAP_RADIX - count1; 419 while (lo + 1 < hi) { 420 mid = (lo + hi) >> 1; 421 if ((mask >> mid) != 0) 422 lo = mid; 423 else 424 hi = mid; 425 } 426 427 /* 428 * Set in mask exactly the bits being allocated, and clear them from 429 * the set of available bits. 430 */ 431 mask = (mask << count) - mask; 432 scan->u.bmu_bitmap &= ~mask; 433 return (blk + lo); 434 } 435 436 /* 437 * blist_meta_alloc() - allocate at a meta in the radix tree. 438 * 439 * Attempt to allocate at a meta node. If we can't, we update 440 * bighint and return a failure. Updating bighint optimize future 441 * calls that hit this node. We have to check for our collapse cases 442 * and we have a few optimizations strewn in as well. 443 */ 444 static daddr_t 445 blst_meta_alloc(blmeta_t *scan, daddr_t blk, daddr_t count, daddr_t radix, 446 daddr_t cursor) 447 { 448 daddr_t i, next_skip, r, skip; 449 int child; 450 bool scan_from_start; 451 452 if (radix == BLIST_BMAP_RADIX) 453 return (blst_leaf_alloc(scan, blk, count, cursor)); 454 if (scan->u.bmu_avail < count) { 455 /* 456 * The meta node's hint must be too large if the allocation 457 * exceeds the number of free blocks. Reduce the hint, and 458 * return failure. 459 */ 460 scan->bm_bighint = scan->u.bmu_avail; 461 return (SWAPBLK_NONE); 462 } 463 skip = radix_to_skip(radix); 464 next_skip = skip / BLIST_META_RADIX; 465 466 /* 467 * An ALL-FREE meta node requires special handling before allocating 468 * any of its blocks. 469 */ 470 if (scan->u.bmu_avail == radix) { 471 radix /= BLIST_META_RADIX; 472 473 /* 474 * Reinitialize each of the meta node's children. An ALL-FREE 475 * meta node cannot have a terminator in any subtree. 476 */ 477 for (i = 1; i < skip; i += next_skip) { 478 if (next_skip == 1) 479 scan[i].u.bmu_bitmap = (u_daddr_t)-1; 480 else 481 scan[i].u.bmu_avail = radix; 482 scan[i].bm_bighint = radix; 483 } 484 } else { 485 radix /= BLIST_META_RADIX; 486 } 487 488 if (count > radix) { 489 /* 490 * The allocation exceeds the number of blocks that are 491 * managed by a subtree of this meta node. 492 */ 493 panic("allocation too large"); 494 } 495 scan_from_start = cursor == blk; 496 child = (cursor - blk) / radix; 497 blk += child * radix; 498 for (i = 1 + child * next_skip; i < skip; i += next_skip) { 499 if (count <= scan[i].bm_bighint) { 500 /* 501 * The allocation might fit in the i'th subtree. 502 */ 503 r = blst_meta_alloc(&scan[i], blk, count, radix, 504 cursor > blk ? cursor : blk); 505 if (r != SWAPBLK_NONE) { 506 scan->u.bmu_avail -= count; 507 return (r); 508 } 509 } else if (scan[i].bm_bighint == (daddr_t)-1) { 510 /* 511 * Terminator 512 */ 513 break; 514 } 515 blk += radix; 516 } 517 518 /* 519 * We couldn't allocate count in this subtree, update bighint. 520 */ 521 if (scan_from_start && scan->bm_bighint >= count) 522 scan->bm_bighint = count - 1; 523 524 return (SWAPBLK_NONE); 525 } 526 527 /* 528 * BLST_LEAF_FREE() - free allocated block from leaf bitmap 529 * 530 */ 531 static void 532 blst_leaf_free(blmeta_t *scan, daddr_t blk, int count) 533 { 534 /* 535 * free some data in this bitmap 536 * 537 * e.g. 538 * 0000111111111110000 539 * \_________/\__/ 540 * v n 541 */ 542 int n = blk & (BLIST_BMAP_RADIX - 1); 543 u_daddr_t mask; 544 545 mask = ((u_daddr_t)-1 << n) & 546 ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n)); 547 548 if (scan->u.bmu_bitmap & mask) 549 panic("blst_radix_free: freeing free block"); 550 scan->u.bmu_bitmap |= mask; 551 552 /* 553 * We could probably do a better job here. We are required to make 554 * bighint at least as large as the biggest contiguous block of 555 * data. If we just shoehorn it, a little extra overhead will 556 * be incured on the next allocation (but only that one typically). 557 */ 558 scan->bm_bighint = BLIST_BMAP_RADIX; 559 } 560 561 /* 562 * BLST_META_FREE() - free allocated blocks from radix tree meta info 563 * 564 * This support routine frees a range of blocks from the bitmap. 565 * The range must be entirely enclosed by this radix node. If a 566 * meta node, we break the range down recursively to free blocks 567 * in subnodes (which means that this code can free an arbitrary 568 * range whereas the allocation code cannot allocate an arbitrary 569 * range). 570 */ 571 static void 572 blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, daddr_t radix, 573 daddr_t blk) 574 { 575 daddr_t i, next_skip, skip, v; 576 int child; 577 578 if (scan->bm_bighint == (daddr_t)-1) 579 panic("freeing invalid range"); 580 if (radix == BLIST_BMAP_RADIX) 581 return (blst_leaf_free(scan, freeBlk, count)); 582 skip = radix_to_skip(radix); 583 next_skip = skip / BLIST_META_RADIX; 584 585 if (scan->u.bmu_avail == 0) { 586 /* 587 * ALL-ALLOCATED special case, with possible 588 * shortcut to ALL-FREE special case. 589 */ 590 scan->u.bmu_avail = count; 591 scan->bm_bighint = count; 592 593 if (count != radix) { 594 for (i = 1; i < skip; i += next_skip) { 595 if (scan[i].bm_bighint == (daddr_t)-1) 596 break; 597 scan[i].bm_bighint = 0; 598 if (next_skip == 1) { 599 scan[i].u.bmu_bitmap = 0; 600 } else { 601 scan[i].u.bmu_avail = 0; 602 } 603 } 604 /* fall through */ 605 } 606 } else { 607 scan->u.bmu_avail += count; 608 /* scan->bm_bighint = radix; */ 609 } 610 611 /* 612 * ALL-FREE special case. 613 */ 614 615 if (scan->u.bmu_avail == radix) 616 return; 617 if (scan->u.bmu_avail > radix) 618 panic("blst_meta_free: freeing already free blocks (%lld) %lld/%lld", 619 (long long)count, (long long)scan->u.bmu_avail, 620 (long long)radix); 621 622 /* 623 * Break the free down into its components 624 */ 625 626 radix /= BLIST_META_RADIX; 627 628 child = (freeBlk - blk) / radix; 629 blk += child * radix; 630 i = 1 + child * next_skip; 631 while (i < skip && blk < freeBlk + count) { 632 v = blk + radix - freeBlk; 633 if (v > count) 634 v = count; 635 blst_meta_free(&scan[i], freeBlk, v, radix, blk); 636 if (scan->bm_bighint < scan[i].bm_bighint) 637 scan->bm_bighint = scan[i].bm_bighint; 638 count -= v; 639 freeBlk += v; 640 blk += radix; 641 i += next_skip; 642 } 643 } 644 645 /* 646 * BLIST_RADIX_COPY() - copy one radix tree to another 647 * 648 * Locates free space in the source tree and frees it in the destination 649 * tree. The space may not already be free in the destination. 650 */ 651 static void 652 blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest, 653 daddr_t count) 654 { 655 daddr_t i, next_skip, skip; 656 657 /* 658 * Leaf node 659 */ 660 661 if (radix == BLIST_BMAP_RADIX) { 662 u_daddr_t v = scan->u.bmu_bitmap; 663 664 if (v == (u_daddr_t)-1) { 665 blist_free(dest, blk, count); 666 } else if (v != 0) { 667 int i; 668 669 for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) { 670 if (v & ((u_daddr_t)1 << i)) 671 blist_free(dest, blk + i, 1); 672 } 673 } 674 return; 675 } 676 677 /* 678 * Meta node 679 */ 680 681 if (scan->u.bmu_avail == 0) { 682 /* 683 * Source all allocated, leave dest allocated 684 */ 685 return; 686 } 687 if (scan->u.bmu_avail == radix) { 688 /* 689 * Source all free, free entire dest 690 */ 691 if (count < radix) 692 blist_free(dest, blk, count); 693 else 694 blist_free(dest, blk, radix); 695 return; 696 } 697 698 699 skip = radix_to_skip(radix); 700 next_skip = skip / BLIST_META_RADIX; 701 radix /= BLIST_META_RADIX; 702 703 for (i = 1; count && i < skip; i += next_skip) { 704 if (scan[i].bm_bighint == (daddr_t)-1) 705 break; 706 707 if (count >= radix) { 708 blst_copy(&scan[i], blk, radix, dest, radix); 709 count -= radix; 710 } else { 711 if (count) { 712 blst_copy(&scan[i], blk, radix, dest, count); 713 } 714 count = 0; 715 } 716 blk += radix; 717 } 718 } 719 720 /* 721 * BLST_LEAF_FILL() - allocate specific blocks in leaf bitmap 722 * 723 * This routine allocates all blocks in the specified range 724 * regardless of any existing allocations in that range. Returns 725 * the number of blocks allocated by the call. 726 */ 727 static daddr_t 728 blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count) 729 { 730 int n = blk & (BLIST_BMAP_RADIX - 1); 731 daddr_t nblks; 732 u_daddr_t mask; 733 734 mask = ((u_daddr_t)-1 << n) & 735 ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n)); 736 737 /* Count the number of blocks that we are allocating. */ 738 nblks = bitcount64(scan->u.bmu_bitmap & mask); 739 740 scan->u.bmu_bitmap &= ~mask; 741 return (nblks); 742 } 743 744 /* 745 * BLIST_META_FILL() - allocate specific blocks at a meta node 746 * 747 * This routine allocates the specified range of blocks, 748 * regardless of any existing allocations in the range. The 749 * range must be within the extent of this node. Returns the 750 * number of blocks allocated by the call. 751 */ 752 static daddr_t 753 blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, daddr_t radix, 754 daddr_t blk) 755 { 756 daddr_t i, nblks, next_skip, skip, v; 757 int child; 758 759 if (scan->bm_bighint == (daddr_t)-1) 760 panic("filling invalid range"); 761 if (count > radix) { 762 /* 763 * The allocation exceeds the number of blocks that are 764 * managed by this node. 765 */ 766 panic("fill too large"); 767 } 768 if (radix == BLIST_BMAP_RADIX) 769 return (blst_leaf_fill(scan, allocBlk, count)); 770 if (count == radix || scan->u.bmu_avail == 0) { 771 /* 772 * ALL-ALLOCATED special case 773 */ 774 nblks = scan->u.bmu_avail; 775 scan->u.bmu_avail = 0; 776 scan->bm_bighint = 0; 777 return (nblks); 778 } 779 skip = radix_to_skip(radix); 780 next_skip = skip / BLIST_META_RADIX; 781 782 /* 783 * An ALL-FREE meta node requires special handling before allocating 784 * any of its blocks. 785 */ 786 if (scan->u.bmu_avail == radix) { 787 radix /= BLIST_META_RADIX; 788 789 /* 790 * Reinitialize each of the meta node's children. An ALL-FREE 791 * meta node cannot have a terminator in any subtree. 792 */ 793 for (i = 1; i < skip; i += next_skip) { 794 if (next_skip == 1) 795 scan[i].u.bmu_bitmap = (u_daddr_t)-1; 796 else 797 scan[i].u.bmu_avail = radix; 798 scan[i].bm_bighint = radix; 799 } 800 } else { 801 radix /= BLIST_META_RADIX; 802 } 803 804 nblks = 0; 805 child = (allocBlk - blk) / radix; 806 blk += child * radix; 807 i = 1 + child * next_skip; 808 while (i < skip && blk < allocBlk + count) { 809 v = blk + radix - allocBlk; 810 if (v > count) 811 v = count; 812 nblks += blst_meta_fill(&scan[i], allocBlk, v, radix, blk); 813 count -= v; 814 allocBlk += v; 815 blk += radix; 816 i += next_skip; 817 } 818 scan->u.bmu_avail -= nblks; 819 return (nblks); 820 } 821 822 /* 823 * BLST_RADIX_INIT() - initialize radix tree 824 * 825 * Initialize our meta structures and bitmaps and calculate the exact 826 * amount of space required to manage 'count' blocks - this space may 827 * be considerably less than the calculated radix due to the large 828 * RADIX values we use. 829 */ 830 static daddr_t 831 blst_radix_init(blmeta_t *scan, daddr_t radix, daddr_t count) 832 { 833 daddr_t i, memindex, next_skip, skip; 834 835 memindex = 0; 836 837 /* 838 * Leaf node 839 */ 840 841 if (radix == BLIST_BMAP_RADIX) { 842 if (scan) { 843 scan->bm_bighint = 0; 844 scan->u.bmu_bitmap = 0; 845 } 846 return (memindex); 847 } 848 849 /* 850 * Meta node. If allocating the entire object we can special 851 * case it. However, we need to figure out how much memory 852 * is required to manage 'count' blocks, so we continue on anyway. 853 */ 854 855 if (scan) { 856 scan->bm_bighint = 0; 857 scan->u.bmu_avail = 0; 858 } 859 860 skip = radix_to_skip(radix); 861 next_skip = skip / BLIST_META_RADIX; 862 radix /= BLIST_META_RADIX; 863 864 for (i = 1; i < skip; i += next_skip) { 865 if (count >= radix) { 866 /* 867 * Allocate the entire object 868 */ 869 memindex = i + 870 blst_radix_init(((scan) ? &scan[i] : NULL), radix, 871 radix); 872 count -= radix; 873 } else if (count > 0) { 874 /* 875 * Allocate a partial object 876 */ 877 memindex = i + 878 blst_radix_init(((scan) ? &scan[i] : NULL), radix, 879 count); 880 count = 0; 881 } else { 882 /* 883 * Add terminator and break out 884 */ 885 if (scan) 886 scan[i].bm_bighint = (daddr_t)-1; 887 break; 888 } 889 } 890 if (memindex < i) 891 memindex = i; 892 return (memindex); 893 } 894 895 #ifdef BLIST_DEBUG 896 897 static void 898 blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab) 899 { 900 daddr_t i, next_skip, skip; 901 902 if (radix == BLIST_BMAP_RADIX) { 903 printf( 904 "%*.*s(%08llx,%lld): bitmap %016llx big=%lld\n", 905 tab, tab, "", 906 (long long)blk, (long long)radix, 907 (long long)scan->u.bmu_bitmap, 908 (long long)scan->bm_bighint 909 ); 910 return; 911 } 912 913 if (scan->u.bmu_avail == 0) { 914 printf( 915 "%*.*s(%08llx,%lld) ALL ALLOCATED\n", 916 tab, tab, "", 917 (long long)blk, 918 (long long)radix 919 ); 920 return; 921 } 922 if (scan->u.bmu_avail == radix) { 923 printf( 924 "%*.*s(%08llx,%lld) ALL FREE\n", 925 tab, tab, "", 926 (long long)blk, 927 (long long)radix 928 ); 929 return; 930 } 931 932 printf( 933 "%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n", 934 tab, tab, "", 935 (long long)blk, (long long)radix, 936 (long long)scan->u.bmu_avail, 937 (long long)radix, 938 (long long)scan->bm_bighint 939 ); 940 941 skip = radix_to_skip(radix); 942 next_skip = skip / BLIST_META_RADIX; 943 radix /= BLIST_META_RADIX; 944 tab += 4; 945 946 for (i = 1; i < skip; i += next_skip) { 947 if (scan[i].bm_bighint == (daddr_t)-1) { 948 printf( 949 "%*.*s(%08llx,%lld): Terminator\n", 950 tab, tab, "", 951 (long long)blk, (long long)radix 952 ); 953 break; 954 } 955 blst_radix_print(&scan[i], blk, radix, tab); 956 blk += radix; 957 } 958 tab -= 4; 959 960 printf( 961 "%*.*s}\n", 962 tab, tab, "" 963 ); 964 } 965 966 #endif 967 968 #ifdef BLIST_DEBUG 969 970 int 971 main(int ac, char **av) 972 { 973 int size = 1024; 974 int i; 975 blist_t bl; 976 977 for (i = 1; i < ac; ++i) { 978 const char *ptr = av[i]; 979 if (*ptr != '-') { 980 size = strtol(ptr, NULL, 0); 981 continue; 982 } 983 ptr += 2; 984 fprintf(stderr, "Bad option: %s\n", ptr - 2); 985 exit(1); 986 } 987 bl = blist_create(size, M_WAITOK); 988 blist_free(bl, 0, size); 989 990 for (;;) { 991 char buf[1024]; 992 long long da = 0; 993 long long count = 0; 994 995 printf("%lld/%lld/%lld> ", (long long)blist_avail(bl), 996 (long long)size, (long long)bl->bl_radix); 997 fflush(stdout); 998 if (fgets(buf, sizeof(buf), stdin) == NULL) 999 break; 1000 switch(buf[0]) { 1001 case 'r': 1002 if (sscanf(buf + 1, "%lld", &count) == 1) { 1003 blist_resize(&bl, count, 1, M_WAITOK); 1004 } else { 1005 printf("?\n"); 1006 } 1007 case 'p': 1008 blist_print(bl); 1009 break; 1010 case 'a': 1011 if (sscanf(buf + 1, "%lld", &count) == 1) { 1012 daddr_t blk = blist_alloc(bl, count); 1013 printf(" R=%08llx\n", (long long)blk); 1014 } else { 1015 printf("?\n"); 1016 } 1017 break; 1018 case 'f': 1019 if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) { 1020 blist_free(bl, da, count); 1021 } else { 1022 printf("?\n"); 1023 } 1024 break; 1025 case 'l': 1026 if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) { 1027 printf(" n=%jd\n", 1028 (intmax_t)blist_fill(bl, da, count)); 1029 } else { 1030 printf("?\n"); 1031 } 1032 break; 1033 case '?': 1034 case 'h': 1035 puts( 1036 "p -print\n" 1037 "a %d -allocate\n" 1038 "f %x %d -free\n" 1039 "l %x %d -fill\n" 1040 "r %d -resize\n" 1041 "h/? -help" 1042 ); 1043 break; 1044 default: 1045 printf("?\n"); 1046 break; 1047 } 1048 } 1049 return(0); 1050 } 1051 1052 void 1053 panic(const char *ctl, ...) 1054 { 1055 va_list va; 1056 1057 va_start(va, ctl); 1058 vfprintf(stderr, ctl, va); 1059 fprintf(stderr, "\n"); 1060 va_end(va); 1061 exit(1); 1062 } 1063 1064 #endif 1065