/*-
 * Copyright (c) 1998 Matthew Dillon.  All Rights Reserved.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * BLIST.C -	Bitmap allocator/deallocator, using a radix tree with hinting
 *
 *	This module implements a general bitmap allocator/deallocator.  The
 *	allocator eats around 2 bits per 'block'.  The module does not
 *	try to interpret the meaning of a 'block' other than to return
 *	SWAPBLK_NONE on an allocation failure.
 *
 *	A radix tree is used to maintain the bitmap.  Two radix constants are
 *	involved: one for the bitmaps contained in the leaf nodes (typically
 *	64), and one for the meta nodes (typically 16).  Both meta and leaf
 *	nodes have a hint field.  This field gives us a hint as to the largest
 *	free contiguous range of blocks under the node.  It may contain a
 *	value that is too high, but will never contain a value that is too
 *	low.  When the radix tree is searched, allocation failures in subtrees
 *	update the hint.
 *
 *	The radix tree also implements two collapsed states for meta nodes:
 *	the ALL-ALLOCATED state and the ALL-FREE state.  If a meta node is
 *	in either of these two states, all information contained underneath
 *	the node is considered stale.  These states are used to optimize
 *	allocation and freeing operations.
 *
 *	The hinting greatly increases code efficiency for allocations while
 *	the general radix structure optimizes both allocations and frees.  The
 *	radix tree should be able to operate well no matter how much
 *	fragmentation there is and no matter how large a bitmap is used.
 *
 *	The blist code wires all necessary memory at creation time.  Neither
 *	allocations nor frees require interaction with the memory subsystem.
 *	The non-blocking features of the blist code are used in the swap code
 *	(vm/swap_pager.c).
 *
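 *	As a rough illustration of the memory cost (the exact constants live
 *	in sys/blist.h; a 64-bit u_daddr_t and a BLIST_META_RADIX of 16 are
 *	assumed here): each blmeta_t holds a hint plus a bitmap-or-count
 *	union, i.e. 16 bytes, and one leaf covers 64 blocks.  Meta nodes add
 *	roughly another 1/16th on top of that, so the wired cost comes to a
 *	bit over 2 bits per managed block.
 *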
 *	LAYOUT: The radix tree is laid out recursively using a
 *	linear array.  Each meta node is immediately followed (laid out
 *	sequentially in memory) by BLIST_META_RADIX lower level nodes.  This
 *	is a recursive structure but one that can be easily scanned using
 *	a very simple 'skip' calculation.  In order to support large radixes,
 *	portions of the tree may reside outside our memory allocation.  We
 *	handle this with an early-termination optimization (when bighint is
 *	set to -1) on the scan.  The memory allocation is only large enough
 *	to cover the number of blocks requested at creation time even if it
 *	must be encompassed in a larger root-node radix.
 *
 *	NOTE: the allocator cannot currently allocate more than
 *	BLIST_BMAP_RADIX blocks per call.  It will panic with 'allocation too
 *	large' if you try.  This is an area that could use improvement.  The
 *	radix is large enough that this restriction does not affect the swap
 *	system, though.  Currently only the allocation code is affected by
 *	this algorithmic unfeature.  The freeing code can handle arbitrary
 *	ranges.
 *
 *	This code can be compiled stand-alone for debugging.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#ifdef _KERNEL

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/blist.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/mutex.h>

#else

#ifndef BLIST_NO_DEBUG
#define BLIST_DEBUG
#endif

#include <sys/types.h>
#include <sys/malloc.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>

#define	bitcount64(x)	__bitcount64((uint64_t)(x))
#define	malloc(a,b,c)	calloc(a, 1)
#define	free(a,b)	free(a)

#include <sys/blist.h>

void panic(const char *ctl, ...);

#endif

/*
 * static support functions
 */

static daddr_t	blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count);
static daddr_t	blst_meta_alloc(blmeta_t *scan, daddr_t blk, daddr_t count,
		    daddr_t radix, daddr_t skip, daddr_t cursor);
static void	blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
static void	blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
		    daddr_t radix, int skip, daddr_t blk);
static void	blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
		    daddr_t skip, blist_t dest, daddr_t count);
static daddr_t	blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count);
static daddr_t	blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count,
		    daddr_t radix, int skip, daddr_t blk);
static daddr_t	blst_radix_init(blmeta_t *scan, daddr_t radix,
		    int skip, daddr_t count);
#ifndef _KERNEL
static void	blst_radix_print(blmeta_t *scan, daddr_t blk,
		    daddr_t radix, int skip, int tab);
#endif

#ifdef _KERNEL
static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
#endif

/*
 * blist_create() - create a blist capable of handling up to the specified
 *		    number of blocks
 *
 *	blocks - must be greater than 0
 *	flags  - malloc flags
 *
 *	The smallest blist consists of a single leaf node capable of
 *	managing BLIST_BMAP_RADIX blocks.
 */

blist_t
blist_create(daddr_t blocks, int flags)
{
	blist_t bl;
	daddr_t nodes, radix;
	int skip = 0;

	/*
	 * Calculate the radix and skip field used for scanning.
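	 *
	 * As an illustrative example (assuming BLIST_BMAP_RADIX is 64 and
	 * BLIST_META_RADIX is 16): a request for 2048 blocks grows radix
	 * through 64 -> 1024 -> 16384 and skip through 0 -> 16 -> 272, so
	 * the root node covers a 16384-block radix even though only 2048
	 * blocks are managed.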
	 */
	radix = BLIST_BMAP_RADIX;

	while (radix < blocks) {
		radix *= BLIST_META_RADIX;
		skip = (skip + 1) * BLIST_META_RADIX;
	}

	bl = malloc(sizeof(struct blist), M_SWAP, flags);
	if (bl == NULL)
		return (NULL);

	bl->bl_blocks = blocks;
	bl->bl_radix = radix;
	bl->bl_skip = skip;
	bl->bl_cursor = 0;
	nodes = 1 + blst_radix_init(NULL, radix, bl->bl_skip, blocks);
	bl->bl_root = malloc(nodes * sizeof(blmeta_t), M_SWAP, flags);
	if (bl->bl_root == NULL) {
		free(bl, M_SWAP);
		return (NULL);
	}
	blst_radix_init(bl->bl_root, radix, bl->bl_skip, blocks);

#if defined(BLIST_DEBUG)
	printf(
	    "BLIST representing %lld blocks (%lld MB of swap)"
	    ", requiring %lldK of ram\n",
	    (long long)bl->bl_blocks,
	    (long long)bl->bl_blocks * 4 / 1024,
	    (long long)(nodes * sizeof(blmeta_t) + 1023) / 1024
	);
	printf("BLIST raw radix tree contains %lld records\n",
	    (long long)nodes);
#endif

	return (bl);
}

void
blist_destroy(blist_t bl)
{
	free(bl->bl_root, M_SWAP);
	free(bl, M_SWAP);
}

/*
 * blist_alloc() -	reserve space in the block bitmap.  Return the base
 *			of a contiguous region or SWAPBLK_NONE if space could
 *			not be allocated.
 */

daddr_t
blist_alloc(blist_t bl, daddr_t count)
{
	daddr_t blk;

	/*
	 * This loop iterates at most twice.  An allocation failure in the
	 * first iteration leads to a second iteration only if the cursor was
	 * non-zero.  When the cursor is zero, an allocation failure will
	 * reduce the hint, stopping further iterations.
	 */
	while (count <= bl->bl_root->bm_bighint) {
		if (bl->bl_radix == BLIST_BMAP_RADIX)
			blk = blst_leaf_alloc(bl->bl_root, 0, count);
		else
			blk = blst_meta_alloc(bl->bl_root, 0, count,
			    bl->bl_radix, bl->bl_skip, bl->bl_cursor);
		if (blk != SWAPBLK_NONE) {
			bl->bl_cursor = blk + count;
			return (blk);
		} else if (bl->bl_cursor != 0)
			bl->bl_cursor = 0;
	}
	return (SWAPBLK_NONE);
}

/*
 * blist_avail() -	return the number of free blocks.
 */

daddr_t
blist_avail(blist_t bl)
{

	if (bl->bl_radix == BLIST_BMAP_RADIX)
		return (bitcount64(bl->bl_root->u.bmu_bitmap));
	else
		return (bl->bl_root->u.bmu_avail);
}

/*
 * blist_free() -	free up space in the block bitmap.  Panic if an
 *			inconsistency is found.
 */

void
blist_free(blist_t bl, daddr_t blkno, daddr_t count)
{
	if (bl) {
		if (bl->bl_radix == BLIST_BMAP_RADIX)
			blst_leaf_free(bl->bl_root, blkno, count);
		else
			blst_meta_free(bl->bl_root, blkno, count,
			    bl->bl_radix, bl->bl_skip, 0);
	}
}

/*
 * blist_fill() -	mark a region in the block bitmap as off-limits
 *			to the allocator (i.e. allocate it), ignoring any
 *			existing allocations.  Return the number of blocks
 *			actually filled that were free before the call.
 */

daddr_t
blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
{
	daddr_t filled;

	if (bl) {
		if (bl->bl_radix == BLIST_BMAP_RADIX)
			filled = blst_leaf_fill(bl->bl_root, blkno, count);
		else
			filled = blst_meta_fill(bl->bl_root, blkno, count,
			    bl->bl_radix, bl->bl_skip, 0);
		return (filled);
	}
	return (0);
}

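/*
 * An illustrative, non-authoritative sketch of how a consumer (such as the
 * swap code) might drive the public interface above.  The block counts and
 * the error handling here are hypothetical; only the function names and the
 * fact that a newly created blist starts out fully allocated come from this
 * file.
 */
#if 0
	blist_t bl;
	daddr_t blk;

	bl = blist_create(2048, M_WAITOK);	/* every block starts allocated */
	blist_free(bl, 0, 2048);		/* hand all 2048 blocks to the allocator */
	blk = blist_alloc(bl, 16);		/* reserve a contiguous 16-block run */
	if (blk != SWAPBLK_NONE) {
		blist_fill(bl, blk, 4);		/* force-allocate 4 blocks we already own */
		blist_free(bl, blk, 16);	/* return the whole run */
	}
	blist_destroy(bl);
#endif
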
/*
 * blist_resize() -	resize an existing radix tree to handle the
 *			specified number of blocks.  This will reallocate
 *			the tree and transfer the previous bitmap to the new
 *			one.  When extending the tree you can specify whether
 *			the new blocks are to be left allocated or freed.
 */

void
blist_resize(blist_t *pbl, daddr_t count, int freenew, int flags)
{
	blist_t newbl = blist_create(count, flags);
	blist_t save = *pbl;

	*pbl = newbl;
	if (count > save->bl_blocks)
		count = save->bl_blocks;
	blst_copy(save->bl_root, 0, save->bl_radix, save->bl_skip, newbl, count);

	/*
	 * If resizing upwards, should we free the new space or not?
	 */
	if (freenew && count < newbl->bl_blocks) {
		blist_free(newbl, count, newbl->bl_blocks - count);
	}
	blist_destroy(save);
}

#ifdef BLIST_DEBUG

/*
 * blist_print()    - dump radix tree
 */

void
blist_print(blist_t bl)
{
	printf("BLIST {\n");
	blst_radix_print(bl->bl_root, 0, bl->bl_radix, bl->bl_skip, 4);
	printf("}\n");
}

#endif

/************************************************************************
 *			ALLOCATION SUPPORT FUNCTIONS			*
 ************************************************************************
 *
 *	These support functions do all the actual work.  They may seem
 *	rather longish, but that's because I've commented them up.  The
 *	actual code is straightforward.
 *
 */

/*
 * blst_leaf_alloc() -	allocate at a leaf in the radix tree (a bitmap).
 *
 *	This is the core of the allocator and is optimized for the 1 block
 *	and the BLIST_BMAP_RADIX block allocation cases.  Other cases are
 *	somewhat slower.  The 1 block allocation case is log2 and extremely
 *	quick.
 */

static daddr_t
blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
{
	u_daddr_t orig = scan->u.bmu_bitmap;

	if (orig == 0) {
		/*
		 * Optimize bitmap all-allocated case.  Also, count = 1
		 * case assumes at least 1 bit is free in the bitmap, so
		 * we have to take care of this case here.
		 */
		scan->bm_bighint = 0;
		return (SWAPBLK_NONE);
	}
	if (count == 1) {
		/*
		 * Optimized code to allocate one bit out of the bitmap
		 */
		u_daddr_t mask;
		int j = BLIST_BMAP_RADIX/2;
		int r = 0;

		mask = (u_daddr_t)-1 >> (BLIST_BMAP_RADIX/2);

		while (j) {
			if ((orig & mask) == 0) {
				r += j;
				orig >>= j;
			}
			j >>= 1;
			mask >>= j;
		}
		scan->u.bmu_bitmap &= ~((u_daddr_t)1 << r);
		return (blk + r);
	}
	if (count <= BLIST_BMAP_RADIX) {
		/*
		 * non-optimized code to allocate N bits out of the bitmap.
		 * The more bits, the faster the code runs.  It will run
		 * the slowest allocating 2 bits, but since there aren't any
		 * memory ops in the core loop (or shouldn't be, anyway),
		 * you probably won't notice the difference.
		 */
		int j;
		int n = BLIST_BMAP_RADIX - count;
		u_daddr_t mask;

		mask = (u_daddr_t)-1 >> n;

		for (j = 0; j <= n; ++j) {
			if ((orig & mask) == mask) {
				scan->u.bmu_bitmap &= ~mask;
				return (blk + j);
			}
			mask = (mask << 1);
		}
	}
	/*
	 * We couldn't allocate count in this subtree, update bighint.
	 */
	scan->bm_bighint = count - 1;
	return (SWAPBLK_NONE);
}

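/*
 * Illustrative trace of the count == 1 scan in blst_leaf_alloc() above,
 * assuming a 64-bit bitmap whose lowest set bit is bit 37: the 32-bit probe
 * finds the low half empty, so r becomes 32 and orig is shifted down; the
 * 16-, 8- and 2-bit probes then find a free bit in their low halves, while
 * the 4- and 1-bit probes do not, adding 4 and 1 for a final r of 37.  In
 * effect this is a binary search for the lowest set (free) bit.
 */
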
/*
 * blst_meta_alloc() -	allocate at a meta node in the radix tree.
 *
 *	Attempt to allocate at a meta node.  If we can't, we update
 *	bighint and return a failure.  Updating bighint optimizes future
 *	calls that hit this node.  We have to check for our collapse cases
 *	and we have a few optimizations strewn in as well.
 */

static daddr_t
blst_meta_alloc(blmeta_t *scan, daddr_t blk, daddr_t count, daddr_t radix,
    daddr_t skip, daddr_t cursor)
{
	daddr_t i, next_skip, r;
	int child;
	bool scan_from_start;

	if (scan->u.bmu_avail < count) {
		/*
		 * The meta node's hint must be too large if the allocation
		 * exceeds the number of free blocks.  Reduce the hint, and
		 * return failure.
		 */
		scan->bm_bighint = scan->u.bmu_avail;
		return (SWAPBLK_NONE);
	}
	next_skip = skip / BLIST_META_RADIX;

	/*
	 * An ALL-FREE meta node requires special handling before allocating
	 * any of its blocks.
	 */
	if (scan->u.bmu_avail == radix) {
		radix /= BLIST_META_RADIX;

		/*
		 * Reinitialize each of the meta node's children.  An ALL-FREE
		 * meta node cannot have a terminator in any subtree.
		 */
		for (i = 1; i <= skip; i += next_skip) {
			if (next_skip == 1)
				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
			else
				scan[i].u.bmu_avail = radix;
			scan[i].bm_bighint = radix;
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	if (count > radix) {
		/*
		 * The allocation exceeds the number of blocks that are
		 * managed by a subtree of this meta node.
		 */
		panic("allocation too large");
	}
	scan_from_start = cursor == blk;
	child = (cursor - blk) / radix;
	blk += child * radix;
	for (i = 1 + child * next_skip; i <= skip; i += next_skip) {
		if (count <= scan[i].bm_bighint) {
			/*
			 * The allocation might fit in the i'th subtree.
			 */
			if (next_skip == 1) {
				r = blst_leaf_alloc(&scan[i], blk, count);
			} else {
				r = blst_meta_alloc(&scan[i], blk, count,
				    radix, next_skip - 1, cursor > blk ?
				    cursor : blk);
			}
			if (r != SWAPBLK_NONE) {
				scan->u.bmu_avail -= count;
				return (r);
			}
		} else if (scan[i].bm_bighint == (daddr_t)-1) {
			/*
			 * Terminator
			 */
			break;
		}
		blk += radix;
	}

	/*
	 * We couldn't allocate count in this subtree, update bighint.
	 */
	if (scan_from_start && scan->bm_bighint >= count)
		scan->bm_bighint = count - 1;

	return (SWAPBLK_NONE);
}

/*
 * BLST_LEAF_FREE() -	free allocated block from leaf bitmap
 */

static void
blst_leaf_free(blmeta_t *scan, daddr_t blk, int count)
{
	/*
	 * free some data in this bitmap
	 *
	 * e.g.
	 *	0000111111111110000
	 *	    \_________/\__/
	 *		v	n
	 */
	int n = blk & (BLIST_BMAP_RADIX - 1);
	u_daddr_t mask;

	mask = ((u_daddr_t)-1 << n) &
	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));

	if (scan->u.bmu_bitmap & mask)
		panic("blst_radix_free: freeing free block");
	scan->u.bmu_bitmap |= mask;

	/*
	 * We could probably do a better job here.  We are required to make
	 * bighint at least as large as the biggest contiguous block of
	 * data.  If we just shoehorn it, a little extra overhead will
	 * be incurred on the next allocation (but only that one typically).
	 */
	scan->bm_bighint = BLIST_BMAP_RADIX;
}

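/*
 * Illustrative mask construction for blst_leaf_free() above (the same
 * expression appears in blst_leaf_fill() below), assuming BLIST_BMAP_RADIX
 * is 64: for a leaf offset n = 4 and count = 8, (-1 << 4) keeps bits 4..63
 * and (-1 >> 52) keeps bits 0..11, so the intersection selects exactly the
 * eight bits 4..11 being freed.
 */
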
/*
 * BLST_META_FREE() - free allocated blocks from radix tree meta info
 *
 *	This support routine frees a range of blocks from the bitmap.
 *	The range must be entirely enclosed by this radix node.  If a
 *	meta node, we break the range down recursively to free blocks
 *	in subnodes (which means that this code can free an arbitrary
 *	range whereas the allocation code cannot allocate an arbitrary
 *	range).
 */

static void
blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, daddr_t radix,
    int skip, daddr_t blk)
{
	int i;
	int next_skip = ((u_int)skip / BLIST_META_RADIX);

#if 0
	printf("free (%llx,%lld) FROM (%llx,%lld)\n",
	    (long long)freeBlk, (long long)count,
	    (long long)blk, (long long)radix
	);
#endif

	if (scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case, with possible
		 * shortcut to ALL-FREE special case.
		 */
		scan->u.bmu_avail = count;
		scan->bm_bighint = count;

		if (count != radix) {
			for (i = 1; i <= skip; i += next_skip) {
				if (scan[i].bm_bighint == (daddr_t)-1)
					break;
				scan[i].bm_bighint = 0;
				if (next_skip == 1) {
					scan[i].u.bmu_bitmap = 0;
				} else {
					scan[i].u.bmu_avail = 0;
				}
			}
			/* fall through */
		}
	} else {
		scan->u.bmu_avail += count;
		/* scan->bm_bighint = radix; */
	}

	/*
	 * ALL-FREE special case.
	 */

	if (scan->u.bmu_avail == radix)
		return;
	if (scan->u.bmu_avail > radix)
		panic("blst_meta_free: freeing already free blocks (%lld) %lld/%lld",
		    (long long)count, (long long)scan->u.bmu_avail,
		    (long long)radix);

	/*
	 * Break the free down into its components
	 */

	radix /= BLIST_META_RADIX;

	i = (freeBlk - blk) / radix;
	blk += i * radix;
	i = i * next_skip + 1;

	while (i <= skip && blk < freeBlk + count) {
		daddr_t v;

		v = blk + radix - freeBlk;
		if (v > count)
			v = count;

		if (scan->bm_bighint == (daddr_t)-1)
			panic("blst_meta_free: freeing unexpected range");

		if (next_skip == 1) {
			blst_leaf_free(&scan[i], freeBlk, v);
		} else {
			blst_meta_free(&scan[i], freeBlk, v,
			    radix, next_skip - 1, blk);
		}
		if (scan->bm_bighint < scan[i].bm_bighint)
			scan->bm_bighint = scan[i].bm_bighint;
		count -= v;
		freeBlk += v;
		blk += radix;
		i += next_skip;
	}
}

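/*
 * Illustrative split performed by blst_meta_free() above (the values are
 * hypothetical; assume a meta node of radix 1024 and BLIST_META_RADIX 16,
 * so each child covers 64 blocks): freeing 50 blocks starting at block 100
 * begins in child 1 (blocks 64..127), which frees the first 28 blocks
 * (100..127); the remaining 22 blocks (128..149) are handed to child 2.
 */
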
/*
 * BLST_COPY() - copy one radix tree to another
 *
 *	Locates free space in the source tree and frees it in the destination
 *	tree.  The space may not already be free in the destination.
 */

static void
blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, daddr_t skip,
    blist_t dest, daddr_t count)
{
	int next_skip;
	int i;

	/*
	 * Leaf node
	 */

	if (radix == BLIST_BMAP_RADIX) {
		u_daddr_t v = scan->u.bmu_bitmap;

		if (v == (u_daddr_t)-1) {
			blist_free(dest, blk, count);
		} else if (v != 0) {
			int i;

			for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) {
				if (v & ((u_daddr_t)1 << i))
					blist_free(dest, blk + i, 1);
			}
		}
		return;
	}

	/*
	 * Meta node
	 */

	if (scan->u.bmu_avail == 0) {
		/*
		 * Source all allocated, leave dest allocated
		 */
		return;
	}
	if (scan->u.bmu_avail == radix) {
		/*
		 * Source all free, free entire dest
		 */
		if (count < radix)
			blist_free(dest, blk, count);
		else
			blist_free(dest, blk, radix);
		return;
	}

	radix /= BLIST_META_RADIX;
	next_skip = ((u_int)skip / BLIST_META_RADIX);

	for (i = 1; count && i <= skip; i += next_skip) {
		if (scan[i].bm_bighint == (daddr_t)-1)
			break;

		if (count >= radix) {
			blst_copy(&scan[i], blk, radix, next_skip - 1,
			    dest, radix);
			count -= radix;
		} else {
			if (count) {
				blst_copy(&scan[i], blk, radix, next_skip - 1,
				    dest, count);
			}
			count = 0;
		}
		blk += radix;
	}
}

/*
 * BLST_LEAF_FILL() -	allocate specific blocks in leaf bitmap
 *
 *	This routine allocates all blocks in the specified range
 *	regardless of any existing allocations in that range.  Returns
 *	the number of blocks allocated by the call.
 */

static daddr_t
blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
{
	int n = blk & (BLIST_BMAP_RADIX - 1);
	daddr_t nblks;
	u_daddr_t mask;

	mask = ((u_daddr_t)-1 << n) &
	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));

	/* Count the number of blocks that we are allocating. */
	nblks = bitcount64(scan->u.bmu_bitmap & mask);

	scan->u.bmu_bitmap &= ~mask;
	return (nblks);
}

/*
 * BLST_META_FILL() -	allocate specific blocks at a meta node
 *
 *	This routine allocates the specified range of blocks,
 *	regardless of any existing allocations in the range.  The
 *	range must be within the extent of this node.  Returns the
 *	number of blocks allocated by the call.
 */
static daddr_t
blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, daddr_t radix,
    int skip, daddr_t blk)
{
	int i;
	int next_skip = ((u_int)skip / BLIST_META_RADIX);
	daddr_t nblks = 0;

	if (count > radix) {
		/*
		 * The allocation exceeds the number of blocks that are
		 * managed by this meta node.
		 */
		panic("allocation too large");
	}
	if (count == radix || scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case
		 */
		nblks = scan->u.bmu_avail;
		scan->u.bmu_avail = 0;
		scan->bm_bighint = 0;
		return (nblks);
	}

	/*
	 * An ALL-FREE meta node requires special handling before allocating
	 * any of its blocks.
	 */
	if (scan->u.bmu_avail == radix) {
		radix /= BLIST_META_RADIX;

		/*
		 * Reinitialize each of the meta node's children.  An ALL-FREE
		 * meta node cannot have a terminator in any subtree.
		 */
		for (i = 1; i <= skip; i += next_skip) {
			if (next_skip == 1) {
				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
				scan[i].bm_bighint = BLIST_BMAP_RADIX;
			} else {
				scan[i].bm_bighint = radix;
				scan[i].u.bmu_avail = radix;
			}
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	i = (allocBlk - blk) / radix;
	blk += i * radix;
	i = i * next_skip + 1;

	while (i <= skip && blk < allocBlk + count) {
		daddr_t v;

		v = blk + radix - allocBlk;
		if (v > count)
			v = count;

		if (scan->bm_bighint == (daddr_t)-1)
			panic("blst_meta_fill: filling unexpected range");

		if (next_skip == 1) {
			nblks += blst_leaf_fill(&scan[i], allocBlk, v);
		} else {
			nblks += blst_meta_fill(&scan[i], allocBlk, v,
			    radix, next_skip - 1, blk);
		}
		count -= v;
		allocBlk += v;
		blk += radix;
		i += next_skip;
	}
	scan->u.bmu_avail -= nblks;
	return (nblks);
}

/*
 * BLST_RADIX_INIT() -	initialize radix tree
 *
 *	Initialize our meta structures and bitmaps and calculate the exact
 *	amount of space required to manage 'count' blocks - this space may
 *	be considerably less than the calculated radix due to the large
 *	RADIX values we use.
 */

static daddr_t
blst_radix_init(blmeta_t *scan, daddr_t radix, int skip, daddr_t count)
{
	int i;
	int next_skip;
	daddr_t memindex = 0;

	/*
	 * Leaf node
	 */

	if (radix == BLIST_BMAP_RADIX) {
		if (scan) {
			scan->bm_bighint = 0;
			scan->u.bmu_bitmap = 0;
		}
		return (memindex);
	}

	/*
	 * Meta node.  If allocating the entire object we can special
	 * case it.  However, we need to figure out how much memory
	 * is required to manage 'count' blocks, so we continue on anyway.
	 */

	if (scan) {
		scan->bm_bighint = 0;
		scan->u.bmu_avail = 0;
	}

	radix /= BLIST_META_RADIX;
	next_skip = ((u_int)skip / BLIST_META_RADIX);

	for (i = 1; i <= skip; i += next_skip) {
		if (count >= radix) {
			/*
			 * Allocate the entire object
			 */
			memindex = i + blst_radix_init(
			    ((scan) ? &scan[i] : NULL),
			    radix,
			    next_skip - 1,
			    radix
			);
			count -= radix;
		} else if (count > 0) {
			/*
			 * Allocate a partial object
			 */
			memindex = i + blst_radix_init(
			    ((scan) ? &scan[i] : NULL),
			    radix,
			    next_skip - 1,
			    count
			);
			count = 0;
		} else {
			/*
			 * Add terminator and break out
			 */
			if (scan)
				scan[i].bm_bighint = (daddr_t)-1;
			break;
		}
	}
	if (memindex < i)
		memindex = i;
	return (memindex);
}

#ifdef BLIST_DEBUG

static void
blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab)
{
	int i;
	int next_skip;
	int lastState = 0;

	if (radix == BLIST_BMAP_RADIX) {
		printf(
		    "%*.*s(%08llx,%lld): bitmap %016llx big=%lld\n",
		    tab, tab, "",
		    (long long)blk, (long long)radix,
		    (long long)scan->u.bmu_bitmap,
		    (long long)scan->bm_bighint
		);
		return;
	}

	if (scan->u.bmu_avail == 0) {
		printf(
		    "%*.*s(%08llx,%lld) ALL ALLOCATED\n",
		    tab, tab, "",
		    (long long)blk,
		    (long long)radix
		);
		return;
	}
	if (scan->u.bmu_avail == radix) {
		printf(
		    "%*.*s(%08llx,%lld) ALL FREE\n",
		    tab, tab, "",
		    (long long)blk,
		    (long long)radix
		);
		return;
	}

	printf(
	    "%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n",
	    tab, tab, "",
	    (long long)blk, (long long)radix,
	    (long long)scan->u.bmu_avail,
	    (long long)radix,
	    (long long)scan->bm_bighint
	);

	radix /= BLIST_META_RADIX;
	next_skip = ((u_int)skip / BLIST_META_RADIX);
	tab += 4;

	for (i = 1; i <= skip; i += next_skip) {
		if (scan[i].bm_bighint == (daddr_t)-1) {
			printf(
			    "%*.*s(%08llx,%lld): Terminator\n",
			    tab, tab, "",
			    (long long)blk, (long long)radix
			);
			lastState = 0;
			break;
		}
		blst_radix_print(
		    &scan[i],
		    blk,
		    radix,
		    next_skip - 1,
		    tab
		);
		blk += radix;
	}
	tab -= 4;

	printf(
	    "%*.*s}\n",
	    tab, tab, ""
	);
}

#endif

#ifdef BLIST_DEBUG

int
main(int ac, char **av)
{
	int size = 1024;
	int i;
	blist_t bl;

	for (i = 1; i < ac; ++i) {
		const char *ptr = av[i];
		if (*ptr != '-') {
			size = strtol(ptr, NULL, 0);
			continue;
		}
		ptr += 2;
		fprintf(stderr, "Bad option: %s\n", ptr - 2);
		exit(1);
	}
	bl = blist_create(size, M_WAITOK);
	blist_free(bl, 0, size);

	for (;;) {
		char buf[1024];
		long long da = 0;
		long long count = 0;

		printf("%lld/%lld/%lld> ", (long long)blist_avail(bl),
		    (long long)size, (long long)bl->bl_radix);
		fflush(stdout);
		if (fgets(buf, sizeof(buf), stdin) == NULL)
			break;
		switch (buf[0]) {
		case 'r':
			if (sscanf(buf + 1, "%lld", &count) == 1) {
				blist_resize(&bl, count, 1, M_WAITOK);
			} else {
				printf("?\n");
			}
		case 'p':
			blist_print(bl);
			break;
		case 'a':
			if (sscanf(buf + 1, "%lld", &count) == 1) {
				daddr_t blk = blist_alloc(bl, count);
				printf("    R=%08llx\n", (long long)blk);
			} else {
				printf("?\n");
			}
			break;
		case 'f':
			if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
				blist_free(bl, da, count);
			} else {
				printf("?\n");
			}
			break;
		case 'l':
			if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
				printf("    n=%jd\n",
				    (intmax_t)blist_fill(bl, da, count));
			} else {
				printf("?\n");
			}
			break;
		case '?':
		case 'h':
			puts(
			    "p          -print\n"
			    "a %d       -allocate\n"
			    "f %x %d    -free\n"
			    "l %x %d    -fill\n"
			    "r %d       -resize\n"
-help" 1105 ); 1106 break; 1107 default: 1108 printf("?\n"); 1109 break; 1110 } 1111 } 1112 return(0); 1113 } 1114 1115 void 1116 panic(const char *ctl, ...) 1117 { 1118 va_list va; 1119 1120 va_start(va, ctl); 1121 vfprintf(stderr, ctl, va); 1122 fprintf(stderr, "\n"); 1123 va_end(va); 1124 exit(1); 1125 } 1126 1127 #endif 1128 1129