1 /*- 2 * Copyright (c) 1998 Matthew Dillon. All Rights Reserved. 3 * Redistribution and use in source and binary forms, with or without 4 * modification, are permitted provided that the following conditions 5 * are met: 6 * 1. Redistributions of source code must retain the above copyright 7 * notice, this list of conditions and the following disclaimer. 8 * 2. Redistributions in binary form must reproduce the above copyright 9 * notice, this list of conditions and the following disclaimer in the 10 * documentation and/or other materials provided with the distribution. 11 * 3. Neither the name of the University nor the names of its contributors 12 * may be used to endorse or promote products derived from this software 13 * without specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 19 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 21 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 24 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 /* 28 * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting 29 * 30 * This module implements a general bitmap allocator/deallocator. The 31 * allocator eats around 2 bits per 'block'. The module does not 32 * try to interpret the meaning of a 'block' other than to return 33 * SWAPBLK_NONE on an allocation failure. 34 * 35 * A radix tree is used to maintain the bitmap. Two radix constants are 36 * involved: One for the bitmaps contained in the leaf nodes (typically 37 * 32), and one for the meta nodes (typically 16). Both meta and leaf 38 * nodes have a hint field. This field gives us a hint as to the largest 39 * free contiguous range of blocks under the node. It may contain a 40 * value that is too high, but will never contain a value that is too 41 * low. When the radix tree is searched, allocation failures in subtrees 42 * update the hint. 43 * 44 * The radix tree also implements two collapsed states for meta nodes: 45 * the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is 46 * in either of these two states, all information contained underneath 47 * the node is considered stale. These states are used to optimize 48 * allocation and freeing operations. 49 * 50 * The hinting greatly increases code efficiency for allocations while 51 * the general radix structure optimizes both allocations and frees. The 52 * radix tree should be able to operate well no matter how much 53 * fragmentation there is and no matter how large a bitmap is used. 54 * 55 * The blist code wires all necessary memory at creation time. Neither 56 * allocations nor frees require interaction with the memory subsystem. 57 * The non-blocking features of the blist code are used in the swap code 58 * (vm/swap_pager.c). 59 * 60 * LAYOUT: The radix tree is laid out recursively using a 61 * linear array. Each meta node is immediately followed (laid out 62 * sequentially in memory) by BLIST_META_RADIX lower level nodes. This 63 * is a recursive structure but one that can be easily scanned through 64 * a very simple 'skip' calculation. In order to support large radixes, 65 * portions of the tree may reside outside our memory allocation. We 66 * handle this with an early-termination optimization (when bighint is 67 * set to -1) on the scan. The memory allocation is only large enough 68 * to cover the number of blocks requested at creation time even if it 69 * must be encompassed in larger root-node radix. 70 * 71 * NOTE: the allocator cannot currently allocate more than 72 * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too 73 * large' if you try. This is an area that could use improvement. The 74 * radix is large enough that this restriction does not effect the swap 75 * system, though. Currently only the allocation code is effected by 76 * this algorithmic unfeature. The freeing code can handle arbitrary 77 * ranges. 78 * 79 * This code can be compiled stand-alone for debugging. 80 */ 81 82 #include <sys/cdefs.h> 83 __FBSDID("$FreeBSD$"); 84 85 #ifdef _KERNEL 86 87 #include <sys/param.h> 88 #include <sys/systm.h> 89 #include <sys/lock.h> 90 #include <sys/kernel.h> 91 #include <sys/blist.h> 92 #include <sys/malloc.h> 93 #include <sys/proc.h> 94 #include <sys/mutex.h> 95 96 #else 97 98 #ifndef BLIST_NO_DEBUG 99 #define BLIST_DEBUG 100 #endif 101 102 #include <sys/types.h> 103 #include <sys/malloc.h> 104 #include <stdio.h> 105 #include <string.h> 106 #include <stdlib.h> 107 #include <stdarg.h> 108 109 #define bitcount64(x) __bitcount64((uint64_t)(x)) 110 #define malloc(a,b,c) calloc(a, 1) 111 #define free(a,b) free(a) 112 113 #include <sys/blist.h> 114 115 void panic(const char *ctl, ...); 116 117 #endif 118 119 /* 120 * static support functions 121 */ 122 123 static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count); 124 static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t blk, 125 daddr_t count, daddr_t radix, int skip); 126 static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count); 127 static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, 128 daddr_t radix, int skip, daddr_t blk); 129 static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, 130 daddr_t skip, blist_t dest, daddr_t count); 131 static daddr_t blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count); 132 static daddr_t blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, 133 daddr_t radix, int skip, daddr_t blk); 134 static daddr_t blst_radix_init(blmeta_t *scan, daddr_t radix, 135 int skip, daddr_t count); 136 #ifndef _KERNEL 137 static void blst_radix_print(blmeta_t *scan, daddr_t blk, 138 daddr_t radix, int skip, int tab); 139 #endif 140 141 #ifdef _KERNEL 142 static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space"); 143 #endif 144 145 /* 146 * blist_create() - create a blist capable of handling up to the specified 147 * number of blocks 148 * 149 * blocks - must be greater than 0 150 * flags - malloc flags 151 * 152 * The smallest blist consists of a single leaf node capable of 153 * managing BLIST_BMAP_RADIX blocks. 154 */ 155 156 blist_t 157 blist_create(daddr_t blocks, int flags) 158 { 159 blist_t bl; 160 daddr_t nodes, radix; 161 int skip = 0; 162 163 /* 164 * Calculate radix and skip field used for scanning. 165 */ 166 radix = BLIST_BMAP_RADIX; 167 168 while (radix < blocks) { 169 radix *= BLIST_META_RADIX; 170 skip = (skip + 1) * BLIST_META_RADIX; 171 } 172 173 bl = malloc(sizeof(struct blist), M_SWAP, flags); 174 if (bl == NULL) 175 return (NULL); 176 177 bl->bl_blocks = blocks; 178 bl->bl_radix = radix; 179 bl->bl_skip = skip; 180 nodes = 1 + blst_radix_init(NULL, radix, bl->bl_skip, blocks); 181 bl->bl_root = malloc(nodes * sizeof(blmeta_t), M_SWAP, flags); 182 if (bl->bl_root == NULL) { 183 free(bl, M_SWAP); 184 return (NULL); 185 } 186 blst_radix_init(bl->bl_root, radix, bl->bl_skip, blocks); 187 188 #if defined(BLIST_DEBUG) 189 printf( 190 "BLIST representing %lld blocks (%lld MB of swap)" 191 ", requiring %lldK of ram\n", 192 (long long)bl->bl_blocks, 193 (long long)bl->bl_blocks * 4 / 1024, 194 (long long)(nodes * sizeof(blmeta_t) + 1023) / 1024 195 ); 196 printf("BLIST raw radix tree contains %lld records\n", 197 (long long)nodes); 198 #endif 199 200 return (bl); 201 } 202 203 void 204 blist_destroy(blist_t bl) 205 { 206 free(bl->bl_root, M_SWAP); 207 free(bl, M_SWAP); 208 } 209 210 /* 211 * blist_alloc() - reserve space in the block bitmap. Return the base 212 * of a contiguous region or SWAPBLK_NONE if space could 213 * not be allocated. 214 */ 215 216 daddr_t 217 blist_alloc(blist_t bl, daddr_t count) 218 { 219 daddr_t blk; 220 221 if (bl != NULL && count <= bl->bl_root->bm_bighint) { 222 if (bl->bl_radix == BLIST_BMAP_RADIX) 223 blk = blst_leaf_alloc(bl->bl_root, 0, count); 224 else 225 blk = blst_meta_alloc(bl->bl_root, 0, count, 226 bl->bl_radix, bl->bl_skip); 227 return (blk); 228 } 229 return (SWAPBLK_NONE); 230 } 231 232 /* 233 * blist_avail() - return the number of free blocks. 234 */ 235 236 daddr_t 237 blist_avail(blist_t bl) 238 { 239 240 if (bl->bl_radix == BLIST_BMAP_RADIX) 241 return (bitcount64(bl->bl_root->u.bmu_bitmap)); 242 else 243 return (bl->bl_root->u.bmu_avail); 244 } 245 246 /* 247 * blist_free() - free up space in the block bitmap. Return the base 248 * of a contiguous region. Panic if an inconsistancy is 249 * found. 250 */ 251 252 void 253 blist_free(blist_t bl, daddr_t blkno, daddr_t count) 254 { 255 if (bl) { 256 if (bl->bl_radix == BLIST_BMAP_RADIX) 257 blst_leaf_free(bl->bl_root, blkno, count); 258 else 259 blst_meta_free(bl->bl_root, blkno, count, 260 bl->bl_radix, bl->bl_skip, 0); 261 } 262 } 263 264 /* 265 * blist_fill() - mark a region in the block bitmap as off-limits 266 * to the allocator (i.e. allocate it), ignoring any 267 * existing allocations. Return the number of blocks 268 * actually filled that were free before the call. 269 */ 270 271 daddr_t 272 blist_fill(blist_t bl, daddr_t blkno, daddr_t count) 273 { 274 daddr_t filled; 275 276 if (bl) { 277 if (bl->bl_radix == BLIST_BMAP_RADIX) 278 filled = blst_leaf_fill(bl->bl_root, blkno, count); 279 else 280 filled = blst_meta_fill(bl->bl_root, blkno, count, 281 bl->bl_radix, bl->bl_skip, 0); 282 return (filled); 283 } 284 return (0); 285 } 286 287 /* 288 * blist_resize() - resize an existing radix tree to handle the 289 * specified number of blocks. This will reallocate 290 * the tree and transfer the previous bitmap to the new 291 * one. When extending the tree you can specify whether 292 * the new blocks are to left allocated or freed. 293 */ 294 295 void 296 blist_resize(blist_t *pbl, daddr_t count, int freenew, int flags) 297 { 298 blist_t newbl = blist_create(count, flags); 299 blist_t save = *pbl; 300 301 *pbl = newbl; 302 if (count > save->bl_blocks) 303 count = save->bl_blocks; 304 blst_copy(save->bl_root, 0, save->bl_radix, save->bl_skip, newbl, count); 305 306 /* 307 * If resizing upwards, should we free the new space or not? 308 */ 309 if (freenew && count < newbl->bl_blocks) { 310 blist_free(newbl, count, newbl->bl_blocks - count); 311 } 312 blist_destroy(save); 313 } 314 315 #ifdef BLIST_DEBUG 316 317 /* 318 * blist_print() - dump radix tree 319 */ 320 321 void 322 blist_print(blist_t bl) 323 { 324 printf("BLIST {\n"); 325 blst_radix_print(bl->bl_root, 0, bl->bl_radix, bl->bl_skip, 4); 326 printf("}\n"); 327 } 328 329 #endif 330 331 /************************************************************************ 332 * ALLOCATION SUPPORT FUNCTIONS * 333 ************************************************************************ 334 * 335 * These support functions do all the actual work. They may seem 336 * rather longish, but that's because I've commented them up. The 337 * actual code is straight forward. 338 * 339 */ 340 341 /* 342 * blist_leaf_alloc() - allocate at a leaf in the radix tree (a bitmap). 343 * 344 * This is the core of the allocator and is optimized for the 1 block 345 * and the BLIST_BMAP_RADIX block allocation cases. Other cases are 346 * somewhat slower. The 1 block allocation case is log2 and extremely 347 * quick. 348 */ 349 350 static daddr_t 351 blst_leaf_alloc( 352 blmeta_t *scan, 353 daddr_t blk, 354 int count 355 ) { 356 u_daddr_t orig = scan->u.bmu_bitmap; 357 358 if (orig == 0) { 359 /* 360 * Optimize bitmap all-allocated case. Also, count = 1 361 * case assumes at least 1 bit is free in the bitmap, so 362 * we have to take care of this case here. 363 */ 364 scan->bm_bighint = 0; 365 return(SWAPBLK_NONE); 366 } 367 if (count == 1) { 368 /* 369 * Optimized code to allocate one bit out of the bitmap 370 */ 371 u_daddr_t mask; 372 int j = BLIST_BMAP_RADIX/2; 373 int r = 0; 374 375 mask = (u_daddr_t)-1 >> (BLIST_BMAP_RADIX/2); 376 377 while (j) { 378 if ((orig & mask) == 0) { 379 r += j; 380 orig >>= j; 381 } 382 j >>= 1; 383 mask >>= j; 384 } 385 scan->u.bmu_bitmap &= ~((u_daddr_t)1 << r); 386 return(blk + r); 387 } 388 if (count <= BLIST_BMAP_RADIX) { 389 /* 390 * non-optimized code to allocate N bits out of the bitmap. 391 * The more bits, the faster the code runs. It will run 392 * the slowest allocating 2 bits, but since there aren't any 393 * memory ops in the core loop (or shouldn't be, anyway), 394 * you probably won't notice the difference. 395 */ 396 int j; 397 int n = BLIST_BMAP_RADIX - count; 398 u_daddr_t mask; 399 400 mask = (u_daddr_t)-1 >> n; 401 402 for (j = 0; j <= n; ++j) { 403 if ((orig & mask) == mask) { 404 scan->u.bmu_bitmap &= ~mask; 405 return(blk + j); 406 } 407 mask = (mask << 1); 408 } 409 } 410 /* 411 * We couldn't allocate count in this subtree, update bighint. 412 */ 413 scan->bm_bighint = count - 1; 414 return(SWAPBLK_NONE); 415 } 416 417 /* 418 * blist_meta_alloc() - allocate at a meta in the radix tree. 419 * 420 * Attempt to allocate at a meta node. If we can't, we update 421 * bighint and return a failure. Updating bighint optimize future 422 * calls that hit this node. We have to check for our collapse cases 423 * and we have a few optimizations strewn in as well. 424 */ 425 426 static daddr_t 427 blst_meta_alloc( 428 blmeta_t *scan, 429 daddr_t blk, 430 daddr_t count, 431 daddr_t radix, 432 int skip 433 ) { 434 daddr_t r; 435 int i; 436 int next_skip = ((u_int)skip / BLIST_META_RADIX); 437 438 if (scan->u.bmu_avail < count) { 439 /* 440 * The meta node's hint must be too large if the allocation 441 * exceeds the number of free blocks. Reduce the hint, and 442 * return failure. 443 */ 444 scan->bm_bighint = scan->u.bmu_avail; 445 return (SWAPBLK_NONE); 446 } 447 448 /* 449 * An ALL-FREE meta node requires special handling before allocating 450 * any of its blocks. 451 */ 452 if (scan->u.bmu_avail == radix) { 453 radix /= BLIST_META_RADIX; 454 455 /* 456 * Reinitialize each of the meta node's children. An ALL-FREE 457 * meta node cannot have a terminator in any subtree. 458 */ 459 for (i = 1; i <= skip; i += next_skip) { 460 if (next_skip == 1) { 461 scan[i].u.bmu_bitmap = (u_daddr_t)-1; 462 scan[i].bm_bighint = BLIST_BMAP_RADIX; 463 } else { 464 scan[i].bm_bighint = radix; 465 scan[i].u.bmu_avail = radix; 466 } 467 } 468 } else { 469 radix /= BLIST_META_RADIX; 470 } 471 472 if (count > radix) { 473 /* 474 * The allocation exceeds the number of blocks that are 475 * managed by a subtree of this meta node. 476 */ 477 panic("allocation too large"); 478 } 479 for (i = 1; i <= skip; i += next_skip) { 480 if (count <= scan[i].bm_bighint) { 481 /* 482 * The allocation might fit in the i'th subtree. 483 */ 484 if (next_skip == 1) { 485 r = blst_leaf_alloc(&scan[i], blk, count); 486 } else { 487 r = blst_meta_alloc(&scan[i], blk, count, 488 radix, next_skip - 1); 489 } 490 if (r != SWAPBLK_NONE) { 491 scan->u.bmu_avail -= count; 492 return (r); 493 } 494 } else if (scan[i].bm_bighint == (daddr_t)-1) { 495 /* 496 * Terminator 497 */ 498 break; 499 } 500 blk += radix; 501 } 502 503 /* 504 * We couldn't allocate count in this subtree, update bighint. 505 */ 506 if (scan->bm_bighint >= count) 507 scan->bm_bighint = count - 1; 508 return(SWAPBLK_NONE); 509 } 510 511 /* 512 * BLST_LEAF_FREE() - free allocated block from leaf bitmap 513 * 514 */ 515 516 static void 517 blst_leaf_free( 518 blmeta_t *scan, 519 daddr_t blk, 520 int count 521 ) { 522 /* 523 * free some data in this bitmap 524 * 525 * e.g. 526 * 0000111111111110000 527 * \_________/\__/ 528 * v n 529 */ 530 int n = blk & (BLIST_BMAP_RADIX - 1); 531 u_daddr_t mask; 532 533 mask = ((u_daddr_t)-1 << n) & 534 ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n)); 535 536 if (scan->u.bmu_bitmap & mask) 537 panic("blst_radix_free: freeing free block"); 538 scan->u.bmu_bitmap |= mask; 539 540 /* 541 * We could probably do a better job here. We are required to make 542 * bighint at least as large as the biggest contiguous block of 543 * data. If we just shoehorn it, a little extra overhead will 544 * be incured on the next allocation (but only that one typically). 545 */ 546 scan->bm_bighint = BLIST_BMAP_RADIX; 547 } 548 549 /* 550 * BLST_META_FREE() - free allocated blocks from radix tree meta info 551 * 552 * This support routine frees a range of blocks from the bitmap. 553 * The range must be entirely enclosed by this radix node. If a 554 * meta node, we break the range down recursively to free blocks 555 * in subnodes (which means that this code can free an arbitrary 556 * range whereas the allocation code cannot allocate an arbitrary 557 * range). 558 */ 559 560 static void 561 blst_meta_free( 562 blmeta_t *scan, 563 daddr_t freeBlk, 564 daddr_t count, 565 daddr_t radix, 566 int skip, 567 daddr_t blk 568 ) { 569 int i; 570 int next_skip = ((u_int)skip / BLIST_META_RADIX); 571 572 #if 0 573 printf("free (%llx,%lld) FROM (%llx,%lld)\n", 574 (long long)freeBlk, (long long)count, 575 (long long)blk, (long long)radix 576 ); 577 #endif 578 579 if (scan->u.bmu_avail == 0) { 580 /* 581 * ALL-ALLOCATED special case, with possible 582 * shortcut to ALL-FREE special case. 583 */ 584 scan->u.bmu_avail = count; 585 scan->bm_bighint = count; 586 587 if (count != radix) { 588 for (i = 1; i <= skip; i += next_skip) { 589 if (scan[i].bm_bighint == (daddr_t)-1) 590 break; 591 scan[i].bm_bighint = 0; 592 if (next_skip == 1) { 593 scan[i].u.bmu_bitmap = 0; 594 } else { 595 scan[i].u.bmu_avail = 0; 596 } 597 } 598 /* fall through */ 599 } 600 } else { 601 scan->u.bmu_avail += count; 602 /* scan->bm_bighint = radix; */ 603 } 604 605 /* 606 * ALL-FREE special case. 607 */ 608 609 if (scan->u.bmu_avail == radix) 610 return; 611 if (scan->u.bmu_avail > radix) 612 panic("blst_meta_free: freeing already free blocks (%lld) %lld/%lld", 613 (long long)count, (long long)scan->u.bmu_avail, 614 (long long)radix); 615 616 /* 617 * Break the free down into its components 618 */ 619 620 radix /= BLIST_META_RADIX; 621 622 i = (freeBlk - blk) / radix; 623 blk += i * radix; 624 i = i * next_skip + 1; 625 626 while (i <= skip && blk < freeBlk + count) { 627 daddr_t v; 628 629 v = blk + radix - freeBlk; 630 if (v > count) 631 v = count; 632 633 if (scan->bm_bighint == (daddr_t)-1) 634 panic("blst_meta_free: freeing unexpected range"); 635 636 if (next_skip == 1) { 637 blst_leaf_free(&scan[i], freeBlk, v); 638 } else { 639 blst_meta_free(&scan[i], freeBlk, v, radix, next_skip - 1, blk); 640 } 641 if (scan->bm_bighint < scan[i].bm_bighint) 642 scan->bm_bighint = scan[i].bm_bighint; 643 count -= v; 644 freeBlk += v; 645 blk += radix; 646 i += next_skip; 647 } 648 } 649 650 /* 651 * BLIST_RADIX_COPY() - copy one radix tree to another 652 * 653 * Locates free space in the source tree and frees it in the destination 654 * tree. The space may not already be free in the destination. 655 */ 656 657 static void blst_copy( 658 blmeta_t *scan, 659 daddr_t blk, 660 daddr_t radix, 661 daddr_t skip, 662 blist_t dest, 663 daddr_t count 664 ) { 665 int next_skip; 666 int i; 667 668 /* 669 * Leaf node 670 */ 671 672 if (radix == BLIST_BMAP_RADIX) { 673 u_daddr_t v = scan->u.bmu_bitmap; 674 675 if (v == (u_daddr_t)-1) { 676 blist_free(dest, blk, count); 677 } else if (v != 0) { 678 int i; 679 680 for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) { 681 if (v & ((u_daddr_t)1 << i)) 682 blist_free(dest, blk + i, 1); 683 } 684 } 685 return; 686 } 687 688 /* 689 * Meta node 690 */ 691 692 if (scan->u.bmu_avail == 0) { 693 /* 694 * Source all allocated, leave dest allocated 695 */ 696 return; 697 } 698 if (scan->u.bmu_avail == radix) { 699 /* 700 * Source all free, free entire dest 701 */ 702 if (count < radix) 703 blist_free(dest, blk, count); 704 else 705 blist_free(dest, blk, radix); 706 return; 707 } 708 709 710 radix /= BLIST_META_RADIX; 711 next_skip = ((u_int)skip / BLIST_META_RADIX); 712 713 for (i = 1; count && i <= skip; i += next_skip) { 714 if (scan[i].bm_bighint == (daddr_t)-1) 715 break; 716 717 if (count >= radix) { 718 blst_copy( 719 &scan[i], 720 blk, 721 radix, 722 next_skip - 1, 723 dest, 724 radix 725 ); 726 count -= radix; 727 } else { 728 if (count) { 729 blst_copy( 730 &scan[i], 731 blk, 732 radix, 733 next_skip - 1, 734 dest, 735 count 736 ); 737 } 738 count = 0; 739 } 740 blk += radix; 741 } 742 } 743 744 /* 745 * BLST_LEAF_FILL() - allocate specific blocks in leaf bitmap 746 * 747 * This routine allocates all blocks in the specified range 748 * regardless of any existing allocations in that range. Returns 749 * the number of blocks allocated by the call. 750 */ 751 752 static daddr_t 753 blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count) 754 { 755 int n = blk & (BLIST_BMAP_RADIX - 1); 756 daddr_t nblks; 757 u_daddr_t mask; 758 759 mask = ((u_daddr_t)-1 << n) & 760 ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n)); 761 762 /* Count the number of blocks that we are allocating. */ 763 nblks = bitcount64(scan->u.bmu_bitmap & mask); 764 765 scan->u.bmu_bitmap &= ~mask; 766 return (nblks); 767 } 768 769 /* 770 * BLIST_META_FILL() - allocate specific blocks at a meta node 771 * 772 * This routine allocates the specified range of blocks, 773 * regardless of any existing allocations in the range. The 774 * range must be within the extent of this node. Returns the 775 * number of blocks allocated by the call. 776 */ 777 static daddr_t 778 blst_meta_fill( 779 blmeta_t *scan, 780 daddr_t allocBlk, 781 daddr_t count, 782 daddr_t radix, 783 int skip, 784 daddr_t blk 785 ) { 786 int i; 787 int next_skip = ((u_int)skip / BLIST_META_RADIX); 788 daddr_t nblks = 0; 789 790 if (count > radix) { 791 /* 792 * The allocation exceeds the number of blocks that are 793 * managed by this meta node. 794 */ 795 panic("allocation too large"); 796 } 797 if (count == radix || scan->u.bmu_avail == 0) { 798 /* 799 * ALL-ALLOCATED special case 800 */ 801 nblks = scan->u.bmu_avail; 802 scan->u.bmu_avail = 0; 803 scan->bm_bighint = 0; 804 return nblks; 805 } 806 807 /* 808 * An ALL-FREE meta node requires special handling before allocating 809 * any of its blocks. 810 */ 811 if (scan->u.bmu_avail == radix) { 812 radix /= BLIST_META_RADIX; 813 814 /* 815 * Reinitialize each of the meta node's children. An ALL-FREE 816 * meta node cannot have a terminator in any subtree. 817 */ 818 for (i = 1; i <= skip; i += next_skip) { 819 if (next_skip == 1) { 820 scan[i].u.bmu_bitmap = (u_daddr_t)-1; 821 scan[i].bm_bighint = BLIST_BMAP_RADIX; 822 } else { 823 scan[i].bm_bighint = radix; 824 scan[i].u.bmu_avail = radix; 825 } 826 } 827 } else { 828 radix /= BLIST_META_RADIX; 829 } 830 831 i = (allocBlk - blk) / radix; 832 blk += i * radix; 833 i = i * next_skip + 1; 834 835 while (i <= skip && blk < allocBlk + count) { 836 daddr_t v; 837 838 v = blk + radix - allocBlk; 839 if (v > count) 840 v = count; 841 842 if (scan->bm_bighint == (daddr_t)-1) 843 panic("blst_meta_fill: filling unexpected range"); 844 845 if (next_skip == 1) { 846 nblks += blst_leaf_fill(&scan[i], allocBlk, v); 847 } else { 848 nblks += blst_meta_fill(&scan[i], allocBlk, v, 849 radix, next_skip - 1, blk); 850 } 851 count -= v; 852 allocBlk += v; 853 blk += radix; 854 i += next_skip; 855 } 856 scan->u.bmu_avail -= nblks; 857 return nblks; 858 } 859 860 /* 861 * BLST_RADIX_INIT() - initialize radix tree 862 * 863 * Initialize our meta structures and bitmaps and calculate the exact 864 * amount of space required to manage 'count' blocks - this space may 865 * be considerably less than the calculated radix due to the large 866 * RADIX values we use. 867 */ 868 869 static daddr_t 870 blst_radix_init(blmeta_t *scan, daddr_t radix, int skip, daddr_t count) 871 { 872 int i; 873 int next_skip; 874 daddr_t memindex = 0; 875 876 /* 877 * Leaf node 878 */ 879 880 if (radix == BLIST_BMAP_RADIX) { 881 if (scan) { 882 scan->bm_bighint = 0; 883 scan->u.bmu_bitmap = 0; 884 } 885 return(memindex); 886 } 887 888 /* 889 * Meta node. If allocating the entire object we can special 890 * case it. However, we need to figure out how much memory 891 * is required to manage 'count' blocks, so we continue on anyway. 892 */ 893 894 if (scan) { 895 scan->bm_bighint = 0; 896 scan->u.bmu_avail = 0; 897 } 898 899 radix /= BLIST_META_RADIX; 900 next_skip = ((u_int)skip / BLIST_META_RADIX); 901 902 for (i = 1; i <= skip; i += next_skip) { 903 if (count >= radix) { 904 /* 905 * Allocate the entire object 906 */ 907 memindex = i + blst_radix_init( 908 ((scan) ? &scan[i] : NULL), 909 radix, 910 next_skip - 1, 911 radix 912 ); 913 count -= radix; 914 } else if (count > 0) { 915 /* 916 * Allocate a partial object 917 */ 918 memindex = i + blst_radix_init( 919 ((scan) ? &scan[i] : NULL), 920 radix, 921 next_skip - 1, 922 count 923 ); 924 count = 0; 925 } else { 926 /* 927 * Add terminator and break out 928 */ 929 if (scan) 930 scan[i].bm_bighint = (daddr_t)-1; 931 break; 932 } 933 } 934 if (memindex < i) 935 memindex = i; 936 return(memindex); 937 } 938 939 #ifdef BLIST_DEBUG 940 941 static void 942 blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab) 943 { 944 int i; 945 int next_skip; 946 int lastState = 0; 947 948 if (radix == BLIST_BMAP_RADIX) { 949 printf( 950 "%*.*s(%08llx,%lld): bitmap %016llx big=%lld\n", 951 tab, tab, "", 952 (long long)blk, (long long)radix, 953 (long long)scan->u.bmu_bitmap, 954 (long long)scan->bm_bighint 955 ); 956 return; 957 } 958 959 if (scan->u.bmu_avail == 0) { 960 printf( 961 "%*.*s(%08llx,%lld) ALL ALLOCATED\n", 962 tab, tab, "", 963 (long long)blk, 964 (long long)radix 965 ); 966 return; 967 } 968 if (scan->u.bmu_avail == radix) { 969 printf( 970 "%*.*s(%08llx,%lld) ALL FREE\n", 971 tab, tab, "", 972 (long long)blk, 973 (long long)radix 974 ); 975 return; 976 } 977 978 printf( 979 "%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n", 980 tab, tab, "", 981 (long long)blk, (long long)radix, 982 (long long)scan->u.bmu_avail, 983 (long long)radix, 984 (long long)scan->bm_bighint 985 ); 986 987 radix /= BLIST_META_RADIX; 988 next_skip = ((u_int)skip / BLIST_META_RADIX); 989 tab += 4; 990 991 for (i = 1; i <= skip; i += next_skip) { 992 if (scan[i].bm_bighint == (daddr_t)-1) { 993 printf( 994 "%*.*s(%08llx,%lld): Terminator\n", 995 tab, tab, "", 996 (long long)blk, (long long)radix 997 ); 998 lastState = 0; 999 break; 1000 } 1001 blst_radix_print( 1002 &scan[i], 1003 blk, 1004 radix, 1005 next_skip - 1, 1006 tab 1007 ); 1008 blk += radix; 1009 } 1010 tab -= 4; 1011 1012 printf( 1013 "%*.*s}\n", 1014 tab, tab, "" 1015 ); 1016 } 1017 1018 #endif 1019 1020 #ifdef BLIST_DEBUG 1021 1022 int 1023 main(int ac, char **av) 1024 { 1025 int size = 1024; 1026 int i; 1027 blist_t bl; 1028 1029 for (i = 1; i < ac; ++i) { 1030 const char *ptr = av[i]; 1031 if (*ptr != '-') { 1032 size = strtol(ptr, NULL, 0); 1033 continue; 1034 } 1035 ptr += 2; 1036 fprintf(stderr, "Bad option: %s\n", ptr - 2); 1037 exit(1); 1038 } 1039 bl = blist_create(size, M_WAITOK); 1040 blist_free(bl, 0, size); 1041 1042 for (;;) { 1043 char buf[1024]; 1044 long long da = 0; 1045 long long count = 0; 1046 1047 printf("%lld/%lld/%lld> ", (long long)blist_avail(bl), 1048 (long long)size, (long long)bl->bl_radix); 1049 fflush(stdout); 1050 if (fgets(buf, sizeof(buf), stdin) == NULL) 1051 break; 1052 switch(buf[0]) { 1053 case 'r': 1054 if (sscanf(buf + 1, "%lld", &count) == 1) { 1055 blist_resize(&bl, count, 1, M_WAITOK); 1056 } else { 1057 printf("?\n"); 1058 } 1059 case 'p': 1060 blist_print(bl); 1061 break; 1062 case 'a': 1063 if (sscanf(buf + 1, "%lld", &count) == 1) { 1064 daddr_t blk = blist_alloc(bl, count); 1065 printf(" R=%08llx\n", (long long)blk); 1066 } else { 1067 printf("?\n"); 1068 } 1069 break; 1070 case 'f': 1071 if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) { 1072 blist_free(bl, da, count); 1073 } else { 1074 printf("?\n"); 1075 } 1076 break; 1077 case 'l': 1078 if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) { 1079 printf(" n=%jd\n", 1080 (intmax_t)blist_fill(bl, da, count)); 1081 } else { 1082 printf("?\n"); 1083 } 1084 break; 1085 case '?': 1086 case 'h': 1087 puts( 1088 "p -print\n" 1089 "a %d -allocate\n" 1090 "f %x %d -free\n" 1091 "l %x %d -fill\n" 1092 "r %d -resize\n" 1093 "h/? -help" 1094 ); 1095 break; 1096 default: 1097 printf("?\n"); 1098 break; 1099 } 1100 } 1101 return(0); 1102 } 1103 1104 void 1105 panic(const char *ctl, ...) 1106 { 1107 va_list va; 1108 1109 va_start(va, ctl); 1110 vfprintf(stderr, ctl, va); 1111 fprintf(stderr, "\n"); 1112 va_end(va); 1113 exit(1); 1114 } 1115 1116 #endif 1117 1118