/*-
 * Copyright (c) 1998 Matthew Dillon.  All Rights Reserved.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting
 *
 *	This module implements a general bitmap allocator/deallocator.  The
 *	allocator eats around 2 bits per 'block'.  The module does not
 *	try to interpret the meaning of a 'block' other than to return
 *	SWAPBLK_NONE on an allocation failure.
 *
 *	A radix tree is used to maintain the bitmap.  Two radix constants
 *	are involved: one for the bitmaps contained in the leaf nodes
 *	(typically 32), and one for the meta nodes (typically 16).  Both
 *	meta and leaf nodes have a hint field.  This field gives us a hint
 *	as to the largest free contiguous range of blocks under the node.
 *	It may contain a value that is too high, but will never contain a
 *	value that is too low.  When the radix tree is searched, allocation
 *	failures in subtrees update the hint.
 *
 *	The radix tree also implements two collapsed states for meta nodes:
 *	the ALL-ALLOCATED state and the ALL-FREE state.  If a meta node is
 *	in either of these two states, all information contained underneath
 *	the node is considered stale.  These states are used to optimize
 *	allocation and freeing operations.
 *
 *	The hinting greatly increases code efficiency for allocations while
 *	the general radix structure optimizes both allocations and frees.
 *	The radix tree should be able to operate well no matter how much
 *	fragmentation there is and no matter how large a bitmap is used.
 *
 *	Unlike the rlist code, the blist code wires all necessary memory at
 *	creation time.  Neither allocations nor frees require interaction
 *	with the memory subsystem.  In contrast, the rlist code may allocate
 *	memory on an rlist_free() call.  The non-blocking features of the
 *	blist code are used to great advantage in the swap code
 *	(vm/nswap_pager.c).  The rlist code uses a little less overall
 *	memory than the blist code (but due to swap interleaving not all
 *	that much less), but the blist code scales much, much better.
 *
 *	LAYOUT: The radix tree is laid out recursively using a linear
 *	array.  Each meta node is immediately followed (laid out
 *	sequentially in memory) by BLIST_META_RADIX lower-level nodes.
 *	This is a recursive structure but one that can be easily scanned
 *	through a very simple 'skip' calculation.  In order to support
 *	large radices, portions of the tree may reside outside our memory
 *	allocation.  We handle this with an early-termination optimization
 *	(when bighint is set to -1) on the scan.  The memory allocation is
 *	only large enough to cover the number of blocks requested at
 *	creation time, even if it must be encompassed in a larger
 *	root-node radix.
 *
 *	NOTE: The allocator cannot currently allocate more than
 *	BLIST_BMAP_RADIX blocks per call.  It will panic with 'allocation
 *	too large' if you try.  This is an area that could use improvement.
 *	The radix is large enough that this restriction does not affect
 *	the swap system, though.  Currently only the allocation code is
 *	affected by this algorithmic unfeature.  The freeing code can
 *	handle arbitrary ranges.
 *
 *	This code can be compiled stand-alone for debugging.
 */
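
/*
 * Worked example of the layout described above (illustrative, not from
 * the original comments; the numbers assume the typical radix constants
 * BLIST_BMAP_RADIX = 32 and BLIST_META_RADIX = 16 and follow the
 * radix/skip loop in blist_create() below):
 *
 *	blocks requested	bl_radix	bl_skip
 *	<= 32			      32	      0	  (single leaf)
 *	<= 512  (32*16)		     512	     16	  (1 meta + 16 leaves)
 *	<= 8192 (512*16)	    8192	    272	  (16*17 + 16)
 *
 * Within a meta node's slice of the array, its children sit at relative
 * indices 1, 1 + next_skip, 1 + 2*next_skip, ... where next_skip =
 * skip / BLIST_META_RADIX; every scan loop below therefore takes the
 * form "for (i = 1; i <= skip; i += next_skip)".
 */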

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#ifdef _KERNEL

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/blist.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/mutex.h>

#else

#ifndef BLIST_NO_DEBUG
#define BLIST_DEBUG
#endif

#define SWAPBLK_NONE ((daddr_t)-1)

#include <sys/types.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>

#define malloc(a,b,c)	calloc(a, 1)
#define free(a,b)	free(a)

typedef unsigned int u_daddr_t;

#include <sys/blist.h>

void panic(const char *ctl, ...);

#endif

/*
 * static support functions
 */

static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count);
static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t blk,
		daddr_t count, daddr_t radix, int skip);
static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
		daddr_t radix, int skip, daddr_t blk);
static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
		daddr_t skip, blist_t dest, daddr_t count);
static int blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count);
static int blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count,
		daddr_t radix, int skip, daddr_t blk);
static daddr_t blst_radix_init(blmeta_t *scan, daddr_t radix,
		int skip, daddr_t count);
#ifndef _KERNEL
static void blst_radix_print(blmeta_t *scan, daddr_t blk,
		daddr_t radix, int skip, int tab);
#endif

#ifdef _KERNEL
static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
#endif

/*
 * blist_create() - create a blist capable of handling up to the specified
 *		    number of blocks
 *
 *	blocks - must be greater than 0
 *	flags  - malloc flags
 *
 *	The smallest blist consists of a single leaf node capable of
 *	managing BLIST_BMAP_RADIX blocks.
 */

blist_t
blist_create(daddr_t blocks, int flags)
{
	blist_t bl;
	int radix;
	int skip = 0;

	/*
	 * Calculate radix and skip field used for scanning.
	 */
	radix = BLIST_BMAP_RADIX;

	while (radix < blocks) {
		radix *= BLIST_META_RADIX;
		skip = (skip + 1) * BLIST_META_RADIX;
	}

	bl = malloc(sizeof(struct blist), M_SWAP, flags | M_ZERO);

	bl->bl_blocks = blocks;
	bl->bl_radix = radix;
	bl->bl_skip = skip;
	bl->bl_rootblks = 1 +
	    blst_radix_init(NULL, bl->bl_radix, bl->bl_skip, blocks);
	bl->bl_root = malloc(sizeof(blmeta_t) * bl->bl_rootblks, M_SWAP, flags);

#if defined(BLIST_DEBUG)
	printf(
		"BLIST representing %lld blocks (%lld MB of swap)"
		", requiring %lldK of ram\n",
		(long long)bl->bl_blocks,
		(long long)bl->bl_blocks * 4 / 1024,
		(long long)(bl->bl_rootblks * sizeof(blmeta_t) + 1023) / 1024
	);
	printf("BLIST raw radix tree contains %lld records\n",
	    (long long)bl->bl_rootblks);
#endif
	blst_radix_init(bl->bl_root, bl->bl_radix, bl->bl_skip, blocks);

	return(bl);
}

void
blist_destroy(blist_t bl)
{
	free(bl->bl_root, M_SWAP);
	free(bl, M_SWAP);
}

/*
 * blist_alloc() -	reserve space in the block bitmap.  Return the base
 *			of a contiguous region or SWAPBLK_NONE if space could
 *			not be allocated.
 */

daddr_t
blist_alloc(blist_t bl, daddr_t count)
{
	daddr_t blk = SWAPBLK_NONE;

	if (bl) {
		if (bl->bl_radix == BLIST_BMAP_RADIX)
			blk = blst_leaf_alloc(bl->bl_root, 0, count);
		else
			blk = blst_meta_alloc(bl->bl_root, 0, count,
			    bl->bl_radix, bl->bl_skip);
		if (blk != SWAPBLK_NONE)
			bl->bl_free -= count;
	}
	return(blk);
}

/*
 * blist_free() -	free up space in the block bitmap.  Return the base
 *			of a contiguous region.  Panic if an inconsistency is
 *			found.
 */

void
blist_free(blist_t bl, daddr_t blkno, daddr_t count)
{
	if (bl) {
		if (bl->bl_radix == BLIST_BMAP_RADIX)
			blst_leaf_free(bl->bl_root, blkno, count);
		else
			blst_meta_free(bl->bl_root, blkno, count,
			    bl->bl_radix, bl->bl_skip, 0);
		bl->bl_free += count;
	}
}

/*
 * blist_fill() -	mark a region in the block bitmap as off-limits
 *			to the allocator (i.e. allocate it), ignoring any
 *			existing allocations.  Return the number of blocks
 *			actually filled that were free before the call.
 */

int
blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
{
	int filled;

	if (bl) {
		if (bl->bl_radix == BLIST_BMAP_RADIX)
			filled = blst_leaf_fill(bl->bl_root, blkno, count);
		else
			filled = blst_meta_fill(bl->bl_root, blkno, count,
			    bl->bl_radix, bl->bl_skip, 0);
		bl->bl_free -= filled;
		return filled;
	} else
		return 0;
}
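
/*
 * Illustrative usage sketch (not compiled; the sizes and the panic on
 * failure are hypothetical).  Note that a new blist starts with every
 * block allocated -- blst_radix_init() zeroes the bitmaps -- so blocks
 * must be freed before the first blist_alloc() can succeed, just as
 * main() does below in the stand-alone debug build:
 */
#if 0
	blist_t bl;
	daddr_t blk;

	bl = blist_create(1024, M_WAITOK);	/* manage 1024 blocks */
	blist_free(bl, 0, 1024);		/* everything starts allocated */
	blk = blist_alloc(bl, 16);		/* grab 16 contiguous blocks */
	if (blk == SWAPBLK_NONE)
		panic("no space");
	blist_free(bl, blk, 16);		/* give them back */
	blist_destroy(bl);
#endif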

/*
 * blist_resize() -	resize an existing radix tree to handle the
 *			specified number of blocks.  This will reallocate
 *			the tree and transfer the previous bitmap to the new
 *			one.  When extending the tree you can specify whether
 *			the new blocks are to be left allocated or freed.
 */

void
blist_resize(blist_t *pbl, daddr_t count, int freenew, int flags)
{
	blist_t newbl = blist_create(count, flags);
	blist_t save = *pbl;

	*pbl = newbl;
	if (count > save->bl_blocks)
		count = save->bl_blocks;
	blst_copy(save->bl_root, 0, save->bl_radix, save->bl_skip, newbl, count);

	/*
	 * If resizing upwards, should we free the new space or not?
	 */
	if (freenew && count < newbl->bl_blocks) {
		blist_free(newbl, count, newbl->bl_blocks - count);
	}
	blist_destroy(save);
}

#ifdef BLIST_DEBUG

/*
 * blist_print()    - dump radix tree
 */

void
blist_print(blist_t bl)
{
	printf("BLIST {\n");
	blst_radix_print(bl->bl_root, 0, bl->bl_radix, bl->bl_skip, 4);
	printf("}\n");
}

#endif

/************************************************************************
 *			  ALLOCATION SUPPORT FUNCTIONS			*
 ************************************************************************
 *
 *	These support functions do all the actual work.  They may seem
 *	rather longish, but that's because I've commented them up.  The
 *	actual code is straightforward.
 *
 */

/*
 * blst_leaf_alloc() -	allocate at a leaf in the radix tree (a bitmap).
 *
 *	This is the core of the allocator and is optimized for the 1 block
 *	and the BLIST_BMAP_RADIX block allocation cases.  Other cases are
 *	somewhat slower.  The 1 block allocation case is log2 and extremely
 *	quick.
 */

static daddr_t
blst_leaf_alloc(
	blmeta_t *scan,
	daddr_t blk,
	int count
) {
	u_daddr_t orig = scan->u.bmu_bitmap;

	if (orig == 0) {
		/*
		 * Optimize bitmap all-allocated case.  Also, count = 1
		 * case assumes at least 1 bit is free in the bitmap, so
		 * we have to take care of this case here.
		 */
		scan->bm_bighint = 0;
		return(SWAPBLK_NONE);
	}
	if (count == 1) {
		/*
		 * Optimized code to allocate one bit out of the bitmap
		 */
		u_daddr_t mask;
		int j = BLIST_BMAP_RADIX/2;
		int r = 0;

		mask = (u_daddr_t)-1 >> (BLIST_BMAP_RADIX/2);

		while (j) {
			if ((orig & mask) == 0) {
				r += j;
				orig >>= j;
			}
			j >>= 1;
			mask >>= j;
		}
		scan->u.bmu_bitmap &= ~(1 << r);
		return(blk + r);
	}
	if (count <= BLIST_BMAP_RADIX) {
		/*
		 * non-optimized code to allocate N bits out of the bitmap.
		 * The more bits, the faster the code runs.  It will run
		 * the slowest allocating 2 bits, but since there aren't any
		 * memory ops in the core loop (or shouldn't be, anyway),
		 * you probably won't notice the difference.
		 */
		int j;
		int n = BLIST_BMAP_RADIX - count;
		u_daddr_t mask;

		mask = (u_daddr_t)-1 >> n;

		for (j = 0; j <= n; ++j) {
			if ((orig & mask) == mask) {
				scan->u.bmu_bitmap &= ~mask;
				return(blk + j);
			}
			mask = (mask << 1);
		}
	}
	/*
	 * We couldn't allocate count in this subtree, update bighint.
	 */
	scan->bm_bighint = count - 1;
	return(SWAPBLK_NONE);
}
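
/*
 * Worked trace of the count == 1 binary search above (illustrative,
 * not from the original comments; assumes BLIST_BMAP_RADIX = 32).
 * For orig = 0x00030000 the lowest free block is bit 16:
 *
 *	j=16  mask=0x0000ffff  orig&mask == 0  -> r=16, orig=0x00000003
 *	j=8   mask=0x000000ff  orig&mask != 0
 *	j=4   mask=0x0000000f  orig&mask != 0
 *	j=2   mask=0x00000003  orig&mask != 0
 *	j=1   mask=0x00000001  orig&mask != 0
 *
 * The loop exits with r = 16, the index of the lowest set (free) bit,
 * and that bit is then cleared in bmu_bitmap.
 */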

/*
 * blst_meta_alloc() -	allocate at a meta in the radix tree.
 *
 *	Attempt to allocate at a meta node.  If we can't, we update
 *	bighint and return a failure.  Updating bighint optimizes future
 *	calls that hit this node.  We have to check for our collapse cases
 *	and we have a few optimizations strewn in as well.
 */

static daddr_t
blst_meta_alloc(
	blmeta_t *scan,
	daddr_t blk,
	daddr_t count,
	daddr_t radix,
	int skip
) {
	int i;
	int next_skip = ((u_int)skip / BLIST_META_RADIX);

	if (scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case
		 */
		scan->bm_bighint = count;
		return(SWAPBLK_NONE);
	}

	if (scan->u.bmu_avail == radix) {
		radix /= BLIST_META_RADIX;

		/*
		 * ALL-FREE special case, initialize the uninitialized
		 * sublevel.
		 */
		for (i = 1; i <= skip; i += next_skip) {
			if (scan[i].bm_bighint == (daddr_t)-1)
				break;
			if (next_skip == 1) {
				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
				scan[i].bm_bighint = BLIST_BMAP_RADIX;
			} else {
				scan[i].bm_bighint = radix;
				scan[i].u.bmu_avail = radix;
			}
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	for (i = 1; i <= skip; i += next_skip) {
		if (count <= scan[i].bm_bighint) {
			/*
			 * count fits in object
			 */
			daddr_t r;
			if (next_skip == 1) {
				r = blst_leaf_alloc(&scan[i], blk, count);
			} else {
				r = blst_meta_alloc(&scan[i], blk, count,
				    radix, next_skip - 1);
			}
			if (r != SWAPBLK_NONE) {
				scan->u.bmu_avail -= count;
				if (scan->bm_bighint > scan->u.bmu_avail)
					scan->bm_bighint = scan->u.bmu_avail;
				return(r);
			}
		} else if (scan[i].bm_bighint == (daddr_t)-1) {
			/*
			 * Terminator
			 */
			break;
		} else if (count > radix) {
			/*
			 * count does not fit in object even if it were
			 * completely free.
			 */
			panic("blist_meta_alloc: allocation too large");
		}
		blk += radix;
	}

	/*
	 * We couldn't allocate count in this subtree, update bighint.
	 */
	if (scan->bm_bighint >= count)
		scan->bm_bighint = count - 1;
	return(SWAPBLK_NONE);
}

/*
 * BLST_LEAF_FREE() -	free allocated block from leaf bitmap
 *
 */

static void
blst_leaf_free(
	blmeta_t *scan,
	daddr_t blk,
	int count
) {
	/*
	 * free some data in this bitmap
	 *
	 * e.g.
	 *	0000111111111110000
	 *	  \_________/\__/
	 *	    v        n
	 */
	int n = blk & (BLIST_BMAP_RADIX - 1);
	u_daddr_t mask;

	mask = ((u_daddr_t)-1 << n) &
	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));

	if (scan->u.bmu_bitmap & mask)
		panic("blst_radix_free: freeing free block");
	scan->u.bmu_bitmap |= mask;

	/*
	 * We could probably do a better job here.  We are required to make
	 * bighint at least as large as the biggest contiguous block of
	 * data.  If we just shoehorn it, a little extra overhead will
	 * be incurred on the next allocation (but only that one typically).
	 */
	scan->bm_bighint = BLIST_BMAP_RADIX;
}
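
/*
 * Worked example of the mask construction above (illustrative; assumes
 * BLIST_BMAP_RADIX = 32).  Freeing count = 3 blocks at n = 4:
 *
 *	(u_daddr_t)-1 << 4		= 0xfffffff0
 *	(u_daddr_t)-1 >> (32-3-4)	= 0x0000007f
 *	mask = AND of the two		= 0x00000070	(bits 4..6)
 *
 * i.e. exactly 'count' bits starting at bit 'n' are set in the mask,
 * which is then OR'd into bmu_bitmap to mark those blocks free again.
 */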

/*
 * BLST_META_FREE() - free allocated blocks from radix tree meta info
 *
 *	This support routine frees a range of blocks from the bitmap.
 *	The range must be entirely enclosed by this radix node.  If a
 *	meta node, we break the range down recursively to free blocks
 *	in subnodes (which means that this code can free an arbitrary
 *	range whereas the allocation code cannot allocate an arbitrary
 *	range).
 */

static void
blst_meta_free(
	blmeta_t *scan,
	daddr_t freeBlk,
	daddr_t count,
	daddr_t radix,
	int skip,
	daddr_t blk
) {
	int i;
	int next_skip = ((u_int)skip / BLIST_META_RADIX);

#if 0
	printf("free (%llx,%lld) FROM (%llx,%lld)\n",
	    (long long)freeBlk, (long long)count,
	    (long long)blk, (long long)radix
	);
#endif

	if (scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case, with possible
		 * shortcut to ALL-FREE special case.
		 */
		scan->u.bmu_avail = count;
		scan->bm_bighint = count;

		if (count != radix)  {
			for (i = 1; i <= skip; i += next_skip) {
				if (scan[i].bm_bighint == (daddr_t)-1)
					break;
				scan[i].bm_bighint = 0;
				if (next_skip == 1) {
					scan[i].u.bmu_bitmap = 0;
				} else {
					scan[i].u.bmu_avail = 0;
				}
			}
			/* fall through */
		}
	} else {
		scan->u.bmu_avail += count;
		/* scan->bm_bighint = radix; */
	}

	/*
	 * ALL-FREE special case.
	 */

	if (scan->u.bmu_avail == radix)
		return;
	if (scan->u.bmu_avail > radix)
		panic("blst_meta_free: freeing already free blocks (%lld) %lld/%lld",
		    (long long)count, (long long)scan->u.bmu_avail,
		    (long long)radix);

	/*
	 * Break the free down into its components
	 */

	radix /= BLIST_META_RADIX;

	i = (freeBlk - blk) / radix;
	blk += i * radix;
	i = i * next_skip + 1;

	while (i <= skip && blk < freeBlk + count) {
		daddr_t v;

		v = blk + radix - freeBlk;
		if (v > count)
			v = count;

		if (scan->bm_bighint == (daddr_t)-1)
			panic("blst_meta_free: freeing unexpected range");

		if (next_skip == 1) {
			blst_leaf_free(&scan[i], freeBlk, v);
		} else {
			blst_meta_free(&scan[i], freeBlk, v,
			    radix, next_skip - 1, blk);
		}
		if (scan->bm_bighint < scan[i].bm_bighint)
			scan->bm_bighint = scan[i].bm_bighint;
		count -= v;
		freeBlk += v;
		blk += radix;
		i += next_skip;
	}
}
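
/*
 * Illustrative decomposition (not from the original comments): with a
 * child radix of 512, freeing (freeBlk = 700, count = 400) under a node
 * whose range starts at blk = 0 lands in the child covering [512,1024)
 * and clamps v = 512 + 512 - 700 = 324 blocks for that child, then
 * recurses into the next child for the remaining 76 blocks at 1024.
 */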

/*
 * BLST_COPY() - copy one radix tree to another
 *
 *	Locates free space in the source tree and frees it in the
 *	destination tree.  The space must not already be free in the
 *	destination.
 */

static void blst_copy(
	blmeta_t *scan,
	daddr_t blk,
	daddr_t radix,
	daddr_t skip,
	blist_t dest,
	daddr_t count
) {
	int next_skip;
	int i;

	/*
	 * Leaf node
	 */

	if (radix == BLIST_BMAP_RADIX) {
		u_daddr_t v = scan->u.bmu_bitmap;

		if (v == (u_daddr_t)-1) {
			blist_free(dest, blk, count);
		} else if (v != 0) {
			int i;

			for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) {
				if (v & (1 << i))
					blist_free(dest, blk + i, 1);
			}
		}
		return;
	}

	/*
	 * Meta node
	 */

	if (scan->u.bmu_avail == 0) {
		/*
		 * Source all allocated, leave dest allocated
		 */
		return;
	}
	if (scan->u.bmu_avail == radix) {
		/*
		 * Source all free, free entire dest
		 */
		if (count < radix)
			blist_free(dest, blk, count);
		else
			blist_free(dest, blk, radix);
		return;
	}


	radix /= BLIST_META_RADIX;
	next_skip = ((u_int)skip / BLIST_META_RADIX);

	for (i = 1; count && i <= skip; i += next_skip) {
		if (scan[i].bm_bighint == (daddr_t)-1)
			break;

		if (count >= radix) {
			blst_copy(
			    &scan[i],
			    blk,
			    radix,
			    next_skip - 1,
			    dest,
			    radix
			);
			count -= radix;
		} else {
			if (count) {
				blst_copy(
				    &scan[i],
				    blk,
				    radix,
				    next_skip - 1,
				    dest,
				    count
				);
			}
			count = 0;
		}
		blk += radix;
	}
}

/*
 * BLST_LEAF_FILL() -	allocate specific blocks in leaf bitmap
 *
 *	This routine allocates all blocks in the specified range
 *	regardless of any existing allocations in that range.  Returns
 *	the number of blocks allocated by the call.
 */

static int
blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
{
	int n = blk & (BLIST_BMAP_RADIX - 1);
	int nblks;
	u_daddr_t mask, bitmap;

	mask = ((u_daddr_t)-1 << n) &
	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));

	/*
	 * Count the number of blocks we're about to allocate: clearing
	 * the lowest set bit on each pass counts the free bits under the
	 * mask in one step per set bit.
	 */
	bitmap = scan->u.bmu_bitmap & mask;
	for (nblks = 0; bitmap != 0; nblks++)
		bitmap &= bitmap - 1;

	scan->u.bmu_bitmap &= ~mask;
	return nblks;
}
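
/*
 * Illustrative fill sketch (not compiled; the block numbers are
 * hypothetical).  This is the path blist_fill() drives through
 * blst_leaf_fill()/blst_meta_fill() to take specific blocks out of
 * circulation -- e.g. reserved regions of a device -- regardless of
 * their current state:
 */
#if 0
	int n;

	n = blist_fill(bl, 100, 8);	/* force-allocate blocks 100..107 */
	/* n is how many of the 8 were free (and thus newly consumed) */
#endif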

/*
 * BLST_META_FILL() -	allocate specific blocks at a meta node
 *
 *	This routine allocates the specified range of blocks,
 *	regardless of any existing allocations in the range.  The
 *	range must be within the extent of this node.  Returns the
 *	number of blocks allocated by the call.
 */
static int
blst_meta_fill(
	blmeta_t *scan,
	daddr_t allocBlk,
	daddr_t count,
	daddr_t radix,
	int skip,
	daddr_t blk
) {
	int i;
	int next_skip = ((u_int)skip / BLIST_META_RADIX);
	int nblks = 0;

	if (count == radix || scan->u.bmu_avail == 0)  {
		/*
		 * ALL-ALLOCATED special case
		 */
		nblks = scan->u.bmu_avail;
		scan->u.bmu_avail = 0;
		scan->bm_bighint = count;
		return nblks;
	}

	if (scan->u.bmu_avail == radix) {
		radix /= BLIST_META_RADIX;

		/*
		 * ALL-FREE special case, initialize sublevel
		 */
		for (i = 1; i <= skip; i += next_skip) {
			if (scan[i].bm_bighint == (daddr_t)-1)
				break;
			if (next_skip == 1) {
				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
				scan[i].bm_bighint = BLIST_BMAP_RADIX;
			} else {
				scan[i].bm_bighint = radix;
				scan[i].u.bmu_avail = radix;
			}
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	if (count > radix)
		panic("blist_meta_fill: allocation too large");

	i = (allocBlk - blk) / radix;
	blk += i * radix;
	i = i * next_skip + 1;

	while (i <= skip && blk < allocBlk + count) {
		daddr_t v;

		v = blk + radix - allocBlk;
		if (v > count)
			v = count;

		if (scan->bm_bighint == (daddr_t)-1)
			panic("blst_meta_fill: filling unexpected range");

		if (next_skip == 1) {
			nblks += blst_leaf_fill(&scan[i], allocBlk, v);
		} else {
			nblks += blst_meta_fill(&scan[i], allocBlk, v,
			    radix, next_skip - 1, blk);
		}
		count -= v;
		allocBlk += v;
		blk += radix;
		i += next_skip;
	}
	scan->u.bmu_avail -= nblks;
	return nblks;
}

/*
 * BLST_RADIX_INIT() - initialize radix tree
 *
 *	Initialize our meta structures and bitmaps and calculate the exact
 *	amount of space required to manage 'count' blocks - this space may
 *	be considerably less than the calculated radix due to the large
 *	RADIX values we use.
 */

static daddr_t
blst_radix_init(blmeta_t *scan, daddr_t radix, int skip, daddr_t count)
{
	int i;
	int next_skip;
	daddr_t memindex = 0;

	/*
	 * Leaf node
	 */

	if (radix == BLIST_BMAP_RADIX) {
		if (scan) {
			scan->bm_bighint = 0;
			scan->u.bmu_bitmap = 0;
		}
		return(memindex);
	}

	/*
	 * Meta node.  If allocating the entire object we can special
	 * case it.  However, we need to figure out how much memory
	 * is required to manage 'count' blocks, so we continue on anyway.
	 */

	if (scan) {
		scan->bm_bighint = 0;
		scan->u.bmu_avail = 0;
	}

	radix /= BLIST_META_RADIX;
	next_skip = ((u_int)skip / BLIST_META_RADIX);

	for (i = 1; i <= skip; i += next_skip) {
		if (count >= radix) {
			/*
			 * Allocate the entire object
			 */
			memindex = i + blst_radix_init(
			    ((scan) ? &scan[i] : NULL),
			    radix,
			    next_skip - 1,
			    radix
			);
			count -= radix;
		} else if (count > 0) {
			/*
			 * Allocate a partial object
			 */
			memindex = i + blst_radix_init(
			    ((scan) ? &scan[i] : NULL),
			    radix,
			    next_skip - 1,
			    count
			);
			count = 0;
		} else {
			/*
			 * Add terminator and break out
			 */
			if (scan)
				scan[i].bm_bighint = (daddr_t)-1;
			break;
		}
	}
	if (memindex < i)
		memindex = i;
	return(memindex);
}
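
/*
 * Worked sizing example (illustrative; assumes BLIST_BMAP_RADIX = 32
 * and BLIST_META_RADIX = 16).  blist_create(1024, ...) picks radix 8192
 * and skip 272, so a fully populated tree would need 273 nodes, but
 * blst_radix_init() only counts the root, two full 512-block subtrees
 * (17 nodes each, at indices 1 and 18) and a terminator slot at 35:
 *
 *	bl_rootblks = 1 + 35 = 36 blmeta_t structures
 *
 * This is the early-termination saving described in the LAYOUT comment
 * at the top of the file.
 */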

#ifdef BLIST_DEBUG

static void
blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab)
{
	int i;
	int next_skip;
	int lastState = 0;

	if (radix == BLIST_BMAP_RADIX) {
		printf(
		    "%*.*s(%08llx,%lld): bitmap %08llx big=%lld\n",
		    tab, tab, "",
		    (long long)blk, (long long)radix,
		    (long long)scan->u.bmu_bitmap,
		    (long long)scan->bm_bighint
		);
		return;
	}

	if (scan->u.bmu_avail == 0) {
		printf(
		    "%*.*s(%08llx,%lld) ALL ALLOCATED\n",
		    tab, tab, "",
		    (long long)blk,
		    (long long)radix
		);
		return;
	}
	if (scan->u.bmu_avail == radix) {
		printf(
		    "%*.*s(%08llx,%lld) ALL FREE\n",
		    tab, tab, "",
		    (long long)blk,
		    (long long)radix
		);
		return;
	}

	printf(
	    "%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n",
	    tab, tab, "",
	    (long long)blk, (long long)radix,
	    (long long)scan->u.bmu_avail,
	    (long long)radix,
	    (long long)scan->bm_bighint
	);

	radix /= BLIST_META_RADIX;
	next_skip = ((u_int)skip / BLIST_META_RADIX);
	tab += 4;

	for (i = 1; i <= skip; i += next_skip) {
		if (scan[i].bm_bighint == (daddr_t)-1) {
			printf(
			    "%*.*s(%08llx,%lld): Terminator\n",
			    tab, tab, "",
			    (long long)blk, (long long)radix
			);
			lastState = 0;
			break;
		}
		blst_radix_print(
		    &scan[i],
		    blk,
		    radix,
		    next_skip - 1,
		    tab
		);
		blk += radix;
	}
	tab -= 4;

	printf(
	    "%*.*s}\n",
	    tab, tab, ""
	);
}

#endif
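
/*
 * Stand-alone build sketch (illustrative; the file name and include
 * path are whatever your tree uses -- the only requirements are that
 * <sys/blist.h> resolves and _KERNEL is not defined):
 *
 *	cc -g -O -I. -o blist blist.c
 *	./blist 1024
 *
 * The interactive loop in main() below then accepts the single-letter
 * commands listed under 'h'.
 */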

#ifdef BLIST_DEBUG

int
main(int ac, char **av)
{
	int size = 1024;
	int i;
	blist_t bl;

	for (i = 1; i < ac; ++i) {
		const char *ptr = av[i];
		if (*ptr != '-') {
			size = strtol(ptr, NULL, 0);
			continue;
		}
		ptr += 2;
		fprintf(stderr, "Bad option: %s\n", ptr - 2);
		exit(1);
	}
	bl = blist_create(size, M_WAITOK);
	blist_free(bl, 0, size);

	for (;;) {
		char buf[1024];
		daddr_t da = 0;
		daddr_t count = 0;


		printf("%lld/%lld/%lld> ", (long long)bl->bl_free,
		    (long long)size, (long long)bl->bl_radix);
		fflush(stdout);
		if (fgets(buf, sizeof(buf), stdin) == NULL)
			break;
		switch(buf[0]) {
		case 'r':
			if (sscanf(buf + 1, "%lld", &count) == 1) {
				blist_resize(&bl, count, 1, M_WAITOK);
			} else {
				printf("?\n");
			}
			/* FALLTHROUGH - print the tree after a resize */
		case 'p':
			blist_print(bl);
			break;
		case 'a':
			if (sscanf(buf + 1, "%lld", &count) == 1) {
				daddr_t blk = blist_alloc(bl, count);
				printf("    R=%08llx\n", (long long)blk);
			} else {
				printf("?\n");
			}
			break;
		case 'f':
			if (sscanf(buf + 1, "%llx %lld",
			    (long long *)&da, (long long *)&count) == 2) {
				blist_free(bl, da, count);
			} else {
				printf("?\n");
			}
			break;
		case 'l':
			if (sscanf(buf + 1, "%llx %lld",
			    (long long *)&da, (long long *)&count) == 2) {
				printf("    n=%d\n",
				    blist_fill(bl, da, count));
			} else {
				printf("?\n");
			}
			break;
		case '?':
		case 'h':
			puts(
			    "p          -print\n"
			    "a %d       -allocate\n"
			    "f %x %d    -free\n"
			    "l %x %d    -fill\n"
			    "r %d       -resize\n"
			    "h/?        -help"
			);
			break;
		default:
			printf("?\n");
			break;
		}
	}
	return(0);
}

void
panic(const char *ctl, ...)
{
	va_list va;

	va_start(va, ctl);
	vfprintf(stderr, ctl, va);
	fprintf(stderr, "\n");
	va_end(va);
	exit(1);
}

#endif