/*-
 * Copyright (c) 1998 Matthew Dillon.  All Rights Reserved.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting
 *
 * This module implements a general bitmap allocator/deallocator.  The
 * allocator eats around 2 bits per 'block'.  The module does not
 * try to interpret the meaning of a 'block' other than to return
 * SWAPBLK_NONE on an allocation failure.
 *
 * A radix tree is used to maintain the bitmap.  Two radix constants are
 * involved: one for the bitmaps contained in the leaf nodes (typically
 * 32), and one for the meta nodes (typically 16).  Both meta and leaf
 * nodes have a hint field.  This field gives us a hint as to the largest
 * free contiguous range of blocks under the node.  It may contain a
 * value that is too high, but will never contain a value that is too
 * low.  When the radix tree is searched, allocation failures in subtrees
 * update the hint.
 *
 * The radix tree also implements two collapsed states for meta nodes:
 * the ALL-ALLOCATED state and the ALL-FREE state.  If a meta node is
 * in either of these two states, all information contained underneath
 * the node is considered stale.  These states are used to optimize
 * allocation and freeing operations.
 *
 * The hinting greatly increases code efficiency for allocations while
 * the general radix structure optimizes both allocations and frees.  The
 * radix tree should be able to operate well no matter how much
 * fragmentation there is and no matter how large a bitmap is used.
 *
 * Unlike the rlist code, the blist code wires all necessary memory at
 * creation time.  Neither allocations nor frees require interaction with
 * the memory subsystem.  In contrast, the rlist code may allocate memory
 * on an rlist_free() call.  The non-blocking features of the blist code
 * are used to great advantage in the swap code (vm/nswap_pager.c).
 * The rlist code uses a little less overall memory than the blist code
 * (but due to swap interleaving not all that much less), but the blist
 * code scales much, much better.
 *
 * LAYOUT: The radix tree is laid out recursively using a
 * linear array.  Each meta node is immediately followed (laid out
 * sequentially in memory) by BLIST_META_RADIX lower level nodes.  This
 * is a recursive structure but one that can be easily scanned through
 * a very simple 'skip' calculation.  In order to support large radixes,
 * portions of the tree may reside outside our memory allocation.  We
 * handle this with an early-termination optimization (when bighint is
 * set to -1) on the scan.  The memory allocation is only large enough
 * to cover the number of blocks requested at creation time, even if it
 * must be encompassed in a larger root-node radix.
 *
 * NOTE: the allocator cannot currently allocate more than
 * BLIST_BMAP_RADIX blocks per call.  It will panic with 'allocation too
 * large' if you try.  This is an area that could use improvement.  The
 * radix is large enough that this restriction does not affect the swap
 * system, though.  Currently only the allocation code is affected by
 * this algorithmic unfeature.  The freeing code can handle arbitrary
 * ranges.
 *
 * This code can be compiled stand-alone for debugging.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#ifdef _KERNEL

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/blist.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/mutex.h>

#else

#ifndef BLIST_NO_DEBUG
#define BLIST_DEBUG
#endif

#define SWAPBLK_NONE ((daddr_t)-1)

#include <sys/types.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>

#define malloc(a,b,c)	calloc(a, 1)
#define free(a,b)	free(a)

typedef unsigned int u_daddr_t;

#include <sys/blist.h>

void panic(const char *ctl, ...);

#endif

/*
 * static support functions
 */

static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count);
static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t blk,
		daddr_t count, daddr_t radix, int skip);
static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
		daddr_t radix, int skip, daddr_t blk);
static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
		daddr_t skip, blist_t dest, daddr_t count);
static int blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count);
static int blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count,
		daddr_t radix, int skip, daddr_t blk);
static daddr_t blst_radix_init(blmeta_t *scan, daddr_t radix,
		int skip, daddr_t count);
#ifndef _KERNEL
static void blst_radix_print(blmeta_t *scan, daddr_t blk,
		daddr_t radix, int skip, int tab);
#endif

#ifdef _KERNEL
static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
#endif
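/*
 * A minimal sketch of how the tree is sized, under a hypothetical
 * BLIST_EXAMPLES guard that is not part of the build.  The loop mirrors
 * the sizing loop in blist_create() below: with BLIST_BMAP_RADIX = 32
 * and BLIST_META_RADIX = 16, asking for 1000 blocks yields radix = 8192
 * (a root over 16 subtrees of 512 blocks, each over 16 leaves of 32)
 * and skip = 272 (so children sit 272/16 = 17 array slots apart).
 */
#ifdef BLIST_EXAMPLES
static void
blist_example_sizing(daddr_t blocks)
{
	int radix = BLIST_BMAP_RADIX;	/* one leaf covers 32 blocks */
	int skip = 0;

	/*
	 * Each pass adds one meta level: radix multiplies by
	 * BLIST_META_RADIX, and skip becomes the array distance from a
	 * meta node to the slot just past its last child subtree.
	 */
	while (radix < blocks) {
		radix *= BLIST_META_RADIX;
		skip = (skip + 1) * BLIST_META_RADIX;
	}
	printf("%lld blocks -> radix %d, skip %d\n",
	    (long long)blocks, radix, skip);
}
#endif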
/*
 * blist_create() - create a blist capable of handling up to the specified
 *		    number of blocks
 *
 *	blocks must be greater than 0
 *
 *	The smallest blist consists of a single leaf node capable of
 *	managing BLIST_BMAP_RADIX blocks.
 */

blist_t
blist_create(daddr_t blocks)
{
	blist_t bl;
	int radix;
	int skip = 0;

	/*
	 * Calculate radix and skip field used for scanning.
	 */
	radix = BLIST_BMAP_RADIX;

	while (radix < blocks) {
		radix *= BLIST_META_RADIX;
		skip = (skip + 1) * BLIST_META_RADIX;
	}

	bl = malloc(sizeof(struct blist), M_SWAP, M_WAITOK | M_ZERO);

	bl->bl_blocks = blocks;
	bl->bl_radix = radix;
	bl->bl_skip = skip;
	bl->bl_rootblks = 1 +
	    blst_radix_init(NULL, bl->bl_radix, bl->bl_skip, blocks);
	bl->bl_root = malloc(sizeof(blmeta_t) * bl->bl_rootblks, M_SWAP, M_WAITOK);

#if defined(BLIST_DEBUG)
	printf(
		"BLIST representing %lld blocks (%lld MB of swap)"
		", requiring %lldK of ram\n",
		(long long)bl->bl_blocks,
		(long long)bl->bl_blocks * 4 / 1024,
		(long long)(bl->bl_rootblks * sizeof(blmeta_t) + 1023) / 1024
	);
	printf("BLIST raw radix tree contains %lld records\n",
	    (long long)bl->bl_rootblks);
#endif
	blst_radix_init(bl->bl_root, bl->bl_radix, bl->bl_skip, blocks);

	return(bl);
}

void
blist_destroy(blist_t bl)
{
	free(bl->bl_root, M_SWAP);
	free(bl, M_SWAP);
}

/*
 * blist_alloc() -	reserve space in the block bitmap.  Return the base
 *			of a contiguous region or SWAPBLK_NONE if space could
 *			not be allocated.
 */

daddr_t
blist_alloc(blist_t bl, daddr_t count)
{
	daddr_t blk = SWAPBLK_NONE;

	if (bl) {
		if (bl->bl_radix == BLIST_BMAP_RADIX)
			blk = blst_leaf_alloc(bl->bl_root, 0, count);
		else
			blk = blst_meta_alloc(bl->bl_root, 0, count, bl->bl_radix, bl->bl_skip);
		if (blk != SWAPBLK_NONE)
			bl->bl_free -= count;
	}
	return(blk);
}

/*
 * blist_free() -	free up space in the block bitmap.  The range must
 *			have been previously allocated.  Panic if an
 *			inconsistency is found.
 */

void
blist_free(blist_t bl, daddr_t blkno, daddr_t count)
{
	if (bl) {
		if (bl->bl_radix == BLIST_BMAP_RADIX)
			blst_leaf_free(bl->bl_root, blkno, count);
		else
			blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix, bl->bl_skip, 0);
		bl->bl_free += count;
	}
}

/*
 * blist_fill() -	mark a region in the block bitmap as off-limits
 *			to the allocator (i.e. allocate it), ignoring any
 *			existing allocations.  Return the number of blocks
 *			actually filled that were free before the call.
 */

int
blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
{
	int filled;

	if (bl) {
		if (bl->bl_radix == BLIST_BMAP_RADIX)
			filled = blst_leaf_fill(bl->bl_root, blkno, count);
		else
			filled = blst_meta_fill(bl->bl_root, blkno, count,
			    bl->bl_radix, bl->bl_skip, 0);
		bl->bl_free -= filled;
		return filled;
	} else
		return 0;
}
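/*
 * A minimal usage sketch of the public interface above, under the same
 * hypothetical BLIST_EXAMPLES guard: a freshly created blist starts
 * fully allocated, so the caller frees the usable range first and then
 * draws contiguous runs from it with blist_alloc().  The debug main()
 * at the bottom of this file follows the same pattern.
 */
#ifdef BLIST_EXAMPLES
static void
blist_example_usage(void)
{
	blist_t bl = blist_create(1024);	/* manage blocks 0..1023 */
	daddr_t blk;

	blist_free(bl, 0, 1024);		/* make all blocks available */
	blk = blist_alloc(bl, 16);		/* reserve 16 contiguous blocks */
	if (blk != SWAPBLK_NONE)
		blist_free(bl, blk, 16);	/* and release them again */
	blist_fill(bl, 0, 8);			/* force blocks 0..7 allocated */
	blist_destroy(bl);
}
#endif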
/*
 * blist_resize() -	resize an existing radix tree to handle the
 *			specified number of blocks.  This will reallocate
 *			the tree and transfer the previous bitmap to the new
 *			one.  When extending the tree you can specify whether
 *			the new blocks are to be left allocated or freed.
 */

void
blist_resize(blist_t *pbl, daddr_t count, int freenew)
{
	blist_t newbl = blist_create(count);
	blist_t save = *pbl;

	*pbl = newbl;
	if (count > save->bl_blocks)
		count = save->bl_blocks;
	blst_copy(save->bl_root, 0, save->bl_radix, save->bl_skip, newbl, count);

	/*
	 * If resizing upwards, should we free the new space or not?
	 */
	if (freenew && count < newbl->bl_blocks) {
		blist_free(newbl, count, newbl->bl_blocks - count);
	}
	blist_destroy(save);
}

#ifdef BLIST_DEBUG

/*
 * blist_print()    - dump radix tree
 */

void
blist_print(blist_t bl)
{
	printf("BLIST {\n");
	blst_radix_print(bl->bl_root, 0, bl->bl_radix, bl->bl_skip, 4);
	printf("}\n");
}

#endif

/************************************************************************
 *			ALLOCATION SUPPORT FUNCTIONS			*
 ************************************************************************
 *
 *	These support functions do all the actual work.  They may seem
 *	rather longish, but that's because I've commented them up.  The
 *	actual code is straightforward.
 *
 */

/*
 * blst_leaf_alloc() -	allocate at a leaf in the radix tree (a bitmap).
 *
 *	This is the core of the allocator and is optimized for the 1 block
 *	and the BLIST_BMAP_RADIX block allocation cases.  Other cases are
 *	somewhat slower.  The 1 block allocation case is a log2 binary
 *	search and extremely quick.
 */

static daddr_t
blst_leaf_alloc(
	blmeta_t *scan,
	daddr_t blk,
	int count
) {
	u_daddr_t orig = scan->u.bmu_bitmap;

	if (orig == 0) {
		/*
		 * Optimize bitmap all-allocated case.  Also, count = 1
		 * case assumes at least 1 bit is free in the bitmap, so
		 * we have to take care of this case here.
		 */
		scan->bm_bighint = 0;
		return(SWAPBLK_NONE);
	}
	if (count == 1) {
		/*
		 * Optimized code to allocate one bit out of the bitmap
		 * (binary search for the lowest set bit).
		 */
		u_daddr_t mask;
		int j = BLIST_BMAP_RADIX/2;
		int r = 0;

		mask = (u_daddr_t)-1 >> (BLIST_BMAP_RADIX/2);

		while (j) {
			if ((orig & mask) == 0) {
				r += j;
				orig >>= j;
			}
			j >>= 1;
			mask >>= j;
		}
		/*
		 * Cast before shifting so the mask is formed in the
		 * bitmap's width rather than in plain int.
		 */
		scan->u.bmu_bitmap &= ~((u_daddr_t)1 << r);
		return(blk + r);
	}
	if (count <= BLIST_BMAP_RADIX) {
		/*
		 * non-optimized code to allocate N bits out of the bitmap.
		 * The more bits, the faster the code runs.  It will run
		 * the slowest allocating 2 bits, but since there aren't any
		 * memory ops in the core loop (or shouldn't be, anyway),
		 * you probably won't notice the difference.
		 */
		int j;
		int n = BLIST_BMAP_RADIX - count;
		u_daddr_t mask;

		mask = (u_daddr_t)-1 >> n;

		for (j = 0; j <= n; ++j) {
			if ((orig & mask) == mask) {
				scan->u.bmu_bitmap &= ~mask;
				return(blk + j);
			}
			mask = (mask << 1);
		}
	}
	/*
	 * We couldn't allocate count in this subtree, update bighint.
	 */
	scan->bm_bighint = count - 1;
	return(SWAPBLK_NONE);
}
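/*
 * A sketch (hypothetical BLIST_EXAMPLES guard) of the one-bit binary
 * search above: find_low_bit() returns the index of the lowest set bit,
 * mirroring the count == 1 path of blst_leaf_alloc().  With
 * map = 0x00000100 the window halves five times (16, 8, 4, 2, 1) and
 * settles on bit 8.
 */
#ifdef BLIST_EXAMPLES
static int
find_low_bit(u_daddr_t map)
{
	u_daddr_t mask = (u_daddr_t)-1 >> (BLIST_BMAP_RADIX/2);
	int j = BLIST_BMAP_RADIX/2;
	int r = 0;

	while (j) {
		if ((map & mask) == 0) {	/* low half empty: skip it */
			r += j;
			map >>= j;
		}
		j >>= 1;
		mask >>= j;			/* shrink window to new half */
	}
	return (r);				/* index of lowest set bit */
}
#endif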
/*
 * blst_meta_alloc() -	allocate at a meta in the radix tree.
 *
 *	Attempt to allocate at a meta node.  If we can't, we update
 *	bighint and return a failure.  Updating bighint optimizes future
 *	calls that hit this node.  We have to check for our collapse cases
 *	and we have a few optimizations strewn in as well.
 */

static daddr_t
blst_meta_alloc(
	blmeta_t *scan,
	daddr_t blk,
	daddr_t count,
	daddr_t radix,
	int skip
) {
	int i;
	int next_skip = ((u_int)skip / BLIST_META_RADIX);

	if (scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case
		 */
		scan->bm_bighint = count;
		return(SWAPBLK_NONE);
	}

	if (scan->u.bmu_avail == radix) {
		radix /= BLIST_META_RADIX;

		/*
		 * ALL-FREE special case, initialize the uninitialized
		 * sublevel.
		 */
		for (i = 1; i <= skip; i += next_skip) {
			if (scan[i].bm_bighint == (daddr_t)-1)
				break;
			if (next_skip == 1) {
				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
				scan[i].bm_bighint = BLIST_BMAP_RADIX;
			} else {
				scan[i].bm_bighint = radix;
				scan[i].u.bmu_avail = radix;
			}
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	for (i = 1; i <= skip; i += next_skip) {
		if (count <= scan[i].bm_bighint) {
			/*
			 * count fits in object
			 */
			daddr_t r;
			if (next_skip == 1) {
				r = blst_leaf_alloc(&scan[i], blk, count);
			} else {
				r = blst_meta_alloc(&scan[i], blk, count, radix, next_skip - 1);
			}
			if (r != SWAPBLK_NONE) {
				scan->u.bmu_avail -= count;
				if (scan->bm_bighint > scan->u.bmu_avail)
					scan->bm_bighint = scan->u.bmu_avail;
				return(r);
			}
		} else if (scan[i].bm_bighint == (daddr_t)-1) {
			/*
			 * Terminator
			 */
			break;
		} else if (count > radix) {
			/*
			 * count does not fit in object even if it were
			 * completely free.
			 */
			panic("blst_meta_alloc: allocation too large");
		}
		blk += radix;
	}

	/*
	 * We couldn't allocate count in this subtree, update bighint.
	 */
	if (scan->bm_bighint >= count)
		scan->bm_bighint = count - 1;
	return(SWAPBLK_NONE);
}

/*
 * BLST_LEAF_FREE() -	free allocated block from leaf bitmap
 *
 */

static void
blst_leaf_free(
	blmeta_t *scan,
	daddr_t blk,
	int count
) {
	/*
	 * free some data in this bitmap
	 *
	 * e.g.
	 *	0000111111111110000
	 *	\_________/\__/
	 *	v        n
	 */
	int n = blk & (BLIST_BMAP_RADIX - 1);
	u_daddr_t mask;

	mask = ((u_daddr_t)-1 << n) &
	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));

	if (scan->u.bmu_bitmap & mask)
		panic("blst_leaf_free: freeing free block");
	scan->u.bmu_bitmap |= mask;

	/*
	 * We could probably do a better job here.  We are required to make
	 * bighint at least as large as the biggest contiguous block of
	 * data.  If we just shoehorn it, a little extra overhead will
	 * be incurred on the next allocation (but only that one typically).
	 */
	scan->bm_bighint = BLIST_BMAP_RADIX;
}
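/*
 * A sketch (hypothetical BLIST_EXAMPLES guard) of the mask arithmetic in
 * blst_leaf_free() above: the intersection of "everything at or above
 * bit n" with "everything below bit n + count" selects exactly the run
 * being freed.  With n = 4 and count = 3 the result is 0x70 (bits 4-6).
 * As in the caller, n + count must not exceed BLIST_BMAP_RADIX.
 */
#ifdef BLIST_EXAMPLES
static u_daddr_t
leaf_free_mask(int n, int count)
{
	u_daddr_t lo = (u_daddr_t)-1 << n;	/* bits n..top set */
	u_daddr_t hi = (u_daddr_t)-1 >>
	    (BLIST_BMAP_RADIX - count - n);	/* bits 0..n+count-1 set */

	return (lo & hi);			/* bits n..n+count-1 set */
}
#endif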
/*
 * BLST_META_FREE() - free allocated blocks from radix tree meta info
 *
 *	This support routine frees a range of blocks from the bitmap.
 *	The range must be entirely enclosed by this radix node.  If a
 *	meta node, we break the range down recursively to free blocks
 *	in subnodes (which means that this code can free an arbitrary
 *	range whereas the allocation code cannot allocate an arbitrary
 *	range).
 */

static void
blst_meta_free(
	blmeta_t *scan,
	daddr_t freeBlk,
	daddr_t count,
	daddr_t radix,
	int skip,
	daddr_t blk
) {
	int i;
	int next_skip = ((u_int)skip / BLIST_META_RADIX);

#if 0
	printf("FREE (%llx,%lld) FROM (%llx,%lld)\n",
	    (long long)freeBlk, (long long)count,
	    (long long)blk, (long long)radix
	);
#endif

	if (scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case, with possible
		 * shortcut to ALL-FREE special case.
		 */
		scan->u.bmu_avail = count;
		scan->bm_bighint = count;

		if (count != radix) {
			for (i = 1; i <= skip; i += next_skip) {
				if (scan[i].bm_bighint == (daddr_t)-1)
					break;
				scan[i].bm_bighint = 0;
				if (next_skip == 1) {
					scan[i].u.bmu_bitmap = 0;
				} else {
					scan[i].u.bmu_avail = 0;
				}
			}
			/* fall through */
		}
	} else {
		scan->u.bmu_avail += count;
		/* scan->bm_bighint = radix; */
	}

	/*
	 * ALL-FREE special case.
	 */

	if (scan->u.bmu_avail == radix)
		return;
	if (scan->u.bmu_avail > radix)
		panic("blst_meta_free: freeing already free blocks (%lld) %lld/%lld",
		    (long long)count, (long long)scan->u.bmu_avail,
		    (long long)radix);

	/*
	 * Break the free down into its components
	 */

	radix /= BLIST_META_RADIX;

	i = (freeBlk - blk) / radix;
	blk += i * radix;
	i = i * next_skip + 1;

	while (i <= skip && blk < freeBlk + count) {
		daddr_t v;

		v = blk + radix - freeBlk;
		if (v > count)
			v = count;

		if (scan->bm_bighint == (daddr_t)-1)
			panic("blst_meta_free: freeing unexpected range");

		if (next_skip == 1) {
			blst_leaf_free(&scan[i], freeBlk, v);
		} else {
			blst_meta_free(&scan[i], freeBlk, v, radix, next_skip - 1, blk);
		}
		if (scan->bm_bighint < scan[i].bm_bighint)
			scan->bm_bighint = scan[i].bm_bighint;
		count -= v;
		freeBlk += v;
		blk += radix;
		i += next_skip;
	}
}
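/*
 * A sketch (hypothetical BLIST_EXAMPLES guard) of the linear-array child
 * indexing used by blst_meta_free() and the other meta routines.  A
 * block address maps to child c = (addr - blk) / radix of the current
 * node, where radix is the child subtree's radix, and that child lives
 * c * next_skip + 1 array slots past the meta node itself (offset 0 is
 * the meta node, offset 1 its first child).
 */
#ifdef BLIST_EXAMPLES
static int
meta_child_index(daddr_t addr, daddr_t blk, daddr_t radix, int next_skip)
{
	int c = (addr - blk) / radix;	/* which child subtree covers addr */

	return (c * next_skip + 1);	/* its offset after the meta node */
}
#endif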
/*
 * BLST_COPY() - copy one radix tree to another
 *
 *	Locates free space in the source tree and frees it in the
 *	destination tree.  The space may not already be free in the
 *	destination.
 */

static void blst_copy(
	blmeta_t *scan,
	daddr_t blk,
	daddr_t radix,
	daddr_t skip,
	blist_t dest,
	daddr_t count
) {
	int next_skip;
	int i;

	/*
	 * Leaf node
	 */

	if (radix == BLIST_BMAP_RADIX) {
		u_daddr_t v = scan->u.bmu_bitmap;

		if (v == (u_daddr_t)-1) {
			blist_free(dest, blk, count);
		} else if (v != 0) {
			int i;

			for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) {
				if (v & ((u_daddr_t)1 << i))
					blist_free(dest, blk + i, 1);
			}
		}
		return;
	}

	/*
	 * Meta node
	 */

	if (scan->u.bmu_avail == 0) {
		/*
		 * Source all allocated, leave dest allocated
		 */
		return;
	}
	if (scan->u.bmu_avail == radix) {
		/*
		 * Source all free, free entire dest
		 */
		if (count < radix)
			blist_free(dest, blk, count);
		else
			blist_free(dest, blk, radix);
		return;
	}


	radix /= BLIST_META_RADIX;
	next_skip = ((u_int)skip / BLIST_META_RADIX);

	for (i = 1; count && i <= skip; i += next_skip) {
		if (scan[i].bm_bighint == (daddr_t)-1)
			break;

		if (count >= radix) {
			blst_copy(
			    &scan[i],
			    blk,
			    radix,
			    next_skip - 1,
			    dest,
			    radix
			);
			count -= radix;
		} else {
			if (count) {
				blst_copy(
				    &scan[i],
				    blk,
				    radix,
				    next_skip - 1,
				    dest,
				    count
				);
			}
			count = 0;
		}
		blk += radix;
	}
}

/*
 * BLST_LEAF_FILL() -	allocate specific blocks in leaf bitmap
 *
 *	This routine allocates all blocks in the specified range
 *	regardless of any existing allocations in that range.  Returns
 *	the number of blocks allocated by the call.
 */

static int
blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
{
	int n = blk & (BLIST_BMAP_RADIX - 1);
	int nblks;
	u_daddr_t mask, bitmap;

	mask = ((u_daddr_t)-1 << n) &
	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));

	/* Count the number of blocks we're about to allocate */
	bitmap = scan->u.bmu_bitmap & mask;
	for (nblks = 0; bitmap != 0; nblks++)
		bitmap &= bitmap - 1;

	scan->u.bmu_bitmap &= ~mask;
	return nblks;
}
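/*
 * A sketch (hypothetical BLIST_EXAMPLES guard) of the bit-counting loop
 * in blst_leaf_fill() above.  Each "x &= x - 1" step clears the lowest
 * set bit, so the loop runs once per set bit: popcount32(0xB) clears
 * bits 0, 1, and 3 in turn and returns 3.
 */
#ifdef BLIST_EXAMPLES
static int
popcount32(u_daddr_t x)
{
	int n;

	for (n = 0; x != 0; n++)
		x &= x - 1;	/* strip the lowest set bit */
	return (n);
}
#endif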
/*
 * BLST_META_FILL() -	allocate specific blocks at a meta node
 *
 *	This routine allocates the specified range of blocks,
 *	regardless of any existing allocations in the range.  The
 *	range must be within the extent of this node.  Returns the
 *	number of blocks allocated by the call.
 */
static int
blst_meta_fill(
	blmeta_t *scan,
	daddr_t allocBlk,
	daddr_t count,
	daddr_t radix,
	int skip,
	daddr_t blk
) {
	int i;
	int next_skip = ((u_int)skip / BLIST_META_RADIX);
	int nblks = 0;

	if (count == radix || scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case
		 */
		nblks = scan->u.bmu_avail;
		scan->u.bmu_avail = 0;
		scan->bm_bighint = count;
		return nblks;
	}

	if (scan->u.bmu_avail == radix) {
		radix /= BLIST_META_RADIX;

		/*
		 * ALL-FREE special case, initialize sublevel
		 */
		for (i = 1; i <= skip; i += next_skip) {
			if (scan[i].bm_bighint == (daddr_t)-1)
				break;
			if (next_skip == 1) {
				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
				scan[i].bm_bighint = BLIST_BMAP_RADIX;
			} else {
				scan[i].bm_bighint = radix;
				scan[i].u.bmu_avail = radix;
			}
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	if (count > radix)
		panic("blst_meta_fill: allocation too large");

	i = (allocBlk - blk) / radix;
	blk += i * radix;
	i = i * next_skip + 1;

	while (i <= skip && blk < allocBlk + count) {
		daddr_t v;

		v = blk + radix - allocBlk;
		if (v > count)
			v = count;

		if (scan->bm_bighint == (daddr_t)-1)
			panic("blst_meta_fill: filling unexpected range");

		if (next_skip == 1) {
			nblks += blst_leaf_fill(&scan[i], allocBlk, v);
		} else {
			nblks += blst_meta_fill(&scan[i], allocBlk, v,
			    radix, next_skip - 1, blk);
		}
		count -= v;
		allocBlk += v;
		blk += radix;
		i += next_skip;
	}
	scan->u.bmu_avail -= nblks;
	return nblks;
}
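/*
 * A sketch (hypothetical BLIST_EXAMPLES guard) of the two-pass pattern
 * blist_create() uses with blst_radix_init() below: a first pass with a
 * NULL scan pointer only measures how many blmeta_t records the tree
 * needs, and a second pass over the allocated array initializes them.
 */
#ifdef BLIST_EXAMPLES
static blmeta_t *
radix_tree_build(daddr_t radix, int skip, daddr_t blocks, daddr_t *nrecp)
{
	daddr_t nrec = 1 + blst_radix_init(NULL, radix, skip, blocks);
	blmeta_t *root = malloc(sizeof(blmeta_t) * nrec, M_SWAP, M_WAITOK);

	blst_radix_init(root, radix, skip, blocks);	/* fill in the tree */
	*nrecp = nrec;
	return (root);
}
#endif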
/*
 * BLST_RADIX_INIT() - initialize radix tree
 *
 *	Initialize our meta structures and bitmaps and calculate the exact
 *	amount of space required to manage 'count' blocks - this space may
 *	be considerably less than the calculated radix due to the large
 *	RADIX values we use.
 */

static daddr_t
blst_radix_init(blmeta_t *scan, daddr_t radix, int skip, daddr_t count)
{
	int i;
	int next_skip;
	daddr_t memindex = 0;

	/*
	 * Leaf node
	 */

	if (radix == BLIST_BMAP_RADIX) {
		if (scan) {
			scan->bm_bighint = 0;
			scan->u.bmu_bitmap = 0;
		}
		return(memindex);
	}

	/*
	 * Meta node.  If allocating the entire object we can special
	 * case it.  However, we need to figure out how much memory
	 * is required to manage 'count' blocks, so we continue on anyway.
	 */

	if (scan) {
		scan->bm_bighint = 0;
		scan->u.bmu_avail = 0;
	}

	radix /= BLIST_META_RADIX;
	next_skip = ((u_int)skip / BLIST_META_RADIX);

	for (i = 1; i <= skip; i += next_skip) {
		if (count >= radix) {
			/*
			 * Allocate the entire object
			 */
			memindex = i + blst_radix_init(
			    ((scan) ? &scan[i] : NULL),
			    radix,
			    next_skip - 1,
			    radix
			);
			count -= radix;
		} else if (count > 0) {
			/*
			 * Allocate a partial object
			 */
			memindex = i + blst_radix_init(
			    ((scan) ? &scan[i] : NULL),
			    radix,
			    next_skip - 1,
			    count
			);
			count = 0;
		} else {
			/*
			 * Add terminator and break out
			 */
			if (scan)
				scan[i].bm_bighint = (daddr_t)-1;
			break;
		}
	}
	if (memindex < i)
		memindex = i;
	return(memindex);
}

#ifdef BLIST_DEBUG

static void
blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab)
{
	int i;
	int next_skip;
	int lastState = 0;

	if (radix == BLIST_BMAP_RADIX) {
		printf(
		    "%*.*s(%08llx,%lld): bitmap %08llx big=%lld\n",
		    tab, tab, "",
		    (long long)blk, (long long)radix,
		    (long long)scan->u.bmu_bitmap,
		    (long long)scan->bm_bighint
		);
		return;
	}

	if (scan->u.bmu_avail == 0) {
		printf(
		    "%*.*s(%08llx,%lld) ALL ALLOCATED\n",
		    tab, tab, "",
		    (long long)blk,
		    (long long)radix
		);
		return;
	}
	if (scan->u.bmu_avail == radix) {
		printf(
		    "%*.*s(%08llx,%lld) ALL FREE\n",
		    tab, tab, "",
		    (long long)blk,
		    (long long)radix
		);
		return;
	}

	printf(
	    "%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n",
	    tab, tab, "",
	    (long long)blk, (long long)radix,
	    (long long)scan->u.bmu_avail,
	    (long long)radix,
	    (long long)scan->bm_bighint
	);

	radix /= BLIST_META_RADIX;
	next_skip = ((u_int)skip / BLIST_META_RADIX);
	tab += 4;

	for (i = 1; i <= skip; i += next_skip) {
		if (scan[i].bm_bighint == (daddr_t)-1) {
			printf(
			    "%*.*s(%08llx,%lld): Terminator\n",
			    tab, tab, "",
			    (long long)blk, (long long)radix
			);
			lastState = 0;
			break;
		}
		blst_radix_print(
		    &scan[i],
		    blk,
		    radix,
		    next_skip - 1,
		    tab
		);
		blk += radix;
	}
	tab -= 4;

	printf(
	    "%*.*s}\n",
	    tab, tab, ""
	);
}

#endif
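/*
 * As the header notes, this file can be compiled stand-alone for
 * debugging.  Outside of _KERNEL the stubs near the top map malloc/free
 * onto libc and the BLIST_DEBUG main() below provides a tiny interactive
 * shell.  The exact compile command depends on where sys/blist.h lives;
 * something like the following (illustrative path only) is typical:
 *
 *	cc -I/usr/src/sys subr_blist.c -o blist
 */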
#ifdef BLIST_DEBUG

int
main(int ac, char **av)
{
	int size = 1024;
	int i;
	blist_t bl;

	for (i = 1; i < ac; ++i) {
		const char *ptr = av[i];
		if (*ptr != '-') {
			size = strtol(ptr, NULL, 0);
			continue;
		}
		ptr += 2;
		fprintf(stderr, "Bad option: %s\n", ptr - 2);
		exit(1);
	}
	bl = blist_create(size);
	blist_free(bl, 0, size);

	for (;;) {
		char buf[1024];
		daddr_t da = 0;
		daddr_t count = 0;


		printf("%lld/%lld/%lld> ", (long long)bl->bl_free,
		    (long long)size, (long long)bl->bl_radix);
		fflush(stdout);
		if (fgets(buf, sizeof(buf), stdin) == NULL)
			break;
		switch(buf[0]) {
		case 'r':
			if (sscanf(buf + 1, "%lld", &count) == 1) {
				blist_resize(&bl, count, 1);
			} else {
				printf("?\n");
			}
			/* fall through to print the resized tree */
		case 'p':
			blist_print(bl);
			break;
		case 'a':
			if (sscanf(buf + 1, "%lld", &count) == 1) {
				daddr_t blk = blist_alloc(bl, count);
				printf("    R=%08llx\n", (long long)blk);
			} else {
				printf("?\n");
			}
			break;
		case 'f':
			if (sscanf(buf + 1, "%llx %lld",
			    (long long *)&da, (long long *)&count) == 2) {
				blist_free(bl, da, count);
			} else {
				printf("?\n");
			}
			break;
		case 'l':
			if (sscanf(buf + 1, "%llx %lld",
			    (long long *)&da, (long long *)&count) == 2) {
				printf("    n=%d\n",
				    blist_fill(bl, da, count));
			} else {
				printf("?\n");
			}
			break;
		case '?':
		case 'h':
			puts(
			    "p          -print\n"
			    "a %d       -allocate\n"
			    "f %x %d    -free\n"
			    "l %x %d    -fill\n"
			    "r %d       -resize\n"
			    "h/?        -help"
			);
			break;
		default:
			printf("?\n");
			break;
		}
	}
	return(0);
}

void
panic(const char *ctl, ...)
{
	va_list va;

	va_start(va, ctl);
	vfprintf(stderr, ctl, va);
	fprintf(stderr, "\n");
	va_end(va);
	exit(1);
}

#endif