1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Sparse bit array 4 * 5 * Copyright (C) 2018, Google LLC. 6 * Copyright (C) 2018, Red Hat, Inc. (code style cleanup and fuzzing driver) 7 * 8 * This library provides functions to support a memory efficient bit array, 9 * with an index size of 2^64. A sparsebit array is allocated through 10 * the use sparsebit_alloc() and free'd via sparsebit_free(), 11 * such as in the following: 12 * 13 * struct sparsebit *s; 14 * s = sparsebit_alloc(); 15 * sparsebit_free(&s); 16 * 17 * The struct sparsebit type resolves down to a struct sparsebit. 18 * Note that, sparsebit_free() takes a pointer to the sparsebit 19 * structure. This is so that sparsebit_free() is able to poison 20 * the pointer (e.g. set it to NULL) to the struct sparsebit before 21 * returning to the caller. 22 * 23 * Between the return of sparsebit_alloc() and the call of 24 * sparsebit_free(), there are multiple query and modifying operations 25 * that can be performed on the allocated sparsebit array. All of 26 * these operations take as a parameter the value returned from 27 * sparsebit_alloc() and most also take a bit index. Frequently 28 * used routines include: 29 * 30 * ---- Query Operations 31 * sparsebit_is_set(s, idx) 32 * sparsebit_is_clear(s, idx) 33 * sparsebit_any_set(s) 34 * sparsebit_first_set(s) 35 * sparsebit_next_set(s, prev_idx) 36 * 37 * ---- Modifying Operations 38 * sparsebit_set(s, idx) 39 * sparsebit_clear(s, idx) 40 * sparsebit_set_num(s, idx, num); 41 * sparsebit_clear_num(s, idx, num); 42 * 43 * A common operation, is to itterate over all the bits set in a test 44 * sparsebit array. This can be done via code with the following structure: 45 * 46 * sparsebit_idx_t idx; 47 * if (sparsebit_any_set(s)) { 48 * idx = sparsebit_first_set(s); 49 * do { 50 * ... 51 * idx = sparsebit_next_set(s, idx); 52 * } while (idx != 0); 53 * } 54 * 55 * The index of the first bit set needs to be obtained via 56 * sparsebit_first_set(), because sparsebit_next_set(), needs 57 * the index of the previously set. The sparsebit_idx_t type is 58 * unsigned, so there is no previous index before 0 that is available. 59 * Also, the call to sparsebit_first_set() is not made unless there 60 * is at least 1 bit in the array set. This is because sparsebit_first_set() 61 * aborts if sparsebit_first_set() is called with no bits set. 62 * It is the callers responsibility to assure that the 63 * sparsebit array has at least a single bit set before calling 64 * sparsebit_first_set(). 65 * 66 * ==== Implementation Overview ==== 67 * For the most part the internal implementation of sparsebit is 68 * opaque to the caller. One important implementation detail that the 69 * caller may need to be aware of is the spatial complexity of the 70 * implementation. This implementation of a sparsebit array is not 71 * only sparse, in that it uses memory proportional to the number of bits 72 * set. It is also efficient in memory usage when most of the bits are 73 * set. 74 * 75 * At a high-level the state of the bit settings are maintained through 76 * the use of a binary-search tree, where each node contains at least 77 * the following members: 78 * 79 * typedef uint64_t sparsebit_idx_t; 80 * typedef uint64_t sparsebit_num_t; 81 * 82 * sparsebit_idx_t idx; 83 * uint32_t mask; 84 * sparsebit_num_t num_after; 85 * 86 * The idx member contains the bit index of the first bit described by this 87 * node, while the mask member stores the setting of the first 32-bits. 88 * The setting of the bit at idx + n, where 0 <= n < 32, is located in the 89 * mask member at 1 << n. 90 * 91 * Nodes are sorted by idx and the bits described by two nodes will never 92 * overlap. The idx member is always aligned to the mask size, i.e. a 93 * multiple of 32. 94 * 95 * Beyond a typical implementation, the nodes in this implementation also 96 * contains a member named num_after. The num_after member holds the 97 * number of bits immediately after the mask bits that are contiguously set. 98 * The use of the num_after member allows this implementation to efficiently 99 * represent cases where most bits are set. For example, the case of all 100 * but the last two bits set, is represented by the following two nodes: 101 * 102 * node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0 103 * node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0 104 * 105 * ==== Invariants ==== 106 * This implementation usses the following invariants: 107 * 108 * + Node are only used to represent bits that are set. 109 * Nodes with a mask of 0 and num_after of 0 are not allowed. 110 * 111 * + Sum of bits set in all the nodes is equal to the value of 112 * the struct sparsebit_pvt num_set member. 113 * 114 * + The setting of at least one bit is always described in a nodes 115 * mask (mask >= 1). 116 * 117 * + A node with all mask bits set only occurs when the last bit 118 * described by the previous node is not equal to this nodes 119 * starting index - 1. All such occurences of this condition are 120 * avoided by moving the setting of the nodes mask bits into 121 * the previous nodes num_after setting. 122 * 123 * + Node starting index is evenly divisible by the number of bits 124 * within a nodes mask member. 125 * 126 * + Nodes never represent a range of bits that wrap around the 127 * highest supported index. 128 * 129 * (idx + MASK_BITS + num_after - 1) <= ((sparsebit_idx_t) 0) - 1) 130 * 131 * As a consequence of the above, the num_after member of a node 132 * will always be <=: 133 * 134 * maximum_index - nodes_starting_index - number_of_mask_bits 135 * 136 * + Nodes within the binary search tree are sorted based on each 137 * nodes starting index. 138 * 139 * + The range of bits described by any two nodes do not overlap. The 140 * range of bits described by a single node is: 141 * 142 * start: node->idx 143 * end (inclusive): node->idx + MASK_BITS + node->num_after - 1; 144 * 145 * Note, at times these invariants are temporarily violated for a 146 * specific portion of the code. For example, when setting a mask 147 * bit, there is a small delay between when the mask bit is set and the 148 * value in the struct sparsebit_pvt num_set member is updated. Other 149 * temporary violations occur when node_split() is called with a specified 150 * index and assures that a node where its mask represents the bit 151 * at the specified index exists. At times to do this node_split() 152 * must split an existing node into two nodes or create a node that 153 * has no bits set. Such temporary violations must be corrected before 154 * returning to the caller. These corrections are typically performed 155 * by the local function node_reduce(). 156 */ 157 158 #include "test_util.h" 159 #include "sparsebit.h" 160 #include <limits.h> 161 #include <assert.h> 162 163 #define DUMP_LINE_MAX 100 /* Does not include indent amount */ 164 165 typedef uint32_t mask_t; 166 #define MASK_BITS (sizeof(mask_t) * CHAR_BIT) 167 168 struct node { 169 struct node *parent; 170 struct node *left; 171 struct node *right; 172 sparsebit_idx_t idx; /* index of least-significant bit in mask */ 173 sparsebit_num_t num_after; /* num contiguously set after mask */ 174 mask_t mask; 175 }; 176 177 struct sparsebit { 178 /* 179 * Points to root node of the binary search 180 * tree. Equal to NULL when no bits are set in 181 * the entire sparsebit array. 182 */ 183 struct node *root; 184 185 /* 186 * A redundant count of the total number of bits set. Used for 187 * diagnostic purposes and to change the time complexity of 188 * sparsebit_num_set() from O(n) to O(1). 189 * Note: Due to overflow, a value of 0 means none or all set. 190 */ 191 sparsebit_num_t num_set; 192 }; 193 194 /* Returns the number of set bits described by the settings 195 * of the node pointed to by nodep. 196 */ 197 static sparsebit_num_t node_num_set(struct node *nodep) 198 { 199 return nodep->num_after + __builtin_popcount(nodep->mask); 200 } 201 202 /* Returns a pointer to the node that describes the 203 * lowest bit index. 204 */ 205 static struct node *node_first(struct sparsebit *s) 206 { 207 struct node *nodep; 208 209 for (nodep = s->root; nodep && nodep->left; nodep = nodep->left) 210 ; 211 212 return nodep; 213 } 214 215 /* Returns a pointer to the node that describes the 216 * lowest bit index > the index of the node pointed to by np. 217 * Returns NULL if no node with a higher index exists. 218 */ 219 static struct node *node_next(struct sparsebit *s, struct node *np) 220 { 221 struct node *nodep = np; 222 223 /* 224 * If current node has a right child, next node is the left-most 225 * of the right child. 226 */ 227 if (nodep->right) { 228 for (nodep = nodep->right; nodep->left; nodep = nodep->left) 229 ; 230 return nodep; 231 } 232 233 /* 234 * No right child. Go up until node is left child of a parent. 235 * That parent is then the next node. 236 */ 237 while (nodep->parent && nodep == nodep->parent->right) 238 nodep = nodep->parent; 239 240 return nodep->parent; 241 } 242 243 /* Searches for and returns a pointer to the node that describes the 244 * highest index < the index of the node pointed to by np. 245 * Returns NULL if no node with a lower index exists. 246 */ 247 static struct node *node_prev(struct sparsebit *s, struct node *np) 248 { 249 struct node *nodep = np; 250 251 /* 252 * If current node has a left child, next node is the right-most 253 * of the left child. 254 */ 255 if (nodep->left) { 256 for (nodep = nodep->left; nodep->right; nodep = nodep->right) 257 ; 258 return (struct node *) nodep; 259 } 260 261 /* 262 * No left child. Go up until node is right child of a parent. 263 * That parent is then the next node. 264 */ 265 while (nodep->parent && nodep == nodep->parent->left) 266 nodep = nodep->parent; 267 268 return (struct node *) nodep->parent; 269 } 270 271 272 /* Allocates space to hold a copy of the node sub-tree pointed to by 273 * subtree and duplicates the bit settings to the newly allocated nodes. 274 * Returns the newly allocated copy of subtree. 275 */ 276 static struct node *node_copy_subtree(struct node *subtree) 277 { 278 struct node *root; 279 280 /* Duplicate the node at the root of the subtree */ 281 root = calloc(1, sizeof(*root)); 282 if (!root) { 283 perror("calloc"); 284 abort(); 285 } 286 287 root->idx = subtree->idx; 288 root->mask = subtree->mask; 289 root->num_after = subtree->num_after; 290 291 /* As needed, recursively duplicate the left and right subtrees */ 292 if (subtree->left) { 293 root->left = node_copy_subtree(subtree->left); 294 root->left->parent = root; 295 } 296 297 if (subtree->right) { 298 root->right = node_copy_subtree(subtree->right); 299 root->right->parent = root; 300 } 301 302 return root; 303 } 304 305 /* Searches for and returns a pointer to the node that describes the setting 306 * of the bit given by idx. A node describes the setting of a bit if its 307 * index is within the bits described by the mask bits or the number of 308 * contiguous bits set after the mask. Returns NULL if there is no such node. 309 */ 310 static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx) 311 { 312 struct node *nodep; 313 314 /* Find the node that describes the setting of the bit at idx */ 315 for (nodep = s->root; nodep; 316 nodep = nodep->idx > idx ? nodep->left : nodep->right) { 317 if (idx >= nodep->idx && 318 idx <= nodep->idx + MASK_BITS + nodep->num_after - 1) 319 break; 320 } 321 322 return nodep; 323 } 324 325 /* Entry Requirements: 326 * + A node that describes the setting of idx is not already present. 327 * 328 * Adds a new node to describe the setting of the bit at the index given 329 * by idx. Returns a pointer to the newly added node. 330 * 331 * TODO(lhuemill): Degenerate cases causes the tree to get unbalanced. 332 */ 333 static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx) 334 { 335 struct node *nodep, *parentp, *prev; 336 337 /* Allocate and initialize the new node. */ 338 nodep = calloc(1, sizeof(*nodep)); 339 if (!nodep) { 340 perror("calloc"); 341 abort(); 342 } 343 344 nodep->idx = idx & -MASK_BITS; 345 346 /* If no nodes, set it up as the root node. */ 347 if (!s->root) { 348 s->root = nodep; 349 return nodep; 350 } 351 352 /* 353 * Find the parent where the new node should be attached 354 * and add the node there. 355 */ 356 parentp = s->root; 357 while (true) { 358 if (idx < parentp->idx) { 359 if (!parentp->left) { 360 parentp->left = nodep; 361 nodep->parent = parentp; 362 break; 363 } 364 parentp = parentp->left; 365 } else { 366 assert(idx > parentp->idx + MASK_BITS + parentp->num_after - 1); 367 if (!parentp->right) { 368 parentp->right = nodep; 369 nodep->parent = parentp; 370 break; 371 } 372 parentp = parentp->right; 373 } 374 } 375 376 /* 377 * Does num_after bits of previous node overlap with the mask 378 * of the new node? If so set the bits in the new nodes mask 379 * and reduce the previous nodes num_after. 380 */ 381 prev = node_prev(s, nodep); 382 while (prev && prev->idx + MASK_BITS + prev->num_after - 1 >= nodep->idx) { 383 unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1) 384 - nodep->idx; 385 assert(prev->num_after > 0); 386 assert(n1 < MASK_BITS); 387 assert(!(nodep->mask & (1 << n1))); 388 nodep->mask |= (1 << n1); 389 prev->num_after--; 390 } 391 392 return nodep; 393 } 394 395 /* Returns whether all the bits in the sparsebit array are set. */ 396 bool sparsebit_all_set(struct sparsebit *s) 397 { 398 /* 399 * If any nodes there must be at least one bit set. Only case 400 * where a bit is set and total num set is 0, is when all bits 401 * are set. 402 */ 403 return s->root && s->num_set == 0; 404 } 405 406 /* Clears all bits described by the node pointed to by nodep, then 407 * removes the node. 408 */ 409 static void node_rm(struct sparsebit *s, struct node *nodep) 410 { 411 struct node *tmp; 412 sparsebit_num_t num_set; 413 414 num_set = node_num_set(nodep); 415 assert(s->num_set >= num_set || sparsebit_all_set(s)); 416 s->num_set -= node_num_set(nodep); 417 418 /* Have both left and right child */ 419 if (nodep->left && nodep->right) { 420 /* 421 * Move left children to the leftmost leaf node 422 * of the right child. 423 */ 424 for (tmp = nodep->right; tmp->left; tmp = tmp->left) 425 ; 426 tmp->left = nodep->left; 427 nodep->left = NULL; 428 tmp->left->parent = tmp; 429 } 430 431 /* Left only child */ 432 if (nodep->left) { 433 if (!nodep->parent) { 434 s->root = nodep->left; 435 nodep->left->parent = NULL; 436 } else { 437 nodep->left->parent = nodep->parent; 438 if (nodep == nodep->parent->left) 439 nodep->parent->left = nodep->left; 440 else { 441 assert(nodep == nodep->parent->right); 442 nodep->parent->right = nodep->left; 443 } 444 } 445 446 nodep->parent = nodep->left = nodep->right = NULL; 447 free(nodep); 448 449 return; 450 } 451 452 453 /* Right only child */ 454 if (nodep->right) { 455 if (!nodep->parent) { 456 s->root = nodep->right; 457 nodep->right->parent = NULL; 458 } else { 459 nodep->right->parent = nodep->parent; 460 if (nodep == nodep->parent->left) 461 nodep->parent->left = nodep->right; 462 else { 463 assert(nodep == nodep->parent->right); 464 nodep->parent->right = nodep->right; 465 } 466 } 467 468 nodep->parent = nodep->left = nodep->right = NULL; 469 free(nodep); 470 471 return; 472 } 473 474 /* Leaf Node */ 475 if (!nodep->parent) { 476 s->root = NULL; 477 } else { 478 if (nodep->parent->left == nodep) 479 nodep->parent->left = NULL; 480 else { 481 assert(nodep == nodep->parent->right); 482 nodep->parent->right = NULL; 483 } 484 } 485 486 nodep->parent = nodep->left = nodep->right = NULL; 487 free(nodep); 488 489 return; 490 } 491 492 /* Splits the node containing the bit at idx so that there is a node 493 * that starts at the specified index. If no such node exists, a new 494 * node at the specified index is created. Returns the new node. 495 * 496 * idx must start of a mask boundary. 497 */ 498 static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx) 499 { 500 struct node *nodep1, *nodep2; 501 sparsebit_idx_t offset; 502 sparsebit_num_t orig_num_after; 503 504 assert(!(idx % MASK_BITS)); 505 506 /* 507 * Is there a node that describes the setting of idx? 508 * If not, add it. 509 */ 510 nodep1 = node_find(s, idx); 511 if (!nodep1) 512 return node_add(s, idx); 513 514 /* 515 * All done if the starting index of the node is where the 516 * split should occur. 517 */ 518 if (nodep1->idx == idx) 519 return nodep1; 520 521 /* 522 * Split point not at start of mask, so it must be part of 523 * bits described by num_after. 524 */ 525 526 /* 527 * Calculate offset within num_after for where the split is 528 * to occur. 529 */ 530 offset = idx - (nodep1->idx + MASK_BITS); 531 orig_num_after = nodep1->num_after; 532 533 /* 534 * Add a new node to describe the bits starting at 535 * the split point. 536 */ 537 nodep1->num_after = offset; 538 nodep2 = node_add(s, idx); 539 540 /* Move bits after the split point into the new node */ 541 nodep2->num_after = orig_num_after - offset; 542 if (nodep2->num_after >= MASK_BITS) { 543 nodep2->mask = ~(mask_t) 0; 544 nodep2->num_after -= MASK_BITS; 545 } else { 546 nodep2->mask = (1 << nodep2->num_after) - 1; 547 nodep2->num_after = 0; 548 } 549 550 return nodep2; 551 } 552 553 /* Iteratively reduces the node pointed to by nodep and its adjacent 554 * nodes into a more compact form. For example, a node with a mask with 555 * all bits set adjacent to a previous node, will get combined into a 556 * single node with an increased num_after setting. 557 * 558 * After each reduction, a further check is made to see if additional 559 * reductions are possible with the new previous and next nodes. Note, 560 * a search for a reduction is only done across the nodes nearest nodep 561 * and those that became part of a reduction. Reductions beyond nodep 562 * and the adjacent nodes that are reduced are not discovered. It is the 563 * responsibility of the caller to pass a nodep that is within one node 564 * of each possible reduction. 565 * 566 * This function does not fix the temporary violation of all invariants. 567 * For example it does not fix the case where the bit settings described 568 * by two or more nodes overlap. Such a violation introduces the potential 569 * complication of a bit setting for a specific index having different settings 570 * in different nodes. This would then introduce the further complication 571 * of which node has the correct setting of the bit and thus such conditions 572 * are not allowed. 573 * 574 * This function is designed to fix invariant violations that are introduced 575 * by node_split() and by changes to the nodes mask or num_after members. 576 * For example, when setting a bit within a nodes mask, the function that 577 * sets the bit doesn't have to worry about whether the setting of that 578 * bit caused the mask to have leading only or trailing only bits set. 579 * Instead, the function can call node_reduce(), with nodep equal to the 580 * node address that it set a mask bit in, and node_reduce() will notice 581 * the cases of leading or trailing only bits and that there is an 582 * adjacent node that the bit settings could be merged into. 583 * 584 * This implementation specifically detects and corrects violation of the 585 * following invariants: 586 * 587 * + Node are only used to represent bits that are set. 588 * Nodes with a mask of 0 and num_after of 0 are not allowed. 589 * 590 * + The setting of at least one bit is always described in a nodes 591 * mask (mask >= 1). 592 * 593 * + A node with all mask bits set only occurs when the last bit 594 * described by the previous node is not equal to this nodes 595 * starting index - 1. All such occurences of this condition are 596 * avoided by moving the setting of the nodes mask bits into 597 * the previous nodes num_after setting. 598 */ 599 static void node_reduce(struct sparsebit *s, struct node *nodep) 600 { 601 bool reduction_performed; 602 603 do { 604 reduction_performed = false; 605 struct node *prev, *next, *tmp; 606 607 /* 1) Potential reductions within the current node. */ 608 609 /* Nodes with all bits cleared may be removed. */ 610 if (nodep->mask == 0 && nodep->num_after == 0) { 611 /* 612 * About to remove the node pointed to by 613 * nodep, which normally would cause a problem 614 * for the next pass through the reduction loop, 615 * because the node at the starting point no longer 616 * exists. This potential problem is handled 617 * by first remembering the location of the next 618 * or previous nodes. Doesn't matter which, because 619 * once the node at nodep is removed, there will be 620 * no other nodes between prev and next. 621 * 622 * Note, the checks performed on nodep against both 623 * both prev and next both check for an adjacent 624 * node that can be reduced into a single node. As 625 * such, after removing the node at nodep, doesn't 626 * matter whether the nodep for the next pass 627 * through the loop is equal to the previous pass 628 * prev or next node. Either way, on the next pass 629 * the one not selected will become either the 630 * prev or next node. 631 */ 632 tmp = node_next(s, nodep); 633 if (!tmp) 634 tmp = node_prev(s, nodep); 635 636 node_rm(s, nodep); 637 638 nodep = tmp; 639 reduction_performed = true; 640 continue; 641 } 642 643 /* 644 * When the mask is 0, can reduce the amount of num_after 645 * bits by moving the initial num_after bits into the mask. 646 */ 647 if (nodep->mask == 0) { 648 assert(nodep->num_after != 0); 649 assert(nodep->idx + MASK_BITS > nodep->idx); 650 651 nodep->idx += MASK_BITS; 652 653 if (nodep->num_after >= MASK_BITS) { 654 nodep->mask = ~0; 655 nodep->num_after -= MASK_BITS; 656 } else { 657 nodep->mask = (1u << nodep->num_after) - 1; 658 nodep->num_after = 0; 659 } 660 661 reduction_performed = true; 662 continue; 663 } 664 665 /* 666 * 2) Potential reductions between the current and 667 * previous nodes. 668 */ 669 prev = node_prev(s, nodep); 670 if (prev) { 671 sparsebit_idx_t prev_highest_bit; 672 673 /* Nodes with no bits set can be removed. */ 674 if (prev->mask == 0 && prev->num_after == 0) { 675 node_rm(s, prev); 676 677 reduction_performed = true; 678 continue; 679 } 680 681 /* 682 * All mask bits set and previous node has 683 * adjacent index. 684 */ 685 if (nodep->mask + 1 == 0 && 686 prev->idx + MASK_BITS == nodep->idx) { 687 prev->num_after += MASK_BITS + nodep->num_after; 688 nodep->mask = 0; 689 nodep->num_after = 0; 690 691 reduction_performed = true; 692 continue; 693 } 694 695 /* 696 * Is node adjacent to previous node and the node 697 * contains a single contiguous range of bits 698 * starting from the beginning of the mask? 699 */ 700 prev_highest_bit = prev->idx + MASK_BITS - 1 + prev->num_after; 701 if (prev_highest_bit + 1 == nodep->idx && 702 (nodep->mask | (nodep->mask >> 1)) == nodep->mask) { 703 /* 704 * How many contiguous bits are there? 705 * Is equal to the total number of set 706 * bits, due to an earlier check that 707 * there is a single contiguous range of 708 * set bits. 709 */ 710 unsigned int num_contiguous 711 = __builtin_popcount(nodep->mask); 712 assert((num_contiguous > 0) && 713 ((1ULL << num_contiguous) - 1) == nodep->mask); 714 715 prev->num_after += num_contiguous; 716 nodep->mask = 0; 717 718 /* 719 * For predictable performance, handle special 720 * case where all mask bits are set and there 721 * is a non-zero num_after setting. This code 722 * is functionally correct without the following 723 * conditionalized statements, but without them 724 * the value of num_after is only reduced by 725 * the number of mask bits per pass. There are 726 * cases where num_after can be close to 2^64. 727 * Without this code it could take nearly 728 * (2^64) / 32 passes to perform the full 729 * reduction. 730 */ 731 if (num_contiguous == MASK_BITS) { 732 prev->num_after += nodep->num_after; 733 nodep->num_after = 0; 734 } 735 736 reduction_performed = true; 737 continue; 738 } 739 } 740 741 /* 742 * 3) Potential reductions between the current and 743 * next nodes. 744 */ 745 next = node_next(s, nodep); 746 if (next) { 747 /* Nodes with no bits set can be removed. */ 748 if (next->mask == 0 && next->num_after == 0) { 749 node_rm(s, next); 750 reduction_performed = true; 751 continue; 752 } 753 754 /* 755 * Is next node index adjacent to current node 756 * and has a mask with all bits set? 757 */ 758 if (next->idx == nodep->idx + MASK_BITS + nodep->num_after && 759 next->mask == ~(mask_t) 0) { 760 nodep->num_after += MASK_BITS; 761 next->mask = 0; 762 nodep->num_after += next->num_after; 763 next->num_after = 0; 764 765 node_rm(s, next); 766 next = NULL; 767 768 reduction_performed = true; 769 continue; 770 } 771 } 772 } while (nodep && reduction_performed); 773 } 774 775 /* Returns whether the bit at the index given by idx, within the 776 * sparsebit array is set or not. 777 */ 778 bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx) 779 { 780 struct node *nodep; 781 782 /* Find the node that describes the setting of the bit at idx */ 783 for (nodep = s->root; nodep; 784 nodep = nodep->idx > idx ? nodep->left : nodep->right) 785 if (idx >= nodep->idx && 786 idx <= nodep->idx + MASK_BITS + nodep->num_after - 1) 787 goto have_node; 788 789 return false; 790 791 have_node: 792 /* Bit is set if it is any of the bits described by num_after */ 793 if (nodep->num_after && idx >= nodep->idx + MASK_BITS) 794 return true; 795 796 /* Is the corresponding mask bit set */ 797 assert(idx >= nodep->idx && idx - nodep->idx < MASK_BITS); 798 return !!(nodep->mask & (1 << (idx - nodep->idx))); 799 } 800 801 /* Within the sparsebit array pointed to by s, sets the bit 802 * at the index given by idx. 803 */ 804 static void bit_set(struct sparsebit *s, sparsebit_idx_t idx) 805 { 806 struct node *nodep; 807 808 /* Skip bits that are already set */ 809 if (sparsebit_is_set(s, idx)) 810 return; 811 812 /* 813 * Get a node where the bit at idx is described by the mask. 814 * The node_split will also create a node, if there isn't 815 * already a node that describes the setting of bit. 816 */ 817 nodep = node_split(s, idx & -MASK_BITS); 818 819 /* Set the bit within the nodes mask */ 820 assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1); 821 assert(!(nodep->mask & (1 << (idx - nodep->idx)))); 822 nodep->mask |= 1 << (idx - nodep->idx); 823 s->num_set++; 824 825 node_reduce(s, nodep); 826 } 827 828 /* Within the sparsebit array pointed to by s, clears the bit 829 * at the index given by idx. 830 */ 831 static void bit_clear(struct sparsebit *s, sparsebit_idx_t idx) 832 { 833 struct node *nodep; 834 835 /* Skip bits that are already cleared */ 836 if (!sparsebit_is_set(s, idx)) 837 return; 838 839 /* Is there a node that describes the setting of this bit? */ 840 nodep = node_find(s, idx); 841 if (!nodep) 842 return; 843 844 /* 845 * If a num_after bit, split the node, so that the bit is 846 * part of a node mask. 847 */ 848 if (idx >= nodep->idx + MASK_BITS) 849 nodep = node_split(s, idx & -MASK_BITS); 850 851 /* 852 * After node_split above, bit at idx should be within the mask. 853 * Clear that bit. 854 */ 855 assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1); 856 assert(nodep->mask & (1 << (idx - nodep->idx))); 857 nodep->mask &= ~(1 << (idx - nodep->idx)); 858 assert(s->num_set > 0 || sparsebit_all_set(s)); 859 s->num_set--; 860 861 node_reduce(s, nodep); 862 } 863 864 /* Recursively dumps to the FILE stream given by stream the contents 865 * of the sub-tree of nodes pointed to by nodep. Each line of output 866 * is prefixed by the number of spaces given by indent. On each 867 * recursion, the indent amount is increased by 2. This causes nodes 868 * at each level deeper into the binary search tree to be displayed 869 * with a greater indent. 870 */ 871 static void dump_nodes(FILE *stream, struct node *nodep, 872 unsigned int indent) 873 { 874 char *node_type; 875 876 /* Dump contents of node */ 877 if (!nodep->parent) 878 node_type = "root"; 879 else if (nodep == nodep->parent->left) 880 node_type = "left"; 881 else { 882 assert(nodep == nodep->parent->right); 883 node_type = "right"; 884 } 885 fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type, nodep); 886 fprintf(stream, "%*s parent: %p left: %p right: %p\n", indent, "", 887 nodep->parent, nodep->left, nodep->right); 888 fprintf(stream, "%*s idx: 0x%lx mask: 0x%x num_after: 0x%lx\n", 889 indent, "", nodep->idx, nodep->mask, nodep->num_after); 890 891 /* If present, dump contents of left child nodes */ 892 if (nodep->left) 893 dump_nodes(stream, nodep->left, indent + 2); 894 895 /* If present, dump contents of right child nodes */ 896 if (nodep->right) 897 dump_nodes(stream, nodep->right, indent + 2); 898 } 899 900 static inline sparsebit_idx_t node_first_set(struct node *nodep, int start) 901 { 902 mask_t leading = (mask_t)1 << start; 903 int n1 = __builtin_ctz(nodep->mask & -leading); 904 905 return nodep->idx + n1; 906 } 907 908 static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start) 909 { 910 mask_t leading = (mask_t)1 << start; 911 int n1 = __builtin_ctz(~nodep->mask & -leading); 912 913 return nodep->idx + n1; 914 } 915 916 /* Dumps to the FILE stream specified by stream, the implementation dependent 917 * internal state of s. Each line of output is prefixed with the number 918 * of spaces given by indent. The output is completely implementation 919 * dependent and subject to change. Output from this function should only 920 * be used for diagnostic purposes. For example, this function can be 921 * used by test cases after they detect an unexpected condition, as a means 922 * to capture diagnostic information. 923 */ 924 static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s, 925 unsigned int indent) 926 { 927 /* Dump the contents of s */ 928 fprintf(stream, "%*sroot: %p\n", indent, "", s->root); 929 fprintf(stream, "%*snum_set: 0x%lx\n", indent, "", s->num_set); 930 931 if (s->root) 932 dump_nodes(stream, s->root, indent); 933 } 934 935 /* Allocates and returns a new sparsebit array. The initial state 936 * of the newly allocated sparsebit array has all bits cleared. 937 */ 938 struct sparsebit *sparsebit_alloc(void) 939 { 940 struct sparsebit *s; 941 942 /* Allocate top level structure. */ 943 s = calloc(1, sizeof(*s)); 944 if (!s) { 945 perror("calloc"); 946 abort(); 947 } 948 949 return s; 950 } 951 952 /* Frees the implementation dependent data for the sparsebit array 953 * pointed to by s and poisons the pointer to that data. 954 */ 955 void sparsebit_free(struct sparsebit **sbitp) 956 { 957 struct sparsebit *s = *sbitp; 958 959 if (!s) 960 return; 961 962 sparsebit_clear_all(s); 963 free(s); 964 *sbitp = NULL; 965 } 966 967 /* Makes a copy of the sparsebit array given by s, to the sparsebit 968 * array given by d. Note, d must have already been allocated via 969 * sparsebit_alloc(). It can though already have bits set, which 970 * if different from src will be cleared. 971 */ 972 void sparsebit_copy(struct sparsebit *d, struct sparsebit *s) 973 { 974 /* First clear any bits already set in the destination */ 975 sparsebit_clear_all(d); 976 977 if (s->root) { 978 d->root = node_copy_subtree(s->root); 979 d->num_set = s->num_set; 980 } 981 } 982 983 /* Returns whether num consecutive bits starting at idx are all set. */ 984 bool sparsebit_is_set_num(struct sparsebit *s, 985 sparsebit_idx_t idx, sparsebit_num_t num) 986 { 987 sparsebit_idx_t next_cleared; 988 989 assert(num > 0); 990 assert(idx + num - 1 >= idx); 991 992 /* With num > 0, the first bit must be set. */ 993 if (!sparsebit_is_set(s, idx)) 994 return false; 995 996 /* Find the next cleared bit */ 997 next_cleared = sparsebit_next_clear(s, idx); 998 999 /* 1000 * If no cleared bits beyond idx, then there are at least num 1001 * set bits. idx + num doesn't wrap. Otherwise check if 1002 * there are enough set bits between idx and the next cleared bit. 1003 */ 1004 return next_cleared == 0 || next_cleared - idx >= num; 1005 } 1006 1007 /* Returns whether the bit at the index given by idx. */ 1008 bool sparsebit_is_clear(struct sparsebit *s, 1009 sparsebit_idx_t idx) 1010 { 1011 return !sparsebit_is_set(s, idx); 1012 } 1013 1014 /* Returns whether num consecutive bits starting at idx are all cleared. */ 1015 bool sparsebit_is_clear_num(struct sparsebit *s, 1016 sparsebit_idx_t idx, sparsebit_num_t num) 1017 { 1018 sparsebit_idx_t next_set; 1019 1020 assert(num > 0); 1021 assert(idx + num - 1 >= idx); 1022 1023 /* With num > 0, the first bit must be cleared. */ 1024 if (!sparsebit_is_clear(s, idx)) 1025 return false; 1026 1027 /* Find the next set bit */ 1028 next_set = sparsebit_next_set(s, idx); 1029 1030 /* 1031 * If no set bits beyond idx, then there are at least num 1032 * cleared bits. idx + num doesn't wrap. Otherwise check if 1033 * there are enough cleared bits between idx and the next set bit. 1034 */ 1035 return next_set == 0 || next_set - idx >= num; 1036 } 1037 1038 /* Returns the total number of bits set. Note: 0 is also returned for 1039 * the case of all bits set. This is because with all bits set, there 1040 * is 1 additional bit set beyond what can be represented in the return 1041 * value. Use sparsebit_any_set(), instead of sparsebit_num_set() > 0, 1042 * to determine if the sparsebit array has any bits set. 1043 */ 1044 sparsebit_num_t sparsebit_num_set(struct sparsebit *s) 1045 { 1046 return s->num_set; 1047 } 1048 1049 /* Returns whether any bit is set in the sparsebit array. */ 1050 bool sparsebit_any_set(struct sparsebit *s) 1051 { 1052 /* 1053 * Nodes only describe set bits. If any nodes then there 1054 * is at least 1 bit set. 1055 */ 1056 if (!s->root) 1057 return false; 1058 1059 /* 1060 * Every node should have a non-zero mask. For now will 1061 * just assure that the root node has a non-zero mask, 1062 * which is a quick check that at least 1 bit is set. 1063 */ 1064 assert(s->root->mask != 0); 1065 assert(s->num_set > 0 || 1066 (s->root->num_after == ((sparsebit_num_t) 0) - MASK_BITS && 1067 s->root->mask == ~(mask_t) 0)); 1068 1069 return true; 1070 } 1071 1072 /* Returns whether all the bits in the sparsebit array are cleared. */ 1073 bool sparsebit_all_clear(struct sparsebit *s) 1074 { 1075 return !sparsebit_any_set(s); 1076 } 1077 1078 /* Returns whether all the bits in the sparsebit array are set. */ 1079 bool sparsebit_any_clear(struct sparsebit *s) 1080 { 1081 return !sparsebit_all_set(s); 1082 } 1083 1084 /* Returns the index of the first set bit. Abort if no bits are set. 1085 */ 1086 sparsebit_idx_t sparsebit_first_set(struct sparsebit *s) 1087 { 1088 struct node *nodep; 1089 1090 /* Validate at least 1 bit is set */ 1091 assert(sparsebit_any_set(s)); 1092 1093 nodep = node_first(s); 1094 return node_first_set(nodep, 0); 1095 } 1096 1097 /* Returns the index of the first cleared bit. Abort if 1098 * no bits are cleared. 1099 */ 1100 sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s) 1101 { 1102 struct node *nodep1, *nodep2; 1103 1104 /* Validate at least 1 bit is cleared. */ 1105 assert(sparsebit_any_clear(s)); 1106 1107 /* If no nodes or first node index > 0 then lowest cleared is 0 */ 1108 nodep1 = node_first(s); 1109 if (!nodep1 || nodep1->idx > 0) 1110 return 0; 1111 1112 /* Does the mask in the first node contain any cleared bits. */ 1113 if (nodep1->mask != ~(mask_t) 0) 1114 return node_first_clear(nodep1, 0); 1115 1116 /* 1117 * All mask bits set in first node. If there isn't a second node 1118 * then the first cleared bit is the first bit after the bits 1119 * described by the first node. 1120 */ 1121 nodep2 = node_next(s, nodep1); 1122 if (!nodep2) { 1123 /* 1124 * No second node. First cleared bit is first bit beyond 1125 * bits described by first node. 1126 */ 1127 assert(nodep1->mask == ~(mask_t) 0); 1128 assert(nodep1->idx + MASK_BITS + nodep1->num_after != (sparsebit_idx_t) 0); 1129 return nodep1->idx + MASK_BITS + nodep1->num_after; 1130 } 1131 1132 /* 1133 * There is a second node. 1134 * If it is not adjacent to the first node, then there is a gap 1135 * of cleared bits between the nodes, and the first cleared bit 1136 * is the first bit within the gap. 1137 */ 1138 if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx) 1139 return nodep1->idx + MASK_BITS + nodep1->num_after; 1140 1141 /* 1142 * Second node is adjacent to the first node. 1143 * Because it is adjacent, its mask should be non-zero. If all 1144 * its mask bits are set, then with it being adjacent, it should 1145 * have had the mask bits moved into the num_after setting of the 1146 * previous node. 1147 */ 1148 return node_first_clear(nodep2, 0); 1149 } 1150 1151 /* Returns index of next bit set within s after the index given by prev. 1152 * Returns 0 if there are no bits after prev that are set. 1153 */ 1154 sparsebit_idx_t sparsebit_next_set(struct sparsebit *s, 1155 sparsebit_idx_t prev) 1156 { 1157 sparsebit_idx_t lowest_possible = prev + 1; 1158 sparsebit_idx_t start; 1159 struct node *nodep; 1160 1161 /* A bit after the highest index can't be set. */ 1162 if (lowest_possible == 0) 1163 return 0; 1164 1165 /* 1166 * Find the leftmost 'candidate' overlapping or to the right 1167 * of lowest_possible. 1168 */ 1169 struct node *candidate = NULL; 1170 1171 /* True iff lowest_possible is within candidate */ 1172 bool contains = false; 1173 1174 /* 1175 * Find node that describes setting of bit at lowest_possible. 1176 * If such a node doesn't exist, find the node with the lowest 1177 * starting index that is > lowest_possible. 1178 */ 1179 for (nodep = s->root; nodep;) { 1180 if ((nodep->idx + MASK_BITS + nodep->num_after - 1) 1181 >= lowest_possible) { 1182 candidate = nodep; 1183 if (candidate->idx <= lowest_possible) { 1184 contains = true; 1185 break; 1186 } 1187 nodep = nodep->left; 1188 } else { 1189 nodep = nodep->right; 1190 } 1191 } 1192 if (!candidate) 1193 return 0; 1194 1195 assert(candidate->mask != 0); 1196 1197 /* Does the candidate node describe the setting of lowest_possible? */ 1198 if (!contains) { 1199 /* 1200 * Candidate doesn't describe setting of bit at lowest_possible. 1201 * Candidate points to the first node with a starting index 1202 * > lowest_possible. 1203 */ 1204 assert(candidate->idx > lowest_possible); 1205 1206 return node_first_set(candidate, 0); 1207 } 1208 1209 /* 1210 * Candidate describes setting of bit at lowest_possible. 1211 * Note: although the node describes the setting of the bit 1212 * at lowest_possible, its possible that its setting and the 1213 * setting of all latter bits described by this node are 0. 1214 * For now, just handle the cases where this node describes 1215 * a bit at or after an index of lowest_possible that is set. 1216 */ 1217 start = lowest_possible - candidate->idx; 1218 1219 if (start < MASK_BITS && candidate->mask >= (1 << start)) 1220 return node_first_set(candidate, start); 1221 1222 if (candidate->num_after) { 1223 sparsebit_idx_t first_num_after_idx = candidate->idx + MASK_BITS; 1224 1225 return lowest_possible < first_num_after_idx 1226 ? first_num_after_idx : lowest_possible; 1227 } 1228 1229 /* 1230 * Although candidate node describes setting of bit at 1231 * the index of lowest_possible, all bits at that index and 1232 * latter that are described by candidate are cleared. With 1233 * this, the next bit is the first bit in the next node, if 1234 * such a node exists. If a next node doesn't exist, then 1235 * there is no next set bit. 1236 */ 1237 candidate = node_next(s, candidate); 1238 if (!candidate) 1239 return 0; 1240 1241 return node_first_set(candidate, 0); 1242 } 1243 1244 /* Returns index of next bit cleared within s after the index given by prev. 1245 * Returns 0 if there are no bits after prev that are cleared. 1246 */ 1247 sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s, 1248 sparsebit_idx_t prev) 1249 { 1250 sparsebit_idx_t lowest_possible = prev + 1; 1251 sparsebit_idx_t idx; 1252 struct node *nodep1, *nodep2; 1253 1254 /* A bit after the highest index can't be set. */ 1255 if (lowest_possible == 0) 1256 return 0; 1257 1258 /* 1259 * Does a node describing the setting of lowest_possible exist? 1260 * If not, the bit at lowest_possible is cleared. 1261 */ 1262 nodep1 = node_find(s, lowest_possible); 1263 if (!nodep1) 1264 return lowest_possible; 1265 1266 /* Does a mask bit in node 1 describe the next cleared bit. */ 1267 for (idx = lowest_possible - nodep1->idx; idx < MASK_BITS; idx++) 1268 if (!(nodep1->mask & (1 << idx))) 1269 return nodep1->idx + idx; 1270 1271 /* 1272 * Next cleared bit is not described by node 1. If there 1273 * isn't a next node, then next cleared bit is described 1274 * by bit after the bits described by the first node. 1275 */ 1276 nodep2 = node_next(s, nodep1); 1277 if (!nodep2) 1278 return nodep1->idx + MASK_BITS + nodep1->num_after; 1279 1280 /* 1281 * There is a second node. 1282 * If it is not adjacent to the first node, then there is a gap 1283 * of cleared bits between the nodes, and the next cleared bit 1284 * is the first bit within the gap. 1285 */ 1286 if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx) 1287 return nodep1->idx + MASK_BITS + nodep1->num_after; 1288 1289 /* 1290 * Second node is adjacent to the first node. 1291 * Because it is adjacent, its mask should be non-zero. If all 1292 * its mask bits are set, then with it being adjacent, it should 1293 * have had the mask bits moved into the num_after setting of the 1294 * previous node. 1295 */ 1296 return node_first_clear(nodep2, 0); 1297 } 1298 1299 /* Starting with the index 1 greater than the index given by start, finds 1300 * and returns the index of the first sequence of num consecutively set 1301 * bits. Returns a value of 0 of no such sequence exists. 1302 */ 1303 sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s, 1304 sparsebit_idx_t start, sparsebit_num_t num) 1305 { 1306 sparsebit_idx_t idx; 1307 1308 assert(num >= 1); 1309 1310 for (idx = sparsebit_next_set(s, start); 1311 idx != 0 && idx + num - 1 >= idx; 1312 idx = sparsebit_next_set(s, idx)) { 1313 assert(sparsebit_is_set(s, idx)); 1314 1315 /* 1316 * Does the sequence of bits starting at idx consist of 1317 * num set bits? 1318 */ 1319 if (sparsebit_is_set_num(s, idx, num)) 1320 return idx; 1321 1322 /* 1323 * Sequence of set bits at idx isn't large enough. 1324 * Skip this entire sequence of set bits. 1325 */ 1326 idx = sparsebit_next_clear(s, idx); 1327 if (idx == 0) 1328 return 0; 1329 } 1330 1331 return 0; 1332 } 1333 1334 /* Starting with the index 1 greater than the index given by start, finds 1335 * and returns the index of the first sequence of num consecutively cleared 1336 * bits. Returns a value of 0 of no such sequence exists. 1337 */ 1338 sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s, 1339 sparsebit_idx_t start, sparsebit_num_t num) 1340 { 1341 sparsebit_idx_t idx; 1342 1343 assert(num >= 1); 1344 1345 for (idx = sparsebit_next_clear(s, start); 1346 idx != 0 && idx + num - 1 >= idx; 1347 idx = sparsebit_next_clear(s, idx)) { 1348 assert(sparsebit_is_clear(s, idx)); 1349 1350 /* 1351 * Does the sequence of bits starting at idx consist of 1352 * num cleared bits? 1353 */ 1354 if (sparsebit_is_clear_num(s, idx, num)) 1355 return idx; 1356 1357 /* 1358 * Sequence of cleared bits at idx isn't large enough. 1359 * Skip this entire sequence of cleared bits. 1360 */ 1361 idx = sparsebit_next_set(s, idx); 1362 if (idx == 0) 1363 return 0; 1364 } 1365 1366 return 0; 1367 } 1368 1369 /* Sets the bits * in the inclusive range idx through idx + num - 1. */ 1370 void sparsebit_set_num(struct sparsebit *s, 1371 sparsebit_idx_t start, sparsebit_num_t num) 1372 { 1373 struct node *nodep, *next; 1374 unsigned int n1; 1375 sparsebit_idx_t idx; 1376 sparsebit_num_t n; 1377 sparsebit_idx_t middle_start, middle_end; 1378 1379 assert(num > 0); 1380 assert(start + num - 1 >= start); 1381 1382 /* 1383 * Leading - bits before first mask boundary. 1384 * 1385 * TODO(lhuemill): With some effort it may be possible to 1386 * replace the following loop with a sequential sequence 1387 * of statements. High level sequence would be: 1388 * 1389 * 1. Use node_split() to force node that describes setting 1390 * of idx to be within the mask portion of a node. 1391 * 2. Form mask of bits to be set. 1392 * 3. Determine number of mask bits already set in the node 1393 * and store in a local variable named num_already_set. 1394 * 4. Set the appropriate mask bits within the node. 1395 * 5. Increment struct sparsebit_pvt num_set member 1396 * by the number of bits that were actually set. 1397 * Exclude from the counts bits that were already set. 1398 * 6. Before returning to the caller, use node_reduce() to 1399 * handle the multiple corner cases that this method 1400 * introduces. 1401 */ 1402 for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--) 1403 bit_set(s, idx); 1404 1405 /* Middle - bits spanning one or more entire mask */ 1406 middle_start = idx; 1407 middle_end = middle_start + (n & -MASK_BITS) - 1; 1408 if (n >= MASK_BITS) { 1409 nodep = node_split(s, middle_start); 1410 1411 /* 1412 * As needed, split just after end of middle bits. 1413 * No split needed if end of middle bits is at highest 1414 * supported bit index. 1415 */ 1416 if (middle_end + 1 > middle_end) 1417 (void) node_split(s, middle_end + 1); 1418 1419 /* Delete nodes that only describe bits within the middle. */ 1420 for (next = node_next(s, nodep); 1421 next && (next->idx < middle_end); 1422 next = node_next(s, nodep)) { 1423 assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end); 1424 node_rm(s, next); 1425 next = NULL; 1426 } 1427 1428 /* As needed set each of the mask bits */ 1429 for (n1 = 0; n1 < MASK_BITS; n1++) { 1430 if (!(nodep->mask & (1 << n1))) { 1431 nodep->mask |= 1 << n1; 1432 s->num_set++; 1433 } 1434 } 1435 1436 s->num_set -= nodep->num_after; 1437 nodep->num_after = middle_end - middle_start + 1 - MASK_BITS; 1438 s->num_set += nodep->num_after; 1439 1440 node_reduce(s, nodep); 1441 } 1442 idx = middle_end + 1; 1443 n -= middle_end - middle_start + 1; 1444 1445 /* Trailing - bits at and beyond last mask boundary */ 1446 assert(n < MASK_BITS); 1447 for (; n > 0; idx++, n--) 1448 bit_set(s, idx); 1449 } 1450 1451 /* Clears the bits * in the inclusive range idx through idx + num - 1. */ 1452 void sparsebit_clear_num(struct sparsebit *s, 1453 sparsebit_idx_t start, sparsebit_num_t num) 1454 { 1455 struct node *nodep, *next; 1456 unsigned int n1; 1457 sparsebit_idx_t idx; 1458 sparsebit_num_t n; 1459 sparsebit_idx_t middle_start, middle_end; 1460 1461 assert(num > 0); 1462 assert(start + num - 1 >= start); 1463 1464 /* Leading - bits before first mask boundary */ 1465 for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--) 1466 bit_clear(s, idx); 1467 1468 /* Middle - bits spanning one or more entire mask */ 1469 middle_start = idx; 1470 middle_end = middle_start + (n & -MASK_BITS) - 1; 1471 if (n >= MASK_BITS) { 1472 nodep = node_split(s, middle_start); 1473 1474 /* 1475 * As needed, split just after end of middle bits. 1476 * No split needed if end of middle bits is at highest 1477 * supported bit index. 1478 */ 1479 if (middle_end + 1 > middle_end) 1480 (void) node_split(s, middle_end + 1); 1481 1482 /* Delete nodes that only describe bits within the middle. */ 1483 for (next = node_next(s, nodep); 1484 next && (next->idx < middle_end); 1485 next = node_next(s, nodep)) { 1486 assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end); 1487 node_rm(s, next); 1488 next = NULL; 1489 } 1490 1491 /* As needed clear each of the mask bits */ 1492 for (n1 = 0; n1 < MASK_BITS; n1++) { 1493 if (nodep->mask & (1 << n1)) { 1494 nodep->mask &= ~(1 << n1); 1495 s->num_set--; 1496 } 1497 } 1498 1499 /* Clear any bits described by num_after */ 1500 s->num_set -= nodep->num_after; 1501 nodep->num_after = 0; 1502 1503 /* 1504 * Delete the node that describes the beginning of 1505 * the middle bits and perform any allowed reductions 1506 * with the nodes prev or next of nodep. 1507 */ 1508 node_reduce(s, nodep); 1509 nodep = NULL; 1510 } 1511 idx = middle_end + 1; 1512 n -= middle_end - middle_start + 1; 1513 1514 /* Trailing - bits at and beyond last mask boundary */ 1515 assert(n < MASK_BITS); 1516 for (; n > 0; idx++, n--) 1517 bit_clear(s, idx); 1518 } 1519 1520 /* Sets the bit at the index given by idx. */ 1521 void sparsebit_set(struct sparsebit *s, sparsebit_idx_t idx) 1522 { 1523 sparsebit_set_num(s, idx, 1); 1524 } 1525 1526 /* Clears the bit at the index given by idx. */ 1527 void sparsebit_clear(struct sparsebit *s, sparsebit_idx_t idx) 1528 { 1529 sparsebit_clear_num(s, idx, 1); 1530 } 1531 1532 /* Sets the bits in the entire addressable range of the sparsebit array. */ 1533 void sparsebit_set_all(struct sparsebit *s) 1534 { 1535 sparsebit_set(s, 0); 1536 sparsebit_set_num(s, 1, ~(sparsebit_idx_t) 0); 1537 assert(sparsebit_all_set(s)); 1538 } 1539 1540 /* Clears the bits in the entire addressable range of the sparsebit array. */ 1541 void sparsebit_clear_all(struct sparsebit *s) 1542 { 1543 sparsebit_clear(s, 0); 1544 sparsebit_clear_num(s, 1, ~(sparsebit_idx_t) 0); 1545 assert(!sparsebit_any_set(s)); 1546 } 1547 1548 static size_t display_range(FILE *stream, sparsebit_idx_t low, 1549 sparsebit_idx_t high, bool prepend_comma_space) 1550 { 1551 char *fmt_str; 1552 size_t sz; 1553 1554 /* Determine the printf format string */ 1555 if (low == high) 1556 fmt_str = prepend_comma_space ? ", 0x%lx" : "0x%lx"; 1557 else 1558 fmt_str = prepend_comma_space ? ", 0x%lx:0x%lx" : "0x%lx:0x%lx"; 1559 1560 /* 1561 * When stream is NULL, just determine the size of what would 1562 * have been printed, else print the range. 1563 */ 1564 if (!stream) 1565 sz = snprintf(NULL, 0, fmt_str, low, high); 1566 else 1567 sz = fprintf(stream, fmt_str, low, high); 1568 1569 return sz; 1570 } 1571 1572 1573 /* Dumps to the FILE stream given by stream, the bit settings 1574 * of s. Each line of output is prefixed with the number of 1575 * spaces given by indent. The length of each line is implementation 1576 * dependent and does not depend on the indent amount. The following 1577 * is an example output of a sparsebit array that has bits: 1578 * 1579 * 0x5, 0x8, 0xa:0xe, 0x12 1580 * 1581 * This corresponds to a sparsebit whose bits 5, 8, 10, 11, 12, 13, 14, 18 1582 * are set. Note that a ':', instead of a '-' is used to specify a range of 1583 * contiguous bits. This is done because '-' is used to specify command-line 1584 * options, and sometimes ranges are specified as command-line arguments. 1585 */ 1586 void sparsebit_dump(FILE *stream, struct sparsebit *s, 1587 unsigned int indent) 1588 { 1589 size_t current_line_len = 0; 1590 size_t sz; 1591 struct node *nodep; 1592 1593 if (!sparsebit_any_set(s)) 1594 return; 1595 1596 /* Display initial indent */ 1597 fprintf(stream, "%*s", indent, ""); 1598 1599 /* For each node */ 1600 for (nodep = node_first(s); nodep; nodep = node_next(s, nodep)) { 1601 unsigned int n1; 1602 sparsebit_idx_t low, high; 1603 1604 /* For each group of bits in the mask */ 1605 for (n1 = 0; n1 < MASK_BITS; n1++) { 1606 if (nodep->mask & (1 << n1)) { 1607 low = high = nodep->idx + n1; 1608 1609 for (; n1 < MASK_BITS; n1++) { 1610 if (nodep->mask & (1 << n1)) 1611 high = nodep->idx + n1; 1612 else 1613 break; 1614 } 1615 1616 if ((n1 == MASK_BITS) && nodep->num_after) 1617 high += nodep->num_after; 1618 1619 /* 1620 * How much room will it take to display 1621 * this range. 1622 */ 1623 sz = display_range(NULL, low, high, 1624 current_line_len != 0); 1625 1626 /* 1627 * If there is not enough room, display 1628 * a newline plus the indent of the next 1629 * line. 1630 */ 1631 if (current_line_len + sz > DUMP_LINE_MAX) { 1632 fputs("\n", stream); 1633 fprintf(stream, "%*s", indent, ""); 1634 current_line_len = 0; 1635 } 1636 1637 /* Display the range */ 1638 sz = display_range(stream, low, high, 1639 current_line_len != 0); 1640 current_line_len += sz; 1641 } 1642 } 1643 1644 /* 1645 * If num_after and most significant-bit of mask is not 1646 * set, then still need to display a range for the bits 1647 * described by num_after. 1648 */ 1649 if (!(nodep->mask & (1 << (MASK_BITS - 1))) && nodep->num_after) { 1650 low = nodep->idx + MASK_BITS; 1651 high = nodep->idx + MASK_BITS + nodep->num_after - 1; 1652 1653 /* 1654 * How much room will it take to display 1655 * this range. 1656 */ 1657 sz = display_range(NULL, low, high, 1658 current_line_len != 0); 1659 1660 /* 1661 * If there is not enough room, display 1662 * a newline plus the indent of the next 1663 * line. 1664 */ 1665 if (current_line_len + sz > DUMP_LINE_MAX) { 1666 fputs("\n", stream); 1667 fprintf(stream, "%*s", indent, ""); 1668 current_line_len = 0; 1669 } 1670 1671 /* Display the range */ 1672 sz = display_range(stream, low, high, 1673 current_line_len != 0); 1674 current_line_len += sz; 1675 } 1676 } 1677 fputs("\n", stream); 1678 } 1679 1680 /* Validates the internal state of the sparsebit array given by 1681 * s. On error, diagnostic information is printed to stderr and 1682 * abort is called. 1683 */ 1684 void sparsebit_validate_internal(struct sparsebit *s) 1685 { 1686 bool error_detected = false; 1687 struct node *nodep, *prev = NULL; 1688 sparsebit_num_t total_bits_set = 0; 1689 unsigned int n1; 1690 1691 /* For each node */ 1692 for (nodep = node_first(s); nodep; 1693 prev = nodep, nodep = node_next(s, nodep)) { 1694 1695 /* 1696 * Increase total bits set by the number of bits set 1697 * in this node. 1698 */ 1699 for (n1 = 0; n1 < MASK_BITS; n1++) 1700 if (nodep->mask & (1 << n1)) 1701 total_bits_set++; 1702 1703 total_bits_set += nodep->num_after; 1704 1705 /* 1706 * Arbitrary choice as to whether a mask of 0 is allowed 1707 * or not. For diagnostic purposes it is beneficial to 1708 * have only one valid means to represent a set of bits. 1709 * To support this an arbitrary choice has been made 1710 * to not allow a mask of zero. 1711 */ 1712 if (nodep->mask == 0) { 1713 fprintf(stderr, "Node mask of zero, " 1714 "nodep: %p nodep->mask: 0x%x", 1715 nodep, nodep->mask); 1716 error_detected = true; 1717 break; 1718 } 1719 1720 /* 1721 * Validate num_after is not greater than the max index 1722 * - the number of mask bits. The num_after member 1723 * uses 0-based indexing and thus has no value that 1724 * represents all bits set. This limitation is handled 1725 * by requiring a non-zero mask. With a non-zero mask, 1726 * MASK_BITS worth of bits are described by the mask, 1727 * which makes the largest needed num_after equal to: 1728 * 1729 * (~(sparsebit_num_t) 0) - MASK_BITS + 1 1730 */ 1731 if (nodep->num_after 1732 > (~(sparsebit_num_t) 0) - MASK_BITS + 1) { 1733 fprintf(stderr, "num_after too large, " 1734 "nodep: %p nodep->num_after: 0x%lx", 1735 nodep, nodep->num_after); 1736 error_detected = true; 1737 break; 1738 } 1739 1740 /* Validate node index is divisible by the mask size */ 1741 if (nodep->idx % MASK_BITS) { 1742 fprintf(stderr, "Node index not divisible by " 1743 "mask size,\n" 1744 " nodep: %p nodep->idx: 0x%lx " 1745 "MASK_BITS: %lu\n", 1746 nodep, nodep->idx, MASK_BITS); 1747 error_detected = true; 1748 break; 1749 } 1750 1751 /* 1752 * Validate bits described by node don't wrap beyond the 1753 * highest supported index. 1754 */ 1755 if ((nodep->idx + MASK_BITS + nodep->num_after - 1) < nodep->idx) { 1756 fprintf(stderr, "Bits described by node wrap " 1757 "beyond highest supported index,\n" 1758 " nodep: %p nodep->idx: 0x%lx\n" 1759 " MASK_BITS: %lu nodep->num_after: 0x%lx", 1760 nodep, nodep->idx, MASK_BITS, nodep->num_after); 1761 error_detected = true; 1762 break; 1763 } 1764 1765 /* Check parent pointers. */ 1766 if (nodep->left) { 1767 if (nodep->left->parent != nodep) { 1768 fprintf(stderr, "Left child parent pointer " 1769 "doesn't point to this node,\n" 1770 " nodep: %p nodep->left: %p " 1771 "nodep->left->parent: %p", 1772 nodep, nodep->left, 1773 nodep->left->parent); 1774 error_detected = true; 1775 break; 1776 } 1777 } 1778 1779 if (nodep->right) { 1780 if (nodep->right->parent != nodep) { 1781 fprintf(stderr, "Right child parent pointer " 1782 "doesn't point to this node,\n" 1783 " nodep: %p nodep->right: %p " 1784 "nodep->right->parent: %p", 1785 nodep, nodep->right, 1786 nodep->right->parent); 1787 error_detected = true; 1788 break; 1789 } 1790 } 1791 1792 if (!nodep->parent) { 1793 if (s->root != nodep) { 1794 fprintf(stderr, "Unexpected root node, " 1795 "s->root: %p nodep: %p", 1796 s->root, nodep); 1797 error_detected = true; 1798 break; 1799 } 1800 } 1801 1802 if (prev) { 1803 /* 1804 * Is index of previous node before index of 1805 * current node? 1806 */ 1807 if (prev->idx >= nodep->idx) { 1808 fprintf(stderr, "Previous node index " 1809 ">= current node index,\n" 1810 " prev: %p prev->idx: 0x%lx\n" 1811 " nodep: %p nodep->idx: 0x%lx", 1812 prev, prev->idx, nodep, nodep->idx); 1813 error_detected = true; 1814 break; 1815 } 1816 1817 /* 1818 * Nodes occur in asscending order, based on each 1819 * nodes starting index. 1820 */ 1821 if ((prev->idx + MASK_BITS + prev->num_after - 1) 1822 >= nodep->idx) { 1823 fprintf(stderr, "Previous node bit range " 1824 "overlap with current node bit range,\n" 1825 " prev: %p prev->idx: 0x%lx " 1826 "prev->num_after: 0x%lx\n" 1827 " nodep: %p nodep->idx: 0x%lx " 1828 "nodep->num_after: 0x%lx\n" 1829 " MASK_BITS: %lu", 1830 prev, prev->idx, prev->num_after, 1831 nodep, nodep->idx, nodep->num_after, 1832 MASK_BITS); 1833 error_detected = true; 1834 break; 1835 } 1836 1837 /* 1838 * When the node has all mask bits set, it shouldn't 1839 * be adjacent to the last bit described by the 1840 * previous node. 1841 */ 1842 if (nodep->mask == ~(mask_t) 0 && 1843 prev->idx + MASK_BITS + prev->num_after == nodep->idx) { 1844 fprintf(stderr, "Current node has mask with " 1845 "all bits set and is adjacent to the " 1846 "previous node,\n" 1847 " prev: %p prev->idx: 0x%lx " 1848 "prev->num_after: 0x%lx\n" 1849 " nodep: %p nodep->idx: 0x%lx " 1850 "nodep->num_after: 0x%lx\n" 1851 " MASK_BITS: %lu", 1852 prev, prev->idx, prev->num_after, 1853 nodep, nodep->idx, nodep->num_after, 1854 MASK_BITS); 1855 1856 error_detected = true; 1857 break; 1858 } 1859 } 1860 } 1861 1862 if (!error_detected) { 1863 /* 1864 * Is sum of bits set in each node equal to the count 1865 * of total bits set. 1866 */ 1867 if (s->num_set != total_bits_set) { 1868 fprintf(stderr, "Number of bits set mismatch,\n" 1869 " s->num_set: 0x%lx total_bits_set: 0x%lx", 1870 s->num_set, total_bits_set); 1871 1872 error_detected = true; 1873 } 1874 } 1875 1876 if (error_detected) { 1877 fputs(" dump_internal:\n", stderr); 1878 sparsebit_dump_internal(stderr, s, 4); 1879 abort(); 1880 } 1881 } 1882 1883 1884 #ifdef FUZZ 1885 /* A simple but effective fuzzing driver. Look for bugs with the help 1886 * of some invariants and of a trivial representation of sparsebit. 1887 * Just use 512 bytes of /dev/zero and /dev/urandom as inputs, and let 1888 * afl-fuzz do the magic. :) 1889 */ 1890 1891 #include <stdlib.h> 1892 1893 struct range { 1894 sparsebit_idx_t first, last; 1895 bool set; 1896 }; 1897 1898 struct sparsebit *s; 1899 struct range ranges[1000]; 1900 int num_ranges; 1901 1902 static bool get_value(sparsebit_idx_t idx) 1903 { 1904 int i; 1905 1906 for (i = num_ranges; --i >= 0; ) 1907 if (ranges[i].first <= idx && idx <= ranges[i].last) 1908 return ranges[i].set; 1909 1910 return false; 1911 } 1912 1913 static void operate(int code, sparsebit_idx_t first, sparsebit_idx_t last) 1914 { 1915 sparsebit_num_t num; 1916 sparsebit_idx_t next; 1917 1918 if (first < last) { 1919 num = last - first + 1; 1920 } else { 1921 num = first - last + 1; 1922 first = last; 1923 last = first + num - 1; 1924 } 1925 1926 switch (code) { 1927 case 0: 1928 sparsebit_set(s, first); 1929 assert(sparsebit_is_set(s, first)); 1930 assert(!sparsebit_is_clear(s, first)); 1931 assert(sparsebit_any_set(s)); 1932 assert(!sparsebit_all_clear(s)); 1933 if (get_value(first)) 1934 return; 1935 if (num_ranges == 1000) 1936 exit(0); 1937 ranges[num_ranges++] = (struct range) 1938 { .first = first, .last = first, .set = true }; 1939 break; 1940 case 1: 1941 sparsebit_clear(s, first); 1942 assert(!sparsebit_is_set(s, first)); 1943 assert(sparsebit_is_clear(s, first)); 1944 assert(sparsebit_any_clear(s)); 1945 assert(!sparsebit_all_set(s)); 1946 if (!get_value(first)) 1947 return; 1948 if (num_ranges == 1000) 1949 exit(0); 1950 ranges[num_ranges++] = (struct range) 1951 { .first = first, .last = first, .set = false }; 1952 break; 1953 case 2: 1954 assert(sparsebit_is_set(s, first) == get_value(first)); 1955 assert(sparsebit_is_clear(s, first) == !get_value(first)); 1956 break; 1957 case 3: 1958 if (sparsebit_any_set(s)) 1959 assert(get_value(sparsebit_first_set(s))); 1960 if (sparsebit_any_clear(s)) 1961 assert(!get_value(sparsebit_first_clear(s))); 1962 sparsebit_set_all(s); 1963 assert(!sparsebit_any_clear(s)); 1964 assert(sparsebit_all_set(s)); 1965 num_ranges = 0; 1966 ranges[num_ranges++] = (struct range) 1967 { .first = 0, .last = ~(sparsebit_idx_t)0, .set = true }; 1968 break; 1969 case 4: 1970 if (sparsebit_any_set(s)) 1971 assert(get_value(sparsebit_first_set(s))); 1972 if (sparsebit_any_clear(s)) 1973 assert(!get_value(sparsebit_first_clear(s))); 1974 sparsebit_clear_all(s); 1975 assert(!sparsebit_any_set(s)); 1976 assert(sparsebit_all_clear(s)); 1977 num_ranges = 0; 1978 break; 1979 case 5: 1980 next = sparsebit_next_set(s, first); 1981 assert(next == 0 || next > first); 1982 assert(next == 0 || get_value(next)); 1983 break; 1984 case 6: 1985 next = sparsebit_next_clear(s, first); 1986 assert(next == 0 || next > first); 1987 assert(next == 0 || !get_value(next)); 1988 break; 1989 case 7: 1990 next = sparsebit_next_clear(s, first); 1991 if (sparsebit_is_set_num(s, first, num)) { 1992 assert(next == 0 || next > last); 1993 if (first) 1994 next = sparsebit_next_set(s, first - 1); 1995 else if (sparsebit_any_set(s)) 1996 next = sparsebit_first_set(s); 1997 else 1998 return; 1999 assert(next == first); 2000 } else { 2001 assert(sparsebit_is_clear(s, first) || next <= last); 2002 } 2003 break; 2004 case 8: 2005 next = sparsebit_next_set(s, first); 2006 if (sparsebit_is_clear_num(s, first, num)) { 2007 assert(next == 0 || next > last); 2008 if (first) 2009 next = sparsebit_next_clear(s, first - 1); 2010 else if (sparsebit_any_clear(s)) 2011 next = sparsebit_first_clear(s); 2012 else 2013 return; 2014 assert(next == first); 2015 } else { 2016 assert(sparsebit_is_set(s, first) || next <= last); 2017 } 2018 break; 2019 case 9: 2020 sparsebit_set_num(s, first, num); 2021 assert(sparsebit_is_set_num(s, first, num)); 2022 assert(!sparsebit_is_clear_num(s, first, num)); 2023 assert(sparsebit_any_set(s)); 2024 assert(!sparsebit_all_clear(s)); 2025 if (num_ranges == 1000) 2026 exit(0); 2027 ranges[num_ranges++] = (struct range) 2028 { .first = first, .last = last, .set = true }; 2029 break; 2030 case 10: 2031 sparsebit_clear_num(s, first, num); 2032 assert(!sparsebit_is_set_num(s, first, num)); 2033 assert(sparsebit_is_clear_num(s, first, num)); 2034 assert(sparsebit_any_clear(s)); 2035 assert(!sparsebit_all_set(s)); 2036 if (num_ranges == 1000) 2037 exit(0); 2038 ranges[num_ranges++] = (struct range) 2039 { .first = first, .last = last, .set = false }; 2040 break; 2041 case 11: 2042 sparsebit_validate_internal(s); 2043 break; 2044 default: 2045 break; 2046 } 2047 } 2048 2049 unsigned char get8(void) 2050 { 2051 int ch; 2052 2053 ch = getchar(); 2054 if (ch == EOF) 2055 exit(0); 2056 return ch; 2057 } 2058 2059 uint64_t get64(void) 2060 { 2061 uint64_t x; 2062 2063 x = get8(); 2064 x = (x << 8) | get8(); 2065 x = (x << 8) | get8(); 2066 x = (x << 8) | get8(); 2067 x = (x << 8) | get8(); 2068 x = (x << 8) | get8(); 2069 x = (x << 8) | get8(); 2070 return (x << 8) | get8(); 2071 } 2072 2073 int main(void) 2074 { 2075 s = sparsebit_alloc(); 2076 for (;;) { 2077 uint8_t op = get8() & 0xf; 2078 uint64_t first = get64(); 2079 uint64_t last = get64(); 2080 2081 operate(op, first, last); 2082 } 2083 } 2084 #endif 2085