/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 EMC Corp.
 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * Path-compressed radix trie implementation.
 *
 * The implementation takes into account the following rationale:
 * - The size of the nodes should be as small as possible but still big
 *   enough to avoid a large maximum depth for the trie.  This is a balance
 *   between the need to avoid wiring too much physical memory for the nodes
 *   and the need to avoid too much cache pollution during trie operations.
 * - There is not a huge bias toward the number of lookup operations over
 *   the number of insert and remove operations.  This basically implies
 *   that optimizations supposedly helping one operation but hurting the
 *   other must be carefully evaluated.
 * - On average not many nodes are expected to be fully populated, hence
 *   level compression may just complicate things.
 */

#include <sys/cdefs.h>
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/libkern.h>
#include <sys/pctrie.h>
#include <sys/proc.h>	/* smr.h depends on struct thread. */
#include <sys/smr.h>
#include <sys/smr_types.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#if PCTRIE_WIDTH == 3
typedef uint8_t pn_popmap_t;
#elif PCTRIE_WIDTH == 4
typedef uint16_t pn_popmap_t;
#elif PCTRIE_WIDTH == 5
typedef uint32_t pn_popmap_t;
#else
#error Unsupported width
#endif
_Static_assert(sizeof(pn_popmap_t) <= sizeof(int),
    "pn_popmap_t too wide");

struct pctrie_node;
typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t;

struct pctrie_node {
        uint64_t        pn_owner;                       /* Owner of record. */
        pn_popmap_t     pn_popmap;                      /* Valid children. */
        uint8_t         pn_clev;                        /* Level * WIDTH. */
        smr_pctnode_t   pn_parent;                      /* Parent node. */
        smr_pctnode_t   pn_child[PCTRIE_COUNT];         /* Child nodes. */
};
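
/*
 * Worked example of the encoding above (illustrative only; assumes
 * PCTRIE_WIDTH == 4, so PCTRIE_COUNT == 16): a node with pn_clev == 8 has
 * pn_owner aligned to PCTRIE_COUNT << 8 == 4096 and covers the 4096 indices
 * [pn_owner, pn_owner + 0xfff].  With pn_owner == 0x1000, index 0x1234
 * selects child slot (0x1234 >> 8) & 0xf == 2 in pctrie_slot() below, while
 * index 0x2000 fails the pctrie_keybarr() test below because
 * (0x2000 - 0x1000) >> 8 == 16 is not a valid slot.
 */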

/*
 * Map an index to an array position for the children of node.
 */
static __inline int
pctrie_slot(struct pctrie_node *node, uint64_t index)
{
        return ((index >> node->pn_clev) & (PCTRIE_COUNT - 1));
}

/*
 * Returns true if index does not belong to the specified node.  Otherwise,
 * sets slot value, and returns false.
 */
static __inline bool
pctrie_keybarr(struct pctrie_node *node, uint64_t index, int *slot)
{
        index = (index - node->pn_owner) >> node->pn_clev;
        if (index >= PCTRIE_COUNT)
                return (true);
        *slot = index;
        return (false);
}

enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED };

/*
 * Fetch a node pointer from a slot.
 */
static __inline struct pctrie_node *
pctrie_node_load(smr_pctnode_t *p, smr_t smr, enum pctrie_access access)
{
        switch (access) {
        case PCTRIE_UNSERIALIZED:
                return (smr_unserialized_load(p, true));
        case PCTRIE_LOCKED:
                return (smr_serialized_load(p, true));
        case PCTRIE_SMR:
                return (smr_entered_load(p, smr));
        }
        __assert_unreachable();
}

static __inline void
pctrie_node_store(smr_pctnode_t *p, void *v, enum pctrie_access access)
{
        switch (access) {
        case PCTRIE_UNSERIALIZED:
                smr_unserialized_store(p, v, true);
                break;
        case PCTRIE_LOCKED:
                smr_serialized_store(p, v, true);
                break;
        case PCTRIE_SMR:
                panic("%s: Not supported in SMR section.", __func__);
                break;
        default:
                __assert_unreachable();
                break;
        }
}

/*
 * Get the root address, cast to proper type for load/store.
 */
static __inline smr_pctnode_t *
pctrie_root(struct pctrie *ptree)
{
        return ((smr_pctnode_t *)&ptree->pt_root);
}

/*
 * Get the root node for a tree.
 */
static __inline struct pctrie_node *
pctrie_root_load(struct pctrie *ptree, smr_t smr, enum pctrie_access access)
{
        return (pctrie_node_load(pctrie_root(ptree), smr, access));
}

/*
 * Get the child of a node.
 */
static __inline smr_pctnode_t *
pctrie_child(struct pctrie *ptree, struct pctrie_node *node, uint64_t index)
{
        return (node == NULL ? pctrie_root(ptree) :
            &node->pn_child[pctrie_slot(node, index)]);
}

/*
 * Returns TRUE if the specified node is a leaf and FALSE otherwise.
 */
static __inline bool
pctrie_isleaf(struct pctrie_node *node)
{
        return (((uintptr_t)node & PCTRIE_ISLEAF) != 0);
}

/*
 * Returns val with leaf bit set.
 */
static __inline void *
pctrie_toleaf(uint64_t *val)
{
        return ((void *)((uintptr_t)val | PCTRIE_ISLEAF));
}

/*
 * Returns the associated val extracted from node.
 */
static __inline uint64_t *
pctrie_toval(struct pctrie_node *node)
{
        return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS));
}

/*
 * Returns the associated pointer extracted from node and field offset.
 */
static __inline void *
pctrie_toptr(struct pctrie_node *node, int keyoff)
{
        return ((void *)(((uintptr_t)node & ~PCTRIE_FLAGS) - keyoff));
}
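
/*
 * Leaf encoding sketch (illustrative only): a value is a pointer to a
 * uint64_t key embedded in a caller structure, so its low bits are zero
 * and PCTRIE_ISLEAF fits there:
 *
 *      uint64_t key = 42;
 *      void *leaf = pctrie_toleaf(&key);
 *
 * Then pctrie_isleaf() is true for 'leaf', pctrie_toval() recovers &key,
 * and pctrie_toptr() subtracts the field offset to recover the enclosing
 * caller structure instead.
 */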

/*
 * Make 'parent' a parent of 'child'.
 */
static __inline void
pctrie_setparent(struct pctrie_node *child, struct pctrie_node *parent)
{
        pctrie_node_store(&child->pn_parent, parent, PCTRIE_UNSERIALIZED);
}

/*
 * Return the parent of 'node'.
 */
static __inline struct pctrie_node *
pctrie_parent(struct pctrie_node *node)
{
        return (pctrie_node_load(&node->pn_parent, NULL, PCTRIE_UNSERIALIZED));
}

/*
 * Make 'child' a child of 'node'.
 */
static __inline void
pctrie_addnode(struct pctrie_node *node, uint64_t index,
    struct pctrie_node *child, enum pctrie_access access)
{
        int slot;

        slot = pctrie_slot(node, index);
        pctrie_node_store(&node->pn_child[slot], child, access);
        node->pn_popmap ^= 1 << slot;
        KASSERT((node->pn_popmap & (1 << slot)) != 0,
            ("%s: bad popmap slot %d in node %p", __func__, slot, node));
}

/*
 * pctrie node zone initializer.
 */
int
pctrie_zone_init(void *mem, int size __unused, int flags __unused)
{
        struct pctrie_node *node;

        node = mem;
        node->pn_popmap = 0;
        for (int i = 0; i < nitems(node->pn_child); i++)
                pctrie_node_store(&node->pn_child[i], PCTRIE_NULL,
                    PCTRIE_UNSERIALIZED);
        return (0);
}

size_t
pctrie_node_size(void)
{
        return (sizeof(struct pctrie_node));
}

/*
 * Look for where to insert the key-value pair into the trie.  Complete the
 * insertion if it replaces a null leaf.  Return the insertion location if the
 * insertion needs to be completed by the caller; otherwise return NULL.
 *
 * If the key is already present in the trie, populate *found_out as if by
 * pctrie_lookup().
 */
static __always_inline void *
pctrie_insert_lookup_compound(struct pctrie *ptree, uint64_t *val,
    struct pctrie_node **parent_out, uint64_t **found_out)
{
        uint64_t index;
        struct pctrie_node *node, *parent;
        int slot;

        index = *val;

        /*
         * The owner of record for root is not really important because it
         * will never be used.
         */
        node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
        parent = NULL;
        for (;;) {
                if (pctrie_isleaf(node)) {
                        if (node == PCTRIE_NULL) {
                                if (parent == NULL)
                                        pctrie_node_store(pctrie_root(ptree),
                                            pctrie_toleaf(val), PCTRIE_LOCKED);
                                else
                                        pctrie_addnode(parent, index,
                                            pctrie_toleaf(val), PCTRIE_LOCKED);
                                *parent_out = parent;
                                return (NULL);
                        }
                        if (*pctrie_toval(node) == index) {
                                *found_out = pctrie_toval(node);
                                *parent_out = parent;
                                return (NULL);
                        }
                        break;
                }
                if (pctrie_keybarr(node, index, &slot))
                        break;
                parent = node;
                node = pctrie_node_load(&node->pn_child[slot], NULL,
                    PCTRIE_LOCKED);
        }

        /*
         * 'node' must be replaced in the tree with a new branch node, with
         * children 'node' and 'val'.  Return the place that points to 'node'
         * now, and will point to the new branching node later.
         */
        *parent_out = parent;
        return ((parent == NULL) ? pctrie_root(ptree) :
            &parent->pn_child[slot]);
}
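
/*
 * Two-phase insertion sketch (illustrative only; allocate_node() stands in
 * for a caller-provided allocation from the pctrie node zone, and uses the
 * strict wrapper defined below):
 *
 *      parentp = pctrie_insert_lookup_strict(ptree, val, &parent);
 *      if (parentp != NULL) {
 *              child = allocate_node();        (hypothetical allocator)
 *              pctrie_insert_node(val, parent, parentp, child);
 *      }
 */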

/*
 * Wrap pctrie_insert_lookup_compound to implement a strict insertion.  Panic
 * if the key already exists, and do not look for neighboring entries.
 */
void *
pctrie_insert_lookup_strict(struct pctrie *ptree, uint64_t *val,
    struct pctrie_node **parent_out)
{
        void *parentp;
        uint64_t *found;

        found = NULL;
        parentp = pctrie_insert_lookup_compound(ptree, val, parent_out,
            &found);
        if (__predict_false(found != NULL))
                panic("%s: key %jx is already present", __func__,
                    (uintmax_t)*val);
        return (parentp);
}

/*
 * Wrap pctrie_insert_lookup_compound to implement find-or-insert.  Do not
 * look for neighboring entries.
 */
void *
pctrie_insert_lookup(struct pctrie *ptree, uint64_t *val,
    struct pctrie_node **parent_out, uint64_t **found_out)
{
        *found_out = NULL;
        return (pctrie_insert_lookup_compound(ptree, val, parent_out,
            found_out));
}

/*
 * Inserts newly allocated node 'child' into trie at location 'parentp', with
 * parent 'parent' and two children, 'val' and whatever non-NULL node or leaf
 * was at 'parentp' to begin with.
 */
void
pctrie_insert_node(uint64_t *val, struct pctrie_node *parent, void *parentp,
    struct pctrie_node *child)
{
        struct pctrie_node *node;
        uint64_t index, newind;

        /*
         * Clear the last child pointer of the newly allocated child.  We want
         * to clear it after the final section has exited so lookup can not
         * return false negatives.  It is done here because it will be
         * cache-cold in the dtor callback.
         */
        if (child->pn_popmap != 0) {
                pctrie_node_store(&child->pn_child[ffs(child->pn_popmap) - 1],
                    PCTRIE_NULL, PCTRIE_UNSERIALIZED);
                child->pn_popmap = 0;
        }

        /*
         * Recover the values of the two children of the new child node.  If
         * 'node' is not a leaf, this stores into 'newind' the 'owner' field,
         * which must be first in the node.
         */
        index = *val;
        node = pctrie_node_load(parentp, NULL, PCTRIE_UNSERIALIZED);
        pctrie_setparent(child, parent);
        if (!pctrie_isleaf(node))
                pctrie_setparent(node, child);
        newind = *pctrie_toval(node);

        /*
         * From the highest-order bit where the indexes differ,
         * compute the highest level in the trie where they differ.  Then,
         * compute the least index of this subtrie.
         */
        _Static_assert(sizeof(long long) >= sizeof(uint64_t),
            "uint64 too wide");
        _Static_assert(sizeof(uint64_t) * NBBY <=
            (1 << (sizeof(child->pn_clev) * NBBY)), "pn_clev too narrow");
        child->pn_clev = rounddown(ilog2(index ^ newind), PCTRIE_WIDTH);
        child->pn_owner = PCTRIE_COUNT;
        child->pn_owner = index & -(child->pn_owner << child->pn_clev);

        /* These writes are not yet visible due to ordering. */
        pctrie_addnode(child, index, pctrie_toleaf(val), PCTRIE_UNSERIALIZED);
        pctrie_addnode(child, newind, node, PCTRIE_UNSERIALIZED);
        /* Synchronize to make the above visible. */
        pctrie_node_store(parentp, child, PCTRIE_LOCKED);
}
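
/*
 * Worked example for the pn_clev/pn_owner computation above (illustrative
 * only; assumes PCTRIE_WIDTH == 4): for index == 0x1234 and
 * newind == 0x1278, index ^ newind == 0x4c, ilog2(0x4c) == 6, and
 * rounddown(6, 4) == 4, so pn_clev == 4.  Then
 * pn_owner == 0x1234 & -(16 << 4) == 0x1200, and the new branch node covers
 * [0x1200, 0x12ff] with the two leaves in slots 3 and 7.
 */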

/*
 * Return the value associated with the node, if the node is a leaf that
 * matches the index; otherwise NULL.
 */
static __always_inline uint64_t *
pctrie_match_value(struct pctrie_node *node, uint64_t index)
{
        uint64_t *m;

        if (!pctrie_isleaf(node) || (m = pctrie_toval(node)) == NULL ||
            *m != index)
                m = NULL;
        return (m);
}

/*
 * Returns the value stored at the index.  If the index is not present,
 * NULL is returned.
 */
static __always_inline uint64_t *
_pctrie_lookup(struct pctrie *ptree, uint64_t index, smr_t smr,
    enum pctrie_access access)
{
        struct pctrie_node *node;
        int slot;

        node = pctrie_root_load(ptree, smr, access);
        /* Seek a node that matches index. */
        while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot))
                node = pctrie_node_load(&node->pn_child[slot], smr, access);
        return (pctrie_match_value(node, index));
}

/*
 * Returns the value stored at the index, assuming access is externally
 * synchronized by a lock.
 *
 * If the index is not present, NULL is returned.
 */
uint64_t *
pctrie_lookup(struct pctrie *ptree, uint64_t index)
{
        return (_pctrie_lookup(ptree, index, NULL, PCTRIE_LOCKED));
}

/*
 * Returns the value stored at the index without requiring an external lock.
 *
 * If the index is not present, NULL is returned.
 */
uint64_t *
pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr)
{
        uint64_t *res;

        smr_enter(smr);
        res = _pctrie_lookup(ptree, index, smr, PCTRIE_SMR);
        smr_exit(smr);
        return (res);
}
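
/*
 * Unlocked lookup sketch (illustrative only; 'my_smr' stands in for the SMR
 * instance paired with the trie's node zone, e.g. as obtained from
 * uma_zone_get_smr()):
 *
 *      uint64_t *m;
 *
 *      m = pctrie_lookup_unlocked(ptree, 42, my_smr);
 *
 * The value may be removed concurrently once the SMR section is exited, so
 * the caller must revalidate it under a lock or via its own lifetime
 * protocol before use.
 */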

/*
 * Returns the last node examined in the search for the index, and sets the
 * parent of that node.
 */
static __always_inline struct pctrie_node *
_pctrie_lookup_node(struct pctrie *ptree, struct pctrie_node *node,
    uint64_t index, struct pctrie_node **parent_out,
    smr_t smr, enum pctrie_access access)
{
        struct pctrie_node *parent;
        int slot;

        /*
         * Climb the search path to find the lowest node from which to start
         * the search for a value matching 'index'.
         */
        while (node != NULL) {
                KASSERT(access == PCTRIE_SMR || !powerof2(node->pn_popmap),
                    ("%s: freed node in iter path", __func__));
                if (!pctrie_keybarr(node, index, &slot))
                        break;
                node = pctrie_parent(node);
        }

        if (node == NULL) {
                parent = NULL;
                node = pctrie_root_load(ptree, smr, access);
        } else {
                parent = node;
                node = pctrie_node_load(&node->pn_child[slot], smr, access);
        }

        /* Seek a node that matches index. */
        while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot)) {
                parent = node;
                node = pctrie_node_load(&node->pn_child[slot], smr, access);
        }
        if (parent_out != NULL)
                *parent_out = parent;
        return (node);
}

/*
 * Returns the value stored at a given index value, possibly NULL, assuming
 * access is externally synchronized by a lock.
 */
uint64_t *
pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index)
{
        struct pctrie_node *node;

        it->index = index;
        node = _pctrie_lookup_node(it->ptree, it->node, index, &it->node,
            NULL, PCTRIE_LOCKED);
        return (pctrie_match_value(node, index));
}

/*
 * Insert the val in the trie, starting search with iterator.  Return a
 * pointer to indicate where a new node must be allocated to complete
 * insertion.  Assumes access is externally synchronized by a lock.
 */
void *
pctrie_iter_insert_lookup(struct pctrie_iter *it, uint64_t *val)
{
        struct pctrie_node *node;

        it->index = *val;
        node = _pctrie_lookup_node(it->ptree, it->node, *val, &it->node,
            NULL, PCTRIE_LOCKED);
        if (node == PCTRIE_NULL) {
                if (it->node == NULL)
                        pctrie_node_store(pctrie_root(it->ptree),
                            pctrie_toleaf(val), PCTRIE_LOCKED);
                else
                        pctrie_addnode(it->node, it->index,
                            pctrie_toleaf(val), PCTRIE_LOCKED);
                return (NULL);
        }
        if (__predict_false(pctrie_match_value(node, it->index) != NULL))
                panic("%s: key %jx is already present", __func__,
                    (uintmax_t)it->index);

        /*
         * 'node' must be replaced in the tree with a new branch node, with
         * children 'node' and 'val'.  Return the place that points to 'node'
         * now, and will point to the new branching node later.
         */
        return (pctrie_child(it->ptree, it->node, it->index));
}

/*
 * Returns the value stored at a fixed offset from the current index value,
 * possibly NULL.
 */
uint64_t *
pctrie_iter_stride(struct pctrie_iter *it, int stride)
{
        uint64_t index = it->index + stride;

        /* Detect stride overflow. */
        if ((stride > 0) != (index > it->index))
                return (NULL);
        /* Detect crossing limit. */
        if ((index < it->limit) != (it->index < it->limit))
                return (NULL);

        return (pctrie_iter_lookup(it, index));
}

/*
 * Returns the value stored at one more than the current index value, possibly
 * NULL, assuming access is externally synchronized by a lock.
 */
uint64_t *
pctrie_iter_next(struct pctrie_iter *it)
{
        return (pctrie_iter_stride(it, 1));
}

/*
 * Returns the value stored at one less than the current index value, possibly
 * NULL, assuming access is externally synchronized by a lock.
 */
uint64_t *
pctrie_iter_prev(struct pctrie_iter *it)
{
        return (pctrie_iter_stride(it, -1));
}
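
/*
 * Iterator usage sketch (illustrative only; assumes the iterator
 * initializer from sys/pctrie.h): walk consecutive indices upward from 100
 * until the first gap, use_value() being a hypothetical consumer:
 *
 *      struct pctrie_iter it;
 *      uint64_t *m;
 *
 *      pctrie_iter_init(&it, ptree);
 *      for (m = pctrie_iter_lookup(&it, 100); m != NULL;
 *          m = pctrie_iter_next(&it))
 *              use_value(m);
 */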

/*
 * Returns the number of contiguous, non-NULL entries read into the value[]
 * array, starting at index.
 */
static __always_inline int
_pctrie_lookup_range(struct pctrie *ptree, struct pctrie_node *node,
    uint64_t index, uint64_t *value[], int count,
    struct pctrie_node **parent_out, smr_t smr, enum pctrie_access access)
{
        struct pctrie_node *parent;
        uint64_t *val;
        int base, end, i;

        parent = node;
        for (i = 0; i < count;) {
                node = _pctrie_lookup_node(ptree, parent, index + i, &parent,
                    smr, access);
                if ((val = pctrie_match_value(node, index + i)) == NULL)
                        break;
                value[i++] = val;
                base = (index + i) % PCTRIE_COUNT;
                if (base == 0 || parent == NULL || parent->pn_clev != 0)
                        continue;

                /*
                 * For PCTRIE_SMR, compute an upper bound on the number of
                 * children of this parent left to examine.  For
                 * PCTRIE_LOCKED, compute the number of non-NULL children
                 * from base up to the first NULL child, if any, using the
                 * fact that pn_popmap has bits set for only the non-NULL
                 * children.
                 *
                 * The pn_popmap field is accessed only when a lock is held.
                 * To use it for PCTRIE_SMR here would require that we know
                 * that race conditions cannot occur if the tree is modified
                 * while accessed here.  Guarantees about the visibility of
                 * changes to child pointers, enforced by memory barriers on
                 * the writing of pointers, are not present for the pn_popmap
                 * field, so that the popmap bit for a child page may, for an
                 * instant, misrepresent the nullness of the child page
                 * because an operation modifying the pctrie is in progress.
                 */
                end = (access == PCTRIE_SMR) ? PCTRIE_COUNT - base :
                    ffs((parent->pn_popmap >> base) + 1) - 1;
                end = MIN(count, i + end);
                while (i < end) {
                        node = pctrie_node_load(&parent->pn_child[base++],
                            smr, access);
                        val = pctrie_toval(node);
                        if (access == PCTRIE_SMR && val == NULL)
                                break;
                        value[i++] = val;
                        KASSERT(val != NULL,
                            ("%s: null child written to range", __func__));
                }
                if (access == PCTRIE_SMR) {
                        if (i < end)
                                break;
                } else {
                        if (base < PCTRIE_COUNT)
                                break;
                }
        }
        if (parent_out != NULL)
                *parent_out = parent;
        return (i);
}

/*
 * Returns the number of contiguous, non-NULL entries read into the value[]
 * array, starting at index, assuming access is externally synchronized by a
 * lock.
 */
int
pctrie_lookup_range(struct pctrie *ptree, uint64_t index,
    uint64_t *value[], int count)
{
        return (_pctrie_lookup_range(ptree, NULL, index, value, count, NULL,
            NULL, PCTRIE_LOCKED));
}

/*
 * Returns the number of contiguous, non-NULL entries read into the value[]
 * array, starting at index, without requiring an external lock.  These
 * entries *may* never have been in the pctrie all at one time, but for a
 * series of times t0, t1, t2, ..., with ti <= t(i+1), value[i] was in the
 * trie at time ti.
 */
int
pctrie_lookup_range_unlocked(struct pctrie *ptree, uint64_t index,
    uint64_t *value[], int count, smr_t smr)
{
        int res;

        smr_enter(smr);
        res = _pctrie_lookup_range(ptree, NULL, index, value, count, NULL,
            smr, PCTRIE_SMR);
        smr_exit(smr);
        return (res);
}

/*
 * Returns the number of contiguous, non-NULL entries read into the value[]
 * array, starting at index, assuming access is externally synchronized by a
 * lock.  Uses an iterator.
 */
int
pctrie_iter_lookup_range(struct pctrie_iter *it, uint64_t index,
    uint64_t *value[], int count)
{
        return (_pctrie_lookup_range(it->ptree, it->node, index, value, count,
            &it->node, NULL, PCTRIE_LOCKED));
}
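
/*
 * Range lookup sketch (illustrative only): gather up to 16 consecutive
 * values starting at index 100; 'n' is the count actually found before the
 * first gap:
 *
 *      uint64_t *vals[16];
 *      int n;
 *
 *      n = pctrie_lookup_range(ptree, 100, vals, 16);
 */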

/*
 * Find first leaf >= index, and fill iter with the path to the parent of
 * that leaf.  Return NULL if there is no such leaf less than limit.
 */
static __inline uint64_t *
_pctrie_lookup_ge(struct pctrie *ptree, struct pctrie_node *node,
    uint64_t index, struct pctrie_node **parent_out, uint64_t limit)
{
        struct pctrie_node *parent;
        uint64_t *m;
        int slot;

        /* Seek a node that matches index. */
        node = _pctrie_lookup_node(ptree, node, index, &parent,
            NULL, PCTRIE_LOCKED);

        /*
         * If no such node was found, and instead this path leads only to
         * nodes < index, back up to find a subtrie with the least value >
         * index.
         */
        if (node == PCTRIE_NULL || *pctrie_toval(node) < index) {
                /* Climb the path to find a node with a descendant > index. */
                for (node = parent; node != NULL;
                    node = pctrie_parent(node)) {
                        slot = pctrie_slot(node, index) + 1;
                        if ((node->pn_popmap >> slot) != 0)
                                break;
                }
                if (node == NULL) {
                        if (parent_out != NULL)
                                *parent_out = NULL;
                        return (NULL);
                }

                /* Step to the least child with a descendant > index. */
                slot += ffs(node->pn_popmap >> slot) - 1;
                parent = node;
                node = pctrie_node_load(&node->pn_child[slot], NULL,
                    PCTRIE_LOCKED);
        }
        /* Descend to the least leaf of the subtrie. */
        while (!pctrie_isleaf(node)) {
                if (limit != 0 && node->pn_owner >= limit)
                        return (NULL);
                slot = ffs(node->pn_popmap) - 1;
                parent = node;
                node = pctrie_node_load(&node->pn_child[slot], NULL,
                    PCTRIE_LOCKED);
        }
        if (parent_out != NULL)
                *parent_out = parent;
        m = pctrie_toval(node);
        if (limit != 0 && *m >= limit)
                return (NULL);
        return (m);
}

uint64_t *
pctrie_lookup_ge(struct pctrie *ptree, uint64_t index)
{
        return (_pctrie_lookup_ge(ptree, NULL, index, NULL, 0));
}

/*
 * Find first leaf >= index, and fill iter with the path to the parent of
 * that leaf.  Return NULL if there is no such leaf less than limit.
 */
uint64_t *
pctrie_iter_lookup_ge(struct pctrie_iter *it, uint64_t index)
{
        uint64_t *m;

        m = _pctrie_lookup_ge(it->ptree, it->node, index, &it->node,
            it->limit);
        if (m != NULL)
                it->index = *m;
        return (m);
}

/*
 * Find the first leaf with value at least 'jump' greater than the previous
 * leaf.  Return NULL if that value is >= limit.
 */
uint64_t *
pctrie_iter_jump_ge(struct pctrie_iter *it, int64_t jump)
{
        uint64_t index = it->index + jump;

        /* Detect jump overflow. */
        if ((jump > 0) != (index > it->index))
                return (NULL);
        if (it->limit != 0 && index >= it->limit)
                return (NULL);
        return (pctrie_iter_lookup_ge(it, index));
}
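
/*
 * Worked example of the popmap successor search in _pctrie_lookup_ge()
 * above (illustrative only): with pn_popmap == 0b10110 (slots 1, 2, and 4
 * populated) and a search that failed at slot 1, the climb tests
 * (0b10110 >> 2) == 0b101 != 0, and ffs(0b101) - 1 == 0 steps to slot 2,
 * the least populated slot greater than 1.
 */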

/*
 * Find first leaf <= index, and fill iter with the path to the parent of
 * that leaf.  Return NULL if there is no such leaf greater than limit.
 */
static __inline uint64_t *
_pctrie_lookup_le(struct pctrie *ptree, struct pctrie_node *node,
    uint64_t index, struct pctrie_node **parent_out, uint64_t limit)
{
        struct pctrie_node *parent;
        uint64_t *m;
        int slot;

        /* Seek a node that matches index. */
        node = _pctrie_lookup_node(ptree, node, index, &parent, NULL,
            PCTRIE_LOCKED);

        /*
         * If no such node was found, and instead this path leads only to
         * nodes > index, back up to find a subtrie with the greatest value <
         * index.
         */
        if (node == PCTRIE_NULL || *pctrie_toval(node) > index) {
                /* Climb the path to find a node with a descendant < index. */
                for (node = parent; node != NULL;
                    node = pctrie_parent(node)) {
                        slot = pctrie_slot(node, index);
                        if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
                                break;
                }
                if (node == NULL) {
                        if (parent_out != NULL)
                                *parent_out = NULL;
                        return (NULL);
                }

                /* Step to the greatest child with a descendant < index. */
                slot = ilog2(node->pn_popmap & ((1 << slot) - 1));
                parent = node;
                node = pctrie_node_load(&node->pn_child[slot], NULL,
                    PCTRIE_LOCKED);
        }
        /* Descend to the greatest leaf of the subtrie. */
        while (!pctrie_isleaf(node)) {
                if (limit != 0 && limit >= node->pn_owner +
                    ((uint64_t)PCTRIE_COUNT << node->pn_clev) - 1)
                        return (NULL);
                slot = ilog2(node->pn_popmap);
                parent = node;
                node = pctrie_node_load(&node->pn_child[slot], NULL,
                    PCTRIE_LOCKED);
        }
        if (parent_out != NULL)
                *parent_out = parent;
        m = pctrie_toval(node);
        if (limit != 0 && *m <= limit)
                return (NULL);
        return (m);
}

uint64_t *
pctrie_lookup_le(struct pctrie *ptree, uint64_t index)
{
        return (_pctrie_lookup_le(ptree, NULL, index, NULL, 0));
}

uint64_t *
pctrie_subtree_lookup_lt(struct pctrie *ptree, struct pctrie_node *node,
    uint64_t index)
{
        if (index == 0)
                return (NULL);
        return (_pctrie_lookup_le(ptree, node, index - 1, NULL, 0));
}

/*
 * Find first leaf <= index, and fill iter with the path to the parent of
 * that leaf.  Return NULL if there is no such leaf greater than limit.
 */
uint64_t *
pctrie_iter_lookup_le(struct pctrie_iter *it, uint64_t index)
{
        uint64_t *m;

        m = _pctrie_lookup_le(it->ptree, it->node, index, &it->node,
            it->limit);
        if (m != NULL)
                it->index = *m;
        return (m);
}

/*
 * Find the first leaf with value at most 'jump' less than the previous
 * leaf.  Return NULL if that value is <= limit.
 */
uint64_t *
pctrie_iter_jump_le(struct pctrie_iter *it, int64_t jump)
{
        uint64_t index = it->index - jump;

        /* Detect jump overflow. */
        if ((jump > 0) != (index < it->index))
                return (NULL);
        if (it->limit != 0 && index <= it->limit)
                return (NULL);
        return (pctrie_iter_lookup_le(it, index));
}

/*
 * Remove the non-NULL child identified by 'index' from the set of children
 * of 'node'.  If doing so causes 'node' to have only one child, purge it
 * from the pctrie and save it in *freenode for later disposal.
 */
static void
pctrie_remove(struct pctrie *ptree, struct pctrie_node *node, uint64_t index,
    struct pctrie_node **freenode)
{
        smr_pctnode_t *parentp;
        struct pctrie_node *child;
        int slot;

        *freenode = NULL;
        parentp = pctrie_child(ptree, node, index);
        if (node == NULL) {
                pctrie_node_store(parentp, PCTRIE_NULL, PCTRIE_LOCKED);
                return;
        }
        slot = pctrie_slot(node, index);
        KASSERT((node->pn_popmap & (1 << slot)) != 0,
            ("%s: bad popmap slot %d in node %p",
            __func__, slot, node));
        node->pn_popmap ^= 1 << slot;
        if (!powerof2(node->pn_popmap)) {
                pctrie_node_store(parentp, PCTRIE_NULL, PCTRIE_LOCKED);
                return;
        }
        pctrie_node_store(parentp, PCTRIE_NULL, PCTRIE_UNSERIALIZED);
        KASSERT(node->pn_popmap != 0, ("%s: bad popmap all zeroes", __func__));
        slot = ffs(node->pn_popmap) - 1;
        *freenode = node;
        child = pctrie_node_load(&node->pn_child[slot], NULL, PCTRIE_LOCKED);
        KASSERT(child != PCTRIE_NULL,
            ("%s: bad popmap slot %d in node %p", __func__, slot, node));
        node = pctrie_parent(node);
        if (!pctrie_isleaf(child))
                pctrie_setparent(child, node);
        parentp = pctrie_child(ptree, node, index);
        pctrie_node_store(parentp, child, PCTRIE_LOCKED);
}
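
/*
 * Removal sketch (illustrative only; uses pctrie_remove_lookup() defined
 * below, and free_node() stands in for a caller-provided release to the
 * pctrie node zone, e.g. an SMR-deferred zone free):
 *
 *      m = pctrie_remove_lookup(ptree, 42, &freenode);
 *      if (freenode != NULL)
 *              free_node(freenode);
 */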

/*
 * Remove the specified index from the tree, and return the value stored at
 * that index.  If the index is not present, return NULL.
 */
uint64_t *
pctrie_remove_lookup(struct pctrie *ptree, uint64_t index,
    struct pctrie_node **freenode)
{
        struct pctrie_node *child, *node;
        uint64_t *m;
        int slot;

        node = NULL;
        child = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
        while (!pctrie_isleaf(child)) {
                node = child;
                slot = pctrie_slot(node, index);
                child = pctrie_node_load(&node->pn_child[slot], NULL,
                    PCTRIE_LOCKED);
        }
        if ((m = pctrie_match_value(child, index)) != NULL)
                pctrie_remove(ptree, node, index, freenode);
        else
                *freenode = NULL;
        return (m);
}

/*
 * Remove from the trie the leaf last chosen by the iterator, and
 * adjust the path if its last member is to be freed.
 */
void
pctrie_iter_remove(struct pctrie_iter *it, struct pctrie_node **freenode)
{
        KASSERT(NULL != pctrie_match_value(pctrie_node_load(pctrie_child(
            it->ptree, it->node, it->index), NULL, PCTRIE_LOCKED), it->index),
            ("%s: removing value %jx not at iter", __func__,
            (uintmax_t)it->index));
        pctrie_remove(it->ptree, it->node, it->index, freenode);
        if (*freenode != NULL)
                it->node = pctrie_parent(it->node);
}

/*
 * Return the current leaf, assuming access is externally synchronized by a
 * lock.
 */
uint64_t *
pctrie_iter_value(struct pctrie_iter *it)
{
        struct pctrie_node *node;

        node = pctrie_node_load(pctrie_child(it->ptree, it->node, it->index),
            NULL, PCTRIE_LOCKED);
        return (pctrie_toval(node));
}

/*
 * Walk the subtrie rooted at *pnode in order, invoking callback on leaves,
 * until an interior node is stripped of all children, and returned for
 * deallocation, with *pnode left pointing to the parent of that node.
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_prune(struct pctrie_node **pnode, struct pctrie_node *parent,
    pctrie_cb_t callback, int keyoff, void *arg)
{
        struct pctrie_node *child, *node;
        int slot;

        node = *pnode;
        while (node->pn_popmap != 0) {
                slot = ffs(node->pn_popmap) - 1;
                node->pn_popmap ^= 1 << slot;
                child = pctrie_node_load(&node->pn_child[slot], NULL,
                    PCTRIE_UNSERIALIZED);
                pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL,
                    PCTRIE_UNSERIALIZED);
                if (pctrie_isleaf(child)) {
                        if (callback != NULL)
                                callback(pctrie_toptr(child, keyoff), arg);
                        continue;
                }
                /* Climb one level down the trie. */
                parent = node;
                node = child;
        }
        *pnode = parent;
        return (node);
}

/*
 * Recover the node parent from its first child and continue pruning.
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_resume_compound(struct pctrie_node **pnode,
    pctrie_cb_t callback, int keyoff, void *arg)
{
        if (*pnode == NULL)
                return (NULL);
        /* Climb one level up the trie. */
        return (pctrie_reclaim_prune(pnode, pctrie_parent(*pnode), callback,
            keyoff, arg));
}
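
/*
 * Reclaim-all sketch (illustrative only; uses the wrappers defined below,
 * and free_node() stands in for a caller-provided release to the pctrie
 * node zone):
 *
 *      for (freenode = pctrie_reclaim_begin(&node, ptree);
 *          freenode != NULL;
 *          freenode = pctrie_reclaim_resume(&node))
 *              free_node(freenode);
 */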

/*
 * Find the trie root, and start pruning with a NULL parent.
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_begin_compound(struct pctrie_node **pnode,
    struct pctrie *ptree,
    pctrie_cb_t callback, int keyoff, void *arg)
{
        struct pctrie_node *node;

        node = pctrie_root_load(ptree, NULL, PCTRIE_UNSERIALIZED);
        pctrie_node_store(pctrie_root(ptree), PCTRIE_NULL,
            PCTRIE_UNSERIALIZED);
        if (pctrie_isleaf(node)) {
                if (callback != NULL && node != PCTRIE_NULL)
                        callback(pctrie_toptr(node, keyoff), arg);
                return (NULL);
        }
        *pnode = node;
        return (pctrie_reclaim_prune(pnode, NULL, callback, keyoff, arg));
}

struct pctrie_node *
pctrie_reclaim_resume(struct pctrie_node **pnode)
{
        return (pctrie_reclaim_resume_compound(pnode, NULL, 0, NULL));
}

struct pctrie_node *
pctrie_reclaim_begin(struct pctrie_node **pnode, struct pctrie *ptree)
{
        return (pctrie_reclaim_begin_compound(pnode, ptree, NULL, 0, NULL));
}

struct pctrie_node *
pctrie_reclaim_resume_cb(struct pctrie_node **pnode,
    pctrie_cb_t callback, int keyoff, void *arg)
{
        return (pctrie_reclaim_resume_compound(pnode, callback, keyoff, arg));
}

struct pctrie_node *
pctrie_reclaim_begin_cb(struct pctrie_node **pnode, struct pctrie *ptree,
    pctrie_cb_t callback, int keyoff, void *arg)
{
        return (pctrie_reclaim_begin_compound(pnode, ptree,
            callback, keyoff, arg));
}

/*
 * Replace an existing value in the trie with another one.
 * Panics if there is not an old value in the trie at the new value's index.
 */
uint64_t *
pctrie_replace(struct pctrie *ptree, uint64_t *newval)
{
        struct pctrie_node *leaf, *parent, *node;
        uint64_t *m;
        uint64_t index;
        int slot;

        leaf = pctrie_toleaf(newval);
        index = *newval;
        node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
        parent = NULL;
        for (;;) {
                if (pctrie_isleaf(node)) {
                        if ((m = pctrie_toval(node)) != NULL && *m == index) {
                                if (parent == NULL)
                                        pctrie_node_store(pctrie_root(ptree),
                                            leaf, PCTRIE_LOCKED);
                                else
                                        pctrie_node_store(
                                            &parent->pn_child[slot], leaf,
                                            PCTRIE_LOCKED);
                                return (m);
                        }
                        break;
                }
                if (pctrie_keybarr(node, index, &slot))
                        break;
                parent = node;
                node = pctrie_node_load(&node->pn_child[slot], NULL,
                    PCTRIE_LOCKED);
        }
        panic("%s: original replacing value not found", __func__);
}

#ifdef DDB
/*
 * Show details about the given node.
 */
DB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
{
        struct pctrie_node *node, *tmp;
        int slot;
        pn_popmap_t popmap;

        if (!have_addr)
                return;
        node = (struct pctrie_node *)addr;
        db_printf("node %p, owner %jx, children popmap %04x, level %u:\n",
            (void *)node, (uintmax_t)node->pn_owner, node->pn_popmap,
            node->pn_clev / PCTRIE_WIDTH);
        for (popmap = node->pn_popmap; popmap != 0; popmap ^= 1 << slot) {
                slot = ffs(popmap) - 1;
                tmp = pctrie_node_load(&node->pn_child[slot], NULL,
                    PCTRIE_UNSERIALIZED);
                db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
                    slot, (void *)tmp,
                    pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL,
                    node->pn_clev / PCTRIE_WIDTH);
        }
}
#endif /* DDB */