1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2013 EMC Corp. 5 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org> 6 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 */ 31 32 /* 33 * Path-compressed radix trie implementation. 34 * 35 * The implementation takes into account the following rationale: 36 * - Size of the nodes should be as small as possible but still big enough 37 * to avoid a large maximum depth for the trie. 
 * This is a balance
 * between the necessity to not wire too much physical memory for the nodes
 * and the necessity to avoid too much cache pollution during the trie
 * operations.
 * - There is not a huge bias toward the number of lookup operations over
 *   the number of insert and remove operations.  This basically implies
 *   that optimizations supposedly helping one operation but hurting the
 *   other might be carefully evaluated.
 * - On average not many nodes are expected to be fully populated, hence
 *   level compression may just complicate things.
 */

#include <sys/cdefs.h>
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/libkern.h>
#include <sys/pctrie.h>
#include <sys/proc.h>	/* smr.h depends on struct thread. */
#include <sys/smr.h>
#include <sys/smr_types.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#define	PCTRIE_MASK	(PCTRIE_COUNT - 1)
#define	PCTRIE_LIMIT	(howmany(sizeof(uint64_t) * NBBY, PCTRIE_WIDTH) - 1)

/*
 * A node's popmap needs one bit per child slot; pick the narrowest
 * unsigned type that holds PCTRIE_COUNT bits.
 */
#if PCTRIE_WIDTH == 3
typedef uint8_t pn_popmap_t;
#elif PCTRIE_WIDTH == 4
typedef uint16_t pn_popmap_t;
#elif PCTRIE_WIDTH == 5
typedef uint32_t pn_popmap_t;
#else
#error Unsupported width
#endif
/* ffs()/ilog2() below operate on int; the popmap must fit in one. */
_Static_assert(sizeof(pn_popmap_t) <= sizeof(int),
    "pn_popmap_t too wide");

struct pctrie_node;
typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t;

struct pctrie_node {
	uint64_t	pn_owner;			/* Owner of record. */
	pn_popmap_t	pn_popmap;			/* Valid children. */
	uint8_t		pn_clev;			/* Level * WIDTH. */
	smr_pctnode_t	pn_child[PCTRIE_COUNT];		/* Child nodes. */
};

enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED };

static __inline void pctrie_node_store(smr_pctnode_t *p, void *val,
    enum pctrie_access access);

/*
 * Map an index to the child-array slot of 'node' that covers it.
 */
static __inline int
pctrie_slot(struct pctrie_node *node, uint64_t index)
{
	return ((index >> node->pn_clev) & PCTRIE_MASK);
}

/*
 * Returns true if index does not belong to the specified node.  Otherwise,
 * sets slot value, and returns false.
 */
static __inline bool
pctrie_keybarr(struct pctrie_node *node, uint64_t index, int *slot)
{
	index = (index - node->pn_owner) >> node->pn_clev;
	if (index >= PCTRIE_COUNT)
		return (true);
	*slot = index;
	return (false);
}

/*
 * Sanity-check a node that is about to be freed: its popmap must be zero
 * or a power of two (at most one child remains), and every vacant slot
 * must really point to PCTRIE_NULL.  Compiles to nothing without
 * INVARIANTS.
 */
static __inline void
pctrie_node_put(struct pctrie_node *node)
{
#ifdef INVARIANTS
	int slot;

	KASSERT(powerof2(node->pn_popmap),
	    ("pctrie_node_put: node %p has too many children %04x", node,
	    node->pn_popmap));
	for (slot = 0; slot < PCTRIE_COUNT; slot++) {
		if ((node->pn_popmap & (1 << slot)) != 0)
			continue;
		KASSERT(smr_unserialized_load(&node->pn_child[slot], true) ==
		    PCTRIE_NULL,
		    ("pctrie_node_put: node %p has a child", node));
	}
#endif
}

/*
 * Fetch a node pointer from a slot, using the synchronization protocol
 * selected by the caller's 'access' mode.
142 */ 143 static __inline struct pctrie_node * 144 pctrie_node_load(smr_pctnode_t *p, smr_t smr, enum pctrie_access access) 145 { 146 switch (access) { 147 case PCTRIE_UNSERIALIZED: 148 return (smr_unserialized_load(p, true)); 149 case PCTRIE_LOCKED: 150 return (smr_serialized_load(p, true)); 151 case PCTRIE_SMR: 152 return (smr_entered_load(p, smr)); 153 } 154 __assert_unreachable(); 155 } 156 157 static __inline void 158 pctrie_node_store(smr_pctnode_t *p, void *v, enum pctrie_access access) 159 { 160 switch (access) { 161 case PCTRIE_UNSERIALIZED: 162 smr_unserialized_store(p, v, true); 163 break; 164 case PCTRIE_LOCKED: 165 smr_serialized_store(p, v, true); 166 break; 167 case PCTRIE_SMR: 168 panic("%s: Not supported in SMR section.", __func__); 169 break; 170 default: 171 __assert_unreachable(); 172 break; 173 } 174 } 175 176 /* 177 * Get the root node for a tree. 178 */ 179 static __inline struct pctrie_node * 180 pctrie_root_load(struct pctrie *ptree, smr_t smr, enum pctrie_access access) 181 { 182 return (pctrie_node_load((smr_pctnode_t *)&ptree->pt_root, smr, access)); 183 } 184 185 /* 186 * Set the root node for a tree. 187 */ 188 static __inline void 189 pctrie_root_store(struct pctrie *ptree, struct pctrie_node *node, 190 enum pctrie_access access) 191 { 192 pctrie_node_store((smr_pctnode_t *)&ptree->pt_root, node, access); 193 } 194 195 /* 196 * Returns TRUE if the specified node is a leaf and FALSE otherwise. 197 */ 198 static __inline bool 199 pctrie_isleaf(struct pctrie_node *node) 200 { 201 return (((uintptr_t)node & PCTRIE_ISLEAF) != 0); 202 } 203 204 /* 205 * Returns val with leaf bit set. 206 */ 207 static __inline void * 208 pctrie_toleaf(uint64_t *val) 209 { 210 return ((void *)((uintptr_t)val | PCTRIE_ISLEAF)); 211 } 212 213 /* 214 * Returns the associated val extracted from node. 
215 */ 216 static __inline uint64_t * 217 pctrie_toval(struct pctrie_node *node) 218 { 219 return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS)); 220 } 221 222 /* 223 * Returns the associated pointer extracted from node and field offset. 224 */ 225 static __inline void * 226 pctrie_toptr(struct pctrie_node *node, int keyoff) 227 { 228 return ((void *)(((uintptr_t)node & ~PCTRIE_FLAGS) - keyoff)); 229 } 230 231 /* 232 * Make 'child' a child of 'node'. 233 */ 234 static __inline void 235 pctrie_addnode(struct pctrie_node *node, uint64_t index, 236 struct pctrie_node *child, enum pctrie_access access) 237 { 238 int slot; 239 240 slot = pctrie_slot(node, index); 241 pctrie_node_store(&node->pn_child[slot], child, access); 242 node->pn_popmap ^= 1 << slot; 243 KASSERT((node->pn_popmap & (1 << slot)) != 0, 244 ("%s: bad popmap slot %d in node %p", __func__, slot, node)); 245 } 246 247 /* 248 * pctrie node zone initializer. 249 */ 250 int 251 pctrie_zone_init(void *mem, int size __unused, int flags __unused) 252 { 253 struct pctrie_node *node; 254 255 node = mem; 256 node->pn_popmap = 0; 257 for (int i = 0; i < nitems(node->pn_child); i++) 258 pctrie_node_store(&node->pn_child[i], PCTRIE_NULL, 259 PCTRIE_UNSERIALIZED); 260 return (0); 261 } 262 263 size_t 264 pctrie_node_size(void) 265 { 266 267 return (sizeof(struct pctrie_node)); 268 } 269 270 enum pctrie_insert_neighbor_mode { 271 PCTRIE_INSERT_NEIGHBOR_NONE, 272 PCTRIE_INSERT_NEIGHBOR_LT, 273 PCTRIE_INSERT_NEIGHBOR_GT, 274 }; 275 276 /* 277 * Look for where to insert the key-value pair into the trie. Complete the 278 * insertion if it replaces a null leaf. Return the insertion location if the 279 * insertion needs to be completed by the caller; otherwise return NULL. 280 * 281 * If the key is already present in the trie, populate *found_out as if by 282 * pctrie_lookup(). 
283 * 284 * With mode PCTRIE_INSERT_NEIGHBOR_GT or PCTRIE_INSERT_NEIGHBOR_LT, set 285 * *neighbor_out to the lowest level node we encounter during the insert lookup 286 * that is a parent of the next greater or lesser entry. The value is not 287 * defined if the key was already present in the trie. 288 * 289 * Note that mode is expected to be a compile-time constant, and this procedure 290 * is expected to be inlined into callers with extraneous code optimized out. 291 */ 292 static __always_inline void * 293 pctrie_insert_lookup_compound(struct pctrie *ptree, uint64_t *val, 294 uint64_t **found_out, struct pctrie_node **neighbor_out, 295 enum pctrie_insert_neighbor_mode mode) 296 { 297 uint64_t index; 298 struct pctrie_node *node, *parent; 299 int slot; 300 301 index = *val; 302 303 /* 304 * The owner of record for root is not really important because it 305 * will never be used. 306 */ 307 node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); 308 parent = NULL; 309 for (;;) { 310 if (pctrie_isleaf(node)) { 311 if (node == PCTRIE_NULL) { 312 if (parent == NULL) 313 ptree->pt_root = pctrie_toleaf(val); 314 else 315 pctrie_addnode(parent, index, 316 pctrie_toleaf(val), PCTRIE_LOCKED); 317 return (NULL); 318 } 319 if (*pctrie_toval(node) == index) { 320 *found_out = pctrie_toval(node); 321 return (NULL); 322 } 323 break; 324 } 325 if (pctrie_keybarr(node, index, &slot)) 326 break; 327 /* 328 * Descend. If we're tracking the next neighbor and this node 329 * contains a neighboring entry in the right direction, record 330 * it. 331 */ 332 if (mode == PCTRIE_INSERT_NEIGHBOR_LT) { 333 if ((node->pn_popmap & ((1 << slot) - 1)) != 0) 334 *neighbor_out = node; 335 } else if (mode == PCTRIE_INSERT_NEIGHBOR_GT) { 336 if ((node->pn_popmap >> slot) > 1) 337 *neighbor_out = node; 338 } 339 parent = node; 340 node = pctrie_node_load(&node->pn_child[slot], NULL, 341 PCTRIE_LOCKED); 342 } 343 344 /* 345 * The caller will split this node. 
If we're tracking the next 346 * neighbor, record the old node if the old entry is in the right 347 * direction. 348 */ 349 if (mode == PCTRIE_INSERT_NEIGHBOR_LT) { 350 if (*pctrie_toval(node) < index) 351 *neighbor_out = node; 352 } else if (mode == PCTRIE_INSERT_NEIGHBOR_GT) { 353 if (*pctrie_toval(node) > index) 354 *neighbor_out = node; 355 } 356 357 /* 358 * 'node' must be replaced in the tree with a new branch node, with 359 * children 'node' and 'val'. Return the place that points to 'node' 360 * now, and will point to to the new branching node later. 361 */ 362 return ((parent != NULL) ? &parent->pn_child[slot]: 363 (smr_pctnode_t *)&ptree->pt_root); 364 } 365 366 /* 367 * Wrap pctrie_insert_lookup_compound to implement a strict insertion. Panic 368 * if the key already exists, and do not look for neighboring entries. 369 */ 370 void * 371 pctrie_insert_lookup_strict(struct pctrie *ptree, uint64_t *val) 372 { 373 void *parentp; 374 uint64_t *found; 375 376 found = NULL; 377 parentp = pctrie_insert_lookup_compound(ptree, val, &found, NULL, 378 PCTRIE_INSERT_NEIGHBOR_NONE); 379 if (__predict_false(found != NULL)) 380 panic("%s: key %jx is already present", __func__, 381 (uintmax_t)*val); 382 return (parentp); 383 } 384 385 /* 386 * Wrap pctrie_insert_lookup_compound to implement find-or-insert. Do not look 387 * for neighboring entries. 388 */ 389 void * 390 pctrie_insert_lookup(struct pctrie *ptree, uint64_t *val, 391 uint64_t **found_out) 392 { 393 *found_out = NULL; 394 return (pctrie_insert_lookup_compound(ptree, val, found_out, NULL, 395 PCTRIE_INSERT_NEIGHBOR_NONE)); 396 } 397 398 /* 399 * Wrap pctrie_insert_lookup_compound to implement find or insert and find next 400 * greater entry. Find a subtree that contains the next entry greater than the 401 * newly-inserted or to-be-inserted entry. 
 */
void *
pctrie_insert_lookup_gt(struct pctrie *ptree, uint64_t *val,
    uint64_t **found_out, struct pctrie_node **neighbor_out)
{
	*found_out = NULL;
	*neighbor_out = NULL;
	return (pctrie_insert_lookup_compound(ptree, val, found_out,
	    neighbor_out, PCTRIE_INSERT_NEIGHBOR_GT));
}

/*
 * Wrap pctrie_insert_lookup_compound to implement find or insert and find next
 * lesser entry.  Find a subtree that contains the next entry less than the
 * newly-inserted or to-be-inserted entry.
 */
void *
pctrie_insert_lookup_lt(struct pctrie *ptree, uint64_t *val,
    uint64_t **found_out, struct pctrie_node **neighbor_out)
{
	*found_out = NULL;
	*neighbor_out = NULL;
	return (pctrie_insert_lookup_compound(ptree, val, found_out,
	    neighbor_out, PCTRIE_INSERT_NEIGHBOR_LT));
}

/*
 * Uses new node to insert key-value pair into the trie at given location.
 * 'parentp' is the slot returned by one of the insert-lookup routines above,
 * and 'parent' is a freshly allocated node that will become the new branch.
 */
void
pctrie_insert_node(void *parentp, struct pctrie_node *parent, uint64_t *val)
{
	struct pctrie_node *node;
	uint64_t index, newind;

	/*
	 * Clear the last child pointer of the newly allocated parent.  We want
	 * to clear it after the final section has exited so lookup can not
	 * return false negatives.  It is done here because it will be
	 * cache-cold in the dtor callback.
	 */
	if (parent->pn_popmap != 0) {
		pctrie_node_store(&parent->pn_child[ffs(parent->pn_popmap) - 1],
		    PCTRIE_NULL, PCTRIE_UNSERIALIZED);
		parent->pn_popmap = 0;
	}

	/*
	 * Recover the values of the two children of the new parent node.  If
	 * 'node' is not a leaf, this stores into 'newind' the 'owner' field,
	 * which must be first in the node.
	 */
	index = *val;
	node = pctrie_node_load(parentp, NULL, PCTRIE_UNSERIALIZED);
	newind = *pctrie_toval(node);

	/*
	 * From the highest-order bit where the indexes differ,
	 * compute the highest level in the trie where they differ.  Then,
	 * compute the least index of this subtrie.
	 */
	_Static_assert(sizeof(long long) >= sizeof(uint64_t),
	    "uint64 too wide");
	_Static_assert(sizeof(uint64_t) * NBBY <=
	    (1 << (sizeof(parent->pn_clev) * NBBY)), "pn_clev too narrow");
	parent->pn_clev = rounddown(ilog2(index ^ newind), PCTRIE_WIDTH);
	/*
	 * Assigning PCTRIE_COUNT to the 64-bit pn_owner first makes the
	 * shift below 64-bit; the negation then yields a mask of all bits
	 * at and above the new subtree's span, giving its least index.
	 */
	parent->pn_owner = PCTRIE_COUNT;
	parent->pn_owner = index & -(parent->pn_owner << parent->pn_clev);


	/* These writes are not yet visible due to ordering. */
	pctrie_addnode(parent, index, pctrie_toleaf(val), PCTRIE_UNSERIALIZED);
	pctrie_addnode(parent, newind, node, PCTRIE_UNSERIALIZED);
	/* Synchronize to make the above visible. */
	pctrie_node_store(parentp, parent, PCTRIE_LOCKED);
}

/*
 * Returns the value stored at the index.  If the index is not present,
 * NULL is returned.
 *
 * 'access' selects the load protocol for every step of the descent; 'smr'
 * is only consulted for PCTRIE_SMR accesses.
 */
static __always_inline uint64_t *
_pctrie_lookup(struct pctrie *ptree, uint64_t index, smr_t smr,
    enum pctrie_access access)
{
	struct pctrie_node *node;
	uint64_t *m;
	int slot;

	node = pctrie_root_load(ptree, smr, access);
	for (;;) {
		if (pctrie_isleaf(node)) {
			if ((m = pctrie_toval(node)) != NULL && *m == index)
				return (m);
			break;
		}
		if (pctrie_keybarr(node, index, &slot))
			break;
		node = pctrie_node_load(&node->pn_child[slot], smr, access);
	}
	return (NULL);
}

/*
 * Returns the value stored at the index, assuming access is externally
 * synchronized by a lock.
 *
 * If the index is not present, NULL is returned.
 */
uint64_t *
pctrie_lookup(struct pctrie *ptree, uint64_t index)
{
	return (_pctrie_lookup(ptree, index, NULL, PCTRIE_LOCKED));
}

/*
 * Returns the value stored at the index without requiring an external lock.
 *
 * If the index is not present, NULL is returned.
 */
uint64_t *
pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr)
{
	uint64_t *res;

	/* The SMR read section pins trie nodes for the whole descent. */
	smr_enter(smr);
	res = _pctrie_lookup(ptree, index, smr, PCTRIE_SMR);
	smr_exit(smr);
	return (res);
}

/*
 * Returns the value with the least index that is greater than or equal to the
 * specified index, or NULL if there are no such values.
 *
 * Requires that access be externally synchronized by a lock.
 */
static __inline uint64_t *
pctrie_lookup_ge_node(struct pctrie_node *node, uint64_t index)
{
	struct pctrie_node *succ;
	uint64_t *m;
	int slot;

	/*
	 * Descend the trie as if performing an ordinary lookup for the
	 * specified value.  However, unlike an ordinary lookup, as we descend
	 * the trie, we use "succ" to remember the last branching-off point,
	 * that is, the interior node under which the least value that is both
	 * outside our current path down the trie and greater than the specified
	 * index resides.  (The node's popmap makes it fast and easy to
	 * recognize a branching-off point.)  If our ordinary lookup fails to
	 * yield a value that is greater than or equal to the specified index,
	 * then we will exit this loop and perform a lookup starting from
	 * "succ".  If "succ" is not NULL, then that lookup is guaranteed to
	 * succeed.
	 */
	succ = NULL;
	for (;;) {
		if (pctrie_isleaf(node)) {
			if ((m = pctrie_toval(node)) != NULL && *m >= index)
				return (m);
			break;
		}
		if (pctrie_keybarr(node, index, &slot)) {
			/*
			 * If all values in this subtree are > index, then the
			 * least value in this subtree is the answer.
			 */
			if (node->pn_owner > index)
				succ = node;
			break;
		}

		/*
		 * Just in case the next search step leads to a subtree of all
		 * values < index, check popmap to see if a next bigger step, to
		 * a subtree of all pages with values > index, is available.  If
		 * so, remember to restart the search here.
		 */
		if ((node->pn_popmap >> slot) > 1)
			succ = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}

	/*
	 * Restart the search from the last place visited in the subtree that
	 * included some values > index, if there was such a place.
	 */
	if (succ == NULL)
		return (NULL);
	if (succ != node) {
		/*
		 * Take a step to the next bigger sibling of the node chosen
		 * last time.  In that subtree, all values > index.
		 */
		slot = pctrie_slot(succ, index) + 1;
		KASSERT((succ->pn_popmap >> slot) != 0,
		    ("%s: no popmap siblings past slot %d in node %p",
		    __func__, slot, succ));
		slot += ffs(succ->pn_popmap >> slot) - 1;
		succ = pctrie_node_load(&succ->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}

	/*
	 * Find the value in the subtree rooted at "succ" with the least index.
	 */
	while (!pctrie_isleaf(succ)) {
		KASSERT(succ->pn_popmap != 0,
		    ("%s: no popmap children in node %p", __func__, succ));
		slot = ffs(succ->pn_popmap) - 1;
		succ = pctrie_node_load(&succ->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	return (pctrie_toval(succ));
}

/*
 * Returns the value with the least index >= 'index', or NULL, under the
 * tree lock.
 */
uint64_t *
pctrie_lookup_ge(struct pctrie *ptree, uint64_t index)
{
	return (pctrie_lookup_ge_node(
	    pctrie_root_load(ptree, NULL, PCTRIE_LOCKED), index));
}

/*
 * Returns the value with the least index > 'index' in the given subtree, or
 * NULL.  The index + 1 == 0 test guards against wraparound at UINT64_MAX.
 */
uint64_t *
pctrie_subtree_lookup_gt(struct pctrie_node *node, uint64_t index)
{
	if (node == NULL || index + 1 == 0)
		return (NULL);
	return (pctrie_lookup_ge_node(node, index + 1));
}

#ifdef INVARIANTS
/*
 * Assert that a subtree lookup-gt result agrees with a lookup from the root.
 */
void
pctrie_subtree_lookup_gt_assert(struct pctrie_node *node, uint64_t index,
    struct pctrie *ptree, uint64_t *res)
{
	uint64_t *expected;

	if (index + 1 == 0)
		expected = NULL;
	else
		expected = pctrie_lookup_ge(ptree, index + 1);
	KASSERT(res == expected,
	    ("pctrie subtree lookup gt result different from root lookup: "
	    "ptree %p, index %ju, subtree %p, found %p, expected %p", ptree,
	    (uintmax_t)index, node, res, expected));
}
#endif

/*
 * Returns the value with the greatest index that is less than or equal to the
 * specified index, or NULL if there are no such values.
 *
 * Requires that access be externally synchronized by a lock.
 */
static __inline uint64_t *
pctrie_lookup_le_node(struct pctrie_node *node, uint64_t index)
{
	struct pctrie_node *pred;
	uint64_t *m;
	int slot;

	/*
	 * Mirror the implementation of pctrie_lookup_ge_node, described above.
	 */
	pred = NULL;
	for (;;) {
		if (pctrie_isleaf(node)) {
			if ((m = pctrie_toval(node)) != NULL && *m <= index)
				return (m);
			break;
		}
		if (pctrie_keybarr(node, index, &slot)) {
			if (node->pn_owner < index)
				pred = node;
			break;
		}
		if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
			pred = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	if (pred == NULL)
		return (NULL);
	if (pred != node) {
		/* Step to the greatest lesser sibling of the path taken. */
		slot = pctrie_slot(pred, index);
		KASSERT((pred->pn_popmap & ((1 << slot) - 1)) != 0,
		    ("%s: no popmap siblings before slot %d in node %p",
		    __func__, slot, pred));
		slot = ilog2(pred->pn_popmap & ((1 << slot) - 1));
		pred = pctrie_node_load(&pred->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	/* Descend to the greatest index in the chosen subtree. */
	while (!pctrie_isleaf(pred)) {
		KASSERT(pred->pn_popmap != 0,
		    ("%s: no popmap children in node %p", __func__, pred));
		slot = ilog2(pred->pn_popmap);
		pred = pctrie_node_load(&pred->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	return (pctrie_toval(pred));
}

/*
 * Returns the value with the greatest index <= 'index', or NULL, under the
 * tree lock.
 */
uint64_t *
pctrie_lookup_le(struct pctrie *ptree, uint64_t index)
{
	return (pctrie_lookup_le_node(
	    pctrie_root_load(ptree, NULL, PCTRIE_LOCKED), index));
}

/*
 * Returns the value with the greatest index < 'index' in the given subtree,
 * or NULL.  The index == 0 test guards against underflow.
 */
uint64_t *
pctrie_subtree_lookup_lt(struct pctrie_node *node, uint64_t index)
{
	if (node == NULL || index == 0)
		return (NULL);
	return (pctrie_lookup_le_node(node, index - 1));
}

#ifdef INVARIANTS
/*
 * Assert that a subtree lookup-lt result agrees with a lookup from the root.
 */
void
pctrie_subtree_lookup_lt_assert(struct pctrie_node *node, uint64_t index,
    struct pctrie *ptree, uint64_t *res)
{
	uint64_t *expected;

	if (index == 0)
		expected = NULL;
	else
		expected = pctrie_lookup_le(ptree, index - 1);
	KASSERT(res == expected,
	    ("pctrie subtree lookup lt result different from root lookup: "
	    "ptree %p, index %ju, subtree %p, found %p, expected %p", ptree,
	    (uintmax_t)index, node, res, expected));
}
#endif

/*
 * Remove the specified index from the tree, and return the value stored at
 * that index.  If the index is not present, return NULL.  On removal of a
 * node's next-to-last child, the node itself is unlinked and handed back to
 * the caller via *freenode for deferred (SMR-safe) deallocation.
 */
uint64_t *
pctrie_remove_lookup(struct pctrie *ptree, uint64_t index,
    struct pctrie_node **freenode)
{
	struct pctrie_node *child, *node, *parent;
	uint64_t *m;
	int slot;

	*freenode = node = NULL;
	child = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
	for (;;) {
		if (pctrie_isleaf(child))
			break;
		parent = node;
		node = child;
		slot = pctrie_slot(node, index);
		child = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	if ((m = pctrie_toval(child)) == NULL || *m != index)
		return (NULL);
	if (node == NULL) {
		/* The root itself was the leaf; empty the tree. */
		pctrie_root_store(ptree, PCTRIE_NULL, PCTRIE_LOCKED);
		return (m);
	}
	KASSERT((node->pn_popmap & (1 << slot)) != 0,
	    ("%s: bad popmap slot %d in node %p",
	    __func__, slot, node));
	node->pn_popmap ^= 1 << slot;
	pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL, PCTRIE_LOCKED);
	if (!powerof2(node->pn_popmap))
		return (m);
	/* One child remains; splice it into node's place in the parent. */
	KASSERT(node->pn_popmap != 0, ("%s: bad popmap all zeroes", __func__));
	slot = ffs(node->pn_popmap) - 1;
	child = pctrie_node_load(&node->pn_child[slot], NULL, PCTRIE_LOCKED);
	KASSERT(child != PCTRIE_NULL,
	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
	if (parent == NULL)
		pctrie_root_store(ptree, child, PCTRIE_LOCKED);
	else {
		slot = pctrie_slot(parent, index);
		KASSERT(node ==
		    pctrie_node_load(&parent->pn_child[slot], NULL,
		    PCTRIE_LOCKED), ("%s: invalid child value", __func__));
		pctrie_node_store(&parent->pn_child[slot], child,
		    PCTRIE_LOCKED);
	}
	/*
	 * The child is still valid and we can not zero the
	 * pointer until all SMR references are gone.
	 */
	pctrie_node_put(node);
	*freenode = node;
	return (m);
}

/*
 * Walk the subtrie rooted at *pnode in order, invoking callback on leaves and
 * using the leftmost child pointer for path reversal, until an interior node
 * is stripped of all children, and returned for deallocation, with *pnode left
 * pointing to the parent of that node.
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_prune(struct pctrie_node **pnode, struct pctrie_node *parent,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	struct pctrie_node *child, *node;
	int slot;

	node = *pnode;
	while (node->pn_popmap != 0) {
		slot = ffs(node->pn_popmap) - 1;
		node->pn_popmap ^= 1 << slot;
		child = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_UNSERIALIZED);
		pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL,
		    PCTRIE_UNSERIALIZED);
		if (pctrie_isleaf(child)) {
			if (callback != NULL)
				callback(pctrie_toptr(child, keyoff), arg);
			continue;
		}
		/* Climb one level down the trie. */
		pctrie_node_store(&node->pn_child[0], parent,
		    PCTRIE_UNSERIALIZED);
		parent = node;
		node = child;
	}
	*pnode = parent;
	return (node);
}

/*
 * Recover the node parent from its first child and continue pruning.
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_resume_compound(struct pctrie_node **pnode,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	struct pctrie_node *parent, *node;

	node = *pnode;
	if (node == NULL)
		return (NULL);
	/* Climb one level up the trie. */
	parent = pctrie_node_load(&node->pn_child[0], NULL,
	    PCTRIE_UNSERIALIZED);
	pctrie_node_store(&node->pn_child[0], PCTRIE_NULL, PCTRIE_UNSERIALIZED);
	return (pctrie_reclaim_prune(pnode, parent, callback, keyoff, arg));
}

/*
 * Find the trie root, and start pruning with a NULL parent.  A leaf (or
 * empty) root is handled entirely here; otherwise pruning begins and the
 * first fully-stripped interior node is returned.
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_begin_compound(struct pctrie_node **pnode,
    struct pctrie *ptree,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	struct pctrie_node *node;

	node = pctrie_root_load(ptree, NULL, PCTRIE_UNSERIALIZED);
	pctrie_root_store(ptree, PCTRIE_NULL, PCTRIE_UNSERIALIZED);
	if (pctrie_isleaf(node)) {
		if (callback != NULL && node != PCTRIE_NULL)
			callback(pctrie_toptr(node, keyoff), arg);
		return (NULL);
	}
	*pnode = node;
	return (pctrie_reclaim_prune(pnode, NULL, callback, keyoff, arg));
}

/* Continue a reclaim walk; no per-leaf callback. */
struct pctrie_node *
pctrie_reclaim_resume(struct pctrie_node **pnode)
{
	return (pctrie_reclaim_resume_compound(pnode, NULL, 0, NULL));
}

/* Begin a reclaim walk; no per-leaf callback. */
struct pctrie_node *
pctrie_reclaim_begin(struct pctrie_node **pnode, struct pctrie *ptree)
{
	return (pctrie_reclaim_begin_compound(pnode, ptree, NULL, 0, NULL));
}

/* Continue a reclaim walk, invoking 'callback' on each reclaimed leaf. */
struct pctrie_node *
pctrie_reclaim_resume_cb(struct pctrie_node **pnode,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	return (pctrie_reclaim_resume_compound(pnode, callback, keyoff, arg));
}

/* Begin a reclaim walk, invoking 'callback' on each reclaimed leaf. */
struct pctrie_node *
pctrie_reclaim_begin_cb(struct pctrie_node **pnode, struct pctrie *ptree,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	return (pctrie_reclaim_begin_compound(pnode, ptree,
	    callback, keyoff, arg));
}

/*
 * Replace an existing value in the trie with another one.
 * Panics if there is not an old value in the trie at the new value's index.
 */
uint64_t *
pctrie_replace(struct pctrie *ptree, uint64_t *newval)
{
	struct pctrie_node *leaf, *parent, *node;
	uint64_t *m;
	uint64_t index;
	int slot;

	leaf = pctrie_toleaf(newval);
	index = *newval;
	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
	parent = NULL;
	for (;;) {
		if (pctrie_isleaf(node)) {
			if ((m = pctrie_toval(node)) != NULL && *m == index) {
				/*
				 * NOTE(review): the root case assigns pt_root
				 * directly rather than via
				 * pctrie_root_store(); presumably safe under
				 * the tree lock for a leaf-for-leaf swap —
				 * confirm against smr(9) store requirements.
				 */
				if (parent == NULL)
					ptree->pt_root = leaf;
				else
					pctrie_node_store(
					    &parent->pn_child[slot], leaf,
					    PCTRIE_LOCKED);
				return (m);
			}
			break;
		}
		if (pctrie_keybarr(node, index, &slot))
			break;
		parent = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	panic("%s: original replacing value not found", __func__);
}

#ifdef DDB
/*
 * Show details about the given node.
 */
DB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
{
	struct pctrie_node *node, *tmp;
	int slot;
	pn_popmap_t popmap;

	if (!have_addr)
		return;
	node = (struct pctrie_node *)addr;
	db_printf("node %p, owner %jx, children popmap %04x, level %u:\n",
	    (void *)node, (uintmax_t)node->pn_owner, node->pn_popmap,
	    node->pn_clev / PCTRIE_WIDTH);
	/* Iterate the set bits of popmap, clearing each as it is printed. */
	for (popmap = node->pn_popmap; popmap != 0; popmap ^= 1 << slot) {
		slot = ffs(popmap) - 1;
		tmp = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_UNSERIALIZED);
		db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
		    slot, (void *)tmp,
		    pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL,
		    node->pn_clev / PCTRIE_WIDTH);
	}
}
#endif /* DDB */