/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 EMC Corp.
 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Path-compressed radix trie implementation.
 *
 * The implementation takes into account the following rationale:
 * - Size of the nodes should be as small as possible but still big enough
 *   to avoid a large maximum depth for the trie.  This is a balance
 *   between the necessity to not wire too much physical memory for the nodes
 *   and the necessity to avoid too much cache pollution during the trie
 *   operations.
 * - There is not a huge bias toward the number of lookup operations over
 *   the number of insert and remove operations.  This basically implies
 *   that optimizations supposedly helping one operation but hurting the
 *   other must be carefully evaluated.
 * - On average not many nodes are expected to be fully populated, hence
 *   level compression may just complicate things.
 */

#include <sys/cdefs.h>
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/libkern.h>
#include <sys/pctrie.h>
#include <sys/proc.h>	/* smr.h depends on struct thread. */
#include <sys/smr.h>
#include <sys/smr_types.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#if PCTRIE_WIDTH == 3
typedef uint8_t pn_popmap_t;
#elif PCTRIE_WIDTH == 4
typedef uint16_t pn_popmap_t;
#elif PCTRIE_WIDTH == 5
typedef uint32_t pn_popmap_t;
#else
#error Unsupported width
#endif
_Static_assert(sizeof(pn_popmap_t) <= sizeof(int),
    "pn_popmap_t too wide");

struct pctrie_node;
typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t;

struct pctrie_node {
	uint64_t	pn_owner;			/* Owner of record. */
	pn_popmap_t	pn_popmap;			/* Valid children. */
	uint8_t		pn_clev;			/* Level * WIDTH. */
	smr_pctnode_t	pn_parent;			/* Parent node. */
	smr_pctnode_t	pn_child[PCTRIE_COUNT];		/* Child nodes. */
};
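
/*
 * Layout sketch (editorial illustration, assuming PCTRIE_WIDTH == 4, so that
 * PCTRIE_COUNT == 16 and pn_popmap is a uint16_t with one bit per non-NULL
 * child): a node with pn_clev == 8 owns the PCTRIE_COUNT << 8 == 4096
 * consecutive indices starting at pn_owner, and child slot i covers indices
 * pn_owner + (i << 8) through pn_owner + ((i + 1) << 8) - 1.
 */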

/*
 * Map index to an array position for the children of node.
 */
static __inline int
pctrie_slot(struct pctrie_node *node, uint64_t index)
{
	return ((index >> node->pn_clev) & (PCTRIE_COUNT - 1));
}

/*
 * Returns true if index does not belong to the specified node.  Otherwise,
 * sets slot value, and returns false.
 */
static __inline bool
pctrie_keybarr(struct pctrie_node *node, uint64_t index, int *slot)
{
	index = (index - node->pn_owner) >> node->pn_clev;
	if (index >= PCTRIE_COUNT)
		return (true);
	*slot = index;
	return (false);
}

enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED };

/*
 * Fetch a node pointer from a slot.
 */
static __inline struct pctrie_node *
pctrie_node_load(smr_pctnode_t *p, smr_t smr, enum pctrie_access access)
{
	switch (access) {
	case PCTRIE_UNSERIALIZED:
		return (smr_unserialized_load(p, true));
	case PCTRIE_LOCKED:
		return (smr_serialized_load(p, true));
	case PCTRIE_SMR:
		return (smr_entered_load(p, smr));
	}
	__assert_unreachable();
}

static __inline void
pctrie_node_store(smr_pctnode_t *p, void *v, enum pctrie_access access)
{
	switch (access) {
	case PCTRIE_UNSERIALIZED:
		smr_unserialized_store(p, v, true);
		break;
	case PCTRIE_LOCKED:
		smr_serialized_store(p, v, true);
		break;
	case PCTRIE_SMR:
		panic("%s: Not supported in SMR section.", __func__);
		break;
	default:
		__assert_unreachable();
		break;
	}
}

/*
 * Get the root address, cast to proper type for load/store.
 */
static __inline smr_pctnode_t *
pctrie_root(struct pctrie *ptree)
{
	return ((smr_pctnode_t *)&ptree->pt_root);
}

/*
 * Get the root node for a tree.
 */
static __inline struct pctrie_node *
pctrie_root_load(struct pctrie *ptree, smr_t smr, enum pctrie_access access)
{
	return (pctrie_node_load(pctrie_root(ptree), smr, access));
}

/*
 * Get the child of a node.
 */
static __inline smr_pctnode_t *
pctrie_child(struct pctrie *ptree, struct pctrie_node *node, uint64_t index)
{
	return (node == NULL ? pctrie_root(ptree) :
	    &node->pn_child[pctrie_slot(node, index)]);
}

/*
 * Returns TRUE if the specified node is a leaf and FALSE otherwise.
 */
static __inline bool
pctrie_isleaf(struct pctrie_node *node)
{
	return (((uintptr_t)node & PCTRIE_ISLEAF) != 0);
}

/*
 * Returns val with leaf bit set.
 */
static __inline void *
pctrie_toleaf(uint64_t *val)
{
	return ((void *)((uintptr_t)val | PCTRIE_ISLEAF));
}

/*
 * Returns the associated val extracted from node.
 */
static __inline uint64_t *
pctrie_toval(struct pctrie_node *node)
{
	return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS));
}

/*
 * Returns the associated pointer extracted from node and field offset.
 */
static __inline void *
pctrie_toptr(struct pctrie_node *node, int keyoff)
{
	return ((void *)(((uintptr_t)node & ~PCTRIE_FLAGS) - keyoff));
}
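
/*
 * Editorial sketch of the leaf encoding above (illustrative only): a child
 * slot holds either a node pointer or a value pointer tagged with
 * PCTRIE_ISLEAF in its low bits, so a round trip looks like:
 *
 *	void *leaf = pctrie_toleaf(val);
 *	if (pctrie_isleaf(leaf))
 *		val = pctrie_toval(leaf);
 */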

/*
 * Make 'parent' a parent of 'child'.
 */
static __inline void
pctrie_setparent(struct pctrie_node *child, struct pctrie_node *parent)
{
	pctrie_node_store(&child->pn_parent, parent, PCTRIE_UNSERIALIZED);
}

/*
 * Return the parent of 'node'.
 */
static __inline struct pctrie_node *
pctrie_parent(struct pctrie_node *node)
{
	return (pctrie_node_load(&node->pn_parent, NULL, PCTRIE_UNSERIALIZED));
}

/*
 * Make 'child' a child of 'node'.
 */
static __inline void
pctrie_addnode(struct pctrie_node *node, uint64_t index,
    struct pctrie_node *child, enum pctrie_access access)
{
	int slot;

	slot = pctrie_slot(node, index);
	pctrie_node_store(&node->pn_child[slot], child, access);
	node->pn_popmap ^= 1 << slot;
	KASSERT((node->pn_popmap & (1 << slot)) != 0,
	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
}

/*
 * pctrie node zone initializer.
 */
int
pctrie_zone_init(void *mem, int size __unused, int flags __unused)
{
	struct pctrie_node *node;

	node = mem;
	node->pn_popmap = 0;
	for (int i = 0; i < nitems(node->pn_child); i++)
		pctrie_node_store(&node->pn_child[i], PCTRIE_NULL,
		    PCTRIE_UNSERIALIZED);
	return (0);
}

size_t
pctrie_node_size(void)
{

	return (sizeof(struct pctrie_node));
}

/*
 * Returns the value associated with the node, if the node is a leaf that
 * matches the index; otherwise NULL.
 */
static __always_inline uint64_t *
pctrie_match_value(struct pctrie_node *node, uint64_t index)
{
	uint64_t *m;

	if (!pctrie_isleaf(node) || (m = pctrie_toval(node)) == NULL ||
	    *m != index)
		m = NULL;
	return (m);
}

/*
 * Returns the last node examined in the search for the index, and sets the
 * parent of that node.
 */
static __always_inline struct pctrie_node *
_pctrie_lookup_node(struct pctrie *ptree, struct pctrie_node *node,
    uint64_t index, struct pctrie_node **parent_out,
    smr_t smr, enum pctrie_access access)
{
	struct pctrie_node *parent;
	int slot;

	parent = node;
	if (parent == NULL)
		node = pctrie_root_load(ptree, smr, access);

	/*
	 * Climb the search path to find the lowest node from which to start
	 * the search for a value matching 'index'.
	 */
	while (parent != NULL) {
		KASSERT(access == PCTRIE_SMR || !powerof2(parent->pn_popmap),
		    ("%s: freed node in iter path", __func__));
		node = parent;
		if (!pctrie_keybarr(node, index, &slot))
			break;
		parent = pctrie_parent(node);
	}

	/* Seek a node that matches index. */
	while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot)) {
		parent = node;
		KASSERT(access == PCTRIE_SMR || !powerof2(parent->pn_popmap),
		    ("%s: freed node in iter path", __func__));
		node = pctrie_node_load(&node->pn_child[slot], smr, access);
	}
	*parent_out = parent;
	return (node);
}

/*
 * Returns the value stored at the index, assuming access is externally
 * synchronized by a lock.
 *
 * If the index is not present, NULL is returned.
 */
uint64_t *
pctrie_lookup(struct pctrie *ptree, uint64_t index)
{
	struct pctrie_node *node, *parent;

	node = _pctrie_lookup_node(ptree, NULL, index, &parent, NULL,
	    PCTRIE_LOCKED);
	return (pctrie_match_value(node, index));
}
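
/*
 * Editorial usage sketch (illustrative): values stored in the trie are
 * pointers to a uint64_t key embedded in a caller structure, so a hit
 * returns a pointer whose target equals the index:
 *
 *	uint64_t *val = pctrie_lookup(ptree, 42);
 *	if (val != NULL)
 *		KASSERT(*val == 42, ("value embeds its own index"));
 */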

/*
 * Returns the value stored at the index without requiring an external lock.
 *
 * If the index is not present, NULL is returned.
 */
uint64_t *
pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr)
{
	struct pctrie_node *node, *parent;
	uint64_t *res;

	smr_enter(smr);
	node = _pctrie_lookup_node(ptree, NULL, index, &parent, smr,
	    PCTRIE_SMR);
	res = pctrie_match_value(node, index);
	smr_exit(smr);
	return (res);
}

/*
 * Returns the value stored at a given index value, possibly NULL, assuming
 * access is externally synchronized by a lock.
 */
uint64_t *
pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index)
{
	struct pctrie_node *node;

	node = _pctrie_lookup_node(it->ptree, it->node, index, &it->node,
	    NULL, PCTRIE_LOCKED);
	it->index = index;
	return (pctrie_match_value(node, index));
}

/*
 * Look for where to insert the key-value pair into the trie.  Complete the
 * insertion if it replaces a null leaf.  Return the insertion location if the
 * insertion needs to be completed by the caller; otherwise return NULL.
 *
 * If the key is already present in the trie, populate *found_out as if by
 * pctrie_lookup().
 */
static __always_inline void *
_pctrie_insert_lookup(struct pctrie *ptree, struct pctrie_node *parent,
    uint64_t *val, struct pctrie_node **parent_out, uint64_t **found_out)
{
	struct pctrie_node *node;

	node = _pctrie_lookup_node(ptree, parent, *val, parent_out, NULL,
	    PCTRIE_LOCKED);
	*found_out = NULL;
	if (node == PCTRIE_NULL) {
		if (*parent_out == NULL)
			pctrie_node_store(pctrie_root(ptree),
			    pctrie_toleaf(val), PCTRIE_LOCKED);
		else
			pctrie_addnode(*parent_out, *val,
			    pctrie_toleaf(val), PCTRIE_LOCKED);
		return (NULL);
	}
	if (__predict_false(pctrie_match_value(node, *val) != NULL)) {
		*found_out = pctrie_toval(node);
		return (NULL);
	}

	/*
	 * 'node' must be replaced in the tree with a new branch node, with
	 * children 'node' and 'val'.  Return the place that points to 'node'
	 * now, and will point to the new branching node later.
	 */
	return (pctrie_child(ptree, *parent_out, *val));
}

/*
 * Wrap _pctrie_insert_lookup to implement a strict insertion.  Panic
 * if the key already exists, and do not look for neighboring entries.
 */
void *
pctrie_insert_lookup_strict(struct pctrie *ptree, uint64_t *val,
    struct pctrie_node **parent_out)
{
	void *parentp;
	uint64_t *found;

	parentp = _pctrie_insert_lookup(ptree, NULL, val, parent_out, &found);
	if (__predict_false(found != NULL))
		panic("%s: key %jx is already present", __func__,
		    (uintmax_t)*val);
	return (parentp);
}

/*
 * Wrap _pctrie_insert_lookup to implement find-or-insert.  Do not look
 * for neighboring entries.
 */
void *
pctrie_insert_lookup(struct pctrie *ptree, uint64_t *val,
    struct pctrie_node **parent_out, uint64_t **found_out)
{
	return (_pctrie_insert_lookup(ptree, NULL, val, parent_out,
	    found_out));
}
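
/*
 * Editorial usage sketch (illustrative; the real wrappers are generated by
 * PCTRIE_DEFINE() in sys/sys/pctrie.h): insertion is a two-step protocol so
 * that the caller controls node allocation:
 *
 *	parentp = pctrie_insert_lookup(ptree, val, &parent, &found);
 *	if (parentp != NULL) {
 *		child = <allocate a node, e.g. from a UMA zone>;
 *		pctrie_insert_node(val, parent, parentp, child);
 *	}
 */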

/*
 * Insert the val in the trie, starting search with iterator.  Return a
 * pointer to indicate where a new node must be allocated to complete
 * insertion.  Assumes access is externally synchronized by a lock.
 */
void *
pctrie_iter_insert_lookup(struct pctrie_iter *it, uint64_t *val)
{
	void *res;
	uint64_t *found;

	it->index = *val;
	res = _pctrie_insert_lookup(it->ptree, it->node, val, &it->node,
	    &found);
	if (__predict_false(found != NULL))
		panic("%s: key %jx is already present", __func__,
		    (uintmax_t)it->index);
	return (res);
}

/*
 * Inserts newly allocated node 'child' into trie at location 'parentp', with
 * parent 'parent' and two children, 'val' and whatever non-NULL node or leaf
 * was at 'parentp' to begin with.
 */
void
pctrie_insert_node(uint64_t *val, struct pctrie_node *parent, void *parentp,
    struct pctrie_node *child)
{
	struct pctrie_node *node;
	uint64_t index, newind;

	/*
	 * Clear the last child pointer of the newly allocated child.  We want
	 * to clear it after the final section has exited so lookup can not
	 * return false negatives.  It is done here because it will be
	 * cache-cold in the dtor callback.
	 */
	if (child->pn_popmap != 0) {
		pctrie_node_store(&child->pn_child[ffs(child->pn_popmap) - 1],
		    PCTRIE_NULL, PCTRIE_UNSERIALIZED);
		child->pn_popmap = 0;
	}

	/*
	 * Recover the values of the two children of the new child node.  If
	 * 'node' is not a leaf, this stores into 'newind' the 'owner' field,
	 * which must be first in the node.
	 */
	index = *val;
	node = pctrie_node_load(parentp, NULL, PCTRIE_UNSERIALIZED);
	pctrie_setparent(child, parent);
	if (!pctrie_isleaf(node))
		pctrie_setparent(node, child);
	newind = *pctrie_toval(node);

	/*
	 * From the highest-order bit where the indexes differ,
	 * compute the highest level in the trie where they differ.  Then,
	 * compute the least index of this subtrie.
	 */
	_Static_assert(sizeof(long long) >= sizeof(uint64_t),
	    "uint64 too wide");
	_Static_assert(sizeof(uint64_t) * NBBY <=
	    (1 << (sizeof(child->pn_clev) * NBBY)), "pn_clev too narrow");
	child->pn_clev = rounddown(ilog2(index ^ newind), PCTRIE_WIDTH);
	child->pn_owner = PCTRIE_COUNT;
	child->pn_owner = index & -(child->pn_owner << child->pn_clev);

	/* These writes are not yet visible due to ordering. */
	pctrie_addnode(child, index, pctrie_toleaf(val), PCTRIE_UNSERIALIZED);
	pctrie_addnode(child, newind, node, PCTRIE_UNSERIALIZED);
	/* Synchronize to make the above visible. */
	pctrie_node_store(parentp, child, PCTRIE_LOCKED);
}
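
/*
 * Worked example for the computation above (editorial, assuming
 * PCTRIE_WIDTH == 4): inserting index 0x25 where leaf 0x10 already sits,
 * ilog2(0x10 ^ 0x25) == ilog2(0x35) == 5, which rounds down to
 * pn_clev == 4.  Then pn_owner == (0x25 & -(16 << 4)) == 0, so the new
 * branch node owns indices [0, 0x100), with the leaf for 0x10 in slot 1
 * and the leaf for 0x25 in slot 2.
 */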

/*
 * Returns the value stored at a fixed offset from the current index value,
 * possibly NULL.
 */
uint64_t *
pctrie_iter_stride(struct pctrie_iter *it, int stride)
{
	uint64_t index = it->index + stride;

	/* Detect stride overflow. */
	if ((stride > 0) != (index > it->index))
		return (NULL);
	/* Detect crossing limit. */
	if ((index < it->limit) != (it->index < it->limit))
		return (NULL);

	return (pctrie_iter_lookup(it, index));
}

/*
 * Returns the value stored at one more than the current index value, possibly
 * NULL, assuming access is externally synchronized by a lock.
 */
uint64_t *
pctrie_iter_next(struct pctrie_iter *it)
{
	return (pctrie_iter_stride(it, 1));
}

/*
 * Returns the value stored at one less than the current index value, possibly
 * NULL, assuming access is externally synchronized by a lock.
 */
uint64_t *
pctrie_iter_prev(struct pctrie_iter *it)
{
	return (pctrie_iter_stride(it, -1));
}

/*
 * Returns the number of contiguous, non-NULL entries read into the value[]
 * array, starting at index.
 */
static __always_inline int
_pctrie_lookup_range(struct pctrie *ptree, struct pctrie_node *node,
    uint64_t index, uint64_t *value[], int count,
    struct pctrie_node **parent_out, smr_t smr, enum pctrie_access access)
{
	struct pctrie_node *parent;
	uint64_t *val;
	int base, end, i;

	parent = node;
	for (i = 0; i < count;) {
		node = _pctrie_lookup_node(ptree, parent, index + i, &parent,
		    smr, access);
		if ((val = pctrie_match_value(node, index + i)) == NULL)
			break;
		value[i++] = val;
		base = (index + i) % PCTRIE_COUNT;
		if (base == 0 || parent == NULL || parent->pn_clev != 0)
			continue;

		/*
		 * For PCTRIE_SMR, compute an upper bound on the number of
		 * children of this parent left to examine.  For PCTRIE_LOCKED,
		 * compute the number of non-NULL children from base up to the
		 * first NULL child, if any, using the fact that pn_popmap has
		 * bits set for only the non-NULL children.
		 *
		 * The pn_popmap field is accessed only when a lock is held.
		 * To use it for PCTRIE_SMR here would require that we know
		 * that race conditions cannot occur if the tree is modified
		 * while accessed here.  Guarantees about the visibility of
		 * changes to child pointers, enforced by memory barriers on
		 * the writing of pointers, are not present for the pn_popmap
		 * field, so that the popmap bit for a child may, for an
		 * instant, misrepresent the nullness of the child because an
		 * operation modifying the pctrie is in progress.
		 */
		end = (access == PCTRIE_SMR) ? PCTRIE_COUNT - base :
		    ffs((parent->pn_popmap >> base) + 1) - 1;
		end = MIN(count, i + end);
		while (i < end) {
			node = pctrie_node_load(&parent->pn_child[base++],
			    smr, access);
			val = pctrie_toval(node);
			if (access == PCTRIE_SMR && val == NULL)
				break;
			value[i++] = val;
			KASSERT(val != NULL,
			    ("%s: null child written to range", __func__));
		}
		if (access == PCTRIE_SMR) {
			if (i < end)
				break;
		} else {
			if (base < PCTRIE_COUNT)
				break;
		}
	}
	if (parent_out != NULL)
		*parent_out = parent;
	return (i);
}

/*
 * Returns the number of contiguous, non-NULL entries read into the value[]
 * array, starting at index, assuming access is externally synchronized by a
 * lock.
 */
int
pctrie_lookup_range(struct pctrie *ptree, uint64_t index,
    uint64_t *value[], int count)
{
	return (_pctrie_lookup_range(ptree, NULL, index, value, count, NULL,
	    NULL, PCTRIE_LOCKED));
}

/*
 * Returns the number of contiguous, non-NULL entries read into the value[]
 * array, starting at index, without requiring an external lock.  These
 * entries *may* never have been in the pctrie all at one time, but for a
 * series of times t0, t1, t2, ..., with ti <= t(i+1), value[i] was in the
 * trie at time ti.
 */
int
pctrie_lookup_range_unlocked(struct pctrie *ptree, uint64_t index,
    uint64_t *value[], int count, smr_t smr)
{
	int res;

	smr_enter(smr);
	res = _pctrie_lookup_range(ptree, NULL, index, value, count, NULL,
	    smr, PCTRIE_SMR);
	smr_exit(smr);
	return (res);
}

/*
 * Returns the number of contiguous, non-NULL entries read into the value[]
 * array, starting at index, assuming access is externally synchronized by a
 * lock.  Uses an iterator.
 */
int
pctrie_iter_lookup_range(struct pctrie_iter *it, uint64_t index,
    uint64_t *value[], int count)
{
	return (_pctrie_lookup_range(it->ptree, it->node, index, value, count,
	    &it->node, NULL, PCTRIE_LOCKED));
}

/*
 * Find the first leaf >= index, and set *parent_out to the parent of that
 * leaf.  Return NULL if there is no such leaf less than limit.
 */
static __inline uint64_t *
_pctrie_lookup_ge(struct pctrie *ptree, struct pctrie_node *node,
    uint64_t index, struct pctrie_node **parent_out, uint64_t limit)
{
	struct pctrie_node *parent;
	uint64_t *m;
	int slot;

	/* Seek a node that matches index. */
	node = _pctrie_lookup_node(ptree, node, index, &parent,
	    NULL, PCTRIE_LOCKED);

	/*
	 * If no such node was found, and instead this path leads only to
	 * nodes < index, back up to find a subtrie with the least value >
	 * index.
	 */
	if (node == PCTRIE_NULL || *pctrie_toval(node) < index) {
		/* Climb the path to find a node with a descendant > index. */
		node = NULL;
		while (parent != NULL) {
			slot = pctrie_slot(parent, index) + 1;
			if ((parent->pn_popmap >> slot) != 0)
				break;
			node = parent;
			parent = pctrie_parent(node);
		}
		if (parent == NULL) {
			if (parent_out != NULL)
				*parent_out = node;
			return (NULL);
		}

		/* Step to the least child with a descendant > index. */
		slot += ffs(parent->pn_popmap >> slot) - 1;
		node = pctrie_node_load(&parent->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	/* Descend to the least leaf of the subtrie. */
	while (!pctrie_isleaf(node)) {
		if (limit != 0 && node->pn_owner >= limit)
			return (NULL);
		slot = ffs(node->pn_popmap) - 1;
		parent = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	if (parent_out != NULL)
		*parent_out = parent;
	m = pctrie_toval(node);
	if (limit != 0 && *m >= limit)
		return (NULL);
	return (m);
}

uint64_t *
pctrie_lookup_ge(struct pctrie *ptree, uint64_t index)
{
	return (_pctrie_lookup_ge(ptree, NULL, index, NULL, 0));
}

/*
 * Find first leaf >= index, and fill iter with the path to the parent of that
 * leaf.  Return NULL if there is no such leaf less than limit.
 */
uint64_t *
pctrie_iter_lookup_ge(struct pctrie_iter *it, uint64_t index)
{
	uint64_t *m;

	m = _pctrie_lookup_ge(it->ptree, it->node, index, &it->node,
	    it->limit);
	if (m != NULL)
		it->index = *m;
	return (m);
}
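
/*
 * Editorial usage sketch (illustrative): an ascending scan of every value
 * >= 100, using pctrie_iter_jump_ge() below to step past each leaf found:
 *
 *	uint64_t *m;
 *
 *	for (m = pctrie_iter_lookup_ge(&it, 100); m != NULL;
 *	    m = pctrie_iter_jump_ge(&it, 1))
 *		...;
 */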

/*
 * Find the first leaf with value at least 'jump' greater than the previous
 * leaf.  Return NULL if that value is >= limit.
 */
uint64_t *
pctrie_iter_jump_ge(struct pctrie_iter *it, int64_t jump)
{
	uint64_t index = it->index + jump;

	/* Detect jump overflow. */
	if ((jump > 0) != (index > it->index))
		return (NULL);
	if (it->limit != 0 && index >= it->limit)
		return (NULL);
	return (pctrie_iter_lookup_ge(it, index));
}

/*
 * Find the first leaf <= index, and set *parent_out to the parent of that
 * leaf.  Return NULL if there is no such leaf greater than limit.
 */
static __inline uint64_t *
_pctrie_lookup_le(struct pctrie *ptree, struct pctrie_node *node,
    uint64_t index, struct pctrie_node **parent_out, uint64_t limit)
{
	struct pctrie_node *parent;
	uint64_t *m;
	int slot;

	/* Seek a node that matches index. */
	node = _pctrie_lookup_node(ptree, node, index, &parent, NULL,
	    PCTRIE_LOCKED);

	/*
	 * If no such node was found, and instead this path leads only to
	 * nodes > index, back up to find a subtrie with the greatest value <
	 * index.
	 */
	if (node == PCTRIE_NULL || *pctrie_toval(node) > index) {
		/* Climb the path to find a node with a descendant < index. */
		node = NULL;
		while (parent != NULL) {
			slot = pctrie_slot(parent, index);
			if ((parent->pn_popmap & ((1 << slot) - 1)) != 0)
				break;
			node = parent;
			parent = pctrie_parent(node);
		}
		if (parent == NULL) {
			if (parent_out != NULL)
				*parent_out = node;
			return (NULL);
		}

		/* Step to the greatest child with a descendant < index. */
		slot = ilog2(parent->pn_popmap & ((1 << slot) - 1));
		node = pctrie_node_load(&parent->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	/* Descend to the greatest leaf of the subtrie. */
	while (!pctrie_isleaf(node)) {
		if (limit != 0 && limit >= node->pn_owner +
		    ((uint64_t)PCTRIE_COUNT << node->pn_clev) - 1)
			return (NULL);
		slot = ilog2(node->pn_popmap);
		parent = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	if (parent_out != NULL)
		*parent_out = parent;
	m = pctrie_toval(node);
	if (limit != 0 && *m <= limit)
		return (NULL);
	return (m);
}

uint64_t *
pctrie_lookup_le(struct pctrie *ptree, uint64_t index)
{
	return (_pctrie_lookup_le(ptree, NULL, index, NULL, 0));
}

uint64_t *
pctrie_subtree_lookup_lt(struct pctrie *ptree, struct pctrie_node *node,
    uint64_t index)
{
	if (index == 0)
		return (NULL);
	return (_pctrie_lookup_le(ptree, node, index - 1, NULL, 0));
}

/*
 * Find first leaf <= index, and fill iter with the path to the parent of that
 * leaf.  Return NULL if there is no such leaf greater than limit.
 */
uint64_t *
pctrie_iter_lookup_le(struct pctrie_iter *it, uint64_t index)
{
	uint64_t *m;

	m = _pctrie_lookup_le(it->ptree, it->node, index, &it->node,
	    it->limit);
	if (m != NULL)
		it->index = *m;
	return (m);
}

/*
 * Find the first leaf with value at least 'jump' less than the previous
 * leaf.  Return NULL if that value is <= limit.
 */
uint64_t *
pctrie_iter_jump_le(struct pctrie_iter *it, int64_t jump)
{
	uint64_t index = it->index - jump;

	/* Detect jump overflow. */
	if ((jump > 0) != (index < it->index))
		return (NULL);
	if (it->limit != 0 && index <= it->limit)
		return (NULL);
	return (pctrie_iter_lookup_le(it, index));
}
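
/*
 * Editorial note on iterator limits: a limit of 0 means "no limit".  With a
 * nonzero it->limit, the ascending lookups above return only values strictly
 * less than the limit, and the descending lookups return only values strictly
 * greater than it.
 */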

/*
 * Remove the non-NULL child identified by 'index' from the set of children of
 * 'node'.  If doing so causes 'node' to have only one child, purge 'node'
 * from the pctrie by promoting that child, and return true so that the caller
 * can free 'node'; otherwise return false.
 */
static bool
pctrie_remove(struct pctrie *ptree, struct pctrie_node *node, uint64_t index)
{
	smr_pctnode_t *parentp;
	struct pctrie_node *child;
	int slot;

	parentp = pctrie_child(ptree, node, index);
	if (node == NULL) {
		pctrie_node_store(parentp, PCTRIE_NULL, PCTRIE_LOCKED);
		return (false);
	}
	slot = pctrie_slot(node, index);
	KASSERT((node->pn_popmap & (1 << slot)) != 0,
	    ("%s: bad popmap slot %d in node %p",
	    __func__, slot, node));
	node->pn_popmap ^= 1 << slot;
	if (!powerof2(node->pn_popmap)) {
		pctrie_node_store(parentp, PCTRIE_NULL, PCTRIE_LOCKED);
		return (false);
	}
	pctrie_node_store(parentp, PCTRIE_NULL, PCTRIE_UNSERIALIZED);
	KASSERT(node->pn_popmap != 0, ("%s: bad popmap all zeroes", __func__));
	slot = ffs(node->pn_popmap) - 1;
	child = pctrie_node_load(&node->pn_child[slot], NULL, PCTRIE_LOCKED);
	KASSERT(child != PCTRIE_NULL,
	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
	node = pctrie_parent(node);
	if (!pctrie_isleaf(child))
		pctrie_setparent(child, node);
	parentp = pctrie_child(ptree, node, index);
	pctrie_node_store(parentp, child, PCTRIE_LOCKED);
	return (true);
}

/*
 * Remove the specified index from the tree, and return the value stored at
 * that index.  If the index is not present, return NULL.
 */
uint64_t *
pctrie_remove_lookup(struct pctrie *ptree, uint64_t index,
    struct pctrie_node **freenode)
{
	struct pctrie_node *node, *parent;
	uint64_t *m;

	node = _pctrie_lookup_node(ptree, NULL, index, &parent, NULL,
	    PCTRIE_LOCKED);
	m = pctrie_match_value(node, index);
	if (m != NULL && pctrie_remove(ptree, parent, index))
		*freenode = parent;
	else
		*freenode = NULL;
	return (m);
}

/*
 * Remove from the trie the leaf last chosen by the iterator, and
 * adjust the path if its last member is to be freed.
 */
void
pctrie_iter_remove(struct pctrie_iter *it, struct pctrie_node **freenode)
{
	KASSERT(NULL != pctrie_match_value(pctrie_node_load(pctrie_child(
	    it->ptree, it->node, it->index), NULL, PCTRIE_LOCKED), it->index),
	    ("%s: removing value %jx not at iter", __func__,
	    (uintmax_t)it->index));
	if (pctrie_remove(it->ptree, it->node, it->index)) {
		*freenode = it->node;
		it->node = pctrie_parent(it->node);
	} else
		*freenode = NULL;
}

/*
 * Return the current leaf, assuming access is externally synchronized by a
 * lock.
 */
uint64_t *
pctrie_iter_value(struct pctrie_iter *it)
{
	struct pctrie_node *node;

	node = pctrie_node_load(pctrie_child(it->ptree, it->node, it->index),
	    NULL, PCTRIE_LOCKED);
	return (pctrie_toval(node));
}
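
/*
 * Editorial usage sketch (illustrative): removal under the tree lock; the
 * caller disposes of any node handed back through *freenode, typically by
 * returning it to the node zone:
 *
 *	struct pctrie_node *freenode;
 *	uint64_t *m = pctrie_remove_lookup(ptree, 42, &freenode);
 *	if (freenode != NULL)
 *		<free 'freenode'>;
 */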

/*
 * Walk the subtrie rooted at *pnode in order, invoking callback on leaves,
 * until an interior node is stripped of all children, and returned for
 * deallocation, with *pnode left pointing to the parent of that node.
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_prune(struct pctrie_node **pnode, struct pctrie_node *parent,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	struct pctrie_node *child, *node;
	int slot;

	node = *pnode;
	while (node->pn_popmap != 0) {
		slot = ffs(node->pn_popmap) - 1;
		node->pn_popmap ^= 1 << slot;
		child = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_UNSERIALIZED);
		pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL,
		    PCTRIE_UNSERIALIZED);
		if (pctrie_isleaf(child)) {
			if (callback != NULL)
				callback(pctrie_toptr(child, keyoff), arg);
			continue;
		}
		/* Descend one level in the trie. */
		parent = node;
		node = child;
	}
	*pnode = parent;
	return (node);
}

/*
 * Recover the node parent from its first child and continue pruning.
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_resume_compound(struct pctrie_node **pnode,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	if (*pnode == NULL)
		return (NULL);
	/* Climb one level up the trie. */
	return (pctrie_reclaim_prune(pnode, pctrie_parent(*pnode), callback,
	    keyoff, arg));
}

/*
 * Find the trie root, and start pruning with a NULL parent.
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_begin_compound(struct pctrie_node **pnode,
    struct pctrie *ptree,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	struct pctrie_node *node;

	node = pctrie_root_load(ptree, NULL, PCTRIE_UNSERIALIZED);
	pctrie_node_store(pctrie_root(ptree), PCTRIE_NULL,
	    PCTRIE_UNSERIALIZED);
	if (pctrie_isleaf(node)) {
		if (callback != NULL && node != PCTRIE_NULL)
			callback(pctrie_toptr(node, keyoff), arg);
		return (NULL);
	}
	*pnode = node;
	return (pctrie_reclaim_prune(pnode, NULL, callback, keyoff, arg));
}

struct pctrie_node *
pctrie_reclaim_resume(struct pctrie_node **pnode)
{
	return (pctrie_reclaim_resume_compound(pnode, NULL, 0, NULL));
}

struct pctrie_node *
pctrie_reclaim_begin(struct pctrie_node **pnode, struct pctrie *ptree)
{
	return (pctrie_reclaim_begin_compound(pnode, ptree, NULL, 0, NULL));
}

struct pctrie_node *
pctrie_reclaim_resume_cb(struct pctrie_node **pnode,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	return (pctrie_reclaim_resume_compound(pnode, callback, keyoff, arg));
}

struct pctrie_node *
pctrie_reclaim_begin_cb(struct pctrie_node **pnode, struct pctrie *ptree,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	return (pctrie_reclaim_begin_compound(pnode, ptree,
	    callback, keyoff, arg));
}

/*
 * Replace an existing value in the trie with another one.
 * Panics if there is not an old value in the trie at the new value's index.
 */
uint64_t *
pctrie_replace(struct pctrie *ptree, uint64_t *newval)
{
	struct pctrie_node *node, *parent;
	uint64_t *m;

	node = _pctrie_lookup_node(ptree, NULL, *newval, &parent, NULL,
	    PCTRIE_LOCKED);
	m = pctrie_match_value(node, *newval);
	if (m == NULL)
		panic("%s: original replacing value not found", __func__);
	pctrie_node_store(pctrie_child(ptree, parent, *newval),
	    pctrie_toleaf(newval), PCTRIE_LOCKED);
	return (m);
}
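
/*
 * Editorial usage sketch (illustrative): tearing down an entire trie; each
 * stripped interior node is handed back for the caller to free:
 *
 *	struct pctrie_node *node, *freed;
 *
 *	for (freed = pctrie_reclaim_begin(&node, ptree); freed != NULL;
 *	    freed = pctrie_reclaim_resume(&node))
 *		<free 'freed'>;
 */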

#ifdef DDB
/*
 * Show details about the given node.
 */
DB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
{
	struct pctrie_node *node, *tmp;
	int slot;
	pn_popmap_t popmap;

	if (!have_addr)
		return;
	node = (struct pctrie_node *)addr;
	db_printf("node %p, owner %jx, children popmap %04x, level %u:\n",
	    (void *)node, (uintmax_t)node->pn_owner, node->pn_popmap,
	    node->pn_clev / PCTRIE_WIDTH);
	for (popmap = node->pn_popmap; popmap != 0; popmap ^= 1 << slot) {
		slot = ffs(popmap) - 1;
		tmp = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_UNSERIALIZED);
		db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
		    slot, (void *)tmp,
		    pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL,
		    node->pn_clev / PCTRIE_WIDTH);
	}
}
#endif /* DDB */
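
/*
 * Editorial note: the DB_SHOW_COMMAND above is invoked from the in-kernel
 * debugger as "show pctrienode <addr>", where <addr> is the address of a
 * struct pctrie_node.
 */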