/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 EMC Corp.
 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * Path-compressed radix trie implementation.
 *
 * The implementation takes into account the following rationale:
 * - Size of the nodes should be as small as possible but still big enough
 *   to avoid a large maximum depth for the trie.  This is a balance
 *   between the necessity to not wire too much physical memory for the nodes
 *   and the necessity to avoid too much cache pollution during the trie
 *   operations.
 * - There is not a huge bias toward the number of lookup operations over
 *   the number of insert and remove operations.  This basically implies
 *   that optimizations supposedly helping one operation but hurting the
 *   other must be carefully evaluated.
 * - On average not many nodes are expected to be fully populated, hence
 *   level compression may just complicate things.
 */

#include <sys/cdefs.h>
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/libkern.h>
#include <sys/pctrie.h>
#include <sys/proc.h>	/* smr.h depends on struct thread. */
#include <sys/smr.h>
#include <sys/smr_types.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#if PCTRIE_WIDTH == 3
typedef uint8_t pn_popmap_t;
#elif PCTRIE_WIDTH == 4
typedef uint16_t pn_popmap_t;
#elif PCTRIE_WIDTH == 5
typedef uint32_t pn_popmap_t;
#else
#error Unsupported width
#endif
_Static_assert(sizeof(pn_popmap_t) <= sizeof(int),
    "pn_popmap_t too wide");

struct pctrie_node;
typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t;

struct pctrie_node {
	uint64_t	pn_owner;			/* Owner of record. */
	pn_popmap_t	pn_popmap;			/* Valid children. */
	uint8_t		pn_clev;			/* Level * WIDTH. */
	smr_pctnode_t	pn_parent;			/* Parent node. */
	smr_pctnode_t	pn_child[PCTRIE_COUNT];		/* Child nodes. */
};
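/*
 * Worked example of the node fields (illustrative, assuming
 * PCTRIE_WIDTH == 4, so PCTRIE_COUNT == 16): a node with pn_clev == 8 and
 * pn_owner == 0x1000 covers the index range [0x1000, 0x1fff]; each of its
 * 16 slots covers 1 << 8 == 256 indices, and pn_popmap bit i is set iff
 * pn_child[i] is non-NULL.  For index 0x1234, the slot within that node
 * is (0x1234 >> 8) & 0xf == 2.
 */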
/*
 * Map index to an array position for the children of node.
 */
static __inline int
pctrie_slot(struct pctrie_node *node, uint64_t index)
{
	return ((index >> node->pn_clev) & (PCTRIE_COUNT - 1));
}

/*
 * Returns true if index does not belong to the specified node.  Otherwise,
 * sets slot value, and returns false.
 */
static __inline bool
pctrie_keybarr(struct pctrie_node *node, uint64_t index, int *slot)
{
	index = (index - node->pn_owner) >> node->pn_clev;
	if (index >= PCTRIE_COUNT)
		return (true);
	*slot = index;
	return (false);
}

enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED };

/*
 * Fetch a node pointer from a slot.
 */
static __inline struct pctrie_node *
pctrie_node_load(smr_pctnode_t *p, smr_t smr, enum pctrie_access access)
{
	switch (access) {
	case PCTRIE_UNSERIALIZED:
		return (smr_unserialized_load(p, true));
	case PCTRIE_LOCKED:
		return (smr_serialized_load(p, true));
	case PCTRIE_SMR:
		return (smr_entered_load(p, smr));
	}
	__assert_unreachable();
}

static __inline void
pctrie_node_store(smr_pctnode_t *p, void *v, enum pctrie_access access)
{
	switch (access) {
	case PCTRIE_UNSERIALIZED:
		smr_unserialized_store(p, v, true);
		break;
	case PCTRIE_LOCKED:
		smr_serialized_store(p, v, true);
		break;
	case PCTRIE_SMR:
		panic("%s: Not supported in SMR section.", __func__);
		break;
	default:
		__assert_unreachable();
		break;
	}
}

/*
 * Get the root address, cast to proper type for load/store.
 */
static __inline smr_pctnode_t *
pctrie_root(struct pctrie *ptree)
{
	return ((smr_pctnode_t *)&ptree->pt_root);
}

/*
 * Get the root node for a tree.
 */
static __inline struct pctrie_node *
pctrie_root_load(struct pctrie *ptree, smr_t smr, enum pctrie_access access)
{
	return (pctrie_node_load(pctrie_root(ptree), smr, access));
}

/*
 * Get the child of a node.
 */
static __inline smr_pctnode_t *
pctrie_child(struct pctrie *ptree, struct pctrie_node *node, uint64_t index)
{
	return (node == NULL ? pctrie_root(ptree) :
	    &node->pn_child[pctrie_slot(node, index)]);
}

/*
 * Returns TRUE if the specified node is a leaf and FALSE otherwise.
 */
static __inline bool
pctrie_isleaf(struct pctrie_node *node)
{
	return (((uintptr_t)node & PCTRIE_ISLEAF) != 0);
}

/*
 * Returns val with leaf bit set.
 */
static __inline void *
pctrie_toleaf(uint64_t *val)
{
	return ((void *)((uintptr_t)val | PCTRIE_ISLEAF));
}

/*
 * Returns the associated val extracted from node.
 */
static __inline uint64_t *
pctrie_toval(struct pctrie_node *node)
{
	return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS));
}

/*
 * Returns the associated pointer extracted from node and field offset.
 */
static __inline void *
pctrie_toptr(struct pctrie_node *node, int keyoff)
{
	return ((void *)(((uintptr_t)node & ~PCTRIE_FLAGS) - keyoff));
}

/*
 * Make 'parent' a parent of 'child'.
 */
static __inline void
pctrie_setparent(struct pctrie_node *child, struct pctrie_node *parent)
{
	pctrie_node_store(&child->pn_parent, parent, PCTRIE_UNSERIALIZED);
}
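/*
 * Illustrative note on leaf tagging (a sketch, not part of the
 * implementation; 'obj' and its 'key' field are hypothetical): a "leaf"
 * is not a pctrie_node at all, but a pointer to the uint64_t key field
 * embedded in a caller's structure, with the low PCTRIE_ISLEAF tag bit
 * set.  Alignment guarantees that genuine node and value pointers never
 * have the tag bits set, so:
 *
 *	uint64_t *val = &obj->key;
 *	struct pctrie_node *leaf = pctrie_toleaf(val);	// tagged pointer
 *	pctrie_isleaf(leaf);				// true
 *	pctrie_toval(leaf) == val;			// tag stripped again
 */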
/*
 * Return the parent of 'node'.
 */
static __inline struct pctrie_node *
pctrie_parent(struct pctrie_node *node)
{
	return (pctrie_node_load(&node->pn_parent, NULL, PCTRIE_UNSERIALIZED));
}

/*
 * Make 'child' a child of 'node'.
 */
static __inline void
pctrie_addnode(struct pctrie_node *node, uint64_t index,
    struct pctrie_node *child, enum pctrie_access access)
{
	int slot;

	slot = pctrie_slot(node, index);
	pctrie_node_store(&node->pn_child[slot], child, access);
	node->pn_popmap ^= 1 << slot;
	KASSERT((node->pn_popmap & (1 << slot)) != 0,
	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
}

/*
 * pctrie node zone initializer.
 */
int
pctrie_zone_init(void *mem, int size __unused, int flags __unused)
{
	struct pctrie_node *node;

	node = mem;
	node->pn_popmap = 0;
	for (int i = 0; i < nitems(node->pn_child); i++)
		pctrie_node_store(&node->pn_child[i], PCTRIE_NULL,
		    PCTRIE_UNSERIALIZED);
	return (0);
}

size_t
pctrie_node_size(void)
{

	return (sizeof(struct pctrie_node));
}

/*
 * Return the value associated with the node, if the node is a leaf that
 * matches the index; otherwise NULL.
 */
static __always_inline uint64_t *
pctrie_match_value(struct pctrie_node *node, uint64_t index)
{
	uint64_t *m;

	if (!pctrie_isleaf(node) || (m = pctrie_toval(node)) == NULL ||
	    *m != index)
		m = NULL;
	return (m);
}

/*
 * Returns the last node examined in the search for the index, and sets the
 * parent of that node.
 */
static __always_inline struct pctrie_node *
_pctrie_lookup_node(struct pctrie *ptree, struct pctrie_node *node,
    uint64_t index, struct pctrie_node **parent_out,
    smr_t smr, enum pctrie_access access)
{
	struct pctrie_node *parent;
	int slot;

	parent = node;
	if (parent == NULL)
		node = pctrie_root_load(ptree, smr, access);

	/*
	 * Climb the search path to find the lowest node from which to start
	 * the search for a value matching 'index'.
	 */
	while (parent != NULL) {
		KASSERT(access == PCTRIE_SMR || !powerof2(parent->pn_popmap),
		    ("%s: freed node in iter path", __func__));
		node = parent;
		if (!pctrie_keybarr(node, index, &slot))
			break;
		parent = pctrie_parent(node);
	}

	/* Seek a node that matches index. */
	while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot)) {
		parent = node;
		KASSERT(access == PCTRIE_SMR || !powerof2(parent->pn_popmap),
		    ("%s: freed node in iter path", __func__));
		node = pctrie_node_load(&node->pn_child[slot], smr, access);
	}
	*parent_out = parent;
	return (node);
}

/*
 * Returns the value stored at the index, assuming access is externally
 * synchronized by a lock.
 *
 * If the index is not present, NULL is returned.
 */
uint64_t *
pctrie_lookup(struct pctrie *ptree, uint64_t index)
{
	struct pctrie_node *node, *parent;

	node = _pctrie_lookup_node(ptree, NULL, index, &parent, NULL,
	    PCTRIE_LOCKED);
	return (pctrie_match_value(node, index));
}
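/*
 * Usage sketch (illustrative; the PCTRIE_DEFINE() macro in sys/pctrie.h
 * wraps these routines with type-safe functions for a structure with an
 * embedded uint64_t key; 'struct foo', 'foo_alloc' and 'foo_free' are
 * hypothetical):
 *
 *	struct foo { uint64_t foo_key; ... };
 *	PCTRIE_DEFINE(FOO, foo, foo_key, foo_alloc, foo_free);
 *	...
 *	struct foo *f = FOO_PCTRIE_LOOKUP(&tree, key);	// pctrie_lookup()
 */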
/*
 * Returns the value stored at the index without requiring an external lock.
 *
 * If the index is not present, NULL is returned.
 */
uint64_t *
pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr)
{
	struct pctrie_node *node, *parent;
	uint64_t *res;

	smr_enter(smr);
	node = _pctrie_lookup_node(ptree, NULL, index, &parent, smr,
	    PCTRIE_SMR);
	res = pctrie_match_value(node, index);
	smr_exit(smr);
	return (res);
}

/*
 * Returns the value stored at a given index value, possibly NULL, assuming
 * access is externally synchronized by a lock.
 */
uint64_t *
pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index)
{
	struct pctrie_node *node;

	node = _pctrie_lookup_node(it->ptree, it->node, index, &it->node,
	    NULL, PCTRIE_LOCKED);
	it->index = index;
	return (pctrie_match_value(node, index));
}

/*
 * Look for where to insert the key-value pair into the trie.  Complete the
 * insertion if it replaces a null leaf.  Return the insertion location if the
 * insertion needs to be completed by the caller; otherwise return NULL.
 *
 * If the key is already present in the trie, populate *found_out as if by
 * pctrie_lookup().
 */
static __always_inline void *
_pctrie_insert_lookup(struct pctrie *ptree, struct pctrie_node *parent,
    uint64_t *val, struct pctrie_node **parent_out, uint64_t **found_out)
{
	struct pctrie_node *node;

	node = _pctrie_lookup_node(ptree, parent, *val, parent_out, NULL,
	    PCTRIE_LOCKED);
	*found_out = NULL;
	if (node == PCTRIE_NULL) {
		if (*parent_out == NULL)
			pctrie_node_store(pctrie_root(ptree),
			    pctrie_toleaf(val), PCTRIE_LOCKED);
		else
			pctrie_addnode(*parent_out, *val,
			    pctrie_toleaf(val), PCTRIE_LOCKED);
		return (NULL);
	}
	if (__predict_false(pctrie_match_value(node, *val) != NULL)) {
		*found_out = pctrie_toval(node);
		return (NULL);
	}

	/*
	 * 'node' must be replaced in the tree with a new branch node, with
	 * children 'node' and 'val'.  Return the place that points to 'node'
	 * now, and will point to the new branching node later.
	 */
	return (pctrie_child(ptree, *parent_out, *val));
}

/*
 * Wrap _pctrie_insert_lookup to implement a strict insertion.  Panic
 * if the key already exists, and do not look for neighboring entries.
 */
void *
pctrie_insert_lookup_strict(struct pctrie *ptree, uint64_t *val,
    struct pctrie_node **parent_out)
{
	void *parentp;
	uint64_t *found;

	parentp = _pctrie_insert_lookup(ptree, NULL, val, parent_out, &found);
	if (__predict_false(found != NULL))
		panic("%s: key %jx is already present", __func__,
		    (uintmax_t)*val);
	return (parentp);
}

/*
 * Wrap _pctrie_insert_lookup to implement find-or-insert.  Do not look
 * for neighboring entries.
 */
void *
pctrie_insert_lookup(struct pctrie *ptree, uint64_t *val,
    struct pctrie_node **parent_out, uint64_t **found_out)
{
	return (_pctrie_insert_lookup(ptree, NULL, val, parent_out,
	    found_out));
}
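/*
 * Sketch of the two-phase insert protocol built on the functions above
 * (illustrative; 'allocfn' is a hypothetical per-tree node allocator):
 *
 *	parentp = pctrie_insert_lookup(ptree, val, &parent, &found);
 *	if (found != NULL)
 *		return (EEXIST);	// key already present
 *	if (parentp != NULL) {
 *		// A new branch node is needed to distinguish 'val' from
 *		// the existing entry found at 'parentp'.
 *		child = allocfn(ptree);
 *		if (child == NULL)
 *			return (ENOMEM);
 *		pctrie_insert_node(val, parent, parentp, child);
 *	}
 *	return (0);
 */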
/*
 * Insert the val in the trie, starting search with iterator.  Return a
 * pointer to indicate where a new node must be allocated to complete
 * insertion.  Assumes access is externally synchronized by a lock.
 */
void *
pctrie_iter_insert_lookup(struct pctrie_iter *it, uint64_t *val)
{
	void *res;
	uint64_t *found;

	it->index = *val;
	res = _pctrie_insert_lookup(it->ptree, it->node, val, &it->node,
	    &found);
	if (__predict_false(found != NULL))
		panic("%s: key %jx is already present", __func__,
		    (uintmax_t)it->index);
	return (res);
}

/*
 * Inserts newly allocated node 'child' into trie at location 'parentp', with
 * parent 'parent' and two children, 'val' and whatever non-NULL node or leaf
 * was at 'parentp' to begin with.
 */
void
pctrie_insert_node(uint64_t *val, struct pctrie_node *parent, void *parentp,
    struct pctrie_node *child)
{
	struct pctrie_node *node;
	uint64_t index, newind;

	/*
	 * Clear the last child pointer of the newly allocated child.  We want
	 * to clear it after the final section has exited so lookup can not
	 * return false negatives.  It is done here because it will be
	 * cache-cold in the dtor callback.
	 */
	if (child->pn_popmap != 0) {
		pctrie_node_store(&child->pn_child[ffs(child->pn_popmap) - 1],
		    PCTRIE_NULL, PCTRIE_UNSERIALIZED);
		child->pn_popmap = 0;
	}

	/*
	 * Recover the values of the two children of the new child node.  If
	 * 'node' is not a leaf, this stores into 'newind' the 'owner' field,
	 * which must be first in the node.
	 */
	index = *val;
	node = pctrie_node_load(parentp, NULL, PCTRIE_UNSERIALIZED);
	pctrie_setparent(child, parent);
	if (!pctrie_isleaf(node))
		pctrie_setparent(node, child);
	newind = *pctrie_toval(node);

	/*
	 * From the highest-order bit where the indexes differ,
	 * compute the highest level in the trie where they differ.  Then,
	 * compute the least index of this subtrie.
	 */
	_Static_assert(sizeof(long long) >= sizeof(uint64_t),
	    "uint64 too wide");
	_Static_assert(sizeof(uint64_t) * NBBY <=
	    (1 << (sizeof(child->pn_clev) * NBBY)), "pn_clev too narrow");
	child->pn_clev = rounddown(ilog2(index ^ newind), PCTRIE_WIDTH);
	child->pn_owner = PCTRIE_COUNT;
	child->pn_owner = index & -(child->pn_owner << child->pn_clev);

	/* These writes are not yet visible due to ordering. */
	pctrie_addnode(child, index, pctrie_toleaf(val), PCTRIE_UNSERIALIZED);
	pctrie_addnode(child, newind, node, PCTRIE_UNSERIALIZED);
	/* Synchronize to make the above visible. */
	pctrie_node_store(parentp, child, PCTRIE_LOCKED);
}

/*
 * Returns the value stored at a fixed offset from the current index value,
 * possibly NULL.
 */
uint64_t *
pctrie_iter_stride(struct pctrie_iter *it, int stride)
{
	uint64_t index = it->index + stride;

	/* Detect stride overflow. */
	if ((stride > 0) != (index > it->index))
		return (NULL);
	/* Detect crossing limit. */
	if ((index < it->limit) != (it->index < it->limit))
		return (NULL);

	return (pctrie_iter_lookup(it, index));
}

/*
 * Returns the value stored at one more than the current index value, possibly
 * NULL, assuming access is externally synchronized by a lock.
 */
uint64_t *
pctrie_iter_next(struct pctrie_iter *it)
{
	return (pctrie_iter_stride(it, 1));
}
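/*
 * Worked example for the pn_clev/pn_owner computation in
 * pctrie_insert_node() (illustrative, assuming PCTRIE_WIDTH == 4):
 * inserting index 0x1234 next to an existing leaf 0x1260 gives
 * index ^ newind == 0x54 and ilog2(0x54) == 6, so pn_clev ==
 * rounddown(6, 4) == 4; the new node separates the keys by bits 4..7
 * (slots 3 and 6), and pn_owner == 0x1234 & -(16 << 4) == 0x1200, the
 * least index of the new subtrie.
 */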
/*
 * Returns the value stored at one less than the current index value, possibly
 * NULL, assuming access is externally synchronized by a lock.
 */
uint64_t *
pctrie_iter_prev(struct pctrie_iter *it)
{
	return (pctrie_iter_stride(it, -1));
}

/*
 * Returns the number of contiguous, non-NULL entries read into the value[]
 * array, starting at index.
 */
static __always_inline int
_pctrie_lookup_range(struct pctrie *ptree, struct pctrie_node *node,
    uint64_t index, uint64_t *value[], int count,
    struct pctrie_node **parent_out, smr_t smr, enum pctrie_access access)
{
	struct pctrie_node *parent;
	uint64_t *val;
	int base, end, i;

	parent = node;
	for (i = 0; i < count;) {
		node = _pctrie_lookup_node(ptree, parent, index + i, &parent,
		    smr, access);
		if ((val = pctrie_match_value(node, index + i)) == NULL)
			break;
		value[i++] = val;
		base = (index + i) % PCTRIE_COUNT;
		if (base == 0 || parent == NULL || parent->pn_clev != 0)
			continue;

		/*
		 * For PCTRIE_SMR, compute an upper bound on the number of
		 * children of this parent left to examine.  For PCTRIE_LOCKED,
		 * compute the number of non-NULL children from base up to the
		 * first NULL child, if any, using the fact that pn_popmap has
		 * bits set for only the non-NULL children.
		 *
		 * The pn_popmap field is accessed only when a lock is held.
		 * To use it for PCTRIE_SMR here would require that we know
		 * that race conditions cannot occur if the tree is modified
		 * while accessed here.  Guarantees about the visibility of
		 * changes to child pointers, enforced by memory barriers on
		 * the writing of pointers, are not present for the pn_popmap
		 * field, so that the popmap bit for a child may, for an
		 * instant, misrepresent the nullness of that child because
		 * an operation modifying the pctrie is in progress.
		 */
		end = (access == PCTRIE_SMR) ? PCTRIE_COUNT - base :
		    ffs((parent->pn_popmap >> base) + 1) - 1;
		end = MIN(count, i + end);
		while (i < end) {
			node = pctrie_node_load(&parent->pn_child[base++],
			    smr, access);
			val = pctrie_toval(node);
			if (access == PCTRIE_SMR && val == NULL)
				break;
			value[i++] = val;
			KASSERT(val != NULL,
			    ("%s: null child written to range", __func__));
		}
		if (access == PCTRIE_SMR) {
			if (i < end)
				break;
		} else {
			if (base < PCTRIE_COUNT)
				break;
		}
	}
	if (parent_out != NULL)
		*parent_out = parent;
	return (i);
}

/*
 * Returns the number of contiguous, non-NULL entries read into the value[]
 * array, starting at index, assuming access is externally synchronized by a
 * lock.
 */
int
pctrie_lookup_range(struct pctrie *ptree, uint64_t index,
    uint64_t *value[], int count)
{
	return (_pctrie_lookup_range(ptree, NULL, index, value, count, NULL,
	    NULL, PCTRIE_LOCKED));
}
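/*
 * Worked example for the popmap arithmetic above (illustrative): with
 * pn_popmap == 0b00111100 and base == 2, (pn_popmap >> base) + 1 ==
 * 0b1111 + 1 == 0b10000, and ffs(0b10000) - 1 == 4: exactly four
 * contiguous non-NULL children exist starting at slot 2, so up to four
 * values can be copied out without another top-down lookup.
 */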
/*
 * Returns the number of contiguous, non-NULL entries read into the value[]
 * array, starting at index, without requiring an external lock.  These
 * entries *may* never have been in the pctrie all at one time, but for a
 * series of times t0, t1, t2, ..., with ti <= t(i+1), value[i] was in the
 * trie at time ti.
 */
int
pctrie_lookup_range_unlocked(struct pctrie *ptree, uint64_t index,
    uint64_t *value[], int count, smr_t smr)
{
	int res;

	smr_enter(smr);
	res = _pctrie_lookup_range(ptree, NULL, index, value, count, NULL,
	    smr, PCTRIE_SMR);
	smr_exit(smr);
	return (res);
}

/*
 * Returns the number of contiguous, non-NULL entries read into the value[]
 * array, starting at index, assuming access is externally synchronized by a
 * lock.  Uses an iterator.
 */
int
pctrie_iter_lookup_range(struct pctrie_iter *it, uint64_t index,
    uint64_t *value[], int count)
{
	return (_pctrie_lookup_range(it->ptree, it->node, index, value, count,
	    &it->node, NULL, PCTRIE_LOCKED));
}

/*
 * Find first leaf >= index, and fill iter with the path to the parent of that
 * leaf.  Return NULL if there is no such leaf less than limit.
 */
static __inline uint64_t *
_pctrie_lookup_ge(struct pctrie *ptree, struct pctrie_node *node,
    uint64_t index, struct pctrie_node **parent_out, uint64_t limit)
{
	struct pctrie_node *parent;
	uint64_t *m;
	int slot;

	/* Seek a node that matches index. */
	node = _pctrie_lookup_node(ptree, node, index, &parent,
	    NULL, PCTRIE_LOCKED);

	/*
	 * If no such node was found, and instead this path leads only to nodes
	 * < index, back up to find a subtrie with the least value > index.
	 */
	if (node == PCTRIE_NULL || *pctrie_toval(node) < index) {
		/* Climb the path to find a node with a descendant > index. */
		for (node = parent; node != NULL; node = pctrie_parent(node)) {
			slot = pctrie_slot(node, index) + 1;
			if ((node->pn_popmap >> slot) != 0)
				break;
		}
		if (node == NULL) {
			if (parent_out != NULL)
				*parent_out = NULL;
			return (NULL);
		}

		/* Step to the least child with a descendant > index. */
		slot += ffs(node->pn_popmap >> slot) - 1;
		parent = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	/* Descend to the least leaf of the subtrie. */
	while (!pctrie_isleaf(node)) {
		if (limit != 0 && node->pn_owner >= limit)
			return (NULL);
		slot = ffs(node->pn_popmap) - 1;
		parent = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	if (parent_out != NULL)
		*parent_out = parent;
	m = pctrie_toval(node);
	if (limit != 0 && *m >= limit)
		return (NULL);
	return (m);
}

uint64_t *
pctrie_lookup_ge(struct pctrie *ptree, uint64_t index)
{
	return (_pctrie_lookup_ge(ptree, NULL, index, NULL, 0));
}

/*
 * Find first leaf >= index, and fill iter with the path to the parent of that
 * leaf.  Return NULL if there is no such leaf less than limit.
 */
uint64_t *
pctrie_iter_lookup_ge(struct pctrie_iter *it, uint64_t index)
{
	uint64_t *m;

	m = _pctrie_lookup_ge(it->ptree, it->node, index, &it->node,
	    it->limit);
	if (m != NULL)
		it->index = *m;
	return (m);
}
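/*
 * Iteration sketch (illustrative; assumes the pctrie_iter_init() helper
 * declared in sys/pctrie.h and a hypothetical consumer 'process'): visit
 * every value with index >= start in ascending order.  A nonzero
 * it.limit makes the *_ge lookups stop at that bound, as seen above.
 *
 *	struct pctrie_iter it;
 *	uint64_t *m;
 *
 *	pctrie_iter_init(&it, ptree);
 *	for (m = pctrie_iter_lookup_ge(&it, start); m != NULL;
 *	    m = pctrie_iter_jump_ge(&it, 1))
 *		process(m);
 */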
/*
 * Find the first leaf with value at least 'jump' greater than the previous
 * leaf.  Return NULL if that value is >= limit.
 */
uint64_t *
pctrie_iter_jump_ge(struct pctrie_iter *it, int64_t jump)
{
	uint64_t index = it->index + jump;

	/* Detect jump overflow. */
	if ((jump > 0) != (index > it->index))
		return (NULL);
	if (it->limit != 0 && index >= it->limit)
		return (NULL);
	return (pctrie_iter_lookup_ge(it, index));
}

/*
 * Find first leaf <= index, and fill iter with the path to the parent of that
 * leaf.  Return NULL if there is no such leaf greater than limit.
 */
static __inline uint64_t *
_pctrie_lookup_le(struct pctrie *ptree, struct pctrie_node *node,
    uint64_t index, struct pctrie_node **parent_out, uint64_t limit)
{
	struct pctrie_node *parent;
	uint64_t *m;
	int slot;

	/* Seek a node that matches index. */
	node = _pctrie_lookup_node(ptree, node, index, &parent, NULL,
	    PCTRIE_LOCKED);

	/*
	 * If no such node was found, and instead this path leads only to nodes
	 * > index, back up to find a subtrie with the greatest value < index.
	 */
	if (node == PCTRIE_NULL || *pctrie_toval(node) > index) {
		/* Climb the path to find a node with a descendant < index. */
		for (node = parent; node != NULL; node = pctrie_parent(node)) {
			slot = pctrie_slot(node, index);
			if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
				break;
		}
		if (node == NULL) {
			if (parent_out != NULL)
				*parent_out = NULL;
			return (NULL);
		}

		/* Step to the greatest child with a descendant < index. */
		slot = ilog2(node->pn_popmap & ((1 << slot) - 1));
		parent = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	/* Descend to the greatest leaf of the subtrie. */
	while (!pctrie_isleaf(node)) {
		if (limit != 0 && limit >= node->pn_owner +
		    ((uint64_t)PCTRIE_COUNT << node->pn_clev) - 1)
			return (NULL);
		slot = ilog2(node->pn_popmap);
		parent = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	if (parent_out != NULL)
		*parent_out = parent;
	m = pctrie_toval(node);
	if (limit != 0 && *m <= limit)
		return (NULL);
	return (m);
}

uint64_t *
pctrie_lookup_le(struct pctrie *ptree, uint64_t index)
{
	return (_pctrie_lookup_le(ptree, NULL, index, NULL, 0));
}

uint64_t *
pctrie_subtree_lookup_lt(struct pctrie *ptree, struct pctrie_node *node,
    uint64_t index)
{
	if (index == 0)
		return (NULL);
	return (_pctrie_lookup_le(ptree, node, index - 1, NULL, 0));
}

/*
 * Find first leaf <= index, and fill iter with the path to the parent of that
 * leaf.  Return NULL if there is no such leaf greater than limit.
 */
uint64_t *
pctrie_iter_lookup_le(struct pctrie_iter *it, uint64_t index)
{
	uint64_t *m;

	m = _pctrie_lookup_le(it->ptree, it->node, index, &it->node,
	    it->limit);
	if (m != NULL)
		it->index = *m;
	return (m);
}

/*
 * Find the first leaf with value at most 'jump' less than the previous
 * leaf.  Return NULL if that value is <= limit.
 */
uint64_t *
pctrie_iter_jump_le(struct pctrie_iter *it, int64_t jump)
{
	uint64_t index = it->index - jump;

	/* Detect jump overflow. */
	if ((jump > 0) != (index < it->index))
		return (NULL);
	if (it->limit != 0 && index <= it->limit)
		return (NULL);
	return (pctrie_iter_lookup_le(it, index));
}
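/*
 * Worked example for the predecessor mask in _pctrie_lookup_le()
 * (illustrative): with pn_popmap == 0b01010010 and slot == 4,
 * pn_popmap & ((1 << 4) - 1) == 0b0010 keeps only children in slots
 * below 4, and ilog2(0b0010) == 1 selects slot 1, the greatest child
 * whose descendants are all < index.
 */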
/*
 * Remove the non-NULL child identified by 'index' from the set of children of
 * 'node'.  If doing so causes 'node' to have only one child, purge it from
 * the pctrie and return true so that the caller can free it; otherwise
 * return false.
 */
static bool
pctrie_remove(struct pctrie *ptree, struct pctrie_node *node, uint64_t index)
{
	smr_pctnode_t *parentp;
	struct pctrie_node *child;
	int slot;

	parentp = pctrie_child(ptree, node, index);
	if (node == NULL) {
		pctrie_node_store(parentp, PCTRIE_NULL, PCTRIE_LOCKED);
		return (false);
	}
	slot = pctrie_slot(node, index);
	KASSERT((node->pn_popmap & (1 << slot)) != 0,
	    ("%s: bad popmap slot %d in node %p",
	    __func__, slot, node));
	node->pn_popmap ^= 1 << slot;
	if (!powerof2(node->pn_popmap)) {
		pctrie_node_store(parentp, PCTRIE_NULL, PCTRIE_LOCKED);
		return (false);
	}
	pctrie_node_store(parentp, PCTRIE_NULL, PCTRIE_UNSERIALIZED);
	KASSERT(node->pn_popmap != 0, ("%s: bad popmap all zeroes", __func__));
	slot = ffs(node->pn_popmap) - 1;
	child = pctrie_node_load(&node->pn_child[slot], NULL, PCTRIE_LOCKED);
	KASSERT(child != PCTRIE_NULL,
	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
	node = pctrie_parent(node);
	if (!pctrie_isleaf(child))
		pctrie_setparent(child, node);
	parentp = pctrie_child(ptree, node, index);
	pctrie_node_store(parentp, child, PCTRIE_LOCKED);
	return (true);
}

/*
 * Remove the specified index from the tree, and return the value stored at
 * that index.  If the index is not present, return NULL.
 */
uint64_t *
pctrie_remove_lookup(struct pctrie *ptree, uint64_t index,
    struct pctrie_node **freenode)
{
	struct pctrie_node *node, *parent;
	uint64_t *m;

	node = _pctrie_lookup_node(ptree, NULL, index, &parent, NULL,
	    PCTRIE_LOCKED);
	m = pctrie_match_value(node, index);
	if (m != NULL && pctrie_remove(ptree, parent, index))
		*freenode = parent;
	else
		*freenode = NULL;
	return (m);
}

/*
 * Remove from the trie the leaf last chosen by the iterator, and
 * adjust the path if its last member is to be freed.
 */
void
pctrie_iter_remove(struct pctrie_iter *it, struct pctrie_node **freenode)
{
	KASSERT(NULL != pctrie_match_value(pctrie_node_load(pctrie_child(
	    it->ptree, it->node, it->index), NULL, PCTRIE_LOCKED), it->index),
	    ("%s: removing value %jx not at iter", __func__,
	    (uintmax_t)it->index));
	if (pctrie_remove(it->ptree, it->node, it->index)) {
		*freenode = it->node;
		it->node = pctrie_parent(it->node);
	} else
		*freenode = NULL;
}

/*
 * Return the current leaf, assuming access is externally synchronized by a
 * lock.
 */
uint64_t *
pctrie_iter_value(struct pctrie_iter *it)
{
	struct pctrie_node *node;

	node = pctrie_node_load(pctrie_child(it->ptree, it->node, it->index),
	    NULL, PCTRIE_LOCKED);
	return (pctrie_toval(node));
}
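/*
 * Worked example for the contraction test in pctrie_remove()
 * (illustrative): removing slot 2 from a node with pn_popmap == 0b0110
 * leaves 0b0010; powerof2(0b0010) is true, meaning exactly one child
 * remains, so the node is purged and its last child (slot
 * ffs(0b0010) - 1 == 1) is promoted into the parent's slot.
 */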
/*
 * Walk the subtrie rooted at *pnode in order, invoking callback on leaves,
 * until an interior node is stripped of all children, and returned for
 * deallocation, with *pnode left pointing to the parent of that node.
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_prune(struct pctrie_node **pnode, struct pctrie_node *parent,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	struct pctrie_node *child, *node;
	int slot;

	node = *pnode;
	while (node->pn_popmap != 0) {
		slot = ffs(node->pn_popmap) - 1;
		node->pn_popmap ^= 1 << slot;
		child = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_UNSERIALIZED);
		pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL,
		    PCTRIE_UNSERIALIZED);
		if (pctrie_isleaf(child)) {
			if (callback != NULL)
				callback(pctrie_toptr(child, keyoff), arg);
			continue;
		}
		/* Descend one level in the trie. */
		parent = node;
		node = child;
	}
	*pnode = parent;
	return (node);
}

/*
 * Recover the node parent from its first child and continue pruning.
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_resume_compound(struct pctrie_node **pnode,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	if (*pnode == NULL)
		return (NULL);
	/* Climb one level up the trie. */
	return (pctrie_reclaim_prune(pnode, pctrie_parent(*pnode), callback,
	    keyoff, arg));
}

/*
 * Find the trie root, and start pruning with a NULL parent.
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_begin_compound(struct pctrie_node **pnode,
    struct pctrie *ptree,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	struct pctrie_node *node;

	node = pctrie_root_load(ptree, NULL, PCTRIE_UNSERIALIZED);
	pctrie_node_store(pctrie_root(ptree), PCTRIE_NULL,
	    PCTRIE_UNSERIALIZED);
	if (pctrie_isleaf(node)) {
		if (callback != NULL && node != PCTRIE_NULL)
			callback(pctrie_toptr(node, keyoff), arg);
		return (NULL);
	}
	*pnode = node;
	return (pctrie_reclaim_prune(pnode, NULL, callback, keyoff, arg));
}

struct pctrie_node *
pctrie_reclaim_resume(struct pctrie_node **pnode)
{
	return (pctrie_reclaim_resume_compound(pnode, NULL, 0, NULL));
}

struct pctrie_node *
pctrie_reclaim_begin(struct pctrie_node **pnode, struct pctrie *ptree)
{
	return (pctrie_reclaim_begin_compound(pnode, ptree, NULL, 0, NULL));
}

struct pctrie_node *
pctrie_reclaim_resume_cb(struct pctrie_node **pnode,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	return (pctrie_reclaim_resume_compound(pnode, callback, keyoff, arg));
}

struct pctrie_node *
pctrie_reclaim_begin_cb(struct pctrie_node **pnode, struct pctrie *ptree,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	return (pctrie_reclaim_begin_compound(pnode, ptree,
	    callback, keyoff, arg));
}

/*
 * Replace an existing value in the trie with another one.
 * Panics if there is not an old value in the trie at the new value's index.
 */
uint64_t *
pctrie_replace(struct pctrie *ptree, uint64_t *newval)
{
	struct pctrie_node *node, *parent;
	uint64_t *m;

	node = _pctrie_lookup_node(ptree, NULL, *newval, &parent, NULL,
	    PCTRIE_LOCKED);
	m = pctrie_match_value(node, *newval);
	if (m == NULL)
		panic("%s: original replacing value not found", __func__);
	pctrie_node_store(pctrie_child(ptree, parent, *newval),
	    pctrie_toleaf(newval), PCTRIE_LOCKED);
	return (m);
}
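/*
 * Usage sketch for the reclaim interface above (illustrative; 'freefn'
 * is a hypothetical node deallocator): tear down an entire trie, freeing
 * each interior node as it is stripped of children.
 *
 *	for (node = pctrie_reclaim_begin(&pnode, ptree); node != NULL;
 *	    node = pctrie_reclaim_resume(&pnode))
 *		freefn(ptree, node);
 */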
#ifdef DDB
/*
 * Show details about the given node.
 */
DB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
{
	struct pctrie_node *node, *tmp;
	int slot;
	pn_popmap_t popmap;

	if (!have_addr)
		return;
	node = (struct pctrie_node *)addr;
	db_printf("node %p, owner %jx, children popmap %04x, level %u:\n",
	    (void *)node, (uintmax_t)node->pn_owner, node->pn_popmap,
	    node->pn_clev / PCTRIE_WIDTH);
	for (popmap = node->pn_popmap; popmap != 0; popmap ^= 1 << slot) {
		slot = ffs(popmap) - 1;
		tmp = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_UNSERIALIZED);
		db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
		    slot, (void *)tmp,
		    pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL,
		    node->pn_clev / PCTRIE_WIDTH);
	}
}
#endif /* DDB */