1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2013 EMC Corp. 5 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org> 6 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 */ 31 32 /* 33 * Path-compressed radix trie implementation. 34 * 35 * The implementation takes into account the following rationale: 36 * - Size of the nodes should be as small as possible but still big enough 37 * to avoid a large maximum depth for the trie. This is a balance 38 * between the necessity to not wire too much physical memory for the nodes 39 * and the necessity to avoid too much cache pollution during the trie 40 * operations. 41 * - There is not a huge bias toward the number of lookup operations over 42 * the number of insert and remove operations. This basically implies 43 * that optimizations supposedly helping one operation but hurting the 44 * other might be carefully evaluated. 45 * - On average not many nodes are expected to be fully populated, hence 46 * level compression may just complicate things. 47 */ 48 49 #include <sys/cdefs.h> 50 #include "opt_ddb.h" 51 52 #include <sys/param.h> 53 #include <sys/systm.h> 54 #include <sys/kernel.h> 55 #include <sys/libkern.h> 56 #include <sys/pctrie.h> 57 #include <sys/proc.h> /* smr.h depends on struct thread. */ 58 #include <sys/smr.h> 59 #include <sys/smr_types.h> 60 61 #ifdef DDB 62 #include <ddb/ddb.h> 63 #endif 64 65 #if PCTRIE_WIDTH == 3 66 typedef uint8_t pn_popmap_t; 67 #elif PCTRIE_WIDTH == 4 68 typedef uint16_t pn_popmap_t; 69 #elif PCTRIE_WIDTH == 5 70 typedef uint32_t pn_popmap_t; 71 #else 72 #error Unsupported width 73 #endif 74 _Static_assert(sizeof(pn_popmap_t) <= sizeof(int), 75 "pn_popmap_t too wide"); 76 77 struct pctrie_node; 78 typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t; 79 80 struct pctrie_node { 81 uint64_t pn_owner; /* Owner of record. */ 82 pn_popmap_t pn_popmap; /* Valid children. */ 83 uint8_t pn_clev; /* Level * WIDTH. */ 84 smr_pctnode_t pn_parent; /* Parent node. */ 85 smr_pctnode_t pn_child[PCTRIE_COUNT]; /* Child nodes. */ 86 }; 87 88 /* 89 * Map index to an array position for the children of node, 90 */ 91 static __inline int 92 pctrie_slot(struct pctrie_node *node, uint64_t index) 93 { 94 return ((index >> node->pn_clev) & (PCTRIE_COUNT - 1)); 95 } 96 97 /* 98 * Returns true if index does not belong to the specified node. Otherwise, 99 * sets slot value, and returns false. 100 */ 101 static __inline bool 102 pctrie_keybarr(struct pctrie_node *node, uint64_t index, int *slot) 103 { 104 index = (index - node->pn_owner) >> node->pn_clev; 105 if (index >= PCTRIE_COUNT) 106 return (true); 107 *slot = index; 108 return (false); 109 } 110 111 enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED }; 112 113 /* 114 * Fetch a node pointer from a slot. 115 */ 116 static __inline struct pctrie_node * 117 pctrie_node_load(smr_pctnode_t *p, smr_t smr, enum pctrie_access access) 118 { 119 switch (access) { 120 case PCTRIE_UNSERIALIZED: 121 return (smr_unserialized_load(p, true)); 122 case PCTRIE_LOCKED: 123 return (smr_serialized_load(p, true)); 124 case PCTRIE_SMR: 125 return (smr_entered_load(p, smr)); 126 } 127 __assert_unreachable(); 128 } 129 130 static __inline void 131 pctrie_node_store(smr_pctnode_t *p, void *v, enum pctrie_access access) 132 { 133 switch (access) { 134 case PCTRIE_UNSERIALIZED: 135 smr_unserialized_store(p, v, true); 136 break; 137 case PCTRIE_LOCKED: 138 smr_serialized_store(p, v, true); 139 break; 140 case PCTRIE_SMR: 141 panic("%s: Not supported in SMR section.", __func__); 142 break; 143 default: 144 __assert_unreachable(); 145 break; 146 } 147 } 148 149 /* 150 * Get the root address, cast to proper type for load/store. 151 */ 152 static __inline smr_pctnode_t * 153 pctrie_root(struct pctrie *ptree) 154 { 155 return ((smr_pctnode_t *)&ptree->pt_root); 156 } 157 158 /* 159 * Get the root node for a tree. 160 */ 161 static __inline struct pctrie_node * 162 pctrie_root_load(struct pctrie *ptree, smr_t smr, enum pctrie_access access) 163 { 164 return (pctrie_node_load(pctrie_root(ptree), smr, access)); 165 } 166 167 /* 168 * Get the child of a node. 169 */ 170 static __inline smr_pctnode_t * 171 pctrie_child(struct pctrie *ptree, struct pctrie_node *node, uint64_t index) 172 { 173 return (node == NULL ? pctrie_root(ptree) : 174 &node->pn_child[pctrie_slot(node, index)]); 175 } 176 177 /* 178 * Returns TRUE if the specified node is a leaf and FALSE otherwise. 179 */ 180 static __inline bool 181 pctrie_isleaf(struct pctrie_node *node) 182 { 183 return (((uintptr_t)node & PCTRIE_ISLEAF) != 0); 184 } 185 186 /* 187 * Returns val with leaf bit set. 188 */ 189 static __inline void * 190 pctrie_toleaf(uint64_t *val) 191 { 192 return ((void *)((uintptr_t)val | PCTRIE_ISLEAF)); 193 } 194 195 /* 196 * Returns the associated val extracted from node. 197 */ 198 static __inline uint64_t * 199 pctrie_toval(struct pctrie_node *node) 200 { 201 return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS)); 202 } 203 204 /* 205 * Returns the associated pointer extracted from node and field offset. 206 */ 207 static __inline void * 208 pctrie_toptr(struct pctrie_node *node, int keyoff) 209 { 210 return ((void *)(((uintptr_t)node & ~PCTRIE_FLAGS) - keyoff)); 211 } 212 213 /* 214 * Make 'parent' a parent of 'child'. 215 */ 216 static __inline void 217 pctrie_setparent(struct pctrie_node *child, struct pctrie_node *parent) 218 { 219 pctrie_node_store(&child->pn_parent, parent, PCTRIE_UNSERIALIZED); 220 } 221 222 /* 223 * Return the parent of 'node'. 224 */ 225 static __inline struct pctrie_node * 226 pctrie_parent(struct pctrie_node *node) 227 { 228 return (pctrie_node_load(&node->pn_parent, NULL, PCTRIE_UNSERIALIZED)); 229 } 230 231 /* 232 * Make 'child' a child of 'node'. 233 */ 234 static __inline void 235 pctrie_addnode(struct pctrie_node *node, uint64_t index, 236 struct pctrie_node *child, enum pctrie_access access) 237 { 238 int slot; 239 240 slot = pctrie_slot(node, index); 241 pctrie_node_store(&node->pn_child[slot], child, access); 242 node->pn_popmap ^= 1 << slot; 243 KASSERT((node->pn_popmap & (1 << slot)) != 0, 244 ("%s: bad popmap slot %d in node %p", __func__, slot, node)); 245 } 246 247 /* 248 * pctrie node zone initializer. 249 */ 250 int 251 pctrie_zone_init(void *mem, int size __unused, int flags __unused) 252 { 253 struct pctrie_node *node; 254 255 node = mem; 256 node->pn_popmap = 0; 257 for (int i = 0; i < nitems(node->pn_child); i++) 258 pctrie_node_store(&node->pn_child[i], PCTRIE_NULL, 259 PCTRIE_UNSERIALIZED); 260 return (0); 261 } 262 263 size_t 264 pctrie_node_size(void) 265 { 266 267 return (sizeof(struct pctrie_node)); 268 } 269 270 /* 271 * Look for where to insert the key-value pair into the trie. Complete the 272 * insertion if it replaces a null leaf. Return the insertion location if the 273 * insertion needs to be completed by the caller; otherwise return NULL. 274 * 275 * If the key is already present in the trie, populate *found_out as if by 276 * pctrie_lookup(). 277 */ 278 static __always_inline void * 279 pctrie_insert_lookup_compound(struct pctrie *ptree, uint64_t *val, 280 struct pctrie_node **parent_out, uint64_t **found_out) 281 { 282 uint64_t index; 283 struct pctrie_node *node, *parent; 284 int slot; 285 286 index = *val; 287 288 /* 289 * The owner of record for root is not really important because it 290 * will never be used. 291 */ 292 node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); 293 parent = NULL; 294 for (;;) { 295 if (pctrie_isleaf(node)) { 296 if (node == PCTRIE_NULL) { 297 if (parent == NULL) 298 pctrie_node_store(pctrie_root(ptree), 299 pctrie_toleaf(val), PCTRIE_LOCKED); 300 else 301 pctrie_addnode(parent, index, 302 pctrie_toleaf(val), PCTRIE_LOCKED); 303 *parent_out = parent; 304 return (NULL); 305 } 306 if (*pctrie_toval(node) == index) { 307 *found_out = pctrie_toval(node); 308 *parent_out = parent; 309 return (NULL); 310 } 311 break; 312 } 313 if (pctrie_keybarr(node, index, &slot)) 314 break; 315 parent = node; 316 node = pctrie_node_load(&node->pn_child[slot], NULL, 317 PCTRIE_LOCKED); 318 } 319 320 /* 321 * 'node' must be replaced in the tree with a new branch node, with 322 * children 'node' and 'val'. Return the place that points to 'node' 323 * now, and will point to to the new branching node later. 324 */ 325 *parent_out = parent; 326 return ((parent == NULL) ? pctrie_root(ptree): &parent->pn_child[slot]); 327 } 328 329 /* 330 * Wrap pctrie_insert_lookup_compound to implement a strict insertion. Panic 331 * if the key already exists, and do not look for neighboring entries. 332 */ 333 void * 334 pctrie_insert_lookup_strict(struct pctrie *ptree, uint64_t *val, 335 struct pctrie_node **parent_out) 336 { 337 void *parentp; 338 uint64_t *found; 339 340 found = NULL; 341 parentp = pctrie_insert_lookup_compound(ptree, val, parent_out, 342 &found); 343 if (__predict_false(found != NULL)) 344 panic("%s: key %jx is already present", __func__, 345 (uintmax_t)*val); 346 return (parentp); 347 } 348 349 /* 350 * Wrap pctrie_insert_lookup_compound to implement find-or-insert. Do not look 351 * for neighboring entries. 352 */ 353 void * 354 pctrie_insert_lookup(struct pctrie *ptree, uint64_t *val, 355 struct pctrie_node **parent_out, uint64_t **found_out) 356 { 357 *found_out = NULL; 358 return (pctrie_insert_lookup_compound(ptree, val, parent_out, 359 found_out)); 360 } 361 362 /* 363 * Inserts newly allocated node 'child' into trie at location 'parentp', with 364 * parent 'parent' and two children, 'val' and whatever non-NULL node or leaf 365 * was at 'parentp' to begin with. 366 */ 367 void 368 pctrie_insert_node(uint64_t *val, struct pctrie_node *parent, void *parentp, 369 struct pctrie_node *child) 370 { 371 struct pctrie_node *node; 372 uint64_t index, newind; 373 374 /* 375 * Clear the last child pointer of the newly allocated child. We want 376 * to clear it after the final section has exited so lookup can not 377 * return false negatives. It is done here because it will be 378 * cache-cold in the dtor callback. 379 */ 380 if (child->pn_popmap != 0) { 381 pctrie_node_store(&child->pn_child[ffs(child->pn_popmap) - 1], 382 PCTRIE_NULL, PCTRIE_UNSERIALIZED); 383 child->pn_popmap = 0; 384 } 385 386 /* 387 * Recover the values of the two children of the new child node. If 388 * 'node' is not a leaf, this stores into 'newind' the 'owner' field, 389 * which must be first in the node. 390 */ 391 index = *val; 392 node = pctrie_node_load(parentp, NULL, PCTRIE_UNSERIALIZED); 393 pctrie_setparent(child, parent); 394 if (!pctrie_isleaf(node)) 395 pctrie_setparent(node, child); 396 newind = *pctrie_toval(node); 397 398 /* 399 * From the highest-order bit where the indexes differ, 400 * compute the highest level in the trie where they differ. Then, 401 * compute the least index of this subtrie. 402 */ 403 _Static_assert(sizeof(long long) >= sizeof(uint64_t), 404 "uint64 too wide"); 405 _Static_assert(sizeof(uint64_t) * NBBY <= 406 (1 << (sizeof(child->pn_clev) * NBBY)), "pn_clev too narrow"); 407 child->pn_clev = rounddown(ilog2(index ^ newind), PCTRIE_WIDTH); 408 child->pn_owner = PCTRIE_COUNT; 409 child->pn_owner = index & -(child->pn_owner << child->pn_clev); 410 411 412 /* These writes are not yet visible due to ordering. */ 413 pctrie_addnode(child, index, pctrie_toleaf(val), PCTRIE_UNSERIALIZED); 414 pctrie_addnode(child, newind, node, PCTRIE_UNSERIALIZED); 415 /* Synchronize to make the above visible. */ 416 pctrie_node_store(parentp, child, PCTRIE_LOCKED); 417 } 418 419 /* 420 * Return the value associated with the node, if the node is a leaf that matches 421 * the index; otherwise NULL. 422 */ 423 static __always_inline uint64_t * 424 pctrie_match_value(struct pctrie_node *node, uint64_t index) 425 { 426 uint64_t *m; 427 428 if (!pctrie_isleaf(node) || (m = pctrie_toval(node)) == NULL || 429 *m != index) 430 m = NULL; 431 return (m); 432 } 433 434 /* 435 * Returns the value stored at the index. If the index is not present, 436 * NULL is returned. 437 */ 438 static __always_inline uint64_t * 439 _pctrie_lookup(struct pctrie *ptree, uint64_t index, smr_t smr, 440 enum pctrie_access access) 441 { 442 struct pctrie_node *node; 443 int slot; 444 445 node = pctrie_root_load(ptree, smr, access); 446 /* Seek a node that matches index. */ 447 while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot)) 448 node = pctrie_node_load(&node->pn_child[slot], smr, access); 449 return (pctrie_match_value(node, index)); 450 } 451 452 /* 453 * Returns the value stored at the index, assuming access is externally 454 * synchronized by a lock. 455 * 456 * If the index is not present, NULL is returned. 457 */ 458 uint64_t * 459 pctrie_lookup(struct pctrie *ptree, uint64_t index) 460 { 461 return (_pctrie_lookup(ptree, index, NULL, PCTRIE_LOCKED)); 462 } 463 464 /* 465 * Returns the value stored at the index without requiring an external lock. 466 * 467 * If the index is not present, NULL is returned. 468 */ 469 uint64_t * 470 pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr) 471 { 472 uint64_t *res; 473 474 smr_enter(smr); 475 res = _pctrie_lookup(ptree, index, smr, PCTRIE_SMR); 476 smr_exit(smr); 477 return (res); 478 } 479 480 /* 481 * Returns the last node examined in the search for the index, and sets the 482 * parent of that node. 483 */ 484 static __always_inline struct pctrie_node * 485 _pctrie_lookup_node(struct pctrie *ptree, struct pctrie_node *node, 486 uint64_t index, struct pctrie_node **parent_out, 487 smr_t smr, enum pctrie_access access) 488 { 489 struct pctrie_node *parent; 490 int slot; 491 492 parent = node; 493 if (parent == NULL) 494 node = pctrie_root_load(ptree, smr, access); 495 496 /* 497 * Climb the search path to find the lowest node from which to start the 498 * search for a value matching 'index'. 499 */ 500 while (parent != NULL) { 501 KASSERT(access == PCTRIE_SMR || !powerof2(parent->pn_popmap), 502 ("%s: freed node in iter path", __func__)); 503 node = parent; 504 if (!pctrie_keybarr(node, index, &slot)) 505 break; 506 parent = pctrie_parent(node); 507 } 508 509 /* Seek a node that matches index. */ 510 while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot)) { 511 parent = node; 512 KASSERT(access == PCTRIE_SMR || !powerof2(parent->pn_popmap), 513 ("%s: freed node in iter path", __func__)); 514 node = pctrie_node_load(&node->pn_child[slot], smr, access); 515 } 516 *parent_out = parent; 517 return (node); 518 } 519 520 /* 521 * Returns the value stored at a given index value, possibly NULL, assuming 522 * access is externally synchronized by a lock. 523 */ 524 uint64_t * 525 pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index) 526 { 527 struct pctrie_node *node; 528 529 node = _pctrie_lookup_node(it->ptree, it->node, index, &it->node, 530 NULL, PCTRIE_LOCKED); 531 it->index = index; 532 return (pctrie_match_value(node, index)); 533 } 534 535 /* 536 * Insert the val in the trie, starting search with iterator. Return a pointer 537 * to indicate where a new node must be allocated to complete insertion. 538 * Assumes access is externally synchronized by a lock. 539 */ 540 void * 541 pctrie_iter_insert_lookup(struct pctrie_iter *it, uint64_t *val) 542 { 543 struct pctrie_node *node; 544 545 node = _pctrie_lookup_node(it->ptree, it->node, *val, &it->node, 546 NULL, PCTRIE_LOCKED); 547 it->index = *val; 548 if (node == PCTRIE_NULL) { 549 if (it->node == NULL) 550 pctrie_node_store(pctrie_root(it->ptree), 551 pctrie_toleaf(val), PCTRIE_LOCKED); 552 else 553 pctrie_addnode(it->node, it->index, 554 pctrie_toleaf(val), PCTRIE_LOCKED); 555 return (NULL); 556 } 557 if (__predict_false(pctrie_match_value(node, it->index) != NULL)) 558 panic("%s: key %jx is already present", __func__, 559 (uintmax_t)it->index); 560 561 /* 562 * 'node' must be replaced in the tree with a new branch node, with 563 * children 'node' and 'val'. Return the place that points to 'node' 564 * now, and will point to to the new branching node later. 565 */ 566 return (pctrie_child(it->ptree, it->node, it->index)); 567 } 568 569 /* 570 * Returns the value stored at a fixed offset from the current index value, 571 * possibly NULL. 572 */ 573 uint64_t * 574 pctrie_iter_stride(struct pctrie_iter *it, int stride) 575 { 576 uint64_t index = it->index + stride; 577 578 /* Detect stride overflow. */ 579 if ((stride > 0) != (index > it->index)) 580 return (NULL); 581 /* Detect crossing limit */ 582 if ((index < it->limit) != (it->index < it->limit)) 583 return (NULL); 584 585 return (pctrie_iter_lookup(it, index)); 586 } 587 588 /* 589 * Returns the value stored at one more than the current index value, possibly 590 * NULL, assuming access is externally synchronized by a lock. 591 */ 592 uint64_t * 593 pctrie_iter_next(struct pctrie_iter *it) 594 { 595 return (pctrie_iter_stride(it, 1)); 596 } 597 598 /* 599 * Returns the value stored at one less than the current index value, possibly 600 * NULL, assuming access is externally synchronized by a lock. 601 */ 602 uint64_t * 603 pctrie_iter_prev(struct pctrie_iter *it) 604 { 605 return (pctrie_iter_stride(it, -1)); 606 } 607 608 /* 609 * Returns the number of contiguous, non-NULL entries read into the value[] 610 * array, starting at index. 611 */ 612 static __always_inline int 613 _pctrie_lookup_range(struct pctrie *ptree, struct pctrie_node *node, 614 uint64_t index, uint64_t *value[], int count, 615 struct pctrie_node **parent_out, smr_t smr, enum pctrie_access access) 616 { 617 struct pctrie_node *parent; 618 uint64_t *val; 619 int base, end, i; 620 621 parent = node; 622 for (i = 0; i < count;) { 623 node = _pctrie_lookup_node(ptree, parent, index + i, &parent, 624 smr, access); 625 if ((val = pctrie_match_value(node, index + i)) == NULL) 626 break; 627 value[i++] = val; 628 base = (index + i) % PCTRIE_COUNT; 629 if (base == 0 || parent == NULL || parent->pn_clev != 0) 630 continue; 631 632 /* 633 * For PCTRIE_SMR, compute an upper bound on the number of 634 * children of this parent left to examine. For PCTRIE_LOCKED, 635 * compute the number of non-NULL children from base up to the 636 * first NULL child, if any, using the fact that pn_popmap has 637 * bits set for only the non-NULL children. 638 * 639 * The pn_popmap field is accessed only when a lock is held. 640 * To use it for PCTRIE_SMR here would require that we know that 641 * race conditions cannot occur if the tree is modified while 642 * accessed here. Guarantees about the visibility of changes to 643 * child pointers, enforced by memory barriers on the writing of 644 * pointers, are not present for the pn_popmap field, so that 645 * the popmap bit for a child page may, for an instant, 646 * misrepresent the nullness of the child page because an 647 * operation modifying the pctrie is in progress. 648 */ 649 end = (access == PCTRIE_SMR) ? PCTRIE_COUNT - base : 650 ffs((parent->pn_popmap >> base) + 1) - 1; 651 end = MIN(count, i + end); 652 while (i < end) { 653 node = pctrie_node_load(&parent->pn_child[base++], 654 smr, access); 655 val = pctrie_toval(node); 656 if (access == PCTRIE_SMR && val == NULL) 657 break; 658 value[i++] = val; 659 KASSERT(val != NULL, 660 ("%s: null child written to range", __func__)); 661 } 662 if (access == PCTRIE_SMR) { 663 if (i < end) 664 break; 665 } else { 666 if (base < PCTRIE_COUNT) 667 break; 668 } 669 } 670 if (parent_out != NULL) 671 *parent_out = parent; 672 return (i); 673 } 674 675 /* 676 * Returns the number of contiguous, non-NULL entries read into the value[] 677 * array, starting at index, assuming access is externally synchronized by a 678 * lock. 679 */ 680 int 681 pctrie_lookup_range(struct pctrie *ptree, uint64_t index, 682 uint64_t *value[], int count) 683 { 684 return (_pctrie_lookup_range(ptree, NULL, index, value, count, NULL, 685 NULL, PCTRIE_LOCKED)); 686 } 687 688 /* 689 * Returns the number of contiguous, non-NULL entries read into the value[] 690 * array, starting at index, without requiring an external lock. These entries 691 * *may* never have been in the pctrie all at one time, but for a series of 692 * times t0, t1, t2, ..., with ti <= t(i+1), value[i] was in the trie at time 693 * ti. 694 */ 695 int 696 pctrie_lookup_range_unlocked(struct pctrie *ptree, uint64_t index, 697 uint64_t *value[], int count, smr_t smr) 698 { 699 int res; 700 701 smr_enter(smr); 702 res = _pctrie_lookup_range(ptree, NULL, index, value, count, NULL, 703 smr, PCTRIE_SMR); 704 smr_exit(smr); 705 return (res); 706 } 707 708 /* 709 * Returns the number of contiguous, non-NULL entries read into the value[] 710 * array, starting at index, assuming access is externally synchronized by a 711 * lock. Uses an iterator. 712 */ 713 int 714 pctrie_iter_lookup_range(struct pctrie_iter *it, uint64_t index, 715 uint64_t *value[], int count) 716 { 717 return (_pctrie_lookup_range(it->ptree, it->node, index, value, count, 718 &it->node, NULL, PCTRIE_LOCKED)); 719 } 720 721 /* 722 * Find first leaf >= index, and fill iter with the path to the parent of that 723 * leaf. Return NULL if there is no such leaf less than limit. 724 */ 725 static __inline uint64_t * 726 _pctrie_lookup_ge(struct pctrie *ptree, struct pctrie_node *node, 727 uint64_t index, struct pctrie_node **parent_out, uint64_t limit) 728 { 729 struct pctrie_node *parent; 730 uint64_t *m; 731 int slot; 732 733 /* Seek a node that matches index. */ 734 node = _pctrie_lookup_node(ptree, node, index, &parent, 735 NULL, PCTRIE_LOCKED); 736 737 /* 738 * If no such node was found, and instead this path leads only to nodes 739 * < index, back up to find a subtrie with the least value > index. 740 */ 741 if (node == PCTRIE_NULL || *pctrie_toval(node) < index) { 742 /* Climb the path to find a node with a descendant > index. */ 743 for (node = parent; node != NULL; node = pctrie_parent(node)) { 744 slot = pctrie_slot(node, index) + 1; 745 if ((node->pn_popmap >> slot) != 0) 746 break; 747 } 748 if (node == NULL) { 749 if (parent_out != NULL) 750 *parent_out = NULL; 751 return (NULL); 752 } 753 754 /* Step to the least child with a descendant > index. */ 755 slot += ffs(node->pn_popmap >> slot) - 1; 756 parent = node; 757 node = pctrie_node_load(&node->pn_child[slot], NULL, 758 PCTRIE_LOCKED); 759 } 760 /* Descend to the least leaf of the subtrie. */ 761 while (!pctrie_isleaf(node)) { 762 if (limit != 0 && node->pn_owner >= limit) 763 return (NULL); 764 slot = ffs(node->pn_popmap) - 1; 765 parent = node; 766 node = pctrie_node_load(&node->pn_child[slot], NULL, 767 PCTRIE_LOCKED); 768 } 769 if (parent_out != NULL) 770 *parent_out = parent; 771 m = pctrie_toval(node); 772 if (limit != 0 && *m >= limit) 773 return (NULL); 774 return (m); 775 } 776 777 uint64_t * 778 pctrie_lookup_ge(struct pctrie *ptree, uint64_t index) 779 { 780 return (_pctrie_lookup_ge(ptree, NULL, index, NULL, 0)); 781 } 782 783 /* 784 * Find first leaf >= index, and fill iter with the path to the parent of that 785 * leaf. Return NULL if there is no such leaf less than limit. 786 */ 787 uint64_t * 788 pctrie_iter_lookup_ge(struct pctrie_iter *it, uint64_t index) 789 { 790 uint64_t *m; 791 792 m = _pctrie_lookup_ge(it->ptree, it->node, index, &it->node, it->limit); 793 if (m != NULL) 794 it->index = *m; 795 return (m); 796 } 797 798 /* 799 * Find the first leaf with value at least 'jump' greater than the previous 800 * leaf. Return NULL if that value is >= limit. 801 */ 802 uint64_t * 803 pctrie_iter_jump_ge(struct pctrie_iter *it, int64_t jump) 804 { 805 uint64_t index = it->index + jump; 806 807 /* Detect jump overflow. */ 808 if ((jump > 0) != (index > it->index)) 809 return (NULL); 810 if (it->limit != 0 && index >= it->limit) 811 return (NULL); 812 return (pctrie_iter_lookup_ge(it, index)); 813 } 814 815 /* 816 * Find first leaf <= index, and fill iter with the path to the parent of that 817 * leaf. Return NULL if there is no such leaf greater than limit. 818 */ 819 static __inline uint64_t * 820 _pctrie_lookup_le(struct pctrie *ptree, struct pctrie_node *node, 821 uint64_t index, struct pctrie_node **parent_out, uint64_t limit) 822 { 823 struct pctrie_node *parent; 824 uint64_t *m; 825 int slot; 826 827 /* Seek a node that matches index. */ 828 node = _pctrie_lookup_node(ptree, node, index, &parent, NULL, 829 PCTRIE_LOCKED); 830 831 /* 832 * If no such node was found, and instead this path leads only to nodes 833 * > index, back up to find a subtrie with the greatest value < index. 834 */ 835 if (node == PCTRIE_NULL || *pctrie_toval(node) > index) { 836 /* Climb the path to find a node with a descendant < index. */ 837 for (node = parent; node != NULL; node = pctrie_parent(node)) { 838 slot = pctrie_slot(node, index); 839 if ((node->pn_popmap & ((1 << slot) - 1)) != 0) 840 break; 841 } 842 if (node == NULL) { 843 if (parent_out != NULL) 844 *parent_out = NULL; 845 return (NULL); 846 } 847 848 /* Step to the greatest child with a descendant < index. */ 849 slot = ilog2(node->pn_popmap & ((1 << slot) - 1)); 850 parent = node; 851 node = pctrie_node_load(&node->pn_child[slot], NULL, 852 PCTRIE_LOCKED); 853 } 854 /* Descend to the greatest leaf of the subtrie. */ 855 while (!pctrie_isleaf(node)) { 856 if (limit != 0 && limit >= node->pn_owner + 857 ((uint64_t)PCTRIE_COUNT << node->pn_clev) - 1) 858 return (NULL); 859 slot = ilog2(node->pn_popmap); 860 parent = node; 861 node = pctrie_node_load(&node->pn_child[slot], NULL, 862 PCTRIE_LOCKED); 863 } 864 if (parent_out != NULL) 865 *parent_out = parent; 866 m = pctrie_toval(node); 867 if (limit != 0 && *m <= limit) 868 return (NULL); 869 return (m); 870 } 871 872 uint64_t * 873 pctrie_lookup_le(struct pctrie *ptree, uint64_t index) 874 { 875 return (_pctrie_lookup_le(ptree, NULL, index, NULL, 0)); 876 } 877 878 uint64_t * 879 pctrie_subtree_lookup_lt(struct pctrie *ptree, struct pctrie_node *node, 880 uint64_t index) 881 { 882 if (index == 0) 883 return (NULL); 884 return (_pctrie_lookup_le(ptree, node, index - 1, NULL, 0)); 885 } 886 887 /* 888 * Find first leaf <= index, and fill iter with the path to the parent of that 889 * leaf. Return NULL if there is no such leaf greater than limit. 890 */ 891 uint64_t * 892 pctrie_iter_lookup_le(struct pctrie_iter *it, uint64_t index) 893 { 894 uint64_t *m; 895 896 m = _pctrie_lookup_le(it->ptree, it->node, index, &it->node, it->limit); 897 if (m != NULL) 898 it->index = *m; 899 return (m); 900 } 901 902 /* 903 * Find the first leaf with value at most 'jump' less than the previous 904 * leaf. Return NULL if that value is <= limit. 905 */ 906 uint64_t * 907 pctrie_iter_jump_le(struct pctrie_iter *it, int64_t jump) 908 { 909 uint64_t index = it->index - jump; 910 911 /* Detect jump overflow. */ 912 if ((jump > 0) != (index < it->index)) 913 return (NULL); 914 if (it->limit != 0 && index <= it->limit) 915 return (NULL); 916 return (pctrie_iter_lookup_le(it, index)); 917 } 918 919 /* 920 * Remove the non-NULL child identified by 'index' from the set of children of 921 * 'node'. If doing so causes 'node' to have only one child, purge it from the 922 * pctrie and save it in *freenode for later disposal. 923 */ 924 static void 925 pctrie_remove(struct pctrie *ptree, struct pctrie_node *node, uint64_t index, 926 struct pctrie_node **freenode) 927 { 928 smr_pctnode_t *parentp; 929 struct pctrie_node *child; 930 int slot; 931 932 *freenode = NULL; 933 parentp = pctrie_child(ptree, node, index); 934 if (node == NULL) { 935 pctrie_node_store(parentp, PCTRIE_NULL, PCTRIE_LOCKED); 936 return; 937 } 938 slot = pctrie_slot(node, index); 939 KASSERT((node->pn_popmap & (1 << slot)) != 0, 940 ("%s: bad popmap slot %d in node %p", 941 __func__, slot, node)); 942 node->pn_popmap ^= 1 << slot; 943 if (!powerof2(node->pn_popmap)) { 944 pctrie_node_store(parentp, PCTRIE_NULL, PCTRIE_LOCKED); 945 return; 946 } 947 pctrie_node_store(parentp, PCTRIE_NULL, PCTRIE_UNSERIALIZED); 948 KASSERT(node->pn_popmap != 0, ("%s: bad popmap all zeroes", __func__)); 949 slot = ffs(node->pn_popmap) - 1; 950 *freenode = node; 951 child = pctrie_node_load(&node->pn_child[slot], NULL, PCTRIE_LOCKED); 952 KASSERT(child != PCTRIE_NULL, 953 ("%s: bad popmap slot %d in node %p", __func__, slot, node)); 954 node = pctrie_parent(node); 955 if (!pctrie_isleaf(child)) 956 pctrie_setparent(child, node); 957 parentp = pctrie_child(ptree, node, index); 958 pctrie_node_store(parentp, child, PCTRIE_LOCKED); 959 } 960 961 /* 962 * Remove the specified index from the tree, and return the value stored at 963 * that index. If the index is not present, return NULL. 964 */ 965 uint64_t * 966 pctrie_remove_lookup(struct pctrie *ptree, uint64_t index, 967 struct pctrie_node **freenode) 968 { 969 struct pctrie_node *child, *node; 970 uint64_t *m; 971 int slot; 972 973 node = NULL; 974 child = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); 975 while (!pctrie_isleaf(child)) { 976 node = child; 977 slot = pctrie_slot(node, index); 978 child = pctrie_node_load(&node->pn_child[slot], NULL, 979 PCTRIE_LOCKED); 980 } 981 if ((m = pctrie_match_value(child, index)) != NULL) 982 pctrie_remove(ptree, node, index, freenode); 983 else 984 *freenode = NULL; 985 return (m); 986 } 987 988 /* 989 * Remove from the trie the leaf last chosen by the iterator, and 990 * adjust the path if it's last member is to be freed. 991 */ 992 void 993 pctrie_iter_remove(struct pctrie_iter *it, struct pctrie_node **freenode) 994 { 995 KASSERT(NULL != pctrie_match_value(pctrie_node_load(pctrie_child( 996 it->ptree, it->node, it->index), NULL, PCTRIE_LOCKED), it->index), 997 ("%s: removing value %jx not at iter", __func__, 998 (uintmax_t)it->index)); 999 pctrie_remove(it->ptree, it->node, it->index, freenode); 1000 if (*freenode != NULL) 1001 it->node = pctrie_parent(it->node); 1002 } 1003 1004 /* 1005 * Return the current leaf, assuming access is externally synchronized by a 1006 * lock. 1007 */ 1008 uint64_t * 1009 pctrie_iter_value(struct pctrie_iter *it) 1010 { 1011 struct pctrie_node *node; 1012 1013 node = pctrie_node_load(pctrie_child(it->ptree, it->node, it->index), 1014 NULL, PCTRIE_LOCKED); 1015 return (pctrie_toval(node)); 1016 } 1017 1018 /* 1019 * Walk the subtrie rooted at *pnode in order, invoking callback on leaves, 1020 * until an interior node is stripped of all children, and returned for 1021 * deallocation, with *pnode left pointing to the parent of that node. 1022 */ 1023 static __always_inline struct pctrie_node * 1024 pctrie_reclaim_prune(struct pctrie_node **pnode, struct pctrie_node *parent, 1025 pctrie_cb_t callback, int keyoff, void *arg) 1026 { 1027 struct pctrie_node *child, *node; 1028 int slot; 1029 1030 node = *pnode; 1031 while (node->pn_popmap != 0) { 1032 slot = ffs(node->pn_popmap) - 1; 1033 node->pn_popmap ^= 1 << slot; 1034 child = pctrie_node_load(&node->pn_child[slot], NULL, 1035 PCTRIE_UNSERIALIZED); 1036 pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL, 1037 PCTRIE_UNSERIALIZED); 1038 if (pctrie_isleaf(child)) { 1039 if (callback != NULL) 1040 callback(pctrie_toptr(child, keyoff), arg); 1041 continue; 1042 } 1043 /* Climb one level down the trie. */ 1044 parent = node; 1045 node = child; 1046 } 1047 *pnode = parent; 1048 return (node); 1049 } 1050 1051 /* 1052 * Recover the node parent from its first child and continue pruning. 1053 */ 1054 static __always_inline struct pctrie_node * 1055 pctrie_reclaim_resume_compound(struct pctrie_node **pnode, 1056 pctrie_cb_t callback, int keyoff, void *arg) 1057 { 1058 if (*pnode == NULL) 1059 return (NULL); 1060 /* Climb one level up the trie. */ 1061 return (pctrie_reclaim_prune(pnode, pctrie_parent(*pnode), callback, 1062 keyoff, arg)); 1063 } 1064 1065 /* 1066 * Find the trie root, and start pruning with a NULL parent. 1067 */ 1068 static __always_inline struct pctrie_node * 1069 pctrie_reclaim_begin_compound(struct pctrie_node **pnode, 1070 struct pctrie *ptree, 1071 pctrie_cb_t callback, int keyoff, void *arg) 1072 { 1073 struct pctrie_node *node; 1074 1075 node = pctrie_root_load(ptree, NULL, PCTRIE_UNSERIALIZED); 1076 pctrie_node_store(pctrie_root(ptree), PCTRIE_NULL, PCTRIE_UNSERIALIZED); 1077 if (pctrie_isleaf(node)) { 1078 if (callback != NULL && node != PCTRIE_NULL) 1079 callback(pctrie_toptr(node, keyoff), arg); 1080 return (NULL); 1081 } 1082 *pnode = node; 1083 return (pctrie_reclaim_prune(pnode, NULL, callback, keyoff, arg)); 1084 } 1085 1086 struct pctrie_node * 1087 pctrie_reclaim_resume(struct pctrie_node **pnode) 1088 { 1089 return (pctrie_reclaim_resume_compound(pnode, NULL, 0, NULL)); 1090 } 1091 1092 struct pctrie_node * 1093 pctrie_reclaim_begin(struct pctrie_node **pnode, struct pctrie *ptree) 1094 { 1095 return (pctrie_reclaim_begin_compound(pnode, ptree, NULL, 0, NULL)); 1096 } 1097 1098 struct pctrie_node * 1099 pctrie_reclaim_resume_cb(struct pctrie_node **pnode, 1100 pctrie_cb_t callback, int keyoff, void *arg) 1101 { 1102 return (pctrie_reclaim_resume_compound(pnode, callback, keyoff, arg)); 1103 } 1104 1105 struct pctrie_node * 1106 pctrie_reclaim_begin_cb(struct pctrie_node **pnode, struct pctrie *ptree, 1107 pctrie_cb_t callback, int keyoff, void *arg) 1108 { 1109 return (pctrie_reclaim_begin_compound(pnode, ptree, 1110 callback, keyoff, arg)); 1111 } 1112 1113 /* 1114 * Replace an existing value in the trie with another one. 1115 * Panics if there is not an old value in the trie at the new value's index. 1116 */ 1117 uint64_t * 1118 pctrie_replace(struct pctrie *ptree, uint64_t *newval) 1119 { 1120 struct pctrie_node *leaf, *parent, *node; 1121 uint64_t *m; 1122 uint64_t index; 1123 int slot; 1124 1125 leaf = pctrie_toleaf(newval); 1126 index = *newval; 1127 node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); 1128 parent = NULL; 1129 for (;;) { 1130 if (pctrie_isleaf(node)) { 1131 if ((m = pctrie_toval(node)) != NULL && *m == index) { 1132 if (parent == NULL) 1133 pctrie_node_store(pctrie_root(ptree), 1134 leaf, PCTRIE_LOCKED); 1135 else 1136 pctrie_node_store( 1137 &parent->pn_child[slot], leaf, 1138 PCTRIE_LOCKED); 1139 return (m); 1140 } 1141 break; 1142 } 1143 if (pctrie_keybarr(node, index, &slot)) 1144 break; 1145 parent = node; 1146 node = pctrie_node_load(&node->pn_child[slot], NULL, 1147 PCTRIE_LOCKED); 1148 } 1149 panic("%s: original replacing value not found", __func__); 1150 } 1151 1152 #ifdef DDB 1153 /* 1154 * Show details about the given node. 1155 */ 1156 DB_SHOW_COMMAND(pctrienode, db_show_pctrienode) 1157 { 1158 struct pctrie_node *node, *tmp; 1159 int slot; 1160 pn_popmap_t popmap; 1161 1162 if (!have_addr) 1163 return; 1164 node = (struct pctrie_node *)addr; 1165 db_printf("node %p, owner %jx, children popmap %04x, level %u:\n", 1166 (void *)node, (uintmax_t)node->pn_owner, node->pn_popmap, 1167 node->pn_clev / PCTRIE_WIDTH); 1168 for (popmap = node->pn_popmap; popmap != 0; popmap ^= 1 << slot) { 1169 slot = ffs(popmap) - 1; 1170 tmp = pctrie_node_load(&node->pn_child[slot], NULL, 1171 PCTRIE_UNSERIALIZED); 1172 db_printf("slot: %d, val: %p, value: %p, clev: %d\n", 1173 slot, (void *)tmp, 1174 pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL, 1175 node->pn_clev / PCTRIE_WIDTH); 1176 } 1177 } 1178 #endif /* DDB */ 1179