1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2013 EMC Corp. 5 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org> 6 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 */ 31 32 /* 33 * Path-compressed radix trie implementation. 34 * 35 * The implementation takes into account the following rationale: 36 * - Size of the nodes should be as small as possible but still big enough 37 * to avoid a large maximum depth for the trie. 
 * between the necessity to not wire too much physical memory for the nodes
 * and the necessity to avoid too much cache pollution during the trie
 * operations.
 * - There is not a huge bias toward the number of lookup operations over
 *   the number of insert and remove operations.  This basically implies
 *   that optimizations supposedly helping one operation but hurting the
 *   other might be carefully evaluated.
 * - On average not many nodes are expected to be fully populated, hence
 *   level compression may just complicate things.
 */

#include <sys/cdefs.h>
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/libkern.h>
#include <sys/pctrie.h>
#include <sys/proc.h>	/* smr.h depends on struct thread. */
#include <sys/smr.h>
#include <sys/smr_types.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

/*
 * The popmap holds one bit per child slot, so it must be wide enough for
 * PCTRIE_COUNT (== 1 << PCTRIE_WIDTH) bits.
 */
#if PCTRIE_WIDTH == 3
typedef uint8_t pn_popmap_t;
#elif PCTRIE_WIDTH == 4
typedef uint16_t pn_popmap_t;
#elif PCTRIE_WIDTH == 5
typedef uint32_t pn_popmap_t;
#else
#error Unsupported width
#endif
/* Popmaps are manipulated below with int-width shift and ffs() operations. */
_Static_assert(sizeof(pn_popmap_t) <= sizeof(int),
    "pn_popmap_t too wide");

struct pctrie_node;
typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t;

struct pctrie_node {
	uint64_t	pn_owner;		/* Owner of record. */
	pn_popmap_t	pn_popmap;		/* Valid children. */
	uint8_t		pn_clev;		/* Level * WIDTH. */
	smr_pctnode_t	pn_parent;		/* Parent node. */
	smr_pctnode_t	pn_child[PCTRIE_COUNT];	/* Child nodes. */
};

/*
 * Map index to an array position for the children of node.
 * Extracts the WIDTH-bit digit of 'index' at this node's level.
 */
static __inline int
pctrie_slot(struct pctrie_node *node, uint64_t index)
{
	return ((index >> node->pn_clev) & (PCTRIE_COUNT - 1));
}

/*
 * Returns true if index does not belong to the specified node.  Otherwise,
 * sets slot value, and returns false.
 */
static __inline bool
pctrie_keybarr(struct pctrie_node *node, uint64_t index, int *slot)
{
	/*
	 * After subtracting the subtree base and dropping the bits below this
	 * node's level, an in-range index leaves exactly the slot number.
	 */
	index = (index - node->pn_owner) >> node->pn_clev;
	if (index >= PCTRIE_COUNT)
		return (true);
	*slot = index;
	return (false);
}

/*
 * How a trie slot may legally be accessed: inside an SMR read section, with
 * the tree lock held, or with no concurrent access possible at all.
 */
enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED };

/*
 * Fetch a node pointer from a slot.
 */
static __inline struct pctrie_node *
pctrie_node_load(smr_pctnode_t *p, smr_t smr, enum pctrie_access access)
{
	switch (access) {
	case PCTRIE_UNSERIALIZED:
		return (smr_unserialized_load(p, true));
	case PCTRIE_LOCKED:
		return (smr_serialized_load(p, true));
	case PCTRIE_SMR:
		return (smr_entered_load(p, smr));
	}
	__assert_unreachable();
}

/*
 * Store a node pointer into a slot.  Writers must hold the tree lock or have
 * exclusive access; storing from within an SMR read section is a panic.
 */
static __inline void
pctrie_node_store(smr_pctnode_t *p, void *v, enum pctrie_access access)
{
	switch (access) {
	case PCTRIE_UNSERIALIZED:
		smr_unserialized_store(p, v, true);
		break;
	case PCTRIE_LOCKED:
		smr_serialized_store(p, v, true);
		break;
	case PCTRIE_SMR:
		panic("%s: Not supported in SMR section.", __func__);
		break;
	default:
		__assert_unreachable();
		break;
	}
}

/*
 * Get the root address, cast to proper type for load/store.
 */
static __inline smr_pctnode_t *
pctrie_root(struct pctrie *ptree)
{
	return ((smr_pctnode_t *)&ptree->pt_root);
}

/*
 * Get the root node for a tree.
 */
static __inline struct pctrie_node *
pctrie_root_load(struct pctrie *ptree, smr_t smr, enum pctrie_access access)
{
	return (pctrie_node_load(pctrie_root(ptree), smr, access));
}

/*
 * Get the slot of the child of 'node' on the path to 'index'; the root slot
 * if 'node' is NULL.
 */
static __inline smr_pctnode_t *
pctrie_child(struct pctrie *ptree, struct pctrie_node *node, uint64_t index)
{
	return (node == NULL ? pctrie_root(ptree) :
	    &node->pn_child[pctrie_slot(node, index)]);
}

/*
 * Returns TRUE if the specified node is a leaf and FALSE otherwise.
 */
static __inline bool
pctrie_isleaf(struct pctrie_node *node)
{
	return (((uintptr_t)node & PCTRIE_ISLEAF) != 0);
}

/*
 * Returns val with leaf bit set.
 */
static __inline void *
pctrie_toleaf(uint64_t *val)
{
	return ((void *)((uintptr_t)val | PCTRIE_ISLEAF));
}

/*
 * Returns the associated val extracted from node.
 */
static __inline uint64_t *
pctrie_toval(struct pctrie_node *node)
{
	return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS));
}

/*
 * Returns the associated pointer extracted from node and field offset.
 */
static __inline void *
pctrie_toptr(struct pctrie_node *node, int keyoff)
{
	return ((void *)(((uintptr_t)node & ~PCTRIE_FLAGS) - keyoff));
}

/*
 * Make 'parent' a parent of 'child'.
 */
static __inline void
pctrie_setparent(struct pctrie_node *child, struct pctrie_node *parent)
{
	pctrie_node_store(&child->pn_parent, parent, PCTRIE_UNSERIALIZED);
}

/*
 * Return the parent of 'node'.
 */
static __inline struct pctrie_node *
pctrie_parent(struct pctrie_node *node)
{
	return (pctrie_node_load(&node->pn_parent, NULL, PCTRIE_UNSERIALIZED));
}

/*
 * Make 'child' a child of 'node'.  The slot selected by 'index' must be
 * empty on entry; the XOR sets the popmap bit, and the KASSERT catches a
 * doubly-populated slot.
 */
static __inline void
pctrie_addnode(struct pctrie_node *node, uint64_t index,
    struct pctrie_node *child, enum pctrie_access access)
{
	int slot;

	slot = pctrie_slot(node, index);
	pctrie_node_store(&node->pn_child[slot], child, access);
	node->pn_popmap ^= 1 << slot;
	KASSERT((node->pn_popmap & (1 << slot)) != 0,
	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
}

/*
 * pctrie node zone initializer.
 */
int
pctrie_zone_init(void *mem, int size __unused, int flags __unused)
{
	struct pctrie_node *node;

	/* A freshly constructed node has no valid children. */
	node = mem;
	node->pn_popmap = 0;
	for (int i = 0; i < nitems(node->pn_child); i++)
		pctrie_node_store(&node->pn_child[i], PCTRIE_NULL,
		    PCTRIE_UNSERIALIZED);
	return (0);
}

/*
 * Return the size of a pctrie node.
 */
size_t
pctrie_node_size(void)
{

	return (sizeof(struct pctrie_node));
}

/*
 * Look for where to insert the key-value pair into the trie.  Complete the
 * insertion if it replaces a null leaf.  Return the insertion location if the
 * insertion needs to be completed by the caller; otherwise return NULL.
 *
 * If the key is already present in the trie, populate *found_out as if by
 * pctrie_lookup().
 */
static __always_inline void *
pctrie_insert_lookup_compound(struct pctrie *ptree, uint64_t *val,
    struct pctrie_node **parent_out, uint64_t **found_out)
{
	uint64_t index;
	struct pctrie_node *node, *parent;
	int slot;

	index = *val;

	/*
	 * The owner of record for root is not really important because it
	 * will never be used.
	 */
	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
	parent = NULL;
	for (;;) {
		if (pctrie_isleaf(node)) {
			if (node == PCTRIE_NULL) {
				/* Found an empty slot; fill it in place. */
				if (parent == NULL)
					pctrie_node_store(pctrie_root(ptree),
					    pctrie_toleaf(val), PCTRIE_LOCKED);
				else
					pctrie_addnode(parent, index,
					    pctrie_toleaf(val), PCTRIE_LOCKED);
				*parent_out = parent;
				return (NULL);
			}
			if (*pctrie_toval(node) == index) {
				/* Key already present; report it. */
				*found_out = pctrie_toval(node);
				*parent_out = parent;
				return (NULL);
			}
			break;
		}
		if (pctrie_keybarr(node, index, &slot))
			break;
		parent = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}

	/*
	 * 'node' must be replaced in the tree with a new branch node, with
	 * children 'node' and 'val'.  Return the place that points to 'node'
	 * now, and will point to the new branching node later.
	 */
	*parent_out = parent;
	return ((parent == NULL) ? pctrie_root(ptree) :
	    &parent->pn_child[slot]);
}

/*
 * Wrap pctrie_insert_lookup_compound to implement a strict insertion.  Panic
 * if the key already exists, and do not look for neighboring entries.
 */
void *
pctrie_insert_lookup_strict(struct pctrie *ptree, uint64_t *val,
    struct pctrie_node **parent_out)
{
	void *parentp;
	uint64_t *found;

	found = NULL;
	parentp = pctrie_insert_lookup_compound(ptree, val, parent_out,
	    &found);
	if (__predict_false(found != NULL))
		panic("%s: key %jx is already present", __func__,
		    (uintmax_t)*val);
	return (parentp);
}

/*
 * Wrap pctrie_insert_lookup_compound to implement find-or-insert.  Do not look
 * for neighboring entries.
 */
void *
pctrie_insert_lookup(struct pctrie *ptree, uint64_t *val,
    struct pctrie_node **parent_out, uint64_t **found_out)
{
	*found_out = NULL;
	return (pctrie_insert_lookup_compound(ptree, val, parent_out,
	    found_out));
}

/*
 * Inserts newly allocated node 'child' into trie at location 'parentp', with
 * parent 'parent' and two children, 'val' and whatever non-NULL node or leaf
 * was at 'parentp' to begin with.
 */
void
pctrie_insert_node(uint64_t *val, struct pctrie_node *parent, void *parentp,
    struct pctrie_node *child)
{
	struct pctrie_node *node;
	uint64_t index, newind;

	/*
	 * Clear the last child pointer of the newly allocated child.  We want
	 * to clear it after the final section has exited so lookup can not
	 * return false negatives.  It is done here because it will be
	 * cache-cold in the dtor callback.
	 */
	if (child->pn_popmap != 0) {
		pctrie_node_store(&child->pn_child[ffs(child->pn_popmap) - 1],
		    PCTRIE_NULL, PCTRIE_UNSERIALIZED);
		child->pn_popmap = 0;
	}

	/*
	 * Recover the values of the two children of the new child node.  If
	 * 'node' is not a leaf, this stores into 'newind' the 'owner' field,
	 * which must be first in the node.
	 */
	index = *val;
	node = pctrie_node_load(parentp, NULL, PCTRIE_UNSERIALIZED);
	pctrie_setparent(child, parent);
	if (!pctrie_isleaf(node))
		pctrie_setparent(node, child);
	newind = *pctrie_toval(node);

	/*
	 * From the highest-order bit where the indexes differ,
	 * compute the highest level in the trie where they differ.  Then,
	 * compute the least index of this subtrie.
	 */
	_Static_assert(sizeof(long long) >= sizeof(uint64_t),
	    "uint64 too wide");
	_Static_assert(sizeof(uint64_t) * NBBY <=
	    (1 << (sizeof(child->pn_clev) * NBBY)), "pn_clev too narrow");
	child->pn_clev = rounddown(ilog2(index ^ newind), PCTRIE_WIDTH);
	child->pn_owner = PCTRIE_COUNT;
	child->pn_owner = index & -(child->pn_owner << child->pn_clev);


	/* These writes are not yet visible due to ordering. */
	pctrie_addnode(child, index, pctrie_toleaf(val), PCTRIE_UNSERIALIZED);
	pctrie_addnode(child, newind, node, PCTRIE_UNSERIALIZED);
	/* Synchronize to make the above visible. */
	pctrie_node_store(parentp, child, PCTRIE_LOCKED);
}

/*
 * Return the value associated with the node, if the node is a leaf that matches
 * the index; otherwise NULL.
 */
static __always_inline uint64_t *
pctrie_match_value(struct pctrie_node *node, uint64_t index)
{
	uint64_t *m;

	if (!pctrie_isleaf(node) || (m = pctrie_toval(node)) == NULL ||
	    *m != index)
		m = NULL;
	return (m);
}

/*
 * Returns the value stored at the index.  If the index is not present,
 * NULL is returned.
 */
static __always_inline uint64_t *
_pctrie_lookup(struct pctrie *ptree, uint64_t index, smr_t smr,
    enum pctrie_access access)
{
	struct pctrie_node *node;
	int slot;

	node = pctrie_root_load(ptree, smr, access);
	/* Seek a node that matches index. */
	while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot))
		node = pctrie_node_load(&node->pn_child[slot], smr, access);
	return (pctrie_match_value(node, index));
}

/*
 * Returns the value stored at the index, assuming access is externally
 * synchronized by a lock.
 *
 * If the index is not present, NULL is returned.
 */
uint64_t *
pctrie_lookup(struct pctrie *ptree, uint64_t index)
{
	return (_pctrie_lookup(ptree, index, NULL, PCTRIE_LOCKED));
}

/*
 * Returns the value stored at the index without requiring an external lock.
 * The lookup runs inside an SMR read section.
 *
 * If the index is not present, NULL is returned.
 */
uint64_t *
pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr)
{
	uint64_t *res;

	smr_enter(smr);
	res = _pctrie_lookup(ptree, index, smr, PCTRIE_SMR);
	smr_exit(smr);
	return (res);
}

/*
 * Returns the last node examined in the search for the index, and sets the
 * parent of that node.
 */
static __always_inline struct pctrie_node *
_pctrie_lookup_node(struct pctrie *ptree, struct pctrie_node *node,
    uint64_t index, struct pctrie_node **parent_out,
    smr_t smr, enum pctrie_access access)
{
	struct pctrie_node *parent;
	int slot;

	/*
	 * Climb the search path to find the lowest node from which to start the
	 * search for a value matching 'index'.
	 */
	while (node != NULL) {
		KASSERT(!powerof2(node->pn_popmap),
		    ("%s: freed node in iter path", __func__));
		if (!pctrie_keybarr(node, index, &slot))
			break;
		node = pctrie_parent(node);
	}

	if (node == NULL) {
		/* Climbed off the top of the trie; restart at the root. */
		parent = NULL;
		node = pctrie_root_load(ptree, smr, access);
	} else {
		parent = node;
		node = pctrie_node_load(&node->pn_child[slot], smr, access);
	}

	/* Seek a node that matches index. */
	while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot)) {
		parent = node;
		node = pctrie_node_load(&node->pn_child[slot], smr, access);
	}
	if (parent_out != NULL)
		*parent_out = parent;
	return (node);
}

/*
 * Returns the value stored at a given index value, possibly NULL, and
 * records the search path in the iterator.
 */
static __always_inline uint64_t *
_pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index, smr_t smr,
    enum pctrie_access access)
{
	struct pctrie_node *node;

	it->index = index;
	node = _pctrie_lookup_node(it->ptree, it->node, index, &it->node,
	    smr, access);
	return (pctrie_match_value(node, index));
}

/*
 * Returns the value stored at a given index value, possibly NULL.
 */
uint64_t *
pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index)
{
	return (_pctrie_iter_lookup(it, index, NULL, PCTRIE_LOCKED));
}

/*
 * Insert the val in the trie, starting search with iterator.  Return a pointer
 * to indicate where a new node must be allocated to complete insertion.
 * Assumes access is externally synchronized by a lock.
 */
void *
pctrie_iter_insert_lookup(struct pctrie_iter *it, uint64_t *val)
{
	struct pctrie_node *node;

	it->index = *val;
	node = _pctrie_lookup_node(it->ptree, it->node, *val, &it->node,
	    NULL, PCTRIE_LOCKED);
	if (node == PCTRIE_NULL) {
		/* Found an empty slot on the path; fill it in place. */
		if (it->node == NULL)
			pctrie_node_store(pctrie_root(it->ptree),
			    pctrie_toleaf(val), PCTRIE_LOCKED);
		else
			pctrie_addnode(it->node, it->index,
			    pctrie_toleaf(val), PCTRIE_LOCKED);
		return (NULL);
	}
	if (__predict_false(pctrie_match_value(node, it->index) != NULL))
		panic("%s: key %jx is already present", __func__,
		    (uintmax_t)it->index);

	/*
	 * 'node' must be replaced in the tree with a new branch node, with
	 * children 'node' and 'val'.  Return the place that points to 'node'
	 * now, and will point to the new branching node later.
	 */
	return (pctrie_child(it->ptree, it->node, it->index));
}

/*
 * Returns the value stored at a fixed offset from the current index value,
 * possibly NULL.
 */
static __always_inline uint64_t *
_pctrie_iter_stride(struct pctrie_iter *it, int stride, smr_t smr,
    enum pctrie_access access)
{
	uint64_t index = it->index + stride;

	/* Detect stride overflow. */
	if ((stride > 0) != (index > it->index))
		return (NULL);
	/* Detect crossing limit. */
	if ((index < it->limit) != (it->index < it->limit))
		return (NULL);

	return (_pctrie_iter_lookup(it, index, smr, access));
}

/*
 * Returns the value stored at a fixed offset from the current index value,
 * possibly NULL.
 */
uint64_t *
pctrie_iter_stride(struct pctrie_iter *it, int stride)
{
	return (_pctrie_iter_stride(it, stride, NULL, PCTRIE_LOCKED));
}

/*
 * Returns the value stored at one more than the current index value, possibly
 * NULL, assuming access is externally synchronized by a lock.
 */
uint64_t *
pctrie_iter_next(struct pctrie_iter *it)
{
	return (_pctrie_iter_stride(it, 1, NULL, PCTRIE_LOCKED));
}

/*
 * Returns the value stored at one less than the current index value, possibly
 * NULL, assuming access is externally synchronized by a lock.
 */
uint64_t *
pctrie_iter_prev(struct pctrie_iter *it)
{
	return (_pctrie_iter_stride(it, -1, NULL, PCTRIE_LOCKED));
}

/*
 * Find first leaf >= index, and fill iter with the path to the parent of that
 * leaf.  Return NULL if there is no such leaf less than limit.
 * A 'limit' of zero means no limit.
 */
static __inline uint64_t *
_pctrie_lookup_ge(struct pctrie *ptree, struct pctrie_node *node,
    uint64_t index, struct pctrie_node **parent_out, uint64_t limit)
{
	struct pctrie_node *parent;
	uint64_t *m;
	int slot;

	/* Seek a node that matches index. */
	node = _pctrie_lookup_node(ptree, node, index, &parent,
	    NULL, PCTRIE_LOCKED);

	/*
	 * If no such node was found, and instead this path leads only to nodes
	 * < index, back up to find a subtrie with the least value > index.
	 */
	if (node == PCTRIE_NULL || *pctrie_toval(node) < index) {
		/* Climb the path to find a node with a descendant > index. */
		for (node = parent; node != NULL; node = pctrie_parent(node)) {
			slot = pctrie_slot(node, index) + 1;
			if ((node->pn_popmap >> slot) != 0)
				break;
		}
		if (node == NULL) {
			/* No higher value exists anywhere in the trie. */
			if (parent_out != NULL)
				*parent_out = NULL;
			return (NULL);
		}

		/* Step to the least child with a descendant > index. */
		slot += ffs(node->pn_popmap >> slot) - 1;
		parent = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	/* Descend to the least leaf of the subtrie. */
	while (!pctrie_isleaf(node)) {
		if (limit != 0 && node->pn_owner >= limit)
			return (NULL);
		slot = ffs(node->pn_popmap) - 1;
		parent = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	if (parent_out != NULL)
		*parent_out = parent;
	m = pctrie_toval(node);
	if (limit != 0 && *m >= limit)
		return (NULL);
	return (m);
}

/*
 * Returns the value of the first leaf with index >= 'index', or NULL if there
 * is none.  Assumes access is externally synchronized by a lock.
 */
uint64_t *
pctrie_lookup_ge(struct pctrie *ptree, uint64_t index)
{
	return (_pctrie_lookup_ge(ptree, NULL, index, NULL, 0));
}

/*
 * Find first leaf >= index, and fill iter with the path to the parent of that
 * leaf.  Return NULL if there is no such leaf less than limit.
 */
uint64_t *
pctrie_iter_lookup_ge(struct pctrie_iter *it, uint64_t index)
{
	uint64_t *m;

	m = _pctrie_lookup_ge(it->ptree, it->node, index, &it->node, it->limit);
	if (m != NULL)
		it->index = *m;
	return (m);
}

/*
 * Find the first leaf with value at least 'jump' greater than the previous
 * leaf.  Return NULL if that value is >= limit.
 */
uint64_t *
pctrie_iter_jump_ge(struct pctrie_iter *it, int64_t jump)
{
	uint64_t index = it->index + jump;

	/* Detect jump overflow. */
	if ((jump > 0) != (index > it->index))
		return (NULL);
	if (it->limit != 0 && index >= it->limit)
		return (NULL);
	return (pctrie_iter_lookup_ge(it, index));
}

/*
 * Find first leaf <= index, and fill iter with the path to the parent of that
 * leaf.  Return NULL if there is no such leaf greater than limit.
 * A 'limit' of zero means no limit.
 */
static __inline uint64_t *
_pctrie_lookup_le(struct pctrie *ptree, struct pctrie_node *node,
    uint64_t index, struct pctrie_node **parent_out, uint64_t limit)
{
	struct pctrie_node *parent;
	uint64_t *m;
	int slot;

	/* Seek a node that matches index. */
	node = _pctrie_lookup_node(ptree, node, index, &parent, NULL,
	    PCTRIE_LOCKED);

	/*
	 * If no such node was found, and instead this path leads only to nodes
	 * > index, back up to find a subtrie with the greatest value < index.
	 */
	if (node == PCTRIE_NULL || *pctrie_toval(node) > index) {
		/* Climb the path to find a node with a descendant < index. */
		for (node = parent; node != NULL; node = pctrie_parent(node)) {
			slot = pctrie_slot(node, index);
			if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
				break;
		}
		if (node == NULL) {
			/* No lower value exists anywhere in the trie. */
			if (parent_out != NULL)
				*parent_out = NULL;
			return (NULL);
		}

		/* Step to the greatest child with a descendant < index. */
		slot = ilog2(node->pn_popmap & ((1 << slot) - 1));
		parent = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	/* Descend to the greatest leaf of the subtrie. */
	while (!pctrie_isleaf(node)) {
		if (limit != 0 && limit >= node->pn_owner +
		    ((uint64_t)PCTRIE_COUNT << node->pn_clev) - 1)
			return (NULL);
		slot = ilog2(node->pn_popmap);
		parent = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	if (parent_out != NULL)
		*parent_out = parent;
	m = pctrie_toval(node);
	if (limit != 0 && *m <= limit)
		return (NULL);
	return (m);
}

/*
 * Returns the value of the first leaf with index <= 'index', or NULL if there
 * is none.  Assumes access is externally synchronized by a lock.
 */
uint64_t *
pctrie_lookup_le(struct pctrie *ptree, uint64_t index)
{
	return (_pctrie_lookup_le(ptree, NULL, index, NULL, 0));
}

/*
 * Returns the value of the first leaf with index < 'index', starting the
 * search from 'node', or NULL if there is none.
 */
uint64_t *
pctrie_subtree_lookup_lt(struct pctrie *ptree, struct pctrie_node *node,
    uint64_t index)
{
	if (index == 0)
		return (NULL);
	return (_pctrie_lookup_le(ptree, node, index - 1, NULL, 0));
}

/*
 * Find first leaf <= index, and fill iter with the path to the parent of that
 * leaf.  Return NULL if there is no such leaf greater than limit.
 */
uint64_t *
pctrie_iter_lookup_le(struct pctrie_iter *it, uint64_t index)
{
	uint64_t *m;

	m = _pctrie_lookup_le(it->ptree, it->node, index, &it->node, it->limit);
	if (m != NULL)
		it->index = *m;
	return (m);
}

/*
 * Find the first leaf with value at most 'jump' less than the previous
 * leaf.  Return NULL if that value is <= limit.
 */
uint64_t *
pctrie_iter_jump_le(struct pctrie_iter *it, int64_t jump)
{
	uint64_t index = it->index - jump;

	/* Detect jump overflow. */
	if ((jump > 0) != (index < it->index))
		return (NULL);
	if (it->limit != 0 && index <= it->limit)
		return (NULL);
	return (pctrie_iter_lookup_le(it, index));
}

/*
 * Remove the non-NULL child identified by 'index' from the set of children of
 * 'node'.  If doing so causes 'node' to have only one child, purge it from the
 * pctrie and save it in *freenode for later disposal.
 */
static void
pctrie_remove(struct pctrie *ptree, struct pctrie_node *node, uint64_t index,
    struct pctrie_node **freenode)
{
	smr_pctnode_t *parentp;
	struct pctrie_node *child;
	int slot;

	*freenode = NULL;
	parentp = pctrie_child(ptree, node, index);
	if (node == NULL) {
		/* The leaf is the root; empty the trie. */
		pctrie_node_store(parentp, PCTRIE_NULL, PCTRIE_LOCKED);
		return;
	}
	slot = pctrie_slot(node, index);
	KASSERT((node->pn_popmap & (1 << slot)) != 0,
	    ("%s: bad popmap slot %d in node %p",
	    __func__, slot, node));
	node->pn_popmap ^= 1 << slot;
	if (!powerof2(node->pn_popmap)) {
		/* 'node' still has two or more children; just clear the slot. */
		pctrie_node_store(parentp, PCTRIE_NULL, PCTRIE_LOCKED);
		return;
	}
	/*
	 * Only one child remains; splice 'node' out of the trie by linking its
	 * sole child directly to its parent, and hand 'node' to the caller to
	 * free.
	 */
	pctrie_node_store(parentp, PCTRIE_NULL, PCTRIE_UNSERIALIZED);
	KASSERT(node->pn_popmap != 0, ("%s: bad popmap all zeroes", __func__));
	slot = ffs(node->pn_popmap) - 1;
	*freenode = node;
	child = pctrie_node_load(&node->pn_child[slot], NULL, PCTRIE_LOCKED);
	KASSERT(child != PCTRIE_NULL,
	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
	node = pctrie_parent(node);
	if (!pctrie_isleaf(child))
		pctrie_setparent(child, node);
	parentp = pctrie_child(ptree, node, index);
	pctrie_node_store(parentp, child, PCTRIE_LOCKED);
}

/*
 * Remove the specified index from the tree, and return the value stored at
 * that index.  If the index is not present, return NULL.
 */
uint64_t *
pctrie_remove_lookup(struct pctrie *ptree, uint64_t index,
    struct pctrie_node **freenode)
{
	struct pctrie_node *child, *node;
	uint64_t *m;
	int slot;

	node = NULL;
	child = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
	while (!pctrie_isleaf(child)) {
		node = child;
		slot = pctrie_slot(node, index);
		child = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	if ((m = pctrie_match_value(child, index)) != NULL)
		pctrie_remove(ptree, node, index, freenode);
	else
		*freenode = NULL;
	return (m);
}

/*
 * Remove from the trie the leaf last chosen by the iterator, and
 * adjust the path if its last member is to be freed.
 */
void
pctrie_iter_remove(struct pctrie_iter *it, struct pctrie_node **freenode)
{
	KASSERT(NULL != pctrie_match_value(pctrie_node_load(pctrie_child(
	    it->ptree, it->node, it->index), NULL, PCTRIE_LOCKED), it->index),
	    ("%s: removing value %jx not at iter", __func__,
	    (uintmax_t)it->index));
	pctrie_remove(it->ptree, it->node, it->index, freenode);
	if (*freenode != NULL)
		it->node = pctrie_parent(it->node);
}

/*
 * Return the current leaf, assuming access is externally synchronized by a
 * lock.
 */
uint64_t *
pctrie_iter_value(struct pctrie_iter *it)
{
	struct pctrie_node *node;

	node = pctrie_node_load(pctrie_child(it->ptree, it->node, it->index),
	    NULL, PCTRIE_LOCKED);
	return (pctrie_toval(node));
}

/*
 * Walk the subtrie rooted at *pnode in order, invoking callback on leaves,
 * until an interior node is stripped of all children, and returned for
 * deallocation, with *pnode left pointing to the parent of that node.
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_prune(struct pctrie_node **pnode, struct pctrie_node *parent,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	struct pctrie_node *child, *node;
	int slot;

	node = *pnode;
	while (node->pn_popmap != 0) {
		/* Detach the least-indexed remaining child. */
		slot = ffs(node->pn_popmap) - 1;
		node->pn_popmap ^= 1 << slot;
		child = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_UNSERIALIZED);
		pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL,
		    PCTRIE_UNSERIALIZED);
		if (pctrie_isleaf(child)) {
			if (callback != NULL)
				callback(pctrie_toptr(child, keyoff), arg);
			continue;
		}
		/* Climb one level down the trie. */
		parent = node;
		node = child;
	}
	/* 'node' is now childless; return it for freeing. */
	*pnode = parent;
	return (node);
}

/*
 * Recover the node parent from its first child and continue pruning.
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_resume_compound(struct pctrie_node **pnode,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	if (*pnode == NULL)
		return (NULL);
	/* Climb one level up the trie. */
	return (pctrie_reclaim_prune(pnode, pctrie_parent(*pnode), callback,
	    keyoff, arg));
}

/*
 * Find the trie root, and start pruning with a NULL parent.
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_begin_compound(struct pctrie_node **pnode,
    struct pctrie *ptree,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	struct pctrie_node *node;

	node = pctrie_root_load(ptree, NULL, PCTRIE_UNSERIALIZED);
	pctrie_node_store(pctrie_root(ptree), PCTRIE_NULL, PCTRIE_UNSERIALIZED);
	if (pctrie_isleaf(node)) {
		/* The trie held at most one value; nothing to prune. */
		if (callback != NULL && node != PCTRIE_NULL)
			callback(pctrie_toptr(node, keyoff), arg);
		return (NULL);
	}
	*pnode = node;
	return (pctrie_reclaim_prune(pnode, NULL, callback, keyoff, arg));
}

/*
 * Resume reclaiming nodes after the caller disposed of the last node returned.
 */
struct pctrie_node *
pctrie_reclaim_resume(struct pctrie_node **pnode)
{
	return (pctrie_reclaim_resume_compound(pnode, NULL, 0, NULL));
}

/*
 * Begin reclaiming all nodes of the trie, without invoking any leaf callback.
 */
struct pctrie_node *
pctrie_reclaim_begin(struct pctrie_node **pnode, struct pctrie *ptree)
{
	return (pctrie_reclaim_begin_compound(pnode, ptree, NULL, 0, NULL));
}

/*
 * Resume reclaiming nodes, invoking the callback on each leaf detached.
 */
struct pctrie_node *
pctrie_reclaim_resume_cb(struct pctrie_node **pnode,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	return (pctrie_reclaim_resume_compound(pnode, callback, keyoff, arg));
}

/*
 * Begin reclaiming all nodes of the trie, invoking the callback on each leaf
 * detached.
 */
struct pctrie_node *
pctrie_reclaim_begin_cb(struct pctrie_node **pnode, struct pctrie *ptree,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	return (pctrie_reclaim_begin_compound(pnode, ptree,
	    callback, keyoff, arg));
}

/*
 * Replace an existing value in the trie with another one.
 * Panics if there is not an old value in the trie at the new value's index.
 */
uint64_t *
pctrie_replace(struct pctrie *ptree, uint64_t *newval)
{
	struct pctrie_node *leaf, *parent, *node;
	uint64_t *m;
	uint64_t index;
	int slot;

	leaf = pctrie_toleaf(newval);
	index = *newval;
	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
	parent = NULL;
	for (;;) {
		if (pctrie_isleaf(node)) {
			if ((m = pctrie_toval(node)) != NULL && *m == index) {
				/*
				 * Whenever 'parent' is non-NULL, 'slot' was
				 * set by the pctrie_keybarr() call that led
				 * to this child, so it indexes the matching
				 * leaf within 'parent'.
				 */
				if (parent == NULL)
					pctrie_node_store(pctrie_root(ptree),
					    leaf, PCTRIE_LOCKED);
				else
					pctrie_node_store(
					    &parent->pn_child[slot], leaf,
					    PCTRIE_LOCKED);
				return (m);
			}
			break;
		}
		if (pctrie_keybarr(node, index, &slot))
			break;
		parent = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	panic("%s: original replacing value not found", __func__);
}

#ifdef DDB
/*
 * Show details about the given node.
 */
DB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
{
	struct pctrie_node *node, *tmp;
	int slot;
	pn_popmap_t popmap;

	if (!have_addr)
		return;
	node = (struct pctrie_node *)addr;
	db_printf("node %p, owner %jx, children popmap %04x, level %u:\n",
	    (void *)node, (uintmax_t)node->pn_owner, node->pn_popmap,
	    node->pn_clev / PCTRIE_WIDTH);
	/* Iterate the set popmap bits, clearing each as it is printed. */
	for (popmap = node->pn_popmap; popmap != 0; popmap ^= 1 << slot) {
		slot = ffs(popmap) - 1;
		tmp = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_UNSERIALIZED);
		db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
		    slot, (void *)tmp,
		    pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL,
		    node->pn_clev / PCTRIE_WIDTH);
	}
}
#endif /* DDB */