/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 EMC Corp.
 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * Path-compressed radix trie implementation.
 * The following code is not generalized into a general-purpose library
 * because too many embedded parameters should really be decided by the
 * library consumers.  At the same time, consumers of this code must
 * achieve the highest possible performance.
 *
 * The implementation takes into account the following rationale:
 * - The size of the nodes should be as small as possible but still big
 *   enough to avoid a large maximum depth for the trie.  This is a balance
 *   between not wiring too much physical memory for the nodes and avoiding
 *   too much cache pollution during trie operations.
 * - There is not a huge bias toward the number of lookup operations over
 *   the number of insert and remove operations.  This implies that
 *   optimizations that help one operation but hurt the other must be
 *   carefully evaluated.
 * - On average not many nodes are expected to be fully populated, hence
 *   level compression may just complicate things.
 */

#include <sys/cdefs.h>
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/libkern.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/smr.h>
#include <sys/smr_types.h>

#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_radix.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

/*
 * These widths should allow the pointers to a node's children to fit within
 * a single cache line.  The extra levels from a narrow width should not be
 * a problem thanks to path compression.
 */
#ifdef __LP64__
#define	VM_RADIX_WIDTH	4
#else
#define	VM_RADIX_WIDTH	3
#endif

#define	VM_RADIX_COUNT	(1 << VM_RADIX_WIDTH)
#define	VM_RADIX_MASK	(VM_RADIX_COUNT - 1)
#define	VM_RADIX_LIMIT						\
	(howmany(sizeof(vm_pindex_t) * NBBY, VM_RADIX_WIDTH) - 1)

#if VM_RADIX_WIDTH == 3
typedef uint8_t rn_popmap_t;
#elif VM_RADIX_WIDTH == 4
typedef uint16_t rn_popmap_t;
#elif VM_RADIX_WIDTH == 5
typedef uint32_t rn_popmap_t;
#else
#error Unsupported width
#endif
_Static_assert(sizeof(rn_popmap_t) <= sizeof(int),
    "rn_popmap_t too wide");
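
/*
 * Illustrative arithmetic (editorial note, not a normative statement): with
 * VM_RADIX_WIDTH == 4 and a 64-bit vm_pindex_t, each node has
 * VM_RADIX_COUNT == 16 children, VM_RADIX_LIMIT == howmany(64, 4) - 1 == 15
 * is the highest possible node level (so rn_clev ranges over multiples of 4
 * up to 60), and rn_popmap needs one bit per child, hence uint16_t.
 */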

/* Set of all flag bits stored in node pointers. */
#define	VM_RADIX_FLAGS	(VM_RADIX_ISLEAF)
#define	VM_RADIX_PAD	VM_RADIX_FLAGS

enum vm_radix_access { SMR, LOCKED, UNSERIALIZED };

struct vm_radix_node;
typedef SMR_POINTER(struct vm_radix_node *) smrnode_t;

struct vm_radix_node {
	vm_pindex_t	rn_owner;			/* Owner of record. */
	rn_popmap_t	rn_popmap;			/* Valid children. */
	uint8_t		rn_clev;			/* Level * WIDTH. */
	smrnode_t	rn_child[VM_RADIX_COUNT];	/* Child nodes. */
};

static uma_zone_t vm_radix_node_zone;
static smr_t vm_radix_smr;

static void vm_radix_node_store(smrnode_t *p, struct vm_radix_node *v,
    enum vm_radix_access access);

/*
 * Map index to an array position for the children of rnode.
 */
static __inline int
vm_radix_slot(struct vm_radix_node *rnode, vm_pindex_t index)
{
	return ((index >> rnode->rn_clev) & VM_RADIX_MASK);
}

/*
 * Returns true if index does not belong to the specified rnode.  Otherwise,
 * sets slot value, and returns false.
 */
static __inline bool
vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t index, int *slot)
{
	index = (index - rnode->rn_owner) >> rnode->rn_clev;
	if (index >= VM_RADIX_COUNT)
		return (true);
	*slot = index;
	return (false);
}

/*
 * Allocate a radix node.
 */
static struct vm_radix_node *
vm_radix_node_get(vm_pindex_t index, vm_pindex_t newind)
{
	struct vm_radix_node *rnode;

	rnode = uma_zalloc_smr(vm_radix_node_zone, M_NOWAIT);
	if (rnode == NULL)
		return (NULL);

	/*
	 * We want to clear the last child pointer after the final section
	 * has exited so lookup can not return false negatives.  It is done
	 * here because it will be cache-cold in the dtor callback.
	 */
	if (rnode->rn_popmap != 0) {
		vm_radix_node_store(&rnode->rn_child[ffs(rnode->rn_popmap) - 1],
		    VM_RADIX_NULL, UNSERIALIZED);
		rnode->rn_popmap = 0;
	}

	/*
	 * From the highest-order bit where the indexes differ,
	 * compute the highest level in the trie where they differ.  Then,
	 * compute the least index of this subtrie.
	 */
	KASSERT(index != newind, ("%s: passing the same key value %jx",
	    __func__, (uintmax_t)index));
	_Static_assert(sizeof(long long) >= sizeof(vm_pindex_t),
	    "vm_pindex_t too wide");
	_Static_assert(sizeof(vm_pindex_t) * NBBY <=
	    (1 << (sizeof(rnode->rn_clev) * NBBY)), "rn_clev too narrow");
	rnode->rn_clev = rounddown(flsll(index ^ newind) - 1, VM_RADIX_WIDTH);
	rnode->rn_owner = VM_RADIX_COUNT;
	rnode->rn_owner = index & -(rnode->rn_owner << rnode->rn_clev);
	return (rnode);
}
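
/*
 * Illustrative example of the computation above (editorial, with arbitrary
 * values): for index 0x1234 and newind 0x1267 with VM_RADIX_WIDTH == 4, the
 * keys first differ at bit 6, so rn_clev = rounddown(6, 4) = 4 and
 * rn_owner = 0x1200.  The new node covers indexes 0x1200-0x12ff, and the
 * two keys fall into child slots 3 and 6, respectively.
 */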

/*
 * Free radix node.
 */
static __inline void
vm_radix_node_put(struct vm_radix_node *rnode)
{
#ifdef INVARIANTS
	int slot;

	KASSERT(powerof2(rnode->rn_popmap),
	    ("vm_radix_node_put: rnode %p has too many children %04x", rnode,
	    rnode->rn_popmap));
	for (slot = 0; slot < VM_RADIX_COUNT; slot++) {
		if ((rnode->rn_popmap & (1 << slot)) != 0)
			continue;
		KASSERT(smr_unserialized_load(&rnode->rn_child[slot], true) ==
		    VM_RADIX_NULL,
		    ("vm_radix_node_put: rnode %p has a child", rnode));
	}
#endif
	uma_zfree_smr(vm_radix_node_zone, rnode);
}

/*
 * Fetch a node pointer from a slot in another node.
 */
static __inline struct vm_radix_node *
vm_radix_node_load(smrnode_t *p, enum vm_radix_access access)
{

	switch (access) {
	case UNSERIALIZED:
		return (smr_unserialized_load(p, true));
	case LOCKED:
		return (smr_serialized_load(p, true));
	case SMR:
		return (smr_entered_load(p, vm_radix_smr));
	}
	__assert_unreachable();
}

static __inline void
vm_radix_node_store(smrnode_t *p, struct vm_radix_node *v,
    enum vm_radix_access access)
{

	switch (access) {
	case UNSERIALIZED:
		smr_unserialized_store(p, v, true);
		break;
	case LOCKED:
		smr_serialized_store(p, v, true);
		break;
	case SMR:
		panic("vm_radix_node_store: Not supported in smr section.");
	}
}

/*
 * Get the root node for a radix tree.
 */
static __inline struct vm_radix_node *
vm_radix_root_load(struct vm_radix *rtree, enum vm_radix_access access)
{

	return (vm_radix_node_load((smrnode_t *)&rtree->rt_root, access));
}

/*
 * Set the root node for a radix tree.
 */
static __inline void
vm_radix_root_store(struct vm_radix *rtree, struct vm_radix_node *rnode,
    enum vm_radix_access access)
{

	vm_radix_node_store((smrnode_t *)&rtree->rt_root, rnode, access);
}

/*
 * Returns TRUE if the specified radix node is a leaf and FALSE otherwise.
 */
static __inline bool
vm_radix_isleaf(struct vm_radix_node *rnode)
{

	return (((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0);
}

/*
 * Returns page cast to radix node with leaf bit set.
 */
static __inline struct vm_radix_node *
vm_radix_toleaf(vm_page_t page)
{
	return ((struct vm_radix_node *)((uintptr_t)page | VM_RADIX_ISLEAF));
}

/*
 * Returns the associated page extracted from rnode.
 */
static __inline vm_page_t
vm_radix_topage(struct vm_radix_node *rnode)
{

	return ((vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS));
}
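
/*
 * Editorial note, assuming the definitions in vm_radix.h: VM_RADIX_NULL is
 * the leaf encoding of a NULL page, i.e. a pointer with only VM_RADIX_ISLEAF
 * set.  Empty child slots and an empty root therefore look like leaves whose
 * vm_radix_topage() is NULL, which is what the lookup routines below test.
 */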

/*
 * Make 'child' a child of 'rnode'.
 */
static __inline void
vm_radix_addnode(struct vm_radix_node *rnode, vm_pindex_t index,
    struct vm_radix_node *child, enum vm_radix_access access)
{
	int slot;

	slot = vm_radix_slot(rnode, index);
	vm_radix_node_store(&rnode->rn_child[slot], child, access);
	rnode->rn_popmap ^= 1 << slot;
	KASSERT((rnode->rn_popmap & (1 << slot)) != 0,
	    ("%s: bad popmap slot %d in rnode %p", __func__, slot, rnode));
}

/*
 * Internal helper for vm_radix_reclaim_allnodes().
 * This function is recursive.
 */
static void
vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
{
	struct vm_radix_node *child;
	int slot;

	while (rnode->rn_popmap != 0) {
		slot = ffs(rnode->rn_popmap) - 1;
		child = vm_radix_node_load(&rnode->rn_child[slot],
		    UNSERIALIZED);
		KASSERT(child != VM_RADIX_NULL,
		    ("%s: bad popmap slot %d in rnode %p",
		    __func__, slot, rnode));
		if (!vm_radix_isleaf(child))
			vm_radix_reclaim_allnodes_int(child);
		rnode->rn_popmap ^= 1 << slot;
		vm_radix_node_store(&rnode->rn_child[slot], VM_RADIX_NULL,
		    UNSERIALIZED);
	}
	vm_radix_node_put(rnode);
}

/*
 * Radix node zone initializer.
 */
static int
vm_radix_zone_init(void *mem, int size, int flags)
{
	struct vm_radix_node *rnode;

	rnode = mem;
	rnode->rn_popmap = 0;
	for (int i = 0; i < nitems(rnode->rn_child); i++)
		vm_radix_node_store(&rnode->rn_child[i], VM_RADIX_NULL,
		    UNSERIALIZED);
	return (0);
}

#ifndef UMA_MD_SMALL_ALLOC
void vm_radix_reserve_kva(void);
/*
 * Reserve the KVA necessary to satisfy the node allocation.
 * This is mandatory on architectures without a direct mapping, because they
 * would otherwise have to carve into the kernel maps for every node
 * allocation, resulting in deadlocks for consumers already working with
 * kernel maps.
 */
void
vm_radix_reserve_kva(void)
{

	/*
	 * Calculate the number of reserved nodes, discounting the pages that
	 * are needed to store them.
	 */
	if (!uma_zone_reserve_kva(vm_radix_node_zone,
	    ((vm_paddr_t)vm_cnt.v_page_count * PAGE_SIZE) / (PAGE_SIZE +
	    sizeof(struct vm_radix_node))))
		panic("%s: unable to reserve KVA", __func__);
}
#endif

/*
 * Initialize the UMA slab zone.
 */
void
vm_radix_zinit(void)
{

	vm_radix_node_zone = uma_zcreate("RADIX NODE",
	    sizeof(struct vm_radix_node), NULL, NULL, vm_radix_zone_init, NULL,
	    VM_RADIX_PAD, UMA_ZONE_VM | UMA_ZONE_SMR);
	vm_radix_smr = uma_zone_get_smr(vm_radix_node_zone);
}
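
/*
 * Editorial reading of the uma_zcreate() arguments above (not an
 * authoritative statement): passing VM_RADIX_PAD as the alignment argument
 * keeps the VM_RADIX_ISLEAF bit clear in every node address, so the low bit
 * remains free for tagging leaf pointers, while UMA_ZONE_SMR provides the
 * SMR state returned by uma_zone_get_smr() and later entered by
 * vm_radix_lookup_unlocked().
 */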

/*
 * Inserts the key-value pair into the trie.
 * Panics if the key already exists.
 */
int
vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
{
	vm_pindex_t index, newind;
	struct vm_radix_node *leaf, *parent, *rnode;
	smrnode_t *parentp;
	int slot;

	index = page->pindex;
	leaf = vm_radix_toleaf(page);

	/*
	 * The owner of record for root is not really important because it
	 * will never be used.
	 */
	rnode = vm_radix_root_load(rtree, LOCKED);
	parent = NULL;
	for (;;) {
		if (vm_radix_isleaf(rnode)) {
			if (rnode == VM_RADIX_NULL) {
				if (parent == NULL)
					rtree->rt_root = leaf;
				else
					vm_radix_addnode(parent, index, leaf,
					    LOCKED);
				return (0);
			}
			newind = vm_radix_topage(rnode)->pindex;
			if (newind == index)
				panic("%s: key %jx is already present",
				    __func__, (uintmax_t)index);
			break;
		}
		if (vm_radix_keybarr(rnode, index, &slot)) {
			newind = rnode->rn_owner;
			break;
		}
		parent = rnode;
		rnode = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
	}

	/*
	 * A new node is needed because the right insertion level is reached.
	 * Set up the new intermediate node and add the two children: the
	 * new object and the older edge or object.
	 */
	parentp = (parent != NULL) ? &parent->rn_child[slot] :
	    (smrnode_t *)&rtree->rt_root;
	parent = vm_radix_node_get(index, newind);
	if (parent == NULL)
		return (ENOMEM);
	/* These writes are not yet visible due to ordering. */
	vm_radix_addnode(parent, index, leaf, UNSERIALIZED);
	vm_radix_addnode(parent, newind, rnode, UNSERIALIZED);
	/* Serializing write to make the above visible. */
	vm_radix_node_store(parentp, parent, LOCKED);
	return (0);
}

/*
 * Returns the value stored at the index.  If the index is not present,
 * NULL is returned.
 */
static __always_inline vm_page_t
_vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index,
    enum vm_radix_access access)
{
	struct vm_radix_node *rnode;
	vm_page_t m;
	int slot;

	rnode = vm_radix_root_load(rtree, access);
	for (;;) {
		if (vm_radix_isleaf(rnode)) {
			if ((m = vm_radix_topage(rnode)) != NULL &&
			    m->pindex == index)
				return (m);
			break;
		}
		if (vm_radix_keybarr(rnode, index, &slot))
			break;
		rnode = vm_radix_node_load(&rnode->rn_child[slot], access);
	}
	return (NULL);
}

/*
 * Returns the value stored at the index assuming there is an external lock.
 *
 * If the index is not present, NULL is returned.
 */
vm_page_t
vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
{

	return (_vm_radix_lookup(rtree, index, LOCKED));
}

/*
 * Returns the value stored at the index without requiring an external lock.
 *
 * If the index is not present, NULL is returned.
 */
vm_page_t
vm_radix_lookup_unlocked(struct vm_radix *rtree, vm_pindex_t index)
{
	vm_page_t m;

	smr_enter(vm_radix_smr);
	m = _vm_radix_lookup(rtree, index, SMR);
	smr_exit(vm_radix_smr);

	return (m);
}
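
/*
 * Illustrative usage sketch (editorial; 'object' and the VM_OBJECT_* lock
 * macros stand in for the typical vm_object consumer and are not part of
 * this file).  Writers serialize on the external lock; unlocked readers rely
 * on SMR, which vm_radix_lookup_unlocked() enters and exits internally:
 *
 *	VM_OBJECT_WLOCK(object);
 *	error = vm_radix_insert(&object->rtree, m);	(0 or ENOMEM)
 *	VM_OBJECT_WUNLOCK(object);
 *
 *	m = vm_radix_lookup_unlocked(&object->rtree, pindex);
 */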

/*
 * Returns the page with the least pindex that is greater than or equal to the
 * specified pindex, or NULL if there are no such pages.
 *
 * Requires that access be externally synchronized by a lock.
 */
vm_page_t
vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *rnode, *succ;
	vm_page_t m;
	int slot;

	/*
	 * Descend the trie as if performing an ordinary lookup for the page
	 * with the specified pindex.  However, unlike an ordinary lookup, as
	 * we descend the trie, we use "succ" to remember the last
	 * branching-off point, that is, the interior node under which the
	 * page with the least pindex that is both outside our current path
	 * down the trie and more than the specified pindex resides.  (The
	 * node's popmap makes it fast and easy to recognize a branching-off
	 * point.)  If our ordinary lookup fails to yield a page with a
	 * pindex that is greater than or equal to the specified pindex, then
	 * we will exit this loop and perform a lookup starting from "succ".
	 * If "succ" is not NULL, then that lookup is guaranteed to succeed.
	 */
	rnode = vm_radix_root_load(rtree, LOCKED);
	succ = NULL;
	for (;;) {
		if (vm_radix_isleaf(rnode)) {
			if ((m = vm_radix_topage(rnode)) != NULL &&
			    m->pindex >= index)
				return (m);
			break;
		}
		if (vm_radix_keybarr(rnode, index, &slot)) {
			/*
			 * If all pages in this subtree have pindex > index,
			 * then the page in this subtree with the least pindex
			 * is the answer.
			 */
			if (rnode->rn_owner > index)
				succ = rnode;
			break;
		}

		/*
		 * Just in case the next search step leads to a subtree of all
		 * pages with pindex < index, check popmap to see if a next
		 * bigger step, to a subtree of all pages with pindex > index,
		 * is available.  If so, remember to restart the search here.
		 */
		if ((rnode->rn_popmap >> slot) > 1)
			succ = rnode;
		rnode = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
	}

	/*
	 * Restart the search from the last place visited in the subtree that
	 * included some pages with pindex > index, if there was such a place.
	 */
	if (succ == NULL)
		return (NULL);
	if (succ != rnode) {
		/*
		 * Take a step to the next bigger sibling of the node chosen
		 * last time.  In that subtree, all pages have pindex > index.
		 */
		slot = vm_radix_slot(succ, index) + 1;
		KASSERT((succ->rn_popmap >> slot) != 0,
		    ("%s: no popmap siblings past slot %d in node %p",
		    __func__, slot, succ));
		slot += ffs(succ->rn_popmap >> slot) - 1;
		succ = vm_radix_node_load(&succ->rn_child[slot], LOCKED);
	}

	/*
	 * Find the page in the subtree rooted at "succ" with the least
	 * pindex.
	 */
	while (!vm_radix_isleaf(succ)) {
		KASSERT(succ->rn_popmap != 0,
		    ("%s: no popmap children in node %p", __func__, succ));
		slot = ffs(succ->rn_popmap) - 1;
		succ = vm_radix_node_load(&succ->rn_child[slot], LOCKED);
	}
	return (vm_radix_topage(succ));
}

/*
 * Returns the page with the greatest pindex that is less than or equal to the
 * specified pindex, or NULL if there are no such pages.
 *
 * Requires that access be externally synchronized by a lock.
 */
vm_page_t
vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *pred, *rnode;
	vm_page_t m;
	int slot;

	/*
	 * Mirror the implementation of vm_radix_lookup_ge, described above.
	 */
	rnode = vm_radix_root_load(rtree, LOCKED);
	pred = NULL;
	for (;;) {
		if (vm_radix_isleaf(rnode)) {
			if ((m = vm_radix_topage(rnode)) != NULL &&
			    m->pindex <= index)
				return (m);
			break;
		}
		if (vm_radix_keybarr(rnode, index, &slot)) {
			if (rnode->rn_owner < index)
				pred = rnode;
			break;
		}
		if ((rnode->rn_popmap & ((1 << slot) - 1)) != 0)
			pred = rnode;
		rnode = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
	}
	if (pred == NULL)
		return (NULL);
	if (pred != rnode) {
		slot = vm_radix_slot(pred, index);
		KASSERT((pred->rn_popmap & ((1 << slot) - 1)) != 0,
		    ("%s: no popmap siblings before slot %d in node %p",
		    __func__, slot, pred));
		slot = fls(pred->rn_popmap & ((1 << slot) - 1)) - 1;
		pred = vm_radix_node_load(&pred->rn_child[slot], LOCKED);
	}
	while (!vm_radix_isleaf(pred)) {
		KASSERT(pred->rn_popmap != 0,
		    ("%s: no popmap children in node %p", __func__, pred));
		slot = fls(pred->rn_popmap) - 1;
		pred = vm_radix_node_load(&pred->rn_child[slot], LOCKED);
	}
	return (vm_radix_topage(pred));
}
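
/*
 * Illustrative example of the popmap tests used above (editorial): with
 * rn_popmap == 0x52 (slots 1, 4 and 6 populated) and slot == 4,
 * (0x52 >> 4) == 0x5 > 1 shows that a populated slot greater than 4 exists
 * (slot 6), while (0x52 & ((1 << 4) - 1)) == 0x2 != 0 shows that a populated
 * slot less than 4 exists (slot 1).
 */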

/*
 * Remove the specified index from the trie, and return the value stored at
 * that index.  If the index is not present, return NULL.
 */
vm_page_t
vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *child, *parent, *rnode;
	vm_page_t m;
	int slot;

	rnode = NULL;
	child = vm_radix_root_load(rtree, LOCKED);
	for (;;) {
		if (vm_radix_isleaf(child))
			break;
		parent = rnode;
		rnode = child;
		slot = vm_radix_slot(rnode, index);
		child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
	}
	if ((m = vm_radix_topage(child)) == NULL || m->pindex != index)
		return (NULL);
	if (rnode == NULL) {
		vm_radix_root_store(rtree, VM_RADIX_NULL, LOCKED);
		return (m);
	}
	KASSERT((rnode->rn_popmap & (1 << slot)) != 0,
	    ("%s: bad popmap slot %d in rnode %p", __func__, slot, rnode));
	rnode->rn_popmap ^= 1 << slot;
	vm_radix_node_store(&rnode->rn_child[slot], VM_RADIX_NULL, LOCKED);
	if (!powerof2(rnode->rn_popmap))
		return (m);
	KASSERT(rnode->rn_popmap != 0, ("%s: bad popmap all zeroes", __func__));
	slot = ffs(rnode->rn_popmap) - 1;
	child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
	KASSERT(child != VM_RADIX_NULL,
	    ("%s: bad popmap slot %d in rnode %p", __func__, slot, rnode));
	if (parent == NULL)
		vm_radix_root_store(rtree, child, LOCKED);
	else {
		slot = vm_radix_slot(parent, index);
		KASSERT(rnode ==
		    vm_radix_node_load(&parent->rn_child[slot], LOCKED),
		    ("%s: invalid child value", __func__));
		vm_radix_node_store(&parent->rn_child[slot], child, LOCKED);
	}
	/*
	 * The child is still valid and we can not zero the
	 * pointer until all smr references are gone.
	 */
	vm_radix_node_put(rnode);
	return (m);
}
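
/*
 * Editorial note: after the leaf is cleared above, a node left with a single
 * child (rn_popmap a power of two) is removed and its remaining child is
 * promoted into the parent, preserving the path-compressed shape of the
 * trie.  The node itself is freed through the SMR zone, so concurrent
 * unlocked lookups may keep dereferencing it safely until their sections
 * exit.
 */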

/*
 * Remove and free all the nodes from the radix tree.
 * This function is recursive, but the recursion depth is bounded by the
 * fixed maximum depth of the tree.
 */
void
vm_radix_reclaim_allnodes(struct vm_radix *rtree)
{
	struct vm_radix_node *root;

	root = vm_radix_root_load(rtree, LOCKED);
	if (root == VM_RADIX_NULL)
		return;
	vm_radix_root_store(rtree, VM_RADIX_NULL, UNSERIALIZED);
	if (!vm_radix_isleaf(root))
		vm_radix_reclaim_allnodes_int(root);
}

/*
 * Replace an existing page in the trie with another one.
 * Panics if there is not an old page in the trie at the new page's index.
 */
vm_page_t
vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage)
{
	struct vm_radix_node *leaf, *parent, *rnode;
	vm_page_t m;
	vm_pindex_t index;
	int slot;

	leaf = vm_radix_toleaf(newpage);
	index = newpage->pindex;
	rnode = vm_radix_root_load(rtree, LOCKED);
	parent = NULL;
	for (;;) {
		if (vm_radix_isleaf(rnode)) {
			if ((m = vm_radix_topage(rnode)) != NULL &&
			    m->pindex == index) {
				if (parent == NULL)
					rtree->rt_root = leaf;
				else
					vm_radix_node_store(
					    &parent->rn_child[slot], leaf,
					    LOCKED);
				return (m);
			}
			break;
		}
		if (vm_radix_keybarr(rnode, index, &slot))
			break;
		parent = rnode;
		rnode = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
	}
	panic("%s: original replacing page not found", __func__);
}

void
vm_radix_wait(void)
{
	uma_zwait(vm_radix_node_zone);
}

#ifdef DDB
/*
 * Show details about the given radix node.
 */
DB_SHOW_COMMAND(radixnode, db_show_radixnode)
{
	struct vm_radix_node *rnode, *tmp;
	int slot;
	rn_popmap_t popmap;

	if (!have_addr)
		return;
	rnode = (struct vm_radix_node *)addr;
	db_printf("radixnode %p, owner %jx, children popmap %04x, level %u:\n",
	    (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_popmap,
	    rnode->rn_clev / VM_RADIX_WIDTH);
	for (popmap = rnode->rn_popmap; popmap != 0; popmap ^= 1 << slot) {
		slot = ffs(popmap) - 1;
		tmp = vm_radix_node_load(&rnode->rn_child[slot], UNSERIALIZED);
		db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
		    slot, (void *)tmp,
		    vm_radix_isleaf(tmp) ? vm_radix_topage(tmp) : NULL,
		    rnode->rn_clev / VM_RADIX_WIDTH);
	}
}
#endif /* DDB */
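
/*
 * Illustrative DDB usage (editorial): the command registered above is
 * invoked from the debugger prompt as "show radixnode <address>" and prints
 * the node's owner, popmap and level, followed by one line per populated
 * child slot.
 */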