/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 EMC Corp.
 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Path-compressed radix trie implementation.
 * The code is not generalized into a general-purpose library because
 * there are too many embedded parameters that should really be decided
 * by the consumers.  At the same time, consumers of this code must
 * achieve the highest possible performance.
 *
 * The implementation takes into account the following rationale:
 * - The size of the nodes should be as small as possible but still big
 *   enough to avoid a large maximum depth for the trie.  This is a balance
 *   between the necessity to not wire too much physical memory for the nodes
 *   and the necessity to avoid too much cache pollution during the trie
 *   operations.
 * - There is not a huge bias toward the number of lookup operations over
 *   the number of insert and remove operations.  This basically implies
 *   that optimizations supposedly helping one operation but hurting the
 *   other must be carefully evaluated.
 * - On average not many nodes are expected to be fully populated, hence
 *   level compression may just complicate things.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/smr.h>
#include <sys/smr_types.h>

#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_radix.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

/*
 * These widths should allow the pointers to a node's children to fit within
 * a single cache line.  The extra levels from a narrow width should not be
 * a problem thanks to path compression.
 */
#ifdef __LP64__
#define	VM_RADIX_WIDTH	4
#else
#define	VM_RADIX_WIDTH	3
#endif

#define	VM_RADIX_COUNT	(1 << VM_RADIX_WIDTH)
#define	VM_RADIX_MASK	(VM_RADIX_COUNT - 1)
#define	VM_RADIX_LIMIT							\
	(howmany(sizeof(vm_pindex_t) * NBBY, VM_RADIX_WIDTH) - 1)

/* Flag bits stored in node pointers. */
#define	VM_RADIX_ISLEAF	0x1
#define	VM_RADIX_FLAGS	0x1
#define	VM_RADIX_PAD	VM_RADIX_FLAGS

/* Returns one unit associated with specified level. */
#define	VM_RADIX_UNITLEVEL(lev)						\
	((vm_pindex_t)1 << ((lev) * VM_RADIX_WIDTH))

enum vm_radix_access { SMR, LOCKED, UNSERIALIZED };

struct vm_radix_node;
typedef SMR_POINTER(struct vm_radix_node *) smrnode_t;

struct vm_radix_node {
	vm_pindex_t	rn_owner;			/* Owner of record. */
	uint16_t	rn_count;			/* Valid children. */
	uint8_t		rn_clev;			/* Current level. */
	int8_t		rn_last;			/* Deferred-zero slot, plus one. */
	smrnode_t	rn_child[VM_RADIX_COUNT];	/* Child nodes. */
};
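
/*
 * Concrete geometry, derived from the constants above: on LP64,
 * VM_RADIX_WIDTH is 4, so each node has 16 children and a 64-bit
 * vm_pindex_t is consumed four bits per level, giving VM_RADIX_LIMIT == 15
 * (levels 0 through 15).  Leaves are vm_page_t pointers tagged with
 * VM_RADIX_ISLEAF in bit zero; VM_RADIX_PAD is passed to uma_zcreate()
 * below as the alignment mask precisely so that bit zero of every node
 * pointer is free to carry this flag.
 */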

static uma_zone_t vm_radix_node_zone;
static smr_t vm_radix_smr;

static void vm_radix_node_store(smrnode_t *p, struct vm_radix_node *v,
    enum vm_radix_access access);

/*
 * Allocate a radix node.
 */
static struct vm_radix_node *
vm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel)
{
	struct vm_radix_node *rnode;

	rnode = uma_zalloc_smr(vm_radix_node_zone, M_NOWAIT);
	if (rnode == NULL)
		return (NULL);

	/*
	 * We want to clear the last child pointer after the final SMR
	 * section has exited so lookup cannot return false negatives.  It
	 * is done here because it will be cache-cold in the dtor callback.
	 */
	if (rnode->rn_last != 0) {
		vm_radix_node_store(&rnode->rn_child[rnode->rn_last - 1],
		    NULL, UNSERIALIZED);
		rnode->rn_last = 0;
	}
	rnode->rn_owner = owner;
	rnode->rn_count = count;
	rnode->rn_clev = clevel;
	return (rnode);
}

/*
 * Free radix node.
 */
static __inline void
vm_radix_node_put(struct vm_radix_node *rnode, int8_t last)
{
#ifdef INVARIANTS
	int slot;

	KASSERT(rnode->rn_count == 0,
	    ("vm_radix_node_put: rnode %p has %d children", rnode,
	    rnode->rn_count));
	for (slot = 0; slot < VM_RADIX_COUNT; slot++) {
		if (slot == last)
			continue;
		KASSERT(smr_unserialized_load(&rnode->rn_child[slot], true) ==
		    NULL, ("vm_radix_node_put: rnode %p has a child", rnode));
	}
#endif
	/* Off by one so a freshly zeroed node is not assigned to. */
	rnode->rn_last = last + 1;
	uma_zfree_smr(vm_radix_node_zone, rnode);
}

/*
 * Return the position in the array for a given level.
 */
static __inline int
vm_radix_slot(vm_pindex_t index, uint16_t level)
{

	return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK);
}

/* Trims the key after the specified level. */
static __inline vm_pindex_t
vm_radix_trimkey(vm_pindex_t index, uint16_t level)
{
	vm_pindex_t ret;

	ret = index;
	if (level > 0) {
		ret >>= level * VM_RADIX_WIDTH;
		ret <<= level * VM_RADIX_WIDTH;
	}
	return (ret);
}
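
/*
 * A worked example of the two helpers above, assuming VM_RADIX_WIDTH == 4:
 * for index 0xABC, vm_radix_slot(0xABC, 0) == 0xC and
 * vm_radix_slot(0xABC, 1) == 0xB, while vm_radix_trimkey(0xABC, 1) ==
 * 0xAB0, i.e. the key with every slot below the given level cleared.
 */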

/*
 * Fetch a node pointer from a slot in another node.
 */
static __inline struct vm_radix_node *
vm_radix_node_load(smrnode_t *p, enum vm_radix_access access)
{

	switch (access) {
	case UNSERIALIZED:
		return (smr_unserialized_load(p, true));
	case LOCKED:
		return (smr_serialized_load(p, true));
	case SMR:
		return (smr_entered_load(p, vm_radix_smr));
	}
	__assert_unreachable();
}

static __inline void
vm_radix_node_store(smrnode_t *p, struct vm_radix_node *v,
    enum vm_radix_access access)
{

	switch (access) {
	case UNSERIALIZED:
		smr_unserialized_store(p, v, true);
		break;
	case LOCKED:
		smr_serialized_store(p, v, true);
		break;
	case SMR:
		panic("vm_radix_node_store: Not supported in smr section.");
	}
}

/*
 * Get the root node for a radix tree.
 */
static __inline struct vm_radix_node *
vm_radix_root_load(struct vm_radix *rtree, enum vm_radix_access access)
{

	return (vm_radix_node_load((smrnode_t *)&rtree->rt_root, access));
}

/*
 * Set the root node for a radix tree.
 */
static __inline void
vm_radix_root_store(struct vm_radix *rtree, struct vm_radix_node *rnode,
    enum vm_radix_access access)
{

	vm_radix_node_store((smrnode_t *)&rtree->rt_root, rnode, access);
}
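
/*
 * A summary of the three access modes, as used by the wrappers above:
 * SMR is for lockless readers between smr_enter() and smr_exit() and
 * permits loads only; LOCKED is for writers holding the external lock,
 * whose stores have release semantics so that readers observe fully
 * initialized nodes; UNSERIALIZED is for nodes that no reader can reach,
 * where plain loads and stores suffice.
 */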

/*
 * Returns TRUE if the specified radix node is a leaf and FALSE otherwise.
 */
static __inline boolean_t
vm_radix_isleaf(struct vm_radix_node *rnode)
{

	return (((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0);
}

/*
 * Returns the associated page extracted from rnode.
 */
static __inline vm_page_t
vm_radix_topage(struct vm_radix_node *rnode)
{

	return ((vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS));
}

/*
 * Adds the page as a child of the provided node.
 */
static __inline void
vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
    vm_page_t page, enum vm_radix_access access)
{
	int slot;

	slot = vm_radix_slot(index, clev);
	vm_radix_node_store(&rnode->rn_child[slot],
	    (struct vm_radix_node *)((uintptr_t)page | VM_RADIX_ISLEAF), access);
}

/*
 * Returns the slot where two keys differ.
 * The two keys must not be equal.
 */
static __inline uint16_t
vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2)
{
	uint16_t clev;

	KASSERT(index1 != index2, ("%s: passing the same key value %jx",
	    __func__, (uintmax_t)index1));

	index1 ^= index2;
	for (clev = VM_RADIX_LIMIT;; clev--)
		if (vm_radix_slot(index1, clev) != 0)
			return (clev);
}

/*
 * Returns TRUE if it can be determined that key does not belong to the
 * specified rnode.  Otherwise, returns FALSE.
 */
static __inline boolean_t
vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx)
{

	if (rnode->rn_clev < VM_RADIX_LIMIT) {
		idx = vm_radix_trimkey(idx, rnode->rn_clev + 1);
		return (idx != rnode->rn_owner);
	}
	return (FALSE);
}
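
/*
 * Examples for the two key predicates above, assuming VM_RADIX_WIDTH == 4:
 * vm_radix_keydiff(0x100, 0x102) == 0 because the keys first differ in the
 * lowest slot, while vm_radix_keydiff(0x100, 0x900) == 2.
 * vm_radix_keybarr() is the matching test for an existing node: it trims
 * idx down to the slots above rnode's level and compares the result with
 * rn_owner, the common prefix of every key in rnode's subtree.
 */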

/*
 * Internal helper for vm_radix_reclaim_allnodes().
 * This function is recursive.
 */
static void
vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
{
	struct vm_radix_node *child;
	int slot;

	KASSERT(rnode->rn_count <= VM_RADIX_COUNT,
	    ("vm_radix_reclaim_allnodes_int: bad count in rnode %p", rnode));
	for (slot = 0; rnode->rn_count != 0; slot++) {
		child = vm_radix_node_load(&rnode->rn_child[slot],
		    UNSERIALIZED);
		if (child == NULL)
			continue;
		if (!vm_radix_isleaf(child))
			vm_radix_reclaim_allnodes_int(child);
		vm_radix_node_store(&rnode->rn_child[slot], NULL,
		    UNSERIALIZED);
		rnode->rn_count--;
	}
	vm_radix_node_put(rnode, -1);
}

#ifndef UMA_MD_SMALL_ALLOC
void vm_radix_reserve_kva(void);
/*
 * Reserve the KVA necessary to satisfy the node allocation.
 * This is mandatory on architectures not supporting direct
 * mapping as they will otherwise need to carve into the kernel maps for
 * every node allocation, resulting in deadlocks for consumers already
 * working with kernel maps.
 */
void
vm_radix_reserve_kva(void)
{

	/*
	 * Calculate the number of reserved nodes, discounting the pages that
	 * are needed to store them.
	 */
	if (!uma_zone_reserve_kva(vm_radix_node_zone,
	    ((vm_paddr_t)vm_cnt.v_page_count * PAGE_SIZE) / (PAGE_SIZE +
	    sizeof(struct vm_radix_node))))
		panic("%s: unable to reserve KVA", __func__);
}
#endif
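
/*
 * The reservation above solves
 * n * (PAGE_SIZE + sizeof(struct vm_radix_node)) == v_page_count * PAGE_SIZE
 * for n: enough KVA for one node per insertable page after discounting the
 * pages consumed by the nodes themselves.
 */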

/*
 * Initialize the UMA slab zone.
 */
void
vm_radix_zinit(void)
{

	vm_radix_node_zone = uma_zcreate("RADIX NODE",
	    sizeof(struct vm_radix_node), NULL, NULL, NULL, NULL,
	    VM_RADIX_PAD, UMA_ZONE_VM | UMA_ZONE_SMR | UMA_ZONE_ZINIT);
	vm_radix_smr = uma_zone_get_smr(vm_radix_node_zone);
}

/*
 * Inserts the key-value pair into the trie.
 * Panics if the key already exists.
 */
int
vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
{
	vm_pindex_t index, newind;
	struct vm_radix_node *rnode, *tmp;
	smrnode_t *parentp;
	vm_page_t m;
	int slot;
	uint16_t clev;

	index = page->pindex;

	/*
	 * The owner of record for root is not really important because it
	 * will never be used.
	 */
	rnode = vm_radix_root_load(rtree, LOCKED);
	if (rnode == NULL) {
		rtree->rt_root = (uintptr_t)page | VM_RADIX_ISLEAF;
		return (0);
	}
	parentp = (smrnode_t *)&rtree->rt_root;
	for (;;) {
		if (vm_radix_isleaf(rnode)) {
			m = vm_radix_topage(rnode);
			if (m->pindex == index)
				panic("%s: key %jx is already present",
				    __func__, (uintmax_t)index);
			clev = vm_radix_keydiff(m->pindex, index);
			tmp = vm_radix_node_get(vm_radix_trimkey(index,
			    clev + 1), 2, clev);
			if (tmp == NULL)
				return (ENOMEM);
			/* These writes are not yet visible due to ordering. */
			vm_radix_addpage(tmp, index, clev, page, UNSERIALIZED);
			vm_radix_addpage(tmp, m->pindex, clev, m, UNSERIALIZED);
			/* Synchronize to make the leaf visible. */
			vm_radix_node_store(parentp, tmp, LOCKED);
			return (0);
		} else if (vm_radix_keybarr(rnode, index))
			break;
		slot = vm_radix_slot(index, rnode->rn_clev);
		parentp = &rnode->rn_child[slot];
		tmp = vm_radix_node_load(parentp, LOCKED);
		if (tmp == NULL) {
			rnode->rn_count++;
			vm_radix_addpage(rnode, index, rnode->rn_clev, page,
			    LOCKED);
			return (0);
		}
		rnode = tmp;
	}

	/*
	 * A new node is needed because the right insertion level has been
	 * reached.  Set up the new intermediate node and add the two
	 * children: the new page and the older edge.
	 */
	newind = rnode->rn_owner;
	clev = vm_radix_keydiff(newind, index);
	tmp = vm_radix_node_get(vm_radix_trimkey(index, clev + 1), 2, clev);
	if (tmp == NULL)
		return (ENOMEM);
	slot = vm_radix_slot(newind, clev);
	/* These writes are not yet visible due to ordering. */
	vm_radix_addpage(tmp, index, clev, page, UNSERIALIZED);
	vm_radix_node_store(&tmp->rn_child[slot], rnode, UNSERIALIZED);
	/* Serializing write to make the above visible. */
	vm_radix_node_store(parentp, tmp, LOCKED);

	return (0);
}
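
/*
 * A sketch of the expected calling pattern (illustrative only; the lock
 * and the retry policy belong to the consumer, e.g. the VM object code):
 *
 *	VM_OBJECT_WLOCK(object);
 *	while (vm_radix_insert(&object->rtree, m) != 0) {
 *		VM_OBJECT_WUNLOCK(object);
 *		vm_radix_wait();	(sleeps until a node may be free)
 *		VM_OBJECT_WLOCK(object);
 *	}
 */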

/*
 * Returns TRUE if the specified radix tree contains a single leaf and FALSE
 * otherwise.
 */
boolean_t
vm_radix_is_singleton(struct vm_radix *rtree)
{
	struct vm_radix_node *rnode;

	rnode = vm_radix_root_load(rtree, LOCKED);
	if (rnode == NULL)
		return (FALSE);
	return (vm_radix_isleaf(rnode));
}

/*
 * Returns the value stored at the index.  If the index is not present,
 * NULL is returned.
 */
static __always_inline vm_page_t
_vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index,
    enum vm_radix_access access)
{
	struct vm_radix_node *rnode;
	vm_page_t m;
	int slot;

	rnode = vm_radix_root_load(rtree, access);
	while (rnode != NULL) {
		if (vm_radix_isleaf(rnode)) {
			m = vm_radix_topage(rnode);
			if (m->pindex == index)
				return (m);
			break;
		}
		if (vm_radix_keybarr(rnode, index))
			break;
		slot = vm_radix_slot(index, rnode->rn_clev);
		rnode = vm_radix_node_load(&rnode->rn_child[slot], access);
	}
	return (NULL);
}

/*
 * Returns the value stored at the index assuming there is an external lock.
 *
 * If the index is not present, NULL is returned.
 */
vm_page_t
vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
{

	return (_vm_radix_lookup(rtree, index, LOCKED));
}

/*
 * Returns the value stored at the index without requiring an external lock.
 *
 * If the index is not present, NULL is returned.
 */
vm_page_t
vm_radix_lookup_unlocked(struct vm_radix *rtree, vm_pindex_t index)
{
	vm_page_t m;

	smr_enter(vm_radix_smr);
	m = _vm_radix_lookup(rtree, index, SMR);
	smr_exit(vm_radix_smr);

	return (m);
}
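
/*
 * Note that vm_radix_lookup_unlocked() only guarantees that the returned
 * page was present at some instant during the call: the SMR section keeps
 * node memory type-stable while it is traversed, but the page itself may
 * be concurrently removed, so callers must revalidate it (for example by
 * acquiring a reference and re-checking its identity) before relying on it.
 */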

/*
 * Look up the nearest entry at a position greater than or equal to index.
 */
vm_page_t
vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *stack[VM_RADIX_LIMIT];
	vm_pindex_t inc;
	vm_page_t m;
	struct vm_radix_node *child, *rnode;
#ifdef INVARIANTS
	int loops = 0;
#endif
	int slot, tos;

	rnode = vm_radix_root_load(rtree, LOCKED);
	if (rnode == NULL)
		return (NULL);
	else if (vm_radix_isleaf(rnode)) {
		m = vm_radix_topage(rnode);
		if (m->pindex >= index)
			return (m);
		else
			return (NULL);
	}
	tos = 0;
	for (;;) {
		/*
		 * If the keys differ before the current bisection node,
		 * then the search key might roll back to the earliest
		 * available bisection node or to the smallest key
		 * in the current node (if the owner is greater than the
		 * search key).
		 */
		if (vm_radix_keybarr(rnode, index)) {
			if (index > rnode->rn_owner) {
ascend:
				KASSERT(++loops < 1000,
				    ("vm_radix_lookup_ge: too many loops"));

				/*
				 * Pop nodes from the stack until either the
				 * stack is empty or a node that could have a
				 * matching descendant is found.
				 */
				do {
					if (tos == 0)
						return (NULL);
					rnode = stack[--tos];
				} while (vm_radix_slot(index,
				    rnode->rn_clev) == (VM_RADIX_COUNT - 1));

				/*
				 * The following computation cannot overflow
				 * because index's slot at the current level
				 * is less than VM_RADIX_COUNT - 1.
				 */
				index = vm_radix_trimkey(index,
				    rnode->rn_clev);
				index += VM_RADIX_UNITLEVEL(rnode->rn_clev);
			} else
				index = rnode->rn_owner;
			KASSERT(!vm_radix_keybarr(rnode, index),
			    ("vm_radix_lookup_ge: keybarr failed"));
		}
		slot = vm_radix_slot(index, rnode->rn_clev);
		child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
		if (vm_radix_isleaf(child)) {
			m = vm_radix_topage(child);
			if (m->pindex >= index)
				return (m);
		} else if (child != NULL)
			goto descend;

		/*
		 * Look for an available edge or page within the current
		 * bisection node.
		 */
		if (slot < (VM_RADIX_COUNT - 1)) {
			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
			index = vm_radix_trimkey(index, rnode->rn_clev);
			do {
				index += inc;
				slot++;
				child = vm_radix_node_load(
				    &rnode->rn_child[slot], LOCKED);
				if (vm_radix_isleaf(child)) {
					m = vm_radix_topage(child);
					if (m->pindex >= index)
						return (m);
				} else if (child != NULL)
					goto descend;
			} while (slot < (VM_RADIX_COUNT - 1));
		}
		KASSERT(child == NULL || vm_radix_isleaf(child),
		    ("vm_radix_lookup_ge: child is radix node"));

		/*
		 * If a page or edge greater than the search slot is not found
		 * in the current node, ascend to the next higher-level node.
		 */
		goto ascend;
descend:
		KASSERT(rnode->rn_clev > 0,
		    ("vm_radix_lookup_ge: pushing leaf's parent"));
		KASSERT(tos < VM_RADIX_LIMIT,
		    ("vm_radix_lookup_ge: stack overflow"));
		stack[tos++] = rnode;
		rnode = child;
	}
}
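
/*
 * The ascend arithmetic above, by example (VM_RADIX_WIDTH == 4): if the
 * search for 0x12F has exhausted a subtree and pops back to a level 1 node,
 * the slot of 0x12F at level 1 is 2, so the key is advanced to
 * vm_radix_trimkey(0x12F, 1) + VM_RADIX_UNITLEVEL(1) == 0x120 + 0x10 ==
 * 0x130, the smallest key of the next subtree; slots equal to
 * VM_RADIX_COUNT - 1 are popped past first precisely so that this addition
 * cannot carry out of the level.
 */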

/*
 * Look up the nearest entry at a position less than or equal to index.
 */
vm_page_t
vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *stack[VM_RADIX_LIMIT];
	vm_pindex_t inc;
	vm_page_t m;
	struct vm_radix_node *child, *rnode;
#ifdef INVARIANTS
	int loops = 0;
#endif
	int slot, tos;

	rnode = vm_radix_root_load(rtree, LOCKED);
	if (rnode == NULL)
		return (NULL);
	else if (vm_radix_isleaf(rnode)) {
		m = vm_radix_topage(rnode);
		if (m->pindex <= index)
			return (m);
		else
			return (NULL);
	}
	tos = 0;
	for (;;) {
		/*
		 * If the keys differ before the current bisection node,
		 * then the search key might roll back to the earliest
		 * available bisection node or to the largest key
		 * in the current node (if the owner is smaller than the
		 * search key).
		 */
		if (vm_radix_keybarr(rnode, index)) {
			if (index > rnode->rn_owner) {
				index = rnode->rn_owner + VM_RADIX_COUNT *
				    VM_RADIX_UNITLEVEL(rnode->rn_clev);
			} else {
ascend:
				KASSERT(++loops < 1000,
				    ("vm_radix_lookup_le: too many loops"));

				/*
				 * Pop nodes from the stack until either the
				 * stack is empty or a node that could have a
				 * matching descendant is found.
				 */
				do {
					if (tos == 0)
						return (NULL);
					rnode = stack[--tos];
				} while (vm_radix_slot(index,
				    rnode->rn_clev) == 0);

				/*
				 * The following computation cannot overflow
				 * because index's slot at the current level
				 * is greater than 0.
				 */
				index = vm_radix_trimkey(index,
				    rnode->rn_clev);
			}
			index--;
			KASSERT(!vm_radix_keybarr(rnode, index),
			    ("vm_radix_lookup_le: keybarr failed"));
		}
		slot = vm_radix_slot(index, rnode->rn_clev);
		child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
		if (vm_radix_isleaf(child)) {
			m = vm_radix_topage(child);
			if (m->pindex <= index)
				return (m);
		} else if (child != NULL)
			goto descend;

		/*
		 * Look for an available edge or page within the current
		 * bisection node.
		 */
		if (slot > 0) {
			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
			index |= inc - 1;
			do {
				index -= inc;
				slot--;
				child = vm_radix_node_load(
				    &rnode->rn_child[slot], LOCKED);
				if (vm_radix_isleaf(child)) {
					m = vm_radix_topage(child);
					if (m->pindex <= index)
						return (m);
				} else if (child != NULL)
					goto descend;
			} while (slot > 0);
		}
		KASSERT(child == NULL || vm_radix_isleaf(child),
		    ("vm_radix_lookup_le: child is radix node"));

		/*
		 * If a page or edge smaller than the search slot is not found
		 * in the current node, ascend to the next higher-level node.
		 */
		goto ascend;
descend:
		KASSERT(rnode->rn_clev > 0,
		    ("vm_radix_lookup_le: pushing leaf's parent"));
		KASSERT(tos < VM_RADIX_LIMIT,
		    ("vm_radix_lookup_le: stack overflow"));
		stack[tos++] = rnode;
		rnode = child;
	}
}
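
/*
 * The predecessor walk above mirrors vm_radix_lookup_ge(): "index |= inc - 1"
 * saturates every slot below the current level, so that, e.g. with
 * VM_RADIX_WIDTH == 4 at a level 1 node, index 0x123 becomes 0x12F, the
 * largest key of slot 2's subtree, and each "index -= inc" then steps to
 * the largest key of the preceding subtree.
 */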

/*
 * Remove the specified index from the trie, and return the value stored at
 * that index.  If the index is not present, return NULL.
 */
vm_page_t
vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *rnode, *parent, *tmp;
	vm_page_t m;
	int i, slot;

	rnode = vm_radix_root_load(rtree, LOCKED);
	if (vm_radix_isleaf(rnode)) {
		m = vm_radix_topage(rnode);
		if (m->pindex != index)
			return (NULL);
		vm_radix_root_store(rtree, NULL, LOCKED);
		return (m);
	}
	parent = NULL;
	for (;;) {
		if (rnode == NULL)
			return (NULL);
		slot = vm_radix_slot(index, rnode->rn_clev);
		tmp = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
		if (vm_radix_isleaf(tmp)) {
			m = vm_radix_topage(tmp);
			if (m->pindex != index)
				return (NULL);
			vm_radix_node_store(&rnode->rn_child[slot], NULL,
			    LOCKED);
			rnode->rn_count--;
			if (rnode->rn_count > 1)
				return (m);
			for (i = 0; i < VM_RADIX_COUNT; i++)
				if (vm_radix_node_load(&rnode->rn_child[i],
				    LOCKED) != NULL)
					break;
			KASSERT(i != VM_RADIX_COUNT,
			    ("%s: invalid node configuration", __func__));
			tmp = vm_radix_node_load(&rnode->rn_child[i], LOCKED);
			if (parent == NULL)
				vm_radix_root_store(rtree, tmp, LOCKED);
			else {
				slot = vm_radix_slot(index, parent->rn_clev);
				KASSERT(vm_radix_node_load(
				    &parent->rn_child[slot], LOCKED) == rnode,
				    ("%s: invalid child value", __func__));
				vm_radix_node_store(&parent->rn_child[slot],
				    tmp, LOCKED);
			}
			/*
			 * The child is still valid and we cannot zero the
			 * pointer until all SMR references are gone.
			 */
			rnode->rn_count--;
			vm_radix_node_put(rnode, i);
			return (m);
		}
		parent = rnode;
		rnode = tmp;
	}
}
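
/*
 * A note on the collapse in vm_radix_remove(): when a removal leaves an
 * interior node with a single child, the node is unlinked and replaced by
 * that child.  The stale child pointer in the dying node is deliberately
 * left intact for concurrent SMR readers and is only zeroed when the node
 * is reallocated after a grace period; see the rn_last handling in
 * vm_radix_node_get() and vm_radix_node_put().
 */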

/*
 * Remove and free all the nodes from the radix tree.
 * This function is recursive, but recursion is bounded because the
 * maximum depth of the tree is fixed.
 */
void
vm_radix_reclaim_allnodes(struct vm_radix *rtree)
{
	struct vm_radix_node *root;

	root = vm_radix_root_load(rtree, LOCKED);
	if (root == NULL)
		return;
	vm_radix_root_store(rtree, NULL, UNSERIALIZED);
	if (!vm_radix_isleaf(root))
		vm_radix_reclaim_allnodes_int(root);
}

/*
 * Replace an existing page in the trie with another one.
 * Panics if there is not an old page in the trie at the new page's index.
 */
vm_page_t
vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage)
{
	struct vm_radix_node *rnode, *tmp;
	vm_page_t m;
	vm_pindex_t index;
	int slot;

	index = newpage->pindex;
	rnode = vm_radix_root_load(rtree, LOCKED);
	if (rnode == NULL)
		panic("%s: replacing page on an empty trie", __func__);
	if (vm_radix_isleaf(rnode)) {
		m = vm_radix_topage(rnode);
		if (m->pindex != index)
			panic("%s: original replacing root key not found",
			    __func__);
		rtree->rt_root = (uintptr_t)newpage | VM_RADIX_ISLEAF;
		return (m);
	}
	for (;;) {
		slot = vm_radix_slot(index, rnode->rn_clev);
		tmp = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
		if (vm_radix_isleaf(tmp)) {
			m = vm_radix_topage(tmp);
			if (m->pindex == index) {
				vm_radix_node_store(&rnode->rn_child[slot],
				    (struct vm_radix_node *)((uintptr_t)newpage |
				    VM_RADIX_ISLEAF), LOCKED);
				return (m);
			} else
				break;
		} else if (tmp == NULL || vm_radix_keybarr(tmp, index))
			break;
		rnode = tmp;
	}
	panic("%s: original replacing page not found", __func__);
}

void
vm_radix_wait(void)
{
	uma_zwait(vm_radix_node_zone);
}

#ifdef DDB
/*
 * Show details about the given radix node.
 */
DB_SHOW_COMMAND(radixnode, db_show_radixnode)
{
	struct vm_radix_node *rnode, *tmp;
	int i;

	if (!have_addr)
		return;
	rnode = (struct vm_radix_node *)addr;
	db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
	    (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
	    rnode->rn_clev);
	for (i = 0; i < VM_RADIX_COUNT; i++) {
		tmp = vm_radix_node_load(&rnode->rn_child[i], UNSERIALIZED);
		if (tmp != NULL)
			db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
			    i, (void *)tmp,
			    vm_radix_isleaf(tmp) ? vm_radix_topage(tmp) : NULL,
			    rnode->rn_clev);
	}
}
#endif /* DDB */