1fe267a55SPedro F. Giffuni /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 3fe267a55SPedro F. Giffuni * 4774d251dSAttilio Rao * Copyright (c) 2013 EMC Corp. 5774d251dSAttilio Rao * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org> 6774d251dSAttilio Rao * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com> 7774d251dSAttilio Rao * All rights reserved. 8774d251dSAttilio Rao * 9774d251dSAttilio Rao * Redistribution and use in source and binary forms, with or without 10774d251dSAttilio Rao * modification, are permitted provided that the following conditions 11774d251dSAttilio Rao * are met: 12774d251dSAttilio Rao * 1. Redistributions of source code must retain the above copyright 13774d251dSAttilio Rao * notice, this list of conditions and the following disclaimer. 14774d251dSAttilio Rao * 2. Redistributions in binary form must reproduce the above copyright 15774d251dSAttilio Rao * notice, this list of conditions and the following disclaimer in the 16774d251dSAttilio Rao * documentation and/or other materials provided with the distribution. 17774d251dSAttilio Rao * 18774d251dSAttilio Rao * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19774d251dSAttilio Rao * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20774d251dSAttilio Rao * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21774d251dSAttilio Rao * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22774d251dSAttilio Rao * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23774d251dSAttilio Rao * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24774d251dSAttilio Rao * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25774d251dSAttilio Rao * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26774d251dSAttilio Rao * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27774d251dSAttilio Rao * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28774d251dSAttilio Rao * SUCH DAMAGE. 29774d251dSAttilio Rao * 30774d251dSAttilio Rao */ 31774d251dSAttilio Rao 32774d251dSAttilio Rao /* 33774d251dSAttilio Rao * Path-compressed radix trie implementation. 34774d251dSAttilio Rao * The following code is not generalized into a general purpose library 35774d251dSAttilio Rao * because there are way too many parameters embedded that should really 36774d251dSAttilio Rao * be decided by the library consumers. At the same time, consumers 37774d251dSAttilio Rao * of this code must achieve highest possible performance. 38774d251dSAttilio Rao * 39774d251dSAttilio Rao * The implementation takes into account the following rationale: 40774d251dSAttilio Rao * - Size of the nodes should be as small as possible but still big enough 41774d251dSAttilio Rao * to avoid a large maximum depth for the trie. This is a balance 42774d251dSAttilio Rao * between the necessity to not wire too much physical memory for the nodes 43774d251dSAttilio Rao * and the necessity to avoid too much cache pollution during the trie 44774d251dSAttilio Rao * operations. 45774d251dSAttilio Rao * - There is not a huge bias toward the number of lookup operations over 46774d251dSAttilio Rao * the number of insert and remove operations. 
This basically implies 47774d251dSAttilio Rao * that optimizations supposedly helping one operation but hurting the 48774d251dSAttilio Rao * other might be carefully evaluated. 49774d251dSAttilio Rao * - On average not many nodes are expected to be fully populated, hence 50774d251dSAttilio Rao * level compression may just complicate things. 51774d251dSAttilio Rao */ 52774d251dSAttilio Rao 53774d251dSAttilio Rao #include <sys/cdefs.h> 54774d251dSAttilio Rao __FBSDID("$FreeBSD$"); 55774d251dSAttilio Rao 56774d251dSAttilio Rao #include "opt_ddb.h" 57774d251dSAttilio Rao 58774d251dSAttilio Rao #include <sys/param.h> 59774d251dSAttilio Rao #include <sys/systm.h> 60774d251dSAttilio Rao #include <sys/kernel.h> 6105963ea4SDoug Moore #include <sys/libkern.h> 621ddda2ebSJeff Roberson #include <sys/proc.h> 63774d251dSAttilio Rao #include <sys/vmmeter.h> 641ddda2ebSJeff Roberson #include <sys/smr.h> 653fba8868SMark Johnston #include <sys/smr_types.h> 66774d251dSAttilio Rao 67774d251dSAttilio Rao #include <vm/uma.h> 68774d251dSAttilio Rao #include <vm/vm.h> 69774d251dSAttilio Rao #include <vm/vm_param.h> 701ddda2ebSJeff Roberson #include <vm/vm_object.h> 71774d251dSAttilio Rao #include <vm/vm_page.h> 72774d251dSAttilio Rao #include <vm/vm_radix.h> 73774d251dSAttilio Rao 74774d251dSAttilio Rao #ifdef DDB 75774d251dSAttilio Rao #include <ddb/ddb.h> 76774d251dSAttilio Rao #endif 77774d251dSAttilio Rao 78774d251dSAttilio Rao /* 79774d251dSAttilio Rao * These widths should allow the pointers to a node's children to fit within 80774d251dSAttilio Rao * a single cache line. The extra levels from a narrow width should not be 81774d251dSAttilio Rao * a problem thanks to path compression. 
 */
#ifdef __LP64__
#define	VM_RADIX_WIDTH	4
#else
#define	VM_RADIX_WIDTH	3
#endif

/* Children per node; also the radix of each trie digit. */
#define	VM_RADIX_COUNT	(1 << VM_RADIX_WIDTH)
/* Mask extracting one radix digit from a key. */
#define	VM_RADIX_MASK	(VM_RADIX_COUNT - 1)
/* Highest possible level of an interior node (root of a full-height trie). */
#define	VM_RADIX_LIMIT							\
	(howmany(sizeof(vm_pindex_t) * NBBY, VM_RADIX_WIDTH) - 1)

/* Flag bits stored in node pointers. */
#define	VM_RADIX_ISLEAF	0x1
#define	VM_RADIX_FLAGS	0x1
#define	VM_RADIX_PAD	VM_RADIX_FLAGS

/* Returns one unit associated with specified level. */
#define	VM_RADIX_UNITLEVEL(lev)						\
	((vm_pindex_t)1 << ((lev) * VM_RADIX_WIDTH))

/*
 * How a node pointer is being accessed, selecting the matching smr(9)
 * load/store primitive: SMR for read-side sections, LOCKED for writers
 * holding the external lock, UNSERIALIZED when no concurrent access is
 * possible (e.g. node construction/teardown).
 */
enum vm_radix_access { SMR, LOCKED, UNSERIALIZED };

struct vm_radix_node;
typedef SMR_POINTER(struct vm_radix_node *) smrnode_t;

struct vm_radix_node {
	vm_pindex_t	rn_owner;			/* Owner of record. */
	uint16_t	rn_count;			/* Valid children. */
	uint8_t		rn_clev;			/* Current level. */
	int8_t		rn_last;			/* zero last ptr.
							   Slot + 1 of the
							   child pointer to be
							   cleared lazily by
							   vm_radix_node_get();
							   0 means none. */
	smrnode_t	rn_child[VM_RADIX_COUNT];	/* Child nodes. */
};

static uma_zone_t vm_radix_node_zone;
static smr_t vm_radix_smr;

static void vm_radix_node_store(smrnode_t *p, struct vm_radix_node *v,
    enum vm_radix_access access);

/*
 * Return the position in the array for a given level.
 */
static __inline int
vm_radix_slot(vm_pindex_t index, uint16_t level)
{
	return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK);
}

/* Computes the key (index) with the low-order 'level' radix-digits zeroed. */
static __inline vm_pindex_t
vm_radix_trimkey(vm_pindex_t index, uint16_t level)
{
	return (index & -VM_RADIX_UNITLEVEL(level));
}

/*
 * Allocate a radix node.
 *
 * Returns NULL when the zone cannot satisfy an M_NOWAIT allocation.
 * The owner key is the index trimmed above 'clevel', and rn_count starts
 * at 2 because callers always install exactly two children in a new node.
 */
static struct vm_radix_node *
vm_radix_node_get(vm_pindex_t index, uint16_t clevel)
{
	struct vm_radix_node *rnode;

	rnode = uma_zalloc_smr(vm_radix_node_zone, M_NOWAIT);
	if (rnode == NULL)
		return (NULL);

	/*
	 * We want to clear the last child pointer after the final section
	 * has exited so lookup can not return false negatives.  It is done
	 * here because it will be cache-cold in the dtor callback.
	 */
	if (rnode->rn_last != 0) {
		vm_radix_node_store(&rnode->rn_child[rnode->rn_last - 1],
		    NULL, UNSERIALIZED);
		rnode->rn_last = 0;
	}
	rnode->rn_owner = vm_radix_trimkey(index, clevel + 1);
	rnode->rn_count = 2;
	rnode->rn_clev = clevel;
	return (rnode);
}

/*
 * Free radix node.
 *
 * 'last' is the slot of the one child pointer that may still be non-NULL;
 * it is recorded in rn_last so that vm_radix_node_get() can clear it
 * lazily after all SMR read sections have drained (see comment there).
 * All other slots must already be NULL and rn_count must be 0.
 */
static __inline void
vm_radix_node_put(struct vm_radix_node *rnode, int8_t last)
{
#ifdef INVARIANTS
	int slot;

	KASSERT(rnode->rn_count == 0,
	    ("vm_radix_node_put: rnode %p has %d children", rnode,
	    rnode->rn_count));
	for (slot = 0; slot < VM_RADIX_COUNT; slot++) {
		if (slot == last)
			continue;
		KASSERT(smr_unserialized_load(&rnode->rn_child[slot], true) ==
		    NULL, ("vm_radix_node_put: rnode %p has a child", rnode));
	}
#endif
	/* Off by one so a freshly zero'd node is not assigned to. */
	rnode->rn_last = last + 1;
	uma_zfree_smr(vm_radix_node_zone, rnode);
}

/*
 * Fetch a node pointer from a slot in another node.
 */
static __inline struct vm_radix_node *
vm_radix_node_load(smrnode_t *p, enum vm_radix_access access)
{

	switch (access) {
	case UNSERIALIZED:
		/* No concurrent writers possible; plain load. */
		return (smr_unserialized_load(p, true));
	case LOCKED:
		/* Caller holds the serializing (write-side) lock. */
		return (smr_serialized_load(p, true));
	case SMR:
		/* Read-side: caller must be inside an SMR section. */
		return (smr_entered_load(p, vm_radix_smr));
	}
	__assert_unreachable();
}

/*
 * Store a node pointer into a slot.  Writes are only legal for
 * UNSERIALIZED (unpublished node) or LOCKED (serialized writer) access;
 * storing from within an SMR read section is a bug and panics.
 */
static __inline void
vm_radix_node_store(smrnode_t *p, struct vm_radix_node *v,
    enum vm_radix_access access)
{

	switch (access) {
	case UNSERIALIZED:
		smr_unserialized_store(p, v, true);
		break;
	case LOCKED:
		smr_serialized_store(p, v, true);
		break;
	case SMR:
		panic("vm_radix_node_store: Not supported in smr section.");
	}
}

/*
 * Get the root node for a radix tree.
227774d251dSAttilio Rao */ 228774d251dSAttilio Rao static __inline struct vm_radix_node * 2291ddda2ebSJeff Roberson vm_radix_root_load(struct vm_radix *rtree, enum vm_radix_access access) 230774d251dSAttilio Rao { 231774d251dSAttilio Rao 2321ddda2ebSJeff Roberson return (vm_radix_node_load((smrnode_t *)&rtree->rt_root, access)); 233774d251dSAttilio Rao } 234774d251dSAttilio Rao 235774d251dSAttilio Rao /* 236774d251dSAttilio Rao * Set the root node for a radix tree. 237774d251dSAttilio Rao */ 238774d251dSAttilio Rao static __inline void 2391ddda2ebSJeff Roberson vm_radix_root_store(struct vm_radix *rtree, struct vm_radix_node *rnode, 2401ddda2ebSJeff Roberson enum vm_radix_access access) 241774d251dSAttilio Rao { 242774d251dSAttilio Rao 2431ddda2ebSJeff Roberson vm_radix_node_store((smrnode_t *)&rtree->rt_root, rnode, access); 244774d251dSAttilio Rao } 245774d251dSAttilio Rao 246774d251dSAttilio Rao /* 2473fc10b73SAlan Cox * Returns TRUE if the specified radix node is a leaf and FALSE otherwise. 2483fc10b73SAlan Cox */ 2491efa7dbcSDoug Moore static __inline bool 2503fc10b73SAlan Cox vm_radix_isleaf(struct vm_radix_node *rnode) 2513fc10b73SAlan Cox { 2523fc10b73SAlan Cox 2533fc10b73SAlan Cox return (((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0); 2543fc10b73SAlan Cox } 2553fc10b73SAlan Cox 2563fc10b73SAlan Cox /* 2579cfed089SDoug Moore * Returns page cast to radix node with leaf bit set. 2589cfed089SDoug Moore */ 2599cfed089SDoug Moore static __inline struct vm_radix_node * 2609cfed089SDoug Moore vm_radix_toleaf(vm_page_t page) 2619cfed089SDoug Moore { 2629cfed089SDoug Moore return ((struct vm_radix_node *)((uintptr_t)page | VM_RADIX_ISLEAF)); 2639cfed089SDoug Moore } 2649cfed089SDoug Moore 2659cfed089SDoug Moore /* 26696f1a842SAlan Cox * Returns the associated page extracted from rnode. 
267774d251dSAttilio Rao */ 268774d251dSAttilio Rao static __inline vm_page_t 26996f1a842SAlan Cox vm_radix_topage(struct vm_radix_node *rnode) 270774d251dSAttilio Rao { 271774d251dSAttilio Rao 27296f1a842SAlan Cox return ((vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS)); 273774d251dSAttilio Rao } 274774d251dSAttilio Rao 275774d251dSAttilio Rao /* 276774d251dSAttilio Rao * Adds the page as a child of the provided node. 277774d251dSAttilio Rao */ 278774d251dSAttilio Rao static __inline void 279774d251dSAttilio Rao vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev, 2801ddda2ebSJeff Roberson vm_page_t page, enum vm_radix_access access) 281774d251dSAttilio Rao { 282774d251dSAttilio Rao int slot; 283774d251dSAttilio Rao 284774d251dSAttilio Rao slot = vm_radix_slot(index, clev); 2851ddda2ebSJeff Roberson vm_radix_node_store(&rnode->rn_child[slot], 2869cfed089SDoug Moore vm_radix_toleaf(page), access); 287774d251dSAttilio Rao } 288774d251dSAttilio Rao 289774d251dSAttilio Rao /* 29005963ea4SDoug Moore * Returns the level where two keys differ. 291774d251dSAttilio Rao * It cannot accept 2 equal keys. 292774d251dSAttilio Rao */ 293774d251dSAttilio Rao static __inline uint16_t 294774d251dSAttilio Rao vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2) 295774d251dSAttilio Rao { 296774d251dSAttilio Rao 297774d251dSAttilio Rao KASSERT(index1 != index2, ("%s: passing the same key value %jx", 298774d251dSAttilio Rao __func__, (uintmax_t)index1)); 29905963ea4SDoug Moore CTASSERT(sizeof(long long) >= sizeof(vm_pindex_t)); 300774d251dSAttilio Rao 30105963ea4SDoug Moore /* 30205963ea4SDoug Moore * From the highest-order bit where the indexes differ, 30305963ea4SDoug Moore * compute the highest level in the trie where they differ. 
30405963ea4SDoug Moore */ 30505963ea4SDoug Moore return ((flsll(index1 ^ index2) - 1) / VM_RADIX_WIDTH); 306774d251dSAttilio Rao } 307774d251dSAttilio Rao 308774d251dSAttilio Rao /* 309774d251dSAttilio Rao * Returns TRUE if it can be determined that key does not belong to the 310774d251dSAttilio Rao * specified rnode. Otherwise, returns FALSE. 311774d251dSAttilio Rao */ 3121efa7dbcSDoug Moore static __inline bool 313774d251dSAttilio Rao vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx) 314774d251dSAttilio Rao { 315774d251dSAttilio Rao 3169f2e6008SAlan Cox if (rnode->rn_clev < VM_RADIX_LIMIT) { 3179f2e6008SAlan Cox idx = vm_radix_trimkey(idx, rnode->rn_clev + 1); 318c1c82b36SAlan Cox return (idx != rnode->rn_owner); 319774d251dSAttilio Rao } 3201efa7dbcSDoug Moore return (false); 321774d251dSAttilio Rao } 322774d251dSAttilio Rao 323774d251dSAttilio Rao /* 324774d251dSAttilio Rao * Internal helper for vm_radix_reclaim_allnodes(). 325774d251dSAttilio Rao * This function is recursive. 
 */
static void
vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
{
	struct vm_radix_node *child;
	int slot;

	KASSERT(rnode->rn_count <= VM_RADIX_COUNT,
	    ("vm_radix_reclaim_allnodes_int: bad count in rnode %p", rnode));
	/* Scan slots until every live child has been reclaimed. */
	for (slot = 0; rnode->rn_count != 0; slot++) {
		child = vm_radix_node_load(&rnode->rn_child[slot],
		    UNSERIALIZED);
		if (child == NULL)
			continue;
		/* Recurse into interior nodes; leaves are just unlinked. */
		if (!vm_radix_isleaf(child))
			vm_radix_reclaim_allnodes_int(child);
		vm_radix_node_store(&rnode->rn_child[slot], NULL, UNSERIALIZED);
		rnode->rn_count--;
	}
	/* -1: no slot is left populated, nothing to defer-clear. */
	vm_radix_node_put(rnode, -1);
}

#ifndef UMA_MD_SMALL_ALLOC
void vm_radix_reserve_kva(void);
/*
 * Reserve the KVA necessary to satisfy the node allocation.
 * This is mandatory in architectures not supporting direct
 * mapping as they will need otherwise to carve into the kernel maps for
 * every node allocation, resulting into deadlocks for consumers already
 * working with kernel maps.
 */
void
vm_radix_reserve_kva(void)
{

	/*
	 * Calculate the number of reserved nodes, discounting the pages that
	 * are needed to store them.
	 */
	if (!uma_zone_reserve_kva(vm_radix_node_zone,
	    ((vm_paddr_t)vm_cnt.v_page_count * PAGE_SIZE) / (PAGE_SIZE +
	    sizeof(struct vm_radix_node))))
		panic("%s: unable to reserve KVA", __func__);
}
#endif

/*
 * Initialize the UMA slab zone.
 *
 * The zone is SMR-enabled and zero-initialized (UMA_ZONE_ZINIT); the
 * smr(9) handle it provides serializes readers against node teardown.
 */
void
vm_radix_zinit(void)
{

	vm_radix_node_zone = uma_zcreate("RADIX NODE",
	    sizeof(struct vm_radix_node), NULL, NULL, NULL, NULL,
	    VM_RADIX_PAD, UMA_ZONE_VM | UMA_ZONE_SMR | UMA_ZONE_ZINIT);
	vm_radix_smr = uma_zone_get_smr(vm_radix_node_zone);
}

/*
 * Inserts the key-value pair into the trie.
 * Panics if the key already exists.
 *
 * Returns 0 on success or ENOMEM if a new interior node cannot be
 * allocated.  New nodes are fully initialized with UNSERIALIZED stores
 * and only then published with a single LOCKED store, so concurrent SMR
 * lookups never observe a partially built node.
 */
int
vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
{
	vm_pindex_t index, newind;
	struct vm_radix_node *rnode, *tmp;
	smrnode_t *parentp;
	vm_page_t m;
	int slot;
	uint16_t clev;

	index = page->pindex;

	/*
	 * The owner of record for root is not really important because it
	 * will never be used.
	 */
	rnode = vm_radix_root_load(rtree, LOCKED);
	if (rnode == NULL) {
		/* Empty tree: the root itself becomes the leaf. */
		rtree->rt_root = (uintptr_t)vm_radix_toleaf(page);
		return (0);
	}
	parentp = (smrnode_t *)&rtree->rt_root;
	for (;;) {
		if (vm_radix_isleaf(rnode)) {
			m = vm_radix_topage(rnode);
			if (m->pindex == index)
				panic("%s: key %jx is already present",
				    __func__, (uintmax_t)index);
			/*
			 * Replace the leaf with a node holding both the
			 * old and the new page, at the level where the
			 * two keys first differ.
			 */
			clev = vm_radix_keydiff(m->pindex, index);
			tmp = vm_radix_node_get(index, clev);
			if (tmp == NULL)
				return (ENOMEM);
			/* These writes are not yet visible due to ordering. */
			vm_radix_addpage(tmp, index, clev, page, UNSERIALIZED);
			vm_radix_addpage(tmp, m->pindex, clev, m, UNSERIALIZED);
			/* Synchronize to make leaf visible. */
			vm_radix_node_store(parentp, tmp, LOCKED);
			return (0);
		} else if (vm_radix_keybarr(rnode, index))
			break;
		slot = vm_radix_slot(index, rnode->rn_clev);
		parentp = &rnode->rn_child[slot];
		tmp = vm_radix_node_load(parentp, LOCKED);
		if (tmp == NULL) {
			/* Free slot: attach the page directly. */
			rnode->rn_count++;
			vm_radix_addpage(rnode, index, rnode->rn_clev, page,
			    LOCKED);
			return (0);
		}
		rnode = tmp;
	}

	/*
	 * A new node is needed because the right insertion level is reached.
	 * Setup the new intermediate node and add the 2 children: the
	 * new object and the older edge.
	 */
	newind = rnode->rn_owner;
	clev = vm_radix_keydiff(newind, index);
	tmp = vm_radix_node_get(index, clev);
	if (tmp == NULL)
		return (ENOMEM);
	slot = vm_radix_slot(newind, clev);
	/* These writes are not yet visible due to ordering. */
	vm_radix_addpage(tmp, index, clev, page, UNSERIALIZED);
	vm_radix_node_store(&tmp->rn_child[slot], rnode, UNSERIALIZED);
	/* Serializing write to make the above visible. */
	vm_radix_node_store(parentp, tmp, LOCKED);

	return (0);
}

/*
 * Returns the value stored at the index.
If the index is not present, 463774d251dSAttilio Rao * NULL is returned. 464774d251dSAttilio Rao */ 4651ddda2ebSJeff Roberson static __always_inline vm_page_t 4661ddda2ebSJeff Roberson _vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index, 4671ddda2ebSJeff Roberson enum vm_radix_access access) 468774d251dSAttilio Rao { 469774d251dSAttilio Rao struct vm_radix_node *rnode; 470774d251dSAttilio Rao vm_page_t m; 471774d251dSAttilio Rao int slot; 472774d251dSAttilio Rao 4731ddda2ebSJeff Roberson rnode = vm_radix_root_load(rtree, access); 474774d251dSAttilio Rao while (rnode != NULL) { 47596f1a842SAlan Cox if (vm_radix_isleaf(rnode)) { 47696f1a842SAlan Cox m = vm_radix_topage(rnode); 477774d251dSAttilio Rao if (m->pindex == index) 478774d251dSAttilio Rao return (m); 4796f9c0b15SAlan Cox break; 4801ddda2ebSJeff Roberson } 4811ddda2ebSJeff Roberson if (vm_radix_keybarr(rnode, index)) 4826f9c0b15SAlan Cox break; 4836f9c0b15SAlan Cox slot = vm_radix_slot(index, rnode->rn_clev); 4841ddda2ebSJeff Roberson rnode = vm_radix_node_load(&rnode->rn_child[slot], access); 485774d251dSAttilio Rao } 486774d251dSAttilio Rao return (NULL); 487774d251dSAttilio Rao } 488774d251dSAttilio Rao 489774d251dSAttilio Rao /* 4901ddda2ebSJeff Roberson * Returns the value stored at the index assuming there is an external lock. 4911ddda2ebSJeff Roberson * 4921ddda2ebSJeff Roberson * If the index is not present, NULL is returned. 4931ddda2ebSJeff Roberson */ 4941ddda2ebSJeff Roberson vm_page_t 4951ddda2ebSJeff Roberson vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index) 4961ddda2ebSJeff Roberson { 4971ddda2ebSJeff Roberson 4981ddda2ebSJeff Roberson return _vm_radix_lookup(rtree, index, LOCKED); 4991ddda2ebSJeff Roberson } 5001ddda2ebSJeff Roberson 5011ddda2ebSJeff Roberson /* 5021ddda2ebSJeff Roberson * Returns the value stored at the index without requiring an external lock. 5031ddda2ebSJeff Roberson * 5041ddda2ebSJeff Roberson * If the index is not present, NULL is returned. 
 */
vm_page_t
vm_radix_lookup_unlocked(struct vm_radix *rtree, vm_pindex_t index)
{
	vm_page_t m;

	/* Enter an SMR read section for the duration of the descent. */
	smr_enter(vm_radix_smr);
	m = _vm_radix_lookup(rtree, index, SMR);
	smr_exit(vm_radix_smr);

	return (m);
}

/*
 * Look up the nearest entry at a position greater than or equal to index.
 *
 * Walks down the trie keeping the path of visited interior nodes on an
 * explicit stack, so that a failed subtree can be abandoned by popping
 * ("ascend") and the search resumed at the next larger candidate key.
 */
vm_page_t
vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *stack[VM_RADIX_LIMIT];
	vm_pindex_t inc;
	vm_page_t m;
	struct vm_radix_node *child, *rnode;
#ifdef INVARIANTS
	int loops = 0;
#endif
	int slot, tos;

	rnode = vm_radix_root_load(rtree, LOCKED);
	if (rnode == NULL)
		return (NULL);
	else if (vm_radix_isleaf(rnode)) {
		/* Single-entry tree: the root is itself a leaf. */
		m = vm_radix_topage(rnode);
		if (m->pindex >= index)
			return (m);
		else
			return (NULL);
	}
	tos = 0;
	for (;;) {
		/*
		 * If the keys differ before the current bisection node,
		 * then the search key might rollback to the earliest
		 * available bisection node or to the smallest key
		 * in the current node (if the owner is greater than the
		 * search key).
		 */
		if (vm_radix_keybarr(rnode, index)) {
			if (index > rnode->rn_owner) {
ascend:
				KASSERT(++loops < 1000,
				    ("vm_radix_lookup_ge: too many loops"));

				/*
				 * Pop nodes from the stack until either the
				 * stack is empty or a node that could have a
				 * matching descendant is found.
				 */
				do {
					if (tos == 0)
						return (NULL);
					rnode = stack[--tos];
				} while (vm_radix_slot(index,
				    rnode->rn_clev) == (VM_RADIX_COUNT - 1));

				/*
				 * The following computation cannot overflow
				 * because index's slot at the current level
				 * is less than VM_RADIX_COUNT - 1.
				 */
				index = vm_radix_trimkey(index,
				    rnode->rn_clev);
				index += VM_RADIX_UNITLEVEL(rnode->rn_clev);
			} else
				index = rnode->rn_owner;
			KASSERT(!vm_radix_keybarr(rnode, index),
			    ("vm_radix_lookup_ge: keybarr failed"));
		}
		slot = vm_radix_slot(index, rnode->rn_clev);
		child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
		if (vm_radix_isleaf(child)) {
			m = vm_radix_topage(child);
			if (m->pindex >= index)
				return (m);
		} else if (child != NULL)
			goto descend;

		/*
		 * Look for an available edge or page within the current
		 * bisection node.
		 */
		if (slot < (VM_RADIX_COUNT - 1)) {
			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
			index = vm_radix_trimkey(index, rnode->rn_clev);
			do {
				index += inc;
				slot++;
				child = vm_radix_node_load(&rnode->rn_child[slot],
				    LOCKED);
				if (vm_radix_isleaf(child)) {
					m = vm_radix_topage(child);
					KASSERT(m->pindex >= index,
					    ("vm_radix_lookup_ge: leaf<index"));
					return (m);
				} else if (child != NULL)
					goto descend;
			} while (slot < (VM_RADIX_COUNT - 1));
		}
		KASSERT(child == NULL || vm_radix_isleaf(child),
		    ("vm_radix_lookup_ge: child is radix node"));

		/*
		 * If a page or edge greater than the search slot is not found
		 * in the current node, ascend to the next higher-level node.
		 */
		goto ascend;
descend:
		KASSERT(rnode->rn_clev > 0,
		    ("vm_radix_lookup_ge: pushing leaf's parent"));
		KASSERT(tos < VM_RADIX_LIMIT,
		    ("vm_radix_lookup_ge: stack overflow"));
		stack[tos++] = rnode;
		rnode = child;
	}
}

/*
 * Look up the nearest entry at a position less than or equal to index.
 *
 * Mirror image of vm_radix_lookup_ge(): same explicit-stack walk, but
 * scanning slots downward and rolling the search key back instead of
 * forward.
 */
vm_page_t
vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *stack[VM_RADIX_LIMIT];
	vm_pindex_t inc;
	vm_page_t m;
	struct vm_radix_node *child, *rnode;
#ifdef INVARIANTS
	int loops = 0;
#endif
	int slot, tos;

	rnode = vm_radix_root_load(rtree, LOCKED);
	if (rnode == NULL)
		return (NULL);
	else if (vm_radix_isleaf(rnode)) {
		m = vm_radix_topage(rnode);
		if (m->pindex <= index)
			return (m);
		else
			return (NULL);
	}
	tos = 0;
	for (;;) {
		/*
		 * If the keys differ before the current bisection node,
		 * then the search key might rollback to the earliest
		 * available bisection node or to the largest key
		 * in the current node (if the owner is smaller than the
		 * search key).
		 */
		if (vm_radix_keybarr(rnode, index)) {
			if (index > rnode->rn_owner) {
				index = rnode->rn_owner + VM_RADIX_COUNT *
				    VM_RADIX_UNITLEVEL(rnode->rn_clev);
			} else {
ascend:
				KASSERT(++loops < 1000,
				    ("vm_radix_lookup_le: too many loops"));

				/*
				 * Pop nodes from the stack until either the
				 * stack is empty or a node that could have a
				 * matching descendant is found.
				 */
				do {
					if (tos == 0)
						return (NULL);
					rnode = stack[--tos];
				} while (vm_radix_slot(index,
				    rnode->rn_clev) == 0);

				/*
				 * The following computation cannot overflow
				 * because index's slot at the current level
				 * is greater than 0.
				 */
				index = vm_radix_trimkey(index,
				    rnode->rn_clev);
			}
			index--;
			KASSERT(!vm_radix_keybarr(rnode, index),
			    ("vm_radix_lookup_le: keybarr failed"));
		}
		slot = vm_radix_slot(index, rnode->rn_clev);
		child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
		if (vm_radix_isleaf(child)) {
			m = vm_radix_topage(child);
			if (m->pindex <= index)
				return (m);
		} else if (child != NULL)
			goto descend;

		/*
		 * Look for an available edge or page within the current
		 * bisection node.
		 */
		if (slot > 0) {
			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
			index |= inc - 1;
			do {
				index -= inc;
				slot--;
				child = vm_radix_node_load(&rnode->rn_child[slot],
				    LOCKED);
				if (vm_radix_isleaf(child)) {
					m = vm_radix_topage(child);
					KASSERT(m->pindex <= index,
					    ("vm_radix_lookup_le: leaf>index"));
					return (m);
				} else if (child != NULL)
					goto descend;
			} while (slot > 0);
		}
		KASSERT(child == NULL || vm_radix_isleaf(child),
		    ("vm_radix_lookup_le: child is radix node"));

		/*
		 * If a page or edge smaller than the search slot is not found
		 * in the current
		 * node, ascend to the next higher-level node.
		 */
		goto ascend;
descend:
		/* Push the current node and continue the search in the child. */
		KASSERT(rnode->rn_clev > 0,
		    ("vm_radix_lookup_le: pushing leaf's parent"));
		KASSERT(tos < VM_RADIX_LIMIT,
		    ("vm_radix_lookup_le: stack overflow"));
		stack[tos++] = rnode;
		rnode = child;
	}
}

/*
 * Remove the specified index from the trie, and return the value stored at
 * that index.  If the index is not present, return NULL.
 *
 * When the removal leaves an interior node with a single child, that child
 * is promoted into the node's position (in the parent, or as the new root)
 * and the node is released.
 */
vm_page_t
vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *rnode, *parent, *tmp;
	vm_page_t m;
	int i, slot;

	rnode = vm_radix_root_load(rtree, LOCKED);
	if (vm_radix_isleaf(rnode)) {
		/* The root itself is the leaf; empty the trie on a match. */
		m = vm_radix_topage(rnode);
		if (m->pindex != index)
			return (NULL);
		vm_radix_root_store(rtree, NULL, LOCKED);
		return (m);
	}
	parent = NULL;
	for (;;) {
		if (rnode == NULL)
			return (NULL);
		slot = vm_radix_slot(index, rnode->rn_clev);
		tmp = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
		if (vm_radix_isleaf(tmp)) {
			m = vm_radix_topage(tmp);
			if (m->pindex != index)
				return (NULL);
			vm_radix_node_store(
			    &rnode->rn_child[slot], NULL, LOCKED);
			rnode->rn_count--;
			if (rnode->rn_count > 1)
				return (m);
			/*
			 * Only one child remains; find it so it can be
			 * promoted into this node's place.
			 */
			for (i = 0; i < VM_RADIX_COUNT; i++) {
				tmp = vm_radix_node_load(&rnode->rn_child[i],
				    LOCKED);
				if (tmp != NULL)
					break;
			}
			KASSERT(tmp != NULL,
			    ("%s: invalid node configuration", __func__));
			if (parent == NULL)
				vm_radix_root_store(rtree, tmp, LOCKED);
			else {
				slot = vm_radix_slot(index, parent->rn_clev);
				KASSERT(vm_radix_node_load(
				    &parent->rn_child[slot], LOCKED) == rnode,
				    ("%s: invalid child value", __func__));
				vm_radix_node_store(&parent->rn_child[slot],
				    tmp, LOCKED);
			}
			/*
			 * The child is still valid and we can not zero the
			 * pointer until all smr references are gone.
			 */
			rnode->rn_count--;
			vm_radix_node_put(rnode, i);
			return (m);
		}
		parent = rnode;
		rnode = tmp;
	}
}

/*
 * Remove and free all the nodes from the radix tree.
 * This function is recursive but there is a tight control on it as the
 * maximum depth of the tree is fixed
815774d251dSAttilio Rao */ 816774d251dSAttilio Rao void 817774d251dSAttilio Rao vm_radix_reclaim_allnodes(struct vm_radix *rtree) 818774d251dSAttilio Rao { 819774d251dSAttilio Rao struct vm_radix_node *root; 820774d251dSAttilio Rao 8211ddda2ebSJeff Roberson root = vm_radix_root_load(rtree, LOCKED); 822774d251dSAttilio Rao if (root == NULL) 823774d251dSAttilio Rao return; 8241ddda2ebSJeff Roberson vm_radix_root_store(rtree, NULL, UNSERIALIZED); 825a08f2cf6SAlan Cox if (!vm_radix_isleaf(root)) 826652615dcSAlan Cox vm_radix_reclaim_allnodes_int(root); 827774d251dSAttilio Rao } 828774d251dSAttilio Rao 829e946b949SAttilio Rao /* 830703b304fSAlan Cox * Replace an existing page in the trie with another one. 831703b304fSAlan Cox * Panics if there is not an old page in the trie at the new page's index. 832e946b949SAttilio Rao */ 833e946b949SAttilio Rao vm_page_t 834703b304fSAlan Cox vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage) 835e946b949SAttilio Rao { 8361ddda2ebSJeff Roberson struct vm_radix_node *rnode, *tmp; 837e946b949SAttilio Rao vm_page_t m; 838703b304fSAlan Cox vm_pindex_t index; 839e946b949SAttilio Rao int slot; 840e946b949SAttilio Rao 841703b304fSAlan Cox index = newpage->pindex; 8421ddda2ebSJeff Roberson rnode = vm_radix_root_load(rtree, LOCKED); 843e946b949SAttilio Rao if (rnode == NULL) 844e946b949SAttilio Rao panic("%s: replacing page on an empty trie", __func__); 845e946b949SAttilio Rao if (vm_radix_isleaf(rnode)) { 846e946b949SAttilio Rao m = vm_radix_topage(rnode); 847e946b949SAttilio Rao if (m->pindex != index) 848e946b949SAttilio Rao panic("%s: original replacing root key not found", 849e946b949SAttilio Rao __func__); 8509cfed089SDoug Moore rtree->rt_root = (uintptr_t)vm_radix_toleaf(newpage); 851e946b949SAttilio Rao return (m); 852e946b949SAttilio Rao } 853e946b949SAttilio Rao for (;;) { 854e946b949SAttilio Rao slot = vm_radix_slot(index, rnode->rn_clev); 8551ddda2ebSJeff Roberson tmp = vm_radix_node_load(&rnode->rn_child[slot], LOCKED); 
8561ddda2ebSJeff Roberson if (vm_radix_isleaf(tmp)) { 8571ddda2ebSJeff Roberson m = vm_radix_topage(tmp); 8589cfed089SDoug Moore if (m->pindex != index) 859e946b949SAttilio Rao break; 8609cfed089SDoug Moore vm_radix_node_store(&rnode->rn_child[slot], 8619cfed089SDoug Moore vm_radix_toleaf(newpage), LOCKED); 8629cfed089SDoug Moore return (m); 8631ddda2ebSJeff Roberson } else if (tmp == NULL || vm_radix_keybarr(tmp, index)) 864e946b949SAttilio Rao break; 8651ddda2ebSJeff Roberson rnode = tmp; 866e946b949SAttilio Rao } 867e946b949SAttilio Rao panic("%s: original replacing page not found", __func__); 868e946b949SAttilio Rao } 869e946b949SAttilio Rao 8708d6fbbb8SJeff Roberson void 8718d6fbbb8SJeff Roberson vm_radix_wait(void) 8728d6fbbb8SJeff Roberson { 8738d6fbbb8SJeff Roberson uma_zwait(vm_radix_node_zone); 8748d6fbbb8SJeff Roberson } 8758d6fbbb8SJeff Roberson 876774d251dSAttilio Rao #ifdef DDB 877774d251dSAttilio Rao /* 878774d251dSAttilio Rao * Show details about the given radix node. 879774d251dSAttilio Rao */ 880774d251dSAttilio Rao DB_SHOW_COMMAND(radixnode, db_show_radixnode) 881774d251dSAttilio Rao { 8821ddda2ebSJeff Roberson struct vm_radix_node *rnode, *tmp; 883774d251dSAttilio Rao int i; 884774d251dSAttilio Rao 885774d251dSAttilio Rao if (!have_addr) 886774d251dSAttilio Rao return; 887774d251dSAttilio Rao rnode = (struct vm_radix_node *)addr; 888774d251dSAttilio Rao db_printf("radixnode %p, owner %jx, children count %u, level %u:\n", 889774d251dSAttilio Rao (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count, 890774d251dSAttilio Rao rnode->rn_clev); 8911ddda2ebSJeff Roberson for (i = 0; i < VM_RADIX_COUNT; i++) { 8921ddda2ebSJeff Roberson tmp = vm_radix_node_load(&rnode->rn_child[i], UNSERIALIZED); 8931ddda2ebSJeff Roberson if (tmp != NULL) 894774d251dSAttilio Rao db_printf("slot: %d, val: %p, page: %p, clev: %d\n", 8951ddda2ebSJeff Roberson i, (void *)tmp, 8961ddda2ebSJeff Roberson vm_radix_isleaf(tmp) ? 
vm_radix_topage(tmp) : NULL, 897774d251dSAttilio Rao rnode->rn_clev); 898774d251dSAttilio Rao } 8991ddda2ebSJeff Roberson } 900774d251dSAttilio Rao #endif /* DDB */ 901