/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 EMC Corp.
 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * Path-compressed radix trie implementation.
 * The following code is not generalized into a general purpose library
 * because there are way too many parameters embedded that should really
 * be decided by the library consumers.  At the same time, consumers
 * of this code must achieve the highest possible performance.
 *
 * The implementation takes into account the following rationale:
 * - Size of the nodes should be as small as possible but still big enough
 *   to avoid a large maximum depth for the trie.  This is a balance
 *   between the necessity to not wire too much physical memory for the nodes
 *   and the necessity to avoid too much cache pollution during the trie
 *   operations.
 * - There is not a huge bias toward the number of lookup operations over
 *   the number of insert and remove operations.  This basically implies
 *   that optimizations supposedly helping one operation but hurting the
 *   other must be carefully evaluated.
 * - On average not many nodes are expected to be fully populated, hence
 *   level compression may just complicate things.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/smr.h>
#include <sys/smr_types.h>

#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_radix.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

/*
 * These widths should allow the pointers to a node's children to fit within
 * a single cache line.  The extra levels from a narrow width should not be
 * a problem thanks to path compression.
 */
#ifdef __LP64__
#define	VM_RADIX_WIDTH	4
#else
#define	VM_RADIX_WIDTH	3
#endif

#define	VM_RADIX_COUNT	(1 << VM_RADIX_WIDTH)
#define	VM_RADIX_MASK	(VM_RADIX_COUNT - 1)
#define	VM_RADIX_LIMIT							\
	(howmany(sizeof(vm_pindex_t) * NBBY, VM_RADIX_WIDTH) - 1)

/* Flag bits stored in node pointers. */
#define	VM_RADIX_ISLEAF	0x1
#define	VM_RADIX_FLAGS	0x1
#define	VM_RADIX_PAD	VM_RADIX_FLAGS

/* Returns one unit associated with specified level. */
#define	VM_RADIX_UNITLEVEL(lev)						\
	((vm_pindex_t)1 << ((lev) * VM_RADIX_WIDTH))
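
/*
 * Worked example of the constants above (editor's sketch, assuming LP64 and
 * a 64-bit vm_pindex_t): VM_RADIX_WIDTH is 4, so VM_RADIX_COUNT is 16,
 * VM_RADIX_MASK is 0xf, and VM_RADIX_LIMIT is howmany(64, 4) - 1 == 15,
 * i.e. levels run from 0 (least significant nibble) through 15.
 * VM_RADIX_UNITLEVEL(2) is (vm_pindex_t)1 << 8 == 0x100, the distance
 * between consecutive slots of a level-2 node.
 */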

enum vm_radix_access { SMR, LOCKED, UNSERIALIZED };

struct vm_radix_node;
typedef SMR_POINTER(struct vm_radix_node *) smrnode_t;

struct vm_radix_node {
	vm_pindex_t	rn_owner;			/* Owner of record. */
	uint16_t	rn_count;			/* Valid children. */
	uint8_t		rn_clev;			/* Current level. */
	int8_t		rn_last;			/* zero last ptr. */
	smrnode_t	rn_child[VM_RADIX_COUNT];	/* Child nodes. */
};
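
/*
 * Size estimate (editor's sketch, assuming LP64 and a pointer-sized
 * smrnode_t): the header fields above occupy 8 + 2 + 1 + 1 = 12 bytes,
 * padded to 16 for pointer alignment, and the VM_RADIX_COUNT == 16 child
 * slots occupy 16 * 8 = 128 bytes, for a node size of roughly 144 bytes.
 */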

static uma_zone_t vm_radix_node_zone;
static smr_t vm_radix_smr;

static void vm_radix_node_store(smrnode_t *p, struct vm_radix_node *v,
    enum vm_radix_access access);

/*
 * Allocate a radix node.
 */
static struct vm_radix_node *
vm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel)
{
	struct vm_radix_node *rnode;

	rnode = uma_zalloc_smr(vm_radix_node_zone, M_NOWAIT);
	if (rnode == NULL)
		return (NULL);

	/*
	 * We want to clear the last child pointer after the final SMR
	 * section has exited so lookup cannot return false negatives.  It is
	 * done here because it will be cache-cold in the dtor callback.
	 */
	if (rnode->rn_last != 0) {
		vm_radix_node_store(&rnode->rn_child[rnode->rn_last - 1],
		    NULL, UNSERIALIZED);
		rnode->rn_last = 0;
	}
	rnode->rn_owner = owner;
	rnode->rn_count = count;
	rnode->rn_clev = clevel;
	return (rnode);
}
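
/*
 * Worked example of the rn_last dance (editor's sketch): suppose a node is
 * freed by vm_radix_node_put() with last == 3 while rn_child[3] still points
 * at a live subtree; rn_last is stored as 3 + 1 == 4.  Concurrent SMR readers
 * may keep following rn_child[3] until their sections end, so the slot is
 * only NULLed above, on the next allocation of this node, once no reader can
 * observe it any longer.
 */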

/*
 * Free radix node.
 */
static __inline void
vm_radix_node_put(struct vm_radix_node *rnode, int8_t last)
{
#ifdef INVARIANTS
	int slot;

	KASSERT(rnode->rn_count == 0,
	    ("vm_radix_node_put: rnode %p has %d children", rnode,
	    rnode->rn_count));
	for (slot = 0; slot < VM_RADIX_COUNT; slot++) {
		if (slot == last)
			continue;
		KASSERT(smr_unserialized_load(&rnode->rn_child[slot], true) ==
		    NULL, ("vm_radix_node_put: rnode %p has a child", rnode));
	}
#endif
	/* Off by one so a freshly zeroed node is not assigned to. */
	rnode->rn_last = last + 1;
	uma_zfree_smr(vm_radix_node_zone, rnode);
}

/*
 * Return the position in the array for a given level.
 */
static __inline int
vm_radix_slot(vm_pindex_t index, uint16_t level)
{

	return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK);
}

/* Trims the key after the specified level. */
static __inline vm_pindex_t
vm_radix_trimkey(vm_pindex_t index, uint16_t level)
{
	vm_pindex_t ret;

	ret = index;
	if (level > 0) {
		ret >>= level * VM_RADIX_WIDTH;
		ret <<= level * VM_RADIX_WIDTH;
	}
	return (ret);
}
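
/*
 * Worked example (editor's sketch, VM_RADIX_WIDTH == 4): for index 0x1234,
 * vm_radix_slot(0x1234, 1) == (0x1234 >> 4) & 0xf == 3, and
 * vm_radix_trimkey(0x1234, 1) shifts the low nibble away and back again,
 * yielding 0x1230.
 */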

/*
 * Fetch a node pointer from a slot in another node.
 */
static __inline struct vm_radix_node *
vm_radix_node_load(smrnode_t *p, enum vm_radix_access access)
{

	switch (access) {
	case UNSERIALIZED:
		return (smr_unserialized_load(p, true));
	case LOCKED:
		return (smr_serialized_load(p, true));
	case SMR:
		return (smr_entered_load(p, vm_radix_smr));
	}
	__assert_unreachable();
}

static __inline void
vm_radix_node_store(smrnode_t *p, struct vm_radix_node *v,
    enum vm_radix_access access)
{

	switch (access) {
	case UNSERIALIZED:
		smr_unserialized_store(p, v, true);
		break;
	case LOCKED:
		smr_serialized_store(p, v, true);
		break;
	case SMR:
		panic("vm_radix_node_store: Not supported in smr section.");
	}
}
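
/*
 * Illustrative reader pattern for the access levels above (editor's sketch;
 * vm_radix_lookup_unlocked() below is the real consumer): SMR loads are only
 * valid between smr_enter() and smr_exit(), LOCKED accesses assume the
 * tree's external lock is held, and UNSERIALIZED accesses assume no
 * concurrency at all, e.g. during node initialization or teardown.
 *
 *	smr_enter(vm_radix_smr);
 *	rnode = vm_radix_node_load(&rnode->rn_child[slot], SMR);
 *	smr_exit(vm_radix_smr);
 */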

/*
 * Get the root node for a radix tree.
 */
static __inline struct vm_radix_node *
vm_radix_root_load(struct vm_radix *rtree, enum vm_radix_access access)
{

	return (vm_radix_node_load((smrnode_t *)&rtree->rt_root, access));
}

/*
 * Set the root node for a radix tree.
 */
static __inline void
vm_radix_root_store(struct vm_radix *rtree, struct vm_radix_node *rnode,
    enum vm_radix_access access)
{

	vm_radix_node_store((smrnode_t *)&rtree->rt_root, rnode, access);
}

/*
 * Returns TRUE if the specified radix node is a leaf and FALSE otherwise.
 */
static __inline boolean_t
vm_radix_isleaf(struct vm_radix_node *rnode)
{

	return (((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0);
}

/*
 * Returns the associated page extracted from rnode.
 */
static __inline vm_page_t
vm_radix_topage(struct vm_radix_node *rnode)
{

	return ((vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS));
}
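
/*
 * Leaf encoding example (editor's sketch): a vm_page_t is at least
 * pointer-aligned, so its low bit is free for tagging.  A page structure at,
 * say, 0xffff000012345600 is stored in a child slot as 0xffff000012345601;
 * vm_radix_isleaf() tests that low bit and vm_radix_topage() masks
 * VM_RADIX_FLAGS back off to recover the page pointer.
 */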

/*
 * Adds the page as a child of the provided node.
 */
static __inline void
vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
    vm_page_t page, enum vm_radix_access access)
{
	int slot;

	slot = vm_radix_slot(index, clev);
	vm_radix_node_store(&rnode->rn_child[slot],
	    (struct vm_radix_node *)((uintptr_t)page | VM_RADIX_ISLEAF), access);
}

/*
 * Returns the highest level at which two keys differ.
 * It cannot accept two equal keys.
 */
static __inline uint16_t
vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2)
{
	uint16_t clev;

	KASSERT(index1 != index2, ("%s: passing the same key value %jx",
	    __func__, (uintmax_t)index1));

	index1 ^= index2;
	for (clev = VM_RADIX_LIMIT;; clev--)
		if (vm_radix_slot(index1, clev) != 0)
			return (clev);
}
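
/*
 * Worked example (editor's sketch, VM_RADIX_WIDTH == 4): for the keys
 * 0x1234 and 0x1634, index1 ^ index2 == 0x0400; scanning down from
 * VM_RADIX_LIMIT, the first nonzero slot of 0x0400 is at level 2
 * ((0x0400 >> 8) & 0xf == 4), so vm_radix_keydiff() returns 2.
 */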

/*
 * Returns TRUE if it can be determined that key does not belong to the
 * specified rnode.  Otherwise, returns FALSE.
 */
static __inline boolean_t
vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx)
{

	if (rnode->rn_clev < VM_RADIX_LIMIT) {
		idx = vm_radix_trimkey(idx, rnode->rn_clev + 1);
		return (idx != rnode->rn_owner);
	}
	return (FALSE);
}
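
/*
 * Worked example (editor's sketch, VM_RADIX_WIDTH == 4): for a node with
 * rn_clev == 2 and rn_owner == 0x1000, vm_radix_keybarr() trims a key above
 * level 2; 0x1234 trims to 0x1000 == rn_owner, so the key may lie in this
 * subtree, while 0x2345 trims to 0x2000 != rn_owner, a key barrier.
 */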

/*
 * Internal helper for vm_radix_reclaim_allnodes().
 * This function is recursive.
 */
static void
vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
{
	struct vm_radix_node *child;
	int slot;

	KASSERT(rnode->rn_count <= VM_RADIX_COUNT,
	    ("vm_radix_reclaim_allnodes_int: bad count in rnode %p", rnode));
	for (slot = 0; rnode->rn_count != 0; slot++) {
		child = vm_radix_node_load(&rnode->rn_child[slot], UNSERIALIZED);
		if (child == NULL)
			continue;
		if (!vm_radix_isleaf(child))
			vm_radix_reclaim_allnodes_int(child);
		vm_radix_node_store(&rnode->rn_child[slot], NULL, UNSERIALIZED);
		rnode->rn_count--;
	}
	vm_radix_node_put(rnode, -1);
}

#ifndef UMA_MD_SMALL_ALLOC
void vm_radix_reserve_kva(void);
/*
 * Reserve the KVA necessary to satisfy the node allocation.
 * This is mandatory on architectures that do not support a direct map,
 * as they would otherwise need to carve into the kernel map for every
 * node allocation, resulting in deadlocks for consumers already working
 * with kernel maps.
 */
void
vm_radix_reserve_kva(void)
{

	/*
	 * Calculate the number of reserved nodes, discounting the pages that
	 * are needed to store them.
	 */
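	/*
	 * Editor's note on the expression below (a derivation, not in the
	 * original): if n nodes are reserved, they cover n pages while
	 * themselves consuming n * sizeof(struct vm_radix_node) bytes, so
	 * n = physical bytes / (PAGE_SIZE + sizeof(struct vm_radix_node))
	 * makes the nodes and the pages they describe exactly fill memory.
	 */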
	if (!uma_zone_reserve_kva(vm_radix_node_zone,
	    ((vm_paddr_t)vm_cnt.v_page_count * PAGE_SIZE) / (PAGE_SIZE +
	    sizeof(struct vm_radix_node))))
		panic("%s: unable to reserve KVA", __func__);
}
#endif

/*
 * Initialize the UMA slab zone.
 */
void
vm_radix_zinit(void)
{

	vm_radix_node_zone = uma_zcreate("RADIX NODE",
	    sizeof(struct vm_radix_node), NULL, NULL, NULL, NULL,
	    VM_RADIX_PAD, UMA_ZONE_VM | UMA_ZONE_SMR | UMA_ZONE_ZINIT);
	vm_radix_smr = uma_zone_get_smr(vm_radix_node_zone);
}

/*
 * Inserts the key-value pair into the trie.
 * Panics if the key already exists.
 */
int
vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
{
	vm_pindex_t index, newind;
	struct vm_radix_node *rnode, *tmp;
	smrnode_t *parentp;
	vm_page_t m;
	int slot;
	uint16_t clev;

	index = page->pindex;

	/*
	 * The owner of record for root is not really important because it
	 * will never be used.
	 */
	rnode = vm_radix_root_load(rtree, LOCKED);
	if (rnode == NULL) {
		rtree->rt_root = (uintptr_t)page | VM_RADIX_ISLEAF;
		return (0);
	}
	parentp = (smrnode_t *)&rtree->rt_root;
	for (;;) {
		if (vm_radix_isleaf(rnode)) {
			m = vm_radix_topage(rnode);
			if (m->pindex == index)
				panic("%s: key %jx is already present",
				    __func__, (uintmax_t)index);
			clev = vm_radix_keydiff(m->pindex, index);
			tmp = vm_radix_node_get(vm_radix_trimkey(index,
			    clev + 1), 2, clev);
			if (tmp == NULL)
				return (ENOMEM);
			/* These writes are not yet visible due to ordering. */
			vm_radix_addpage(tmp, index, clev, page, UNSERIALIZED);
			vm_radix_addpage(tmp, m->pindex, clev, m, UNSERIALIZED);
			/* Synchronize to make leaf visible. */
			vm_radix_node_store(parentp, tmp, LOCKED);
			return (0);
		} else if (vm_radix_keybarr(rnode, index))
			break;
		slot = vm_radix_slot(index, rnode->rn_clev);
		parentp = &rnode->rn_child[slot];
		tmp = vm_radix_node_load(parentp, LOCKED);
		if (tmp == NULL) {
			rnode->rn_count++;
			vm_radix_addpage(rnode, index, rnode->rn_clev, page,
			    LOCKED);
			return (0);
		}
		rnode = tmp;
	}

	/*
	 * A new node is needed because the right insertion level is reached.
	 * Set up the new intermediate node and add the two children: the
	 * new object and the older edge.
	 */
	newind = rnode->rn_owner;
	clev = vm_radix_keydiff(newind, index);
	tmp = vm_radix_node_get(vm_radix_trimkey(index, clev + 1), 2, clev);
	if (tmp == NULL)
		return (ENOMEM);
	slot = vm_radix_slot(newind, clev);
	/* These writes are not yet visible due to ordering. */
	vm_radix_addpage(tmp, index, clev, page, UNSERIALIZED);
	vm_radix_node_store(&tmp->rn_child[slot], rnode, UNSERIALIZED);
	/* Serializing write to make the above visible. */
	vm_radix_node_store(parentp, tmp, LOCKED);

	return (0);
}
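
/*
 * Illustrative caller pattern (editor's sketch; the vm_object names are
 * assumptions, not consumers in this file): callers hold the object lock
 * across the insert and must handle ENOMEM, e.g. by dropping the lock,
 * sleeping in vm_radix_wait(), and retrying:
 *
 *	while (vm_radix_insert(&object->rtree, m) != 0) {
 *		VM_OBJECT_WUNLOCK(object);
 *		vm_radix_wait();
 *		VM_OBJECT_WLOCK(object);
 *	}
 */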

/*
 * Returns TRUE if the specified radix tree contains a single leaf and FALSE
 * otherwise.
 */
boolean_t
vm_radix_is_singleton(struct vm_radix *rtree)
{
	struct vm_radix_node *rnode;

	rnode = vm_radix_root_load(rtree, LOCKED);
	if (rnode == NULL)
		return (FALSE);
	return (vm_radix_isleaf(rnode));
}

/*
 * Returns the value stored at the index.  If the index is not present,
 * NULL is returned.
 */
static __always_inline vm_page_t
_vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index,
    enum vm_radix_access access)
{
	struct vm_radix_node *rnode;
	vm_page_t m;
	int slot;

	rnode = vm_radix_root_load(rtree, access);
	while (rnode != NULL) {
		if (vm_radix_isleaf(rnode)) {
			m = vm_radix_topage(rnode);
			if (m->pindex == index)
				return (m);
			break;
		}
		if (vm_radix_keybarr(rnode, index))
			break;
		slot = vm_radix_slot(index, rnode->rn_clev);
		rnode = vm_radix_node_load(&rnode->rn_child[slot], access);
	}
	return (NULL);
}

/*
 * Returns the value stored at the index assuming there is an external lock.
 *
 * If the index is not present, NULL is returned.
 */
vm_page_t
vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
{

	return (_vm_radix_lookup(rtree, index, LOCKED));
}

/*
 * Returns the value stored at the index without requiring an external lock.
 *
 * If the index is not present, NULL is returned.
 */
vm_page_t
vm_radix_lookup_unlocked(struct vm_radix *rtree, vm_pindex_t index)
{
	vm_page_t m;

	smr_enter(vm_radix_smr);
	m = _vm_radix_lookup(rtree, index, SMR);
	smr_exit(vm_radix_smr);

	return (m);
}

/*
 * Look up the nearest entry at a position greater than or equal to index.
 */
vm_page_t
vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *stack[VM_RADIX_LIMIT];
	vm_pindex_t inc;
	vm_page_t m;
	struct vm_radix_node *child, *rnode;
#ifdef INVARIANTS
	int loops = 0;
#endif
	int slot, tos;

	rnode = vm_radix_root_load(rtree, LOCKED);
	if (rnode == NULL)
		return (NULL);
	else if (vm_radix_isleaf(rnode)) {
		m = vm_radix_topage(rnode);
		if (m->pindex >= index)
			return (m);
		else
			return (NULL);
	}
	tos = 0;
	for (;;) {
		/*
		 * If the keys differ before the current bisection node,
		 * then the search key might roll back to the earliest
		 * available bisection node or to the smallest key
		 * in the current node (if the owner is greater than the
		 * search key).
		 */
		if (vm_radix_keybarr(rnode, index)) {
			if (index > rnode->rn_owner) {
ascend:
				KASSERT(++loops < 1000,
				    ("vm_radix_lookup_ge: too many loops"));

				/*
				 * Pop nodes from the stack until either the
				 * stack is empty or a node that could have a
				 * matching descendant is found.
				 */
				do {
					if (tos == 0)
						return (NULL);
					rnode = stack[--tos];
				} while (vm_radix_slot(index,
				    rnode->rn_clev) == (VM_RADIX_COUNT - 1));

				/*
				 * The following computation cannot overflow
				 * because index's slot at the current level
				 * is less than VM_RADIX_COUNT - 1.
				 */
				index = vm_radix_trimkey(index,
				    rnode->rn_clev);
				index += VM_RADIX_UNITLEVEL(rnode->rn_clev);
			} else
				index = rnode->rn_owner;
			KASSERT(!vm_radix_keybarr(rnode, index),
			    ("vm_radix_lookup_ge: keybarr failed"));
		}
		slot = vm_radix_slot(index, rnode->rn_clev);
		child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
		if (vm_radix_isleaf(child)) {
			m = vm_radix_topage(child);
			if (m->pindex >= index)
				return (m);
		} else if (child != NULL)
			goto descend;

		/*
		 * Look for an available edge or page within the current
		 * bisection node.
		 */
		if (slot < (VM_RADIX_COUNT - 1)) {
			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
			index = vm_radix_trimkey(index, rnode->rn_clev);
			do {
				index += inc;
				slot++;
				child = vm_radix_node_load(&rnode->rn_child[slot],
				    LOCKED);
				if (vm_radix_isleaf(child)) {
					m = vm_radix_topage(child);
					if (m->pindex >= index)
						return (m);
				} else if (child != NULL)
					goto descend;
			} while (slot < (VM_RADIX_COUNT - 1));
		}
		KASSERT(child == NULL || vm_radix_isleaf(child),
		    ("vm_radix_lookup_ge: child is radix node"));

		/*
		 * If a page or edge greater than the search slot is not found
		 * in the current node, ascend to the next higher-level node.
		 */
		goto ascend;
descend:
		KASSERT(rnode->rn_clev > 0,
		    ("vm_radix_lookup_ge: pushing leaf's parent"));
		KASSERT(tos < VM_RADIX_LIMIT,
		    ("vm_radix_lookup_ge: stack overflow"));
		stack[tos++] = rnode;
		rnode = child;
	}
}
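
/*
 * Illustrative trace of vm_radix_lookup_ge() above (editor's sketch with an
 * assumed trie shape): with VM_RADIX_WIDTH == 4 and leaves at 0x10 and 0x25
 * under a single level-1 node, a search for 0x11 computes slot 1, rejects
 * the leaf 0x10 (0x10 < 0x11), then scans the following slots of the same
 * node and returns the page at 0x25 from slot 2.
 */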

/*
 * Look up the nearest entry at a position less than or equal to index.
 */
vm_page_t
vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *stack[VM_RADIX_LIMIT];
	vm_pindex_t inc;
	vm_page_t m;
	struct vm_radix_node *child, *rnode;
#ifdef INVARIANTS
	int loops = 0;
#endif
	int slot, tos;

	rnode = vm_radix_root_load(rtree, LOCKED);
	if (rnode == NULL)
		return (NULL);
	else if (vm_radix_isleaf(rnode)) {
		m = vm_radix_topage(rnode);
		if (m->pindex <= index)
			return (m);
		else
			return (NULL);
	}
	tos = 0;
	for (;;) {
		/*
		 * If the keys differ before the current bisection node,
		 * then the search key might roll back to the earliest
		 * available bisection node or to the largest key
		 * in the current node (if the owner is smaller than the
		 * search key).
		 */
		if (vm_radix_keybarr(rnode, index)) {
			if (index > rnode->rn_owner) {
				index = rnode->rn_owner + VM_RADIX_COUNT *
				    VM_RADIX_UNITLEVEL(rnode->rn_clev);
			} else {
ascend:
				KASSERT(++loops < 1000,
				    ("vm_radix_lookup_le: too many loops"));

				/*
				 * Pop nodes from the stack until either the
				 * stack is empty or a node that could have a
				 * matching descendant is found.
				 */
				do {
					if (tos == 0)
						return (NULL);
					rnode = stack[--tos];
				} while (vm_radix_slot(index,
				    rnode->rn_clev) == 0);

				/*
				 * The following computation cannot overflow
				 * because index's slot at the current level
				 * is greater than 0.
				 */
				index = vm_radix_trimkey(index,
				    rnode->rn_clev);
			}
			index--;
			KASSERT(!vm_radix_keybarr(rnode, index),
			    ("vm_radix_lookup_le: keybarr failed"));
		}
		slot = vm_radix_slot(index, rnode->rn_clev);
		child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
		if (vm_radix_isleaf(child)) {
			m = vm_radix_topage(child);
			if (m->pindex <= index)
				return (m);
		} else if (child != NULL)
			goto descend;

		/*
		 * Look for an available edge or page within the current
		 * bisection node.
		 */
		if (slot > 0) {
			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
			index |= inc - 1;
			do {
				index -= inc;
				slot--;
				child = vm_radix_node_load(&rnode->rn_child[slot],
				    LOCKED);
				if (vm_radix_isleaf(child)) {
					m = vm_radix_topage(child);
					if (m->pindex <= index)
						return (m);
				} else if (child != NULL)
					goto descend;
			} while (slot > 0);
		}
		KASSERT(child == NULL || vm_radix_isleaf(child),
		    ("vm_radix_lookup_le: child is radix node"));

		/*
		 * If a page or edge smaller than the search slot is not found
		 * in the current node, ascend to the next higher-level node.
		 */
		goto ascend;
descend:
		KASSERT(rnode->rn_clev > 0,
		    ("vm_radix_lookup_le: pushing leaf's parent"));
		KASSERT(tos < VM_RADIX_LIMIT,
		    ("vm_radix_lookup_le: stack overflow"));
		stack[tos++] = rnode;
		rnode = child;
	}
}

/*
 * Remove the specified index from the trie, and return the value stored at
 * that index.  If the index is not present, return NULL.
 */
vm_page_t
vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *rnode, *parent, *tmp;
	vm_page_t m;
	int i, slot;

	rnode = vm_radix_root_load(rtree, LOCKED);
	if (vm_radix_isleaf(rnode)) {
		m = vm_radix_topage(rnode);
		if (m->pindex != index)
			return (NULL);
		vm_radix_root_store(rtree, NULL, LOCKED);
		return (m);
	}
	parent = NULL;
	for (;;) {
		if (rnode == NULL)
			return (NULL);
		slot = vm_radix_slot(index, rnode->rn_clev);
		tmp = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
		if (vm_radix_isleaf(tmp)) {
			m = vm_radix_topage(tmp);
			if (m->pindex != index)
				return (NULL);
			vm_radix_node_store(&rnode->rn_child[slot], NULL, LOCKED);
			rnode->rn_count--;
			if (rnode->rn_count > 1)
				return (m);
			for (i = 0; i < VM_RADIX_COUNT; i++)
				if (vm_radix_node_load(&rnode->rn_child[i],
				    LOCKED) != NULL)
					break;
			KASSERT(i != VM_RADIX_COUNT,
			    ("%s: invalid node configuration", __func__));
			tmp = vm_radix_node_load(&rnode->rn_child[i], LOCKED);
			if (parent == NULL)
				vm_radix_root_store(rtree, tmp, LOCKED);
			else {
				slot = vm_radix_slot(index, parent->rn_clev);
				KASSERT(vm_radix_node_load(
				    &parent->rn_child[slot], LOCKED) == rnode,
				    ("%s: invalid child value", __func__));
				vm_radix_node_store(&parent->rn_child[slot],
				    tmp, LOCKED);
			}
			/*
			 * The child is still valid and we cannot zero the
			 * pointer until all SMR references are gone.
			 */
			rnode->rn_count--;
			vm_radix_node_put(rnode, i);
			return (m);
		}
		parent = rnode;
		rnode = tmp;
	}
}

/*
 * Remove and free all the nodes from the radix tree.
 * This function is recursive, but the recursion is tightly bounded because
 * the maximum depth of the tree is fixed.
 */
void
vm_radix_reclaim_allnodes(struct vm_radix *rtree)
{
	struct vm_radix_node *root;

	root = vm_radix_root_load(rtree, LOCKED);
	if (root == NULL)
		return;
	vm_radix_root_store(rtree, NULL, UNSERIALIZED);
	if (!vm_radix_isleaf(root))
		vm_radix_reclaim_allnodes_int(root);
}

/*
 * Replace an existing page in the trie with another one.
 * Panics if there is not an old page in the trie at the new page's index.
 */
vm_page_t
vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage)
{
	struct vm_radix_node *rnode, *tmp;
	vm_page_t m;
	vm_pindex_t index;
	int slot;

	index = newpage->pindex;
	rnode = vm_radix_root_load(rtree, LOCKED);
	if (rnode == NULL)
		panic("%s: replacing page on an empty trie", __func__);
	if (vm_radix_isleaf(rnode)) {
		m = vm_radix_topage(rnode);
		if (m->pindex != index)
			panic("%s: original replacing root key not found",
			    __func__);
		rtree->rt_root = (uintptr_t)newpage | VM_RADIX_ISLEAF;
		return (m);
	}
	for (;;) {
		slot = vm_radix_slot(index, rnode->rn_clev);
		tmp = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
		if (vm_radix_isleaf(tmp)) {
			m = vm_radix_topage(tmp);
			if (m->pindex == index) {
				vm_radix_node_store(&rnode->rn_child[slot],
				    (struct vm_radix_node *)((uintptr_t)newpage |
				    VM_RADIX_ISLEAF), LOCKED);
				return (m);
			} else
				break;
		} else if (tmp == NULL || vm_radix_keybarr(tmp, index))
			break;
		rnode = tmp;
	}
	panic("%s: original replacing page not found", __func__);
}

void
vm_radix_wait(void)
{
	uma_zwait(vm_radix_node_zone);
}

#ifdef DDB
/*
 * Show details about the given radix node.
 */
DB_SHOW_COMMAND(radixnode, db_show_radixnode)
{
	struct vm_radix_node *rnode, *tmp;
	int i;

	if (!have_addr)
		return;
	rnode = (struct vm_radix_node *)addr;
	db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
	    (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
	    rnode->rn_clev);
	for (i = 0; i < VM_RADIX_COUNT; i++) {
		tmp = vm_radix_node_load(&rnode->rn_child[i], UNSERIALIZED);
		if (tmp != NULL)
			db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
			    i, (void *)tmp,
			    vm_radix_isleaf(tmp) ? vm_radix_topage(tmp) : NULL,
			    rnode->rn_clev);
	}
}
#endif /* DDB */