/*
 * Copyright (c) 2013 EMC Corp.
 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * Path-compressed radix trie implementation.
 * The following code is not generalized into a general purpose library
 * because there are too many embedded parameters that should really
 * be decided by the library consumers.  At the same time, consumers
 * of this code must achieve the highest possible performance.
 *
 * The implementation takes into account the following rationale:
 * - The size of the nodes should be as small as possible but still big
 *   enough to avoid a large maximum depth for the trie.  This is a balance
 *   between the necessity to not wire too much physical memory for the nodes
 *   and the necessity to avoid too much cache pollution during the trie
 *   operations.
 * - There is not a huge bias toward the number of lookup operations over
 *   the number of insert and remove operations.  This basically implies
 *   that optimizations supposedly helping one operation but hurting the
 *   others must be carefully evaluated.
 * - On average not many nodes are expected to be fully populated, hence
 *   level compression may just complicate things.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vmmeter.h>

#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_radix.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

/*
 * These widths should allow the pointers to a node's children to fit within
 * a single cache line.  The extra levels from a narrow width should not be
 * a problem thanks to path compression.
 */
#ifdef __LP64__
#define	VM_RADIX_WIDTH	4
#else
#define	VM_RADIX_WIDTH	3
#endif

#define	VM_RADIX_COUNT	(1 << VM_RADIX_WIDTH)
#define	VM_RADIX_MASK	(VM_RADIX_COUNT - 1)
#define	VM_RADIX_LIMIT							\
	(howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH) - 1)
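
/*
 * Illustrative arithmetic (not part of the original code): on __LP64__,
 * VM_RADIX_WIDTH is 4, so VM_RADIX_COUNT is 16, VM_RADIX_MASK is 0xf and,
 * with a 64-bit vm_pindex_t, VM_RADIX_LIMIT is 64 / 4 - 1 = 15.  A key is
 * thus consumed one nibble at a time, level 0 holding the most significant
 * nibble and level 15 the least significant one.
 */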

/* Flag bits stored in node pointers. */
#define	VM_RADIX_ISLEAF	0x1
#define	VM_RADIX_FLAGS	0x1
#define	VM_RADIX_PAD	VM_RADIX_FLAGS

/* Returns the unit (key stride) associated with the specified level. */
#define	VM_RADIX_UNITLEVEL(lev)						\
	((vm_pindex_t)1 << ((VM_RADIX_LIMIT - (lev)) * VM_RADIX_WIDTH))
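
/*
 * Example (assuming __LP64__): VM_RADIX_UNITLEVEL(15) == 1 and
 * VM_RADIX_UNITLEVEL(14) == 16, i.e. the stride between consecutive keys
 * distinguished at the deepest level and at the level just above it.
 */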

struct vm_radix_node {
	void		*rn_child[VM_RADIX_COUNT];	/* Child nodes. */
	vm_pindex_t	 rn_owner;			/* Owner of record. */
	uint16_t	 rn_count;			/* Valid children. */
	uint16_t	 rn_clev;			/* Current level. */
};
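
/*
 * Sketch of the resulting layout (assuming __LP64__ and natural alignment):
 * rn_child occupies 16 * 8 = 128 bytes, followed by the 8-byte rn_owner and
 * the two 16-bit fields, so a node spans about 144 bytes once padded.
 */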

static uma_zone_t vm_radix_node_zone;

/*
 * Allocate a radix node.  Pre-allocation should ensure that the request
 * will always be satisfied.
 */
static __inline struct vm_radix_node *
vm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel)
{
	struct vm_radix_node *rnode;

	rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT);

	/*
	 * The required number of nodes should already be pre-allocated
	 * by vm_radix_prealloc().  However, UMA can hold a few nodes
	 * in per-CPU buckets, which will not be accessible by the
	 * current CPU.  Thus, the allocation could return NULL when
	 * the pre-allocated pool is close to exhaustion.  In practice
	 * this should never occur because a new node is not always
	 * required for an insert.  Thus, the pre-allocated pool should
	 * have some extra pages that prevent this from becoming a
	 * problem.
	 */
	if (rnode == NULL)
		panic("%s: uma_zalloc() returned NULL for a new node",
		    __func__);
	rnode->rn_owner = owner;
	rnode->rn_count = count;
	rnode->rn_clev = clevel;
	return (rnode);
}

/*
 * Free a radix node.
 */
static __inline void
vm_radix_node_put(struct vm_radix_node *rnode)
{

	uma_zfree(vm_radix_node_zone, rnode);
}

/*
 * Return the position in the child array for a given level.
 */
static __inline int
vm_radix_slot(vm_pindex_t index, uint16_t level)
{

	return ((index >> ((VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH)) &
	    VM_RADIX_MASK);
}
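
/*
 * Example (assuming __LP64__): for index 0xabc,
 * vm_radix_slot(0xabc, 15) == 0xc and vm_radix_slot(0xabc, 14) == 0xb:
 * level 15 selects the least significant nibble, level 14 the next one up.
 */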

/* Trims the key after the specified level. */
static __inline vm_pindex_t
vm_radix_trimkey(vm_pindex_t index, uint16_t level)
{
	vm_pindex_t ret;

	ret = index;
	if (level < VM_RADIX_LIMIT) {
		ret >>= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH;
		ret <<= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH;
	}
	return (ret);
}
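
/*
 * Example (assuming __LP64__): vm_radix_trimkey(0xabc, 14) == 0xab0, since
 * all the nibbles below level 14 are cleared, while
 * vm_radix_trimkey(0xabc, 15) returns 0xabc unchanged because level 15 is
 * already the deepest one.
 */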

/*
 * Get the root node for a radix tree.
 */
static __inline struct vm_radix_node *
vm_radix_getroot(struct vm_radix *rtree)
{

	return ((struct vm_radix_node *)(rtree->rt_root & ~VM_RADIX_FLAGS));
}

/*
 * Set the root node for a radix tree.
 */
static __inline void
vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode)
{

	rtree->rt_root = (uintptr_t)rnode;
}

/*
 * Returns the associated page extracted from rnode if available,
 * and NULL otherwise.
 */
static __inline vm_page_t
vm_radix_node_page(struct vm_radix_node *rnode)
{

	return ((((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0) ?
	    (vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS) : NULL);
}

/*
 * Adds the page as a child of the provided node.
 */
static __inline void
vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
    vm_page_t page)
{
	int slot;

	slot = vm_radix_slot(index, clev);
	rnode->rn_child[slot] = (void *)((uintptr_t)page | VM_RADIX_ISLEAF);
}
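
/*
 * Illustration of the leaf-tagging scheme used above (assuming, as the
 * flag definitions imply, that page pointers are at least 2-byte aligned
 * so bit 0 is free): a child entry written as
 * (void *)((uintptr_t)page | VM_RADIX_ISLEAF) is recognized as a leaf by
 * vm_radix_node_page(), which clears VM_RADIX_FLAGS to recover the
 * original vm_page_t.  An untagged child is an internal node and is
 * followed as-is.
 */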

/*
 * Returns the level where two keys differ.
 * It cannot accept two equal keys.
 */
static __inline uint16_t
vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2)
{
	uint16_t clev;

	KASSERT(index1 != index2, ("%s: passing the same key value %jx",
	    __func__, (uintmax_t)index1));

	index1 ^= index2;
	for (clev = 0; clev <= VM_RADIX_LIMIT; clev++)
		if (vm_radix_slot(index1, clev))
			return (clev);
	panic("%s: cannot reach this point", __func__);
	return (0);
}
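
/*
 * Example (assuming __LP64__): vm_radix_keydiff(0x10, 0x20) == 14.  The
 * XOR of the keys is 0x30, whose most significant set nibble sits at
 * level 14, so level 14 is the first level at which the two keys can be
 * told apart.
 */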

/*
 * Returns TRUE if it can be determined that key does not belong to the
 * specified rnode.  Otherwise, returns FALSE.
 */
static __inline boolean_t
vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx)
{

	if (rnode->rn_clev > 0) {
		idx = vm_radix_trimkey(idx, rnode->rn_clev - 1);
		idx -= rnode->rn_owner;
		if (idx != 0)
			return (TRUE);
	}
	return (FALSE);
}
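
/*
 * Example (assuming __LP64__, hypothetical node state): for a node with
 * rn_clev == 15 and rn_owner == 0xab0, vm_radix_keybarr(rnode, 0xab5) is
 * FALSE because the trimmed key matches the owner prefix, while
 * vm_radix_keybarr(rnode, 0xac1) is TRUE because the keys already differ
 * above the node's level.
 */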

/*
 * Advances the idx key at the first level, scanning from ilev towards the
 * root, that is recorded in the map of available levels and whose slot is
 * not yet at its maximum.
 * Returns a value greater than 0 to signal that no valid level is
 * available or that the key has wrapped around.
 */
static __inline int
vm_radix_addlev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev)
{
	vm_pindex_t wrapidx;

	for (; levels[ilev] == FALSE ||
	    vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1); ilev--)
		if (ilev == 0)
			break;
	KASSERT(ilev > 0 || levels[0],
	    ("%s: levels back-scanning problem", __func__));
	if (ilev == 0 && vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1))
		return (1);
	wrapidx = *idx;
	*idx = vm_radix_trimkey(*idx, ilev);
	*idx += VM_RADIX_UNITLEVEL(ilev);
	return (*idx < wrapidx);
}
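
/*
 * Example (assuming __LP64__ and a hypothetical maplevels array with only
 * levels[14] == TRUE): vm_radix_addlev(&idx, levels, 15) with idx == 0x2f
 * first backs up to level 14 (level 15 is unusable, its slot 0xf being the
 * maximum), then trims idx to 0x20 and adds VM_RADIX_UNITLEVEL(14) == 0x10,
 * leaving idx == 0x30 and returning 0 since no wrap-around occurred.
 */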

/*
 * Moves the idx key back at the first level, scanning from ilev towards
 * the root, that is recorded in the map of available levels and whose slot
 * is not yet at its minimum.
 * Returns a value greater than 0 to signal that no valid level is
 * available or that the key has wrapped around.
 */
static __inline int
vm_radix_declev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev)
{
	vm_pindex_t wrapidx;

	for (; levels[ilev] == FALSE ||
	    vm_radix_slot(*idx, ilev) == 0; ilev--)
		if (ilev == 0)
			break;
	KASSERT(ilev > 0 || levels[0],
	    ("%s: levels back-scanning problem", __func__));
	if (ilev == 0 && vm_radix_slot(*idx, ilev) == 0)
		return (1);
	wrapidx = *idx;
	*idx = vm_radix_trimkey(*idx, ilev);
	*idx |= VM_RADIX_UNITLEVEL(ilev) - 1;
	*idx -= VM_RADIX_UNITLEVEL(ilev);
	return (*idx > wrapidx);
}
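
/*
 * Example (assuming __LP64__ and a hypothetical maplevels array with only
 * levels[14] == TRUE): vm_radix_declev(&idx, levels, 15) with idx == 0x30
 * backs up to level 14 (slot 0 at level 15 cannot be decremented), trims
 * idx to 0x30, fills the lower nibble to get 0x3f and subtracts
 * VM_RADIX_UNITLEVEL(14) == 0x10, leaving idx == 0x2f and returning 0
 * since no wrap-around occurred.
 */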

/*
 * Internal helper for vm_radix_reclaim_allnodes().
 * This function is recursive.
 */
static void
vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
{
	int slot;

	for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) {
		if (rnode->rn_child[slot] == NULL)
			continue;
		if (vm_radix_node_page(rnode->rn_child[slot]) == NULL)
			vm_radix_reclaim_allnodes_int(rnode->rn_child[slot]);
		rnode->rn_child[slot] = NULL;
		rnode->rn_count--;
	}
	vm_radix_node_put(rnode);
}

#ifdef INVARIANTS
/*
 * Radix node zone destructor.
 */
static void
vm_radix_node_zone_dtor(void *mem, int size __unused, void *arg __unused)
{
	struct vm_radix_node *rnode;
	int slot;

	rnode = mem;
	KASSERT(rnode->rn_count == 0,
	    ("vm_radix_node_put: rnode %p has %d children", rnode,
	    rnode->rn_count));
	for (slot = 0; slot < VM_RADIX_COUNT; slot++)
		KASSERT(rnode->rn_child[slot] == NULL,
		    ("vm_radix_node_put: rnode %p has a child", rnode));
}
#endif

/*
 * Radix node zone initializer.
 */
static int
vm_radix_node_zone_init(void *mem, int size __unused, int flags __unused)
{
	struct vm_radix_node *rnode;

	rnode = mem;
	memset(rnode->rn_child, 0, sizeof(rnode->rn_child));
	return (0);
}

/*
 * Pre-allocate intermediate nodes from the UMA slab zone.
 */
static void
vm_radix_prealloc(void *arg __unused)
{

	if (!uma_zone_reserve_kva(vm_radix_node_zone, cnt.v_page_count))
		panic("%s: unable to reserve KVA for the zone", __func__);
	uma_prealloc(vm_radix_node_zone, cnt.v_page_count);
}
SYSINIT(vm_radix_prealloc, SI_SUB_KMEM, SI_ORDER_SECOND, vm_radix_prealloc,
    NULL);

/*
 * Initialize the UMA slab zone.
 * Until vm_radix_prealloc() is called, the zone will be served by the
 * UMA boot-time pre-allocated pool of pages.
 */
void
vm_radix_init(void)
{

	vm_radix_node_zone = uma_zcreate("RADIX NODE",
	    sizeof(struct vm_radix_node), NULL,
#ifdef INVARIANTS
	    vm_radix_node_zone_dtor,
#else
	    NULL,
#endif
	    vm_radix_node_zone_init, NULL, VM_RADIX_PAD, UMA_ZONE_VM |
	    UMA_ZONE_NOFREE);
}

/*
 * Inserts the key-value pair into the trie.
 * Panics if the key already exists.
 */
void
vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
{
	vm_pindex_t index, newind;
	struct vm_radix_node *rnode, *tmp, *tmp2;
	vm_page_t m;
	int slot;
	uint16_t clev;

	index = page->pindex;

	/*
	 * The owner of record for root is not really important because it
	 * will never be used.
	 */
	rnode = vm_radix_getroot(rtree);
	if (rnode == NULL) {
		rnode = vm_radix_node_get(0, 1, 0);
		vm_radix_setroot(rtree, rnode);
		vm_radix_addpage(rnode, index, 0, page);
		return;
	}
	while (rnode != NULL) {
		if (vm_radix_keybarr(rnode, index))
			break;
		slot = vm_radix_slot(index, rnode->rn_clev);
		m = vm_radix_node_page(rnode->rn_child[slot]);
		if (m != NULL) {
			if (m->pindex == index)
				panic("%s: key %jx is already present",
				    __func__, (uintmax_t)index);
			clev = vm_radix_keydiff(m->pindex, index);
			tmp = vm_radix_node_get(vm_radix_trimkey(index,
			    clev - 1), 2, clev);
			rnode->rn_child[slot] = tmp;
			vm_radix_addpage(tmp, index, clev, page);
			vm_radix_addpage(tmp, m->pindex, clev, m);
			return;
		}
		if (rnode->rn_child[slot] == NULL) {
			rnode->rn_count++;
			vm_radix_addpage(rnode, index, rnode->rn_clev, page);
			return;
		}
		rnode = rnode->rn_child[slot];
	}
	if (rnode == NULL)
		panic("%s: path traversal ended unexpectedly", __func__);

	/*
	 * Scan the trie from the top and find the parent at which to insert
	 * the new object.
	 */
	newind = rnode->rn_owner;
	clev = vm_radix_keydiff(newind, index);
	slot = VM_RADIX_COUNT;
	for (rnode = vm_radix_getroot(rtree); ; rnode = tmp) {
		KASSERT(rnode != NULL, ("%s: edge cannot be NULL in the scan",
		    __func__));
		KASSERT(clev >= rnode->rn_clev,
		    ("%s: unexpected trie depth: clev: %d, rnode->rn_clev: %d",
		    __func__, clev, rnode->rn_clev));
		slot = vm_radix_slot(index, rnode->rn_clev);
		tmp = rnode->rn_child[slot];
		KASSERT(tmp != NULL && vm_radix_node_page(tmp) == NULL,
		    ("%s: unexpected lookup interruption", __func__));
		if (tmp->rn_clev > clev)
			break;
	}
	KASSERT(rnode != NULL && tmp != NULL && slot < VM_RADIX_COUNT,
	    ("%s: invalid scan parameters rnode: %p, tmp: %p, slot: %d",
	    __func__, (void *)rnode, (void *)tmp, slot));

	/*
	 * A new node is needed because the right insertion level has been
	 * reached.  Set up the new intermediate node and add the two
	 * children: the new object and the older edge.
	 */
	tmp2 = vm_radix_node_get(vm_radix_trimkey(index, clev - 1), 2,
	    clev);
	rnode->rn_child[slot] = tmp2;
	vm_radix_addpage(tmp2, index, clev, page);
	slot = vm_radix_slot(newind, clev);
	tmp2->rn_child[slot] = tmp;
}
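
/*
 * Worked example of the fast path above (hypothetical keys, assuming
 * __LP64__): inserting 0xab1 and then 0xab2 into an empty trie first
 * creates a root node at level 0 holding the tagged leaf for 0xab1; the
 * second insert finds that leaf in the same slot, computes
 * vm_radix_keydiff(0xab1, 0xab2) == 15 and replaces the leaf with an
 * intermediate node owning prefix 0xab0 at level 15, with both pages
 * attached as tagged children.
 */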

/*
 * Returns the value stored at the index.  If the index is not present,
 * NULL is returned.
 */
vm_page_t
vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *rnode;
	vm_page_t m;
	int slot;

	rnode = vm_radix_getroot(rtree);
	while (rnode != NULL) {
		if (vm_radix_keybarr(rnode, index))
			return (NULL);
		slot = vm_radix_slot(index, rnode->rn_clev);
		rnode = rnode->rn_child[slot];
		m = vm_radix_node_page(rnode);
		if (m != NULL) {
			if (m->pindex == index)
				return (m);
			else
				return (NULL);
		}
	}
	return (NULL);
}

/*
 * Look up the nearest entry at a position bigger than or equal to index.
 */
vm_page_t
vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
{
	vm_pindex_t inc;
	vm_page_t m;
	struct vm_radix_node *rnode;
	int slot;
	uint16_t difflev;
	boolean_t maplevels[VM_RADIX_LIMIT + 1];
#ifdef INVARIANTS
	int loops = 0;
#endif

restart:
	KASSERT(++loops < 1000, ("%s: too many loops", __func__));
	for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++)
		maplevels[difflev] = FALSE;
	rnode = vm_radix_getroot(rtree);
	while (rnode != NULL) {
		maplevels[rnode->rn_clev] = TRUE;

		/*
		 * If the keys differ before the current bisection node,
		 * the search key may roll back to the earliest available
		 * bisection node, or to the smaller value in the current
		 * domain (if the owner is bigger than the search key).
		 * The maplevels array records whether a node has been
		 * seen at a given level.  This aids the search for a
		 * valid bisection node.
		 */
		if (vm_radix_keybarr(rnode, index)) {
			difflev = vm_radix_keydiff(index, rnode->rn_owner);
			if (index > rnode->rn_owner) {
				if (vm_radix_addlev(&index, maplevels,
				    difflev) > 0)
					break;
			} else
				index = vm_radix_trimkey(rnode->rn_owner,
				    difflev);
			goto restart;
		}
		slot = vm_radix_slot(index, rnode->rn_clev);
		m = vm_radix_node_page(rnode->rn_child[slot]);
		if (m != NULL && m->pindex >= index)
			return (m);
		if (rnode->rn_child[slot] != NULL && m == NULL) {
			rnode = rnode->rn_child[slot];
			continue;
		}

		/*
		 * Look for an available edge or page within the current
		 * bisection node.
		 */
		if (slot < (VM_RADIX_COUNT - 1)) {
			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
			index = vm_radix_trimkey(index, rnode->rn_clev);
			index += inc;
			slot++;
			for (;; index += inc, slot++) {
				m = vm_radix_node_page(rnode->rn_child[slot]);
				if (m != NULL && m->pindex >= index)
					return (m);
				if ((rnode->rn_child[slot] != NULL &&
				    m == NULL) || slot == (VM_RADIX_COUNT - 1))
					break;
			}
		}

		/*
		 * If no valid page or edge bigger than the search slot was
		 * found in the traversal, skip to the next key at a higher
		 * level.
		 */
		if (slot == (VM_RADIX_COUNT - 1) &&
		    (rnode->rn_child[slot] == NULL || m != NULL)) {
			if (rnode->rn_clev == 0 || vm_radix_addlev(&index,
			    maplevels, rnode->rn_clev - 1) > 0)
				break;
			goto restart;
		}
		rnode = rnode->rn_child[slot];
	}
	return (NULL);
}

/*
 * Look up the nearest entry at a position less than or equal to index.
 */
vm_page_t
vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
{
	vm_pindex_t inc;
	vm_page_t m;
	struct vm_radix_node *rnode;
	int slot;
	uint16_t difflev;
	boolean_t maplevels[VM_RADIX_LIMIT + 1];
#ifdef INVARIANTS
	int loops = 0;
#endif

restart:
	KASSERT(++loops < 1000, ("%s: too many loops", __func__));
	for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++)
		maplevels[difflev] = FALSE;
	rnode = vm_radix_getroot(rtree);
	while (rnode != NULL) {
		maplevels[rnode->rn_clev] = TRUE;

		/*
		 * If the keys differ before the current bisection node,
		 * the search key may roll back to the earliest available
		 * bisection node, or to the higher value in the current
		 * domain (if the owner is smaller than the search key).
		 * The maplevels array records whether a node has been
		 * seen at a given level.  This aids the search for a
		 * valid bisection node.
		 */
		if (vm_radix_keybarr(rnode, index)) {
			difflev = vm_radix_keydiff(index, rnode->rn_owner);
			if (index > rnode->rn_owner) {
				index = vm_radix_trimkey(rnode->rn_owner,
				    difflev);
				index |= VM_RADIX_UNITLEVEL(difflev) - 1;
			} else if (vm_radix_declev(&index, maplevels,
			    difflev) > 0)
				break;
			goto restart;
		}
		slot = vm_radix_slot(index, rnode->rn_clev);
		m = vm_radix_node_page(rnode->rn_child[slot]);
		if (m != NULL && m->pindex <= index)
			return (m);
		if (rnode->rn_child[slot] != NULL && m == NULL) {
			rnode = rnode->rn_child[slot];
			continue;
		}

		/*
		 * Look for an available edge or page within the current
		 * bisection node.
		 */
		if (slot > 0) {
			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
			index = vm_radix_trimkey(index, rnode->rn_clev);
			index |= inc - 1;
			index -= inc;
			slot--;
			for (;; index -= inc, slot--) {
				m = vm_radix_node_page(rnode->rn_child[slot]);
				if (m != NULL && m->pindex <= index)
					return (m);
				if ((rnode->rn_child[slot] != NULL &&
				    m == NULL) || slot == 0)
					break;
			}
		}

		/*
		 * If no valid page or edge smaller than the search slot was
		 * found in the traversal, skip to the next key at a higher
		 * level.
		 */
		if (slot == 0 && (rnode->rn_child[slot] == NULL || m != NULL)) {
			if (rnode->rn_clev == 0 || vm_radix_declev(&index,
			    maplevels, rnode->rn_clev - 1) > 0)
				break;
			goto restart;
		}
		rnode = rnode->rn_child[slot];
	}
	return (NULL);
}

/*
 * Remove the specified index from the tree.
 * Panics if the key is not present.
 */
void
vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *rnode, *parent;
	vm_page_t m;
	int i, slot;

	parent = NULL;
	rnode = vm_radix_getroot(rtree);
	for (;;) {
		if (rnode == NULL)
			panic("vm_radix_remove: impossible to locate the key");
		slot = vm_radix_slot(index, rnode->rn_clev);
		m = vm_radix_node_page(rnode->rn_child[slot]);
		if (m != NULL && m->pindex == index) {
			rnode->rn_child[slot] = NULL;
			rnode->rn_count--;
			if (rnode->rn_count > 1)
				break;
			if (parent == NULL) {
				if (rnode->rn_count == 0) {
					vm_radix_node_put(rnode);
					vm_radix_setroot(rtree, NULL);
				}
				break;
			}
			for (i = 0; i < VM_RADIX_COUNT; i++)
				if (rnode->rn_child[i] != NULL)
					break;
			KASSERT(i != VM_RADIX_COUNT,
			    ("%s: invalid node configuration", __func__));
			slot = vm_radix_slot(index, parent->rn_clev);
			KASSERT(parent->rn_child[slot] == rnode,
			    ("%s: invalid child value", __func__));
			parent->rn_child[slot] = rnode->rn_child[i];
			rnode->rn_count--;
			rnode->rn_child[i] = NULL;
			vm_radix_node_put(rnode);
			break;
		}
		if (m != NULL && m->pindex != index)
			panic("%s: invalid key found", __func__);
		parent = rnode;
		rnode = rnode->rn_child[slot];
	}
}
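
/*
 * Illustration of the collapse step above (hypothetical state): if a node
 * at level 15 holds the two leaves 0xab1 and 0xab2 and 0xab2 is removed,
 * rn_count drops to 1, so the parent's slot that pointed to the node is
 * rewired directly to the remaining leaf and the node is freed, restoring
 * path compression.
 */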

/*
 * Remove and free all the nodes from the radix tree.
 * This function is recursive, but the recursion is tightly bounded
 * because the maximum depth of the tree is fixed.
 */
void
vm_radix_reclaim_allnodes(struct vm_radix *rtree)
{
	struct vm_radix_node *root;

	root = vm_radix_getroot(rtree);
	if (root == NULL)
		return;
	vm_radix_reclaim_allnodes_int(root);
	vm_radix_setroot(rtree, NULL);
}

#ifdef DDB
/*
 * Show details about the given radix node.
 */
DB_SHOW_COMMAND(radixnode, db_show_radixnode)
{
	struct vm_radix_node *rnode;
	int i;

	if (!have_addr)
		return;
	rnode = (struct vm_radix_node *)addr;
	db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
	    (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
	    rnode->rn_clev);
	for (i = 0; i < VM_RADIX_COUNT; i++)
		if (rnode->rn_child[i] != NULL)
			db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
			    i, (void *)rnode->rn_child[i],
			    (void *)vm_radix_node_page(rnode->rn_child[i]),
			    rnode->rn_clev);
}
#endif /* DDB */