/*
 * Copyright (c) 2013 EMC Corp.
 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * Path-compressed radix trie implementation.
 * The following code is not generalized into a general purpose library
 * because there are way too many parameters embedded that should really
 * be decided by the library consumers.  At the same time, consumers
 * of this code must achieve the highest possible performance.
 *
 * The implementation takes into account the following rationale:
 * - The size of the nodes should be as small as possible but still big
 *   enough to avoid a large maximum depth for the trie.  This is a balance
 *   between the necessity to not wire too much physical memory for the nodes
 *   and the necessity to avoid too much cache pollution during the trie
 *   operations.
 * - There is not a huge bias toward the number of lookup operations over
 *   the number of insert and remove operations.  This basically implies
 *   that optimizations supposedly helping one operation but hurting the
 *   other must be carefully evaluated.
 * - On average not many nodes are expected to be fully populated, hence
 *   level compression may just complicate things.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vmmeter.h>

#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_radix.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

/*
 * These widths should allow the pointers to a node's children to fit within
 * a single cache line.  The extra levels from a narrow width should not be
 * a problem thanks to path compression.
 */
#ifdef __LP64__
#define	VM_RADIX_WIDTH	4
#else
#define	VM_RADIX_WIDTH	3
#endif

#define	VM_RADIX_COUNT	(1 << VM_RADIX_WIDTH)
#define	VM_RADIX_MASK	(VM_RADIX_COUNT - 1)
#define	VM_RADIX_LIMIT							\
	(howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH) - 1)
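
/*
 * A worked example of the constants above, assuming a 64-bit vm_pindex_t:
 * on LP64, VM_RADIX_WIDTH is 4, so VM_RADIX_COUNT is 16, VM_RADIX_MASK is
 * 0xf, a node's child array occupies 16 * 8 = 128 bytes, and
 * VM_RADIX_LIMIT is howmany(64, 4) - 1 = 15, i.e. the levels run from 0
 * (the least significant nibble of the key) up to 15.  On 32-bit
 * platforms the node instead has 8 children (8 * 4 = 32 bytes of child
 * pointers) and VM_RADIX_LIMIT is howmany(64, 3) - 1 = 21.
 */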

/* Flag bits stored in node pointers. */
#define	VM_RADIX_ISLEAF	0x1
#define	VM_RADIX_FLAGS	0x1
#define	VM_RADIX_PAD	VM_RADIX_FLAGS

/* Returns one unit associated with the specified level. */
#define	VM_RADIX_UNITLEVEL(lev)						\
	((vm_pindex_t)1 << ((lev) * VM_RADIX_WIDTH))

struct vm_radix_node {
	vm_pindex_t	 rn_owner;			/* Owner of record. */
	uint16_t	 rn_count;			/* Valid children. */
	uint16_t	 rn_clev;			/* Current level. */
	void		*rn_child[VM_RADIX_COUNT];	/* Child nodes. */
};
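
/*
 * Node invariants, stated here for clarity: rn_owner holds the common
 * prefix of every key stored below the node, with all bits below level
 * rn_clev + 1 cleared; rn_count is the number of non-NULL rn_child
 * slots; and each rn_child entry is either NULL, a pointer to a child
 * node, or a vm_page_t tagged with VM_RADIX_ISLEAF.
 */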

static uma_zone_t vm_radix_node_zone;

/*
 * Allocate a radix node.  Pre-allocation should ensure that the request
 * will always be satisfied.
 */
static __inline struct vm_radix_node *
vm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel)
{
	struct vm_radix_node *rnode;

	rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT);

	/*
	 * The required number of nodes should already be pre-allocated
	 * by vm_radix_prealloc().  However, UMA can hold a few nodes
	 * in per-CPU buckets, which will not be accessible to the
	 * current CPU.  Thus, the allocation could return NULL when
	 * the pre-allocated pool is close to exhaustion.  In practice
	 * this should never occur because an insert does not always
	 * require a new node; the pre-allocated pool therefore has
	 * some extra pages that prevent this from becoming a problem.
	 */
	if (rnode == NULL)
		panic("%s: uma_zalloc() returned NULL for a new node",
		    __func__);
	rnode->rn_owner = owner;
	rnode->rn_count = count;
	rnode->rn_clev = clevel;
	return (rnode);
}

/*
 * Free a radix node.
 */
static __inline void
vm_radix_node_put(struct vm_radix_node *rnode)
{

	uma_zfree(vm_radix_node_zone, rnode);
}

/*
 * Return the position in the child array for the given index and level.
 */
static __inline int
vm_radix_slot(vm_pindex_t index, uint16_t level)
{

	return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK);
}
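
/*
 * For example, with VM_RADIX_WIDTH == 4, vm_radix_slot(0x1234, 2)
 * evaluates to (0x1234 >> 8) & 0xf == 0x2: level 0 selects the least
 * significant nibble of the key, level 1 the next one, and so on.
 */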

/* Trims the key by zeroing its bits below the specified level. */
static __inline vm_pindex_t
vm_radix_trimkey(vm_pindex_t index, uint16_t level)
{
	vm_pindex_t ret;

	ret = index;
	if (level > 0) {
		ret >>= level * VM_RADIX_WIDTH;
		ret <<= level * VM_RADIX_WIDTH;
	}
	return (ret);
}
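
/*
 * For example, with VM_RADIX_WIDTH == 4, vm_radix_trimkey(0x1234, 2)
 * returns 0x1200: the slots at levels 0 and 1 are cleared while the
 * higher-level slots are preserved.
 */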

/*
 * Get the root node for a radix tree.
 */
static __inline struct vm_radix_node *
vm_radix_getroot(struct vm_radix *rtree)
{

	return ((struct vm_radix_node *)rtree->rt_root);
}

/*
 * Set the root node for a radix tree.
 */
static __inline void
vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode)
{

	rtree->rt_root = (uintptr_t)rnode;
}

/*
 * Returns TRUE if the specified radix node is a leaf and FALSE otherwise.
 */
static __inline boolean_t
vm_radix_isleaf(struct vm_radix_node *rnode)
{

	return (((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0);
}

/*
 * Returns the page associated with the leaf pointer rnode.
 */
static __inline vm_page_t
vm_radix_topage(struct vm_radix_node *rnode)
{

	return ((vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS));
}

/*
 * Adds the page as a child of the provided node.  The caller is
 * responsible for updating rn_count.
 */
static __inline void
vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
    vm_page_t page)
{
	int slot;

	slot = vm_radix_slot(index, clev);
	rnode->rn_child[slot] = (void *)((uintptr_t)page | VM_RADIX_ISLEAF);
}

/*
 * Returns the highest level at which two keys differ.
 * The two keys must not be equal.
 */
static __inline uint16_t
vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2)
{
	uint16_t clev;

	KASSERT(index1 != index2, ("%s: passing the same key value %jx",
	    __func__, (uintmax_t)index1));

	index1 ^= index2;
	for (clev = VM_RADIX_LIMIT;; clev--)
		if (vm_radix_slot(index1, clev) != 0)
			return (clev);
}
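
/*
 * For example, with VM_RADIX_WIDTH == 4, vm_radix_keydiff(0x1234, 0x1634)
 * returns 2: the XOR of the keys is 0x0400, whose highest non-zero slot
 * is at level 2.
 */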

/*
 * Returns TRUE if it can be determined that key does not belong to the
 * specified rnode.  Otherwise, returns FALSE.
 */
static __inline boolean_t
vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx)
{

	if (rnode->rn_clev < VM_RADIX_LIMIT) {
		idx = vm_radix_trimkey(idx, rnode->rn_clev + 1);
		return (idx != rnode->rn_owner);
	}
	return (FALSE);
}
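
/*
 * For example, with VM_RADIX_WIDTH == 4, a node with rn_owner == 0x1200
 * and rn_clev == 1 covers the keys 0x1200 through 0x12ff; for the key
 * 0x1334, vm_radix_trimkey(0x1334, 2) == 0x1300 != 0x1200, so
 * vm_radix_keybarr() returns TRUE and the search can stop early.
 */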

/*
 * Internal helper for vm_radix_reclaim_allnodes().
 * This function is recursive.
 */
static void
vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
{
	int slot;

	KASSERT(rnode->rn_count <= VM_RADIX_COUNT,
	    ("vm_radix_reclaim_allnodes_int: bad count in rnode %p", rnode));
	for (slot = 0; rnode->rn_count != 0; slot++) {
		if (rnode->rn_child[slot] == NULL)
			continue;
		if (!vm_radix_isleaf(rnode->rn_child[slot]))
			vm_radix_reclaim_allnodes_int(rnode->rn_child[slot]);
		rnode->rn_child[slot] = NULL;
		rnode->rn_count--;
	}
	vm_radix_node_put(rnode);
}

#ifdef INVARIANTS
/*
 * Radix node zone destructor.
 */
static void
vm_radix_node_zone_dtor(void *mem, int size __unused, void *arg __unused)
{
	struct vm_radix_node *rnode;
	int slot;

	rnode = mem;
	KASSERT(rnode->rn_count == 0,
	    ("vm_radix_node_zone_dtor: rnode %p has %d children", rnode,
	    rnode->rn_count));
	for (slot = 0; slot < VM_RADIX_COUNT; slot++)
		KASSERT(rnode->rn_child[slot] == NULL,
		    ("vm_radix_node_zone_dtor: rnode %p has a child", rnode));
}
#endif

/*
 * Radix node zone initializer.
 */
static int
vm_radix_node_zone_init(void *mem, int size __unused, int flags __unused)
{
	struct vm_radix_node *rnode;

	rnode = mem;
	memset(rnode->rn_child, 0, sizeof(rnode->rn_child));
	return (0);
}

/*
 * Pre-allocate intermediate nodes from the UMA slab zone.
 */
static void
vm_radix_prealloc(void *arg __unused)
{
	int nodes;

	/*
	 * Calculate the number of reserved nodes, discounting the pages that
	 * are needed to store them.
	 */
	nodes = ((vm_paddr_t)cnt.v_page_count * PAGE_SIZE) / (PAGE_SIZE +
	    sizeof(struct vm_radix_node));
	if (!uma_zone_reserve_kva(vm_radix_node_zone, nodes))
		panic("%s: unable to reserve KVA", __func__);
	uma_prealloc(vm_radix_node_zone, nodes);
}
SYSINIT(vm_radix_prealloc, SI_SUB_KMEM, SI_ORDER_SECOND, vm_radix_prealloc,
    NULL);

/*
 * Initialize the UMA slab zone.
 * Until vm_radix_prealloc() is called, the zone will be served by the
 * UMA boot-time pre-allocated pool of pages.
 */
void
vm_radix_init(void)
{

	vm_radix_node_zone = uma_zcreate("RADIX NODE",
	    sizeof(struct vm_radix_node), NULL,
#ifdef INVARIANTS
	    vm_radix_node_zone_dtor,
#else
	    NULL,
#endif
	    vm_radix_node_zone_init, NULL, VM_RADIX_PAD, UMA_ZONE_VM |
	    UMA_ZONE_NOFREE);
}

/*
 * Inserts the key-value pair into the trie.
 * Panics if the key already exists.
 */
void
vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
{
	vm_pindex_t index, newind;
	void **parentp;
	struct vm_radix_node *rnode, *tmp;
	vm_page_t m;
	int slot;
	uint16_t clev;

	index = page->pindex;

	/*
	 * The owner of record for root is not really important because it
	 * will never be used.
	 */
	rnode = vm_radix_getroot(rtree);
	if (rnode == NULL) {
		rtree->rt_root = (uintptr_t)page | VM_RADIX_ISLEAF;
		return;
	}
	parentp = (void **)&rtree->rt_root;
	for (;;) {
		if (vm_radix_isleaf(rnode)) {
			m = vm_radix_topage(rnode);
			if (m->pindex == index)
				panic("%s: key %jx is already present",
				    __func__, (uintmax_t)index);
			clev = vm_radix_keydiff(m->pindex, index);
			tmp = vm_radix_node_get(vm_radix_trimkey(index,
			    clev + 1), 2, clev);
			*parentp = tmp;
			vm_radix_addpage(tmp, index, clev, page);
			vm_radix_addpage(tmp, m->pindex, clev, m);
			return;
		} else if (vm_radix_keybarr(rnode, index))
			break;
		slot = vm_radix_slot(index, rnode->rn_clev);
		if (rnode->rn_child[slot] == NULL) {
			rnode->rn_count++;
			vm_radix_addpage(rnode, index, rnode->rn_clev, page);
			return;
		}
		parentp = &rnode->rn_child[slot];
		rnode = rnode->rn_child[slot];
	}

	/*
	 * A new node is needed because the right insertion level has been
	 * reached.  Set up the new intermediate node and add the two
	 * children: the new page and the old edge.
	 */
	newind = rnode->rn_owner;
	clev = vm_radix_keydiff(newind, index);
	tmp = vm_radix_node_get(vm_radix_trimkey(index, clev + 1), 2,
	    clev);
	*parentp = tmp;
	vm_radix_addpage(tmp, index, clev, page);
	slot = vm_radix_slot(newind, clev);
	tmp->rn_child[slot] = rnode;
}
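
/*
 * A sketch of typical usage, assuming the trie is the one embedded in a
 * VM object and that the object lock is held (this is roughly what
 * vm_page_insert() does; the surrounding code is elided):
 *
 *	m->pindex = pindex;
 *	vm_radix_insert(&object->rtree, m);
 */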

/*
 * Returns the value stored at the index.  If the index is not present,
 * NULL is returned.
 */
vm_page_t
vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *rnode;
	vm_page_t m;
	int slot;

	rnode = vm_radix_getroot(rtree);
	while (rnode != NULL) {
		if (vm_radix_isleaf(rnode)) {
			m = vm_radix_topage(rnode);
			if (m->pindex == index)
				return (m);
			else
				break;
		} else if (vm_radix_keybarr(rnode, index))
			break;
		slot = vm_radix_slot(index, rnode->rn_clev);
		rnode = rnode->rn_child[slot];
	}
	return (NULL);
}
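
/*
 * A worked trace, assuming VM_RADIX_WIDTH == 4 and a trie holding only
 * the indices 0x1234 and 0x1634: the root is a single node with
 * rn_clev == 2 (the level at which the keys differ) and
 * rn_owner == 0x1000.  Looking up 0x1634, vm_radix_keybarr() returns
 * FALSE (trimming 0x1634 at level 3 yields 0x1000, matching rn_owner),
 * so the walk takes slot vm_radix_slot(0x1634, 2) == 6 and finds the
 * leaf there, whose pindex matches the search index.
 */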

/*
 * Look up the nearest entry at a position greater than or equal to index.
 */
vm_page_t
vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *stack[VM_RADIX_LIMIT];
	vm_pindex_t inc;
	vm_page_t m;
	struct vm_radix_node *child, *rnode;
#ifdef INVARIANTS
	int loops = 0;
#endif
	int slot, tos;

	rnode = vm_radix_getroot(rtree);
	if (rnode == NULL)
		return (NULL);
	else if (vm_radix_isleaf(rnode)) {
		m = vm_radix_topage(rnode);
		if (m->pindex >= index)
			return (m);
		else
			return (NULL);
	}
	tos = 0;
	for (;;) {
		/*
		 * If the keys differ before the current bisection node,
		 * then the search key might roll back to the earliest
		 * available bisection node or to the smallest key
		 * in the current node (if the owner is bigger than the
		 * search key).
		 */
		if (vm_radix_keybarr(rnode, index)) {
			if (index > rnode->rn_owner) {
ascend:
				KASSERT(++loops < 1000,
				    ("vm_radix_lookup_ge: too many loops"));

				/*
				 * Pop nodes from the stack until either the
				 * stack is empty or a node that could have a
				 * matching descendant is found.
				 */
				do {
					if (tos == 0)
						return (NULL);
					rnode = stack[--tos];
				} while (vm_radix_slot(index,
				    rnode->rn_clev) == (VM_RADIX_COUNT - 1));

				/*
				 * The following computation cannot overflow
				 * because index's slot at the current level
				 * is less than VM_RADIX_COUNT - 1.
				 */
				index = vm_radix_trimkey(index,
				    rnode->rn_clev);
				index += VM_RADIX_UNITLEVEL(rnode->rn_clev);
			} else
				index = rnode->rn_owner;
			KASSERT(!vm_radix_keybarr(rnode, index),
			    ("vm_radix_lookup_ge: keybarr failed"));
		}
		slot = vm_radix_slot(index, rnode->rn_clev);
		child = rnode->rn_child[slot];
		if (vm_radix_isleaf(child)) {
			m = vm_radix_topage(child);
			if (m->pindex >= index)
				return (m);
		} else if (child != NULL)
			goto descend;

		/*
		 * Look for an available edge or page within the current
		 * bisection node.
		 */
		if (slot < (VM_RADIX_COUNT - 1)) {
			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
			index = vm_radix_trimkey(index, rnode->rn_clev);
			do {
				index += inc;
				slot++;
				child = rnode->rn_child[slot];
				if (vm_radix_isleaf(child)) {
					m = vm_radix_topage(child);
					if (m->pindex >= index)
						return (m);
				} else if (child != NULL)
					goto descend;
			} while (slot < (VM_RADIX_COUNT - 1));
		}
		KASSERT(child == NULL || vm_radix_isleaf(child),
		    ("vm_radix_lookup_ge: child is radix node"));

		/*
		 * If a page or edge bigger than the search slot is not found
		 * in the current node, ascend to the next higher-level node.
		 */
		goto ascend;
descend:
		KASSERT(rnode->rn_clev > 0,
		    ("vm_radix_lookup_ge: pushing leaf's parent"));
		KASSERT(tos < VM_RADIX_LIMIT,
		    ("vm_radix_lookup_ge: stack overflow"));
		stack[tos++] = rnode;
		rnode = child;
	}
}
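
/*
 * A sketch of a typical iteration pattern, assuming the trie embedded in
 * a VM object with the object lock held: every resident page at or after
 * the index "start" can be visited in ascending order.  The process()
 * call is a placeholder for the per-page work, and the pindex + 1 step
 * assumes the scan does not wrap past the largest possible index:
 *
 *	for (m = vm_radix_lookup_ge(&object->rtree, start); m != NULL;
 *	    m = vm_radix_lookup_ge(&object->rtree, m->pindex + 1))
 *		process(m);
 */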

/*
 * Look up the nearest entry at a position less than or equal to index.
 */
vm_page_t
vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *stack[VM_RADIX_LIMIT];
	vm_pindex_t inc;
	vm_page_t m;
	struct vm_radix_node *child, *rnode;
#ifdef INVARIANTS
	int loops = 0;
#endif
	int slot, tos;

	rnode = vm_radix_getroot(rtree);
	if (rnode == NULL)
		return (NULL);
	else if (vm_radix_isleaf(rnode)) {
		m = vm_radix_topage(rnode);
		if (m->pindex <= index)
			return (m);
		else
			return (NULL);
	}
	tos = 0;
	for (;;) {
		/*
		 * If the keys differ before the current bisection node,
		 * then the search key might roll back to the earliest
		 * available bisection node or to the largest key
		 * in the current node (if the owner is smaller than the
		 * search key).
		 */
		if (vm_radix_keybarr(rnode, index)) {
			if (index > rnode->rn_owner) {
				index = rnode->rn_owner + VM_RADIX_COUNT *
				    VM_RADIX_UNITLEVEL(rnode->rn_clev);
			} else {
ascend:
				KASSERT(++loops < 1000,
				    ("vm_radix_lookup_le: too many loops"));

				/*
				 * Pop nodes from the stack until either the
				 * stack is empty or a node that could have a
				 * matching descendant is found.
				 */
				do {
					if (tos == 0)
						return (NULL);
					rnode = stack[--tos];
				} while (vm_radix_slot(index,
				    rnode->rn_clev) == 0);

				/*
				 * The following computation cannot overflow
				 * because index's slot at the current level
				 * is greater than 0.
				 */
				index = vm_radix_trimkey(index,
				    rnode->rn_clev);
			}
			index--;
			KASSERT(!vm_radix_keybarr(rnode, index),
			    ("vm_radix_lookup_le: keybarr failed"));
		}
		slot = vm_radix_slot(index, rnode->rn_clev);
		child = rnode->rn_child[slot];
		if (vm_radix_isleaf(child)) {
			m = vm_radix_topage(child);
			if (m->pindex <= index)
				return (m);
		} else if (child != NULL)
			goto descend;

		/*
		 * Look for an available edge or page within the current
		 * bisection node.
		 */
		if (slot > 0) {
			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
			index |= inc - 1;
			do {
				index -= inc;
				slot--;
				child = rnode->rn_child[slot];
				if (vm_radix_isleaf(child)) {
					m = vm_radix_topage(child);
					if (m->pindex <= index)
						return (m);
				} else if (child != NULL)
					goto descend;
			} while (slot > 0);
		}
		KASSERT(child == NULL || vm_radix_isleaf(child),
		    ("vm_radix_lookup_le: child is radix node"));

		/*
		 * If a page or edge smaller than the search slot is not found
		 * in the current node, ascend to the next higher-level node.
		 */
		goto ascend;
descend:
		KASSERT(rnode->rn_clev > 0,
		    ("vm_radix_lookup_le: pushing leaf's parent"));
		KASSERT(tos < VM_RADIX_LIMIT,
		    ("vm_radix_lookup_le: stack overflow"));
		stack[tos++] = rnode;
		rnode = child;
	}
}

/*
 * Remove the specified index from the tree.
 * Panics if the key is not present.
 */
void
vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *rnode, *parent;
	vm_page_t m;
	int i, slot;

	rnode = vm_radix_getroot(rtree);
	if (vm_radix_isleaf(rnode)) {
		m = vm_radix_topage(rnode);
		if (m->pindex != index)
			panic("%s: invalid key found", __func__);
		vm_radix_setroot(rtree, NULL);
		return;
	}
	parent = NULL;
	for (;;) {
		if (rnode == NULL)
			panic("vm_radix_remove: impossible to locate the key");
		slot = vm_radix_slot(index, rnode->rn_clev);
		if (vm_radix_isleaf(rnode->rn_child[slot])) {
			m = vm_radix_topage(rnode->rn_child[slot]);
			if (m->pindex != index)
				panic("%s: invalid key found", __func__);
			rnode->rn_child[slot] = NULL;
			rnode->rn_count--;
			if (rnode->rn_count > 1)
				break;
			/*
			 * The node has a single child left: replace the
			 * node with its child to keep paths compressed.
			 */
			for (i = 0; i < VM_RADIX_COUNT; i++)
				if (rnode->rn_child[i] != NULL)
					break;
			KASSERT(i != VM_RADIX_COUNT,
			    ("%s: invalid node configuration", __func__));
			if (parent == NULL)
				vm_radix_setroot(rtree, rnode->rn_child[i]);
			else {
				slot = vm_radix_slot(index, parent->rn_clev);
				KASSERT(parent->rn_child[slot] == rnode,
				    ("%s: invalid child value", __func__));
				parent->rn_child[slot] = rnode->rn_child[i];
			}
			rnode->rn_count--;
			rnode->rn_child[i] = NULL;
			vm_radix_node_put(rnode);
			break;
		}
		parent = rnode;
		rnode = rnode->rn_child[slot];
	}
}

/*
 * Remove and free all the nodes from the radix tree.
 * This function is recursive, but the recursion is tightly bounded because
 * the maximum depth of the tree is fixed.
 * The pages referenced by the leaves are not touched; freeing them is the
 * caller's responsibility.
 */
void
vm_radix_reclaim_allnodes(struct vm_radix *rtree)
{
	struct vm_radix_node *root;

	root = vm_radix_getroot(rtree);
	if (root == NULL)
		return;
	vm_radix_setroot(rtree, NULL);
	if (!vm_radix_isleaf(root))
		vm_radix_reclaim_allnodes_int(root);
}

#ifdef DDB
/*
 * Show details about the given radix node.
 */
DB_SHOW_COMMAND(radixnode, db_show_radixnode)
{
	struct vm_radix_node *rnode;
	int i;

	if (!have_addr)
		return;
	rnode = (struct vm_radix_node *)addr;
	db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
	    (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
	    rnode->rn_clev);
	for (i = 0; i < VM_RADIX_COUNT; i++)
		if (rnode->rn_child[i] != NULL)
			db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
			    i, (void *)rnode->rn_child[i],
			    vm_radix_isleaf(rnode->rn_child[i]) ?
			    vm_radix_topage(rnode->rn_child[i]) : NULL,
			    rnode->rn_clev);
}
#endif /* DDB */