xref: /freebsd/sys/kern/subr_pctrie.c (revision 73551d4f6053992fdc73b5d6dd57f26043e9548b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 EMC Corp.
5  * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
6  * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  */
31 
32 /*
33  * Path-compressed radix trie implementation.
34  *
35  * The implementation takes into account the following rationale:
36  * - Size of the nodes should be as small as possible but still big enough
37  *   to avoid a large maximum depth for the trie.  This is a balance
38  *   between the necessity to not wire too much physical memory for the nodes
39  *   and the necessity to avoid too much cache pollution during the trie
40  *   operations.
41  * - There is not a huge bias toward the number of lookup operations over
42  *   the number of insert and remove operations.  This basically implies
43  *   that optimizations supposedly helping one operation but hurting the
44  *   other might be carefully evaluated.
45  * - On average not many nodes are expected to be fully populated, hence
46  *   level compression may just complicate things.
47  */
48 
49 #include <sys/cdefs.h>
50 __FBSDID("$FreeBSD$");
51 
52 #include "opt_ddb.h"
53 
54 #include <sys/param.h>
55 #include <sys/systm.h>
56 #include <sys/kernel.h>
57 #include <sys/libkern.h>
58 #include <sys/pctrie.h>
59 #include <sys/proc.h>	/* smr.h depends on struct thread. */
60 #include <sys/smr.h>
61 #include <sys/smr_types.h>
62 
63 #ifdef DDB
64 #include <ddb/ddb.h>
65 #endif
66 
67 #define	PCTRIE_MASK	(PCTRIE_COUNT - 1)
68 #define	PCTRIE_LIMIT	(howmany(sizeof(uint64_t) * NBBY, PCTRIE_WIDTH) - 1)
69 
70 #if PCTRIE_WIDTH == 3
71 typedef uint8_t pn_popmap_t;
72 #elif PCTRIE_WIDTH == 4
73 typedef uint16_t pn_popmap_t;
74 #elif PCTRIE_WIDTH == 5
75 typedef uint32_t pn_popmap_t;
76 #else
77 #error Unsupported width
78 #endif
79 _Static_assert(sizeof(pn_popmap_t) <= sizeof(int),
80     "pn_popmap_t too wide");
81 
82 /* Flag bits stored in node pointers. */
83 #define	PCTRIE_ISLEAF	0x1
84 #define	PCTRIE_FLAGS	0x1
85 #define	PCTRIE_PAD	PCTRIE_FLAGS
86 
87 /* Returns one unit associated with specified level. */
88 #define	PCTRIE_UNITLEVEL(lev)						\
89 	((uint64_t)1 << ((lev) * PCTRIE_WIDTH))
90 
91 struct pctrie_node;
92 typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t;
93 
94 struct pctrie_node {
95 	uint64_t	pn_owner;			/* Owner of record. */
96 	pn_popmap_t	pn_popmap;			/* Valid children. */
97 	uint8_t		pn_clev;			/* Current level. */
98 	smr_pctnode_t	pn_child[PCTRIE_COUNT];		/* Child nodes. */
99 };
100 
101 enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED };
102 
103 static __inline void pctrie_node_store(smr_pctnode_t *p, void *val,
104     enum pctrie_access access);
105 
106 /*
107  * Return the position in the array for a given level.
108  */
109 static __inline int
110 pctrie_slot(uint64_t index, uint16_t level)
111 {
112 	return ((index >> (level * PCTRIE_WIDTH)) & PCTRIE_MASK);
113 }
114 
115 /* Computes the key (index) with the low-order 'level' radix-digits zeroed. */
116 static __inline uint64_t
117 pctrie_trimkey(uint64_t index, uint16_t level)
118 {
119 	return (index & -PCTRIE_UNITLEVEL(level));
120 }
121 
122 /*
123  * Allocate a node.  Pre-allocation should ensure that the request
124  * will always be satisfied.
125  */
126 static struct pctrie_node *
127 pctrie_node_get(struct pctrie *ptree, pctrie_alloc_t allocfn, uint64_t index,
128     uint16_t clevel)
129 {
130 	struct pctrie_node *node;
131 
132 	node = allocfn(ptree);
133 	if (node == NULL)
134 		return (NULL);
135 
136 	/*
137 	 * We want to clear the last child pointer after the final section
138 	 * has exited so lookup can not return false negatives.  It is done
139 	 * here because it will be cache-cold in the dtor callback.
140 	 */
141 	if (node->pn_popmap != 0) {
142 		pctrie_node_store(&node->pn_child[ffs(node->pn_popmap) - 1],
143 		    NULL, PCTRIE_UNSERIALIZED);
144 		node->pn_popmap = 0;
145 	}
146 	node->pn_owner = pctrie_trimkey(index, clevel + 1);
147 	node->pn_clev = clevel;
148 	return (node);
149 }
150 
151 /*
152  * Free radix node.
153  */
154 static __inline void
155 pctrie_node_put(struct pctrie *ptree, struct pctrie_node *node,
156     pctrie_free_t freefn)
157 {
158 #ifdef INVARIANTS
159 	int slot;
160 
161 	KASSERT(powerof2(node->pn_popmap),
162 	    ("pctrie_node_put: node %p has too many children %04x", node,
163 	    node->pn_popmap));
164 	for (slot = 0; slot < PCTRIE_COUNT; slot++) {
165 		if ((node->pn_popmap & (1 << slot)) != 0)
166 			continue;
167 		KASSERT(smr_unserialized_load(&node->pn_child[slot], true) ==
168 		    NULL, ("pctrie_node_put: node %p has a child", node));
169 	}
170 #endif
171 	freefn(ptree, node);
172 }
173 
174 /*
175  * Fetch a node pointer from a slot.
176  */
177 static __inline struct pctrie_node *
178 pctrie_node_load(smr_pctnode_t *p, smr_t smr, enum pctrie_access access)
179 {
180 	switch (access) {
181 	case PCTRIE_UNSERIALIZED:
182 		return (smr_unserialized_load(p, true));
183 	case PCTRIE_LOCKED:
184 		return (smr_serialized_load(p, true));
185 	case PCTRIE_SMR:
186 		return (smr_entered_load(p, smr));
187 	}
188 	__assert_unreachable();
189 }
190 
191 static __inline void
192 pctrie_node_store(smr_pctnode_t *p, void *v, enum pctrie_access access)
193 {
194 	switch (access) {
195 	case PCTRIE_UNSERIALIZED:
196 		smr_unserialized_store(p, v, true);
197 		break;
198 	case PCTRIE_LOCKED:
199 		smr_serialized_store(p, v, true);
200 		break;
201 	case PCTRIE_SMR:
202 		panic("%s: Not supported in SMR section.", __func__);
203 		break;
204 	default:
205 		__assert_unreachable();
206 		break;
207 	}
208 }
209 
210 /*
211  * Get the root node for a tree.
212  */
213 static __inline struct pctrie_node *
214 pctrie_root_load(struct pctrie *ptree, smr_t smr, enum pctrie_access access)
215 {
216 	return (pctrie_node_load((smr_pctnode_t *)&ptree->pt_root, smr, access));
217 }
218 
219 /*
220  * Set the root node for a tree.
221  */
222 static __inline void
223 pctrie_root_store(struct pctrie *ptree, struct pctrie_node *node,
224     enum pctrie_access access)
225 {
226 	pctrie_node_store((smr_pctnode_t *)&ptree->pt_root, node, access);
227 }
228 
229 /*
230  * Returns TRUE if the specified node is a leaf and FALSE otherwise.
231  */
232 static __inline bool
233 pctrie_isleaf(struct pctrie_node *node)
234 {
235 
236 	return (((uintptr_t)node & PCTRIE_ISLEAF) != 0);
237 }
238 
239 /*
240  * Returns val with leaf bit set.
241  */
242 static __inline void *
243 pctrie_toleaf(uint64_t *val)
244 {
245 	return ((void *)((uintptr_t)val | PCTRIE_ISLEAF));
246 }
247 
248 /*
249  * Returns the associated val extracted from node.
250  */
251 static __inline uint64_t *
252 pctrie_toval(struct pctrie_node *node)
253 {
254 
255 	return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS));
256 }
257 
258 /*
259  * Make 'child' a child of 'node'.
260  */
261 static __inline void
262 pctrie_addnode(struct pctrie_node *node, uint64_t index, uint16_t clev,
263     struct pctrie_node *child, enum pctrie_access access)
264 {
265 	int slot;
266 
267 	slot = pctrie_slot(index, clev);
268 	pctrie_node_store(&node->pn_child[slot], child, access);
269 	node->pn_popmap ^= 1 << slot;
270 	KASSERT((node->pn_popmap & (1 << slot)) != 0,
271 	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
272 }
273 
274 /*
275  * Returns the level where two keys differ.
276  * It cannot accept 2 equal keys.
277  */
278 static __inline uint16_t
279 pctrie_keydiff(uint64_t index1, uint64_t index2)
280 {
281 
282 	KASSERT(index1 != index2, ("%s: passing the same key value %jx",
283 	    __func__, (uintmax_t)index1));
284 	CTASSERT(sizeof(long long) >= sizeof(uint64_t));
285 
286 	/*
287 	 * From the highest-order bit where the indexes differ,
288 	 * compute the highest level in the trie where they differ.
289 	 */
290 	return ((flsll(index1 ^ index2) - 1) / PCTRIE_WIDTH);
291 }
292 
293 /*
294  * Returns TRUE if it can be determined that key does not belong to the
295  * specified node.  Otherwise, returns FALSE.
296  */
297 static __inline bool
298 pctrie_keybarr(struct pctrie_node *node, uint64_t idx)
299 {
300 
301 	if (node->pn_clev < PCTRIE_LIMIT) {
302 		idx = pctrie_trimkey(idx, node->pn_clev + 1);
303 		return (idx != node->pn_owner);
304 	}
305 	return (false);
306 }
307 
308 /*
309  * Internal helper for pctrie_reclaim_allnodes().
310  * This function is recursive.
311  */
312 static void
313 pctrie_reclaim_allnodes_int(struct pctrie *ptree, struct pctrie_node *node,
314     pctrie_free_t freefn)
315 {
316 	struct pctrie_node *child;
317 	int slot;
318 
319 	while (node->pn_popmap != 0) {
320 		slot = ffs(node->pn_popmap) - 1;
321 		child = pctrie_node_load(&node->pn_child[slot], NULL,
322 		    PCTRIE_UNSERIALIZED);
323 		KASSERT(child != NULL, ("%s: bad popmap slot %d in node %p",
324 		    __func__, slot, node));
325 		if (!pctrie_isleaf(child))
326 			pctrie_reclaim_allnodes_int(ptree, child, freefn);
327 		node->pn_popmap ^= 1 << slot;
328 		pctrie_node_store(&node->pn_child[slot], NULL,
329 		    PCTRIE_UNSERIALIZED);
330 	}
331 	pctrie_node_put(ptree, node, freefn);
332 }
333 
334 /*
335  * pctrie node zone initializer.
336  */
337 int
338 pctrie_zone_init(void *mem, int size __unused, int flags __unused)
339 {
340 	struct pctrie_node *node;
341 
342 	node = mem;
343 	node->pn_popmap = 0;
344 	memset(node->pn_child, 0, sizeof(node->pn_child));
345 	return (0);
346 }
347 
348 size_t
349 pctrie_node_size(void)
350 {
351 
352 	return (sizeof(struct pctrie_node));
353 }
354 
355 /*
356  * Inserts the key-value pair into the trie.
357  * Panics if the key already exists.
358  */
359 int
360 pctrie_insert(struct pctrie *ptree, uint64_t *val, pctrie_alloc_t allocfn)
361 {
362 	uint64_t index, newind;
363 	struct pctrie_node *leaf, *node, *tmp;
364 	smr_pctnode_t *parentp;
365 	int slot;
366 	uint16_t clev;
367 
368 	index = *val;
369 	leaf = pctrie_toleaf(val);
370 
371 	/*
372 	 * The owner of record for root is not really important because it
373 	 * will never be used.
374 	 */
375 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
376 	if (node == NULL) {
377 		ptree->pt_root = (uintptr_t)leaf;
378 		return (0);
379 	}
380 	for (parentp = (smr_pctnode_t *)&ptree->pt_root;; node = tmp) {
381 		if (pctrie_isleaf(node)) {
382 			newind = *pctrie_toval(node);
383 			if (newind == index)
384 				panic("%s: key %jx is already present",
385 				    __func__, (uintmax_t)index);
386 			break;
387 		} else if (pctrie_keybarr(node, index)) {
388 			newind = node->pn_owner;
389 			break;
390 		}
391 		slot = pctrie_slot(index, node->pn_clev);
392 		parentp = &node->pn_child[slot];
393 		tmp = pctrie_node_load(parentp, NULL, PCTRIE_LOCKED);
394 		if (tmp == NULL) {
395 			pctrie_addnode(node, index, node->pn_clev, leaf,
396 			    PCTRIE_LOCKED);
397 			return (0);
398 		}
399 	}
400 
401 	/*
402 	 * A new node is needed because the right insertion level is reached.
403 	 * Setup the new intermediate node and add the 2 children: the
404 	 * new object and the older edge or object.
405 	 */
406 	clev = pctrie_keydiff(newind, index);
407 	tmp = pctrie_node_get(ptree, allocfn, index, clev);
408 	if (tmp == NULL)
409 		return (ENOMEM);
410 	/* These writes are not yet visible due to ordering. */
411 	pctrie_addnode(tmp, index, clev, leaf, PCTRIE_UNSERIALIZED);
412 	pctrie_addnode(tmp, newind, clev, node, PCTRIE_UNSERIALIZED);
413 	/* Synchronize to make the above visible. */
414 	pctrie_node_store(parentp, tmp, PCTRIE_LOCKED);
415 	return (0);
416 }
417 
418 /*
419  * Returns the value stored at the index.  If the index is not present,
420  * NULL is returned.
421  */
422 static __always_inline uint64_t *
423 _pctrie_lookup(struct pctrie *ptree, uint64_t index, smr_t smr,
424     enum pctrie_access access)
425 {
426 	struct pctrie_node *node;
427 	uint64_t *m;
428 	int slot;
429 
430 	node = pctrie_root_load(ptree, smr, access);
431 	while (node != NULL) {
432 		if (pctrie_isleaf(node)) {
433 			m = pctrie_toval(node);
434 			if (*m == index)
435 				return (m);
436 			break;
437 		}
438 		if (pctrie_keybarr(node, index))
439 			break;
440 		slot = pctrie_slot(index, node->pn_clev);
441 		node = pctrie_node_load(&node->pn_child[slot], smr, access);
442 	}
443 	return (NULL);
444 }
445 
446 /*
447  * Returns the value stored at the index, assuming access is externally
448  * synchronized by a lock.
449  *
450  * If the index is not present, NULL is returned.
451  */
452 uint64_t *
453 pctrie_lookup(struct pctrie *ptree, uint64_t index)
454 {
455 	return (_pctrie_lookup(ptree, index, NULL, PCTRIE_LOCKED));
456 }
457 
458 /*
459  * Returns the value stored at the index without requiring an external lock.
460  *
461  * If the index is not present, NULL is returned.
462  */
463 uint64_t *
464 pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr)
465 {
466 	uint64_t *res;
467 
468 	smr_enter(smr);
469 	res = _pctrie_lookup(ptree, index, smr, PCTRIE_SMR);
470 	smr_exit(smr);
471 	return (res);
472 }
473 
474 /*
475  * Returns the value with the least index that is greater than or equal to the
476  * specified index, or NULL if there are no such values.
477  *
478  * Requires that access be externally synchronized by a lock.
479  */
480 uint64_t *
481 pctrie_lookup_ge(struct pctrie *ptree, uint64_t index)
482 {
483 	struct pctrie_node *node, *succ;
484 	uint64_t *m;
485 	int slot;
486 
487 	/*
488 	 * Descend the trie as if performing an ordinary lookup for the
489 	 * specified value.  However, unlike an ordinary lookup, as we descend
490 	 * the trie, we use "succ" to remember the last branching-off point,
491 	 * that is, the interior node under which the least value that is both
492 	 * outside our current path down the trie and greater than the specified
493 	 * index resides.  (The node's popmap makes it fast and easy to
494 	 * recognize a branching-off point.)  If our ordinary lookup fails to
495 	 * yield a value that is greater than or equal to the specified index,
496 	 * then we will exit this loop and perform a lookup starting from
497 	 * "succ".  If "succ" is not NULL, then that lookup is guaranteed to
498 	 * succeed.
499 	 */
500 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
501 	succ = NULL;
502 	while (node != NULL) {
503 		if (pctrie_isleaf(node)) {
504 			m = pctrie_toval(node);
505 			if (*m >= index)
506 				return (m);
507 			break;
508 		}
509 		if (pctrie_keybarr(node, index)) {
510 			/*
511 			 * If all values in this subtree are > index, then the
512 			 * least value in this subtree is the answer.
513 			 */
514 			if (node->pn_owner > index)
515 				succ = node;
516 			break;
517 		}
518 		slot = pctrie_slot(index, node->pn_clev);
519 
520 		/*
521 		 * Just in case the next search step leads to a subtree of all
522 		 * values < index, check popmap to see if a next bigger step, to
523 		 * a subtree of all pages with values > index, is available.  If
524 		 * so, remember to restart the search here.
525 		 */
526 		if ((node->pn_popmap >> slot) > 1)
527 			succ = node;
528 		node = pctrie_node_load(&node->pn_child[slot], NULL,
529 		    PCTRIE_LOCKED);
530 	}
531 
532 	/*
533 	 * Restart the search from the last place visited in the subtree that
534 	 * included some values > index, if there was such a place.
535 	 */
536 	if (succ == NULL)
537 		return (NULL);
538 	if (succ != node) {
539 		/*
540 		 * Take a step to the next bigger sibling of the node chosen
541 		 * last time.  In that subtree, all values > index.
542 		 */
543 		slot = pctrie_slot(index, succ->pn_clev) + 1;
544 		KASSERT((succ->pn_popmap >> slot) != 0,
545 		    ("%s: no popmap siblings past slot %d in node %p",
546 		    __func__, slot, succ));
547 		slot += ffs(succ->pn_popmap >> slot) - 1;
548 		succ = pctrie_node_load(&succ->pn_child[slot], NULL,
549 		    PCTRIE_LOCKED);
550 	}
551 
552 	/*
553 	 * Find the value in the subtree rooted at "succ" with the least index.
554 	 */
555 	while (!pctrie_isleaf(succ)) {
556 		KASSERT(succ->pn_popmap != 0,
557 		    ("%s: no popmap children in node %p",  __func__, succ));
558 		slot = ffs(succ->pn_popmap) - 1;
559 		succ = pctrie_node_load(&succ->pn_child[slot], NULL,
560 		    PCTRIE_LOCKED);
561 	}
562 	return (pctrie_toval(succ));
563 }
564 
565 /*
566  * Returns the value with the greatest index that is less than or equal to the
567  * specified index, or NULL if there are no such values.
568  *
569  * Requires that access be externally synchronized by a lock.
570  */
571 uint64_t *
572 pctrie_lookup_le(struct pctrie *ptree, uint64_t index)
573 {
574 	struct pctrie_node *node, *pred;
575 	uint64_t *m;
576 	int slot;
577 
578 	/*
579 	 * Mirror the implementation of pctrie_lookup_ge, described above.
580 	 */
581 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
582 	pred = NULL;
583 	while (node != NULL) {
584 		if (pctrie_isleaf(node)) {
585 			m = pctrie_toval(node);
586 			if (*m <= index)
587 				return (m);
588 			break;
589 		}
590 		if (pctrie_keybarr(node, index)) {
591 			if (node->pn_owner < index)
592 				pred = node;
593 			break;
594 		}
595 		slot = pctrie_slot(index, node->pn_clev);
596 		if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
597 			pred = node;
598 		node = pctrie_node_load(&node->pn_child[slot], NULL,
599 		    PCTRIE_LOCKED);
600 	}
601 	if (pred == NULL)
602 		return (NULL);
603 	if (pred != node) {
604 		slot = pctrie_slot(index, pred->pn_clev);
605 		KASSERT((pred->pn_popmap & ((1 << slot) - 1)) != 0,
606 		    ("%s: no popmap siblings before slot %d in node %p",
607 		    __func__, slot, pred));
608 		slot = fls(pred->pn_popmap & ((1 << slot) - 1)) - 1;
609 		pred = pctrie_node_load(&pred->pn_child[slot], NULL,
610 		    PCTRIE_LOCKED);
611 	}
612 	while (!pctrie_isleaf(pred)) {
613 		KASSERT(pred->pn_popmap != 0,
614 		    ("%s: no popmap children in node %p",  __func__, pred));
615 		slot = fls(pred->pn_popmap) - 1;
616 		pred = pctrie_node_load(&pred->pn_child[slot], NULL,
617 		    PCTRIE_LOCKED);
618 	}
619 	return (pctrie_toval(pred));
620 }
621 
622 /*
623  * Remove the specified index from the tree.
624  * Panics if the key is not present.
625  */
626 void
627 pctrie_remove(struct pctrie *ptree, uint64_t index, pctrie_free_t freefn)
628 {
629 	struct pctrie_node *node, *parent, *tmp;
630 	uint64_t *m;
631 	int slot;
632 
633 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
634 	if (pctrie_isleaf(node)) {
635 		m = pctrie_toval(node);
636 		if (*m != index)
637 			panic("%s: invalid key found", __func__);
638 		pctrie_root_store(ptree, NULL, PCTRIE_LOCKED);
639 		return;
640 	}
641 	parent = NULL;
642 	for (;;) {
643 		if (node == NULL)
644 			panic("pctrie_remove: impossible to locate the key");
645 		slot = pctrie_slot(index, node->pn_clev);
646 		tmp = pctrie_node_load(&node->pn_child[slot], NULL,
647 		    PCTRIE_LOCKED);
648 		if (pctrie_isleaf(tmp)) {
649 			m = pctrie_toval(tmp);
650 			if (*m != index)
651 				panic("%s: invalid key found", __func__);
652 			KASSERT((node->pn_popmap & (1 << slot)) != 0,
653 			    ("%s: bad popmap slot %d in node %p",
654 			    __func__, slot, node));
655 			node->pn_popmap ^= 1 << slot;
656 			pctrie_node_store(&node->pn_child[slot], NULL,
657 			    PCTRIE_LOCKED);
658 			if (!powerof2(node->pn_popmap))
659 				break;
660 			KASSERT(node->pn_popmap != 0,
661 			    ("%s: bad popmap all zeroes", __func__));
662 			slot = ffs(node->pn_popmap) - 1;
663 			tmp = pctrie_node_load(&node->pn_child[slot],
664 			    NULL, PCTRIE_LOCKED);
665 			KASSERT(tmp != NULL,
666 			    ("%s: bad popmap slot %d in node %p",
667 			    __func__, slot, node));
668 			if (parent == NULL)
669 				pctrie_root_store(ptree, tmp, PCTRIE_LOCKED);
670 			else {
671 				slot = pctrie_slot(index, parent->pn_clev);
672 				KASSERT(pctrie_node_load(
673 					&parent->pn_child[slot], NULL,
674 					PCTRIE_LOCKED) == node,
675 				    ("%s: invalid child value", __func__));
676 				pctrie_node_store(&parent->pn_child[slot], tmp,
677 				    PCTRIE_LOCKED);
678 			}
679 			/*
680 			 * The child is still valid and we can not zero the
681 			 * pointer until all SMR references are gone.
682 			 */
683 			pctrie_node_put(ptree, node, freefn);
684 			break;
685 		}
686 		parent = node;
687 		node = tmp;
688 	}
689 }
690 
691 /*
692  * Remove and free all the nodes from the tree.
693  * This function is recursive but there is a tight control on it as the
694  * maximum depth of the tree is fixed.
695  */
696 void
697 pctrie_reclaim_allnodes(struct pctrie *ptree, pctrie_free_t freefn)
698 {
699 	struct pctrie_node *root;
700 
701 	root = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
702 	if (root == NULL)
703 		return;
704 	pctrie_root_store(ptree, NULL, PCTRIE_UNSERIALIZED);
705 	if (!pctrie_isleaf(root))
706 		pctrie_reclaim_allnodes_int(ptree, root, freefn);
707 }
708 
709 #ifdef DDB
710 /*
711  * Show details about the given node.
712  */
713 DB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
714 {
715 	struct pctrie_node *node, *tmp;
716 	int slot;
717 	pn_popmap_t popmap;
718 
719         if (!have_addr)
720                 return;
721 	node = (struct pctrie_node *)addr;
722 	db_printf("node %p, owner %jx, children popmap %04x, level %u:\n",
723 	    (void *)node, (uintmax_t)node->pn_owner, node->pn_popmap,
724 	    node->pn_clev);
725 	for (popmap = node->pn_popmap; popmap != 0; popmap ^= 1 << slot) {
726 		slot = ffs(popmap) - 1;
727 		tmp = pctrie_node_load(&node->pn_child[slot], NULL,
728 		    PCTRIE_UNSERIALIZED);
729 		db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
730 		    slot, (void *)tmp,
731 		    pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL,
732 		    node->pn_clev);
733 	}
734 }
735 #endif /* DDB */
736