/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 EMC Corp.
 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * Path-compressed radix trie implementation.
 *
 * The implementation takes into account the following rationale:
 * - Size of the nodes should be as small as possible but still big enough
 *   to avoid a large maximum depth for the trie.  This is a balance
 *   between the necessity to not wire too much physical memory for the nodes
 *   and the necessity to avoid too much cache pollution during the trie
 *   operations.
 * - There is no strong bias toward lookup operations over insert and
 *   remove operations.  This implies that optimizations which help one
 *   operation but hurt another must be carefully evaluated.
 * - On average not many nodes are expected to be fully populated, hence
 *   level compression may just complicate things.
 */

#include <sys/cdefs.h>
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/libkern.h>
#include <sys/pctrie.h>
#include <sys/proc.h>	/* smr.h depends on struct thread. */
#include <sys/smr.h>
#include <sys/smr_types.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#if PCTRIE_WIDTH == 3
typedef uint8_t pn_popmap_t;
#elif PCTRIE_WIDTH == 4
typedef uint16_t pn_popmap_t;
#elif PCTRIE_WIDTH == 5
typedef uint32_t pn_popmap_t;
#else
#error Unsupported width
#endif
_Static_assert(sizeof(pn_popmap_t) <= sizeof(int),
    "pn_popmap_t too wide");

struct pctrie_node;
typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t;

struct pctrie_node {
	uint64_t	pn_owner;			/* Owner of record. */
	pn_popmap_t	pn_popmap;			/* Valid children. */
	uint8_t		pn_clev;			/* Level * WIDTH. */
	smr_pctnode_t	pn_child[PCTRIE_COUNT];		/* Child nodes. */
};

/*
 * Map index to an array position for the children of node.
 */
static __inline int
pctrie_slot(struct pctrie_node *node, uint64_t index)
{
	return ((index >> node->pn_clev) & (PCTRIE_COUNT - 1));
}

/*
 * Returns true if index does not belong to the specified node.  Otherwise,
 * sets slot value, and returns false.
 */
static __inline bool
pctrie_keybarr(struct pctrie_node *node, uint64_t index, int *slot)
{
	index = (index - node->pn_owner) >> node->pn_clev;
	if (index >= PCTRIE_COUNT)
		return (true);
	*slot = index;
	return (false);
}
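
/*
 * Worked example, assuming PCTRIE_WIDTH == 4 (so PCTRIE_COUNT == 16 and each
 * level consumes one nibble of the index): a node with pn_owner == 0x1200 and
 * pn_clev == 4 covers the index range [0x1200, 0x12ff].  For index 0x1234,
 * pctrie_keybarr() computes (0x1234 - 0x1200) >> 4 == 3 < 16, so the index
 * belongs to the node and pctrie_slot() selects slot 3.  For index 0x1334 the
 * same computation yields 0x13 >= 16, so the index lies outside the node's
 * range and the search must stop or back up.
 */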

/*
 * Check that a node being freed has at most one child remaining and that all
 * of its empty slots point to PCTRIE_NULL (INVARIANTS only).
 */
static __inline void
pctrie_node_put(struct pctrie_node *node)
{
#ifdef INVARIANTS
	int slot;

	KASSERT(powerof2(node->pn_popmap),
	    ("pctrie_node_put: node %p has too many children %04x", node,
	    node->pn_popmap));
	for (slot = 0; slot < PCTRIE_COUNT; slot++) {
		if ((node->pn_popmap & (1 << slot)) != 0)
			continue;
		KASSERT(smr_unserialized_load(&node->pn_child[slot], true) ==
		    PCTRIE_NULL,
		    ("pctrie_node_put: node %p has a child", node));
	}
#endif
}

enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED };

/*
 * Fetch a node pointer from a slot.
 */
static __inline struct pctrie_node *
pctrie_node_load(smr_pctnode_t *p, smr_t smr, enum pctrie_access access)
{
	switch (access) {
	case PCTRIE_UNSERIALIZED:
		return (smr_unserialized_load(p, true));
	case PCTRIE_LOCKED:
		return (smr_serialized_load(p, true));
	case PCTRIE_SMR:
		return (smr_entered_load(p, smr));
	}
	__assert_unreachable();
}

static __inline void
pctrie_node_store(smr_pctnode_t *p, void *v, enum pctrie_access access)
{
	switch (access) {
	case PCTRIE_UNSERIALIZED:
		smr_unserialized_store(p, v, true);
		break;
	case PCTRIE_LOCKED:
		smr_serialized_store(p, v, true);
		break;
	case PCTRIE_SMR:
		panic("%s: Not supported in SMR section.", __func__);
		break;
	default:
		__assert_unreachable();
		break;
	}
}

/*
 * Get the root node for a tree.
 */
static __inline struct pctrie_node *
pctrie_root_load(struct pctrie *ptree, smr_t smr, enum pctrie_access access)
{
	return (pctrie_node_load((smr_pctnode_t *)&ptree->pt_root, smr, access));
}

/*
 * Set the root node for a tree.
 */
static __inline void
pctrie_root_store(struct pctrie *ptree, struct pctrie_node *node,
    enum pctrie_access access)
{
	pctrie_node_store((smr_pctnode_t *)&ptree->pt_root, node, access);
}

/*
 * Returns TRUE if the specified node is a leaf and FALSE otherwise.
 */
static __inline bool
pctrie_isleaf(struct pctrie_node *node)
{
	return (((uintptr_t)node & PCTRIE_ISLEAF) != 0);
}

/*
 * Returns val with leaf bit set.
 */
static __inline void *
pctrie_toleaf(uint64_t *val)
{
	return ((void *)((uintptr_t)val | PCTRIE_ISLEAF));
}

/*
 * Returns the associated val extracted from node.
 */
static __inline uint64_t *
pctrie_toval(struct pctrie_node *node)
{
	return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS));
}

/*
 * Returns the associated pointer extracted from node and field offset.
 */
static __inline void *
pctrie_toptr(struct pctrie_node *node, int keyoff)
{
	return ((void *)(((uintptr_t)node & ~PCTRIE_FLAGS) - keyoff));
}

/*
 * Make 'child' a child of 'node'.
 */
static __inline void
pctrie_addnode(struct pctrie_node *node, uint64_t index,
    struct pctrie_node *child, enum pctrie_access access)
{
	int slot;

	slot = pctrie_slot(node, index);
	pctrie_node_store(&node->pn_child[slot], child, access);
	node->pn_popmap ^= 1 << slot;
	KASSERT((node->pn_popmap & (1 << slot)) != 0,
	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
}

/*
 * pctrie node zone initializer.
 */
int
pctrie_zone_init(void *mem, int size __unused, int flags __unused)
{
	struct pctrie_node *node;

	node = mem;
	node->pn_popmap = 0;
	for (int i = 0; i < nitems(node->pn_child); i++)
		pctrie_node_store(&node->pn_child[i], PCTRIE_NULL,
		    PCTRIE_UNSERIALIZED);
	return (0);
}

size_t
pctrie_node_size(void)
{

	return (sizeof(struct pctrie_node));
}
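
/*
 * Illustrative sketch of how a consumer might create the node zone; the zone
 * name and flags below are hypothetical, and UMA_ZONE_SMR matters only when
 * unlocked (SMR) lookups are used:
 *
 *	uma_zone_t pctrie_node_zone;
 *
 *	pctrie_node_zone = uma_zcreate("example pctrie node",
 *	    pctrie_node_size(), NULL, NULL, pctrie_zone_init, NULL,
 *	    UMA_ALIGN_PTR, UMA_ZONE_SMR);
 */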

enum pctrie_insert_neighbor_mode {
	PCTRIE_INSERT_NEIGHBOR_NONE,
	PCTRIE_INSERT_NEIGHBOR_LT,
	PCTRIE_INSERT_NEIGHBOR_GT,
};

/*
 * Look for where to insert the key-value pair into the trie.  Complete the
 * insertion if it replaces a null leaf.  Return the insertion location if the
 * insertion needs to be completed by the caller; otherwise return NULL.
 *
 * If the key is already present in the trie, populate *found_out as if by
 * pctrie_lookup().
 *
 * With mode PCTRIE_INSERT_NEIGHBOR_GT or PCTRIE_INSERT_NEIGHBOR_LT, set
 * *neighbor_out to the lowest level node we encounter during the insert lookup
 * that is a parent of the next greater or lesser entry.  The value is not
 * defined if the key was already present in the trie.
 *
 * Note that mode is expected to be a compile-time constant, and this procedure
 * is expected to be inlined into callers with extraneous code optimized out.
 */
static __always_inline void *
pctrie_insert_lookup_compound(struct pctrie *ptree, uint64_t *val,
    uint64_t **found_out, struct pctrie_node **neighbor_out,
    enum pctrie_insert_neighbor_mode mode)
{
	uint64_t index;
	struct pctrie_node *node, *parent;
	int slot;

	index = *val;

	/*
	 * The owner of record for root is not really important because it
	 * will never be used.
	 */
	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
	parent = NULL;
	for (;;) {
		if (pctrie_isleaf(node)) {
			if (node == PCTRIE_NULL) {
				if (parent == NULL)
					pctrie_root_store(ptree,
					    pctrie_toleaf(val), PCTRIE_LOCKED);
				else
					pctrie_addnode(parent, index,
					    pctrie_toleaf(val), PCTRIE_LOCKED);
				return (NULL);
			}
			if (*pctrie_toval(node) == index) {
				*found_out = pctrie_toval(node);
				return (NULL);
			}
			break;
		}
		if (pctrie_keybarr(node, index, &slot))
			break;
		/*
		 * Descend.  If we're tracking the next neighbor and this node
		 * contains a neighboring entry in the right direction, record
		 * it.
		 */
		if (mode == PCTRIE_INSERT_NEIGHBOR_LT) {
			if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
				*neighbor_out = node;
		} else if (mode == PCTRIE_INSERT_NEIGHBOR_GT) {
			if ((node->pn_popmap >> slot) > 1)
				*neighbor_out = node;
		}
		parent = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}

	/*
	 * The caller will split this node.  If we're tracking the next
	 * neighbor, record the old node if the old entry is in the right
	 * direction.
	 */
	if (mode == PCTRIE_INSERT_NEIGHBOR_LT) {
		if (*pctrie_toval(node) < index)
			*neighbor_out = node;
	} else if (mode == PCTRIE_INSERT_NEIGHBOR_GT) {
		if (*pctrie_toval(node) > index)
			*neighbor_out = node;
	}

	/*
	 * 'node' must be replaced in the tree with a new branch node, with
	 * children 'node' and 'val'. Return the place that points to 'node'
	 * now, and will point to the new branching node later.
	 */
	return ((parent != NULL) ? &parent->pn_child[slot]:
	    (smr_pctnode_t *)&ptree->pt_root);
}

/*
 * Wrap pctrie_insert_lookup_compound to implement a strict insertion.  Panic
 * if the key already exists, and do not look for neighboring entries.
 */
void *
pctrie_insert_lookup_strict(struct pctrie *ptree, uint64_t *val)
{
	void *parentp;
	uint64_t *found;

	found = NULL;
	parentp = pctrie_insert_lookup_compound(ptree, val, &found, NULL,
	    PCTRIE_INSERT_NEIGHBOR_NONE);
	if (__predict_false(found != NULL))
		panic("%s: key %jx is already present", __func__,
		    (uintmax_t)*val);
	return (parentp);
}

/*
 * Wrap pctrie_insert_lookup_compound to implement find-or-insert.  Do not look
 * for neighboring entries.
 */
void *
pctrie_insert_lookup(struct pctrie *ptree, uint64_t *val,
    uint64_t **found_out)
{
	*found_out = NULL;
	return (pctrie_insert_lookup_compound(ptree, val, found_out, NULL,
	    PCTRIE_INSERT_NEIGHBOR_NONE));
}
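
/*
 * Illustrative sketch of the two-phase insertion protocol built on
 * pctrie_insert_lookup() above and pctrie_insert_node() below; 'tree', 'val'
 * and 'node_zone' are hypothetical, and real consumers normally go through
 * the PCTRIE_DEFINE()-generated wrappers in sys/pctrie.h.  The caller's lock
 * is held across both phases:
 *
 *	void *parentp;
 *	uint64_t *found;
 *	struct pctrie_node *parent;
 *
 *	parentp = pctrie_insert_lookup(tree, val, &found);
 *	if (found != NULL)
 *		return (EEXIST);	// key already present
 *	if (parentp == NULL)
 *		return (0);		// insertion already completed
 *	parent = uma_zalloc_smr(node_zone, M_NOWAIT);
 *	if (parent == NULL)
 *		return (ENOMEM);
 *	pctrie_insert_node(parentp, parent, val);
 *	return (0);
 */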

/*
 * Wrap pctrie_insert_lookup_compound to implement find or insert and find next
 * greater entry.  Find a subtree that contains the next entry greater than the
 * newly-inserted or to-be-inserted entry.
 */
void *
pctrie_insert_lookup_gt(struct pctrie *ptree, uint64_t *val,
    uint64_t **found_out, struct pctrie_node **neighbor_out)
{
	*found_out = NULL;
	*neighbor_out = NULL;
	return (pctrie_insert_lookup_compound(ptree, val, found_out,
	    neighbor_out, PCTRIE_INSERT_NEIGHBOR_GT));
}

/*
 * Wrap pctrie_insert_lookup_compound to implement find or insert and find next
 * lesser entry.  Find a subtree that contains the next entry less than the
 * newly-inserted or to-be-inserted entry.
 */
void *
pctrie_insert_lookup_lt(struct pctrie *ptree, uint64_t *val,
    uint64_t **found_out, struct pctrie_node **neighbor_out)
{
	*found_out = NULL;
	*neighbor_out = NULL;
	return (pctrie_insert_lookup_compound(ptree, val, found_out,
	    neighbor_out, PCTRIE_INSERT_NEIGHBOR_LT));
}

/*
 * Uses new node to insert key-value pair into the trie at given location.
 */
void
pctrie_insert_node(void *parentp, struct pctrie_node *parent, uint64_t *val)
{
	struct pctrie_node *node;
	uint64_t index, newind;

	/*
	 * Clear the last child pointer of the newly allocated parent.  We want
	 * to clear it after the final section has exited so lookup can not
	 * return false negatives.  It is done here because it will be
	 * cache-cold in the dtor callback.
	 */
	if (parent->pn_popmap != 0) {
		pctrie_node_store(&parent->pn_child[ffs(parent->pn_popmap) - 1],
		    PCTRIE_NULL, PCTRIE_UNSERIALIZED);
		parent->pn_popmap = 0;
	}

	/*
	 * Recover the values of the two children of the new parent node.  If
	 * 'node' is not a leaf, this stores into 'newind' the 'owner' field,
	 * which must be first in the node.
	 */
	index = *val;
	node = pctrie_node_load(parentp, NULL, PCTRIE_UNSERIALIZED);
	newind = *pctrie_toval(node);

	/*
	 * From the highest-order bit where the indexes differ,
	 * compute the highest level in the trie where they differ.  Then,
	 * compute the least index of this subtrie.
	 */
	_Static_assert(sizeof(long long) >= sizeof(uint64_t),
	    "uint64 too wide");
	_Static_assert(sizeof(uint64_t) * NBBY <=
	    (1 << (sizeof(parent->pn_clev) * NBBY)), "pn_clev too narrow");
	parent->pn_clev = rounddown(ilog2(index ^ newind), PCTRIE_WIDTH);
	parent->pn_owner = PCTRIE_COUNT;
	parent->pn_owner = index & -(parent->pn_owner << parent->pn_clev);

	/* These writes are not yet visible due to ordering. */
	pctrie_addnode(parent, index, pctrie_toleaf(val), PCTRIE_UNSERIALIZED);
	pctrie_addnode(parent, newind, node, PCTRIE_UNSERIALIZED);
	/* Synchronize to make the above visible. */
	pctrie_node_store(parentp, parent, PCTRIE_LOCKED);
}
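
/*
 * Worked example, assuming PCTRIE_WIDTH == 4: if the new value has index 0x25
 * and the displaced leaf has index 0x31, then index ^ newind == 0x14 and
 * ilog2(0x14) == 4, so pn_clev == rounddown(4, 4) == 4 and pn_owner ==
 * 0x25 & -(16 << 4) == 0.  The new branch node covers [0x00, 0xff] and the
 * two leaves land in slots (0x25 >> 4) & 0xf == 2 and (0x31 >> 4) & 0xf == 3.
 */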

/*
 * Return the value associated with the node, if the node is a leaf that matches
 * the index; otherwise NULL.
 */
static __always_inline uint64_t *
pctrie_match_value(struct pctrie_node *node, uint64_t index)
{
	uint64_t *m;

	if (!pctrie_isleaf(node) || (m = pctrie_toval(node)) == NULL ||
	    *m != index)
		m = NULL;
	return (m);
}

/*
 * Returns the value stored at the index.  If the index is not present,
 * NULL is returned.
 */
static __always_inline uint64_t *
_pctrie_lookup(struct pctrie *ptree, uint64_t index, smr_t smr,
    enum pctrie_access access)
{
	struct pctrie_node *node;
	int slot;

	node = pctrie_root_load(ptree, smr, access);
	/* Seek a node that matches index. */
	while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot))
		node = pctrie_node_load(&node->pn_child[slot], smr, access);
	return (pctrie_match_value(node, index));
}

/*
 * Returns the value stored at the index, assuming access is externally
 * synchronized by a lock.
 *
 * If the index is not present, NULL is returned.
 */
uint64_t *
pctrie_lookup(struct pctrie *ptree, uint64_t index)
{
	return (_pctrie_lookup(ptree, index, NULL, PCTRIE_LOCKED));
}

/*
 * Returns the value stored at the index without requiring an external lock.
 *
 * If the index is not present, NULL is returned.
 */
uint64_t *
pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr)
{
	uint64_t *res;

	smr_enter(smr);
	res = _pctrie_lookup(ptree, index, smr, PCTRIE_SMR);
	smr_exit(smr);
	return (res);
}

/*
 * Returns the last node examined in the search for the index, and updates the
 * search path to that node.
 */
static __always_inline struct pctrie_node *
_pctrie_iter_lookup_node(struct pctrie_iter *it, uint64_t index, smr_t smr,
    enum pctrie_access access)
{
	struct pctrie_node *node;
	int slot;

	/*
	 * Climb the search path to find the lowest node from which to start the
	 * search for a value matching 'index'.
	 */
	while (it->top != 0) {
		node = it->path[it->top - 1];
		KASSERT(!powerof2(node->pn_popmap),
		    ("%s: freed node in iter path", __func__));
		if (!pctrie_keybarr(node, index, &slot)) {
			node = pctrie_node_load(
			    &node->pn_child[slot], smr, access);
			break;
		}
		--it->top;
	}
	if (it->top == 0)
		node = pctrie_root_load(it->ptree, smr, access);

	/* Seek a node that matches index. */
	while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot)) {
		KASSERT(it->top < nitems(it->path),
		    ("%s: path overflow in trie %p", __func__, it->ptree));
		it->path[it->top++] = node;
		node = pctrie_node_load(&node->pn_child[slot], smr, access);
	}
	return (node);
}

/*
 * Returns the value stored at a given index value, possibly NULL.
 */
static __always_inline uint64_t *
_pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index, smr_t smr,
    enum pctrie_access access)
{
	struct pctrie_node *node;

	it->index = index;
	node = _pctrie_iter_lookup_node(it, index, smr, access);
	return (pctrie_match_value(node, index));
}

/*
 * Returns the value stored at a given index value, possibly NULL.
 */
uint64_t *
pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index)
{
	return (_pctrie_iter_lookup(it, index, NULL, PCTRIE_LOCKED));
}

/*
 * Insert the val in the trie, starting search with iterator.  Return a pointer
 * to indicate where a new node must be allocated to complete insertion.
 * Assumes access is externally synchronized by a lock.
 */
void *
pctrie_iter_insert_lookup(struct pctrie_iter *it, uint64_t *val)
{
	struct pctrie_node *node;

	it->index = *val;
	node = _pctrie_iter_lookup_node(it, *val, NULL, PCTRIE_LOCKED);
	if (node == PCTRIE_NULL) {
		if (it->top == 0)
			pctrie_root_store(it->ptree,
			    pctrie_toleaf(val), PCTRIE_LOCKED);
		else
			pctrie_addnode(it->path[it->top - 1], it->index,
			    pctrie_toleaf(val), PCTRIE_LOCKED);
		return (NULL);
	}
	if (__predict_false(pctrie_match_value(node, it->index) != NULL))
		panic("%s: key %jx is already present", __func__,
		    (uintmax_t)it->index);

	/*
	 * 'node' must be replaced in the tree with a new branch node, with
	 * children 'node' and 'val'. Return the place that points to 'node'
	 * now, and will point to the new branching node later.
	 */
	if (it->top == 0)
		return ((smr_pctnode_t *)&it->ptree->pt_root);
	node = it->path[it->top - 1];
	return (&node->pn_child[pctrie_slot(node, it->index)]);
}

/*
 * Returns the value stored at a fixed offset from the current index value,
 * possibly NULL.
 */
static __always_inline uint64_t *
_pctrie_iter_stride(struct pctrie_iter *it, int stride, smr_t smr,
    enum pctrie_access access)
{
	uint64_t index = it->index + stride;

	/* Detect stride overflow. */
	if ((stride > 0) != (index > it->index))
		return (NULL);
	/* Detect crossing limit */
	if ((index < it->limit) != (it->index < it->limit))
		return (NULL);

	return (_pctrie_iter_lookup(it, index, smr, access));
}

/*
 * Returns the value stored at a fixed offset from the current index value,
 * possibly NULL.
 */
uint64_t *
pctrie_iter_stride(struct pctrie_iter *it, int stride)
{
	return (_pctrie_iter_stride(it, stride, NULL, PCTRIE_LOCKED));
}

/*
 * Returns the value stored at one more than the current index value, possibly
 * NULL, assuming access is externally synchronized by a lock.
 */
uint64_t *
pctrie_iter_next(struct pctrie_iter *it)
{
	return (_pctrie_iter_stride(it, 1, NULL, PCTRIE_LOCKED));
}

/*
 * Returns the value stored at one less than the current index value, possibly
 * NULL, assuming access is externally synchronized by a lock.
 */
uint64_t *
pctrie_iter_prev(struct pctrie_iter *it)
{
	return (_pctrie_iter_stride(it, -1, NULL, PCTRIE_LOCKED));
}

/*
 * Returns the value with the least index that is greater than or equal to the
 * specified index, or NULL if there are no such values.
 *
 * Requires that access be externally synchronized by a lock.
 */
static __inline uint64_t *
pctrie_lookup_ge_node(struct pctrie_node *node, uint64_t index)
{
	struct pctrie_node *succ;
	uint64_t *m;
	int slot;

	/*
	 * Descend the trie as if performing an ordinary lookup for the
	 * specified value.  However, unlike an ordinary lookup, as we descend
	 * the trie, we use "succ" to remember the last branching-off point,
	 * that is, the interior node under which the least value that is both
	 * outside our current path down the trie and greater than the specified
	 * index resides.  (The node's popmap makes it fast and easy to
	 * recognize a branching-off point.)  If our ordinary lookup fails to
	 * yield a value that is greater than or equal to the specified index,
	 * then we will exit this loop and perform a lookup starting from
	 * "succ".  If "succ" is not NULL, then that lookup is guaranteed to
	 * succeed.
	 */
	succ = NULL;
	for (;;) {
		if (pctrie_isleaf(node)) {
			if ((m = pctrie_toval(node)) != NULL && *m >= index)
				return (m);
			break;
		}
		if (pctrie_keybarr(node, index, &slot)) {
			/*
			 * If all values in this subtree are > index, then the
			 * least value in this subtree is the answer.
			 */
			if (node->pn_owner > index)
				succ = node;
			break;
		}

		/*
		 * Just in case the next search step leads to a subtree of all
		 * values < index, check popmap to see if a next bigger step, to
		 * a subtree of all pages with values > index, is available.  If
		 * so, remember to restart the search here.
		 */
		if ((node->pn_popmap >> slot) > 1)
			succ = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}

	/*
	 * Restart the search from the last place visited in the subtree that
	 * included some values > index, if there was such a place.
	 */
	if (succ == NULL)
		return (NULL);
	if (succ != node) {
		/*
		 * Take a step to the next bigger sibling of the node chosen
		 * last time.  In that subtree, all values > index.
		 */
		slot = pctrie_slot(succ, index) + 1;
		KASSERT((succ->pn_popmap >> slot) != 0,
		    ("%s: no popmap siblings past slot %d in node %p",
		    __func__, slot, succ));
		slot += ffs(succ->pn_popmap >> slot) - 1;
		succ = pctrie_node_load(&succ->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}

	/*
	 * Find the value in the subtree rooted at "succ" with the least index.
	 */
	while (!pctrie_isleaf(succ)) {
		KASSERT(succ->pn_popmap != 0,
		    ("%s: no popmap children in node %p",  __func__, succ));
		slot = ffs(succ->pn_popmap) - 1;
		succ = pctrie_node_load(&succ->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	return (pctrie_toval(succ));
}

uint64_t *
pctrie_lookup_ge(struct pctrie *ptree, uint64_t index)
{
	return (pctrie_lookup_ge_node(
	    pctrie_root_load(ptree, NULL, PCTRIE_LOCKED), index));
}

uint64_t *
pctrie_subtree_lookup_gt(struct pctrie_node *node, uint64_t index)
{
	if (node == NULL || index + 1 == 0)
		return (NULL);
	return (pctrie_lookup_ge_node(node, index + 1));
}

/*
 * Find first leaf >= index, and fill iter with the path to the parent of that
 * leaf.  Return NULL if there is no such leaf less than limit.
 */
uint64_t *
pctrie_iter_lookup_ge(struct pctrie_iter *it, uint64_t index)
{
	struct pctrie_node *node;
	uint64_t *m;
	int slot;

	/* Seek a node that matches index. */
	node = _pctrie_iter_lookup_node(it, index, NULL, PCTRIE_LOCKED);

	/*
	 * If no such node was found, and instead this path leads only to nodes
	 * < index, back up to find a subtrie with the least value > index.
	 */
	if (pctrie_isleaf(node) ?
	    (m = pctrie_toval(node)) == NULL || *m < index :
	    node->pn_owner < index) {
		/* Climb the path to find a node with a descendant > index. */
		while (it->top != 0) {
			node = it->path[it->top - 1];
			slot = pctrie_slot(node, index) + 1;
			if ((node->pn_popmap >> slot) != 0)
				break;
			--it->top;
		}
		if (it->top == 0)
			return (NULL);

		/* Step to the least child with a descendant > index. */
		slot += ffs(node->pn_popmap >> slot) - 1;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	/* Descend to the least leaf of the subtrie. */
	while (!pctrie_isleaf(node)) {
		if (it->limit != 0 && node->pn_owner >= it->limit)
			return (NULL);
		slot = ffs(node->pn_popmap) - 1;
		KASSERT(it->top < nitems(it->path),
		    ("%s: path overflow in trie %p", __func__, it->ptree));
		it->path[it->top++] = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	m = pctrie_toval(node);
	if (it->limit != 0 && *m >= it->limit)
		return (NULL);
	it->index = *m;
	return (m);
}

/*
 * Find the first leaf with value at least 'jump' greater than the previous
 * leaf.  Return NULL if that value is >= limit.
 */
uint64_t *
pctrie_iter_jump_ge(struct pctrie_iter *it, int64_t jump)
{
	uint64_t index = it->index + jump;

	/* Detect jump overflow. */
	if ((jump > 0) != (index > it->index))
		return (NULL);
	if (it->limit != 0 && index >= it->limit)
		return (NULL);
	return (pctrie_iter_lookup_ge(it, index));
}
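
/*
 * Illustrative sketch of an ascending scan over the trie using the iterator
 * functions above; 'tree', 'start' and process() are hypothetical, and the
 * iterator is assumed to be set up with the pctrie_iter_init() helper from
 * sys/pctrie.h:
 *
 *	struct pctrie_iter it;
 *	uint64_t *m;
 *
 *	pctrie_iter_init(&it, tree);
 *	for (m = pctrie_iter_lookup_ge(&it, start); m != NULL;
 *	    m = pctrie_iter_next(&it))
 *		process(m);
 */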

#ifdef INVARIANTS
void
pctrie_subtree_lookup_gt_assert(struct pctrie_node *node, uint64_t index,
    struct pctrie *ptree, uint64_t *res)
{
	uint64_t *expected;

	if (index + 1 == 0)
		expected = NULL;
	else
		expected = pctrie_lookup_ge(ptree, index + 1);
	KASSERT(res == expected,
	    ("pctrie subtree lookup gt result different from root lookup: "
	    "ptree %p, index %ju, subtree %p, found %p, expected %p", ptree,
	    (uintmax_t)index, node, res, expected));
}
#endif

/*
 * Returns the value with the greatest index that is less than or equal to the
 * specified index, or NULL if there are no such values.
 *
 * Requires that access be externally synchronized by a lock.
 */
static __inline uint64_t *
pctrie_lookup_le_node(struct pctrie_node *node, uint64_t index)
{
	struct pctrie_node *pred;
	uint64_t *m;
	int slot;

	/*
	 * Mirror the implementation of pctrie_lookup_ge_node, described above.
	 */
	pred = NULL;
	for (;;) {
		if (pctrie_isleaf(node)) {
			if ((m = pctrie_toval(node)) != NULL && *m <= index)
				return (m);
			break;
		}
		if (pctrie_keybarr(node, index, &slot)) {
			if (node->pn_owner < index)
				pred = node;
			break;
		}
		if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
			pred = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	if (pred == NULL)
		return (NULL);
	if (pred != node) {
		slot = pctrie_slot(pred, index);
		KASSERT((pred->pn_popmap & ((1 << slot) - 1)) != 0,
		    ("%s: no popmap siblings before slot %d in node %p",
		    __func__, slot, pred));
		slot = ilog2(pred->pn_popmap & ((1 << slot) - 1));
		pred = pctrie_node_load(&pred->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	while (!pctrie_isleaf(pred)) {
		KASSERT(pred->pn_popmap != 0,
		    ("%s: no popmap children in node %p",  __func__, pred));
		slot = ilog2(pred->pn_popmap);
		pred = pctrie_node_load(&pred->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	return (pctrie_toval(pred));
}

uint64_t *
pctrie_lookup_le(struct pctrie *ptree, uint64_t index)
{
	return (pctrie_lookup_le_node(
	    pctrie_root_load(ptree, NULL, PCTRIE_LOCKED), index));
}

uint64_t *
pctrie_subtree_lookup_lt(struct pctrie_node *node, uint64_t index)
{
	if (node == NULL || index == 0)
		return (NULL);
	return (pctrie_lookup_le_node(node, index - 1));
}

/*
 * Find first leaf <= index, and fill iter with the path to the parent of that
 * leaf.  Return NULL if there is no such leaf greater than limit.
 */
uint64_t *
pctrie_iter_lookup_le(struct pctrie_iter *it, uint64_t index)
{
	struct pctrie_node *node;
	uint64_t *m;
	int slot;

	/* Seek a node that matches index. */
	node = _pctrie_iter_lookup_node(it, index, NULL, PCTRIE_LOCKED);

	/*
	 * If no such node was found, and instead this path leads only to nodes
	 * > index, back up to find a subtrie with the greatest value < index.
	 */
	if (pctrie_isleaf(node) ?
	    (m = pctrie_toval(node)) == NULL || *m > index :
	    node->pn_owner > index) {
		/* Climb the path to find a node with a descendant < index. */
		while (it->top != 0) {
			node = it->path[it->top - 1];
			slot = pctrie_slot(node, index);
			if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
				break;
			--it->top;
		}
		if (it->top == 0)
			return (NULL);

		/* Step to the greatest child with a descendant < index. */
		slot = ilog2(node->pn_popmap & ((1 << slot) - 1));
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	/* Descend to the greatest leaf of the subtrie. */
	while (!pctrie_isleaf(node)) {
		if (it->limit != 0 && it->limit >=
		    node->pn_owner + (PCTRIE_COUNT << node->pn_clev) - 1)
			return (NULL);
		slot = ilog2(node->pn_popmap);
		KASSERT(it->top < nitems(it->path),
		    ("%s: path overflow in trie %p", __func__, it->ptree));
		it->path[it->top++] = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	m = pctrie_toval(node);
	if (it->limit != 0 && *m <= it->limit)
		return (NULL);
	it->index = *m;
	return (m);
}

/*
 * Find the first leaf with value at least 'jump' less than the previous
 * leaf.  Return NULL if that value is <= limit.
 */
uint64_t *
pctrie_iter_jump_le(struct pctrie_iter *it, int64_t jump)
{
	uint64_t index = it->index - jump;

	/* Detect jump overflow. */
	if ((jump > 0) != (index < it->index))
		return (NULL);
	if (it->limit != 0 && index <= it->limit)
		return (NULL);
	return (pctrie_iter_lookup_le(it, index));
}

#ifdef INVARIANTS
void
pctrie_subtree_lookup_lt_assert(struct pctrie_node *node, uint64_t index,
    struct pctrie *ptree, uint64_t *res)
{
	uint64_t *expected;

	if (index == 0)
		expected = NULL;
	else
		expected = pctrie_lookup_le(ptree, index - 1);
	KASSERT(res == expected,
	    ("pctrie subtree lookup lt result different from root lookup: "
	    "ptree %p, index %ju, subtree %p, found %p, expected %p", ptree,
	    (uintmax_t)index, node, res, expected));
}
#endif

static void
pctrie_remove(struct pctrie *ptree, uint64_t index, struct pctrie_node *parent,
    struct pctrie_node *node, struct pctrie_node **freenode)
{
	struct pctrie_node *child;
	int slot;

	if (node == NULL) {
		pctrie_root_store(ptree, PCTRIE_NULL, PCTRIE_LOCKED);
		return;
	}
	slot = pctrie_slot(node, index);
	KASSERT((node->pn_popmap & (1 << slot)) != 0,
	    ("%s: bad popmap slot %d in node %p",
	    __func__, slot, node));
	node->pn_popmap ^= 1 << slot;
	pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL, PCTRIE_LOCKED);
	if (!powerof2(node->pn_popmap))
		return;
	KASSERT(node->pn_popmap != 0, ("%s: bad popmap all zeroes", __func__));
	slot = ffs(node->pn_popmap) - 1;
	child = pctrie_node_load(&node->pn_child[slot], NULL, PCTRIE_LOCKED);
	KASSERT(child != PCTRIE_NULL,
	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
	if (parent == NULL)
		pctrie_root_store(ptree, child, PCTRIE_LOCKED);
	else {
		slot = pctrie_slot(parent, index);
		KASSERT(node ==
		    pctrie_node_load(&parent->pn_child[slot], NULL,
		    PCTRIE_LOCKED), ("%s: invalid child value", __func__));
		pctrie_node_store(&parent->pn_child[slot], child,
		    PCTRIE_LOCKED);
	}
	/*
	 * The child is still valid and we can not zero the
	 * pointer until all SMR references are gone.
	 */
	pctrie_node_put(node);
	*freenode = node;
}

/*
 * Remove the specified index from the tree, and return the value stored at
 * that index.  If the index is not present, return NULL.
 */
uint64_t *
pctrie_remove_lookup(struct pctrie *ptree, uint64_t index,
    struct pctrie_node **freenode)
{
	struct pctrie_node *child, *node, *parent;
	uint64_t *m;
	int slot;

	DEBUG_POISON_POINTER(parent);
	*freenode = node = NULL;
	child = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
	while (!pctrie_isleaf(child)) {
		parent = node;
		node = child;
		slot = pctrie_slot(node, index);
		child = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	m = pctrie_match_value(child, index);
	if (m != NULL)
		pctrie_remove(ptree, index, parent, node, freenode);
	return (m);
}
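
/*
 * Illustrative sketch of a removal: the caller owns any node handed back
 * through 'freenode' and returns it to the (hypothetical) node zone once that
 * is safe; with SMR lookups the zone must be SMR-enabled so readers drain
 * before the memory is reused:
 *
 *	struct pctrie_node *freed;
 *	uint64_t *m;
 *
 *	m = pctrie_remove_lookup(tree, index, &freed);
 *	if (freed != NULL)
 *		uma_zfree_smr(node_zone, freed);
 */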

/*
 * Remove from the trie the leaf last chosen by the iterator, and
 * adjust the path if its last member is to be freed.
 */
uint64_t *
pctrie_iter_remove(struct pctrie_iter *it, struct pctrie_node **freenode)
{
	struct pctrie_node *child, *node, *parent;
	uint64_t *m;
	int slot;

	DEBUG_POISON_POINTER(parent);
	*freenode = NULL;
	if (it->top >= 1) {
		parent = (it->top >= 2) ? it->path[it->top - 2] : NULL;
		node = it->path[it->top - 1];
		slot = pctrie_slot(node, it->index);
		child = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	} else {
		node = NULL;
		child = pctrie_root_load(it->ptree, NULL, PCTRIE_LOCKED);
	}
	m = pctrie_match_value(child, it->index);
	if (m != NULL)
		pctrie_remove(it->ptree, it->index, parent, node, freenode);
	if (*freenode != NULL)
		--it->top;
	return (m);
}

/*
 * Return the current leaf, assuming access is externally synchronized by a
 * lock.
 */
uint64_t *
pctrie_iter_value(struct pctrie_iter *it)
{
	struct pctrie_node *node;
	int slot;

	if (it->top == 0)
		node = pctrie_root_load(it->ptree, NULL,
		    PCTRIE_LOCKED);
	else {
		node = it->path[it->top - 1];
		slot = pctrie_slot(node, it->index);
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	return (pctrie_toval(node));
}

/*
 * Walk the subtrie rooted at *pnode in order, invoking callback on leaves and
 * using the leftmost child pointer for path reversal, until an interior node
 * is stripped of all children, and returned for deallocation, with *pnode left
 * pointing to the parent of that node.
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_prune(struct pctrie_node **pnode, struct pctrie_node *parent,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	struct pctrie_node *child, *node;
	int slot;

	node = *pnode;
	while (node->pn_popmap != 0) {
		slot = ffs(node->pn_popmap) - 1;
		node->pn_popmap ^= 1 << slot;
		child = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_UNSERIALIZED);
		pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL,
		    PCTRIE_UNSERIALIZED);
		if (pctrie_isleaf(child)) {
			if (callback != NULL)
				callback(pctrie_toptr(child, keyoff), arg);
			continue;
		}
		/* Climb one level down the trie. */
		pctrie_node_store(&node->pn_child[0], parent,
		    PCTRIE_UNSERIALIZED);
		parent = node;
		node = child;
	}
	*pnode = parent;
	return (node);
}

/*
 * Recover the node parent from its first child and continue pruning.
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_resume_compound(struct pctrie_node **pnode,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	struct pctrie_node *parent, *node;

	node = *pnode;
	if (node == NULL)
		return (NULL);
	/* Climb one level up the trie. */
	parent = pctrie_node_load(&node->pn_child[0], NULL,
	    PCTRIE_UNSERIALIZED);
	pctrie_node_store(&node->pn_child[0], PCTRIE_NULL, PCTRIE_UNSERIALIZED);
	return (pctrie_reclaim_prune(pnode, parent, callback, keyoff, arg));
}

/*
 * Find the trie root, and start pruning with a NULL parent.
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_begin_compound(struct pctrie_node **pnode,
    struct pctrie *ptree,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	struct pctrie_node *node;

	node = pctrie_root_load(ptree, NULL, PCTRIE_UNSERIALIZED);
	pctrie_root_store(ptree, PCTRIE_NULL, PCTRIE_UNSERIALIZED);
	if (pctrie_isleaf(node)) {
		if (callback != NULL && node != PCTRIE_NULL)
			callback(pctrie_toptr(node, keyoff), arg);
		return (NULL);
	}
	*pnode = node;
	return (pctrie_reclaim_prune(pnode, NULL, callback, keyoff, arg));
}

struct pctrie_node *
pctrie_reclaim_resume(struct pctrie_node **pnode)
{
	return (pctrie_reclaim_resume_compound(pnode, NULL, 0, NULL));
}

struct pctrie_node *
pctrie_reclaim_begin(struct pctrie_node **pnode, struct pctrie *ptree)
{
	return (pctrie_reclaim_begin_compound(pnode, ptree, NULL, 0, NULL));
}

struct pctrie_node *
pctrie_reclaim_resume_cb(struct pctrie_node **pnode,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	return (pctrie_reclaim_resume_compound(pnode, callback, keyoff, arg));
}

struct pctrie_node *
pctrie_reclaim_begin_cb(struct pctrie_node **pnode, struct pctrie *ptree,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	return (pctrie_reclaim_begin_compound(pnode, ptree,
	    callback, keyoff, arg));
}
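
/*
 * Illustrative sketch of reclaiming every node of a trie that lookups can no
 * longer reach; 'tree' and 'node_zone' are hypothetical:
 *
 *	struct pctrie_node *node, *pnode;
 *
 *	for (node = pctrie_reclaim_begin(&pnode, tree); node != NULL;
 *	    node = pctrie_reclaim_resume(&pnode))
 *		uma_zfree(node_zone, node);
 */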

/*
 * Replace an existing value in the trie with another one.
 * Panics if there is not an old value in the trie at the new value's index.
 */
uint64_t *
pctrie_replace(struct pctrie *ptree, uint64_t *newval)
{
	struct pctrie_node *leaf, *parent, *node;
	uint64_t *m;
	uint64_t index;
	int slot;

	leaf = pctrie_toleaf(newval);
	index = *newval;
	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
	parent = NULL;
	for (;;) {
		if (pctrie_isleaf(node)) {
			if ((m = pctrie_toval(node)) != NULL && *m == index) {
				if (parent == NULL)
					pctrie_root_store(ptree,
					    leaf, PCTRIE_LOCKED);
				else
					pctrie_node_store(
					    &parent->pn_child[slot], leaf,
					    PCTRIE_LOCKED);
				return (m);
			}
			break;
		}
		if (pctrie_keybarr(node, index, &slot))
			break;
		parent = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	panic("%s: original replacing value not found", __func__);
}

#ifdef DDB
/*
 * Show details about the given node.
 */
DB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
{
	struct pctrie_node *node, *tmp;
	int slot;
	pn_popmap_t popmap;

	if (!have_addr)
		return;
	node = (struct pctrie_node *)addr;
	db_printf("node %p, owner %jx, children popmap %04x, level %u:\n",
	    (void *)node, (uintmax_t)node->pn_owner, node->pn_popmap,
	    node->pn_clev / PCTRIE_WIDTH);
	for (popmap = node->pn_popmap; popmap != 0; popmap ^= 1 << slot) {
		slot = ffs(popmap) - 1;
		tmp = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_UNSERIALIZED);
		db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
		    slot, (void *)tmp,
		    pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL,
		    node->pn_clev / PCTRIE_WIDTH);
	}
}
#endif /* DDB */