xref: /freebsd/sys/kern/subr_pctrie.c (revision 47ef2a131091508e049ab10cad7f91a3c1342cd9)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 EMC Corp.
5  * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
6  * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  */
31 
32 /*
33  * Path-compressed radix trie implementation.
34  *
35  * The implementation takes into account the following rationale:
36  * - Size of the nodes should be as small as possible but still big enough
37  *   to avoid a large maximum depth for the trie.  This is a balance
38  *   between the necessity to not wire too much physical memory for the nodes
39  *   and the necessity to avoid too much cache pollution during the trie
40  *   operations.
41  * - There is not a huge bias toward the number of lookup operations over
42  *   the number of insert and remove operations.  This basically implies
43  *   that optimizations supposedly helping one operation but hurting the
44  *   other might be carefully evaluated.
45  * - On average not many nodes are expected to be fully populated, hence
46  *   level compression may just complicate things.
47  */
48 
49 #include <sys/cdefs.h>
50 #include "opt_ddb.h"
51 
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/kernel.h>
55 #include <sys/libkern.h>
56 #include <sys/pctrie.h>
57 #include <sys/proc.h>	/* smr.h depends on struct thread. */
58 #include <sys/smr.h>
59 #include <sys/smr_types.h>
60 
61 #ifdef DDB
62 #include <ddb/ddb.h>
63 #endif
64 
/*
 * Pick the type of a node's child-population bitmap from the trie width:
 * 8, 16 or 32 bits for PCTRIE_WIDTH 3, 4 or 5 respectively.  Any other
 * width is rejected at compile time.
 */
#if PCTRIE_WIDTH == 3
typedef uint8_t pn_popmap_t;
#elif PCTRIE_WIDTH == 4
typedef uint16_t pn_popmap_t;
#elif PCTRIE_WIDTH == 5
typedef uint32_t pn_popmap_t;
#else
#error Unsupported width
#endif
/*
 * NOTE(review): presumably needed because popmaps are handed to ffs() and
 * combined with int-typed "1 << slot" masks elsewhere in this file -- confirm.
 */
_Static_assert(sizeof(pn_popmap_t) <= sizeof(int),
    "pn_popmap_t too wide");
76 
struct pctrie_node;
/* SMR-annotated pointer type used for the child slots of a node. */
typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t;

/*
 * Interior node of the trie.
 *
 * pn_owner must stay the first member: pctrie_insert_node() applies
 * pctrie_toval() to a pointer that may reference an interior node and
 * relies on the dereference yielding pn_owner.
 */
struct pctrie_node {
	uint64_t	pn_owner;			/* Owner of record. */
	pn_popmap_t	pn_popmap;			/* Valid children. */
	uint8_t		pn_clev;			/* Level * WIDTH. */
	smr_pctnode_t	pn_child[PCTRIE_COUNT];		/* Child nodes. */
};
86 
87 /*
88  * Map index to an array position for the children of node,
89  */
90 static __inline int
91 pctrie_slot(struct pctrie_node *node, uint64_t index)
92 {
93 	return ((index >> node->pn_clev) & (PCTRIE_COUNT - 1));
94 }
95 
96 /*
97  * Returns true if index does not belong to the specified node.  Otherwise,
98  * sets slot value, and returns false.
99  */
100 static __inline bool
101 pctrie_keybarr(struct pctrie_node *node, uint64_t index, int *slot)
102 {
103 	index = (index - node->pn_owner) >> node->pn_clev;
104 	if (index >= PCTRIE_COUNT)
105 		return (true);
106 	*slot = index;
107 	return (false);
108 }
109 
/*
 * Consistency check of a trie node.
 *
 * With INVARIANTS, assert that the popmap satisfies powerof2() (the node
 * does not have "too many children") and that every slot whose popmap bit
 * is clear holds PCTRIE_NULL.  Without INVARIANTS this is a no-op.
 */
static __inline void
pctrie_node_put(struct pctrie_node *node)
{
#ifdef INVARIANTS
	int i;

	KASSERT(powerof2(node->pn_popmap),
	    ("pctrie_node_put: node %p has too many children %04x", node,
	    node->pn_popmap));
	for (i = 0; i < PCTRIE_COUNT; i++) {
		/* Only slots the popmap claims are empty are inspected. */
		if ((node->pn_popmap & (1 << i)) == 0) {
			KASSERT(smr_unserialized_load(&node->pn_child[i],
			    true) == PCTRIE_NULL,
			    ("pctrie_node_put: node %p has a child", node));
		}
	}
#endif
}
131 
/*
 * Access discipline for a slot; selects which smr_*_load()/smr_*_store()
 * variant pctrie_node_load() and pctrie_node_store() use.
 */
enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED };
133 
134 /*
135  * Fetch a node pointer from a slot.
136  */
137 static __inline struct pctrie_node *
138 pctrie_node_load(smr_pctnode_t *p, smr_t smr, enum pctrie_access access)
139 {
140 	switch (access) {
141 	case PCTRIE_UNSERIALIZED:
142 		return (smr_unserialized_load(p, true));
143 	case PCTRIE_LOCKED:
144 		return (smr_serialized_load(p, true));
145 	case PCTRIE_SMR:
146 		return (smr_entered_load(p, smr));
147 	}
148 	__assert_unreachable();
149 }
150 
151 static __inline void
152 pctrie_node_store(smr_pctnode_t *p, void *v, enum pctrie_access access)
153 {
154 	switch (access) {
155 	case PCTRIE_UNSERIALIZED:
156 		smr_unserialized_store(p, v, true);
157 		break;
158 	case PCTRIE_LOCKED:
159 		smr_serialized_store(p, v, true);
160 		break;
161 	case PCTRIE_SMR:
162 		panic("%s: Not supported in SMR section.", __func__);
163 		break;
164 	default:
165 		__assert_unreachable();
166 		break;
167 	}
168 }
169 
170 /*
171  * Get the root node for a tree.
172  */
173 static __inline struct pctrie_node *
174 pctrie_root_load(struct pctrie *ptree, smr_t smr, enum pctrie_access access)
175 {
176 	return (pctrie_node_load((smr_pctnode_t *)&ptree->pt_root, smr, access));
177 }
178 
179 /*
180  * Set the root node for a tree.
181  */
182 static __inline void
183 pctrie_root_store(struct pctrie *ptree, struct pctrie_node *node,
184     enum pctrie_access access)
185 {
186 	pctrie_node_store((smr_pctnode_t *)&ptree->pt_root, node, access);
187 }
188 
189 /*
190  * Returns TRUE if the specified node is a leaf and FALSE otherwise.
191  */
192 static __inline bool
193 pctrie_isleaf(struct pctrie_node *node)
194 {
195 	return (((uintptr_t)node & PCTRIE_ISLEAF) != 0);
196 }
197 
198 /*
199  * Returns val with leaf bit set.
200  */
201 static __inline void *
202 pctrie_toleaf(uint64_t *val)
203 {
204 	return ((void *)((uintptr_t)val | PCTRIE_ISLEAF));
205 }
206 
207 /*
208  * Returns the associated val extracted from node.
209  */
210 static __inline uint64_t *
211 pctrie_toval(struct pctrie_node *node)
212 {
213 	return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS));
214 }
215 
216 /*
217  * Returns the associated pointer extracted from node and field offset.
218  */
219 static __inline void *
220 pctrie_toptr(struct pctrie_node *node, int keyoff)
221 {
222 	return ((void *)(((uintptr_t)node & ~PCTRIE_FLAGS) - keyoff));
223 }
224 
225 /*
226  * Make 'child' a child of 'node'.
227  */
228 static __inline void
229 pctrie_addnode(struct pctrie_node *node, uint64_t index,
230     struct pctrie_node *child, enum pctrie_access access)
231 {
232 	int slot;
233 
234 	slot = pctrie_slot(node, index);
235 	pctrie_node_store(&node->pn_child[slot], child, access);
236 	node->pn_popmap ^= 1 << slot;
237 	KASSERT((node->pn_popmap & (1 << slot)) != 0,
238 	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
239 }
240 
241 /*
242  * pctrie node zone initializer.
243  */
244 int
245 pctrie_zone_init(void *mem, int size __unused, int flags __unused)
246 {
247 	struct pctrie_node *node;
248 
249 	node = mem;
250 	node->pn_popmap = 0;
251 	for (int i = 0; i < nitems(node->pn_child); i++)
252 		pctrie_node_store(&node->pn_child[i], PCTRIE_NULL,
253 		    PCTRIE_UNSERIALIZED);
254 	return (0);
255 }
256 
257 size_t
258 pctrie_node_size(void)
259 {
260 
261 	return (sizeof(struct pctrie_node));
262 }
263 
/*
 * Selects whether pctrie_insert_lookup_compound() also records a node that
 * leads to a neighboring entry, and in which direction.
 */
enum pctrie_insert_neighbor_mode {
	PCTRIE_INSERT_NEIGHBOR_NONE,	/* Do not track a neighbor. */
	PCTRIE_INSERT_NEIGHBOR_LT,	/* Track the next lesser entry. */
	PCTRIE_INSERT_NEIGHBOR_GT,	/* Track the next greater entry. */
};
269 
/*
 * Look for where to insert the key-value pair into the trie.  Complete the
 * insertion if it replaces a null leaf.  Return the insertion location if the
 * insertion needs to be completed by the caller; otherwise return NULL.
 *
 * The key is read from *val.  The tree is accessed with PCTRIE_LOCKED
 * semantics throughout.
 *
 * If the key is already present in the trie, populate *found_out as if by
 * pctrie_lookup().  *found_out is written only in that case, so callers
 * must initialize it beforehand.
 *
 * With mode PCTRIE_INSERT_NEIGHBOR_GT or PCTRIE_INSERT_NEIGHBOR_LT, set
 * *neighbor_out to the lowest level node we encounter during the insert lookup
 * that is a parent of the next greater or lesser entry.  The value is not
 * defined if the key was already present in the trie.  With mode
 * PCTRIE_INSERT_NEIGHBOR_NONE, neighbor_out is never dereferenced.
 *
 * Note that mode is expected to be a compile-time constant, and this procedure
 * is expected to be inlined into callers with extraneous code optimized out.
 */
static __always_inline void *
pctrie_insert_lookup_compound(struct pctrie *ptree, uint64_t *val,
    uint64_t **found_out, struct pctrie_node **neighbor_out,
    enum pctrie_insert_neighbor_mode mode)
{
	uint64_t index;
	struct pctrie_node *node, *parent;
	int slot;

	index = *val;

	/*
	 * The owner of record for root is not really important because it
	 * will never be used.
	 */
	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
	parent = NULL;
	for (;;) {
		if (pctrie_isleaf(node)) {
			if (node == PCTRIE_NULL) {
				/* Empty slot: the insertion completes here. */
				if (parent == NULL)
					pctrie_root_store(ptree,
					    pctrie_toleaf(val), PCTRIE_LOCKED);
				else
					pctrie_addnode(parent, index,
					    pctrie_toleaf(val), PCTRIE_LOCKED);
				return (NULL);
			}
			if (*pctrie_toval(node) == index) {
				/* Key already present: report it. */
				*found_out = pctrie_toval(node);
				return (NULL);
			}
			/* A different key occupies the slot: split needed. */
			break;
		}
		/*
		 * pctrie_keybarr() writes 'slot' only when the index belongs
		 * to 'node'.  When it fails, 'slot' still holds the position
		 * of 'node' within 'parent', which the return below relies on.
		 */
		if (pctrie_keybarr(node, index, &slot))
			break;
		/*
		 * Descend.  If we're tracking the next neighbor and this node
		 * contains a neighboring entry in the right direction, record
		 * it.
		 */
		if (mode == PCTRIE_INSERT_NEIGHBOR_LT) {
			/* Any populated child below 'slot'? */
			if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
				*neighbor_out = node;
		} else if (mode == PCTRIE_INSERT_NEIGHBOR_GT) {
			/* Any populated child above 'slot'? */
			if ((node->pn_popmap >> slot) > 1)
				*neighbor_out = node;
		}
		parent = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}

	/*
	 * The caller will split this node.  If we're tracking the next
	 * neighbor, record the old node if the old entry is in the right
	 * direction.  For an interior node, pctrie_toval() yields its
	 * pn_owner field, the first member of the node.
	 */
	if (mode == PCTRIE_INSERT_NEIGHBOR_LT) {
		if (*pctrie_toval(node) < index)
			*neighbor_out = node;
	} else if (mode == PCTRIE_INSERT_NEIGHBOR_GT) {
		if (*pctrie_toval(node) > index)
			*neighbor_out = node;
	}

	/*
	 * 'node' must be replaced in the tree with a new branch node, with
	 * children 'node' and 'val'.  Return the place that points to 'node'
	 * now, and will point to the new branching node later.
	 */
	return ((parent != NULL) ? &parent->pn_child[slot]:
	    (smr_pctnode_t *)&ptree->pt_root);
}
360 
361 /*
362  * Wrap pctrie_insert_lookup_compound to implement a strict insertion.  Panic
363  * if the key already exists, and do not look for neighboring entries.
364  */
365 void *
366 pctrie_insert_lookup_strict(struct pctrie *ptree, uint64_t *val)
367 {
368 	void *parentp;
369 	uint64_t *found;
370 
371 	found = NULL;
372 	parentp = pctrie_insert_lookup_compound(ptree, val, &found, NULL,
373 	    PCTRIE_INSERT_NEIGHBOR_NONE);
374 	if (__predict_false(found != NULL))
375 		panic("%s: key %jx is already present", __func__,
376 		    (uintmax_t)*val);
377 	return (parentp);
378 }
379 
380 /*
381  * Wrap pctrie_insert_lookup_compound to implement find-or-insert.  Do not look
382  * for neighboring entries.
383  */
384 void *
385 pctrie_insert_lookup(struct pctrie *ptree, uint64_t *val,
386     uint64_t **found_out)
387 {
388 	*found_out = NULL;
389 	return (pctrie_insert_lookup_compound(ptree, val, found_out, NULL,
390 	    PCTRIE_INSERT_NEIGHBOR_NONE));
391 }
392 
393 /*
394  * Wrap pctrie_insert_lookup_compound to implement find or insert and find next
395  * greater entry.  Find a subtree that contains the next entry greater than the
396  * newly-inserted or to-be-inserted entry.
397  */
398 void *
399 pctrie_insert_lookup_gt(struct pctrie *ptree, uint64_t *val,
400     uint64_t **found_out, struct pctrie_node **neighbor_out)
401 {
402 	*found_out = NULL;
403 	*neighbor_out = NULL;
404 	return (pctrie_insert_lookup_compound(ptree, val, found_out,
405 	    neighbor_out, PCTRIE_INSERT_NEIGHBOR_GT));
406 }
407 
408 /*
409  * Wrap pctrie_insert_lookup_compound to implement find or insert and find next
410  * lesser entry.  Find a subtree that contains the next entry less than the
411  * newly-inserted or to-be-inserted entry.
412  */
413 void *
414 pctrie_insert_lookup_lt(struct pctrie *ptree, uint64_t *val,
415     uint64_t **found_out, struct pctrie_node **neighbor_out)
416 {
417 	*found_out = NULL;
418 	*neighbor_out = NULL;
419 	return (pctrie_insert_lookup_compound(ptree, val, found_out,
420 	    neighbor_out, PCTRIE_INSERT_NEIGHBOR_LT));
421 }
422 
/*
 * Uses new node to insert key-value pair into the trie at given location.
 *
 * parentp: the slot that currently points at the entry to be displaced
 *          (a leaf or an interior node); it is overwritten to point at
 *          'parent'.
 * parent:  the node that becomes the new branch; it ends up with exactly
 *          two children, the displaced entry and the leaf for 'val'.
 * val:     pointer to the key being inserted.
 */
void
pctrie_insert_node(void *parentp, struct pctrie_node *parent, uint64_t *val)
{
	struct pctrie_node *node;
	uint64_t index, newind;

	/*
	 * Clear the last child pointer of the newly allocated parent.  We want
	 * to clear it after the final section has exited so lookup can not
	 * return false negatives.  It is done here because it will be
	 * cache-cold in the dtor callback.
	 */
	if (parent->pn_popmap != 0) {
		pctrie_node_store(&parent->pn_child[ffs(parent->pn_popmap) - 1],
		    PCTRIE_NULL, PCTRIE_UNSERIALIZED);
		parent->pn_popmap = 0;
	}

	/*
	 * Recover the values of the two children of the new parent node.  If
	 * 'node' is not a leaf, this stores into 'newind' the 'owner' field,
	 * which must be first in the node.
	 */
	index = *val;
	node = pctrie_node_load(parentp, NULL, PCTRIE_UNSERIALIZED);
	newind = *pctrie_toval(node);

	/*
	 * From the highest-order bit where the indexes differ,
	 * compute the highest level in the trie where they differ.  Then,
	 * compute the least index of this subtrie.
	 */
	_Static_assert(sizeof(long long) >= sizeof(uint64_t),
	    "uint64 too wide");
	_Static_assert(sizeof(uint64_t) * NBBY <=
	    (1 << (sizeof(parent->pn_clev) * NBBY)), "pn_clev too narrow");
	parent->pn_clev = rounddown(ilog2(index ^ newind), PCTRIE_WIDTH);
	/*
	 * PCTRIE_COUNT is first stored in the 64-bit pn_owner field so that
	 * the shift below is carried out in 64 bits; the negated result is a
	 * mask that clears every index bit below the node's span.
	 */
	parent->pn_owner = PCTRIE_COUNT;
	parent->pn_owner = index & -(parent->pn_owner << parent->pn_clev);


	/* These writes are not yet visible due to ordering. */
	pctrie_addnode(parent, index, pctrie_toleaf(val), PCTRIE_UNSERIALIZED);
	pctrie_addnode(parent, newind, node, PCTRIE_UNSERIALIZED);
	/* Synchronize to make the above visible. */
	pctrie_node_store(parentp, parent, PCTRIE_LOCKED);
}
473 
/*
 * If 'node' is a non-null leaf whose stored key equals 'index', return the
 * pointer to that key; in every other case return NULL.
 */
static __always_inline uint64_t *
pctrie_match_value(struct pctrie_node *node, uint64_t index)
{
	uint64_t *val;

	if (!pctrie_isleaf(node))
		return (NULL);
	val = pctrie_toval(node);
	if (val == NULL || *val != index)
		return (NULL);
	return (val);
}
488 
489 /*
490  * Returns the value stored at the index.  If the index is not present,
491  * NULL is returned.
492  */
493 static __always_inline uint64_t *
494 _pctrie_lookup(struct pctrie *ptree, uint64_t index, smr_t smr,
495     enum pctrie_access access)
496 {
497 	struct pctrie_node *node;
498 	int slot;
499 
500 	node = pctrie_root_load(ptree, smr, access);
501 	/* Seek a node that matches index. */
502 	while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot))
503 		node = pctrie_node_load(&node->pn_child[slot], smr, access);
504 	return (pctrie_match_value(node, index));
505 }
506 
507 /*
508  * Returns the value stored at the index, assuming access is externally
509  * synchronized by a lock.
510  *
511  * If the index is not present, NULL is returned.
512  */
513 uint64_t *
514 pctrie_lookup(struct pctrie *ptree, uint64_t index)
515 {
516 	return (_pctrie_lookup(ptree, index, NULL, PCTRIE_LOCKED));
517 }
518 
519 /*
520  * Returns the value stored at the index without requiring an external lock.
521  *
522  * If the index is not present, NULL is returned.
523  */
524 uint64_t *
525 pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr)
526 {
527 	uint64_t *res;
528 
529 	smr_enter(smr);
530 	res = _pctrie_lookup(ptree, index, smr, PCTRIE_SMR);
531 	smr_exit(smr);
532 	return (res);
533 }
534 
/*
 * Returns the last node examined in the search for the index, and updates the
 * search path to that node.
 *
 * The iterator's path[] acts as a stack of interior nodes, with it->top the
 * number of entries in use.  On return every node left on the stack covers
 * 'index', and the returned pointer is either a leaf (possibly PCTRIE_NULL)
 * or an interior node that does not cover 'index'; the returned node itself
 * is not pushed.
 */
static __always_inline struct pctrie_node *
_pctrie_iter_lookup_node(struct pctrie_iter *it, uint64_t index, smr_t smr,
    enum pctrie_access access)
{
	struct pctrie_node *node;
	int slot;

	/*
	 * Climb the search path to find the lowest node from which to start the
	 * search for a value matching 'index'.
	 */
	while (it->top != 0) {
		node = it->path[it->top - 1];
		KASSERT(!powerof2(node->pn_popmap),
		    ("%s: freed node in iter path", __func__));
		if (!pctrie_keybarr(node, index, &slot)) {
			/* This node covers index: resume from its child. */
			node = pctrie_node_load(
			    &node->pn_child[slot], smr, access);
			break;
		}
		--it->top;
	}
	/* Nothing on the path covers index: start over from the root. */
	if (it->top == 0)
		node = pctrie_root_load(it->ptree, smr, access);

	/* Seek a node that matches index. */
	while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot)) {
		KASSERT(it->top < nitems(it->path),
		    ("%s: path overflow in trie %p", __func__, it->ptree));
		it->path[it->top++] = node;
		node = pctrie_node_load(&node->pn_child[slot], smr, access);
	}
	return (node);
}
573 
574 /*
575  * Returns the value stored at a given index value, possibly NULL.
576  */
577 static __always_inline uint64_t *
578 _pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index, smr_t smr,
579     enum pctrie_access access)
580 {
581 	struct pctrie_node *node;
582 
583 	it->index = index;
584 	node = _pctrie_iter_lookup_node(it, index, smr, access);
585 	return (pctrie_match_value(node, index));
586 }
587 
588 /*
589  * Returns the value stored at a given index value, possibly NULL.
590  */
591 uint64_t *
592 pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index)
593 {
594 	return (_pctrie_iter_lookup(it, index, NULL, PCTRIE_LOCKED));
595 }
596 
597 /*
598  * Insert the val in the trie, starting search with iterator.  Return a pointer
599  * to indicate where a new node must be allocated to complete insertion.
600  * Assumes access is externally synchronized by a lock.
601  */
602 void *
603 pctrie_iter_insert_lookup(struct pctrie_iter *it, uint64_t *val)
604 {
605 	struct pctrie_node *node;
606 
607 	it->index = *val;
608 	node = _pctrie_iter_lookup_node(it, *val, NULL, PCTRIE_LOCKED);
609 	if (node == PCTRIE_NULL) {
610 		if (it->top == 0)
611 			pctrie_root_store(it->ptree,
612 			    pctrie_toleaf(val), PCTRIE_LOCKED);
613 		else
614 			pctrie_addnode(it->path[it->top - 1], it->index,
615 			    pctrie_toleaf(val), PCTRIE_LOCKED);
616 		return (NULL);
617 	}
618 	if (__predict_false(pctrie_match_value(node, it->index) != NULL))
619 		panic("%s: key %jx is already present", __func__,
620 		    (uintmax_t)it->index);
621 
622 	/*
623 	 * 'node' must be replaced in the tree with a new branch node, with
624 	 * children 'node' and 'val'. Return the place that points to 'node'
625 	 * now, and will point to to the new branching node later.
626 	 */
627 	if (it->top == 0)
628 		return ((smr_pctnode_t *)&it->ptree->pt_root);
629 	node = it->path[it->top - 1];
630 	return (&node->pn_child[pctrie_slot(node, it->index)]);
631 }
632 
633 /*
634  * Returns the value stored at a fixed offset from the current index value,
635  * possibly NULL.
636  */
637 static __always_inline uint64_t *
638 _pctrie_iter_stride(struct pctrie_iter *it, int stride, smr_t smr,
639     enum pctrie_access access)
640 {
641 	uint64_t index = it->index + stride;
642 
643 	/* Detect stride overflow. */
644 	if ((stride > 0) != (index > it->index))
645 		return (NULL);
646 	/* Detect crossing limit */
647 	if ((index < it->limit) != (it->index < it->limit))
648 		return (NULL);
649 
650 	return (_pctrie_iter_lookup(it, index, smr, access));
651 }
652 
653 /*
654  * Returns the value stored at a fixed offset from the current index value,
655  * possibly NULL.
656  */
657 uint64_t *
658 pctrie_iter_stride(struct pctrie_iter *it, int stride)
659 {
660 	return (_pctrie_iter_stride(it, stride, NULL, PCTRIE_LOCKED));
661 }
662 
663 /*
664  * Returns the value stored at one more than the current index value, possibly
665  * NULL, assuming access is externally synchronized by a lock.
666  */
667 uint64_t *
668 pctrie_iter_next(struct pctrie_iter *it)
669 {
670 	return (_pctrie_iter_stride(it, 1, NULL, PCTRIE_LOCKED));
671 }
672 
673 /*
674  * Returns the value stored at one less than the current index value, possibly
675  * NULL, assuming access is externally synchronized by a lock.
676  */
677 uint64_t *
678 pctrie_iter_prev(struct pctrie_iter *it)
679 {
680 	return (_pctrie_iter_stride(it, -1, NULL, PCTRIE_LOCKED));
681 }
682 
/*
 * Returns the value with the least index that is greater than or equal to the
 * specified index, or NULL if there are no such values.
 *
 * The search starts at 'node', which may be a leaf (including PCTRIE_NULL)
 * or an interior node, and only considers entries reachable from it.
 *
 * Requires that access be externally synchronized by a lock.
 */
static __inline uint64_t *
pctrie_lookup_ge_node(struct pctrie_node *node, uint64_t index)
{
	struct pctrie_node *succ;
	uint64_t *m;
	int slot;

	/*
	 * Descend the trie as if performing an ordinary lookup for the
	 * specified value.  However, unlike an ordinary lookup, as we descend
	 * the trie, we use "succ" to remember the last branching-off point,
	 * that is, the interior node under which the least value that is both
	 * outside our current path down the trie and greater than the specified
	 * index resides.  (The node's popmap makes it fast and easy to
	 * recognize a branching-off point.)  If our ordinary lookup fails to
	 * yield a value that is greater than or equal to the specified index,
	 * then we will exit this loop and perform a lookup starting from
	 * "succ".  If "succ" is not NULL, then that lookup is guaranteed to
	 * succeed.
	 */
	succ = NULL;
	for (;;) {
		if (pctrie_isleaf(node)) {
			/* A null leaf yields m == NULL and never matches. */
			if ((m = pctrie_toval(node)) != NULL && *m >= index)
				return (m);
			break;
		}
		if (pctrie_keybarr(node, index, &slot)) {
			/*
			 * If all values in this subtree are > index, then the
			 * least value in this subtree is the answer.
			 */
			if (node->pn_owner > index)
				succ = node;
			break;
		}

		/*
		 * Just in case the next search step leads to a subtree of all
		 * values < index, check popmap to see if a next bigger step, to
		 * a subtree of all pages with values > index, is available.  If
		 * so, remember to restart the search here.
		 */
		if ((node->pn_popmap >> slot) > 1)
			succ = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}

	/*
	 * Restart the search from the last place visited in the subtree that
	 * included some values > index, if there was such a place.
	 */
	if (succ == NULL)
		return (NULL);
	/*
	 * succ == node only when the loop ended on an interior node lying
	 * entirely above index; in that case no sibling step is needed.
	 */
	if (succ != node) {
		/*
		 * Take a step to the next bigger sibling of the node chosen
		 * last time.  In that subtree, all values > index.
		 */
		slot = pctrie_slot(succ, index) + 1;
		KASSERT((succ->pn_popmap >> slot) != 0,
		    ("%s: no popmap siblings past slot %d in node %p",
		    __func__, slot, succ));
		slot += ffs(succ->pn_popmap >> slot) - 1;
		succ = pctrie_node_load(&succ->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}

	/*
	 * Find the value in the subtree rooted at "succ" with the least index.
	 */
	while (!pctrie_isleaf(succ)) {
		KASSERT(succ->pn_popmap != 0,
		    ("%s: no popmap children in node %p",  __func__, succ));
		slot = ffs(succ->pn_popmap) - 1;
		succ = pctrie_node_load(&succ->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	return (pctrie_toval(succ));
}
770 
771 uint64_t *
772 pctrie_lookup_ge(struct pctrie *ptree, uint64_t index)
773 {
774 	return (pctrie_lookup_ge_node(
775 	    pctrie_root_load(ptree, NULL, PCTRIE_LOCKED), index));
776 }
777 
/*
 * Return the value with the least index strictly greater than 'index'
 * within the subtree 'node'.  Returns NULL if 'node' is NULL, if 'index' is
 * already the largest representable key, or if no such value exists.
 */
uint64_t *
pctrie_subtree_lookup_gt(struct pctrie_node *node, uint64_t index)
{
	uint64_t next;

	if (node == NULL)
		return (NULL);
	next = index + 1;
	if (next == 0)
		return (NULL);
	return (pctrie_lookup_ge_node(node, next));
}
785 
/*
 * Find first leaf >= index, and fill iter with the path to the parent of that
 * leaf.  Return NULL if there is no such leaf less than limit.
 *
 * A limit of 0 means no limit is applied.  it->index is updated only when a
 * leaf is returned.
 */
uint64_t *
pctrie_iter_lookup_ge(struct pctrie_iter *it, uint64_t index)
{
	struct pctrie_node *node;
	uint64_t *m;
	int slot;

	/* Seek a node that matches index. */
	node = _pctrie_iter_lookup_node(it, index, NULL, PCTRIE_LOCKED);

	/*
	 * If no such node was found, and instead this path leads only to nodes
	 * < index, back up to find a subtrie with the least value > index.
	 * When 'node' is an interior node, pctrie_toval() reads its pn_owner.
	 */
	if (node == PCTRIE_NULL || *pctrie_toval(node) < index) {
		/* Climb the path to find a node with a descendant > index. */
		while (it->top != 0) {
			node = it->path[it->top - 1];
			/*
			 * NOTE(review): slot can equal PCTRIE_COUNT here, so
			 * the shift below relies on pn_popmap being promoted
			 * to a type wider than PCTRIE_COUNT bits -- confirm
			 * for PCTRIE_WIDTH == 5.
			 */
			slot = pctrie_slot(node, index) + 1;
			if ((node->pn_popmap >> slot) != 0)
				break;
			--it->top;
		}
		if (it->top == 0)
			return (NULL);

		/* Step to the least child with a descendant > index. */
		slot += ffs(node->pn_popmap >> slot) - 1;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	/* Descend to the least leaf of the subtrie. */
	while (!pctrie_isleaf(node)) {
		/* Everything below this node is at or beyond the limit. */
		if (it->limit != 0 && node->pn_owner >= it->limit)
			return (NULL);
		slot = ffs(node->pn_popmap) - 1;
		KASSERT(it->top < nitems(it->path),
		    ("%s: path overflow in trie %p", __func__, it->ptree));
		it->path[it->top++] = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	m = pctrie_toval(node);
	if (it->limit != 0 && *m >= it->limit)
		return (NULL);
	it->index = *m;
	return (m);
}
838 
839 /*
840  * Find the first leaf with value at least 'jump' greater than the previous
841  * leaf.  Return NULL if that value is >= limit.
842  */
843 uint64_t *
844 pctrie_iter_jump_ge(struct pctrie_iter *it, int64_t jump)
845 {
846 	uint64_t index = it->index + jump;
847 
848 	/* Detect jump overflow. */
849 	if ((jump > 0) != (index > it->index))
850 		return (NULL);
851 	if (it->limit != 0 && index >= it->limit)
852 		return (NULL);
853 	return (pctrie_iter_lookup_ge(it, index));
854 }
855 
#ifdef INVARIANTS
/*
 * Assert that 'res', the result of a subtree greater-than lookup for
 * 'index', equals what a lookup from the root of 'ptree' produces.  'node'
 * is used only in the diagnostic message.
 */
void
pctrie_subtree_lookup_gt_assert(struct pctrie_node *node, uint64_t index,
    struct pctrie *ptree, uint64_t *res)
{
	uint64_t *expected;

	/* No key can exceed the maximum index. */
	expected = (index + 1 == 0) ? NULL :
	    pctrie_lookup_ge(ptree, index + 1);
	KASSERT(res == expected,
	    ("pctrie subtree lookup gt result different from root lookup: "
	    "ptree %p, index %ju, subtree %p, found %p, expected %p", ptree,
	    (uintmax_t)index, node, res, expected));
}
#endif
873 
/*
 * Returns the value with the greatest index that is less than or equal to the
 * specified index, or NULL if there are no such values.
 *
 * The search starts at 'node', which may be a leaf (including PCTRIE_NULL)
 * or an interior node, and only considers entries reachable from it.
 *
 * Requires that access be externally synchronized by a lock.
 */
static __inline uint64_t *
pctrie_lookup_le_node(struct pctrie_node *node, uint64_t index)
{
	struct pctrie_node *pred;
	uint64_t *m;
	int slot;

	/*
	 * Mirror the implementation of pctrie_lookup_ge_node, described above.
	 * "pred" remembers the last interior node on the way down that has a
	 * populated child below the slot being followed.
	 */
	pred = NULL;
	for (;;) {
		if (pctrie_isleaf(node)) {
			/* A null leaf yields m == NULL and never matches. */
			if ((m = pctrie_toval(node)) != NULL && *m <= index)
				return (m);
			break;
		}
		if (pctrie_keybarr(node, index, &slot)) {
			/*
			 * The index lies outside this subtree; if the subtree
			 * starts below index, its greatest value is the answer.
			 */
			if (node->pn_owner < index)
				pred = node;
			break;
		}
		/* Any populated child in a slot lower than 'slot'? */
		if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
			pred = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	if (pred == NULL)
		return (NULL);
	if (pred != node) {
		/* Step to the nearest populated sibling below index's slot. */
		slot = pctrie_slot(pred, index);
		KASSERT((pred->pn_popmap & ((1 << slot) - 1)) != 0,
		    ("%s: no popmap siblings before slot %d in node %p",
		    __func__, slot, pred));
		slot = ilog2(pred->pn_popmap & ((1 << slot) - 1));
		pred = pctrie_node_load(&pred->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	/* Follow the highest populated slot down to a leaf. */
	while (!pctrie_isleaf(pred)) {
		KASSERT(pred->pn_popmap != 0,
		    ("%s: no popmap children in node %p",  __func__, pred));
		slot = ilog2(pred->pn_popmap);
		pred = pctrie_node_load(&pred->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	return (pctrie_toval(pred));
}
927 
928 uint64_t *
929 pctrie_lookup_le(struct pctrie *ptree, uint64_t index)
930 {
931 	return (pctrie_lookup_le_node(
932 	    pctrie_root_load(ptree, NULL, PCTRIE_LOCKED), index));
933 }
934 
/*
 * Return the value with the greatest index strictly less than 'index'
 * within the subtree 'node'.  Returns NULL if 'node' is NULL, if 'index' is
 * 0, or if no such value exists.
 */
uint64_t *
pctrie_subtree_lookup_lt(struct pctrie_node *node, uint64_t index)
{
	if (node == NULL)
		return (NULL);
	if (index == 0)
		return (NULL);
	return (pctrie_lookup_le_node(node, index - 1));
}
942 
943 /*
944  * Find first leaf <= index, and fill iter with the path to the parent of that
945  * leaf.  Return NULL if there is no such leaf greater than limit.
946  */
947 uint64_t *
948 pctrie_iter_lookup_le(struct pctrie_iter *it, uint64_t index)
949 {
950 	struct pctrie_node *node;
951 	uint64_t *m;
952 	int slot;
953 
954 	/* Seek a node that matches index. */
955 	node = _pctrie_iter_lookup_node(it, index, NULL, PCTRIE_LOCKED);
956 
957 	/*
958 	 * If no such node was found, and instead this path leads only to nodes
959 	 * > index, back up to find a subtrie with the greatest value < index.
960 	 */
961 	if (node == PCTRIE_NULL || *pctrie_toval(node) > index) {
962 		/* Climb the path to find a node with a descendant < index. */
963 		while (it->top != 0) {
964 			node = it->path[it->top - 1];
965 			slot = pctrie_slot(node, index);
966 			if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
967 				break;
968 			--it->top;
969 		}
970 		if (it->top == 0)
971 			return (NULL);
972 
973 		/* Step to the greatest child with a descendant < index. */
974 		slot = ilog2(node->pn_popmap & ((1 << slot) - 1));
975 		node = pctrie_node_load(&node->pn_child[slot], NULL,
976 		    PCTRIE_LOCKED);
977 	}
978 	/* Descend to the greatest leaf of the subtrie. */
979 	while (!pctrie_isleaf(node)) {
980 		if (it->limit != 0 && it->limit >=
981 		    node->pn_owner + (PCTRIE_COUNT << node->pn_clev) - 1)
982 			return (NULL);
983 		slot = ilog2(node->pn_popmap);
984 		KASSERT(it->top < nitems(it->path),
985 		    ("%s: path overflow in trie %p", __func__, it->ptree));
986 		it->path[it->top++] = node;
987 		node = pctrie_node_load(&node->pn_child[slot], NULL,
988 		    PCTRIE_LOCKED);
989 	}
990 	m = pctrie_toval(node);
991 	if (it->limit != 0 && *m <= it->limit)
992 		return (NULL);
993 	it->index = *m;
994 	return (m);
995 }
996 
997 /*
998  * Find the first leaf with value at most 'jump' less than the previous
999  * leaf.  Return NULL if that value is <= limit.
1000  */
1001 uint64_t *
1002 pctrie_iter_jump_le(struct pctrie_iter *it, int64_t jump)
1003 {
1004 	uint64_t index = it->index - jump;
1005 
1006 	/* Detect jump overflow. */
1007 	if ((jump > 0) != (index < it->index))
1008 		return (NULL);
1009 	if (it->limit != 0 && index <= it->limit)
1010 		return (NULL);
1011 	return (pctrie_iter_lookup_le(it, index));
1012 }
1013 
1014 #ifdef INVARIANTS
/*
 * Check that the result 'res' of a less-than lookup confined to the subtrie
 * rooted at 'node' equals what a lookup from the root of 'ptree' yields for
 * the same index; index 0 has no predecessor, so NULL is expected there.
 */
void
pctrie_subtree_lookup_lt_assert(struct pctrie_node *node, uint64_t index,
    struct pctrie *ptree, uint64_t *res)
{
	uint64_t *expected;

	expected = (index != 0) ? pctrie_lookup_le(ptree, index - 1) : NULL;
	KASSERT(res == expected,
	    ("pctrie subtree lookup lt result different from root lookup: "
	    "ptree %p, index %ju, subtree %p, found %p, expected %p", ptree,
	    (uintmax_t)index, node, res, expected));
}
1030 #endif
1031 
1032 static void
1033 pctrie_remove(struct pctrie *ptree, uint64_t index, struct pctrie_node *parent,
1034     struct pctrie_node *node, struct pctrie_node **freenode)
1035 {
1036 	struct pctrie_node *child;
1037 	int slot;
1038 
1039 	if (node == NULL) {
1040 		pctrie_root_store(ptree, PCTRIE_NULL, PCTRIE_LOCKED);
1041 		return;
1042 	}
1043 	slot = pctrie_slot(node, index);
1044 	KASSERT((node->pn_popmap & (1 << slot)) != 0,
1045 	    ("%s: bad popmap slot %d in node %p",
1046 	    __func__, slot, node));
1047 	node->pn_popmap ^= 1 << slot;
1048 	pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL, PCTRIE_LOCKED);
1049 	if (!powerof2(node->pn_popmap))
1050 		return;
1051 	KASSERT(node->pn_popmap != 0, ("%s: bad popmap all zeroes", __func__));
1052 	slot = ffs(node->pn_popmap) - 1;
1053 	child = pctrie_node_load(&node->pn_child[slot], NULL, PCTRIE_LOCKED);
1054 	KASSERT(child != PCTRIE_NULL,
1055 	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
1056 	if (parent == NULL)
1057 		pctrie_root_store(ptree, child, PCTRIE_LOCKED);
1058 	else {
1059 		slot = pctrie_slot(parent, index);
1060 		KASSERT(node ==
1061 		    pctrie_node_load(&parent->pn_child[slot], NULL,
1062 		    PCTRIE_LOCKED), ("%s: invalid child value", __func__));
1063 		pctrie_node_store(&parent->pn_child[slot], child,
1064 		    PCTRIE_LOCKED);
1065 	}
1066 	/*
1067 	 * The child is still valid and we can not zero the
1068 	 * pointer until all SMR references are gone.
1069 	 */
1070 	pctrie_node_put(node);
1071 	*freenode = node;
1072 }
1073 
1074 /*
1075  * Remove the specified index from the tree, and return the value stored at
1076  * that index.  If the index is not present, return NULL.
1077  */
1078 uint64_t *
1079 pctrie_remove_lookup(struct pctrie *ptree, uint64_t index,
1080     struct pctrie_node **freenode)
1081 {
1082 	struct pctrie_node *child, *node, *parent;
1083 	uint64_t *m;
1084 	int slot;
1085 
1086 	DEBUG_POISON_POINTER(parent);
1087 	*freenode = node = NULL;
1088 	child = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
1089 	while (!pctrie_isleaf(child)) {
1090 		parent = node;
1091 		node = child;
1092 		slot = pctrie_slot(node, index);
1093 		child = pctrie_node_load(&node->pn_child[slot], NULL,
1094 		    PCTRIE_LOCKED);
1095 	}
1096 	m = pctrie_match_value(child, index);
1097 	if (m != NULL)
1098 		pctrie_remove(ptree, index, parent, node, freenode);
1099 	return (m);
1100 }
1101 
1102 /*
1103  * Remove from the trie the leaf last chosen by the iterator, and
1104  * adjust the path if it's last member is to be freed.
1105  */
1106 uint64_t *
1107 pctrie_iter_remove(struct pctrie_iter *it, struct pctrie_node **freenode)
1108 {
1109 	struct pctrie_node *child, *node, *parent;
1110 	uint64_t *m;
1111 	int slot;
1112 
1113 	DEBUG_POISON_POINTER(parent);
1114 	*freenode = NULL;
1115 	if (it->top >= 1) {
1116 		parent = (it->top >= 2) ? it->path[it->top - 2] : NULL;
1117 		node = it->path[it->top - 1];
1118 		slot = pctrie_slot(node, it->index);
1119 		child = pctrie_node_load(&node->pn_child[slot], NULL,
1120 		    PCTRIE_LOCKED);
1121 	} else {
1122 		node = NULL;
1123 		child = pctrie_root_load(it->ptree, NULL, PCTRIE_LOCKED);
1124 	}
1125 	m = pctrie_match_value(child, it->index);
1126 	if (m != NULL)
1127 		pctrie_remove(it->ptree, it->index, parent, node, freenode);
1128 	if (*freenode != NULL)
1129 		--it->top;
1130 	return (m);
1131 }
1132 
1133 /*
1134  * Return the current leaf, assuming access is externally synchronized by a
1135  * lock.
1136  */
1137 uint64_t *
1138 pctrie_iter_value(struct pctrie_iter *it)
1139 {
1140 	struct pctrie_node *node;
1141 	int slot;
1142 
1143 	if (it->top == 0)
1144 		node = pctrie_root_load(it->ptree, NULL,
1145 		    PCTRIE_LOCKED);
1146 	else {
1147 		node = it->path[it->top - 1];
1148 		slot = pctrie_slot(node, it->index);
1149 		node = pctrie_node_load(&node->pn_child[slot], NULL,
1150 		    PCTRIE_LOCKED);
1151 	}
1152 	return (pctrie_toval(node));
1153 }
1154 
1155 /*
1156  * Walk the subtrie rooted at *pnode in order, invoking callback on leaves and
1157  * using the leftmost child pointer for path reversal, until an interior node
1158  * is stripped of all children, and returned for deallocation, with *pnode left
1159  * pointing to the parent of that node.
1160  */
1161 static __always_inline struct pctrie_node *
1162 pctrie_reclaim_prune(struct pctrie_node **pnode, struct pctrie_node *parent,
1163     pctrie_cb_t callback, int keyoff, void *arg)
1164 {
1165 	struct pctrie_node *child, *node;
1166 	int slot;
1167 
1168 	node = *pnode;
1169 	while (node->pn_popmap != 0) {
1170 		slot = ffs(node->pn_popmap) - 1;
1171 		node->pn_popmap ^= 1 << slot;
1172 		child = pctrie_node_load(&node->pn_child[slot], NULL,
1173 		    PCTRIE_UNSERIALIZED);
1174 		pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL,
1175 		    PCTRIE_UNSERIALIZED);
1176 		if (pctrie_isleaf(child)) {
1177 			if (callback != NULL)
1178 				callback(pctrie_toptr(child, keyoff), arg);
1179 			continue;
1180 		}
1181 		/* Climb one level down the trie. */
1182 		pctrie_node_store(&node->pn_child[0], parent,
1183 		    PCTRIE_UNSERIALIZED);
1184 		parent = node;
1185 		node = child;
1186 	}
1187 	*pnode = parent;
1188 	return (node);
1189 }
1190 
1191 /*
1192  * Recover the node parent from its first child and continue pruning.
1193  */
1194 static __always_inline struct pctrie_node *
1195 pctrie_reclaim_resume_compound(struct pctrie_node **pnode,
1196     pctrie_cb_t callback, int keyoff, void *arg)
1197 {
1198 	struct pctrie_node *parent, *node;
1199 
1200 	node = *pnode;
1201 	if (node == NULL)
1202 		return (NULL);
1203 	/* Climb one level up the trie. */
1204 	parent = pctrie_node_load(&node->pn_child[0], NULL,
1205 	    PCTRIE_UNSERIALIZED);
1206 	pctrie_node_store(&node->pn_child[0], PCTRIE_NULL, PCTRIE_UNSERIALIZED);
1207 	return (pctrie_reclaim_prune(pnode, parent, callback, keyoff, arg));
1208 }
1209 
1210 /*
1211  * Find the trie root, and start pruning with a NULL parent.
1212  */
1213 static __always_inline struct pctrie_node *
1214 pctrie_reclaim_begin_compound(struct pctrie_node **pnode,
1215     struct pctrie *ptree,
1216     pctrie_cb_t callback, int keyoff, void *arg)
1217 {
1218 	struct pctrie_node *node;
1219 
1220 	node = pctrie_root_load(ptree, NULL, PCTRIE_UNSERIALIZED);
1221 	pctrie_root_store(ptree, PCTRIE_NULL, PCTRIE_UNSERIALIZED);
1222 	if (pctrie_isleaf(node)) {
1223 		if (callback != NULL && node != PCTRIE_NULL)
1224 			callback(pctrie_toptr(node, keyoff), arg);
1225 		return (NULL);
1226 	}
1227 	*pnode = node;
1228 	return (pctrie_reclaim_prune(pnode, NULL, callback, keyoff, arg));
1229 }
1230 
/*
 * Resume a reclaim walk at *pnode without a leaf callback.  Returns the next
 * node to deallocate, or NULL when *pnode is NULL.
 */
struct pctrie_node *
pctrie_reclaim_resume(struct pctrie_node **pnode)
{
	struct pctrie_node *stripped;

	stripped = pctrie_reclaim_resume_compound(pnode, NULL, 0, NULL);
	return (stripped);
}
1236 
/*
 * Begin reclaiming the trie 'ptree' without a leaf callback.  Returns the
 * first node to deallocate, or NULL if the root was not an interior node.
 */
struct pctrie_node *
pctrie_reclaim_begin(struct pctrie_node **pnode, struct pctrie *ptree)
{
	struct pctrie_node *stripped;

	stripped = pctrie_reclaim_begin_compound(pnode, ptree, NULL, 0, NULL);
	return (stripped);
}
1242 
1243 struct pctrie_node *
1244 pctrie_reclaim_resume_cb(struct pctrie_node **pnode,
1245     pctrie_cb_t callback, int keyoff, void *arg)
1246 {
1247 	return (pctrie_reclaim_resume_compound(pnode, callback, keyoff, arg));
1248 }
1249 
1250 struct pctrie_node *
1251 pctrie_reclaim_begin_cb(struct pctrie_node **pnode, struct pctrie *ptree,
1252     pctrie_cb_t callback, int keyoff, void *arg)
1253 {
1254 	return (pctrie_reclaim_begin_compound(pnode, ptree,
1255 	    callback, keyoff, arg));
1256 }
1257 
1258 /*
1259  * Replace an existing value in the trie with another one.
1260  * Panics if there is not an old value in the trie at the new value's index.
1261  */
1262 uint64_t *
1263 pctrie_replace(struct pctrie *ptree, uint64_t *newval)
1264 {
1265 	struct pctrie_node *leaf, *parent, *node;
1266 	uint64_t *m;
1267 	uint64_t index;
1268 	int slot;
1269 
1270 	leaf = pctrie_toleaf(newval);
1271 	index = *newval;
1272 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
1273 	parent = NULL;
1274 	for (;;) {
1275 		if (pctrie_isleaf(node)) {
1276 			if ((m = pctrie_toval(node)) != NULL && *m == index) {
1277 				if (parent == NULL)
1278 					pctrie_root_store(ptree,
1279 					    leaf, PCTRIE_LOCKED);
1280 				else
1281 					pctrie_node_store(
1282 					    &parent->pn_child[slot], leaf,
1283 					    PCTRIE_LOCKED);
1284 				return (m);
1285 			}
1286 			break;
1287 		}
1288 		if (pctrie_keybarr(node, index, &slot))
1289 			break;
1290 		parent = node;
1291 		node = pctrie_node_load(&node->pn_child[slot], NULL,
1292 		    PCTRIE_LOCKED);
1293 	}
1294 	panic("%s: original replacing value not found", __func__);
1295 }
1296 
1297 #ifdef DDB
1298 /*
1299  * Show details about the given node.
1300  */
1301 DB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
1302 {
1303 	struct pctrie_node *node, *tmp;
1304 	int slot;
1305 	pn_popmap_t popmap;
1306 
1307         if (!have_addr)
1308                 return;
1309 	node = (struct pctrie_node *)addr;
1310 	db_printf("node %p, owner %jx, children popmap %04x, level %u:\n",
1311 	    (void *)node, (uintmax_t)node->pn_owner, node->pn_popmap,
1312 	    node->pn_clev / PCTRIE_WIDTH);
1313 	for (popmap = node->pn_popmap; popmap != 0; popmap ^= 1 << slot) {
1314 		slot = ffs(popmap) - 1;
1315 		tmp = pctrie_node_load(&node->pn_child[slot], NULL,
1316 		    PCTRIE_UNSERIALIZED);
1317 		db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
1318 		    slot, (void *)tmp,
1319 		    pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL,
1320 		    node->pn_clev / PCTRIE_WIDTH);
1321 	}
1322 }
1323 #endif /* DDB */
1324