xref: /freebsd/sys/kern/subr_pctrie.c (revision a905c589be67db98bbb9c99932511be70e5027fb)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 EMC Corp.
5  * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
6  * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  */
31 
32 /*
33  * Path-compressed radix trie implementation.
34  *
35  * The implementation takes into account the following rationale:
36  * - Size of the nodes should be as small as possible but still big enough
37  *   to avoid a large maximum depth for the trie.  This is a balance
38  *   between the necessity to not wire too much physical memory for the nodes
39  *   and the necessity to avoid too much cache pollution during the trie
40  *   operations.
41  * - There is not a huge bias toward the number of lookup operations over
42  *   the number of insert and remove operations.  This basically implies
43  *   that optimizations supposedly helping one operation but hurting the
44  *   other might be carefully evaluated.
45  * - On average not many nodes are expected to be fully populated, hence
46  *   level compression may just complicate things.
47  */
48 
49 #include <sys/cdefs.h>
50 #include "opt_ddb.h"
51 
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/kernel.h>
55 #include <sys/libkern.h>
56 #include <sys/pctrie.h>
57 #include <sys/proc.h>	/* smr.h depends on struct thread. */
58 #include <sys/smr.h>
59 #include <sys/smr_types.h>
60 
61 #ifdef DDB
62 #include <ddb/ddb.h>
63 #endif
64 
/*
 * pn_popmap_t is a bitmask with one bit per child slot of a node
 * (PCTRIE_COUNT == 1 << PCTRIE_WIDTH bits); a set bit marks a non-NULL
 * child.  Choose the narrowest integer type that holds PCTRIE_COUNT bits.
 */
#if PCTRIE_WIDTH == 3
typedef uint8_t pn_popmap_t;
#elif PCTRIE_WIDTH == 4
typedef uint16_t pn_popmap_t;
#elif PCTRIE_WIDTH == 5
typedef uint32_t pn_popmap_t;
#else
#error Unsupported width
#endif
/* The popmap is passed to int-valued bit helpers (ffs(), ilog2()) below. */
_Static_assert(sizeof(pn_popmap_t) <= sizeof(int),
    "pn_popmap_t too wide");
76 
struct pctrie_node;
/* SMR-protected pointer to a trie node, safe for lockless readers. */
typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t;

/*
 * An interior node of the path-compressed radix trie.  Child pointers with
 * the PCTRIE_ISLEAF tag bit set encode leaf values rather than child nodes
 * (see pctrie_isleaf()/pctrie_toleaf() below).
 */
struct pctrie_node {
	uint64_t	pn_owner;			/* Owner of record: least
							   index possible in this
							   subtree. */
	pn_popmap_t	pn_popmap;			/* Valid children. */
	uint8_t		pn_clev;			/* Level * WIDTH. */
	smr_pctnode_t	pn_child[PCTRIE_COUNT];		/* Child nodes. */
};
86 
87 /*
88  * Map index to an array position for the children of node,
89  */
90 static __inline int
pctrie_slot(struct pctrie_node * node,uint64_t index)91 pctrie_slot(struct pctrie_node *node, uint64_t index)
92 {
93 	return ((index >> node->pn_clev) & (PCTRIE_COUNT - 1));
94 }
95 
96 /*
97  * Returns true if index does not belong to the specified node.  Otherwise,
98  * sets slot value, and returns false.
99  */
100 static __inline bool
pctrie_keybarr(struct pctrie_node * node,uint64_t index,int * slot)101 pctrie_keybarr(struct pctrie_node *node, uint64_t index, int *slot)
102 {
103 	index = (index - node->pn_owner) >> node->pn_clev;
104 	if (index >= PCTRIE_COUNT)
105 		return (true);
106 	*slot = index;
107 	return (false);
108 }
109 
/*
 * Assert that a node being released is in a fit state for reuse: its popmap
 * has at most one bit set, and every slot whose popmap bit is clear already
 * stores PCTRIE_NULL.  No-op unless INVARIANTS is enabled.
 */
static __inline void
pctrie_node_put(struct pctrie_node *node)
{
#ifdef INVARIANTS
	int slot;

	/* powerof2() also accepts 0, i.e. a node with no children at all. */
	KASSERT(powerof2(node->pn_popmap),
	    ("pctrie_node_put: node %p has too many children %04x", node,
	    node->pn_popmap));
	for (slot = 0; slot < PCTRIE_COUNT; slot++) {
		if ((node->pn_popmap & (1 << slot)) != 0)
			continue;
		KASSERT(smr_unserialized_load(&node->pn_child[slot], true) ==
		    PCTRIE_NULL,
		    ("pctrie_node_put: node %p has a child", node));
	}
#endif
}
131 
/* How a caller synchronizes with concurrent trie updates. */
enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED };

/*
 * Fetch a node pointer from a slot, using the load primitive that matches
 * the caller's synchronization: SMR read section, external lock, or
 * exclusive (unserialized) access.
 */
static __inline struct pctrie_node *
pctrie_node_load(smr_pctnode_t *p, smr_t smr, enum pctrie_access access)
{
	switch (access) {
	case PCTRIE_UNSERIALIZED:
		return (smr_unserialized_load(p, true));
	case PCTRIE_LOCKED:
		return (smr_serialized_load(p, true));
	case PCTRIE_SMR:
		return (smr_entered_load(p, smr));
	}
	__assert_unreachable();
}
150 
/*
 * Store a node pointer into a slot, with the store primitive matching the
 * caller's synchronization.  Writers must hold the lock or have exclusive
 * access; storing from within an SMR read section is a bug, hence the panic.
 */
static __inline void
pctrie_node_store(smr_pctnode_t *p, void *v, enum pctrie_access access)
{
	switch (access) {
	case PCTRIE_UNSERIALIZED:
		smr_unserialized_store(p, v, true);
		break;
	case PCTRIE_LOCKED:
		smr_serialized_store(p, v, true);
		break;
	case PCTRIE_SMR:
		panic("%s: Not supported in SMR section.", __func__);
		break;
	default:
		__assert_unreachable();
		break;
	}
}
169 
/*
 * Get the root address, cast to proper type for load/store.  The root slot
 * is embedded in the pctrie itself (pt_root).
 */
static __inline smr_pctnode_t *
pctrie_root(struct pctrie *ptree)
{
	return ((smr_pctnode_t *)&ptree->pt_root);
}
178 
/*
 * Get the root node for a tree, using the access-appropriate load.
 */
static __inline struct pctrie_node *
pctrie_root_load(struct pctrie *ptree, smr_t smr, enum pctrie_access access)
{
	return (pctrie_node_load(pctrie_root(ptree), smr, access));
}
187 
188 /*
189  * Returns TRUE if the specified node is a leaf and FALSE otherwise.
190  */
191 static __inline bool
pctrie_isleaf(struct pctrie_node * node)192 pctrie_isleaf(struct pctrie_node *node)
193 {
194 	return (((uintptr_t)node & PCTRIE_ISLEAF) != 0);
195 }
196 
197 /*
198  * Returns val with leaf bit set.
199  */
200 static __inline void *
pctrie_toleaf(uint64_t * val)201 pctrie_toleaf(uint64_t *val)
202 {
203 	return ((void *)((uintptr_t)val | PCTRIE_ISLEAF));
204 }
205 
/*
 * Returns the associated val extracted from node, by stripping the
 * PCTRIE_FLAGS tag bits from the encoded pointer.
 */
static __inline uint64_t *
pctrie_toval(struct pctrie_node *node)
{
	return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS));
}
214 
/*
 * Returns the associated pointer extracted from node and field offset.
 * NOTE(review): 'keyoff' is presumably the byte offset of the uint64_t key
 * field within the caller's structure, so subtracting it recovers the
 * enclosing object -- confirm against the PCTRIE_DEFINE() users.
 */
static __inline void *
pctrie_toptr(struct pctrie_node *node, int keyoff)
{
	return ((void *)(((uintptr_t)node & ~PCTRIE_FLAGS) - keyoff));
}
223 
/*
 * Make 'child' a child of 'node'.  The child pointer is published with the
 * ordering implied by 'access' before the popmap bit is set.  The popmap
 * update uses XOR, so the KASSERT catches insertion into an occupied slot
 * (which would clear the bit instead of setting it).
 */
static __inline void
pctrie_addnode(struct pctrie_node *node, uint64_t index,
    struct pctrie_node *child, enum pctrie_access access)
{
	int slot;

	slot = pctrie_slot(node, index);
	pctrie_node_store(&node->pn_child[slot], child, access);
	node->pn_popmap ^= 1 << slot;
	KASSERT((node->pn_popmap & (1 << slot)) != 0,
	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
}
239 
240 /*
241  * pctrie node zone initializer.
242  */
243 int
pctrie_zone_init(void * mem,int size __unused,int flags __unused)244 pctrie_zone_init(void *mem, int size __unused, int flags __unused)
245 {
246 	struct pctrie_node *node;
247 
248 	node = mem;
249 	node->pn_popmap = 0;
250 	for (int i = 0; i < nitems(node->pn_child); i++)
251 		pctrie_node_store(&node->pn_child[i], PCTRIE_NULL,
252 		    PCTRIE_UNSERIALIZED);
253 	return (0);
254 }
255 
/* Report the size of a trie node, for use by the caller's zone setup. */
size_t
pctrie_node_size(void)
{

	return (sizeof(struct pctrie_node));
}
262 
/* Whether, and in which direction, to track a neighbor during insertion. */
enum pctrie_insert_neighbor_mode {
	PCTRIE_INSERT_NEIGHBOR_NONE,
	PCTRIE_INSERT_NEIGHBOR_LT,
	PCTRIE_INSERT_NEIGHBOR_GT,
};

/*
 * Look for where to insert the key-value pair into the trie.  Complete the
 * insertion if it replaces a null leaf.  Return the insertion location if the
 * insertion needs to be completed by the caller; otherwise return NULL.
 *
 * If the key is already present in the trie, populate *found_out as if by
 * pctrie_lookup().
 *
 * With mode PCTRIE_INSERT_NEIGHBOR_GT or PCTRIE_INSERT_NEIGHBOR_LT, set
 * *neighbor_out to the lowest level node we encounter during the insert lookup
 * that is a parent of the next greater or lesser entry.  The value is not
 * defined if the key was already present in the trie.
 *
 * Note that mode is expected to be a compile-time constant, and this procedure
 * is expected to be inlined into callers with extraneous code optimized out.
 */
static __always_inline void *
pctrie_insert_lookup_compound(struct pctrie *ptree, uint64_t *val,
    uint64_t **found_out, struct pctrie_node **neighbor_out,
    enum pctrie_insert_neighbor_mode mode)
{
	uint64_t index;
	struct pctrie_node *node, *parent;
	int slot;

	index = *val;

	/*
	 * The owner of record for root is not really important because it
	 * will never be used.
	 */
	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
	parent = NULL;
	for (;;) {
		if (pctrie_isleaf(node)) {
			if (node == PCTRIE_NULL) {
				/* Empty slot: insertion completes here. */
				if (parent == NULL)
					pctrie_node_store(pctrie_root(ptree),
					    pctrie_toleaf(val), PCTRIE_LOCKED);
				else
					pctrie_addnode(parent, index,
					    pctrie_toleaf(val), PCTRIE_LOCKED);
				return (NULL);
			}
			if (*pctrie_toval(node) == index) {
				/* Key already present; report it. */
				*found_out = pctrie_toval(node);
				return (NULL);
			}
			break;
		}
		if (pctrie_keybarr(node, index, &slot))
			break;
		/*
		 * Descend.  If we're tracking the next neighbor and this node
		 * contains a neighboring entry in the right direction, record
		 * it.
		 */
		if (mode == PCTRIE_INSERT_NEIGHBOR_LT) {
			if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
				*neighbor_out = node;
		} else if (mode == PCTRIE_INSERT_NEIGHBOR_GT) {
			if ((node->pn_popmap >> slot) > 1)
				*neighbor_out = node;
		}
		parent = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}

	/*
	 * The caller will split this node.  If we're tracking the next
	 * neighbor, record the old node if the old entry is in the right
	 * direction.  (If 'node' is interior, pctrie_toval() reads its
	 * pn_owner field, which is first in the node.)
	 */
	if (mode == PCTRIE_INSERT_NEIGHBOR_LT) {
		if (*pctrie_toval(node) < index)
			*neighbor_out = node;
	} else if (mode == PCTRIE_INSERT_NEIGHBOR_GT) {
		if (*pctrie_toval(node) > index)
			*neighbor_out = node;
	}

	/*
	 * 'node' must be replaced in the tree with a new branch node, with
	 * children 'node' and 'val'. Return the place that points to 'node'
	 * now, and will point to the new branching node later.
	 */
	return ((parent == NULL) ? pctrie_root(ptree): &parent->pn_child[slot]);
}
358 
359 /*
360  * Wrap pctrie_insert_lookup_compound to implement a strict insertion.  Panic
361  * if the key already exists, and do not look for neighboring entries.
362  */
363 void *
pctrie_insert_lookup_strict(struct pctrie * ptree,uint64_t * val)364 pctrie_insert_lookup_strict(struct pctrie *ptree, uint64_t *val)
365 {
366 	void *parentp;
367 	uint64_t *found;
368 
369 	found = NULL;
370 	parentp = pctrie_insert_lookup_compound(ptree, val, &found, NULL,
371 	    PCTRIE_INSERT_NEIGHBOR_NONE);
372 	if (__predict_false(found != NULL))
373 		panic("%s: key %jx is already present", __func__,
374 		    (uintmax_t)*val);
375 	return (parentp);
376 }
377 
378 /*
379  * Wrap pctrie_insert_lookup_compound to implement find-or-insert.  Do not look
380  * for neighboring entries.
381  */
382 void *
pctrie_insert_lookup(struct pctrie * ptree,uint64_t * val,uint64_t ** found_out)383 pctrie_insert_lookup(struct pctrie *ptree, uint64_t *val,
384     uint64_t **found_out)
385 {
386 	*found_out = NULL;
387 	return (pctrie_insert_lookup_compound(ptree, val, found_out, NULL,
388 	    PCTRIE_INSERT_NEIGHBOR_NONE));
389 }
390 
391 /*
392  * Wrap pctrie_insert_lookup_compound to implement find or insert and find next
393  * greater entry.  Find a subtree that contains the next entry greater than the
394  * newly-inserted or to-be-inserted entry.
395  */
396 void *
pctrie_insert_lookup_gt(struct pctrie * ptree,uint64_t * val,uint64_t ** found_out,struct pctrie_node ** neighbor_out)397 pctrie_insert_lookup_gt(struct pctrie *ptree, uint64_t *val,
398     uint64_t **found_out, struct pctrie_node **neighbor_out)
399 {
400 	*found_out = NULL;
401 	*neighbor_out = NULL;
402 	return (pctrie_insert_lookup_compound(ptree, val, found_out,
403 	    neighbor_out, PCTRIE_INSERT_NEIGHBOR_GT));
404 }
405 
406 /*
407  * Wrap pctrie_insert_lookup_compound to implement find or insert and find next
408  * lesser entry.  Find a subtree that contains the next entry less than the
409  * newly-inserted or to-be-inserted entry.
410  */
411 void *
pctrie_insert_lookup_lt(struct pctrie * ptree,uint64_t * val,uint64_t ** found_out,struct pctrie_node ** neighbor_out)412 pctrie_insert_lookup_lt(struct pctrie *ptree, uint64_t *val,
413     uint64_t **found_out, struct pctrie_node **neighbor_out)
414 {
415 	*found_out = NULL;
416 	*neighbor_out = NULL;
417 	return (pctrie_insert_lookup_compound(ptree, val, found_out,
418 	    neighbor_out, PCTRIE_INSERT_NEIGHBOR_LT));
419 }
420 
/*
 * Uses new node to insert key-value pair into the trie at given location.
 * 'parentp' is the slot that currently points at the colliding leaf or
 * subtree; 'parent' is a caller-allocated node that becomes the new branch
 * point, with the displaced node and a leaf for 'val' as its two children.
 */
void
pctrie_insert_node(void *parentp, struct pctrie_node *parent, uint64_t *val)
{
	struct pctrie_node *node;
	uint64_t index, newind;

	/*
	 * Clear the last child pointer of the newly allocated parent.  We want
	 * to clear it after the final section has exited so lookup can not
	 * return false negatives.  It is done here because it will be
	 * cache-cold in the dtor callback.
	 */
	if (parent->pn_popmap != 0) {
		pctrie_node_store(&parent->pn_child[ffs(parent->pn_popmap) - 1],
		    PCTRIE_NULL, PCTRIE_UNSERIALIZED);
		parent->pn_popmap = 0;
	}

	/*
	 * Recover the values of the two children of the new parent node.  If
	 * 'node' is not a leaf, this stores into 'newind' the 'owner' field,
	 * which must be first in the node.
	 */
	index = *val;
	node = pctrie_node_load(parentp, NULL, PCTRIE_UNSERIALIZED);
	newind = *pctrie_toval(node);

	/*
	 * From the highest-order bit where the indexes differ,
	 * compute the highest level in the trie where they differ.  Then,
	 * compute the least index of this subtrie.
	 */
	_Static_assert(sizeof(long long) >= sizeof(uint64_t),
	    "uint64 too wide");
	_Static_assert(sizeof(uint64_t) * NBBY <=
	    (1 << (sizeof(parent->pn_clev) * NBBY)), "pn_clev too narrow");
	parent->pn_clev = rounddown(ilog2(index ^ newind), PCTRIE_WIDTH);
	parent->pn_owner = PCTRIE_COUNT;
	parent->pn_owner = index & -(parent->pn_owner << parent->pn_clev);

	/* These writes are not yet visible due to ordering. */
	pctrie_addnode(parent, index, pctrie_toleaf(val), PCTRIE_UNSERIALIZED);
	pctrie_addnode(parent, newind, node, PCTRIE_UNSERIALIZED);
	/* Synchronize to make the above visible. */
	pctrie_node_store(parentp, parent, PCTRIE_LOCKED);
}
471 
472 /*
473  * Return the value associated with the node, if the node is a leaf that matches
474  * the index; otherwise NULL.
475  */
476 static __always_inline uint64_t *
pctrie_match_value(struct pctrie_node * node,uint64_t index)477 pctrie_match_value(struct pctrie_node *node, uint64_t index)
478 {
479 	uint64_t *m;
480 
481 	if (!pctrie_isleaf(node) || (m = pctrie_toval(node)) == NULL ||
482 	    *m != index)
483 		m = NULL;
484 	return (m);
485 }
486 
/*
 * Returns the value stored at the index.  If the index is not present,
 * NULL is returned.
 *
 * Common lookup engine: 'access' selects the synchronization discipline for
 * every node load and is expected to be a compile-time constant in each
 * instantiation.
 */
static __always_inline uint64_t *
_pctrie_lookup(struct pctrie *ptree, uint64_t index, smr_t smr,
    enum pctrie_access access)
{
	struct pctrie_node *node;
	int slot;

	node = pctrie_root_load(ptree, smr, access);
	/* Seek a node that matches index; stop at a leaf or a key barrier. */
	while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot))
		node = pctrie_node_load(&node->pn_child[slot], smr, access);
	return (pctrie_match_value(node, index));
}
504 
/*
 * Returns the value stored at the index, assuming access is externally
 * synchronized by a lock (hence no SMR structure is needed).
 *
 * If the index is not present, NULL is returned.
 */
uint64_t *
pctrie_lookup(struct pctrie *ptree, uint64_t index)
{
	return (_pctrie_lookup(ptree, index, NULL, PCTRIE_LOCKED));
}
516 
/*
 * Returns the value stored at the index without requiring an external lock.
 * The entire walk happens inside an SMR read section, so node memory cannot
 * be reclaimed while it is being examined.
 *
 * If the index is not present, NULL is returned.
 */
uint64_t *
pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr)
{
	uint64_t *res;

	smr_enter(smr);
	res = _pctrie_lookup(ptree, index, smr, PCTRIE_SMR);
	smr_exit(smr);
	return (res);
}
532 
/*
 * Returns the last node examined in the search for the index, and updates the
 * search path to that node.  The iterator's saved path acts as a hint: climb
 * it only as far as necessary before descending again.
 */
static __always_inline struct pctrie_node *
_pctrie_iter_lookup_node(struct pctrie_iter *it, uint64_t index, smr_t smr,
    enum pctrie_access access)
{
	struct pctrie_node *node;
	int slot;

	/*
	 * Climb the search path to find the lowest node from which to start the
	 * search for a value matching 'index'.
	 */
	while (it->top != 0) {
		node = it->path[it->top - 1];
		/* A node with <2 children would have been freed or collapsed. */
		KASSERT(!powerof2(node->pn_popmap),
		    ("%s: freed node in iter path", __func__));
		if (!pctrie_keybarr(node, index, &slot)) {
			node = pctrie_node_load(
			    &node->pn_child[slot], smr, access);
			break;
		}
		--it->top;
	}
	/* Path exhausted: restart the descent from the root. */
	if (it->top == 0)
		node = pctrie_root_load(it->ptree, smr, access);

	/* Seek a node that matches index, recording the path while descending. */
	while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot)) {
		KASSERT(it->top < nitems(it->path),
		    ("%s: path overflow in trie %p", __func__, it->ptree));
		it->path[it->top++] = node;
		node = pctrie_node_load(&node->pn_child[slot], smr, access);
	}
	return (node);
}
571 
572 /*
573  * Returns the value stored at a given index value, possibly NULL.
574  */
575 static __always_inline uint64_t *
_pctrie_iter_lookup(struct pctrie_iter * it,uint64_t index,smr_t smr,enum pctrie_access access)576 _pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index, smr_t smr,
577     enum pctrie_access access)
578 {
579 	struct pctrie_node *node;
580 
581 	it->index = index;
582 	node = _pctrie_iter_lookup_node(it, index, smr, access);
583 	return (pctrie_match_value(node, index));
584 }
585 
/*
 * Returns the value stored at a given index value, possibly NULL.  Updates
 * it->index and the cached search path as a side effect.
 */
uint64_t *
pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index)
{
	return (_pctrie_iter_lookup(it, index, NULL, PCTRIE_LOCKED));
}
594 
/*
 * Insert the val in the trie, starting search with iterator.  Return a pointer
 * to indicate where a new node must be allocated to complete insertion.
 * Assumes access is externally synchronized by a lock.
 */
void *
pctrie_iter_insert_lookup(struct pctrie_iter *it, uint64_t *val)
{
	struct pctrie_node *node;

	it->index = *val;
	node = _pctrie_iter_lookup_node(it, *val, NULL, PCTRIE_LOCKED);
	if (node == PCTRIE_NULL) {
		/* Empty slot found: store the new leaf directly. */
		if (it->top == 0)
			pctrie_node_store(pctrie_root(it->ptree),
			    pctrie_toleaf(val), PCTRIE_LOCKED);
		else
			pctrie_addnode(it->path[it->top - 1], it->index,
			    pctrie_toleaf(val), PCTRIE_LOCKED);
		return (NULL);
	}
	if (__predict_false(pctrie_match_value(node, it->index) != NULL))
		panic("%s: key %jx is already present", __func__,
		    (uintmax_t)it->index);

	/*
	 * 'node' must be replaced in the tree with a new branch node, with
	 * children 'node' and 'val'. Return the place that points to 'node'
	 * now, and will point to the new branching node later.
	 */
	if (it->top == 0)
		return (pctrie_root(it->ptree));
	node = it->path[it->top - 1];
	return (&node->pn_child[pctrie_slot(node, it->index)]);
}
630 
/*
 * Returns the value stored at a fixed offset from the current index value,
 * possibly NULL.
 */
static __always_inline uint64_t *
_pctrie_iter_stride(struct pctrie_iter *it, int stride, smr_t smr,
    enum pctrie_access access)
{
	uint64_t index = it->index + stride;

	/* Detect stride overflow: the sum must move in the stride's direction. */
	if ((stride > 0) != (index > it->index))
		return (NULL);
	/* Detect crossing limit: both indices must be on the same side of it. */
	if ((index < it->limit) != (it->index < it->limit))
		return (NULL);

	return (_pctrie_iter_lookup(it, index, smr, access));
}
650 
/*
 * Returns the value stored at a fixed offset from the current index value,
 * possibly NULL.  Assumes access is externally synchronized by a lock.
 */
uint64_t *
pctrie_iter_stride(struct pctrie_iter *it, int stride)
{
	return (_pctrie_iter_stride(it, stride, NULL, PCTRIE_LOCKED));
}
660 
/*
 * Returns the value stored at one more than the current index value, possibly
 * NULL, assuming access is externally synchronized by a lock.
 */
uint64_t *
pctrie_iter_next(struct pctrie_iter *it)
{
	return (_pctrie_iter_stride(it, 1, NULL, PCTRIE_LOCKED));
}
670 
/*
 * Returns the value stored at one less than the current index value, possibly
 * NULL, assuming access is externally synchronized by a lock.
 */
uint64_t *
pctrie_iter_prev(struct pctrie_iter *it)
{
	return (_pctrie_iter_stride(it, -1, NULL, PCTRIE_LOCKED));
}
680 
/*
 * Returns the value with the least index that is greater than or equal to the
 * specified index, or NULL if there are no such values, searching the subtree
 * rooted at 'node'.
 *
 * Requires that access be externally synchronized by a lock.
 */
static __inline uint64_t *
pctrie_lookup_ge_node(struct pctrie_node *node, uint64_t index)
{
	struct pctrie_node *succ;
	uint64_t *m;
	int slot;

	/*
	 * Descend the trie as if performing an ordinary lookup for the
	 * specified value.  However, unlike an ordinary lookup, as we descend
	 * the trie, we use "succ" to remember the last branching-off point,
	 * that is, the interior node under which the least value that is both
	 * outside our current path down the trie and greater than the specified
	 * index resides.  (The node's popmap makes it fast and easy to
	 * recognize a branching-off point.)  If our ordinary lookup fails to
	 * yield a value that is greater than or equal to the specified index,
	 * then we will exit this loop and perform a lookup starting from
	 * "succ".  If "succ" is not NULL, then that lookup is guaranteed to
	 * succeed.
	 */
	succ = NULL;
	for (;;) {
		if (pctrie_isleaf(node)) {
			if ((m = pctrie_toval(node)) != NULL && *m >= index)
				return (m);
			break;
		}
		if (pctrie_keybarr(node, index, &slot)) {
			/*
			 * If all values in this subtree are > index, then the
			 * least value in this subtree is the answer.
			 */
			if (node->pn_owner > index)
				succ = node;
			break;
		}

		/*
		 * Just in case the next search step leads to a subtree of all
		 * values < index, check popmap to see if a next bigger step, to
		 * a subtree of all pages with values > index, is available.  If
		 * so, remember to restart the search here.
		 */
		if ((node->pn_popmap >> slot) > 1)
			succ = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}

	/*
	 * Restart the search from the last place visited in the subtree that
	 * included some values > index, if there was such a place.
	 */
	if (succ == NULL)
		return (NULL);
	if (succ != node) {
		/*
		 * Take a step to the next bigger sibling of the node chosen
		 * last time.  In that subtree, all values > index.
		 */
		slot = pctrie_slot(succ, index) + 1;
		KASSERT((succ->pn_popmap >> slot) != 0,
		    ("%s: no popmap siblings past slot %d in node %p",
		    __func__, slot, succ));
		slot += ffs(succ->pn_popmap >> slot) - 1;
		succ = pctrie_node_load(&succ->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}

	/*
	 * Find the value in the subtree rooted at "succ" with the least index.
	 */
	while (!pctrie_isleaf(succ)) {
		KASSERT(succ->pn_popmap != 0,
		    ("%s: no popmap children in node %p",  __func__, succ));
		slot = ffs(succ->pn_popmap) - 1;
		succ = pctrie_node_load(&succ->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	return (pctrie_toval(succ));
}
768 
/*
 * Returns the value with the least index >= 'index' in the whole tree, or
 * NULL.  Requires the tree lock.
 */
uint64_t *
pctrie_lookup_ge(struct pctrie *ptree, uint64_t index)
{
	return (pctrie_lookup_ge_node(
	    pctrie_root_load(ptree, NULL, PCTRIE_LOCKED), index));
}
775 
776 uint64_t *
pctrie_subtree_lookup_gt(struct pctrie_node * node,uint64_t index)777 pctrie_subtree_lookup_gt(struct pctrie_node *node, uint64_t index)
778 {
779 	if (node == NULL || index + 1 == 0)
780 		return (NULL);
781 	return (pctrie_lookup_ge_node(node, index + 1));
782 }
783 
/*
 * Find first leaf >= index, and fill iter with the path to the parent of that
 * leaf.  Return NULL if there is no such leaf less than limit (a limit of 0
 * means no limit).
 */
uint64_t *
pctrie_iter_lookup_ge(struct pctrie_iter *it, uint64_t index)
{
	struct pctrie_node *node;
	uint64_t *m;
	int slot;

	/* Seek a node that matches index. */
	node = _pctrie_iter_lookup_node(it, index, NULL, PCTRIE_LOCKED);

	/*
	 * If no such node was found, and instead this path leads only to nodes
	 * < index, back up to find a subtrie with the least value > index.
	 */
	if (node == PCTRIE_NULL || *pctrie_toval(node) < index) {
		/* Climb the path to find a node with a descendant > index. */
		while (it->top != 0) {
			node = it->path[it->top - 1];
			slot = pctrie_slot(node, index) + 1;
			if ((node->pn_popmap >> slot) != 0)
				break;
			--it->top;
		}
		if (it->top == 0)
			return (NULL);

		/* Step to the least child with a descendant > index. */
		slot += ffs(node->pn_popmap >> slot) - 1;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	/* Descend to the least leaf of the subtrie. */
	while (!pctrie_isleaf(node)) {
		/* pn_owner is the least index in the subtree; bail early. */
		if (it->limit != 0 && node->pn_owner >= it->limit)
			return (NULL);
		slot = ffs(node->pn_popmap) - 1;
		KASSERT(it->top < nitems(it->path),
		    ("%s: path overflow in trie %p", __func__, it->ptree));
		it->path[it->top++] = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	m = pctrie_toval(node);
	if (it->limit != 0 && *m >= it->limit)
		return (NULL);
	it->index = *m;
	return (m);
}
836 
/*
 * Find the first leaf with value at least 'jump' greater than the previous
 * leaf.  Return NULL if that value is >= limit, or if the addition wraps
 * around.
 */
uint64_t *
pctrie_iter_jump_ge(struct pctrie_iter *it, int64_t jump)
{
	uint64_t index = it->index + jump;

	/* Detect jump overflow: the sum must move in the jump's direction. */
	if ((jump > 0) != (index > it->index))
		return (NULL);
	if (it->limit != 0 && index >= it->limit)
		return (NULL);
	return (pctrie_iter_lookup_ge(it, index));
}
853 
854 #ifdef INVARIANTS
/*
 * Cross-check a subtree gt-lookup result against an equivalent full-tree
 * lookup; the two must agree.  Compiled only under INVARIANTS.
 */
void
pctrie_subtree_lookup_gt_assert(struct pctrie_node *node, uint64_t index,
    struct pctrie *ptree, uint64_t *res)
{
	uint64_t *expected;

	/* index + 1 wrapping to 0 means no index can be greater. */
	if (index + 1 == 0)
		expected = NULL;
	else
		expected = pctrie_lookup_ge(ptree, index + 1);
	KASSERT(res == expected,
	    ("pctrie subtree lookup gt result different from root lookup: "
	    "ptree %p, index %ju, subtree %p, found %p, expected %p", ptree,
	    (uintmax_t)index, node, res, expected));
}
870 #endif
871 
/*
 * Returns the value with the greatest index that is less than or equal to the
 * specified index, or NULL if there are no such values.
 *
 * Requires that access be externally synchronized by a lock.
 */
static __inline uint64_t *
pctrie_lookup_le_node(struct pctrie_node *node, uint64_t index)
{
	struct pctrie_node *pred;
	uint64_t *m;
	int slot;

	/*
	 * Mirror the implementation of pctrie_lookup_ge_node, described above;
	 * "pred" remembers the last branching-off point toward a lesser entry.
	 */
	pred = NULL;
	for (;;) {
		if (pctrie_isleaf(node)) {
			if ((m = pctrie_toval(node)) != NULL && *m <= index)
				return (m);
			break;
		}
		if (pctrie_keybarr(node, index, &slot)) {
			/*
			 * If all values in this subtree are < index, then the
			 * greatest value in this subtree is the answer.
			 */
			if (node->pn_owner < index)
				pred = node;
			break;
		}
		/* Some sibling below 'slot' holds a lesser entry. */
		if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
			pred = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	if (pred == NULL)
		return (NULL);
	if (pred != node) {
		/*
		 * Take a step to the greatest lesser sibling of the node
		 * chosen last time.  In that subtree, all values < index.
		 */
		slot = pctrie_slot(pred, index);
		KASSERT((pred->pn_popmap & ((1 << slot) - 1)) != 0,
		    ("%s: no popmap siblings before slot %d in node %p",
		    __func__, slot, pred));
		slot = ilog2(pred->pn_popmap & ((1 << slot) - 1));
		pred = pctrie_node_load(&pred->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	/* Find the value with the greatest index in the subtree at "pred". */
	while (!pctrie_isleaf(pred)) {
		KASSERT(pred->pn_popmap != 0,
		    ("%s: no popmap children in node %p",  __func__, pred));
		slot = ilog2(pred->pn_popmap);
		pred = pctrie_node_load(&pred->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	return (pctrie_toval(pred));
}
925 
926 uint64_t *
pctrie_lookup_le(struct pctrie * ptree,uint64_t index)927 pctrie_lookup_le(struct pctrie *ptree, uint64_t index)
928 {
929 	return (pctrie_lookup_le_node(
930 	    pctrie_root_load(ptree, NULL, PCTRIE_LOCKED), index));
931 }
932 
933 uint64_t *
pctrie_subtree_lookup_lt(struct pctrie_node * node,uint64_t index)934 pctrie_subtree_lookup_lt(struct pctrie_node *node, uint64_t index)
935 {
936 	if (node == NULL || index == 0)
937 		return (NULL);
938 	return (pctrie_lookup_le_node(node, index - 1));
939 }
940 
/*
 * Find the greatest leaf <= index, and fill iter with the path to the parent
 * of that leaf.  Return NULL if there is no such leaf greater than limit.
 */
uint64_t *
pctrie_iter_lookup_le(struct pctrie_iter *it, uint64_t index)
{
	struct pctrie_node *node;
	uint64_t *m;
	int slot;

	/* Seek a node that matches index. */
	node = _pctrie_iter_lookup_node(it, index, NULL, PCTRIE_LOCKED);

	/*
	 * If no such node was found, and instead this path leads only to nodes
	 * > index, back up to find a subtrie with the greatest value < index.
	 */
	if (node == PCTRIE_NULL || *pctrie_toval(node) > index) {
		/* Climb the path to find a node with a descendant < index. */
		while (it->top != 0) {
			node = it->path[it->top - 1];
			slot = pctrie_slot(node, index);
			/* A populated slot before 'slot' holds smaller keys. */
			if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
				break;
			--it->top;
		}
		/* The path was exhausted; nothing in the trie is < index. */
		if (it->top == 0)
			return (NULL);

		/* Step to the greatest child with a descendant < index. */
		slot = ilog2(node->pn_popmap & ((1 << slot) - 1));
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	/* Descend to the greatest leaf of the subtrie. */
	while (!pctrie_isleaf(node)) {
		/*
		 * Give up early if even the greatest index this subtree can
		 * hold does not exceed the iterator's lower bound.
		 */
		if (it->limit != 0 && it->limit >=
		    node->pn_owner + (PCTRIE_COUNT << node->pn_clev) - 1)
			return (NULL);
		slot = ilog2(node->pn_popmap);
		KASSERT(it->top < nitems(it->path),
		    ("%s: path overflow in trie %p", __func__, it->ptree));
		/* Record the path so the iterator can resume from here. */
		it->path[it->top++] = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	m = pctrie_toval(node);
	/* The found value must exceed the lower bound, if one is set. */
	if (it->limit != 0 && *m <= it->limit)
		return (NULL);
	it->index = *m;
	return (m);
}
994 
995 /*
996  * Find the first leaf with value at most 'jump' less than the previous
997  * leaf.  Return NULL if that value is <= limit.
998  */
999 uint64_t *
pctrie_iter_jump_le(struct pctrie_iter * it,int64_t jump)1000 pctrie_iter_jump_le(struct pctrie_iter *it, int64_t jump)
1001 {
1002 	uint64_t index = it->index - jump;
1003 
1004 	/* Detect jump overflow. */
1005 	if ((jump > 0) != (index < it->index))
1006 		return (NULL);
1007 	if (it->limit != 0 && index <= it->limit)
1008 		return (NULL);
1009 	return (pctrie_iter_lookup_le(it, index));
1010 }
1011 
#ifdef INVARIANTS
/*
 * Verify that a subtree "lookup less-than" produced the same value that a
 * full-tree pctrie_lookup_le() starting from the root would have found.
 */
void
pctrie_subtree_lookup_lt_assert(struct pctrie_node *node, uint64_t index,
    struct pctrie *ptree, uint64_t *res)
{
	uint64_t *expected;

	/* Nothing can precede index zero. */
	expected = (index == 0) ? NULL : pctrie_lookup_le(ptree, index - 1);
	KASSERT(res == expected,
	    ("pctrie subtree lookup lt result different from root lookup: "
	    "ptree %p, index %ju, subtree %p, found %p, expected %p", ptree,
	    (uintmax_t)index, node, res, expected));
}
#endif
1029 
/*
 * Detach from 'node' the leaf child at 'index'.  If only one child remains
 * afterward, splice 'node' out by linking that child into 'parent' (or the
 * root), and hand 'node' back through 'freenode' for deallocation.
 */
static void
pctrie_remove(struct pctrie *ptree, uint64_t index, struct pctrie_node *parent,
    struct pctrie_node *node, struct pctrie_node **freenode)
{
	struct pctrie_node *child;
	int slot;

	/* A NULL node means the leaf hangs directly off the root. */
	if (node == NULL) {
		pctrie_node_store(pctrie_root(ptree),
		    PCTRIE_NULL, PCTRIE_LOCKED);
		return;
	}
	slot = pctrie_slot(node, index);
	KASSERT((node->pn_popmap & (1 << slot)) != 0,
	    ("%s: bad popmap slot %d in node %p",
	    __func__, slot, node));
	/* Clear the leaf's popmap bit and its child pointer. */
	node->pn_popmap ^= 1 << slot;
	pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL, PCTRIE_LOCKED);
	/* Keep the node as long as it still has two or more children. */
	if (!powerof2(node->pn_popmap))
		return;
	KASSERT(node->pn_popmap != 0, ("%s: bad popmap all zeroes", __func__));
	/* Splice out 'node' by linking its sole remaining child upward. */
	slot = ffs(node->pn_popmap) - 1;
	child = pctrie_node_load(&node->pn_child[slot], NULL, PCTRIE_LOCKED);
	KASSERT(child != PCTRIE_NULL,
	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
	if (parent == NULL)
		pctrie_node_store(pctrie_root(ptree), child, PCTRIE_LOCKED);
	else {
		slot = pctrie_slot(parent, index);
		KASSERT(node ==
		    pctrie_node_load(&parent->pn_child[slot], NULL,
		    PCTRIE_LOCKED), ("%s: invalid child value", __func__));
		pctrie_node_store(&parent->pn_child[slot], child,
		    PCTRIE_LOCKED);
	}
	/*
	 * The child is still valid and we can not zero the
	 * pointer until all SMR references are gone.
	 */
	pctrie_node_put(node);
	*freenode = node;
}
1072 
/*
 * Remove the specified index from the tree, and return the value stored at
 * that index.  If the index is not present, return NULL.
 */
uint64_t *
pctrie_remove_lookup(struct pctrie *ptree, uint64_t index,
    struct pctrie_node **freenode)
{
	struct pctrie_node *child, *node, *parent;
	uint64_t *m;
	int slot;

	/*
	 * 'parent' is read by pctrie_remove() only when 'node' != NULL, and
	 * the loop below assigns it before 'node' becomes non-NULL; poison
	 * it to catch any misuse.
	 */
	DEBUG_POISON_POINTER(parent);
	*freenode = node = NULL;
	/* Walk down to the leaf, tracking the last two interior nodes. */
	child = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
	while (!pctrie_isleaf(child)) {
		parent = node;
		node = child;
		slot = pctrie_slot(node, index);
		child = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	/* Detach the leaf only if it holds the requested index. */
	m = pctrie_match_value(child, index);
	if (m != NULL)
		pctrie_remove(ptree, index, parent, node, freenode);
	return (m);
}
1100 
/*
 * Remove from the trie the leaf last chosen by the iterator, and
 * adjust the path if its last member is to be freed.
 */
uint64_t *
pctrie_iter_remove(struct pctrie_iter *it, struct pctrie_node **freenode)
{
	struct pctrie_node *child, *node, *parent;
	uint64_t *m;
	int slot;

	/*
	 * 'parent' is read by pctrie_remove() only when 'node' != NULL,
	 * and that branch below always assigns it; poison it to catch any
	 * misuse.
	 */
	DEBUG_POISON_POINTER(parent);
	*freenode = NULL;
	if (it->top >= 1) {
		/* Use the recorded path instead of a fresh descent. */
		parent = (it->top >= 2) ? it->path[it->top - 2] : NULL;
		node = it->path[it->top - 1];
		slot = pctrie_slot(node, it->index);
		child = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	} else {
		/* An empty path: the leaf, if any, hangs off the root. */
		node = NULL;
		child = pctrie_root_load(it->ptree, NULL, PCTRIE_LOCKED);
	}
	m = pctrie_match_value(child, it->index);
	if (m != NULL)
		pctrie_remove(it->ptree, it->index, parent, node, freenode);
	/* If the path's last node was freed, drop it from the path. */
	if (*freenode != NULL)
		--it->top;
	return (m);
}
1131 
1132 /*
1133  * Return the current leaf, assuming access is externally synchronized by a
1134  * lock.
1135  */
1136 uint64_t *
pctrie_iter_value(struct pctrie_iter * it)1137 pctrie_iter_value(struct pctrie_iter *it)
1138 {
1139 	struct pctrie_node *node;
1140 	int slot;
1141 
1142 	if (it->top == 0)
1143 		node = pctrie_root_load(it->ptree, NULL,
1144 		    PCTRIE_LOCKED);
1145 	else {
1146 		node = it->path[it->top - 1];
1147 		slot = pctrie_slot(node, it->index);
1148 		node = pctrie_node_load(&node->pn_child[slot], NULL,
1149 		    PCTRIE_LOCKED);
1150 	}
1151 	return (pctrie_toval(node));
1152 }
1153 
/*
 * Walk the subtrie rooted at *pnode in order, invoking callback on leaves and
 * using the leftmost child pointer for path reversal, until an interior node
 * is stripped of all children, and returned for deallocation, with *pnode left
 * pointing to the parent of that node.
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_prune(struct pctrie_node **pnode, struct pctrie_node *parent,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	struct pctrie_node *child, *node;
	int slot;

	node = *pnode;
	while (node->pn_popmap != 0) {
		/* Detach the lowest-numbered remaining child. */
		slot = ffs(node->pn_popmap) - 1;
		node->pn_popmap ^= 1 << slot;
		child = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_UNSERIALIZED);
		pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL,
		    PCTRIE_UNSERIALIZED);
		if (pctrie_isleaf(child)) {
			/* Hand the value to the callback; keep scanning. */
			if (callback != NULL)
				callback(pctrie_toptr(child, keyoff), arg);
			continue;
		}
		/*
		 * Climb one level down the trie, stashing the parent pointer
		 * in the now-vacant slot 0 so a later call can recover it
		 * (see pctrie_reclaim_resume_compound()).
		 */
		pctrie_node_store(&node->pn_child[0], parent,
		    PCTRIE_UNSERIALIZED);
		parent = node;
		node = child;
	}
	/* 'node' is childless: return it, leaving *pnode at its parent. */
	*pnode = parent;
	return (node);
}
1189 
/*
 * Recover the node parent from its first child and continue pruning.
 * The parent pointer was saved in child slot 0 by pctrie_reclaim_prune().
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_resume_compound(struct pctrie_node **pnode,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	struct pctrie_node *parent, *node;

	node = *pnode;
	/* A NULL *pnode means the previous pass reached the root. */
	if (node == NULL)
		return (NULL);
	/* Climb one level up the trie. */
	parent = pctrie_node_load(&node->pn_child[0], NULL,
	    PCTRIE_UNSERIALIZED);
	pctrie_node_store(&node->pn_child[0], PCTRIE_NULL, PCTRIE_UNSERIALIZED);
	return (pctrie_reclaim_prune(pnode, parent, callback, keyoff, arg));
}
1208 
/*
 * Find the trie root, and start pruning with a NULL parent.
 */
static __always_inline struct pctrie_node *
pctrie_reclaim_begin_compound(struct pctrie_node **pnode,
    struct pctrie *ptree,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	struct pctrie_node *node;

	/* Detach the entire trie from the root. */
	node = pctrie_root_load(ptree, NULL, PCTRIE_UNSERIALIZED);
	pctrie_node_store(pctrie_root(ptree), PCTRIE_NULL, PCTRIE_UNSERIALIZED);
	if (pctrie_isleaf(node)) {
		/* A leaf (or empty) root: deliver the sole value, if any. */
		if (callback != NULL && node != PCTRIE_NULL)
			callback(pctrie_toptr(node, keyoff), arg);
		return (NULL);
	}
	*pnode = node;
	return (pctrie_reclaim_prune(pnode, NULL, callback, keyoff, arg));
}
1229 
/*
 * Continue pruning a partially reclaimed trie, with no per-leaf callback;
 * see pctrie_reclaim_resume_compound().
 */
struct pctrie_node *
pctrie_reclaim_resume(struct pctrie_node **pnode)
{
	return (pctrie_reclaim_resume_compound(pnode, NULL, 0, NULL));
}
1235 
/*
 * Begin reclaiming all nodes of a trie, with no per-leaf callback;
 * see pctrie_reclaim_begin_compound().
 */
struct pctrie_node *
pctrie_reclaim_begin(struct pctrie_node **pnode, struct pctrie *ptree)
{
	return (pctrie_reclaim_begin_compound(pnode, ptree, NULL, 0, NULL));
}
1241 
/*
 * Continue pruning a partially reclaimed trie, applying 'callback' to each
 * reclaimed leaf value; see pctrie_reclaim_resume_compound().
 */
struct pctrie_node *
pctrie_reclaim_resume_cb(struct pctrie_node **pnode,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	return (pctrie_reclaim_resume_compound(pnode, callback, keyoff, arg));
}
1248 
/*
 * Begin reclaiming all nodes of a trie, applying 'callback' to each
 * reclaimed leaf value; see pctrie_reclaim_begin_compound().
 */
struct pctrie_node *
pctrie_reclaim_begin_cb(struct pctrie_node **pnode, struct pctrie *ptree,
    pctrie_cb_t callback, int keyoff, void *arg)
{
	return (pctrie_reclaim_begin_compound(pnode, ptree,
	    callback, keyoff, arg));
}
1256 
/*
 * Replace an existing value in the trie with another one.
 * Panics if there is not an old value in the trie at the new value's index.
 */
uint64_t *
pctrie_replace(struct pctrie *ptree, uint64_t *newval)
{
	struct pctrie_node *leaf, *parent, *node;
	uint64_t *m;
	uint64_t index;
	int slot;

	leaf = pctrie_toleaf(newval);
	index = *newval;
	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
	parent = NULL;
	for (;;) {
		if (pctrie_isleaf(node)) {
			if ((m = pctrie_toval(node)) != NULL && *m == index) {
				/*
				 * 'slot' still holds the parent's child
				 * index from the previous iteration; it is
				 * never read when parent == NULL.
				 */
				if (parent == NULL)
					pctrie_node_store(pctrie_root(ptree),
					    leaf, PCTRIE_LOCKED);
				else
					pctrie_node_store(
					    &parent->pn_child[slot], leaf,
					    PCTRIE_LOCKED);
				return (m);
			}
			break;
		}
		/* A key prefix mismatch means the index is not present. */
		if (pctrie_keybarr(node, index, &slot))
			break;
		parent = node;
		node = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_LOCKED);
	}
	panic("%s: original replacing value not found", __func__);
}
1295 
#ifdef DDB
/*
 * Show details about the given node: its owner, child popmap, level, and
 * each populated child slot.
 */
DB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
{
	struct pctrie_node *node, *child;
	pn_popmap_t popmap;
	int slot;

	if (!have_addr)
		return;
	node = (struct pctrie_node *)addr;
	db_printf("node %p, owner %jx, children popmap %04x, level %u:\n",
	    (void *)node, (uintmax_t)node->pn_owner, node->pn_popmap,
	    node->pn_clev / PCTRIE_WIDTH);
	/* Visit each populated slot, lowest first. */
	for (popmap = node->pn_popmap; popmap != 0; popmap ^= 1 << slot) {
		slot = ffs(popmap) - 1;
		child = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_UNSERIALIZED);
		db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
		    slot, (void *)child,
		    pctrie_isleaf(child) ? pctrie_toval(child) : NULL,
		    node->pn_clev / PCTRIE_WIDTH);
	}
}
#endif /* DDB */
1323