xref: /freebsd/sys/kern/subr_pctrie.c (revision d0b225d16418e224b5d30d13f45515aa70ad47a3)
18a36da99SPedro F. Giffuni /*-
24d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
38a36da99SPedro F. Giffuni  *
4f2cc1285SJeff Roberson  * Copyright (c) 2013 EMC Corp.
5f2cc1285SJeff Roberson  * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
6f2cc1285SJeff Roberson  * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
7f2cc1285SJeff Roberson  * All rights reserved.
8f2cc1285SJeff Roberson  *
9f2cc1285SJeff Roberson  * Redistribution and use in source and binary forms, with or without
10f2cc1285SJeff Roberson  * modification, are permitted provided that the following conditions
11f2cc1285SJeff Roberson  * are met:
12f2cc1285SJeff Roberson  * 1. Redistributions of source code must retain the above copyright
13f2cc1285SJeff Roberson  *    notice, this list of conditions and the following disclaimer.
14f2cc1285SJeff Roberson  * 2. Redistributions in binary form must reproduce the above copyright
15f2cc1285SJeff Roberson  *    notice, this list of conditions and the following disclaimer in the
16f2cc1285SJeff Roberson  *    documentation and/or other materials provided with the distribution.
17f2cc1285SJeff Roberson  *
18f2cc1285SJeff Roberson  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19f2cc1285SJeff Roberson  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20f2cc1285SJeff Roberson  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21f2cc1285SJeff Roberson  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22f2cc1285SJeff Roberson  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23f2cc1285SJeff Roberson  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24f2cc1285SJeff Roberson  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25f2cc1285SJeff Roberson  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26f2cc1285SJeff Roberson  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27f2cc1285SJeff Roberson  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28f2cc1285SJeff Roberson  * SUCH DAMAGE.
29f2cc1285SJeff Roberson  *
30f2cc1285SJeff Roberson  */
31f2cc1285SJeff Roberson 
32f2cc1285SJeff Roberson /*
33f2cc1285SJeff Roberson  * Path-compressed radix trie implementation.
34f2cc1285SJeff Roberson  *
35f2cc1285SJeff Roberson  * The implementation takes into account the following rationale:
36f2cc1285SJeff Roberson  * - Size of the nodes should be as small as possible but still big enough
37f2cc1285SJeff Roberson  *   to avoid a large maximum depth for the trie.  This is a balance
38f2cc1285SJeff Roberson  *   between the necessity to not wire too much physical memory for the nodes
39f2cc1285SJeff Roberson  *   and the necessity to avoid too much cache pollution during the trie
40f2cc1285SJeff Roberson  *   operations.
41f2cc1285SJeff Roberson  * - There is not a huge bias toward the number of lookup operations over
42f2cc1285SJeff Roberson  *   the number of insert and remove operations.  This basically implies
43f2cc1285SJeff Roberson  *   that optimizations supposedly helping one operation but hurting the
44f2cc1285SJeff Roberson  *   other might be carefully evaluated.
45f2cc1285SJeff Roberson  * - On average not many nodes are expected to be fully populated, hence
46f2cc1285SJeff Roberson  *   level compression may just complicate things.
47f2cc1285SJeff Roberson  */
48f2cc1285SJeff Roberson 
49f2cc1285SJeff Roberson #include <sys/cdefs.h>
50f2cc1285SJeff Roberson #include "opt_ddb.h"
51f2cc1285SJeff Roberson 
52f2cc1285SJeff Roberson #include <sys/param.h>
53f2cc1285SJeff Roberson #include <sys/systm.h>
54f2cc1285SJeff Roberson #include <sys/kernel.h>
5505963ea4SDoug Moore #include <sys/libkern.h>
56f2cc1285SJeff Roberson #include <sys/pctrie.h>
573c30b235SConrad Meyer #include <sys/proc.h>	/* smr.h depends on struct thread. */
583c30b235SConrad Meyer #include <sys/smr.h>
593c30b235SConrad Meyer #include <sys/smr_types.h>
60f2cc1285SJeff Roberson 
61f2cc1285SJeff Roberson #ifdef DDB
62f2cc1285SJeff Roberson #include <ddb/ddb.h>
63f2cc1285SJeff Roberson #endif
64f2cc1285SJeff Roberson 
658df38859SDoug Moore #if PCTRIE_WIDTH == 3
668df38859SDoug Moore typedef uint8_t pn_popmap_t;
678df38859SDoug Moore #elif PCTRIE_WIDTH == 4
688df38859SDoug Moore typedef uint16_t pn_popmap_t;
698df38859SDoug Moore #elif PCTRIE_WIDTH == 5
708df38859SDoug Moore typedef uint32_t pn_popmap_t;
718df38859SDoug Moore #else
728df38859SDoug Moore #error Unsupported width
738df38859SDoug Moore #endif
748df38859SDoug Moore _Static_assert(sizeof(pn_popmap_t) <= sizeof(int),
758df38859SDoug Moore     "pn_popmap_t too wide");
768df38859SDoug Moore 
773c30b235SConrad Meyer struct pctrie_node;
783c30b235SConrad Meyer typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t;
793c30b235SConrad Meyer 
80f2cc1285SJeff Roberson struct pctrie_node {
81f2cc1285SJeff Roberson 	uint64_t	pn_owner;			/* Owner of record. */
828df38859SDoug Moore 	pn_popmap_t	pn_popmap;			/* Valid children. */
8338f5cb1bSDoug Moore 	uint8_t		pn_clev;			/* Level * WIDTH. */
843c30b235SConrad Meyer 	smr_pctnode_t	pn_child[PCTRIE_COUNT];		/* Child nodes. */
85f2cc1285SJeff Roberson };
86f2cc1285SJeff Roberson 
87f2cc1285SJeff Roberson /*
88ac0572e6SDoug Moore  * Map index to an array position for the children of node,
89da72505fSDoug Moore  */
90da72505fSDoug Moore static __inline int
91ac0572e6SDoug Moore pctrie_slot(struct pctrie_node *node, uint64_t index)
92da72505fSDoug Moore {
93fd1d6662SDoug Moore 	return ((index >> node->pn_clev) & (PCTRIE_COUNT - 1));
94da72505fSDoug Moore }
95da72505fSDoug Moore 
96ac0572e6SDoug Moore /*
97ac0572e6SDoug Moore  * Returns true if index does not belong to the specified node.  Otherwise,
98ac0572e6SDoug Moore  * sets slot value, and returns false.
99ac0572e6SDoug Moore  */
100ac0572e6SDoug Moore static __inline bool
101ac0572e6SDoug Moore pctrie_keybarr(struct pctrie_node *node, uint64_t index, int *slot)
102da72505fSDoug Moore {
103ac0572e6SDoug Moore 	index = (index - node->pn_owner) >> node->pn_clev;
104ac0572e6SDoug Moore 	if (index >= PCTRIE_COUNT)
105ac0572e6SDoug Moore 		return (true);
106ac0572e6SDoug Moore 	*slot = index;
107ac0572e6SDoug Moore 	return (false);
108da72505fSDoug Moore }
109da72505fSDoug Moore 
110da72505fSDoug Moore /*
1113b7ffacdSDoug Moore  * Check radix node.
112f2cc1285SJeff Roberson  */
113f2cc1285SJeff Roberson static __inline void
1143b7ffacdSDoug Moore pctrie_node_put(struct pctrie_node *node)
115f2cc1285SJeff Roberson {
116f2cc1285SJeff Roberson #ifdef INVARIANTS
117f2cc1285SJeff Roberson 	int slot;
118f2cc1285SJeff Roberson 
1198df38859SDoug Moore 	KASSERT(powerof2(node->pn_popmap),
1208df38859SDoug Moore 	    ("pctrie_node_put: node %p has too many children %04x", node,
1218df38859SDoug Moore 	    node->pn_popmap));
1223c30b235SConrad Meyer 	for (slot = 0; slot < PCTRIE_COUNT; slot++) {
1238df38859SDoug Moore 		if ((node->pn_popmap & (1 << slot)) != 0)
1243c30b235SConrad Meyer 			continue;
1253c30b235SConrad Meyer 		KASSERT(smr_unserialized_load(&node->pn_child[slot], true) ==
1262d2bcba7SDoug Moore 		    PCTRIE_NULL,
1272d2bcba7SDoug Moore 		    ("pctrie_node_put: node %p has a child", node));
1283c30b235SConrad Meyer 	}
129f2cc1285SJeff Roberson #endif
130f2cc1285SJeff Roberson }
131f2cc1285SJeff Roberson 
132fd1d6662SDoug Moore enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED };
133fd1d6662SDoug Moore 
134f2cc1285SJeff Roberson /*
1353c30b235SConrad Meyer  * Fetch a node pointer from a slot.
1363c30b235SConrad Meyer  */
1373c30b235SConrad Meyer static __inline struct pctrie_node *
1383c30b235SConrad Meyer pctrie_node_load(smr_pctnode_t *p, smr_t smr, enum pctrie_access access)
1393c30b235SConrad Meyer {
1403c30b235SConrad Meyer 	switch (access) {
1413c30b235SConrad Meyer 	case PCTRIE_UNSERIALIZED:
1423c30b235SConrad Meyer 		return (smr_unserialized_load(p, true));
1433c30b235SConrad Meyer 	case PCTRIE_LOCKED:
1443c30b235SConrad Meyer 		return (smr_serialized_load(p, true));
1453c30b235SConrad Meyer 	case PCTRIE_SMR:
1463c30b235SConrad Meyer 		return (smr_entered_load(p, smr));
1473c30b235SConrad Meyer 	}
1483c30b235SConrad Meyer 	__assert_unreachable();
1493c30b235SConrad Meyer }
1503c30b235SConrad Meyer 
1513c30b235SConrad Meyer static __inline void
1523c30b235SConrad Meyer pctrie_node_store(smr_pctnode_t *p, void *v, enum pctrie_access access)
1533c30b235SConrad Meyer {
1543c30b235SConrad Meyer 	switch (access) {
1553c30b235SConrad Meyer 	case PCTRIE_UNSERIALIZED:
1563c30b235SConrad Meyer 		smr_unserialized_store(p, v, true);
1573c30b235SConrad Meyer 		break;
1583c30b235SConrad Meyer 	case PCTRIE_LOCKED:
1593c30b235SConrad Meyer 		smr_serialized_store(p, v, true);
1603c30b235SConrad Meyer 		break;
1613c30b235SConrad Meyer 	case PCTRIE_SMR:
1623c30b235SConrad Meyer 		panic("%s: Not supported in SMR section.", __func__);
1633c30b235SConrad Meyer 		break;
1643c30b235SConrad Meyer 	default:
1653c30b235SConrad Meyer 		__assert_unreachable();
1663c30b235SConrad Meyer 		break;
1673c30b235SConrad Meyer 	}
1683c30b235SConrad Meyer }
1693c30b235SConrad Meyer 
1703c30b235SConrad Meyer /*
171f2cc1285SJeff Roberson  * Get the root node for a tree.
172f2cc1285SJeff Roberson  */
173f2cc1285SJeff Roberson static __inline struct pctrie_node *
1743c30b235SConrad Meyer pctrie_root_load(struct pctrie *ptree, smr_t smr, enum pctrie_access access)
175f2cc1285SJeff Roberson {
1763c30b235SConrad Meyer 	return (pctrie_node_load((smr_pctnode_t *)&ptree->pt_root, smr, access));
177f2cc1285SJeff Roberson }
178f2cc1285SJeff Roberson 
179f2cc1285SJeff Roberson /*
180f2cc1285SJeff Roberson  * Set the root node for a tree.
181f2cc1285SJeff Roberson  */
182f2cc1285SJeff Roberson static __inline void
1833c30b235SConrad Meyer pctrie_root_store(struct pctrie *ptree, struct pctrie_node *node,
1843c30b235SConrad Meyer     enum pctrie_access access)
185f2cc1285SJeff Roberson {
1863c30b235SConrad Meyer 	pctrie_node_store((smr_pctnode_t *)&ptree->pt_root, node, access);
187f2cc1285SJeff Roberson }
188f2cc1285SJeff Roberson 
189f2cc1285SJeff Roberson /*
190f2cc1285SJeff Roberson  * Returns TRUE if the specified node is a leaf and FALSE otherwise.
191f2cc1285SJeff Roberson  */
19204f9afaeSConrad Meyer static __inline bool
193f2cc1285SJeff Roberson pctrie_isleaf(struct pctrie_node *node)
194f2cc1285SJeff Roberson {
195f2cc1285SJeff Roberson 	return (((uintptr_t)node & PCTRIE_ISLEAF) != 0);
196f2cc1285SJeff Roberson }
197f2cc1285SJeff Roberson 
198f2cc1285SJeff Roberson /*
1999cfed089SDoug Moore  * Returns val with leaf bit set.
2009cfed089SDoug Moore  */
2019cfed089SDoug Moore static __inline void *
2029cfed089SDoug Moore pctrie_toleaf(uint64_t *val)
2039cfed089SDoug Moore {
2049cfed089SDoug Moore 	return ((void *)((uintptr_t)val | PCTRIE_ISLEAF));
2059cfed089SDoug Moore }
2069cfed089SDoug Moore 
2079cfed089SDoug Moore /*
208f2cc1285SJeff Roberson  * Returns the associated val extracted from node.
209f2cc1285SJeff Roberson  */
210f2cc1285SJeff Roberson static __inline uint64_t *
211f2cc1285SJeff Roberson pctrie_toval(struct pctrie_node *node)
212f2cc1285SJeff Roberson {
213f2cc1285SJeff Roberson 	return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS));
214f2cc1285SJeff Roberson }
215f2cc1285SJeff Roberson 
216f2cc1285SJeff Roberson /*
217c0d0bc2bSDoug Moore  * Returns the associated pointer extracted from node and field offset.
218c0d0bc2bSDoug Moore  */
219c0d0bc2bSDoug Moore static __inline void *
220c0d0bc2bSDoug Moore pctrie_toptr(struct pctrie_node *node, int keyoff)
221c0d0bc2bSDoug Moore {
222c0d0bc2bSDoug Moore 	return ((void *)(((uintptr_t)node & ~PCTRIE_FLAGS) - keyoff));
223c0d0bc2bSDoug Moore }
224c0d0bc2bSDoug Moore 
225c0d0bc2bSDoug Moore /*
22616e01c05SDoug Moore  * Make 'child' a child of 'node'.
227f2cc1285SJeff Roberson  */
228f2cc1285SJeff Roberson static __inline void
22938f5cb1bSDoug Moore pctrie_addnode(struct pctrie_node *node, uint64_t index,
23016e01c05SDoug Moore     struct pctrie_node *child, enum pctrie_access access)
231f2cc1285SJeff Roberson {
232f2cc1285SJeff Roberson 	int slot;
233f2cc1285SJeff Roberson 
234ac0572e6SDoug Moore 	slot = pctrie_slot(node, index);
23516e01c05SDoug Moore 	pctrie_node_store(&node->pn_child[slot], child, access);
2368df38859SDoug Moore 	node->pn_popmap ^= 1 << slot;
2378df38859SDoug Moore 	KASSERT((node->pn_popmap & (1 << slot)) != 0,
2388df38859SDoug Moore 	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
239f2cc1285SJeff Roberson }
240f2cc1285SJeff Roberson 
241f2cc1285SJeff Roberson /*
242f2cc1285SJeff Roberson  * pctrie node zone initializer.
243f2cc1285SJeff Roberson  */
244f2cc1285SJeff Roberson int
245f2cc1285SJeff Roberson pctrie_zone_init(void *mem, int size __unused, int flags __unused)
246f2cc1285SJeff Roberson {
247f2cc1285SJeff Roberson 	struct pctrie_node *node;
248f2cc1285SJeff Roberson 
249f2cc1285SJeff Roberson 	node = mem;
2508df38859SDoug Moore 	node->pn_popmap = 0;
2512d2bcba7SDoug Moore 	for (int i = 0; i < nitems(node->pn_child); i++)
2522d2bcba7SDoug Moore 		pctrie_node_store(&node->pn_child[i], PCTRIE_NULL,
2532d2bcba7SDoug Moore 		    PCTRIE_UNSERIALIZED);
254f2cc1285SJeff Roberson 	return (0);
255f2cc1285SJeff Roberson }
256f2cc1285SJeff Roberson 
257f2cc1285SJeff Roberson size_t
258f2cc1285SJeff Roberson pctrie_node_size(void)
259f2cc1285SJeff Roberson {
260f2cc1285SJeff Roberson 
261f2cc1285SJeff Roberson 	return (sizeof(struct pctrie_node));
262f2cc1285SJeff Roberson }
263f2cc1285SJeff Roberson 
264bbf81f46SRyan Libby enum pctrie_insert_neighbor_mode {
265bbf81f46SRyan Libby 	PCTRIE_INSERT_NEIGHBOR_NONE,
266bbf81f46SRyan Libby 	PCTRIE_INSERT_NEIGHBOR_LT,
267bbf81f46SRyan Libby 	PCTRIE_INSERT_NEIGHBOR_GT,
268bbf81f46SRyan Libby };
269bbf81f46SRyan Libby 
270f2cc1285SJeff Roberson /*
271bbf81f46SRyan Libby  * Look for where to insert the key-value pair into the trie.  Complete the
272bbf81f46SRyan Libby  * insertion if it replaces a null leaf.  Return the insertion location if the
273bbf81f46SRyan Libby  * insertion needs to be completed by the caller; otherwise return NULL.
274bbf81f46SRyan Libby  *
275bbf81f46SRyan Libby  * If the key is already present in the trie, populate *found_out as if by
276bbf81f46SRyan Libby  * pctrie_lookup().
277bbf81f46SRyan Libby  *
278bbf81f46SRyan Libby  * With mode PCTRIE_INSERT_NEIGHBOR_GT or PCTRIE_INSERT_NEIGHBOR_LT, set
279bbf81f46SRyan Libby  * *neighbor_out to the lowest level node we encounter during the insert lookup
280bbf81f46SRyan Libby  * that is a parent of the next greater or lesser entry.  The value is not
281bbf81f46SRyan Libby  * defined if the key was already present in the trie.
282bbf81f46SRyan Libby  *
283bbf81f46SRyan Libby  * Note that mode is expected to be a compile-time constant, and this procedure
284bbf81f46SRyan Libby  * is expected to be inlined into callers with extraneous code optimized out.
285f2cc1285SJeff Roberson  */
286bbf81f46SRyan Libby static __always_inline void *
287bbf81f46SRyan Libby pctrie_insert_lookup_compound(struct pctrie *ptree, uint64_t *val,
288bbf81f46SRyan Libby     uint64_t **found_out, struct pctrie_node **neighbor_out,
289bbf81f46SRyan Libby     enum pctrie_insert_neighbor_mode mode)
290f2cc1285SJeff Roberson {
2913b7ffacdSDoug Moore 	uint64_t index;
2923b7ffacdSDoug Moore 	struct pctrie_node *node, *parent;
293f2cc1285SJeff Roberson 	int slot;
294f2cc1285SJeff Roberson 
295f2cc1285SJeff Roberson 	index = *val;
296f2cc1285SJeff Roberson 
297f2cc1285SJeff Roberson 	/*
298f2cc1285SJeff Roberson 	 * The owner of record for root is not really important because it
299f2cc1285SJeff Roberson 	 * will never be used.
300f2cc1285SJeff Roberson 	 */
3013c30b235SConrad Meyer 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
3022d2bcba7SDoug Moore 	parent = NULL;
3032d2bcba7SDoug Moore 	for (;;) {
3042d2bcba7SDoug Moore 		if (pctrie_isleaf(node)) {
3052d2bcba7SDoug Moore 			if (node == PCTRIE_NULL) {
3062d2bcba7SDoug Moore 				if (parent == NULL)
3079147a0c9SDoug Moore 					pctrie_root_store(ptree,
3089147a0c9SDoug Moore 					    pctrie_toleaf(val), PCTRIE_LOCKED);
3092d2bcba7SDoug Moore 				else
3103b7ffacdSDoug Moore 					pctrie_addnode(parent, index,
3113b7ffacdSDoug Moore 					    pctrie_toleaf(val), PCTRIE_LOCKED);
3123b7ffacdSDoug Moore 				return (NULL);
313f2cc1285SJeff Roberson 			}
314bbf81f46SRyan Libby 			if (*pctrie_toval(node) == index) {
315bbf81f46SRyan Libby 				*found_out = pctrie_toval(node);
316bbf81f46SRyan Libby 				return (NULL);
317bbf81f46SRyan Libby 			}
318f2cc1285SJeff Roberson 			break;
3192d2bcba7SDoug Moore 		}
3203b7ffacdSDoug Moore 		if (pctrie_keybarr(node, index, &slot))
32116e01c05SDoug Moore 			break;
322bbf81f46SRyan Libby 		/*
323bbf81f46SRyan Libby 		 * Descend.  If we're tracking the next neighbor and this node
324bbf81f46SRyan Libby 		 * contains a neighboring entry in the right direction, record
325bbf81f46SRyan Libby 		 * it.
326bbf81f46SRyan Libby 		 */
327bbf81f46SRyan Libby 		if (mode == PCTRIE_INSERT_NEIGHBOR_LT) {
328bbf81f46SRyan Libby 			if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
329bbf81f46SRyan Libby 				*neighbor_out = node;
330bbf81f46SRyan Libby 		} else if (mode == PCTRIE_INSERT_NEIGHBOR_GT) {
331bbf81f46SRyan Libby 			if ((node->pn_popmap >> slot) > 1)
332bbf81f46SRyan Libby 				*neighbor_out = node;
333bbf81f46SRyan Libby 		}
3342d2bcba7SDoug Moore 		parent = node;
3352d2bcba7SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
3363c30b235SConrad Meyer 		    PCTRIE_LOCKED);
337f2cc1285SJeff Roberson 	}
338f2cc1285SJeff Roberson 
339f2cc1285SJeff Roberson 	/*
340bbf81f46SRyan Libby 	 * The caller will split this node.  If we're tracking the next
341bbf81f46SRyan Libby 	 * neighbor, record the old node if the old entry is in the right
342bbf81f46SRyan Libby 	 * direction.
343bbf81f46SRyan Libby 	 */
344bbf81f46SRyan Libby 	if (mode == PCTRIE_INSERT_NEIGHBOR_LT) {
345bbf81f46SRyan Libby 		if (*pctrie_toval(node) < index)
346bbf81f46SRyan Libby 			*neighbor_out = node;
347bbf81f46SRyan Libby 	} else if (mode == PCTRIE_INSERT_NEIGHBOR_GT) {
348bbf81f46SRyan Libby 		if (*pctrie_toval(node) > index)
349bbf81f46SRyan Libby 			*neighbor_out = node;
350bbf81f46SRyan Libby 	}
351bbf81f46SRyan Libby 
352bbf81f46SRyan Libby 	/*
3533b7ffacdSDoug Moore 	 * 'node' must be replaced in the tree with a new branch node, with
3543b7ffacdSDoug Moore 	 * children 'node' and 'val'. Return the place that points to 'node'
3553b7ffacdSDoug Moore 	 * now, and will point to to the new branching node later.
356f2cc1285SJeff Roberson 	 */
3573b7ffacdSDoug Moore 	return ((parent != NULL) ? &parent->pn_child[slot]:
3583b7ffacdSDoug Moore 	    (smr_pctnode_t *)&ptree->pt_root);
3593b7ffacdSDoug Moore }
3603b7ffacdSDoug Moore 
3613b7ffacdSDoug Moore /*
362bbf81f46SRyan Libby  * Wrap pctrie_insert_lookup_compound to implement a strict insertion.  Panic
363bbf81f46SRyan Libby  * if the key already exists, and do not look for neighboring entries.
364bbf81f46SRyan Libby  */
365bbf81f46SRyan Libby void *
366bbf81f46SRyan Libby pctrie_insert_lookup_strict(struct pctrie *ptree, uint64_t *val)
367bbf81f46SRyan Libby {
368bbf81f46SRyan Libby 	void *parentp;
369bbf81f46SRyan Libby 	uint64_t *found;
370bbf81f46SRyan Libby 
371bbf81f46SRyan Libby 	found = NULL;
372bbf81f46SRyan Libby 	parentp = pctrie_insert_lookup_compound(ptree, val, &found, NULL,
373bbf81f46SRyan Libby 	    PCTRIE_INSERT_NEIGHBOR_NONE);
374bbf81f46SRyan Libby 	if (__predict_false(found != NULL))
375bbf81f46SRyan Libby 		panic("%s: key %jx is already present", __func__,
376bbf81f46SRyan Libby 		    (uintmax_t)*val);
377bbf81f46SRyan Libby 	return (parentp);
378bbf81f46SRyan Libby }
379bbf81f46SRyan Libby 
380bbf81f46SRyan Libby /*
381bbf81f46SRyan Libby  * Wrap pctrie_insert_lookup_compound to implement find-or-insert.  Do not look
382bbf81f46SRyan Libby  * for neighboring entries.
383bbf81f46SRyan Libby  */
384bbf81f46SRyan Libby void *
385bbf81f46SRyan Libby pctrie_insert_lookup(struct pctrie *ptree, uint64_t *val,
386bbf81f46SRyan Libby     uint64_t **found_out)
387bbf81f46SRyan Libby {
388bbf81f46SRyan Libby 	*found_out = NULL;
389bbf81f46SRyan Libby 	return (pctrie_insert_lookup_compound(ptree, val, found_out, NULL,
390bbf81f46SRyan Libby 	    PCTRIE_INSERT_NEIGHBOR_NONE));
391bbf81f46SRyan Libby }
392bbf81f46SRyan Libby 
393bbf81f46SRyan Libby /*
394bbf81f46SRyan Libby  * Wrap pctrie_insert_lookup_compound to implement find or insert and find next
395bbf81f46SRyan Libby  * greater entry.  Find a subtree that contains the next entry greater than the
396bbf81f46SRyan Libby  * newly-inserted or to-be-inserted entry.
397bbf81f46SRyan Libby  */
398bbf81f46SRyan Libby void *
399bbf81f46SRyan Libby pctrie_insert_lookup_gt(struct pctrie *ptree, uint64_t *val,
400bbf81f46SRyan Libby     uint64_t **found_out, struct pctrie_node **neighbor_out)
401bbf81f46SRyan Libby {
402bbf81f46SRyan Libby 	*found_out = NULL;
403bbf81f46SRyan Libby 	*neighbor_out = NULL;
404bbf81f46SRyan Libby 	return (pctrie_insert_lookup_compound(ptree, val, found_out,
405bbf81f46SRyan Libby 	    neighbor_out, PCTRIE_INSERT_NEIGHBOR_GT));
406bbf81f46SRyan Libby }
407bbf81f46SRyan Libby 
408bbf81f46SRyan Libby /*
409bbf81f46SRyan Libby  * Wrap pctrie_insert_lookup_compound to implement find or insert and find next
410bbf81f46SRyan Libby  * lesser entry.  Find a subtree that contains the next entry less than the
411bbf81f46SRyan Libby  * newly-inserted or to-be-inserted entry.
412bbf81f46SRyan Libby  */
413bbf81f46SRyan Libby void *
414bbf81f46SRyan Libby pctrie_insert_lookup_lt(struct pctrie *ptree, uint64_t *val,
415bbf81f46SRyan Libby     uint64_t **found_out, struct pctrie_node **neighbor_out)
416bbf81f46SRyan Libby {
417bbf81f46SRyan Libby 	*found_out = NULL;
418bbf81f46SRyan Libby 	*neighbor_out = NULL;
419bbf81f46SRyan Libby 	return (pctrie_insert_lookup_compound(ptree, val, found_out,
420bbf81f46SRyan Libby 	    neighbor_out, PCTRIE_INSERT_NEIGHBOR_LT));
421bbf81f46SRyan Libby }
422bbf81f46SRyan Libby 
423bbf81f46SRyan Libby /*
4243b7ffacdSDoug Moore  * Uses new node to insert key-value pair into the trie at given location.
4253b7ffacdSDoug Moore  */
4263b7ffacdSDoug Moore void
4273b7ffacdSDoug Moore pctrie_insert_node(void *parentp, struct pctrie_node *parent, uint64_t *val)
4283b7ffacdSDoug Moore {
4293b7ffacdSDoug Moore 	struct pctrie_node *node;
4303b7ffacdSDoug Moore 	uint64_t index, newind;
4313b7ffacdSDoug Moore 
4323b7ffacdSDoug Moore 	/*
4333b7ffacdSDoug Moore 	 * Clear the last child pointer of the newly allocated parent.  We want
4343b7ffacdSDoug Moore 	 * to clear it after the final section has exited so lookup can not
4353b7ffacdSDoug Moore 	 * return false negatives.  It is done here because it will be
4363b7ffacdSDoug Moore 	 * cache-cold in the dtor callback.
4373b7ffacdSDoug Moore 	 */
4383b7ffacdSDoug Moore 	if (parent->pn_popmap != 0) {
4393b7ffacdSDoug Moore 		pctrie_node_store(&parent->pn_child[ffs(parent->pn_popmap) - 1],
4403b7ffacdSDoug Moore 		    PCTRIE_NULL, PCTRIE_UNSERIALIZED);
4413b7ffacdSDoug Moore 		parent->pn_popmap = 0;
4423b7ffacdSDoug Moore 	}
4433b7ffacdSDoug Moore 
4443b7ffacdSDoug Moore 	/*
4453b7ffacdSDoug Moore 	 * Recover the values of the two children of the new parent node.  If
4463b7ffacdSDoug Moore 	 * 'node' is not a leaf, this stores into 'newind' the 'owner' field,
4473b7ffacdSDoug Moore 	 * which must be first in the node.
4483b7ffacdSDoug Moore 	 */
4493b7ffacdSDoug Moore 	index = *val;
4503b7ffacdSDoug Moore 	node = pctrie_node_load(parentp, NULL, PCTRIE_UNSERIALIZED);
4513b7ffacdSDoug Moore 	newind = *pctrie_toval(node);
4523b7ffacdSDoug Moore 
4533b7ffacdSDoug Moore 	/*
4543b7ffacdSDoug Moore 	 * From the highest-order bit where the indexes differ,
4553b7ffacdSDoug Moore 	 * compute the highest level in the trie where they differ.  Then,
4563b7ffacdSDoug Moore 	 * compute the least index of this subtrie.
4573b7ffacdSDoug Moore 	 */
4583b7ffacdSDoug Moore 	_Static_assert(sizeof(long long) >= sizeof(uint64_t),
4593b7ffacdSDoug Moore 	    "uint64 too wide");
4603b7ffacdSDoug Moore 	_Static_assert(sizeof(uint64_t) * NBBY <=
4613b7ffacdSDoug Moore 	    (1 << (sizeof(parent->pn_clev) * NBBY)), "pn_clev too narrow");
462749c249dSDoug Moore 	parent->pn_clev = rounddown(ilog2(index ^ newind), PCTRIE_WIDTH);
4633b7ffacdSDoug Moore 	parent->pn_owner = PCTRIE_COUNT;
4643b7ffacdSDoug Moore 	parent->pn_owner = index & -(parent->pn_owner << parent->pn_clev);
4653b7ffacdSDoug Moore 
4663b7ffacdSDoug Moore 
4673c30b235SConrad Meyer 	/* These writes are not yet visible due to ordering. */
4683b7ffacdSDoug Moore 	pctrie_addnode(parent, index, pctrie_toleaf(val), PCTRIE_UNSERIALIZED);
46938f5cb1bSDoug Moore 	pctrie_addnode(parent, newind, node, PCTRIE_UNSERIALIZED);
4703c30b235SConrad Meyer 	/* Synchronize to make the above visible. */
4712d2bcba7SDoug Moore 	pctrie_node_store(parentp, parent, PCTRIE_LOCKED);
472f2cc1285SJeff Roberson }
473f2cc1285SJeff Roberson 
474f2cc1285SJeff Roberson /*
475fd1d6662SDoug Moore  * Return the value associated with the node, if the node is a leaf that matches
476fd1d6662SDoug Moore  * the index; otherwise NULL.
477fd1d6662SDoug Moore  */
478fd1d6662SDoug Moore static __always_inline uint64_t *
479fd1d6662SDoug Moore pctrie_match_value(struct pctrie_node *node, uint64_t index)
480fd1d6662SDoug Moore {
481fd1d6662SDoug Moore 	uint64_t *m;
482fd1d6662SDoug Moore 
483fd1d6662SDoug Moore 	if (!pctrie_isleaf(node) || (m = pctrie_toval(node)) == NULL ||
484fd1d6662SDoug Moore 	    *m != index)
485fd1d6662SDoug Moore 		m = NULL;
486fd1d6662SDoug Moore 	return (m);
487fd1d6662SDoug Moore }
488fd1d6662SDoug Moore 
489fd1d6662SDoug Moore /*
490f2cc1285SJeff Roberson  * Returns the value stored at the index.  If the index is not present,
491f2cc1285SJeff Roberson  * NULL is returned.
492f2cc1285SJeff Roberson  */
4933c30b235SConrad Meyer static __always_inline uint64_t *
4943c30b235SConrad Meyer _pctrie_lookup(struct pctrie *ptree, uint64_t index, smr_t smr,
4953c30b235SConrad Meyer     enum pctrie_access access)
496f2cc1285SJeff Roberson {
497f2cc1285SJeff Roberson 	struct pctrie_node *node;
498f2cc1285SJeff Roberson 	int slot;
499f2cc1285SJeff Roberson 
5003c30b235SConrad Meyer 	node = pctrie_root_load(ptree, smr, access);
501fd1d6662SDoug Moore 	/* Seek a node that matches index. */
502fd1d6662SDoug Moore 	while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot))
5033c30b235SConrad Meyer 		node = pctrie_node_load(&node->pn_child[slot], smr, access);
504fd1d6662SDoug Moore 	return (pctrie_match_value(node, index));
505f2cc1285SJeff Roberson }
506f2cc1285SJeff Roberson 
507f2cc1285SJeff Roberson /*
5083c30b235SConrad Meyer  * Returns the value stored at the index, assuming access is externally
5093c30b235SConrad Meyer  * synchronized by a lock.
5103c30b235SConrad Meyer  *
5113c30b235SConrad Meyer  * If the index is not present, NULL is returned.
5123c30b235SConrad Meyer  */
5133c30b235SConrad Meyer uint64_t *
5143c30b235SConrad Meyer pctrie_lookup(struct pctrie *ptree, uint64_t index)
5153c30b235SConrad Meyer {
5163c30b235SConrad Meyer 	return (_pctrie_lookup(ptree, index, NULL, PCTRIE_LOCKED));
5173c30b235SConrad Meyer }
5183c30b235SConrad Meyer 
5193c30b235SConrad Meyer /*
5203c30b235SConrad Meyer  * Returns the value stored at the index without requiring an external lock.
5213c30b235SConrad Meyer  *
5223c30b235SConrad Meyer  * If the index is not present, NULL is returned.
5233c30b235SConrad Meyer  */
5243c30b235SConrad Meyer uint64_t *
5253c30b235SConrad Meyer pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr)
5263c30b235SConrad Meyer {
5273c30b235SConrad Meyer 	uint64_t *res;
5283c30b235SConrad Meyer 
5293c30b235SConrad Meyer 	smr_enter(smr);
5303c30b235SConrad Meyer 	res = _pctrie_lookup(ptree, index, smr, PCTRIE_SMR);
5313c30b235SConrad Meyer 	smr_exit(smr);
5323c30b235SConrad Meyer 	return (res);
5333c30b235SConrad Meyer }
5343c30b235SConrad Meyer 
5353c30b235SConrad Meyer /*
536fd1d6662SDoug Moore  * Returns the last node examined in the search for the index, and updates the
537fd1d6662SDoug Moore  * search path to that node.
538fd1d6662SDoug Moore  */
539fd1d6662SDoug Moore static __always_inline struct pctrie_node *
540fd1d6662SDoug Moore _pctrie_iter_lookup_node(struct pctrie_iter *it, uint64_t index, smr_t smr,
541fd1d6662SDoug Moore     enum pctrie_access access)
542fd1d6662SDoug Moore {
543fd1d6662SDoug Moore 	struct pctrie_node *node;
544fd1d6662SDoug Moore 	int slot;
545fd1d6662SDoug Moore 
546fd1d6662SDoug Moore 	/*
547fd1d6662SDoug Moore 	 * Climb the search path to find the lowest node from which to start the
548fd1d6662SDoug Moore 	 * search for a value matching 'index'.
549fd1d6662SDoug Moore 	 */
550fd1d6662SDoug Moore 	while (it->top != 0) {
551fd1d6662SDoug Moore 		node = it->path[it->top - 1];
552fd1d6662SDoug Moore 		KASSERT(!powerof2(node->pn_popmap),
553fd1d6662SDoug Moore 		    ("%s: freed node in iter path", __func__));
554fd1d6662SDoug Moore 		if (!pctrie_keybarr(node, index, &slot)) {
555fd1d6662SDoug Moore 			node = pctrie_node_load(
556fd1d6662SDoug Moore 			    &node->pn_child[slot], smr, access);
557fd1d6662SDoug Moore 			break;
558fd1d6662SDoug Moore 		}
559fd1d6662SDoug Moore 		--it->top;
560fd1d6662SDoug Moore 	}
561fd1d6662SDoug Moore 	if (it->top == 0)
562fd1d6662SDoug Moore 		node = pctrie_root_load(it->ptree, smr, access);
563fd1d6662SDoug Moore 
564fd1d6662SDoug Moore 	/* Seek a node that matches index. */
565fd1d6662SDoug Moore 	while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot)) {
566fd1d6662SDoug Moore 		KASSERT(it->top < nitems(it->path),
567fd1d6662SDoug Moore 		    ("%s: path overflow in trie %p", __func__, it->ptree));
568fd1d6662SDoug Moore 		it->path[it->top++] = node;
569fd1d6662SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], smr, access);
570fd1d6662SDoug Moore 	}
571fd1d6662SDoug Moore 	return (node);
572fd1d6662SDoug Moore }
573fd1d6662SDoug Moore 
574fd1d6662SDoug Moore /*
575fd1d6662SDoug Moore  * Returns the value stored at a given index value, possibly NULL.
576fd1d6662SDoug Moore  */
577fd1d6662SDoug Moore static __always_inline uint64_t *
578fd1d6662SDoug Moore _pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index, smr_t smr,
579fd1d6662SDoug Moore     enum pctrie_access access)
580fd1d6662SDoug Moore {
581fd1d6662SDoug Moore 	struct pctrie_node *node;
582fd1d6662SDoug Moore 
583fd1d6662SDoug Moore 	it->index = index;
584fd1d6662SDoug Moore 	node = _pctrie_iter_lookup_node(it, index, smr, access);
585fd1d6662SDoug Moore 	return (pctrie_match_value(node, index));
586fd1d6662SDoug Moore }
587fd1d6662SDoug Moore 
588fd1d6662SDoug Moore /*
589fd1d6662SDoug Moore  * Returns the value stored at a given index value, possibly NULL.
590fd1d6662SDoug Moore  */
591fd1d6662SDoug Moore uint64_t *
592fd1d6662SDoug Moore pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index)
593fd1d6662SDoug Moore {
594fd1d6662SDoug Moore 	return (_pctrie_iter_lookup(it, index, NULL, PCTRIE_LOCKED));
595fd1d6662SDoug Moore }
596fd1d6662SDoug Moore 
597fd1d6662SDoug Moore /*
598*d0b225d1SDoug Moore  * Insert the val in the trie, starting search with iterator.  Return a pointer
599*d0b225d1SDoug Moore  * to indicate where a new node must be allocated to complete insertion.
600*d0b225d1SDoug Moore  * Assumes access is externally synchronized by a lock.
601*d0b225d1SDoug Moore  */
602*d0b225d1SDoug Moore void *
603*d0b225d1SDoug Moore pctrie_iter_insert_lookup(struct pctrie_iter *it, uint64_t *val)
604*d0b225d1SDoug Moore {
605*d0b225d1SDoug Moore 	struct pctrie_node *node;
606*d0b225d1SDoug Moore 
607*d0b225d1SDoug Moore 	it->index = *val;
608*d0b225d1SDoug Moore 	node = _pctrie_iter_lookup_node(it, *val, NULL, PCTRIE_LOCKED);
609*d0b225d1SDoug Moore 	if (node == PCTRIE_NULL) {
610*d0b225d1SDoug Moore 		if (it->top == 0)
611*d0b225d1SDoug Moore 			pctrie_root_store(it->ptree,
612*d0b225d1SDoug Moore 			    pctrie_toleaf(val), PCTRIE_LOCKED);
613*d0b225d1SDoug Moore 		else
614*d0b225d1SDoug Moore 			pctrie_addnode(it->path[it->top - 1], it->index,
615*d0b225d1SDoug Moore 			    pctrie_toleaf(val), PCTRIE_LOCKED);
616*d0b225d1SDoug Moore 		return (NULL);
617*d0b225d1SDoug Moore 	}
618*d0b225d1SDoug Moore 	if (__predict_false(pctrie_match_value(node, it->index) != NULL))
619*d0b225d1SDoug Moore 		panic("%s: key %jx is already present", __func__,
620*d0b225d1SDoug Moore 		    (uintmax_t)it->index);
621*d0b225d1SDoug Moore 
622*d0b225d1SDoug Moore 	/*
623*d0b225d1SDoug Moore 	 * 'node' must be replaced in the tree with a new branch node, with
624*d0b225d1SDoug Moore 	 * children 'node' and 'val'. Return the place that points to 'node'
625*d0b225d1SDoug Moore 	 * now, and will point to to the new branching node later.
626*d0b225d1SDoug Moore 	 */
627*d0b225d1SDoug Moore 	if (it->top == 0)
628*d0b225d1SDoug Moore 		return ((smr_pctnode_t *)&it->ptree->pt_root);
629*d0b225d1SDoug Moore 	node = it->path[it->top - 1];
630*d0b225d1SDoug Moore 	return (&node->pn_child[pctrie_slot(node, it->index)]);
631*d0b225d1SDoug Moore }
632*d0b225d1SDoug Moore 
633*d0b225d1SDoug Moore /*
634fd1d6662SDoug Moore  * Returns the value stored at a fixed offset from the current index value,
635fd1d6662SDoug Moore  * possibly NULL.
636fd1d6662SDoug Moore  */
637fd1d6662SDoug Moore static __always_inline uint64_t *
638fd1d6662SDoug Moore _pctrie_iter_stride(struct pctrie_iter *it, int stride, smr_t smr,
639fd1d6662SDoug Moore     enum pctrie_access access)
640fd1d6662SDoug Moore {
641fd1d6662SDoug Moore 	uint64_t index = it->index + stride;
642fd1d6662SDoug Moore 
643fd1d6662SDoug Moore 	/* Detect stride overflow. */
644fd1d6662SDoug Moore 	if ((stride > 0) != (index > it->index))
645fd1d6662SDoug Moore 		return (NULL);
646fd1d6662SDoug Moore 	/* Detect crossing limit */
647fd1d6662SDoug Moore 	if ((index < it->limit) != (it->index < it->limit))
648fd1d6662SDoug Moore 		return (NULL);
649fd1d6662SDoug Moore 
650fd1d6662SDoug Moore 	return (_pctrie_iter_lookup(it, index, smr, access));
651fd1d6662SDoug Moore }
652fd1d6662SDoug Moore 
653fd1d6662SDoug Moore /*
654fd1d6662SDoug Moore  * Returns the value stored at a fixed offset from the current index value,
655fd1d6662SDoug Moore  * possibly NULL.
656fd1d6662SDoug Moore  */
657fd1d6662SDoug Moore uint64_t *
658fd1d6662SDoug Moore pctrie_iter_stride(struct pctrie_iter *it, int stride)
659fd1d6662SDoug Moore {
660fd1d6662SDoug Moore 	return (_pctrie_iter_stride(it, stride, NULL, PCTRIE_LOCKED));
661fd1d6662SDoug Moore }
662fd1d6662SDoug Moore 
663fd1d6662SDoug Moore /*
664fd1d6662SDoug Moore  * Returns the value stored at one more than the current index value, possibly
665fd1d6662SDoug Moore  * NULL, assuming access is externally synchronized by a lock.
666fd1d6662SDoug Moore  */
667fd1d6662SDoug Moore uint64_t *
668fd1d6662SDoug Moore pctrie_iter_next(struct pctrie_iter *it)
669fd1d6662SDoug Moore {
670fd1d6662SDoug Moore 	return (_pctrie_iter_stride(it, 1, NULL, PCTRIE_LOCKED));
671fd1d6662SDoug Moore }
672fd1d6662SDoug Moore 
673fd1d6662SDoug Moore /*
674fd1d6662SDoug Moore  * Returns the value stored at one less than the current index value, possibly
675fd1d6662SDoug Moore  * NULL, assuming access is externally synchronized by a lock.
676fd1d6662SDoug Moore  */
677fd1d6662SDoug Moore uint64_t *
678fd1d6662SDoug Moore pctrie_iter_prev(struct pctrie_iter *it)
679fd1d6662SDoug Moore {
680fd1d6662SDoug Moore 	return (_pctrie_iter_stride(it, -1, NULL, PCTRIE_LOCKED));
681fd1d6662SDoug Moore }
682fd1d6662SDoug Moore 
683fd1d6662SDoug Moore /*
6846f251ef2SDoug Moore  * Returns the value with the least index that is greater than or equal to the
6856f251ef2SDoug Moore  * specified index, or NULL if there are no such values.
6866f251ef2SDoug Moore  *
6876f251ef2SDoug Moore  * Requires that access be externally synchronized by a lock.
688f2cc1285SJeff Roberson  */
689bbf81f46SRyan Libby static __inline uint64_t *
690bbf81f46SRyan Libby pctrie_lookup_ge_node(struct pctrie_node *node, uint64_t index)
691f2cc1285SJeff Roberson {
692bbf81f46SRyan Libby 	struct pctrie_node *succ;
693f2cc1285SJeff Roberson 	uint64_t *m;
694d1139b52SConrad Meyer 	int slot;
695f2cc1285SJeff Roberson 
6966f251ef2SDoug Moore 	/*
6976f251ef2SDoug Moore 	 * Descend the trie as if performing an ordinary lookup for the
6986f251ef2SDoug Moore 	 * specified value.  However, unlike an ordinary lookup, as we descend
6996f251ef2SDoug Moore 	 * the trie, we use "succ" to remember the last branching-off point,
7006f251ef2SDoug Moore 	 * that is, the interior node under which the least value that is both
7016f251ef2SDoug Moore 	 * outside our current path down the trie and greater than the specified
7026f251ef2SDoug Moore 	 * index resides.  (The node's popmap makes it fast and easy to
7036f251ef2SDoug Moore 	 * recognize a branching-off point.)  If our ordinary lookup fails to
7046f251ef2SDoug Moore 	 * yield a value that is greater than or equal to the specified index,
7056f251ef2SDoug Moore 	 * then we will exit this loop and perform a lookup starting from
7066f251ef2SDoug Moore 	 * "succ".  If "succ" is not NULL, then that lookup is guaranteed to
7076f251ef2SDoug Moore 	 * succeed.
7086f251ef2SDoug Moore 	 */
7096f251ef2SDoug Moore 	succ = NULL;
7102d2bcba7SDoug Moore 	for (;;) {
7116f251ef2SDoug Moore 		if (pctrie_isleaf(node)) {
7122d2bcba7SDoug Moore 			if ((m = pctrie_toval(node)) != NULL && *m >= index)
713f2cc1285SJeff Roberson 				return (m);
7146f251ef2SDoug Moore 			break;
715f2cc1285SJeff Roberson 		}
716ac0572e6SDoug Moore 		if (pctrie_keybarr(node, index, &slot)) {
717f2cc1285SJeff Roberson 			/*
7186f251ef2SDoug Moore 			 * If all values in this subtree are > index, then the
7196f251ef2SDoug Moore 			 * least value in this subtree is the answer.
720f2cc1285SJeff Roberson 			 */
7216f251ef2SDoug Moore 			if (node->pn_owner > index)
7226f251ef2SDoug Moore 				succ = node;
7236f251ef2SDoug Moore 			break;
724f2cc1285SJeff Roberson 		}
725f2cc1285SJeff Roberson 
726f2cc1285SJeff Roberson 		/*
7276f251ef2SDoug Moore 		 * Just in case the next search step leads to a subtree of all
7286f251ef2SDoug Moore 		 * values < index, check popmap to see if a next bigger step, to
7296f251ef2SDoug Moore 		 * a subtree of all pages with values > index, is available.  If
7306f251ef2SDoug Moore 		 * so, remember to restart the search here.
731f2cc1285SJeff Roberson 		 */
7326f251ef2SDoug Moore 		if ((node->pn_popmap >> slot) > 1)
7336f251ef2SDoug Moore 			succ = node;
7346f251ef2SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
7356f251ef2SDoug Moore 		    PCTRIE_LOCKED);
736f2cc1285SJeff Roberson 	}
737f2cc1285SJeff Roberson 
738f2cc1285SJeff Roberson 	/*
7396f251ef2SDoug Moore 	 * Restart the search from the last place visited in the subtree that
7406f251ef2SDoug Moore 	 * included some values > index, if there was such a place.
7416f251ef2SDoug Moore 	 */
7426f251ef2SDoug Moore 	if (succ == NULL)
7436f251ef2SDoug Moore 		return (NULL);
7446f251ef2SDoug Moore 	if (succ != node) {
7456f251ef2SDoug Moore 		/*
7466f251ef2SDoug Moore 		 * Take a step to the next bigger sibling of the node chosen
7476f251ef2SDoug Moore 		 * last time.  In that subtree, all values > index.
7486f251ef2SDoug Moore 		 */
749ac0572e6SDoug Moore 		slot = pctrie_slot(succ, index) + 1;
7506f251ef2SDoug Moore 		KASSERT((succ->pn_popmap >> slot) != 0,
7516f251ef2SDoug Moore 		    ("%s: no popmap siblings past slot %d in node %p",
7526f251ef2SDoug Moore 		    __func__, slot, succ));
7536f251ef2SDoug Moore 		slot += ffs(succ->pn_popmap >> slot) - 1;
7546f251ef2SDoug Moore 		succ = pctrie_node_load(&succ->pn_child[slot], NULL,
7556f251ef2SDoug Moore 		    PCTRIE_LOCKED);
7566f251ef2SDoug Moore 	}
7576f251ef2SDoug Moore 
7586f251ef2SDoug Moore 	/*
7596f251ef2SDoug Moore 	 * Find the value in the subtree rooted at "succ" with the least index.
7606f251ef2SDoug Moore 	 */
7616f251ef2SDoug Moore 	while (!pctrie_isleaf(succ)) {
7626f251ef2SDoug Moore 		KASSERT(succ->pn_popmap != 0,
7636f251ef2SDoug Moore 		    ("%s: no popmap children in node %p",  __func__, succ));
7646f251ef2SDoug Moore 		slot = ffs(succ->pn_popmap) - 1;
7656f251ef2SDoug Moore 		succ = pctrie_node_load(&succ->pn_child[slot], NULL,
7666f251ef2SDoug Moore 		    PCTRIE_LOCKED);
7676f251ef2SDoug Moore 	}
7686f251ef2SDoug Moore 	return (pctrie_toval(succ));
7696f251ef2SDoug Moore }
7706f251ef2SDoug Moore 
771bbf81f46SRyan Libby uint64_t *
772bbf81f46SRyan Libby pctrie_lookup_ge(struct pctrie *ptree, uint64_t index)
773bbf81f46SRyan Libby {
774bbf81f46SRyan Libby 	return (pctrie_lookup_ge_node(
775bbf81f46SRyan Libby 	    pctrie_root_load(ptree, NULL, PCTRIE_LOCKED), index));
776bbf81f46SRyan Libby }
777bbf81f46SRyan Libby 
778bbf81f46SRyan Libby uint64_t *
779bbf81f46SRyan Libby pctrie_subtree_lookup_gt(struct pctrie_node *node, uint64_t index)
780bbf81f46SRyan Libby {
781bbf81f46SRyan Libby 	if (node == NULL || index + 1 == 0)
782bbf81f46SRyan Libby 		return (NULL);
783bbf81f46SRyan Libby 	return (pctrie_lookup_ge_node(node, index + 1));
784bbf81f46SRyan Libby }
785bbf81f46SRyan Libby 
786fd1d6662SDoug Moore /*
787fd1d6662SDoug Moore  * Find first leaf >= index, and fill iter with the path to the parent of that
788fd1d6662SDoug Moore  * leaf.  Return NULL if there is no such leaf less than limit.
789fd1d6662SDoug Moore  */
790fd1d6662SDoug Moore uint64_t *
791fd1d6662SDoug Moore pctrie_iter_lookup_ge(struct pctrie_iter *it, uint64_t index)
792fd1d6662SDoug Moore {
793fd1d6662SDoug Moore 	struct pctrie_node *node;
794fd1d6662SDoug Moore 	uint64_t *m;
795fd1d6662SDoug Moore 	int slot;
796fd1d6662SDoug Moore 
797fd1d6662SDoug Moore 	/* Seek a node that matches index. */
798fd1d6662SDoug Moore 	node = _pctrie_iter_lookup_node(it, index, NULL, PCTRIE_LOCKED);
799fd1d6662SDoug Moore 
800fd1d6662SDoug Moore 	/*
801fd1d6662SDoug Moore 	 * If no such node was found, and instead this path leads only to nodes
802fd1d6662SDoug Moore 	 * < index, back up to find a subtrie with the least value > index.
803fd1d6662SDoug Moore 	 */
804fd1d6662SDoug Moore 	if (pctrie_isleaf(node) ?
805fd1d6662SDoug Moore 	    (m = pctrie_toval(node)) == NULL || *m < index :
806fd1d6662SDoug Moore 	    node->pn_owner < index) {
807fd1d6662SDoug Moore 		/* Climb the path to find a node with a descendant > index. */
808fd1d6662SDoug Moore 		while (it->top != 0) {
809fd1d6662SDoug Moore 			node = it->path[it->top - 1];
810fd1d6662SDoug Moore 			slot = pctrie_slot(node, index) + 1;
811fd1d6662SDoug Moore 			if ((node->pn_popmap >> slot) != 0)
812fd1d6662SDoug Moore 				break;
813fd1d6662SDoug Moore 			--it->top;
814fd1d6662SDoug Moore 		}
815fd1d6662SDoug Moore 		if (it->top == 0)
816fd1d6662SDoug Moore 			return (NULL);
817fd1d6662SDoug Moore 
818fd1d6662SDoug Moore 		/* Step to the least child with a descendant > index. */
819fd1d6662SDoug Moore 		slot += ffs(node->pn_popmap >> slot) - 1;
820fd1d6662SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
821fd1d6662SDoug Moore 		    PCTRIE_LOCKED);
822fd1d6662SDoug Moore 	}
823fd1d6662SDoug Moore 	/* Descend to the least leaf of the subtrie. */
824fd1d6662SDoug Moore 	while (!pctrie_isleaf(node)) {
825fd1d6662SDoug Moore 		if (it->limit != 0 && node->pn_owner >= it->limit)
826fd1d6662SDoug Moore 			return (NULL);
827fd1d6662SDoug Moore 		slot = ffs(node->pn_popmap) - 1;
828fd1d6662SDoug Moore 		KASSERT(it->top < nitems(it->path),
829fd1d6662SDoug Moore 		    ("%s: path overflow in trie %p", __func__, it->ptree));
830fd1d6662SDoug Moore 		it->path[it->top++] = node;
831fd1d6662SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
832fd1d6662SDoug Moore 		    PCTRIE_LOCKED);
833fd1d6662SDoug Moore 	}
834fd1d6662SDoug Moore 	m = pctrie_toval(node);
835fd1d6662SDoug Moore 	if (it->limit != 0 && *m >= it->limit)
836fd1d6662SDoug Moore 		return (NULL);
837fd1d6662SDoug Moore 	it->index = *m;
838fd1d6662SDoug Moore 	return (m);
839fd1d6662SDoug Moore }
840fd1d6662SDoug Moore 
841fd1d6662SDoug Moore /*
842fd1d6662SDoug Moore  * Find the first leaf with value at least 'jump' greater than the previous
843fd1d6662SDoug Moore  * leaf.  Return NULL if that value is >= limit.
844fd1d6662SDoug Moore  */
845fd1d6662SDoug Moore uint64_t *
846fd1d6662SDoug Moore pctrie_iter_jump_ge(struct pctrie_iter *it, int64_t jump)
847fd1d6662SDoug Moore {
848fd1d6662SDoug Moore 	uint64_t index = it->index + jump;
849fd1d6662SDoug Moore 
850fd1d6662SDoug Moore 	/* Detect jump overflow. */
851fd1d6662SDoug Moore 	if ((jump > 0) != (index > it->index))
852fd1d6662SDoug Moore 		return (NULL);
853fd1d6662SDoug Moore 	if (it->limit != 0 && index >= it->limit)
854fd1d6662SDoug Moore 		return (NULL);
855fd1d6662SDoug Moore 	return (pctrie_iter_lookup_ge(it, index));
856fd1d6662SDoug Moore }
857fd1d6662SDoug Moore 
858bbf81f46SRyan Libby #ifdef INVARIANTS
859bbf81f46SRyan Libby void
860bbf81f46SRyan Libby pctrie_subtree_lookup_gt_assert(struct pctrie_node *node, uint64_t index,
861bbf81f46SRyan Libby     struct pctrie *ptree, uint64_t *res)
862bbf81f46SRyan Libby {
863bbf81f46SRyan Libby 	uint64_t *expected;
864bbf81f46SRyan Libby 
865bbf81f46SRyan Libby 	if (index + 1 == 0)
866bbf81f46SRyan Libby 		expected = NULL;
867bbf81f46SRyan Libby 	else
868bbf81f46SRyan Libby 		expected = pctrie_lookup_ge(ptree, index + 1);
869bbf81f46SRyan Libby 	KASSERT(res == expected,
870bbf81f46SRyan Libby 	    ("pctrie subtree lookup gt result different from root lookup: "
871bbf81f46SRyan Libby 	    "ptree %p, index %ju, subtree %p, found %p, expected %p", ptree,
872bbf81f46SRyan Libby 	    (uintmax_t)index, node, res, expected));
873bbf81f46SRyan Libby }
874bbf81f46SRyan Libby #endif
875bbf81f46SRyan Libby 
8766f251ef2SDoug Moore /*
8776f251ef2SDoug Moore  * Returns the value with the greatest index that is less than or equal to the
8786f251ef2SDoug Moore  * specified index, or NULL if there are no such values.
8796f251ef2SDoug Moore  *
8806f251ef2SDoug Moore  * Requires that access be externally synchronized by a lock.
881f2cc1285SJeff Roberson  */
882bbf81f46SRyan Libby static __inline uint64_t *
883bbf81f46SRyan Libby pctrie_lookup_le_node(struct pctrie_node *node, uint64_t index)
884f2cc1285SJeff Roberson {
885bbf81f46SRyan Libby 	struct pctrie_node *pred;
886f2cc1285SJeff Roberson 	uint64_t *m;
887d1139b52SConrad Meyer 	int slot;
888f2cc1285SJeff Roberson 
8896f251ef2SDoug Moore 	/*
890bbf81f46SRyan Libby 	 * Mirror the implementation of pctrie_lookup_ge_node, described above.
8916f251ef2SDoug Moore 	 */
8926f251ef2SDoug Moore 	pred = NULL;
8932d2bcba7SDoug Moore 	for (;;) {
8946f251ef2SDoug Moore 		if (pctrie_isleaf(node)) {
8952d2bcba7SDoug Moore 			if ((m = pctrie_toval(node)) != NULL && *m <= index)
896f2cc1285SJeff Roberson 				return (m);
8976f251ef2SDoug Moore 			break;
898f2cc1285SJeff Roberson 		}
899ac0572e6SDoug Moore 		if (pctrie_keybarr(node, index, &slot)) {
9006f251ef2SDoug Moore 			if (node->pn_owner < index)
9016f251ef2SDoug Moore 				pred = node;
9026f251ef2SDoug Moore 			break;
903f2cc1285SJeff Roberson 		}
9046f251ef2SDoug Moore 		if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
9056f251ef2SDoug Moore 			pred = node;
9066f251ef2SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
9073c30b235SConrad Meyer 		    PCTRIE_LOCKED);
9088df38859SDoug Moore 	}
9096f251ef2SDoug Moore 	if (pred == NULL)
9106f251ef2SDoug Moore 		return (NULL);
9116f251ef2SDoug Moore 	if (pred != node) {
912ac0572e6SDoug Moore 		slot = pctrie_slot(pred, index);
9136f251ef2SDoug Moore 		KASSERT((pred->pn_popmap & ((1 << slot) - 1)) != 0,
9146f251ef2SDoug Moore 		    ("%s: no popmap siblings before slot %d in node %p",
9156f251ef2SDoug Moore 		    __func__, slot, pred));
916749c249dSDoug Moore 		slot = ilog2(pred->pn_popmap & ((1 << slot) - 1));
9176f251ef2SDoug Moore 		pred = pctrie_node_load(&pred->pn_child[slot], NULL,
9186f251ef2SDoug Moore 		    PCTRIE_LOCKED);
919f2cc1285SJeff Roberson 	}
9206f251ef2SDoug Moore 	while (!pctrie_isleaf(pred)) {
9216f251ef2SDoug Moore 		KASSERT(pred->pn_popmap != 0,
9226f251ef2SDoug Moore 		    ("%s: no popmap children in node %p",  __func__, pred));
923749c249dSDoug Moore 		slot = ilog2(pred->pn_popmap);
9246f251ef2SDoug Moore 		pred = pctrie_node_load(&pred->pn_child[slot], NULL,
9256f251ef2SDoug Moore 		    PCTRIE_LOCKED);
9266f251ef2SDoug Moore 	}
9276f251ef2SDoug Moore 	return (pctrie_toval(pred));
928f2cc1285SJeff Roberson }
929f2cc1285SJeff Roberson 
930bbf81f46SRyan Libby uint64_t *
931bbf81f46SRyan Libby pctrie_lookup_le(struct pctrie *ptree, uint64_t index)
932bbf81f46SRyan Libby {
933bbf81f46SRyan Libby 	return (pctrie_lookup_le_node(
934bbf81f46SRyan Libby 	    pctrie_root_load(ptree, NULL, PCTRIE_LOCKED), index));
935bbf81f46SRyan Libby }
936bbf81f46SRyan Libby 
937bbf81f46SRyan Libby uint64_t *
938bbf81f46SRyan Libby pctrie_subtree_lookup_lt(struct pctrie_node *node, uint64_t index)
939bbf81f46SRyan Libby {
940bbf81f46SRyan Libby 	if (node == NULL || index == 0)
941bbf81f46SRyan Libby 		return (NULL);
942bbf81f46SRyan Libby 	return (pctrie_lookup_le_node(node, index - 1));
943bbf81f46SRyan Libby }
944bbf81f46SRyan Libby 
945fd1d6662SDoug Moore /*
946fd1d6662SDoug Moore  * Find first leaf <= index, and fill iter with the path to the parent of that
947fd1d6662SDoug Moore  * leaf.  Return NULL if there is no such leaf greater than limit.
948fd1d6662SDoug Moore  */
949fd1d6662SDoug Moore uint64_t *
950fd1d6662SDoug Moore pctrie_iter_lookup_le(struct pctrie_iter *it, uint64_t index)
951fd1d6662SDoug Moore {
952fd1d6662SDoug Moore 	struct pctrie_node *node;
953fd1d6662SDoug Moore 	uint64_t *m;
954fd1d6662SDoug Moore 	int slot;
955fd1d6662SDoug Moore 
956fd1d6662SDoug Moore 	/* Seek a node that matches index. */
957fd1d6662SDoug Moore 	node = _pctrie_iter_lookup_node(it, index, NULL, PCTRIE_LOCKED);
958fd1d6662SDoug Moore 
959fd1d6662SDoug Moore 	/*
960fd1d6662SDoug Moore 	 * If no such node was found, and instead this path leads only to nodes
961fd1d6662SDoug Moore 	 * > index, back up to find a subtrie with the least value > index.
962fd1d6662SDoug Moore 	 */
963fd1d6662SDoug Moore 	if (pctrie_isleaf(node) ?
964fd1d6662SDoug Moore 	    (m = pctrie_toval(node)) == NULL || *m > index :
965fd1d6662SDoug Moore 	    node->pn_owner > index) {
966fd1d6662SDoug Moore 		/* Climb the path to find a node with a descendant < index. */
967fd1d6662SDoug Moore 		while (it->top != 0) {
968fd1d6662SDoug Moore 			node = it->path[it->top - 1];
969fd1d6662SDoug Moore 			slot = pctrie_slot(node, index);
970fd1d6662SDoug Moore 			if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
971fd1d6662SDoug Moore 				break;
972fd1d6662SDoug Moore 			--it->top;
973fd1d6662SDoug Moore 		}
974fd1d6662SDoug Moore 		if (it->top == 0)
975fd1d6662SDoug Moore 			return (NULL);
976fd1d6662SDoug Moore 
977fd1d6662SDoug Moore 		/* Step to the greatest child with a descendant < index. */
978fd1d6662SDoug Moore 		slot = ilog2(node->pn_popmap & ((1 << slot) - 1));
979fd1d6662SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
980fd1d6662SDoug Moore 		    PCTRIE_LOCKED);
981fd1d6662SDoug Moore 	}
982fd1d6662SDoug Moore 	/* Descend to the greatest leaf of the subtrie. */
983fd1d6662SDoug Moore 	while (!pctrie_isleaf(node)) {
984fd1d6662SDoug Moore 		if (it->limit != 0 && it->limit >=
985fd1d6662SDoug Moore 		    node->pn_owner + (PCTRIE_COUNT << node->pn_clev) - 1)
986fd1d6662SDoug Moore 			return (NULL);
987fd1d6662SDoug Moore 		slot = ilog2(node->pn_popmap);
988fd1d6662SDoug Moore 		KASSERT(it->top < nitems(it->path),
989fd1d6662SDoug Moore 		    ("%s: path overflow in trie %p", __func__, it->ptree));
990fd1d6662SDoug Moore 		it->path[it->top++] = node;
991fd1d6662SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
992fd1d6662SDoug Moore 		    PCTRIE_LOCKED);
993fd1d6662SDoug Moore 	}
994fd1d6662SDoug Moore 	m = pctrie_toval(node);
995fd1d6662SDoug Moore 	if (it->limit != 0 && *m <= it->limit)
996fd1d6662SDoug Moore 		return (NULL);
997fd1d6662SDoug Moore 	it->index = *m;
998fd1d6662SDoug Moore 	return (m);
999fd1d6662SDoug Moore }
1000fd1d6662SDoug Moore 
1001fd1d6662SDoug Moore /*
1002fd1d6662SDoug Moore  * Find the first leaf with value at most 'jump' less than the previous
1003fd1d6662SDoug Moore  * leaf.  Return NULL if that value is <= limit.
1004fd1d6662SDoug Moore  */
1005fd1d6662SDoug Moore uint64_t *
1006fd1d6662SDoug Moore pctrie_iter_jump_le(struct pctrie_iter *it, int64_t jump)
1007fd1d6662SDoug Moore {
1008fd1d6662SDoug Moore 	uint64_t index = it->index - jump;
1009fd1d6662SDoug Moore 
1010fd1d6662SDoug Moore 	/* Detect jump overflow. */
1011fd1d6662SDoug Moore 	if ((jump > 0) != (index < it->index))
1012fd1d6662SDoug Moore 		return (NULL);
1013fd1d6662SDoug Moore 	if (it->limit != 0 && index <= it->limit)
1014fd1d6662SDoug Moore 		return (NULL);
1015fd1d6662SDoug Moore 	return (pctrie_iter_lookup_le(it, index));
1016fd1d6662SDoug Moore }
1017fd1d6662SDoug Moore 
1018bbf81f46SRyan Libby #ifdef INVARIANTS
1019bbf81f46SRyan Libby void
1020bbf81f46SRyan Libby pctrie_subtree_lookup_lt_assert(struct pctrie_node *node, uint64_t index,
1021bbf81f46SRyan Libby     struct pctrie *ptree, uint64_t *res)
1022bbf81f46SRyan Libby {
1023bbf81f46SRyan Libby 	uint64_t *expected;
1024bbf81f46SRyan Libby 
1025bbf81f46SRyan Libby 	if (index == 0)
1026bbf81f46SRyan Libby 		expected = NULL;
1027bbf81f46SRyan Libby 	else
1028bbf81f46SRyan Libby 		expected = pctrie_lookup_le(ptree, index - 1);
1029bbf81f46SRyan Libby 	KASSERT(res == expected,
1030bbf81f46SRyan Libby 	    ("pctrie subtree lookup lt result different from root lookup: "
1031bbf81f46SRyan Libby 	    "ptree %p, index %ju, subtree %p, found %p, expected %p", ptree,
1032bbf81f46SRyan Libby 	    (uintmax_t)index, node, res, expected));
1033bbf81f46SRyan Libby }
1034bbf81f46SRyan Libby #endif
1035bbf81f46SRyan Libby 
1036fd1d6662SDoug Moore static void
1037fd1d6662SDoug Moore pctrie_remove(struct pctrie *ptree, uint64_t index, struct pctrie_node *parent,
1038fd1d6662SDoug Moore     struct pctrie_node *node, struct pctrie_node **freenode)
1039f2cc1285SJeff Roberson {
1040fd1d6662SDoug Moore 	struct pctrie_node *child;
10418df38859SDoug Moore 	int slot;
1042f2cc1285SJeff Roberson 
10432d2bcba7SDoug Moore 	if (node == NULL) {
10442d2bcba7SDoug Moore 		pctrie_root_store(ptree, PCTRIE_NULL, PCTRIE_LOCKED);
1045fd1d6662SDoug Moore 		return;
1046f2cc1285SJeff Roberson 	}
1047fd1d6662SDoug Moore 	slot = pctrie_slot(node, index);
10488df38859SDoug Moore 	KASSERT((node->pn_popmap & (1 << slot)) != 0,
10498df38859SDoug Moore 	    ("%s: bad popmap slot %d in node %p",
10508df38859SDoug Moore 	    __func__, slot, node));
10518df38859SDoug Moore 	node->pn_popmap ^= 1 << slot;
10522d2bcba7SDoug Moore 	pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL, PCTRIE_LOCKED);
10538df38859SDoug Moore 	if (!powerof2(node->pn_popmap))
1054fd1d6662SDoug Moore 		return;
10552d2bcba7SDoug Moore 	KASSERT(node->pn_popmap != 0, ("%s: bad popmap all zeroes", __func__));
10568df38859SDoug Moore 	slot = ffs(node->pn_popmap) - 1;
10572d2bcba7SDoug Moore 	child = pctrie_node_load(&node->pn_child[slot], NULL, PCTRIE_LOCKED);
10582d2bcba7SDoug Moore 	KASSERT(child != PCTRIE_NULL,
10592d2bcba7SDoug Moore 	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
1060f2cc1285SJeff Roberson 	if (parent == NULL)
10612d2bcba7SDoug Moore 		pctrie_root_store(ptree, child, PCTRIE_LOCKED);
1062f2cc1285SJeff Roberson 	else {
1063ac0572e6SDoug Moore 		slot = pctrie_slot(parent, index);
10642d2bcba7SDoug Moore 		KASSERT(node ==
10652d2bcba7SDoug Moore 		    pctrie_node_load(&parent->pn_child[slot], NULL,
10662d2bcba7SDoug Moore 		    PCTRIE_LOCKED), ("%s: invalid child value", __func__));
10672d2bcba7SDoug Moore 		pctrie_node_store(&parent->pn_child[slot], child,
10683c30b235SConrad Meyer 		    PCTRIE_LOCKED);
1069f2cc1285SJeff Roberson 	}
10703c30b235SConrad Meyer 	/*
10713c30b235SConrad Meyer 	 * The child is still valid and we can not zero the
10723c30b235SConrad Meyer 	 * pointer until all SMR references are gone.
10733c30b235SConrad Meyer 	 */
10743b7ffacdSDoug Moore 	pctrie_node_put(node);
10753b7ffacdSDoug Moore 	*freenode = node;
1076fd1d6662SDoug Moore }
1077fd1d6662SDoug Moore 
1078fd1d6662SDoug Moore /*
1079fd1d6662SDoug Moore  * Remove the specified index from the tree, and return the value stored at
1080fd1d6662SDoug Moore  * that index.  If the index is not present, return NULL.
1081fd1d6662SDoug Moore  */
1082fd1d6662SDoug Moore uint64_t *
1083fd1d6662SDoug Moore pctrie_remove_lookup(struct pctrie *ptree, uint64_t index,
1084fd1d6662SDoug Moore     struct pctrie_node **freenode)
1085fd1d6662SDoug Moore {
1086fd1d6662SDoug Moore 	struct pctrie_node *child, *node, *parent;
1087fd1d6662SDoug Moore 	uint64_t *m;
1088fd1d6662SDoug Moore 	int slot;
1089fd1d6662SDoug Moore 
1090fd1d6662SDoug Moore 	DEBUG_POISON_POINTER(parent);
1091fd1d6662SDoug Moore 	*freenode = node = NULL;
1092fd1d6662SDoug Moore 	child = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
1093fd1d6662SDoug Moore 	while (!pctrie_isleaf(child)) {
1094fd1d6662SDoug Moore 		parent = node;
1095fd1d6662SDoug Moore 		node = child;
1096fd1d6662SDoug Moore 		slot = pctrie_slot(node, index);
1097fd1d6662SDoug Moore 		child = pctrie_node_load(&node->pn_child[slot], NULL,
1098fd1d6662SDoug Moore 		    PCTRIE_LOCKED);
1099fd1d6662SDoug Moore 	}
1100fd1d6662SDoug Moore 	m = pctrie_match_value(child, index);
1101fd1d6662SDoug Moore 	if (m != NULL)
1102fd1d6662SDoug Moore 		pctrie_remove(ptree, index, parent, node, freenode);
11033b7ffacdSDoug Moore 	return (m);
1104f2cc1285SJeff Roberson }
1105f2cc1285SJeff Roberson 
1106f2cc1285SJeff Roberson /*
1107fd1d6662SDoug Moore  * Remove from the trie the leaf last chosen by the iterator, and
1108fd1d6662SDoug Moore  * adjust the path if it's last member is to be freed.
1109fd1d6662SDoug Moore  */
1110fd1d6662SDoug Moore uint64_t *
1111fd1d6662SDoug Moore pctrie_iter_remove(struct pctrie_iter *it, struct pctrie_node **freenode)
1112fd1d6662SDoug Moore {
1113fd1d6662SDoug Moore 	struct pctrie_node *child, *node, *parent;
1114fd1d6662SDoug Moore 	uint64_t *m;
1115fd1d6662SDoug Moore 	int slot;
1116fd1d6662SDoug Moore 
1117fd1d6662SDoug Moore 	DEBUG_POISON_POINTER(parent);
1118fd1d6662SDoug Moore 	*freenode = NULL;
1119fd1d6662SDoug Moore 	if (it->top >= 1) {
1120fd1d6662SDoug Moore 		parent = (it->top >= 2) ? it->path[it->top - 2] : NULL;
1121fd1d6662SDoug Moore 		node = it->path[it->top - 1];
1122fd1d6662SDoug Moore 		slot = pctrie_slot(node, it->index);
1123fd1d6662SDoug Moore 		child = pctrie_node_load(&node->pn_child[slot], NULL,
1124fd1d6662SDoug Moore 		    PCTRIE_LOCKED);
1125fd1d6662SDoug Moore 	} else {
1126fd1d6662SDoug Moore 		node = NULL;
1127fd1d6662SDoug Moore 		child = pctrie_root_load(it->ptree, NULL, PCTRIE_LOCKED);
1128fd1d6662SDoug Moore 	}
1129fd1d6662SDoug Moore 	m = pctrie_match_value(child, it->index);
1130fd1d6662SDoug Moore 	if (m != NULL)
1131fd1d6662SDoug Moore 		pctrie_remove(it->ptree, it->index, parent, node, freenode);
1132fd1d6662SDoug Moore 	if (*freenode != NULL)
1133fd1d6662SDoug Moore 		--it->top;
1134fd1d6662SDoug Moore 	return (m);
1135fd1d6662SDoug Moore }
1136fd1d6662SDoug Moore 
1137fd1d6662SDoug Moore /*
1138fd1d6662SDoug Moore  * Return the current leaf, assuming access is externally synchronized by a
1139fd1d6662SDoug Moore  * lock.
1140fd1d6662SDoug Moore  */
1141fd1d6662SDoug Moore uint64_t *
1142fd1d6662SDoug Moore pctrie_iter_value(struct pctrie_iter *it)
1143fd1d6662SDoug Moore {
1144fd1d6662SDoug Moore 	struct pctrie_node *node;
1145fd1d6662SDoug Moore 	int slot;
1146fd1d6662SDoug Moore 
1147fd1d6662SDoug Moore 	if (it->top == 0)
1148fd1d6662SDoug Moore 		node = pctrie_root_load(it->ptree, NULL,
1149fd1d6662SDoug Moore 		    PCTRIE_LOCKED);
1150fd1d6662SDoug Moore 	else {
1151fd1d6662SDoug Moore 		node = it->path[it->top - 1];
1152fd1d6662SDoug Moore 		slot = pctrie_slot(node, it->index);
1153fd1d6662SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
1154fd1d6662SDoug Moore 		    PCTRIE_LOCKED);
1155fd1d6662SDoug Moore 	}
1156fd1d6662SDoug Moore 	return (pctrie_toval(node));
1157fd1d6662SDoug Moore }
1158fd1d6662SDoug Moore 
1159fd1d6662SDoug Moore /*
1160c0d0bc2bSDoug Moore  * Walk the subtrie rooted at *pnode in order, invoking callback on leaves and
1161c0d0bc2bSDoug Moore  * using the leftmost child pointer for path reversal, until an interior node
1162c0d0bc2bSDoug Moore  * is stripped of all children, and returned for deallocation, with *pnode left
1163d19851f0SDoug Moore  * pointing to the parent of that node.
1164f2cc1285SJeff Roberson  */
1165c0d0bc2bSDoug Moore static __always_inline struct pctrie_node *
1166c0d0bc2bSDoug Moore pctrie_reclaim_prune(struct pctrie_node **pnode, struct pctrie_node *parent,
1167c0d0bc2bSDoug Moore     pctrie_cb_t callback, int keyoff, void *arg)
1168f2cc1285SJeff Roberson {
11693b7ffacdSDoug Moore 	struct pctrie_node *child, *node;
11703b7ffacdSDoug Moore 	int slot;
1171f2cc1285SJeff Roberson 
11723b7ffacdSDoug Moore 	node = *pnode;
11733b7ffacdSDoug Moore 	while (node->pn_popmap != 0) {
11743b7ffacdSDoug Moore 		slot = ffs(node->pn_popmap) - 1;
11753b7ffacdSDoug Moore 		node->pn_popmap ^= 1 << slot;
11763b7ffacdSDoug Moore 		child = pctrie_node_load(&node->pn_child[slot], NULL,
11773b7ffacdSDoug Moore 		    PCTRIE_UNSERIALIZED);
11783b7ffacdSDoug Moore 		pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL,
11793b7ffacdSDoug Moore 		    PCTRIE_UNSERIALIZED);
1180c0d0bc2bSDoug Moore 		if (pctrie_isleaf(child)) {
1181c0d0bc2bSDoug Moore 			if (callback != NULL)
1182c0d0bc2bSDoug Moore 				callback(pctrie_toptr(child, keyoff), arg);
11833b7ffacdSDoug Moore 			continue;
1184c0d0bc2bSDoug Moore 		}
11853b7ffacdSDoug Moore 		/* Climb one level down the trie. */
11863b7ffacdSDoug Moore 		pctrie_node_store(&node->pn_child[0], parent,
11873b7ffacdSDoug Moore 		    PCTRIE_UNSERIALIZED);
11883b7ffacdSDoug Moore 		parent = node;
11893b7ffacdSDoug Moore 		node = child;
11903b7ffacdSDoug Moore 	}
11913b7ffacdSDoug Moore 	*pnode = parent;
11923b7ffacdSDoug Moore 	return (node);
11933b7ffacdSDoug Moore }
11943b7ffacdSDoug Moore 
11953b7ffacdSDoug Moore /*
11963b7ffacdSDoug Moore  * Recover the node parent from its first child and continue pruning.
11973b7ffacdSDoug Moore  */
1198c0d0bc2bSDoug Moore static __always_inline struct pctrie_node *
1199c0d0bc2bSDoug Moore pctrie_reclaim_resume_compound(struct pctrie_node **pnode,
1200c0d0bc2bSDoug Moore     pctrie_cb_t callback, int keyoff, void *arg)
12013b7ffacdSDoug Moore {
12023b7ffacdSDoug Moore 	struct pctrie_node *parent, *node;
12033b7ffacdSDoug Moore 
12043b7ffacdSDoug Moore 	node = *pnode;
12053b7ffacdSDoug Moore 	if (node == NULL)
12063b7ffacdSDoug Moore 		return (NULL);
12073b7ffacdSDoug Moore 	/* Climb one level up the trie. */
12083b7ffacdSDoug Moore 	parent = pctrie_node_load(&node->pn_child[0], NULL,
12093b7ffacdSDoug Moore 	    PCTRIE_UNSERIALIZED);
12103b7ffacdSDoug Moore 	pctrie_node_store(&node->pn_child[0], PCTRIE_NULL, PCTRIE_UNSERIALIZED);
1211c0d0bc2bSDoug Moore 	return (pctrie_reclaim_prune(pnode, parent, callback, keyoff, arg));
12123b7ffacdSDoug Moore }
12133b7ffacdSDoug Moore 
12143b7ffacdSDoug Moore /*
12153b7ffacdSDoug Moore  * Find the trie root, and start pruning with a NULL parent.
12163b7ffacdSDoug Moore  */
1217c0d0bc2bSDoug Moore static __always_inline struct pctrie_node *
1218c0d0bc2bSDoug Moore pctrie_reclaim_begin_compound(struct pctrie_node **pnode,
1219c0d0bc2bSDoug Moore     struct pctrie *ptree,
1220c0d0bc2bSDoug Moore     pctrie_cb_t callback, int keyoff, void *arg)
12213b7ffacdSDoug Moore {
12223b7ffacdSDoug Moore 	struct pctrie_node *node;
12233b7ffacdSDoug Moore 
12243b7ffacdSDoug Moore 	node = pctrie_root_load(ptree, NULL, PCTRIE_UNSERIALIZED);
12252d2bcba7SDoug Moore 	pctrie_root_store(ptree, PCTRIE_NULL, PCTRIE_UNSERIALIZED);
1226c0d0bc2bSDoug Moore 	if (pctrie_isleaf(node)) {
1227c0d0bc2bSDoug Moore 		if (callback != NULL && node != PCTRIE_NULL)
1228c0d0bc2bSDoug Moore 			callback(pctrie_toptr(node, keyoff), arg);
12293b7ffacdSDoug Moore 		return (NULL);
1230c0d0bc2bSDoug Moore 	}
12313b7ffacdSDoug Moore 	*pnode = node;
1232c0d0bc2bSDoug Moore 	return (pctrie_reclaim_prune(pnode, NULL, callback, keyoff, arg));
1233c0d0bc2bSDoug Moore }
1234c0d0bc2bSDoug Moore 
1235c0d0bc2bSDoug Moore struct pctrie_node *
1236c0d0bc2bSDoug Moore pctrie_reclaim_resume(struct pctrie_node **pnode)
1237c0d0bc2bSDoug Moore {
1238c0d0bc2bSDoug Moore 	return (pctrie_reclaim_resume_compound(pnode, NULL, 0, NULL));
1239c0d0bc2bSDoug Moore }
1240c0d0bc2bSDoug Moore 
1241c0d0bc2bSDoug Moore struct pctrie_node *
1242c0d0bc2bSDoug Moore pctrie_reclaim_begin(struct pctrie_node **pnode, struct pctrie *ptree)
1243c0d0bc2bSDoug Moore {
1244c0d0bc2bSDoug Moore 	return (pctrie_reclaim_begin_compound(pnode, ptree, NULL, 0, NULL));
1245c0d0bc2bSDoug Moore }
1246c0d0bc2bSDoug Moore 
1247c0d0bc2bSDoug Moore struct pctrie_node *
1248c0d0bc2bSDoug Moore pctrie_reclaim_resume_cb(struct pctrie_node **pnode,
1249c0d0bc2bSDoug Moore     pctrie_cb_t callback, int keyoff, void *arg)
1250c0d0bc2bSDoug Moore {
1251c0d0bc2bSDoug Moore 	return (pctrie_reclaim_resume_compound(pnode, callback, keyoff, arg));
1252c0d0bc2bSDoug Moore }
1253c0d0bc2bSDoug Moore 
1254c0d0bc2bSDoug Moore struct pctrie_node *
1255c0d0bc2bSDoug Moore pctrie_reclaim_begin_cb(struct pctrie_node **pnode, struct pctrie *ptree,
1256c0d0bc2bSDoug Moore     pctrie_cb_t callback, int keyoff, void *arg)
1257c0d0bc2bSDoug Moore {
1258c0d0bc2bSDoug Moore 	return (pctrie_reclaim_begin_compound(pnode, ptree,
1259c0d0bc2bSDoug Moore 	    callback, keyoff, arg));
12603b7ffacdSDoug Moore }
12613b7ffacdSDoug Moore 
12623b7ffacdSDoug Moore /*
12633b7ffacdSDoug Moore  * Replace an existing value in the trie with another one.
12643b7ffacdSDoug Moore  * Panics if there is not an old value in the trie at the new value's index.
12653b7ffacdSDoug Moore  */
12663b7ffacdSDoug Moore uint64_t *
12673b7ffacdSDoug Moore pctrie_replace(struct pctrie *ptree, uint64_t *newval)
12683b7ffacdSDoug Moore {
12693b7ffacdSDoug Moore 	struct pctrie_node *leaf, *parent, *node;
12703b7ffacdSDoug Moore 	uint64_t *m;
12713b7ffacdSDoug Moore 	uint64_t index;
12723b7ffacdSDoug Moore 	int slot;
12733b7ffacdSDoug Moore 
12743b7ffacdSDoug Moore 	leaf = pctrie_toleaf(newval);
12753b7ffacdSDoug Moore 	index = *newval;
12763b7ffacdSDoug Moore 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
12773b7ffacdSDoug Moore 	parent = NULL;
12783b7ffacdSDoug Moore 	for (;;) {
12793b7ffacdSDoug Moore 		if (pctrie_isleaf(node)) {
12803b7ffacdSDoug Moore 			if ((m = pctrie_toval(node)) != NULL && *m == index) {
12813b7ffacdSDoug Moore 				if (parent == NULL)
12829147a0c9SDoug Moore 					pctrie_root_store(ptree,
12839147a0c9SDoug Moore 					    leaf, PCTRIE_LOCKED);
12843b7ffacdSDoug Moore 				else
12853b7ffacdSDoug Moore 					pctrie_node_store(
12863b7ffacdSDoug Moore 					    &parent->pn_child[slot], leaf,
12873b7ffacdSDoug Moore 					    PCTRIE_LOCKED);
12883b7ffacdSDoug Moore 				return (m);
12893b7ffacdSDoug Moore 			}
12903b7ffacdSDoug Moore 			break;
12913b7ffacdSDoug Moore 		}
12923b7ffacdSDoug Moore 		if (pctrie_keybarr(node, index, &slot))
12933b7ffacdSDoug Moore 			break;
12943b7ffacdSDoug Moore 		parent = node;
12953b7ffacdSDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
12963b7ffacdSDoug Moore 		    PCTRIE_LOCKED);
12973b7ffacdSDoug Moore 	}
12983b7ffacdSDoug Moore 	panic("%s: original replacing value not found", __func__);
1299f2cc1285SJeff Roberson }
1300f2cc1285SJeff Roberson 
1301f2cc1285SJeff Roberson #ifdef DDB
1302f2cc1285SJeff Roberson /*
1303f2cc1285SJeff Roberson  * Show details about the given node.
1304f2cc1285SJeff Roberson  */
1305f2cc1285SJeff Roberson DB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
1306f2cc1285SJeff Roberson {
13073c30b235SConrad Meyer 	struct pctrie_node *node, *tmp;
13088df38859SDoug Moore 	int slot;
13098df38859SDoug Moore 	pn_popmap_t popmap;
1310f2cc1285SJeff Roberson 
1311f2cc1285SJeff Roberson         if (!have_addr)
1312f2cc1285SJeff Roberson                 return;
1313f2cc1285SJeff Roberson 	node = (struct pctrie_node *)addr;
13148df38859SDoug Moore 	db_printf("node %p, owner %jx, children popmap %04x, level %u:\n",
13158df38859SDoug Moore 	    (void *)node, (uintmax_t)node->pn_owner, node->pn_popmap,
131638f5cb1bSDoug Moore 	    node->pn_clev / PCTRIE_WIDTH);
13178df38859SDoug Moore 	for (popmap = node->pn_popmap; popmap != 0; popmap ^= 1 << slot) {
13188df38859SDoug Moore 		slot = ffs(popmap) - 1;
13198df38859SDoug Moore 		tmp = pctrie_node_load(&node->pn_child[slot], NULL,
13203c30b235SConrad Meyer 		    PCTRIE_UNSERIALIZED);
1321f2cc1285SJeff Roberson 		db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
13228df38859SDoug Moore 		    slot, (void *)tmp,
13233c30b235SConrad Meyer 		    pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL,
132438f5cb1bSDoug Moore 		    node->pn_clev / PCTRIE_WIDTH);
1325f2cc1285SJeff Roberson 	}
13263c30b235SConrad Meyer }
1327f2cc1285SJeff Roberson #endif /* DDB */
1328