xref: /freebsd/sys/kern/subr_pctrie.c (revision 8df38859d0f92025540bcbe99c9a291a584327f2)
18a36da99SPedro F. Giffuni /*-
24d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
38a36da99SPedro F. Giffuni  *
4f2cc1285SJeff Roberson  * Copyright (c) 2013 EMC Corp.
5f2cc1285SJeff Roberson  * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
6f2cc1285SJeff Roberson  * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
7f2cc1285SJeff Roberson  * All rights reserved.
8f2cc1285SJeff Roberson  *
9f2cc1285SJeff Roberson  * Redistribution and use in source and binary forms, with or without
10f2cc1285SJeff Roberson  * modification, are permitted provided that the following conditions
11f2cc1285SJeff Roberson  * are met:
12f2cc1285SJeff Roberson  * 1. Redistributions of source code must retain the above copyright
13f2cc1285SJeff Roberson  *    notice, this list of conditions and the following disclaimer.
14f2cc1285SJeff Roberson  * 2. Redistributions in binary form must reproduce the above copyright
15f2cc1285SJeff Roberson  *    notice, this list of conditions and the following disclaimer in the
16f2cc1285SJeff Roberson  *    documentation and/or other materials provided with the distribution.
17f2cc1285SJeff Roberson  *
18f2cc1285SJeff Roberson  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19f2cc1285SJeff Roberson  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20f2cc1285SJeff Roberson  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21f2cc1285SJeff Roberson  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22f2cc1285SJeff Roberson  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23f2cc1285SJeff Roberson  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24f2cc1285SJeff Roberson  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25f2cc1285SJeff Roberson  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26f2cc1285SJeff Roberson  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27f2cc1285SJeff Roberson  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28f2cc1285SJeff Roberson  * SUCH DAMAGE.
29f2cc1285SJeff Roberson  *
30f2cc1285SJeff Roberson  */
31f2cc1285SJeff Roberson 
/*
 * Path-compressed radix trie implementation.
 *
 * The implementation takes into account the following rationale:
 * - Size of the nodes should be as small as possible but still big enough
 *   to avoid a large maximum depth for the trie.  This is a balance
 *   between the necessity to not wire too much physical memory for the nodes
 *   and the necessity to avoid too much cache pollution during the trie
 *   operations.
 * - There is not a huge bias toward the number of lookup operations over
 *   the number of insert and remove operations.  This basically implies
 *   that optimizations supposedly helping one operation but hurting the
 *   other must be carefully evaluated.
 * - On average not many nodes are expected to be fully populated, hence
 *   level compression may just complicate things.
 */
48f2cc1285SJeff Roberson 
49f2cc1285SJeff Roberson #include <sys/cdefs.h>
50f2cc1285SJeff Roberson __FBSDID("$FreeBSD$");
51f2cc1285SJeff Roberson 
52f2cc1285SJeff Roberson #include "opt_ddb.h"
53f2cc1285SJeff Roberson 
54f2cc1285SJeff Roberson #include <sys/param.h>
55f2cc1285SJeff Roberson #include <sys/systm.h>
56f2cc1285SJeff Roberson #include <sys/kernel.h>
5705963ea4SDoug Moore #include <sys/libkern.h>
58f2cc1285SJeff Roberson #include <sys/pctrie.h>
593c30b235SConrad Meyer #include <sys/proc.h>	/* smr.h depends on struct thread. */
603c30b235SConrad Meyer #include <sys/smr.h>
613c30b235SConrad Meyer #include <sys/smr_types.h>
62f2cc1285SJeff Roberson 
63f2cc1285SJeff Roberson #ifdef DDB
64f2cc1285SJeff Roberson #include <ddb/ddb.h>
65f2cc1285SJeff Roberson #endif
66f2cc1285SJeff Roberson 
67f2cc1285SJeff Roberson #define	PCTRIE_MASK	(PCTRIE_COUNT - 1)
6855e0987aSPedro F. Giffuni #define	PCTRIE_LIMIT	(howmany(sizeof(uint64_t) * NBBY, PCTRIE_WIDTH) - 1)
69f2cc1285SJeff Roberson 
70*8df38859SDoug Moore #if PCTRIE_WIDTH == 3
71*8df38859SDoug Moore typedef uint8_t pn_popmap_t;
72*8df38859SDoug Moore #elif PCTRIE_WIDTH == 4
73*8df38859SDoug Moore typedef uint16_t pn_popmap_t;
74*8df38859SDoug Moore #elif PCTRIE_WIDTH == 5
75*8df38859SDoug Moore typedef uint32_t pn_popmap_t;
76*8df38859SDoug Moore #else
77*8df38859SDoug Moore #error Unsupported width
78*8df38859SDoug Moore #endif
79*8df38859SDoug Moore _Static_assert(sizeof(pn_popmap_t) <= sizeof(int),
80*8df38859SDoug Moore     "pn_popmap_t too wide");
81*8df38859SDoug Moore 
82f2cc1285SJeff Roberson /* Flag bits stored in node pointers. */
83f2cc1285SJeff Roberson #define	PCTRIE_ISLEAF	0x1
84f2cc1285SJeff Roberson #define	PCTRIE_FLAGS	0x1
85f2cc1285SJeff Roberson #define	PCTRIE_PAD	PCTRIE_FLAGS
86f2cc1285SJeff Roberson 
87f2cc1285SJeff Roberson /* Returns one unit associated with specified level. */
88f2cc1285SJeff Roberson #define	PCTRIE_UNITLEVEL(lev)						\
89f2cc1285SJeff Roberson 	((uint64_t)1 << ((lev) * PCTRIE_WIDTH))
90f2cc1285SJeff Roberson 
913c30b235SConrad Meyer struct pctrie_node;
923c30b235SConrad Meyer typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t;
933c30b235SConrad Meyer 
94f2cc1285SJeff Roberson struct pctrie_node {
95f2cc1285SJeff Roberson 	uint64_t	pn_owner;			/* Owner of record. */
96*8df38859SDoug Moore 	pn_popmap_t	pn_popmap;			/* Valid children. */
973c30b235SConrad Meyer 	uint8_t		pn_clev;			/* Current level. */
983c30b235SConrad Meyer 	smr_pctnode_t	pn_child[PCTRIE_COUNT];		/* Child nodes. */
99f2cc1285SJeff Roberson };
100f2cc1285SJeff Roberson 
1013c30b235SConrad Meyer enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED };
1023c30b235SConrad Meyer 
1033c30b235SConrad Meyer static __inline void pctrie_node_store(smr_pctnode_t *p, void *val,
1043c30b235SConrad Meyer     enum pctrie_access access);
1053c30b235SConrad Meyer 
106f2cc1285SJeff Roberson /*
107da72505fSDoug Moore  * Return the position in the array for a given level.
108da72505fSDoug Moore  */
109da72505fSDoug Moore static __inline int
110da72505fSDoug Moore pctrie_slot(uint64_t index, uint16_t level)
111da72505fSDoug Moore {
112da72505fSDoug Moore 	return ((index >> (level * PCTRIE_WIDTH)) & PCTRIE_MASK);
113da72505fSDoug Moore }
114da72505fSDoug Moore 
115da72505fSDoug Moore /* Computes the key (index) with the low-order 'level' radix-digits zeroed. */
116da72505fSDoug Moore static __inline uint64_t
117da72505fSDoug Moore pctrie_trimkey(uint64_t index, uint16_t level)
118da72505fSDoug Moore {
119da72505fSDoug Moore 	return (index & -PCTRIE_UNITLEVEL(level));
120da72505fSDoug Moore }
121da72505fSDoug Moore 
122da72505fSDoug Moore /*
123f2cc1285SJeff Roberson  * Allocate a node.  Pre-allocation should ensure that the request
124f2cc1285SJeff Roberson  * will always be satisfied.
125f2cc1285SJeff Roberson  */
1263c30b235SConrad Meyer static struct pctrie_node *
127da72505fSDoug Moore pctrie_node_get(struct pctrie *ptree, pctrie_alloc_t allocfn, uint64_t index,
128da72505fSDoug Moore     uint16_t clevel)
129f2cc1285SJeff Roberson {
130f2cc1285SJeff Roberson 	struct pctrie_node *node;
131f2cc1285SJeff Roberson 
132f2cc1285SJeff Roberson 	node = allocfn(ptree);
133f2cc1285SJeff Roberson 	if (node == NULL)
134f2cc1285SJeff Roberson 		return (NULL);
1353c30b235SConrad Meyer 
1363c30b235SConrad Meyer 	/*
1373c30b235SConrad Meyer 	 * We want to clear the last child pointer after the final section
1383c30b235SConrad Meyer 	 * has exited so lookup can not return false negatives.  It is done
1393c30b235SConrad Meyer 	 * here because it will be cache-cold in the dtor callback.
1403c30b235SConrad Meyer 	 */
141*8df38859SDoug Moore 	if (node->pn_popmap != 0) {
142*8df38859SDoug Moore 		pctrie_node_store(&node->pn_child[ffs(node->pn_popmap) - 1],
143*8df38859SDoug Moore 		    NULL, PCTRIE_UNSERIALIZED);
144*8df38859SDoug Moore 		node->pn_popmap = 0;
1453c30b235SConrad Meyer 	}
146da72505fSDoug Moore 	node->pn_owner = pctrie_trimkey(index, clevel + 1);
147f2cc1285SJeff Roberson 	node->pn_clev = clevel;
148f2cc1285SJeff Roberson 	return (node);
149f2cc1285SJeff Roberson }
150f2cc1285SJeff Roberson 
151f2cc1285SJeff Roberson /*
152f2cc1285SJeff Roberson  * Free radix node.
153f2cc1285SJeff Roberson  */
154f2cc1285SJeff Roberson static __inline void
155f2cc1285SJeff Roberson pctrie_node_put(struct pctrie *ptree, struct pctrie_node *node,
156*8df38859SDoug Moore     pctrie_free_t freefn)
157f2cc1285SJeff Roberson {
158f2cc1285SJeff Roberson #ifdef INVARIANTS
159f2cc1285SJeff Roberson 	int slot;
160f2cc1285SJeff Roberson 
161*8df38859SDoug Moore 	KASSERT(powerof2(node->pn_popmap),
162*8df38859SDoug Moore 	    ("pctrie_node_put: node %p has too many children %04x", node,
163*8df38859SDoug Moore 	    node->pn_popmap));
1643c30b235SConrad Meyer 	for (slot = 0; slot < PCTRIE_COUNT; slot++) {
165*8df38859SDoug Moore 		if ((node->pn_popmap & (1 << slot)) != 0)
1663c30b235SConrad Meyer 			continue;
1673c30b235SConrad Meyer 		KASSERT(smr_unserialized_load(&node->pn_child[slot], true) ==
1683c30b235SConrad Meyer 		    NULL, ("pctrie_node_put: node %p has a child", node));
1693c30b235SConrad Meyer 	}
170f2cc1285SJeff Roberson #endif
171f2cc1285SJeff Roberson 	freefn(ptree, node);
172f2cc1285SJeff Roberson }
173f2cc1285SJeff Roberson 
174f2cc1285SJeff Roberson /*
1753c30b235SConrad Meyer  * Fetch a node pointer from a slot.
1763c30b235SConrad Meyer  */
1773c30b235SConrad Meyer static __inline struct pctrie_node *
1783c30b235SConrad Meyer pctrie_node_load(smr_pctnode_t *p, smr_t smr, enum pctrie_access access)
1793c30b235SConrad Meyer {
1803c30b235SConrad Meyer 	switch (access) {
1813c30b235SConrad Meyer 	case PCTRIE_UNSERIALIZED:
1823c30b235SConrad Meyer 		return (smr_unserialized_load(p, true));
1833c30b235SConrad Meyer 	case PCTRIE_LOCKED:
1843c30b235SConrad Meyer 		return (smr_serialized_load(p, true));
1853c30b235SConrad Meyer 	case PCTRIE_SMR:
1863c30b235SConrad Meyer 		return (smr_entered_load(p, smr));
1873c30b235SConrad Meyer 	}
1883c30b235SConrad Meyer 	__assert_unreachable();
1893c30b235SConrad Meyer }
1903c30b235SConrad Meyer 
1913c30b235SConrad Meyer static __inline void
1923c30b235SConrad Meyer pctrie_node_store(smr_pctnode_t *p, void *v, enum pctrie_access access)
1933c30b235SConrad Meyer {
1943c30b235SConrad Meyer 	switch (access) {
1953c30b235SConrad Meyer 	case PCTRIE_UNSERIALIZED:
1963c30b235SConrad Meyer 		smr_unserialized_store(p, v, true);
1973c30b235SConrad Meyer 		break;
1983c30b235SConrad Meyer 	case PCTRIE_LOCKED:
1993c30b235SConrad Meyer 		smr_serialized_store(p, v, true);
2003c30b235SConrad Meyer 		break;
2013c30b235SConrad Meyer 	case PCTRIE_SMR:
2023c30b235SConrad Meyer 		panic("%s: Not supported in SMR section.", __func__);
2033c30b235SConrad Meyer 		break;
2043c30b235SConrad Meyer 	default:
2053c30b235SConrad Meyer 		__assert_unreachable();
2063c30b235SConrad Meyer 		break;
2073c30b235SConrad Meyer 	}
2083c30b235SConrad Meyer }
2093c30b235SConrad Meyer 
2103c30b235SConrad Meyer /*
211f2cc1285SJeff Roberson  * Get the root node for a tree.
212f2cc1285SJeff Roberson  */
213f2cc1285SJeff Roberson static __inline struct pctrie_node *
2143c30b235SConrad Meyer pctrie_root_load(struct pctrie *ptree, smr_t smr, enum pctrie_access access)
215f2cc1285SJeff Roberson {
2163c30b235SConrad Meyer 	return (pctrie_node_load((smr_pctnode_t *)&ptree->pt_root, smr, access));
217f2cc1285SJeff Roberson }
218f2cc1285SJeff Roberson 
219f2cc1285SJeff Roberson /*
220f2cc1285SJeff Roberson  * Set the root node for a tree.
221f2cc1285SJeff Roberson  */
222f2cc1285SJeff Roberson static __inline void
2233c30b235SConrad Meyer pctrie_root_store(struct pctrie *ptree, struct pctrie_node *node,
2243c30b235SConrad Meyer     enum pctrie_access access)
225f2cc1285SJeff Roberson {
2263c30b235SConrad Meyer 	pctrie_node_store((smr_pctnode_t *)&ptree->pt_root, node, access);
227f2cc1285SJeff Roberson }
228f2cc1285SJeff Roberson 
229f2cc1285SJeff Roberson /*
230f2cc1285SJeff Roberson  * Returns TRUE if the specified node is a leaf and FALSE otherwise.
231f2cc1285SJeff Roberson  */
23204f9afaeSConrad Meyer static __inline bool
233f2cc1285SJeff Roberson pctrie_isleaf(struct pctrie_node *node)
234f2cc1285SJeff Roberson {
235f2cc1285SJeff Roberson 
236f2cc1285SJeff Roberson 	return (((uintptr_t)node & PCTRIE_ISLEAF) != 0);
237f2cc1285SJeff Roberson }
238f2cc1285SJeff Roberson 
239f2cc1285SJeff Roberson /*
2409cfed089SDoug Moore  * Returns val with leaf bit set.
2419cfed089SDoug Moore  */
2429cfed089SDoug Moore static __inline void *
2439cfed089SDoug Moore pctrie_toleaf(uint64_t *val)
2449cfed089SDoug Moore {
2459cfed089SDoug Moore 	return ((void *)((uintptr_t)val | PCTRIE_ISLEAF));
2469cfed089SDoug Moore }
2479cfed089SDoug Moore 
2489cfed089SDoug Moore /*
249f2cc1285SJeff Roberson  * Returns the associated val extracted from node.
250f2cc1285SJeff Roberson  */
251f2cc1285SJeff Roberson static __inline uint64_t *
252f2cc1285SJeff Roberson pctrie_toval(struct pctrie_node *node)
253f2cc1285SJeff Roberson {
254f2cc1285SJeff Roberson 
255f2cc1285SJeff Roberson 	return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS));
256f2cc1285SJeff Roberson }
257f2cc1285SJeff Roberson 
258f2cc1285SJeff Roberson /*
259f2cc1285SJeff Roberson  * Adds the val as a child of the provided node.
260f2cc1285SJeff Roberson  */
261f2cc1285SJeff Roberson static __inline void
262f2cc1285SJeff Roberson pctrie_addval(struct pctrie_node *node, uint64_t index, uint16_t clev,
2633c30b235SConrad Meyer     uint64_t *val, enum pctrie_access access)
264f2cc1285SJeff Roberson {
265f2cc1285SJeff Roberson 	int slot;
266f2cc1285SJeff Roberson 
267f2cc1285SJeff Roberson 	slot = pctrie_slot(index, clev);
2683c30b235SConrad Meyer 	pctrie_node_store(&node->pn_child[slot],
2699cfed089SDoug Moore 	    pctrie_toleaf(val), access);
270*8df38859SDoug Moore 	node->pn_popmap ^= 1 << slot;
271*8df38859SDoug Moore 	KASSERT((node->pn_popmap & (1 << slot)) != 0,
272*8df38859SDoug Moore 	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
273f2cc1285SJeff Roberson }
274f2cc1285SJeff Roberson 
275f2cc1285SJeff Roberson /*
27605963ea4SDoug Moore  * Returns the level where two keys differ.
277f2cc1285SJeff Roberson  * It cannot accept 2 equal keys.
278f2cc1285SJeff Roberson  */
279f2cc1285SJeff Roberson static __inline uint16_t
280f2cc1285SJeff Roberson pctrie_keydiff(uint64_t index1, uint64_t index2)
281f2cc1285SJeff Roberson {
282f2cc1285SJeff Roberson 
283f2cc1285SJeff Roberson 	KASSERT(index1 != index2, ("%s: passing the same key value %jx",
284f2cc1285SJeff Roberson 	    __func__, (uintmax_t)index1));
28505963ea4SDoug Moore 	CTASSERT(sizeof(long long) >= sizeof(uint64_t));
286f2cc1285SJeff Roberson 
28705963ea4SDoug Moore 	/*
28805963ea4SDoug Moore 	 * From the highest-order bit where the indexes differ,
28905963ea4SDoug Moore 	 * compute the highest level in the trie where they differ.
29005963ea4SDoug Moore 	 */
29105963ea4SDoug Moore 	return ((flsll(index1 ^ index2) - 1) / PCTRIE_WIDTH);
292f2cc1285SJeff Roberson }
293f2cc1285SJeff Roberson 
294f2cc1285SJeff Roberson /*
295f2cc1285SJeff Roberson  * Returns TRUE if it can be determined that key does not belong to the
296f2cc1285SJeff Roberson  * specified node.  Otherwise, returns FALSE.
297f2cc1285SJeff Roberson  */
29804f9afaeSConrad Meyer static __inline bool
299f2cc1285SJeff Roberson pctrie_keybarr(struct pctrie_node *node, uint64_t idx)
300f2cc1285SJeff Roberson {
301f2cc1285SJeff Roberson 
302f2cc1285SJeff Roberson 	if (node->pn_clev < PCTRIE_LIMIT) {
303f2cc1285SJeff Roberson 		idx = pctrie_trimkey(idx, node->pn_clev + 1);
304f2cc1285SJeff Roberson 		return (idx != node->pn_owner);
305f2cc1285SJeff Roberson 	}
30604f9afaeSConrad Meyer 	return (false);
307f2cc1285SJeff Roberson }
308f2cc1285SJeff Roberson 
309f2cc1285SJeff Roberson /*
310f2cc1285SJeff Roberson  * Internal helper for pctrie_reclaim_allnodes().
311f2cc1285SJeff Roberson  * This function is recursive.
312f2cc1285SJeff Roberson  */
313f2cc1285SJeff Roberson static void
314f2cc1285SJeff Roberson pctrie_reclaim_allnodes_int(struct pctrie *ptree, struct pctrie_node *node,
315f2cc1285SJeff Roberson     pctrie_free_t freefn)
316f2cc1285SJeff Roberson {
3173c30b235SConrad Meyer 	struct pctrie_node *child;
318f2cc1285SJeff Roberson 	int slot;
319f2cc1285SJeff Roberson 
320*8df38859SDoug Moore 	while (node->pn_popmap != 0) {
321*8df38859SDoug Moore 		slot = ffs(node->pn_popmap) - 1;
3223c30b235SConrad Meyer 		child = pctrie_node_load(&node->pn_child[slot], NULL,
3233c30b235SConrad Meyer 		    PCTRIE_UNSERIALIZED);
324*8df38859SDoug Moore 		KASSERT(child != NULL, ("%s: bad popmap slot %d in node %p",
325*8df38859SDoug Moore 		    __func__, slot, node));
3263c30b235SConrad Meyer 		if (!pctrie_isleaf(child))
3273c30b235SConrad Meyer 			pctrie_reclaim_allnodes_int(ptree, child, freefn);
328*8df38859SDoug Moore 		node->pn_popmap ^= 1 << slot;
3293c30b235SConrad Meyer 		pctrie_node_store(&node->pn_child[slot], NULL,
3303c30b235SConrad Meyer 		    PCTRIE_UNSERIALIZED);
331f2cc1285SJeff Roberson 	}
332*8df38859SDoug Moore 	pctrie_node_put(ptree, node, freefn);
333f2cc1285SJeff Roberson }
334f2cc1285SJeff Roberson 
335f2cc1285SJeff Roberson /*
336f2cc1285SJeff Roberson  * pctrie node zone initializer.
337f2cc1285SJeff Roberson  */
338f2cc1285SJeff Roberson int
339f2cc1285SJeff Roberson pctrie_zone_init(void *mem, int size __unused, int flags __unused)
340f2cc1285SJeff Roberson {
341f2cc1285SJeff Roberson 	struct pctrie_node *node;
342f2cc1285SJeff Roberson 
343f2cc1285SJeff Roberson 	node = mem;
344*8df38859SDoug Moore 	node->pn_popmap = 0;
345f2cc1285SJeff Roberson 	memset(node->pn_child, 0, sizeof(node->pn_child));
346f2cc1285SJeff Roberson 	return (0);
347f2cc1285SJeff Roberson }
348f2cc1285SJeff Roberson 
349f2cc1285SJeff Roberson size_t
350f2cc1285SJeff Roberson pctrie_node_size(void)
351f2cc1285SJeff Roberson {
352f2cc1285SJeff Roberson 
353f2cc1285SJeff Roberson 	return (sizeof(struct pctrie_node));
354f2cc1285SJeff Roberson }
355f2cc1285SJeff Roberson 
356f2cc1285SJeff Roberson /*
357f2cc1285SJeff Roberson  * Inserts the key-value pair into the trie.
358f2cc1285SJeff Roberson  * Panics if the key already exists.
359f2cc1285SJeff Roberson  */
360f2cc1285SJeff Roberson int
361f2cc1285SJeff Roberson pctrie_insert(struct pctrie *ptree, uint64_t *val, pctrie_alloc_t allocfn)
362f2cc1285SJeff Roberson {
363f2cc1285SJeff Roberson 	uint64_t index, newind;
364f2cc1285SJeff Roberson 	struct pctrie_node *node, *tmp;
3653c30b235SConrad Meyer 	smr_pctnode_t *parentp;
366f2cc1285SJeff Roberson 	uint64_t *m;
367f2cc1285SJeff Roberson 	int slot;
368f2cc1285SJeff Roberson 	uint16_t clev;
369f2cc1285SJeff Roberson 
370f2cc1285SJeff Roberson 	index = *val;
371f2cc1285SJeff Roberson 
372f2cc1285SJeff Roberson 	/*
373f2cc1285SJeff Roberson 	 * The owner of record for root is not really important because it
374f2cc1285SJeff Roberson 	 * will never be used.
375f2cc1285SJeff Roberson 	 */
3763c30b235SConrad Meyer 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
377f2cc1285SJeff Roberson 	if (node == NULL) {
3789cfed089SDoug Moore 		ptree->pt_root = (uintptr_t)pctrie_toleaf(val);
379f2cc1285SJeff Roberson 		return (0);
380f2cc1285SJeff Roberson 	}
3813c30b235SConrad Meyer 	parentp = (smr_pctnode_t *)&ptree->pt_root;
382f2cc1285SJeff Roberson 	for (;;) {
383f2cc1285SJeff Roberson 		if (pctrie_isleaf(node)) {
384f2cc1285SJeff Roberson 			m = pctrie_toval(node);
385f2cc1285SJeff Roberson 			if (*m == index)
386f2cc1285SJeff Roberson 				panic("%s: key %jx is already present",
387f2cc1285SJeff Roberson 				    __func__, (uintmax_t)index);
388f2cc1285SJeff Roberson 			clev = pctrie_keydiff(*m, index);
389da72505fSDoug Moore 			tmp = pctrie_node_get(ptree, allocfn, index, clev);
390f2cc1285SJeff Roberson 			if (tmp == NULL)
391f2cc1285SJeff Roberson 				return (ENOMEM);
3923c30b235SConrad Meyer 			/* These writes are not yet visible due to ordering. */
3933c30b235SConrad Meyer 			pctrie_addval(tmp, index, clev, val,
3943c30b235SConrad Meyer 			    PCTRIE_UNSERIALIZED);
3953c30b235SConrad Meyer 			pctrie_addval(tmp, *m, clev, m, PCTRIE_UNSERIALIZED);
3963c30b235SConrad Meyer 			/* Synchronize to make leaf visible. */
3973c30b235SConrad Meyer 			pctrie_node_store(parentp, tmp, PCTRIE_LOCKED);
398f2cc1285SJeff Roberson 			return (0);
399f2cc1285SJeff Roberson 		} else if (pctrie_keybarr(node, index))
400f2cc1285SJeff Roberson 			break;
401f2cc1285SJeff Roberson 		slot = pctrie_slot(index, node->pn_clev);
4023c30b235SConrad Meyer 		parentp = &node->pn_child[slot];
4033c30b235SConrad Meyer 		tmp = pctrie_node_load(parentp, NULL, PCTRIE_LOCKED);
4043c30b235SConrad Meyer 		if (tmp == NULL) {
4053c30b235SConrad Meyer 			pctrie_addval(node, index, node->pn_clev, val,
4063c30b235SConrad Meyer 			    PCTRIE_LOCKED);
407f2cc1285SJeff Roberson 			return (0);
408f2cc1285SJeff Roberson 		}
4093c30b235SConrad Meyer 		node = tmp;
410f2cc1285SJeff Roberson 	}
411f2cc1285SJeff Roberson 
412f2cc1285SJeff Roberson 	/*
413f2cc1285SJeff Roberson 	 * A new node is needed because the right insertion level is reached.
414f2cc1285SJeff Roberson 	 * Setup the new intermediate node and add the 2 children: the
415f2cc1285SJeff Roberson 	 * new object and the older edge.
416f2cc1285SJeff Roberson 	 */
417f2cc1285SJeff Roberson 	newind = node->pn_owner;
418f2cc1285SJeff Roberson 	clev = pctrie_keydiff(newind, index);
419da72505fSDoug Moore 	tmp = pctrie_node_get(ptree, allocfn, index, clev);
420f2cc1285SJeff Roberson 	if (tmp == NULL)
421f2cc1285SJeff Roberson 		return (ENOMEM);
422f2cc1285SJeff Roberson 	slot = pctrie_slot(newind, clev);
4233c30b235SConrad Meyer 	/* These writes are not yet visible due to ordering. */
4243c30b235SConrad Meyer 	pctrie_addval(tmp, index, clev, val, PCTRIE_UNSERIALIZED);
4253c30b235SConrad Meyer 	pctrie_node_store(&tmp->pn_child[slot], node, PCTRIE_UNSERIALIZED);
426*8df38859SDoug Moore 	tmp->pn_popmap ^= 1 << slot;
4273c30b235SConrad Meyer 	/* Synchronize to make the above visible. */
4283c30b235SConrad Meyer 	pctrie_node_store(parentp, tmp, PCTRIE_LOCKED);
429f2cc1285SJeff Roberson 
430f2cc1285SJeff Roberson 	return (0);
431f2cc1285SJeff Roberson }
432f2cc1285SJeff Roberson 
433f2cc1285SJeff Roberson /*
434f2cc1285SJeff Roberson  * Returns the value stored at the index.  If the index is not present,
435f2cc1285SJeff Roberson  * NULL is returned.
436f2cc1285SJeff Roberson  */
4373c30b235SConrad Meyer static __always_inline uint64_t *
4383c30b235SConrad Meyer _pctrie_lookup(struct pctrie *ptree, uint64_t index, smr_t smr,
4393c30b235SConrad Meyer     enum pctrie_access access)
440f2cc1285SJeff Roberson {
441f2cc1285SJeff Roberson 	struct pctrie_node *node;
442f2cc1285SJeff Roberson 	uint64_t *m;
443f2cc1285SJeff Roberson 	int slot;
444f2cc1285SJeff Roberson 
4453c30b235SConrad Meyer 	node = pctrie_root_load(ptree, smr, access);
446f2cc1285SJeff Roberson 	while (node != NULL) {
447f2cc1285SJeff Roberson 		if (pctrie_isleaf(node)) {
448f2cc1285SJeff Roberson 			m = pctrie_toval(node);
449f2cc1285SJeff Roberson 			if (*m == index)
450f2cc1285SJeff Roberson 				return (m);
451f2cc1285SJeff Roberson 			break;
4523c30b235SConrad Meyer 		}
4533c30b235SConrad Meyer 		if (pctrie_keybarr(node, index))
454f2cc1285SJeff Roberson 			break;
455f2cc1285SJeff Roberson 		slot = pctrie_slot(index, node->pn_clev);
4563c30b235SConrad Meyer 		node = pctrie_node_load(&node->pn_child[slot], smr, access);
457f2cc1285SJeff Roberson 	}
458f2cc1285SJeff Roberson 	return (NULL);
459f2cc1285SJeff Roberson }
460f2cc1285SJeff Roberson 
461f2cc1285SJeff Roberson /*
4623c30b235SConrad Meyer  * Returns the value stored at the index, assuming access is externally
4633c30b235SConrad Meyer  * synchronized by a lock.
4643c30b235SConrad Meyer  *
4653c30b235SConrad Meyer  * If the index is not present, NULL is returned.
4663c30b235SConrad Meyer  */
4673c30b235SConrad Meyer uint64_t *
4683c30b235SConrad Meyer pctrie_lookup(struct pctrie *ptree, uint64_t index)
4693c30b235SConrad Meyer {
4703c30b235SConrad Meyer 	return (_pctrie_lookup(ptree, index, NULL, PCTRIE_LOCKED));
4713c30b235SConrad Meyer }
4723c30b235SConrad Meyer 
4733c30b235SConrad Meyer /*
4743c30b235SConrad Meyer  * Returns the value stored at the index without requiring an external lock.
4753c30b235SConrad Meyer  *
4763c30b235SConrad Meyer  * If the index is not present, NULL is returned.
4773c30b235SConrad Meyer  */
4783c30b235SConrad Meyer uint64_t *
4793c30b235SConrad Meyer pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr)
4803c30b235SConrad Meyer {
4813c30b235SConrad Meyer 	uint64_t *res;
4823c30b235SConrad Meyer 
4833c30b235SConrad Meyer 	smr_enter(smr);
4843c30b235SConrad Meyer 	res = _pctrie_lookup(ptree, index, smr, PCTRIE_SMR);
4853c30b235SConrad Meyer 	smr_exit(smr);
4863c30b235SConrad Meyer 	return (res);
4873c30b235SConrad Meyer }
4883c30b235SConrad Meyer 
4893c30b235SConrad Meyer /*
4903c30b235SConrad Meyer  * Look up the nearest entry at a position bigger than or equal to index,
4913c30b235SConrad Meyer  * assuming access is externally synchronized by a lock.
492f2cc1285SJeff Roberson  */
493f2cc1285SJeff Roberson uint64_t *
494f2cc1285SJeff Roberson pctrie_lookup_ge(struct pctrie *ptree, uint64_t index)
495f2cc1285SJeff Roberson {
496f2cc1285SJeff Roberson 	struct pctrie_node *stack[PCTRIE_LIMIT];
497f2cc1285SJeff Roberson 	uint64_t *m;
498f2cc1285SJeff Roberson 	struct pctrie_node *child, *node;
499f2cc1285SJeff Roberson #ifdef INVARIANTS
500f2cc1285SJeff Roberson 	int loops = 0;
501f2cc1285SJeff Roberson #endif
502d1139b52SConrad Meyer 	unsigned tos;
503d1139b52SConrad Meyer 	int slot;
504f2cc1285SJeff Roberson 
5053c30b235SConrad Meyer 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
506f2cc1285SJeff Roberson 	if (node == NULL)
507f2cc1285SJeff Roberson 		return (NULL);
508f2cc1285SJeff Roberson 	else if (pctrie_isleaf(node)) {
509f2cc1285SJeff Roberson 		m = pctrie_toval(node);
510f2cc1285SJeff Roberson 		if (*m >= index)
511f2cc1285SJeff Roberson 			return (m);
512f2cc1285SJeff Roberson 		else
513f2cc1285SJeff Roberson 			return (NULL);
514f2cc1285SJeff Roberson 	}
515f2cc1285SJeff Roberson 	tos = 0;
516f2cc1285SJeff Roberson 	for (;;) {
517f2cc1285SJeff Roberson 		/*
518f2cc1285SJeff Roberson 		 * If the keys differ before the current bisection node,
519f2cc1285SJeff Roberson 		 * then the search key might rollback to the earliest
520f2cc1285SJeff Roberson 		 * available bisection node or to the smallest key
5213c30b235SConrad Meyer 		 * in the current node (if the owner is greater than the
522f2cc1285SJeff Roberson 		 * search key).
523f2cc1285SJeff Roberson 		 */
524f2cc1285SJeff Roberson 		if (pctrie_keybarr(node, index)) {
525f2cc1285SJeff Roberson 			if (index > node->pn_owner) {
526f2cc1285SJeff Roberson ascend:
527f2cc1285SJeff Roberson 				KASSERT(++loops < 1000,
528f2cc1285SJeff Roberson 				    ("pctrie_lookup_ge: too many loops"));
529f2cc1285SJeff Roberson 
530f2cc1285SJeff Roberson 				/*
531f2cc1285SJeff Roberson 				 * Pop nodes from the stack until either the
532f2cc1285SJeff Roberson 				 * stack is empty or a node that could have a
533f2cc1285SJeff Roberson 				 * matching descendant is found.
534f2cc1285SJeff Roberson 				 */
535f2cc1285SJeff Roberson 				do {
536f2cc1285SJeff Roberson 					if (tos == 0)
537f2cc1285SJeff Roberson 						return (NULL);
538f2cc1285SJeff Roberson 					node = stack[--tos];
539f2cc1285SJeff Roberson 				} while (pctrie_slot(index,
540f2cc1285SJeff Roberson 				    node->pn_clev) == (PCTRIE_COUNT - 1));
541f2cc1285SJeff Roberson 
542f2cc1285SJeff Roberson 				/*
543f2cc1285SJeff Roberson 				 * The following computation cannot overflow
544f2cc1285SJeff Roberson 				 * because index's slot at the current level
545f2cc1285SJeff Roberson 				 * is less than PCTRIE_COUNT - 1.
546f2cc1285SJeff Roberson 				 */
547f2cc1285SJeff Roberson 				index = pctrie_trimkey(index,
548f2cc1285SJeff Roberson 				    node->pn_clev);
549f2cc1285SJeff Roberson 				index += PCTRIE_UNITLEVEL(node->pn_clev);
550f2cc1285SJeff Roberson 			} else
551f2cc1285SJeff Roberson 				index = node->pn_owner;
552f2cc1285SJeff Roberson 			KASSERT(!pctrie_keybarr(node, index),
553f2cc1285SJeff Roberson 			    ("pctrie_lookup_ge: keybarr failed"));
554f2cc1285SJeff Roberson 		}
555f2cc1285SJeff Roberson 		slot = pctrie_slot(index, node->pn_clev);
5563c30b235SConrad Meyer 		child = pctrie_node_load(&node->pn_child[slot], NULL,
5573c30b235SConrad Meyer 		    PCTRIE_LOCKED);
558f2cc1285SJeff Roberson 		if (pctrie_isleaf(child)) {
559f2cc1285SJeff Roberson 			m = pctrie_toval(child);
560f2cc1285SJeff Roberson 			if (*m >= index)
561f2cc1285SJeff Roberson 				return (m);
562f2cc1285SJeff Roberson 		} else if (child != NULL)
563f2cc1285SJeff Roberson 			goto descend;
564f2cc1285SJeff Roberson 
565*8df38859SDoug Moore 		/* Find the first set bit beyond the first slot+1 bits. */
566*8df38859SDoug Moore 		slot = ffs(node->pn_popmap & (-2 << slot)) - 1;
567*8df38859SDoug Moore 		if (slot < 0) {
568f2cc1285SJeff Roberson 			/*
569*8df38859SDoug Moore 			 * A value or edge greater than the search slot is not
570*8df38859SDoug Moore 			 * found in the current node; ascend to the next
571*8df38859SDoug Moore 			 * higher-level node.
572f2cc1285SJeff Roberson 			 */
573f2cc1285SJeff Roberson 			goto ascend;
574*8df38859SDoug Moore 		}
575*8df38859SDoug Moore 		child = pctrie_node_load(&node->pn_child[slot],
576*8df38859SDoug Moore 		    NULL, PCTRIE_LOCKED);
577*8df38859SDoug Moore 		KASSERT(child != NULL, ("%s: bad popmap slot %d in node %p",
578*8df38859SDoug Moore 		    __func__, slot, node));
579*8df38859SDoug Moore 		if (pctrie_isleaf(child))
580*8df38859SDoug Moore 			return (pctrie_toval(child));
581*8df38859SDoug Moore 		index = pctrie_trimkey(index, node->pn_clev + 1) +
582*8df38859SDoug Moore 		    slot * PCTRIE_UNITLEVEL(node->pn_clev);
583f2cc1285SJeff Roberson descend:
584f2cc1285SJeff Roberson 		KASSERT(node->pn_clev > 0,
585f2cc1285SJeff Roberson 		    ("pctrie_lookup_ge: pushing leaf's parent"));
586f2cc1285SJeff Roberson 		KASSERT(tos < PCTRIE_LIMIT,
587f2cc1285SJeff Roberson 		    ("pctrie_lookup_ge: stack overflow"));
588f2cc1285SJeff Roberson 		stack[tos++] = node;
589f2cc1285SJeff Roberson 		node = child;
590f2cc1285SJeff Roberson 	}
591f2cc1285SJeff Roberson }
592f2cc1285SJeff Roberson 
593f2cc1285SJeff Roberson /*
5943c30b235SConrad Meyer  * Look up the nearest entry at a position less than or equal to index,
5953c30b235SConrad Meyer  * assuming access is externally synchronized by a lock.
596f2cc1285SJeff Roberson  */
597f2cc1285SJeff Roberson uint64_t *
598f2cc1285SJeff Roberson pctrie_lookup_le(struct pctrie *ptree, uint64_t index)
599f2cc1285SJeff Roberson {
600f2cc1285SJeff Roberson 	struct pctrie_node *stack[PCTRIE_LIMIT];
601f2cc1285SJeff Roberson 	uint64_t *m;
602f2cc1285SJeff Roberson 	struct pctrie_node *child, *node;
603f2cc1285SJeff Roberson #ifdef INVARIANTS
604f2cc1285SJeff Roberson 	int loops = 0;
605f2cc1285SJeff Roberson #endif
606d1139b52SConrad Meyer 	unsigned tos;
607d1139b52SConrad Meyer 	int slot;
608f2cc1285SJeff Roberson 
6093c30b235SConrad Meyer 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
610f2cc1285SJeff Roberson 	if (node == NULL)
611f2cc1285SJeff Roberson 		return (NULL);
612f2cc1285SJeff Roberson 	else if (pctrie_isleaf(node)) {
613f2cc1285SJeff Roberson 		m = pctrie_toval(node);
614f2cc1285SJeff Roberson 		if (*m <= index)
615f2cc1285SJeff Roberson 			return (m);
616f2cc1285SJeff Roberson 		else
617f2cc1285SJeff Roberson 			return (NULL);
618f2cc1285SJeff Roberson 	}
619f2cc1285SJeff Roberson 	tos = 0;
620f2cc1285SJeff Roberson 	for (;;) {
621f2cc1285SJeff Roberson 		/*
622f2cc1285SJeff Roberson 		 * If the keys differ before the current bisection node,
623f2cc1285SJeff Roberson 		 * then the search key might rollback to the earliest
624f2cc1285SJeff Roberson 		 * available bisection node or to the largest key
625f2cc1285SJeff Roberson 		 * in the current node (if the owner is smaller than the
626f2cc1285SJeff Roberson 		 * search key).
627f2cc1285SJeff Roberson 		 */
628f2cc1285SJeff Roberson 		if (pctrie_keybarr(node, index)) {
629f2cc1285SJeff Roberson 			if (index > node->pn_owner) {
630f2cc1285SJeff Roberson 				index = node->pn_owner + PCTRIE_COUNT *
631f2cc1285SJeff Roberson 				    PCTRIE_UNITLEVEL(node->pn_clev);
632f2cc1285SJeff Roberson 			} else {
633f2cc1285SJeff Roberson ascend:
634f2cc1285SJeff Roberson 				KASSERT(++loops < 1000,
635f2cc1285SJeff Roberson 				    ("pctrie_lookup_le: too many loops"));
636f2cc1285SJeff Roberson 
637f2cc1285SJeff Roberson 				/*
638f2cc1285SJeff Roberson 				 * Pop nodes from the stack until either the
639f2cc1285SJeff Roberson 				 * stack is empty or a node that could have a
640f2cc1285SJeff Roberson 				 * matching descendant is found.
641f2cc1285SJeff Roberson 				 */
642f2cc1285SJeff Roberson 				do {
643f2cc1285SJeff Roberson 					if (tos == 0)
644f2cc1285SJeff Roberson 						return (NULL);
645f2cc1285SJeff Roberson 					node = stack[--tos];
646f2cc1285SJeff Roberson 				} while (pctrie_slot(index,
647f2cc1285SJeff Roberson 				    node->pn_clev) == 0);
648f2cc1285SJeff Roberson 
649f2cc1285SJeff Roberson 				/*
650f2cc1285SJeff Roberson 				 * The following computation cannot overflow
651f2cc1285SJeff Roberson 				 * because index's slot at the current level
652f2cc1285SJeff Roberson 				 * is greater than 0.
653f2cc1285SJeff Roberson 				 */
654f2cc1285SJeff Roberson 				index = pctrie_trimkey(index,
655f2cc1285SJeff Roberson 				    node->pn_clev);
656f2cc1285SJeff Roberson 			}
657f2cc1285SJeff Roberson 			index--;
658f2cc1285SJeff Roberson 			KASSERT(!pctrie_keybarr(node, index),
659f2cc1285SJeff Roberson 			    ("pctrie_lookup_le: keybarr failed"));
660f2cc1285SJeff Roberson 		}
661f2cc1285SJeff Roberson 		slot = pctrie_slot(index, node->pn_clev);
6623c30b235SConrad Meyer 		child = pctrie_node_load(&node->pn_child[slot], NULL,
6633c30b235SConrad Meyer 		    PCTRIE_LOCKED);
664f2cc1285SJeff Roberson 		if (pctrie_isleaf(child)) {
665f2cc1285SJeff Roberson 			m = pctrie_toval(child);
666f2cc1285SJeff Roberson 			if (*m <= index)
667f2cc1285SJeff Roberson 				return (m);
668f2cc1285SJeff Roberson 		} else if (child != NULL)
669f2cc1285SJeff Roberson 			goto descend;
670f2cc1285SJeff Roberson 
671*8df38859SDoug Moore 		/* Find the last set bit among the first slot bits. */
672*8df38859SDoug Moore 		slot = fls(node->pn_popmap & ((1 << slot) - 1)) - 1;
673*8df38859SDoug Moore 		if (slot < 0) {
674f2cc1285SJeff Roberson 			/*
675*8df38859SDoug Moore 			 * A value or edge smaller than the search slot is not
676*8df38859SDoug Moore 			 * found in the current node; ascend to the next
677*8df38859SDoug Moore 			 * higher-level node.
678f2cc1285SJeff Roberson 			 */
679f2cc1285SJeff Roberson 			goto ascend;
680*8df38859SDoug Moore 		}
681*8df38859SDoug Moore 		child = pctrie_node_load(&node->pn_child[slot],
682*8df38859SDoug Moore 		    NULL, PCTRIE_LOCKED);
683*8df38859SDoug Moore 		if (pctrie_isleaf(child))
684*8df38859SDoug Moore 			return (pctrie_toval(child));
685*8df38859SDoug Moore 		index = pctrie_trimkey(index, node->pn_clev + 1) +
686*8df38859SDoug Moore 		    (slot + 1) * PCTRIE_UNITLEVEL(node->pn_clev) - 1;
687f2cc1285SJeff Roberson descend:
688f2cc1285SJeff Roberson 		KASSERT(node->pn_clev > 0,
689f2cc1285SJeff Roberson 		    ("pctrie_lookup_le: pushing leaf's parent"));
690f2cc1285SJeff Roberson 		KASSERT(tos < PCTRIE_LIMIT,
691f2cc1285SJeff Roberson 		    ("pctrie_lookup_le: stack overflow"));
692f2cc1285SJeff Roberson 		stack[tos++] = node;
693f2cc1285SJeff Roberson 		node = child;
694f2cc1285SJeff Roberson 	}
695f2cc1285SJeff Roberson }
696f2cc1285SJeff Roberson 
697f2cc1285SJeff Roberson /*
698f2cc1285SJeff Roberson  * Remove the specified index from the tree.
699f2cc1285SJeff Roberson  * Panics if the key is not present.
700f2cc1285SJeff Roberson  */
701f2cc1285SJeff Roberson void
702f2cc1285SJeff Roberson pctrie_remove(struct pctrie *ptree, uint64_t index, pctrie_free_t freefn)
703f2cc1285SJeff Roberson {
7043c30b235SConrad Meyer 	struct pctrie_node *node, *parent, *tmp;
705f2cc1285SJeff Roberson 	uint64_t *m;
706*8df38859SDoug Moore 	int slot;
707f2cc1285SJeff Roberson 
7083c30b235SConrad Meyer 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
709f2cc1285SJeff Roberson 	if (pctrie_isleaf(node)) {
710f2cc1285SJeff Roberson 		m = pctrie_toval(node);
711f2cc1285SJeff Roberson 		if (*m != index)
712f2cc1285SJeff Roberson 			panic("%s: invalid key found", __func__);
7133c30b235SConrad Meyer 		pctrie_root_store(ptree, NULL, PCTRIE_LOCKED);
714f2cc1285SJeff Roberson 		return;
715f2cc1285SJeff Roberson 	}
716f2cc1285SJeff Roberson 	parent = NULL;
717f2cc1285SJeff Roberson 	for (;;) {
718f2cc1285SJeff Roberson 		if (node == NULL)
719f2cc1285SJeff Roberson 			panic("pctrie_remove: impossible to locate the key");
720f2cc1285SJeff Roberson 		slot = pctrie_slot(index, node->pn_clev);
7213c30b235SConrad Meyer 		tmp = pctrie_node_load(&node->pn_child[slot], NULL,
7223c30b235SConrad Meyer 		    PCTRIE_LOCKED);
7233c30b235SConrad Meyer 		if (pctrie_isleaf(tmp)) {
7243c30b235SConrad Meyer 			m = pctrie_toval(tmp);
725f2cc1285SJeff Roberson 			if (*m != index)
726f2cc1285SJeff Roberson 				panic("%s: invalid key found", __func__);
727*8df38859SDoug Moore 			KASSERT((node->pn_popmap & (1 << slot)) != 0,
728*8df38859SDoug Moore 			    ("%s: bad popmap slot %d in node %p",
729*8df38859SDoug Moore 			    __func__, slot, node));
730*8df38859SDoug Moore 			node->pn_popmap ^= 1 << slot;
7313c30b235SConrad Meyer 			pctrie_node_store(&node->pn_child[slot], NULL,
7323c30b235SConrad Meyer 			    PCTRIE_LOCKED);
733*8df38859SDoug Moore 			if (!powerof2(node->pn_popmap))
734f2cc1285SJeff Roberson 				break;
735*8df38859SDoug Moore 			KASSERT(node->pn_popmap != 0,
736*8df38859SDoug Moore 			    ("%s: bad popmap all zeroes", __func__));
737*8df38859SDoug Moore 			slot = ffs(node->pn_popmap) - 1;
738*8df38859SDoug Moore 			tmp = pctrie_node_load(&node->pn_child[slot],
7393c30b235SConrad Meyer 			    NULL, PCTRIE_LOCKED);
740e8efee29SDoug Moore 			KASSERT(tmp != NULL,
741*8df38859SDoug Moore 			    ("%s: bad popmap slot %d in node %p",
742*8df38859SDoug Moore 			    __func__, slot, node));
743f2cc1285SJeff Roberson 			if (parent == NULL)
7443c30b235SConrad Meyer 				pctrie_root_store(ptree, tmp, PCTRIE_LOCKED);
745f2cc1285SJeff Roberson 			else {
746f2cc1285SJeff Roberson 				slot = pctrie_slot(index, parent->pn_clev);
7473c30b235SConrad Meyer 				KASSERT(pctrie_node_load(
7483c30b235SConrad Meyer 					&parent->pn_child[slot], NULL,
7493c30b235SConrad Meyer 					PCTRIE_LOCKED) == node,
750f2cc1285SJeff Roberson 				    ("%s: invalid child value", __func__));
7513c30b235SConrad Meyer 				pctrie_node_store(&parent->pn_child[slot], tmp,
7523c30b235SConrad Meyer 				    PCTRIE_LOCKED);
753f2cc1285SJeff Roberson 			}
7543c30b235SConrad Meyer 			/*
7553c30b235SConrad Meyer 			 * The child is still valid and we can not zero the
7563c30b235SConrad Meyer 			 * pointer until all SMR references are gone.
7573c30b235SConrad Meyer 			 */
758*8df38859SDoug Moore 			pctrie_node_put(ptree, node, freefn);
759f2cc1285SJeff Roberson 			break;
760f2cc1285SJeff Roberson 		}
761f2cc1285SJeff Roberson 		parent = node;
7623c30b235SConrad Meyer 		node = tmp;
763f2cc1285SJeff Roberson 	}
764f2cc1285SJeff Roberson }
765f2cc1285SJeff Roberson 
766f2cc1285SJeff Roberson /*
767f2cc1285SJeff Roberson  * Remove and free all the nodes from the tree.
768f2cc1285SJeff Roberson  * This function is recursive but there is a tight control on it as the
769f2cc1285SJeff Roberson  * maximum depth of the tree is fixed.
770f2cc1285SJeff Roberson  */
771f2cc1285SJeff Roberson void
772f2cc1285SJeff Roberson pctrie_reclaim_allnodes(struct pctrie *ptree, pctrie_free_t freefn)
773f2cc1285SJeff Roberson {
774f2cc1285SJeff Roberson 	struct pctrie_node *root;
775f2cc1285SJeff Roberson 
7763c30b235SConrad Meyer 	root = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
777f2cc1285SJeff Roberson 	if (root == NULL)
778f2cc1285SJeff Roberson 		return;
7793c30b235SConrad Meyer 	pctrie_root_store(ptree, NULL, PCTRIE_UNSERIALIZED);
780f2cc1285SJeff Roberson 	if (!pctrie_isleaf(root))
781f2cc1285SJeff Roberson 		pctrie_reclaim_allnodes_int(ptree, root, freefn);
782f2cc1285SJeff Roberson }
783f2cc1285SJeff Roberson 
#ifdef DDB
/*
 * Show details about the given node: a header line with its address,
 * owner, popmap and level, followed by one line per slot whose popmap
 * bit is set.
 */
DB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
{
	struct pctrie_node *child, *node;
	pn_popmap_t remaining;
	int slot;

	/* Nothing to show unless an address was supplied. */
	if (!have_addr)
		return;
	node = (struct pctrie_node *)addr;
	db_printf("node %p, owner %jx, children popmap %04x, level %u:\n",
	    (void *)node, (uintmax_t)node->pn_owner, node->pn_popmap,
	    node->pn_clev);

	/* Visit set popmap bits from lowest to highest, clearing each. */
	remaining = node->pn_popmap;
	while (remaining != 0) {
		slot = ffs(remaining) - 1;
		child = pctrie_node_load(&node->pn_child[slot], NULL,
		    PCTRIE_UNSERIALIZED);
		db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
		    slot, (void *)child,
		    pctrie_isleaf(child) ? pctrie_toval(child) : NULL,
		    node->pn_clev);
		remaining ^= 1 << slot;
	}
}
#endif /* DDB */
811