xref: /freebsd/sys/kern/subr_pctrie.c (revision bbf81f46297f91ed6b4dde8877f4260c2d1e03f2)
18a36da99SPedro F. Giffuni /*-
24d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
38a36da99SPedro F. Giffuni  *
4f2cc1285SJeff Roberson  * Copyright (c) 2013 EMC Corp.
5f2cc1285SJeff Roberson  * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
6f2cc1285SJeff Roberson  * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
7f2cc1285SJeff Roberson  * All rights reserved.
8f2cc1285SJeff Roberson  *
9f2cc1285SJeff Roberson  * Redistribution and use in source and binary forms, with or without
10f2cc1285SJeff Roberson  * modification, are permitted provided that the following conditions
11f2cc1285SJeff Roberson  * are met:
12f2cc1285SJeff Roberson  * 1. Redistributions of source code must retain the above copyright
13f2cc1285SJeff Roberson  *    notice, this list of conditions and the following disclaimer.
14f2cc1285SJeff Roberson  * 2. Redistributions in binary form must reproduce the above copyright
15f2cc1285SJeff Roberson  *    notice, this list of conditions and the following disclaimer in the
16f2cc1285SJeff Roberson  *    documentation and/or other materials provided with the distribution.
17f2cc1285SJeff Roberson  *
18f2cc1285SJeff Roberson  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19f2cc1285SJeff Roberson  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20f2cc1285SJeff Roberson  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21f2cc1285SJeff Roberson  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22f2cc1285SJeff Roberson  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23f2cc1285SJeff Roberson  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24f2cc1285SJeff Roberson  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25f2cc1285SJeff Roberson  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26f2cc1285SJeff Roberson  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27f2cc1285SJeff Roberson  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28f2cc1285SJeff Roberson  * SUCH DAMAGE.
29f2cc1285SJeff Roberson  *
30f2cc1285SJeff Roberson  */
31f2cc1285SJeff Roberson 
32f2cc1285SJeff Roberson /*
33f2cc1285SJeff Roberson  * Path-compressed radix trie implementation.
34f2cc1285SJeff Roberson  *
35f2cc1285SJeff Roberson  * The implementation takes into account the following rationale:
36f2cc1285SJeff Roberson  * - Size of the nodes should be as small as possible but still big enough
37f2cc1285SJeff Roberson  *   to avoid a large maximum depth for the trie.  This is a balance
38f2cc1285SJeff Roberson  *   between the necessity to not wire too much physical memory for the nodes
39f2cc1285SJeff Roberson  *   and the necessity to avoid too much cache pollution during the trie
40f2cc1285SJeff Roberson  *   operations.
41f2cc1285SJeff Roberson  * - There is not a huge bias toward the number of lookup operations over
42f2cc1285SJeff Roberson  *   the number of insert and remove operations.  This basically implies
43f2cc1285SJeff Roberson  *   that optimizations supposedly helping one operation but hurting the
44f2cc1285SJeff Roberson  *   other might be carefully evaluated.
45f2cc1285SJeff Roberson  * - On average not many nodes are expected to be fully populated, hence
46f2cc1285SJeff Roberson  *   level compression may just complicate things.
47f2cc1285SJeff Roberson  */
48f2cc1285SJeff Roberson 
49f2cc1285SJeff Roberson #include <sys/cdefs.h>
50f2cc1285SJeff Roberson #include "opt_ddb.h"
51f2cc1285SJeff Roberson 
52f2cc1285SJeff Roberson #include <sys/param.h>
53f2cc1285SJeff Roberson #include <sys/systm.h>
54f2cc1285SJeff Roberson #include <sys/kernel.h>
5505963ea4SDoug Moore #include <sys/libkern.h>
56f2cc1285SJeff Roberson #include <sys/pctrie.h>
573c30b235SConrad Meyer #include <sys/proc.h>	/* smr.h depends on struct thread. */
583c30b235SConrad Meyer #include <sys/smr.h>
593c30b235SConrad Meyer #include <sys/smr_types.h>
60f2cc1285SJeff Roberson 
61f2cc1285SJeff Roberson #ifdef DDB
62f2cc1285SJeff Roberson #include <ddb/ddb.h>
63f2cc1285SJeff Roberson #endif
64f2cc1285SJeff Roberson 
65f2cc1285SJeff Roberson #define	PCTRIE_MASK	(PCTRIE_COUNT - 1)
6655e0987aSPedro F. Giffuni #define	PCTRIE_LIMIT	(howmany(sizeof(uint64_t) * NBBY, PCTRIE_WIDTH) - 1)
67f2cc1285SJeff Roberson 
688df38859SDoug Moore #if PCTRIE_WIDTH == 3
698df38859SDoug Moore typedef uint8_t pn_popmap_t;
708df38859SDoug Moore #elif PCTRIE_WIDTH == 4
718df38859SDoug Moore typedef uint16_t pn_popmap_t;
728df38859SDoug Moore #elif PCTRIE_WIDTH == 5
738df38859SDoug Moore typedef uint32_t pn_popmap_t;
748df38859SDoug Moore #else
758df38859SDoug Moore #error Unsupported width
768df38859SDoug Moore #endif
778df38859SDoug Moore _Static_assert(sizeof(pn_popmap_t) <= sizeof(int),
788df38859SDoug Moore     "pn_popmap_t too wide");
798df38859SDoug Moore 
803c30b235SConrad Meyer struct pctrie_node;
813c30b235SConrad Meyer typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t;
823c30b235SConrad Meyer 
83f2cc1285SJeff Roberson struct pctrie_node {
84f2cc1285SJeff Roberson 	uint64_t	pn_owner;			/* Owner of record. */
858df38859SDoug Moore 	pn_popmap_t	pn_popmap;			/* Valid children. */
8638f5cb1bSDoug Moore 	uint8_t		pn_clev;			/* Level * WIDTH. */
873c30b235SConrad Meyer 	smr_pctnode_t	pn_child[PCTRIE_COUNT];		/* Child nodes. */
88f2cc1285SJeff Roberson };
89f2cc1285SJeff Roberson 
903c30b235SConrad Meyer enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED };
913c30b235SConrad Meyer 
923c30b235SConrad Meyer static __inline void pctrie_node_store(smr_pctnode_t *p, void *val,
933c30b235SConrad Meyer     enum pctrie_access access);
943c30b235SConrad Meyer 
95f2cc1285SJeff Roberson /*
96ac0572e6SDoug Moore  * Map index to an array position for the children of node,
97da72505fSDoug Moore  */
98da72505fSDoug Moore static __inline int
99ac0572e6SDoug Moore pctrie_slot(struct pctrie_node *node, uint64_t index)
100da72505fSDoug Moore {
101ac0572e6SDoug Moore 	return ((index >> node->pn_clev) & PCTRIE_MASK);
102da72505fSDoug Moore }
103da72505fSDoug Moore 
104ac0572e6SDoug Moore /*
105ac0572e6SDoug Moore  * Returns true if index does not belong to the specified node.  Otherwise,
106ac0572e6SDoug Moore  * sets slot value, and returns false.
107ac0572e6SDoug Moore  */
108ac0572e6SDoug Moore static __inline bool
109ac0572e6SDoug Moore pctrie_keybarr(struct pctrie_node *node, uint64_t index, int *slot)
110da72505fSDoug Moore {
111ac0572e6SDoug Moore 	index = (index - node->pn_owner) >> node->pn_clev;
112ac0572e6SDoug Moore 	if (index >= PCTRIE_COUNT)
113ac0572e6SDoug Moore 		return (true);
114ac0572e6SDoug Moore 	*slot = index;
115ac0572e6SDoug Moore 	return (false);
116da72505fSDoug Moore }
117da72505fSDoug Moore 
118da72505fSDoug Moore /*
1193b7ffacdSDoug Moore  * Check radix node.
120f2cc1285SJeff Roberson  */
121f2cc1285SJeff Roberson static __inline void
1223b7ffacdSDoug Moore pctrie_node_put(struct pctrie_node *node)
123f2cc1285SJeff Roberson {
124f2cc1285SJeff Roberson #ifdef INVARIANTS
125f2cc1285SJeff Roberson 	int slot;
126f2cc1285SJeff Roberson 
1278df38859SDoug Moore 	KASSERT(powerof2(node->pn_popmap),
1288df38859SDoug Moore 	    ("pctrie_node_put: node %p has too many children %04x", node,
1298df38859SDoug Moore 	    node->pn_popmap));
1303c30b235SConrad Meyer 	for (slot = 0; slot < PCTRIE_COUNT; slot++) {
1318df38859SDoug Moore 		if ((node->pn_popmap & (1 << slot)) != 0)
1323c30b235SConrad Meyer 			continue;
1333c30b235SConrad Meyer 		KASSERT(smr_unserialized_load(&node->pn_child[slot], true) ==
1342d2bcba7SDoug Moore 		    PCTRIE_NULL,
1352d2bcba7SDoug Moore 		    ("pctrie_node_put: node %p has a child", node));
1363c30b235SConrad Meyer 	}
137f2cc1285SJeff Roberson #endif
138f2cc1285SJeff Roberson }
139f2cc1285SJeff Roberson 
140f2cc1285SJeff Roberson /*
1413c30b235SConrad Meyer  * Fetch a node pointer from a slot.
1423c30b235SConrad Meyer  */
1433c30b235SConrad Meyer static __inline struct pctrie_node *
1443c30b235SConrad Meyer pctrie_node_load(smr_pctnode_t *p, smr_t smr, enum pctrie_access access)
1453c30b235SConrad Meyer {
1463c30b235SConrad Meyer 	switch (access) {
1473c30b235SConrad Meyer 	case PCTRIE_UNSERIALIZED:
1483c30b235SConrad Meyer 		return (smr_unserialized_load(p, true));
1493c30b235SConrad Meyer 	case PCTRIE_LOCKED:
1503c30b235SConrad Meyer 		return (smr_serialized_load(p, true));
1513c30b235SConrad Meyer 	case PCTRIE_SMR:
1523c30b235SConrad Meyer 		return (smr_entered_load(p, smr));
1533c30b235SConrad Meyer 	}
1543c30b235SConrad Meyer 	__assert_unreachable();
1553c30b235SConrad Meyer }
1563c30b235SConrad Meyer 
1573c30b235SConrad Meyer static __inline void
1583c30b235SConrad Meyer pctrie_node_store(smr_pctnode_t *p, void *v, enum pctrie_access access)
1593c30b235SConrad Meyer {
1603c30b235SConrad Meyer 	switch (access) {
1613c30b235SConrad Meyer 	case PCTRIE_UNSERIALIZED:
1623c30b235SConrad Meyer 		smr_unserialized_store(p, v, true);
1633c30b235SConrad Meyer 		break;
1643c30b235SConrad Meyer 	case PCTRIE_LOCKED:
1653c30b235SConrad Meyer 		smr_serialized_store(p, v, true);
1663c30b235SConrad Meyer 		break;
1673c30b235SConrad Meyer 	case PCTRIE_SMR:
1683c30b235SConrad Meyer 		panic("%s: Not supported in SMR section.", __func__);
1693c30b235SConrad Meyer 		break;
1703c30b235SConrad Meyer 	default:
1713c30b235SConrad Meyer 		__assert_unreachable();
1723c30b235SConrad Meyer 		break;
1733c30b235SConrad Meyer 	}
1743c30b235SConrad Meyer }
1753c30b235SConrad Meyer 
1763c30b235SConrad Meyer /*
177f2cc1285SJeff Roberson  * Get the root node for a tree.
178f2cc1285SJeff Roberson  */
179f2cc1285SJeff Roberson static __inline struct pctrie_node *
1803c30b235SConrad Meyer pctrie_root_load(struct pctrie *ptree, smr_t smr, enum pctrie_access access)
181f2cc1285SJeff Roberson {
1823c30b235SConrad Meyer 	return (pctrie_node_load((smr_pctnode_t *)&ptree->pt_root, smr, access));
183f2cc1285SJeff Roberson }
184f2cc1285SJeff Roberson 
185f2cc1285SJeff Roberson /*
186f2cc1285SJeff Roberson  * Set the root node for a tree.
187f2cc1285SJeff Roberson  */
188f2cc1285SJeff Roberson static __inline void
1893c30b235SConrad Meyer pctrie_root_store(struct pctrie *ptree, struct pctrie_node *node,
1903c30b235SConrad Meyer     enum pctrie_access access)
191f2cc1285SJeff Roberson {
1923c30b235SConrad Meyer 	pctrie_node_store((smr_pctnode_t *)&ptree->pt_root, node, access);
193f2cc1285SJeff Roberson }
194f2cc1285SJeff Roberson 
195f2cc1285SJeff Roberson /*
196f2cc1285SJeff Roberson  * Returns TRUE if the specified node is a leaf and FALSE otherwise.
197f2cc1285SJeff Roberson  */
19804f9afaeSConrad Meyer static __inline bool
199f2cc1285SJeff Roberson pctrie_isleaf(struct pctrie_node *node)
200f2cc1285SJeff Roberson {
201f2cc1285SJeff Roberson 
202f2cc1285SJeff Roberson 	return (((uintptr_t)node & PCTRIE_ISLEAF) != 0);
203f2cc1285SJeff Roberson }
204f2cc1285SJeff Roberson 
205f2cc1285SJeff Roberson /*
2069cfed089SDoug Moore  * Returns val with leaf bit set.
2079cfed089SDoug Moore  */
2089cfed089SDoug Moore static __inline void *
2099cfed089SDoug Moore pctrie_toleaf(uint64_t *val)
2109cfed089SDoug Moore {
2119cfed089SDoug Moore 	return ((void *)((uintptr_t)val | PCTRIE_ISLEAF));
2129cfed089SDoug Moore }
2139cfed089SDoug Moore 
2149cfed089SDoug Moore /*
215f2cc1285SJeff Roberson  * Returns the associated val extracted from node.
216f2cc1285SJeff Roberson  */
217f2cc1285SJeff Roberson static __inline uint64_t *
218f2cc1285SJeff Roberson pctrie_toval(struct pctrie_node *node)
219f2cc1285SJeff Roberson {
220f2cc1285SJeff Roberson 
221f2cc1285SJeff Roberson 	return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS));
222f2cc1285SJeff Roberson }
223f2cc1285SJeff Roberson 
224f2cc1285SJeff Roberson /*
22516e01c05SDoug Moore  * Make 'child' a child of 'node'.
226f2cc1285SJeff Roberson  */
227f2cc1285SJeff Roberson static __inline void
22838f5cb1bSDoug Moore pctrie_addnode(struct pctrie_node *node, uint64_t index,
22916e01c05SDoug Moore     struct pctrie_node *child, enum pctrie_access access)
230f2cc1285SJeff Roberson {
231f2cc1285SJeff Roberson 	int slot;
232f2cc1285SJeff Roberson 
233ac0572e6SDoug Moore 	slot = pctrie_slot(node, index);
23416e01c05SDoug Moore 	pctrie_node_store(&node->pn_child[slot], child, access);
2358df38859SDoug Moore 	node->pn_popmap ^= 1 << slot;
2368df38859SDoug Moore 	KASSERT((node->pn_popmap & (1 << slot)) != 0,
2378df38859SDoug Moore 	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
238f2cc1285SJeff Roberson }
239f2cc1285SJeff Roberson 
240f2cc1285SJeff Roberson /*
241f2cc1285SJeff Roberson  * pctrie node zone initializer.
242f2cc1285SJeff Roberson  */
243f2cc1285SJeff Roberson int
244f2cc1285SJeff Roberson pctrie_zone_init(void *mem, int size __unused, int flags __unused)
245f2cc1285SJeff Roberson {
246f2cc1285SJeff Roberson 	struct pctrie_node *node;
247f2cc1285SJeff Roberson 
248f2cc1285SJeff Roberson 	node = mem;
2498df38859SDoug Moore 	node->pn_popmap = 0;
2502d2bcba7SDoug Moore 	for (int i = 0; i < nitems(node->pn_child); i++)
2512d2bcba7SDoug Moore 		pctrie_node_store(&node->pn_child[i], PCTRIE_NULL,
2522d2bcba7SDoug Moore 		    PCTRIE_UNSERIALIZED);
253f2cc1285SJeff Roberson 	return (0);
254f2cc1285SJeff Roberson }
255f2cc1285SJeff Roberson 
256f2cc1285SJeff Roberson size_t
257f2cc1285SJeff Roberson pctrie_node_size(void)
258f2cc1285SJeff Roberson {
259f2cc1285SJeff Roberson 
260f2cc1285SJeff Roberson 	return (sizeof(struct pctrie_node));
261f2cc1285SJeff Roberson }
262f2cc1285SJeff Roberson 
263*bbf81f46SRyan Libby enum pctrie_insert_neighbor_mode {
264*bbf81f46SRyan Libby 	PCTRIE_INSERT_NEIGHBOR_NONE,
265*bbf81f46SRyan Libby 	PCTRIE_INSERT_NEIGHBOR_LT,
266*bbf81f46SRyan Libby 	PCTRIE_INSERT_NEIGHBOR_GT,
267*bbf81f46SRyan Libby };
268*bbf81f46SRyan Libby 
269f2cc1285SJeff Roberson /*
270*bbf81f46SRyan Libby  * Look for where to insert the key-value pair into the trie.  Complete the
271*bbf81f46SRyan Libby  * insertion if it replaces a null leaf.  Return the insertion location if the
272*bbf81f46SRyan Libby  * insertion needs to be completed by the caller; otherwise return NULL.
273*bbf81f46SRyan Libby  *
274*bbf81f46SRyan Libby  * If the key is already present in the trie, populate *found_out as if by
275*bbf81f46SRyan Libby  * pctrie_lookup().
276*bbf81f46SRyan Libby  *
277*bbf81f46SRyan Libby  * With mode PCTRIE_INSERT_NEIGHBOR_GT or PCTRIE_INSERT_NEIGHBOR_LT, set
278*bbf81f46SRyan Libby  * *neighbor_out to the lowest level node we encounter during the insert lookup
279*bbf81f46SRyan Libby  * that is a parent of the next greater or lesser entry.  The value is not
280*bbf81f46SRyan Libby  * defined if the key was already present in the trie.
281*bbf81f46SRyan Libby  *
282*bbf81f46SRyan Libby  * Note that mode is expected to be a compile-time constant, and this procedure
283*bbf81f46SRyan Libby  * is expected to be inlined into callers with extraneous code optimized out.
284f2cc1285SJeff Roberson  */
285*bbf81f46SRyan Libby static __always_inline void *
286*bbf81f46SRyan Libby pctrie_insert_lookup_compound(struct pctrie *ptree, uint64_t *val,
287*bbf81f46SRyan Libby     uint64_t **found_out, struct pctrie_node **neighbor_out,
288*bbf81f46SRyan Libby     enum pctrie_insert_neighbor_mode mode)
289f2cc1285SJeff Roberson {
2903b7ffacdSDoug Moore 	uint64_t index;
2913b7ffacdSDoug Moore 	struct pctrie_node *node, *parent;
292f2cc1285SJeff Roberson 	int slot;
293f2cc1285SJeff Roberson 
294f2cc1285SJeff Roberson 	index = *val;
295f2cc1285SJeff Roberson 
296f2cc1285SJeff Roberson 	/*
297f2cc1285SJeff Roberson 	 * The owner of record for root is not really important because it
298f2cc1285SJeff Roberson 	 * will never be used.
299f2cc1285SJeff Roberson 	 */
3003c30b235SConrad Meyer 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
3012d2bcba7SDoug Moore 	parent = NULL;
3022d2bcba7SDoug Moore 	for (;;) {
3032d2bcba7SDoug Moore 		if (pctrie_isleaf(node)) {
3042d2bcba7SDoug Moore 			if (node == PCTRIE_NULL) {
3052d2bcba7SDoug Moore 				if (parent == NULL)
3063b7ffacdSDoug Moore 					ptree->pt_root = pctrie_toleaf(val);
3072d2bcba7SDoug Moore 				else
3083b7ffacdSDoug Moore 					pctrie_addnode(parent, index,
3093b7ffacdSDoug Moore 					    pctrie_toleaf(val), PCTRIE_LOCKED);
3103b7ffacdSDoug Moore 				return (NULL);
311f2cc1285SJeff Roberson 			}
312*bbf81f46SRyan Libby 			if (*pctrie_toval(node) == index) {
313*bbf81f46SRyan Libby 				*found_out = pctrie_toval(node);
314*bbf81f46SRyan Libby 				return (NULL);
315*bbf81f46SRyan Libby 			}
316f2cc1285SJeff Roberson 			break;
3172d2bcba7SDoug Moore 		}
3183b7ffacdSDoug Moore 		if (pctrie_keybarr(node, index, &slot))
31916e01c05SDoug Moore 			break;
320*bbf81f46SRyan Libby 		/*
321*bbf81f46SRyan Libby 		 * Descend.  If we're tracking the next neighbor and this node
322*bbf81f46SRyan Libby 		 * contains a neighboring entry in the right direction, record
323*bbf81f46SRyan Libby 		 * it.
324*bbf81f46SRyan Libby 		 */
325*bbf81f46SRyan Libby 		if (mode == PCTRIE_INSERT_NEIGHBOR_LT) {
326*bbf81f46SRyan Libby 			if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
327*bbf81f46SRyan Libby 				*neighbor_out = node;
328*bbf81f46SRyan Libby 		} else if (mode == PCTRIE_INSERT_NEIGHBOR_GT) {
329*bbf81f46SRyan Libby 			if ((node->pn_popmap >> slot) > 1)
330*bbf81f46SRyan Libby 				*neighbor_out = node;
331*bbf81f46SRyan Libby 		}
3322d2bcba7SDoug Moore 		parent = node;
3332d2bcba7SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
3343c30b235SConrad Meyer 		    PCTRIE_LOCKED);
335f2cc1285SJeff Roberson 	}
336f2cc1285SJeff Roberson 
337f2cc1285SJeff Roberson 	/*
338*bbf81f46SRyan Libby 	 * The caller will split this node.  If we're tracking the next
339*bbf81f46SRyan Libby 	 * neighbor, record the old node if the old entry is in the right
340*bbf81f46SRyan Libby 	 * direction.
341*bbf81f46SRyan Libby 	 */
342*bbf81f46SRyan Libby 	if (mode == PCTRIE_INSERT_NEIGHBOR_LT) {
343*bbf81f46SRyan Libby 		if (*pctrie_toval(node) < index)
344*bbf81f46SRyan Libby 			*neighbor_out = node;
345*bbf81f46SRyan Libby 	} else if (mode == PCTRIE_INSERT_NEIGHBOR_GT) {
346*bbf81f46SRyan Libby 		if (*pctrie_toval(node) > index)
347*bbf81f46SRyan Libby 			*neighbor_out = node;
348*bbf81f46SRyan Libby 	}
349*bbf81f46SRyan Libby 
350*bbf81f46SRyan Libby 	/*
3513b7ffacdSDoug Moore 	 * 'node' must be replaced in the tree with a new branch node, with
3523b7ffacdSDoug Moore 	 * children 'node' and 'val'. Return the place that points to 'node'
3533b7ffacdSDoug Moore 	 * now, and will point to to the new branching node later.
354f2cc1285SJeff Roberson 	 */
3553b7ffacdSDoug Moore 	return ((parent != NULL) ? &parent->pn_child[slot]:
3563b7ffacdSDoug Moore 	    (smr_pctnode_t *)&ptree->pt_root);
3573b7ffacdSDoug Moore }
3583b7ffacdSDoug Moore 
3593b7ffacdSDoug Moore /*
360*bbf81f46SRyan Libby  * Wrap pctrie_insert_lookup_compound to implement a strict insertion.  Panic
361*bbf81f46SRyan Libby  * if the key already exists, and do not look for neighboring entries.
362*bbf81f46SRyan Libby  */
363*bbf81f46SRyan Libby void *
364*bbf81f46SRyan Libby pctrie_insert_lookup_strict(struct pctrie *ptree, uint64_t *val)
365*bbf81f46SRyan Libby {
366*bbf81f46SRyan Libby 	void *parentp;
367*bbf81f46SRyan Libby 	uint64_t *found;
368*bbf81f46SRyan Libby 
369*bbf81f46SRyan Libby 	found = NULL;
370*bbf81f46SRyan Libby 	parentp = pctrie_insert_lookup_compound(ptree, val, &found, NULL,
371*bbf81f46SRyan Libby 	    PCTRIE_INSERT_NEIGHBOR_NONE);
372*bbf81f46SRyan Libby 	if (__predict_false(found != NULL))
373*bbf81f46SRyan Libby 		panic("%s: key %jx is already present", __func__,
374*bbf81f46SRyan Libby 		    (uintmax_t)*val);
375*bbf81f46SRyan Libby 	return (parentp);
376*bbf81f46SRyan Libby }
377*bbf81f46SRyan Libby 
378*bbf81f46SRyan Libby /*
379*bbf81f46SRyan Libby  * Wrap pctrie_insert_lookup_compound to implement find-or-insert.  Do not look
380*bbf81f46SRyan Libby  * for neighboring entries.
381*bbf81f46SRyan Libby  */
382*bbf81f46SRyan Libby void *
383*bbf81f46SRyan Libby pctrie_insert_lookup(struct pctrie *ptree, uint64_t *val,
384*bbf81f46SRyan Libby     uint64_t **found_out)
385*bbf81f46SRyan Libby {
386*bbf81f46SRyan Libby 	*found_out = NULL;
387*bbf81f46SRyan Libby 	return (pctrie_insert_lookup_compound(ptree, val, found_out, NULL,
388*bbf81f46SRyan Libby 	    PCTRIE_INSERT_NEIGHBOR_NONE));
389*bbf81f46SRyan Libby }
390*bbf81f46SRyan Libby 
391*bbf81f46SRyan Libby /*
392*bbf81f46SRyan Libby  * Wrap pctrie_insert_lookup_compound to implement find or insert and find next
393*bbf81f46SRyan Libby  * greater entry.  Find a subtree that contains the next entry greater than the
394*bbf81f46SRyan Libby  * newly-inserted or to-be-inserted entry.
395*bbf81f46SRyan Libby  */
396*bbf81f46SRyan Libby void *
397*bbf81f46SRyan Libby pctrie_insert_lookup_gt(struct pctrie *ptree, uint64_t *val,
398*bbf81f46SRyan Libby     uint64_t **found_out, struct pctrie_node **neighbor_out)
399*bbf81f46SRyan Libby {
400*bbf81f46SRyan Libby 	*found_out = NULL;
401*bbf81f46SRyan Libby 	*neighbor_out = NULL;
402*bbf81f46SRyan Libby 	return (pctrie_insert_lookup_compound(ptree, val, found_out,
403*bbf81f46SRyan Libby 	    neighbor_out, PCTRIE_INSERT_NEIGHBOR_GT));
404*bbf81f46SRyan Libby }
405*bbf81f46SRyan Libby 
406*bbf81f46SRyan Libby /*
407*bbf81f46SRyan Libby  * Wrap pctrie_insert_lookup_compound to implement find or insert and find next
408*bbf81f46SRyan Libby  * lesser entry.  Find a subtree that contains the next entry less than the
409*bbf81f46SRyan Libby  * newly-inserted or to-be-inserted entry.
410*bbf81f46SRyan Libby  */
411*bbf81f46SRyan Libby void *
412*bbf81f46SRyan Libby pctrie_insert_lookup_lt(struct pctrie *ptree, uint64_t *val,
413*bbf81f46SRyan Libby     uint64_t **found_out, struct pctrie_node **neighbor_out)
414*bbf81f46SRyan Libby {
415*bbf81f46SRyan Libby 	*found_out = NULL;
416*bbf81f46SRyan Libby 	*neighbor_out = NULL;
417*bbf81f46SRyan Libby 	return (pctrie_insert_lookup_compound(ptree, val, found_out,
418*bbf81f46SRyan Libby 	    neighbor_out, PCTRIE_INSERT_NEIGHBOR_LT));
419*bbf81f46SRyan Libby }
420*bbf81f46SRyan Libby 
421*bbf81f46SRyan Libby /*
4223b7ffacdSDoug Moore  * Uses new node to insert key-value pair into the trie at given location.
4233b7ffacdSDoug Moore  */
4243b7ffacdSDoug Moore void
4253b7ffacdSDoug Moore pctrie_insert_node(void *parentp, struct pctrie_node *parent, uint64_t *val)
4263b7ffacdSDoug Moore {
4273b7ffacdSDoug Moore 	struct pctrie_node *node;
4283b7ffacdSDoug Moore 	uint64_t index, newind;
4293b7ffacdSDoug Moore 
4303b7ffacdSDoug Moore 	/*
4313b7ffacdSDoug Moore 	 * Clear the last child pointer of the newly allocated parent.  We want
4323b7ffacdSDoug Moore 	 * to clear it after the final section has exited so lookup can not
4333b7ffacdSDoug Moore 	 * return false negatives.  It is done here because it will be
4343b7ffacdSDoug Moore 	 * cache-cold in the dtor callback.
4353b7ffacdSDoug Moore 	 */
4363b7ffacdSDoug Moore 	if (parent->pn_popmap != 0) {
4373b7ffacdSDoug Moore 		pctrie_node_store(&parent->pn_child[ffs(parent->pn_popmap) - 1],
4383b7ffacdSDoug Moore 		    PCTRIE_NULL, PCTRIE_UNSERIALIZED);
4393b7ffacdSDoug Moore 		parent->pn_popmap = 0;
4403b7ffacdSDoug Moore 	}
4413b7ffacdSDoug Moore 
4423b7ffacdSDoug Moore 	/*
4433b7ffacdSDoug Moore 	 * Recover the values of the two children of the new parent node.  If
4443b7ffacdSDoug Moore 	 * 'node' is not a leaf, this stores into 'newind' the 'owner' field,
4453b7ffacdSDoug Moore 	 * which must be first in the node.
4463b7ffacdSDoug Moore 	 */
4473b7ffacdSDoug Moore 	index = *val;
4483b7ffacdSDoug Moore 	node = pctrie_node_load(parentp, NULL, PCTRIE_UNSERIALIZED);
4493b7ffacdSDoug Moore 	newind = *pctrie_toval(node);
4503b7ffacdSDoug Moore 
4513b7ffacdSDoug Moore 	/*
4523b7ffacdSDoug Moore 	 * From the highest-order bit where the indexes differ,
4533b7ffacdSDoug Moore 	 * compute the highest level in the trie where they differ.  Then,
4543b7ffacdSDoug Moore 	 * compute the least index of this subtrie.
4553b7ffacdSDoug Moore 	 */
4563b7ffacdSDoug Moore 	_Static_assert(sizeof(long long) >= sizeof(uint64_t),
4573b7ffacdSDoug Moore 	    "uint64 too wide");
4583b7ffacdSDoug Moore 	_Static_assert(sizeof(uint64_t) * NBBY <=
4593b7ffacdSDoug Moore 	    (1 << (sizeof(parent->pn_clev) * NBBY)), "pn_clev too narrow");
460749c249dSDoug Moore 	parent->pn_clev = rounddown(ilog2(index ^ newind), PCTRIE_WIDTH);
4613b7ffacdSDoug Moore 	parent->pn_owner = PCTRIE_COUNT;
4623b7ffacdSDoug Moore 	parent->pn_owner = index & -(parent->pn_owner << parent->pn_clev);
4633b7ffacdSDoug Moore 
4643b7ffacdSDoug Moore 
4653c30b235SConrad Meyer 	/* These writes are not yet visible due to ordering. */
4663b7ffacdSDoug Moore 	pctrie_addnode(parent, index, pctrie_toleaf(val), PCTRIE_UNSERIALIZED);
46738f5cb1bSDoug Moore 	pctrie_addnode(parent, newind, node, PCTRIE_UNSERIALIZED);
4683c30b235SConrad Meyer 	/* Synchronize to make the above visible. */
4692d2bcba7SDoug Moore 	pctrie_node_store(parentp, parent, PCTRIE_LOCKED);
470f2cc1285SJeff Roberson }
471f2cc1285SJeff Roberson 
472f2cc1285SJeff Roberson /*
473f2cc1285SJeff Roberson  * Returns the value stored at the index.  If the index is not present,
474f2cc1285SJeff Roberson  * NULL is returned.
475f2cc1285SJeff Roberson  */
4763c30b235SConrad Meyer static __always_inline uint64_t *
4773c30b235SConrad Meyer _pctrie_lookup(struct pctrie *ptree, uint64_t index, smr_t smr,
4783c30b235SConrad Meyer     enum pctrie_access access)
479f2cc1285SJeff Roberson {
480f2cc1285SJeff Roberson 	struct pctrie_node *node;
481f2cc1285SJeff Roberson 	uint64_t *m;
482f2cc1285SJeff Roberson 	int slot;
483f2cc1285SJeff Roberson 
4843c30b235SConrad Meyer 	node = pctrie_root_load(ptree, smr, access);
4852d2bcba7SDoug Moore 	for (;;) {
486f2cc1285SJeff Roberson 		if (pctrie_isleaf(node)) {
4872d2bcba7SDoug Moore 			if ((m = pctrie_toval(node)) != NULL && *m == index)
488f2cc1285SJeff Roberson 				return (m);
489f2cc1285SJeff Roberson 			break;
4903c30b235SConrad Meyer 		}
491ac0572e6SDoug Moore 		if (pctrie_keybarr(node, index, &slot))
492f2cc1285SJeff Roberson 			break;
4933c30b235SConrad Meyer 		node = pctrie_node_load(&node->pn_child[slot], smr, access);
494f2cc1285SJeff Roberson 	}
495f2cc1285SJeff Roberson 	return (NULL);
496f2cc1285SJeff Roberson }
497f2cc1285SJeff Roberson 
498f2cc1285SJeff Roberson /*
4993c30b235SConrad Meyer  * Returns the value stored at the index, assuming access is externally
5003c30b235SConrad Meyer  * synchronized by a lock.
5013c30b235SConrad Meyer  *
5023c30b235SConrad Meyer  * If the index is not present, NULL is returned.
5033c30b235SConrad Meyer  */
5043c30b235SConrad Meyer uint64_t *
5053c30b235SConrad Meyer pctrie_lookup(struct pctrie *ptree, uint64_t index)
5063c30b235SConrad Meyer {
5073c30b235SConrad Meyer 	return (_pctrie_lookup(ptree, index, NULL, PCTRIE_LOCKED));
5083c30b235SConrad Meyer }
5093c30b235SConrad Meyer 
5103c30b235SConrad Meyer /*
5113c30b235SConrad Meyer  * Returns the value stored at the index without requiring an external lock.
5123c30b235SConrad Meyer  *
5133c30b235SConrad Meyer  * If the index is not present, NULL is returned.
5143c30b235SConrad Meyer  */
5153c30b235SConrad Meyer uint64_t *
5163c30b235SConrad Meyer pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr)
5173c30b235SConrad Meyer {
5183c30b235SConrad Meyer 	uint64_t *res;
5193c30b235SConrad Meyer 
5203c30b235SConrad Meyer 	smr_enter(smr);
5213c30b235SConrad Meyer 	res = _pctrie_lookup(ptree, index, smr, PCTRIE_SMR);
5223c30b235SConrad Meyer 	smr_exit(smr);
5233c30b235SConrad Meyer 	return (res);
5243c30b235SConrad Meyer }
5253c30b235SConrad Meyer 
5263c30b235SConrad Meyer /*
5276f251ef2SDoug Moore  * Returns the value with the least index that is greater than or equal to the
5286f251ef2SDoug Moore  * specified index, or NULL if there are no such values.
5296f251ef2SDoug Moore  *
5306f251ef2SDoug Moore  * Requires that access be externally synchronized by a lock.
531f2cc1285SJeff Roberson  */
532*bbf81f46SRyan Libby static __inline uint64_t *
533*bbf81f46SRyan Libby pctrie_lookup_ge_node(struct pctrie_node *node, uint64_t index)
534f2cc1285SJeff Roberson {
535*bbf81f46SRyan Libby 	struct pctrie_node *succ;
536f2cc1285SJeff Roberson 	uint64_t *m;
537d1139b52SConrad Meyer 	int slot;
538f2cc1285SJeff Roberson 
5396f251ef2SDoug Moore 	/*
5406f251ef2SDoug Moore 	 * Descend the trie as if performing an ordinary lookup for the
5416f251ef2SDoug Moore 	 * specified value.  However, unlike an ordinary lookup, as we descend
5426f251ef2SDoug Moore 	 * the trie, we use "succ" to remember the last branching-off point,
5436f251ef2SDoug Moore 	 * that is, the interior node under which the least value that is both
5446f251ef2SDoug Moore 	 * outside our current path down the trie and greater than the specified
5456f251ef2SDoug Moore 	 * index resides.  (The node's popmap makes it fast and easy to
5466f251ef2SDoug Moore 	 * recognize a branching-off point.)  If our ordinary lookup fails to
5476f251ef2SDoug Moore 	 * yield a value that is greater than or equal to the specified index,
5486f251ef2SDoug Moore 	 * then we will exit this loop and perform a lookup starting from
5496f251ef2SDoug Moore 	 * "succ".  If "succ" is not NULL, then that lookup is guaranteed to
5506f251ef2SDoug Moore 	 * succeed.
5516f251ef2SDoug Moore 	 */
5526f251ef2SDoug Moore 	succ = NULL;
5532d2bcba7SDoug Moore 	for (;;) {
5546f251ef2SDoug Moore 		if (pctrie_isleaf(node)) {
5552d2bcba7SDoug Moore 			if ((m = pctrie_toval(node)) != NULL && *m >= index)
556f2cc1285SJeff Roberson 				return (m);
5576f251ef2SDoug Moore 			break;
558f2cc1285SJeff Roberson 		}
559ac0572e6SDoug Moore 		if (pctrie_keybarr(node, index, &slot)) {
560f2cc1285SJeff Roberson 			/*
5616f251ef2SDoug Moore 			 * If all values in this subtree are > index, then the
5626f251ef2SDoug Moore 			 * least value in this subtree is the answer.
563f2cc1285SJeff Roberson 			 */
5646f251ef2SDoug Moore 			if (node->pn_owner > index)
5656f251ef2SDoug Moore 				succ = node;
5666f251ef2SDoug Moore 			break;
567f2cc1285SJeff Roberson 		}
568f2cc1285SJeff Roberson 
569f2cc1285SJeff Roberson 		/*
5706f251ef2SDoug Moore 		 * Just in case the next search step leads to a subtree of all
5716f251ef2SDoug Moore 		 * values < index, check popmap to see if a next bigger step, to
5726f251ef2SDoug Moore 		 * a subtree of all pages with values > index, is available.  If
5736f251ef2SDoug Moore 		 * so, remember to restart the search here.
574f2cc1285SJeff Roberson 		 */
5756f251ef2SDoug Moore 		if ((node->pn_popmap >> slot) > 1)
5766f251ef2SDoug Moore 			succ = node;
5776f251ef2SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
5786f251ef2SDoug Moore 		    PCTRIE_LOCKED);
579f2cc1285SJeff Roberson 	}
580f2cc1285SJeff Roberson 
581f2cc1285SJeff Roberson 	/*
5826f251ef2SDoug Moore 	 * Restart the search from the last place visited in the subtree that
5836f251ef2SDoug Moore 	 * included some values > index, if there was such a place.
5846f251ef2SDoug Moore 	 */
5856f251ef2SDoug Moore 	if (succ == NULL)
5866f251ef2SDoug Moore 		return (NULL);
5876f251ef2SDoug Moore 	if (succ != node) {
5886f251ef2SDoug Moore 		/*
5896f251ef2SDoug Moore 		 * Take a step to the next bigger sibling of the node chosen
5906f251ef2SDoug Moore 		 * last time.  In that subtree, all values > index.
5916f251ef2SDoug Moore 		 */
592ac0572e6SDoug Moore 		slot = pctrie_slot(succ, index) + 1;
5936f251ef2SDoug Moore 		KASSERT((succ->pn_popmap >> slot) != 0,
5946f251ef2SDoug Moore 		    ("%s: no popmap siblings past slot %d in node %p",
5956f251ef2SDoug Moore 		    __func__, slot, succ));
5966f251ef2SDoug Moore 		slot += ffs(succ->pn_popmap >> slot) - 1;
5976f251ef2SDoug Moore 		succ = pctrie_node_load(&succ->pn_child[slot], NULL,
5986f251ef2SDoug Moore 		    PCTRIE_LOCKED);
5996f251ef2SDoug Moore 	}
6006f251ef2SDoug Moore 
6016f251ef2SDoug Moore 	/*
6026f251ef2SDoug Moore 	 * Find the value in the subtree rooted at "succ" with the least index.
6036f251ef2SDoug Moore 	 */
6046f251ef2SDoug Moore 	while (!pctrie_isleaf(succ)) {
6056f251ef2SDoug Moore 		KASSERT(succ->pn_popmap != 0,
6066f251ef2SDoug Moore 		    ("%s: no popmap children in node %p",  __func__, succ));
6076f251ef2SDoug Moore 		slot = ffs(succ->pn_popmap) - 1;
6086f251ef2SDoug Moore 		succ = pctrie_node_load(&succ->pn_child[slot], NULL,
6096f251ef2SDoug Moore 		    PCTRIE_LOCKED);
6106f251ef2SDoug Moore 	}
6116f251ef2SDoug Moore 	return (pctrie_toval(succ));
6126f251ef2SDoug Moore }
6136f251ef2SDoug Moore 
614*bbf81f46SRyan Libby uint64_t *
615*bbf81f46SRyan Libby pctrie_lookup_ge(struct pctrie *ptree, uint64_t index)
616*bbf81f46SRyan Libby {
617*bbf81f46SRyan Libby 	return (pctrie_lookup_ge_node(
618*bbf81f46SRyan Libby 	    pctrie_root_load(ptree, NULL, PCTRIE_LOCKED), index));
619*bbf81f46SRyan Libby }
620*bbf81f46SRyan Libby 
621*bbf81f46SRyan Libby uint64_t *
622*bbf81f46SRyan Libby pctrie_subtree_lookup_gt(struct pctrie_node *node, uint64_t index)
623*bbf81f46SRyan Libby {
624*bbf81f46SRyan Libby 	if (node == NULL || index + 1 == 0)
625*bbf81f46SRyan Libby 		return (NULL);
626*bbf81f46SRyan Libby 	return (pctrie_lookup_ge_node(node, index + 1));
627*bbf81f46SRyan Libby }
628*bbf81f46SRyan Libby 
629*bbf81f46SRyan Libby #ifdef INVARIANTS
630*bbf81f46SRyan Libby void
631*bbf81f46SRyan Libby pctrie_subtree_lookup_gt_assert(struct pctrie_node *node, uint64_t index,
632*bbf81f46SRyan Libby     struct pctrie *ptree, uint64_t *res)
633*bbf81f46SRyan Libby {
634*bbf81f46SRyan Libby 	uint64_t *expected;
635*bbf81f46SRyan Libby 
636*bbf81f46SRyan Libby 	if (index + 1 == 0)
637*bbf81f46SRyan Libby 		expected = NULL;
638*bbf81f46SRyan Libby 	else
639*bbf81f46SRyan Libby 		expected = pctrie_lookup_ge(ptree, index + 1);
640*bbf81f46SRyan Libby 	KASSERT(res == expected,
641*bbf81f46SRyan Libby 	    ("pctrie subtree lookup gt result different from root lookup: "
642*bbf81f46SRyan Libby 	    "ptree %p, index %ju, subtree %p, found %p, expected %p", ptree,
643*bbf81f46SRyan Libby 	    (uintmax_t)index, node, res, expected));
644*bbf81f46SRyan Libby }
645*bbf81f46SRyan Libby #endif
646*bbf81f46SRyan Libby 
6476f251ef2SDoug Moore /*
6486f251ef2SDoug Moore  * Returns the value with the greatest index that is less than or equal to the
6496f251ef2SDoug Moore  * specified index, or NULL if there are no such values.
6506f251ef2SDoug Moore  *
6516f251ef2SDoug Moore  * Requires that access be externally synchronized by a lock.
652f2cc1285SJeff Roberson  */
653*bbf81f46SRyan Libby static __inline uint64_t *
654*bbf81f46SRyan Libby pctrie_lookup_le_node(struct pctrie_node *node, uint64_t index)
655f2cc1285SJeff Roberson {
656*bbf81f46SRyan Libby 	struct pctrie_node *pred;
657f2cc1285SJeff Roberson 	uint64_t *m;
658d1139b52SConrad Meyer 	int slot;
659f2cc1285SJeff Roberson 
6606f251ef2SDoug Moore 	/*
661*bbf81f46SRyan Libby 	 * Mirror the implementation of pctrie_lookup_ge_node, described above.
6626f251ef2SDoug Moore 	 */
6636f251ef2SDoug Moore 	pred = NULL;
6642d2bcba7SDoug Moore 	for (;;) {
6656f251ef2SDoug Moore 		if (pctrie_isleaf(node)) {
6662d2bcba7SDoug Moore 			if ((m = pctrie_toval(node)) != NULL && *m <= index)
667f2cc1285SJeff Roberson 				return (m);
6686f251ef2SDoug Moore 			break;
669f2cc1285SJeff Roberson 		}
670ac0572e6SDoug Moore 		if (pctrie_keybarr(node, index, &slot)) {
6716f251ef2SDoug Moore 			if (node->pn_owner < index)
6726f251ef2SDoug Moore 				pred = node;
6736f251ef2SDoug Moore 			break;
674f2cc1285SJeff Roberson 		}
6756f251ef2SDoug Moore 		if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
6766f251ef2SDoug Moore 			pred = node;
6776f251ef2SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
6783c30b235SConrad Meyer 		    PCTRIE_LOCKED);
6798df38859SDoug Moore 	}
6806f251ef2SDoug Moore 	if (pred == NULL)
6816f251ef2SDoug Moore 		return (NULL);
6826f251ef2SDoug Moore 	if (pred != node) {
683ac0572e6SDoug Moore 		slot = pctrie_slot(pred, index);
6846f251ef2SDoug Moore 		KASSERT((pred->pn_popmap & ((1 << slot) - 1)) != 0,
6856f251ef2SDoug Moore 		    ("%s: no popmap siblings before slot %d in node %p",
6866f251ef2SDoug Moore 		    __func__, slot, pred));
687749c249dSDoug Moore 		slot = ilog2(pred->pn_popmap & ((1 << slot) - 1));
6886f251ef2SDoug Moore 		pred = pctrie_node_load(&pred->pn_child[slot], NULL,
6896f251ef2SDoug Moore 		    PCTRIE_LOCKED);
690f2cc1285SJeff Roberson 	}
6916f251ef2SDoug Moore 	while (!pctrie_isleaf(pred)) {
6926f251ef2SDoug Moore 		KASSERT(pred->pn_popmap != 0,
6936f251ef2SDoug Moore 		    ("%s: no popmap children in node %p",  __func__, pred));
694749c249dSDoug Moore 		slot = ilog2(pred->pn_popmap);
6956f251ef2SDoug Moore 		pred = pctrie_node_load(&pred->pn_child[slot], NULL,
6966f251ef2SDoug Moore 		    PCTRIE_LOCKED);
6976f251ef2SDoug Moore 	}
6986f251ef2SDoug Moore 	return (pctrie_toval(pred));
699f2cc1285SJeff Roberson }
700f2cc1285SJeff Roberson 
701*bbf81f46SRyan Libby uint64_t *
702*bbf81f46SRyan Libby pctrie_lookup_le(struct pctrie *ptree, uint64_t index)
703*bbf81f46SRyan Libby {
704*bbf81f46SRyan Libby 	return (pctrie_lookup_le_node(
705*bbf81f46SRyan Libby 	    pctrie_root_load(ptree, NULL, PCTRIE_LOCKED), index));
706*bbf81f46SRyan Libby }
707*bbf81f46SRyan Libby 
708*bbf81f46SRyan Libby uint64_t *
709*bbf81f46SRyan Libby pctrie_subtree_lookup_lt(struct pctrie_node *node, uint64_t index)
710*bbf81f46SRyan Libby {
711*bbf81f46SRyan Libby 	if (node == NULL || index == 0)
712*bbf81f46SRyan Libby 		return (NULL);
713*bbf81f46SRyan Libby 	return (pctrie_lookup_le_node(node, index - 1));
714*bbf81f46SRyan Libby }
715*bbf81f46SRyan Libby 
716*bbf81f46SRyan Libby #ifdef INVARIANTS
717*bbf81f46SRyan Libby void
718*bbf81f46SRyan Libby pctrie_subtree_lookup_lt_assert(struct pctrie_node *node, uint64_t index,
719*bbf81f46SRyan Libby     struct pctrie *ptree, uint64_t *res)
720*bbf81f46SRyan Libby {
721*bbf81f46SRyan Libby 	uint64_t *expected;
722*bbf81f46SRyan Libby 
723*bbf81f46SRyan Libby 	if (index == 0)
724*bbf81f46SRyan Libby 		expected = NULL;
725*bbf81f46SRyan Libby 	else
726*bbf81f46SRyan Libby 		expected = pctrie_lookup_le(ptree, index - 1);
727*bbf81f46SRyan Libby 	KASSERT(res == expected,
728*bbf81f46SRyan Libby 	    ("pctrie subtree lookup lt result different from root lookup: "
729*bbf81f46SRyan Libby 	    "ptree %p, index %ju, subtree %p, found %p, expected %p", ptree,
730*bbf81f46SRyan Libby 	    (uintmax_t)index, node, res, expected));
731*bbf81f46SRyan Libby }
732*bbf81f46SRyan Libby #endif
733*bbf81f46SRyan Libby 
734f2cc1285SJeff Roberson /*
7353b7ffacdSDoug Moore  * Remove the specified index from the tree, and return the value stored at
7363b7ffacdSDoug Moore  * that index.  If the index is not present, return NULL.
737f2cc1285SJeff Roberson  */
7383b7ffacdSDoug Moore uint64_t *
7393b7ffacdSDoug Moore pctrie_remove_lookup(struct pctrie *ptree, uint64_t index,
7403b7ffacdSDoug Moore     struct pctrie_node **freenode)
741f2cc1285SJeff Roberson {
7422d2bcba7SDoug Moore 	struct pctrie_node *child, *node, *parent;
743f2cc1285SJeff Roberson 	uint64_t *m;
7448df38859SDoug Moore 	int slot;
745f2cc1285SJeff Roberson 
7463b7ffacdSDoug Moore 	*freenode = node = NULL;
7472d2bcba7SDoug Moore 	child = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
7482d2bcba7SDoug Moore 	for (;;) {
7492d2bcba7SDoug Moore 		if (pctrie_isleaf(child))
7502d2bcba7SDoug Moore 			break;
7512d2bcba7SDoug Moore 		parent = node;
7522d2bcba7SDoug Moore 		node = child;
753ac0572e6SDoug Moore 		slot = pctrie_slot(node, index);
7542d2bcba7SDoug Moore 		child = pctrie_node_load(&node->pn_child[slot], NULL,
7552d2bcba7SDoug Moore 		    PCTRIE_LOCKED);
7562d2bcba7SDoug Moore 	}
7572d2bcba7SDoug Moore 	if ((m = pctrie_toval(child)) == NULL || *m != index)
7583b7ffacdSDoug Moore 		return (NULL);
7592d2bcba7SDoug Moore 	if (node == NULL) {
7602d2bcba7SDoug Moore 		pctrie_root_store(ptree, PCTRIE_NULL, PCTRIE_LOCKED);
7613b7ffacdSDoug Moore 		return (m);
762f2cc1285SJeff Roberson 	}
7638df38859SDoug Moore 	KASSERT((node->pn_popmap & (1 << slot)) != 0,
7648df38859SDoug Moore 	    ("%s: bad popmap slot %d in node %p",
7658df38859SDoug Moore 	    __func__, slot, node));
7668df38859SDoug Moore 	node->pn_popmap ^= 1 << slot;
7672d2bcba7SDoug Moore 	pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL, PCTRIE_LOCKED);
7688df38859SDoug Moore 	if (!powerof2(node->pn_popmap))
7693b7ffacdSDoug Moore 		return (m);
7702d2bcba7SDoug Moore 	KASSERT(node->pn_popmap != 0, ("%s: bad popmap all zeroes", __func__));
7718df38859SDoug Moore 	slot = ffs(node->pn_popmap) - 1;
7722d2bcba7SDoug Moore 	child = pctrie_node_load(&node->pn_child[slot], NULL, PCTRIE_LOCKED);
7732d2bcba7SDoug Moore 	KASSERT(child != PCTRIE_NULL,
7742d2bcba7SDoug Moore 	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
775f2cc1285SJeff Roberson 	if (parent == NULL)
7762d2bcba7SDoug Moore 		pctrie_root_store(ptree, child, PCTRIE_LOCKED);
777f2cc1285SJeff Roberson 	else {
778ac0572e6SDoug Moore 		slot = pctrie_slot(parent, index);
7792d2bcba7SDoug Moore 		KASSERT(node ==
7802d2bcba7SDoug Moore 		    pctrie_node_load(&parent->pn_child[slot], NULL,
7812d2bcba7SDoug Moore 		    PCTRIE_LOCKED), ("%s: invalid child value", __func__));
7822d2bcba7SDoug Moore 		pctrie_node_store(&parent->pn_child[slot], child,
7833c30b235SConrad Meyer 		    PCTRIE_LOCKED);
784f2cc1285SJeff Roberson 	}
7853c30b235SConrad Meyer 	/*
7863c30b235SConrad Meyer 	 * The child is still valid and we can not zero the
7873c30b235SConrad Meyer 	 * pointer until all SMR references are gone.
7883c30b235SConrad Meyer 	 */
7893b7ffacdSDoug Moore 	pctrie_node_put(node);
7903b7ffacdSDoug Moore 	*freenode = node;
7913b7ffacdSDoug Moore 	return (m);
792f2cc1285SJeff Roberson }
793f2cc1285SJeff Roberson 
794f2cc1285SJeff Roberson /*
7953b7ffacdSDoug Moore  * Prune all the leaves of 'node' before its first non-leaf child, make child
7963b7ffacdSDoug Moore  * zero of 'node' point up to 'parent', make 'node' into 'parent' and that
7973b7ffacdSDoug Moore  * non-leaf child into 'node'.  Repeat until a node has been stripped of all
7983b7ffacdSDoug Moore  * children, and mark it for freeing, returning its parent.
799f2cc1285SJeff Roberson  */
8003b7ffacdSDoug Moore static struct pctrie_node *
8013b7ffacdSDoug Moore pctrie_reclaim_prune(struct pctrie_node **pnode,
8023b7ffacdSDoug Moore     struct pctrie_node *parent)
803f2cc1285SJeff Roberson {
8043b7ffacdSDoug Moore 	struct pctrie_node *child, *node;
8053b7ffacdSDoug Moore 	int slot;
806f2cc1285SJeff Roberson 
8073b7ffacdSDoug Moore 	node = *pnode;
8083b7ffacdSDoug Moore 	while (node->pn_popmap != 0) {
8093b7ffacdSDoug Moore 		slot = ffs(node->pn_popmap) - 1;
8103b7ffacdSDoug Moore 		node->pn_popmap ^= 1 << slot;
8113b7ffacdSDoug Moore 		child = pctrie_node_load(&node->pn_child[slot], NULL,
8123b7ffacdSDoug Moore 		    PCTRIE_UNSERIALIZED);
8133b7ffacdSDoug Moore 		pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL,
8143b7ffacdSDoug Moore 		    PCTRIE_UNSERIALIZED);
8153b7ffacdSDoug Moore 		if (pctrie_isleaf(child))
8163b7ffacdSDoug Moore 			continue;
8173b7ffacdSDoug Moore 		/* Climb one level down the trie. */
8183b7ffacdSDoug Moore 		pctrie_node_store(&node->pn_child[0], parent,
8193b7ffacdSDoug Moore 		    PCTRIE_UNSERIALIZED);
8203b7ffacdSDoug Moore 		parent = node;
8213b7ffacdSDoug Moore 		node = child;
8223b7ffacdSDoug Moore 	}
8233b7ffacdSDoug Moore 	*pnode = parent;
8243b7ffacdSDoug Moore 	return (node);
8253b7ffacdSDoug Moore }
8263b7ffacdSDoug Moore 
8273b7ffacdSDoug Moore /*
8283b7ffacdSDoug Moore  * Recover the node parent from its first child and continue pruning.
8293b7ffacdSDoug Moore  */
8303b7ffacdSDoug Moore struct pctrie_node *
8313b7ffacdSDoug Moore pctrie_reclaim_resume(struct pctrie_node **pnode)
8323b7ffacdSDoug Moore {
8333b7ffacdSDoug Moore 	struct pctrie_node *parent, *node;
8343b7ffacdSDoug Moore 
8353b7ffacdSDoug Moore 	node = *pnode;
8363b7ffacdSDoug Moore 	if (node == NULL)
8373b7ffacdSDoug Moore 		return (NULL);
8383b7ffacdSDoug Moore 	/* Climb one level up the trie. */
8393b7ffacdSDoug Moore 	parent = pctrie_node_load(&node->pn_child[0], NULL,
8403b7ffacdSDoug Moore 	    PCTRIE_UNSERIALIZED);
8413b7ffacdSDoug Moore 	pctrie_node_store(&node->pn_child[0], PCTRIE_NULL, PCTRIE_UNSERIALIZED);
8423b7ffacdSDoug Moore 	return (pctrie_reclaim_prune(pnode, parent));
8433b7ffacdSDoug Moore }
8443b7ffacdSDoug Moore 
8453b7ffacdSDoug Moore /*
8463b7ffacdSDoug Moore  * Find the trie root, and start pruning with a NULL parent.
8473b7ffacdSDoug Moore  */
8483b7ffacdSDoug Moore struct pctrie_node *
8493b7ffacdSDoug Moore pctrie_reclaim_begin(struct pctrie_node **pnode,
8503b7ffacdSDoug Moore     struct pctrie *ptree)
8513b7ffacdSDoug Moore {
8523b7ffacdSDoug Moore 	struct pctrie_node *node;
8533b7ffacdSDoug Moore 
8543b7ffacdSDoug Moore 	node = pctrie_root_load(ptree, NULL, PCTRIE_UNSERIALIZED);
8552d2bcba7SDoug Moore 	pctrie_root_store(ptree, PCTRIE_NULL, PCTRIE_UNSERIALIZED);
8563b7ffacdSDoug Moore 	if (pctrie_isleaf(node))
8573b7ffacdSDoug Moore 		return (NULL);
8583b7ffacdSDoug Moore 	*pnode = node;
8593b7ffacdSDoug Moore 	return (pctrie_reclaim_prune(pnode, NULL));
8603b7ffacdSDoug Moore }
8613b7ffacdSDoug Moore 
8623b7ffacdSDoug Moore /*
8633b7ffacdSDoug Moore  * Replace an existing value in the trie with another one.
8643b7ffacdSDoug Moore  * Panics if there is not an old value in the trie at the new value's index.
8653b7ffacdSDoug Moore  */
8663b7ffacdSDoug Moore uint64_t *
8673b7ffacdSDoug Moore pctrie_replace(struct pctrie *ptree, uint64_t *newval)
8683b7ffacdSDoug Moore {
8693b7ffacdSDoug Moore 	struct pctrie_node *leaf, *parent, *node;
8703b7ffacdSDoug Moore 	uint64_t *m;
8713b7ffacdSDoug Moore 	uint64_t index;
8723b7ffacdSDoug Moore 	int slot;
8733b7ffacdSDoug Moore 
8743b7ffacdSDoug Moore 	leaf = pctrie_toleaf(newval);
8753b7ffacdSDoug Moore 	index = *newval;
8763b7ffacdSDoug Moore 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
8773b7ffacdSDoug Moore 	parent = NULL;
8783b7ffacdSDoug Moore 	for (;;) {
8793b7ffacdSDoug Moore 		if (pctrie_isleaf(node)) {
8803b7ffacdSDoug Moore 			if ((m = pctrie_toval(node)) != NULL && *m == index) {
8813b7ffacdSDoug Moore 				if (parent == NULL)
8823b7ffacdSDoug Moore 					ptree->pt_root = leaf;
8833b7ffacdSDoug Moore 				else
8843b7ffacdSDoug Moore 					pctrie_node_store(
8853b7ffacdSDoug Moore 					    &parent->pn_child[slot], leaf,
8863b7ffacdSDoug Moore 					    PCTRIE_LOCKED);
8873b7ffacdSDoug Moore 				return (m);
8883b7ffacdSDoug Moore 			}
8893b7ffacdSDoug Moore 			break;
8903b7ffacdSDoug Moore 		}
8913b7ffacdSDoug Moore 		if (pctrie_keybarr(node, index, &slot))
8923b7ffacdSDoug Moore 			break;
8933b7ffacdSDoug Moore 		parent = node;
8943b7ffacdSDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
8953b7ffacdSDoug Moore 		    PCTRIE_LOCKED);
8963b7ffacdSDoug Moore 	}
8973b7ffacdSDoug Moore 	panic("%s: original replacing value not found", __func__);
898f2cc1285SJeff Roberson }
899f2cc1285SJeff Roberson 
900f2cc1285SJeff Roberson #ifdef DDB
901f2cc1285SJeff Roberson /*
902f2cc1285SJeff Roberson  * Show details about the given node.
903f2cc1285SJeff Roberson  */
904f2cc1285SJeff Roberson DB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
905f2cc1285SJeff Roberson {
9063c30b235SConrad Meyer 	struct pctrie_node *node, *tmp;
9078df38859SDoug Moore 	int slot;
9088df38859SDoug Moore 	pn_popmap_t popmap;
909f2cc1285SJeff Roberson 
910f2cc1285SJeff Roberson         if (!have_addr)
911f2cc1285SJeff Roberson                 return;
912f2cc1285SJeff Roberson 	node = (struct pctrie_node *)addr;
9138df38859SDoug Moore 	db_printf("node %p, owner %jx, children popmap %04x, level %u:\n",
9148df38859SDoug Moore 	    (void *)node, (uintmax_t)node->pn_owner, node->pn_popmap,
91538f5cb1bSDoug Moore 	    node->pn_clev / PCTRIE_WIDTH);
9168df38859SDoug Moore 	for (popmap = node->pn_popmap; popmap != 0; popmap ^= 1 << slot) {
9178df38859SDoug Moore 		slot = ffs(popmap) - 1;
9188df38859SDoug Moore 		tmp = pctrie_node_load(&node->pn_child[slot], NULL,
9193c30b235SConrad Meyer 		    PCTRIE_UNSERIALIZED);
920f2cc1285SJeff Roberson 		db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
9218df38859SDoug Moore 		    slot, (void *)tmp,
9223c30b235SConrad Meyer 		    pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL,
92338f5cb1bSDoug Moore 		    node->pn_clev / PCTRIE_WIDTH);
924f2cc1285SJeff Roberson 	}
9253c30b235SConrad Meyer }
926f2cc1285SJeff Roberson #endif /* DDB */
927