18a36da99SPedro F. Giffuni /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 38a36da99SPedro F. Giffuni * 4f2cc1285SJeff Roberson * Copyright (c) 2013 EMC Corp. 5f2cc1285SJeff Roberson * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org> 6f2cc1285SJeff Roberson * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com> 7f2cc1285SJeff Roberson * All rights reserved. 8f2cc1285SJeff Roberson * 9f2cc1285SJeff Roberson * Redistribution and use in source and binary forms, with or without 10f2cc1285SJeff Roberson * modification, are permitted provided that the following conditions 11f2cc1285SJeff Roberson * are met: 12f2cc1285SJeff Roberson * 1. Redistributions of source code must retain the above copyright 13f2cc1285SJeff Roberson * notice, this list of conditions and the following disclaimer. 14f2cc1285SJeff Roberson * 2. Redistributions in binary form must reproduce the above copyright 15f2cc1285SJeff Roberson * notice, this list of conditions and the following disclaimer in the 16f2cc1285SJeff Roberson * documentation and/or other materials provided with the distribution. 17f2cc1285SJeff Roberson * 18f2cc1285SJeff Roberson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19f2cc1285SJeff Roberson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20f2cc1285SJeff Roberson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21f2cc1285SJeff Roberson * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22f2cc1285SJeff Roberson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23f2cc1285SJeff Roberson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24f2cc1285SJeff Roberson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25f2cc1285SJeff Roberson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26f2cc1285SJeff Roberson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27f2cc1285SJeff Roberson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28f2cc1285SJeff Roberson * SUCH DAMAGE. 29f2cc1285SJeff Roberson * 30f2cc1285SJeff Roberson */ 31f2cc1285SJeff Roberson 32f2cc1285SJeff Roberson /* 33f2cc1285SJeff Roberson * Path-compressed radix trie implementation. 34f2cc1285SJeff Roberson * 35f2cc1285SJeff Roberson * The implementation takes into account the following rationale: 36f2cc1285SJeff Roberson * - Size of the nodes should be as small as possible but still big enough 37f2cc1285SJeff Roberson * to avoid a large maximum depth for the trie. This is a balance 38f2cc1285SJeff Roberson * between the necessity to not wire too much physical memory for the nodes 39f2cc1285SJeff Roberson * and the necessity to avoid too much cache pollution during the trie 40f2cc1285SJeff Roberson * operations. 41f2cc1285SJeff Roberson * - There is not a huge bias toward the number of lookup operations over 42f2cc1285SJeff Roberson * the number of insert and remove operations. This basically implies 43f2cc1285SJeff Roberson * that optimizations supposedly helping one operation but hurting the 44f2cc1285SJeff Roberson * other might be carefully evaluated. 45f2cc1285SJeff Roberson * - On average not many nodes are expected to be fully populated, hence 46f2cc1285SJeff Roberson * level compression may just complicate things. 47f2cc1285SJeff Roberson */ 48f2cc1285SJeff Roberson 49f2cc1285SJeff Roberson #include <sys/cdefs.h> 50f2cc1285SJeff Roberson __FBSDID("$FreeBSD$"); 51f2cc1285SJeff Roberson 52f2cc1285SJeff Roberson #include "opt_ddb.h" 53f2cc1285SJeff Roberson 54f2cc1285SJeff Roberson #include <sys/param.h> 55f2cc1285SJeff Roberson #include <sys/systm.h> 56f2cc1285SJeff Roberson #include <sys/kernel.h> 5705963ea4SDoug Moore #include <sys/libkern.h> 58f2cc1285SJeff Roberson #include <sys/pctrie.h> 593c30b235SConrad Meyer #include <sys/proc.h> /* smr.h depends on struct thread. */ 603c30b235SConrad Meyer #include <sys/smr.h> 613c30b235SConrad Meyer #include <sys/smr_types.h> 62f2cc1285SJeff Roberson 63f2cc1285SJeff Roberson #ifdef DDB 64f2cc1285SJeff Roberson #include <ddb/ddb.h> 65f2cc1285SJeff Roberson #endif 66f2cc1285SJeff Roberson 67f2cc1285SJeff Roberson #define PCTRIE_MASK (PCTRIE_COUNT - 1) 6855e0987aSPedro F. Giffuni #define PCTRIE_LIMIT (howmany(sizeof(uint64_t) * NBBY, PCTRIE_WIDTH) - 1) 69f2cc1285SJeff Roberson 708df38859SDoug Moore #if PCTRIE_WIDTH == 3 718df38859SDoug Moore typedef uint8_t pn_popmap_t; 728df38859SDoug Moore #elif PCTRIE_WIDTH == 4 738df38859SDoug Moore typedef uint16_t pn_popmap_t; 748df38859SDoug Moore #elif PCTRIE_WIDTH == 5 758df38859SDoug Moore typedef uint32_t pn_popmap_t; 768df38859SDoug Moore #else 778df38859SDoug Moore #error Unsupported width 788df38859SDoug Moore #endif 798df38859SDoug Moore _Static_assert(sizeof(pn_popmap_t) <= sizeof(int), 808df38859SDoug Moore "pn_popmap_t too wide"); 818df38859SDoug Moore 822d2bcba7SDoug Moore /* Set of all flag bits stored in node pointers. */ 832d2bcba7SDoug Moore #define PCTRIE_FLAGS (PCTRIE_ISLEAF) 84f2cc1285SJeff Roberson #define PCTRIE_PAD PCTRIE_FLAGS 85f2cc1285SJeff Roberson 863c30b235SConrad Meyer struct pctrie_node; 873c30b235SConrad Meyer typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t; 883c30b235SConrad Meyer 89f2cc1285SJeff Roberson struct pctrie_node { 90f2cc1285SJeff Roberson uint64_t pn_owner; /* Owner of record. */ 918df38859SDoug Moore pn_popmap_t pn_popmap; /* Valid children. */ 9238f5cb1bSDoug Moore uint8_t pn_clev; /* Level * WIDTH. */ 933c30b235SConrad Meyer smr_pctnode_t pn_child[PCTRIE_COUNT]; /* Child nodes. */ 94f2cc1285SJeff Roberson }; 95f2cc1285SJeff Roberson 963c30b235SConrad Meyer enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED }; 973c30b235SConrad Meyer 983c30b235SConrad Meyer static __inline void pctrie_node_store(smr_pctnode_t *p, void *val, 993c30b235SConrad Meyer enum pctrie_access access); 1003c30b235SConrad Meyer 101f2cc1285SJeff Roberson /* 102*ac0572e6SDoug Moore * Map index to an array position for the children of node, 103da72505fSDoug Moore */ 104da72505fSDoug Moore static __inline int 105*ac0572e6SDoug Moore pctrie_slot(struct pctrie_node *node, uint64_t index) 106da72505fSDoug Moore { 107*ac0572e6SDoug Moore return ((index >> node->pn_clev) & PCTRIE_MASK); 108da72505fSDoug Moore } 109da72505fSDoug Moore 110*ac0572e6SDoug Moore /* 111*ac0572e6SDoug Moore * Returns true if index does not belong to the specified node. Otherwise, 112*ac0572e6SDoug Moore * sets slot value, and returns false. 113*ac0572e6SDoug Moore */ 114*ac0572e6SDoug Moore static __inline bool 115*ac0572e6SDoug Moore pctrie_keybarr(struct pctrie_node *node, uint64_t index, int *slot) 116da72505fSDoug Moore { 117*ac0572e6SDoug Moore index = (index - node->pn_owner) >> node->pn_clev; 118*ac0572e6SDoug Moore if (index >= PCTRIE_COUNT) 119*ac0572e6SDoug Moore return (true); 120*ac0572e6SDoug Moore *slot = index; 121*ac0572e6SDoug Moore return (false); 122da72505fSDoug Moore } 123da72505fSDoug Moore 124da72505fSDoug Moore /* 125f2cc1285SJeff Roberson * Allocate a node. Pre-allocation should ensure that the request 126f2cc1285SJeff Roberson * will always be satisfied. 127f2cc1285SJeff Roberson */ 1283c30b235SConrad Meyer static struct pctrie_node * 129da72505fSDoug Moore pctrie_node_get(struct pctrie *ptree, pctrie_alloc_t allocfn, uint64_t index, 13038f5cb1bSDoug Moore uint64_t newind) 131f2cc1285SJeff Roberson { 132f2cc1285SJeff Roberson struct pctrie_node *node; 133f2cc1285SJeff Roberson 134f2cc1285SJeff Roberson node = allocfn(ptree); 135f2cc1285SJeff Roberson if (node == NULL) 136f2cc1285SJeff Roberson return (NULL); 1373c30b235SConrad Meyer 1383c30b235SConrad Meyer /* 1393c30b235SConrad Meyer * We want to clear the last child pointer after the final section 1403c30b235SConrad Meyer * has exited so lookup can not return false negatives. It is done 1413c30b235SConrad Meyer * here because it will be cache-cold in the dtor callback. 1423c30b235SConrad Meyer */ 1438df38859SDoug Moore if (node->pn_popmap != 0) { 1448df38859SDoug Moore pctrie_node_store(&node->pn_child[ffs(node->pn_popmap) - 1], 1452d2bcba7SDoug Moore PCTRIE_NULL, PCTRIE_UNSERIALIZED); 1468df38859SDoug Moore node->pn_popmap = 0; 1473c30b235SConrad Meyer } 14838f5cb1bSDoug Moore 14938f5cb1bSDoug Moore /* 15038f5cb1bSDoug Moore * From the highest-order bit where the indexes differ, 15138f5cb1bSDoug Moore * compute the highest level in the trie where they differ. Then, 15238f5cb1bSDoug Moore * compute the least index of this subtrie. 15338f5cb1bSDoug Moore */ 15438f5cb1bSDoug Moore KASSERT(index != newind, ("%s: passing the same key value %jx", 15538f5cb1bSDoug Moore __func__, (uintmax_t)index)); 15638f5cb1bSDoug Moore _Static_assert(sizeof(long long) >= sizeof(uint64_t), 15738f5cb1bSDoug Moore "uint64 too wide"); 15838f5cb1bSDoug Moore _Static_assert(sizeof(uint64_t) * NBBY <= 15938f5cb1bSDoug Moore (1 << (sizeof(node->pn_clev) * NBBY)), "pn_clev too narrow"); 16038f5cb1bSDoug Moore node->pn_clev = rounddown(flsll(index ^ newind) - 1, PCTRIE_WIDTH); 161*ac0572e6SDoug Moore node->pn_owner = PCTRIE_COUNT; 162*ac0572e6SDoug Moore node->pn_owner = index & -(node->pn_owner << node->pn_clev); 163f2cc1285SJeff Roberson return (node); 164f2cc1285SJeff Roberson } 165f2cc1285SJeff Roberson 166f2cc1285SJeff Roberson /* 167f2cc1285SJeff Roberson * Free radix node. 168f2cc1285SJeff Roberson */ 169f2cc1285SJeff Roberson static __inline void 170f2cc1285SJeff Roberson pctrie_node_put(struct pctrie *ptree, struct pctrie_node *node, 1718df38859SDoug Moore pctrie_free_t freefn) 172f2cc1285SJeff Roberson { 173f2cc1285SJeff Roberson #ifdef INVARIANTS 174f2cc1285SJeff Roberson int slot; 175f2cc1285SJeff Roberson 1768df38859SDoug Moore KASSERT(powerof2(node->pn_popmap), 1778df38859SDoug Moore ("pctrie_node_put: node %p has too many children %04x", node, 1788df38859SDoug Moore node->pn_popmap)); 1793c30b235SConrad Meyer for (slot = 0; slot < PCTRIE_COUNT; slot++) { 1808df38859SDoug Moore if ((node->pn_popmap & (1 << slot)) != 0) 1813c30b235SConrad Meyer continue; 1823c30b235SConrad Meyer KASSERT(smr_unserialized_load(&node->pn_child[slot], true) == 1832d2bcba7SDoug Moore PCTRIE_NULL, 1842d2bcba7SDoug Moore ("pctrie_node_put: node %p has a child", node)); 1853c30b235SConrad Meyer } 186f2cc1285SJeff Roberson #endif 187f2cc1285SJeff Roberson freefn(ptree, node); 188f2cc1285SJeff Roberson } 189f2cc1285SJeff Roberson 190f2cc1285SJeff Roberson /* 1913c30b235SConrad Meyer * Fetch a node pointer from a slot. 1923c30b235SConrad Meyer */ 1933c30b235SConrad Meyer static __inline struct pctrie_node * 1943c30b235SConrad Meyer pctrie_node_load(smr_pctnode_t *p, smr_t smr, enum pctrie_access access) 1953c30b235SConrad Meyer { 1963c30b235SConrad Meyer switch (access) { 1973c30b235SConrad Meyer case PCTRIE_UNSERIALIZED: 1983c30b235SConrad Meyer return (smr_unserialized_load(p, true)); 1993c30b235SConrad Meyer case PCTRIE_LOCKED: 2003c30b235SConrad Meyer return (smr_serialized_load(p, true)); 2013c30b235SConrad Meyer case PCTRIE_SMR: 2023c30b235SConrad Meyer return (smr_entered_load(p, smr)); 2033c30b235SConrad Meyer } 2043c30b235SConrad Meyer __assert_unreachable(); 2053c30b235SConrad Meyer } 2063c30b235SConrad Meyer 2073c30b235SConrad Meyer static __inline void 2083c30b235SConrad Meyer pctrie_node_store(smr_pctnode_t *p, void *v, enum pctrie_access access) 2093c30b235SConrad Meyer { 2103c30b235SConrad Meyer switch (access) { 2113c30b235SConrad Meyer case PCTRIE_UNSERIALIZED: 2123c30b235SConrad Meyer smr_unserialized_store(p, v, true); 2133c30b235SConrad Meyer break; 2143c30b235SConrad Meyer case PCTRIE_LOCKED: 2153c30b235SConrad Meyer smr_serialized_store(p, v, true); 2163c30b235SConrad Meyer break; 2173c30b235SConrad Meyer case PCTRIE_SMR: 2183c30b235SConrad Meyer panic("%s: Not supported in SMR section.", __func__); 2193c30b235SConrad Meyer break; 2203c30b235SConrad Meyer default: 2213c30b235SConrad Meyer __assert_unreachable(); 2223c30b235SConrad Meyer break; 2233c30b235SConrad Meyer } 2243c30b235SConrad Meyer } 2253c30b235SConrad Meyer 2263c30b235SConrad Meyer /* 227f2cc1285SJeff Roberson * Get the root node for a tree. 228f2cc1285SJeff Roberson */ 229f2cc1285SJeff Roberson static __inline struct pctrie_node * 2303c30b235SConrad Meyer pctrie_root_load(struct pctrie *ptree, smr_t smr, enum pctrie_access access) 231f2cc1285SJeff Roberson { 2323c30b235SConrad Meyer return (pctrie_node_load((smr_pctnode_t *)&ptree->pt_root, smr, access)); 233f2cc1285SJeff Roberson } 234f2cc1285SJeff Roberson 235f2cc1285SJeff Roberson /* 236f2cc1285SJeff Roberson * Set the root node for a tree. 237f2cc1285SJeff Roberson */ 238f2cc1285SJeff Roberson static __inline void 2393c30b235SConrad Meyer pctrie_root_store(struct pctrie *ptree, struct pctrie_node *node, 2403c30b235SConrad Meyer enum pctrie_access access) 241f2cc1285SJeff Roberson { 2423c30b235SConrad Meyer pctrie_node_store((smr_pctnode_t *)&ptree->pt_root, node, access); 243f2cc1285SJeff Roberson } 244f2cc1285SJeff Roberson 245f2cc1285SJeff Roberson /* 246f2cc1285SJeff Roberson * Returns TRUE if the specified node is a leaf and FALSE otherwise. 247f2cc1285SJeff Roberson */ 24804f9afaeSConrad Meyer static __inline bool 249f2cc1285SJeff Roberson pctrie_isleaf(struct pctrie_node *node) 250f2cc1285SJeff Roberson { 251f2cc1285SJeff Roberson 252f2cc1285SJeff Roberson return (((uintptr_t)node & PCTRIE_ISLEAF) != 0); 253f2cc1285SJeff Roberson } 254f2cc1285SJeff Roberson 255f2cc1285SJeff Roberson /* 2569cfed089SDoug Moore * Returns val with leaf bit set. 2579cfed089SDoug Moore */ 2589cfed089SDoug Moore static __inline void * 2599cfed089SDoug Moore pctrie_toleaf(uint64_t *val) 2609cfed089SDoug Moore { 2619cfed089SDoug Moore return ((void *)((uintptr_t)val | PCTRIE_ISLEAF)); 2629cfed089SDoug Moore } 2639cfed089SDoug Moore 2649cfed089SDoug Moore /* 265f2cc1285SJeff Roberson * Returns the associated val extracted from node. 266f2cc1285SJeff Roberson */ 267f2cc1285SJeff Roberson static __inline uint64_t * 268f2cc1285SJeff Roberson pctrie_toval(struct pctrie_node *node) 269f2cc1285SJeff Roberson { 270f2cc1285SJeff Roberson 271f2cc1285SJeff Roberson return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS)); 272f2cc1285SJeff Roberson } 273f2cc1285SJeff Roberson 274f2cc1285SJeff Roberson /* 27516e01c05SDoug Moore * Make 'child' a child of 'node'. 276f2cc1285SJeff Roberson */ 277f2cc1285SJeff Roberson static __inline void 27838f5cb1bSDoug Moore pctrie_addnode(struct pctrie_node *node, uint64_t index, 27916e01c05SDoug Moore struct pctrie_node *child, enum pctrie_access access) 280f2cc1285SJeff Roberson { 281f2cc1285SJeff Roberson int slot; 282f2cc1285SJeff Roberson 283*ac0572e6SDoug Moore slot = pctrie_slot(node, index); 28416e01c05SDoug Moore pctrie_node_store(&node->pn_child[slot], child, access); 2858df38859SDoug Moore node->pn_popmap ^= 1 << slot; 2868df38859SDoug Moore KASSERT((node->pn_popmap & (1 << slot)) != 0, 2878df38859SDoug Moore ("%s: bad popmap slot %d in node %p", __func__, slot, node)); 288f2cc1285SJeff Roberson } 289f2cc1285SJeff Roberson 290f2cc1285SJeff Roberson /* 291f2cc1285SJeff Roberson * Internal helper for pctrie_reclaim_allnodes(). 292f2cc1285SJeff Roberson * This function is recursive. 293f2cc1285SJeff Roberson */ 294f2cc1285SJeff Roberson static void 295f2cc1285SJeff Roberson pctrie_reclaim_allnodes_int(struct pctrie *ptree, struct pctrie_node *node, 296f2cc1285SJeff Roberson pctrie_free_t freefn) 297f2cc1285SJeff Roberson { 2983c30b235SConrad Meyer struct pctrie_node *child; 299f2cc1285SJeff Roberson int slot; 300f2cc1285SJeff Roberson 3018df38859SDoug Moore while (node->pn_popmap != 0) { 3028df38859SDoug Moore slot = ffs(node->pn_popmap) - 1; 3033c30b235SConrad Meyer child = pctrie_node_load(&node->pn_child[slot], NULL, 3043c30b235SConrad Meyer PCTRIE_UNSERIALIZED); 3052d2bcba7SDoug Moore KASSERT(child != PCTRIE_NULL, 3062d2bcba7SDoug Moore ("%s: bad popmap slot %d in node %p", 3078df38859SDoug Moore __func__, slot, node)); 3083c30b235SConrad Meyer if (!pctrie_isleaf(child)) 3093c30b235SConrad Meyer pctrie_reclaim_allnodes_int(ptree, child, freefn); 3108df38859SDoug Moore node->pn_popmap ^= 1 << slot; 3112d2bcba7SDoug Moore pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL, 3123c30b235SConrad Meyer PCTRIE_UNSERIALIZED); 313f2cc1285SJeff Roberson } 3148df38859SDoug Moore pctrie_node_put(ptree, node, freefn); 315f2cc1285SJeff Roberson } 316f2cc1285SJeff Roberson 317f2cc1285SJeff Roberson /* 318f2cc1285SJeff Roberson * pctrie node zone initializer. 319f2cc1285SJeff Roberson */ 320f2cc1285SJeff Roberson int 321f2cc1285SJeff Roberson pctrie_zone_init(void *mem, int size __unused, int flags __unused) 322f2cc1285SJeff Roberson { 323f2cc1285SJeff Roberson struct pctrie_node *node; 324f2cc1285SJeff Roberson 325f2cc1285SJeff Roberson node = mem; 3268df38859SDoug Moore node->pn_popmap = 0; 3272d2bcba7SDoug Moore for (int i = 0; i < nitems(node->pn_child); i++) 3282d2bcba7SDoug Moore pctrie_node_store(&node->pn_child[i], PCTRIE_NULL, 3292d2bcba7SDoug Moore PCTRIE_UNSERIALIZED); 330f2cc1285SJeff Roberson return (0); 331f2cc1285SJeff Roberson } 332f2cc1285SJeff Roberson 333f2cc1285SJeff Roberson size_t 334f2cc1285SJeff Roberson pctrie_node_size(void) 335f2cc1285SJeff Roberson { 336f2cc1285SJeff Roberson 337f2cc1285SJeff Roberson return (sizeof(struct pctrie_node)); 338f2cc1285SJeff Roberson } 339f2cc1285SJeff Roberson 340f2cc1285SJeff Roberson /* 341f2cc1285SJeff Roberson * Inserts the key-value pair into the trie. 342f2cc1285SJeff Roberson * Panics if the key already exists. 343f2cc1285SJeff Roberson */ 344f2cc1285SJeff Roberson int 345f2cc1285SJeff Roberson pctrie_insert(struct pctrie *ptree, uint64_t *val, pctrie_alloc_t allocfn) 346f2cc1285SJeff Roberson { 347f2cc1285SJeff Roberson uint64_t index, newind; 3482d2bcba7SDoug Moore struct pctrie_node *leaf, *node, *parent; 3493c30b235SConrad Meyer smr_pctnode_t *parentp; 350f2cc1285SJeff Roberson int slot; 351f2cc1285SJeff Roberson 352f2cc1285SJeff Roberson index = *val; 35316e01c05SDoug Moore leaf = pctrie_toleaf(val); 354f2cc1285SJeff Roberson 355f2cc1285SJeff Roberson /* 356f2cc1285SJeff Roberson * The owner of record for root is not really important because it 357f2cc1285SJeff Roberson * will never be used. 358f2cc1285SJeff Roberson */ 3593c30b235SConrad Meyer node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); 3602d2bcba7SDoug Moore parent = NULL; 3612d2bcba7SDoug Moore for (;;) { 3622d2bcba7SDoug Moore if (pctrie_isleaf(node)) { 3632d2bcba7SDoug Moore if (node == PCTRIE_NULL) { 3642d2bcba7SDoug Moore if (parent == NULL) 3652d2bcba7SDoug Moore ptree->pt_root = leaf; 3662d2bcba7SDoug Moore else 36738f5cb1bSDoug Moore pctrie_addnode(parent, index, leaf, 3682d2bcba7SDoug Moore PCTRIE_LOCKED); 369f2cc1285SJeff Roberson return (0); 370f2cc1285SJeff Roberson } 37116e01c05SDoug Moore newind = *pctrie_toval(node); 37216e01c05SDoug Moore if (newind == index) 373f2cc1285SJeff Roberson panic("%s: key %jx is already present", 374f2cc1285SJeff Roberson __func__, (uintmax_t)index); 375f2cc1285SJeff Roberson break; 3762d2bcba7SDoug Moore } 377*ac0572e6SDoug Moore if (pctrie_keybarr(node, index, &slot)) { 37816e01c05SDoug Moore newind = node->pn_owner; 37916e01c05SDoug Moore break; 38016e01c05SDoug Moore } 3812d2bcba7SDoug Moore parent = node; 3822d2bcba7SDoug Moore node = pctrie_node_load(&node->pn_child[slot], NULL, 3833c30b235SConrad Meyer PCTRIE_LOCKED); 384f2cc1285SJeff Roberson } 385f2cc1285SJeff Roberson 386f2cc1285SJeff Roberson /* 387f2cc1285SJeff Roberson * A new node is needed because the right insertion level is reached. 388f2cc1285SJeff Roberson * Setup the new intermediate node and add the 2 children: the 38916e01c05SDoug Moore * new object and the older edge or object. 390f2cc1285SJeff Roberson */ 3912d2bcba7SDoug Moore parentp = (parent != NULL) ? &parent->pn_child[slot]: 3922d2bcba7SDoug Moore (smr_pctnode_t *)&ptree->pt_root; 39338f5cb1bSDoug Moore parent = pctrie_node_get(ptree, allocfn, index, newind); 3942d2bcba7SDoug Moore if (parent == NULL) 395f2cc1285SJeff Roberson return (ENOMEM); 3963c30b235SConrad Meyer /* These writes are not yet visible due to ordering. */ 39738f5cb1bSDoug Moore pctrie_addnode(parent, index, leaf, PCTRIE_UNSERIALIZED); 39838f5cb1bSDoug Moore pctrie_addnode(parent, newind, node, PCTRIE_UNSERIALIZED); 3993c30b235SConrad Meyer /* Synchronize to make the above visible. */ 4002d2bcba7SDoug Moore pctrie_node_store(parentp, parent, PCTRIE_LOCKED); 401f2cc1285SJeff Roberson return (0); 402f2cc1285SJeff Roberson } 403f2cc1285SJeff Roberson 404f2cc1285SJeff Roberson /* 405f2cc1285SJeff Roberson * Returns the value stored at the index. If the index is not present, 406f2cc1285SJeff Roberson * NULL is returned. 407f2cc1285SJeff Roberson */ 4083c30b235SConrad Meyer static __always_inline uint64_t * 4093c30b235SConrad Meyer _pctrie_lookup(struct pctrie *ptree, uint64_t index, smr_t smr, 4103c30b235SConrad Meyer enum pctrie_access access) 411f2cc1285SJeff Roberson { 412f2cc1285SJeff Roberson struct pctrie_node *node; 413f2cc1285SJeff Roberson uint64_t *m; 414f2cc1285SJeff Roberson int slot; 415f2cc1285SJeff Roberson 4163c30b235SConrad Meyer node = pctrie_root_load(ptree, smr, access); 4172d2bcba7SDoug Moore for (;;) { 418f2cc1285SJeff Roberson if (pctrie_isleaf(node)) { 4192d2bcba7SDoug Moore if ((m = pctrie_toval(node)) != NULL && *m == index) 420f2cc1285SJeff Roberson return (m); 421f2cc1285SJeff Roberson break; 4223c30b235SConrad Meyer } 423*ac0572e6SDoug Moore if (pctrie_keybarr(node, index, &slot)) 424f2cc1285SJeff Roberson break; 4253c30b235SConrad Meyer node = pctrie_node_load(&node->pn_child[slot], smr, access); 426f2cc1285SJeff Roberson } 427f2cc1285SJeff Roberson return (NULL); 428f2cc1285SJeff Roberson } 429f2cc1285SJeff Roberson 430f2cc1285SJeff Roberson /* 4313c30b235SConrad Meyer * Returns the value stored at the index, assuming access is externally 4323c30b235SConrad Meyer * synchronized by a lock. 4333c30b235SConrad Meyer * 4343c30b235SConrad Meyer * If the index is not present, NULL is returned. 4353c30b235SConrad Meyer */ 4363c30b235SConrad Meyer uint64_t * 4373c30b235SConrad Meyer pctrie_lookup(struct pctrie *ptree, uint64_t index) 4383c30b235SConrad Meyer { 4393c30b235SConrad Meyer return (_pctrie_lookup(ptree, index, NULL, PCTRIE_LOCKED)); 4403c30b235SConrad Meyer } 4413c30b235SConrad Meyer 4423c30b235SConrad Meyer /* 4433c30b235SConrad Meyer * Returns the value stored at the index without requiring an external lock. 4443c30b235SConrad Meyer * 4453c30b235SConrad Meyer * If the index is not present, NULL is returned. 4463c30b235SConrad Meyer */ 4473c30b235SConrad Meyer uint64_t * 4483c30b235SConrad Meyer pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr) 4493c30b235SConrad Meyer { 4503c30b235SConrad Meyer uint64_t *res; 4513c30b235SConrad Meyer 4523c30b235SConrad Meyer smr_enter(smr); 4533c30b235SConrad Meyer res = _pctrie_lookup(ptree, index, smr, PCTRIE_SMR); 4543c30b235SConrad Meyer smr_exit(smr); 4553c30b235SConrad Meyer return (res); 4563c30b235SConrad Meyer } 4573c30b235SConrad Meyer 4583c30b235SConrad Meyer /* 4596f251ef2SDoug Moore * Returns the value with the least index that is greater than or equal to the 4606f251ef2SDoug Moore * specified index, or NULL if there are no such values. 4616f251ef2SDoug Moore * 4626f251ef2SDoug Moore * Requires that access be externally synchronized by a lock. 463f2cc1285SJeff Roberson */ 464f2cc1285SJeff Roberson uint64_t * 465f2cc1285SJeff Roberson pctrie_lookup_ge(struct pctrie *ptree, uint64_t index) 466f2cc1285SJeff Roberson { 4676f251ef2SDoug Moore struct pctrie_node *node, *succ; 468f2cc1285SJeff Roberson uint64_t *m; 469d1139b52SConrad Meyer int slot; 470f2cc1285SJeff Roberson 4716f251ef2SDoug Moore /* 4726f251ef2SDoug Moore * Descend the trie as if performing an ordinary lookup for the 4736f251ef2SDoug Moore * specified value. However, unlike an ordinary lookup, as we descend 4746f251ef2SDoug Moore * the trie, we use "succ" to remember the last branching-off point, 4756f251ef2SDoug Moore * that is, the interior node under which the least value that is both 4766f251ef2SDoug Moore * outside our current path down the trie and greater than the specified 4776f251ef2SDoug Moore * index resides. (The node's popmap makes it fast and easy to 4786f251ef2SDoug Moore * recognize a branching-off point.) If our ordinary lookup fails to 4796f251ef2SDoug Moore * yield a value that is greater than or equal to the specified index, 4806f251ef2SDoug Moore * then we will exit this loop and perform a lookup starting from 4816f251ef2SDoug Moore * "succ". If "succ" is not NULL, then that lookup is guaranteed to 4826f251ef2SDoug Moore * succeed. 4836f251ef2SDoug Moore */ 4843c30b235SConrad Meyer node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); 4856f251ef2SDoug Moore succ = NULL; 4862d2bcba7SDoug Moore for (;;) { 4876f251ef2SDoug Moore if (pctrie_isleaf(node)) { 4882d2bcba7SDoug Moore if ((m = pctrie_toval(node)) != NULL && *m >= index) 489f2cc1285SJeff Roberson return (m); 4906f251ef2SDoug Moore break; 491f2cc1285SJeff Roberson } 492*ac0572e6SDoug Moore if (pctrie_keybarr(node, index, &slot)) { 493f2cc1285SJeff Roberson /* 4946f251ef2SDoug Moore * If all values in this subtree are > index, then the 4956f251ef2SDoug Moore * least value in this subtree is the answer. 496f2cc1285SJeff Roberson */ 4976f251ef2SDoug Moore if (node->pn_owner > index) 4986f251ef2SDoug Moore succ = node; 4996f251ef2SDoug Moore break; 500f2cc1285SJeff Roberson } 501f2cc1285SJeff Roberson 502f2cc1285SJeff Roberson /* 5036f251ef2SDoug Moore * Just in case the next search step leads to a subtree of all 5046f251ef2SDoug Moore * values < index, check popmap to see if a next bigger step, to 5056f251ef2SDoug Moore * a subtree of all pages with values > index, is available. If 5066f251ef2SDoug Moore * so, remember to restart the search here. 507f2cc1285SJeff Roberson */ 5086f251ef2SDoug Moore if ((node->pn_popmap >> slot) > 1) 5096f251ef2SDoug Moore succ = node; 5106f251ef2SDoug Moore node = pctrie_node_load(&node->pn_child[slot], NULL, 5116f251ef2SDoug Moore PCTRIE_LOCKED); 512f2cc1285SJeff Roberson } 513f2cc1285SJeff Roberson 514f2cc1285SJeff Roberson /* 5156f251ef2SDoug Moore * Restart the search from the last place visited in the subtree that 5166f251ef2SDoug Moore * included some values > index, if there was such a place. 5176f251ef2SDoug Moore */ 5186f251ef2SDoug Moore if (succ == NULL) 5196f251ef2SDoug Moore return (NULL); 5206f251ef2SDoug Moore if (succ != node) { 5216f251ef2SDoug Moore /* 5226f251ef2SDoug Moore * Take a step to the next bigger sibling of the node chosen 5236f251ef2SDoug Moore * last time. In that subtree, all values > index. 5246f251ef2SDoug Moore */ 525*ac0572e6SDoug Moore slot = pctrie_slot(succ, index) + 1; 5266f251ef2SDoug Moore KASSERT((succ->pn_popmap >> slot) != 0, 5276f251ef2SDoug Moore ("%s: no popmap siblings past slot %d in node %p", 5286f251ef2SDoug Moore __func__, slot, succ)); 5296f251ef2SDoug Moore slot += ffs(succ->pn_popmap >> slot) - 1; 5306f251ef2SDoug Moore succ = pctrie_node_load(&succ->pn_child[slot], NULL, 5316f251ef2SDoug Moore PCTRIE_LOCKED); 5326f251ef2SDoug Moore } 5336f251ef2SDoug Moore 5346f251ef2SDoug Moore /* 5356f251ef2SDoug Moore * Find the value in the subtree rooted at "succ" with the least index. 5366f251ef2SDoug Moore */ 5376f251ef2SDoug Moore while (!pctrie_isleaf(succ)) { 5386f251ef2SDoug Moore KASSERT(succ->pn_popmap != 0, 5396f251ef2SDoug Moore ("%s: no popmap children in node %p", __func__, succ)); 5406f251ef2SDoug Moore slot = ffs(succ->pn_popmap) - 1; 5416f251ef2SDoug Moore succ = pctrie_node_load(&succ->pn_child[slot], NULL, 5426f251ef2SDoug Moore PCTRIE_LOCKED); 5436f251ef2SDoug Moore } 5446f251ef2SDoug Moore return (pctrie_toval(succ)); 5456f251ef2SDoug Moore } 5466f251ef2SDoug Moore 5476f251ef2SDoug Moore /* 5486f251ef2SDoug Moore * Returns the value with the greatest index that is less than or equal to the 5496f251ef2SDoug Moore * specified index, or NULL if there are no such values. 5506f251ef2SDoug Moore * 5516f251ef2SDoug Moore * Requires that access be externally synchronized by a lock. 552f2cc1285SJeff Roberson */ 553f2cc1285SJeff Roberson uint64_t * 554f2cc1285SJeff Roberson pctrie_lookup_le(struct pctrie *ptree, uint64_t index) 555f2cc1285SJeff Roberson { 5566f251ef2SDoug Moore struct pctrie_node *node, *pred; 557f2cc1285SJeff Roberson uint64_t *m; 558d1139b52SConrad Meyer int slot; 559f2cc1285SJeff Roberson 5606f251ef2SDoug Moore /* 5616f251ef2SDoug Moore * Mirror the implementation of pctrie_lookup_ge, described above. 5626f251ef2SDoug Moore */ 5633c30b235SConrad Meyer node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); 5646f251ef2SDoug Moore pred = NULL; 5652d2bcba7SDoug Moore for (;;) { 5666f251ef2SDoug Moore if (pctrie_isleaf(node)) { 5672d2bcba7SDoug Moore if ((m = pctrie_toval(node)) != NULL && *m <= index) 568f2cc1285SJeff Roberson return (m); 5696f251ef2SDoug Moore break; 570f2cc1285SJeff Roberson } 571*ac0572e6SDoug Moore if (pctrie_keybarr(node, index, &slot)) { 5726f251ef2SDoug Moore if (node->pn_owner < index) 5736f251ef2SDoug Moore pred = node; 5746f251ef2SDoug Moore break; 575f2cc1285SJeff Roberson } 5766f251ef2SDoug Moore if ((node->pn_popmap & ((1 << slot) - 1)) != 0) 5776f251ef2SDoug Moore pred = node; 5786f251ef2SDoug Moore node = pctrie_node_load(&node->pn_child[slot], NULL, 5793c30b235SConrad Meyer PCTRIE_LOCKED); 5808df38859SDoug Moore } 5816f251ef2SDoug Moore if (pred == NULL) 5826f251ef2SDoug Moore return (NULL); 5836f251ef2SDoug Moore if (pred != node) { 584*ac0572e6SDoug Moore slot = pctrie_slot(pred, index); 5856f251ef2SDoug Moore KASSERT((pred->pn_popmap & ((1 << slot) - 1)) != 0, 5866f251ef2SDoug Moore ("%s: no popmap siblings before slot %d in node %p", 5876f251ef2SDoug Moore __func__, slot, pred)); 5886f251ef2SDoug Moore slot = fls(pred->pn_popmap & ((1 << slot) - 1)) - 1; 5896f251ef2SDoug Moore pred = pctrie_node_load(&pred->pn_child[slot], NULL, 5906f251ef2SDoug Moore PCTRIE_LOCKED); 591f2cc1285SJeff Roberson } 5926f251ef2SDoug Moore while (!pctrie_isleaf(pred)) { 5936f251ef2SDoug Moore KASSERT(pred->pn_popmap != 0, 5946f251ef2SDoug Moore ("%s: no popmap children in node %p", __func__, pred)); 5956f251ef2SDoug Moore slot = fls(pred->pn_popmap) - 1; 5966f251ef2SDoug Moore pred = pctrie_node_load(&pred->pn_child[slot], NULL, 5976f251ef2SDoug Moore PCTRIE_LOCKED); 5986f251ef2SDoug Moore } 5996f251ef2SDoug Moore return (pctrie_toval(pred)); 600f2cc1285SJeff Roberson } 601f2cc1285SJeff Roberson 602f2cc1285SJeff Roberson /* 603f2cc1285SJeff Roberson * Remove the specified index from the tree. 604f2cc1285SJeff Roberson * Panics if the key is not present. 605f2cc1285SJeff Roberson */ 606f2cc1285SJeff Roberson void 607f2cc1285SJeff Roberson pctrie_remove(struct pctrie *ptree, uint64_t index, pctrie_free_t freefn) 608f2cc1285SJeff Roberson { 6092d2bcba7SDoug Moore struct pctrie_node *child, *node, *parent; 610f2cc1285SJeff Roberson uint64_t *m; 6118df38859SDoug Moore int slot; 612f2cc1285SJeff Roberson 6132d2bcba7SDoug Moore node = NULL; 6142d2bcba7SDoug Moore child = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); 6152d2bcba7SDoug Moore for (;;) { 6162d2bcba7SDoug Moore if (pctrie_isleaf(child)) 6172d2bcba7SDoug Moore break; 6182d2bcba7SDoug Moore parent = node; 6192d2bcba7SDoug Moore node = child; 620*ac0572e6SDoug Moore slot = pctrie_slot(node, index); 6212d2bcba7SDoug Moore child = pctrie_node_load(&node->pn_child[slot], NULL, 6222d2bcba7SDoug Moore PCTRIE_LOCKED); 6232d2bcba7SDoug Moore } 6242d2bcba7SDoug Moore if ((m = pctrie_toval(child)) == NULL || *m != index) 6252d2bcba7SDoug Moore panic("%s: key not found", __func__); 6262d2bcba7SDoug Moore if (node == NULL) { 6272d2bcba7SDoug Moore pctrie_root_store(ptree, PCTRIE_NULL, PCTRIE_LOCKED); 628f2cc1285SJeff Roberson return; 629f2cc1285SJeff Roberson } 6308df38859SDoug Moore KASSERT((node->pn_popmap & (1 << slot)) != 0, 6318df38859SDoug Moore ("%s: bad popmap slot %d in node %p", 6328df38859SDoug Moore __func__, slot, node)); 6338df38859SDoug Moore node->pn_popmap ^= 1 << slot; 6342d2bcba7SDoug Moore pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL, PCTRIE_LOCKED); 6358df38859SDoug Moore if (!powerof2(node->pn_popmap)) 6362d2bcba7SDoug Moore return; 6372d2bcba7SDoug Moore KASSERT(node->pn_popmap != 0, ("%s: bad popmap all zeroes", __func__)); 6388df38859SDoug Moore slot = ffs(node->pn_popmap) - 1; 6392d2bcba7SDoug Moore child = pctrie_node_load(&node->pn_child[slot], NULL, PCTRIE_LOCKED); 6402d2bcba7SDoug Moore KASSERT(child != PCTRIE_NULL, 6412d2bcba7SDoug Moore ("%s: bad popmap slot %d in node %p", __func__, slot, node)); 642f2cc1285SJeff Roberson if (parent == NULL) 6432d2bcba7SDoug Moore pctrie_root_store(ptree, child, PCTRIE_LOCKED); 644f2cc1285SJeff Roberson else { 645*ac0572e6SDoug Moore slot = pctrie_slot(parent, index); 6462d2bcba7SDoug Moore KASSERT(node == 6472d2bcba7SDoug Moore pctrie_node_load(&parent->pn_child[slot], NULL, 6482d2bcba7SDoug Moore PCTRIE_LOCKED), ("%s: invalid child value", __func__)); 6492d2bcba7SDoug Moore pctrie_node_store(&parent->pn_child[slot], child, 6503c30b235SConrad Meyer PCTRIE_LOCKED); 651f2cc1285SJeff Roberson } 6523c30b235SConrad Meyer /* 6533c30b235SConrad Meyer * The child is still valid and we can not zero the 6543c30b235SConrad Meyer * pointer until all SMR references are gone. 6553c30b235SConrad Meyer */ 6568df38859SDoug Moore pctrie_node_put(ptree, node, freefn); 657f2cc1285SJeff Roberson } 658f2cc1285SJeff Roberson 659f2cc1285SJeff Roberson /* 660f2cc1285SJeff Roberson * Remove and free all the nodes from the tree. 661f2cc1285SJeff Roberson * This function is recursive but there is a tight control on it as the 662f2cc1285SJeff Roberson * maximum depth of the tree is fixed. 663f2cc1285SJeff Roberson */ 664f2cc1285SJeff Roberson void 665f2cc1285SJeff Roberson pctrie_reclaim_allnodes(struct pctrie *ptree, pctrie_free_t freefn) 666f2cc1285SJeff Roberson { 667f2cc1285SJeff Roberson struct pctrie_node *root; 668f2cc1285SJeff Roberson 6693c30b235SConrad Meyer root = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); 6702d2bcba7SDoug Moore if (root == PCTRIE_NULL) 671f2cc1285SJeff Roberson return; 6722d2bcba7SDoug Moore pctrie_root_store(ptree, PCTRIE_NULL, PCTRIE_UNSERIALIZED); 673f2cc1285SJeff Roberson if (!pctrie_isleaf(root)) 674f2cc1285SJeff Roberson pctrie_reclaim_allnodes_int(ptree, root, freefn); 675f2cc1285SJeff Roberson } 676f2cc1285SJeff Roberson 677f2cc1285SJeff Roberson #ifdef DDB 678f2cc1285SJeff Roberson /* 679f2cc1285SJeff Roberson * Show details about the given node. 680f2cc1285SJeff Roberson */ 681f2cc1285SJeff Roberson DB_SHOW_COMMAND(pctrienode, db_show_pctrienode) 682f2cc1285SJeff Roberson { 6833c30b235SConrad Meyer struct pctrie_node *node, *tmp; 6848df38859SDoug Moore int slot; 6858df38859SDoug Moore pn_popmap_t popmap; 686f2cc1285SJeff Roberson 687f2cc1285SJeff Roberson if (!have_addr) 688f2cc1285SJeff Roberson return; 689f2cc1285SJeff Roberson node = (struct pctrie_node *)addr; 6908df38859SDoug Moore db_printf("node %p, owner %jx, children popmap %04x, level %u:\n", 6918df38859SDoug Moore (void *)node, (uintmax_t)node->pn_owner, node->pn_popmap, 69238f5cb1bSDoug Moore node->pn_clev / PCTRIE_WIDTH); 6938df38859SDoug Moore for (popmap = node->pn_popmap; popmap != 0; popmap ^= 1 << slot) { 6948df38859SDoug Moore slot = ffs(popmap) - 1; 6958df38859SDoug Moore tmp = pctrie_node_load(&node->pn_child[slot], NULL, 6963c30b235SConrad Meyer PCTRIE_UNSERIALIZED); 697f2cc1285SJeff Roberson db_printf("slot: %d, val: %p, value: %p, clev: %d\n", 6988df38859SDoug Moore slot, (void *)tmp, 6993c30b235SConrad Meyer pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL, 70038f5cb1bSDoug Moore node->pn_clev / PCTRIE_WIDTH); 701f2cc1285SJeff Roberson } 7023c30b235SConrad Meyer } 703f2cc1285SJeff Roberson #endif /* DDB */ 704