18a36da99SPedro F. Giffuni /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 38a36da99SPedro F. Giffuni * 4f2cc1285SJeff Roberson * Copyright (c) 2013 EMC Corp. 5f2cc1285SJeff Roberson * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org> 6f2cc1285SJeff Roberson * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com> 7f2cc1285SJeff Roberson * All rights reserved. 8f2cc1285SJeff Roberson * 9f2cc1285SJeff Roberson * Redistribution and use in source and binary forms, with or without 10f2cc1285SJeff Roberson * modification, are permitted provided that the following conditions 11f2cc1285SJeff Roberson * are met: 12f2cc1285SJeff Roberson * 1. Redistributions of source code must retain the above copyright 13f2cc1285SJeff Roberson * notice, this list of conditions and the following disclaimer. 14f2cc1285SJeff Roberson * 2. Redistributions in binary form must reproduce the above copyright 15f2cc1285SJeff Roberson * notice, this list of conditions and the following disclaimer in the 16f2cc1285SJeff Roberson * documentation and/or other materials provided with the distribution. 17f2cc1285SJeff Roberson * 18f2cc1285SJeff Roberson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19f2cc1285SJeff Roberson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20f2cc1285SJeff Roberson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21f2cc1285SJeff Roberson * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22f2cc1285SJeff Roberson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23f2cc1285SJeff Roberson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24f2cc1285SJeff Roberson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25f2cc1285SJeff Roberson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26f2cc1285SJeff Roberson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27f2cc1285SJeff Roberson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28f2cc1285SJeff Roberson * SUCH DAMAGE. 29f2cc1285SJeff Roberson * 30f2cc1285SJeff Roberson */ 31f2cc1285SJeff Roberson 32f2cc1285SJeff Roberson /* 33f2cc1285SJeff Roberson * Path-compressed radix trie implementation. 34f2cc1285SJeff Roberson * 35f2cc1285SJeff Roberson * The implementation takes into account the following rationale: 36f2cc1285SJeff Roberson * - Size of the nodes should be as small as possible but still big enough 37f2cc1285SJeff Roberson * to avoid a large maximum depth for the trie. This is a balance 38f2cc1285SJeff Roberson * between the necessity to not wire too much physical memory for the nodes 39f2cc1285SJeff Roberson * and the necessity to avoid too much cache pollution during the trie 40f2cc1285SJeff Roberson * operations. 41f2cc1285SJeff Roberson * - There is not a huge bias toward the number of lookup operations over 42f2cc1285SJeff Roberson * the number of insert and remove operations. This basically implies 43f2cc1285SJeff Roberson * that optimizations supposedly helping one operation but hurting the 44f2cc1285SJeff Roberson * other might be carefully evaluated. 45f2cc1285SJeff Roberson * - On average not many nodes are expected to be fully populated, hence 46f2cc1285SJeff Roberson * level compression may just complicate things. 47f2cc1285SJeff Roberson */ 48f2cc1285SJeff Roberson 49f2cc1285SJeff Roberson #include <sys/cdefs.h> 50f2cc1285SJeff Roberson __FBSDID("$FreeBSD$"); 51f2cc1285SJeff Roberson 52f2cc1285SJeff Roberson #include "opt_ddb.h" 53f2cc1285SJeff Roberson 54f2cc1285SJeff Roberson #include <sys/param.h> 55f2cc1285SJeff Roberson #include <sys/systm.h> 56f2cc1285SJeff Roberson #include <sys/kernel.h> 5705963ea4SDoug Moore #include <sys/libkern.h> 58f2cc1285SJeff Roberson #include <sys/pctrie.h> 593c30b235SConrad Meyer #include <sys/proc.h> /* smr.h depends on struct thread. */ 603c30b235SConrad Meyer #include <sys/smr.h> 613c30b235SConrad Meyer #include <sys/smr_types.h> 62f2cc1285SJeff Roberson 63f2cc1285SJeff Roberson #ifdef DDB 64f2cc1285SJeff Roberson #include <ddb/ddb.h> 65f2cc1285SJeff Roberson #endif 66f2cc1285SJeff Roberson 67f2cc1285SJeff Roberson #define PCTRIE_MASK (PCTRIE_COUNT - 1) 6855e0987aSPedro F. Giffuni #define PCTRIE_LIMIT (howmany(sizeof(uint64_t) * NBBY, PCTRIE_WIDTH) - 1) 69f2cc1285SJeff Roberson 708df38859SDoug Moore #if PCTRIE_WIDTH == 3 718df38859SDoug Moore typedef uint8_t pn_popmap_t; 728df38859SDoug Moore #elif PCTRIE_WIDTH == 4 738df38859SDoug Moore typedef uint16_t pn_popmap_t; 748df38859SDoug Moore #elif PCTRIE_WIDTH == 5 758df38859SDoug Moore typedef uint32_t pn_popmap_t; 768df38859SDoug Moore #else 778df38859SDoug Moore #error Unsupported width 788df38859SDoug Moore #endif 798df38859SDoug Moore _Static_assert(sizeof(pn_popmap_t) <= sizeof(int), 808df38859SDoug Moore "pn_popmap_t too wide"); 818df38859SDoug Moore 822d2bcba7SDoug Moore /* Set of all flag bits stored in node pointers. */ 832d2bcba7SDoug Moore #define PCTRIE_FLAGS (PCTRIE_ISLEAF) 84f2cc1285SJeff Roberson #define PCTRIE_PAD PCTRIE_FLAGS 85f2cc1285SJeff Roberson 863c30b235SConrad Meyer struct pctrie_node; 873c30b235SConrad Meyer typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t; 883c30b235SConrad Meyer 89f2cc1285SJeff Roberson struct pctrie_node { 90f2cc1285SJeff Roberson uint64_t pn_owner; /* Owner of record. */ 918df38859SDoug Moore pn_popmap_t pn_popmap; /* Valid children. */ 92*38f5cb1bSDoug Moore uint8_t pn_clev; /* Level * WIDTH. */ 933c30b235SConrad Meyer smr_pctnode_t pn_child[PCTRIE_COUNT]; /* Child nodes. */ 94f2cc1285SJeff Roberson }; 95f2cc1285SJeff Roberson 963c30b235SConrad Meyer enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED }; 973c30b235SConrad Meyer 983c30b235SConrad Meyer static __inline void pctrie_node_store(smr_pctnode_t *p, void *val, 993c30b235SConrad Meyer enum pctrie_access access); 1003c30b235SConrad Meyer 101f2cc1285SJeff Roberson /* 102da72505fSDoug Moore * Return the position in the array for a given level. 103da72505fSDoug Moore */ 104da72505fSDoug Moore static __inline int 105da72505fSDoug Moore pctrie_slot(uint64_t index, uint16_t level) 106da72505fSDoug Moore { 107*38f5cb1bSDoug Moore return ((index >> level) & PCTRIE_MASK); 108da72505fSDoug Moore } 109da72505fSDoug Moore 110*38f5cb1bSDoug Moore /* Computes the key (index) with the low-order 'level' + 1 radix-digits zeroed. */ 111da72505fSDoug Moore static __inline uint64_t 112da72505fSDoug Moore pctrie_trimkey(uint64_t index, uint16_t level) 113da72505fSDoug Moore { 114*38f5cb1bSDoug Moore return (index & -((uint64_t)PCTRIE_COUNT << level)); 115da72505fSDoug Moore } 116da72505fSDoug Moore 117da72505fSDoug Moore /* 118f2cc1285SJeff Roberson * Allocate a node. Pre-allocation should ensure that the request 119f2cc1285SJeff Roberson * will always be satisfied. 120f2cc1285SJeff Roberson */ 1213c30b235SConrad Meyer static struct pctrie_node * 122da72505fSDoug Moore pctrie_node_get(struct pctrie *ptree, pctrie_alloc_t allocfn, uint64_t index, 123*38f5cb1bSDoug Moore uint64_t newind) 124f2cc1285SJeff Roberson { 125f2cc1285SJeff Roberson struct pctrie_node *node; 126f2cc1285SJeff Roberson 127f2cc1285SJeff Roberson node = allocfn(ptree); 128f2cc1285SJeff Roberson if (node == NULL) 129f2cc1285SJeff Roberson return (NULL); 1303c30b235SConrad Meyer 1313c30b235SConrad Meyer /* 1323c30b235SConrad Meyer * We want to clear the last child pointer after the final section 1333c30b235SConrad Meyer * has exited so lookup can not return false negatives. It is done 1343c30b235SConrad Meyer * here because it will be cache-cold in the dtor callback. 1353c30b235SConrad Meyer */ 1368df38859SDoug Moore if (node->pn_popmap != 0) { 1378df38859SDoug Moore pctrie_node_store(&node->pn_child[ffs(node->pn_popmap) - 1], 1382d2bcba7SDoug Moore PCTRIE_NULL, PCTRIE_UNSERIALIZED); 1398df38859SDoug Moore node->pn_popmap = 0; 1403c30b235SConrad Meyer } 141*38f5cb1bSDoug Moore 142*38f5cb1bSDoug Moore /* 143*38f5cb1bSDoug Moore * From the highest-order bit where the indexes differ, 144*38f5cb1bSDoug Moore * compute the highest level in the trie where they differ. Then, 145*38f5cb1bSDoug Moore * compute the least index of this subtrie. 146*38f5cb1bSDoug Moore */ 147*38f5cb1bSDoug Moore KASSERT(index != newind, ("%s: passing the same key value %jx", 148*38f5cb1bSDoug Moore __func__, (uintmax_t)index)); 149*38f5cb1bSDoug Moore _Static_assert(sizeof(long long) >= sizeof(uint64_t), 150*38f5cb1bSDoug Moore "uint64 too wide"); 151*38f5cb1bSDoug Moore _Static_assert(sizeof(uint64_t) * NBBY <= 152*38f5cb1bSDoug Moore (1 << (sizeof(node->pn_clev) * NBBY)), "pn_clev too narrow"); 153*38f5cb1bSDoug Moore node->pn_clev = rounddown(flsll(index ^ newind) - 1, PCTRIE_WIDTH); 154*38f5cb1bSDoug Moore node->pn_owner = pctrie_trimkey(index, node->pn_clev); 155f2cc1285SJeff Roberson return (node); 156f2cc1285SJeff Roberson } 157f2cc1285SJeff Roberson 158f2cc1285SJeff Roberson /* 159f2cc1285SJeff Roberson * Free radix node. 160f2cc1285SJeff Roberson */ 161f2cc1285SJeff Roberson static __inline void 162f2cc1285SJeff Roberson pctrie_node_put(struct pctrie *ptree, struct pctrie_node *node, 1638df38859SDoug Moore pctrie_free_t freefn) 164f2cc1285SJeff Roberson { 165f2cc1285SJeff Roberson #ifdef INVARIANTS 166f2cc1285SJeff Roberson int slot; 167f2cc1285SJeff Roberson 1688df38859SDoug Moore KASSERT(powerof2(node->pn_popmap), 1698df38859SDoug Moore ("pctrie_node_put: node %p has too many children %04x", node, 1708df38859SDoug Moore node->pn_popmap)); 1713c30b235SConrad Meyer for (slot = 0; slot < PCTRIE_COUNT; slot++) { 1728df38859SDoug Moore if ((node->pn_popmap & (1 << slot)) != 0) 1733c30b235SConrad Meyer continue; 1743c30b235SConrad Meyer KASSERT(smr_unserialized_load(&node->pn_child[slot], true) == 1752d2bcba7SDoug Moore PCTRIE_NULL, 1762d2bcba7SDoug Moore ("pctrie_node_put: node %p has a child", node)); 1773c30b235SConrad Meyer } 178f2cc1285SJeff Roberson #endif 179f2cc1285SJeff Roberson freefn(ptree, node); 180f2cc1285SJeff Roberson } 181f2cc1285SJeff Roberson 182f2cc1285SJeff Roberson /* 1833c30b235SConrad Meyer * Fetch a node pointer from a slot. 1843c30b235SConrad Meyer */ 1853c30b235SConrad Meyer static __inline struct pctrie_node * 1863c30b235SConrad Meyer pctrie_node_load(smr_pctnode_t *p, smr_t smr, enum pctrie_access access) 1873c30b235SConrad Meyer { 1883c30b235SConrad Meyer switch (access) { 1893c30b235SConrad Meyer case PCTRIE_UNSERIALIZED: 1903c30b235SConrad Meyer return (smr_unserialized_load(p, true)); 1913c30b235SConrad Meyer case PCTRIE_LOCKED: 1923c30b235SConrad Meyer return (smr_serialized_load(p, true)); 1933c30b235SConrad Meyer case PCTRIE_SMR: 1943c30b235SConrad Meyer return (smr_entered_load(p, smr)); 1953c30b235SConrad Meyer } 1963c30b235SConrad Meyer __assert_unreachable(); 1973c30b235SConrad Meyer } 1983c30b235SConrad Meyer 1993c30b235SConrad Meyer static __inline void 2003c30b235SConrad Meyer pctrie_node_store(smr_pctnode_t *p, void *v, enum pctrie_access access) 2013c30b235SConrad Meyer { 2023c30b235SConrad Meyer switch (access) { 2033c30b235SConrad Meyer case PCTRIE_UNSERIALIZED: 2043c30b235SConrad Meyer smr_unserialized_store(p, v, true); 2053c30b235SConrad Meyer break; 2063c30b235SConrad Meyer case PCTRIE_LOCKED: 2073c30b235SConrad Meyer smr_serialized_store(p, v, true); 2083c30b235SConrad Meyer break; 2093c30b235SConrad Meyer case PCTRIE_SMR: 2103c30b235SConrad Meyer panic("%s: Not supported in SMR section.", __func__); 2113c30b235SConrad Meyer break; 2123c30b235SConrad Meyer default: 2133c30b235SConrad Meyer __assert_unreachable(); 2143c30b235SConrad Meyer break; 2153c30b235SConrad Meyer } 2163c30b235SConrad Meyer } 2173c30b235SConrad Meyer 2183c30b235SConrad Meyer /* 219f2cc1285SJeff Roberson * Get the root node for a tree. 220f2cc1285SJeff Roberson */ 221f2cc1285SJeff Roberson static __inline struct pctrie_node * 2223c30b235SConrad Meyer pctrie_root_load(struct pctrie *ptree, smr_t smr, enum pctrie_access access) 223f2cc1285SJeff Roberson { 2243c30b235SConrad Meyer return (pctrie_node_load((smr_pctnode_t *)&ptree->pt_root, smr, access)); 225f2cc1285SJeff Roberson } 226f2cc1285SJeff Roberson 227f2cc1285SJeff Roberson /* 228f2cc1285SJeff Roberson * Set the root node for a tree. 229f2cc1285SJeff Roberson */ 230f2cc1285SJeff Roberson static __inline void 2313c30b235SConrad Meyer pctrie_root_store(struct pctrie *ptree, struct pctrie_node *node, 2323c30b235SConrad Meyer enum pctrie_access access) 233f2cc1285SJeff Roberson { 2343c30b235SConrad Meyer pctrie_node_store((smr_pctnode_t *)&ptree->pt_root, node, access); 235f2cc1285SJeff Roberson } 236f2cc1285SJeff Roberson 237f2cc1285SJeff Roberson /* 238f2cc1285SJeff Roberson * Returns TRUE if the specified node is a leaf and FALSE otherwise. 239f2cc1285SJeff Roberson */ 24004f9afaeSConrad Meyer static __inline bool 241f2cc1285SJeff Roberson pctrie_isleaf(struct pctrie_node *node) 242f2cc1285SJeff Roberson { 243f2cc1285SJeff Roberson 244f2cc1285SJeff Roberson return (((uintptr_t)node & PCTRIE_ISLEAF) != 0); 245f2cc1285SJeff Roberson } 246f2cc1285SJeff Roberson 247f2cc1285SJeff Roberson /* 2489cfed089SDoug Moore * Returns val with leaf bit set. 2499cfed089SDoug Moore */ 2509cfed089SDoug Moore static __inline void * 2519cfed089SDoug Moore pctrie_toleaf(uint64_t *val) 2529cfed089SDoug Moore { 2539cfed089SDoug Moore return ((void *)((uintptr_t)val | PCTRIE_ISLEAF)); 2549cfed089SDoug Moore } 2559cfed089SDoug Moore 2569cfed089SDoug Moore /* 257f2cc1285SJeff Roberson * Returns the associated val extracted from node. 258f2cc1285SJeff Roberson */ 259f2cc1285SJeff Roberson static __inline uint64_t * 260f2cc1285SJeff Roberson pctrie_toval(struct pctrie_node *node) 261f2cc1285SJeff Roberson { 262f2cc1285SJeff Roberson 263f2cc1285SJeff Roberson return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS)); 264f2cc1285SJeff Roberson } 265f2cc1285SJeff Roberson 266f2cc1285SJeff Roberson /* 26716e01c05SDoug Moore * Make 'child' a child of 'node'. 268f2cc1285SJeff Roberson */ 269f2cc1285SJeff Roberson static __inline void 270*38f5cb1bSDoug Moore pctrie_addnode(struct pctrie_node *node, uint64_t index, 27116e01c05SDoug Moore struct pctrie_node *child, enum pctrie_access access) 272f2cc1285SJeff Roberson { 273f2cc1285SJeff Roberson int slot; 274f2cc1285SJeff Roberson 275*38f5cb1bSDoug Moore slot = pctrie_slot(index, node->pn_clev); 27616e01c05SDoug Moore pctrie_node_store(&node->pn_child[slot], child, access); 2778df38859SDoug Moore node->pn_popmap ^= 1 << slot; 2788df38859SDoug Moore KASSERT((node->pn_popmap & (1 << slot)) != 0, 2798df38859SDoug Moore ("%s: bad popmap slot %d in node %p", __func__, slot, node)); 280f2cc1285SJeff Roberson } 281f2cc1285SJeff Roberson 282f2cc1285SJeff Roberson /* 283f2cc1285SJeff Roberson * Returns TRUE if it can be determined that key does not belong to the 284f2cc1285SJeff Roberson * specified node. Otherwise, returns FALSE. 285f2cc1285SJeff Roberson */ 28604f9afaeSConrad Meyer static __inline bool 287f2cc1285SJeff Roberson pctrie_keybarr(struct pctrie_node *node, uint64_t idx) 288f2cc1285SJeff Roberson { 289*38f5cb1bSDoug Moore idx = pctrie_trimkey(idx, node->pn_clev); 290f2cc1285SJeff Roberson return (idx != node->pn_owner); 291f2cc1285SJeff Roberson } 292f2cc1285SJeff Roberson 293f2cc1285SJeff Roberson /* 294f2cc1285SJeff Roberson * Internal helper for pctrie_reclaim_allnodes(). 295f2cc1285SJeff Roberson * This function is recursive. 296f2cc1285SJeff Roberson */ 297f2cc1285SJeff Roberson static void 298f2cc1285SJeff Roberson pctrie_reclaim_allnodes_int(struct pctrie *ptree, struct pctrie_node *node, 299f2cc1285SJeff Roberson pctrie_free_t freefn) 300f2cc1285SJeff Roberson { 3013c30b235SConrad Meyer struct pctrie_node *child; 302f2cc1285SJeff Roberson int slot; 303f2cc1285SJeff Roberson 3048df38859SDoug Moore while (node->pn_popmap != 0) { 3058df38859SDoug Moore slot = ffs(node->pn_popmap) - 1; 3063c30b235SConrad Meyer child = pctrie_node_load(&node->pn_child[slot], NULL, 3073c30b235SConrad Meyer PCTRIE_UNSERIALIZED); 3082d2bcba7SDoug Moore KASSERT(child != PCTRIE_NULL, 3092d2bcba7SDoug Moore ("%s: bad popmap slot %d in node %p", 3108df38859SDoug Moore __func__, slot, node)); 3113c30b235SConrad Meyer if (!pctrie_isleaf(child)) 3123c30b235SConrad Meyer pctrie_reclaim_allnodes_int(ptree, child, freefn); 3138df38859SDoug Moore node->pn_popmap ^= 1 << slot; 3142d2bcba7SDoug Moore pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL, 3153c30b235SConrad Meyer PCTRIE_UNSERIALIZED); 316f2cc1285SJeff Roberson } 3178df38859SDoug Moore pctrie_node_put(ptree, node, freefn); 318f2cc1285SJeff Roberson } 319f2cc1285SJeff Roberson 320f2cc1285SJeff Roberson /* 321f2cc1285SJeff Roberson * pctrie node zone initializer. 322f2cc1285SJeff Roberson */ 323f2cc1285SJeff Roberson int 324f2cc1285SJeff Roberson pctrie_zone_init(void *mem, int size __unused, int flags __unused) 325f2cc1285SJeff Roberson { 326f2cc1285SJeff Roberson struct pctrie_node *node; 327f2cc1285SJeff Roberson 328f2cc1285SJeff Roberson node = mem; 3298df38859SDoug Moore node->pn_popmap = 0; 3302d2bcba7SDoug Moore for (int i = 0; i < nitems(node->pn_child); i++) 3312d2bcba7SDoug Moore pctrie_node_store(&node->pn_child[i], PCTRIE_NULL, 3322d2bcba7SDoug Moore PCTRIE_UNSERIALIZED); 333f2cc1285SJeff Roberson return (0); 334f2cc1285SJeff Roberson } 335f2cc1285SJeff Roberson 336f2cc1285SJeff Roberson size_t 337f2cc1285SJeff Roberson pctrie_node_size(void) 338f2cc1285SJeff Roberson { 339f2cc1285SJeff Roberson 340f2cc1285SJeff Roberson return (sizeof(struct pctrie_node)); 341f2cc1285SJeff Roberson } 342f2cc1285SJeff Roberson 343f2cc1285SJeff Roberson /* 344f2cc1285SJeff Roberson * Inserts the key-value pair into the trie. 345f2cc1285SJeff Roberson * Panics if the key already exists. 346f2cc1285SJeff Roberson */ 347f2cc1285SJeff Roberson int 348f2cc1285SJeff Roberson pctrie_insert(struct pctrie *ptree, uint64_t *val, pctrie_alloc_t allocfn) 349f2cc1285SJeff Roberson { 350f2cc1285SJeff Roberson uint64_t index, newind; 3512d2bcba7SDoug Moore struct pctrie_node *leaf, *node, *parent; 3523c30b235SConrad Meyer smr_pctnode_t *parentp; 353f2cc1285SJeff Roberson int slot; 354f2cc1285SJeff Roberson 355f2cc1285SJeff Roberson index = *val; 35616e01c05SDoug Moore leaf = pctrie_toleaf(val); 357f2cc1285SJeff Roberson 358f2cc1285SJeff Roberson /* 359f2cc1285SJeff Roberson * The owner of record for root is not really important because it 360f2cc1285SJeff Roberson * will never be used. 361f2cc1285SJeff Roberson */ 3623c30b235SConrad Meyer node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); 3632d2bcba7SDoug Moore parent = NULL; 3642d2bcba7SDoug Moore for (;;) { 3652d2bcba7SDoug Moore if (pctrie_isleaf(node)) { 3662d2bcba7SDoug Moore if (node == PCTRIE_NULL) { 3672d2bcba7SDoug Moore if (parent == NULL) 3682d2bcba7SDoug Moore ptree->pt_root = leaf; 3692d2bcba7SDoug Moore else 370*38f5cb1bSDoug Moore pctrie_addnode(parent, index, leaf, 3712d2bcba7SDoug Moore PCTRIE_LOCKED); 372f2cc1285SJeff Roberson return (0); 373f2cc1285SJeff Roberson } 37416e01c05SDoug Moore newind = *pctrie_toval(node); 37516e01c05SDoug Moore if (newind == index) 376f2cc1285SJeff Roberson panic("%s: key %jx is already present", 377f2cc1285SJeff Roberson __func__, (uintmax_t)index); 378f2cc1285SJeff Roberson break; 3792d2bcba7SDoug Moore } 3802d2bcba7SDoug Moore if (pctrie_keybarr(node, index)) { 38116e01c05SDoug Moore newind = node->pn_owner; 38216e01c05SDoug Moore break; 38316e01c05SDoug Moore } 384f2cc1285SJeff Roberson slot = pctrie_slot(index, node->pn_clev); 3852d2bcba7SDoug Moore parent = node; 3862d2bcba7SDoug Moore node = pctrie_node_load(&node->pn_child[slot], NULL, 3873c30b235SConrad Meyer PCTRIE_LOCKED); 388f2cc1285SJeff Roberson } 389f2cc1285SJeff Roberson 390f2cc1285SJeff Roberson /* 391f2cc1285SJeff Roberson * A new node is needed because the right insertion level is reached. 392f2cc1285SJeff Roberson * Setup the new intermediate node and add the 2 children: the 39316e01c05SDoug Moore * new object and the older edge or object. 394f2cc1285SJeff Roberson */ 3952d2bcba7SDoug Moore parentp = (parent != NULL) ? &parent->pn_child[slot]: 3962d2bcba7SDoug Moore (smr_pctnode_t *)&ptree->pt_root; 397*38f5cb1bSDoug Moore parent = pctrie_node_get(ptree, allocfn, index, newind); 3982d2bcba7SDoug Moore if (parent == NULL) 399f2cc1285SJeff Roberson return (ENOMEM); 4003c30b235SConrad Meyer /* These writes are not yet visible due to ordering. */ 401*38f5cb1bSDoug Moore pctrie_addnode(parent, index, leaf, PCTRIE_UNSERIALIZED); 402*38f5cb1bSDoug Moore pctrie_addnode(parent, newind, node, PCTRIE_UNSERIALIZED); 4033c30b235SConrad Meyer /* Synchronize to make the above visible. */ 4042d2bcba7SDoug Moore pctrie_node_store(parentp, parent, PCTRIE_LOCKED); 405f2cc1285SJeff Roberson return (0); 406f2cc1285SJeff Roberson } 407f2cc1285SJeff Roberson 408f2cc1285SJeff Roberson /* 409f2cc1285SJeff Roberson * Returns the value stored at the index. If the index is not present, 410f2cc1285SJeff Roberson * NULL is returned. 411f2cc1285SJeff Roberson */ 4123c30b235SConrad Meyer static __always_inline uint64_t * 4133c30b235SConrad Meyer _pctrie_lookup(struct pctrie *ptree, uint64_t index, smr_t smr, 4143c30b235SConrad Meyer enum pctrie_access access) 415f2cc1285SJeff Roberson { 416f2cc1285SJeff Roberson struct pctrie_node *node; 417f2cc1285SJeff Roberson uint64_t *m; 418f2cc1285SJeff Roberson int slot; 419f2cc1285SJeff Roberson 4203c30b235SConrad Meyer node = pctrie_root_load(ptree, smr, access); 4212d2bcba7SDoug Moore for (;;) { 422f2cc1285SJeff Roberson if (pctrie_isleaf(node)) { 4232d2bcba7SDoug Moore if ((m = pctrie_toval(node)) != NULL && *m == index) 424f2cc1285SJeff Roberson return (m); 425f2cc1285SJeff Roberson break; 4263c30b235SConrad Meyer } 4273c30b235SConrad Meyer if (pctrie_keybarr(node, index)) 428f2cc1285SJeff Roberson break; 429f2cc1285SJeff Roberson slot = pctrie_slot(index, node->pn_clev); 4303c30b235SConrad Meyer node = pctrie_node_load(&node->pn_child[slot], smr, access); 431f2cc1285SJeff Roberson } 432f2cc1285SJeff Roberson return (NULL); 433f2cc1285SJeff Roberson } 434f2cc1285SJeff Roberson 435f2cc1285SJeff Roberson /* 4363c30b235SConrad Meyer * Returns the value stored at the index, assuming access is externally 4373c30b235SConrad Meyer * synchronized by a lock. 4383c30b235SConrad Meyer * 4393c30b235SConrad Meyer * If the index is not present, NULL is returned. 4403c30b235SConrad Meyer */ 4413c30b235SConrad Meyer uint64_t * 4423c30b235SConrad Meyer pctrie_lookup(struct pctrie *ptree, uint64_t index) 4433c30b235SConrad Meyer { 4443c30b235SConrad Meyer return (_pctrie_lookup(ptree, index, NULL, PCTRIE_LOCKED)); 4453c30b235SConrad Meyer } 4463c30b235SConrad Meyer 4473c30b235SConrad Meyer /* 4483c30b235SConrad Meyer * Returns the value stored at the index without requiring an external lock. 4493c30b235SConrad Meyer * 4503c30b235SConrad Meyer * If the index is not present, NULL is returned. 4513c30b235SConrad Meyer */ 4523c30b235SConrad Meyer uint64_t * 4533c30b235SConrad Meyer pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr) 4543c30b235SConrad Meyer { 4553c30b235SConrad Meyer uint64_t *res; 4563c30b235SConrad Meyer 4573c30b235SConrad Meyer smr_enter(smr); 4583c30b235SConrad Meyer res = _pctrie_lookup(ptree, index, smr, PCTRIE_SMR); 4593c30b235SConrad Meyer smr_exit(smr); 4603c30b235SConrad Meyer return (res); 4613c30b235SConrad Meyer } 4623c30b235SConrad Meyer 4633c30b235SConrad Meyer /* 4646f251ef2SDoug Moore * Returns the value with the least index that is greater than or equal to the 4656f251ef2SDoug Moore * specified index, or NULL if there are no such values. 4666f251ef2SDoug Moore * 4676f251ef2SDoug Moore * Requires that access be externally synchronized by a lock. 468f2cc1285SJeff Roberson */ 469f2cc1285SJeff Roberson uint64_t * 470f2cc1285SJeff Roberson pctrie_lookup_ge(struct pctrie *ptree, uint64_t index) 471f2cc1285SJeff Roberson { 4726f251ef2SDoug Moore struct pctrie_node *node, *succ; 473f2cc1285SJeff Roberson uint64_t *m; 474d1139b52SConrad Meyer int slot; 475f2cc1285SJeff Roberson 4766f251ef2SDoug Moore /* 4776f251ef2SDoug Moore * Descend the trie as if performing an ordinary lookup for the 4786f251ef2SDoug Moore * specified value. However, unlike an ordinary lookup, as we descend 4796f251ef2SDoug Moore * the trie, we use "succ" to remember the last branching-off point, 4806f251ef2SDoug Moore * that is, the interior node under which the least value that is both 4816f251ef2SDoug Moore * outside our current path down the trie and greater than the specified 4826f251ef2SDoug Moore * index resides. (The node's popmap makes it fast and easy to 4836f251ef2SDoug Moore * recognize a branching-off point.) If our ordinary lookup fails to 4846f251ef2SDoug Moore * yield a value that is greater than or equal to the specified index, 4856f251ef2SDoug Moore * then we will exit this loop and perform a lookup starting from 4866f251ef2SDoug Moore * "succ". If "succ" is not NULL, then that lookup is guaranteed to 4876f251ef2SDoug Moore * succeed. 4886f251ef2SDoug Moore */ 4893c30b235SConrad Meyer node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); 4906f251ef2SDoug Moore succ = NULL; 4912d2bcba7SDoug Moore for (;;) { 4926f251ef2SDoug Moore if (pctrie_isleaf(node)) { 4932d2bcba7SDoug Moore if ((m = pctrie_toval(node)) != NULL && *m >= index) 494f2cc1285SJeff Roberson return (m); 4956f251ef2SDoug Moore break; 496f2cc1285SJeff Roberson } 497f2cc1285SJeff Roberson if (pctrie_keybarr(node, index)) { 498f2cc1285SJeff Roberson /* 4996f251ef2SDoug Moore * If all values in this subtree are > index, then the 5006f251ef2SDoug Moore * least value in this subtree is the answer. 501f2cc1285SJeff Roberson */ 5026f251ef2SDoug Moore if (node->pn_owner > index) 5036f251ef2SDoug Moore succ = node; 5046f251ef2SDoug Moore break; 505f2cc1285SJeff Roberson } 506f2cc1285SJeff Roberson slot = pctrie_slot(index, node->pn_clev); 507f2cc1285SJeff Roberson 508f2cc1285SJeff Roberson /* 5096f251ef2SDoug Moore * Just in case the next search step leads to a subtree of all 5106f251ef2SDoug Moore * values < index, check popmap to see if a next bigger step, to 5116f251ef2SDoug Moore * a subtree of all pages with values > index, is available. If 5126f251ef2SDoug Moore * so, remember to restart the search here. 513f2cc1285SJeff Roberson */ 5146f251ef2SDoug Moore if ((node->pn_popmap >> slot) > 1) 5156f251ef2SDoug Moore succ = node; 5166f251ef2SDoug Moore node = pctrie_node_load(&node->pn_child[slot], NULL, 5176f251ef2SDoug Moore PCTRIE_LOCKED); 518f2cc1285SJeff Roberson } 519f2cc1285SJeff Roberson 520f2cc1285SJeff Roberson /* 5216f251ef2SDoug Moore * Restart the search from the last place visited in the subtree that 5226f251ef2SDoug Moore * included some values > index, if there was such a place. 5236f251ef2SDoug Moore */ 5246f251ef2SDoug Moore if (succ == NULL) 5256f251ef2SDoug Moore return (NULL); 5266f251ef2SDoug Moore if (succ != node) { 5276f251ef2SDoug Moore /* 5286f251ef2SDoug Moore * Take a step to the next bigger sibling of the node chosen 5296f251ef2SDoug Moore * last time. In that subtree, all values > index. 5306f251ef2SDoug Moore */ 5316f251ef2SDoug Moore slot = pctrie_slot(index, succ->pn_clev) + 1; 5326f251ef2SDoug Moore KASSERT((succ->pn_popmap >> slot) != 0, 5336f251ef2SDoug Moore ("%s: no popmap siblings past slot %d in node %p", 5346f251ef2SDoug Moore __func__, slot, succ)); 5356f251ef2SDoug Moore slot += ffs(succ->pn_popmap >> slot) - 1; 5366f251ef2SDoug Moore succ = pctrie_node_load(&succ->pn_child[slot], NULL, 5376f251ef2SDoug Moore PCTRIE_LOCKED); 5386f251ef2SDoug Moore } 5396f251ef2SDoug Moore 5406f251ef2SDoug Moore /* 5416f251ef2SDoug Moore * Find the value in the subtree rooted at "succ" with the least index. 5426f251ef2SDoug Moore */ 5436f251ef2SDoug Moore while (!pctrie_isleaf(succ)) { 5446f251ef2SDoug Moore KASSERT(succ->pn_popmap != 0, 5456f251ef2SDoug Moore ("%s: no popmap children in node %p", __func__, succ)); 5466f251ef2SDoug Moore slot = ffs(succ->pn_popmap) - 1; 5476f251ef2SDoug Moore succ = pctrie_node_load(&succ->pn_child[slot], NULL, 5486f251ef2SDoug Moore PCTRIE_LOCKED); 5496f251ef2SDoug Moore } 5506f251ef2SDoug Moore return (pctrie_toval(succ)); 5516f251ef2SDoug Moore } 5526f251ef2SDoug Moore 5536f251ef2SDoug Moore /* 5546f251ef2SDoug Moore * Returns the value with the greatest index that is less than or equal to the 5556f251ef2SDoug Moore * specified index, or NULL if there are no such values. 5566f251ef2SDoug Moore * 5576f251ef2SDoug Moore * Requires that access be externally synchronized by a lock. 558f2cc1285SJeff Roberson */ 559f2cc1285SJeff Roberson uint64_t * 560f2cc1285SJeff Roberson pctrie_lookup_le(struct pctrie *ptree, uint64_t index) 561f2cc1285SJeff Roberson { 5626f251ef2SDoug Moore struct pctrie_node *node, *pred; 563f2cc1285SJeff Roberson uint64_t *m; 564d1139b52SConrad Meyer int slot; 565f2cc1285SJeff Roberson 5666f251ef2SDoug Moore /* 5676f251ef2SDoug Moore * Mirror the implementation of pctrie_lookup_ge, described above. 5686f251ef2SDoug Moore */ 5693c30b235SConrad Meyer node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); 5706f251ef2SDoug Moore pred = NULL; 5712d2bcba7SDoug Moore for (;;) { 5726f251ef2SDoug Moore if (pctrie_isleaf(node)) { 5732d2bcba7SDoug Moore if ((m = pctrie_toval(node)) != NULL && *m <= index) 574f2cc1285SJeff Roberson return (m); 5756f251ef2SDoug Moore break; 576f2cc1285SJeff Roberson } 577f2cc1285SJeff Roberson if (pctrie_keybarr(node, index)) { 5786f251ef2SDoug Moore if (node->pn_owner < index) 5796f251ef2SDoug Moore pred = node; 5806f251ef2SDoug Moore break; 581f2cc1285SJeff Roberson } 582f2cc1285SJeff Roberson slot = pctrie_slot(index, node->pn_clev); 5836f251ef2SDoug Moore if ((node->pn_popmap & ((1 << slot) - 1)) != 0) 5846f251ef2SDoug Moore pred = node; 5856f251ef2SDoug Moore node = pctrie_node_load(&node->pn_child[slot], NULL, 5863c30b235SConrad Meyer PCTRIE_LOCKED); 5878df38859SDoug Moore } 5886f251ef2SDoug Moore if (pred == NULL) 5896f251ef2SDoug Moore return (NULL); 5906f251ef2SDoug Moore if (pred != node) { 5916f251ef2SDoug Moore slot = pctrie_slot(index, pred->pn_clev); 5926f251ef2SDoug Moore KASSERT((pred->pn_popmap & ((1 << slot) - 1)) != 0, 5936f251ef2SDoug Moore ("%s: no popmap siblings before slot %d in node %p", 5946f251ef2SDoug Moore __func__, slot, pred)); 5956f251ef2SDoug Moore slot = fls(pred->pn_popmap & ((1 << slot) - 1)) - 1; 5966f251ef2SDoug Moore pred = pctrie_node_load(&pred->pn_child[slot], NULL, 5976f251ef2SDoug Moore PCTRIE_LOCKED); 598f2cc1285SJeff Roberson } 5996f251ef2SDoug Moore while (!pctrie_isleaf(pred)) { 6006f251ef2SDoug Moore KASSERT(pred->pn_popmap != 0, 6016f251ef2SDoug Moore ("%s: no popmap children in node %p", __func__, pred)); 6026f251ef2SDoug Moore slot = fls(pred->pn_popmap) - 1; 6036f251ef2SDoug Moore pred = pctrie_node_load(&pred->pn_child[slot], NULL, 6046f251ef2SDoug Moore PCTRIE_LOCKED); 6056f251ef2SDoug Moore } 6066f251ef2SDoug Moore return (pctrie_toval(pred)); 607f2cc1285SJeff Roberson } 608f2cc1285SJeff Roberson 609f2cc1285SJeff Roberson /* 610f2cc1285SJeff Roberson * Remove the specified index from the tree. 611f2cc1285SJeff Roberson * Panics if the key is not present. 612f2cc1285SJeff Roberson */ 613f2cc1285SJeff Roberson void 614f2cc1285SJeff Roberson pctrie_remove(struct pctrie *ptree, uint64_t index, pctrie_free_t freefn) 615f2cc1285SJeff Roberson { 6162d2bcba7SDoug Moore struct pctrie_node *child, *node, *parent; 617f2cc1285SJeff Roberson uint64_t *m; 6188df38859SDoug Moore int slot; 619f2cc1285SJeff Roberson 6202d2bcba7SDoug Moore node = NULL; 6212d2bcba7SDoug Moore child = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); 6222d2bcba7SDoug Moore for (;;) { 6232d2bcba7SDoug Moore if (pctrie_isleaf(child)) 6242d2bcba7SDoug Moore break; 6252d2bcba7SDoug Moore parent = node; 6262d2bcba7SDoug Moore node = child; 6272d2bcba7SDoug Moore slot = pctrie_slot(index, node->pn_clev); 6282d2bcba7SDoug Moore child = pctrie_node_load(&node->pn_child[slot], NULL, 6292d2bcba7SDoug Moore PCTRIE_LOCKED); 6302d2bcba7SDoug Moore } 6312d2bcba7SDoug Moore if ((m = pctrie_toval(child)) == NULL || *m != index) 6322d2bcba7SDoug Moore panic("%s: key not found", __func__); 6332d2bcba7SDoug Moore if (node == NULL) { 6342d2bcba7SDoug Moore pctrie_root_store(ptree, PCTRIE_NULL, PCTRIE_LOCKED); 635f2cc1285SJeff Roberson return; 636f2cc1285SJeff Roberson } 6378df38859SDoug Moore KASSERT((node->pn_popmap & (1 << slot)) != 0, 6388df38859SDoug Moore ("%s: bad popmap slot %d in node %p", 6398df38859SDoug Moore __func__, slot, node)); 6408df38859SDoug Moore node->pn_popmap ^= 1 << slot; 6412d2bcba7SDoug Moore pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL, PCTRIE_LOCKED); 6428df38859SDoug Moore if (!powerof2(node->pn_popmap)) 6432d2bcba7SDoug Moore return; 6442d2bcba7SDoug Moore KASSERT(node->pn_popmap != 0, ("%s: bad popmap all zeroes", __func__)); 6458df38859SDoug Moore slot = ffs(node->pn_popmap) - 1; 6462d2bcba7SDoug Moore child = pctrie_node_load(&node->pn_child[slot], NULL, PCTRIE_LOCKED); 6472d2bcba7SDoug Moore KASSERT(child != PCTRIE_NULL, 6482d2bcba7SDoug Moore ("%s: bad popmap slot %d in node %p", __func__, slot, node)); 649f2cc1285SJeff Roberson if (parent == NULL) 6502d2bcba7SDoug Moore pctrie_root_store(ptree, child, PCTRIE_LOCKED); 651f2cc1285SJeff Roberson else { 652f2cc1285SJeff Roberson slot = pctrie_slot(index, parent->pn_clev); 6532d2bcba7SDoug Moore KASSERT(node == 6542d2bcba7SDoug Moore pctrie_node_load(&parent->pn_child[slot], NULL, 6552d2bcba7SDoug Moore PCTRIE_LOCKED), ("%s: invalid child value", __func__)); 6562d2bcba7SDoug Moore pctrie_node_store(&parent->pn_child[slot], child, 6573c30b235SConrad Meyer PCTRIE_LOCKED); 658f2cc1285SJeff Roberson } 6593c30b235SConrad Meyer /* 6603c30b235SConrad Meyer * The child is still valid and we can not zero the 6613c30b235SConrad Meyer * pointer until all SMR references are gone. 6623c30b235SConrad Meyer */ 6638df38859SDoug Moore pctrie_node_put(ptree, node, freefn); 664f2cc1285SJeff Roberson } 665f2cc1285SJeff Roberson 666f2cc1285SJeff Roberson /* 667f2cc1285SJeff Roberson * Remove and free all the nodes from the tree. 668f2cc1285SJeff Roberson * This function is recursive but there is a tight control on it as the 669f2cc1285SJeff Roberson * maximum depth of the tree is fixed. 670f2cc1285SJeff Roberson */ 671f2cc1285SJeff Roberson void 672f2cc1285SJeff Roberson pctrie_reclaim_allnodes(struct pctrie *ptree, pctrie_free_t freefn) 673f2cc1285SJeff Roberson { 674f2cc1285SJeff Roberson struct pctrie_node *root; 675f2cc1285SJeff Roberson 6763c30b235SConrad Meyer root = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); 6772d2bcba7SDoug Moore if (root == PCTRIE_NULL) 678f2cc1285SJeff Roberson return; 6792d2bcba7SDoug Moore pctrie_root_store(ptree, PCTRIE_NULL, PCTRIE_UNSERIALIZED); 680f2cc1285SJeff Roberson if (!pctrie_isleaf(root)) 681f2cc1285SJeff Roberson pctrie_reclaim_allnodes_int(ptree, root, freefn); 682f2cc1285SJeff Roberson } 683f2cc1285SJeff Roberson 684f2cc1285SJeff Roberson #ifdef DDB 685f2cc1285SJeff Roberson /* 686f2cc1285SJeff Roberson * Show details about the given node. 687f2cc1285SJeff Roberson */ 688f2cc1285SJeff Roberson DB_SHOW_COMMAND(pctrienode, db_show_pctrienode) 689f2cc1285SJeff Roberson { 6903c30b235SConrad Meyer struct pctrie_node *node, *tmp; 6918df38859SDoug Moore int slot; 6928df38859SDoug Moore pn_popmap_t popmap; 693f2cc1285SJeff Roberson 694f2cc1285SJeff Roberson if (!have_addr) 695f2cc1285SJeff Roberson return; 696f2cc1285SJeff Roberson node = (struct pctrie_node *)addr; 6978df38859SDoug Moore db_printf("node %p, owner %jx, children popmap %04x, level %u:\n", 6988df38859SDoug Moore (void *)node, (uintmax_t)node->pn_owner, node->pn_popmap, 699*38f5cb1bSDoug Moore node->pn_clev / PCTRIE_WIDTH); 7008df38859SDoug Moore for (popmap = node->pn_popmap; popmap != 0; popmap ^= 1 << slot) { 7018df38859SDoug Moore slot = ffs(popmap) - 1; 7028df38859SDoug Moore tmp = pctrie_node_load(&node->pn_child[slot], NULL, 7033c30b235SConrad Meyer PCTRIE_UNSERIALIZED); 704f2cc1285SJeff Roberson db_printf("slot: %d, val: %p, value: %p, clev: %d\n", 7058df38859SDoug Moore slot, (void *)tmp, 7063c30b235SConrad Meyer pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL, 707*38f5cb1bSDoug Moore node->pn_clev / PCTRIE_WIDTH); 708f2cc1285SJeff Roberson } 7093c30b235SConrad Meyer } 710f2cc1285SJeff Roberson #endif /* DDB */ 711