18a36da99SPedro F. Giffuni /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 38a36da99SPedro F. Giffuni * 4f2cc1285SJeff Roberson * Copyright (c) 2013 EMC Corp. 5f2cc1285SJeff Roberson * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org> 6f2cc1285SJeff Roberson * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com> 7f2cc1285SJeff Roberson * All rights reserved. 8f2cc1285SJeff Roberson * 9f2cc1285SJeff Roberson * Redistribution and use in source and binary forms, with or without 10f2cc1285SJeff Roberson * modification, are permitted provided that the following conditions 11f2cc1285SJeff Roberson * are met: 12f2cc1285SJeff Roberson * 1. Redistributions of source code must retain the above copyright 13f2cc1285SJeff Roberson * notice, this list of conditions and the following disclaimer. 14f2cc1285SJeff Roberson * 2. Redistributions in binary form must reproduce the above copyright 15f2cc1285SJeff Roberson * notice, this list of conditions and the following disclaimer in the 16f2cc1285SJeff Roberson * documentation and/or other materials provided with the distribution. 17f2cc1285SJeff Roberson * 18f2cc1285SJeff Roberson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19f2cc1285SJeff Roberson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20f2cc1285SJeff Roberson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21f2cc1285SJeff Roberson * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22f2cc1285SJeff Roberson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23f2cc1285SJeff Roberson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24f2cc1285SJeff Roberson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25f2cc1285SJeff Roberson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26f2cc1285SJeff Roberson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27f2cc1285SJeff Roberson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28f2cc1285SJeff Roberson * SUCH DAMAGE. 29f2cc1285SJeff Roberson * 30f2cc1285SJeff Roberson */ 31f2cc1285SJeff Roberson 32f2cc1285SJeff Roberson /* 33f2cc1285SJeff Roberson * Path-compressed radix trie implementation. 34f2cc1285SJeff Roberson * 35f2cc1285SJeff Roberson * The implementation takes into account the following rationale: 36f2cc1285SJeff Roberson * - Size of the nodes should be as small as possible but still big enough 37f2cc1285SJeff Roberson * to avoid a large maximum depth for the trie. This is a balance 38f2cc1285SJeff Roberson * between the necessity to not wire too much physical memory for the nodes 39f2cc1285SJeff Roberson * and the necessity to avoid too much cache pollution during the trie 40f2cc1285SJeff Roberson * operations. 41f2cc1285SJeff Roberson * - There is not a huge bias toward the number of lookup operations over 42f2cc1285SJeff Roberson * the number of insert and remove operations. This basically implies 43f2cc1285SJeff Roberson * that optimizations supposedly helping one operation but hurting the 44f2cc1285SJeff Roberson * other might be carefully evaluated. 45f2cc1285SJeff Roberson * - On average not many nodes are expected to be fully populated, hence 46f2cc1285SJeff Roberson * level compression may just complicate things. 47f2cc1285SJeff Roberson */ 48f2cc1285SJeff Roberson 49f2cc1285SJeff Roberson #include <sys/cdefs.h> 50f2cc1285SJeff Roberson __FBSDID("$FreeBSD$"); 51f2cc1285SJeff Roberson 52f2cc1285SJeff Roberson #include "opt_ddb.h" 53f2cc1285SJeff Roberson 54f2cc1285SJeff Roberson #include <sys/param.h> 55f2cc1285SJeff Roberson #include <sys/systm.h> 56f2cc1285SJeff Roberson #include <sys/kernel.h> 5705963ea4SDoug Moore #include <sys/libkern.h> 58f2cc1285SJeff Roberson #include <sys/pctrie.h> 593c30b235SConrad Meyer #include <sys/proc.h> /* smr.h depends on struct thread. */ 603c30b235SConrad Meyer #include <sys/smr.h> 613c30b235SConrad Meyer #include <sys/smr_types.h> 62f2cc1285SJeff Roberson 63f2cc1285SJeff Roberson #ifdef DDB 64f2cc1285SJeff Roberson #include <ddb/ddb.h> 65f2cc1285SJeff Roberson #endif 66f2cc1285SJeff Roberson 67f2cc1285SJeff Roberson #define PCTRIE_MASK (PCTRIE_COUNT - 1) 6855e0987aSPedro F. Giffuni #define PCTRIE_LIMIT (howmany(sizeof(uint64_t) * NBBY, PCTRIE_WIDTH) - 1) 69f2cc1285SJeff Roberson 708df38859SDoug Moore #if PCTRIE_WIDTH == 3 718df38859SDoug Moore typedef uint8_t pn_popmap_t; 728df38859SDoug Moore #elif PCTRIE_WIDTH == 4 738df38859SDoug Moore typedef uint16_t pn_popmap_t; 748df38859SDoug Moore #elif PCTRIE_WIDTH == 5 758df38859SDoug Moore typedef uint32_t pn_popmap_t; 768df38859SDoug Moore #else 778df38859SDoug Moore #error Unsupported width 788df38859SDoug Moore #endif 798df38859SDoug Moore _Static_assert(sizeof(pn_popmap_t) <= sizeof(int), 808df38859SDoug Moore "pn_popmap_t too wide"); 818df38859SDoug Moore 82f2cc1285SJeff Roberson /* Flag bits stored in node pointers. */ 83f2cc1285SJeff Roberson #define PCTRIE_ISLEAF 0x1 84f2cc1285SJeff Roberson #define PCTRIE_FLAGS 0x1 85f2cc1285SJeff Roberson #define PCTRIE_PAD PCTRIE_FLAGS 86f2cc1285SJeff Roberson 87f2cc1285SJeff Roberson /* Returns one unit associated with specified level. */ 88f2cc1285SJeff Roberson #define PCTRIE_UNITLEVEL(lev) \ 89f2cc1285SJeff Roberson ((uint64_t)1 << ((lev) * PCTRIE_WIDTH)) 90f2cc1285SJeff Roberson 913c30b235SConrad Meyer struct pctrie_node; 923c30b235SConrad Meyer typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t; 933c30b235SConrad Meyer 94f2cc1285SJeff Roberson struct pctrie_node { 95f2cc1285SJeff Roberson uint64_t pn_owner; /* Owner of record. */ 968df38859SDoug Moore pn_popmap_t pn_popmap; /* Valid children. */ 973c30b235SConrad Meyer uint8_t pn_clev; /* Current level. */ 983c30b235SConrad Meyer smr_pctnode_t pn_child[PCTRIE_COUNT]; /* Child nodes. */ 99f2cc1285SJeff Roberson }; 100f2cc1285SJeff Roberson 1013c30b235SConrad Meyer enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED }; 1023c30b235SConrad Meyer 1033c30b235SConrad Meyer static __inline void pctrie_node_store(smr_pctnode_t *p, void *val, 1043c30b235SConrad Meyer enum pctrie_access access); 1053c30b235SConrad Meyer 106f2cc1285SJeff Roberson /* 107da72505fSDoug Moore * Return the position in the array for a given level. 108da72505fSDoug Moore */ 109da72505fSDoug Moore static __inline int 110da72505fSDoug Moore pctrie_slot(uint64_t index, uint16_t level) 111da72505fSDoug Moore { 112da72505fSDoug Moore return ((index >> (level * PCTRIE_WIDTH)) & PCTRIE_MASK); 113da72505fSDoug Moore } 114da72505fSDoug Moore 115da72505fSDoug Moore /* Computes the key (index) with the low-order 'level' radix-digits zeroed. */ 116da72505fSDoug Moore static __inline uint64_t 117da72505fSDoug Moore pctrie_trimkey(uint64_t index, uint16_t level) 118da72505fSDoug Moore { 119da72505fSDoug Moore return (index & -PCTRIE_UNITLEVEL(level)); 120da72505fSDoug Moore } 121da72505fSDoug Moore 122da72505fSDoug Moore /* 123f2cc1285SJeff Roberson * Allocate a node. Pre-allocation should ensure that the request 124f2cc1285SJeff Roberson * will always be satisfied. 125f2cc1285SJeff Roberson */ 1263c30b235SConrad Meyer static struct pctrie_node * 127da72505fSDoug Moore pctrie_node_get(struct pctrie *ptree, pctrie_alloc_t allocfn, uint64_t index, 128da72505fSDoug Moore uint16_t clevel) 129f2cc1285SJeff Roberson { 130f2cc1285SJeff Roberson struct pctrie_node *node; 131f2cc1285SJeff Roberson 132f2cc1285SJeff Roberson node = allocfn(ptree); 133f2cc1285SJeff Roberson if (node == NULL) 134f2cc1285SJeff Roberson return (NULL); 1353c30b235SConrad Meyer 1363c30b235SConrad Meyer /* 1373c30b235SConrad Meyer * We want to clear the last child pointer after the final section 1383c30b235SConrad Meyer * has exited so lookup can not return false negatives. It is done 1393c30b235SConrad Meyer * here because it will be cache-cold in the dtor callback. 1403c30b235SConrad Meyer */ 1418df38859SDoug Moore if (node->pn_popmap != 0) { 1428df38859SDoug Moore pctrie_node_store(&node->pn_child[ffs(node->pn_popmap) - 1], 1438df38859SDoug Moore NULL, PCTRIE_UNSERIALIZED); 1448df38859SDoug Moore node->pn_popmap = 0; 1453c30b235SConrad Meyer } 146da72505fSDoug Moore node->pn_owner = pctrie_trimkey(index, clevel + 1); 147f2cc1285SJeff Roberson node->pn_clev = clevel; 148f2cc1285SJeff Roberson return (node); 149f2cc1285SJeff Roberson } 150f2cc1285SJeff Roberson 151f2cc1285SJeff Roberson /* 152f2cc1285SJeff Roberson * Free radix node. 153f2cc1285SJeff Roberson */ 154f2cc1285SJeff Roberson static __inline void 155f2cc1285SJeff Roberson pctrie_node_put(struct pctrie *ptree, struct pctrie_node *node, 1568df38859SDoug Moore pctrie_free_t freefn) 157f2cc1285SJeff Roberson { 158f2cc1285SJeff Roberson #ifdef INVARIANTS 159f2cc1285SJeff Roberson int slot; 160f2cc1285SJeff Roberson 1618df38859SDoug Moore KASSERT(powerof2(node->pn_popmap), 1628df38859SDoug Moore ("pctrie_node_put: node %p has too many children %04x", node, 1638df38859SDoug Moore node->pn_popmap)); 1643c30b235SConrad Meyer for (slot = 0; slot < PCTRIE_COUNT; slot++) { 1658df38859SDoug Moore if ((node->pn_popmap & (1 << slot)) != 0) 1663c30b235SConrad Meyer continue; 1673c30b235SConrad Meyer KASSERT(smr_unserialized_load(&node->pn_child[slot], true) == 1683c30b235SConrad Meyer NULL, ("pctrie_node_put: node %p has a child", node)); 1693c30b235SConrad Meyer } 170f2cc1285SJeff Roberson #endif 171f2cc1285SJeff Roberson freefn(ptree, node); 172f2cc1285SJeff Roberson } 173f2cc1285SJeff Roberson 174f2cc1285SJeff Roberson /* 1753c30b235SConrad Meyer * Fetch a node pointer from a slot. 1763c30b235SConrad Meyer */ 1773c30b235SConrad Meyer static __inline struct pctrie_node * 1783c30b235SConrad Meyer pctrie_node_load(smr_pctnode_t *p, smr_t smr, enum pctrie_access access) 1793c30b235SConrad Meyer { 1803c30b235SConrad Meyer switch (access) { 1813c30b235SConrad Meyer case PCTRIE_UNSERIALIZED: 1823c30b235SConrad Meyer return (smr_unserialized_load(p, true)); 1833c30b235SConrad Meyer case PCTRIE_LOCKED: 1843c30b235SConrad Meyer return (smr_serialized_load(p, true)); 1853c30b235SConrad Meyer case PCTRIE_SMR: 1863c30b235SConrad Meyer return (smr_entered_load(p, smr)); 1873c30b235SConrad Meyer } 1883c30b235SConrad Meyer __assert_unreachable(); 1893c30b235SConrad Meyer } 1903c30b235SConrad Meyer 1913c30b235SConrad Meyer static __inline void 1923c30b235SConrad Meyer pctrie_node_store(smr_pctnode_t *p, void *v, enum pctrie_access access) 1933c30b235SConrad Meyer { 1943c30b235SConrad Meyer switch (access) { 1953c30b235SConrad Meyer case PCTRIE_UNSERIALIZED: 1963c30b235SConrad Meyer smr_unserialized_store(p, v, true); 1973c30b235SConrad Meyer break; 1983c30b235SConrad Meyer case PCTRIE_LOCKED: 1993c30b235SConrad Meyer smr_serialized_store(p, v, true); 2003c30b235SConrad Meyer break; 2013c30b235SConrad Meyer case PCTRIE_SMR: 2023c30b235SConrad Meyer panic("%s: Not supported in SMR section.", __func__); 2033c30b235SConrad Meyer break; 2043c30b235SConrad Meyer default: 2053c30b235SConrad Meyer __assert_unreachable(); 2063c30b235SConrad Meyer break; 2073c30b235SConrad Meyer } 2083c30b235SConrad Meyer } 2093c30b235SConrad Meyer 2103c30b235SConrad Meyer /* 211f2cc1285SJeff Roberson * Get the root node for a tree. 212f2cc1285SJeff Roberson */ 213f2cc1285SJeff Roberson static __inline struct pctrie_node * 2143c30b235SConrad Meyer pctrie_root_load(struct pctrie *ptree, smr_t smr, enum pctrie_access access) 215f2cc1285SJeff Roberson { 2163c30b235SConrad Meyer return (pctrie_node_load((smr_pctnode_t *)&ptree->pt_root, smr, access)); 217f2cc1285SJeff Roberson } 218f2cc1285SJeff Roberson 219f2cc1285SJeff Roberson /* 220f2cc1285SJeff Roberson * Set the root node for a tree. 221f2cc1285SJeff Roberson */ 222f2cc1285SJeff Roberson static __inline void 2233c30b235SConrad Meyer pctrie_root_store(struct pctrie *ptree, struct pctrie_node *node, 2243c30b235SConrad Meyer enum pctrie_access access) 225f2cc1285SJeff Roberson { 2263c30b235SConrad Meyer pctrie_node_store((smr_pctnode_t *)&ptree->pt_root, node, access); 227f2cc1285SJeff Roberson } 228f2cc1285SJeff Roberson 229f2cc1285SJeff Roberson /* 230f2cc1285SJeff Roberson * Returns TRUE if the specified node is a leaf and FALSE otherwise. 231f2cc1285SJeff Roberson */ 23204f9afaeSConrad Meyer static __inline bool 233f2cc1285SJeff Roberson pctrie_isleaf(struct pctrie_node *node) 234f2cc1285SJeff Roberson { 235f2cc1285SJeff Roberson 236f2cc1285SJeff Roberson return (((uintptr_t)node & PCTRIE_ISLEAF) != 0); 237f2cc1285SJeff Roberson } 238f2cc1285SJeff Roberson 239f2cc1285SJeff Roberson /* 2409cfed089SDoug Moore * Returns val with leaf bit set. 2419cfed089SDoug Moore */ 2429cfed089SDoug Moore static __inline void * 2439cfed089SDoug Moore pctrie_toleaf(uint64_t *val) 2449cfed089SDoug Moore { 2459cfed089SDoug Moore return ((void *)((uintptr_t)val | PCTRIE_ISLEAF)); 2469cfed089SDoug Moore } 2479cfed089SDoug Moore 2489cfed089SDoug Moore /* 249f2cc1285SJeff Roberson * Returns the associated val extracted from node. 250f2cc1285SJeff Roberson */ 251f2cc1285SJeff Roberson static __inline uint64_t * 252f2cc1285SJeff Roberson pctrie_toval(struct pctrie_node *node) 253f2cc1285SJeff Roberson { 254f2cc1285SJeff Roberson 255f2cc1285SJeff Roberson return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS)); 256f2cc1285SJeff Roberson } 257f2cc1285SJeff Roberson 258f2cc1285SJeff Roberson /* 25916e01c05SDoug Moore * Make 'child' a child of 'node'. 260f2cc1285SJeff Roberson */ 261f2cc1285SJeff Roberson static __inline void 26216e01c05SDoug Moore pctrie_addnode(struct pctrie_node *node, uint64_t index, uint16_t clev, 26316e01c05SDoug Moore struct pctrie_node *child, enum pctrie_access access) 264f2cc1285SJeff Roberson { 265f2cc1285SJeff Roberson int slot; 266f2cc1285SJeff Roberson 267f2cc1285SJeff Roberson slot = pctrie_slot(index, clev); 26816e01c05SDoug Moore pctrie_node_store(&node->pn_child[slot], child, access); 2698df38859SDoug Moore node->pn_popmap ^= 1 << slot; 2708df38859SDoug Moore KASSERT((node->pn_popmap & (1 << slot)) != 0, 2718df38859SDoug Moore ("%s: bad popmap slot %d in node %p", __func__, slot, node)); 272f2cc1285SJeff Roberson } 273f2cc1285SJeff Roberson 274f2cc1285SJeff Roberson /* 27505963ea4SDoug Moore * Returns the level where two keys differ. 276f2cc1285SJeff Roberson * It cannot accept 2 equal keys. 277f2cc1285SJeff Roberson */ 278f2cc1285SJeff Roberson static __inline uint16_t 279f2cc1285SJeff Roberson pctrie_keydiff(uint64_t index1, uint64_t index2) 280f2cc1285SJeff Roberson { 281f2cc1285SJeff Roberson 282f2cc1285SJeff Roberson KASSERT(index1 != index2, ("%s: passing the same key value %jx", 283f2cc1285SJeff Roberson __func__, (uintmax_t)index1)); 28405963ea4SDoug Moore CTASSERT(sizeof(long long) >= sizeof(uint64_t)); 285f2cc1285SJeff Roberson 28605963ea4SDoug Moore /* 28705963ea4SDoug Moore * From the highest-order bit where the indexes differ, 28805963ea4SDoug Moore * compute the highest level in the trie where they differ. 28905963ea4SDoug Moore */ 29005963ea4SDoug Moore return ((flsll(index1 ^ index2) - 1) / PCTRIE_WIDTH); 291f2cc1285SJeff Roberson } 292f2cc1285SJeff Roberson 293f2cc1285SJeff Roberson /* 294f2cc1285SJeff Roberson * Returns TRUE if it can be determined that key does not belong to the 295f2cc1285SJeff Roberson * specified node. Otherwise, returns FALSE. 296f2cc1285SJeff Roberson */ 29704f9afaeSConrad Meyer static __inline bool 298f2cc1285SJeff Roberson pctrie_keybarr(struct pctrie_node *node, uint64_t idx) 299f2cc1285SJeff Roberson { 300f2cc1285SJeff Roberson 301f2cc1285SJeff Roberson if (node->pn_clev < PCTRIE_LIMIT) { 302f2cc1285SJeff Roberson idx = pctrie_trimkey(idx, node->pn_clev + 1); 303f2cc1285SJeff Roberson return (idx != node->pn_owner); 304f2cc1285SJeff Roberson } 30504f9afaeSConrad Meyer return (false); 306f2cc1285SJeff Roberson } 307f2cc1285SJeff Roberson 308f2cc1285SJeff Roberson /* 309f2cc1285SJeff Roberson * Internal helper for pctrie_reclaim_allnodes(). 310f2cc1285SJeff Roberson * This function is recursive. 311f2cc1285SJeff Roberson */ 312f2cc1285SJeff Roberson static void 313f2cc1285SJeff Roberson pctrie_reclaim_allnodes_int(struct pctrie *ptree, struct pctrie_node *node, 314f2cc1285SJeff Roberson pctrie_free_t freefn) 315f2cc1285SJeff Roberson { 3163c30b235SConrad Meyer struct pctrie_node *child; 317f2cc1285SJeff Roberson int slot; 318f2cc1285SJeff Roberson 3198df38859SDoug Moore while (node->pn_popmap != 0) { 3208df38859SDoug Moore slot = ffs(node->pn_popmap) - 1; 3213c30b235SConrad Meyer child = pctrie_node_load(&node->pn_child[slot], NULL, 3223c30b235SConrad Meyer PCTRIE_UNSERIALIZED); 3238df38859SDoug Moore KASSERT(child != NULL, ("%s: bad popmap slot %d in node %p", 3248df38859SDoug Moore __func__, slot, node)); 3253c30b235SConrad Meyer if (!pctrie_isleaf(child)) 3263c30b235SConrad Meyer pctrie_reclaim_allnodes_int(ptree, child, freefn); 3278df38859SDoug Moore node->pn_popmap ^= 1 << slot; 3283c30b235SConrad Meyer pctrie_node_store(&node->pn_child[slot], NULL, 3293c30b235SConrad Meyer PCTRIE_UNSERIALIZED); 330f2cc1285SJeff Roberson } 3318df38859SDoug Moore pctrie_node_put(ptree, node, freefn); 332f2cc1285SJeff Roberson } 333f2cc1285SJeff Roberson 334f2cc1285SJeff Roberson /* 335f2cc1285SJeff Roberson * pctrie node zone initializer. 336f2cc1285SJeff Roberson */ 337f2cc1285SJeff Roberson int 338f2cc1285SJeff Roberson pctrie_zone_init(void *mem, int size __unused, int flags __unused) 339f2cc1285SJeff Roberson { 340f2cc1285SJeff Roberson struct pctrie_node *node; 341f2cc1285SJeff Roberson 342f2cc1285SJeff Roberson node = mem; 3438df38859SDoug Moore node->pn_popmap = 0; 344f2cc1285SJeff Roberson memset(node->pn_child, 0, sizeof(node->pn_child)); 345f2cc1285SJeff Roberson return (0); 346f2cc1285SJeff Roberson } 347f2cc1285SJeff Roberson 348f2cc1285SJeff Roberson size_t 349f2cc1285SJeff Roberson pctrie_node_size(void) 350f2cc1285SJeff Roberson { 351f2cc1285SJeff Roberson 352f2cc1285SJeff Roberson return (sizeof(struct pctrie_node)); 353f2cc1285SJeff Roberson } 354f2cc1285SJeff Roberson 355f2cc1285SJeff Roberson /* 356f2cc1285SJeff Roberson * Inserts the key-value pair into the trie. 357f2cc1285SJeff Roberson * Panics if the key already exists. 358f2cc1285SJeff Roberson */ 359f2cc1285SJeff Roberson int 360f2cc1285SJeff Roberson pctrie_insert(struct pctrie *ptree, uint64_t *val, pctrie_alloc_t allocfn) 361f2cc1285SJeff Roberson { 362f2cc1285SJeff Roberson uint64_t index, newind; 36316e01c05SDoug Moore struct pctrie_node *leaf, *node, *tmp; 3643c30b235SConrad Meyer smr_pctnode_t *parentp; 365f2cc1285SJeff Roberson int slot; 366f2cc1285SJeff Roberson uint16_t clev; 367f2cc1285SJeff Roberson 368f2cc1285SJeff Roberson index = *val; 36916e01c05SDoug Moore leaf = pctrie_toleaf(val); 370f2cc1285SJeff Roberson 371f2cc1285SJeff Roberson /* 372f2cc1285SJeff Roberson * The owner of record for root is not really important because it 373f2cc1285SJeff Roberson * will never be used. 374f2cc1285SJeff Roberson */ 3753c30b235SConrad Meyer node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); 376f2cc1285SJeff Roberson if (node == NULL) { 37716e01c05SDoug Moore ptree->pt_root = (uintptr_t)leaf; 378f2cc1285SJeff Roberson return (0); 379f2cc1285SJeff Roberson } 38016e01c05SDoug Moore for (parentp = (smr_pctnode_t *)&ptree->pt_root;; node = tmp) { 381f2cc1285SJeff Roberson if (pctrie_isleaf(node)) { 38216e01c05SDoug Moore newind = *pctrie_toval(node); 38316e01c05SDoug Moore if (newind == index) 384f2cc1285SJeff Roberson panic("%s: key %jx is already present", 385f2cc1285SJeff Roberson __func__, (uintmax_t)index); 386f2cc1285SJeff Roberson break; 38716e01c05SDoug Moore } else if (pctrie_keybarr(node, index)) { 38816e01c05SDoug Moore newind = node->pn_owner; 38916e01c05SDoug Moore break; 39016e01c05SDoug Moore } 391f2cc1285SJeff Roberson slot = pctrie_slot(index, node->pn_clev); 3923c30b235SConrad Meyer parentp = &node->pn_child[slot]; 3933c30b235SConrad Meyer tmp = pctrie_node_load(parentp, NULL, PCTRIE_LOCKED); 3943c30b235SConrad Meyer if (tmp == NULL) { 39516e01c05SDoug Moore pctrie_addnode(node, index, node->pn_clev, leaf, 3963c30b235SConrad Meyer PCTRIE_LOCKED); 397f2cc1285SJeff Roberson return (0); 398f2cc1285SJeff Roberson } 399f2cc1285SJeff Roberson } 400f2cc1285SJeff Roberson 401f2cc1285SJeff Roberson /* 402f2cc1285SJeff Roberson * A new node is needed because the right insertion level is reached. 403f2cc1285SJeff Roberson * Setup the new intermediate node and add the 2 children: the 40416e01c05SDoug Moore * new object and the older edge or object. 405f2cc1285SJeff Roberson */ 406f2cc1285SJeff Roberson clev = pctrie_keydiff(newind, index); 407da72505fSDoug Moore tmp = pctrie_node_get(ptree, allocfn, index, clev); 408f2cc1285SJeff Roberson if (tmp == NULL) 409f2cc1285SJeff Roberson return (ENOMEM); 4103c30b235SConrad Meyer /* These writes are not yet visible due to ordering. */ 41116e01c05SDoug Moore pctrie_addnode(tmp, index, clev, leaf, PCTRIE_UNSERIALIZED); 41216e01c05SDoug Moore pctrie_addnode(tmp, newind, clev, node, PCTRIE_UNSERIALIZED); 4133c30b235SConrad Meyer /* Synchronize to make the above visible. */ 4143c30b235SConrad Meyer pctrie_node_store(parentp, tmp, PCTRIE_LOCKED); 415f2cc1285SJeff Roberson return (0); 416f2cc1285SJeff Roberson } 417f2cc1285SJeff Roberson 418f2cc1285SJeff Roberson /* 419f2cc1285SJeff Roberson * Returns the value stored at the index. If the index is not present, 420f2cc1285SJeff Roberson * NULL is returned. 421f2cc1285SJeff Roberson */ 4223c30b235SConrad Meyer static __always_inline uint64_t * 4233c30b235SConrad Meyer _pctrie_lookup(struct pctrie *ptree, uint64_t index, smr_t smr, 4243c30b235SConrad Meyer enum pctrie_access access) 425f2cc1285SJeff Roberson { 426f2cc1285SJeff Roberson struct pctrie_node *node; 427f2cc1285SJeff Roberson uint64_t *m; 428f2cc1285SJeff Roberson int slot; 429f2cc1285SJeff Roberson 4303c30b235SConrad Meyer node = pctrie_root_load(ptree, smr, access); 431f2cc1285SJeff Roberson while (node != NULL) { 432f2cc1285SJeff Roberson if (pctrie_isleaf(node)) { 433f2cc1285SJeff Roberson m = pctrie_toval(node); 434f2cc1285SJeff Roberson if (*m == index) 435f2cc1285SJeff Roberson return (m); 436f2cc1285SJeff Roberson break; 4373c30b235SConrad Meyer } 4383c30b235SConrad Meyer if (pctrie_keybarr(node, index)) 439f2cc1285SJeff Roberson break; 440f2cc1285SJeff Roberson slot = pctrie_slot(index, node->pn_clev); 4413c30b235SConrad Meyer node = pctrie_node_load(&node->pn_child[slot], smr, access); 442f2cc1285SJeff Roberson } 443f2cc1285SJeff Roberson return (NULL); 444f2cc1285SJeff Roberson } 445f2cc1285SJeff Roberson 446f2cc1285SJeff Roberson /* 4473c30b235SConrad Meyer * Returns the value stored at the index, assuming access is externally 4483c30b235SConrad Meyer * synchronized by a lock. 4493c30b235SConrad Meyer * 4503c30b235SConrad Meyer * If the index is not present, NULL is returned. 4513c30b235SConrad Meyer */ 4523c30b235SConrad Meyer uint64_t * 4533c30b235SConrad Meyer pctrie_lookup(struct pctrie *ptree, uint64_t index) 4543c30b235SConrad Meyer { 4553c30b235SConrad Meyer return (_pctrie_lookup(ptree, index, NULL, PCTRIE_LOCKED)); 4563c30b235SConrad Meyer } 4573c30b235SConrad Meyer 4583c30b235SConrad Meyer /* 4593c30b235SConrad Meyer * Returns the value stored at the index without requiring an external lock. 4603c30b235SConrad Meyer * 4613c30b235SConrad Meyer * If the index is not present, NULL is returned. 4623c30b235SConrad Meyer */ 4633c30b235SConrad Meyer uint64_t * 4643c30b235SConrad Meyer pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr) 4653c30b235SConrad Meyer { 4663c30b235SConrad Meyer uint64_t *res; 4673c30b235SConrad Meyer 4683c30b235SConrad Meyer smr_enter(smr); 4693c30b235SConrad Meyer res = _pctrie_lookup(ptree, index, smr, PCTRIE_SMR); 4703c30b235SConrad Meyer smr_exit(smr); 4713c30b235SConrad Meyer return (res); 4723c30b235SConrad Meyer } 4733c30b235SConrad Meyer 4743c30b235SConrad Meyer /* 475*6f251ef2SDoug Moore * Returns the value with the least index that is greater than or equal to the 476*6f251ef2SDoug Moore * specified index, or NULL if there are no such values. 477*6f251ef2SDoug Moore * 478*6f251ef2SDoug Moore * Requires that access be externally synchronized by a lock. 479f2cc1285SJeff Roberson */ 480f2cc1285SJeff Roberson uint64_t * 481f2cc1285SJeff Roberson pctrie_lookup_ge(struct pctrie *ptree, uint64_t index) 482f2cc1285SJeff Roberson { 483*6f251ef2SDoug Moore struct pctrie_node *node, *succ; 484f2cc1285SJeff Roberson uint64_t *m; 485d1139b52SConrad Meyer int slot; 486f2cc1285SJeff Roberson 487*6f251ef2SDoug Moore /* 488*6f251ef2SDoug Moore * Descend the trie as if performing an ordinary lookup for the 489*6f251ef2SDoug Moore * specified value. However, unlike an ordinary lookup, as we descend 490*6f251ef2SDoug Moore * the trie, we use "succ" to remember the last branching-off point, 491*6f251ef2SDoug Moore * that is, the interior node under which the least value that is both 492*6f251ef2SDoug Moore * outside our current path down the trie and greater than the specified 493*6f251ef2SDoug Moore * index resides. (The node's popmap makes it fast and easy to 494*6f251ef2SDoug Moore * recognize a branching-off point.) If our ordinary lookup fails to 495*6f251ef2SDoug Moore * yield a value that is greater than or equal to the specified index, 496*6f251ef2SDoug Moore * then we will exit this loop and perform a lookup starting from 497*6f251ef2SDoug Moore * "succ". If "succ" is not NULL, then that lookup is guaranteed to 498*6f251ef2SDoug Moore * succeed. 499*6f251ef2SDoug Moore */ 5003c30b235SConrad Meyer node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); 501*6f251ef2SDoug Moore succ = NULL; 502*6f251ef2SDoug Moore while (node != NULL) { 503*6f251ef2SDoug Moore if (pctrie_isleaf(node)) { 504f2cc1285SJeff Roberson m = pctrie_toval(node); 505f2cc1285SJeff Roberson if (*m >= index) 506f2cc1285SJeff Roberson return (m); 507*6f251ef2SDoug Moore break; 508f2cc1285SJeff Roberson } 509f2cc1285SJeff Roberson if (pctrie_keybarr(node, index)) { 510f2cc1285SJeff Roberson /* 511*6f251ef2SDoug Moore * If all values in this subtree are > index, then the 512*6f251ef2SDoug Moore * least value in this subtree is the answer. 513f2cc1285SJeff Roberson */ 514*6f251ef2SDoug Moore if (node->pn_owner > index) 515*6f251ef2SDoug Moore succ = node; 516*6f251ef2SDoug Moore break; 517f2cc1285SJeff Roberson } 518f2cc1285SJeff Roberson slot = pctrie_slot(index, node->pn_clev); 519f2cc1285SJeff Roberson 520f2cc1285SJeff Roberson /* 521*6f251ef2SDoug Moore * Just in case the next search step leads to a subtree of all 522*6f251ef2SDoug Moore * values < index, check popmap to see if a next bigger step, to 523*6f251ef2SDoug Moore * a subtree of all pages with values > index, is available. If 524*6f251ef2SDoug Moore * so, remember to restart the search here. 525f2cc1285SJeff Roberson */ 526*6f251ef2SDoug Moore if ((node->pn_popmap >> slot) > 1) 527*6f251ef2SDoug Moore succ = node; 528*6f251ef2SDoug Moore node = pctrie_node_load(&node->pn_child[slot], NULL, 529*6f251ef2SDoug Moore PCTRIE_LOCKED); 530f2cc1285SJeff Roberson } 531f2cc1285SJeff Roberson 532f2cc1285SJeff Roberson /* 533*6f251ef2SDoug Moore * Restart the search from the last place visited in the subtree that 534*6f251ef2SDoug Moore * included some values > index, if there was such a place. 535*6f251ef2SDoug Moore */ 536*6f251ef2SDoug Moore if (succ == NULL) 537*6f251ef2SDoug Moore return (NULL); 538*6f251ef2SDoug Moore if (succ != node) { 539*6f251ef2SDoug Moore /* 540*6f251ef2SDoug Moore * Take a step to the next bigger sibling of the node chosen 541*6f251ef2SDoug Moore * last time. In that subtree, all values > index. 542*6f251ef2SDoug Moore */ 543*6f251ef2SDoug Moore slot = pctrie_slot(index, succ->pn_clev) + 1; 544*6f251ef2SDoug Moore KASSERT((succ->pn_popmap >> slot) != 0, 545*6f251ef2SDoug Moore ("%s: no popmap siblings past slot %d in node %p", 546*6f251ef2SDoug Moore __func__, slot, succ)); 547*6f251ef2SDoug Moore slot += ffs(succ->pn_popmap >> slot) - 1; 548*6f251ef2SDoug Moore succ = pctrie_node_load(&succ->pn_child[slot], NULL, 549*6f251ef2SDoug Moore PCTRIE_LOCKED); 550*6f251ef2SDoug Moore } 551*6f251ef2SDoug Moore 552*6f251ef2SDoug Moore /* 553*6f251ef2SDoug Moore * Find the value in the subtree rooted at "succ" with the least index. 554*6f251ef2SDoug Moore */ 555*6f251ef2SDoug Moore while (!pctrie_isleaf(succ)) { 556*6f251ef2SDoug Moore KASSERT(succ->pn_popmap != 0, 557*6f251ef2SDoug Moore ("%s: no popmap children in node %p", __func__, succ)); 558*6f251ef2SDoug Moore slot = ffs(succ->pn_popmap) - 1; 559*6f251ef2SDoug Moore succ = pctrie_node_load(&succ->pn_child[slot], NULL, 560*6f251ef2SDoug Moore PCTRIE_LOCKED); 561*6f251ef2SDoug Moore } 562*6f251ef2SDoug Moore return (pctrie_toval(succ)); 563*6f251ef2SDoug Moore } 564*6f251ef2SDoug Moore 565*6f251ef2SDoug Moore /* 566*6f251ef2SDoug Moore * Returns the value with the greatest index that is less than or equal to the 567*6f251ef2SDoug Moore * specified index, or NULL if there are no such values. 568*6f251ef2SDoug Moore * 569*6f251ef2SDoug Moore * Requires that access be externally synchronized by a lock. 570f2cc1285SJeff Roberson */ 571f2cc1285SJeff Roberson uint64_t * 572f2cc1285SJeff Roberson pctrie_lookup_le(struct pctrie *ptree, uint64_t index) 573f2cc1285SJeff Roberson { 574*6f251ef2SDoug Moore struct pctrie_node *node, *pred; 575f2cc1285SJeff Roberson uint64_t *m; 576d1139b52SConrad Meyer int slot; 577f2cc1285SJeff Roberson 578*6f251ef2SDoug Moore /* 579*6f251ef2SDoug Moore * Mirror the implementation of pctrie_lookup_ge, described above. 580*6f251ef2SDoug Moore */ 5813c30b235SConrad Meyer node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); 582*6f251ef2SDoug Moore pred = NULL; 583*6f251ef2SDoug Moore while (node != NULL) { 584*6f251ef2SDoug Moore if (pctrie_isleaf(node)) { 585f2cc1285SJeff Roberson m = pctrie_toval(node); 586f2cc1285SJeff Roberson if (*m <= index) 587f2cc1285SJeff Roberson return (m); 588*6f251ef2SDoug Moore break; 589f2cc1285SJeff Roberson } 590f2cc1285SJeff Roberson if (pctrie_keybarr(node, index)) { 591*6f251ef2SDoug Moore if (node->pn_owner < index) 592*6f251ef2SDoug Moore pred = node; 593*6f251ef2SDoug Moore break; 594f2cc1285SJeff Roberson } 595f2cc1285SJeff Roberson slot = pctrie_slot(index, node->pn_clev); 596*6f251ef2SDoug Moore if ((node->pn_popmap & ((1 << slot) - 1)) != 0) 597*6f251ef2SDoug Moore pred = node; 598*6f251ef2SDoug Moore node = pctrie_node_load(&node->pn_child[slot], NULL, 5993c30b235SConrad Meyer PCTRIE_LOCKED); 6008df38859SDoug Moore } 601*6f251ef2SDoug Moore if (pred == NULL) 602*6f251ef2SDoug Moore return (NULL); 603*6f251ef2SDoug Moore if (pred != node) { 604*6f251ef2SDoug Moore slot = pctrie_slot(index, pred->pn_clev); 605*6f251ef2SDoug Moore KASSERT((pred->pn_popmap & ((1 << slot) - 1)) != 0, 606*6f251ef2SDoug Moore ("%s: no popmap siblings before slot %d in node %p", 607*6f251ef2SDoug Moore __func__, slot, pred)); 608*6f251ef2SDoug Moore slot = fls(pred->pn_popmap & ((1 << slot) - 1)) - 1; 609*6f251ef2SDoug Moore pred = pctrie_node_load(&pred->pn_child[slot], NULL, 610*6f251ef2SDoug Moore PCTRIE_LOCKED); 611f2cc1285SJeff Roberson } 612*6f251ef2SDoug Moore while (!pctrie_isleaf(pred)) { 613*6f251ef2SDoug Moore KASSERT(pred->pn_popmap != 0, 614*6f251ef2SDoug Moore ("%s: no popmap children in node %p", __func__, pred)); 615*6f251ef2SDoug Moore slot = fls(pred->pn_popmap) - 1; 616*6f251ef2SDoug Moore pred = pctrie_node_load(&pred->pn_child[slot], NULL, 617*6f251ef2SDoug Moore PCTRIE_LOCKED); 618*6f251ef2SDoug Moore } 619*6f251ef2SDoug Moore return (pctrie_toval(pred)); 620f2cc1285SJeff Roberson } 621f2cc1285SJeff Roberson 622f2cc1285SJeff Roberson /* 623f2cc1285SJeff Roberson * Remove the specified index from the tree. 624f2cc1285SJeff Roberson * Panics if the key is not present. 625f2cc1285SJeff Roberson */ 626f2cc1285SJeff Roberson void 627f2cc1285SJeff Roberson pctrie_remove(struct pctrie *ptree, uint64_t index, pctrie_free_t freefn) 628f2cc1285SJeff Roberson { 6293c30b235SConrad Meyer struct pctrie_node *node, *parent, *tmp; 630f2cc1285SJeff Roberson uint64_t *m; 6318df38859SDoug Moore int slot; 632f2cc1285SJeff Roberson 6333c30b235SConrad Meyer node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); 634f2cc1285SJeff Roberson if (pctrie_isleaf(node)) { 635f2cc1285SJeff Roberson m = pctrie_toval(node); 636f2cc1285SJeff Roberson if (*m != index) 637f2cc1285SJeff Roberson panic("%s: invalid key found", __func__); 6383c30b235SConrad Meyer pctrie_root_store(ptree, NULL, PCTRIE_LOCKED); 639f2cc1285SJeff Roberson return; 640f2cc1285SJeff Roberson } 641f2cc1285SJeff Roberson parent = NULL; 642f2cc1285SJeff Roberson for (;;) { 643f2cc1285SJeff Roberson if (node == NULL) 644f2cc1285SJeff Roberson panic("pctrie_remove: impossible to locate the key"); 645f2cc1285SJeff Roberson slot = pctrie_slot(index, node->pn_clev); 6463c30b235SConrad Meyer tmp = pctrie_node_load(&node->pn_child[slot], NULL, 6473c30b235SConrad Meyer PCTRIE_LOCKED); 6483c30b235SConrad Meyer if (pctrie_isleaf(tmp)) { 6493c30b235SConrad Meyer m = pctrie_toval(tmp); 650f2cc1285SJeff Roberson if (*m != index) 651f2cc1285SJeff Roberson panic("%s: invalid key found", __func__); 6528df38859SDoug Moore KASSERT((node->pn_popmap & (1 << slot)) != 0, 6538df38859SDoug Moore ("%s: bad popmap slot %d in node %p", 6548df38859SDoug Moore __func__, slot, node)); 6558df38859SDoug Moore node->pn_popmap ^= 1 << slot; 6563c30b235SConrad Meyer pctrie_node_store(&node->pn_child[slot], NULL, 6573c30b235SConrad Meyer PCTRIE_LOCKED); 6588df38859SDoug Moore if (!powerof2(node->pn_popmap)) 659f2cc1285SJeff Roberson break; 6608df38859SDoug Moore KASSERT(node->pn_popmap != 0, 6618df38859SDoug Moore ("%s: bad popmap all zeroes", __func__)); 6628df38859SDoug Moore slot = ffs(node->pn_popmap) - 1; 6638df38859SDoug Moore tmp = pctrie_node_load(&node->pn_child[slot], 6643c30b235SConrad Meyer NULL, PCTRIE_LOCKED); 665e8efee29SDoug Moore KASSERT(tmp != NULL, 6668df38859SDoug Moore ("%s: bad popmap slot %d in node %p", 6678df38859SDoug Moore __func__, slot, node)); 668f2cc1285SJeff Roberson if (parent == NULL) 6693c30b235SConrad Meyer pctrie_root_store(ptree, tmp, PCTRIE_LOCKED); 670f2cc1285SJeff Roberson else { 671f2cc1285SJeff Roberson slot = pctrie_slot(index, parent->pn_clev); 6723c30b235SConrad Meyer KASSERT(pctrie_node_load( 6733c30b235SConrad Meyer &parent->pn_child[slot], NULL, 6743c30b235SConrad Meyer PCTRIE_LOCKED) == node, 675f2cc1285SJeff Roberson ("%s: invalid child value", __func__)); 6763c30b235SConrad Meyer pctrie_node_store(&parent->pn_child[slot], tmp, 6773c30b235SConrad Meyer PCTRIE_LOCKED); 678f2cc1285SJeff Roberson } 6793c30b235SConrad Meyer /* 6803c30b235SConrad Meyer * The child is still valid and we can not zero the 6813c30b235SConrad Meyer * pointer until all SMR references are gone. 6823c30b235SConrad Meyer */ 6838df38859SDoug Moore pctrie_node_put(ptree, node, freefn); 684f2cc1285SJeff Roberson break; 685f2cc1285SJeff Roberson } 686f2cc1285SJeff Roberson parent = node; 6873c30b235SConrad Meyer node = tmp; 688f2cc1285SJeff Roberson } 689f2cc1285SJeff Roberson } 690f2cc1285SJeff Roberson 691f2cc1285SJeff Roberson /* 692f2cc1285SJeff Roberson * Remove and free all the nodes from the tree. 693f2cc1285SJeff Roberson * This function is recursive but there is a tight control on it as the 694f2cc1285SJeff Roberson * maximum depth of the tree is fixed. 695f2cc1285SJeff Roberson */ 696f2cc1285SJeff Roberson void 697f2cc1285SJeff Roberson pctrie_reclaim_allnodes(struct pctrie *ptree, pctrie_free_t freefn) 698f2cc1285SJeff Roberson { 699f2cc1285SJeff Roberson struct pctrie_node *root; 700f2cc1285SJeff Roberson 7013c30b235SConrad Meyer root = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); 702f2cc1285SJeff Roberson if (root == NULL) 703f2cc1285SJeff Roberson return; 7043c30b235SConrad Meyer pctrie_root_store(ptree, NULL, PCTRIE_UNSERIALIZED); 705f2cc1285SJeff Roberson if (!pctrie_isleaf(root)) 706f2cc1285SJeff Roberson pctrie_reclaim_allnodes_int(ptree, root, freefn); 707f2cc1285SJeff Roberson } 708f2cc1285SJeff Roberson 709f2cc1285SJeff Roberson #ifdef DDB 710f2cc1285SJeff Roberson /* 711f2cc1285SJeff Roberson * Show details about the given node. 712f2cc1285SJeff Roberson */ 713f2cc1285SJeff Roberson DB_SHOW_COMMAND(pctrienode, db_show_pctrienode) 714f2cc1285SJeff Roberson { 7153c30b235SConrad Meyer struct pctrie_node *node, *tmp; 7168df38859SDoug Moore int slot; 7178df38859SDoug Moore pn_popmap_t popmap; 718f2cc1285SJeff Roberson 719f2cc1285SJeff Roberson if (!have_addr) 720f2cc1285SJeff Roberson return; 721f2cc1285SJeff Roberson node = (struct pctrie_node *)addr; 7228df38859SDoug Moore db_printf("node %p, owner %jx, children popmap %04x, level %u:\n", 7238df38859SDoug Moore (void *)node, (uintmax_t)node->pn_owner, node->pn_popmap, 724f2cc1285SJeff Roberson node->pn_clev); 7258df38859SDoug Moore for (popmap = node->pn_popmap; popmap != 0; popmap ^= 1 << slot) { 7268df38859SDoug Moore slot = ffs(popmap) - 1; 7278df38859SDoug Moore tmp = pctrie_node_load(&node->pn_child[slot], NULL, 7283c30b235SConrad Meyer PCTRIE_UNSERIALIZED); 729f2cc1285SJeff Roberson db_printf("slot: %d, val: %p, value: %p, clev: %d\n", 7308df38859SDoug Moore slot, (void *)tmp, 7313c30b235SConrad Meyer pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL, 732f2cc1285SJeff Roberson node->pn_clev); 733f2cc1285SJeff Roberson } 7343c30b235SConrad Meyer } 735f2cc1285SJeff Roberson #endif /* DDB */ 736