1*f2cc1285SJeff Roberson /* 2*f2cc1285SJeff Roberson * Copyright (c) 2013 EMC Corp. 3*f2cc1285SJeff Roberson * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org> 4*f2cc1285SJeff Roberson * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com> 5*f2cc1285SJeff Roberson * All rights reserved. 6*f2cc1285SJeff Roberson * 7*f2cc1285SJeff Roberson * Redistribution and use in source and binary forms, with or without 8*f2cc1285SJeff Roberson * modification, are permitted provided that the following conditions 9*f2cc1285SJeff Roberson * are met: 10*f2cc1285SJeff Roberson * 1. Redistributions of source code must retain the above copyright 11*f2cc1285SJeff Roberson * notice, this list of conditions and the following disclaimer. 12*f2cc1285SJeff Roberson * 2. Redistributions in binary form must reproduce the above copyright 13*f2cc1285SJeff Roberson * notice, this list of conditions and the following disclaimer in the 14*f2cc1285SJeff Roberson * documentation and/or other materials provided with the distribution. 15*f2cc1285SJeff Roberson * 16*f2cc1285SJeff Roberson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17*f2cc1285SJeff Roberson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18*f2cc1285SJeff Roberson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19*f2cc1285SJeff Roberson * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20*f2cc1285SJeff Roberson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21*f2cc1285SJeff Roberson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22*f2cc1285SJeff Roberson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23*f2cc1285SJeff Roberson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24*f2cc1285SJeff Roberson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25*f2cc1285SJeff Roberson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26*f2cc1285SJeff Roberson * SUCH DAMAGE. 27*f2cc1285SJeff Roberson * 28*f2cc1285SJeff Roberson */ 29*f2cc1285SJeff Roberson 30*f2cc1285SJeff Roberson /* 31*f2cc1285SJeff Roberson * Path-compressed radix trie implementation. 32*f2cc1285SJeff Roberson * 33*f2cc1285SJeff Roberson * The implementation takes into account the following rationale: 34*f2cc1285SJeff Roberson * - Size of the nodes should be as small as possible but still big enough 35*f2cc1285SJeff Roberson * to avoid a large maximum depth for the trie. This is a balance 36*f2cc1285SJeff Roberson * between the necessity to not wire too much physical memory for the nodes 37*f2cc1285SJeff Roberson * and the necessity to avoid too much cache pollution during the trie 38*f2cc1285SJeff Roberson * operations. 39*f2cc1285SJeff Roberson * - There is not a huge bias toward the number of lookup operations over 40*f2cc1285SJeff Roberson * the number of insert and remove operations. This basically implies 41*f2cc1285SJeff Roberson * that optimizations supposedly helping one operation but hurting the 42*f2cc1285SJeff Roberson * other might be carefully evaluated. 43*f2cc1285SJeff Roberson * - On average not many nodes are expected to be fully populated, hence 44*f2cc1285SJeff Roberson * level compression may just complicate things. 45*f2cc1285SJeff Roberson */ 46*f2cc1285SJeff Roberson 47*f2cc1285SJeff Roberson #include <sys/cdefs.h> 48*f2cc1285SJeff Roberson __FBSDID("$FreeBSD$"); 49*f2cc1285SJeff Roberson 50*f2cc1285SJeff Roberson #include "opt_ddb.h" 51*f2cc1285SJeff Roberson 52*f2cc1285SJeff Roberson #include <sys/param.h> 53*f2cc1285SJeff Roberson #include <sys/systm.h> 54*f2cc1285SJeff Roberson #include <sys/kernel.h> 55*f2cc1285SJeff Roberson #include <sys/pctrie.h> 56*f2cc1285SJeff Roberson 57*f2cc1285SJeff Roberson #ifdef DDB 58*f2cc1285SJeff Roberson #include <ddb/ddb.h> 59*f2cc1285SJeff Roberson #endif 60*f2cc1285SJeff Roberson 61*f2cc1285SJeff Roberson /* 62*f2cc1285SJeff Roberson * These widths should allow the pointers to a node's children to fit within 63*f2cc1285SJeff Roberson * a single cache line. The extra levels from a narrow width should not be 64*f2cc1285SJeff Roberson * a problem thanks to path compression. 65*f2cc1285SJeff Roberson */ 66*f2cc1285SJeff Roberson #ifdef __LP64__ 67*f2cc1285SJeff Roberson #define PCTRIE_WIDTH 4 68*f2cc1285SJeff Roberson #else 69*f2cc1285SJeff Roberson #define PCTRIE_WIDTH 3 70*f2cc1285SJeff Roberson #endif 71*f2cc1285SJeff Roberson 72*f2cc1285SJeff Roberson #define PCTRIE_COUNT (1 << PCTRIE_WIDTH) 73*f2cc1285SJeff Roberson #define PCTRIE_MASK (PCTRIE_COUNT - 1) 74*f2cc1285SJeff Roberson #define PCTRIE_LIMIT (howmany((sizeof(uint64_t) * NBBY), PCTRIE_WIDTH) - 1) 75*f2cc1285SJeff Roberson 76*f2cc1285SJeff Roberson /* Flag bits stored in node pointers. */ 77*f2cc1285SJeff Roberson #define PCTRIE_ISLEAF 0x1 78*f2cc1285SJeff Roberson #define PCTRIE_FLAGS 0x1 79*f2cc1285SJeff Roberson #define PCTRIE_PAD PCTRIE_FLAGS 80*f2cc1285SJeff Roberson 81*f2cc1285SJeff Roberson /* Returns one unit associated with specified level. */ 82*f2cc1285SJeff Roberson #define PCTRIE_UNITLEVEL(lev) \ 83*f2cc1285SJeff Roberson ((uint64_t)1 << ((lev) * PCTRIE_WIDTH)) 84*f2cc1285SJeff Roberson 85*f2cc1285SJeff Roberson struct pctrie_node { 86*f2cc1285SJeff Roberson uint64_t pn_owner; /* Owner of record. */ 87*f2cc1285SJeff Roberson uint16_t pn_count; /* Valid children. */ 88*f2cc1285SJeff Roberson uint16_t pn_clev; /* Current level. */ 89*f2cc1285SJeff Roberson void *pn_child[PCTRIE_COUNT]; /* Child nodes. */ 90*f2cc1285SJeff Roberson }; 91*f2cc1285SJeff Roberson 92*f2cc1285SJeff Roberson /* 93*f2cc1285SJeff Roberson * Allocate a node. Pre-allocation should ensure that the request 94*f2cc1285SJeff Roberson * will always be satisfied. 95*f2cc1285SJeff Roberson */ 96*f2cc1285SJeff Roberson static __inline struct pctrie_node * 97*f2cc1285SJeff Roberson pctrie_node_get(struct pctrie *ptree, pctrie_alloc_t allocfn, uint64_t owner, 98*f2cc1285SJeff Roberson uint16_t count, uint16_t clevel) 99*f2cc1285SJeff Roberson { 100*f2cc1285SJeff Roberson struct pctrie_node *node; 101*f2cc1285SJeff Roberson 102*f2cc1285SJeff Roberson node = allocfn(ptree); 103*f2cc1285SJeff Roberson if (node == NULL) 104*f2cc1285SJeff Roberson return (NULL); 105*f2cc1285SJeff Roberson node->pn_owner = owner; 106*f2cc1285SJeff Roberson node->pn_count = count; 107*f2cc1285SJeff Roberson node->pn_clev = clevel; 108*f2cc1285SJeff Roberson 109*f2cc1285SJeff Roberson return (node); 110*f2cc1285SJeff Roberson } 111*f2cc1285SJeff Roberson 112*f2cc1285SJeff Roberson /* 113*f2cc1285SJeff Roberson * Free radix node. 114*f2cc1285SJeff Roberson */ 115*f2cc1285SJeff Roberson static __inline void 116*f2cc1285SJeff Roberson pctrie_node_put(struct pctrie *ptree, struct pctrie_node *node, 117*f2cc1285SJeff Roberson pctrie_free_t freefn) 118*f2cc1285SJeff Roberson { 119*f2cc1285SJeff Roberson #ifdef INVARIANTS 120*f2cc1285SJeff Roberson int slot; 121*f2cc1285SJeff Roberson 122*f2cc1285SJeff Roberson KASSERT(node->pn_count == 0, 123*f2cc1285SJeff Roberson ("pctrie_node_put: node %p has %d children", node, 124*f2cc1285SJeff Roberson node->pn_count)); 125*f2cc1285SJeff Roberson for (slot = 0; slot < PCTRIE_COUNT; slot++) 126*f2cc1285SJeff Roberson KASSERT(node->pn_child[slot] == NULL, 127*f2cc1285SJeff Roberson ("pctrie_node_put: node %p has a child", node)); 128*f2cc1285SJeff Roberson #endif 129*f2cc1285SJeff Roberson freefn(ptree, node); 130*f2cc1285SJeff Roberson } 131*f2cc1285SJeff Roberson 132*f2cc1285SJeff Roberson /* 133*f2cc1285SJeff Roberson * Return the position in the array for a given level. 134*f2cc1285SJeff Roberson */ 135*f2cc1285SJeff Roberson static __inline int 136*f2cc1285SJeff Roberson pctrie_slot(uint64_t index, uint16_t level) 137*f2cc1285SJeff Roberson { 138*f2cc1285SJeff Roberson 139*f2cc1285SJeff Roberson return ((index >> (level * PCTRIE_WIDTH)) & PCTRIE_MASK); 140*f2cc1285SJeff Roberson } 141*f2cc1285SJeff Roberson 142*f2cc1285SJeff Roberson /* Trims the key after the specified level. */ 143*f2cc1285SJeff Roberson static __inline uint64_t 144*f2cc1285SJeff Roberson pctrie_trimkey(uint64_t index, uint16_t level) 145*f2cc1285SJeff Roberson { 146*f2cc1285SJeff Roberson uint64_t ret; 147*f2cc1285SJeff Roberson 148*f2cc1285SJeff Roberson ret = index; 149*f2cc1285SJeff Roberson if (level > 0) { 150*f2cc1285SJeff Roberson ret >>= level * PCTRIE_WIDTH; 151*f2cc1285SJeff Roberson ret <<= level * PCTRIE_WIDTH; 152*f2cc1285SJeff Roberson } 153*f2cc1285SJeff Roberson return (ret); 154*f2cc1285SJeff Roberson } 155*f2cc1285SJeff Roberson 156*f2cc1285SJeff Roberson /* 157*f2cc1285SJeff Roberson * Get the root node for a tree. 158*f2cc1285SJeff Roberson */ 159*f2cc1285SJeff Roberson static __inline struct pctrie_node * 160*f2cc1285SJeff Roberson pctrie_getroot(struct pctrie *ptree) 161*f2cc1285SJeff Roberson { 162*f2cc1285SJeff Roberson 163*f2cc1285SJeff Roberson return ((struct pctrie_node *)ptree->pt_root); 164*f2cc1285SJeff Roberson } 165*f2cc1285SJeff Roberson 166*f2cc1285SJeff Roberson /* 167*f2cc1285SJeff Roberson * Set the root node for a tree. 168*f2cc1285SJeff Roberson */ 169*f2cc1285SJeff Roberson static __inline void 170*f2cc1285SJeff Roberson pctrie_setroot(struct pctrie *ptree, struct pctrie_node *node) 171*f2cc1285SJeff Roberson { 172*f2cc1285SJeff Roberson 173*f2cc1285SJeff Roberson ptree->pt_root = (uintptr_t)node; 174*f2cc1285SJeff Roberson } 175*f2cc1285SJeff Roberson 176*f2cc1285SJeff Roberson /* 177*f2cc1285SJeff Roberson * Returns TRUE if the specified node is a leaf and FALSE otherwise. 178*f2cc1285SJeff Roberson */ 179*f2cc1285SJeff Roberson static __inline boolean_t 180*f2cc1285SJeff Roberson pctrie_isleaf(struct pctrie_node *node) 181*f2cc1285SJeff Roberson { 182*f2cc1285SJeff Roberson 183*f2cc1285SJeff Roberson return (((uintptr_t)node & PCTRIE_ISLEAF) != 0); 184*f2cc1285SJeff Roberson } 185*f2cc1285SJeff Roberson 186*f2cc1285SJeff Roberson /* 187*f2cc1285SJeff Roberson * Returns the associated val extracted from node. 188*f2cc1285SJeff Roberson */ 189*f2cc1285SJeff Roberson static __inline uint64_t * 190*f2cc1285SJeff Roberson pctrie_toval(struct pctrie_node *node) 191*f2cc1285SJeff Roberson { 192*f2cc1285SJeff Roberson 193*f2cc1285SJeff Roberson return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS)); 194*f2cc1285SJeff Roberson } 195*f2cc1285SJeff Roberson 196*f2cc1285SJeff Roberson /* 197*f2cc1285SJeff Roberson * Adds the val as a child of the provided node. 198*f2cc1285SJeff Roberson */ 199*f2cc1285SJeff Roberson static __inline void 200*f2cc1285SJeff Roberson pctrie_addval(struct pctrie_node *node, uint64_t index, uint16_t clev, 201*f2cc1285SJeff Roberson uint64_t *val) 202*f2cc1285SJeff Roberson { 203*f2cc1285SJeff Roberson int slot; 204*f2cc1285SJeff Roberson 205*f2cc1285SJeff Roberson slot = pctrie_slot(index, clev); 206*f2cc1285SJeff Roberson node->pn_child[slot] = (void *)((uintptr_t)val | PCTRIE_ISLEAF); 207*f2cc1285SJeff Roberson } 208*f2cc1285SJeff Roberson 209*f2cc1285SJeff Roberson /* 210*f2cc1285SJeff Roberson * Returns the slot where two keys differ. 211*f2cc1285SJeff Roberson * It cannot accept 2 equal keys. 212*f2cc1285SJeff Roberson */ 213*f2cc1285SJeff Roberson static __inline uint16_t 214*f2cc1285SJeff Roberson pctrie_keydiff(uint64_t index1, uint64_t index2) 215*f2cc1285SJeff Roberson { 216*f2cc1285SJeff Roberson uint16_t clev; 217*f2cc1285SJeff Roberson 218*f2cc1285SJeff Roberson KASSERT(index1 != index2, ("%s: passing the same key value %jx", 219*f2cc1285SJeff Roberson __func__, (uintmax_t)index1)); 220*f2cc1285SJeff Roberson 221*f2cc1285SJeff Roberson index1 ^= index2; 222*f2cc1285SJeff Roberson for (clev = PCTRIE_LIMIT;; clev--) 223*f2cc1285SJeff Roberson if (pctrie_slot(index1, clev) != 0) 224*f2cc1285SJeff Roberson return (clev); 225*f2cc1285SJeff Roberson } 226*f2cc1285SJeff Roberson 227*f2cc1285SJeff Roberson /* 228*f2cc1285SJeff Roberson * Returns TRUE if it can be determined that key does not belong to the 229*f2cc1285SJeff Roberson * specified node. Otherwise, returns FALSE. 230*f2cc1285SJeff Roberson */ 231*f2cc1285SJeff Roberson static __inline boolean_t 232*f2cc1285SJeff Roberson pctrie_keybarr(struct pctrie_node *node, uint64_t idx) 233*f2cc1285SJeff Roberson { 234*f2cc1285SJeff Roberson 235*f2cc1285SJeff Roberson if (node->pn_clev < PCTRIE_LIMIT) { 236*f2cc1285SJeff Roberson idx = pctrie_trimkey(idx, node->pn_clev + 1); 237*f2cc1285SJeff Roberson return (idx != node->pn_owner); 238*f2cc1285SJeff Roberson } 239*f2cc1285SJeff Roberson return (FALSE); 240*f2cc1285SJeff Roberson } 241*f2cc1285SJeff Roberson 242*f2cc1285SJeff Roberson /* 243*f2cc1285SJeff Roberson * Internal helper for pctrie_reclaim_allnodes(). 244*f2cc1285SJeff Roberson * This function is recursive. 245*f2cc1285SJeff Roberson */ 246*f2cc1285SJeff Roberson static void 247*f2cc1285SJeff Roberson pctrie_reclaim_allnodes_int(struct pctrie *ptree, struct pctrie_node *node, 248*f2cc1285SJeff Roberson pctrie_free_t freefn) 249*f2cc1285SJeff Roberson { 250*f2cc1285SJeff Roberson int slot; 251*f2cc1285SJeff Roberson 252*f2cc1285SJeff Roberson KASSERT(node->pn_count <= PCTRIE_COUNT, 253*f2cc1285SJeff Roberson ("pctrie_reclaim_allnodes_int: bad count in node %p", node)); 254*f2cc1285SJeff Roberson for (slot = 0; node->pn_count != 0; slot++) { 255*f2cc1285SJeff Roberson if (node->pn_child[slot] == NULL) 256*f2cc1285SJeff Roberson continue; 257*f2cc1285SJeff Roberson if (!pctrie_isleaf(node->pn_child[slot])) 258*f2cc1285SJeff Roberson pctrie_reclaim_allnodes_int(ptree, 259*f2cc1285SJeff Roberson node->pn_child[slot], freefn); 260*f2cc1285SJeff Roberson node->pn_child[slot] = NULL; 261*f2cc1285SJeff Roberson node->pn_count--; 262*f2cc1285SJeff Roberson } 263*f2cc1285SJeff Roberson pctrie_node_put(ptree, node, freefn); 264*f2cc1285SJeff Roberson } 265*f2cc1285SJeff Roberson 266*f2cc1285SJeff Roberson /* 267*f2cc1285SJeff Roberson * pctrie node zone initializer. 268*f2cc1285SJeff Roberson */ 269*f2cc1285SJeff Roberson int 270*f2cc1285SJeff Roberson pctrie_zone_init(void *mem, int size __unused, int flags __unused) 271*f2cc1285SJeff Roberson { 272*f2cc1285SJeff Roberson struct pctrie_node *node; 273*f2cc1285SJeff Roberson 274*f2cc1285SJeff Roberson node = mem; 275*f2cc1285SJeff Roberson memset(node->pn_child, 0, sizeof(node->pn_child)); 276*f2cc1285SJeff Roberson return (0); 277*f2cc1285SJeff Roberson } 278*f2cc1285SJeff Roberson 279*f2cc1285SJeff Roberson size_t 280*f2cc1285SJeff Roberson pctrie_node_size(void) 281*f2cc1285SJeff Roberson { 282*f2cc1285SJeff Roberson 283*f2cc1285SJeff Roberson return (sizeof(struct pctrie_node)); 284*f2cc1285SJeff Roberson } 285*f2cc1285SJeff Roberson 286*f2cc1285SJeff Roberson /* 287*f2cc1285SJeff Roberson * Inserts the key-value pair into the trie. 288*f2cc1285SJeff Roberson * Panics if the key already exists. 289*f2cc1285SJeff Roberson */ 290*f2cc1285SJeff Roberson int 291*f2cc1285SJeff Roberson pctrie_insert(struct pctrie *ptree, uint64_t *val, pctrie_alloc_t allocfn) 292*f2cc1285SJeff Roberson { 293*f2cc1285SJeff Roberson uint64_t index, newind; 294*f2cc1285SJeff Roberson void **parentp; 295*f2cc1285SJeff Roberson struct pctrie_node *node, *tmp; 296*f2cc1285SJeff Roberson uint64_t *m; 297*f2cc1285SJeff Roberson int slot; 298*f2cc1285SJeff Roberson uint16_t clev; 299*f2cc1285SJeff Roberson 300*f2cc1285SJeff Roberson index = *val; 301*f2cc1285SJeff Roberson 302*f2cc1285SJeff Roberson /* 303*f2cc1285SJeff Roberson * The owner of record for root is not really important because it 304*f2cc1285SJeff Roberson * will never be used. 305*f2cc1285SJeff Roberson */ 306*f2cc1285SJeff Roberson node = pctrie_getroot(ptree); 307*f2cc1285SJeff Roberson if (node == NULL) { 308*f2cc1285SJeff Roberson ptree->pt_root = (uintptr_t)val | PCTRIE_ISLEAF; 309*f2cc1285SJeff Roberson return (0); 310*f2cc1285SJeff Roberson } 311*f2cc1285SJeff Roberson parentp = (void **)&ptree->pt_root; 312*f2cc1285SJeff Roberson for (;;) { 313*f2cc1285SJeff Roberson if (pctrie_isleaf(node)) { 314*f2cc1285SJeff Roberson m = pctrie_toval(node); 315*f2cc1285SJeff Roberson if (*m == index) 316*f2cc1285SJeff Roberson panic("%s: key %jx is already present", 317*f2cc1285SJeff Roberson __func__, (uintmax_t)index); 318*f2cc1285SJeff Roberson clev = pctrie_keydiff(*m, index); 319*f2cc1285SJeff Roberson tmp = pctrie_node_get(ptree, allocfn, 320*f2cc1285SJeff Roberson pctrie_trimkey(index, clev + 1), 2, clev); 321*f2cc1285SJeff Roberson if (tmp == NULL) 322*f2cc1285SJeff Roberson return (ENOMEM); 323*f2cc1285SJeff Roberson *parentp = tmp; 324*f2cc1285SJeff Roberson pctrie_addval(tmp, index, clev, val); 325*f2cc1285SJeff Roberson pctrie_addval(tmp, *m, clev, m); 326*f2cc1285SJeff Roberson return (0); 327*f2cc1285SJeff Roberson } else if (pctrie_keybarr(node, index)) 328*f2cc1285SJeff Roberson break; 329*f2cc1285SJeff Roberson slot = pctrie_slot(index, node->pn_clev); 330*f2cc1285SJeff Roberson if (node->pn_child[slot] == NULL) { 331*f2cc1285SJeff Roberson node->pn_count++; 332*f2cc1285SJeff Roberson pctrie_addval(node, index, node->pn_clev, val); 333*f2cc1285SJeff Roberson return (0); 334*f2cc1285SJeff Roberson } 335*f2cc1285SJeff Roberson parentp = &node->pn_child[slot]; 336*f2cc1285SJeff Roberson node = node->pn_child[slot]; 337*f2cc1285SJeff Roberson } 338*f2cc1285SJeff Roberson 339*f2cc1285SJeff Roberson /* 340*f2cc1285SJeff Roberson * A new node is needed because the right insertion level is reached. 341*f2cc1285SJeff Roberson * Setup the new intermediate node and add the 2 children: the 342*f2cc1285SJeff Roberson * new object and the older edge. 343*f2cc1285SJeff Roberson */ 344*f2cc1285SJeff Roberson newind = node->pn_owner; 345*f2cc1285SJeff Roberson clev = pctrie_keydiff(newind, index); 346*f2cc1285SJeff Roberson tmp = pctrie_node_get(ptree, allocfn, 347*f2cc1285SJeff Roberson pctrie_trimkey(index, clev + 1), 2, clev); 348*f2cc1285SJeff Roberson if (tmp == NULL) 349*f2cc1285SJeff Roberson return (ENOMEM); 350*f2cc1285SJeff Roberson *parentp = tmp; 351*f2cc1285SJeff Roberson pctrie_addval(tmp, index, clev, val); 352*f2cc1285SJeff Roberson slot = pctrie_slot(newind, clev); 353*f2cc1285SJeff Roberson tmp->pn_child[slot] = node; 354*f2cc1285SJeff Roberson 355*f2cc1285SJeff Roberson return (0); 356*f2cc1285SJeff Roberson } 357*f2cc1285SJeff Roberson 358*f2cc1285SJeff Roberson /* 359*f2cc1285SJeff Roberson * Returns the value stored at the index. If the index is not present, 360*f2cc1285SJeff Roberson * NULL is returned. 361*f2cc1285SJeff Roberson */ 362*f2cc1285SJeff Roberson uint64_t * 363*f2cc1285SJeff Roberson pctrie_lookup(struct pctrie *ptree, uint64_t index) 364*f2cc1285SJeff Roberson { 365*f2cc1285SJeff Roberson struct pctrie_node *node; 366*f2cc1285SJeff Roberson uint64_t *m; 367*f2cc1285SJeff Roberson int slot; 368*f2cc1285SJeff Roberson 369*f2cc1285SJeff Roberson node = pctrie_getroot(ptree); 370*f2cc1285SJeff Roberson while (node != NULL) { 371*f2cc1285SJeff Roberson if (pctrie_isleaf(node)) { 372*f2cc1285SJeff Roberson m = pctrie_toval(node); 373*f2cc1285SJeff Roberson if (*m == index) 374*f2cc1285SJeff Roberson return (m); 375*f2cc1285SJeff Roberson else 376*f2cc1285SJeff Roberson break; 377*f2cc1285SJeff Roberson } else if (pctrie_keybarr(node, index)) 378*f2cc1285SJeff Roberson break; 379*f2cc1285SJeff Roberson slot = pctrie_slot(index, node->pn_clev); 380*f2cc1285SJeff Roberson node = node->pn_child[slot]; 381*f2cc1285SJeff Roberson } 382*f2cc1285SJeff Roberson return (NULL); 383*f2cc1285SJeff Roberson } 384*f2cc1285SJeff Roberson 385*f2cc1285SJeff Roberson /* 386*f2cc1285SJeff Roberson * Look up the nearest entry at a position bigger than or equal to index. 387*f2cc1285SJeff Roberson */ 388*f2cc1285SJeff Roberson uint64_t * 389*f2cc1285SJeff Roberson pctrie_lookup_ge(struct pctrie *ptree, uint64_t index) 390*f2cc1285SJeff Roberson { 391*f2cc1285SJeff Roberson struct pctrie_node *stack[PCTRIE_LIMIT]; 392*f2cc1285SJeff Roberson uint64_t inc; 393*f2cc1285SJeff Roberson uint64_t *m; 394*f2cc1285SJeff Roberson struct pctrie_node *child, *node; 395*f2cc1285SJeff Roberson #ifdef INVARIANTS 396*f2cc1285SJeff Roberson int loops = 0; 397*f2cc1285SJeff Roberson #endif 398*f2cc1285SJeff Roberson int slot, tos; 399*f2cc1285SJeff Roberson 400*f2cc1285SJeff Roberson node = pctrie_getroot(ptree); 401*f2cc1285SJeff Roberson if (node == NULL) 402*f2cc1285SJeff Roberson return (NULL); 403*f2cc1285SJeff Roberson else if (pctrie_isleaf(node)) { 404*f2cc1285SJeff Roberson m = pctrie_toval(node); 405*f2cc1285SJeff Roberson if (*m >= index) 406*f2cc1285SJeff Roberson return (m); 407*f2cc1285SJeff Roberson else 408*f2cc1285SJeff Roberson return (NULL); 409*f2cc1285SJeff Roberson } 410*f2cc1285SJeff Roberson tos = 0; 411*f2cc1285SJeff Roberson for (;;) { 412*f2cc1285SJeff Roberson /* 413*f2cc1285SJeff Roberson * If the keys differ before the current bisection node, 414*f2cc1285SJeff Roberson * then the search key might rollback to the earliest 415*f2cc1285SJeff Roberson * available bisection node or to the smallest key 416*f2cc1285SJeff Roberson * in the current node (if the owner is bigger than the 417*f2cc1285SJeff Roberson * search key). 418*f2cc1285SJeff Roberson */ 419*f2cc1285SJeff Roberson if (pctrie_keybarr(node, index)) { 420*f2cc1285SJeff Roberson if (index > node->pn_owner) { 421*f2cc1285SJeff Roberson ascend: 422*f2cc1285SJeff Roberson KASSERT(++loops < 1000, 423*f2cc1285SJeff Roberson ("pctrie_lookup_ge: too many loops")); 424*f2cc1285SJeff Roberson 425*f2cc1285SJeff Roberson /* 426*f2cc1285SJeff Roberson * Pop nodes from the stack until either the 427*f2cc1285SJeff Roberson * stack is empty or a node that could have a 428*f2cc1285SJeff Roberson * matching descendant is found. 429*f2cc1285SJeff Roberson */ 430*f2cc1285SJeff Roberson do { 431*f2cc1285SJeff Roberson if (tos == 0) 432*f2cc1285SJeff Roberson return (NULL); 433*f2cc1285SJeff Roberson node = stack[--tos]; 434*f2cc1285SJeff Roberson } while (pctrie_slot(index, 435*f2cc1285SJeff Roberson node->pn_clev) == (PCTRIE_COUNT - 1)); 436*f2cc1285SJeff Roberson 437*f2cc1285SJeff Roberson /* 438*f2cc1285SJeff Roberson * The following computation cannot overflow 439*f2cc1285SJeff Roberson * because index's slot at the current level 440*f2cc1285SJeff Roberson * is less than PCTRIE_COUNT - 1. 441*f2cc1285SJeff Roberson */ 442*f2cc1285SJeff Roberson index = pctrie_trimkey(index, 443*f2cc1285SJeff Roberson node->pn_clev); 444*f2cc1285SJeff Roberson index += PCTRIE_UNITLEVEL(node->pn_clev); 445*f2cc1285SJeff Roberson } else 446*f2cc1285SJeff Roberson index = node->pn_owner; 447*f2cc1285SJeff Roberson KASSERT(!pctrie_keybarr(node, index), 448*f2cc1285SJeff Roberson ("pctrie_lookup_ge: keybarr failed")); 449*f2cc1285SJeff Roberson } 450*f2cc1285SJeff Roberson slot = pctrie_slot(index, node->pn_clev); 451*f2cc1285SJeff Roberson child = node->pn_child[slot]; 452*f2cc1285SJeff Roberson if (pctrie_isleaf(child)) { 453*f2cc1285SJeff Roberson m = pctrie_toval(child); 454*f2cc1285SJeff Roberson if (*m >= index) 455*f2cc1285SJeff Roberson return (m); 456*f2cc1285SJeff Roberson } else if (child != NULL) 457*f2cc1285SJeff Roberson goto descend; 458*f2cc1285SJeff Roberson 459*f2cc1285SJeff Roberson /* 460*f2cc1285SJeff Roberson * Look for an available edge or val within the current 461*f2cc1285SJeff Roberson * bisection node. 462*f2cc1285SJeff Roberson */ 463*f2cc1285SJeff Roberson if (slot < (PCTRIE_COUNT - 1)) { 464*f2cc1285SJeff Roberson inc = PCTRIE_UNITLEVEL(node->pn_clev); 465*f2cc1285SJeff Roberson index = pctrie_trimkey(index, node->pn_clev); 466*f2cc1285SJeff Roberson do { 467*f2cc1285SJeff Roberson index += inc; 468*f2cc1285SJeff Roberson slot++; 469*f2cc1285SJeff Roberson child = node->pn_child[slot]; 470*f2cc1285SJeff Roberson if (pctrie_isleaf(child)) { 471*f2cc1285SJeff Roberson m = pctrie_toval(child); 472*f2cc1285SJeff Roberson if (*m >= index) 473*f2cc1285SJeff Roberson return (m); 474*f2cc1285SJeff Roberson } else if (child != NULL) 475*f2cc1285SJeff Roberson goto descend; 476*f2cc1285SJeff Roberson } while (slot < (PCTRIE_COUNT - 1)); 477*f2cc1285SJeff Roberson } 478*f2cc1285SJeff Roberson KASSERT(child == NULL || pctrie_isleaf(child), 479*f2cc1285SJeff Roberson ("pctrie_lookup_ge: child is radix node")); 480*f2cc1285SJeff Roberson 481*f2cc1285SJeff Roberson /* 482*f2cc1285SJeff Roberson * If a value or edge bigger than the search slot is not found 483*f2cc1285SJeff Roberson * in the current node, ascend to the next higher-level node. 484*f2cc1285SJeff Roberson */ 485*f2cc1285SJeff Roberson goto ascend; 486*f2cc1285SJeff Roberson descend: 487*f2cc1285SJeff Roberson KASSERT(node->pn_clev > 0, 488*f2cc1285SJeff Roberson ("pctrie_lookup_ge: pushing leaf's parent")); 489*f2cc1285SJeff Roberson KASSERT(tos < PCTRIE_LIMIT, 490*f2cc1285SJeff Roberson ("pctrie_lookup_ge: stack overflow")); 491*f2cc1285SJeff Roberson stack[tos++] = node; 492*f2cc1285SJeff Roberson node = child; 493*f2cc1285SJeff Roberson } 494*f2cc1285SJeff Roberson } 495*f2cc1285SJeff Roberson 496*f2cc1285SJeff Roberson /* 497*f2cc1285SJeff Roberson * Look up the nearest entry at a position less than or equal to index. 498*f2cc1285SJeff Roberson */ 499*f2cc1285SJeff Roberson uint64_t * 500*f2cc1285SJeff Roberson pctrie_lookup_le(struct pctrie *ptree, uint64_t index) 501*f2cc1285SJeff Roberson { 502*f2cc1285SJeff Roberson struct pctrie_node *stack[PCTRIE_LIMIT]; 503*f2cc1285SJeff Roberson uint64_t inc; 504*f2cc1285SJeff Roberson uint64_t *m; 505*f2cc1285SJeff Roberson struct pctrie_node *child, *node; 506*f2cc1285SJeff Roberson #ifdef INVARIANTS 507*f2cc1285SJeff Roberson int loops = 0; 508*f2cc1285SJeff Roberson #endif 509*f2cc1285SJeff Roberson int slot, tos; 510*f2cc1285SJeff Roberson 511*f2cc1285SJeff Roberson node = pctrie_getroot(ptree); 512*f2cc1285SJeff Roberson if (node == NULL) 513*f2cc1285SJeff Roberson return (NULL); 514*f2cc1285SJeff Roberson else if (pctrie_isleaf(node)) { 515*f2cc1285SJeff Roberson m = pctrie_toval(node); 516*f2cc1285SJeff Roberson if (*m <= index) 517*f2cc1285SJeff Roberson return (m); 518*f2cc1285SJeff Roberson else 519*f2cc1285SJeff Roberson return (NULL); 520*f2cc1285SJeff Roberson } 521*f2cc1285SJeff Roberson tos = 0; 522*f2cc1285SJeff Roberson for (;;) { 523*f2cc1285SJeff Roberson /* 524*f2cc1285SJeff Roberson * If the keys differ before the current bisection node, 525*f2cc1285SJeff Roberson * then the search key might rollback to the earliest 526*f2cc1285SJeff Roberson * available bisection node or to the largest key 527*f2cc1285SJeff Roberson * in the current node (if the owner is smaller than the 528*f2cc1285SJeff Roberson * search key). 529*f2cc1285SJeff Roberson */ 530*f2cc1285SJeff Roberson if (pctrie_keybarr(node, index)) { 531*f2cc1285SJeff Roberson if (index > node->pn_owner) { 532*f2cc1285SJeff Roberson index = node->pn_owner + PCTRIE_COUNT * 533*f2cc1285SJeff Roberson PCTRIE_UNITLEVEL(node->pn_clev); 534*f2cc1285SJeff Roberson } else { 535*f2cc1285SJeff Roberson ascend: 536*f2cc1285SJeff Roberson KASSERT(++loops < 1000, 537*f2cc1285SJeff Roberson ("pctrie_lookup_le: too many loops")); 538*f2cc1285SJeff Roberson 539*f2cc1285SJeff Roberson /* 540*f2cc1285SJeff Roberson * Pop nodes from the stack until either the 541*f2cc1285SJeff Roberson * stack is empty or a node that could have a 542*f2cc1285SJeff Roberson * matching descendant is found. 543*f2cc1285SJeff Roberson */ 544*f2cc1285SJeff Roberson do { 545*f2cc1285SJeff Roberson if (tos == 0) 546*f2cc1285SJeff Roberson return (NULL); 547*f2cc1285SJeff Roberson node = stack[--tos]; 548*f2cc1285SJeff Roberson } while (pctrie_slot(index, 549*f2cc1285SJeff Roberson node->pn_clev) == 0); 550*f2cc1285SJeff Roberson 551*f2cc1285SJeff Roberson /* 552*f2cc1285SJeff Roberson * The following computation cannot overflow 553*f2cc1285SJeff Roberson * because index's slot at the current level 554*f2cc1285SJeff Roberson * is greater than 0. 555*f2cc1285SJeff Roberson */ 556*f2cc1285SJeff Roberson index = pctrie_trimkey(index, 557*f2cc1285SJeff Roberson node->pn_clev); 558*f2cc1285SJeff Roberson } 559*f2cc1285SJeff Roberson index--; 560*f2cc1285SJeff Roberson KASSERT(!pctrie_keybarr(node, index), 561*f2cc1285SJeff Roberson ("pctrie_lookup_le: keybarr failed")); 562*f2cc1285SJeff Roberson } 563*f2cc1285SJeff Roberson slot = pctrie_slot(index, node->pn_clev); 564*f2cc1285SJeff Roberson child = node->pn_child[slot]; 565*f2cc1285SJeff Roberson if (pctrie_isleaf(child)) { 566*f2cc1285SJeff Roberson m = pctrie_toval(child); 567*f2cc1285SJeff Roberson if (*m <= index) 568*f2cc1285SJeff Roberson return (m); 569*f2cc1285SJeff Roberson } else if (child != NULL) 570*f2cc1285SJeff Roberson goto descend; 571*f2cc1285SJeff Roberson 572*f2cc1285SJeff Roberson /* 573*f2cc1285SJeff Roberson * Look for an available edge or value within the current 574*f2cc1285SJeff Roberson * bisection node. 575*f2cc1285SJeff Roberson */ 576*f2cc1285SJeff Roberson if (slot > 0) { 577*f2cc1285SJeff Roberson inc = PCTRIE_UNITLEVEL(node->pn_clev); 578*f2cc1285SJeff Roberson index |= inc - 1; 579*f2cc1285SJeff Roberson do { 580*f2cc1285SJeff Roberson index -= inc; 581*f2cc1285SJeff Roberson slot--; 582*f2cc1285SJeff Roberson child = node->pn_child[slot]; 583*f2cc1285SJeff Roberson if (pctrie_isleaf(child)) { 584*f2cc1285SJeff Roberson m = pctrie_toval(child); 585*f2cc1285SJeff Roberson if (*m <= index) 586*f2cc1285SJeff Roberson return (m); 587*f2cc1285SJeff Roberson } else if (child != NULL) 588*f2cc1285SJeff Roberson goto descend; 589*f2cc1285SJeff Roberson } while (slot > 0); 590*f2cc1285SJeff Roberson } 591*f2cc1285SJeff Roberson KASSERT(child == NULL || pctrie_isleaf(child), 592*f2cc1285SJeff Roberson ("pctrie_lookup_le: child is radix node")); 593*f2cc1285SJeff Roberson 594*f2cc1285SJeff Roberson /* 595*f2cc1285SJeff Roberson * If a value or edge smaller than the search slot is not found 596*f2cc1285SJeff Roberson * in the current node, ascend to the next higher-level node. 597*f2cc1285SJeff Roberson */ 598*f2cc1285SJeff Roberson goto ascend; 599*f2cc1285SJeff Roberson descend: 600*f2cc1285SJeff Roberson KASSERT(node->pn_clev > 0, 601*f2cc1285SJeff Roberson ("pctrie_lookup_le: pushing leaf's parent")); 602*f2cc1285SJeff Roberson KASSERT(tos < PCTRIE_LIMIT, 603*f2cc1285SJeff Roberson ("pctrie_lookup_le: stack overflow")); 604*f2cc1285SJeff Roberson stack[tos++] = node; 605*f2cc1285SJeff Roberson node = child; 606*f2cc1285SJeff Roberson } 607*f2cc1285SJeff Roberson } 608*f2cc1285SJeff Roberson 609*f2cc1285SJeff Roberson /* 610*f2cc1285SJeff Roberson * Remove the specified index from the tree. 611*f2cc1285SJeff Roberson * Panics if the key is not present. 612*f2cc1285SJeff Roberson */ 613*f2cc1285SJeff Roberson void 614*f2cc1285SJeff Roberson pctrie_remove(struct pctrie *ptree, uint64_t index, pctrie_free_t freefn) 615*f2cc1285SJeff Roberson { 616*f2cc1285SJeff Roberson struct pctrie_node *node, *parent; 617*f2cc1285SJeff Roberson uint64_t *m; 618*f2cc1285SJeff Roberson int i, slot; 619*f2cc1285SJeff Roberson 620*f2cc1285SJeff Roberson node = pctrie_getroot(ptree); 621*f2cc1285SJeff Roberson if (pctrie_isleaf(node)) { 622*f2cc1285SJeff Roberson m = pctrie_toval(node); 623*f2cc1285SJeff Roberson if (*m != index) 624*f2cc1285SJeff Roberson panic("%s: invalid key found", __func__); 625*f2cc1285SJeff Roberson pctrie_setroot(ptree, NULL); 626*f2cc1285SJeff Roberson return; 627*f2cc1285SJeff Roberson } 628*f2cc1285SJeff Roberson parent = NULL; 629*f2cc1285SJeff Roberson for (;;) { 630*f2cc1285SJeff Roberson if (node == NULL) 631*f2cc1285SJeff Roberson panic("pctrie_remove: impossible to locate the key"); 632*f2cc1285SJeff Roberson slot = pctrie_slot(index, node->pn_clev); 633*f2cc1285SJeff Roberson if (pctrie_isleaf(node->pn_child[slot])) { 634*f2cc1285SJeff Roberson m = pctrie_toval(node->pn_child[slot]); 635*f2cc1285SJeff Roberson if (*m != index) 636*f2cc1285SJeff Roberson panic("%s: invalid key found", __func__); 637*f2cc1285SJeff Roberson node->pn_child[slot] = NULL; 638*f2cc1285SJeff Roberson node->pn_count--; 639*f2cc1285SJeff Roberson if (node->pn_count > 1) 640*f2cc1285SJeff Roberson break; 641*f2cc1285SJeff Roberson for (i = 0; i < PCTRIE_COUNT; i++) 642*f2cc1285SJeff Roberson if (node->pn_child[i] != NULL) 643*f2cc1285SJeff Roberson break; 644*f2cc1285SJeff Roberson KASSERT(i != PCTRIE_COUNT, 645*f2cc1285SJeff Roberson ("%s: invalid node configuration", __func__)); 646*f2cc1285SJeff Roberson if (parent == NULL) 647*f2cc1285SJeff Roberson pctrie_setroot(ptree, node->pn_child[i]); 648*f2cc1285SJeff Roberson else { 649*f2cc1285SJeff Roberson slot = pctrie_slot(index, parent->pn_clev); 650*f2cc1285SJeff Roberson KASSERT(parent->pn_child[slot] == node, 651*f2cc1285SJeff Roberson ("%s: invalid child value", __func__)); 652*f2cc1285SJeff Roberson parent->pn_child[slot] = node->pn_child[i]; 653*f2cc1285SJeff Roberson } 654*f2cc1285SJeff Roberson node->pn_count--; 655*f2cc1285SJeff Roberson node->pn_child[i] = NULL; 656*f2cc1285SJeff Roberson pctrie_node_put(ptree, node, freefn); 657*f2cc1285SJeff Roberson break; 658*f2cc1285SJeff Roberson } 659*f2cc1285SJeff Roberson parent = node; 660*f2cc1285SJeff Roberson node = node->pn_child[slot]; 661*f2cc1285SJeff Roberson } 662*f2cc1285SJeff Roberson } 663*f2cc1285SJeff Roberson 664*f2cc1285SJeff Roberson /* 665*f2cc1285SJeff Roberson * Remove and free all the nodes from the tree. 666*f2cc1285SJeff Roberson * This function is recursive but there is a tight control on it as the 667*f2cc1285SJeff Roberson * maximum depth of the tree is fixed. 668*f2cc1285SJeff Roberson */ 669*f2cc1285SJeff Roberson void 670*f2cc1285SJeff Roberson pctrie_reclaim_allnodes(struct pctrie *ptree, pctrie_free_t freefn) 671*f2cc1285SJeff Roberson { 672*f2cc1285SJeff Roberson struct pctrie_node *root; 673*f2cc1285SJeff Roberson 674*f2cc1285SJeff Roberson root = pctrie_getroot(ptree); 675*f2cc1285SJeff Roberson if (root == NULL) 676*f2cc1285SJeff Roberson return; 677*f2cc1285SJeff Roberson pctrie_setroot(ptree, NULL); 678*f2cc1285SJeff Roberson if (!pctrie_isleaf(root)) 679*f2cc1285SJeff Roberson pctrie_reclaim_allnodes_int(ptree, root, freefn); 680*f2cc1285SJeff Roberson } 681*f2cc1285SJeff Roberson 682*f2cc1285SJeff Roberson #ifdef DDB 683*f2cc1285SJeff Roberson /* 684*f2cc1285SJeff Roberson * Show details about the given node. 685*f2cc1285SJeff Roberson */ 686*f2cc1285SJeff Roberson DB_SHOW_COMMAND(pctrienode, db_show_pctrienode) 687*f2cc1285SJeff Roberson { 688*f2cc1285SJeff Roberson struct pctrie_node *node; 689*f2cc1285SJeff Roberson int i; 690*f2cc1285SJeff Roberson 691*f2cc1285SJeff Roberson if (!have_addr) 692*f2cc1285SJeff Roberson return; 693*f2cc1285SJeff Roberson node = (struct pctrie_node *)addr; 694*f2cc1285SJeff Roberson db_printf("node %p, owner %jx, children count %u, level %u:\n", 695*f2cc1285SJeff Roberson (void *)node, (uintmax_t)node->pn_owner, node->pn_count, 696*f2cc1285SJeff Roberson node->pn_clev); 697*f2cc1285SJeff Roberson for (i = 0; i < PCTRIE_COUNT; i++) 698*f2cc1285SJeff Roberson if (node->pn_child[i] != NULL) 699*f2cc1285SJeff Roberson db_printf("slot: %d, val: %p, value: %p, clev: %d\n", 700*f2cc1285SJeff Roberson i, (void *)node->pn_child[i], 701*f2cc1285SJeff Roberson pctrie_isleaf(node->pn_child[i]) ? 702*f2cc1285SJeff Roberson pctrie_toval(node->pn_child[i]) : NULL, 703*f2cc1285SJeff Roberson node->pn_clev); 704*f2cc1285SJeff Roberson } 705*f2cc1285SJeff Roberson #endif /* DDB */ 706