/*
 * Copyright (C) 2001 Momchil Velikov
 * Portions Copyright (C) 2001 Christoph Hellwig
 * Copyright (C) 2005 SGI, Christoph Lameter
 * Copyright (C) 2006 Nick Piggin
 * Copyright (C) 2012 Konstantin Khlebnikov
 * Copyright (C) 2016 Intel, Matthew Wilcox
 * Copyright (C) 2016 Intel, Ross Zwisler
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/idr.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kmemleak.h>
#include <linux/percpu.h>
#include <linux/preempt.h>		/* in_interrupt() */
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/string.h>


/* Number of nodes in fully populated tree of given height */
static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly;

/*
 * Radix tree node cache.
 */
static struct kmem_cache *radix_tree_node_cachep;

/*
 * The radix tree is variable-height, so an insert operation not only has
 * to build the branch to its corresponding item, it also has to build the
 * branch to existing items if the size has to be increased (by
 * radix_tree_extend).
 *
 * The worst case is a zero height tree with just a single item at index 0,
 * and then inserting an item at index ULONG_MAX. This requires 2 new branches
 * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared.
 * Hence:
 */
#define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)

/*
 * The IDR does not have to be as high as the radix tree since it uses
 * signed integers, not unsigned longs.
 */
#define IDR_INDEX_BITS		(8 /* CHAR_BIT */ * sizeof(int) - 1)
#define IDR_MAX_PATH		(DIV_ROUND_UP(IDR_INDEX_BITS, \
						RADIX_TREE_MAP_SHIFT))
#define IDR_PRELOAD_SIZE	(IDR_MAX_PATH * 2 - 1)

/*
 * The IDA is even shorter since it uses a bitmap at the last level.
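 *
 * As a hedged worked example (not guaranteed by this file): on a 64-bit
 * build with RADIX_TREE_MAP_SHIFT == 6 and IDA_BITMAP_BITS == 1024,
 * IDA_INDEX_BITS works out to 31 - 10 = 21, IDA_MAX_PATH to
 * DIV_ROUND_UP(21, 6) == 4, and IDA_PRELOAD_SIZE to 4 * 2 - 1 == 7.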
 */
#define IDA_INDEX_BITS	(8 * sizeof(int) - 1 - ilog2(IDA_BITMAP_BITS))
#define IDA_MAX_PATH		(DIV_ROUND_UP(IDA_INDEX_BITS, \
						RADIX_TREE_MAP_SHIFT))
#define IDA_PRELOAD_SIZE	(IDA_MAX_PATH * 2 - 1)

/*
 * Per-cpu pool of preloaded nodes
 */
struct radix_tree_preload {
	unsigned nr;
	/* nodes->parent points to next preallocated node */
	struct radix_tree_node *nodes;
};
static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };

static inline struct radix_tree_node *entry_to_node(void *ptr)
{
	return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE);
}

static inline void *node_to_entry(void *ptr)
{
	return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
}

#define RADIX_TREE_RETRY	node_to_entry(NULL)

#ifdef CONFIG_RADIX_TREE_MULTIORDER
/* Sibling slots point directly to another slot in the same node */
static inline
bool is_sibling_entry(const struct radix_tree_node *parent, void *node)
{
	void __rcu **ptr = node;
	return (parent->slots <= ptr) &&
			(ptr < parent->slots + RADIX_TREE_MAP_SIZE);
}
#else
static inline
bool is_sibling_entry(const struct radix_tree_node *parent, void *node)
{
	return false;
}
#endif

static inline unsigned long
get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
{
	return slot - parent->slots;
}

static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
			struct radix_tree_node **nodep, unsigned long index)
{
	unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
	void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);

#ifdef CONFIG_RADIX_TREE_MULTIORDER
	if (radix_tree_is_internal_node(entry)) {
		if (is_sibling_entry(parent, entry)) {
			void __rcu **sibentry;
			sibentry = (void __rcu **) entry_to_node(entry);
			offset = get_slot_offset(parent, sibentry);
			entry = rcu_dereference_raw(*sibentry);
		}
	}
#endif

	*nodep = (void *)entry;
	return offset;
}

static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
{
	return root->gfp_mask & __GFP_BITS_MASK;
}

static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__set_bit(offset, node->tags[tag]);
}

static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__clear_bit(offset, node->tags[tag]);
}

static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	return test_bit(offset, node->tags[tag]);
}

static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
{
	root->gfp_mask |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
}

static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
{
	root->gfp_mask &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
}

static inline void root_tag_clear_all(struct radix_tree_root *root)
{
	root->gfp_mask &= (1 << ROOT_TAG_SHIFT) - 1;
}

static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
{
	return (__force int)root->gfp_mask & (1 << (tag + ROOT_TAG_SHIFT));
}

static inline unsigned root_tags_get(const struct radix_tree_root *root)
{
	return (__force unsigned)root->gfp_mask >> ROOT_TAG_SHIFT;
}

static inline
bool is_idr(const struct radix_tree_root *root)
{
	return !!(root->gfp_mask & ROOT_IS_IDR);
}

/*
 * Returns 1 if any slot in the node has this tag set.
 * Otherwise returns 0.
 */
static inline int any_tag_set(const struct radix_tree_node *node,
							unsigned int tag)
{
	unsigned idx;
	for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
		if (node->tags[tag][idx])
			return 1;
	}
	return 0;
}

static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag)
{
	bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE);
}

/**
 * radix_tree_find_next_bit - find the next set bit in a node's tag bitmap
 *
 * @node: the node whose tag bitmap is searched
 * @tag: the tag index to search
 * @offset: the bit number to start searching at
 *
 * Unrollable variant of find_next_bit() for constant size arrays.
 * Tail bits from RADIX_TREE_MAP_SIZE up to the next multiple of
 * BITS_PER_LONG must be zero.
 * Returns the next bit offset, or RADIX_TREE_MAP_SIZE if nothing is found.
 */
static __always_inline unsigned long
radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
			 unsigned long offset)
{
	const unsigned long *addr = node->tags[tag];

	if (offset < RADIX_TREE_MAP_SIZE) {
		unsigned long tmp;

		addr += offset / BITS_PER_LONG;
		tmp = *addr >> (offset % BITS_PER_LONG);
		if (tmp)
			return __ffs(tmp) + offset;
		offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
		while (offset < RADIX_TREE_MAP_SIZE) {
			tmp = *++addr;
			if (tmp)
				return __ffs(tmp) + offset;
			offset += BITS_PER_LONG;
		}
	}
	return RADIX_TREE_MAP_SIZE;
}

static unsigned int iter_offset(const struct radix_tree_iter *iter)
{
	return (iter->index >> iter_shift(iter)) & RADIX_TREE_MAP_MASK;
}

/*
 * The maximum index which can be stored in a radix tree
 */
static inline unsigned long shift_maxindex(unsigned int shift)
{
	return (RADIX_TREE_MAP_SIZE << shift) - 1;
}

static inline unsigned long node_maxindex(const struct radix_tree_node *node)
{
	return shift_maxindex(node->shift);
}

static unsigned long next_index(unsigned long index,
				const struct radix_tree_node *node,
				unsigned long offset)
{
	return (index & ~node_maxindex(node)) + (offset << node->shift);
}

#ifndef __KERNEL__
static void dump_node(struct radix_tree_node *node, unsigned long index)
{
	unsigned long i;

	pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d exceptional %d\n",
		node, node->offset, index, index | node_maxindex(node),
		node->parent,
		node->tags[0][0], node->tags[1][0], node->tags[2][0],
		node->shift, node->count, node->exceptional);

	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
		unsigned long first = index | (i << node->shift);
		unsigned long last = first | ((1UL << node->shift) - 1);
		void *entry = node->slots[i];
		if (!entry)
			continue;
		if (entry == RADIX_TREE_RETRY) {
			pr_debug("radix retry offset %ld indices %lu-%lu parent %p\n",
					i, first, last, node);
		} else if (!radix_tree_is_internal_node(entry)) {
			pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n",
					entry, i, first, last, node);
		} else if (is_sibling_entry(node, entry)) {
			pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n",
					entry, i, first, last, node,
					*(void **)entry_to_node(entry));
		} else {
			dump_node(entry_to_node(entry), first);
		}
	}
}

/* For debug */
static void radix_tree_dump(struct radix_tree_root *root)
{
	pr_debug("radix root: %p rnode %p tags %x\n",
			root, root->rnode,
			root->gfp_mask >> ROOT_TAG_SHIFT);
	if (!radix_tree_is_internal_node(root->rnode))
		return;
	dump_node(entry_to_node(root->rnode), 0);
}

static void dump_ida_node(void *entry, unsigned long index)
{
	unsigned long i;

	if (!entry)
		return;

	if (radix_tree_is_internal_node(entry)) {
		struct radix_tree_node *node = entry_to_node(entry);

		pr_debug("ida node: %p offset %d indices %lu-%lu parent %p free %lx shift %d count %d\n",
			node, node->offset, index * IDA_BITMAP_BITS,
			((index | node_maxindex(node)) + 1) *
				IDA_BITMAP_BITS - 1,
			node->parent, node->tags[0][0], node->shift,
			node->count);
		for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
			dump_ida_node(node->slots[i],
					index | (i << node->shift));
	} else if (radix_tree_exceptional_entry(entry)) {
		pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n",
				entry, (int)(index & RADIX_TREE_MAP_MASK),
				index * IDA_BITMAP_BITS,
				index * IDA_BITMAP_BITS + BITS_PER_LONG -
					RADIX_TREE_EXCEPTIONAL_SHIFT,
				(unsigned long)entry >>
					RADIX_TREE_EXCEPTIONAL_SHIFT);
	} else {
		struct ida_bitmap *bitmap = entry;

		pr_debug("ida btmp: %p offset %d indices %lu-%lu data", bitmap,
				(int)(index & RADIX_TREE_MAP_MASK),
				index * IDA_BITMAP_BITS,
				(index + 1) * IDA_BITMAP_BITS - 1);
		for (i = 0; i < IDA_BITMAP_LONGS; i++)
			pr_cont(" %lx", bitmap->bitmap[i]);
		pr_cont("\n");
	}
}

static void ida_dump(struct ida *ida)
{
	struct radix_tree_root *root = &ida->ida_rt;
	pr_debug("ida: %p node %p free %d\n", ida, root->rnode,
				root->gfp_mask >> ROOT_TAG_SHIFT);
	dump_ida_node(root->rnode, 0);
}
#endif

/*
 * This assumes that the caller has performed appropriate preallocation, and
 * that the caller has pinned this thread of control to the current CPU.
 */
static struct radix_tree_node *
radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
			struct radix_tree_root *root,
			unsigned int shift, unsigned int offset,
			unsigned int count, unsigned int exceptional)
{
	struct radix_tree_node *ret = NULL;

	/*
	 * Preload code isn't irq safe and it doesn't make sense to use
	 * preloading during an interrupt anyway as all the allocations have
	 * to be atomic. So just do normal allocation when in interrupt.
	 */
	if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
		struct radix_tree_preload *rtp;

		/*
		 * Even if the caller has preloaded, try to allocate from the
		 * cache first for the new node to get accounted to the memory
		 * cgroup.
		 */
		ret = kmem_cache_alloc(radix_tree_node_cachep,
				       gfp_mask | __GFP_NOWARN);
		if (ret)
			goto out;

		/*
		 * Provided the caller has preloaded here, we will always
		 * succeed in getting a node here (and never reach
		 * kmem_cache_alloc)
		 */
		rtp = this_cpu_ptr(&radix_tree_preloads);
		if (rtp->nr) {
			ret = rtp->nodes;
			rtp->nodes = ret->parent;
			rtp->nr--;
		}
		/*
		 * Update the allocation stack trace as this is more useful
		 * for debugging.
		 */
		kmemleak_update_trace(ret);
		goto out;
	}
	ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
out:
	BUG_ON(radix_tree_is_internal_node(ret));
	if (ret) {
		ret->shift = shift;
		ret->offset = offset;
		ret->count = count;
		ret->exceptional = exceptional;
		ret->parent = parent;
		ret->root = root;
	}
	return ret;
}

static void radix_tree_node_rcu_free(struct rcu_head *head)
{
	struct radix_tree_node *node =
			container_of(head, struct radix_tree_node, rcu_head);

	/*
	 * Must only free zeroed nodes into the slab.  We can be left with
	 * non-NULL entries by radix_tree_free_nodes, so clear the entries
	 * and tags here.
	 */
	memset(node->slots, 0, sizeof(node->slots));
	memset(node->tags, 0, sizeof(node->tags));
	INIT_LIST_HEAD(&node->private_list);

	kmem_cache_free(radix_tree_node_cachep, node);
}

static inline void
radix_tree_node_free(struct radix_tree_node *node)
{
	call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
}

/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail.  On
 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
 */
static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
{
	struct radix_tree_preload *rtp;
	struct radix_tree_node *node;
	int ret = -ENOMEM;

	/*
	 * Nodes preloaded by one cgroup can be used by another cgroup, so
	 * they should never be accounted to any particular memory cgroup.
	 */
	gfp_mask &= ~__GFP_ACCOUNT;

	preempt_disable();
	rtp = this_cpu_ptr(&radix_tree_preloads);
	while (rtp->nr < nr) {
		preempt_enable();
		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
		if (node == NULL)
			goto out;
		preempt_disable();
		rtp = this_cpu_ptr(&radix_tree_preloads);
		if (rtp->nr < nr) {
			node->parent = rtp->nodes;
			rtp->nodes = node;
			rtp->nr++;
		} else {
			kmem_cache_free(radix_tree_node_cachep, node);
		}
	}
	ret = 0;
out:
	return ret;
}

/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail.  On
 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
 */
int radix_tree_preload(gfp_t gfp_mask)
{
	/* Warn on non-sensical use... */
	WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
	return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
}
EXPORT_SYMBOL(radix_tree_preload);

/*
 * The same as the function above, except we don't guarantee that preloading
 * happens.  We do it if we decide it helps.  On success, return zero with
 * preemption disabled.  On error, return -ENOMEM with preemption not
 * disabled.
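 *
 * The expected calling pattern for radix_tree_preload() and
 * radix_tree_maybe_preload() is roughly the following sketch (the lock,
 * tree and item names are illustrative only):
 *
 *	if (radix_tree_maybe_preload(GFP_NOFS))
 *		return -ENOMEM;
 *	spin_lock_irq(&my_lock);
 *	error = radix_tree_insert(&my_tree, index, item);
 *	spin_unlock_irq(&my_lock);
 *	radix_tree_preload_end();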
 */
int radix_tree_maybe_preload(gfp_t gfp_mask)
{
	if (gfpflags_allow_blocking(gfp_mask))
		return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
	/* Preloading doesn't help anything with this gfp mask, skip it */
	preempt_disable();
	return 0;
}
EXPORT_SYMBOL(radix_tree_maybe_preload);

#ifdef CONFIG_RADIX_TREE_MULTIORDER
/*
 * Preload with enough objects to ensure that we can split a single entry
 * of order @old_order into many entries of size @new_order
 */
int radix_tree_split_preload(unsigned int old_order, unsigned int new_order,
							gfp_t gfp_mask)
{
	unsigned top = 1 << (old_order % RADIX_TREE_MAP_SHIFT);
	unsigned layers = (old_order / RADIX_TREE_MAP_SHIFT) -
				(new_order / RADIX_TREE_MAP_SHIFT);
	unsigned nr = 0;

	WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
	BUG_ON(new_order >= old_order);

	while (layers--)
		nr = nr * RADIX_TREE_MAP_SIZE + 1;
	return __radix_tree_preload(gfp_mask, top * nr);
}
#endif

/*
 * The same as the function above, but preload the number of nodes required
 * to insert (1 << order) contiguous naturally-aligned elements.
 */
int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
{
	unsigned long nr_subtrees;
	int nr_nodes, subtree_height;

	/* Preloading doesn't help anything with this gfp mask, skip it */
	if (!gfpflags_allow_blocking(gfp_mask)) {
		preempt_disable();
		return 0;
	}

	/*
	 * Calculate number and height of fully populated subtrees it takes to
	 * store (1 << order) elements.
	 */
	nr_subtrees = 1 << order;
	for (subtree_height = 0; nr_subtrees > RADIX_TREE_MAP_SIZE;
			subtree_height++)
		nr_subtrees >>= RADIX_TREE_MAP_SHIFT;

	/*
	 * The worst case is zero height tree with a single item at index 0 and
	 * then inserting items starting at ULONG_MAX - (1 << order).
	 *
	 * This requires RADIX_TREE_MAX_PATH nodes to build branch from root to
	 * 0-index item.
	 */
	nr_nodes = RADIX_TREE_MAX_PATH;

	/* Plus branch to fully populated subtrees. */
	nr_nodes += RADIX_TREE_MAX_PATH - subtree_height;

	/* Root node is shared. */
	nr_nodes--;

	/* Plus nodes required to build subtrees. */
	nr_nodes += nr_subtrees * height_to_maxnodes[subtree_height];

	return __radix_tree_preload(gfp_mask, nr_nodes);
}

static unsigned radix_tree_load_root(const struct radix_tree_root *root,
		struct radix_tree_node **nodep, unsigned long *maxindex)
{
	struct radix_tree_node *node = rcu_dereference_raw(root->rnode);

	*nodep = node;

	if (likely(radix_tree_is_internal_node(node))) {
		node = entry_to_node(node);
		*maxindex = node_maxindex(node);
		return node->shift + RADIX_TREE_MAP_SHIFT;
	}

	*maxindex = 0;
	return 0;
}

/*
 * Extend a radix tree so it can store key @index.
 */
static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
				unsigned long index, unsigned int shift)
{
	void *entry;
	unsigned int maxshift;
	int tag;

	/* Figure out what the shift should be. */
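	/*
	 * For example, assuming RADIX_TREE_MAP_SHIFT == 6 (a common
	 * configuration, not something this comment can promise):
	 * shift_maxindex(0) == 63 and shift_maxindex(6) == 4095, so asking
	 * for index 100 when there is an existing entry and the current
	 * shift is 0 raises maxshift to 6, and the loop below then adds one
	 * node per RADIX_TREE_MAP_SHIFT step from the old shift up to
	 * maxshift.
	 */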
	maxshift = shift;
	while (index > shift_maxindex(maxshift))
		maxshift += RADIX_TREE_MAP_SHIFT;

	entry = rcu_dereference_raw(root->rnode);
	if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE)))
		goto out;

	do {
		struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL,
							root, shift, 0, 1, 0);
		if (!node)
			return -ENOMEM;

		if (is_idr(root)) {
			all_tag_set(node, IDR_FREE);
			if (!root_tag_get(root, IDR_FREE)) {
				tag_clear(node, IDR_FREE, 0);
				root_tag_set(root, IDR_FREE);
			}
		} else {
			/* Propagate the aggregated tag info to the new child */
			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
				if (root_tag_get(root, tag))
					tag_set(node, tag, 0);
			}
		}

		BUG_ON(shift > BITS_PER_LONG);
		if (radix_tree_is_internal_node(entry)) {
			entry_to_node(entry)->parent = node;
		} else if (radix_tree_exceptional_entry(entry)) {
			/* Moving an exceptional root->rnode to a node */
			node->exceptional = 1;
		}
		/*
		 * entry was already in the radix tree, so we do not need
		 * rcu_assign_pointer here
		 */
		node->slots[0] = (void __rcu *)entry;
		entry = node_to_entry(node);
		rcu_assign_pointer(root->rnode, entry);
		shift += RADIX_TREE_MAP_SHIFT;
	} while (shift <= maxshift);
out:
	return maxshift + RADIX_TREE_MAP_SHIFT;
}

/**
 * radix_tree_shrink - shrink radix tree to minimum height
 * @root:		radix tree root
 * @update_node:	callback for changing leaf nodes
 */
static inline bool radix_tree_shrink(struct radix_tree_root *root,
				     radix_tree_update_node_t update_node)
{
	bool shrunk = false;

	for (;;) {
		struct radix_tree_node *node = rcu_dereference_raw(root->rnode);
		struct radix_tree_node *child;

		if (!radix_tree_is_internal_node(node))
			break;
		node = entry_to_node(node);

		/*
		 * If the candidate node has more than one child, or its child
		 * is not at the leftmost slot, or the child is a multiorder
		 * entry, we cannot shrink.
		 */
		if (node->count != 1)
			break;
		child = rcu_dereference_raw(node->slots[0]);
		if (!child)
			break;
		if (!radix_tree_is_internal_node(child) && node->shift)
			break;

		if (radix_tree_is_internal_node(child))
			entry_to_node(child)->parent = NULL;

		/*
		 * We don't need rcu_assign_pointer(), since we are simply
		 * moving the node from one part of the tree to another: if it
		 * was safe to dereference the old pointer to it
		 * (node->slots[0]), it will be safe to dereference the new
		 * one (root->rnode) as far as dependent read barriers go.
		 */
		root->rnode = (void __rcu *)child;
		if (is_idr(root) && !tag_get(node, IDR_FREE, 0))
			root_tag_clear(root, IDR_FREE);

		/*
		 * We have a dilemma here. The node's slot[0] must not be
		 * NULLed in case there are concurrent lookups expecting to
		 * find the item. However if this was a bottom-level node,
		 * then it may be subject to the slot pointer being visible
		 * to callers dereferencing it. If the item corresponding to
		 * slot[0] is subsequently deleted, these callers would expect
		 * their slot to become empty sooner or later.
		 *
		 * For example, lockless pagecache will look up a slot, deref
		 * the page pointer, and if the page has 0 refcount it means it
		 * was concurrently deleted from pagecache so try the deref
		 * again.
		 * Fortunately there is already a requirement for logic
		 * to retry the entire slot lookup -- the indirect pointer
		 * problem (replacing direct root node with an indirect pointer
		 * also results in a stale slot). So tag the slot as indirect
		 * to force callers to retry.
		 */
		node->count = 0;
		if (!radix_tree_is_internal_node(child)) {
			node->slots[0] = (void __rcu *)RADIX_TREE_RETRY;
			if (update_node)
				update_node(node);
		}

		WARN_ON_ONCE(!list_empty(&node->private_list));
		radix_tree_node_free(node);
		shrunk = true;
	}

	return shrunk;
}

static bool delete_node(struct radix_tree_root *root,
			struct radix_tree_node *node,
			radix_tree_update_node_t update_node)
{
	bool deleted = false;

	do {
		struct radix_tree_node *parent;

		if (node->count) {
			if (node_to_entry(node) ==
					rcu_dereference_raw(root->rnode))
				deleted |= radix_tree_shrink(root,
								update_node);
			return deleted;
		}

		parent = node->parent;
		if (parent) {
			parent->slots[node->offset] = NULL;
			parent->count--;
		} else {
			/*
			 * Shouldn't the tags already have all been cleared
			 * by the caller?
			 */
			if (!is_idr(root))
				root_tag_clear_all(root);
			root->rnode = NULL;
		}

		WARN_ON_ONCE(!list_empty(&node->private_list));
		radix_tree_node_free(node);
		deleted = true;

		node = parent;
	} while (node);

	return deleted;
}

/**
 * __radix_tree_create - create a slot in a radix tree
 * @root:	radix tree root
 * @index:	index key
 * @order:	index occupies 2^order aligned slots
 * @nodep:	returns node
 * @slotp:	returns slot
 *
 * Create, if necessary, and return the node and slot for an item
 * at position @index in the radix tree @root.
 *
 * Until there is more than one item in the tree, no nodes are
 * allocated and @root->rnode is used as a direct slot instead of
 * pointing to a node, in which case *@nodep will be NULL.
 *
 * Returns -ENOMEM, or 0 for success.
 */
int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
			unsigned order, struct radix_tree_node **nodep,
			void __rcu ***slotp)
{
	struct radix_tree_node *node = NULL, *child;
	void __rcu **slot = (void __rcu **)&root->rnode;
	unsigned long maxindex;
	unsigned int shift, offset = 0;
	unsigned long max = index | ((1UL << order) - 1);
	gfp_t gfp = root_gfp_mask(root);

	shift = radix_tree_load_root(root, &child, &maxindex);

	/* Make sure the tree is high enough. */
	if (order > 0 && max == ((1UL << order) - 1))
		max++;
	if (max > maxindex) {
		int error = radix_tree_extend(root, gfp, max, shift);
		if (error < 0)
			return error;
		shift = error;
		child = rcu_dereference_raw(root->rnode);
	}

	while (shift > order) {
		shift -= RADIX_TREE_MAP_SHIFT;
		if (child == NULL) {
			/* Have to add a child node. */
			child = radix_tree_node_alloc(gfp, node, root, shift,
							offset, 0, 0);
			if (!child)
				return -ENOMEM;
			rcu_assign_pointer(*slot, node_to_entry(child));
			if (node)
				node->count++;
		} else if (!radix_tree_is_internal_node(child))
			break;

		/* Go a level down */
		node = entry_to_node(child);
		offset = radix_tree_descend(node, &child, index);
		slot = &node->slots[offset];
	}

	if (nodep)
		*nodep = node;
	if (slotp)
		*slotp = slot;
	return 0;
}

/*
 * Free any nodes below this node.  The tree is presumed to not need
 * shrinking, and any user data in the tree is presumed to not need a
 * destructor called on it.  If we need to add a destructor, we can
 * add that functionality later.  Note that we may not clear tags or
 * slots from the tree as an RCU walker may still have a pointer into
 * this subtree.  We could replace the entries with RADIX_TREE_RETRY,
 * but we'll still have to clear those in rcu_free.
 */
static void radix_tree_free_nodes(struct radix_tree_node *node)
{
	unsigned offset = 0;
	struct radix_tree_node *child = entry_to_node(node);

	for (;;) {
		void *entry = rcu_dereference_raw(child->slots[offset]);
		if (radix_tree_is_internal_node(entry) &&
					!is_sibling_entry(child, entry)) {
			child = entry_to_node(entry);
			offset = 0;
			continue;
		}
		offset++;
		while (offset == RADIX_TREE_MAP_SIZE) {
			struct radix_tree_node *old = child;
			offset = child->offset + 1;
			child = child->parent;
			WARN_ON_ONCE(!list_empty(&old->private_list));
			radix_tree_node_free(old);
			if (old == entry_to_node(node))
				return;
		}
	}
}

#ifdef CONFIG_RADIX_TREE_MULTIORDER
static inline int insert_entries(struct radix_tree_node *node,
		void __rcu **slot, void *item, unsigned order, bool replace)
{
	struct radix_tree_node *child;
	unsigned i, n, tag, offset, tags = 0;

	if (node) {
		if (order > node->shift)
			n = 1 << (order - node->shift);
		else
			n = 1;
		offset = get_slot_offset(node, slot);
	} else {
		n = 1;
		offset = 0;
	}

	if (n > 1) {
		offset = offset & ~(n - 1);
		slot = &node->slots[offset];
	}
	child = node_to_entry(slot);

	for (i = 0; i < n; i++) {
		if (slot[i]) {
			if (replace) {
				node->count--;
				for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
					if (tag_get(node, tag, offset + i))
						tags |= 1 << tag;
			} else
				return -EEXIST;
		}
	}

	for (i = 0; i < n; i++) {
		struct radix_tree_node *old = rcu_dereference_raw(slot[i]);
		if (i) {
			rcu_assign_pointer(slot[i], child);
			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
				if (tags & (1 << tag))
					tag_clear(node, tag, offset + i);
		} else {
			rcu_assign_pointer(slot[i], item);
			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
				if (tags & (1 << tag))
					tag_set(node, tag, offset);
		}
		if (radix_tree_is_internal_node(old) &&
					!is_sibling_entry(node, old) &&
					(old != RADIX_TREE_RETRY))
			radix_tree_free_nodes(old);
		if (radix_tree_exceptional_entry(old))
			node->exceptional--;
	}
	if (node) {
		node->count += n;
		if (radix_tree_exceptional_entry(item))
			node->exceptional += n;
	}
	return n;
}
#else
static inline int insert_entries(struct radix_tree_node *node,
		void __rcu **slot, void *item, unsigned order, bool replace)
{
	if (*slot)
		return -EEXIST;
	rcu_assign_pointer(*slot, item);
	if (node) {
		node->count++;
		if (radix_tree_exceptional_entry(item))
			node->exceptional++;
	}
	return 1;
}
#endif

/**
 * __radix_tree_insert - insert into a radix tree
 * @root:	radix tree root
 * @index:	index key
 * @order:	key covers the 2^order indices around index
 * @item:	item to insert
 *
 * Insert an item into the radix tree at position @index.
 */
int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
			unsigned order, void *item)
{
	struct radix_tree_node *node;
	void __rcu **slot;
	int error;

	BUG_ON(radix_tree_is_internal_node(item));

	error = __radix_tree_create(root, index, order, &node, &slot);
	if (error)
		return error;

	error = insert_entries(node, slot, item, order, false);
	if (error < 0)
		return error;

	if (node) {
		unsigned offset = get_slot_offset(node, slot);
		BUG_ON(tag_get(node, 0, offset));
		BUG_ON(tag_get(node, 1, offset));
		BUG_ON(tag_get(node, 2, offset));
	} else {
		BUG_ON(root_tags_get(root));
	}

	return 0;
}
EXPORT_SYMBOL(__radix_tree_insert);

/**
 * __radix_tree_lookup - lookup an item in a radix tree
 * @root:	radix tree root
 * @index:	index key
 * @nodep:	returns node
 * @slotp:	returns slot
 *
 * Lookup and return the item at position @index in the radix
 * tree @root.
 *
 * Until there is more than one item in the tree, no nodes are
 * allocated and @root->rnode is used as a direct slot instead of
 * pointing to a node, in which case *@nodep will be NULL.
 */
void *__radix_tree_lookup(const struct radix_tree_root *root,
			  unsigned long index, struct radix_tree_node **nodep,
			  void __rcu ***slotp)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;
	void __rcu **slot;

 restart:
	parent = NULL;
	slot = (void __rcu **)&root->rnode;
	radix_tree_load_root(root, &node, &maxindex);
	if (index > maxindex)
		return NULL;

	while (radix_tree_is_internal_node(node)) {
		unsigned offset;

		if (node == RADIX_TREE_RETRY)
			goto restart;
		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);
		slot = parent->slots + offset;
	}

	if (nodep)
		*nodep = parent;
	if (slotp)
		*slotp = slot;
	return node;
}

/**
 * radix_tree_lookup_slot - lookup a slot in a radix tree
 * @root:	radix tree root
 * @index:	index key
 *
 * Returns: the slot corresponding to the position @index in the
 * radix tree @root. This is useful for update-if-exists operations.
 *
 * This function can be called under rcu_read_lock iff the slot is not
 * modified by radix_tree_replace_slot, otherwise it must be called
 * exclusive from other writers. Any dereference of the slot must be done
 * using radix_tree_deref_slot.
 */
void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root,
				unsigned long index)
{
	void __rcu **slot;

	if (!__radix_tree_lookup(root, index, NULL, &slot))
		return NULL;
	return slot;
}
EXPORT_SYMBOL(radix_tree_lookup_slot);

/**
 * radix_tree_lookup - perform lookup operation on a radix tree
 * @root:	radix tree root
 * @index:	index key
 *
 * Lookup the item at the position @index in the radix tree @root.
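 *
 * A minimal lookup sketch (the tree name and item handling are
 * illustrative; the RCU requirements are described in the note below):
 *
 *	rcu_read_lock();
 *	item = radix_tree_lookup(&my_tree, index);
 *	if (item)
 *		do_something(item);
 *	rcu_read_unlock();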
 *
 * This function can be called under rcu_read_lock, however the caller
 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free
 * them safely). No RCU barriers are required to access or modify the
 * returned item, however.
 */
void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
{
	return __radix_tree_lookup(root, index, NULL, NULL);
}
EXPORT_SYMBOL(radix_tree_lookup);

static inline void replace_sibling_entries(struct radix_tree_node *node,
				void __rcu **slot, int count, int exceptional)
{
#ifdef CONFIG_RADIX_TREE_MULTIORDER
	void *ptr = node_to_entry(slot);
	unsigned offset = get_slot_offset(node, slot) + 1;

	while (offset < RADIX_TREE_MAP_SIZE) {
		if (rcu_dereference_raw(node->slots[offset]) != ptr)
			break;
		if (count < 0) {
			node->slots[offset] = NULL;
			node->count--;
		}
		node->exceptional += exceptional;
		offset++;
	}
#endif
}

static void replace_slot(void __rcu **slot, void *item,
		struct radix_tree_node *node, int count, int exceptional)
{
	if (WARN_ON_ONCE(radix_tree_is_internal_node(item)))
		return;

	if (node && (count || exceptional)) {
		node->count += count;
		node->exceptional += exceptional;
		replace_sibling_entries(node, slot, count, exceptional);
	}

	rcu_assign_pointer(*slot, item);
}

static bool node_tag_get(const struct radix_tree_root *root,
				const struct radix_tree_node *node,
				unsigned int tag, unsigned int offset)
{
	if (node)
		return tag_get(node, tag, offset);
	return root_tag_get(root, tag);
}

/*
 * IDR users want to be able to store NULL in the tree, so if the slot isn't
 * free, don't adjust the count, even if it's transitioning between NULL and
 * non-NULL.  For the IDA, we mark slots as being IDR_FREE while they still
 * have empty bits, but it only stores NULL in slots when they're being
 * deleted.
 */
static int calculate_count(struct radix_tree_root *root,
				struct radix_tree_node *node, void __rcu **slot,
				void *item, void *old)
{
	if (is_idr(root)) {
		unsigned offset = get_slot_offset(node, slot);
		bool free = node_tag_get(root, node, IDR_FREE, offset);
		if (!free)
			return 0;
		if (!old)
			return 1;
	}
	return !!item - !!old;
}

/**
 * __radix_tree_replace - replace item in a slot
 * @root:		radix tree root
 * @node:		pointer to tree node
 * @slot:		pointer to slot in @node
 * @item:		new item to store in the slot.
 * @update_node:	callback for changing leaf nodes
 *
 * For use with __radix_tree_lookup().  Caller must hold tree write locked
 * across slot lookup and replacement.
 */
void __radix_tree_replace(struct radix_tree_root *root,
			  struct radix_tree_node *node,
			  void __rcu **slot, void *item,
			  radix_tree_update_node_t update_node)
{
	void *old = rcu_dereference_raw(*slot);
	int exceptional = !!radix_tree_exceptional_entry(item) -
				!!radix_tree_exceptional_entry(old);
	int count = calculate_count(root, node, slot, item, old);

	/*
	 * This function supports replacing exceptional entries and
	 * deleting entries, but that needs accounting against the
	 * node unless the slot is root->rnode.
	 */
	WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->rnode) &&
			(count || exceptional));
	replace_slot(slot, item, node, count, exceptional);

	if (!node)
		return;

	if (update_node)
		update_node(node);

	delete_node(root, node, update_node);
}

/**
 * radix_tree_replace_slot - replace item in a slot
 * @root:	radix tree root
 * @slot:	pointer to slot
 * @item:	new item to store in the slot.
 *
 * For use with radix_tree_lookup_slot(), radix_tree_gang_lookup_slot(),
 * radix_tree_gang_lookup_tag_slot().  Caller must hold tree write locked
 * across slot lookup and replacement.
 *
 * NOTE: This cannot be used to switch between non-entries (empty slots),
 * regular entries, and exceptional entries, as that requires accounting
 * inside the radix tree node. When switching from one type of entry or
 * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
 * radix_tree_iter_replace().
 */
void radix_tree_replace_slot(struct radix_tree_root *root,
			     void __rcu **slot, void *item)
{
	__radix_tree_replace(root, NULL, slot, item, NULL);
}
EXPORT_SYMBOL(radix_tree_replace_slot);

/**
 * radix_tree_iter_replace - replace item in a slot
 * @root:	radix tree root
 * @iter:	iterator state
 * @slot:	pointer to slot
 * @item:	new item to store in the slot.
 *
 * For use with radix_tree_split() and radix_tree_for_each_slot().
 * Caller must hold tree write locked across split and replacement.
 */
void radix_tree_iter_replace(struct radix_tree_root *root,
				const struct radix_tree_iter *iter,
				void __rcu **slot, void *item)
{
	__radix_tree_replace(root, iter->node, slot, item, NULL);
}

#ifdef CONFIG_RADIX_TREE_MULTIORDER
/**
 * radix_tree_join - replace multiple entries with one multiorder entry
 * @root:	radix tree root
 * @index:	an index inside the new entry
 * @order:	order of the new entry
 * @item:	new entry
 *
 * Call this function to replace several entries with one larger entry.
 * The existing entries are presumed to not need freeing as a result of
 * this call.
 *
 * The replacement entry will have all the tags set on it that were set
 * on any of the entries it is replacing.
 */
int radix_tree_join(struct radix_tree_root *root, unsigned long index,
			unsigned order, void *item)
{
	struct radix_tree_node *node;
	void __rcu **slot;
	int error;

	BUG_ON(radix_tree_is_internal_node(item));

	error = __radix_tree_create(root, index, order, &node, &slot);
	if (!error)
		error = insert_entries(node, slot, item, order, true);
	if (error > 0)
		error = 0;

	return error;
}

/**
 * radix_tree_split - Split an entry into smaller entries
 * @root:	radix tree root
 * @index:	An index within the large entry
 * @order:	Order of new entries
 *
 * Call this function as the first step in replacing a multiorder entry
 * with several entries of lower order.  After this function returns,
 * loop over the relevant portion of the tree using radix_tree_for_each_slot()
 * and call radix_tree_iter_replace() to set up each new entry.
 *
 * The tags from this entry are replicated to all the new entries.
 *
 * The radix tree should be locked against modification during the entire
 * replacement operation.
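 *
 * A rough sketch of the intended calling sequence (iterator and item names
 * are illustrative; the loop would normally stop once the old entry's index
 * range has been covered):
 *
 *	radix_tree_split(root, index, new_order);
 *	radix_tree_for_each_slot(slot, root, &iter, index) {
 *		radix_tree_iter_replace(root, &iter, slot, new_item);
 *	}
 *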
 * Lock-free lookups will see RADIX_TREE_RETRY, which should prompt RCU
 * walkers to restart the lookup from the root.
 */
int radix_tree_split(struct radix_tree_root *root, unsigned long index,
				unsigned order)
{
	struct radix_tree_node *parent, *node, *child;
	void __rcu **slot;
	unsigned int offset, end;
	unsigned n, tag, tags = 0;
	gfp_t gfp = root_gfp_mask(root);

	if (!__radix_tree_lookup(root, index, &parent, &slot))
		return -ENOENT;
	if (!parent)
		return -ENOENT;

	offset = get_slot_offset(parent, slot);

	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
		if (tag_get(parent, tag, offset))
			tags |= 1 << tag;

	for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) {
		if (!is_sibling_entry(parent,
				rcu_dereference_raw(parent->slots[end])))
			break;
		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
			if (tags & (1 << tag))
				tag_set(parent, tag, end);
		/* rcu_assign_pointer ensures tags are set before RETRY */
		rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY);
	}
	rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY);
	parent->exceptional -= (end - offset);

	if (order == parent->shift)
		return 0;
	if (order > parent->shift) {
		while (offset < end)
			offset += insert_entries(parent, &parent->slots[offset],
					RADIX_TREE_RETRY, order, true);
		return 0;
	}

	node = parent;

	for (;;) {
		if (node->shift > order) {
			child = radix_tree_node_alloc(gfp, node, root,
					node->shift - RADIX_TREE_MAP_SHIFT,
					offset, 0, 0);
			if (!child)
				goto nomem;
			if (node != parent) {
				node->count++;
				rcu_assign_pointer(node->slots[offset],
							node_to_entry(child));
				for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
					if (tags & (1 << tag))
						tag_set(node, tag, offset);
			}

			node = child;
			offset = 0;
			continue;
		}

		n = insert_entries(node, &node->slots[offset],
					RADIX_TREE_RETRY, order, false);
		BUG_ON(n > RADIX_TREE_MAP_SIZE);

		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
			if (tags & (1 << tag))
				tag_set(node, tag, offset);
		offset += n;

		while (offset == RADIX_TREE_MAP_SIZE) {
			if (node == parent)
				break;
			offset = node->offset;
			child = node;
			node = node->parent;
			rcu_assign_pointer(node->slots[offset],
						node_to_entry(child));
			offset++;
		}
		if ((node == parent) && (offset == end))
			return 0;
	}

 nomem:
	/* Shouldn't happen; did user forget to preload? */
	/* TODO: free all the allocated nodes */
	WARN_ON(1);
	return -ENOMEM;
}
#endif

static void node_tag_set(struct radix_tree_root *root,
				struct radix_tree_node *node,
				unsigned int tag, unsigned int offset)
{
	while (node) {
		if (tag_get(node, tag, offset))
			return;
		tag_set(node, tag, offset);
		offset = node->offset;
		node = node->parent;
	}

	if (!root_tag_get(root, tag))
		root_tag_set(root, tag);
}

/**
 * radix_tree_tag_set - set a tag on a radix tree node
 * @root:	radix tree root
 * @index:	index key
 * @tag:	tag index
 *
 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
 * corresponding to @index in the radix tree, from the root
 * all the way down to the leaf node.
 *
 * Returns the address of the tagged item.
 * Setting a tag on a not-present item is a bug.
 */
void *radix_tree_tag_set(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;

	radix_tree_load_root(root, &node, &maxindex);
	BUG_ON(index > maxindex);

	while (radix_tree_is_internal_node(node)) {
		unsigned offset;

		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);
		BUG_ON(!node);

		if (!tag_get(parent, tag, offset))
			tag_set(parent, tag, offset);
	}

	/* set the root's tag bit */
	if (!root_tag_get(root, tag))
		root_tag_set(root, tag);

	return node;
}
EXPORT_SYMBOL(radix_tree_tag_set);

/**
 * radix_tree_iter_tag_set - set a tag on the current iterator entry
 * @root:	radix tree root
 * @iter:	iterator state
 * @tag:	tag to set
 */
void radix_tree_iter_tag_set(struct radix_tree_root *root,
			const struct radix_tree_iter *iter, unsigned int tag)
{
	node_tag_set(root, iter->node, tag, iter_offset(iter));
}

static void node_tag_clear(struct radix_tree_root *root,
				struct radix_tree_node *node,
				unsigned int tag, unsigned int offset)
{
	while (node) {
		if (!tag_get(node, tag, offset))
			return;
		tag_clear(node, tag, offset);
		if (any_tag_set(node, tag))
			return;

		offset = node->offset;
		node = node->parent;
	}

	/* clear the root's tag bit */
	if (root_tag_get(root, tag))
		root_tag_clear(root, tag);
}

/**
 * radix_tree_tag_clear - clear a tag on a radix tree node
 * @root:	radix tree root
 * @index:	index key
 * @tag:	tag index
 *
 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
 * corresponding to @index in the radix tree.  If this causes
 * the leaf node to have no tags set then clear the tag in the
 * next-to-leaf node, etc.
 *
 * Returns the address of the tagged item on success, else NULL.  ie:
 * has the same return value and semantics as radix_tree_lookup().
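 *
 * Tag usage sketch (the tree, index and tag names are illustrative; the
 * caller must exclude concurrent modification of the tree):
 *
 *	radix_tree_tag_set(&my_tree, index, MY_TAG);
 *	...
 *	if (radix_tree_tag_get(&my_tree, index, MY_TAG))
 *		radix_tree_tag_clear(&my_tree, index, MY_TAG);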
 */
void *radix_tree_tag_clear(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;
	int uninitialized_var(offset);

	radix_tree_load_root(root, &node, &maxindex);
	if (index > maxindex)
		return NULL;

	parent = NULL;

	while (radix_tree_is_internal_node(node)) {
		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);
	}

	if (node)
		node_tag_clear(root, parent, tag, offset);

	return node;
}
EXPORT_SYMBOL(radix_tree_tag_clear);

/**
 * radix_tree_iter_tag_clear - clear a tag on the current iterator entry
 * @root:	radix tree root
 * @iter:	iterator state
 * @tag:	tag to clear
 */
void radix_tree_iter_tag_clear(struct radix_tree_root *root,
			const struct radix_tree_iter *iter, unsigned int tag)
{
	node_tag_clear(root, iter->node, tag, iter_offset(iter));
}

/**
 * radix_tree_tag_get - get a tag on a radix tree node
 * @root:	radix tree root
 * @index:	index key
 * @tag:	tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Return values:
 *
 *  0: tag not present or not set
 *  1: tag set
 *
 * Note that the return value of this function may not be relied on, even if
 * the RCU lock is held, unless tag modification and node deletion are excluded
 * from concurrency.
 */
int radix_tree_tag_get(const struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;

	if (!root_tag_get(root, tag))
		return 0;

	radix_tree_load_root(root, &node, &maxindex);
	if (index > maxindex)
		return 0;

	while (radix_tree_is_internal_node(node)) {
		unsigned offset;

		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);

		if (!tag_get(parent, tag, offset))
			return 0;
		if (node == RADIX_TREE_RETRY)
			break;
	}

	return 1;
}
EXPORT_SYMBOL(radix_tree_tag_get);

static inline void __set_iter_shift(struct radix_tree_iter *iter,
					unsigned int shift)
{
#ifdef CONFIG_RADIX_TREE_MULTIORDER
	iter->shift = shift;
#endif
}

/* Construct iter->tags bit-mask from node->tags[tag] array */
static void set_iter_tags(struct radix_tree_iter *iter,
				struct radix_tree_node *node, unsigned offset,
				unsigned tag)
{
	unsigned tag_long = offset / BITS_PER_LONG;
	unsigned tag_bit  = offset % BITS_PER_LONG;

	if (!node) {
		iter->tags = 1;
		return;
	}

	iter->tags = node->tags[tag][tag_long] >> tag_bit;

	/* This never happens if RADIX_TREE_TAG_LONGS == 1 */
	if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
		/* Pick tags from next element */
		if (tag_bit)
			iter->tags |= node->tags[tag][tag_long + 1] <<
						(BITS_PER_LONG - tag_bit);
		/* Clip chunk size, here only BITS_PER_LONG tags */
		iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG);
	}
}

#ifdef CONFIG_RADIX_TREE_MULTIORDER
static void __rcu **skip_siblings(struct radix_tree_node **nodep,
			void __rcu **slot, struct radix_tree_iter *iter)
{
	void *sib = node_to_entry(slot - 1);

	while (iter->index < iter->next_index) {
		*nodep = rcu_dereference_raw(*slot);
		if (*nodep && *nodep != sib)
			return slot;
		slot++;
		iter->index = __radix_tree_iter_add(iter, 1);
		iter->tags >>= 1;
	}

	*nodep = NULL;
	return NULL;
}

void __rcu **__radix_tree_next_slot(void __rcu **slot,
				struct radix_tree_iter *iter, unsigned flags)
{
	unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
	struct radix_tree_node *node = rcu_dereference_raw(*slot);

	slot = skip_siblings(&node, slot, iter);

	while (radix_tree_is_internal_node(node)) {
		unsigned offset;
		unsigned long next_index;

		if (node == RADIX_TREE_RETRY)
			return slot;
		node = entry_to_node(node);
		iter->node = node;
		iter->shift = node->shift;

		if (flags & RADIX_TREE_ITER_TAGGED) {
			offset = radix_tree_find_next_bit(node, tag, 0);
			if (offset == RADIX_TREE_MAP_SIZE)
				return NULL;
			slot = &node->slots[offset];
			iter->index = __radix_tree_iter_add(iter, offset);
			set_iter_tags(iter, node, offset, tag);
			node = rcu_dereference_raw(*slot);
		} else {
			offset = 0;
			slot = &node->slots[0];
			for (;;) {
				node = rcu_dereference_raw(*slot);
				if (node)
					break;
				slot++;
				offset++;
				if (offset == RADIX_TREE_MAP_SIZE)
					return NULL;
			}
			iter->index = __radix_tree_iter_add(iter, offset);
		}
		if ((flags & RADIX_TREE_ITER_CONTIG) && (offset > 0))
			goto none;
		next_index = (iter->index | shift_maxindex(iter->shift)) + 1;
		if (next_index < iter->next_index)
			iter->next_index = next_index;
	}

	return slot;
 none:
	iter->next_index = 0;
	return NULL;
}
EXPORT_SYMBOL(__radix_tree_next_slot);
#else
static void __rcu **skip_siblings(struct radix_tree_node **nodep,
			void __rcu **slot, struct radix_tree_iter *iter)
{
	return slot;
}
#endif

void __rcu **radix_tree_iter_resume(void __rcu **slot,
					struct radix_tree_iter *iter)
{
	struct radix_tree_node *node;

	slot++;
	iter->index = __radix_tree_iter_add(iter, 1);
	skip_siblings(&node, slot, iter);
	iter->next_index = iter->index;
	iter->tags = 0;
	return NULL;
}
EXPORT_SYMBOL(radix_tree_iter_resume);

/**
 * radix_tree_next_chunk - find next chunk of slots for iteration
 *
 * @root:	radix tree root
 * @iter:	iterator state
 * @flags:	RADIX_TREE_ITER_* flags and tag index
 * Returns:	pointer to chunk first slot, or NULL if iteration is over
 */
void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
			     struct radix_tree_iter *iter, unsigned flags)
{
	unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
	struct radix_tree_node *node, *child;
	unsigned long index, offset, maxindex;

	if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
		return NULL;

	/*
	 * Catch next_index overflow after ~0UL. iter->index never overflows
	 * during iterating; it can be zero only at the beginning.
	 * And we cannot overflow iter->next_index in a single step,
	 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
	 *
	 * This condition is also used by radix_tree_next_slot() to stop
	 * contiguous iterating, and forbid switching to the next chunk.
	 */
	index = iter->next_index;
	if (!index && iter->index)
		return NULL;

 restart:
	radix_tree_load_root(root, &child, &maxindex);
	if (index > maxindex)
		return NULL;
	if (!child)
		return NULL;

	if (!radix_tree_is_internal_node(child)) {
		/* Single-slot tree */
		iter->index = index;
		iter->next_index = maxindex + 1;
		iter->tags = 1;
		iter->node = NULL;
		__set_iter_shift(iter, 0);
		return (void __rcu **)&root->rnode;
	}

	do {
		node = entry_to_node(child);
		offset = radix_tree_descend(node, &child, index);

		if ((flags & RADIX_TREE_ITER_TAGGED) ?
				!tag_get(node, tag, offset) : !child) {
			/* Hole detected */
			if (flags & RADIX_TREE_ITER_CONTIG)
				return NULL;

			if (flags & RADIX_TREE_ITER_TAGGED)
				offset = radix_tree_find_next_bit(node, tag,
						offset + 1);
			else
				while (++offset < RADIX_TREE_MAP_SIZE) {
					void *slot = rcu_dereference_raw(
							node->slots[offset]);
					if (is_sibling_entry(node, slot))
						continue;
					if (slot)
						break;
				}
			index &= ~node_maxindex(node);
			index += offset << node->shift;
			/* Overflow after ~0UL */
			if (!index)
				return NULL;
			if (offset == RADIX_TREE_MAP_SIZE)
				goto restart;
			child = rcu_dereference_raw(node->slots[offset]);
		}

		if (!child)
			goto restart;
		if (child == RADIX_TREE_RETRY)
			break;
	} while (radix_tree_is_internal_node(child));

	/* Update the iterator state */
	iter->index = (index &~ node_maxindex(node)) | (offset << node->shift);
	iter->next_index = (index | node_maxindex(node)) + 1;
	iter->node = node;
	__set_iter_shift(iter, node->shift);

	if (flags & RADIX_TREE_ITER_TAGGED)
		set_iter_tags(iter, node, offset, tag);

	return node->slots + offset;
}
EXPORT_SYMBOL(radix_tree_next_chunk);

/**
 * radix_tree_gang_lookup - perform multiple lookup on a radix tree
 * @root:		radix tree root
 * @results:	where the results of the lookup are placed
 * @first_index:	start the lookup from this key
 * @max_items:	place up to this many items at *results
 *
 * Performs an index-ascending scan of the tree for present items.  Places
 * them at *@results and returns the number of items which were placed at
 * *@results.
 *
 * The implementation is naive.
 *
 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under
 * rcu_read_lock. In this case, rather than the returned results being
 * an atomic snapshot of the tree at a single point in time, the
 * semantics of an RCU protected gang lookup are as though multiple
 * radix_tree_lookups have been issued in individual locks, and results
 * stored in 'results'.
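 *
 * Example (sketch only; the results array size and tree name are
 * illustrative):
 *
 *	void *results[16];
 *	unsigned int nr;
 *
 *	rcu_read_lock();
 *	nr = radix_tree_gang_lookup(&my_tree, results, 0,
 *				    ARRAY_SIZE(results));
 *	rcu_read_unlock();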
 */
unsigned int
radix_tree_gang_lookup(const struct radix_tree_root *root, void **results,
			unsigned long first_index, unsigned int max_items)
{
	struct radix_tree_iter iter;
	void __rcu **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_slot(slot, root, &iter, first_index) {
		results[ret] = rcu_dereference_raw(*slot);
		if (!results[ret])
			continue;
		if (radix_tree_is_internal_node(results[ret])) {
			slot = radix_tree_iter_retry(&iter);
			continue;
		}
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup);

/**
 * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
 * @root:		radix tree root
 * @results:	where the results of the lookup are placed
 * @indices:	where their indices should be placed (but usually NULL)
 * @first_index:	start the lookup from this key
 * @max_items:	place up to this many items at *results
 *
 * Performs an index-ascending scan of the tree for present items.  Places
 * their slots at *@results and returns the number of items which were
 * placed at *@results.
 *
 * The implementation is naive.
 *
 * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must
 * be dereferenced with radix_tree_deref_slot, and if using only RCU
 * protection, radix_tree_deref_slot may fail requiring a retry.
 */
unsigned int
radix_tree_gang_lookup_slot(const struct radix_tree_root *root,
			void __rcu ***results, unsigned long *indices,
			unsigned long first_index, unsigned int max_items)
{
	struct radix_tree_iter iter;
	void __rcu **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_slot(slot, root, &iter, first_index) {
		results[ret] = slot;
		if (indices)
			indices[ret] = iter.index;
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_slot);

/**
 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
 *                              based on a tag
 * @root:		radix tree root
 * @results:	where the results of the lookup are placed
 * @first_index:	start the lookup from this key
 * @max_items:	place up to this many items at *results
 * @tag:		the tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Performs an index-ascending scan of the tree for present items which
 * have the tag indexed by @tag set.  Places the items at *@results and
 * returns the number of items which were placed at *@results.
/**
 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
 *                              based on a tag
 * @root: radix tree root
 * @results: where the results of the lookup are placed
 * @first_index: start the lookup from this key
 * @max_items: place up to this many items at *results
 * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Performs an index-ascending scan of the tree for present items which
 * have the tag indexed by @tag set. Places the items at *@results and
 * returns the number of items which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results,
                unsigned long first_index, unsigned int max_items,
                unsigned int tag)
{
        struct radix_tree_iter iter;
        void __rcu **slot;
        unsigned int ret = 0;

        if (unlikely(!max_items))
                return 0;

        radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
                results[ret] = rcu_dereference_raw(*slot);
                if (!results[ret])
                        continue;
                if (radix_tree_is_internal_node(results[ret])) {
                        slot = radix_tree_iter_retry(&iter);
                        continue;
                }
                if (++ret == max_items)
                        break;
        }

        return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag);

/**
 * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
 *                                   radix tree based on a tag
 * @root: radix tree root
 * @results: where the results of the lookup are placed
 * @first_index: start the lookup from this key
 * @max_items: place up to this many items at *results
 * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Performs an index-ascending scan of the tree for present items which
 * have the tag indexed by @tag set. Places the slots at *@results and
 * returns the number of slots which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root,
                void __rcu ***results, unsigned long first_index,
                unsigned int max_items, unsigned int tag)
{
        struct radix_tree_iter iter;
        void __rcu **slot;
        unsigned int ret = 0;

        if (unlikely(!max_items))
                return 0;

        radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
                results[ret] = slot;
                if (++ret == max_items)
                        break;
        }

        return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
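/*
 * Editor's example (not part of the original source): collecting entries
 * that carry a given tag.  Setting the tag (radix_tree_tag_set()) must be
 * done under the lock that protects tree modifications; the lookup side can
 * run under RCU alone.  The tag value 0 and the caller-supplied results
 * array are assumptions made for this sketch.
 */
static unsigned int __maybe_unused
example_collect_tagged(struct radix_tree_root *root, void **results,
                       unsigned int max_items)
{
        unsigned int n;

        rcu_read_lock();
        n = radix_tree_gang_lookup_tag(root, results, 0, max_items, 0);
        rcu_read_unlock();

        return n;       /* number of tagged entries copied into results[] */
}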
/**
 * __radix_tree_delete_node - try to free node after clearing a slot
 * @root: radix tree root
 * @node: node containing the slot that was cleared
 * @update_node: callback for changing leaf nodes
 *
 * After clearing a slot in @node of the radix tree rooted at @root,
 * call this function to attempt freeing the node and shrinking the tree.
 */
void __radix_tree_delete_node(struct radix_tree_root *root,
                              struct radix_tree_node *node,
                              radix_tree_update_node_t update_node)
{
        delete_node(root, node, update_node);
}

static bool __radix_tree_delete(struct radix_tree_root *root,
                                struct radix_tree_node *node, void __rcu **slot)
{
        void *old = rcu_dereference_raw(*slot);
        int exceptional = radix_tree_exceptional_entry(old) ? -1 : 0;
        unsigned offset = get_slot_offset(node, slot);
        int tag;

        if (is_idr(root))
                node_tag_set(root, node, IDR_FREE, offset);
        else
                for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
                        node_tag_clear(root, node, tag, offset);

        replace_slot(slot, NULL, node, -1, exceptional);
        return node && delete_node(root, node, NULL);
}

/**
 * radix_tree_iter_delete - delete the entry at this iterator position
 * @root: radix tree root
 * @iter: iterator state
 * @slot: pointer to slot
 *
 * Delete the entry at the position currently pointed to by the iterator.
 * This may result in the current node being freed; if it is, the iterator
 * is advanced so that it will not reference the freed memory.  This
 * function may be called without any locking if there are no other threads
 * which can access this tree.
 */
void radix_tree_iter_delete(struct radix_tree_root *root,
                            struct radix_tree_iter *iter, void __rcu **slot)
{
        if (__radix_tree_delete(root, iter->node, slot))
                iter->index = iter->next_index;
}
EXPORT_SYMBOL(radix_tree_iter_delete);

/**
 * radix_tree_delete_item - delete an item from a radix tree
 * @root: radix tree root
 * @index: index key
 * @item: expected item
 *
 * Remove @item at @index from the radix tree rooted at @root.
 *
 * Return: the deleted entry, or %NULL if it was not present
 * or the entry at the given @index was not @item.
 */
void *radix_tree_delete_item(struct radix_tree_root *root,
                             unsigned long index, void *item)
{
        struct radix_tree_node *node = NULL;
        void __rcu **slot;
        void *entry;

        entry = __radix_tree_lookup(root, index, &node, &slot);
        if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE,
                                                get_slot_offset(node, slot))))
                return NULL;

        if (item && entry != item)
                return NULL;

        __radix_tree_delete(root, node, slot);

        return entry;
}
EXPORT_SYMBOL(radix_tree_delete_item);

/**
 * radix_tree_delete - delete an entry from a radix tree
 * @root: radix tree root
 * @index: index key
 *
 * Remove the entry at @index from the radix tree rooted at @root.
 *
 * Return: The deleted entry, or %NULL if it was not present.
 */
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
{
        return radix_tree_delete_item(root, index, NULL);
}
EXPORT_SYMBOL(radix_tree_delete);
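/*
 * Editor's example (not part of the original source): conditional deletion
 * with radix_tree_delete_item(), i.e. "remove the entry at @index only if it
 * still is @expected".  This sketch assumes @expected is non-NULL and that
 * the caller holds the lock which guards modifications of @root.
 */
static bool __maybe_unused
example_delete_if_unchanged(struct radix_tree_root *root, unsigned long index,
                            void *expected)
{
        /* Returns NULL when the slot is empty or holds a different item. */
        return radix_tree_delete_item(root, index, expected) == expected;
}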
void radix_tree_clear_tags(struct radix_tree_root *root,
                           struct radix_tree_node *node,
                           void __rcu **slot)
{
        if (node) {
                unsigned int tag, offset = get_slot_offset(node, slot);
                for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
                        node_tag_clear(root, node, tag, offset);
        } else {
                root_tag_clear_all(root);
        }
}

/**
 * radix_tree_tagged - test whether any items in the tree are tagged
 * @root: radix tree root
 * @tag: tag to test
 */
int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag)
{
        return root_tag_get(root, tag);
}
EXPORT_SYMBOL(radix_tree_tagged);

/**
 * idr_preload - preload for idr_alloc()
 * @gfp_mask: allocation mask to use for preloading
 *
 * Preallocate memory to use for the next call to idr_alloc(). This function
 * returns with preemption disabled. It will be enabled by idr_preload_end().
 */
void idr_preload(gfp_t gfp_mask)
{
        if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE))
                preempt_disable();
}
EXPORT_SYMBOL(idr_preload);

/**
 * ida_pre_get - reserve resources for ida allocation
 * @ida: ida handle
 * @gfp: memory allocation flags
 *
 * This function should be called before calling ida_get_new_above(). If it
 * is unable to allocate memory, it will return %0. On success, it returns %1.
 */
int ida_pre_get(struct ida *ida, gfp_t gfp)
{
        /*
         * The IDA API has no preload_end() equivalent. Instead,
         * ida_get_new() can return -EAGAIN, prompting the caller
         * to return to the ida_pre_get() step.
         */
        if (!__radix_tree_preload(gfp, IDA_PRELOAD_SIZE))
                preempt_enable();

        if (!this_cpu_read(ida_bitmap)) {
                struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp);
                if (!bitmap)
                        return 0;
                if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap))
                        kfree(bitmap);
        }

        return 1;
}
EXPORT_SYMBOL(ida_pre_get);
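/*
 * Editor's examples (not part of the original source): the usual calling
 * patterns for the two preallocation interfaces above.  idr_preload()
 * disables preemption so that idr_alloc() can run under a spinlock with
 * GFP_NOWAIT; ida_pre_get() instead relies on the caller retrying when
 * ida_get_new_above() returns -EAGAIN, as described in the comment inside
 * ida_pre_get().  The @lock argument and the IDR start value of 1 are
 * assumptions made for these sketches.
 */
static int __maybe_unused
example_idr_alloc(struct idr *idr, spinlock_t *lock, void *ptr)
{
        int id;

        idr_preload(GFP_KERNEL);                        /* may sleep */
        spin_lock(lock);
        id = idr_alloc(idr, ptr, 1, 0, GFP_NOWAIT);     /* end == 0: no limit */
        spin_unlock(lock);
        idr_preload_end();                              /* re-enables preemption */

        return id;      /* allocated ID, or -ENOMEM / -ENOSPC */
}

static int __maybe_unused
example_ida_alloc(struct ida *ida, spinlock_t *lock)
{
        int id, err;

        do {
                if (!ida_pre_get(ida, GFP_KERNEL))
                        return -ENOMEM;
                spin_lock(lock);
                err = ida_get_new_above(ida, 0, &id);
                spin_unlock(lock);
        } while (err == -EAGAIN);

        return err ? err : id;
}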
void __rcu **idr_get_free_cmn(struct radix_tree_root *root,
                              struct radix_tree_iter *iter, gfp_t gfp,
                              unsigned long max)
{
        struct radix_tree_node *node = NULL, *child;
        void __rcu **slot = (void __rcu **)&root->rnode;
        unsigned long maxindex, start = iter->next_index;
        unsigned int shift, offset = 0;

 grow:
        shift = radix_tree_load_root(root, &child, &maxindex);
        if (!radix_tree_tagged(root, IDR_FREE))
                start = max(start, maxindex + 1);
        if (start > max)
                return ERR_PTR(-ENOSPC);

        if (start > maxindex) {
                int error = radix_tree_extend(root, gfp, start, shift);
                if (error < 0)
                        return ERR_PTR(error);
                shift = error;
                child = rcu_dereference_raw(root->rnode);
        }

        while (shift) {
                shift -= RADIX_TREE_MAP_SHIFT;
                if (child == NULL) {
                        /* Have to add a child node. */
                        child = radix_tree_node_alloc(gfp, node, root, shift,
                                                        offset, 0, 0);
                        if (!child)
                                return ERR_PTR(-ENOMEM);
                        all_tag_set(child, IDR_FREE);
                        rcu_assign_pointer(*slot, node_to_entry(child));
                        if (node)
                                node->count++;
                } else if (!radix_tree_is_internal_node(child))
                        break;

                node = entry_to_node(child);
                offset = radix_tree_descend(node, &child, start);
                if (!tag_get(node, IDR_FREE, offset)) {
                        offset = radix_tree_find_next_bit(node, IDR_FREE,
                                                        offset + 1);
                        start = next_index(start, node, offset);
                        if (start > max)
                                return ERR_PTR(-ENOSPC);
                        while (offset == RADIX_TREE_MAP_SIZE) {
                                offset = node->offset + 1;
                                node = node->parent;
                                if (!node)
                                        goto grow;
                                shift = node->shift;
                        }
                        child = rcu_dereference_raw(node->slots[offset]);
                }
                slot = &node->slots[offset];
        }

        iter->index = start;
        if (node)
                iter->next_index = 1 + min(max, (start | node_maxindex(node)));
        else
                iter->next_index = 1;
        iter->node = node;
        __set_iter_shift(iter, shift);
        set_iter_tags(iter, node, offset, IDR_FREE);

        return slot;
}

/**
 * idr_destroy - release all internal memory from an IDR
 * @idr: idr handle
 *
 * After this function is called, the IDR is empty, and may be reused or
 * the data structure containing it may be freed.
 *
 * A typical clean-up sequence for objects stored in an idr tree will use
 * idr_for_each() to free all objects, if necessary, then idr_destroy() to
 * free the memory used to keep track of those objects.
 */
void idr_destroy(struct idr *idr)
{
        struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.rnode);
        if (radix_tree_is_internal_node(node))
                radix_tree_free_nodes(node);
        idr->idr_rt.rnode = NULL;
        root_tag_set(&idr->idr_rt, IDR_FREE);
}
EXPORT_SYMBOL(idr_destroy);

static void
radix_tree_node_ctor(void *arg)
{
        struct radix_tree_node *node = arg;

        memset(node, 0, sizeof(*node));
        INIT_LIST_HEAD(&node->private_list);
}

static __init unsigned long __maxindex(unsigned int height)
{
        unsigned int width = height * RADIX_TREE_MAP_SHIFT;
        int shift = RADIX_TREE_INDEX_BITS - width;

        if (shift < 0)
                return ~0UL;
        if (shift >= BITS_PER_LONG)
                return 0UL;
        return ~0UL >> shift;
}

static __init void radix_tree_init_maxnodes(void)
{
        unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1];
        unsigned int i, j;

        for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
                height_to_maxindex[i] = __maxindex(i);
        for (i = 0; i < ARRAY_SIZE(height_to_maxnodes); i++) {
                for (j = i; j > 0; j--)
                        height_to_maxnodes[i] += height_to_maxindex[j - 1] + 1;
        }
}

static int radix_tree_cpu_dead(unsigned int cpu)
{
        struct radix_tree_preload *rtp;
        struct radix_tree_node *node;

        /* Free per-cpu pool of preloaded nodes */
        rtp = &per_cpu(radix_tree_preloads, cpu);
        while (rtp->nr) {
                node = rtp->nodes;
                rtp->nodes = node->parent;
                kmem_cache_free(radix_tree_node_cachep, node);
                rtp->nr--;
        }
        kfree(per_cpu(ida_bitmap, cpu));
        per_cpu(ida_bitmap, cpu) = NULL;
        return 0;
}

void __init radix_tree_init(void)
{
        int ret;

        BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
        radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
                        sizeof(struct radix_tree_node), 0,
                        SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
                        radix_tree_node_ctor);
        radix_tree_init_maxnodes();
        ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead",
                                        NULL, radix_tree_cpu_dead);
        WARN_ON(ret < 0);
}
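/*
 * Editor's example (not part of the original source): the clean-up sequence
 * described in the idr_destroy() comment above, i.e. free every stored
 * object with idr_for_each() and then release the IDR's internal nodes.
 * The helper names and the assumption that each entry was kmalloc()ed by
 * the caller are specific to this sketch.
 */
static int example_free_one(int id, void *p, void *data)
{
        kfree(p);       /* each entry is assumed to be caller-allocated */
        return 0;       /* continue the iteration */
}

static void __maybe_unused example_release_all(struct idr *idr)
{
        idr_for_each(idr, example_free_one, NULL);
        idr_destroy(idr);
}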