/*
 * Copyright (C) 2001 Momchil Velikov
 * Portions Copyright (C) 2001 Christoph Hellwig
 * Copyright (C) 2005 SGI, Christoph Lameter
 * Copyright (C) 2006 Nick Piggin
 * Copyright (C) 2012 Konstantin Khlebnikov
 * Copyright (C) 2016 Intel, Matthew Wilcox
 * Copyright (C) 2016 Intel, Ross Zwisler
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/idr.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kmemleak.h>
#include <linux/percpu.h>
#include <linux/preempt.h>		/* in_interrupt() */
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/string.h>


/* Number of nodes in fully populated tree of given height */
static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly;

/*
 * Radix tree node cache.
 */
static struct kmem_cache *radix_tree_node_cachep;

/*
 * The radix tree is variable-height, so an insert operation not only has
 * to build the branch to its corresponding item, it also has to build the
 * branch to existing items if the size has to be increased (by
 * radix_tree_extend).
 *
 * The worst case is a zero height tree with just a single item at index 0,
 * and then inserting an item at index ULONG_MAX. This requires 2 new branches
 * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared.
 * Hence:
 */
#define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)
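/*
 * A worked example (illustrative only): on a 64-bit machine with the
 * default RADIX_TREE_MAP_SHIFT of 6, RADIX_TREE_MAX_PATH is
 * DIV_ROUND_UP(64, 6) == 11, so the preload pool must hold
 * 11 * 2 - 1 == 21 nodes to cover the worst case described above.
 */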
/*
 * The IDR does not have to be as high as the radix tree since it uses
 * signed integers, not unsigned longs.
 */
#define IDR_INDEX_BITS		(8 /* CHAR_BIT */ * sizeof(int) - 1)
#define IDR_MAX_PATH		(DIV_ROUND_UP(IDR_INDEX_BITS, \
						RADIX_TREE_MAP_SHIFT))
#define IDR_PRELOAD_SIZE	(IDR_MAX_PATH * 2 - 1)

/*
 * The IDA is even shorter since it uses a bitmap at the last level.
 */
#define IDA_INDEX_BITS		(8 * sizeof(int) - 1 - ilog2(IDA_BITMAP_BITS))
#define IDA_MAX_PATH		(DIV_ROUND_UP(IDA_INDEX_BITS, \
						RADIX_TREE_MAP_SHIFT))
#define IDA_PRELOAD_SIZE	(IDA_MAX_PATH * 2 - 1)

/*
 * Per-cpu pool of preloaded nodes
 */
struct radix_tree_preload {
	unsigned nr;
	/* nodes->parent points to next preallocated node */
	struct radix_tree_node *nodes;
};
static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };

static inline struct radix_tree_node *entry_to_node(void *ptr)
{
	return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE);
}

static inline void *node_to_entry(void *ptr)
{
	return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
}

#define RADIX_TREE_RETRY	node_to_entry(NULL)

#ifdef CONFIG_RADIX_TREE_MULTIORDER
/* Sibling slots point directly to another slot in the same node */
static inline
bool is_sibling_entry(const struct radix_tree_node *parent, void *node)
{
	void __rcu **ptr = node;
	return (parent->slots <= ptr) &&
			(ptr < parent->slots + RADIX_TREE_MAP_SIZE);
}
#else
static inline
bool is_sibling_entry(const struct radix_tree_node *parent, void *node)
{
	return false;
}
#endif

static inline unsigned long
get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
{
	return slot - parent->slots;
}

static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
			struct radix_tree_node **nodep, unsigned long index)
{
	unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
	void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);

#ifdef CONFIG_RADIX_TREE_MULTIORDER
	if (radix_tree_is_internal_node(entry)) {
		if (is_sibling_entry(parent, entry)) {
			void __rcu **sibentry;
			sibentry = (void __rcu **) entry_to_node(entry);
			offset = get_slot_offset(parent, sibentry);
			entry = rcu_dereference_raw(*sibentry);
		}
	}
#endif

	*nodep = (void *)entry;
	return offset;
}

static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
{
	return root->gfp_mask & __GFP_BITS_MASK;
}

static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__set_bit(offset, node->tags[tag]);
}

static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__clear_bit(offset, node->tags[tag]);
}

static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	return test_bit(offset, node->tags[tag]);
}

static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
{
	root->gfp_mask |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
}

static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
{
	root->gfp_mask &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
}

static inline void root_tag_clear_all(struct radix_tree_root *root)
{
	root->gfp_mask &= (1 << ROOT_TAG_SHIFT) - 1;
}

static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
{
	return (__force int)root->gfp_mask & (1 << (tag + ROOT_TAG_SHIFT));
}

static inline unsigned root_tags_get(const struct radix_tree_root *root)
{
	return (__force unsigned)root->gfp_mask >> ROOT_TAG_SHIFT;
}
static inline bool is_idr(const struct radix_tree_root *root)
{
	return !!(root->gfp_mask & ROOT_IS_IDR);
}

/*
 * Returns 1 if any slot in the node has this tag set.
 * Otherwise returns 0.
 */
static inline int any_tag_set(const struct radix_tree_node *node,
							unsigned int tag)
{
	unsigned idx;
	for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
		if (node->tags[tag][idx])
			return 1;
	}
	return 0;
}

static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag)
{
	bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE);
}

/**
 * radix_tree_find_next_bit - find the next set bit in a memory region
 *
 * @node: the node whose tag bitmap to search
 * @tag: the tag index
 * @offset: the bitnumber to start searching at
 *
 * Unrollable variant of find_next_bit() for constant size arrays.
 * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero.
 * Returns next bit offset, or size if nothing found.
 */
static __always_inline unsigned long
radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
			 unsigned long offset)
{
	const unsigned long *addr = node->tags[tag];

	if (offset < RADIX_TREE_MAP_SIZE) {
		unsigned long tmp;

		addr += offset / BITS_PER_LONG;
		tmp = *addr >> (offset % BITS_PER_LONG);
		if (tmp)
			return __ffs(tmp) + offset;
		offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
		while (offset < RADIX_TREE_MAP_SIZE) {
			tmp = *++addr;
			if (tmp)
				return __ffs(tmp) + offset;
			offset += BITS_PER_LONG;
		}
	}
	return RADIX_TREE_MAP_SIZE;
}

static unsigned int iter_offset(const struct radix_tree_iter *iter)
{
	return (iter->index >> iter_shift(iter)) & RADIX_TREE_MAP_MASK;
}

/*
 * The maximum index which can be stored in a radix tree
 */
static inline unsigned long shift_maxindex(unsigned int shift)
{
	return (RADIX_TREE_MAP_SIZE << shift) - 1;
}

static inline unsigned long node_maxindex(const struct radix_tree_node *node)
{
	return shift_maxindex(node->shift);
}

static unsigned long next_index(unsigned long index,
				const struct radix_tree_node *node,
				unsigned long offset)
{
	return (index & ~node_maxindex(node)) + (offset << node->shift);
}
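/*
 * Worked example (illustrative only): with the default
 * RADIX_TREE_MAP_SHIFT of 6, RADIX_TREE_MAP_SIZE is 64, so
 * shift_maxindex(0) == 63, shift_maxindex(6) == 4095 and
 * shift_maxindex(12) == 262143: each additional level multiplies the
 * addressable index space by RADIX_TREE_MAP_SIZE.
 */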
#ifndef __KERNEL__
static void dump_node(struct radix_tree_node *node, unsigned long index)
{
	unsigned long i;

	pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d exceptional %d\n",
		node, node->offset, index, index | node_maxindex(node),
		node->parent,
		node->tags[0][0], node->tags[1][0], node->tags[2][0],
		node->shift, node->count, node->exceptional);

	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
		unsigned long first = index | (i << node->shift);
		unsigned long last = first | ((1UL << node->shift) - 1);
		void *entry = node->slots[i];
		if (!entry)
			continue;
		if (entry == RADIX_TREE_RETRY) {
			pr_debug("radix retry offset %ld indices %lu-%lu parent %p\n",
					i, first, last, node);
		} else if (!radix_tree_is_internal_node(entry)) {
			pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n",
					entry, i, first, last, node);
		} else if (is_sibling_entry(node, entry)) {
			pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n",
					entry, i, first, last, node,
					*(void **)entry_to_node(entry));
		} else {
			dump_node(entry_to_node(entry), first);
		}
	}
}

/* For debug */
static void radix_tree_dump(struct radix_tree_root *root)
{
	pr_debug("radix root: %p rnode %p tags %x\n",
			root, root->rnode,
			root->gfp_mask >> ROOT_TAG_SHIFT);
	if (!radix_tree_is_internal_node(root->rnode))
		return;
	dump_node(entry_to_node(root->rnode), 0);
}

static void dump_ida_node(void *entry, unsigned long index)
{
	unsigned long i;

	if (!entry)
		return;

	if (radix_tree_is_internal_node(entry)) {
		struct radix_tree_node *node = entry_to_node(entry);

		pr_debug("ida node: %p offset %d indices %lu-%lu parent %p free %lx shift %d count %d\n",
			node, node->offset, index * IDA_BITMAP_BITS,
			((index | node_maxindex(node)) + 1) *
				IDA_BITMAP_BITS - 1,
			node->parent, node->tags[0][0], node->shift,
			node->count);
		for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
			dump_ida_node(node->slots[i],
					index | (i << node->shift));
	} else if (radix_tree_exceptional_entry(entry)) {
		pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n",
				entry, (int)(index & RADIX_TREE_MAP_MASK),
				index * IDA_BITMAP_BITS,
				index * IDA_BITMAP_BITS + BITS_PER_LONG -
					RADIX_TREE_EXCEPTIONAL_SHIFT,
				(unsigned long)entry >>
					RADIX_TREE_EXCEPTIONAL_SHIFT);
	} else {
		struct ida_bitmap *bitmap = entry;

		pr_debug("ida btmp: %p offset %d indices %lu-%lu data", bitmap,
				(int)(index & RADIX_TREE_MAP_MASK),
				index * IDA_BITMAP_BITS,
				(index + 1) * IDA_BITMAP_BITS - 1);
		for (i = 0; i < IDA_BITMAP_LONGS; i++)
			pr_cont(" %lx", bitmap->bitmap[i]);
		pr_cont("\n");
	}
}

static void ida_dump(struct ida *ida)
{
	struct radix_tree_root *root = &ida->ida_rt;
	pr_debug("ida: %p node %p free %d\n", ida, root->rnode,
				root->gfp_mask >> ROOT_TAG_SHIFT);
	dump_ida_node(root->rnode, 0);
}
#endif
/*
 * This assumes that the caller has performed appropriate preallocation, and
 * that the caller has pinned this thread of control to the current CPU.
 */
static struct radix_tree_node *
radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
			struct radix_tree_root *root,
			unsigned int shift, unsigned int offset,
			unsigned int count, unsigned int exceptional)
{
	struct radix_tree_node *ret = NULL;

	/*
	 * Preload code isn't irq safe and it doesn't make sense to use
	 * preloading during an interrupt anyway as all the allocations have
	 * to be atomic. So just do normal allocation when in interrupt.
	 */
	if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
		struct radix_tree_preload *rtp;

		/*
		 * Even if the caller has preloaded, try to allocate from the
		 * cache first for the new node to get accounted to the memory
		 * cgroup.
		 */
		ret = kmem_cache_alloc(radix_tree_node_cachep,
				       gfp_mask | __GFP_NOWARN);
		if (ret)
			goto out;

		/*
		 * Provided the caller has preloaded here, we will always
		 * succeed in getting a node here (and never reach
		 * kmem_cache_alloc)
		 */
		rtp = this_cpu_ptr(&radix_tree_preloads);
		if (rtp->nr) {
			ret = rtp->nodes;
			rtp->nodes = ret->parent;
			rtp->nr--;
		}
		/*
		 * Update the allocation stack trace as this is more useful
		 * for debugging.
		 */
		kmemleak_update_trace(ret);
		goto out;
	}
	ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
out:
	BUG_ON(radix_tree_is_internal_node(ret));
	if (ret) {
		ret->shift = shift;
		ret->offset = offset;
		ret->count = count;
		ret->exceptional = exceptional;
		ret->parent = parent;
		ret->root = root;
	}
	return ret;
}

static void radix_tree_node_rcu_free(struct rcu_head *head)
{
	struct radix_tree_node *node =
			container_of(head, struct radix_tree_node, rcu_head);

	/*
	 * Must only free zeroed nodes into the slab.  We can be left with
	 * non-NULL entries by radix_tree_free_nodes, so clear the entries
	 * and tags here.
	 */
	memset(node->slots, 0, sizeof(node->slots));
	memset(node->tags, 0, sizeof(node->tags));
	INIT_LIST_HEAD(&node->private_list);

	kmem_cache_free(radix_tree_node_cachep, node);
}

static inline void
radix_tree_node_free(struct radix_tree_node *node)
{
	call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
}

/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail.  On
 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
 */
static int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
{
	struct radix_tree_preload *rtp;
	struct radix_tree_node *node;
	int ret = -ENOMEM;

	/*
	 * Nodes preloaded by one cgroup can be used by another cgroup, so
	 * they should never be accounted to any particular memory cgroup.
	 */
	gfp_mask &= ~__GFP_ACCOUNT;

	preempt_disable();
	rtp = this_cpu_ptr(&radix_tree_preloads);
	while (rtp->nr < nr) {
		preempt_enable();
		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
		if (node == NULL)
			goto out;
		preempt_disable();
		rtp = this_cpu_ptr(&radix_tree_preloads);
		if (rtp->nr < nr) {
			node->parent = rtp->nodes;
			rtp->nodes = node;
			rtp->nr++;
		} else {
			kmem_cache_free(radix_tree_node_cachep, node);
		}
	}
	ret = 0;
out:
	return ret;
}

/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail.  On
 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
 */
int radix_tree_preload(gfp_t gfp_mask)
{
	/* Warn on nonsensical use... */
	WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
	return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
}
EXPORT_SYMBOL(radix_tree_preload);
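/*
 * A minimal usage sketch (not part of this file; "my_tree" and "my_lock"
 * are hypothetical): preload outside the lock, insert under it, then
 * re-enable preemption with radix_tree_preload_end():
 *
 *	if (radix_tree_preload(GFP_KERNEL))
 *		return -ENOMEM;
 *	spin_lock(&my_lock);
 *	err = radix_tree_insert(&my_tree, index, item);
 *	spin_unlock(&my_lock);
 *	radix_tree_preload_end();
 */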
/*
 * The same as radix_tree_preload(), except we don't guarantee preloading
 * happens.  We do it if we decide it helps.  On success, return zero with
 * preemption disabled.  On error, return -ENOMEM with preemption not
 * disabled.
 */
int radix_tree_maybe_preload(gfp_t gfp_mask)
{
	if (gfpflags_allow_blocking(gfp_mask))
		return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
	/* Preloading doesn't help anything with this gfp mask, skip it */
	preempt_disable();
	return 0;
}
EXPORT_SYMBOL(radix_tree_maybe_preload);

#ifdef CONFIG_RADIX_TREE_MULTIORDER
/*
 * Preload with enough objects to ensure that we can split a single entry
 * of order @old_order into many entries of size @new_order
 */
int radix_tree_split_preload(unsigned int old_order, unsigned int new_order,
							gfp_t gfp_mask)
{
	unsigned top = 1 << (old_order % RADIX_TREE_MAP_SHIFT);
	unsigned layers = (old_order / RADIX_TREE_MAP_SHIFT) -
				(new_order / RADIX_TREE_MAP_SHIFT);
	unsigned nr = 0;

	WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
	BUG_ON(new_order >= old_order);

	while (layers--)
		nr = nr * RADIX_TREE_MAP_SIZE + 1;
	return __radix_tree_preload(gfp_mask, top * nr);
}
#endif

/*
 * The same as radix_tree_maybe_preload(), but preload the number of nodes
 * required to insert (1 << order) contiguous naturally-aligned elements.
 */
int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
{
	unsigned long nr_subtrees;
	int nr_nodes, subtree_height;

	/* Preloading doesn't help anything with this gfp mask, skip it */
	if (!gfpflags_allow_blocking(gfp_mask)) {
		preempt_disable();
		return 0;
	}

	/*
	 * Calculate number and height of fully populated subtrees it takes to
	 * store (1 << order) elements.
	 */
	nr_subtrees = 1 << order;
	for (subtree_height = 0; nr_subtrees > RADIX_TREE_MAP_SIZE;
			subtree_height++)
		nr_subtrees >>= RADIX_TREE_MAP_SHIFT;

	/*
	 * The worst case is zero height tree with a single item at index 0 and
	 * then inserting items starting at ULONG_MAX - (1 << order).
	 *
	 * This requires RADIX_TREE_MAX_PATH nodes to build branch from root to
	 * 0-index item.
	 */
	nr_nodes = RADIX_TREE_MAX_PATH;

	/* Plus branch to fully populated subtrees. */
	nr_nodes += RADIX_TREE_MAX_PATH - subtree_height;

	/* Root node is shared. */
	nr_nodes--;

	/* Plus nodes required to build subtrees. */
	nr_nodes += nr_subtrees * height_to_maxnodes[subtree_height];

	return __radix_tree_preload(gfp_mask, nr_nodes);
}
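/*
 * Worked example (illustrative only, assuming RADIX_TREE_MAP_SHIFT == 6
 * on 64-bit, so RADIX_TREE_MAX_PATH == 11): for order == 9 (e.g. a 2MB
 * THP with 4K pages), 512 slots are needed.  512 >> 6 == 8, giving 8
 * fully populated subtrees of height 1, one node each.  The total is
 * 11 nodes for the index-0 branch, plus 11 - 1 for the branch to the new
 * subtrees, minus the shared root, plus the 8 subtree nodes:
 * 11 + 10 - 1 + 8 == 28 nodes preloaded.
 */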
static unsigned radix_tree_load_root(const struct radix_tree_root *root,
		struct radix_tree_node **nodep, unsigned long *maxindex)
{
	struct radix_tree_node *node = rcu_dereference_raw(root->rnode);

	*nodep = node;

	if (likely(radix_tree_is_internal_node(node))) {
		node = entry_to_node(node);
		*maxindex = node_maxindex(node);
		return node->shift + RADIX_TREE_MAP_SHIFT;
	}

	*maxindex = 0;
	return 0;
}

/*
 * Extend a radix tree so it can store key @index.
 */
static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
				unsigned long index, unsigned int shift)
{
	void *entry;
	unsigned int maxshift;
	int tag;

	/* Figure out what the shift should be. */
	maxshift = shift;
	while (index > shift_maxindex(maxshift))
		maxshift += RADIX_TREE_MAP_SHIFT;

	entry = rcu_dereference_raw(root->rnode);
	if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE)))
		goto out;

	do {
		struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL,
							root, shift, 0, 1, 0);
		if (!node)
			return -ENOMEM;

		if (is_idr(root)) {
			all_tag_set(node, IDR_FREE);
			if (!root_tag_get(root, IDR_FREE)) {
				tag_clear(node, IDR_FREE, 0);
				root_tag_set(root, IDR_FREE);
			}
		} else {
			/* Propagate the aggregated tag info to the new child */
			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
				if (root_tag_get(root, tag))
					tag_set(node, tag, 0);
			}
		}

		BUG_ON(shift > BITS_PER_LONG);
		if (radix_tree_is_internal_node(entry)) {
			entry_to_node(entry)->parent = node;
		} else if (radix_tree_exceptional_entry(entry)) {
			/* Moving an exceptional root->rnode to a node */
			node->exceptional = 1;
		}
		/*
		 * entry was already in the radix tree, so we do not need
		 * rcu_assign_pointer here
		 */
		node->slots[0] = (void __rcu *)entry;
		entry = node_to_entry(node);
		rcu_assign_pointer(root->rnode, entry);
		shift += RADIX_TREE_MAP_SHIFT;
	} while (shift <= maxshift);
out:
	return maxshift + RADIX_TREE_MAP_SHIFT;
}
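/*
 * Worked example (illustrative only, assuming RADIX_TREE_MAP_SHIFT == 6):
 * a tree holding a single item at index 0 keeps it directly in
 * root->rnode with no nodes at all.  Extending it to cover index 70 runs
 * the loop above twice: first a shift-0 node absorbs the old direct entry
 * into its slot 0, then a shift-6 node is pushed on top of it, after
 * which indices 0-4095 are addressable.
 */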
/**
 *	radix_tree_shrink    -    shrink radix tree to minimum height
 *	@root:		radix tree root
 */
static inline bool radix_tree_shrink(struct radix_tree_root *root,
				     radix_tree_update_node_t update_node,
				     void *private)
{
	bool shrunk = false;

	for (;;) {
		struct radix_tree_node *node = rcu_dereference_raw(root->rnode);
		struct radix_tree_node *child;

		if (!radix_tree_is_internal_node(node))
			break;
		node = entry_to_node(node);

		/*
		 * If the candidate node has more than one child, or its child
		 * is not at the leftmost slot, or the child is a multiorder
		 * entry, we cannot shrink.
		 */
		if (node->count != 1)
			break;
		child = rcu_dereference_raw(node->slots[0]);
		if (!child)
			break;
		if (!radix_tree_is_internal_node(child) && node->shift)
			break;

		if (radix_tree_is_internal_node(child))
			entry_to_node(child)->parent = NULL;

		/*
		 * We don't need rcu_assign_pointer(), since we are simply
		 * moving the node from one part of the tree to another: if it
		 * was safe to dereference the old pointer to it
		 * (node->slots[0]), it will be safe to dereference the new
		 * one (root->rnode) as far as dependent read barriers go.
		 */
		root->rnode = (void __rcu *)child;
		if (is_idr(root) && !tag_get(node, IDR_FREE, 0))
			root_tag_clear(root, IDR_FREE);

		/*
		 * We have a dilemma here. The node's slot[0] must not be
		 * NULLed in case there are concurrent lookups expecting to
		 * find the item. However if this was a bottom-level node,
		 * then it may be subject to the slot pointer being visible
		 * to callers dereferencing it. If item corresponding to
		 * slot[0] is subsequently deleted, these callers would expect
		 * their slot to become empty sooner or later.
		 *
		 * For example, lockless pagecache will look up a slot, deref
		 * the page pointer, and if the page has 0 refcount it means it
		 * was concurrently deleted from pagecache so try the deref
		 * again. Fortunately there is already a requirement for logic
		 * to retry the entire slot lookup -- the indirect pointer
		 * problem (replacing direct root node with an indirect pointer
		 * also results in a stale slot). So tag the slot as indirect
		 * to force callers to retry.
		 */
		node->count = 0;
		if (!radix_tree_is_internal_node(child)) {
			node->slots[0] = (void __rcu *)RADIX_TREE_RETRY;
			if (update_node)
				update_node(node, private);
		}

		WARN_ON_ONCE(!list_empty(&node->private_list));
		radix_tree_node_free(node);
		shrunk = true;
	}

	return shrunk;
}

static bool delete_node(struct radix_tree_root *root,
			struct radix_tree_node *node,
			radix_tree_update_node_t update_node, void *private)
{
	bool deleted = false;

	do {
		struct radix_tree_node *parent;

		if (node->count) {
			if (node_to_entry(node) ==
					rcu_dereference_raw(root->rnode))
				deleted |= radix_tree_shrink(root, update_node,
								private);
			return deleted;
		}

		parent = node->parent;
		if (parent) {
			parent->slots[node->offset] = NULL;
			parent->count--;
		} else {
			/*
			 * Shouldn't the tags already have all been cleared
			 * by the caller?
			 */
			if (!is_idr(root))
				root_tag_clear_all(root);
			root->rnode = NULL;
		}

		WARN_ON_ONCE(!list_empty(&node->private_list));
		radix_tree_node_free(node);
		deleted = true;

		node = parent;
	} while (node);

	return deleted;
}
/**
 *	__radix_tree_create	-	create a slot in a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *	@order:		index occupies 2^order aligned slots
 *	@nodep:		returns node
 *	@slotp:		returns slot
 *
 *	Create, if necessary, and return the node and slot for an item
 *	at position @index in the radix tree @root.
 *
 *	Until there is more than one item in the tree, no nodes are
 *	allocated and @root->rnode is used as a direct slot instead of
 *	pointing to a node, in which case *@nodep will be NULL.
 *
 *	Returns -ENOMEM, or 0 for success.
 */
int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
			unsigned order, struct radix_tree_node **nodep,
			void __rcu ***slotp)
{
	struct radix_tree_node *node = NULL, *child;
	void __rcu **slot = (void __rcu **)&root->rnode;
	unsigned long maxindex;
	unsigned int shift, offset = 0;
	unsigned long max = index | ((1UL << order) - 1);
	gfp_t gfp = root_gfp_mask(root);

	shift = radix_tree_load_root(root, &child, &maxindex);

	/* Make sure the tree is high enough. */
	if (order > 0 && max == ((1UL << order) - 1))
		max++;
	if (max > maxindex) {
		int error = radix_tree_extend(root, gfp, max, shift);
		if (error < 0)
			return error;
		shift = error;
		child = rcu_dereference_raw(root->rnode);
	}

	while (shift > order) {
		shift -= RADIX_TREE_MAP_SHIFT;
		if (child == NULL) {
			/* Have to add a child node. */
			child = radix_tree_node_alloc(gfp, node, root, shift,
							offset, 0, 0);
			if (!child)
				return -ENOMEM;
			rcu_assign_pointer(*slot, node_to_entry(child));
			if (node)
				node->count++;
		} else if (!radix_tree_is_internal_node(child))
			break;

		/* Go a level down */
		node = entry_to_node(child);
		offset = radix_tree_descend(node, &child, index);
		slot = &node->slots[offset];
	}

	if (nodep)
		*nodep = node;
	if (slotp)
		*slotp = slot;
	return 0;
}

/*
 * Free any nodes below this node.  The tree is presumed to not need
 * shrinking, and any user data in the tree is presumed to not need a
 * destructor called on it.  If we need to add a destructor, we can
 * add that functionality later.  Note that we may not clear tags or
 * slots from the tree as an RCU walker may still have a pointer into
 * this subtree.  We could replace the entries with RADIX_TREE_RETRY,
 * but we'll still have to clear those in rcu_free.
 */
static void radix_tree_free_nodes(struct radix_tree_node *node)
{
	unsigned offset = 0;
	struct radix_tree_node *child = entry_to_node(node);

	for (;;) {
		void *entry = rcu_dereference_raw(child->slots[offset]);
		if (radix_tree_is_internal_node(entry) &&
					!is_sibling_entry(child, entry)) {
			child = entry_to_node(entry);
			offset = 0;
			continue;
		}
		offset++;
		while (offset == RADIX_TREE_MAP_SIZE) {
			struct radix_tree_node *old = child;
			offset = child->offset + 1;
			child = child->parent;
			WARN_ON_ONCE(!list_empty(&old->private_list));
			radix_tree_node_free(old);
			if (old == entry_to_node(node))
				return;
		}
	}
}
#ifdef CONFIG_RADIX_TREE_MULTIORDER
static inline int insert_entries(struct radix_tree_node *node,
		void __rcu **slot, void *item, unsigned order, bool replace)
{
	struct radix_tree_node *child;
	unsigned i, n, tag, offset, tags = 0;

	if (node) {
		if (order > node->shift)
			n = 1 << (order - node->shift);
		else
			n = 1;
		offset = get_slot_offset(node, slot);
	} else {
		n = 1;
		offset = 0;
	}

	if (n > 1) {
		offset = offset & ~(n - 1);
		slot = &node->slots[offset];
	}
	child = node_to_entry(slot);

	for (i = 0; i < n; i++) {
		if (slot[i]) {
			if (replace) {
				node->count--;
				for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
					if (tag_get(node, tag, offset + i))
						tags |= 1 << tag;
			} else
				return -EEXIST;
		}
	}

	for (i = 0; i < n; i++) {
		struct radix_tree_node *old = rcu_dereference_raw(slot[i]);
		if (i) {
			rcu_assign_pointer(slot[i], child);
			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
				if (tags & (1 << tag))
					tag_clear(node, tag, offset + i);
		} else {
			rcu_assign_pointer(slot[i], item);
			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
				if (tags & (1 << tag))
					tag_set(node, tag, offset);
		}
		if (radix_tree_is_internal_node(old) &&
					!is_sibling_entry(node, old) &&
					(old != RADIX_TREE_RETRY))
			radix_tree_free_nodes(old);
		if (radix_tree_exceptional_entry(old))
			node->exceptional--;
	}
	if (node) {
		node->count += n;
		if (radix_tree_exceptional_entry(item))
			node->exceptional += n;
	}
	return n;
}
#else
static inline int insert_entries(struct radix_tree_node *node,
		void __rcu **slot, void *item, unsigned order, bool replace)
{
	if (*slot)
		return -EEXIST;
	rcu_assign_pointer(*slot, item);
	if (node) {
		node->count++;
		if (radix_tree_exceptional_entry(item))
			node->exceptional++;
	}
	return 1;
}
#endif

/**
 *	__radix_tree_insert    -    insert into a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *	@order:		key covers the 2^order indices around index
 *	@item:		item to insert
 *
 *	Insert an item into the radix tree at position @index.
 */
int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
			unsigned order, void *item)
{
	struct radix_tree_node *node;
	void __rcu **slot;
	int error;

	BUG_ON(radix_tree_is_internal_node(item));

	error = __radix_tree_create(root, index, order, &node, &slot);
	if (error)
		return error;

	error = insert_entries(node, slot, item, order, false);
	if (error < 0)
		return error;

	if (node) {
		unsigned offset = get_slot_offset(node, slot);
		BUG_ON(tag_get(node, 0, offset));
		BUG_ON(tag_get(node, 1, offset));
		BUG_ON(tag_get(node, 2, offset));
	} else {
		BUG_ON(root_tags_get(root));
	}

	return 0;
}
EXPORT_SYMBOL(__radix_tree_insert);

/**
 *	__radix_tree_lookup	-	lookup an item in a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *	@nodep:		returns node
 *	@slotp:		returns slot
 *
 *	Lookup and return the item at position @index in the radix
 *	tree @root.
 *
 *	Until there is more than one item in the tree, no nodes are
 *	allocated and @root->rnode is used as a direct slot instead of
 *	pointing to a node, in which case *@nodep will be NULL.
 */
void *__radix_tree_lookup(const struct radix_tree_root *root,
			  unsigned long index, struct radix_tree_node **nodep,
			  void __rcu ***slotp)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;
	void __rcu **slot;

restart:
	parent = NULL;
	slot = (void __rcu **)&root->rnode;
	radix_tree_load_root(root, &node, &maxindex);
	if (index > maxindex)
		return NULL;

	while (radix_tree_is_internal_node(node)) {
		unsigned offset;

		if (node == RADIX_TREE_RETRY)
			goto restart;
		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);
		slot = parent->slots + offset;
	}

	if (nodep)
		*nodep = parent;
	if (slotp)
		*slotp = slot;
	return node;
}

/**
 *	radix_tree_lookup_slot    -    lookup a slot in a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Returns:  the slot corresponding to the position @index in the
 *	radix tree @root. This is useful for update-if-exists operations.
 *
 *	This function can be called under rcu_read_lock iff the slot is not
 *	modified by radix_tree_replace_slot, otherwise it must be called
 *	exclusive from other writers. Any dereference of the slot must be done
 *	using radix_tree_deref_slot.
 */
void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root,
				unsigned long index)
{
	void __rcu **slot;

	if (!__radix_tree_lookup(root, index, NULL, &slot))
		return NULL;
	return slot;
}
EXPORT_SYMBOL(radix_tree_lookup_slot);

/**
 *	radix_tree_lookup    -    perform lookup operation on a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Lookup the item at the position @index in the radix tree @root.
 *
 *	This function can be called under rcu_read_lock, however the caller
 *	must manage lifetimes of leaf nodes (eg. RCU may also be used to free
 *	them safely). No RCU barriers are required to access or modify the
 *	returned item, however.
 */
void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
{
	return __radix_tree_lookup(root, index, NULL, NULL);
}
EXPORT_SYMBOL(radix_tree_lookup);
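/*
 * A minimal lockless-lookup sketch (not part of this file; "my_tree" is
 * hypothetical).  Slots fetched under RCU must be dereferenced with
 * radix_tree_deref_slot() and retried if the tree was reorganised:
 *
 *	rcu_read_lock();
 * repeat:
 *	slot = radix_tree_lookup_slot(&my_tree, index);
 *	if (slot) {
 *		item = radix_tree_deref_slot(slot);
 *		if (radix_tree_deref_retry(item))
 *			goto repeat;
 *	}
 *	rcu_read_unlock();
 */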
static inline void replace_sibling_entries(struct radix_tree_node *node,
				void __rcu **slot, int count, int exceptional)
{
#ifdef CONFIG_RADIX_TREE_MULTIORDER
	void *ptr = node_to_entry(slot);
	unsigned offset = get_slot_offset(node, slot) + 1;

	while (offset < RADIX_TREE_MAP_SIZE) {
		if (rcu_dereference_raw(node->slots[offset]) != ptr)
			break;
		if (count < 0) {
			node->slots[offset] = NULL;
			node->count--;
		}
		node->exceptional += exceptional;
		offset++;
	}
#endif
}

static void replace_slot(void __rcu **slot, void *item,
		struct radix_tree_node *node, int count, int exceptional)
{
	if (WARN_ON_ONCE(radix_tree_is_internal_node(item)))
		return;

	if (node && (count || exceptional)) {
		node->count += count;
		node->exceptional += exceptional;
		replace_sibling_entries(node, slot, count, exceptional);
	}

	rcu_assign_pointer(*slot, item);
}

static bool node_tag_get(const struct radix_tree_root *root,
				const struct radix_tree_node *node,
				unsigned int tag, unsigned int offset)
{
	if (node)
		return tag_get(node, tag, offset);
	return root_tag_get(root, tag);
}

/*
 * IDR users want to be able to store NULL in the tree, so if the slot isn't
 * free, don't adjust the count, even if it's transitioning between NULL and
 * non-NULL.  For the IDA, we mark slots as being IDR_FREE while they still
 * have empty bits, but it only stores NULL in slots when they're being
 * deleted.
 */
static int calculate_count(struct radix_tree_root *root,
				struct radix_tree_node *node, void __rcu **slot,
				void *item, void *old)
{
	if (is_idr(root)) {
		unsigned offset = get_slot_offset(node, slot);
		bool free = node_tag_get(root, node, IDR_FREE, offset);
		if (!free)
			return 0;
		if (!old)
			return 1;
	}
	return !!item - !!old;
}

/**
 * __radix_tree_replace		- replace item in a slot
 * @root:		radix tree root
 * @node:		pointer to tree node
 * @slot:		pointer to slot in @node
 * @item:		new item to store in the slot.
 * @update_node:	callback for changing leaf nodes
 * @private:		private data to pass to @update_node
 *
 * For use with __radix_tree_lookup().  Caller must hold tree write locked
 * across slot lookup and replacement.
 */
void __radix_tree_replace(struct radix_tree_root *root,
			  struct radix_tree_node *node,
			  void __rcu **slot, void *item,
			  radix_tree_update_node_t update_node, void *private)
{
	void *old = rcu_dereference_raw(*slot);
	int exceptional = !!radix_tree_exceptional_entry(item) -
				!!radix_tree_exceptional_entry(old);
	int count = calculate_count(root, node, slot, item, old);

	/*
	 * This function supports replacing exceptional entries and
	 * deleting entries, but that needs accounting against the
	 * node unless the slot is root->rnode.
	 */
	WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->rnode) &&
			(count || exceptional));
	replace_slot(slot, item, node, count, exceptional);

	if (!node)
		return;

	if (update_node)
		update_node(node, private);

	delete_node(root, node, update_node, private);
}
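/*
 * A minimal replacement sketch (not part of this file; "my_tree" and
 * "my_lock" are hypothetical).  The lookup and the replace must happen
 * under the same write-side lock:
 *
 *	spin_lock(&my_lock);
 *	old = __radix_tree_lookup(&my_tree, index, &node, &slot);
 *	if (old)
 *		__radix_tree_replace(&my_tree, node, slot, new_item,
 *				     NULL, NULL);
 *	spin_unlock(&my_lock);
 */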
/**
 * radix_tree_replace_slot	- replace item in a slot
 * @root:	radix tree root
 * @slot:	pointer to slot
 * @item:	new item to store in the slot.
 *
 * For use with radix_tree_lookup_slot(), radix_tree_gang_lookup_slot(),
 * radix_tree_gang_lookup_tag_slot().  Caller must hold tree write locked
 * across slot lookup and replacement.
 *
 * NOTE: This cannot be used to switch between non-entries (empty slots),
 * regular entries, and exceptional entries, as that requires accounting
 * inside the radix tree node. When switching from one type of entry or
 * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
 * radix_tree_iter_replace().
 */
void radix_tree_replace_slot(struct radix_tree_root *root,
			     void __rcu **slot, void *item)
{
	__radix_tree_replace(root, NULL, slot, item, NULL, NULL);
}
EXPORT_SYMBOL(radix_tree_replace_slot);

/**
 * radix_tree_iter_replace - replace item in a slot
 * @root:	radix tree root
 * @iter:	iterator state
 * @slot:	pointer to slot
 * @item:	new item to store in the slot.
 *
 * For use with radix_tree_split() and radix_tree_for_each_slot().
 * Caller must hold tree write locked across split and replacement.
 */
void radix_tree_iter_replace(struct radix_tree_root *root,
				const struct radix_tree_iter *iter,
				void __rcu **slot, void *item)
{
	__radix_tree_replace(root, iter->node, slot, item, NULL, NULL);
}

#ifdef CONFIG_RADIX_TREE_MULTIORDER
/**
 * radix_tree_join - replace multiple entries with one multiorder entry
 * @root: radix tree root
 * @index: an index inside the new entry
 * @order: order of the new entry
 * @item: new entry
 *
 * Call this function to replace several entries with one larger entry.
 * The existing entries are presumed to not need freeing as a result of
 * this call.
 *
 * The replacement entry will have all the tags set on it that were set
 * on any of the entries it is replacing.
 */
int radix_tree_join(struct radix_tree_root *root, unsigned long index,
			unsigned order, void *item)
{
	struct radix_tree_node *node;
	void __rcu **slot;
	int error;

	BUG_ON(radix_tree_is_internal_node(item));

	error = __radix_tree_create(root, index, order, &node, &slot);
	if (!error)
		error = insert_entries(node, slot, item, order, true);
	if (error > 0)
		error = 0;

	return error;
}
/**
 * radix_tree_split - Split an entry into smaller entries
 * @root: radix tree root
 * @index: An index within the large entry
 * @order: Order of new entries
 *
 * Call this function as the first step in replacing a multiorder entry
 * with several entries of lower order.  After this function returns,
 * loop over the relevant portion of the tree using radix_tree_for_each_slot()
 * and call radix_tree_iter_replace() to set up each new entry.
 *
 * The tags from this entry are replicated to all the new entries.
 *
 * The radix tree should be locked against modification during the entire
 * replacement operation.  Lock-free lookups will see RADIX_TREE_RETRY which
 * should prompt RCU walkers to restart the lookup from the root.
 */
int radix_tree_split(struct radix_tree_root *root, unsigned long index,
				unsigned order)
{
	struct radix_tree_node *parent, *node, *child;
	void __rcu **slot;
	unsigned int offset, end;
	unsigned n, tag, tags = 0;
	gfp_t gfp = root_gfp_mask(root);

	if (!__radix_tree_lookup(root, index, &parent, &slot))
		return -ENOENT;
	if (!parent)
		return -ENOENT;

	offset = get_slot_offset(parent, slot);

	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
		if (tag_get(parent, tag, offset))
			tags |= 1 << tag;

	for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) {
		if (!is_sibling_entry(parent,
				rcu_dereference_raw(parent->slots[end])))
			break;
		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
			if (tags & (1 << tag))
				tag_set(parent, tag, end);
		/* rcu_assign_pointer ensures tags are set before RETRY */
		rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY);
	}
	rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY);
	parent->exceptional -= (end - offset);

	if (order == parent->shift)
		return 0;
	if (order > parent->shift) {
		while (offset < end)
			offset += insert_entries(parent, &parent->slots[offset],
					RADIX_TREE_RETRY, order, true);
		return 0;
	}

	node = parent;

	for (;;) {
		if (node->shift > order) {
			child = radix_tree_node_alloc(gfp, node, root,
					node->shift - RADIX_TREE_MAP_SHIFT,
					offset, 0, 0);
			if (!child)
				goto nomem;
			if (node != parent) {
				node->count++;
				rcu_assign_pointer(node->slots[offset],
							node_to_entry(child));
				for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
					if (tags & (1 << tag))
						tag_set(node, tag, offset);
			}

			node = child;
			offset = 0;
			continue;
		}

		n = insert_entries(node, &node->slots[offset],
					RADIX_TREE_RETRY, order, false);
		BUG_ON(n > RADIX_TREE_MAP_SIZE);

		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
			if (tags & (1 << tag))
				tag_set(node, tag, offset);
		offset += n;

		while (offset == RADIX_TREE_MAP_SIZE) {
			if (node == parent)
				break;
			offset = node->offset;
			child = node;
			node = node->parent;
			rcu_assign_pointer(node->slots[offset],
					node_to_entry(child));
			offset++;
		}
		if ((node == parent) && (offset == end))
			return 0;
	}

nomem:
	/* Shouldn't happen; did user forget to preload? */
	/* TODO: free all the allocated nodes */
	WARN_ON(1);
	return -ENOMEM;
}
#endif
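/*
 * A minimal split-and-replace sketch following the protocol described in
 * the radix_tree_split() comment above (not part of this file; "my_tree",
 * "my_lock" and "new_item" are hypothetical; the loop should be bounded
 * to the old entry's index range):
 *
 *	struct radix_tree_iter iter;
 *	void __rcu **slot;
 *
 *	radix_tree_split_preload(old_order, new_order, GFP_KERNEL);
 *	spin_lock(&my_lock);
 *	err = radix_tree_split(&my_tree, index, new_order);
 *	if (!err) {
 *		radix_tree_for_each_slot(slot, &my_tree, &iter, index)
 *			radix_tree_iter_replace(&my_tree, &iter, slot,
 *						new_item);
 *	}
 *	spin_unlock(&my_lock);
 *	radix_tree_preload_end();
 */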
static void node_tag_set(struct radix_tree_root *root,
				struct radix_tree_node *node,
				unsigned int tag, unsigned int offset)
{
	while (node) {
		if (tag_get(node, tag, offset))
			return;
		tag_set(node, tag, offset);
		offset = node->offset;
		node = node->parent;
	}

	if (!root_tag_get(root, tag))
		root_tag_set(root, tag);
}

/**
 *	radix_tree_tag_set - set a tag on a radix tree node
 *	@root:		radix tree root
 *	@index:		index key
 *	@tag:		tag index
 *
 *	Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
 *	corresponding to @index in the radix tree, from the root all
 *	the way down to the leaf node.
 *
 *	Returns the address of the tagged item.  Setting a tag on a
 *	not-present item is a bug.
 */
void *radix_tree_tag_set(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;

	radix_tree_load_root(root, &node, &maxindex);
	BUG_ON(index > maxindex);

	while (radix_tree_is_internal_node(node)) {
		unsigned offset;

		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);
		BUG_ON(!node);

		if (!tag_get(parent, tag, offset))
			tag_set(parent, tag, offset);
	}

	/* set the root's tag bit */
	if (!root_tag_get(root, tag))
		root_tag_set(root, tag);

	return node;
}
EXPORT_SYMBOL(radix_tree_tag_set);

/**
 * radix_tree_iter_tag_set - set a tag on the current iterator entry
 * @root:	radix tree root
 * @iter:	iterator state
 * @tag:	tag to set
 */
void radix_tree_iter_tag_set(struct radix_tree_root *root,
			const struct radix_tree_iter *iter, unsigned int tag)
{
	node_tag_set(root, iter->node, tag, iter_offset(iter));
}

static void node_tag_clear(struct radix_tree_root *root,
				struct radix_tree_node *node,
				unsigned int tag, unsigned int offset)
{
	while (node) {
		if (!tag_get(node, tag, offset))
			return;
		tag_clear(node, tag, offset);
		if (any_tag_set(node, tag))
			return;

		offset = node->offset;
		node = node->parent;
	}

	/* clear the root's tag bit */
	if (root_tag_get(root, tag))
		root_tag_clear(root, tag);
}

/**
 *	radix_tree_tag_clear - clear a tag on a radix tree node
 *	@root:		radix tree root
 *	@index:		index key
 *	@tag:		tag index
 *
 *	Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
 *	corresponding to @index in the radix tree.  If this causes
 *	the leaf node to have no tags set then clear the tag in the
 *	next-to-leaf node, etc.
 *
 *	Returns the address of the tagged item on success, else NULL.  ie:
 *	has the same return value and semantics as radix_tree_lookup().
 */
void *radix_tree_tag_clear(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;
	int uninitialized_var(offset);

	radix_tree_load_root(root, &node, &maxindex);
	if (index > maxindex)
		return NULL;

	parent = NULL;

	while (radix_tree_is_internal_node(node)) {
		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);
	}

	if (node)
		node_tag_clear(root, parent, tag, offset);

	return node;
}
EXPORT_SYMBOL(radix_tree_tag_clear);
/**
 * radix_tree_iter_tag_clear - clear a tag on the current iterator entry
 * @root:	radix tree root
 * @iter:	iterator state
 * @tag:	tag to clear
 */
void radix_tree_iter_tag_clear(struct radix_tree_root *root,
			const struct radix_tree_iter *iter, unsigned int tag)
{
	node_tag_clear(root, iter->node, tag, iter_offset(iter));
}

/**
 * radix_tree_tag_get - get a tag on a radix tree node
 * @root:		radix tree root
 * @index:		index key
 * @tag:		tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Return values:
 *
 *  0: tag not present or not set
 *  1: tag set
 *
 * Note that the return value of this function may not be relied on, even if
 * the RCU lock is held, unless tag modification and node deletion are excluded
 * from concurrency.
 */
int radix_tree_tag_get(const struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;

	if (!root_tag_get(root, tag))
		return 0;

	radix_tree_load_root(root, &node, &maxindex);
	if (index > maxindex)
		return 0;

	while (radix_tree_is_internal_node(node)) {
		unsigned offset;

		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);

		if (!tag_get(parent, tag, offset))
			return 0;
		if (node == RADIX_TREE_RETRY)
			break;
	}

	return 1;
}
EXPORT_SYMBOL(radix_tree_tag_get);
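/*
 * A minimal tag usage sketch (not part of this file; "my_tree" is
 * hypothetical and MY_TAG must be < RADIX_TREE_MAX_TAGS).  Tags are set
 * and cleared under the tree's write-side lock:
 *
 *	radix_tree_tag_set(&my_tree, index, MY_TAG);
 *	...
 *	if (radix_tree_tag_get(&my_tree, index, MY_TAG))
 *		radix_tree_tag_clear(&my_tree, index, MY_TAG);
 */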
static inline void __set_iter_shift(struct radix_tree_iter *iter,
					unsigned int shift)
{
#ifdef CONFIG_RADIX_TREE_MULTIORDER
	iter->shift = shift;
#endif
}

/* Construct iter->tags bit-mask from node->tags[tag] array */
static void set_iter_tags(struct radix_tree_iter *iter,
				struct radix_tree_node *node, unsigned offset,
				unsigned tag)
{
	unsigned tag_long = offset / BITS_PER_LONG;
	unsigned tag_bit = offset % BITS_PER_LONG;

	if (!node) {
		iter->tags = 1;
		return;
	}

	iter->tags = node->tags[tag][tag_long] >> tag_bit;

	/* This never happens if RADIX_TREE_TAG_LONGS == 1 */
	if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
		/* Pick tags from next element */
		if (tag_bit)
			iter->tags |= node->tags[tag][tag_long + 1] <<
						(BITS_PER_LONG - tag_bit);
		/* Clip chunk size, here only BITS_PER_LONG tags */
		iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG);
	}
}

#ifdef CONFIG_RADIX_TREE_MULTIORDER
static void __rcu **skip_siblings(struct radix_tree_node **nodep,
			void __rcu **slot, struct radix_tree_iter *iter)
{
	void *sib = node_to_entry(slot - 1);

	while (iter->index < iter->next_index) {
		*nodep = rcu_dereference_raw(*slot);
		if (*nodep && *nodep != sib)
			return slot;
		slot++;
		iter->index = __radix_tree_iter_add(iter, 1);
		iter->tags >>= 1;
	}

	*nodep = NULL;
	return NULL;
}

void __rcu **__radix_tree_next_slot(void __rcu **slot,
				struct radix_tree_iter *iter, unsigned flags)
{
	unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
	struct radix_tree_node *node = rcu_dereference_raw(*slot);

	slot = skip_siblings(&node, slot, iter);

	while (radix_tree_is_internal_node(node)) {
		unsigned offset;
		unsigned long next_index;

		if (node == RADIX_TREE_RETRY)
			return slot;
		node = entry_to_node(node);
		iter->node = node;
		iter->shift = node->shift;

		if (flags & RADIX_TREE_ITER_TAGGED) {
			offset = radix_tree_find_next_bit(node, tag, 0);
			if (offset == RADIX_TREE_MAP_SIZE)
				return NULL;
			slot = &node->slots[offset];
			iter->index = __radix_tree_iter_add(iter, offset);
			set_iter_tags(iter, node, offset, tag);
			node = rcu_dereference_raw(*slot);
		} else {
			offset = 0;
			slot = &node->slots[0];
			for (;;) {
				node = rcu_dereference_raw(*slot);
				if (node)
					break;
				slot++;
				offset++;
				if (offset == RADIX_TREE_MAP_SIZE)
					return NULL;
			}
			iter->index = __radix_tree_iter_add(iter, offset);
		}
		if ((flags & RADIX_TREE_ITER_CONTIG) && (offset > 0))
			goto none;
		next_index = (iter->index | shift_maxindex(iter->shift)) + 1;
		if (next_index < iter->next_index)
			iter->next_index = next_index;
	}

	return slot;
none:
	iter->next_index = 0;
	return NULL;
}
EXPORT_SYMBOL(__radix_tree_next_slot);
#else
static void __rcu **skip_siblings(struct radix_tree_node **nodep,
			void __rcu **slot, struct radix_tree_iter *iter)
{
	return slot;
}
#endif

void __rcu **radix_tree_iter_resume(void __rcu **slot,
					struct radix_tree_iter *iter)
{
	struct radix_tree_node *node;

	slot++;
	iter->index = __radix_tree_iter_add(iter, 1);
	skip_siblings(&node, slot, iter);
	iter->next_index = iter->index;
	iter->tags = 0;
	return NULL;
}
EXPORT_SYMBOL(radix_tree_iter_resume);
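/*
 * A minimal iteration sketch (not part of this file; "my_tree" is
 * hypothetical).  The chunked iterator below is normally driven through
 * the radix_tree_for_each_slot() macro:
 *
 *	struct radix_tree_iter iter;
 *	void __rcu **slot;
 *
 *	rcu_read_lock();
 *	radix_tree_for_each_slot(slot, &my_tree, &iter, 0) {
 *		void *entry = radix_tree_deref_slot(slot);
 *		if (radix_tree_deref_retry(entry)) {
 *			slot = radix_tree_iter_retry(&iter);
 *			continue;
 *		}
 *		...
 *	}
 *	rcu_read_unlock();
 */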
/**
 * radix_tree_next_chunk - find next chunk of slots for iteration
 *
 * @root:	radix tree root
 * @iter:	iterator state
 * @flags:	RADIX_TREE_ITER_* flags and tag index
 * Returns:	pointer to chunk first slot, or NULL if iteration is over
 */
void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
			     struct radix_tree_iter *iter, unsigned flags)
{
	unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
	struct radix_tree_node *node, *child;
	unsigned long index, offset, maxindex;

	if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
		return NULL;

	/*
	 * Catch next_index overflow after ~0UL.  iter->index never overflows
	 * during iterating; it can be zero only at the beginning.
	 * And we cannot overflow iter->next_index in a single step,
	 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
	 *
	 * This condition is also used by radix_tree_next_slot() to stop
	 * contiguous iterating, and to forbid switching to the next chunk.
	 */
	index = iter->next_index;
	if (!index && iter->index)
		return NULL;

restart:
	radix_tree_load_root(root, &child, &maxindex);
	if (index > maxindex)
		return NULL;
	if (!child)
		return NULL;

	if (!radix_tree_is_internal_node(child)) {
		/* Single-slot tree */
		iter->index = index;
		iter->next_index = maxindex + 1;
		iter->tags = 1;
		iter->node = NULL;
		__set_iter_shift(iter, 0);
		return (void __rcu **)&root->rnode;
	}

	do {
		node = entry_to_node(child);
		offset = radix_tree_descend(node, &child, index);

		if ((flags & RADIX_TREE_ITER_TAGGED) ?
				!tag_get(node, tag, offset) : !child) {
			/* Hole detected */
			if (flags & RADIX_TREE_ITER_CONTIG)
				return NULL;

			if (flags & RADIX_TREE_ITER_TAGGED)
				offset = radix_tree_find_next_bit(node, tag,
						offset + 1);
			else
				while (++offset < RADIX_TREE_MAP_SIZE) {
					void *slot = rcu_dereference_raw(
							node->slots[offset]);
					if (is_sibling_entry(node, slot))
						continue;
					if (slot)
						break;
				}
			index &= ~node_maxindex(node);
			index += offset << node->shift;
			/* Overflow after ~0UL */
			if (!index)
				return NULL;
			if (offset == RADIX_TREE_MAP_SIZE)
				goto restart;
			child = rcu_dereference_raw(node->slots[offset]);
		}

		if (!child)
			goto restart;
		if (child == RADIX_TREE_RETRY)
			break;
	} while (radix_tree_is_internal_node(child));

	/* Update the iterator state */
	iter->index = (index & ~node_maxindex(node)) | (offset << node->shift);
	iter->next_index = (index | node_maxindex(node)) + 1;
	iter->node = node;
	__set_iter_shift(iter, node->shift);

	if (flags & RADIX_TREE_ITER_TAGGED)
		set_iter_tags(iter, node, offset, tag);

	return node->slots + offset;
}
EXPORT_SYMBOL(radix_tree_next_chunk);
/**
 *	radix_tree_gang_lookup - perform multiple lookup on a radix tree
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *
 *	Performs an index-ascending scan of the tree for present items.
 *	Places them at *@results and returns the number of items which
 *	were placed at *@results.
 *
 *	The implementation is naive.
 *
 *	Like radix_tree_lookup, radix_tree_gang_lookup may be called under
 *	rcu_read_lock. In this case, rather than the returned results being
 *	an atomic snapshot of the tree at a single point in time, the
 *	semantics of an RCU protected gang lookup are as though multiple
 *	radix_tree_lookups have been issued in individual locks, and results
 *	stored in 'results'.
 */
unsigned int
radix_tree_gang_lookup(const struct radix_tree_root *root, void **results,
			unsigned long first_index, unsigned int max_items)
{
	struct radix_tree_iter iter;
	void __rcu **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_slot(slot, root, &iter, first_index) {
		results[ret] = rcu_dereference_raw(*slot);
		if (!results[ret])
			continue;
		if (radix_tree_is_internal_node(results[ret])) {
			slot = radix_tree_iter_retry(&iter);
			continue;
		}
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup);

/**
 *	radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@indices:	where their indices should be placed (but usually NULL)
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *
 *	Performs an index-ascending scan of the tree for present items.
 *	Places their slots at *@results and returns the number of items
 *	which were placed at *@results.
 *
 *	The implementation is naive.
 *
 *	Like radix_tree_gang_lookup as far as RCU and locking goes. Slots
 *	must be dereferenced with radix_tree_deref_slot, and if using only RCU
 *	protection, radix_tree_deref_slot may fail requiring a retry.
 */
unsigned int
radix_tree_gang_lookup_slot(const struct radix_tree_root *root,
			void __rcu ***results, unsigned long *indices,
			unsigned long first_index, unsigned int max_items)
{
	struct radix_tree_iter iter;
	void __rcu **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_slot(slot, root, &iter, first_index) {
		results[ret] = slot;
		if (indices)
			indices[ret] = iter.index;
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_slot);
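/*
 * A minimal gang-lookup sketch (not part of this file; "my_tree" is
 * hypothetical).  Batching amortises the cost of walking the tree:
 *
 *	void *batch[16];
 *	unsigned int i, n;
 *
 *	rcu_read_lock();
 *	n = radix_tree_gang_lookup(&my_tree, batch, first_index,
 *				   ARRAY_SIZE(batch));
 *	for (i = 0; i < n; i++)
 *		...
 *	rcu_read_unlock();
 */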
 */
unsigned int
radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results,
                unsigned long first_index, unsigned int max_items,
                unsigned int tag)
{
        struct radix_tree_iter iter;
        void __rcu **slot;
        unsigned int ret = 0;

        if (unlikely(!max_items))
                return 0;

        radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
                results[ret] = rcu_dereference_raw(*slot);
                if (!results[ret])
                        continue;
                if (radix_tree_is_internal_node(results[ret])) {
                        slot = radix_tree_iter_retry(&iter);
                        continue;
                }
                if (++ret == max_items)
                        break;
        }

        return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag);

/**
 * radix_tree_gang_lookup_tag_slot - perform multiple slot lookups on a
 *                                   radix tree based on a tag
 * @root:        radix tree root
 * @results:     where the results of the lookup are placed
 * @first_index: start the lookup from this key
 * @max_items:   place up to this many items at *results
 * @tag:         the tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Performs an index-ascending scan of the tree for present items which
 * have the tag indexed by @tag set. Places the slots at *@results and
 * returns the number of slots which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root,
                void __rcu ***results, unsigned long first_index,
                unsigned int max_items, unsigned int tag)
{
        struct radix_tree_iter iter;
        void __rcu **slot;
        unsigned int ret = 0;

        if (unlikely(!max_items))
                return 0;

        radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
                results[ret] = slot;
                if (++ret == max_items)
                        break;
        }

        return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);

/**
 * __radix_tree_delete_node - try to free node after clearing a slot
 * @root:        radix tree root
 * @node:        node containing the slot that was cleared
 * @update_node: callback for changing leaf nodes
 * @private:     private data to pass to @update_node
 *
 * After clearing a slot in @node, in the radix tree rooted at @root,
 * call this function to attempt freeing the node and shrinking the tree.
 */
void __radix_tree_delete_node(struct radix_tree_root *root,
                              struct radix_tree_node *node,
                              radix_tree_update_node_t update_node,
                              void *private)
{
        delete_node(root, node, update_node, private);
}

static bool __radix_tree_delete(struct radix_tree_root *root,
                                struct radix_tree_node *node, void __rcu **slot)
{
        void *old = rcu_dereference_raw(*slot);
        int exceptional = radix_tree_exceptional_entry(old) ? -1 : 0;
        unsigned offset = get_slot_offset(node, slot);
        int tag;

        if (is_idr(root))
                node_tag_set(root, node, IDR_FREE, offset);
        else
                for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
                        node_tag_clear(root, node, tag, offset);

        replace_slot(slot, NULL, node, -1, exceptional);
        return node && delete_node(root, node, NULL, NULL);
}

/**
 * radix_tree_iter_delete - delete the entry at this iterator position
 * @root:        radix tree root
 * @iter:        iterator state
 * @slot:        pointer to slot
 *
 * Delete the entry at the position currently pointed to by the iterator.
 * This may result in the current node being freed; if it is, the iterator
 * is advanced so that it will not reference the freed memory. This
 * function may be called without any locking if there are no other threads
 * which can access this tree.
 */
void radix_tree_iter_delete(struct radix_tree_root *root,
                            struct radix_tree_iter *iter, void __rcu **slot)
{
        if (__radix_tree_delete(root, iter->node, slot))
                iter->index = iter->next_index;
}
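/*
 * Illustrative sketch (hypothetical tree, lock and predicate; not part
 * of this file): because radix_tree_iter_delete() keeps the iterator
 * valid even when the current node is freed, a caller holding the tree
 * lock can prune entries during a walk:
 *
 *	void __rcu **slot;
 *	struct radix_tree_iter iter;
 *
 *	radix_tree_for_each_slot(slot, &my_tree, &iter, 0) {
 *		void *item = radix_tree_deref_slot_protected(slot, &my_lock);
 *
 *		if (should_prune(item))
 *			radix_tree_iter_delete(&my_tree, &iter, slot);
 *	}
 */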
/**
 * radix_tree_delete_item - delete an item from a radix tree
 * @root:        radix tree root
 * @index:       index key
 * @item:        expected item
 *
 * Remove @item at @index from the radix tree rooted at @root.
 *
 * Return: the deleted entry, or %NULL if it was not present
 * or the entry at the given @index was not @item.
 */
void *radix_tree_delete_item(struct radix_tree_root *root,
                             unsigned long index, void *item)
{
        struct radix_tree_node *node = NULL;
        void __rcu **slot;
        void *entry;

        entry = __radix_tree_lookup(root, index, &node, &slot);
        if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE,
                                                get_slot_offset(node, slot))))
                return NULL;

        if (item && entry != item)
                return NULL;

        __radix_tree_delete(root, node, slot);

        return entry;
}
EXPORT_SYMBOL(radix_tree_delete_item);

/**
 * radix_tree_delete - delete an entry from a radix tree
 * @root:        radix tree root
 * @index:       index key
 *
 * Remove the entry at @index from the radix tree rooted at @root.
 *
 * Return: the deleted entry, or %NULL if it was not present.
 */
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
{
        return radix_tree_delete_item(root, index, NULL);
}
EXPORT_SYMBOL(radix_tree_delete);

void radix_tree_clear_tags(struct radix_tree_root *root,
                           struct radix_tree_node *node,
                           void __rcu **slot)
{
        if (node) {
                unsigned int tag, offset = get_slot_offset(node, slot);

                for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
                        node_tag_clear(root, node, tag, offset);
        } else {
                root_tag_clear_all(root);
        }
}

/**
 * radix_tree_tagged - test whether any items in the tree are tagged
 * @root:        radix tree root
 * @tag:         tag to test
 */
int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag)
{
        return root_tag_get(root, tag);
}
EXPORT_SYMBOL(radix_tree_tagged);

/**
 * idr_preload - preload for idr_alloc()
 * @gfp_mask:    allocation mask to use for preloading
 *
 * Preallocate memory to use for the next call to idr_alloc(). This function
 * returns with preemption disabled. It will be enabled by idr_preload_end().
 */
void idr_preload(gfp_t gfp_mask)
{
        __radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE);
}
EXPORT_SYMBOL(idr_preload);
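/*
 * Hedged usage sketch for the preload pattern above (the idr, lock and
 * ptr names are hypothetical): idr_preload() pins preallocated nodes to
 * this CPU, so the subsequent idr_alloc() can use GFP_NOWAIT under a
 * lock without risking allocation failure for the tree nodes:
 *
 *	int id;
 *
 *	idr_preload(GFP_KERNEL);
 *	spin_lock(&my_lock);
 *	id = idr_alloc(&my_idr, ptr, 0, 0, GFP_NOWAIT);
 *	spin_unlock(&my_lock);
 *	idr_preload_end();
 *	if (id < 0)
 *		return id;
 */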
/**
 * ida_pre_get - reserve resources for ida allocation
 * @ida:         ida handle
 * @gfp:         memory allocation flags
 *
 * This function should be called before calling ida_get_new_above(). If it
 * is unable to allocate memory, it will return %0. On success, it returns %1.
 */
int ida_pre_get(struct ida *ida, gfp_t gfp)
{
        __radix_tree_preload(gfp, IDA_PRELOAD_SIZE);
        /*
         * The IDA API has no preload_end() equivalent. Instead,
         * ida_get_new() can return -EAGAIN, prompting the caller
         * to return to the ida_pre_get() step.
         */
        preempt_enable();

        if (!this_cpu_read(ida_bitmap)) {
                struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp);

                if (!bitmap)
                        return 0;
                if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap))
                        kfree(bitmap);
        }

        return 1;
}
EXPORT_SYMBOL(ida_pre_get);

void __rcu **idr_get_free(struct radix_tree_root *root,
                          struct radix_tree_iter *iter, gfp_t gfp, int end)
{
        struct radix_tree_node *node = NULL, *child;
        void __rcu **slot = (void __rcu **)&root->rnode;
        unsigned long maxindex, start = iter->next_index;
        unsigned long max = end > 0 ? end - 1 : INT_MAX;
        unsigned int shift, offset = 0;

 grow:
        shift = radix_tree_load_root(root, &child, &maxindex);
        if (!radix_tree_tagged(root, IDR_FREE))
                start = max(start, maxindex + 1);
        if (start > max)
                return ERR_PTR(-ENOSPC);

        if (start > maxindex) {
                int error = radix_tree_extend(root, gfp, start, shift);

                if (error < 0)
                        return ERR_PTR(error);
                shift = error;
                child = rcu_dereference_raw(root->rnode);
        }

        while (shift) {
                shift -= RADIX_TREE_MAP_SHIFT;
                if (child == NULL) {
                        /* Have to add a child node. */
                        child = radix_tree_node_alloc(gfp, node, root, shift,
                                                      offset, 0, 0);
                        if (!child)
                                return ERR_PTR(-ENOMEM);
                        all_tag_set(child, IDR_FREE);
                        rcu_assign_pointer(*slot, node_to_entry(child));
                        if (node)
                                node->count++;
                } else if (!radix_tree_is_internal_node(child))
                        break;

                node = entry_to_node(child);
                offset = radix_tree_descend(node, &child, start);
                if (!tag_get(node, IDR_FREE, offset)) {
                        offset = radix_tree_find_next_bit(node, IDR_FREE,
                                                          offset + 1);
                        start = next_index(start, node, offset);
                        if (start > max)
                                return ERR_PTR(-ENOSPC);
                        while (offset == RADIX_TREE_MAP_SIZE) {
                                offset = node->offset + 1;
                                node = node->parent;
                                if (!node)
                                        goto grow;
                                shift = node->shift;
                        }
                        child = rcu_dereference_raw(node->slots[offset]);
                }
                slot = &node->slots[offset];
        }

        iter->index = start;
        if (node)
                iter->next_index = 1 + min(max, (start | node_maxindex(node)));
        else
                iter->next_index = 1;
        iter->node = node;
        __set_iter_shift(iter, shift);
        set_iter_tags(iter, node, offset, IDR_FREE);

        return slot;
}

/**
 * idr_destroy - release all internal memory from an IDR
 * @idr:         idr handle
 *
 * After this function is called, the IDR is empty, and may be reused or
 * the data structure containing it may be freed.
 *
 * A typical clean-up sequence for objects stored in an idr tree will use
 * idr_for_each() to free all objects, if necessary, then idr_destroy() to
 * free the memory used to keep track of those objects.
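 *
 * A hedged sketch of that sequence (the callback and idr names are
 * hypothetical):
 *
 *	static int free_entry(int id, void *p, void *data)
 *	{
 *		kfree(p);
 *		return 0;
 *	}
 *
 *	idr_for_each(&my_idr, free_entry, NULL);
 *	idr_destroy(&my_idr);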
 */
void idr_destroy(struct idr *idr)
{
        struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.rnode);

        if (radix_tree_is_internal_node(node))
                radix_tree_free_nodes(node);
        idr->idr_rt.rnode = NULL;
        root_tag_set(&idr->idr_rt, IDR_FREE);
}
EXPORT_SYMBOL(idr_destroy);

static void
radix_tree_node_ctor(void *arg)
{
        struct radix_tree_node *node = arg;

        memset(node, 0, sizeof(*node));
        INIT_LIST_HEAD(&node->private_list);
}

static __init unsigned long __maxindex(unsigned int height)
{
        unsigned int width = height * RADIX_TREE_MAP_SHIFT;
        int shift = RADIX_TREE_INDEX_BITS - width;

        if (shift < 0)
                return ~0UL;
        if (shift >= BITS_PER_LONG)
                return 0UL;
        return ~0UL >> shift;
}

static __init void radix_tree_init_maxnodes(void)
{
        unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1];
        unsigned int i, j;

        for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
                height_to_maxindex[i] = __maxindex(i);
        for (i = 0; i < ARRAY_SIZE(height_to_maxnodes); i++) {
                for (j = i; j > 0; j--)
                        height_to_maxnodes[i] += height_to_maxindex[j - 1] + 1;
        }
}

static int radix_tree_cpu_dead(unsigned int cpu)
{
        struct radix_tree_preload *rtp;
        struct radix_tree_node *node;

        /* Free per-cpu pool of preloaded nodes */
        rtp = &per_cpu(radix_tree_preloads, cpu);
        while (rtp->nr) {
                node = rtp->nodes;
                rtp->nodes = node->parent;
                kmem_cache_free(radix_tree_node_cachep, node);
                rtp->nr--;
        }
        kfree(per_cpu(ida_bitmap, cpu));
        per_cpu(ida_bitmap, cpu) = NULL;
        return 0;
}

void __init radix_tree_init(void)
{
        int ret;

        BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
        radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
                        sizeof(struct radix_tree_node), 0,
                        SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
                        radix_tree_node_ctor);
        radix_tree_init_maxnodes();
        ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead",
                                        NULL, radix_tree_cpu_dead);
        WARN_ON(ret < 0);
}
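/*
 * End-to-end hedged sketch of the core tree lifecycle documented above
 * (my_tree, my_lock, index and item are hypothetical caller state, not
 * part of this file). radix_tree_preload() returns with preemption
 * disabled on success, so the insert can run under a spinlock and the
 * caller re-enables preemption with radix_tree_preload_end():
 *
 *	static RADIX_TREE(my_tree, GFP_KERNEL);
 *	static DEFINE_SPINLOCK(my_lock);
 *
 *	int store(unsigned long index, void *item)
 *	{
 *		int err = radix_tree_preload(GFP_KERNEL);
 *
 *		if (err)
 *			return err;
 *		spin_lock(&my_lock);
 *		err = radix_tree_insert(&my_tree, index, item);
 *		spin_unlock(&my_lock);
 *		radix_tree_preload_end();
 *		return err;
 *	}
 *
 * Lookups may then run locklessly under rcu_read_lock(), and
 * radix_tree_delete(&my_tree, index) removes the entry again under
 * my_lock.
 */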