1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2001 Momchil Velikov 4 * Portions Copyright (C) 2001 Christoph Hellwig 5 * Copyright (C) 2005 SGI, Christoph Lameter 6 * Copyright (C) 2006 Nick Piggin 7 * Copyright (C) 2012 Konstantin Khlebnikov 8 * Copyright (C) 2016 Intel, Matthew Wilcox 9 * Copyright (C) 2016 Intel, Ross Zwisler 10 */ 11 12 #include <linux/bitmap.h> 13 #include <linux/bitops.h> 14 #include <linux/bug.h> 15 #include <linux/cpu.h> 16 #include <linux/errno.h> 17 #include <linux/export.h> 18 #include <linux/idr.h> 19 #include <linux/init.h> 20 #include <linux/kernel.h> 21 #include <linux/kmemleak.h> 22 #include <linux/percpu.h> 23 #include <linux/preempt.h> /* in_interrupt() */ 24 #include <linux/radix-tree.h> 25 #include <linux/rcupdate.h> 26 #include <linux/slab.h> 27 #include <linux/string.h> 28 #include <linux/xarray.h> 29 30 /* 31 * Radix tree node cache. 32 */ 33 struct kmem_cache *radix_tree_node_cachep; 34 35 /* 36 * The radix tree is variable-height, so an insert operation not only has 37 * to build the branch to its corresponding item, it also has to build the 38 * branch to existing items if the size has to be increased (by 39 * radix_tree_extend). 40 * 41 * The worst case is a zero height tree with just a single item at index 0, 42 * and then inserting an item at index ULONG_MAX. This requires 2 new branches 43 * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared. 44 * Hence: 45 */ 46 #define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1) 47 48 /* 49 * The IDR does not have to be as high as the radix tree since it uses 50 * signed integers, not unsigned longs. 51 */ 52 #define IDR_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(int) - 1) 53 #define IDR_MAX_PATH (DIV_ROUND_UP(IDR_INDEX_BITS, \ 54 RADIX_TREE_MAP_SHIFT)) 55 #define IDR_PRELOAD_SIZE (IDR_MAX_PATH * 2 - 1) 56 57 /* 58 * Per-cpu pool of preloaded nodes 59 */ 60 DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 61 .lock = INIT_LOCAL_LOCK(lock), 62 }; 63 EXPORT_PER_CPU_SYMBOL_GPL(radix_tree_preloads); 64 65 static inline struct radix_tree_node *entry_to_node(void *ptr) 66 { 67 return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE); 68 } 69 70 static inline void *node_to_entry(void *ptr) 71 { 72 return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); 73 } 74 75 #define RADIX_TREE_RETRY XA_RETRY_ENTRY 76 77 static inline unsigned long 78 get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot) 79 { 80 return parent ? slot - parent->slots : 0; 81 } 82 83 static unsigned int radix_tree_descend(const struct radix_tree_node *parent, 84 struct radix_tree_node **nodep, unsigned long index) 85 { 86 unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK; 87 void __rcu **entry = rcu_dereference_raw(parent->slots[offset]); 88 89 *nodep = (void *)entry; 90 return offset; 91 } 92 93 static inline gfp_t root_gfp_mask(const struct radix_tree_root *root) 94 { 95 return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK); 96 } 97 98 static inline void tag_set(struct radix_tree_node *node, unsigned int tag, 99 int offset) 100 { 101 __set_bit(offset, node->tags[tag]); 102 } 103 104 static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, 105 int offset) 106 { 107 __clear_bit(offset, node->tags[tag]); 108 } 109 110 static inline int tag_get(const struct radix_tree_node *node, unsigned int tag, 111 int offset) 112 { 113 return test_bit(offset, node->tags[tag]); 114 } 115 116 static inline void root_tag_set(struct radix_tree_root *root, unsigned tag) 117 { 118 root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT)); 119 } 120 121 static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag) 122 { 123 root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT)); 124 } 125 126 static inline void root_tag_clear_all(struct radix_tree_root *root) 127 { 128 root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1); 129 } 130 131 static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag) 132 { 133 return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT)); 134 } 135 136 static inline unsigned root_tags_get(const struct radix_tree_root *root) 137 { 138 return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT; 139 } 140 141 static inline bool is_idr(const struct radix_tree_root *root) 142 { 143 return !!(root->xa_flags & ROOT_IS_IDR); 144 } 145 146 /* 147 * Returns 1 if any slot in the node has this tag set. 148 * Otherwise returns 0. 149 */ 150 static inline int any_tag_set(const struct radix_tree_node *node, 151 unsigned int tag) 152 { 153 unsigned idx; 154 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { 155 if (node->tags[tag][idx]) 156 return 1; 157 } 158 return 0; 159 } 160 161 static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag) 162 { 163 bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE); 164 } 165 166 /** 167 * radix_tree_find_next_bit - find the next set bit in a memory region 168 * 169 * @node: where to begin the search 170 * @tag: the tag index 171 * @offset: the bitnumber to start searching at 172 * 173 * Unrollable variant of find_next_bit() for constant size arrays. 174 * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero. 175 * Returns next bit offset, or size if nothing found. 176 */ 177 static __always_inline unsigned long 178 radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag, 179 unsigned long offset) 180 { 181 const unsigned long *addr = node->tags[tag]; 182 183 if (offset < RADIX_TREE_MAP_SIZE) { 184 unsigned long tmp; 185 186 addr += offset / BITS_PER_LONG; 187 tmp = *addr >> (offset % BITS_PER_LONG); 188 if (tmp) 189 return __ffs(tmp) + offset; 190 offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1); 191 while (offset < RADIX_TREE_MAP_SIZE) { 192 tmp = *++addr; 193 if (tmp) 194 return __ffs(tmp) + offset; 195 offset += BITS_PER_LONG; 196 } 197 } 198 return RADIX_TREE_MAP_SIZE; 199 } 200 201 static unsigned int iter_offset(const struct radix_tree_iter *iter) 202 { 203 return iter->index & RADIX_TREE_MAP_MASK; 204 } 205 206 /* 207 * The maximum index which can be stored in a radix tree 208 */ 209 static inline unsigned long shift_maxindex(unsigned int shift) 210 { 211 return (RADIX_TREE_MAP_SIZE << shift) - 1; 212 } 213 214 static inline unsigned long node_maxindex(const struct radix_tree_node *node) 215 { 216 return shift_maxindex(node->shift); 217 } 218 219 static unsigned long next_index(unsigned long index, 220 const struct radix_tree_node *node, 221 unsigned long offset) 222 { 223 return (index & ~node_maxindex(node)) + (offset << node->shift); 224 } 225 226 /* 227 * This assumes that the caller has performed appropriate preallocation, and 228 * that the caller has pinned this thread of control to the current CPU. 229 */ 230 static struct radix_tree_node * 231 radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent, 232 struct radix_tree_root *root, 233 unsigned int shift, unsigned int offset, 234 unsigned int count, unsigned int nr_values) 235 { 236 struct radix_tree_node *ret = NULL; 237 238 /* 239 * Preload code isn't irq safe and it doesn't make sense to use 240 * preloading during an interrupt anyway as all the allocations have 241 * to be atomic. So just do normal allocation when in interrupt. 242 */ 243 if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) { 244 struct radix_tree_preload *rtp; 245 246 /* 247 * Even if the caller has preloaded, try to allocate from the 248 * cache first for the new node to get accounted to the memory 249 * cgroup. 250 */ 251 ret = kmem_cache_alloc(radix_tree_node_cachep, 252 gfp_mask | __GFP_NOWARN); 253 if (ret) 254 goto out; 255 256 /* 257 * Provided the caller has preloaded here, we will always 258 * succeed in getting a node here (and never reach 259 * kmem_cache_alloc) 260 */ 261 rtp = this_cpu_ptr(&radix_tree_preloads); 262 if (rtp->nr) { 263 ret = rtp->nodes; 264 rtp->nodes = ret->parent; 265 rtp->nr--; 266 } 267 /* 268 * Update the allocation stack trace as this is more useful 269 * for debugging. 270 */ 271 kmemleak_update_trace(ret); 272 goto out; 273 } 274 ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 275 out: 276 BUG_ON(radix_tree_is_internal_node(ret)); 277 if (ret) { 278 ret->shift = shift; 279 ret->offset = offset; 280 ret->count = count; 281 ret->nr_values = nr_values; 282 ret->parent = parent; 283 ret->array = root; 284 } 285 return ret; 286 } 287 288 void radix_tree_node_rcu_free(struct rcu_head *head) 289 { 290 struct radix_tree_node *node = 291 container_of(head, struct radix_tree_node, rcu_head); 292 293 /* 294 * Must only free zeroed nodes into the slab. We can be left with 295 * non-NULL entries by radix_tree_free_nodes, so clear the entries 296 * and tags here. 297 */ 298 memset(node->slots, 0, sizeof(node->slots)); 299 memset(node->tags, 0, sizeof(node->tags)); 300 INIT_LIST_HEAD(&node->private_list); 301 302 kmem_cache_free(radix_tree_node_cachep, node); 303 } 304 305 static inline void 306 radix_tree_node_free(struct radix_tree_node *node) 307 { 308 call_rcu(&node->rcu_head, radix_tree_node_rcu_free); 309 } 310 311 /* 312 * Load up this CPU's radix_tree_node buffer with sufficient objects to 313 * ensure that the addition of a single element in the tree cannot fail. On 314 * success, return zero, with preemption disabled. On error, return -ENOMEM 315 * with preemption not disabled. 316 * 317 * To make use of this facility, the radix tree must be initialised without 318 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). 319 */ 320 static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr) 321 { 322 struct radix_tree_preload *rtp; 323 struct radix_tree_node *node; 324 int ret = -ENOMEM; 325 326 /* 327 * Nodes preloaded by one cgroup can be used by another cgroup, so 328 * they should never be accounted to any particular memory cgroup. 329 */ 330 gfp_mask &= ~__GFP_ACCOUNT; 331 332 local_lock(&radix_tree_preloads.lock); 333 rtp = this_cpu_ptr(&radix_tree_preloads); 334 while (rtp->nr < nr) { 335 local_unlock(&radix_tree_preloads.lock); 336 node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 337 if (node == NULL) 338 goto out; 339 local_lock(&radix_tree_preloads.lock); 340 rtp = this_cpu_ptr(&radix_tree_preloads); 341 if (rtp->nr < nr) { 342 node->parent = rtp->nodes; 343 rtp->nodes = node; 344 rtp->nr++; 345 } else { 346 kmem_cache_free(radix_tree_node_cachep, node); 347 } 348 } 349 ret = 0; 350 out: 351 return ret; 352 } 353 354 /* 355 * Load up this CPU's radix_tree_node buffer with sufficient objects to 356 * ensure that the addition of a single element in the tree cannot fail. On 357 * success, return zero, with preemption disabled. On error, return -ENOMEM 358 * with preemption not disabled. 359 * 360 * To make use of this facility, the radix tree must be initialised without 361 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). 362 */ 363 int radix_tree_preload(gfp_t gfp_mask) 364 { 365 /* Warn on non-sensical use... */ 366 WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask)); 367 return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); 368 } 369 EXPORT_SYMBOL(radix_tree_preload); 370 371 /* 372 * The same as above function, except we don't guarantee preloading happens. 373 * We do it, if we decide it helps. On success, return zero with preemption 374 * disabled. On error, return -ENOMEM with preemption not disabled. 375 */ 376 int radix_tree_maybe_preload(gfp_t gfp_mask) 377 { 378 if (gfpflags_allow_blocking(gfp_mask)) 379 return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); 380 /* Preloading doesn't help anything with this gfp mask, skip it */ 381 local_lock(&radix_tree_preloads.lock); 382 return 0; 383 } 384 EXPORT_SYMBOL(radix_tree_maybe_preload); 385 386 static unsigned radix_tree_load_root(const struct radix_tree_root *root, 387 struct radix_tree_node **nodep, unsigned long *maxindex) 388 { 389 struct radix_tree_node *node = rcu_dereference_raw(root->xa_head); 390 391 *nodep = node; 392 393 if (likely(radix_tree_is_internal_node(node))) { 394 node = entry_to_node(node); 395 *maxindex = node_maxindex(node); 396 return node->shift + RADIX_TREE_MAP_SHIFT; 397 } 398 399 *maxindex = 0; 400 return 0; 401 } 402 403 /* 404 * Extend a radix tree so it can store key @index. 405 */ 406 static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, 407 unsigned long index, unsigned int shift) 408 { 409 void *entry; 410 unsigned int maxshift; 411 int tag; 412 413 /* Figure out what the shift should be. */ 414 maxshift = shift; 415 while (index > shift_maxindex(maxshift)) 416 maxshift += RADIX_TREE_MAP_SHIFT; 417 418 entry = rcu_dereference_raw(root->xa_head); 419 if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE))) 420 goto out; 421 422 do { 423 struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL, 424 root, shift, 0, 1, 0); 425 if (!node) 426 return -ENOMEM; 427 428 if (is_idr(root)) { 429 all_tag_set(node, IDR_FREE); 430 if (!root_tag_get(root, IDR_FREE)) { 431 tag_clear(node, IDR_FREE, 0); 432 root_tag_set(root, IDR_FREE); 433 } 434 } else { 435 /* Propagate the aggregated tag info to the new child */ 436 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 437 if (root_tag_get(root, tag)) 438 tag_set(node, tag, 0); 439 } 440 } 441 442 BUG_ON(shift > BITS_PER_LONG); 443 if (radix_tree_is_internal_node(entry)) { 444 entry_to_node(entry)->parent = node; 445 } else if (xa_is_value(entry)) { 446 /* Moving a value entry root->xa_head to a node */ 447 node->nr_values = 1; 448 } 449 /* 450 * entry was already in the radix tree, so we do not need 451 * rcu_assign_pointer here 452 */ 453 node->slots[0] = (void __rcu *)entry; 454 entry = node_to_entry(node); 455 rcu_assign_pointer(root->xa_head, entry); 456 shift += RADIX_TREE_MAP_SHIFT; 457 } while (shift <= maxshift); 458 out: 459 return maxshift + RADIX_TREE_MAP_SHIFT; 460 } 461 462 /** 463 * radix_tree_shrink - shrink radix tree to minimum height 464 * @root: radix tree root 465 */ 466 static inline bool radix_tree_shrink(struct radix_tree_root *root) 467 { 468 bool shrunk = false; 469 470 for (;;) { 471 struct radix_tree_node *node = rcu_dereference_raw(root->xa_head); 472 struct radix_tree_node *child; 473 474 if (!radix_tree_is_internal_node(node)) 475 break; 476 node = entry_to_node(node); 477 478 /* 479 * The candidate node has more than one child, or its child 480 * is not at the leftmost slot, we cannot shrink. 481 */ 482 if (node->count != 1) 483 break; 484 child = rcu_dereference_raw(node->slots[0]); 485 if (!child) 486 break; 487 488 /* 489 * For an IDR, we must not shrink entry 0 into the root in 490 * case somebody calls idr_replace() with a pointer that 491 * appears to be an internal entry 492 */ 493 if (!node->shift && is_idr(root)) 494 break; 495 496 if (radix_tree_is_internal_node(child)) 497 entry_to_node(child)->parent = NULL; 498 499 /* 500 * We don't need rcu_assign_pointer(), since we are simply 501 * moving the node from one part of the tree to another: if it 502 * was safe to dereference the old pointer to it 503 * (node->slots[0]), it will be safe to dereference the new 504 * one (root->xa_head) as far as dependent read barriers go. 505 */ 506 root->xa_head = (void __rcu *)child; 507 if (is_idr(root) && !tag_get(node, IDR_FREE, 0)) 508 root_tag_clear(root, IDR_FREE); 509 510 /* 511 * We have a dilemma here. The node's slot[0] must not be 512 * NULLed in case there are concurrent lookups expecting to 513 * find the item. However if this was a bottom-level node, 514 * then it may be subject to the slot pointer being visible 515 * to callers dereferencing it. If item corresponding to 516 * slot[0] is subsequently deleted, these callers would expect 517 * their slot to become empty sooner or later. 518 * 519 * For example, lockless pagecache will look up a slot, deref 520 * the page pointer, and if the page has 0 refcount it means it 521 * was concurrently deleted from pagecache so try the deref 522 * again. Fortunately there is already a requirement for logic 523 * to retry the entire slot lookup -- the indirect pointer 524 * problem (replacing direct root node with an indirect pointer 525 * also results in a stale slot). So tag the slot as indirect 526 * to force callers to retry. 527 */ 528 node->count = 0; 529 if (!radix_tree_is_internal_node(child)) { 530 node->slots[0] = (void __rcu *)RADIX_TREE_RETRY; 531 } 532 533 WARN_ON_ONCE(!list_empty(&node->private_list)); 534 radix_tree_node_free(node); 535 shrunk = true; 536 } 537 538 return shrunk; 539 } 540 541 static bool delete_node(struct radix_tree_root *root, 542 struct radix_tree_node *node) 543 { 544 bool deleted = false; 545 546 do { 547 struct radix_tree_node *parent; 548 549 if (node->count) { 550 if (node_to_entry(node) == 551 rcu_dereference_raw(root->xa_head)) 552 deleted |= radix_tree_shrink(root); 553 return deleted; 554 } 555 556 parent = node->parent; 557 if (parent) { 558 parent->slots[node->offset] = NULL; 559 parent->count--; 560 } else { 561 /* 562 * Shouldn't the tags already have all been cleared 563 * by the caller? 564 */ 565 if (!is_idr(root)) 566 root_tag_clear_all(root); 567 root->xa_head = NULL; 568 } 569 570 WARN_ON_ONCE(!list_empty(&node->private_list)); 571 radix_tree_node_free(node); 572 deleted = true; 573 574 node = parent; 575 } while (node); 576 577 return deleted; 578 } 579 580 /** 581 * __radix_tree_create - create a slot in a radix tree 582 * @root: radix tree root 583 * @index: index key 584 * @nodep: returns node 585 * @slotp: returns slot 586 * 587 * Create, if necessary, and return the node and slot for an item 588 * at position @index in the radix tree @root. 589 * 590 * Until there is more than one item in the tree, no nodes are 591 * allocated and @root->xa_head is used as a direct slot instead of 592 * pointing to a node, in which case *@nodep will be NULL. 593 * 594 * Returns -ENOMEM, or 0 for success. 595 */ 596 static int __radix_tree_create(struct radix_tree_root *root, 597 unsigned long index, struct radix_tree_node **nodep, 598 void __rcu ***slotp) 599 { 600 struct radix_tree_node *node = NULL, *child; 601 void __rcu **slot = (void __rcu **)&root->xa_head; 602 unsigned long maxindex; 603 unsigned int shift, offset = 0; 604 unsigned long max = index; 605 gfp_t gfp = root_gfp_mask(root); 606 607 shift = radix_tree_load_root(root, &child, &maxindex); 608 609 /* Make sure the tree is high enough. */ 610 if (max > maxindex) { 611 int error = radix_tree_extend(root, gfp, max, shift); 612 if (error < 0) 613 return error; 614 shift = error; 615 child = rcu_dereference_raw(root->xa_head); 616 } 617 618 while (shift > 0) { 619 shift -= RADIX_TREE_MAP_SHIFT; 620 if (child == NULL) { 621 /* Have to add a child node. */ 622 child = radix_tree_node_alloc(gfp, node, root, shift, 623 offset, 0, 0); 624 if (!child) 625 return -ENOMEM; 626 rcu_assign_pointer(*slot, node_to_entry(child)); 627 if (node) 628 node->count++; 629 } else if (!radix_tree_is_internal_node(child)) 630 break; 631 632 /* Go a level down */ 633 node = entry_to_node(child); 634 offset = radix_tree_descend(node, &child, index); 635 slot = &node->slots[offset]; 636 } 637 638 if (nodep) 639 *nodep = node; 640 if (slotp) 641 *slotp = slot; 642 return 0; 643 } 644 645 /* 646 * Free any nodes below this node. The tree is presumed to not need 647 * shrinking, and any user data in the tree is presumed to not need a 648 * destructor called on it. If we need to add a destructor, we can 649 * add that functionality later. Note that we may not clear tags or 650 * slots from the tree as an RCU walker may still have a pointer into 651 * this subtree. We could replace the entries with RADIX_TREE_RETRY, 652 * but we'll still have to clear those in rcu_free. 653 */ 654 static void radix_tree_free_nodes(struct radix_tree_node *node) 655 { 656 unsigned offset = 0; 657 struct radix_tree_node *child = entry_to_node(node); 658 659 for (;;) { 660 void *entry = rcu_dereference_raw(child->slots[offset]); 661 if (xa_is_node(entry) && child->shift) { 662 child = entry_to_node(entry); 663 offset = 0; 664 continue; 665 } 666 offset++; 667 while (offset == RADIX_TREE_MAP_SIZE) { 668 struct radix_tree_node *old = child; 669 offset = child->offset + 1; 670 child = child->parent; 671 WARN_ON_ONCE(!list_empty(&old->private_list)); 672 radix_tree_node_free(old); 673 if (old == entry_to_node(node)) 674 return; 675 } 676 } 677 } 678 679 static inline int insert_entries(struct radix_tree_node *node, 680 void __rcu **slot, void *item, bool replace) 681 { 682 if (*slot) 683 return -EEXIST; 684 rcu_assign_pointer(*slot, item); 685 if (node) { 686 node->count++; 687 if (xa_is_value(item)) 688 node->nr_values++; 689 } 690 return 1; 691 } 692 693 /** 694 * radix_tree_insert - insert into a radix tree 695 * @root: radix tree root 696 * @index: index key 697 * @item: item to insert 698 * 699 * Insert an item into the radix tree at position @index. 700 */ 701 int radix_tree_insert(struct radix_tree_root *root, unsigned long index, 702 void *item) 703 { 704 struct radix_tree_node *node; 705 void __rcu **slot; 706 int error; 707 708 BUG_ON(radix_tree_is_internal_node(item)); 709 710 error = __radix_tree_create(root, index, &node, &slot); 711 if (error) 712 return error; 713 714 error = insert_entries(node, slot, item, false); 715 if (error < 0) 716 return error; 717 718 if (node) { 719 unsigned offset = get_slot_offset(node, slot); 720 BUG_ON(tag_get(node, 0, offset)); 721 BUG_ON(tag_get(node, 1, offset)); 722 BUG_ON(tag_get(node, 2, offset)); 723 } else { 724 BUG_ON(root_tags_get(root)); 725 } 726 727 return 0; 728 } 729 EXPORT_SYMBOL(radix_tree_insert); 730 731 /** 732 * __radix_tree_lookup - lookup an item in a radix tree 733 * @root: radix tree root 734 * @index: index key 735 * @nodep: returns node 736 * @slotp: returns slot 737 * 738 * Lookup and return the item at position @index in the radix 739 * tree @root. 740 * 741 * Until there is more than one item in the tree, no nodes are 742 * allocated and @root->xa_head is used as a direct slot instead of 743 * pointing to a node, in which case *@nodep will be NULL. 744 */ 745 void *__radix_tree_lookup(const struct radix_tree_root *root, 746 unsigned long index, struct radix_tree_node **nodep, 747 void __rcu ***slotp) 748 { 749 struct radix_tree_node *node, *parent; 750 unsigned long maxindex; 751 void __rcu **slot; 752 753 restart: 754 parent = NULL; 755 slot = (void __rcu **)&root->xa_head; 756 radix_tree_load_root(root, &node, &maxindex); 757 if (index > maxindex) 758 return NULL; 759 760 while (radix_tree_is_internal_node(node)) { 761 unsigned offset; 762 763 parent = entry_to_node(node); 764 offset = radix_tree_descend(parent, &node, index); 765 slot = parent->slots + offset; 766 if (node == RADIX_TREE_RETRY) 767 goto restart; 768 if (parent->shift == 0) 769 break; 770 } 771 772 if (nodep) 773 *nodep = parent; 774 if (slotp) 775 *slotp = slot; 776 return node; 777 } 778 779 /** 780 * radix_tree_lookup_slot - lookup a slot in a radix tree 781 * @root: radix tree root 782 * @index: index key 783 * 784 * Returns: the slot corresponding to the position @index in the 785 * radix tree @root. This is useful for update-if-exists operations. 786 * 787 * This function can be called under rcu_read_lock iff the slot is not 788 * modified by radix_tree_replace_slot, otherwise it must be called 789 * exclusive from other writers. Any dereference of the slot must be done 790 * using radix_tree_deref_slot. 791 */ 792 void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root, 793 unsigned long index) 794 { 795 void __rcu **slot; 796 797 if (!__radix_tree_lookup(root, index, NULL, &slot)) 798 return NULL; 799 return slot; 800 } 801 EXPORT_SYMBOL(radix_tree_lookup_slot); 802 803 /** 804 * radix_tree_lookup - perform lookup operation on a radix tree 805 * @root: radix tree root 806 * @index: index key 807 * 808 * Lookup the item at the position @index in the radix tree @root. 809 * 810 * This function can be called under rcu_read_lock, however the caller 811 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free 812 * them safely). No RCU barriers are required to access or modify the 813 * returned item, however. 814 */ 815 void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index) 816 { 817 return __radix_tree_lookup(root, index, NULL, NULL); 818 } 819 EXPORT_SYMBOL(radix_tree_lookup); 820 821 static void replace_slot(void __rcu **slot, void *item, 822 struct radix_tree_node *node, int count, int values) 823 { 824 if (node && (count || values)) { 825 node->count += count; 826 node->nr_values += values; 827 } 828 829 rcu_assign_pointer(*slot, item); 830 } 831 832 static bool node_tag_get(const struct radix_tree_root *root, 833 const struct radix_tree_node *node, 834 unsigned int tag, unsigned int offset) 835 { 836 if (node) 837 return tag_get(node, tag, offset); 838 return root_tag_get(root, tag); 839 } 840 841 /* 842 * IDR users want to be able to store NULL in the tree, so if the slot isn't 843 * free, don't adjust the count, even if it's transitioning between NULL and 844 * non-NULL. For the IDA, we mark slots as being IDR_FREE while they still 845 * have empty bits, but it only stores NULL in slots when they're being 846 * deleted. 847 */ 848 static int calculate_count(struct radix_tree_root *root, 849 struct radix_tree_node *node, void __rcu **slot, 850 void *item, void *old) 851 { 852 if (is_idr(root)) { 853 unsigned offset = get_slot_offset(node, slot); 854 bool free = node_tag_get(root, node, IDR_FREE, offset); 855 if (!free) 856 return 0; 857 if (!old) 858 return 1; 859 } 860 return !!item - !!old; 861 } 862 863 /** 864 * __radix_tree_replace - replace item in a slot 865 * @root: radix tree root 866 * @node: pointer to tree node 867 * @slot: pointer to slot in @node 868 * @item: new item to store in the slot. 869 * 870 * For use with __radix_tree_lookup(). Caller must hold tree write locked 871 * across slot lookup and replacement. 872 */ 873 void __radix_tree_replace(struct radix_tree_root *root, 874 struct radix_tree_node *node, 875 void __rcu **slot, void *item) 876 { 877 void *old = rcu_dereference_raw(*slot); 878 int values = !!xa_is_value(item) - !!xa_is_value(old); 879 int count = calculate_count(root, node, slot, item, old); 880 881 /* 882 * This function supports replacing value entries and 883 * deleting entries, but that needs accounting against the 884 * node unless the slot is root->xa_head. 885 */ 886 WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) && 887 (count || values)); 888 replace_slot(slot, item, node, count, values); 889 890 if (!node) 891 return; 892 893 delete_node(root, node); 894 } 895 896 /** 897 * radix_tree_replace_slot - replace item in a slot 898 * @root: radix tree root 899 * @slot: pointer to slot 900 * @item: new item to store in the slot. 901 * 902 * For use with radix_tree_lookup_slot() and 903 * radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked 904 * across slot lookup and replacement. 905 * 906 * NOTE: This cannot be used to switch between non-entries (empty slots), 907 * regular entries, and value entries, as that requires accounting 908 * inside the radix tree node. When switching from one type of entry or 909 * deleting, use __radix_tree_lookup() and __radix_tree_replace() or 910 * radix_tree_iter_replace(). 911 */ 912 void radix_tree_replace_slot(struct radix_tree_root *root, 913 void __rcu **slot, void *item) 914 { 915 __radix_tree_replace(root, NULL, slot, item); 916 } 917 EXPORT_SYMBOL(radix_tree_replace_slot); 918 919 /** 920 * radix_tree_iter_replace - replace item in a slot 921 * @root: radix tree root 922 * @iter: iterator state 923 * @slot: pointer to slot 924 * @item: new item to store in the slot. 925 * 926 * For use with radix_tree_for_each_slot(). 927 * Caller must hold tree write locked. 928 */ 929 void radix_tree_iter_replace(struct radix_tree_root *root, 930 const struct radix_tree_iter *iter, 931 void __rcu **slot, void *item) 932 { 933 __radix_tree_replace(root, iter->node, slot, item); 934 } 935 936 static void node_tag_set(struct radix_tree_root *root, 937 struct radix_tree_node *node, 938 unsigned int tag, unsigned int offset) 939 { 940 while (node) { 941 if (tag_get(node, tag, offset)) 942 return; 943 tag_set(node, tag, offset); 944 offset = node->offset; 945 node = node->parent; 946 } 947 948 if (!root_tag_get(root, tag)) 949 root_tag_set(root, tag); 950 } 951 952 /** 953 * radix_tree_tag_set - set a tag on a radix tree node 954 * @root: radix tree root 955 * @index: index key 956 * @tag: tag index 957 * 958 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) 959 * corresponding to @index in the radix tree. From 960 * the root all the way down to the leaf node. 961 * 962 * Returns the address of the tagged item. Setting a tag on a not-present 963 * item is a bug. 964 */ 965 void *radix_tree_tag_set(struct radix_tree_root *root, 966 unsigned long index, unsigned int tag) 967 { 968 struct radix_tree_node *node, *parent; 969 unsigned long maxindex; 970 971 radix_tree_load_root(root, &node, &maxindex); 972 BUG_ON(index > maxindex); 973 974 while (radix_tree_is_internal_node(node)) { 975 unsigned offset; 976 977 parent = entry_to_node(node); 978 offset = radix_tree_descend(parent, &node, index); 979 BUG_ON(!node); 980 981 if (!tag_get(parent, tag, offset)) 982 tag_set(parent, tag, offset); 983 } 984 985 /* set the root's tag bit */ 986 if (!root_tag_get(root, tag)) 987 root_tag_set(root, tag); 988 989 return node; 990 } 991 EXPORT_SYMBOL(radix_tree_tag_set); 992 993 static void node_tag_clear(struct radix_tree_root *root, 994 struct radix_tree_node *node, 995 unsigned int tag, unsigned int offset) 996 { 997 while (node) { 998 if (!tag_get(node, tag, offset)) 999 return; 1000 tag_clear(node, tag, offset); 1001 if (any_tag_set(node, tag)) 1002 return; 1003 1004 offset = node->offset; 1005 node = node->parent; 1006 } 1007 1008 /* clear the root's tag bit */ 1009 if (root_tag_get(root, tag)) 1010 root_tag_clear(root, tag); 1011 } 1012 1013 /** 1014 * radix_tree_tag_clear - clear a tag on a radix tree node 1015 * @root: radix tree root 1016 * @index: index key 1017 * @tag: tag index 1018 * 1019 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) 1020 * corresponding to @index in the radix tree. If this causes 1021 * the leaf node to have no tags set then clear the tag in the 1022 * next-to-leaf node, etc. 1023 * 1024 * Returns the address of the tagged item on success, else NULL. ie: 1025 * has the same return value and semantics as radix_tree_lookup(). 1026 */ 1027 void *radix_tree_tag_clear(struct radix_tree_root *root, 1028 unsigned long index, unsigned int tag) 1029 { 1030 struct radix_tree_node *node, *parent; 1031 unsigned long maxindex; 1032 int offset; 1033 1034 radix_tree_load_root(root, &node, &maxindex); 1035 if (index > maxindex) 1036 return NULL; 1037 1038 parent = NULL; 1039 1040 while (radix_tree_is_internal_node(node)) { 1041 parent = entry_to_node(node); 1042 offset = radix_tree_descend(parent, &node, index); 1043 } 1044 1045 if (node) 1046 node_tag_clear(root, parent, tag, offset); 1047 1048 return node; 1049 } 1050 EXPORT_SYMBOL(radix_tree_tag_clear); 1051 1052 /** 1053 * radix_tree_iter_tag_clear - clear a tag on the current iterator entry 1054 * @root: radix tree root 1055 * @iter: iterator state 1056 * @tag: tag to clear 1057 */ 1058 void radix_tree_iter_tag_clear(struct radix_tree_root *root, 1059 const struct radix_tree_iter *iter, unsigned int tag) 1060 { 1061 node_tag_clear(root, iter->node, tag, iter_offset(iter)); 1062 } 1063 1064 /** 1065 * radix_tree_tag_get - get a tag on a radix tree node 1066 * @root: radix tree root 1067 * @index: index key 1068 * @tag: tag index (< RADIX_TREE_MAX_TAGS) 1069 * 1070 * Return values: 1071 * 1072 * 0: tag not present or not set 1073 * 1: tag set 1074 * 1075 * Note that the return value of this function may not be relied on, even if 1076 * the RCU lock is held, unless tag modification and node deletion are excluded 1077 * from concurrency. 1078 */ 1079 int radix_tree_tag_get(const struct radix_tree_root *root, 1080 unsigned long index, unsigned int tag) 1081 { 1082 struct radix_tree_node *node, *parent; 1083 unsigned long maxindex; 1084 1085 if (!root_tag_get(root, tag)) 1086 return 0; 1087 1088 radix_tree_load_root(root, &node, &maxindex); 1089 if (index > maxindex) 1090 return 0; 1091 1092 while (radix_tree_is_internal_node(node)) { 1093 unsigned offset; 1094 1095 parent = entry_to_node(node); 1096 offset = radix_tree_descend(parent, &node, index); 1097 1098 if (!tag_get(parent, tag, offset)) 1099 return 0; 1100 if (node == RADIX_TREE_RETRY) 1101 break; 1102 } 1103 1104 return 1; 1105 } 1106 EXPORT_SYMBOL(radix_tree_tag_get); 1107 1108 /* Construct iter->tags bit-mask from node->tags[tag] array */ 1109 static void set_iter_tags(struct radix_tree_iter *iter, 1110 struct radix_tree_node *node, unsigned offset, 1111 unsigned tag) 1112 { 1113 unsigned tag_long = offset / BITS_PER_LONG; 1114 unsigned tag_bit = offset % BITS_PER_LONG; 1115 1116 if (!node) { 1117 iter->tags = 1; 1118 return; 1119 } 1120 1121 iter->tags = node->tags[tag][tag_long] >> tag_bit; 1122 1123 /* This never happens if RADIX_TREE_TAG_LONGS == 1 */ 1124 if (tag_long < RADIX_TREE_TAG_LONGS - 1) { 1125 /* Pick tags from next element */ 1126 if (tag_bit) 1127 iter->tags |= node->tags[tag][tag_long + 1] << 1128 (BITS_PER_LONG - tag_bit); 1129 /* Clip chunk size, here only BITS_PER_LONG tags */ 1130 iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG); 1131 } 1132 } 1133 1134 void __rcu **radix_tree_iter_resume(void __rcu **slot, 1135 struct radix_tree_iter *iter) 1136 { 1137 slot++; 1138 iter->index = __radix_tree_iter_add(iter, 1); 1139 iter->next_index = iter->index; 1140 iter->tags = 0; 1141 return NULL; 1142 } 1143 EXPORT_SYMBOL(radix_tree_iter_resume); 1144 1145 /** 1146 * radix_tree_next_chunk - find next chunk of slots for iteration 1147 * 1148 * @root: radix tree root 1149 * @iter: iterator state 1150 * @flags: RADIX_TREE_ITER_* flags and tag index 1151 * Returns: pointer to chunk first slot, or NULL if iteration is over 1152 */ 1153 void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root, 1154 struct radix_tree_iter *iter, unsigned flags) 1155 { 1156 unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; 1157 struct radix_tree_node *node, *child; 1158 unsigned long index, offset, maxindex; 1159 1160 if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag)) 1161 return NULL; 1162 1163 /* 1164 * Catch next_index overflow after ~0UL. iter->index never overflows 1165 * during iterating; it can be zero only at the beginning. 1166 * And we cannot overflow iter->next_index in a single step, 1167 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG. 1168 * 1169 * This condition also used by radix_tree_next_slot() to stop 1170 * contiguous iterating, and forbid switching to the next chunk. 1171 */ 1172 index = iter->next_index; 1173 if (!index && iter->index) 1174 return NULL; 1175 1176 restart: 1177 radix_tree_load_root(root, &child, &maxindex); 1178 if (index > maxindex) 1179 return NULL; 1180 if (!child) 1181 return NULL; 1182 1183 if (!radix_tree_is_internal_node(child)) { 1184 /* Single-slot tree */ 1185 iter->index = index; 1186 iter->next_index = maxindex + 1; 1187 iter->tags = 1; 1188 iter->node = NULL; 1189 return (void __rcu **)&root->xa_head; 1190 } 1191 1192 do { 1193 node = entry_to_node(child); 1194 offset = radix_tree_descend(node, &child, index); 1195 1196 if ((flags & RADIX_TREE_ITER_TAGGED) ? 1197 !tag_get(node, tag, offset) : !child) { 1198 /* Hole detected */ 1199 if (flags & RADIX_TREE_ITER_CONTIG) 1200 return NULL; 1201 1202 if (flags & RADIX_TREE_ITER_TAGGED) 1203 offset = radix_tree_find_next_bit(node, tag, 1204 offset + 1); 1205 else 1206 while (++offset < RADIX_TREE_MAP_SIZE) { 1207 void *slot = rcu_dereference_raw( 1208 node->slots[offset]); 1209 if (slot) 1210 break; 1211 } 1212 index &= ~node_maxindex(node); 1213 index += offset << node->shift; 1214 /* Overflow after ~0UL */ 1215 if (!index) 1216 return NULL; 1217 if (offset == RADIX_TREE_MAP_SIZE) 1218 goto restart; 1219 child = rcu_dereference_raw(node->slots[offset]); 1220 } 1221 1222 if (!child) 1223 goto restart; 1224 if (child == RADIX_TREE_RETRY) 1225 break; 1226 } while (node->shift && radix_tree_is_internal_node(child)); 1227 1228 /* Update the iterator state */ 1229 iter->index = (index &~ node_maxindex(node)) | offset; 1230 iter->next_index = (index | node_maxindex(node)) + 1; 1231 iter->node = node; 1232 1233 if (flags & RADIX_TREE_ITER_TAGGED) 1234 set_iter_tags(iter, node, offset, tag); 1235 1236 return node->slots + offset; 1237 } 1238 EXPORT_SYMBOL(radix_tree_next_chunk); 1239 1240 /** 1241 * radix_tree_gang_lookup - perform multiple lookup on a radix tree 1242 * @root: radix tree root 1243 * @results: where the results of the lookup are placed 1244 * @first_index: start the lookup from this key 1245 * @max_items: place up to this many items at *results 1246 * 1247 * Performs an index-ascending scan of the tree for present items. Places 1248 * them at *@results and returns the number of items which were placed at 1249 * *@results. 1250 * 1251 * The implementation is naive. 1252 * 1253 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under 1254 * rcu_read_lock. In this case, rather than the returned results being 1255 * an atomic snapshot of the tree at a single point in time, the 1256 * semantics of an RCU protected gang lookup are as though multiple 1257 * radix_tree_lookups have been issued in individual locks, and results 1258 * stored in 'results'. 1259 */ 1260 unsigned int 1261 radix_tree_gang_lookup(const struct radix_tree_root *root, void **results, 1262 unsigned long first_index, unsigned int max_items) 1263 { 1264 struct radix_tree_iter iter; 1265 void __rcu **slot; 1266 unsigned int ret = 0; 1267 1268 if (unlikely(!max_items)) 1269 return 0; 1270 1271 radix_tree_for_each_slot(slot, root, &iter, first_index) { 1272 results[ret] = rcu_dereference_raw(*slot); 1273 if (!results[ret]) 1274 continue; 1275 if (radix_tree_is_internal_node(results[ret])) { 1276 slot = radix_tree_iter_retry(&iter); 1277 continue; 1278 } 1279 if (++ret == max_items) 1280 break; 1281 } 1282 1283 return ret; 1284 } 1285 EXPORT_SYMBOL(radix_tree_gang_lookup); 1286 1287 /** 1288 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree 1289 * based on a tag 1290 * @root: radix tree root 1291 * @results: where the results of the lookup are placed 1292 * @first_index: start the lookup from this key 1293 * @max_items: place up to this many items at *results 1294 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 1295 * 1296 * Performs an index-ascending scan of the tree for present items which 1297 * have the tag indexed by @tag set. Places the items at *@results and 1298 * returns the number of items which were placed at *@results. 1299 */ 1300 unsigned int 1301 radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results, 1302 unsigned long first_index, unsigned int max_items, 1303 unsigned int tag) 1304 { 1305 struct radix_tree_iter iter; 1306 void __rcu **slot; 1307 unsigned int ret = 0; 1308 1309 if (unlikely(!max_items)) 1310 return 0; 1311 1312 radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { 1313 results[ret] = rcu_dereference_raw(*slot); 1314 if (!results[ret]) 1315 continue; 1316 if (radix_tree_is_internal_node(results[ret])) { 1317 slot = radix_tree_iter_retry(&iter); 1318 continue; 1319 } 1320 if (++ret == max_items) 1321 break; 1322 } 1323 1324 return ret; 1325 } 1326 EXPORT_SYMBOL(radix_tree_gang_lookup_tag); 1327 1328 /** 1329 * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a 1330 * radix tree based on a tag 1331 * @root: radix tree root 1332 * @results: where the results of the lookup are placed 1333 * @first_index: start the lookup from this key 1334 * @max_items: place up to this many items at *results 1335 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 1336 * 1337 * Performs an index-ascending scan of the tree for present items which 1338 * have the tag indexed by @tag set. Places the slots at *@results and 1339 * returns the number of slots which were placed at *@results. 1340 */ 1341 unsigned int 1342 radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root, 1343 void __rcu ***results, unsigned long first_index, 1344 unsigned int max_items, unsigned int tag) 1345 { 1346 struct radix_tree_iter iter; 1347 void __rcu **slot; 1348 unsigned int ret = 0; 1349 1350 if (unlikely(!max_items)) 1351 return 0; 1352 1353 radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { 1354 results[ret] = slot; 1355 if (++ret == max_items) 1356 break; 1357 } 1358 1359 return ret; 1360 } 1361 EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); 1362 1363 static bool __radix_tree_delete(struct radix_tree_root *root, 1364 struct radix_tree_node *node, void __rcu **slot) 1365 { 1366 void *old = rcu_dereference_raw(*slot); 1367 int values = xa_is_value(old) ? -1 : 0; 1368 unsigned offset = get_slot_offset(node, slot); 1369 int tag; 1370 1371 if (is_idr(root)) 1372 node_tag_set(root, node, IDR_FREE, offset); 1373 else 1374 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 1375 node_tag_clear(root, node, tag, offset); 1376 1377 replace_slot(slot, NULL, node, -1, values); 1378 return node && delete_node(root, node); 1379 } 1380 1381 /** 1382 * radix_tree_iter_delete - delete the entry at this iterator position 1383 * @root: radix tree root 1384 * @iter: iterator state 1385 * @slot: pointer to slot 1386 * 1387 * Delete the entry at the position currently pointed to by the iterator. 1388 * This may result in the current node being freed; if it is, the iterator 1389 * is advanced so that it will not reference the freed memory. This 1390 * function may be called without any locking if there are no other threads 1391 * which can access this tree. 1392 */ 1393 void radix_tree_iter_delete(struct radix_tree_root *root, 1394 struct radix_tree_iter *iter, void __rcu **slot) 1395 { 1396 if (__radix_tree_delete(root, iter->node, slot)) 1397 iter->index = iter->next_index; 1398 } 1399 EXPORT_SYMBOL(radix_tree_iter_delete); 1400 1401 /** 1402 * radix_tree_delete_item - delete an item from a radix tree 1403 * @root: radix tree root 1404 * @index: index key 1405 * @item: expected item 1406 * 1407 * Remove @item at @index from the radix tree rooted at @root. 1408 * 1409 * Return: the deleted entry, or %NULL if it was not present 1410 * or the entry at the given @index was not @item. 1411 */ 1412 void *radix_tree_delete_item(struct radix_tree_root *root, 1413 unsigned long index, void *item) 1414 { 1415 struct radix_tree_node *node = NULL; 1416 void __rcu **slot = NULL; 1417 void *entry; 1418 1419 entry = __radix_tree_lookup(root, index, &node, &slot); 1420 if (!slot) 1421 return NULL; 1422 if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE, 1423 get_slot_offset(node, slot)))) 1424 return NULL; 1425 1426 if (item && entry != item) 1427 return NULL; 1428 1429 __radix_tree_delete(root, node, slot); 1430 1431 return entry; 1432 } 1433 EXPORT_SYMBOL(radix_tree_delete_item); 1434 1435 /** 1436 * radix_tree_delete - delete an entry from a radix tree 1437 * @root: radix tree root 1438 * @index: index key 1439 * 1440 * Remove the entry at @index from the radix tree rooted at @root. 1441 * 1442 * Return: The deleted entry, or %NULL if it was not present. 1443 */ 1444 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) 1445 { 1446 return radix_tree_delete_item(root, index, NULL); 1447 } 1448 EXPORT_SYMBOL(radix_tree_delete); 1449 1450 /** 1451 * radix_tree_tagged - test whether any items in the tree are tagged 1452 * @root: radix tree root 1453 * @tag: tag to test 1454 */ 1455 int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag) 1456 { 1457 return root_tag_get(root, tag); 1458 } 1459 EXPORT_SYMBOL(radix_tree_tagged); 1460 1461 /** 1462 * idr_preload - preload for idr_alloc() 1463 * @gfp_mask: allocation mask to use for preloading 1464 * 1465 * Preallocate memory to use for the next call to idr_alloc(). This function 1466 * returns with preemption disabled. It will be enabled by idr_preload_end(). 1467 */ 1468 void idr_preload(gfp_t gfp_mask) 1469 { 1470 if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE)) 1471 local_lock(&radix_tree_preloads.lock); 1472 } 1473 EXPORT_SYMBOL(idr_preload); 1474 1475 void __rcu **idr_get_free(struct radix_tree_root *root, 1476 struct radix_tree_iter *iter, gfp_t gfp, 1477 unsigned long max) 1478 { 1479 struct radix_tree_node *node = NULL, *child; 1480 void __rcu **slot = (void __rcu **)&root->xa_head; 1481 unsigned long maxindex, start = iter->next_index; 1482 unsigned int shift, offset = 0; 1483 1484 grow: 1485 shift = radix_tree_load_root(root, &child, &maxindex); 1486 if (!radix_tree_tagged(root, IDR_FREE)) 1487 start = max(start, maxindex + 1); 1488 if (start > max) 1489 return ERR_PTR(-ENOSPC); 1490 1491 if (start > maxindex) { 1492 int error = radix_tree_extend(root, gfp, start, shift); 1493 if (error < 0) 1494 return ERR_PTR(error); 1495 shift = error; 1496 child = rcu_dereference_raw(root->xa_head); 1497 } 1498 if (start == 0 && shift == 0) 1499 shift = RADIX_TREE_MAP_SHIFT; 1500 1501 while (shift) { 1502 shift -= RADIX_TREE_MAP_SHIFT; 1503 if (child == NULL) { 1504 /* Have to add a child node. */ 1505 child = radix_tree_node_alloc(gfp, node, root, shift, 1506 offset, 0, 0); 1507 if (!child) 1508 return ERR_PTR(-ENOMEM); 1509 all_tag_set(child, IDR_FREE); 1510 rcu_assign_pointer(*slot, node_to_entry(child)); 1511 if (node) 1512 node->count++; 1513 } else if (!radix_tree_is_internal_node(child)) 1514 break; 1515 1516 node = entry_to_node(child); 1517 offset = radix_tree_descend(node, &child, start); 1518 if (!tag_get(node, IDR_FREE, offset)) { 1519 offset = radix_tree_find_next_bit(node, IDR_FREE, 1520 offset + 1); 1521 start = next_index(start, node, offset); 1522 if (start > max || start == 0) 1523 return ERR_PTR(-ENOSPC); 1524 while (offset == RADIX_TREE_MAP_SIZE) { 1525 offset = node->offset + 1; 1526 node = node->parent; 1527 if (!node) 1528 goto grow; 1529 shift = node->shift; 1530 } 1531 child = rcu_dereference_raw(node->slots[offset]); 1532 } 1533 slot = &node->slots[offset]; 1534 } 1535 1536 iter->index = start; 1537 if (node) 1538 iter->next_index = 1 + min(max, (start | node_maxindex(node))); 1539 else 1540 iter->next_index = 1; 1541 iter->node = node; 1542 set_iter_tags(iter, node, offset, IDR_FREE); 1543 1544 return slot; 1545 } 1546 1547 /** 1548 * idr_destroy - release all internal memory from an IDR 1549 * @idr: idr handle 1550 * 1551 * After this function is called, the IDR is empty, and may be reused or 1552 * the data structure containing it may be freed. 1553 * 1554 * A typical clean-up sequence for objects stored in an idr tree will use 1555 * idr_for_each() to free all objects, if necessary, then idr_destroy() to 1556 * free the memory used to keep track of those objects. 1557 */ 1558 void idr_destroy(struct idr *idr) 1559 { 1560 struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head); 1561 if (radix_tree_is_internal_node(node)) 1562 radix_tree_free_nodes(node); 1563 idr->idr_rt.xa_head = NULL; 1564 root_tag_set(&idr->idr_rt, IDR_FREE); 1565 } 1566 EXPORT_SYMBOL(idr_destroy); 1567 1568 static void 1569 radix_tree_node_ctor(void *arg) 1570 { 1571 struct radix_tree_node *node = arg; 1572 1573 memset(node, 0, sizeof(*node)); 1574 INIT_LIST_HEAD(&node->private_list); 1575 } 1576 1577 static int radix_tree_cpu_dead(unsigned int cpu) 1578 { 1579 struct radix_tree_preload *rtp; 1580 struct radix_tree_node *node; 1581 1582 /* Free per-cpu pool of preloaded nodes */ 1583 rtp = &per_cpu(radix_tree_preloads, cpu); 1584 while (rtp->nr) { 1585 node = rtp->nodes; 1586 rtp->nodes = node->parent; 1587 kmem_cache_free(radix_tree_node_cachep, node); 1588 rtp->nr--; 1589 } 1590 return 0; 1591 } 1592 1593 void __init radix_tree_init(void) 1594 { 1595 int ret; 1596 1597 BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32); 1598 BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK); 1599 BUILD_BUG_ON(XA_CHUNK_SIZE > 255); 1600 radix_tree_node_cachep = kmem_cache_create("radix_tree_node", 1601 sizeof(struct radix_tree_node), 0, 1602 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, 1603 radix_tree_node_ctor); 1604 ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead", 1605 NULL, radix_tree_cpu_dead); 1606 WARN_ON(ret < 0); 1607 } 1608