// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2001 Momchil Velikov
 * Portions Copyright (C) 2001 Christoph Hellwig
 * Copyright (C) 2005 SGI, Christoph Lameter
 * Copyright (C) 2006 Nick Piggin
 * Copyright (C) 2012 Konstantin Khlebnikov
 * Copyright (C) 2016 Intel, Matthew Wilcox
 * Copyright (C) 2016 Intel, Ross Zwisler
 */

#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/idr.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kmemleak.h>
#include <linux/percpu.h>
#include <linux/preempt.h>		/* in_interrupt() */
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/xarray.h>


/*
 * Radix tree node cache.
 */
struct kmem_cache *radix_tree_node_cachep;

/*
 * The radix tree is variable-height, so an insert operation not only has
 * to build the branch to its corresponding item, it also has to build the
 * branch to existing items if the size has to be increased (by
 * radix_tree_extend).
 *
 * The worst case is a zero height tree with just a single item at index 0,
 * and then inserting an item at index ULONG_MAX. This requires 2 new branches
 * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared.
 * Hence:
 */
#define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)

/*
 * The IDR does not have to be as high as the radix tree since it uses
 * signed integers, not unsigned longs.
 */
#define IDR_INDEX_BITS		(8 /* CHAR_BIT */ * sizeof(int) - 1)
#define IDR_MAX_PATH		(DIV_ROUND_UP(IDR_INDEX_BITS, \
						RADIX_TREE_MAP_SHIFT))
#define IDR_PRELOAD_SIZE	(IDR_MAX_PATH * 2 - 1)

/*
 * Per-cpu pool of preloaded nodes
 */
struct radix_tree_preload {
	unsigned nr;
	/* nodes->parent points to next preallocated node */
	struct radix_tree_node *nodes;
};
static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };

static inline struct radix_tree_node *entry_to_node(void *ptr)
{
	return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE);
}

static inline void *node_to_entry(void *ptr)
{
	return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
}

#define RADIX_TREE_RETRY	XA_RETRY_ENTRY

static inline unsigned long
get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
{
	return parent ? slot - parent->slots : 0;
}
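
/*
 * Note: node_to_entry() and entry_to_node() are inverses; an interior
 * node is stored in a slot (or in root->xa_head) with the
 * RADIX_TREE_INTERNAL_NODE bit set, so that lookups can tell such a
 * pointer apart from an ordinary user entry.  For any node,
 *
 *	entry_to_node(node_to_entry(node)) == node
 */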

static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
			struct radix_tree_node **nodep, unsigned long index)
{
	unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
	void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);

	*nodep = (void *)entry;
	return offset;
}

static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
{
	return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
}

static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__set_bit(offset, node->tags[tag]);
}

static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__clear_bit(offset, node->tags[tag]);
}

static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	return test_bit(offset, node->tags[tag]);
}

static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
{
	root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
}

static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
{
	root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
}

static inline void root_tag_clear_all(struct radix_tree_root *root)
{
	root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1);
}

static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
{
	return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT));
}

static inline unsigned root_tags_get(const struct radix_tree_root *root)
{
	return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT;
}

static inline bool is_idr(const struct radix_tree_root *root)
{
	return !!(root->xa_flags & ROOT_IS_IDR);
}

/*
 * Returns 1 if any slot in the node has this tag set.
 * Otherwise returns 0.
 */
static inline int any_tag_set(const struct radix_tree_node *node,
							unsigned int tag)
{
	unsigned idx;
	for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
		if (node->tags[tag][idx])
			return 1;
	}
	return 0;
}

static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag)
{
	bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE);
}

/**
 * radix_tree_find_next_bit - find the next set bit in a memory region
 *
 * @node: the node whose tag bitmap should be searched
 * @tag: the tag index
 * @offset: the bitnumber to start searching at
 *
 * Unrollable variant of find_next_bit() for constant size arrays.
 * Tail bits starting from RADIX_TREE_MAP_SIZE to roundup(RADIX_TREE_MAP_SIZE,
 * BITS_PER_LONG) must be zero.
 * Returns next bit offset, or RADIX_TREE_MAP_SIZE if nothing found.
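 *
 * Worked example (illustrative, assuming RADIX_TREE_MAP_SIZE is 64): if
 * only bit 42 of node->tags[tag] is set, a search starting at offset 10
 * returns 42, while a search starting at offset 43 returns 64 (nothing
 * found).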
 */
static __always_inline unsigned long
radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
			 unsigned long offset)
{
	const unsigned long *addr = node->tags[tag];

	if (offset < RADIX_TREE_MAP_SIZE) {
		unsigned long tmp;

		addr += offset / BITS_PER_LONG;
		tmp = *addr >> (offset % BITS_PER_LONG);
		if (tmp)
			return __ffs(tmp) + offset;
		offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
		while (offset < RADIX_TREE_MAP_SIZE) {
			tmp = *++addr;
			if (tmp)
				return __ffs(tmp) + offset;
			offset += BITS_PER_LONG;
		}
	}
	return RADIX_TREE_MAP_SIZE;
}

static unsigned int iter_offset(const struct radix_tree_iter *iter)
{
	return iter->index & RADIX_TREE_MAP_MASK;
}

/*
 * The maximum index which can be stored in a radix tree
 */
static inline unsigned long shift_maxindex(unsigned int shift)
{
	return (RADIX_TREE_MAP_SIZE << shift) - 1;
}

static inline unsigned long node_maxindex(const struct radix_tree_node *node)
{
	return shift_maxindex(node->shift);
}

static unsigned long next_index(unsigned long index,
				const struct radix_tree_node *node,
				unsigned long offset)
{
	return (index & ~node_maxindex(node)) + (offset << node->shift);
}

/*
 * This assumes that the caller has performed appropriate preallocation, and
 * that the caller has pinned this thread of control to the current CPU.
 */
static struct radix_tree_node *
radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
			struct radix_tree_root *root,
			unsigned int shift, unsigned int offset,
			unsigned int count, unsigned int nr_values)
{
	struct radix_tree_node *ret = NULL;

	/*
	 * Preload code isn't irq safe and it doesn't make sense to use
	 * preloading during an interrupt anyway as all the allocations have
	 * to be atomic. So just do normal allocation when in interrupt.
	 */
	if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
		struct radix_tree_preload *rtp;

		/*
		 * Even if the caller has preloaded, try to allocate from the
		 * cache first for the new node to get accounted to the memory
		 * cgroup.
		 */
		ret = kmem_cache_alloc(radix_tree_node_cachep,
				       gfp_mask | __GFP_NOWARN);
		if (ret)
			goto out;

		/*
		 * Provided the caller has preloaded here, we will always
		 * succeed in getting a node here (and never reach
		 * kmem_cache_alloc)
		 */
		rtp = this_cpu_ptr(&radix_tree_preloads);
		if (rtp->nr) {
			ret = rtp->nodes;
			rtp->nodes = ret->parent;
			rtp->nr--;
		}
		/*
		 * Update the allocation stack trace as this is more useful
		 * for debugging.
		 */
		kmemleak_update_trace(ret);
		goto out;
	}
	ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
out:
	BUG_ON(radix_tree_is_internal_node(ret));
	if (ret) {
		ret->shift = shift;
		ret->offset = offset;
		ret->count = count;
		ret->nr_values = nr_values;
		ret->parent = parent;
		ret->array = root;
	}
	return ret;
}

void radix_tree_node_rcu_free(struct rcu_head *head)
{
	struct radix_tree_node *node =
			container_of(head, struct radix_tree_node, rcu_head);

	/*
	 * Must only free zeroed nodes into the slab.  We can be left with
	 * non-NULL entries by radix_tree_free_nodes, so clear the entries
	 * and tags here.
	 */
	memset(node->slots, 0, sizeof(node->slots));
	memset(node->tags, 0, sizeof(node->tags));
	INIT_LIST_HEAD(&node->private_list);

	kmem_cache_free(radix_tree_node_cachep, node);
}

static inline void
radix_tree_node_free(struct radix_tree_node *node)
{
	call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
}

/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail.  On
 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
 */
static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
{
	struct radix_tree_preload *rtp;
	struct radix_tree_node *node;
	int ret = -ENOMEM;

	/*
	 * Nodes preloaded by one cgroup can be used by another cgroup, so
	 * they should never be accounted to any particular memory cgroup.
	 */
	gfp_mask &= ~__GFP_ACCOUNT;

	preempt_disable();
	rtp = this_cpu_ptr(&radix_tree_preloads);
	while (rtp->nr < nr) {
		preempt_enable();
		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
		if (node == NULL)
			goto out;
		preempt_disable();
		rtp = this_cpu_ptr(&radix_tree_preloads);
		if (rtp->nr < nr) {
			node->parent = rtp->nodes;
			rtp->nodes = node;
			rtp->nr++;
		} else {
			kmem_cache_free(radix_tree_node_cachep, node);
		}
	}
	ret = 0;
out:
	return ret;
}

/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail.  On
 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
 */
int radix_tree_preload(gfp_t gfp_mask)
{
	/* Warn on non-sensical use... */
	WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
	return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
}
EXPORT_SYMBOL(radix_tree_preload);

/*
 * The same as the function above, except we don't guarantee preloading
 * happens.  We do it if we decide it helps.  On success, return zero with
 * preemption disabled.  On error, return -ENOMEM with preemption not disabled.
 */
int radix_tree_maybe_preload(gfp_t gfp_mask)
{
	if (gfpflags_allow_blocking(gfp_mask))
		return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
	/* Preloading doesn't help anything with this gfp mask, skip it */
	preempt_disable();
	return 0;
}
EXPORT_SYMBOL(radix_tree_maybe_preload);

static unsigned radix_tree_load_root(const struct radix_tree_root *root,
		struct radix_tree_node **nodep, unsigned long *maxindex)
{
	struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);

	*nodep = node;

	if (likely(radix_tree_is_internal_node(node))) {
		node = entry_to_node(node);
		*maxindex = node_maxindex(node);
		return node->shift + RADIX_TREE_MAP_SHIFT;
	}

	*maxindex = 0;
	return 0;
}

/*
 * Extend a radix tree so it can store key @index.
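 *
 * Worked example (illustrative, assuming RADIX_TREE_MAP_SHIFT == 6, so
 * each level covers six bits of the index): shift_maxindex(0) == 63 and
 * shift_maxindex(6) == 4095, so extending a single-level tree to hold
 * index 100 needs one extra level (shift 6), while index 5000 needs two
 * extra levels (shift 12, covering indices up to 262143).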
 */
static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
				unsigned long index, unsigned int shift)
{
	void *entry;
	unsigned int maxshift;
	int tag;

	/* Figure out what the shift should be. */
	maxshift = shift;
	while (index > shift_maxindex(maxshift))
		maxshift += RADIX_TREE_MAP_SHIFT;

	entry = rcu_dereference_raw(root->xa_head);
	if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE)))
		goto out;

	do {
		struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL,
							root, shift, 0, 1, 0);
		if (!node)
			return -ENOMEM;

		if (is_idr(root)) {
			all_tag_set(node, IDR_FREE);
			if (!root_tag_get(root, IDR_FREE)) {
				tag_clear(node, IDR_FREE, 0);
				root_tag_set(root, IDR_FREE);
			}
		} else {
			/* Propagate the aggregated tag info to the new child */
			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
				if (root_tag_get(root, tag))
					tag_set(node, tag, 0);
			}
		}

		BUG_ON(shift > BITS_PER_LONG);
		if (radix_tree_is_internal_node(entry)) {
			entry_to_node(entry)->parent = node;
		} else if (xa_is_value(entry)) {
			/* Moving a value entry root->xa_head to a node */
			node->nr_values = 1;
		}
		/*
		 * entry was already in the radix tree, so we do not need
		 * rcu_assign_pointer here
		 */
		node->slots[0] = (void __rcu *)entry;
		entry = node_to_entry(node);
		rcu_assign_pointer(root->xa_head, entry);
		shift += RADIX_TREE_MAP_SHIFT;
	} while (shift <= maxshift);
out:
	return maxshift + RADIX_TREE_MAP_SHIFT;
}

/**
 * radix_tree_shrink - shrink radix tree to minimum height
 * @root: radix tree root
 */
static inline bool radix_tree_shrink(struct radix_tree_root *root)
{
	bool shrunk = false;

	for (;;) {
		struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
		struct radix_tree_node *child;

		if (!radix_tree_is_internal_node(node))
			break;
		node = entry_to_node(node);

		/*
		 * If the candidate node has more than one child, or its child
		 * is not at the leftmost slot, we cannot shrink.
		 */
		if (node->count != 1)
			break;
		child = rcu_dereference_raw(node->slots[0]);
		if (!child)
			break;

		/*
		 * For an IDR, we must not shrink entry 0 into the root in
		 * case somebody calls idr_replace() with a pointer that
		 * appears to be an internal entry
		 */
		if (!node->shift && is_idr(root))
			break;

		if (radix_tree_is_internal_node(child))
			entry_to_node(child)->parent = NULL;

		/*
		 * We don't need rcu_assign_pointer(), since we are simply
		 * moving the node from one part of the tree to another: if it
		 * was safe to dereference the old pointer to it
		 * (node->slots[0]), it will be safe to dereference the new
		 * one (root->xa_head) as far as dependent read barriers go.
		 */
		root->xa_head = (void __rcu *)child;
		if (is_idr(root) && !tag_get(node, IDR_FREE, 0))
			root_tag_clear(root, IDR_FREE);

		/*
		 * We have a dilemma here.  The node's slot[0] must not be
		 * NULLed in case there are concurrent lookups expecting to
		 * find the item.  However if this was a bottom-level node,
		 * then it may be subject to the slot pointer being visible
		 * to callers dereferencing it.  If the item corresponding to
		 * slot[0] is subsequently deleted, these callers would expect
		 * their slot to become empty sooner or later.
		 *
		 * For example, lockless pagecache will look up a slot, deref
		 * the page pointer, and if the page has 0 refcount it means it
		 * was concurrently deleted from pagecache so try the deref
		 * again.  Fortunately there is already a requirement for logic
		 * to retry the entire slot lookup -- the indirect pointer
		 * problem (replacing direct root node with an indirect pointer
		 * also results in a stale slot).  So tag the slot as indirect
		 * to force callers to retry.
		 */
		node->count = 0;
		if (!radix_tree_is_internal_node(child)) {
			node->slots[0] = (void __rcu *)RADIX_TREE_RETRY;
		}

		WARN_ON_ONCE(!list_empty(&node->private_list));
		radix_tree_node_free(node);
		shrunk = true;
	}

	return shrunk;
}

static bool delete_node(struct radix_tree_root *root,
			struct radix_tree_node *node)
{
	bool deleted = false;

	do {
		struct radix_tree_node *parent;

		if (node->count) {
			if (node_to_entry(node) ==
					rcu_dereference_raw(root->xa_head))
				deleted |= radix_tree_shrink(root);
			return deleted;
		}

		parent = node->parent;
		if (parent) {
			parent->slots[node->offset] = NULL;
			parent->count--;
		} else {
			/*
			 * Shouldn't the tags already have all been cleared
			 * by the caller?
			 */
			if (!is_idr(root))
				root_tag_clear_all(root);
			root->xa_head = NULL;
		}

		WARN_ON_ONCE(!list_empty(&node->private_list));
		radix_tree_node_free(node);
		deleted = true;

		node = parent;
	} while (node);

	return deleted;
}

/**
 * __radix_tree_create - create a slot in a radix tree
 * @root: radix tree root
 * @index: index key
 * @nodep: returns node
 * @slotp: returns slot
 *
 * Create, if necessary, and return the node and slot for an item
 * at position @index in the radix tree @root.
 *
 * Until there is more than one item in the tree, no nodes are
 * allocated and @root->xa_head is used as a direct slot instead of
 * pointing to a node, in which case *@nodep will be NULL.
 *
 * Returns -ENOMEM, or 0 for success.
 */
static int __radix_tree_create(struct radix_tree_root *root,
		unsigned long index, struct radix_tree_node **nodep,
		void __rcu ***slotp)
{
	struct radix_tree_node *node = NULL, *child;
	void __rcu **slot = (void __rcu **)&root->xa_head;
	unsigned long maxindex;
	unsigned int shift, offset = 0;
	unsigned long max = index;
	gfp_t gfp = root_gfp_mask(root);

	shift = radix_tree_load_root(root, &child, &maxindex);

	/* Make sure the tree is high enough. */
	if (max > maxindex) {
		int error = radix_tree_extend(root, gfp, max, shift);
		if (error < 0)
			return error;
		shift = error;
		child = rcu_dereference_raw(root->xa_head);
	}

	while (shift > 0) {
		shift -= RADIX_TREE_MAP_SHIFT;
		if (child == NULL) {
			/* Have to add a child node. */
			child = radix_tree_node_alloc(gfp, node, root, shift,
							offset, 0, 0);
			if (!child)
				return -ENOMEM;
			rcu_assign_pointer(*slot, node_to_entry(child));
			if (node)
				node->count++;
		} else if (!radix_tree_is_internal_node(child))
			break;

		/* Go a level down */
		node = entry_to_node(child);
		offset = radix_tree_descend(node, &child, index);
		slot = &node->slots[offset];
	}

	if (nodep)
		*nodep = node;
	if (slotp)
		*slotp = slot;
	return 0;
}

/*
 * Free any nodes below this node.
 * The tree is presumed to not need
 * shrinking, and any user data in the tree is presumed to not need a
 * destructor called on it.  If we need to add a destructor, we can
 * add that functionality later.  Note that we may not clear tags or
 * slots from the tree as an RCU walker may still have a pointer into
 * this subtree.  We could replace the entries with RADIX_TREE_RETRY,
 * but we'll still have to clear those in rcu_free.
 */
static void radix_tree_free_nodes(struct radix_tree_node *node)
{
	unsigned offset = 0;
	struct radix_tree_node *child = entry_to_node(node);

	for (;;) {
		void *entry = rcu_dereference_raw(child->slots[offset]);
		if (xa_is_node(entry) && child->shift) {
			child = entry_to_node(entry);
			offset = 0;
			continue;
		}
		offset++;
		while (offset == RADIX_TREE_MAP_SIZE) {
			struct radix_tree_node *old = child;
			offset = child->offset + 1;
			child = child->parent;
			WARN_ON_ONCE(!list_empty(&old->private_list));
			radix_tree_node_free(old);
			if (old == entry_to_node(node))
				return;
		}
	}
}

static inline int insert_entries(struct radix_tree_node *node,
		void __rcu **slot, void *item, bool replace)
{
	if (*slot)
		return -EEXIST;
	rcu_assign_pointer(*slot, item);
	if (node) {
		node->count++;
		if (xa_is_value(item))
			node->nr_values++;
	}
	return 1;
}

/**
 * radix_tree_insert - insert into a radix tree
 * @root: radix tree root
 * @index: index key
 * @item: item to insert
 *
 * Insert an item into the radix tree at position @index.
 */
int radix_tree_insert(struct radix_tree_root *root, unsigned long index,
			void *item)
{
	struct radix_tree_node *node;
	void __rcu **slot;
	int error;

	BUG_ON(radix_tree_is_internal_node(item));

	error = __radix_tree_create(root, index, &node, &slot);
	if (error)
		return error;

	error = insert_entries(node, slot, item, false);
	if (error < 0)
		return error;

	if (node) {
		unsigned offset = get_slot_offset(node, slot);
		BUG_ON(tag_get(node, 0, offset));
		BUG_ON(tag_get(node, 1, offset));
		BUG_ON(tag_get(node, 2, offset));
	} else {
		BUG_ON(root_tags_get(root));
	}

	return 0;
}
EXPORT_SYMBOL(radix_tree_insert);

/**
 * __radix_tree_lookup - lookup an item in a radix tree
 * @root: radix tree root
 * @index: index key
 * @nodep: returns node
 * @slotp: returns slot
 *
 * Lookup and return the item at position @index in the radix
 * tree @root.
 *
 * Until there is more than one item in the tree, no nodes are
 * allocated and @root->xa_head is used as a direct slot instead of
 * pointing to a node, in which case *@nodep will be NULL.
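 *
 * Illustrative sketch (my_tree, my_lock and new_item are hypothetical
 * names): callers typically pair this with __radix_tree_replace() while
 * holding the tree's write-side lock, e.g.
 *
 *	struct radix_tree_node *node;
 *	void __rcu **slot = NULL;
 *
 *	spin_lock(&my_lock);
 *	if (__radix_tree_lookup(&my_tree, index, &node, &slot) && slot)
 *		__radix_tree_replace(&my_tree, node, slot, new_item);
 *	spin_unlock(&my_lock);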
 */
void *__radix_tree_lookup(const struct radix_tree_root *root,
			  unsigned long index, struct radix_tree_node **nodep,
			  void __rcu ***slotp)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;
	void __rcu **slot;

 restart:
	parent = NULL;
	slot = (void __rcu **)&root->xa_head;
	radix_tree_load_root(root, &node, &maxindex);
	if (index > maxindex)
		return NULL;

	while (radix_tree_is_internal_node(node)) {
		unsigned offset;

		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);
		slot = parent->slots + offset;
		if (node == RADIX_TREE_RETRY)
			goto restart;
		if (parent->shift == 0)
			break;
	}

	if (nodep)
		*nodep = parent;
	if (slotp)
		*slotp = slot;
	return node;
}

/**
 * radix_tree_lookup_slot - lookup a slot in a radix tree
 * @root: radix tree root
 * @index: index key
 *
 * Returns: the slot corresponding to the position @index in the
 * radix tree @root.  This is useful for update-if-exists operations.
 *
 * This function can be called under rcu_read_lock iff the slot is not
 * modified by radix_tree_replace_slot, otherwise it must be called
 * exclusive from other writers.  Any dereference of the slot must be done
 * using radix_tree_deref_slot.
 */
void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root,
				unsigned long index)
{
	void __rcu **slot;

	if (!__radix_tree_lookup(root, index, NULL, &slot))
		return NULL;
	return slot;
}
EXPORT_SYMBOL(radix_tree_lookup_slot);

/**
 * radix_tree_lookup - perform lookup operation on a radix tree
 * @root: radix tree root
 * @index: index key
 *
 * Lookup the item at the position @index in the radix tree @root.
 *
 * This function can be called under rcu_read_lock, however the caller
 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free
 * them safely).  No RCU barriers are required to access or modify the
 * returned item, however.
 */
void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
{
	return __radix_tree_lookup(root, index, NULL, NULL);
}
EXPORT_SYMBOL(radix_tree_lookup);

static void replace_slot(void __rcu **slot, void *item,
		struct radix_tree_node *node, int count, int values)
{
	if (node && (count || values)) {
		node->count += count;
		node->nr_values += values;
	}

	rcu_assign_pointer(*slot, item);
}

static bool node_tag_get(const struct radix_tree_root *root,
				const struct radix_tree_node *node,
				unsigned int tag, unsigned int offset)
{
	if (node)
		return tag_get(node, tag, offset);
	return root_tag_get(root, tag);
}

/*
 * IDR users want to be able to store NULL in the tree, so if the slot isn't
 * free, don't adjust the count, even if it's transitioning between NULL and
 * non-NULL.  For the IDA, we mark slots as being IDR_FREE while they still
 * have empty bits, but it only stores NULL in slots when they're being
 * deleted.
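 *
 * For example (illustrative): storing NULL into a free, empty IDR slot
 * still counts as +1 (the slot becomes occupied), while overwriting a
 * NULL already stored in a non-free slot counts as 0.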
 */
static int calculate_count(struct radix_tree_root *root,
				struct radix_tree_node *node, void __rcu **slot,
				void *item, void *old)
{
	if (is_idr(root)) {
		unsigned offset = get_slot_offset(node, slot);
		bool free = node_tag_get(root, node, IDR_FREE, offset);
		if (!free)
			return 0;
		if (!old)
			return 1;
	}
	return !!item - !!old;
}

/**
 * __radix_tree_replace - replace item in a slot
 * @root: radix tree root
 * @node: pointer to tree node
 * @slot: pointer to slot in @node
 * @item: new item to store in the slot.
 *
 * For use with __radix_tree_lookup().  Caller must hold tree write locked
 * across slot lookup and replacement.
 */
void __radix_tree_replace(struct radix_tree_root *root,
			  struct radix_tree_node *node,
			  void __rcu **slot, void *item)
{
	void *old = rcu_dereference_raw(*slot);
	int values = !!xa_is_value(item) - !!xa_is_value(old);
	int count = calculate_count(root, node, slot, item, old);

	/*
	 * This function supports replacing value entries and
	 * deleting entries, but that needs accounting against the
	 * node unless the slot is root->xa_head.
	 */
	WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) &&
			(count || values));
	replace_slot(slot, item, node, count, values);

	if (!node)
		return;

	delete_node(root, node);
}

/**
 * radix_tree_replace_slot - replace item in a slot
 * @root: radix tree root
 * @slot: pointer to slot
 * @item: new item to store in the slot.
 *
 * For use with radix_tree_lookup_slot() and
 * radix_tree_gang_lookup_tag_slot().  Caller must hold tree write locked
 * across slot lookup and replacement.
 *
 * NOTE: This cannot be used to switch between non-entries (empty slots),
 * regular entries, and value entries, as that requires accounting
 * inside the radix tree node.  When switching from one type of entry or
 * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
 * radix_tree_iter_replace().
 */
void radix_tree_replace_slot(struct radix_tree_root *root,
			     void __rcu **slot, void *item)
{
	__radix_tree_replace(root, NULL, slot, item);
}
EXPORT_SYMBOL(radix_tree_replace_slot);

/**
 * radix_tree_iter_replace - replace item in a slot
 * @root: radix tree root
 * @iter: iterator state
 * @slot: pointer to slot
 * @item: new item to store in the slot.
 *
 * For use with radix_tree_for_each_slot().
 * Caller must hold tree write locked.
 */
void radix_tree_iter_replace(struct radix_tree_root *root,
				const struct radix_tree_iter *iter,
				void __rcu **slot, void *item)
{
	__radix_tree_replace(root, iter->node, slot, item);
}

static void node_tag_set(struct radix_tree_root *root,
				struct radix_tree_node *node,
				unsigned int tag, unsigned int offset)
{
	while (node) {
		if (tag_get(node, tag, offset))
			return;
		tag_set(node, tag, offset);
		offset = node->offset;
		node = node->parent;
	}

	if (!root_tag_get(root, tag))
		root_tag_set(root, tag);
}

/**
 * radix_tree_tag_set - set a tag on a radix tree node
 * @root: radix tree root
 * @index: index key
 * @tag: tag index
 *
 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
 * corresponding to @index in the radix tree, from
 * the root all the way down to the leaf node.
 *
 * Returns the address of the tagged item.
 * Setting a tag on a not-present item is a bug.
 */
void *radix_tree_tag_set(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;

	radix_tree_load_root(root, &node, &maxindex);
	BUG_ON(index > maxindex);

	while (radix_tree_is_internal_node(node)) {
		unsigned offset;

		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);
		BUG_ON(!node);

		if (!tag_get(parent, tag, offset))
			tag_set(parent, tag, offset);
	}

	/* set the root's tag bit */
	if (!root_tag_get(root, tag))
		root_tag_set(root, tag);

	return node;
}
EXPORT_SYMBOL(radix_tree_tag_set);

static void node_tag_clear(struct radix_tree_root *root,
				struct radix_tree_node *node,
				unsigned int tag, unsigned int offset)
{
	while (node) {
		if (!tag_get(node, tag, offset))
			return;
		tag_clear(node, tag, offset);
		if (any_tag_set(node, tag))
			return;

		offset = node->offset;
		node = node->parent;
	}

	/* clear the root's tag bit */
	if (root_tag_get(root, tag))
		root_tag_clear(root, tag);
}

/**
 * radix_tree_tag_clear - clear a tag on a radix tree node
 * @root: radix tree root
 * @index: index key
 * @tag: tag index
 *
 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
 * corresponding to @index in the radix tree.  If this causes
 * the leaf node to have no tags set then clear the tag in the
 * next-to-leaf node, etc.
 *
 * Returns the address of the tagged item on success, else NULL.  ie:
 * has the same return value and semantics as radix_tree_lookup().
 */
void *radix_tree_tag_clear(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;
	int uninitialized_var(offset);

	radix_tree_load_root(root, &node, &maxindex);
	if (index > maxindex)
		return NULL;

	parent = NULL;

	while (radix_tree_is_internal_node(node)) {
		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);
	}

	if (node)
		node_tag_clear(root, parent, tag, offset);

	return node;
}
EXPORT_SYMBOL(radix_tree_tag_clear);

/**
 * radix_tree_iter_tag_clear - clear a tag on the current iterator entry
 * @root: radix tree root
 * @iter: iterator state
 * @tag: tag to clear
 */
void radix_tree_iter_tag_clear(struct radix_tree_root *root,
			const struct radix_tree_iter *iter, unsigned int tag)
{
	node_tag_clear(root, iter->node, tag, iter_offset(iter));
}

/**
 * radix_tree_tag_get - get a tag on a radix tree node
 * @root: radix tree root
 * @index: index key
 * @tag: tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Return values:
 *
 *  0: tag not present or not set
 *  1: tag set
 *
 * Note that the return value of this function may not be relied on, even if
 * the RCU lock is held, unless tag modification and node deletion are excluded
 * from concurrency.
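 *
 * Illustrative example (my_tree and MY_TAG are hypothetical; an entry is
 * assumed to be present at @index, and the set/clear steps are assumed to
 * run under the tree's write lock):
 *
 *	radix_tree_tag_set(&my_tree, index, MY_TAG);
 *	...
 *	if (radix_tree_tag_get(&my_tree, index, MY_TAG))
 *		radix_tree_tag_clear(&my_tree, index, MY_TAG);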
 */
int radix_tree_tag_get(const struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;

	if (!root_tag_get(root, tag))
		return 0;

	radix_tree_load_root(root, &node, &maxindex);
	if (index > maxindex)
		return 0;

	while (radix_tree_is_internal_node(node)) {
		unsigned offset;

		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);

		if (!tag_get(parent, tag, offset))
			return 0;
		if (node == RADIX_TREE_RETRY)
			break;
	}

	return 1;
}
EXPORT_SYMBOL(radix_tree_tag_get);

/* Construct iter->tags bit-mask from node->tags[tag] array */
static void set_iter_tags(struct radix_tree_iter *iter,
				struct radix_tree_node *node, unsigned offset,
				unsigned tag)
{
	unsigned tag_long = offset / BITS_PER_LONG;
	unsigned tag_bit = offset % BITS_PER_LONG;

	if (!node) {
		iter->tags = 1;
		return;
	}

	iter->tags = node->tags[tag][tag_long] >> tag_bit;

	/* This never happens if RADIX_TREE_TAG_LONGS == 1 */
	if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
		/* Pick tags from next element */
		if (tag_bit)
			iter->tags |= node->tags[tag][tag_long + 1] <<
						(BITS_PER_LONG - tag_bit);
		/* Clip chunk size, here only BITS_PER_LONG tags */
		iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG);
	}
}

void __rcu **radix_tree_iter_resume(void __rcu **slot,
					struct radix_tree_iter *iter)
{
	slot++;
	iter->index = __radix_tree_iter_add(iter, 1);
	iter->next_index = iter->index;
	iter->tags = 0;
	return NULL;
}
EXPORT_SYMBOL(radix_tree_iter_resume);

/**
 * radix_tree_next_chunk - find next chunk of slots for iteration
 *
 * @root: radix tree root
 * @iter: iterator state
 * @flags: RADIX_TREE_ITER_* flags and tag index
 * Returns: pointer to chunk first slot, or NULL if iteration is over
 */
void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
			     struct radix_tree_iter *iter, unsigned flags)
{
	unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
	struct radix_tree_node *node, *child;
	unsigned long index, offset, maxindex;

	if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
		return NULL;

	/*
	 * Catch next_index overflow after ~0UL.  iter->index never overflows
	 * during iterating; it can be zero only at the beginning.
	 * And we cannot overflow iter->next_index in a single step,
	 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
	 *
	 * This condition is also used by radix_tree_next_slot() to stop
	 * contiguous iterating, and to forbid switching to the next chunk.
	 */
	index = iter->next_index;
	if (!index && iter->index)
		return NULL;

 restart:
	radix_tree_load_root(root, &child, &maxindex);
	if (index > maxindex)
		return NULL;
	if (!child)
		return NULL;

	if (!radix_tree_is_internal_node(child)) {
		/* Single-slot tree */
		iter->index = index;
		iter->next_index = maxindex + 1;
		iter->tags = 1;
		iter->node = NULL;
		return (void __rcu **)&root->xa_head;
	}

	do {
		node = entry_to_node(child);
		offset = radix_tree_descend(node, &child, index);

		if ((flags & RADIX_TREE_ITER_TAGGED) ?
				!tag_get(node, tag, offset) : !child) {
			/* Hole detected */
			if (flags & RADIX_TREE_ITER_CONTIG)
				return NULL;

			if (flags & RADIX_TREE_ITER_TAGGED)
				offset = radix_tree_find_next_bit(node, tag,
						offset + 1);
			else
				while (++offset < RADIX_TREE_MAP_SIZE) {
					void *slot = rcu_dereference_raw(
							node->slots[offset]);
					if (slot)
						break;
				}
			index &= ~node_maxindex(node);
			index += offset << node->shift;
			/* Overflow after ~0UL */
			if (!index)
				return NULL;
			if (offset == RADIX_TREE_MAP_SIZE)
				goto restart;
			child = rcu_dereference_raw(node->slots[offset]);
		}

		if (!child)
			goto restart;
		if (child == RADIX_TREE_RETRY)
			break;
	} while (node->shift && radix_tree_is_internal_node(child));

	/* Update the iterator state */
	iter->index = (index & ~node_maxindex(node)) | offset;
	iter->next_index = (index | node_maxindex(node)) + 1;
	iter->node = node;

	if (flags & RADIX_TREE_ITER_TAGGED)
		set_iter_tags(iter, node, offset, tag);

	return node->slots + offset;
}
EXPORT_SYMBOL(radix_tree_next_chunk);

/**
 * radix_tree_gang_lookup - perform multiple lookup on a radix tree
 * @root: radix tree root
 * @results: where the results of the lookup are placed
 * @first_index: start the lookup from this key
 * @max_items: place up to this many items at *results
 *
 * Performs an index-ascending scan of the tree for present items.  Places
 * them at *@results and returns the number of items which were placed at
 * *@results.
 *
 * The implementation is naive.
 *
 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under
 * rcu_read_lock.  In this case, rather than the returned results being
 * an atomic snapshot of the tree at a single point in time, the
 * semantics of an RCU protected gang lookup are as though multiple
 * radix_tree_lookups have been issued in individual locks, and results
 * stored in 'results'.
 */
unsigned int
radix_tree_gang_lookup(const struct radix_tree_root *root, void **results,
			unsigned long first_index, unsigned int max_items)
{
	struct radix_tree_iter iter;
	void __rcu **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_slot(slot, root, &iter, first_index) {
		results[ret] = rcu_dereference_raw(*slot);
		if (!results[ret])
			continue;
		if (radix_tree_is_internal_node(results[ret])) {
			slot = radix_tree_iter_retry(&iter);
			continue;
		}
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup);

/**
 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
 *                              based on a tag
 * @root: radix tree root
 * @results: where the results of the lookup are placed
 * @first_index: start the lookup from this key
 * @max_items: place up to this many items at *results
 * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Performs an index-ascending scan of the tree for present items which
 * have the tag indexed by @tag set.  Places the items at *@results and
 * returns the number of items which were placed at *@results.
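 *
 * Illustrative sketch (my_tree and MY_TAG are hypothetical), run under
 * rcu_read_lock() or the tree's lock:
 *
 *	void *results[16];
 *	unsigned int n;
 *
 *	n = radix_tree_gang_lookup_tag(&my_tree, results, 0, 16, MY_TAG);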
 */
unsigned int
radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results,
		unsigned long first_index, unsigned int max_items,
		unsigned int tag)
{
	struct radix_tree_iter iter;
	void __rcu **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
		results[ret] = rcu_dereference_raw(*slot);
		if (!results[ret])
			continue;
		if (radix_tree_is_internal_node(results[ret])) {
			slot = radix_tree_iter_retry(&iter);
			continue;
		}
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag);

/**
 * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
 *                                   radix tree based on a tag
 * @root: radix tree root
 * @results: where the results of the lookup are placed
 * @first_index: start the lookup from this key
 * @max_items: place up to this many items at *results
 * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Performs an index-ascending scan of the tree for present items which
 * have the tag indexed by @tag set.  Places the slots at *@results and
 * returns the number of slots which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root,
		void __rcu ***results, unsigned long first_index,
		unsigned int max_items, unsigned int tag)
{
	struct radix_tree_iter iter;
	void __rcu **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
		results[ret] = slot;
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);

static bool __radix_tree_delete(struct radix_tree_root *root,
				struct radix_tree_node *node, void __rcu **slot)
{
	void *old = rcu_dereference_raw(*slot);
	int values = xa_is_value(old) ? -1 : 0;
	unsigned offset = get_slot_offset(node, slot);
	int tag;

	if (is_idr(root))
		node_tag_set(root, node, IDR_FREE, offset);
	else
		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
			node_tag_clear(root, node, tag, offset);

	replace_slot(slot, NULL, node, -1, values);
	return node && delete_node(root, node);
}

/**
 * radix_tree_iter_delete - delete the entry at this iterator position
 * @root: radix tree root
 * @iter: iterator state
 * @slot: pointer to slot
 *
 * Delete the entry at the position currently pointed to by the iterator.
 * This may result in the current node being freed; if it is, the iterator
 * is advanced so that it will not reference the freed memory.  This
 * function may be called without any locking if there are no other threads
 * which can access this tree.
 */
void radix_tree_iter_delete(struct radix_tree_root *root,
				struct radix_tree_iter *iter, void __rcu **slot)
{
	if (__radix_tree_delete(root, iter->node, slot))
		iter->index = iter->next_index;
}
EXPORT_SYMBOL(radix_tree_iter_delete);

/**
 * radix_tree_delete_item - delete an item from a radix tree
 * @root: radix tree root
 * @index: index key
 * @item: expected item
 *
 * Remove @item at @index from the radix tree rooted at @root.
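 *
 * Illustrative use (my_tree and old_item are hypothetical; caller holds
 * the tree's write lock): passing a non-NULL @item turns this into a
 * conditional delete, e.g.
 *
 *	if (radix_tree_delete_item(&my_tree, index, old_item) != old_item)
 *		pr_debug("entry changed under us, not deleted\n");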
 *
 * Return: the deleted entry, or %NULL if it was not present
 * or the entry at the given @index was not @item.
 */
void *radix_tree_delete_item(struct radix_tree_root *root,
			     unsigned long index, void *item)
{
	struct radix_tree_node *node = NULL;
	void __rcu **slot = NULL;
	void *entry;

	entry = __radix_tree_lookup(root, index, &node, &slot);
	if (!slot)
		return NULL;
	if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE,
						get_slot_offset(node, slot))))
		return NULL;

	if (item && entry != item)
		return NULL;

	__radix_tree_delete(root, node, slot);

	return entry;
}
EXPORT_SYMBOL(radix_tree_delete_item);

/**
 * radix_tree_delete - delete an entry from a radix tree
 * @root: radix tree root
 * @index: index key
 *
 * Remove the entry at @index from the radix tree rooted at @root.
 *
 * Return: The deleted entry, or %NULL if it was not present.
 */
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
{
	return radix_tree_delete_item(root, index, NULL);
}
EXPORT_SYMBOL(radix_tree_delete);

/**
 * radix_tree_tagged - test whether any items in the tree are tagged
 * @root: radix tree root
 * @tag: tag to test
 */
int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag)
{
	return root_tag_get(root, tag);
}
EXPORT_SYMBOL(radix_tree_tagged);

/**
 * idr_preload - preload for idr_alloc()
 * @gfp_mask: allocation mask to use for preloading
 *
 * Preallocate memory to use for the next call to idr_alloc().  This function
 * returns with preemption disabled.  It will be enabled by idr_preload_end().
 */
void idr_preload(gfp_t gfp_mask)
{
	if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE))
		preempt_disable();
}
EXPORT_SYMBOL(idr_preload);

void __rcu **idr_get_free(struct radix_tree_root *root,
			      struct radix_tree_iter *iter, gfp_t gfp,
			      unsigned long max)
{
	struct radix_tree_node *node = NULL, *child;
	void __rcu **slot = (void __rcu **)&root->xa_head;
	unsigned long maxindex, start = iter->next_index;
	unsigned int shift, offset = 0;

 grow:
	shift = radix_tree_load_root(root, &child, &maxindex);
	if (!radix_tree_tagged(root, IDR_FREE))
		start = max(start, maxindex + 1);
	if (start > max)
		return ERR_PTR(-ENOSPC);

	if (start > maxindex) {
		int error = radix_tree_extend(root, gfp, start, shift);
		if (error < 0)
			return ERR_PTR(error);
		shift = error;
		child = rcu_dereference_raw(root->xa_head);
	}
	if (start == 0 && shift == 0)
		shift = RADIX_TREE_MAP_SHIFT;

	while (shift) {
		shift -= RADIX_TREE_MAP_SHIFT;
		if (child == NULL) {
			/* Have to add a child node. */
			child = radix_tree_node_alloc(gfp, node, root, shift,
							offset, 0, 0);
			if (!child)
				return ERR_PTR(-ENOMEM);
			all_tag_set(child, IDR_FREE);
			rcu_assign_pointer(*slot, node_to_entry(child));
			if (node)
				node->count++;
		} else if (!radix_tree_is_internal_node(child))
			break;

		node = entry_to_node(child);
		offset = radix_tree_descend(node, &child, start);
		if (!tag_get(node, IDR_FREE, offset)) {
			offset = radix_tree_find_next_bit(node, IDR_FREE,
							offset + 1);
			start = next_index(start, node, offset);
			if (start > max || start == 0)
				return ERR_PTR(-ENOSPC);
			while (offset == RADIX_TREE_MAP_SIZE) {
				offset = node->offset + 1;
				node = node->parent;
				if (!node)
					goto grow;
				shift = node->shift;
			}
			child = rcu_dereference_raw(node->slots[offset]);
		}
		slot = &node->slots[offset];
	}

	iter->index = start;
	if (node)
		iter->next_index = 1 + min(max, (start | node_maxindex(node)));
	else
		iter->next_index = 1;
	iter->node = node;
	set_iter_tags(iter, node, offset, IDR_FREE);

	return slot;
}

/**
 * idr_destroy - release all internal memory from an IDR
 * @idr: idr handle
 *
 * After this function is called, the IDR is empty, and may be reused or
 * the data structure containing it may be freed.
 *
 * A typical clean-up sequence for objects stored in an idr tree will use
 * idr_for_each() to free all objects, if necessary, then idr_destroy() to
 * free the memory used to keep track of those objects.
 */
void idr_destroy(struct idr *idr)
{
	struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head);
	if (radix_tree_is_internal_node(node))
		radix_tree_free_nodes(node);
	idr->idr_rt.xa_head = NULL;
	root_tag_set(&idr->idr_rt, IDR_FREE);
}
EXPORT_SYMBOL(idr_destroy);

static void
radix_tree_node_ctor(void *arg)
{
	struct radix_tree_node *node = arg;

	memset(node, 0, sizeof(*node));
	INIT_LIST_HEAD(&node->private_list);
}

static int radix_tree_cpu_dead(unsigned int cpu)
{
	struct radix_tree_preload *rtp;
	struct radix_tree_node *node;

	/* Free per-cpu pool of preloaded nodes */
	rtp = &per_cpu(radix_tree_preloads, cpu);
	while (rtp->nr) {
		node = rtp->nodes;
		rtp->nodes = node->parent;
		kmem_cache_free(radix_tree_node_cachep, node);
		rtp->nr--;
	}
	return 0;
}

void __init radix_tree_init(void)
{
	int ret;

	BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
	BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK);
	BUILD_BUG_ON(XA_CHUNK_SIZE > 255);
	radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
			sizeof(struct radix_tree_node), 0,
			SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
			radix_tree_node_ctor);
	ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead",
					NULL, radix_tree_cpu_dead);
	WARN_ON(ret < 0);
}