1 /* 2 * Copyright (C) 2001 Momchil Velikov 3 * Portions Copyright (C) 2001 Christoph Hellwig 4 * Copyright (C) 2005 SGI, Christoph Lameter 5 * Copyright (C) 2006 Nick Piggin 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License as 9 * published by the Free Software Foundation; either version 2, or (at 10 * your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 20 */ 21 22 #include <linux/errno.h> 23 #include <linux/init.h> 24 #include <linux/kernel.h> 25 #include <linux/module.h> 26 #include <linux/radix-tree.h> 27 #include <linux/percpu.h> 28 #include <linux/slab.h> 29 #include <linux/notifier.h> 30 #include <linux/cpu.h> 31 #include <linux/gfp.h> 32 #include <linux/string.h> 33 #include <linux/bitops.h> 34 #include <linux/rcupdate.h> 35 36 37 #ifdef __KERNEL__ 38 #define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) 39 #else 40 #define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ 41 #endif 42 43 #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) 44 #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) 45 46 #define RADIX_TREE_TAG_LONGS \ 47 ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) 48 49 struct radix_tree_node { 50 unsigned int height; /* Height from the bottom */ 51 unsigned int count; 52 struct rcu_head rcu_head; 53 void *slots[RADIX_TREE_MAP_SIZE]; 54 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; 55 }; 56 57 struct radix_tree_path { 58 struct radix_tree_node *node; 59 int offset; 60 }; 61 62 #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) 63 #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ 64 RADIX_TREE_MAP_SHIFT)) 65 66 /* 67 * The height_to_maxindex array needs to be one deeper than the maximum 68 * path as height 0 holds only 1 entry. 69 */ 70 static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly; 71 72 /* 73 * Radix tree node cache. 74 */ 75 static struct kmem_cache *radix_tree_node_cachep; 76 77 /* 78 * Per-cpu pool of preloaded nodes 79 */ 80 struct radix_tree_preload { 81 int nr; 82 struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; 83 }; 84 DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 85 86 static inline gfp_t root_gfp_mask(struct radix_tree_root *root) 87 { 88 return root->gfp_mask & __GFP_BITS_MASK; 89 } 90 91 static inline void tag_set(struct radix_tree_node *node, unsigned int tag, 92 int offset) 93 { 94 __set_bit(offset, node->tags[tag]); 95 } 96 97 static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, 98 int offset) 99 { 100 __clear_bit(offset, node->tags[tag]); 101 } 102 103 static inline int tag_get(struct radix_tree_node *node, unsigned int tag, 104 int offset) 105 { 106 return test_bit(offset, node->tags[tag]); 107 } 108 109 static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag) 110 { 111 root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); 112 } 113 114 static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag) 115 { 116 root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); 117 } 118 119 static inline void root_tag_clear_all(struct radix_tree_root *root) 120 { 121 root->gfp_mask &= __GFP_BITS_MASK; 122 } 123 124 static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) 125 { 126 return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); 127 } 128 129 /* 130 * Returns 1 if any slot in the node has this tag set. 131 * Otherwise returns 0. 132 */ 133 static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) 134 { 135 int idx; 136 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { 137 if (node->tags[tag][idx]) 138 return 1; 139 } 140 return 0; 141 } 142 /* 143 * This assumes that the caller has performed appropriate preallocation, and 144 * that the caller has pinned this thread of control to the current CPU. 145 */ 146 static struct radix_tree_node * 147 radix_tree_node_alloc(struct radix_tree_root *root) 148 { 149 struct radix_tree_node *ret = NULL; 150 gfp_t gfp_mask = root_gfp_mask(root); 151 152 if (!(gfp_mask & __GFP_WAIT)) { 153 struct radix_tree_preload *rtp; 154 155 /* 156 * Provided the caller has preloaded here, we will always 157 * succeed in getting a node here (and never reach 158 * kmem_cache_alloc) 159 */ 160 rtp = &__get_cpu_var(radix_tree_preloads); 161 if (rtp->nr) { 162 ret = rtp->nodes[rtp->nr - 1]; 163 rtp->nodes[rtp->nr - 1] = NULL; 164 rtp->nr--; 165 } 166 } 167 if (ret == NULL) 168 ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 169 170 BUG_ON(radix_tree_is_indirect_ptr(ret)); 171 return ret; 172 } 173 174 static void radix_tree_node_rcu_free(struct rcu_head *head) 175 { 176 struct radix_tree_node *node = 177 container_of(head, struct radix_tree_node, rcu_head); 178 179 /* 180 * must only free zeroed nodes into the slab. radix_tree_shrink 181 * can leave us with a non-NULL entry in the first slot, so clear 182 * that here to make sure. 183 */ 184 tag_clear(node, 0, 0); 185 tag_clear(node, 1, 0); 186 node->slots[0] = NULL; 187 node->count = 0; 188 189 kmem_cache_free(radix_tree_node_cachep, node); 190 } 191 192 static inline void 193 radix_tree_node_free(struct radix_tree_node *node) 194 { 195 call_rcu(&node->rcu_head, radix_tree_node_rcu_free); 196 } 197 198 /* 199 * Load up this CPU's radix_tree_node buffer with sufficient objects to 200 * ensure that the addition of a single element in the tree cannot fail. On 201 * success, return zero, with preemption disabled. On error, return -ENOMEM 202 * with preemption not disabled. 203 */ 204 int radix_tree_preload(gfp_t gfp_mask) 205 { 206 struct radix_tree_preload *rtp; 207 struct radix_tree_node *node; 208 int ret = -ENOMEM; 209 210 preempt_disable(); 211 rtp = &__get_cpu_var(radix_tree_preloads); 212 while (rtp->nr < ARRAY_SIZE(rtp->nodes)) { 213 preempt_enable(); 214 node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 215 if (node == NULL) 216 goto out; 217 preempt_disable(); 218 rtp = &__get_cpu_var(radix_tree_preloads); 219 if (rtp->nr < ARRAY_SIZE(rtp->nodes)) 220 rtp->nodes[rtp->nr++] = node; 221 else 222 kmem_cache_free(radix_tree_node_cachep, node); 223 } 224 ret = 0; 225 out: 226 return ret; 227 } 228 EXPORT_SYMBOL(radix_tree_preload); 229 230 /* 231 * Return the maximum key which can be store into a 232 * radix tree with height HEIGHT. 233 */ 234 static inline unsigned long radix_tree_maxindex(unsigned int height) 235 { 236 return height_to_maxindex[height]; 237 } 238 239 /* 240 * Extend a radix tree so it can store key @index. 241 */ 242 static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) 243 { 244 struct radix_tree_node *node; 245 unsigned int height; 246 int tag; 247 248 /* Figure out what the height should be. */ 249 height = root->height + 1; 250 while (index > radix_tree_maxindex(height)) 251 height++; 252 253 if (root->rnode == NULL) { 254 root->height = height; 255 goto out; 256 } 257 258 do { 259 unsigned int newheight; 260 if (!(node = radix_tree_node_alloc(root))) 261 return -ENOMEM; 262 263 /* Increase the height. */ 264 node->slots[0] = radix_tree_indirect_to_ptr(root->rnode); 265 266 /* Propagate the aggregated tag info into the new root */ 267 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 268 if (root_tag_get(root, tag)) 269 tag_set(node, tag, 0); 270 } 271 272 newheight = root->height+1; 273 node->height = newheight; 274 node->count = 1; 275 node = radix_tree_ptr_to_indirect(node); 276 rcu_assign_pointer(root->rnode, node); 277 root->height = newheight; 278 } while (height > root->height); 279 out: 280 return 0; 281 } 282 283 /** 284 * radix_tree_insert - insert into a radix tree 285 * @root: radix tree root 286 * @index: index key 287 * @item: item to insert 288 * 289 * Insert an item into the radix tree at position @index. 290 */ 291 int radix_tree_insert(struct radix_tree_root *root, 292 unsigned long index, void *item) 293 { 294 struct radix_tree_node *node = NULL, *slot; 295 unsigned int height, shift; 296 int offset; 297 int error; 298 299 BUG_ON(radix_tree_is_indirect_ptr(item)); 300 301 /* Make sure the tree is high enough. */ 302 if (index > radix_tree_maxindex(root->height)) { 303 error = radix_tree_extend(root, index); 304 if (error) 305 return error; 306 } 307 308 slot = radix_tree_indirect_to_ptr(root->rnode); 309 310 height = root->height; 311 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 312 313 offset = 0; /* uninitialised var warning */ 314 while (height > 0) { 315 if (slot == NULL) { 316 /* Have to add a child node. */ 317 if (!(slot = radix_tree_node_alloc(root))) 318 return -ENOMEM; 319 slot->height = height; 320 if (node) { 321 rcu_assign_pointer(node->slots[offset], slot); 322 node->count++; 323 } else 324 rcu_assign_pointer(root->rnode, 325 radix_tree_ptr_to_indirect(slot)); 326 } 327 328 /* Go a level down */ 329 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 330 node = slot; 331 slot = node->slots[offset]; 332 shift -= RADIX_TREE_MAP_SHIFT; 333 height--; 334 } 335 336 if (slot != NULL) 337 return -EEXIST; 338 339 if (node) { 340 node->count++; 341 rcu_assign_pointer(node->slots[offset], item); 342 BUG_ON(tag_get(node, 0, offset)); 343 BUG_ON(tag_get(node, 1, offset)); 344 } else { 345 rcu_assign_pointer(root->rnode, item); 346 BUG_ON(root_tag_get(root, 0)); 347 BUG_ON(root_tag_get(root, 1)); 348 } 349 350 return 0; 351 } 352 EXPORT_SYMBOL(radix_tree_insert); 353 354 /** 355 * radix_tree_lookup_slot - lookup a slot in a radix tree 356 * @root: radix tree root 357 * @index: index key 358 * 359 * Returns: the slot corresponding to the position @index in the 360 * radix tree @root. This is useful for update-if-exists operations. 361 * 362 * This function can be called under rcu_read_lock iff the slot is not 363 * modified by radix_tree_replace_slot, otherwise it must be called 364 * exclusive from other writers. Any dereference of the slot must be done 365 * using radix_tree_deref_slot. 366 */ 367 void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) 368 { 369 unsigned int height, shift; 370 struct radix_tree_node *node, **slot; 371 372 node = rcu_dereference(root->rnode); 373 if (node == NULL) 374 return NULL; 375 376 if (!radix_tree_is_indirect_ptr(node)) { 377 if (index > 0) 378 return NULL; 379 return (void **)&root->rnode; 380 } 381 node = radix_tree_indirect_to_ptr(node); 382 383 height = node->height; 384 if (index > radix_tree_maxindex(height)) 385 return NULL; 386 387 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 388 389 do { 390 slot = (struct radix_tree_node **) 391 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 392 node = rcu_dereference(*slot); 393 if (node == NULL) 394 return NULL; 395 396 shift -= RADIX_TREE_MAP_SHIFT; 397 height--; 398 } while (height > 0); 399 400 return (void **)slot; 401 } 402 EXPORT_SYMBOL(radix_tree_lookup_slot); 403 404 /** 405 * radix_tree_lookup - perform lookup operation on a radix tree 406 * @root: radix tree root 407 * @index: index key 408 * 409 * Lookup the item at the position @index in the radix tree @root. 410 * 411 * This function can be called under rcu_read_lock, however the caller 412 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free 413 * them safely). No RCU barriers are required to access or modify the 414 * returned item, however. 415 */ 416 void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) 417 { 418 unsigned int height, shift; 419 struct radix_tree_node *node, **slot; 420 421 node = rcu_dereference(root->rnode); 422 if (node == NULL) 423 return NULL; 424 425 if (!radix_tree_is_indirect_ptr(node)) { 426 if (index > 0) 427 return NULL; 428 return node; 429 } 430 node = radix_tree_indirect_to_ptr(node); 431 432 height = node->height; 433 if (index > radix_tree_maxindex(height)) 434 return NULL; 435 436 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 437 438 do { 439 slot = (struct radix_tree_node **) 440 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 441 node = rcu_dereference(*slot); 442 if (node == NULL) 443 return NULL; 444 445 shift -= RADIX_TREE_MAP_SHIFT; 446 height--; 447 } while (height > 0); 448 449 return node; 450 } 451 EXPORT_SYMBOL(radix_tree_lookup); 452 453 /** 454 * radix_tree_tag_set - set a tag on a radix tree node 455 * @root: radix tree root 456 * @index: index key 457 * @tag: tag index 458 * 459 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) 460 * corresponding to @index in the radix tree. From 461 * the root all the way down to the leaf node. 462 * 463 * Returns the address of the tagged item. Setting a tag on a not-present 464 * item is a bug. 465 */ 466 void *radix_tree_tag_set(struct radix_tree_root *root, 467 unsigned long index, unsigned int tag) 468 { 469 unsigned int height, shift; 470 struct radix_tree_node *slot; 471 472 height = root->height; 473 BUG_ON(index > radix_tree_maxindex(height)); 474 475 slot = radix_tree_indirect_to_ptr(root->rnode); 476 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 477 478 while (height > 0) { 479 int offset; 480 481 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 482 if (!tag_get(slot, tag, offset)) 483 tag_set(slot, tag, offset); 484 slot = slot->slots[offset]; 485 BUG_ON(slot == NULL); 486 shift -= RADIX_TREE_MAP_SHIFT; 487 height--; 488 } 489 490 /* set the root's tag bit */ 491 if (slot && !root_tag_get(root, tag)) 492 root_tag_set(root, tag); 493 494 return slot; 495 } 496 EXPORT_SYMBOL(radix_tree_tag_set); 497 498 /** 499 * radix_tree_tag_clear - clear a tag on a radix tree node 500 * @root: radix tree root 501 * @index: index key 502 * @tag: tag index 503 * 504 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) 505 * corresponding to @index in the radix tree. If 506 * this causes the leaf node to have no tags set then clear the tag in the 507 * next-to-leaf node, etc. 508 * 509 * Returns the address of the tagged item on success, else NULL. ie: 510 * has the same return value and semantics as radix_tree_lookup(). 511 */ 512 void *radix_tree_tag_clear(struct radix_tree_root *root, 513 unsigned long index, unsigned int tag) 514 { 515 /* 516 * The radix tree path needs to be one longer than the maximum path 517 * since the "list" is null terminated. 518 */ 519 struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; 520 struct radix_tree_node *slot = NULL; 521 unsigned int height, shift; 522 523 height = root->height; 524 if (index > radix_tree_maxindex(height)) 525 goto out; 526 527 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 528 pathp->node = NULL; 529 slot = radix_tree_indirect_to_ptr(root->rnode); 530 531 while (height > 0) { 532 int offset; 533 534 if (slot == NULL) 535 goto out; 536 537 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 538 pathp[1].offset = offset; 539 pathp[1].node = slot; 540 slot = slot->slots[offset]; 541 pathp++; 542 shift -= RADIX_TREE_MAP_SHIFT; 543 height--; 544 } 545 546 if (slot == NULL) 547 goto out; 548 549 while (pathp->node) { 550 if (!tag_get(pathp->node, tag, pathp->offset)) 551 goto out; 552 tag_clear(pathp->node, tag, pathp->offset); 553 if (any_tag_set(pathp->node, tag)) 554 goto out; 555 pathp--; 556 } 557 558 /* clear the root's tag bit */ 559 if (root_tag_get(root, tag)) 560 root_tag_clear(root, tag); 561 562 out: 563 return slot; 564 } 565 EXPORT_SYMBOL(radix_tree_tag_clear); 566 567 #ifndef __KERNEL__ /* Only the test harness uses this at present */ 568 /** 569 * radix_tree_tag_get - get a tag on a radix tree node 570 * @root: radix tree root 571 * @index: index key 572 * @tag: tag index (< RADIX_TREE_MAX_TAGS) 573 * 574 * Return values: 575 * 576 * 0: tag not present or not set 577 * 1: tag set 578 */ 579 int radix_tree_tag_get(struct radix_tree_root *root, 580 unsigned long index, unsigned int tag) 581 { 582 unsigned int height, shift; 583 struct radix_tree_node *node; 584 int saw_unset_tag = 0; 585 586 /* check the root's tag bit */ 587 if (!root_tag_get(root, tag)) 588 return 0; 589 590 node = rcu_dereference(root->rnode); 591 if (node == NULL) 592 return 0; 593 594 if (!radix_tree_is_indirect_ptr(node)) 595 return (index == 0); 596 node = radix_tree_indirect_to_ptr(node); 597 598 height = node->height; 599 if (index > radix_tree_maxindex(height)) 600 return 0; 601 602 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 603 604 for ( ; ; ) { 605 int offset; 606 607 if (node == NULL) 608 return 0; 609 610 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 611 612 /* 613 * This is just a debug check. Later, we can bale as soon as 614 * we see an unset tag. 615 */ 616 if (!tag_get(node, tag, offset)) 617 saw_unset_tag = 1; 618 if (height == 1) { 619 int ret = tag_get(node, tag, offset); 620 621 BUG_ON(ret && saw_unset_tag); 622 return !!ret; 623 } 624 node = rcu_dereference(node->slots[offset]); 625 shift -= RADIX_TREE_MAP_SHIFT; 626 height--; 627 } 628 } 629 EXPORT_SYMBOL(radix_tree_tag_get); 630 #endif 631 632 /** 633 * radix_tree_next_hole - find the next hole (not-present entry) 634 * @root: tree root 635 * @index: index key 636 * @max_scan: maximum range to search 637 * 638 * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest 639 * indexed hole. 640 * 641 * Returns: the index of the hole if found, otherwise returns an index 642 * outside of the set specified (in which case 'return - index >= max_scan' 643 * will be true). 644 * 645 * radix_tree_next_hole may be called under rcu_read_lock. However, like 646 * radix_tree_gang_lookup, this will not atomically search a snapshot of the 647 * tree at a single point in time. For example, if a hole is created at index 648 * 5, then subsequently a hole is created at index 10, radix_tree_next_hole 649 * covering both indexes may return 10 if called under rcu_read_lock. 650 */ 651 unsigned long radix_tree_next_hole(struct radix_tree_root *root, 652 unsigned long index, unsigned long max_scan) 653 { 654 unsigned long i; 655 656 for (i = 0; i < max_scan; i++) { 657 if (!radix_tree_lookup(root, index)) 658 break; 659 index++; 660 if (index == 0) 661 break; 662 } 663 664 return index; 665 } 666 EXPORT_SYMBOL(radix_tree_next_hole); 667 668 static unsigned int 669 __lookup(struct radix_tree_node *slot, void ***results, unsigned long index, 670 unsigned int max_items, unsigned long *next_index) 671 { 672 unsigned int nr_found = 0; 673 unsigned int shift, height; 674 unsigned long i; 675 676 height = slot->height; 677 if (height == 0) 678 goto out; 679 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 680 681 for ( ; height > 1; height--) { 682 i = (index >> shift) & RADIX_TREE_MAP_MASK; 683 for (;;) { 684 if (slot->slots[i] != NULL) 685 break; 686 index &= ~((1UL << shift) - 1); 687 index += 1UL << shift; 688 if (index == 0) 689 goto out; /* 32-bit wraparound */ 690 i++; 691 if (i == RADIX_TREE_MAP_SIZE) 692 goto out; 693 } 694 695 shift -= RADIX_TREE_MAP_SHIFT; 696 slot = rcu_dereference(slot->slots[i]); 697 if (slot == NULL) 698 goto out; 699 } 700 701 /* Bottom level: grab some items */ 702 for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { 703 index++; 704 if (slot->slots[i]) { 705 results[nr_found++] = &(slot->slots[i]); 706 if (nr_found == max_items) 707 goto out; 708 } 709 } 710 out: 711 *next_index = index; 712 return nr_found; 713 } 714 715 /** 716 * radix_tree_gang_lookup - perform multiple lookup on a radix tree 717 * @root: radix tree root 718 * @results: where the results of the lookup are placed 719 * @first_index: start the lookup from this key 720 * @max_items: place up to this many items at *results 721 * 722 * Performs an index-ascending scan of the tree for present items. Places 723 * them at *@results and returns the number of items which were placed at 724 * *@results. 725 * 726 * The implementation is naive. 727 * 728 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under 729 * rcu_read_lock. In this case, rather than the returned results being 730 * an atomic snapshot of the tree at a single point in time, the semantics 731 * of an RCU protected gang lookup are as though multiple radix_tree_lookups 732 * have been issued in individual locks, and results stored in 'results'. 733 */ 734 unsigned int 735 radix_tree_gang_lookup(struct radix_tree_root *root, void **results, 736 unsigned long first_index, unsigned int max_items) 737 { 738 unsigned long max_index; 739 struct radix_tree_node *node; 740 unsigned long cur_index = first_index; 741 unsigned int ret; 742 743 node = rcu_dereference(root->rnode); 744 if (!node) 745 return 0; 746 747 if (!radix_tree_is_indirect_ptr(node)) { 748 if (first_index > 0) 749 return 0; 750 results[0] = node; 751 return 1; 752 } 753 node = radix_tree_indirect_to_ptr(node); 754 755 max_index = radix_tree_maxindex(node->height); 756 757 ret = 0; 758 while (ret < max_items) { 759 unsigned int nr_found, slots_found, i; 760 unsigned long next_index; /* Index of next search */ 761 762 if (cur_index > max_index) 763 break; 764 slots_found = __lookup(node, (void ***)results + ret, cur_index, 765 max_items - ret, &next_index); 766 nr_found = 0; 767 for (i = 0; i < slots_found; i++) { 768 struct radix_tree_node *slot; 769 slot = *(((void ***)results)[ret + i]); 770 if (!slot) 771 continue; 772 results[ret + nr_found] = rcu_dereference(slot); 773 nr_found++; 774 } 775 ret += nr_found; 776 if (next_index == 0) 777 break; 778 cur_index = next_index; 779 } 780 781 return ret; 782 } 783 EXPORT_SYMBOL(radix_tree_gang_lookup); 784 785 /** 786 * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree 787 * @root: radix tree root 788 * @results: where the results of the lookup are placed 789 * @first_index: start the lookup from this key 790 * @max_items: place up to this many items at *results 791 * 792 * Performs an index-ascending scan of the tree for present items. Places 793 * their slots at *@results and returns the number of items which were 794 * placed at *@results. 795 * 796 * The implementation is naive. 797 * 798 * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must 799 * be dereferenced with radix_tree_deref_slot, and if using only RCU 800 * protection, radix_tree_deref_slot may fail requiring a retry. 801 */ 802 unsigned int 803 radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, 804 unsigned long first_index, unsigned int max_items) 805 { 806 unsigned long max_index; 807 struct radix_tree_node *node; 808 unsigned long cur_index = first_index; 809 unsigned int ret; 810 811 node = rcu_dereference(root->rnode); 812 if (!node) 813 return 0; 814 815 if (!radix_tree_is_indirect_ptr(node)) { 816 if (first_index > 0) 817 return 0; 818 results[0] = (void **)&root->rnode; 819 return 1; 820 } 821 node = radix_tree_indirect_to_ptr(node); 822 823 max_index = radix_tree_maxindex(node->height); 824 825 ret = 0; 826 while (ret < max_items) { 827 unsigned int slots_found; 828 unsigned long next_index; /* Index of next search */ 829 830 if (cur_index > max_index) 831 break; 832 slots_found = __lookup(node, results + ret, cur_index, 833 max_items - ret, &next_index); 834 ret += slots_found; 835 if (next_index == 0) 836 break; 837 cur_index = next_index; 838 } 839 840 return ret; 841 } 842 EXPORT_SYMBOL(radix_tree_gang_lookup_slot); 843 844 /* 845 * FIXME: the two tag_get()s here should use find_next_bit() instead of 846 * open-coding the search. 847 */ 848 static unsigned int 849 __lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index, 850 unsigned int max_items, unsigned long *next_index, unsigned int tag) 851 { 852 unsigned int nr_found = 0; 853 unsigned int shift, height; 854 855 height = slot->height; 856 if (height == 0) 857 goto out; 858 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 859 860 while (height > 0) { 861 unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ; 862 863 for (;;) { 864 if (tag_get(slot, tag, i)) 865 break; 866 index &= ~((1UL << shift) - 1); 867 index += 1UL << shift; 868 if (index == 0) 869 goto out; /* 32-bit wraparound */ 870 i++; 871 if (i == RADIX_TREE_MAP_SIZE) 872 goto out; 873 } 874 height--; 875 if (height == 0) { /* Bottom level: grab some items */ 876 unsigned long j = index & RADIX_TREE_MAP_MASK; 877 878 for ( ; j < RADIX_TREE_MAP_SIZE; j++) { 879 index++; 880 if (!tag_get(slot, tag, j)) 881 continue; 882 /* 883 * Even though the tag was found set, we need to 884 * recheck that we have a non-NULL node, because 885 * if this lookup is lockless, it may have been 886 * subsequently deleted. 887 * 888 * Similar care must be taken in any place that 889 * lookup ->slots[x] without a lock (ie. can't 890 * rely on its value remaining the same). 891 */ 892 if (slot->slots[j]) { 893 results[nr_found++] = &(slot->slots[j]); 894 if (nr_found == max_items) 895 goto out; 896 } 897 } 898 } 899 shift -= RADIX_TREE_MAP_SHIFT; 900 slot = rcu_dereference(slot->slots[i]); 901 if (slot == NULL) 902 break; 903 } 904 out: 905 *next_index = index; 906 return nr_found; 907 } 908 909 /** 910 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree 911 * based on a tag 912 * @root: radix tree root 913 * @results: where the results of the lookup are placed 914 * @first_index: start the lookup from this key 915 * @max_items: place up to this many items at *results 916 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 917 * 918 * Performs an index-ascending scan of the tree for present items which 919 * have the tag indexed by @tag set. Places the items at *@results and 920 * returns the number of items which were placed at *@results. 921 */ 922 unsigned int 923 radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, 924 unsigned long first_index, unsigned int max_items, 925 unsigned int tag) 926 { 927 struct radix_tree_node *node; 928 unsigned long max_index; 929 unsigned long cur_index = first_index; 930 unsigned int ret; 931 932 /* check the root's tag bit */ 933 if (!root_tag_get(root, tag)) 934 return 0; 935 936 node = rcu_dereference(root->rnode); 937 if (!node) 938 return 0; 939 940 if (!radix_tree_is_indirect_ptr(node)) { 941 if (first_index > 0) 942 return 0; 943 results[0] = node; 944 return 1; 945 } 946 node = radix_tree_indirect_to_ptr(node); 947 948 max_index = radix_tree_maxindex(node->height); 949 950 ret = 0; 951 while (ret < max_items) { 952 unsigned int nr_found, slots_found, i; 953 unsigned long next_index; /* Index of next search */ 954 955 if (cur_index > max_index) 956 break; 957 slots_found = __lookup_tag(node, (void ***)results + ret, 958 cur_index, max_items - ret, &next_index, tag); 959 nr_found = 0; 960 for (i = 0; i < slots_found; i++) { 961 struct radix_tree_node *slot; 962 slot = *(((void ***)results)[ret + i]); 963 if (!slot) 964 continue; 965 results[ret + nr_found] = rcu_dereference(slot); 966 nr_found++; 967 } 968 ret += nr_found; 969 if (next_index == 0) 970 break; 971 cur_index = next_index; 972 } 973 974 return ret; 975 } 976 EXPORT_SYMBOL(radix_tree_gang_lookup_tag); 977 978 /** 979 * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a 980 * radix tree based on a tag 981 * @root: radix tree root 982 * @results: where the results of the lookup are placed 983 * @first_index: start the lookup from this key 984 * @max_items: place up to this many items at *results 985 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 986 * 987 * Performs an index-ascending scan of the tree for present items which 988 * have the tag indexed by @tag set. Places the slots at *@results and 989 * returns the number of slots which were placed at *@results. 990 */ 991 unsigned int 992 radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, 993 unsigned long first_index, unsigned int max_items, 994 unsigned int tag) 995 { 996 struct radix_tree_node *node; 997 unsigned long max_index; 998 unsigned long cur_index = first_index; 999 unsigned int ret; 1000 1001 /* check the root's tag bit */ 1002 if (!root_tag_get(root, tag)) 1003 return 0; 1004 1005 node = rcu_dereference(root->rnode); 1006 if (!node) 1007 return 0; 1008 1009 if (!radix_tree_is_indirect_ptr(node)) { 1010 if (first_index > 0) 1011 return 0; 1012 results[0] = (void **)&root->rnode; 1013 return 1; 1014 } 1015 node = radix_tree_indirect_to_ptr(node); 1016 1017 max_index = radix_tree_maxindex(node->height); 1018 1019 ret = 0; 1020 while (ret < max_items) { 1021 unsigned int slots_found; 1022 unsigned long next_index; /* Index of next search */ 1023 1024 if (cur_index > max_index) 1025 break; 1026 slots_found = __lookup_tag(node, results + ret, 1027 cur_index, max_items - ret, &next_index, tag); 1028 ret += slots_found; 1029 if (next_index == 0) 1030 break; 1031 cur_index = next_index; 1032 } 1033 1034 return ret; 1035 } 1036 EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); 1037 1038 1039 /** 1040 * radix_tree_shrink - shrink height of a radix tree to minimal 1041 * @root radix tree root 1042 */ 1043 static inline void radix_tree_shrink(struct radix_tree_root *root) 1044 { 1045 /* try to shrink tree height */ 1046 while (root->height > 0) { 1047 struct radix_tree_node *to_free = root->rnode; 1048 void *newptr; 1049 1050 BUG_ON(!radix_tree_is_indirect_ptr(to_free)); 1051 to_free = radix_tree_indirect_to_ptr(to_free); 1052 1053 /* 1054 * The candidate node has more than one child, or its child 1055 * is not at the leftmost slot, we cannot shrink. 1056 */ 1057 if (to_free->count != 1) 1058 break; 1059 if (!to_free->slots[0]) 1060 break; 1061 1062 /* 1063 * We don't need rcu_assign_pointer(), since we are simply 1064 * moving the node from one part of the tree to another. If 1065 * it was safe to dereference the old pointer to it 1066 * (to_free->slots[0]), it will be safe to dereference the new 1067 * one (root->rnode). 1068 */ 1069 newptr = to_free->slots[0]; 1070 if (root->height > 1) 1071 newptr = radix_tree_ptr_to_indirect(newptr); 1072 root->rnode = newptr; 1073 root->height--; 1074 radix_tree_node_free(to_free); 1075 } 1076 } 1077 1078 /** 1079 * radix_tree_delete - delete an item from a radix tree 1080 * @root: radix tree root 1081 * @index: index key 1082 * 1083 * Remove the item at @index from the radix tree rooted at @root. 1084 * 1085 * Returns the address of the deleted item, or NULL if it was not present. 1086 */ 1087 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) 1088 { 1089 /* 1090 * The radix tree path needs to be one longer than the maximum path 1091 * since the "list" is null terminated. 1092 */ 1093 struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; 1094 struct radix_tree_node *slot = NULL; 1095 struct radix_tree_node *to_free; 1096 unsigned int height, shift; 1097 int tag; 1098 int offset; 1099 1100 height = root->height; 1101 if (index > radix_tree_maxindex(height)) 1102 goto out; 1103 1104 slot = root->rnode; 1105 if (height == 0) { 1106 root_tag_clear_all(root); 1107 root->rnode = NULL; 1108 goto out; 1109 } 1110 slot = radix_tree_indirect_to_ptr(slot); 1111 1112 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 1113 pathp->node = NULL; 1114 1115 do { 1116 if (slot == NULL) 1117 goto out; 1118 1119 pathp++; 1120 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 1121 pathp->offset = offset; 1122 pathp->node = slot; 1123 slot = slot->slots[offset]; 1124 shift -= RADIX_TREE_MAP_SHIFT; 1125 height--; 1126 } while (height > 0); 1127 1128 if (slot == NULL) 1129 goto out; 1130 1131 /* 1132 * Clear all tags associated with the just-deleted item 1133 */ 1134 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 1135 if (tag_get(pathp->node, tag, pathp->offset)) 1136 radix_tree_tag_clear(root, index, tag); 1137 } 1138 1139 to_free = NULL; 1140 /* Now free the nodes we do not need anymore */ 1141 while (pathp->node) { 1142 pathp->node->slots[pathp->offset] = NULL; 1143 pathp->node->count--; 1144 /* 1145 * Queue the node for deferred freeing after the 1146 * last reference to it disappears (set NULL, above). 1147 */ 1148 if (to_free) 1149 radix_tree_node_free(to_free); 1150 1151 if (pathp->node->count) { 1152 if (pathp->node == 1153 radix_tree_indirect_to_ptr(root->rnode)) 1154 radix_tree_shrink(root); 1155 goto out; 1156 } 1157 1158 /* Node with zero slots in use so free it */ 1159 to_free = pathp->node; 1160 pathp--; 1161 1162 } 1163 root_tag_clear_all(root); 1164 root->height = 0; 1165 root->rnode = NULL; 1166 if (to_free) 1167 radix_tree_node_free(to_free); 1168 1169 out: 1170 return slot; 1171 } 1172 EXPORT_SYMBOL(radix_tree_delete); 1173 1174 /** 1175 * radix_tree_tagged - test whether any items in the tree are tagged 1176 * @root: radix tree root 1177 * @tag: tag to test 1178 */ 1179 int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) 1180 { 1181 return root_tag_get(root, tag); 1182 } 1183 EXPORT_SYMBOL(radix_tree_tagged); 1184 1185 static void 1186 radix_tree_node_ctor(void *node) 1187 { 1188 memset(node, 0, sizeof(struct radix_tree_node)); 1189 } 1190 1191 static __init unsigned long __maxindex(unsigned int height) 1192 { 1193 unsigned int width = height * RADIX_TREE_MAP_SHIFT; 1194 int shift = RADIX_TREE_INDEX_BITS - width; 1195 1196 if (shift < 0) 1197 return ~0UL; 1198 if (shift >= BITS_PER_LONG) 1199 return 0UL; 1200 return ~0UL >> shift; 1201 } 1202 1203 static __init void radix_tree_init_maxindex(void) 1204 { 1205 unsigned int i; 1206 1207 for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) 1208 height_to_maxindex[i] = __maxindex(i); 1209 } 1210 1211 static int radix_tree_callback(struct notifier_block *nfb, 1212 unsigned long action, 1213 void *hcpu) 1214 { 1215 int cpu = (long)hcpu; 1216 struct radix_tree_preload *rtp; 1217 1218 /* Free per-cpu pool of perloaded nodes */ 1219 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { 1220 rtp = &per_cpu(radix_tree_preloads, cpu); 1221 while (rtp->nr) { 1222 kmem_cache_free(radix_tree_node_cachep, 1223 rtp->nodes[rtp->nr-1]); 1224 rtp->nodes[rtp->nr-1] = NULL; 1225 rtp->nr--; 1226 } 1227 } 1228 return NOTIFY_OK; 1229 } 1230 1231 void __init radix_tree_init(void) 1232 { 1233 radix_tree_node_cachep = kmem_cache_create("radix_tree_node", 1234 sizeof(struct radix_tree_node), 0, 1235 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, 1236 radix_tree_node_ctor); 1237 radix_tree_init_maxindex(); 1238 hotcpu_notifier(radix_tree_callback, 0); 1239 } 1240