/*
 * Copyright (C) 2001 Momchil Velikov
 * Portions Copyright (C) 2001 Christoph Hellwig
 * Copyright (C) 2005 SGI, Christoph Lameter
 * Copyright (C) 2006 Nick Piggin
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/radix-tree.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/string.h>
#include <linux/bitops.h>
#include <linux/rcupdate.h>


#ifdef __KERNEL__
#define RADIX_TREE_MAP_SHIFT	(CONFIG_BASE_SMALL ? 4 : 6)
#else
#define RADIX_TREE_MAP_SHIFT	3	/* For more stressful testing */
#endif

#define RADIX_TREE_MAP_SIZE	(1UL << RADIX_TREE_MAP_SHIFT)
#define RADIX_TREE_MAP_MASK	(RADIX_TREE_MAP_SIZE-1)

#define RADIX_TREE_TAG_LONGS	\
	((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)

struct radix_tree_node {
	unsigned int	height;		/* Height from the bottom */
	unsigned int	count;
	struct rcu_head	rcu_head;
	void		*slots[RADIX_TREE_MAP_SIZE];
	unsigned long	tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
};

struct radix_tree_path {
	struct radix_tree_node *node;
	int offset;
};

#define RADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
					  RADIX_TREE_MAP_SHIFT))

/*
 * The height_to_maxindex array needs to be one deeper than the maximum
 * path as height 0 holds only 1 entry.
 */
static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly;

/*
 * Radix tree node cache.
 */
static struct kmem_cache *radix_tree_node_cachep;
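/*
 * Illustrative note (not part of the original source): each level of the
 * tree consumes RADIX_TREE_MAP_SHIFT bits of the index, most-significant
 * bits first.  With the default RADIX_TREE_MAP_SHIFT of 6, a tree of
 * height 2 covers indexes 0..4095, and index 1000 is reached via slot
 * (1000 >> 6) & RADIX_TREE_MAP_MASK == 15 in the root node, then slot
 * 1000 & RADIX_TREE_MAP_MASK == 40 in the leaf node.
 */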
/*
 * Per-cpu pool of preloaded nodes
 */
struct radix_tree_preload {
	int nr;
	struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH];
};
static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };

static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
{
	return root->gfp_mask & __GFP_BITS_MASK;
}

static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__set_bit(offset, node->tags[tag]);
}

static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__clear_bit(offset, node->tags[tag]);
}

static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	return test_bit(offset, node->tags[tag]);
}

static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
{
	root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
}

static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
{
	root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
}

static inline void root_tag_clear_all(struct radix_tree_root *root)
{
	root->gfp_mask &= __GFP_BITS_MASK;
}

static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
{
	return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
}

/*
 * Returns 1 if any slot in the node has this tag set.
 * Otherwise returns 0.
 */
static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
{
	int idx;
	for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
		if (node->tags[tag][idx])
			return 1;
	}
	return 0;
}

/*
 * This assumes that the caller has performed appropriate preallocation, and
 * that the caller has pinned this thread of control to the current CPU.
 */
static struct radix_tree_node *
radix_tree_node_alloc(struct radix_tree_root *root)
{
	struct radix_tree_node *ret = NULL;
	gfp_t gfp_mask = root_gfp_mask(root);

	if (!(gfp_mask & __GFP_WAIT)) {
		struct radix_tree_preload *rtp;

		/*
		 * Provided the caller has preloaded here, we will always
		 * succeed in getting a node here (and never reach
		 * kmem_cache_alloc)
		 */
		rtp = &__get_cpu_var(radix_tree_preloads);
		if (rtp->nr) {
			ret = rtp->nodes[rtp->nr - 1];
			rtp->nodes[rtp->nr - 1] = NULL;
			rtp->nr--;
		}
	}
	if (ret == NULL)
		ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);

	BUG_ON(radix_tree_is_indirect_ptr(ret));
	return ret;
}

static void radix_tree_node_rcu_free(struct rcu_head *head)
{
	struct radix_tree_node *node =
			container_of(head, struct radix_tree_node, rcu_head);

	/*
	 * must only free zeroed nodes into the slab. radix_tree_shrink
	 * can leave us with a non-NULL entry in the first slot, so clear
	 * that here to make sure.
	 */
	tag_clear(node, 0, 0);
	tag_clear(node, 1, 0);
	node->slots[0] = NULL;
	node->count = 0;

	kmem_cache_free(radix_tree_node_cachep, node);
}

static inline void
radix_tree_node_free(struct radix_tree_node *node)
{
	call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
}
/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail.  On
 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_WAIT being passed to INIT_RADIX_TREE().
 */
int radix_tree_preload(gfp_t gfp_mask)
{
	struct radix_tree_preload *rtp;
	struct radix_tree_node *node;
	int ret = -ENOMEM;

	preempt_disable();
	rtp = &__get_cpu_var(radix_tree_preloads);
	while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
		preempt_enable();
		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
		if (node == NULL)
			goto out;
		preempt_disable();
		rtp = &__get_cpu_var(radix_tree_preloads);
		if (rtp->nr < ARRAY_SIZE(rtp->nodes))
			rtp->nodes[rtp->nr++] = node;
		else
			kmem_cache_free(radix_tree_node_cachep, node);
	}
	ret = 0;
out:
	return ret;
}
EXPORT_SYMBOL(radix_tree_preload);
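/*
 * Illustrative usage sketch (not part of the original source): a typical
 * caller preloads with a sleeping allocation outside its lock, then does
 * the insertion under its own spinlock against a tree whose gfp_mask does
 * not include __GFP_WAIT.  my_lock, my_tree, index and item are
 * hypothetical:
 *
 *	error = radix_tree_preload(GFP_KERNEL);
 *	if (!error) {
 *		spin_lock(&my_lock);
 *		error = radix_tree_insert(&my_tree, index, item);
 *		spin_unlock(&my_lock);
 *		radix_tree_preload_end();	// re-enables preemption
 *	}
 */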
/*
 * Return the maximum key which can be stored into a
 * radix tree with height HEIGHT.
 */
static inline unsigned long radix_tree_maxindex(unsigned int height)
{
	return height_to_maxindex[height];
}

/*
 * Extend a radix tree so it can store key @index.
 */
static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
{
	struct radix_tree_node *node;
	unsigned int height;
	int tag;

	/* Figure out what the height should be.  */
	height = root->height + 1;
	while (index > radix_tree_maxindex(height))
		height++;

	if (root->rnode == NULL) {
		root->height = height;
		goto out;
	}

	do {
		unsigned int newheight;
		if (!(node = radix_tree_node_alloc(root)))
			return -ENOMEM;

		/* Increase the height.  */
		node->slots[0] = radix_tree_indirect_to_ptr(root->rnode);

		/* Propagate the aggregated tag info into the new root */
		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
			if (root_tag_get(root, tag))
				tag_set(node, tag, 0);
		}

		newheight = root->height+1;
		node->height = newheight;
		node->count = 1;
		node = radix_tree_ptr_to_indirect(node);
		rcu_assign_pointer(root->rnode, node);
		root->height = newheight;
	} while (height > root->height);
out:
	return 0;
}

/**
 *	radix_tree_insert    -    insert into a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *	@item:		item to insert
 *
 *	Insert an item into the radix tree at position @index.
 */
int radix_tree_insert(struct radix_tree_root *root,
			unsigned long index, void *item)
{
	struct radix_tree_node *node = NULL, *slot;
	unsigned int height, shift;
	int offset;
	int error;

	BUG_ON(radix_tree_is_indirect_ptr(item));

	/* Make sure the tree is high enough.  */
	if (index > radix_tree_maxindex(root->height)) {
		error = radix_tree_extend(root, index);
		if (error)
			return error;
	}

	slot = radix_tree_indirect_to_ptr(root->rnode);

	height = root->height;
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	offset = 0;			/* uninitialised var warning */
	while (height > 0) {
		if (slot == NULL) {
			/* Have to add a child node.  */
			if (!(slot = radix_tree_node_alloc(root)))
				return -ENOMEM;
			slot->height = height;
			if (node) {
				rcu_assign_pointer(node->slots[offset], slot);
				node->count++;
			} else
				rcu_assign_pointer(root->rnode,
					radix_tree_ptr_to_indirect(slot));
		}

		/* Go a level down */
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		node = slot;
		slot = node->slots[offset];
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}

	if (slot != NULL)
		return -EEXIST;

	if (node) {
		node->count++;
		rcu_assign_pointer(node->slots[offset], item);
		BUG_ON(tag_get(node, 0, offset));
		BUG_ON(tag_get(node, 1, offset));
	} else {
		rcu_assign_pointer(root->rnode, item);
		BUG_ON(root_tag_get(root, 0));
		BUG_ON(root_tag_get(root, 1));
	}

	return 0;
}
EXPORT_SYMBOL(radix_tree_insert);

/*
 * is_slot == 1 : search for the slot.
 * is_slot == 0 : search for the node.
 */
static void *radix_tree_lookup_element(struct radix_tree_root *root,
				unsigned long index, int is_slot)
{
	unsigned int height, shift;
	struct radix_tree_node *node, **slot;

	node = rcu_dereference_raw(root->rnode);
	if (node == NULL)
		return NULL;

	if (!radix_tree_is_indirect_ptr(node)) {
		if (index > 0)
			return NULL;
		return is_slot ? (void *)&root->rnode : node;
	}
	node = radix_tree_indirect_to_ptr(node);

	height = node->height;
	if (index > radix_tree_maxindex(height))
		return NULL;

	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	do {
		slot = (struct radix_tree_node **)
			(node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
		node = rcu_dereference_raw(*slot);
		if (node == NULL)
			return NULL;

		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	} while (height > 0);

	return is_slot ? (void *)slot : node;
}

/**
 *	radix_tree_lookup_slot    -    lookup a slot in a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Returns:  the slot corresponding to the position @index in the
 *	radix tree @root. This is useful for update-if-exists operations.
 *
 *	This function can be called under rcu_read_lock iff the slot is not
 *	modified by radix_tree_replace_slot, otherwise it must be called
 *	exclusive from other writers. Any dereference of the slot must be done
 *	using radix_tree_deref_slot.
 */
void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
{
	return (void **)radix_tree_lookup_element(root, index, 1);
}
EXPORT_SYMBOL(radix_tree_lookup_slot);
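/*
 * Illustrative usage sketch (not part of the original source): a lockless
 * reader looks the slot up under rcu_read_lock() and dereferences it with
 * radix_tree_deref_slot(), rechecking for NULL because a concurrent
 * deletion may have cleared the slot.  my_tree and use_item() are
 * hypothetical:
 *
 *	rcu_read_lock();
 *	slot = radix_tree_lookup_slot(&my_tree, index);
 *	if (slot) {
 *		item = radix_tree_deref_slot(slot);
 *		if (item)
 *			use_item(item);
 *	}
 *	rcu_read_unlock();
 */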
/**
 *	radix_tree_lookup    -    perform lookup operation on a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Lookup the item at the position @index in the radix tree @root.
 *
 *	This function can be called under rcu_read_lock, however the caller
 *	must manage lifetimes of leaf nodes (eg. RCU may also be used to free
 *	them safely). No RCU barriers are required to access or modify the
 *	returned item, however.
 */
void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
{
	return radix_tree_lookup_element(root, index, 0);
}
EXPORT_SYMBOL(radix_tree_lookup);

/**
 *	radix_tree_tag_set - set a tag on a radix tree node
 *	@root:		radix tree root
 *	@index:		index key
 *	@tag:		tag index
 *
 *	Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
 *	corresponding to @index in the radix tree, from
 *	the root all the way down to the leaf node.
 *
 *	Returns the address of the tagged item.  Setting a tag on a not-present
 *	item is a bug.
 */
void *radix_tree_tag_set(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	unsigned int height, shift;
	struct radix_tree_node *slot;

	height = root->height;
	BUG_ON(index > radix_tree_maxindex(height));

	slot = radix_tree_indirect_to_ptr(root->rnode);
	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;

	while (height > 0) {
		int offset;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		if (!tag_get(slot, tag, offset))
			tag_set(slot, tag, offset);
		slot = slot->slots[offset];
		BUG_ON(slot == NULL);
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}

	/* set the root's tag bit */
	if (slot && !root_tag_get(root, tag))
		root_tag_set(root, tag);

	return slot;
}
EXPORT_SYMBOL(radix_tree_tag_set);
/**
 *	radix_tree_tag_clear - clear a tag on a radix tree node
 *	@root:		radix tree root
 *	@index:		index key
 *	@tag:		tag index
 *
 *	Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
 *	corresponding to @index in the radix tree.  If
 *	this causes the leaf node to have no tags set then clear the tag in the
 *	next-to-leaf node, etc.
 *
 *	Returns the address of the tagged item on success, else NULL.  ie:
 *	has the same return value and semantics as radix_tree_lookup().
 */
void *radix_tree_tag_clear(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	/*
	 * The radix tree path needs to be one longer than the maximum path
	 * since the "list" is null terminated.
	 */
	struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
	struct radix_tree_node *slot = NULL;
	unsigned int height, shift;

	height = root->height;
	if (index > radix_tree_maxindex(height))
		goto out;

	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
	pathp->node = NULL;
	slot = radix_tree_indirect_to_ptr(root->rnode);

	while (height > 0) {
		int offset;

		if (slot == NULL)
			goto out;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		pathp[1].offset = offset;
		pathp[1].node = slot;
		slot = slot->slots[offset];
		pathp++;
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}

	if (slot == NULL)
		goto out;

	while (pathp->node) {
		if (!tag_get(pathp->node, tag, pathp->offset))
			goto out;
		tag_clear(pathp->node, tag, pathp->offset);
		if (any_tag_set(pathp->node, tag))
			goto out;
		pathp--;
	}

	/* clear the root's tag bit */
	if (root_tag_get(root, tag))
		root_tag_clear(root, tag);

out:
	return slot;
}
EXPORT_SYMBOL(radix_tree_tag_clear);

/**
 * radix_tree_tag_get - get a tag on a radix tree node
 * @root:		radix tree root
 * @index:		index key
 * @tag:		tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Return values:
 *
 *  0: tag not present or not set
 *  1: tag set
 *
 * Note that the return value of this function may not be relied on, even if
 * the RCU lock is held, unless tag modification and node deletion are excluded
 * from concurrency.
 */
int radix_tree_tag_get(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	unsigned int height, shift;
	struct radix_tree_node *node;
	int saw_unset_tag = 0;

	/* check the root's tag bit */
	if (!root_tag_get(root, tag))
		return 0;

	node = rcu_dereference_raw(root->rnode);
	if (node == NULL)
		return 0;

	if (!radix_tree_is_indirect_ptr(node))
		return (index == 0);
	node = radix_tree_indirect_to_ptr(node);

	height = node->height;
	if (index > radix_tree_maxindex(height))
		return 0;

	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;

	for ( ; ; ) {
		int offset;

		if (node == NULL)
			return 0;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;

		/*
		 * This is just a debug check.  Later, we can bail as soon as
		 * we see an unset tag.
		 */
		if (!tag_get(node, tag, offset))
			saw_unset_tag = 1;
		if (height == 1)
			return !!tag_get(node, tag, offset);
		node = rcu_dereference_raw(node->slots[offset]);
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}
}
EXPORT_SYMBOL(radix_tree_tag_get);
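/*
 * Illustrative usage sketch (not part of the original source): tags let a
 * caller mark present entries and later test or find them without walking
 * every slot, much as the page cache marks dirty pages.  MY_TAG_DIRTY is a
 * hypothetical tag index (< RADIX_TREE_MAX_TAGS) and the caller is assumed
 * to hold its own tree lock:
 *
 *	radix_tree_tag_set(&my_tree, index, MY_TAG_DIRTY);
 *	...
 *	if (radix_tree_tag_get(&my_tree, index, MY_TAG_DIRTY))
 *		writeback_item(index);		// hypothetical helper
 *	radix_tree_tag_clear(&my_tree, index, MY_TAG_DIRTY);
 */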
/**
 * radix_tree_range_tag_if_tagged - for each item in given range set given
 *				    tag if item has another tag set
 * @root:		radix tree root
 * @first_indexp:	pointer to a starting index of a range to scan
 * @last_index:		last index of a range to scan
 * @nr_to_tag:		maximum number of items to tag
 * @iftag:		tag index to test
 * @settag:		tag index to set if tested tag is set
 *
 * This function scans the range of the radix tree from first_index to
 * last_index (inclusive).  For each item in the range, if iftag is set the
 * function also sets settag.  The function stops either after tagging
 * nr_to_tag items or after reaching last_index.
 *
 * The function returns the number of leaves where the tag was set and sets
 * *first_indexp to the first unscanned index.
 */
unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
		unsigned long *first_indexp, unsigned long last_index,
		unsigned long nr_to_tag,
		unsigned int iftag, unsigned int settag)
{
	unsigned int height = root->height, shift;
	unsigned long tagged = 0, index = *first_indexp;
	struct radix_tree_node *open_slots[height], *slot;

	last_index = min(last_index, radix_tree_maxindex(height));
	if (index > last_index)
		return 0;
	if (!nr_to_tag)
		return 0;
	if (!root_tag_get(root, iftag)) {
		*first_indexp = last_index + 1;
		return 0;
	}
	if (height == 0) {
		*first_indexp = last_index + 1;
		root_tag_set(root, settag);
		return 1;
	}

	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
	slot = radix_tree_indirect_to_ptr(root->rnode);

	for (;;) {
		int offset;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		if (!slot->slots[offset])
			goto next;
		if (!tag_get(slot, iftag, offset))
			goto next;
		tag_set(slot, settag, offset);
		if (height == 1) {
			tagged++;
			goto next;
		}
		/* Go down one level */
		height--;
		shift -= RADIX_TREE_MAP_SHIFT;
		open_slots[height] = slot;
		slot = slot->slots[offset];
		continue;
next:
		/* Go to next item at level determined by 'shift' */
		index = ((index >> shift) + 1) << shift;
		if (index > last_index)
			break;
		if (tagged >= nr_to_tag)
			break;
		while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) {
			/*
			 * We've fully scanned this node. Go up. Because
			 * last_index is guaranteed to be in the tree, what
			 * we do below cannot wander astray.
			 */
			slot = open_slots[height];
			height++;
			shift += RADIX_TREE_MAP_SHIFT;
		}
	}
	/*
	 * The iftag must have been set somewhere because otherwise
	 * we would have returned immediately at the beginning of the function
	 */
	root_tag_set(root, settag);
	*first_indexp = index;

	return tagged;
}
EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
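/*
 * Illustrative usage sketch (not part of the original source): a caller can
 * convert one tag into another over a bounded range, in batches of at most
 * nr_to_tag items.  MY_TAG_DIRTY, MY_TAG_TOWRITE, first and last are
 * hypothetical:
 *
 *	unsigned long start = first, n;
 *
 *	n = radix_tree_range_tag_if_tagged(&my_tree, &start, last, 128,
 *					   MY_TAG_DIRTY, MY_TAG_TOWRITE);
 *
 * Afterwards n entries in [first, last] also carry MY_TAG_TOWRITE, and
 * start points at the first index that was not scanned.
 */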
/**
 * radix_tree_next_hole    -    find the next hole (not-present entry)
 * @root:	tree root
 * @index:	index key
 * @max_scan:	maximum range to search
 *
 * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest
 * indexed hole.
 *
 * Returns: the index of the hole if found, otherwise returns an index
 * outside of the set specified (in which case 'return - index >= max_scan'
 * will be true). In rare cases of index wrap-around, 0 will be returned.
 *
 * radix_tree_next_hole may be called under rcu_read_lock. However, like
 * radix_tree_gang_lookup, this will not atomically search a snapshot of
 * the tree at a single point in time. For example, if a hole is created
 * at index 5, then subsequently a hole is created at index 10,
 * radix_tree_next_hole covering both indexes may return 10 if called
 * under rcu_read_lock.
 */
unsigned long radix_tree_next_hole(struct radix_tree_root *root,
				unsigned long index, unsigned long max_scan)
{
	unsigned long i;

	for (i = 0; i < max_scan; i++) {
		if (!radix_tree_lookup(root, index))
			break;
		index++;
		if (index == 0)
			break;
	}

	return index;
}
EXPORT_SYMBOL(radix_tree_next_hole);

/**
 * radix_tree_prev_hole    -    find the prev hole (not-present entry)
 * @root:	tree root
 * @index:	index key
 * @max_scan:	maximum range to search
 *
 * Search backwards in the range [max(index-max_scan+1, 0), index]
 * for the first hole.
 *
 * Returns: the index of the hole if found, otherwise returns an index
 * outside of the set specified (in which case 'index - return >= max_scan'
 * will be true). In rare cases of wrap-around, ULONG_MAX will be returned.
 *
 * radix_tree_prev_hole may be called under rcu_read_lock. However, like
 * radix_tree_gang_lookup, this will not atomically search a snapshot of
 * the tree at a single point in time. For example, if a hole is created
 * at index 10, then subsequently a hole is created at index 5,
 * radix_tree_prev_hole covering both indexes may return 5 if called under
 * rcu_read_lock.
 */
unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
				   unsigned long index, unsigned long max_scan)
{
	unsigned long i;

	for (i = 0; i < max_scan; i++) {
		if (!radix_tree_lookup(root, index))
			break;
		index--;
		if (index == ULONG_MAX)
			break;
	}

	return index;
}
EXPORT_SYMBOL(radix_tree_prev_hole);
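/*
 * Illustrative usage sketch (not part of the original source): a caller that
 * wants to know how long a contiguous run of present entries is, starting at
 * 'index', can look for the first hole within a bounded window:
 *
 *	hole = radix_tree_next_hole(&my_tree, index, 64);
 *	run  = hole - index;	// consecutive present entries, meaningful
 *				// when run < 64 and no wrap-around occurred
 */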
static unsigned int
__lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
	unsigned int max_items, unsigned long *next_index)
{
	unsigned int nr_found = 0;
	unsigned int shift, height;
	unsigned long i;

	height = slot->height;
	if (height == 0)
		goto out;
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	for ( ; height > 1; height--) {
		i = (index >> shift) & RADIX_TREE_MAP_MASK;
		for (;;) {
			if (slot->slots[i] != NULL)
				break;
			index &= ~((1UL << shift) - 1);
			index += 1UL << shift;
			if (index == 0)
				goto out;	/* 32-bit wraparound */
			i++;
			if (i == RADIX_TREE_MAP_SIZE)
				goto out;
		}

		shift -= RADIX_TREE_MAP_SHIFT;
		slot = rcu_dereference_raw(slot->slots[i]);
		if (slot == NULL)
			goto out;
	}

	/* Bottom level: grab some items */
	for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
		index++;
		if (slot->slots[i]) {
			results[nr_found++] = &(slot->slots[i]);
			if (nr_found == max_items)
				goto out;
		}
	}
out:
	*next_index = index;
	return nr_found;
}

/**
 *	radix_tree_gang_lookup - perform multiple lookup on a radix tree
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *
 *	Performs an index-ascending scan of the tree for present items.  Places
 *	them at *@results and returns the number of items which were placed at
 *	*@results.
 *
 *	The implementation is naive.
 *
 *	Like radix_tree_lookup, radix_tree_gang_lookup may be called under
 *	rcu_read_lock. In this case, rather than the returned results being
 *	an atomic snapshot of the tree at a single point in time, the semantics
 *	of an RCU protected gang lookup are as though multiple radix_tree_lookups
 *	have been issued in individual locks, and results stored in 'results'.
 */
unsigned int
radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
			unsigned long first_index, unsigned int max_items)
{
	unsigned long max_index;
	struct radix_tree_node *node;
	unsigned long cur_index = first_index;
	unsigned int ret;

	node = rcu_dereference_raw(root->rnode);
	if (!node)
		return 0;

	if (!radix_tree_is_indirect_ptr(node)) {
		if (first_index > 0)
			return 0;
		results[0] = node;
		return 1;
	}
	node = radix_tree_indirect_to_ptr(node);

	max_index = radix_tree_maxindex(node->height);

	ret = 0;
	while (ret < max_items) {
		unsigned int nr_found, slots_found, i;
		unsigned long next_index;	/* Index of next search */

		if (cur_index > max_index)
			break;
		slots_found = __lookup(node, (void ***)results + ret, cur_index,
					max_items - ret, &next_index);
		nr_found = 0;
		for (i = 0; i < slots_found; i++) {
			struct radix_tree_node *slot;
			slot = *(((void ***)results)[ret + i]);
			if (!slot)
				continue;
			results[ret + nr_found] = rcu_dereference_raw(slot);
			nr_found++;
		}
		ret += nr_found;
		if (next_index == 0)
			break;
		cur_index = next_index;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup);

/**
 *	radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *
 *	Performs an index-ascending scan of the tree for present items.  Places
 *	their slots at *@results and returns the number of items which were
 *	placed at *@results.
 *
 *	The implementation is naive.
 *
 *	Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must
 *	be dereferenced with radix_tree_deref_slot, and if using only RCU
 *	protection, radix_tree_deref_slot may fail requiring a retry.
 */
unsigned int
radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
			unsigned long first_index, unsigned int max_items)
{
	unsigned long max_index;
	struct radix_tree_node *node;
	unsigned long cur_index = first_index;
	unsigned int ret;

	node = rcu_dereference_raw(root->rnode);
	if (!node)
		return 0;

	if (!radix_tree_is_indirect_ptr(node)) {
		if (first_index > 0)
			return 0;
		results[0] = (void **)&root->rnode;
		return 1;
	}
	node = radix_tree_indirect_to_ptr(node);

	max_index = radix_tree_maxindex(node->height);

	ret = 0;
	while (ret < max_items) {
		unsigned int slots_found;
		unsigned long next_index;	/* Index of next search */

		if (cur_index > max_index)
			break;
		slots_found = __lookup(node, results + ret, cur_index,
					max_items - ret, &next_index);
		ret += slots_found;
		if (next_index == 0)
			break;
		cur_index = next_index;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_slot);
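/*
 * Illustrative usage sketch (not part of the original source): batched
 * iteration over all present items.  'struct my_item', its 'index' field,
 * my_tree and process() are hypothetical; items are assumed to record the
 * index they were inserted at, so the scan can resume just past the last
 * item of the previous batch:
 *
 *	struct my_item *batch[16];
 *	unsigned long next = 0;
 *	unsigned int i, n;
 *
 *	do {
 *		n = radix_tree_gang_lookup(&my_tree, (void **)batch, next, 16);
 *		for (i = 0; i < n; i++) {
 *			process(batch[i]);
 *			next = batch[i]->index + 1;
 *		}
 *	} while (n == 16);
 */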
/*
 * FIXME: the two tag_get()s here should use find_next_bit() instead of
 * open-coding the search.
 */
static unsigned int
__lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index,
	unsigned int max_items, unsigned long *next_index, unsigned int tag)
{
	unsigned int nr_found = 0;
	unsigned int shift, height;

	height = slot->height;
	if (height == 0)
		goto out;
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	while (height > 0) {
		unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK;

		for (;;) {
			if (tag_get(slot, tag, i))
				break;
			index &= ~((1UL << shift) - 1);
			index += 1UL << shift;
			if (index == 0)
				goto out;	/* 32-bit wraparound */
			i++;
			if (i == RADIX_TREE_MAP_SIZE)
				goto out;
		}
		height--;
		if (height == 0) {	/* Bottom level: grab some items */
			unsigned long j = index & RADIX_TREE_MAP_MASK;

			for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
				index++;
				if (!tag_get(slot, tag, j))
					continue;
				/*
				 * Even though the tag was found set, we need to
				 * recheck that we have a non-NULL node, because
				 * if this lookup is lockless, it may have been
				 * subsequently deleted.
				 *
				 * Similar care must be taken in any place that
				 * looks up ->slots[x] without a lock (ie. can't
				 * rely on its value remaining the same).
				 */
				if (slot->slots[j]) {
					results[nr_found++] = &(slot->slots[j]);
					if (nr_found == max_items)
						goto out;
				}
			}
		}
		shift -= RADIX_TREE_MAP_SHIFT;
		slot = rcu_dereference_raw(slot->slots[i]);
		if (slot == NULL)
			break;
	}
out:
	*next_index = index;
	return nr_found;
}
/**
 *	radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
 *	                             based on a tag
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *	@tag:		the tag index (< RADIX_TREE_MAX_TAGS)
 *
 *	Performs an index-ascending scan of the tree for present items which
 *	have the tag indexed by @tag set.  Places the items at *@results and
 *	returns the number of items which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
		unsigned long first_index, unsigned int max_items,
		unsigned int tag)
{
	struct radix_tree_node *node;
	unsigned long max_index;
	unsigned long cur_index = first_index;
	unsigned int ret;

	/* check the root's tag bit */
	if (!root_tag_get(root, tag))
		return 0;

	node = rcu_dereference_raw(root->rnode);
	if (!node)
		return 0;

	if (!radix_tree_is_indirect_ptr(node)) {
		if (first_index > 0)
			return 0;
		results[0] = node;
		return 1;
	}
	node = radix_tree_indirect_to_ptr(node);

	max_index = radix_tree_maxindex(node->height);

	ret = 0;
	while (ret < max_items) {
		unsigned int nr_found, slots_found, i;
		unsigned long next_index;	/* Index of next search */

		if (cur_index > max_index)
			break;
		slots_found = __lookup_tag(node, (void ***)results + ret,
				cur_index, max_items - ret, &next_index, tag);
		nr_found = 0;
		for (i = 0; i < slots_found; i++) {
			struct radix_tree_node *slot;
			slot = *(((void ***)results)[ret + i]);
			if (!slot)
				continue;
			results[ret + nr_found] = rcu_dereference_raw(slot);
			nr_found++;
		}
		ret += nr_found;
		if (next_index == 0)
			break;
		cur_index = next_index;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
/**
 *	radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
 *					  radix tree based on a tag
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *	@tag:		the tag index (< RADIX_TREE_MAX_TAGS)
 *
 *	Performs an index-ascending scan of the tree for present items which
 *	have the tag indexed by @tag set.  Places the slots at *@results and
 *	returns the number of slots which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
		unsigned long first_index, unsigned int max_items,
		unsigned int tag)
{
	struct radix_tree_node *node;
	unsigned long max_index;
	unsigned long cur_index = first_index;
	unsigned int ret;

	/* check the root's tag bit */
	if (!root_tag_get(root, tag))
		return 0;

	node = rcu_dereference_raw(root->rnode);
	if (!node)
		return 0;

	if (!radix_tree_is_indirect_ptr(node)) {
		if (first_index > 0)
			return 0;
		results[0] = (void **)&root->rnode;
		return 1;
	}
	node = radix_tree_indirect_to_ptr(node);

	max_index = radix_tree_maxindex(node->height);

	ret = 0;
	while (ret < max_items) {
		unsigned int slots_found;
		unsigned long next_index;	/* Index of next search */

		if (cur_index > max_index)
			break;
		slots_found = __lookup_tag(node, results + ret,
				cur_index, max_items - ret, &next_index, tag);
		ret += slots_found;
		if (next_index == 0)
			break;
		cur_index = next_index;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);


/**
 *	radix_tree_shrink    -    shrink height of a radix tree to minimal
 *	@root:		radix tree root
 */
static inline void radix_tree_shrink(struct radix_tree_root *root)
{
	/* try to shrink tree height */
	while (root->height > 0) {
		struct radix_tree_node *to_free = root->rnode;
		void *newptr;

		BUG_ON(!radix_tree_is_indirect_ptr(to_free));
		to_free = radix_tree_indirect_to_ptr(to_free);

		/*
		 * If the candidate node has more than one child, or its child
		 * is not at the leftmost slot, we cannot shrink.
		 */
		if (to_free->count != 1)
			break;
		if (!to_free->slots[0])
			break;

		/*
		 * We don't need rcu_assign_pointer(), since we are simply
		 * moving the node from one part of the tree to another. If
		 * it was safe to dereference the old pointer to it
		 * (to_free->slots[0]), it will be safe to dereference the new
		 * one (root->rnode).
		 */
		newptr = to_free->slots[0];
		if (root->height > 1)
			newptr = radix_tree_ptr_to_indirect(newptr);
		root->rnode = newptr;
		root->height--;
		radix_tree_node_free(to_free);
	}
}
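/*
 * Illustrative note (not part of the original source): if a tree of height 2
 * ends up with its only remaining item at index 0, the loop above runs
 * twice: first the root node is replaced by its single child (height 2 -> 1),
 * then the remaining node is replaced by the item itself, stored directly in
 * root->rnode (height 1 -> 0).
 */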
/**
 *	radix_tree_delete    -    delete an item from a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Remove the item at @index from the radix tree rooted at @root.
 *
 *	Returns the address of the deleted item, or NULL if it was not present.
 */
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
{
	/*
	 * The radix tree path needs to be one longer than the maximum path
	 * since the "list" is null terminated.
	 */
	struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
	struct radix_tree_node *slot = NULL;
	struct radix_tree_node *to_free;
	unsigned int height, shift;
	int tag;
	int offset;

	height = root->height;
	if (index > radix_tree_maxindex(height))
		goto out;

	slot = root->rnode;
	if (height == 0) {
		root_tag_clear_all(root);
		root->rnode = NULL;
		goto out;
	}
	slot = radix_tree_indirect_to_ptr(slot);

	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
	pathp->node = NULL;

	do {
		if (slot == NULL)
			goto out;

		pathp++;
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		pathp->offset = offset;
		pathp->node = slot;
		slot = slot->slots[offset];
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	} while (height > 0);

	if (slot == NULL)
		goto out;

	/*
	 * Clear all tags associated with the just-deleted item
	 */
	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
		if (tag_get(pathp->node, tag, pathp->offset))
			radix_tree_tag_clear(root, index, tag);
	}

	to_free = NULL;
	/* Now free the nodes we do not need anymore */
	while (pathp->node) {
		pathp->node->slots[pathp->offset] = NULL;
		pathp->node->count--;
		/*
		 * Queue the node for deferred freeing after the
		 * last reference to it disappears (set NULL, above).
		 */
		if (to_free)
			radix_tree_node_free(to_free);

		if (pathp->node->count) {
			if (pathp->node ==
					radix_tree_indirect_to_ptr(root->rnode))
				radix_tree_shrink(root);
			goto out;
		}

		/* Node with zero slots in use so free it */
		to_free = pathp->node;
		pathp--;

	}
	root_tag_clear_all(root);
	root->height = 0;
	root->rnode = NULL;
	if (to_free)
		radix_tree_node_free(to_free);

out:
	return slot;
}
EXPORT_SYMBOL(radix_tree_delete);

/**
 *	radix_tree_tagged - test whether any items in the tree are tagged
 *	@root:		radix tree root
 *	@tag:		tag to test
 */
int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
{
	return root_tag_get(root, tag);
}
EXPORT_SYMBOL(radix_tree_tagged);

static void
radix_tree_node_ctor(void *node)
{
	memset(node, 0, sizeof(struct radix_tree_node));
}

static __init unsigned long __maxindex(unsigned int height)
{
	unsigned int width = height * RADIX_TREE_MAP_SHIFT;
	int shift = RADIX_TREE_INDEX_BITS - width;

	if (shift < 0)
		return ~0UL;
	if (shift >= BITS_PER_LONG)
		return 0UL;
	return ~0UL >> shift;
}

static __init void radix_tree_init_maxindex(void)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
		height_to_maxindex[i] = __maxindex(i);
}

static int radix_tree_callback(struct notifier_block *nfb,
				unsigned long action,
				void *hcpu)
{
	int cpu = (long)hcpu;
	struct radix_tree_preload *rtp;

	/* Free per-cpu pool of preloaded nodes */
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
		rtp = &per_cpu(radix_tree_preloads, cpu);
		while (rtp->nr) {
			kmem_cache_free(radix_tree_node_cachep,
					rtp->nodes[rtp->nr-1]);
			rtp->nodes[rtp->nr-1] = NULL;
			rtp->nr--;
		}
	}
	return NOTIFY_OK;
}
void __init radix_tree_init(void)
{
	radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
			sizeof(struct radix_tree_node), 0,
			SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
			radix_tree_node_ctor);
	radix_tree_init_maxindex();
	hotcpu_notifier(radix_tree_callback, 0);
}