/*
 * Copyright (C) 2001 Momchil Velikov
 * Portions Copyright (C) 2001 Christoph Hellwig
 * Copyright (C) 2005 SGI, Christoph Lameter
 * Copyright (C) 2006 Nick Piggin
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/radix-tree.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/string.h>
#include <linux/bitops.h>
#include <linux/rcupdate.h>


#ifdef __KERNEL__
#define RADIX_TREE_MAP_SHIFT	(CONFIG_BASE_SMALL ? 4 : 6)
#else
#define RADIX_TREE_MAP_SHIFT	3	/* For more stressful testing */
#endif

#define RADIX_TREE_MAP_SIZE	(1UL << RADIX_TREE_MAP_SHIFT)
#define RADIX_TREE_MAP_MASK	(RADIX_TREE_MAP_SIZE-1)

#define RADIX_TREE_TAG_LONGS	\
	((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)

struct radix_tree_node {
	unsigned int	height;		/* Height from the bottom */
	unsigned int	count;
	struct rcu_head	rcu_head;
	void __rcu	*slots[RADIX_TREE_MAP_SIZE];
	unsigned long	tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
};

struct radix_tree_path {
	struct radix_tree_node *node;
	int offset;
};

#define RADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
					  RADIX_TREE_MAP_SHIFT))

/*
 * The height_to_maxindex array needs to be one deeper than the maximum
 * path as height 0 holds only 1 entry.
 */
static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly;
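/*
 * Editor's note (not in the original source): a worked example of what
 * __maxindex() below puts into this table.  With the usual
 * RADIX_TREE_MAP_SHIFT of 6 on a 64-bit kernel:
 *
 *	height  0 -> 0			(only the direct root slot)
 *	height  1 -> 2^6  - 1 = 63
 *	height  2 -> 2^12 - 1 = 4095
 *	height  3 -> 2^18 - 1 = 262143
 *	...
 *	height 11 -> ~0UL		(width >= BITS_PER_LONG)
 *
 * i.e. each extra level multiplies the index space by RADIX_TREE_MAP_SIZE.
 */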
/*
 * Radix tree node cache.
 */
static struct kmem_cache *radix_tree_node_cachep;

/*
 * Per-cpu pool of preloaded nodes
 */
struct radix_tree_preload {
	int nr;
	struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH];
};
static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };

static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
{
	return root->gfp_mask & __GFP_BITS_MASK;
}

static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__set_bit(offset, node->tags[tag]);
}

static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__clear_bit(offset, node->tags[tag]);
}

static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	return test_bit(offset, node->tags[tag]);
}

static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
{
	root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
}

static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
{
	root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
}

static inline void root_tag_clear_all(struct radix_tree_root *root)
{
	root->gfp_mask &= __GFP_BITS_MASK;
}

static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
{
	return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
}

/*
 * Returns 1 if any slot in the node has this tag set.
 * Otherwise returns 0.
 */
static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
{
	int idx;

	for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
		if (node->tags[tag][idx])
			return 1;
	}
	return 0;
}

/*
 * This assumes that the caller has performed appropriate preallocation, and
 * that the caller has pinned this thread of control to the current CPU.
 */
static struct radix_tree_node *
radix_tree_node_alloc(struct radix_tree_root *root)
{
	struct radix_tree_node *ret = NULL;
	gfp_t gfp_mask = root_gfp_mask(root);

	if (!(gfp_mask & __GFP_WAIT)) {
		struct radix_tree_preload *rtp;

		/*
		 * Provided the caller has preloaded here, we will always
		 * succeed in getting a node here (and never reach
		 * kmem_cache_alloc)
		 */
		rtp = &__get_cpu_var(radix_tree_preloads);
		if (rtp->nr) {
			ret = rtp->nodes[rtp->nr - 1];
			rtp->nodes[rtp->nr - 1] = NULL;
			rtp->nr--;
		}
	}
	if (ret == NULL)
		ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);

	BUG_ON(radix_tree_is_indirect_ptr(ret));
	return ret;
}

static void radix_tree_node_rcu_free(struct rcu_head *head)
{
	struct radix_tree_node *node =
			container_of(head, struct radix_tree_node, rcu_head);
	int i;

	/*
	 * must only free zeroed nodes into the slab. radix_tree_shrink
	 * can leave us with a non-NULL entry in the first slot, so clear
	 * that here to make sure.
	 */
	for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
		tag_clear(node, i, 0);

	node->slots[0] = NULL;
	node->count = 0;

	kmem_cache_free(radix_tree_node_cachep, node);
}

static inline void
radix_tree_node_free(struct radix_tree_node *node)
{
	call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
}

/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail.  On
 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_WAIT being passed to INIT_RADIX_TREE().
 */
int radix_tree_preload(gfp_t gfp_mask)
{
	struct radix_tree_preload *rtp;
	struct radix_tree_node *node;
	int ret = -ENOMEM;

	preempt_disable();
	rtp = &__get_cpu_var(radix_tree_preloads);
	while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
		preempt_enable();
		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
		if (node == NULL)
			goto out;
		preempt_disable();
		rtp = &__get_cpu_var(radix_tree_preloads);
		if (rtp->nr < ARRAY_SIZE(rtp->nodes))
			rtp->nodes[rtp->nr++] = node;
		else
			kmem_cache_free(radix_tree_node_cachep, node);
	}
	ret = 0;
out:
	return ret;
}
EXPORT_SYMBOL(radix_tree_preload);
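
/*
 * Editor's sketch (not part of the original file): the typical way the
 * preload pool above is used.  The tree is initialised with a gfp mask
 * that lacks __GFP_WAIT, nodes are preloaded while sleeping is still
 * allowed, and the actual insertion runs under a spinlock.  'my_tree',
 * 'my_lock', 'my_insert' and 'my_item' are hypothetical;
 * radix_tree_preload_end() is the header helper that re-enables preemption.
 */
#if 0	/* illustrative only, not compiled */
static RADIX_TREE(my_tree, GFP_ATOMIC);	/* no __GFP_WAIT: rely on preload */
static DEFINE_SPINLOCK(my_lock);

static int my_insert(unsigned long index, void *my_item)
{
	int err;

	err = radix_tree_preload(GFP_KERNEL);	/* may sleep, fills the pool */
	if (err)
		return err;

	spin_lock(&my_lock);
	/* cannot fail for lack of memory now, but may return -EEXIST */
	err = radix_tree_insert(&my_tree, index, my_item);
	spin_unlock(&my_lock);

	radix_tree_preload_end();		/* re-enable preemption */
	return err;
}
#endif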
/*
 * Return the maximum key which can be stored into a
 * radix tree with height HEIGHT.
 */
static inline unsigned long radix_tree_maxindex(unsigned int height)
{
	return height_to_maxindex[height];
}

/*
 * Extend a radix tree so it can store key @index.
 */
static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
{
	struct radix_tree_node *node;
	unsigned int height;
	int tag;

	/* Figure out what the height should be.  */
	height = root->height + 1;
	while (index > radix_tree_maxindex(height))
		height++;

	if (root->rnode == NULL) {
		root->height = height;
		goto out;
	}

	do {
		unsigned int newheight;

		if (!(node = radix_tree_node_alloc(root)))
			return -ENOMEM;

		/* Increase the height.  */
		node->slots[0] = radix_tree_indirect_to_ptr(root->rnode);

		/* Propagate the aggregated tag info into the new root */
		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
			if (root_tag_get(root, tag))
				tag_set(node, tag, 0);
		}

		newheight = root->height+1;
		node->height = newheight;
		node->count = 1;
		node = radix_tree_ptr_to_indirect(node);
		rcu_assign_pointer(root->rnode, node);
		root->height = newheight;
	} while (height > root->height);
out:
	return 0;
}

/**
 *	radix_tree_insert    -    insert into a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *	@item:		item to insert
 *
 *	Insert an item into the radix tree at position @index.
 */
int radix_tree_insert(struct radix_tree_root *root,
			unsigned long index, void *item)
{
	struct radix_tree_node *node = NULL, *slot;
	unsigned int height, shift;
	int offset;
	int error;

	BUG_ON(radix_tree_is_indirect_ptr(item));

	/* Make sure the tree is high enough.  */
	if (index > radix_tree_maxindex(root->height)) {
		error = radix_tree_extend(root, index);
		if (error)
			return error;
	}

	slot = radix_tree_indirect_to_ptr(root->rnode);

	height = root->height;
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	offset = 0;			/* uninitialised var warning */
	while (height > 0) {
		if (slot == NULL) {
			/* Have to add a child node.  */
			if (!(slot = radix_tree_node_alloc(root)))
				return -ENOMEM;
			slot->height = height;
			if (node) {
				rcu_assign_pointer(node->slots[offset], slot);
				node->count++;
			} else
				rcu_assign_pointer(root->rnode,
					radix_tree_ptr_to_indirect(slot));
		}

		/* Go a level down */
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		node = slot;
		slot = node->slots[offset];
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}

	if (slot != NULL)
		return -EEXIST;

	if (node) {
		node->count++;
		rcu_assign_pointer(node->slots[offset], item);
		BUG_ON(tag_get(node, 0, offset));
		BUG_ON(tag_get(node, 1, offset));
	} else {
		rcu_assign_pointer(root->rnode, item);
		BUG_ON(root_tag_get(root, 0));
		BUG_ON(root_tag_get(root, 1));
	}

	return 0;
}
EXPORT_SYMBOL(radix_tree_insert);

/*
 * is_slot == 1 : search for the slot.
 * is_slot == 0 : search for the node.
 */
static void *radix_tree_lookup_element(struct radix_tree_root *root,
				unsigned long index, int is_slot)
{
	unsigned int height, shift;
	struct radix_tree_node *node, **slot;

	node = rcu_dereference_raw(root->rnode);
	if (node == NULL)
		return NULL;

	if (!radix_tree_is_indirect_ptr(node)) {
		if (index > 0)
			return NULL;
		return is_slot ? (void *)&root->rnode : node;
	}
	node = radix_tree_indirect_to_ptr(node);

	height = node->height;
	if (index > radix_tree_maxindex(height))
		return NULL;

	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	do {
		slot = (struct radix_tree_node **)
			(node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
		node = rcu_dereference_raw(*slot);
		if (node == NULL)
			return NULL;

		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	} while (height > 0);

	return is_slot ? (void *)slot : node;
}

/**
 *	radix_tree_lookup_slot    -    lookup a slot in a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Returns:  the slot corresponding to the position @index in the
 *	radix tree @root. This is useful for update-if-exists operations.
 *
 *	This function can be called under rcu_read_lock iff the slot is not
 *	modified by radix_tree_replace_slot, otherwise it must be called
 *	exclusive from other writers. Any dereference of the slot must be done
 *	using radix_tree_deref_slot.
 */
void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
{
	return (void **)radix_tree_lookup_element(root, index, 1);
}
EXPORT_SYMBOL(radix_tree_lookup_slot);
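
/*
 * Editor's sketch (not part of the original file): update-if-exists via
 * radix_tree_lookup_slot().  Replacing the slot contents requires the
 * tree's write-side lock so that no other writer modifies the slot
 * concurrently.  'my_replace', 'my_tree', 'my_lock' and 'new_item' are
 * hypothetical names.
 */
#if 0	/* illustrative only, not compiled */
static void *my_replace(struct radix_tree_root *my_tree, spinlock_t *my_lock,
			unsigned long index, void *new_item)
{
	void **slot;
	void *old = NULL;

	spin_lock(my_lock);		/* exclude other writers */
	slot = radix_tree_lookup_slot(my_tree, index);
	if (slot) {
		old = radix_tree_deref_slot(slot);	/* safe under the lock */
		radix_tree_replace_slot(slot, new_item);
	}
	spin_unlock(my_lock);
	return old;
}
#endif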
/**
 *	radix_tree_lookup    -    perform lookup operation on a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Lookup the item at the position @index in the radix tree @root.
 *
 *	This function can be called under rcu_read_lock, however the caller
 *	must manage lifetimes of leaf nodes (eg. RCU may also be used to free
 *	them safely). No RCU barriers are required to access or modify the
 *	returned item, however.
 */
void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
{
	return radix_tree_lookup_element(root, index, 0);
}
EXPORT_SYMBOL(radix_tree_lookup);

/**
 *	radix_tree_tag_set - set a tag on a radix tree node
 *	@root:		radix tree root
 *	@index:		index key
 *	@tag:		tag index
 *
 *	Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
 *	corresponding to @index in the radix tree, from
 *	the root all the way down to the leaf node.
 *
 *	Returns the address of the tagged item.  Setting a tag on a not-present
 *	item is a bug.
 */
void *radix_tree_tag_set(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	unsigned int height, shift;
	struct radix_tree_node *slot;

	height = root->height;
	BUG_ON(index > radix_tree_maxindex(height));

	slot = radix_tree_indirect_to_ptr(root->rnode);
	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;

	while (height > 0) {
		int offset;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		if (!tag_get(slot, tag, offset))
			tag_set(slot, tag, offset);
		slot = slot->slots[offset];
		BUG_ON(slot == NULL);
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}

	/* set the root's tag bit */
	if (slot && !root_tag_get(root, tag))
		root_tag_set(root, tag);

	return slot;
}
EXPORT_SYMBOL(radix_tree_tag_set);

/**
 *	radix_tree_tag_clear - clear a tag on a radix tree node
 *	@root:		radix tree root
 *	@index:		index key
 *	@tag:		tag index
 *
 *	Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
 *	corresponding to @index in the radix tree.  If
 *	this causes the leaf node to have no tags set then clear the tag in the
 *	next-to-leaf node, etc.
 *
 *	Returns the address of the tagged item on success, else NULL.  ie:
 *	has the same return value and semantics as radix_tree_lookup().
 */
void *radix_tree_tag_clear(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	/*
	 * The radix tree path needs to be one longer than the maximum path
	 * since the "list" is null terminated.
	 */
	struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
	struct radix_tree_node *slot = NULL;
	unsigned int height, shift;

	height = root->height;
	if (index > radix_tree_maxindex(height))
		goto out;

	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
	pathp->node = NULL;
	slot = radix_tree_indirect_to_ptr(root->rnode);

	while (height > 0) {
		int offset;

		if (slot == NULL)
			goto out;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		pathp[1].offset = offset;
		pathp[1].node = slot;
		slot = slot->slots[offset];
		pathp++;
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}

	if (slot == NULL)
		goto out;

	while (pathp->node) {
		if (!tag_get(pathp->node, tag, pathp->offset))
			goto out;
		tag_clear(pathp->node, tag, pathp->offset);
		if (any_tag_set(pathp->node, tag))
			goto out;
		pathp--;
	}

	/* clear the root's tag bit */
	if (root_tag_get(root, tag))
		root_tag_clear(root, tag);

out:
	return slot;
}
EXPORT_SYMBOL(radix_tree_tag_clear);

/**
 * radix_tree_tag_get - get a tag on a radix tree node
 * @root:		radix tree root
 * @index:		index key
 * @tag:		tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Return values:
 *
 *  0: tag not present or not set
 *  1: tag set
 *
 * Note that the return value of this function may not be relied on, even if
 * the RCU lock is held, unless tag modification and node deletion are excluded
 * from concurrency.
 */
int radix_tree_tag_get(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	unsigned int height, shift;
	struct radix_tree_node *node;
	int saw_unset_tag = 0;

	/* check the root's tag bit */
	if (!root_tag_get(root, tag))
		return 0;

	node = rcu_dereference_raw(root->rnode);
	if (node == NULL)
		return 0;

	if (!radix_tree_is_indirect_ptr(node))
		return (index == 0);
	node = radix_tree_indirect_to_ptr(node);

	height = node->height;
	if (index > radix_tree_maxindex(height))
		return 0;

	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;

	for ( ; ; ) {
		int offset;

		if (node == NULL)
			return 0;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;

		/*
		 * This is just a debug check.  Later, we can bail as soon as
		 * we see an unset tag.
		 */
		if (!tag_get(node, tag, offset))
			saw_unset_tag = 1;
		if (height == 1)
			return !!tag_get(node, tag, offset);
		node = rcu_dereference_raw(node->slots[offset]);
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}
}
EXPORT_SYMBOL(radix_tree_tag_get);
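
/*
 * Editor's sketch (not part of the original file): marking entries with a
 * tag and testing for it, in the style of the page cache's dirty tag.  All
 * tag modifications must be serialised by the caller's write-side lock.
 * MY_TAG_DIRTY, 'my_mark_dirty' and 'my_anything_dirty' are hypothetical.
 */
#if 0	/* illustrative only, not compiled */
#define MY_TAG_DIRTY	0	/* hypothetical tag index */

static void my_mark_dirty(struct radix_tree_root *tree, spinlock_t *lock,
			  unsigned long index)
{
	spin_lock(lock);
	if (radix_tree_lookup(tree, index))	/* tagging a hole is a bug */
		radix_tree_tag_set(tree, index, MY_TAG_DIRTY);
	spin_unlock(lock);
}

static bool my_anything_dirty(struct radix_tree_root *tree)
{
	/* cheap check against the tag bits cached in the root */
	return radix_tree_tagged(tree, MY_TAG_DIRTY);
}
#endif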
/**
 * radix_tree_range_tag_if_tagged - for each item in given range set given
 *				    tag if item has another tag set
 * @root:		radix tree root
 * @first_indexp:	pointer to a starting index of a range to scan
 * @last_index:		last index of a range to scan
 * @nr_to_tag:		maximum number of items to tag
 * @iftag:		tag index to test
 * @settag:		tag index to set if tested tag is set
 *
 * This function scans the range of the radix tree from first_index to
 * last_index (inclusive).  For each item in the range, if iftag is set the
 * function also sets settag.  The function stops either after tagging
 * nr_to_tag items or after reaching last_index.
 *
 * The tags must be set from the leaf level only and propagated back up the
 * path to the root. We must do this so that we resolve the full path before
 * setting any tags on intermediate nodes. If we set tags as we descend, then
 * we can get to the leaf node and find that the index that has the iftag
 * set is outside the range we are scanning. This results in dangling tags and
 * can lead to problems with later tag operations (e.g. livelocks on lookups).
 *
 * The function returns the number of leaves where the tag was set and sets
 * *first_indexp to the first unscanned index.
 * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
 * be prepared to handle that.
 */
unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
		unsigned long *first_indexp, unsigned long last_index,
		unsigned long nr_to_tag,
		unsigned int iftag, unsigned int settag)
{
	unsigned int height = root->height;
	struct radix_tree_path path[height];
	struct radix_tree_path *pathp = path;
	struct radix_tree_node *slot;
	unsigned int shift;
	unsigned long tagged = 0;
	unsigned long index = *first_indexp;

	last_index = min(last_index, radix_tree_maxindex(height));
	if (index > last_index)
		return 0;
	if (!nr_to_tag)
		return 0;
	if (!root_tag_get(root, iftag)) {
		*first_indexp = last_index + 1;
		return 0;
	}
	if (height == 0) {
		*first_indexp = last_index + 1;
		root_tag_set(root, settag);
		return 1;
	}

	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
	slot = radix_tree_indirect_to_ptr(root->rnode);

	/*
	 * we fill the path from (root->height - 2) to 0, leaving the index at
	 * (root->height - 1) as a terminator. Zero the node in the terminator
	 * so that we can use this to end walk loops back up the path.
	 */
	path[height - 1].node = NULL;

	for (;;) {
		int offset;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		if (!slot->slots[offset])
			goto next;
		if (!tag_get(slot, iftag, offset))
			goto next;
		if (height > 1) {
			/* Go down one level */
			height--;
			shift -= RADIX_TREE_MAP_SHIFT;
			path[height - 1].node = slot;
			path[height - 1].offset = offset;
			slot = slot->slots[offset];
			continue;
		}

		/* tag the leaf */
		tagged++;
		tag_set(slot, settag, offset);

		/* walk back up the path tagging interior nodes */
		pathp = &path[0];
		while (pathp->node) {
			/* stop if we find a node with the tag already set */
			if (tag_get(pathp->node, settag, pathp->offset))
				break;
			tag_set(pathp->node, settag, pathp->offset);
			pathp++;
		}

next:
		/* Go to next item at level determined by 'shift' */
		index = ((index >> shift) + 1) << shift;
		/* Overflow can happen when last_index is ~0UL... */
		if (index > last_index || !index)
			break;
		if (tagged >= nr_to_tag)
			break;
		while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) {
			/*
			 * We've fully scanned this node. Go up. Because
			 * last_index is guaranteed to be in the tree, what
			 * we do below cannot wander astray.
			 */
			slot = path[height - 1].node;
			height++;
			shift += RADIX_TREE_MAP_SHIFT;
		}
	}
	/*
	 * The iftag must have been set somewhere because otherwise
	 * we would have returned immediately at the beginning of the function
	 */
	root_tag_set(root, settag);
	*first_indexp = index;

	return tagged;
}
EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
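
/*
 * Editor's sketch (not part of the original file): tagging a range in
 * batches so the tree lock is not held for too long, in the style of the
 * writeback code.  'struct my_cache' with its 'lock' and 'tree' members,
 * MY_TAG_TOWRITE and MY_TAG_BATCH are hypothetical; MY_TAG_DIRTY is from
 * the earlier sketch.
 */
#if 0	/* illustrative only, not compiled */
#define MY_TAG_TOWRITE	1
#define MY_TAG_BATCH	4096

struct my_cache {
	spinlock_t lock;
	struct radix_tree_root tree;
};

static void my_tag_range_for_writeback(struct my_cache *cache,
					unsigned long start, unsigned long end)
{
	unsigned long tagged;

	do {
		spin_lock_irq(&cache->lock);
		tagged = radix_tree_range_tag_if_tagged(&cache->tree, &start,
				end, MY_TAG_BATCH,
				MY_TAG_DIRTY, MY_TAG_TOWRITE);
		spin_unlock_irq(&cache->lock);
		cond_resched();
		/* 'start' may have wrapped to 0 when end == ULONG_MAX */
	} while (tagged >= MY_TAG_BATCH && start);
}
#endif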

/**
 *	radix_tree_next_hole    -    find the next hole (not-present entry)
 *	@root:		tree root
 *	@index:		index key
 *	@max_scan:	maximum range to search
 *
 *	Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest
 *	indexed hole.
 *
 *	Returns: the index of the hole if found, otherwise returns an index
 *	outside of the set specified (in which case 'return - index >= max_scan'
 *	will be true). In rare cases of index wrap-around, 0 will be returned.
 *
 *	radix_tree_next_hole may be called under rcu_read_lock. However, like
 *	radix_tree_gang_lookup, this will not atomically search a snapshot of
 *	the tree at a single point in time. For example, if a hole is created
 *	at index 5, then subsequently a hole is created at index 10,
 *	radix_tree_next_hole covering both indexes may return 10 if called
 *	under rcu_read_lock.
 */
unsigned long radix_tree_next_hole(struct radix_tree_root *root,
				unsigned long index, unsigned long max_scan)
{
	unsigned long i;

	for (i = 0; i < max_scan; i++) {
		if (!radix_tree_lookup(root, index))
			break;
		index++;
		if (index == 0)
			break;
	}

	return index;
}
EXPORT_SYMBOL(radix_tree_next_hole);
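
/*
 * Editor's sketch (not part of the original file): using
 * radix_tree_next_hole() to find the first free index at or after 'from'.
 * Because the lockless scan is not atomic, the result must be re-checked
 * under the write-side lock before it is used (e.g. a later
 * radix_tree_insert() must be prepared for -EEXIST).  'my_find_free_index'
 * and 'my_tree' are hypothetical.
 */
#if 0	/* illustrative only, not compiled */
static unsigned long my_find_free_index(struct radix_tree_root *my_tree,
					unsigned long from)
{
	unsigned long index;

	rcu_read_lock();
	index = radix_tree_next_hole(my_tree, from, 1024);	/* scan at most 1024 slots */
	rcu_read_unlock();

	if (index - from >= 1024)
		return ULONG_MAX;	/* no hole in the scanned window */
	return index;
}
#endif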
/**
 *	radix_tree_prev_hole    -    find the prev hole (not-present entry)
 *	@root:		tree root
 *	@index:		index key
 *	@max_scan:	maximum range to search
 *
 *	Search backwards in the range [max(index-max_scan+1, 0), index]
 *	for the first hole.
 *
 *	Returns: the index of the hole if found, otherwise returns an index
 *	outside of the set specified (in which case 'index - return >= max_scan'
 *	will be true). In rare cases of wrap-around, ULONG_MAX will be returned.
 *
 *	radix_tree_prev_hole may be called under rcu_read_lock. However, like
 *	radix_tree_gang_lookup, this will not atomically search a snapshot of
 *	the tree at a single point in time. For example, if a hole is created
 *	at index 10, then subsequently a hole is created at index 5,
 *	radix_tree_prev_hole covering both indexes may return 5 if called under
 *	rcu_read_lock.
 */
unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
				   unsigned long index, unsigned long max_scan)
{
	unsigned long i;

	for (i = 0; i < max_scan; i++) {
		if (!radix_tree_lookup(root, index))
			break;
		index--;
		if (index == ULONG_MAX)
			break;
	}

	return index;
}
EXPORT_SYMBOL(radix_tree_prev_hole);

static unsigned int
__lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
	unsigned int max_items, unsigned long *next_index)
{
	unsigned int nr_found = 0;
	unsigned int shift, height;
	unsigned long i;

	height = slot->height;
	if (height == 0)
		goto out;
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	for ( ; height > 1; height--) {
		i = (index >> shift) & RADIX_TREE_MAP_MASK;
		for (;;) {
			if (slot->slots[i] != NULL)
				break;
			index &= ~((1UL << shift) - 1);
			index += 1UL << shift;
			if (index == 0)
				goto out;	/* 32-bit wraparound */
			i++;
			if (i == RADIX_TREE_MAP_SIZE)
				goto out;
		}

		shift -= RADIX_TREE_MAP_SHIFT;
		slot = rcu_dereference_raw(slot->slots[i]);
		if (slot == NULL)
			goto out;
	}

	/* Bottom level: grab some items */
	for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
		index++;
		if (slot->slots[i]) {
			results[nr_found++] = &(slot->slots[i]);
			if (nr_found == max_items)
				goto out;
		}
	}
out:
	*next_index = index;
	return nr_found;
}

/**
 *	radix_tree_gang_lookup - perform multiple lookup on a radix tree
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *
 *	Performs an index-ascending scan of the tree for present items.  Places
 *	them at *@results and returns the number of items which were placed at
 *	*@results.
 *
 *	The implementation is naive.
 *
 *	Like radix_tree_lookup, radix_tree_gang_lookup may be called under
 *	rcu_read_lock. In this case, rather than the returned results being
 *	an atomic snapshot of the tree at a single point in time, the semantics
 *	of an RCU protected gang lookup are as though multiple radix_tree_lookups
 *	have been issued in individual locks, and results stored in 'results'.
 */
unsigned int
radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
			unsigned long first_index, unsigned int max_items)
{
	unsigned long max_index;
	struct radix_tree_node *node;
	unsigned long cur_index = first_index;
	unsigned int ret;

	node = rcu_dereference_raw(root->rnode);
	if (!node)
		return 0;

	if (!radix_tree_is_indirect_ptr(node)) {
		if (first_index > 0)
			return 0;
		results[0] = node;
		return 1;
	}
	node = radix_tree_indirect_to_ptr(node);

	max_index = radix_tree_maxindex(node->height);

	ret = 0;
	while (ret < max_items) {
		unsigned int nr_found, slots_found, i;
		unsigned long next_index;	/* Index of next search */

		if (cur_index > max_index)
			break;
		slots_found = __lookup(node, (void ***)results + ret, cur_index,
					max_items - ret, &next_index);
		nr_found = 0;
		for (i = 0; i < slots_found; i++) {
			struct radix_tree_node *slot;
			slot = *(((void ***)results)[ret + i]);
			if (!slot)
				continue;
			results[ret + nr_found] = rcu_dereference_raw(slot);
			nr_found++;
		}
		ret += nr_found;
		if (next_index == 0)
			break;
		cur_index = next_index;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup);

/**
 *	radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *
 *	Performs an index-ascending scan of the tree for present items.  Places
 *	their slots at *@results and returns the number of items which were
 *	placed at *@results.
 *
 *	The implementation is naive.
 *
 *	Like radix_tree_gang_lookup as far as RCU and locking go. Slots must
 *	be dereferenced with radix_tree_deref_slot, and if using only RCU
 *	protection, radix_tree_deref_slot may fail requiring a retry.
 */
unsigned int
radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
			unsigned long first_index, unsigned int max_items)
{
	unsigned long max_index;
	struct radix_tree_node *node;
	unsigned long cur_index = first_index;
	unsigned int ret;

	node = rcu_dereference_raw(root->rnode);
	if (!node)
		return 0;

	if (!radix_tree_is_indirect_ptr(node)) {
		if (first_index > 0)
			return 0;
		results[0] = (void **)&root->rnode;
		return 1;
	}
	node = radix_tree_indirect_to_ptr(node);

	max_index = radix_tree_maxindex(node->height);

	ret = 0;
	while (ret < max_items) {
		unsigned int slots_found;
		unsigned long next_index;	/* Index of next search */

		if (cur_index > max_index)
			break;
		slots_found = __lookup(node, results + ret, cur_index,
					max_items - ret, &next_index);
		ret += slots_found;
		if (next_index == 0)
			break;
		cur_index = next_index;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_slot);
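
/*
 * Editor's sketch (not part of the original file): iterating over every
 * item in a tree in index order with radix_tree_gang_lookup(), a batch at
 * a time.  Because the item pointers themselves carry no index, the
 * hypothetical 'struct my_obj' records the key it was inserted under, the
 * way pages record page->index; 'my_visit' is a hypothetical callback.
 * Real users would also take a reference on each object before using it
 * outside the RCU read-side section.
 */
#if 0	/* illustrative only, not compiled */
struct my_obj {
	unsigned long index;	/* key the object was inserted under */
	/* ... */
};

static void my_for_each_obj(struct radix_tree_root *tree,
			    void (*my_visit)(struct my_obj *))
{
	struct my_obj *batch[16];
	unsigned long next = 0;
	unsigned int found, i;

	rcu_read_lock();
	do {
		found = radix_tree_gang_lookup(tree, (void **)batch, next,
					       ARRAY_SIZE(batch));
		for (i = 0; i < found; i++)
			my_visit(batch[i]);
		if (found)
			next = batch[found - 1]->index + 1;	/* resume after the last one */
	} while (found == ARRAY_SIZE(batch));
	rcu_read_unlock();
}
#endif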
/*
 * FIXME: the two tag_get()s here should use find_next_bit() instead of
 * open-coding the search.
 */
static unsigned int
__lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index,
	unsigned int max_items, unsigned long *next_index, unsigned int tag)
{
	unsigned int nr_found = 0;
	unsigned int shift, height;

	height = slot->height;
	if (height == 0)
		goto out;
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	while (height > 0) {
		unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK;

		for (;;) {
			if (tag_get(slot, tag, i))
				break;
			index &= ~((1UL << shift) - 1);
			index += 1UL << shift;
			if (index == 0)
				goto out;	/* 32-bit wraparound */
			i++;
			if (i == RADIX_TREE_MAP_SIZE)
				goto out;
		}
		height--;
		if (height == 0) {	/* Bottom level: grab some items */
			unsigned long j = index & RADIX_TREE_MAP_MASK;

			for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
				index++;
				if (!tag_get(slot, tag, j))
					continue;
				/*
				 * Even though the tag was found set, we need to
				 * recheck that we have a non-NULL node, because
				 * if this lookup is lockless, it may have been
				 * subsequently deleted.
				 *
				 * Similar care must be taken in any place that
				 * looks up ->slots[x] without a lock (ie. can't
				 * rely on its value remaining the same).
				 */
				if (slot->slots[j]) {
					results[nr_found++] = &(slot->slots[j]);
					if (nr_found == max_items)
						goto out;
				}
			}
		}
		shift -= RADIX_TREE_MAP_SHIFT;
		slot = rcu_dereference_raw(slot->slots[i]);
		if (slot == NULL)
			break;
	}
out:
	*next_index = index;
	return nr_found;
}

/**
 *	radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
 *	                             based on a tag
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *	@tag:		the tag index (< RADIX_TREE_MAX_TAGS)
 *
 *	Performs an index-ascending scan of the tree for present items which
 *	have the tag indexed by @tag set.  Places the items at *@results and
 *	returns the number of items which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
		unsigned long first_index, unsigned int max_items,
		unsigned int tag)
{
	struct radix_tree_node *node;
	unsigned long max_index;
	unsigned long cur_index = first_index;
	unsigned int ret;

	/* check the root's tag bit */
	if (!root_tag_get(root, tag))
		return 0;

	node = rcu_dereference_raw(root->rnode);
	if (!node)
		return 0;

	if (!radix_tree_is_indirect_ptr(node)) {
		if (first_index > 0)
			return 0;
		results[0] = node;
		return 1;
	}
	node = radix_tree_indirect_to_ptr(node);

	max_index = radix_tree_maxindex(node->height);

	ret = 0;
	while (ret < max_items) {
		unsigned int nr_found, slots_found, i;
		unsigned long next_index;	/* Index of next search */

		if (cur_index > max_index)
			break;
		slots_found = __lookup_tag(node, (void ***)results + ret,
				cur_index, max_items - ret, &next_index, tag);
		nr_found = 0;
		for (i = 0; i < slots_found; i++) {
			struct radix_tree_node *slot;
			slot = *(((void ***)results)[ret + i]);
			if (!slot)
				continue;
			results[ret + nr_found] = rcu_dereference_raw(slot);
			nr_found++;
		}
		ret += nr_found;
		if (next_index == 0)
			break;
		cur_index = next_index;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
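
/*
 * Editor's sketch (not part of the original file): collecting only the
 * entries carrying a given tag, the way writeback walks dirty pages.
 * Reuses the hypothetical MY_TAG_DIRTY, 'struct my_obj' and reference-
 * taking caveats from the earlier sketches.
 */
#if 0	/* illustrative only, not compiled */
static void my_for_each_dirty(struct radix_tree_root *tree,
			      void (*my_visit)(struct my_obj *))
{
	struct my_obj *batch[16];
	unsigned long next = 0;
	unsigned int found, i;

	rcu_read_lock();
	do {
		found = radix_tree_gang_lookup_tag(tree, (void **)batch, next,
						   ARRAY_SIZE(batch),
						   MY_TAG_DIRTY);
		for (i = 0; i < found; i++)
			my_visit(batch[i]);	/* real users take a reference */
		if (found)
			next = batch[found - 1]->index + 1;
	} while (found == ARRAY_SIZE(batch));
	rcu_read_unlock();
}
#endif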
/**
 *	radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
 *					  radix tree based on a tag
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *	@tag:		the tag index (< RADIX_TREE_MAX_TAGS)
 *
 *	Performs an index-ascending scan of the tree for present items which
 *	have the tag indexed by @tag set.  Places the slots at *@results and
 *	returns the number of slots which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
		unsigned long first_index, unsigned int max_items,
		unsigned int tag)
{
	struct radix_tree_node *node;
	unsigned long max_index;
	unsigned long cur_index = first_index;
	unsigned int ret;

	/* check the root's tag bit */
	if (!root_tag_get(root, tag))
		return 0;

	node = rcu_dereference_raw(root->rnode);
	if (!node)
		return 0;

	if (!radix_tree_is_indirect_ptr(node)) {
		if (first_index > 0)
			return 0;
		results[0] = (void **)&root->rnode;
		return 1;
	}
	node = radix_tree_indirect_to_ptr(node);

	max_index = radix_tree_maxindex(node->height);

	ret = 0;
	while (ret < max_items) {
		unsigned int slots_found;
		unsigned long next_index;	/* Index of next search */

		if (cur_index > max_index)
			break;
		slots_found = __lookup_tag(node, results + ret,
				cur_index, max_items - ret, &next_index, tag);
		ret += slots_found;
		if (next_index == 0)
			break;
		cur_index = next_index;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);


/**
 *	radix_tree_shrink    -    shrink height of a radix tree to minimal
 *	@root:		radix tree root
 */
static inline void radix_tree_shrink(struct radix_tree_root *root)
{
	/* try to shrink tree height */
	while (root->height > 0) {
		struct radix_tree_node *to_free = root->rnode;
		void *newptr;

		BUG_ON(!radix_tree_is_indirect_ptr(to_free));
		to_free = radix_tree_indirect_to_ptr(to_free);

		/*
		 * If the candidate node has more than one child, or its child
		 * is not at the leftmost slot, we cannot shrink.
		 */
		if (to_free->count != 1)
			break;
		if (!to_free->slots[0])
			break;

		/*
		 * We don't need rcu_assign_pointer(), since we are simply
		 * moving the node from one part of the tree to another. If
		 * it was safe to dereference the old pointer to it
		 * (to_free->slots[0]), it will be safe to dereference the new
		 * one (root->rnode).
		 */
		newptr = to_free->slots[0];
		if (root->height > 1)
			newptr = radix_tree_ptr_to_indirect(newptr);
		root->rnode = newptr;
		root->height--;
		radix_tree_node_free(to_free);
	}
}

/**
 *	radix_tree_delete    -    delete an item from a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Remove the item at @index from the radix tree rooted at @root.
 *
 *	Returns the address of the deleted item, or NULL if it was not present.
 */
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
{
	/*
	 * The radix tree path needs to be one longer than the maximum path
	 * since the "list" is null terminated.
	 */
	struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
	struct radix_tree_node *slot = NULL;
	struct radix_tree_node *to_free;
	unsigned int height, shift;
	int tag;
	int offset;

	height = root->height;
	if (index > radix_tree_maxindex(height))
		goto out;

	slot = root->rnode;
	if (height == 0) {
		root_tag_clear_all(root);
		root->rnode = NULL;
		goto out;
	}
	slot = radix_tree_indirect_to_ptr(slot);

	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
	pathp->node = NULL;

	do {
		if (slot == NULL)
			goto out;

		pathp++;
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		pathp->offset = offset;
		pathp->node = slot;
		slot = slot->slots[offset];
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	} while (height > 0);

	if (slot == NULL)
		goto out;

	/*
	 * Clear all tags associated with the just-deleted item
	 */
	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
		if (tag_get(pathp->node, tag, pathp->offset))
			radix_tree_tag_clear(root, index, tag);
	}

	to_free = NULL;
	/* Now free the nodes we do not need anymore */
	while (pathp->node) {
		pathp->node->slots[pathp->offset] = NULL;
		pathp->node->count--;
		/*
		 * Queue the node for deferred freeing after the
		 * last reference to it disappears (set NULL, above).
		 */
		if (to_free)
			radix_tree_node_free(to_free);

		if (pathp->node->count) {
			if (pathp->node ==
					radix_tree_indirect_to_ptr(root->rnode))
				radix_tree_shrink(root);
			goto out;
		}

		/* Node with zero slots in use so free it */
		to_free = pathp->node;
		pathp--;
	}
	root_tag_clear_all(root);
	root->height = 0;
	root->rnode = NULL;
	if (to_free)
		radix_tree_node_free(to_free);

out:
	return slot;
}
EXPORT_SYMBOL(radix_tree_delete);

/**
 *	radix_tree_tagged - test whether any items in the tree are tagged
 *	@root:		radix tree root
 *	@tag:		tag to test
 */
int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
{
	return root_tag_get(root, tag);
}
EXPORT_SYMBOL(radix_tree_tagged);

static void
radix_tree_node_ctor(void *node)
{
	memset(node, 0, sizeof(struct radix_tree_node));
}

static __init unsigned long __maxindex(unsigned int height)
{
	unsigned int width = height * RADIX_TREE_MAP_SHIFT;
	int shift = RADIX_TREE_INDEX_BITS - width;

	if (shift < 0)
		return ~0UL;
	if (shift >= BITS_PER_LONG)
		return 0UL;
	return ~0UL >> shift;
}

static __init void radix_tree_init_maxindex(void)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
		height_to_maxindex[i] = __maxindex(i);
}

static int radix_tree_callback(struct notifier_block *nfb,
				unsigned long action,
				void *hcpu)
{
	int cpu = (long)hcpu;
	struct radix_tree_preload *rtp;

	/* Free per-cpu pool of preloaded nodes */
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
		rtp = &per_cpu(radix_tree_preloads, cpu);
		while (rtp->nr) {
			kmem_cache_free(radix_tree_node_cachep,
					rtp->nodes[rtp->nr-1]);
			rtp->nodes[rtp->nr-1] = NULL;
			rtp->nr--;
		}
	}
	return NOTIFY_OK;
}

void __init radix_tree_init(void)
{
	radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
			sizeof(struct radix_tree_node), 0,
			SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
			radix_tree_node_ctor);
	radix_tree_init_maxindex();
	hotcpu_notifier(radix_tree_callback, 0);
}
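
/*
 * Editor's sketch (not part of the original file): a minimal end-to-end
 * user of the API defined above -- create a tree, insert, look up and
 * delete one item.  Writers are serialised by a mutex, so a GFP_KERNEL
 * tree needs no preloading here; every identifier prefixed with my_ is
 * hypothetical.
 */
#if 0	/* illustrative only, not compiled */
static RADIX_TREE(my_lifecycle_tree, GFP_KERNEL);	/* writers may sleep */
static DEFINE_MUTEX(my_mutex);

static int my_store(unsigned long index, void *item)
{
	int err;

	mutex_lock(&my_mutex);
	err = radix_tree_insert(&my_lifecycle_tree, index, item);
	mutex_unlock(&my_mutex);
	return err;		/* 0, -ENOMEM or -EEXIST */
}

static void *my_find(unsigned long index)
{
	void *item;

	rcu_read_lock();
	item = radix_tree_lookup(&my_lifecycle_tree, index);
	rcu_read_unlock();
	return item;		/* caller must pin the item's lifetime */
}

static void *my_remove(unsigned long index)
{
	void *item;

	mutex_lock(&my_mutex);
	item = radix_tree_delete(&my_lifecycle_tree, index);	/* NULL if absent */
	mutex_unlock(&my_mutex);
	return item;
}
#endif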