1 /* 2 * Copyright (C) 2001 Momchil Velikov 3 * Portions Copyright (C) 2001 Christoph Hellwig 4 * Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com> 5 * Copyright (C) 2006 Nick Piggin 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License as 9 * published by the Free Software Foundation; either version 2, or (at 10 * your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 20 */ 21 22 #include <linux/errno.h> 23 #include <linux/init.h> 24 #include <linux/kernel.h> 25 #include <linux/module.h> 26 #include <linux/radix-tree.h> 27 #include <linux/percpu.h> 28 #include <linux/slab.h> 29 #include <linux/notifier.h> 30 #include <linux/cpu.h> 31 #include <linux/gfp.h> 32 #include <linux/string.h> 33 #include <linux/bitops.h> 34 #include <linux/rcupdate.h> 35 36 37 #ifdef __KERNEL__ 38 #define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) 39 #else 40 #define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ 41 #endif 42 43 #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) 44 #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) 45 46 #define RADIX_TREE_TAG_LONGS \ 47 ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) 48 49 struct radix_tree_node { 50 unsigned int height; /* Height from the bottom */ 51 unsigned int count; 52 struct rcu_head rcu_head; 53 void *slots[RADIX_TREE_MAP_SIZE]; 54 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; 55 }; 56 57 struct radix_tree_path { 58 struct radix_tree_node *node; 59 int offset; 60 }; 61 62 #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) 63 #define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2) 64 65 static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly; 66 67 /* 68 * Radix tree node cache. 69 */ 70 static struct kmem_cache *radix_tree_node_cachep; 71 72 /* 73 * Per-cpu pool of preloaded nodes 74 */ 75 struct radix_tree_preload { 76 int nr; 77 struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; 78 }; 79 DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 80 81 static inline gfp_t root_gfp_mask(struct radix_tree_root *root) 82 { 83 return root->gfp_mask & __GFP_BITS_MASK; 84 } 85 86 /* 87 * This assumes that the caller has performed appropriate preallocation, and 88 * that the caller has pinned this thread of control to the current CPU. 89 */ 90 static struct radix_tree_node * 91 radix_tree_node_alloc(struct radix_tree_root *root) 92 { 93 struct radix_tree_node *ret; 94 gfp_t gfp_mask = root_gfp_mask(root); 95 96 ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 97 if (ret == NULL && !(gfp_mask & __GFP_WAIT)) { 98 struct radix_tree_preload *rtp; 99 100 rtp = &__get_cpu_var(radix_tree_preloads); 101 if (rtp->nr) { 102 ret = rtp->nodes[rtp->nr - 1]; 103 rtp->nodes[rtp->nr - 1] = NULL; 104 rtp->nr--; 105 } 106 } 107 BUG_ON(radix_tree_is_direct_ptr(ret)); 108 return ret; 109 } 110 111 static void radix_tree_node_rcu_free(struct rcu_head *head) 112 { 113 struct radix_tree_node *node = 114 container_of(head, struct radix_tree_node, rcu_head); 115 kmem_cache_free(radix_tree_node_cachep, node); 116 } 117 118 static inline void 119 radix_tree_node_free(struct radix_tree_node *node) 120 { 121 call_rcu(&node->rcu_head, radix_tree_node_rcu_free); 122 } 123 124 /* 125 * Load up this CPU's radix_tree_node buffer with sufficient objects to 126 * ensure that the addition of a single element in the tree cannot fail. On 127 * success, return zero, with preemption disabled. On error, return -ENOMEM 128 * with preemption not disabled. 129 */ 130 int radix_tree_preload(gfp_t gfp_mask) 131 { 132 struct radix_tree_preload *rtp; 133 struct radix_tree_node *node; 134 int ret = -ENOMEM; 135 136 preempt_disable(); 137 rtp = &__get_cpu_var(radix_tree_preloads); 138 while (rtp->nr < ARRAY_SIZE(rtp->nodes)) { 139 preempt_enable(); 140 node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 141 if (node == NULL) 142 goto out; 143 preempt_disable(); 144 rtp = &__get_cpu_var(radix_tree_preloads); 145 if (rtp->nr < ARRAY_SIZE(rtp->nodes)) 146 rtp->nodes[rtp->nr++] = node; 147 else 148 kmem_cache_free(radix_tree_node_cachep, node); 149 } 150 ret = 0; 151 out: 152 return ret; 153 } 154 EXPORT_SYMBOL(radix_tree_preload); 155 156 static inline void tag_set(struct radix_tree_node *node, unsigned int tag, 157 int offset) 158 { 159 __set_bit(offset, node->tags[tag]); 160 } 161 162 static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, 163 int offset) 164 { 165 __clear_bit(offset, node->tags[tag]); 166 } 167 168 static inline int tag_get(struct radix_tree_node *node, unsigned int tag, 169 int offset) 170 { 171 return test_bit(offset, node->tags[tag]); 172 } 173 174 static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag) 175 { 176 root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); 177 } 178 179 180 static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag) 181 { 182 root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); 183 } 184 185 static inline void root_tag_clear_all(struct radix_tree_root *root) 186 { 187 root->gfp_mask &= __GFP_BITS_MASK; 188 } 189 190 static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) 191 { 192 return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); 193 } 194 195 /* 196 * Returns 1 if any slot in the node has this tag set. 197 * Otherwise returns 0. 198 */ 199 static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) 200 { 201 int idx; 202 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { 203 if (node->tags[tag][idx]) 204 return 1; 205 } 206 return 0; 207 } 208 209 /* 210 * Return the maximum key which can be store into a 211 * radix tree with height HEIGHT. 212 */ 213 static inline unsigned long radix_tree_maxindex(unsigned int height) 214 { 215 return height_to_maxindex[height]; 216 } 217 218 /* 219 * Extend a radix tree so it can store key @index. 220 */ 221 static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) 222 { 223 struct radix_tree_node *node; 224 unsigned int height; 225 int tag; 226 227 /* Figure out what the height should be. */ 228 height = root->height + 1; 229 while (index > radix_tree_maxindex(height)) 230 height++; 231 232 if (root->rnode == NULL) { 233 root->height = height; 234 goto out; 235 } 236 237 do { 238 unsigned int newheight; 239 if (!(node = radix_tree_node_alloc(root))) 240 return -ENOMEM; 241 242 /* Increase the height. */ 243 node->slots[0] = radix_tree_direct_to_ptr(root->rnode); 244 245 /* Propagate the aggregated tag info into the new root */ 246 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 247 if (root_tag_get(root, tag)) 248 tag_set(node, tag, 0); 249 } 250 251 newheight = root->height+1; 252 node->height = newheight; 253 node->count = 1; 254 rcu_assign_pointer(root->rnode, node); 255 root->height = newheight; 256 } while (height > root->height); 257 out: 258 return 0; 259 } 260 261 /** 262 * radix_tree_insert - insert into a radix tree 263 * @root: radix tree root 264 * @index: index key 265 * @item: item to insert 266 * 267 * Insert an item into the radix tree at position @index. 268 */ 269 int radix_tree_insert(struct radix_tree_root *root, 270 unsigned long index, void *item) 271 { 272 struct radix_tree_node *node = NULL, *slot; 273 unsigned int height, shift; 274 int offset; 275 int error; 276 277 BUG_ON(radix_tree_is_direct_ptr(item)); 278 279 /* Make sure the tree is high enough. */ 280 if (index > radix_tree_maxindex(root->height)) { 281 error = radix_tree_extend(root, index); 282 if (error) 283 return error; 284 } 285 286 slot = root->rnode; 287 height = root->height; 288 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 289 290 offset = 0; /* uninitialised var warning */ 291 while (height > 0) { 292 if (slot == NULL) { 293 /* Have to add a child node. */ 294 if (!(slot = radix_tree_node_alloc(root))) 295 return -ENOMEM; 296 slot->height = height; 297 if (node) { 298 rcu_assign_pointer(node->slots[offset], slot); 299 node->count++; 300 } else 301 rcu_assign_pointer(root->rnode, slot); 302 } 303 304 /* Go a level down */ 305 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 306 node = slot; 307 slot = node->slots[offset]; 308 shift -= RADIX_TREE_MAP_SHIFT; 309 height--; 310 } 311 312 if (slot != NULL) 313 return -EEXIST; 314 315 if (node) { 316 node->count++; 317 rcu_assign_pointer(node->slots[offset], item); 318 BUG_ON(tag_get(node, 0, offset)); 319 BUG_ON(tag_get(node, 1, offset)); 320 } else { 321 rcu_assign_pointer(root->rnode, radix_tree_ptr_to_direct(item)); 322 BUG_ON(root_tag_get(root, 0)); 323 BUG_ON(root_tag_get(root, 1)); 324 } 325 326 return 0; 327 } 328 EXPORT_SYMBOL(radix_tree_insert); 329 330 /** 331 * radix_tree_lookup_slot - lookup a slot in a radix tree 332 * @root: radix tree root 333 * @index: index key 334 * 335 * Returns: the slot corresponding to the position @index in the 336 * radix tree @root. This is useful for update-if-exists operations. 337 * 338 * This function cannot be called under rcu_read_lock, it must be 339 * excluded from writers, as must the returned slot for subsequent 340 * use by radix_tree_deref_slot() and radix_tree_replace slot. 341 * Caller must hold tree write locked across slot lookup and 342 * replace. 343 */ 344 void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) 345 { 346 unsigned int height, shift; 347 struct radix_tree_node *node, **slot; 348 349 node = root->rnode; 350 if (node == NULL) 351 return NULL; 352 353 if (radix_tree_is_direct_ptr(node)) { 354 if (index > 0) 355 return NULL; 356 return (void **)&root->rnode; 357 } 358 359 height = node->height; 360 if (index > radix_tree_maxindex(height)) 361 return NULL; 362 363 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 364 365 do { 366 slot = (struct radix_tree_node **) 367 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 368 node = *slot; 369 if (node == NULL) 370 return NULL; 371 372 shift -= RADIX_TREE_MAP_SHIFT; 373 height--; 374 } while (height > 0); 375 376 return (void **)slot; 377 } 378 EXPORT_SYMBOL(radix_tree_lookup_slot); 379 380 /** 381 * radix_tree_lookup - perform lookup operation on a radix tree 382 * @root: radix tree root 383 * @index: index key 384 * 385 * Lookup the item at the position @index in the radix tree @root. 386 * 387 * This function can be called under rcu_read_lock, however the caller 388 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free 389 * them safely). No RCU barriers are required to access or modify the 390 * returned item, however. 391 */ 392 void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) 393 { 394 unsigned int height, shift; 395 struct radix_tree_node *node, **slot; 396 397 node = rcu_dereference(root->rnode); 398 if (node == NULL) 399 return NULL; 400 401 if (radix_tree_is_direct_ptr(node)) { 402 if (index > 0) 403 return NULL; 404 return radix_tree_direct_to_ptr(node); 405 } 406 407 height = node->height; 408 if (index > radix_tree_maxindex(height)) 409 return NULL; 410 411 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 412 413 do { 414 slot = (struct radix_tree_node **) 415 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 416 node = rcu_dereference(*slot); 417 if (node == NULL) 418 return NULL; 419 420 shift -= RADIX_TREE_MAP_SHIFT; 421 height--; 422 } while (height > 0); 423 424 return node; 425 } 426 EXPORT_SYMBOL(radix_tree_lookup); 427 428 /** 429 * radix_tree_tag_set - set a tag on a radix tree node 430 * @root: radix tree root 431 * @index: index key 432 * @tag: tag index 433 * 434 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) 435 * corresponding to @index in the radix tree. From 436 * the root all the way down to the leaf node. 437 * 438 * Returns the address of the tagged item. Setting a tag on a not-present 439 * item is a bug. 440 */ 441 void *radix_tree_tag_set(struct radix_tree_root *root, 442 unsigned long index, unsigned int tag) 443 { 444 unsigned int height, shift; 445 struct radix_tree_node *slot; 446 447 height = root->height; 448 BUG_ON(index > radix_tree_maxindex(height)); 449 450 slot = root->rnode; 451 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 452 453 while (height > 0) { 454 int offset; 455 456 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 457 if (!tag_get(slot, tag, offset)) 458 tag_set(slot, tag, offset); 459 slot = slot->slots[offset]; 460 BUG_ON(slot == NULL); 461 shift -= RADIX_TREE_MAP_SHIFT; 462 height--; 463 } 464 465 /* set the root's tag bit */ 466 if (slot && !root_tag_get(root, tag)) 467 root_tag_set(root, tag); 468 469 return slot; 470 } 471 EXPORT_SYMBOL(radix_tree_tag_set); 472 473 /** 474 * radix_tree_tag_clear - clear a tag on a radix tree node 475 * @root: radix tree root 476 * @index: index key 477 * @tag: tag index 478 * 479 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) 480 * corresponding to @index in the radix tree. If 481 * this causes the leaf node to have no tags set then clear the tag in the 482 * next-to-leaf node, etc. 483 * 484 * Returns the address of the tagged item on success, else NULL. ie: 485 * has the same return value and semantics as radix_tree_lookup(). 486 */ 487 void *radix_tree_tag_clear(struct radix_tree_root *root, 488 unsigned long index, unsigned int tag) 489 { 490 struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path; 491 struct radix_tree_node *slot = NULL; 492 unsigned int height, shift; 493 494 height = root->height; 495 if (index > radix_tree_maxindex(height)) 496 goto out; 497 498 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 499 pathp->node = NULL; 500 slot = root->rnode; 501 502 while (height > 0) { 503 int offset; 504 505 if (slot == NULL) 506 goto out; 507 508 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 509 pathp[1].offset = offset; 510 pathp[1].node = slot; 511 slot = slot->slots[offset]; 512 pathp++; 513 shift -= RADIX_TREE_MAP_SHIFT; 514 height--; 515 } 516 517 if (slot == NULL) 518 goto out; 519 520 while (pathp->node) { 521 if (!tag_get(pathp->node, tag, pathp->offset)) 522 goto out; 523 tag_clear(pathp->node, tag, pathp->offset); 524 if (any_tag_set(pathp->node, tag)) 525 goto out; 526 pathp--; 527 } 528 529 /* clear the root's tag bit */ 530 if (root_tag_get(root, tag)) 531 root_tag_clear(root, tag); 532 533 out: 534 return slot; 535 } 536 EXPORT_SYMBOL(radix_tree_tag_clear); 537 538 #ifndef __KERNEL__ /* Only the test harness uses this at present */ 539 /** 540 * radix_tree_tag_get - get a tag on a radix tree node 541 * @root: radix tree root 542 * @index: index key 543 * @tag: tag index (< RADIX_TREE_MAX_TAGS) 544 * 545 * Return values: 546 * 547 * 0: tag not present or not set 548 * 1: tag set 549 */ 550 int radix_tree_tag_get(struct radix_tree_root *root, 551 unsigned long index, unsigned int tag) 552 { 553 unsigned int height, shift; 554 struct radix_tree_node *node; 555 int saw_unset_tag = 0; 556 557 /* check the root's tag bit */ 558 if (!root_tag_get(root, tag)) 559 return 0; 560 561 node = rcu_dereference(root->rnode); 562 if (node == NULL) 563 return 0; 564 565 if (radix_tree_is_direct_ptr(node)) 566 return (index == 0); 567 568 height = node->height; 569 if (index > radix_tree_maxindex(height)) 570 return 0; 571 572 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 573 574 for ( ; ; ) { 575 int offset; 576 577 if (node == NULL) 578 return 0; 579 580 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 581 582 /* 583 * This is just a debug check. Later, we can bale as soon as 584 * we see an unset tag. 585 */ 586 if (!tag_get(node, tag, offset)) 587 saw_unset_tag = 1; 588 if (height == 1) { 589 int ret = tag_get(node, tag, offset); 590 591 BUG_ON(ret && saw_unset_tag); 592 return !!ret; 593 } 594 node = rcu_dereference(node->slots[offset]); 595 shift -= RADIX_TREE_MAP_SHIFT; 596 height--; 597 } 598 } 599 EXPORT_SYMBOL(radix_tree_tag_get); 600 #endif 601 602 static unsigned int 603 __lookup(struct radix_tree_node *slot, void **results, unsigned long index, 604 unsigned int max_items, unsigned long *next_index) 605 { 606 unsigned int nr_found = 0; 607 unsigned int shift, height; 608 unsigned long i; 609 610 height = slot->height; 611 if (height == 0) 612 goto out; 613 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 614 615 for ( ; height > 1; height--) { 616 i = (index >> shift) & RADIX_TREE_MAP_MASK; 617 for (;;) { 618 if (slot->slots[i] != NULL) 619 break; 620 index &= ~((1UL << shift) - 1); 621 index += 1UL << shift; 622 if (index == 0) 623 goto out; /* 32-bit wraparound */ 624 i++; 625 if (i == RADIX_TREE_MAP_SIZE) 626 goto out; 627 } 628 629 shift -= RADIX_TREE_MAP_SHIFT; 630 slot = rcu_dereference(slot->slots[i]); 631 if (slot == NULL) 632 goto out; 633 } 634 635 /* Bottom level: grab some items */ 636 for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { 637 struct radix_tree_node *node; 638 index++; 639 node = slot->slots[i]; 640 if (node) { 641 results[nr_found++] = rcu_dereference(node); 642 if (nr_found == max_items) 643 goto out; 644 } 645 } 646 out: 647 *next_index = index; 648 return nr_found; 649 } 650 651 /** 652 * radix_tree_gang_lookup - perform multiple lookup on a radix tree 653 * @root: radix tree root 654 * @results: where the results of the lookup are placed 655 * @first_index: start the lookup from this key 656 * @max_items: place up to this many items at *results 657 * 658 * Performs an index-ascending scan of the tree for present items. Places 659 * them at *@results and returns the number of items which were placed at 660 * *@results. 661 * 662 * The implementation is naive. 663 * 664 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under 665 * rcu_read_lock. In this case, rather than the returned results being 666 * an atomic snapshot of the tree at a single point in time, the semantics 667 * of an RCU protected gang lookup are as though multiple radix_tree_lookups 668 * have been issued in individual locks, and results stored in 'results'. 669 */ 670 unsigned int 671 radix_tree_gang_lookup(struct radix_tree_root *root, void **results, 672 unsigned long first_index, unsigned int max_items) 673 { 674 unsigned long max_index; 675 struct radix_tree_node *node; 676 unsigned long cur_index = first_index; 677 unsigned int ret; 678 679 node = rcu_dereference(root->rnode); 680 if (!node) 681 return 0; 682 683 if (radix_tree_is_direct_ptr(node)) { 684 if (first_index > 0) 685 return 0; 686 node = radix_tree_direct_to_ptr(node); 687 results[0] = rcu_dereference(node); 688 return 1; 689 } 690 691 max_index = radix_tree_maxindex(node->height); 692 693 ret = 0; 694 while (ret < max_items) { 695 unsigned int nr_found; 696 unsigned long next_index; /* Index of next search */ 697 698 if (cur_index > max_index) 699 break; 700 nr_found = __lookup(node, results + ret, cur_index, 701 max_items - ret, &next_index); 702 ret += nr_found; 703 if (next_index == 0) 704 break; 705 cur_index = next_index; 706 } 707 708 return ret; 709 } 710 EXPORT_SYMBOL(radix_tree_gang_lookup); 711 712 /* 713 * FIXME: the two tag_get()s here should use find_next_bit() instead of 714 * open-coding the search. 715 */ 716 static unsigned int 717 __lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index, 718 unsigned int max_items, unsigned long *next_index, unsigned int tag) 719 { 720 unsigned int nr_found = 0; 721 unsigned int shift, height; 722 723 height = slot->height; 724 if (height == 0) 725 goto out; 726 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 727 728 while (height > 0) { 729 unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ; 730 731 for (;;) { 732 if (tag_get(slot, tag, i)) 733 break; 734 index &= ~((1UL << shift) - 1); 735 index += 1UL << shift; 736 if (index == 0) 737 goto out; /* 32-bit wraparound */ 738 i++; 739 if (i == RADIX_TREE_MAP_SIZE) 740 goto out; 741 } 742 height--; 743 if (height == 0) { /* Bottom level: grab some items */ 744 unsigned long j = index & RADIX_TREE_MAP_MASK; 745 746 for ( ; j < RADIX_TREE_MAP_SIZE; j++) { 747 struct radix_tree_node *node; 748 index++; 749 if (!tag_get(slot, tag, j)) 750 continue; 751 node = slot->slots[j]; 752 /* 753 * Even though the tag was found set, we need to 754 * recheck that we have a non-NULL node, because 755 * if this lookup is lockless, it may have been 756 * subsequently deleted. 757 * 758 * Similar care must be taken in any place that 759 * lookup ->slots[x] without a lock (ie. can't 760 * rely on its value remaining the same). 761 */ 762 if (node) { 763 node = rcu_dereference(node); 764 results[nr_found++] = node; 765 if (nr_found == max_items) 766 goto out; 767 } 768 } 769 } 770 shift -= RADIX_TREE_MAP_SHIFT; 771 slot = rcu_dereference(slot->slots[i]); 772 if (slot == NULL) 773 break; 774 } 775 out: 776 *next_index = index; 777 return nr_found; 778 } 779 780 /** 781 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree 782 * based on a tag 783 * @root: radix tree root 784 * @results: where the results of the lookup are placed 785 * @first_index: start the lookup from this key 786 * @max_items: place up to this many items at *results 787 * @tag: the tag index (< RADIX_TREE_MAX_TAGS) 788 * 789 * Performs an index-ascending scan of the tree for present items which 790 * have the tag indexed by @tag set. Places the items at *@results and 791 * returns the number of items which were placed at *@results. 792 */ 793 unsigned int 794 radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, 795 unsigned long first_index, unsigned int max_items, 796 unsigned int tag) 797 { 798 struct radix_tree_node *node; 799 unsigned long max_index; 800 unsigned long cur_index = first_index; 801 unsigned int ret; 802 803 /* check the root's tag bit */ 804 if (!root_tag_get(root, tag)) 805 return 0; 806 807 node = rcu_dereference(root->rnode); 808 if (!node) 809 return 0; 810 811 if (radix_tree_is_direct_ptr(node)) { 812 if (first_index > 0) 813 return 0; 814 node = radix_tree_direct_to_ptr(node); 815 results[0] = rcu_dereference(node); 816 return 1; 817 } 818 819 max_index = radix_tree_maxindex(node->height); 820 821 ret = 0; 822 while (ret < max_items) { 823 unsigned int nr_found; 824 unsigned long next_index; /* Index of next search */ 825 826 if (cur_index > max_index) 827 break; 828 nr_found = __lookup_tag(node, results + ret, cur_index, 829 max_items - ret, &next_index, tag); 830 ret += nr_found; 831 if (next_index == 0) 832 break; 833 cur_index = next_index; 834 } 835 836 return ret; 837 } 838 EXPORT_SYMBOL(radix_tree_gang_lookup_tag); 839 840 /** 841 * radix_tree_shrink - shrink height of a radix tree to minimal 842 * @root radix tree root 843 */ 844 static inline void radix_tree_shrink(struct radix_tree_root *root) 845 { 846 /* try to shrink tree height */ 847 while (root->height > 0 && 848 root->rnode->count == 1 && 849 root->rnode->slots[0]) { 850 struct radix_tree_node *to_free = root->rnode; 851 void *newptr; 852 853 /* 854 * We don't need rcu_assign_pointer(), since we are simply 855 * moving the node from one part of the tree to another. If 856 * it was safe to dereference the old pointer to it 857 * (to_free->slots[0]), it will be safe to dereference the new 858 * one (root->rnode). 859 */ 860 newptr = to_free->slots[0]; 861 if (root->height == 1) 862 newptr = radix_tree_ptr_to_direct(newptr); 863 root->rnode = newptr; 864 root->height--; 865 /* must only free zeroed nodes into the slab */ 866 tag_clear(to_free, 0, 0); 867 tag_clear(to_free, 1, 0); 868 to_free->slots[0] = NULL; 869 to_free->count = 0; 870 radix_tree_node_free(to_free); 871 } 872 } 873 874 /** 875 * radix_tree_delete - delete an item from a radix tree 876 * @root: radix tree root 877 * @index: index key 878 * 879 * Remove the item at @index from the radix tree rooted at @root. 880 * 881 * Returns the address of the deleted item, or NULL if it was not present. 882 */ 883 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) 884 { 885 struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path; 886 struct radix_tree_node *slot = NULL; 887 struct radix_tree_node *to_free; 888 unsigned int height, shift; 889 int tag; 890 int offset; 891 892 height = root->height; 893 if (index > radix_tree_maxindex(height)) 894 goto out; 895 896 slot = root->rnode; 897 if (height == 0 && root->rnode) { 898 slot = radix_tree_direct_to_ptr(slot); 899 root_tag_clear_all(root); 900 root->rnode = NULL; 901 goto out; 902 } 903 904 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 905 pathp->node = NULL; 906 907 do { 908 if (slot == NULL) 909 goto out; 910 911 pathp++; 912 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 913 pathp->offset = offset; 914 pathp->node = slot; 915 slot = slot->slots[offset]; 916 shift -= RADIX_TREE_MAP_SHIFT; 917 height--; 918 } while (height > 0); 919 920 if (slot == NULL) 921 goto out; 922 923 /* 924 * Clear all tags associated with the just-deleted item 925 */ 926 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 927 if (tag_get(pathp->node, tag, pathp->offset)) 928 radix_tree_tag_clear(root, index, tag); 929 } 930 931 to_free = NULL; 932 /* Now free the nodes we do not need anymore */ 933 while (pathp->node) { 934 pathp->node->slots[pathp->offset] = NULL; 935 pathp->node->count--; 936 /* 937 * Queue the node for deferred freeing after the 938 * last reference to it disappears (set NULL, above). 939 */ 940 if (to_free) 941 radix_tree_node_free(to_free); 942 943 if (pathp->node->count) { 944 if (pathp->node == root->rnode) 945 radix_tree_shrink(root); 946 goto out; 947 } 948 949 /* Node with zero slots in use so free it */ 950 to_free = pathp->node; 951 pathp--; 952 953 } 954 root_tag_clear_all(root); 955 root->height = 0; 956 root->rnode = NULL; 957 if (to_free) 958 radix_tree_node_free(to_free); 959 960 out: 961 return slot; 962 } 963 EXPORT_SYMBOL(radix_tree_delete); 964 965 /** 966 * radix_tree_tagged - test whether any items in the tree are tagged 967 * @root: radix tree root 968 * @tag: tag to test 969 */ 970 int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) 971 { 972 return root_tag_get(root, tag); 973 } 974 EXPORT_SYMBOL(radix_tree_tagged); 975 976 static void 977 radix_tree_node_ctor(void *node, struct kmem_cache *cachep, unsigned long flags) 978 { 979 memset(node, 0, sizeof(struct radix_tree_node)); 980 } 981 982 static __init unsigned long __maxindex(unsigned int height) 983 { 984 unsigned int tmp = height * RADIX_TREE_MAP_SHIFT; 985 unsigned long index = (~0UL >> (RADIX_TREE_INDEX_BITS - tmp - 1)) >> 1; 986 987 if (tmp >= RADIX_TREE_INDEX_BITS) 988 index = ~0UL; 989 return index; 990 } 991 992 static __init void radix_tree_init_maxindex(void) 993 { 994 unsigned int i; 995 996 for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) 997 height_to_maxindex[i] = __maxindex(i); 998 } 999 1000 static int radix_tree_callback(struct notifier_block *nfb, 1001 unsigned long action, 1002 void *hcpu) 1003 { 1004 int cpu = (long)hcpu; 1005 struct radix_tree_preload *rtp; 1006 1007 /* Free per-cpu pool of perloaded nodes */ 1008 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { 1009 rtp = &per_cpu(radix_tree_preloads, cpu); 1010 while (rtp->nr) { 1011 kmem_cache_free(radix_tree_node_cachep, 1012 rtp->nodes[rtp->nr-1]); 1013 rtp->nodes[rtp->nr-1] = NULL; 1014 rtp->nr--; 1015 } 1016 } 1017 return NOTIFY_OK; 1018 } 1019 1020 void __init radix_tree_init(void) 1021 { 1022 radix_tree_node_cachep = kmem_cache_create("radix_tree_node", 1023 sizeof(struct radix_tree_node), 0, 1024 SLAB_PANIC, radix_tree_node_ctor); 1025 radix_tree_init_maxindex(); 1026 hotcpu_notifier(radix_tree_callback, 0); 1027 } 1028