// SPDX-License-Identifier: GPL-2.0

#include <linux/slab.h>
#include <trace/events/btrfs.h>
#include "messages.h"
#include "ctree.h"
#include "extent-io-tree.h"
#include "btrfs_inode.h"
#include "misc.h"

static struct kmem_cache *extent_state_cache;

static inline bool extent_state_in_tree(const struct extent_state *state)
{
	return !RB_EMPTY_NODE(&state->rb_node);
}

#ifdef CONFIG_BTRFS_DEBUG
static LIST_HEAD(states);
static DEFINE_SPINLOCK(leak_lock);

static inline void btrfs_leak_debug_add_state(struct extent_state *state)
{
	unsigned long flags;

	spin_lock_irqsave(&leak_lock, flags);
	list_add(&state->leak_list, &states);
	spin_unlock_irqrestore(&leak_lock, flags);
}

static inline void btrfs_leak_debug_del_state(struct extent_state *state)
{
	unsigned long flags;

	spin_lock_irqsave(&leak_lock, flags);
	list_del(&state->leak_list);
	spin_unlock_irqrestore(&leak_lock, flags);
}

static inline void btrfs_extent_state_leak_debug_check(void)
{
	struct extent_state *state;

	while (!list_empty(&states)) {
		state = list_entry(states.next, struct extent_state, leak_list);
		pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
		       state->start, state->end, state->state,
		       extent_state_in_tree(state),
		       refcount_read(&state->refs));
		list_del(&state->leak_list);
		kmem_cache_free(extent_state_cache, state);
	}
}

#define btrfs_debug_check_extent_io_range(tree, start, end)		\
	__btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
						       struct extent_io_tree *tree,
						       u64 start, u64 end)
{
	struct btrfs_inode *inode = tree->inode;
	u64 isize;

	if (!inode)
		return;

	isize = i_size_read(&inode->vfs_inode);
	if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
		btrfs_debug_rl(inode->root->fs_info,
			       "%s: ino %llu isize %llu odd range [%llu,%llu]",
			       caller, btrfs_ino(inode), isize, start, end);
	}
}
#else
#define btrfs_leak_debug_add_state(state)		do {} while (0)
#define btrfs_leak_debug_del_state(state)		do {} while (0)
#define btrfs_extent_state_leak_debug_check()		do {} while (0)
#define btrfs_debug_check_extent_io_range(c, s, e)	do {} while (0)
#endif

/*
 * For the file_extent_tree, we want to hold the inode lock when we look up and
 * update the disk_i_size, but lockdep will complain because for our io_tree we
 * hold the tree lock and then take the inode lock when setting delalloc. These
 * two things are unrelated, so make a separate class for the file_extent_tree
 * so we don't get the two locking patterns mixed up.
 */
static struct lock_class_key file_extent_tree_class;

struct tree_entry {
	u64 start;
	u64 end;
	struct rb_node rb_node;
};

void extent_io_tree_init(struct btrfs_fs_info *fs_info,
			 struct extent_io_tree *tree, unsigned int owner)
{
	tree->fs_info = fs_info;
	tree->state = RB_ROOT;
	spin_lock_init(&tree->lock);
	tree->inode = NULL;
	tree->owner = owner;
	if (owner == IO_TREE_INODE_FILE_EXTENT)
		lockdep_set_class(&tree->lock, &file_extent_tree_class);
}

void extent_io_tree_release(struct extent_io_tree *tree)
{
	spin_lock(&tree->lock);
	/*
	 * Do a single barrier for the waitqueue_active check here, the state
	 * of the waitqueue should not change once extent_io_tree_release is
	 * called.
	 */
	smp_mb();
	while (!RB_EMPTY_ROOT(&tree->state)) {
		struct rb_node *node;
		struct extent_state *state;

		node = rb_first(&tree->state);
		state = rb_entry(node, struct extent_state, rb_node);
		rb_erase(&state->rb_node, &tree->state);
		RB_CLEAR_NODE(&state->rb_node);
		/*
		 * btree io trees aren't supposed to have tasks waiting for
		 * changes in the flags of extent states ever.
		 */
		ASSERT(!waitqueue_active(&state->wq));
		free_extent_state(state);

		cond_resched_lock(&tree->lock);
	}
	spin_unlock(&tree->lock);
}

static struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;

	/*
	 * The given mask might not be appropriate for the slab allocator,
	 * drop the unsupported bits
	 */
	mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state)
		return state;
	state->state = 0;
	RB_CLEAR_NODE(&state->rb_node);
	btrfs_leak_debug_add_state(state);
	refcount_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	trace_alloc_extent_state(state, mask, _RET_IP_);
	return state;
}

static struct extent_state *alloc_extent_state_atomic(struct extent_state *prealloc)
{
	if (!prealloc)
		prealloc = alloc_extent_state(GFP_ATOMIC);

	return prealloc;
}

void free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (refcount_dec_and_test(&state->refs)) {
		WARN_ON(extent_state_in_tree(state));
		btrfs_leak_debug_del_state(state);
		trace_free_extent_state(state, _RET_IP_);
		kmem_cache_free(extent_state_cache, state);
	}
}

static int add_extent_changeset(struct extent_state *state, u32 bits,
				struct extent_changeset *changeset,
				int set)
{
	int ret;

	if (!changeset)
		return 0;
	if (set && (state->state & bits) == bits)
		return 0;
	if (!set && (state->state & bits) == 0)
		return 0;
	changeset->bytes_changed += state->end - state->start + 1;
	ret = ulist_add(&changeset->range_changed, state->start, state->end,
			GFP_ATOMIC);
	return ret;
}
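
/*
 * Example (for illustration, assuming a single recorded bit such as
 * EXTENT_QGROUP_RESERVED): only ranges whose membership in @bits actually
 * changes are accounted. If that bit is set on [0, 64K) while [16K, 32K)
 * already has it, the states covering [0, 16K) and [32K, 64K) are added to
 * the changeset (48K to ->bytes_changed plus their ranges in
 * ->range_changed), and the already-set 16K in the middle is skipped by the
 * "(state->state & bits) == bits" check above.
 */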

static inline struct extent_state *next_state(struct extent_state *state)
{
	struct rb_node *next = rb_next(&state->rb_node);

	if (next)
		return rb_entry(next, struct extent_state, rb_node);
	else
		return NULL;
}

static inline struct extent_state *prev_state(struct extent_state *state)
{
	struct rb_node *next = rb_prev(&state->rb_node);

	if (next)
		return rb_entry(next, struct extent_state, rb_node);
	else
		return NULL;
}

/*
 * Search @tree for an entry that contains @offset. Such entry would have
 * entry->start <= offset && entry->end >= offset.
 *
 * @tree:       the tree to search
 * @offset:     offset that should fall within an entry in @tree
 * @node_ret:   pointer where new node should be anchored (used when inserting an
 *              entry in the tree)
 * @parent_ret: points to entry which would have been the parent of the entry,
 *              containing @offset
 *
 * Return a pointer to the entry that contains @offset byte address and don't change
 * @node_ret and @parent_ret.
 *
 * If no such entry exists, return a pointer to the first entry that ends after
 * @offset (or NULL if there is none) and fill in @node_ret and @parent_ret.
 */
static inline struct extent_state *tree_search_for_insert(struct extent_io_tree *tree,
							  u64 offset,
							  struct rb_node ***node_ret,
							  struct rb_node **parent_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node **node = &root->rb_node;
	struct rb_node *prev = NULL;
	struct extent_state *entry = NULL;

	while (*node) {
		prev = *node;
		entry = rb_entry(prev, struct extent_state, rb_node);

		if (offset < entry->start)
			node = &(*node)->rb_left;
		else if (offset > entry->end)
			node = &(*node)->rb_right;
		else
			return entry;
	}

	if (node_ret)
		*node_ret = node;
	if (parent_ret)
		*parent_ret = prev;

	/* Search neighbors until we find the first one past the end */
	while (entry && offset > entry->end)
		entry = next_state(entry);

	return entry;
}

/*
 * Search offset in the tree or fill neighbor rbtree node pointers.
 *
 * @tree:     the tree to search
 * @offset:   offset that should fall within an entry in @tree
 * @next_ret: pointer to the first entry whose range ends after @offset
 * @prev_ret: pointer to the first entry whose range begins before @offset
 *
 * Return a pointer to the entry that contains @offset byte address. If no
 * such entry exists, then return NULL and fill @prev_ret and @next_ret.
 * Otherwise return the found entry and other pointers are left untouched.
 */
static struct extent_state *tree_search_prev_next(struct extent_io_tree *tree,
						  u64 offset,
						  struct extent_state **prev_ret,
						  struct extent_state **next_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node **node = &root->rb_node;
	struct extent_state *orig_prev;
	struct extent_state *entry = NULL;

	ASSERT(prev_ret);
	ASSERT(next_ret);

	while (*node) {
		entry = rb_entry(*node, struct extent_state, rb_node);

		if (offset < entry->start)
			node = &(*node)->rb_left;
		else if (offset > entry->end)
			node = &(*node)->rb_right;
		else
			return entry;
	}

	orig_prev = entry;
	while (entry && offset > entry->end)
		entry = next_state(entry);
	*next_ret = entry;
	entry = orig_prev;

	while (entry && offset < entry->start)
		entry = prev_state(entry);
	*prev_ret = entry;

	return NULL;
}

/*
 * Inexact rb-tree search, return the next entry if @offset is not found
 */
static inline struct extent_state *tree_search(struct extent_io_tree *tree, u64 offset)
{
	return tree_search_for_insert(tree, offset, NULL, NULL);
}

static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
{
	btrfs_panic(tree->fs_info, err,
		    "locking error: extent tree was modified by another thread while locked");
}

/*
 * Utility function to look for merge candidates inside a given range. Any
 * extents with matching state are merged together into a single extent in the
 * tree. Extents with EXTENT_IO in their state field are not merged because
 * the end_io handlers need to be able to do operations on them without
 * sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
{
	struct extent_state *other;

	if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
		return;

	other = prev_state(state);
	if (other && other->end == state->start - 1 &&
	    other->state == state->state) {
		if (tree->inode)
			btrfs_merge_delalloc_extent(tree->inode, state, other);
		state->start = other->start;
		rb_erase(&other->rb_node, &tree->state);
		RB_CLEAR_NODE(&other->rb_node);
		free_extent_state(other);
	}
	other = next_state(state);
	if (other && other->start == state->end + 1 &&
	    other->state == state->state) {
		if (tree->inode)
			btrfs_merge_delalloc_extent(tree->inode, state, other);
		state->end = other->end;
		rb_erase(&other->rb_node, &tree->state);
		RB_CLEAR_NODE(&other->rb_node);
		free_extent_state(other);
	}
}

static void set_state_bits(struct extent_io_tree *tree,
			   struct extent_state *state,
			   u32 bits, struct extent_changeset *changeset)
{
	u32 bits_to_set = bits & ~EXTENT_CTLBITS;
	int ret;

	if (tree->inode)
		btrfs_set_delalloc_extent(tree->inode, state, bits);

	ret = add_extent_changeset(state, bits_to_set, changeset, 1);
	BUG_ON(ret < 0);
	state->state |= bits_to_set;
}

/*
 * Insert an extent_state struct into the tree. 'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case the
 * state struct is freed.
 *
 * The tree lock is not taken internally. This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static int insert_state(struct extent_io_tree *tree,
			struct extent_state *state,
			u32 bits, struct extent_changeset *changeset)
{
	struct rb_node **node;
	struct rb_node *parent = NULL;
	const u64 end = state->end;

	set_state_bits(tree, state, bits, changeset);

	node = &tree->state.rb_node;
	while (*node) {
		struct extent_state *entry;

		parent = *node;
		entry = rb_entry(parent, struct extent_state, rb_node);

		if (end < entry->start) {
			node = &(*node)->rb_left;
		} else if (end > entry->end) {
			node = &(*node)->rb_right;
		} else {
			btrfs_err(tree->fs_info,
				  "found node %llu %llu on insert of %llu %llu",
				  entry->start, entry->end, state->start, end);
			return -EEXIST;
		}
	}

	rb_link_node(&state->rb_node, parent, node);
	rb_insert_color(&state->rb_node, &tree->state);

	merge_state(tree, state);
	return 0;
}

/*
 * Insert state to @tree to the location given by @node and @parent.
 */
static void insert_state_fast(struct extent_io_tree *tree,
			      struct extent_state *state, struct rb_node **node,
			      struct rb_node *parent, unsigned bits,
			      struct extent_changeset *changeset)
{
	set_state_bits(tree, state, bits, changeset);
	rb_link_node(&state->rb_node, parent, node);
	rb_insert_color(&state->rb_node, &tree->state);
	merge_state(tree, state);
}

/*
 * Split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created first half. 'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling, the tree has 'orig' at [orig->start, orig->end].
 * After calling, there are two extent state structs in the tree:
 *  prealloc: [orig->start, split - 1]
 *  orig:     [split, orig->end]
 *
 * The tree locks are not taken by this function. They need to be held
 * by the caller.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *parent = NULL;
	struct rb_node **node;

	if (tree->inode)
		btrfs_split_delalloc_extent(tree->inode, orig, split);

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;

	parent = &orig->rb_node;
	node = &parent;
	while (*node) {
		struct extent_state *entry;

		parent = *node;
		entry = rb_entry(parent, struct extent_state, rb_node);

		if (prealloc->end < entry->start) {
			node = &(*node)->rb_left;
		} else if (prealloc->end > entry->end) {
			node = &(*node)->rb_right;
		} else {
			free_extent_state(prealloc);
			return -EEXIST;
		}
	}

	rb_link_node(&prealloc->rb_node, parent, node);
	rb_insert_color(&prealloc->rb_node, &tree->state);

	return 0;
}

/*
 * Utility function to clear some bits in an extent state struct. It will
 * optionally wake up anyone waiting on this state (wake == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree.
 */
static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
					    struct extent_state *state,
					    u32 bits, int wake,
					    struct extent_changeset *changeset)
{
	struct extent_state *next;
	u32 bits_to_clear = bits & ~EXTENT_CTLBITS;
	int ret;

	if (tree->inode)
		btrfs_clear_delalloc_extent(tree->inode, state, bits);

	ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
	BUG_ON(ret < 0);
	state->state &= ~bits_to_clear;
	if (wake)
		wake_up(&state->wq);
	if (state->state == 0) {
		next = next_state(state);
		if (extent_state_in_tree(state)) {
			rb_erase(&state->rb_node, &tree->state);
			RB_CLEAR_NODE(&state->rb_node);
			free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
		next = next_state(state);
	}
	return next;
}

/*
 * Detect if extent bits request NOWAIT semantics and set the gfp mask accordingly,
 * unset the EXTENT_NOWAIT bit.
 */
static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask)
{
	*mask = (*bits & EXTENT_NOWAIT ? GFP_NOWAIT : GFP_NOFS);
	*bits &= EXTENT_NOWAIT - 1;
}

/*
 * Clear some bits on a range in the tree. This may require splitting or
 * inserting elements in the tree, so the gfp mask is used to indicate which
 * allocations or sleeping are allowed.
 *
 * Sleepers on the range are woken when EXTENT_LOCKED is among the bits being
 * cleared, and passing EXTENT_CLEAR_ALL_BITS removes the given range from the
 * tree regardless of which bits are set (ie for truncate).
 *
 * The range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns 0 on success and < 0 on error.
 */
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		       u32 bits, struct extent_state **cached_state,
		       struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *cached;
	struct extent_state *prealloc = NULL;
	u64 last_end;
	int err;
	int clear = 0;
	int wake;
	int delete = (bits & EXTENT_CLEAR_ALL_BITS);
	gfp_t mask;

	set_gfp_mask_from_bits(&bits, &mask);
	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);

	if (delete)
		bits |= ~EXTENT_CTLBITS;

	if (bits & EXTENT_DELALLOC)
		bits |= EXTENT_NORESERVE;

	wake = (bits & EXTENT_LOCKED) ? 1 : 0;
	if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
		clear = 1;
again:
	if (!prealloc) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}

	spin_lock(&tree->lock);
	if (cached_state) {
		cached = *cached_state;

		if (clear) {
			*cached_state = NULL;
			cached_state = NULL;
		}

		if (cached && extent_state_in_tree(cached) &&
		    cached->start <= start && cached->end > start) {
			if (clear)
				refcount_dec(&cached->refs);
			state = cached;
			goto hit_next;
		}
		if (clear)
			free_extent_state(cached);
	}

	/* This search will find the extents that end after our range starts. */
	state = tree_search(tree, start);
	if (!state)
		goto out;
hit_next:
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);
	last_end = state->end;

	/* The state doesn't have the wanted bits, go ahead. */
	if (!(state->state & bits)) {
		state = next_state(state);
		goto next;
	}

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  |   ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we clear the desired bit
	 * on it.
	 */

	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);

		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state = clear_state_bit(tree, state, bits, wake, changeset);
			goto next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit on the first half.
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		if (wake)
			wake_up(&state->wq);

		clear_state_bit(tree, prealloc, bits, wake, changeset);

		prealloc = NULL;
		goto out;
	}

	state = clear_state_bit(tree, state, bits, wake, changeset);
next:
	if (last_end == (u64)-1)
		goto out;
	start = last_end + 1;
	if (start <= end && state && !need_resched())
		goto hit_next;

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return 0;

}
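
/*
 * For illustration only (a sketch, not a call made from this file): a caller
 * that wants to drop a whole range regardless of which bits are currently
 * set, e.g. when truncating, can pass EXTENT_CLEAR_ALL_BITS through the
 * clear_extent_bit() wrapper:
 *
 *	clear_extent_bit(&inode->io_tree, start, end,
 *			 EXTENT_CLEAR_ALL_BITS, NULL);
 *
 * With that control bit, __clear_extent_bit() above clears every stored bit,
 * so each state within [start, end] ends up empty and is removed from the
 * tree.
 */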

static void wait_on_state(struct extent_io_tree *tree,
			  struct extent_state *state)
		__releases(tree->lock)
		__acquires(tree->lock)
{
	DEFINE_WAIT(wait);
	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
	spin_unlock(&tree->lock);
	schedule();
	spin_lock(&tree->lock);
	finish_wait(&state->wq, &wait);
}

/*
 * Wait for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function.
 */
void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
		     struct extent_state **cached_state)
{
	struct extent_state *state;

	btrfs_debug_check_extent_io_range(tree, start, end);

	spin_lock(&tree->lock);
again:
	/*
	 * Maintain cached_state, as we may not remove it from the tree if there
	 * are more bits than the bits we're waiting on set on this state.
	 */
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (extent_state_in_tree(state) &&
		    state->start <= start && start < state->end)
			goto process_node;
	}
	while (1) {
		/*
		 * This search will find all the extents that end after our
		 * range starts.
		 */
		state = tree_search(tree, start);
process_node:
		if (!state)
			break;
		if (state->start > end)
			goto out;

		if (state->state & bits) {
			start = state->start;
			refcount_inc(&state->refs);
			wait_on_state(tree, state);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (!cond_resched_lock(&tree->lock)) {
			state = next_state(state);
			goto process_node;
		}
	}
out:
	/* This state is no longer useful, clear it and free it up. */
	if (cached_state && *cached_state) {
		state = *cached_state;
		*cached_state = NULL;
		free_extent_state(state);
	}
	spin_unlock(&tree->lock);
}

static void cache_state_if_flags(struct extent_state *state,
				 struct extent_state **cached_ptr,
				 unsigned flags)
{
	if (cached_ptr && !(*cached_ptr)) {
		if (!flags || (state->state & flags)) {
			*cached_ptr = state;
			refcount_inc(&state->refs);
		}
	}
}

static void cache_state(struct extent_state *state,
			struct extent_state **cached_ptr)
{
	return cache_state_if_flags(state, cached_ptr,
				    EXTENT_LOCKED | EXTENT_BOUNDARY);
}

/*
 * Find the first state struct with 'bits' set after 'start', and return it.
 * tree->lock must be held. NULL will be returned if nothing was found after
 * 'start'.
 */
static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
							u64 start, u32 bits)
{
	struct extent_state *state;

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search(tree, start);
	while (state) {
		if (state->end >= start && (state->state & bits))
			return state;
		state = next_state(state);
	}
	return NULL;
}

/*
 * Find the first offset in the io tree with one or more @bits set.
 *
 * Note: If there are multiple bits set in @bits, any of them will match.
 *
 * Return true if we find something, and update @start_ret and @end_ret.
 * Return false if we found nothing.
 */
bool find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			   u64 *start_ret, u64 *end_ret, u32 bits,
			   struct extent_state **cached_state)
{
	struct extent_state *state;
	bool ret = false;

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->end == start - 1 && extent_state_in_tree(state)) {
			while ((state = next_state(state)) != NULL) {
				if (state->state & bits)
					goto got_it;
			}
			free_extent_state(*cached_state);
			*cached_state = NULL;
			goto out;
		}
		free_extent_state(*cached_state);
		*cached_state = NULL;
	}

	state = find_first_extent_bit_state(tree, start, bits);
got_it:
	if (state) {
		cache_state_if_flags(state, cached_state, 0);
		*start_ret = state->start;
		*end_ret = state->end;
		ret = true;
	}
out:
	spin_unlock(&tree->lock);
	return ret;
}

/*
 * Find a contiguous area of bits
 *
 * @tree:      io tree to check
 * @start:     offset to start the search from
 * @start_ret: the first offset we found with the bits set
 * @end_ret:   the final contiguous range of the bits that were set
 * @bits:      bits to look for
 *
 * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges
 * to set bits appropriately, and then merge them again. During this time it
 * will drop the tree->lock, so use this helper if you want to find the actual
 * contiguous area for given bits. We will search to the first bit we find, and
 * then walk down the tree until we find a non-contiguous area. The area
 * returned will be the full contiguous area with the bits set.
 */
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
			       u64 *start_ret, u64 *end_ret, u32 bits)
{
	struct extent_state *state;
	int ret = 1;

	spin_lock(&tree->lock);
	state = find_first_extent_bit_state(tree, start, bits);
	if (state) {
		*start_ret = state->start;
		*end_ret = state->end;
		while ((state = next_state(state)) != NULL) {
			if (state->start > (*end_ret + 1))
				break;
			*end_ret = state->end;
		}
		ret = 0;
	}
	spin_unlock(&tree->lock);
	return ret;
}
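
/*
 * Typical usage sketch (illustrative only, not a caller in this file): walk
 * every range that has some bit set, restarting each search right after the
 * previous range so the cached state fast path in find_first_extent_bit()
 * can be taken. EXTENT_DIRTY is just an example bit here.
 *
 *	u64 start = 0;
 *	u64 end;
 *	struct extent_state *cached = NULL;
 *
 *	while (find_first_extent_bit(tree, start, &start, &end,
 *				     EXTENT_DIRTY, &cached)) {
 *		process the range [start, end], both offsets inclusive
 *		start = end + 1;
 *	}
 *	free_extent_state(cached);
 *
 * Passing &start for @start_ret is fine because the search offset is taken
 * by value before the result is written back.
 */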

/*
 * Find a contiguous range of bytes in the file marked as delalloc, not more
 * than 'max_bytes'. start and end are used to return the range.
 *
 * True is returned if we find something, false if nothing was in the tree.
 */
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
			       u64 *end, u64 max_bytes,
			       struct extent_state **cached_state)
{
	struct extent_state *state;
	u64 cur_start = *start;
	bool found = false;
	u64 total_bytes = 0;

	spin_lock(&tree->lock);

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search(tree, cur_start);
	if (!state) {
		*end = (u64)-1;
		goto out;
	}

	while (state) {
		if (found && (state->start != cur_start ||
			      (state->state & EXTENT_BOUNDARY))) {
			goto out;
		}
		if (!(state->state & EXTENT_DELALLOC)) {
			if (!found)
				*end = state->end;
			goto out;
		}
		if (!found) {
			*start = state->start;
			*cached_state = state;
			refcount_inc(&state->refs);
		}
		found = true;
		*end = state->end;
		cur_start = state->end + 1;
		total_bytes += state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
		state = next_state(state);
	}
out:
	spin_unlock(&tree->lock);
	return found;
}

/*
 * Set some bits on a range in the tree. This may require allocations or
 * sleeping. By default all allocations use GFP_NOFS, use EXTENT_NOWAIT for
 * GFP_NOWAIT.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if some
 * part of the range already has the desired bits set. The extent_state of the
 * existing range is returned in failed_state in this case, and the start of the
 * existing range is returned in failed_start. failed_state is used as an
 * optimization for wait_extent_bit, failed_start must be used as the source of
 * truth as failed_state may have changed since we returned.
 *
 * [start, end] is inclusive. This takes the tree lock.
 */
static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			    u32 bits, u64 *failed_start,
			    struct extent_state **failed_state,
			    struct extent_state **cached_state,
			    struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node **p = NULL;
	struct rb_node *parent = NULL;
	int err = 0;
	u64 last_start;
	u64 last_end;
	u32 exclusive_bits = (bits & EXTENT_LOCKED);
	gfp_t mask;

	set_gfp_mask_from_bits(&bits, &mask);
	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);

	if (exclusive_bits)
		ASSERT(failed_start);
	else
		ASSERT(failed_start == NULL && failed_state == NULL);
again:
	if (!prealloc) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state))
			goto hit_next;
	}
	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search_for_insert(tree, start, &p, &parent);
	if (!state) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		prealloc->start = start;
		prealloc->end = end;
		insert_state_fast(tree, prealloc, p, parent, bits, changeset);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		if (state->state & exclusive_bits) {
			*failed_start = state->start;
			cache_state(state, failed_state);
			err = -EEXIST;
			goto out;
		}

		set_state_bits(tree, state, bits, changeset);
		cache_state(state, cached_state);
		merge_state(tree, state);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		state = next_state(state);
		if (start < end && state && state->start == start &&
		    !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired bit
	 * on it.
	 */
	if (state->start < start) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			cache_state(state, failed_state);
			err = -EEXIST;
			goto out;
		}

		/*
		 * If this extent already has all the bits we want set, then
		 * skip it, not necessary to split it or do anything with it.
		 */
		if ((state->state & bits) == bits) {
			start = state->end + 1;
			cache_state(state, cached_state);
			goto search_again;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);

		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, bits, changeset);
			cache_state(state, cached_state);
			merge_state(tree, state);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
			state = next_state(state);
			if (start < end && state && state->start == start &&
			    !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;

		/*
		 * Avoid freeing 'prealloc' if it can be merged with the later
		 * extent.
		 */
		prealloc->start = start;
		prealloc->end = this_end;
		err = insert_state(tree, prealloc, bits, changeset);
		if (err)
			extent_io_tree_panic(tree, err);

		cache_state(prealloc, cached_state);
		prealloc = NULL;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half
	 */
	if (state->start <= end && state->end > end) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			cache_state(state, failed_state);
			err = -EEXIST;
			goto out;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		set_state_bits(tree, prealloc, bits, changeset);
		cache_state(prealloc, cached_state);
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;

}

int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   u32 bits, struct extent_state **cached_state)
{
	return __set_extent_bit(tree, start, end, bits, NULL, NULL,
				cached_state, NULL);
}

/*
 * Convert all bits in a given range from one bit to another
 *
 * @tree:         the io tree to search
 * @start:        the start offset in bytes
 * @end:          the end offset in bytes (inclusive)
 * @bits:         the bits to set in this range
 * @clear_bits:   the bits to clear in this range
 * @cached_state: state that we're going to cache
 *
 * This will go through and set bits for the given range. If any states exist
 * already in this range they are set with the given bit and cleared of the
 * clear_bits. This is only meant to be used by things that are mergeable, ie.
 * converting from say DELALLOC to DIRTY. This is not meant to be used with
 * boundary bits like LOCK.
 *
 * All allocations are done with GFP_NOFS.
 */
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		       u32 bits, u32 clear_bits,
		       struct extent_state **cached_state)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node **p = NULL;
	struct rb_node *parent = NULL;
	int err = 0;
	u64 last_start;
	u64 last_end;
	bool first_iteration = true;

	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
				       clear_bits);

again:
	if (!prealloc) {
		/*
		 * Best effort, don't worry if extent state allocation fails
		 * here for the first iteration. We might have a cached state
		 * that matches exactly the target range, in which case no
		 * extent state allocations are needed. We'll only know this
		 * after locking the tree.
		 */
		prealloc = alloc_extent_state(GFP_NOFS);
		if (!prealloc && !first_iteration)
			return -ENOMEM;
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state))
			goto hit_next;
	}

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search_for_insert(tree, start, &p, &parent);
	if (!state) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}
		prealloc->start = start;
		prealloc->end = end;
		insert_state_fast(tree, prealloc, p, parent, bits, NULL);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going.
	 */
	if (state->start == start && state->end <= end) {
		set_state_bits(tree, state, bits, NULL);
		cache_state(state, cached_state);
		state = clear_state_bit(tree, state, clear_bits, 0, NULL);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		if (start < end && state && state->start == start &&
		    !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired bit
	 * on it.
	 */
	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, bits, NULL);
			cache_state(state, cached_state);
			state = clear_state_bit(tree, state, clear_bits, 0, NULL);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
			if (start < end && state && state->start == start &&
			    !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}

		/*
		 * Avoid freeing 'prealloc' if it can be merged with the later
		 * extent.
		 */
		prealloc->start = start;
		prealloc->end = this_end;
		err = insert_state(tree, prealloc, bits, NULL);
		if (err)
			extent_io_tree_panic(tree, err);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half.
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}

		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		set_state_bits(tree, prealloc, bits, NULL);
		cache_state(prealloc, cached_state);
		clear_state_bit(tree, prealloc, clear_bits, 0, NULL);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	cond_resched();
	first_iteration = false;
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;
}

/*
 * Find the first range that has @bits not set. This range could start before
 * @start.
 *
 * @tree:      the tree to search
 * @start:     offset at/after which the found extent should start
 * @start_ret: records the beginning of the range
 * @end_ret:   records the end of the range (inclusive)
 * @bits:      the set of bits which must be unset
 *
 * Since unallocated range is also considered one which doesn't have the bits
 * set it's possible that @end_ret contains -1, this happens in case the range
 * spans (last_range_end, end of device]. In this case it's up to the caller to
 * trim @end_ret to the appropriate size.
 */
void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
				 u64 *start_ret, u64 *end_ret, u32 bits)
{
	struct extent_state *state;
	struct extent_state *prev = NULL, *next = NULL;

	spin_lock(&tree->lock);

	/* Find first extent with bits cleared */
	while (1) {
		state = tree_search_prev_next(tree, start, &prev, &next);
		if (!state && !next && !prev) {
			/*
			 * Tree is completely empty, send full range and let
			 * caller deal with it
			 */
			*start_ret = 0;
			*end_ret = -1;
			goto out;
		} else if (!state && !next) {
			/*
			 * We are past the last allocated chunk, set start at
			 * the end of the last extent.
			 */
			*start_ret = prev->end + 1;
			*end_ret = -1;
			goto out;
		} else if (!state) {
			state = next;
		}

		/*
		 * At this point 'state' either contains 'start' or start is
		 * before 'state'
		 */
		if (in_range(start, state->start, state->end - state->start + 1)) {
			if (state->state & bits) {
				/*
				 * |--range with bits set--|
				 *    |
				 *    start
				 */
				start = state->end + 1;
			} else {
				/*
				 * 'start' falls within a range that doesn't
				 * have the bits set, so take its start as the
				 * beginning of the desired range
				 *
				 * |--range with bits cleared----|
				 *      |
				 *      start
				 */
				*start_ret = state->start;
				break;
			}
		} else {
			/*
			 * |---prev range---|---hole/unset---|---node range---|
			 *                          |
			 *                          start
			 *
			 *                        or
			 *
			 * |---hole/unset--||--first node--|
			 *        0   |
			 *            start
			 */
			if (prev)
				*start_ret = prev->end + 1;
			else
				*start_ret = 0;
			break;
		}
	}

	/*
	 * Find the longest stretch from start until an entry which has the
	 * bits set
	 */
	while (state) {
		if (state->end >= start && !(state->state & bits)) {
			*end_ret = state->end;
		} else {
			*end_ret = state->start - 1;
			break;
		}
		state = next_state(state);
	}
out:
	spin_unlock(&tree->lock);
}
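
/*
 * Worked example (illustrative): assume @bits is set only on [4K, 8K - 1]
 * and [12K, 16K - 1], and the search starts at 6K. The first loop above
 * skips past the range containing 6K (start becomes 8K), then sees that 8K
 * sits in the hole after the first range, so *start_ret = 8K. The second
 * loop stops at the next range that has the bits set, so *end_ret = 12K - 1
 * and the reported gap is [8K, 12K - 1]. If there were no second range at
 * all, *end_ret would be -1 and the caller is expected to trim it, as noted
 * in the comment above.
 */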

/*
 * Count the number of bytes in the tree that have a given bit(s) set for a
 * given range.
 *
 * @tree:         The io tree to search.
 * @start:        The start offset of the range. This value is updated to the
 *                offset of the first byte found with the given bit(s), so it
 *                can end up being bigger than the initial value.
 * @search_end:   The end offset (inclusive value) of the search range.
 * @max_bytes:    The maximum byte count we are interested in. The search stops
 *                once it reaches this count.
 * @bits:         The bits the range must have in order to be accounted for.
 *                If multiple bits are set, then only subranges that have all
 *                the bits set are accounted for.
 * @contig:       Indicate if we should ignore holes in the range or not. If
 *                this is true, then stop once we find a hole.
 * @cached_state: A cached state to be used across multiple calls to this
 *                function in order to speedup searches. Use NULL if this is
 *                called only once or if each call does not start where the
 *                previous one ended.
 *
 * Returns the total number of bytes found within the given range that have
 * all given bits set. If the returned number of bytes is greater than zero
 * then @start is updated with the offset of the first byte with the bits set.
 */
u64 count_range_bits(struct extent_io_tree *tree,
		     u64 *start, u64 search_end, u64 max_bytes,
		     u32 bits, int contig,
		     struct extent_state **cached_state)
{
	struct extent_state *state = NULL;
	struct extent_state *cached;
	u64 cur_start = *start;
	u64 total_bytes = 0;
	u64 last = 0;
	int found = 0;

	if (WARN_ON(search_end < cur_start))
		return 0;

	spin_lock(&tree->lock);

	if (!cached_state || !*cached_state)
		goto search;

	cached = *cached_state;

	if (!extent_state_in_tree(cached))
		goto search;

	if (cached->start <= cur_start && cur_start <= cached->end) {
		state = cached;
	} else if (cached->start > cur_start) {
		struct extent_state *prev;

		/*
		 * The cached state starts after our search range's start. Check
		 * if the previous state record starts at or before the range we
		 * are looking for, and if so, use it - this is a common case
		 * when there are holes between records in the tree. If there is
		 * no previous state record, we can start from our cached state.
		 */
		prev = prev_state(cached);
		if (!prev)
			state = cached;
		else if (prev->start <= cur_start && cur_start <= prev->end)
			state = prev;
	}

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
search:
	if (!state)
		state = tree_search(tree, cur_start);

	while (state) {
		if (state->start > search_end)
			break;
		if (contig && found && state->start > last + 1)
			break;
		if (state->end >= cur_start && (state->state & bits) == bits) {
			total_bytes += min(search_end, state->end) + 1 -
				       max(cur_start, state->start);
			if (total_bytes >= max_bytes)
				break;
			if (!found) {
				*start = max(cur_start, state->start);
				found = 1;
			}
			last = state->end;
		} else if (contig && found) {
			break;
		}
		state = next_state(state);
	}

	if (cached_state) {
		free_extent_state(*cached_state);
		*cached_state = state;
		if (state)
			refcount_inc(&state->refs);
	}

	spin_unlock(&tree->lock);

	return total_bytes;
}

/*
 * Search a range in the state tree for a given mask. If 'filled' == 1, this
 * returns 1 only if every extent in the range has the bits set. Otherwise, 1
 * is returned if any bit in the range is found set.
 */
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   u32 bits, int filled, struct extent_state *cached)
{
	struct extent_state *state = NULL;
	int bitset = 0;

	spin_lock(&tree->lock);
	if (cached && extent_state_in_tree(cached) && cached->start <= start &&
	    cached->end > start)
		state = cached;
	else
		state = tree_search(tree, start);
	while (state && start <= end) {
		if (filled && state->start > start) {
			bitset = 0;
			break;
		}

		if (state->start > end)
			break;

		if (state->state & bits) {
			bitset = 1;
			if (!filled)
				break;
		} else if (filled) {
			bitset = 0;
			break;
		}

		if (state->end == (u64)-1)
			break;

		start = state->end + 1;
		if (start > end)
			break;
		state = next_state(state);
	}

	/* We ran out of states and were still inside of our range. */
	if (filled && !state)
		bitset = 0;
	spin_unlock(&tree->lock);
	return bitset;
}

/* Wrappers around set/clear extent bit */
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
			   u32 bits, struct extent_changeset *changeset)
{
	/*
	 * We don't support EXTENT_LOCKED yet, as current changeset will
	 * record any bits changed, so for EXTENT_LOCKED case, it will
	 * either fail with -EEXIST or changeset will record the whole
	 * range.
	 */
	ASSERT(!(bits & EXTENT_LOCKED));

	return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset);
}

int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
			     u32 bits, struct extent_changeset *changeset)
{
	/*
	 * Don't support EXTENT_LOCKED case, same reason as
	 * set_record_extent_bits().
	 */
	ASSERT(!(bits & EXTENT_LOCKED));

	return __clear_extent_bit(tree, start, end, bits, NULL, changeset);
}

int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
		    struct extent_state **cached)
{
	int err;
	u64 failed_start;

	err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
			       NULL, cached, NULL);
	if (err == -EEXIST) {
		if (failed_start > start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, cached);
		return 0;
	}
	return 1;
}

/*
 * Either insert or lock the state struct between start and end. Waits until
 * the whole range can be locked with EXTENT_LOCKED.
 */
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
		struct extent_state **cached_state)
{
	struct extent_state *failed_state = NULL;
	int err;
	u64 failed_start;

	err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
			       &failed_state, cached_state, NULL);
	while (err == -EEXIST) {
		if (failed_start != start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, cached_state);

		wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED,
				&failed_state);
		err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
				       &failed_start, &failed_state,
				       cached_state, NULL);
	}
	return err;
}

void __cold extent_state_free_cachep(void)
{
	btrfs_extent_state_leak_debug_check();
	kmem_cache_destroy(extent_state_cache);
}

int __init extent_state_init_cachep(void)
{
	extent_state_cache = kmem_cache_create("btrfs_extent_state",
					       sizeof(struct extent_state), 0,
					       SLAB_MEM_SPREAD, NULL);
	if (!extent_state_cache)
		return -ENOMEM;

	return 0;
}