// SPDX-License-Identifier: GPL-2.0

#include <linux/slab.h>
#include <trace/events/btrfs.h>
#include "messages.h"
#include "ctree.h"
#include "extent-io-tree.h"
#include "btrfs_inode.h"
#include "misc.h"

static struct kmem_cache *extent_state_cache;

static inline bool extent_state_in_tree(const struct extent_state *state)
{
	return !RB_EMPTY_NODE(&state->rb_node);
}

#ifdef CONFIG_BTRFS_DEBUG
static LIST_HEAD(states);
static DEFINE_SPINLOCK(leak_lock);

static inline void btrfs_leak_debug_add_state(struct extent_state *state)
{
	unsigned long flags;

	spin_lock_irqsave(&leak_lock, flags);
	list_add(&state->leak_list, &states);
	spin_unlock_irqrestore(&leak_lock, flags);
}

static inline void btrfs_leak_debug_del_state(struct extent_state *state)
{
	unsigned long flags;

	spin_lock_irqsave(&leak_lock, flags);
	list_del(&state->leak_list);
	spin_unlock_irqrestore(&leak_lock, flags);
}

static inline void btrfs_extent_state_leak_debug_check(void)
{
	struct extent_state *state;

	while (!list_empty(&states)) {
		state = list_entry(states.next, struct extent_state, leak_list);
		pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
		       state->start, state->end, state->state,
		       extent_state_in_tree(state),
		       refcount_read(&state->refs));
		list_del(&state->leak_list);
		kmem_cache_free(extent_state_cache, state);
	}
}

#define btrfs_debug_check_extent_io_range(tree, start, end)	\
	__btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
						       struct extent_io_tree *tree,
						       u64 start, u64 end)
{
	struct btrfs_inode *inode = tree->inode;
	u64 isize;

	if (!inode)
		return;

	isize = i_size_read(&inode->vfs_inode);
	if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
		btrfs_debug_rl(inode->root->fs_info,
			       "%s: ino %llu isize %llu odd range [%llu,%llu]",
			       caller, btrfs_ino(inode), isize, start, end);
	}
}
#else
#define btrfs_leak_debug_add_state(state)		do {} while (0)
#define btrfs_leak_debug_del_state(state)		do {} while (0)
#define btrfs_extent_state_leak_debug_check()		do {} while (0)
#define btrfs_debug_check_extent_io_range(c, s, e)	do {} while (0)
#endif

/*
 * For the file_extent_tree, we want to hold the inode lock when we lookup and
 * update the disk_i_size, but lockdep will complain because in our io_tree we
 * hold the tree lock and get the inode lock when setting delalloc. These two
 * things are unrelated, so make a class for the file_extent_tree so we don't
 * get the two locking patterns mixed up.
 */
static struct lock_class_key file_extent_tree_class;

struct tree_entry {
	u64 start;
	u64 end;
	struct rb_node rb_node;
};

void extent_io_tree_init(struct btrfs_fs_info *fs_info,
			 struct extent_io_tree *tree, unsigned int owner)
{
	tree->fs_info = fs_info;
	tree->state = RB_ROOT;
	spin_lock_init(&tree->lock);
	tree->inode = NULL;
	tree->owner = owner;
	if (owner == IO_TREE_INODE_FILE_EXTENT)
		lockdep_set_class(&tree->lock, &file_extent_tree_class);
}

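/*
 * Illustrative usage sketch (not an actual caller in this file; the owner
 * value shown is just one of the IO_TREE_* values): a tree is set up once
 * with extent_io_tree_init() and torn down with extent_io_tree_release(),
 * e.g.
 *
 *	struct extent_io_tree tree;
 *
 *	extent_io_tree_init(fs_info, &tree, IO_TREE_SELFTEST);
 *	...
 *	extent_io_tree_release(&tree);
 */
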
void extent_io_tree_release(struct extent_io_tree *tree)
{
	spin_lock(&tree->lock);
	/*
	 * Do a single barrier for the waitqueue_active check here, the state
	 * of the waitqueue should not change once extent_io_tree_release is
	 * called.
	 */
	smp_mb();
	while (!RB_EMPTY_ROOT(&tree->state)) {
		struct rb_node *node;
		struct extent_state *state;

		node = rb_first(&tree->state);
		state = rb_entry(node, struct extent_state, rb_node);
		rb_erase(&state->rb_node, &tree->state);
		RB_CLEAR_NODE(&state->rb_node);
		/*
		 * btree io trees aren't supposed to have tasks waiting for
		 * changes in the flags of extent states ever.
		 */
		ASSERT(!waitqueue_active(&state->wq));
		free_extent_state(state);

		cond_resched_lock(&tree->lock);
	}
	spin_unlock(&tree->lock);
}

static struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;

	/*
	 * The given mask might not be appropriate for the slab allocator,
	 * drop the unsupported bits.
	 */
	mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state)
		return state;
	state->state = 0;
	RB_CLEAR_NODE(&state->rb_node);
	btrfs_leak_debug_add_state(state);
	refcount_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	trace_alloc_extent_state(state, mask, _RET_IP_);
	return state;
}

static struct extent_state *alloc_extent_state_atomic(struct extent_state *prealloc)
{
	if (!prealloc)
		prealloc = alloc_extent_state(GFP_ATOMIC);

	return prealloc;
}

void free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (refcount_dec_and_test(&state->refs)) {
		WARN_ON(extent_state_in_tree(state));
		btrfs_leak_debug_del_state(state);
		trace_free_extent_state(state, _RET_IP_);
		kmem_cache_free(extent_state_cache, state);
	}
}

static int add_extent_changeset(struct extent_state *state, u32 bits,
				struct extent_changeset *changeset,
				int set)
{
	int ret;

	if (!changeset)
		return 0;
	if (set && (state->state & bits) == bits)
		return 0;
	if (!set && (state->state & bits) == 0)
		return 0;
	changeset->bytes_changed += state->end - state->start + 1;
	ret = ulist_add(&changeset->range_changed, state->start, state->end,
			GFP_ATOMIC);
	return ret;
}

static inline struct extent_state *next_state(struct extent_state *state)
{
	struct rb_node *next = rb_next(&state->rb_node);

	if (next)
		return rb_entry(next, struct extent_state, rb_node);
	else
		return NULL;
}

static inline struct extent_state *prev_state(struct extent_state *state)
{
	struct rb_node *next = rb_prev(&state->rb_node);

	if (next)
		return rb_entry(next, struct extent_state, rb_node);
	else
		return NULL;
}

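/*
 * The tree stores non-overlapping byte ranges, each described by an
 * extent_state covering [start, end] and keyed by the start offset, so
 * next_state()/prev_state() above walk the ranges in increasing/decreasing
 * offset order and return NULL once the last/first entry has been passed.
 */
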
/*
 * Search @tree for an entry that contains @offset. Such entry would have
 * entry->start <= offset && entry->end >= offset.
 *
 * @tree:       the tree to search
 * @offset:     offset that should fall within an entry in @tree
 * @node_ret:   pointer where new node should be anchored (used when inserting an
 *              entry in the tree)
 * @parent_ret: points to entry which would have been the parent of the entry,
 *              containing @offset
 *
 * Return a pointer to the entry that contains @offset byte address and don't
 * change @node_ret and @parent_ret.
 *
 * If no such entry exists, fill @node_ret and @parent_ret and return the
 * first entry beginning after @offset, or NULL if there is no such entry.
 */
static inline struct extent_state *tree_search_for_insert(struct extent_io_tree *tree,
							  u64 offset,
							  struct rb_node ***node_ret,
							  struct rb_node **parent_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node **node = &root->rb_node;
	struct rb_node *prev = NULL;
	struct extent_state *entry = NULL;

	while (*node) {
		prev = *node;
		entry = rb_entry(prev, struct extent_state, rb_node);

		if (offset < entry->start)
			node = &(*node)->rb_left;
		else if (offset > entry->end)
			node = &(*node)->rb_right;
		else
			return entry;
	}

	if (node_ret)
		*node_ret = node;
	if (parent_ret)
		*parent_ret = prev;

	/* Search neighbors until we find the first one past the end */
	while (entry && offset > entry->end)
		entry = next_state(entry);

	return entry;
}

/*
 * Search offset in the tree or fill neighbor rbtree node pointers.
 *
 * @tree:      the tree to search
 * @offset:    offset that should fall within an entry in @tree
 * @next_ret:  pointer to the first entry whose range ends after @offset
 * @prev_ret:  pointer to the first entry whose range begins before @offset
 *
 * Return a pointer to the entry that contains @offset byte address. If no
 * such entry exists, then return NULL and fill @prev_ret and @next_ret.
 * Otherwise return the found entry and other pointers are left untouched.
 */
static struct extent_state *tree_search_prev_next(struct extent_io_tree *tree,
						  u64 offset,
						  struct extent_state **prev_ret,
						  struct extent_state **next_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node **node = &root->rb_node;
	struct extent_state *orig_prev;
	struct extent_state *entry = NULL;

	ASSERT(prev_ret);
	ASSERT(next_ret);

	while (*node) {
		entry = rb_entry(*node, struct extent_state, rb_node);

		if (offset < entry->start)
			node = &(*node)->rb_left;
		else if (offset > entry->end)
			node = &(*node)->rb_right;
		else
			return entry;
	}

	orig_prev = entry;
	while (entry && offset > entry->end)
		entry = next_state(entry);
	*next_ret = entry;
	entry = orig_prev;

	while (entry && offset < entry->start)
		entry = prev_state(entry);
	*prev_ret = entry;

	return NULL;
}

/*
 * Inexact rb-tree search, return the next entry if @offset is not found
 */
static inline struct extent_state *tree_search(struct extent_io_tree *tree, u64 offset)
{
	return tree_search_for_insert(tree, offset, NULL, NULL);
}

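/*
 * Typical iteration pattern over the tree (illustrative): find the first
 * entry that ends at or after an offset with tree_search() and then walk
 * forward with next_state() while the entries still overlap the range of
 * interest.  find_first_extent_bit_state() below is a concrete example of
 * this pattern.
 */
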
static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
{
	btrfs_panic(tree->fs_info, err,
		    "locking error: extent tree was modified by another thread while locked");
}

/*
 * Utility function to look for merge candidates inside a given range. Any
 * extents with matching state are merged together into a single extent in the
 * tree. Extents with EXTENT_LOCKED or EXTENT_BOUNDARY in their state field are
 * not merged because the end_io handlers need to be able to do operations on
 * them without sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
{
	struct extent_state *other;

	if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
		return;

	other = prev_state(state);
	if (other && other->end == state->start - 1 &&
	    other->state == state->state) {
		if (tree->inode)
			btrfs_merge_delalloc_extent(tree->inode, state, other);
		state->start = other->start;
		rb_erase(&other->rb_node, &tree->state);
		RB_CLEAR_NODE(&other->rb_node);
		free_extent_state(other);
	}
	other = next_state(state);
	if (other && other->start == state->end + 1 &&
	    other->state == state->state) {
		if (tree->inode)
			btrfs_merge_delalloc_extent(tree->inode, state, other);
		state->end = other->end;
		rb_erase(&other->rb_node, &tree->state);
		RB_CLEAR_NODE(&other->rb_node);
		free_extent_state(other);
	}
}

static void set_state_bits(struct extent_io_tree *tree,
			   struct extent_state *state,
			   u32 bits, struct extent_changeset *changeset)
{
	u32 bits_to_set = bits & ~EXTENT_CTLBITS;
	int ret;

	if (tree->inode)
		btrfs_set_delalloc_extent(tree->inode, state, bits);

	ret = add_extent_changeset(state, bits_to_set, changeset, 1);
	BUG_ON(ret < 0);
	state->state |= bits_to_set;
}

/*
 * Insert an extent_state struct into the tree. 'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case the
 * state struct is freed.
 *
 * The tree lock is not taken internally. This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static int insert_state(struct extent_io_tree *tree,
			struct extent_state *state,
			u32 bits, struct extent_changeset *changeset)
{
	struct rb_node **node;
	struct rb_node *parent = NULL;
	const u64 end = state->end;

	set_state_bits(tree, state, bits, changeset);

	node = &tree->state.rb_node;
	while (*node) {
		struct extent_state *entry;

		parent = *node;
		entry = rb_entry(parent, struct extent_state, rb_node);

		if (end < entry->start) {
			node = &(*node)->rb_left;
		} else if (end > entry->end) {
			node = &(*node)->rb_right;
		} else {
			btrfs_err(tree->fs_info,
				  "found node %llu %llu on insert of %llu %llu",
				  entry->start, entry->end, state->start, end);
			return -EEXIST;
		}
	}

	rb_link_node(&state->rb_node, parent, node);
	rb_insert_color(&state->rb_node, &tree->state);

	merge_state(tree, state);
	return 0;
}

/*
 * Insert state to @tree to the location given by @node and @parent.
 */
static void insert_state_fast(struct extent_io_tree *tree,
			      struct extent_state *state, struct rb_node **node,
			      struct rb_node *parent, unsigned bits,
			      struct extent_changeset *changeset)
{
	set_state_bits(tree, state, bits, changeset);
	rb_link_node(&state->rb_node, parent, node);
	rb_insert_color(&state->rb_node, &tree->state);
	merge_state(tree, state);
}

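/*
 * Unlike insert_state(), insert_state_fast() above does not walk the rbtree
 * itself: the caller is expected to have obtained @node and @parent from a
 * prior tree_search_for_insert() call while holding the tree lock, so the
 * link point is already known.
 */
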
/*
 * Split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half. 'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling, the tree has 'orig' at [orig->start, orig->end]. After
 * calling, there are two extent state structs in the tree:
 * prealloc: [orig->start, split - 1]
 * orig: [ split, orig->end ]
 *
 * The tree locks are not taken by this function. They need to be held
 * by the caller.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *parent = NULL;
	struct rb_node **node;

	if (tree->inode)
		btrfs_split_delalloc_extent(tree->inode, orig, split);

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;

	parent = &orig->rb_node;
	node = &parent;
	while (*node) {
		struct extent_state *entry;

		parent = *node;
		entry = rb_entry(parent, struct extent_state, rb_node);

		if (prealloc->end < entry->start) {
			node = &(*node)->rb_left;
		} else if (prealloc->end > entry->end) {
			node = &(*node)->rb_right;
		} else {
			free_extent_state(prealloc);
			return -EEXIST;
		}
	}

	rb_link_node(&prealloc->rb_node, parent, node);
	rb_insert_color(&prealloc->rb_node, &tree->state);

	return 0;
}

/*
 * Utility function to clear some bits in an extent state struct. It will
 * optionally wake up anyone waiting on this state (wake == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree
 */
static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
					    struct extent_state *state,
					    u32 bits, int wake,
					    struct extent_changeset *changeset)
{
	struct extent_state *next;
	u32 bits_to_clear = bits & ~EXTENT_CTLBITS;
	int ret;

	if (tree->inode)
		btrfs_clear_delalloc_extent(tree->inode, state, bits);

	ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
	BUG_ON(ret < 0);
	state->state &= ~bits_to_clear;
	if (wake)
		wake_up(&state->wq);
	if (state->state == 0) {
		next = next_state(state);
		if (extent_state_in_tree(state)) {
			rb_erase(&state->rb_node, &tree->state);
			RB_CLEAR_NODE(&state->rb_node);
			free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
		next = next_state(state);
	}
	return next;
}

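/*
 * Note that clear_state_bit() returns the state that follows the one it was
 * given (or NULL), so callers clearing bits across a range can keep iterating
 * without searching the tree again after the current state has potentially
 * been freed or merged.
 */
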
/*
 * Clear some bits on a range in the tree. This may require splitting or
 * inserting elements in the tree, so the gfp mask is used to indicate which
 * allocations or sleeping are allowed.
 *
 * Pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove the given
 * range from the tree regardless of state (ie for truncate).
 *
 * The range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns 0 on success and < 0 on error.
 */
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		       u32 bits, struct extent_state **cached_state,
		       gfp_t mask, struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *cached;
	struct extent_state *prealloc = NULL;
	u64 last_end;
	int err;
	int clear = 0;
	int wake;
	int delete = (bits & EXTENT_CLEAR_ALL_BITS);

	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);

	if (delete)
		bits |= ~EXTENT_CTLBITS;

	if (bits & EXTENT_DELALLOC)
		bits |= EXTENT_NORESERVE;

	wake = (bits & EXTENT_LOCKED) ? 1 : 0;
	if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
		clear = 1;
again:
	if (!prealloc) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}

	spin_lock(&tree->lock);
	if (cached_state) {
		cached = *cached_state;

		if (clear) {
			*cached_state = NULL;
			cached_state = NULL;
		}

		if (cached && extent_state_in_tree(cached) &&
		    cached->start <= start && cached->end > start) {
			if (clear)
				refcount_dec(&cached->refs);
			state = cached;
			goto hit_next;
		}
		if (clear)
			free_extent_state(cached);
	}

	/* This search will find the extents that end after our range starts. */
	state = tree_search(tree, start);
	if (!state)
		goto out;
hit_next:
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);
	last_end = state->end;

	/* The state doesn't have the wanted bits, go ahead. */
	if (!(state->state & bits)) {
		state = next_state(state);
		goto next;
	}

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we clear the desired bit
	 * on it.
	 */

	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);

		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state = clear_state_bit(tree, state, bits, wake, changeset);
			goto next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit on the first half.
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		if (wake)
			wake_up(&state->wq);

		clear_state_bit(tree, prealloc, bits, wake, changeset);

		prealloc = NULL;
		goto out;
	}

	state = clear_state_bit(tree, state, bits, wake, changeset);
next:
	if (last_end == (u64)-1)
		goto out;
	start = last_end + 1;
	if (start <= end && state && !need_resched())
		goto hit_next;

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return 0;

}

static void wait_on_state(struct extent_io_tree *tree,
			  struct extent_state *state)
		__releases(tree->lock)
		__acquires(tree->lock)
{
	DEFINE_WAIT(wait);
	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
	spin_unlock(&tree->lock);
	schedule();
	spin_lock(&tree->lock);
	finish_wait(&state->wq, &wait);
}

/*
 * Wait for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function
 */
void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
		     struct extent_state **cached_state)
{
	struct extent_state *state;

	btrfs_debug_check_extent_io_range(tree, start, end);

	spin_lock(&tree->lock);
again:
	/*
	 * Maintain cached_state, as we may not remove it from the tree if there
	 * are more bits than the bits we're waiting on set on this state.
	 */
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (extent_state_in_tree(state) &&
		    state->start <= start && start < state->end)
			goto process_node;
	}
	while (1) {
		/*
		 * This search will find all the extents that end after our
		 * range starts.
		 */
		state = tree_search(tree, start);
process_node:
		if (!state)
			break;
		if (state->start > end)
			goto out;

		if (state->state & bits) {
			start = state->start;
			refcount_inc(&state->refs);
			wait_on_state(tree, state);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (!cond_resched_lock(&tree->lock)) {
			state = next_state(state);
			goto process_node;
		}
	}
out:
	/* This state is no longer useful, clear it and free it up. */
	if (cached_state && *cached_state) {
		state = *cached_state;
		*cached_state = NULL;
		free_extent_state(state);
	}
	spin_unlock(&tree->lock);
}

static void cache_state_if_flags(struct extent_state *state,
				 struct extent_state **cached_ptr,
				 unsigned flags)
{
	if (cached_ptr && !(*cached_ptr)) {
		if (!flags || (state->state & flags)) {
			*cached_ptr = state;
			refcount_inc(&state->refs);
		}
	}
}

static void cache_state(struct extent_state *state,
			struct extent_state **cached_ptr)
{
	return cache_state_if_flags(state, cached_ptr,
				    EXTENT_LOCKED | EXTENT_BOUNDARY);
}

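/*
 * cache_state() only caches states that have EXTENT_LOCKED or EXTENT_BOUNDARY
 * set (cache_state_if_flags() with flags == 0 caches unconditionally).  The
 * cached state gets an extra reference, so whoever holds the cached pointer
 * must eventually drop it with free_extent_state().
 */
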
/*
 * Find the first state struct with 'bits' set after 'start', and return it.
 * tree->lock must be held. NULL will be returned if nothing was found after
 * 'start'.
 */
static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
							u64 start, u32 bits)
{
	struct extent_state *state;

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search(tree, start);
	while (state) {
		if (state->end >= start && (state->state & bits))
			return state;
		state = next_state(state);
	}
	return NULL;
}

/*
 * Find the first offset in the io tree with one or more @bits set.
 *
 * Note: If there are multiple bits set in @bits, any of them will match.
 *
 * Return 0 if we find something, and update @start_ret and @end_ret.
 * Return 1 if we found nothing.
 */
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, u32 bits,
			  struct extent_state **cached_state)
{
	struct extent_state *state;
	int ret = 1;

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->end == start - 1 && extent_state_in_tree(state)) {
			while ((state = next_state(state)) != NULL) {
				if (state->state & bits)
					goto got_it;
			}
			free_extent_state(*cached_state);
			*cached_state = NULL;
			goto out;
		}
		free_extent_state(*cached_state);
		*cached_state = NULL;
	}

	state = find_first_extent_bit_state(tree, start, bits);
got_it:
	if (state) {
		cache_state_if_flags(state, cached_state, 0);
		*start_ret = state->start;
		*end_ret = state->end;
		ret = 0;
	}
out:
	spin_unlock(&tree->lock);
	return ret;
}

/*
 * Find a contiguous area of bits
 *
 * @tree:      io tree to check
 * @start:     offset to start the search from
 * @start_ret: the first offset we found with the bits set
 * @end_ret:   the final contiguous range of the bits that were set
 * @bits:      bits to look for
 *
 * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges
 * to set bits appropriately, and then merge them again. During this time it
 * will drop the tree->lock, so use this helper if you want to find the actual
 * contiguous area for given bits. We will search to the first bit we find, and
 * then walk down the tree until we find a non-contiguous area. The area
 * returned will be the full contiguous area with the bits set.
 */
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
			       u64 *start_ret, u64 *end_ret, u32 bits)
{
	struct extent_state *state;
	int ret = 1;

	spin_lock(&tree->lock);
	state = find_first_extent_bit_state(tree, start, bits);
	if (state) {
		*start_ret = state->start;
		*end_ret = state->end;
		while ((state = next_state(state)) != NULL) {
			if (state->start > (*end_ret + 1))
				break;
			*end_ret = state->end;
		}
		ret = 0;
	}
	spin_unlock(&tree->lock);
	return ret;
}

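/*
 * Illustrative sketch (not a real caller) of walking every range that has a
 * given bit set with find_first_extent_bit():
 *
 *	u64 start = 0;
 *	u64 end;
 *
 *	while (!find_first_extent_bit(tree, start, &start, &end,
 *				      EXTENT_DIRTY, NULL)) {
 *		... process the range [start, end] ...
 *		start = end + 1;
 *	}
 */
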
/*
 * Find a contiguous range of bytes in the file marked as delalloc, not more
 * than 'max_bytes'. start and end are used to return the range.
 *
 * True is returned if we find something, false if nothing was in the tree.
 */
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
			       u64 *end, u64 max_bytes,
			       struct extent_state **cached_state)
{
	struct extent_state *state;
	u64 cur_start = *start;
	bool found = false;
	u64 total_bytes = 0;

	spin_lock(&tree->lock);

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search(tree, cur_start);
	if (!state) {
		*end = (u64)-1;
		goto out;
	}

	while (state) {
		if (found && (state->start != cur_start ||
			      (state->state & EXTENT_BOUNDARY))) {
			goto out;
		}
		if (!(state->state & EXTENT_DELALLOC)) {
			if (!found)
				*end = state->end;
			goto out;
		}
		if (!found) {
			*start = state->start;
			*cached_state = state;
			refcount_inc(&state->refs);
		}
		found = true;
		*end = state->end;
		cur_start = state->end + 1;
		total_bytes += state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
		state = next_state(state);
	}
out:
	spin_unlock(&tree->lock);
	return found;
}

/*
 * Set some bits on a range in the tree. This may require allocations or
 * sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if some
 * part of the range already has the desired bits set. The extent_state of the
 * existing range is returned in failed_state in this case, and the start of the
 * existing range is returned in failed_start. failed_state is used as an
 * optimization for wait_extent_bit, failed_start must be used as the source of
 * truth as failed_state may have changed since we returned.
 *
 * [start, end] is inclusive. This takes the tree lock.
 */
static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			    u32 bits, u64 *failed_start,
			    struct extent_state **failed_state,
			    struct extent_state **cached_state,
			    struct extent_changeset *changeset, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node **p = NULL;
	struct rb_node *parent = NULL;
	int err = 0;
	u64 last_start;
	u64 last_end;
	u32 exclusive_bits = (bits & EXTENT_LOCKED);

	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);

	if (exclusive_bits)
		ASSERT(failed_start);
	else
		ASSERT(failed_start == NULL && failed_state == NULL);
again:
	if (!prealloc) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state))
			goto hit_next;
	}
	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search_for_insert(tree, start, &p, &parent);
	if (!state) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		prealloc->start = start;
		prealloc->end = end;
		insert_state_fast(tree, prealloc, p, parent, bits, changeset);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		if (state->state & exclusive_bits) {
			*failed_start = state->start;
			cache_state(state, failed_state);
			err = -EEXIST;
			goto out;
		}

		set_state_bits(tree, state, bits, changeset);
		cache_state(state, cached_state);
		merge_state(tree, state);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		state = next_state(state);
		if (start < end && state && state->start == start &&
		    !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired bit
	 * on it.
	 */
	if (state->start < start) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			cache_state(state, failed_state);
			err = -EEXIST;
			goto out;
		}

		/*
		 * If this extent already has all the bits we want set, then
		 * skip it, not necessary to split it or do anything with it.
		 */
		if ((state->state & bits) == bits) {
			start = state->end + 1;
			cache_state(state, cached_state);
			goto search_again;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);

		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, bits, changeset);
			cache_state(state, cached_state);
			merge_state(tree, state);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
			state = next_state(state);
			if (start < end && state && state->start == start &&
			    !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;

		/*
		 * Avoid freeing 'prealloc' if it can be merged with the later
		 * extent.
		 */
		prealloc->start = start;
		prealloc->end = this_end;
		err = insert_state(tree, prealloc, bits, changeset);
		if (err)
			extent_io_tree_panic(tree, err);

		cache_state(prealloc, cached_state);
		prealloc = NULL;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half
	 */
	if (state->start <= end && state->end > end) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			cache_state(state, failed_state);
			err = -EEXIST;
			goto out;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		set_state_bits(tree, prealloc, bits, changeset);
		cache_state(prealloc, cached_state);
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;

}

int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   u32 bits, struct extent_state **cached_state, gfp_t mask)
{
	return __set_extent_bit(tree, start, end, bits, NULL, NULL,
				cached_state, NULL, mask);
}

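/*
 * Minimal usage sketch for set_extent_bit() (illustrative only; the tree,
 * range and bit below are arbitrary examples):
 *
 *	set_extent_bit(&inode->io_tree, start, end, EXTENT_DIRTY, NULL,
 *		       GFP_NOFS);
 */
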
/*
 * Convert all bits in a given range from one bit to another
 *
 * @tree:         the io tree to search
 * @start:        the start offset in bytes
 * @end:          the end offset in bytes (inclusive)
 * @bits:         the bits to set in this range
 * @clear_bits:   the bits to clear in this range
 * @cached_state: state that we're going to cache
 *
 * This will go through and set bits for the given range. If any states exist
 * already in this range they are set with the given bit and cleared of the
 * clear_bits. This is only meant to be used by things that are mergeable, ie.
 * converting from say DELALLOC to DIRTY. This is not meant to be used with
 * boundary bits like LOCK.
 *
 * All allocations are done with GFP_NOFS.
 */
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		       u32 bits, u32 clear_bits,
		       struct extent_state **cached_state)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node **p = NULL;
	struct rb_node *parent = NULL;
	int err = 0;
	u64 last_start;
	u64 last_end;
	bool first_iteration = true;

	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
				       clear_bits);

again:
	if (!prealloc) {
		/*
		 * Best effort, don't worry if extent state allocation fails
		 * here for the first iteration. We might have a cached state
		 * that matches exactly the target range, in which case no
		 * extent state allocations are needed. We'll only know this
		 * after locking the tree.
		 */
		prealloc = alloc_extent_state(GFP_NOFS);
		if (!prealloc && !first_iteration)
			return -ENOMEM;
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state))
			goto hit_next;
	}

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search_for_insert(tree, start, &p, &parent);
	if (!state) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}
		prealloc->start = start;
		prealloc->end = end;
		insert_state_fast(tree, prealloc, p, parent, bits, NULL);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going.
	 */
	if (state->start == start && state->end <= end) {
		set_state_bits(tree, state, bits, NULL);
		cache_state(state, cached_state);
		state = clear_state_bit(tree, state, clear_bits, 0, NULL);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		if (start < end && state && state->start == start &&
		    !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired bit
	 * on it.
	 */
	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, bits, NULL);
			cache_state(state, cached_state);
			state = clear_state_bit(tree, state, clear_bits, 0, NULL);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
			if (start < end && state && state->start == start &&
			    !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}

		/*
		 * Avoid freeing 'prealloc' if it can be merged with the later
		 * extent.
		 */
		prealloc->start = start;
		prealloc->end = this_end;
		err = insert_state(tree, prealloc, bits, NULL);
		if (err)
			extent_io_tree_panic(tree, err);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half.
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}

		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		set_state_bits(tree, prealloc, bits, NULL);
		cache_state(prealloc, cached_state);
		clear_state_bit(tree, prealloc, clear_bits, 0, NULL);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	cond_resched();
	first_iteration = false;
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;
}

/*
 * Find the first range that has @bits not set. This range could start before
 * @start.
 *
 * @tree:      the tree to search
 * @start:     offset at/after which the found extent should start
 * @start_ret: records the beginning of the range
 * @end_ret:   records the end of the range (inclusive)
 * @bits:      the set of bits which must be unset
 *
 * Since unallocated range is also considered one which doesn't have the bits
 * set it's possible that @end_ret contains -1, this happens in case the range
 * spans (last_range_end, end of device]. In this case it's up to the caller to
 * trim @end_ret to the appropriate size.
 */
void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
				 u64 *start_ret, u64 *end_ret, u32 bits)
{
	struct extent_state *state;
	struct extent_state *prev = NULL, *next = NULL;

	spin_lock(&tree->lock);

	/* Find first extent with bits cleared */
	while (1) {
		state = tree_search_prev_next(tree, start, &prev, &next);
		if (!state && !next && !prev) {
			/*
			 * Tree is completely empty, send full range and let
			 * caller deal with it
			 */
			*start_ret = 0;
			*end_ret = -1;
			goto out;
		} else if (!state && !next) {
			/*
			 * We are past the last allocated chunk, set start at
			 * the end of the last extent.
			 */
			*start_ret = prev->end + 1;
			*end_ret = -1;
			goto out;
		} else if (!state) {
			state = next;
		}

		/*
		 * At this point 'state' either contains 'start' or start is
		 * before 'state'
		 */
		if (in_range(start, state->start, state->end - state->start + 1)) {
			if (state->state & bits) {
				/*
				 * |--range with bits sets--|
				 *    |
				 *    start
				 */
				start = state->end + 1;
			} else {
				/*
				 * 'start' falls within a range that doesn't
				 * have the bits set, so take its start as the
				 * beginning of the desired range
				 *
				 * |--range with bits cleared----|
				 *      |
				 *      start
				 */
				*start_ret = state->start;
				break;
			}
		} else {
			/*
			 * |---prev range---|---hole/unset---|---node range---|
			 *                          |
			 *                          start
			 *
			 * or
			 *
			 * |---hole/unset--||--first node--|
			 *        0   |
			 *            start
			 */
			if (prev)
				*start_ret = prev->end + 1;
			else
				*start_ret = 0;
			break;
		}
	}

	/*
	 * Find the longest stretch from start until an entry which has the
	 * bits set
	 */
	while (state) {
		if (state->end >= start && !(state->state & bits)) {
			*end_ret = state->end;
		} else {
			*end_ret = state->start - 1;
			break;
		}
		state = next_state(state);
	}
out:
	spin_unlock(&tree->lock);
}

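/*
 * Worked example for find_first_clear_extent_bit() (illustrative numbers):
 * with a single state [0, 64K - 1] that has @bits set, a search from
 * start == 32K advances past that state and returns [64K, (u64)-1], and a
 * search from start == 128K lands past the last state and returns the same
 * range.
 */
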
/*
 * Count the number of bytes in the tree that have a given bit(s) set for a
 * given range.
 *
 * @tree:         The io tree to search.
 * @start:        The start offset of the range. This value is updated to the
 *                offset of the first byte found with the given bit(s), so it
 *                can end up being bigger than the initial value.
 * @search_end:   The end offset (inclusive value) of the search range.
 * @max_bytes:    The maximum byte count we are interested in. The search stops
 *                once it reaches this count.
 * @bits:         The bits the range must have in order to be accounted for.
 *                If multiple bits are set, then only subranges that have all
 *                the bits set are accounted for.
 * @contig:       Indicate if we should ignore holes in the range or not. If
 *                this is true, then stop once we find a hole.
 * @cached_state: A cached state to be used across multiple calls to this
 *                function in order to speedup searches. Use NULL if this is
 *                called only once or if each call does not start where the
 *                previous one ended.
 *
 * Returns the total number of bytes found within the given range that have
 * all given bits set. If the returned number of bytes is greater than zero
 * then @start is updated with the offset of the first byte with the bits set.
 */
u64 count_range_bits(struct extent_io_tree *tree,
		     u64 *start, u64 search_end, u64 max_bytes,
		     u32 bits, int contig,
		     struct extent_state **cached_state)
{
	struct extent_state *state = NULL;
	struct extent_state *cached;
	u64 cur_start = *start;
	u64 total_bytes = 0;
	u64 last = 0;
	int found = 0;

	if (WARN_ON(search_end < cur_start))
		return 0;

	spin_lock(&tree->lock);

	if (!cached_state || !*cached_state)
		goto search;

	cached = *cached_state;

	if (!extent_state_in_tree(cached))
		goto search;

	if (cached->start <= cur_start && cur_start <= cached->end) {
		state = cached;
	} else if (cached->start > cur_start) {
		struct extent_state *prev;

		/*
		 * The cached state starts after our search range's start. Check
		 * if the previous state record starts at or before the range we
		 * are looking for, and if so, use it - this is a common case
		 * when there are holes between records in the tree. If there is
		 * no previous state record, we can start from our cached state.
		 */
		prev = prev_state(cached);
		if (!prev)
			state = cached;
		else if (prev->start <= cur_start && cur_start <= prev->end)
			state = prev;
	}

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
search:
	if (!state)
		state = tree_search(tree, cur_start);

	while (state) {
		if (state->start > search_end)
			break;
		if (contig && found && state->start > last + 1)
			break;
		if (state->end >= cur_start && (state->state & bits) == bits) {
			total_bytes += min(search_end, state->end) + 1 -
				       max(cur_start, state->start);
			if (total_bytes >= max_bytes)
				break;
			if (!found) {
				*start = max(cur_start, state->start);
				found = 1;
			}
			last = state->end;
		} else if (contig && found) {
			break;
		}
		state = next_state(state);
	}

	if (cached_state) {
		free_extent_state(*cached_state);
		*cached_state = state;
		if (state)
			refcount_inc(&state->refs);
	}

	spin_unlock(&tree->lock);

	return total_bytes;
}

/*
 * Search a range in the state tree for a given mask. If 'filled' == 1, this
 * returns 1 only if every extent in the range has the bits set. Otherwise, 1
 * is returned if any bit in the range is found set.
 */
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   u32 bits, int filled, struct extent_state *cached)
{
	struct extent_state *state = NULL;
	int bitset = 0;

	spin_lock(&tree->lock);
	if (cached && extent_state_in_tree(cached) && cached->start <= start &&
	    cached->end > start)
		state = cached;
	else
		state = tree_search(tree, start);
	while (state && start <= end) {
		if (filled && state->start > start) {
			bitset = 0;
			break;
		}

		if (state->start > end)
			break;

		if (state->state & bits) {
			bitset = 1;
			if (!filled)
				break;
		} else if (filled) {
			bitset = 0;
			break;
		}

		if (state->end == (u64)-1)
			break;

		start = state->end + 1;
		if (start > end)
			break;
		state = next_state(state);
	}

	/* We ran out of states and were still inside of our range. */
	if (filled && !state)
		bitset = 0;
	spin_unlock(&tree->lock);
	return bitset;
}

/* Wrappers around set/clear extent bit */
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
			   u32 bits, struct extent_changeset *changeset)
{
	/*
	 * We don't support EXTENT_LOCKED yet, as current changeset will
	 * record any bits changed, so for EXTENT_LOCKED case, it will
	 * either fail with -EEXIST or changeset will record the whole
	 * range.
	 */
	ASSERT(!(bits & EXTENT_LOCKED));

	return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL,
				changeset, GFP_NOFS);
}

int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
			     u32 bits, struct extent_changeset *changeset)
{
	/*
	 * Don't support EXTENT_LOCKED case, same reason as
	 * set_record_extent_bits().
	 */
	ASSERT(!(bits & EXTENT_LOCKED));

	return __clear_extent_bit(tree, start, end, bits, NULL, GFP_NOFS,
				  changeset);
}

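/*
 * Extent locking sketch (illustrative): callers typically bracket work on a
 * file range with
 *
 *	lock_extent(&inode->io_tree, start, end, &cached);
 *	... operate on [start, end] ...
 *	unlock_extent(&inode->io_tree, start, end, &cached);
 *
 * where unlock_extent() is one of the wrappers declared in extent-io-tree.h
 * that clears EXTENT_LOCKED via __clear_extent_bit().
 */
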
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
		    struct extent_state **cached)
{
	int err;
	u64 failed_start;

	err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
			       NULL, cached, NULL, GFP_NOFS);
	if (err == -EEXIST) {
		if (failed_start > start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, cached);
		return 0;
	}
	return 1;
}

/*
 * Either insert or lock state struct between start and end. If the range is
 * already locked, wait for it to be unlocked and then try again.
 */
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
		struct extent_state **cached_state)
{
	struct extent_state *failed_state = NULL;
	int err;
	u64 failed_start;

	err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
			       &failed_state, cached_state, NULL, GFP_NOFS);
	while (err == -EEXIST) {
		if (failed_start != start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, cached_state);

		wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED,
				&failed_state);
		err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
				       &failed_start, &failed_state,
				       cached_state, NULL, GFP_NOFS);
	}
	return err;
}

void __cold extent_state_free_cachep(void)
{
	btrfs_extent_state_leak_debug_check();
	kmem_cache_destroy(extent_state_cache);
}

int __init extent_state_init_cachep(void)
{
	extent_state_cache = kmem_cache_create("btrfs_extent_state",
					       sizeof(struct extent_state), 0,
					       SLAB_MEM_SPREAD, NULL);
	if (!extent_state_cache)
		return -ENOMEM;

	return 0;
}