// SPDX-License-Identifier: GPL-2.0

#include <linux/slab.h>
#include <trace/events/btrfs.h>
#include "messages.h"
#include "ctree.h"
#include "extent-io-tree.h"
#include "btrfs_inode.h"
#include "misc.h"

static struct kmem_cache *extent_state_cache;

static inline bool extent_state_in_tree(const struct extent_state *state)
{
	return !RB_EMPTY_NODE(&state->rb_node);
}

#ifdef CONFIG_BTRFS_DEBUG
static LIST_HEAD(states);
static DEFINE_SPINLOCK(leak_lock);

static inline void btrfs_leak_debug_add_state(struct extent_state *state)
{
	unsigned long flags;

	spin_lock_irqsave(&leak_lock, flags);
	list_add(&state->leak_list, &states);
	spin_unlock_irqrestore(&leak_lock, flags);
}

static inline void btrfs_leak_debug_del_state(struct extent_state *state)
{
	unsigned long flags;

	spin_lock_irqsave(&leak_lock, flags);
	list_del(&state->leak_list);
	spin_unlock_irqrestore(&leak_lock, flags);
}

static inline void btrfs_extent_state_leak_debug_check(void)
{
	struct extent_state *state;

	while (!list_empty(&states)) {
		state = list_entry(states.next, struct extent_state, leak_list);
		pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
		       state->start, state->end, state->state,
		       extent_state_in_tree(state),
		       refcount_read(&state->refs));
		list_del(&state->leak_list);
		kmem_cache_free(extent_state_cache, state);
	}
}

#define btrfs_debug_check_extent_io_range(tree, start, end)		\
	__btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
						       struct extent_io_tree *tree,
						       u64 start, u64 end)
{
	struct btrfs_inode *inode = tree->inode;
	u64 isize;

	if (!inode)
		return;

	isize = i_size_read(&inode->vfs_inode);
	if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
		btrfs_debug_rl(inode->root->fs_info,
			       "%s: ino %llu isize %llu odd range [%llu,%llu]",
			       caller, btrfs_ino(inode), isize, start, end);
	}
}
#else
#define btrfs_leak_debug_add_state(state)		do {} while (0)
#define btrfs_leak_debug_del_state(state)		do {} while (0)
#define btrfs_extent_state_leak_debug_check()		do {} while (0)
#define btrfs_debug_check_extent_io_range(c, s, e)	do {} while (0)
#endif

/*
 * For the file_extent_tree, we want to hold the inode lock when we look up
 * and update the disk_i_size, but lockdep will complain because with the
 * io_tree we hold the tree lock and then take the inode lock when setting
 * delalloc. These two things are unrelated, so make a separate class for the
 * file_extent_tree so we don't get the two locking patterns mixed up.
 */
static struct lock_class_key file_extent_tree_class;

struct tree_entry {
	u64 start;
	u64 end;
	struct rb_node rb_node;
};

void extent_io_tree_init(struct btrfs_fs_info *fs_info,
			 struct extent_io_tree *tree, unsigned int owner)
{
	tree->fs_info = fs_info;
	tree->state = RB_ROOT;
	spin_lock_init(&tree->lock);
	tree->inode = NULL;
	tree->owner = owner;
	if (owner == IO_TREE_INODE_FILE_EXTENT)
		lockdep_set_class(&tree->lock, &file_extent_tree_class);
}
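/*
 * Usage sketch (illustrative only, not part of the original file): an inode
 * typically initializes its io trees once, right after the inode itself is
 * set up.  IO_TREE_INODE_IO and IO_TREE_INODE_FILE_EXTENT are owner values
 * assumed to be defined in extent-io-tree.h:
 *
 *	extent_io_tree_init(fs_info, &inode->io_tree, IO_TREE_INODE_IO);
 *	extent_io_tree_init(fs_info, &inode->file_extent_tree,
 *			    IO_TREE_INODE_FILE_EXTENT);
 *
 * The second call picks up file_extent_tree_class via the owner check above.
 */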
/*
 * Empty an io tree, removing and freeing every extent state record from the
 * tree. This should be called once we are sure no other task can access the
 * tree anymore, so no tree updates happen after we empty the tree and there
 * aren't any waiters on any extent state record (EXTENT_LOCKED bit is never
 * set on any extent state when calling this function).
 */
void extent_io_tree_release(struct extent_io_tree *tree)
{
	struct rb_root root;
	struct extent_state *state;
	struct extent_state *tmp;

	spin_lock(&tree->lock);
	root = tree->state;
	tree->state = RB_ROOT;
	rbtree_postorder_for_each_entry_safe(state, tmp, &root, rb_node) {
		/* Clear node to keep free_extent_state() happy. */
		RB_CLEAR_NODE(&state->rb_node);
		ASSERT(!(state->state & EXTENT_LOCKED));
		/*
		 * No need for a memory barrier here, as we are holding the tree
		 * lock and we only change the waitqueue while holding that lock
		 * (see wait_extent_bit()).
		 */
		ASSERT(!waitqueue_active(&state->wq));
		free_extent_state(state);
		cond_resched_lock(&tree->lock);
	}
	/*
	 * Should still be empty even after a reschedule, no other task should
	 * be accessing the tree anymore.
	 */
	ASSERT(RB_EMPTY_ROOT(&tree->state));
	spin_unlock(&tree->lock);
}

static struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;

	/*
	 * The given mask might not be appropriate for the slab allocator,
	 * drop the unsupported bits.
	 */
	mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state)
		return state;
	state->state = 0;
	RB_CLEAR_NODE(&state->rb_node);
	btrfs_leak_debug_add_state(state);
	refcount_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	trace_alloc_extent_state(state, mask, _RET_IP_);
	return state;
}

static struct extent_state *alloc_extent_state_atomic(struct extent_state *prealloc)
{
	if (!prealloc)
		prealloc = alloc_extent_state(GFP_ATOMIC);

	return prealloc;
}

void free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (refcount_dec_and_test(&state->refs)) {
		WARN_ON(extent_state_in_tree(state));
		btrfs_leak_debug_del_state(state);
		trace_free_extent_state(state, _RET_IP_);
		kmem_cache_free(extent_state_cache, state);
	}
}

static int add_extent_changeset(struct extent_state *state, u32 bits,
				struct extent_changeset *changeset,
				int set)
{
	int ret;

	if (!changeset)
		return 0;
	if (set && (state->state & bits) == bits)
		return 0;
	if (!set && (state->state & bits) == 0)
		return 0;
	changeset->bytes_changed += state->end - state->start + 1;
	ret = ulist_add(&changeset->range_changed, state->start, state->end,
			GFP_ATOMIC);
	return ret;
}

static inline struct extent_state *next_state(struct extent_state *state)
{
	struct rb_node *next = rb_next(&state->rb_node);

	if (next)
		return rb_entry(next, struct extent_state, rb_node);
	else
		return NULL;
}

static inline struct extent_state *prev_state(struct extent_state *state)
{
	struct rb_node *next = rb_prev(&state->rb_node);

	if (next)
		return rb_entry(next, struct extent_state, rb_node);
	else
		return NULL;
}
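/*
 * Usage sketch (illustrative only, not part of the original file): with
 * tree->lock held, next_state()/prev_state() give ordered traversal of the
 * state records, which is the pattern the search helpers below rely on.
 * The helper name count_states_after() is hypothetical:
 *
 *	static u64 count_states_after(struct extent_io_tree *tree, u64 start)
 *	{
 *		struct extent_state *state;
 *		u64 nr = 0;
 *
 *		lockdep_assert_held(&tree->lock);
 *		for (state = tree_search(tree, start); state;
 *		     state = next_state(state))
 *			nr++;
 *		return nr;
 *	}
 */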
/*
 * Search @tree for an entry that contains @offset. Such entry would have
 * entry->start <= offset && entry->end >= offset.
 *
 * @tree:       the tree to search
 * @offset:     offset that should fall within an entry in @tree
 * @node_ret:   pointer where new node should be anchored (used when inserting an
 *              entry in the tree)
 * @parent_ret: points to entry which would have been the parent of the entry,
 *              containing @offset
 *
 * Return a pointer to the entry that contains @offset byte address and don't change
 * @node_ret and @parent_ret.
 *
 * If no such entry exists, fill @node_ret and @parent_ret and return a pointer
 * to the first entry that begins after @offset, or NULL if there is no such
 * entry.
 */
static inline struct extent_state *tree_search_for_insert(struct extent_io_tree *tree,
							  u64 offset,
							  struct rb_node ***node_ret,
							  struct rb_node **parent_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node **node = &root->rb_node;
	struct rb_node *prev = NULL;
	struct extent_state *entry = NULL;

	while (*node) {
		prev = *node;
		entry = rb_entry(prev, struct extent_state, rb_node);

		if (offset < entry->start)
			node = &(*node)->rb_left;
		else if (offset > entry->end)
			node = &(*node)->rb_right;
		else
			return entry;
	}

	if (node_ret)
		*node_ret = node;
	if (parent_ret)
		*parent_ret = prev;

	/* Search neighbors until we find the first one past the end */
	while (entry && offset > entry->end)
		entry = next_state(entry);

	return entry;
}
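/*
 * Worked example (illustrative only, not part of the original file): for a
 * tree holding the entries [0, 4095] and [8192, 12287]:
 *
 *	tree_search_for_insert(tree, 9000, &p, &parent)  returns [8192, 12287]
 *	                                                 without touching @p
 *	                                                 or @parent
 *	tree_search_for_insert(tree, 4096, &p, &parent)  returns [8192, 12287]
 *	                                                 and fills @p/@parent
 *	tree_search_for_insert(tree, 20000, &p, &parent) returns NULL and
 *	                                                 fills @p/@parent
 */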
/*
 * Search offset in the tree or fill neighbor rbtree node pointers.
 *
 * @tree:     the tree to search
 * @offset:   offset that should fall within an entry in @tree
 * @next_ret: pointer to the first entry whose range ends after @offset
 * @prev_ret: pointer to the first entry whose range begins before @offset
 *
 * Return a pointer to the entry that contains @offset byte address. If no
 * such entry exists, then return NULL and fill @prev_ret and @next_ret.
 * Otherwise return the found entry and other pointers are left untouched.
 */
static struct extent_state *tree_search_prev_next(struct extent_io_tree *tree,
						  u64 offset,
						  struct extent_state **prev_ret,
						  struct extent_state **next_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node **node = &root->rb_node;
	struct extent_state *orig_prev;
	struct extent_state *entry = NULL;

	ASSERT(prev_ret);
	ASSERT(next_ret);

	while (*node) {
		entry = rb_entry(*node, struct extent_state, rb_node);

		if (offset < entry->start)
			node = &(*node)->rb_left;
		else if (offset > entry->end)
			node = &(*node)->rb_right;
		else
			return entry;
	}

	orig_prev = entry;
	while (entry && offset > entry->end)
		entry = next_state(entry);
	*next_ret = entry;
	entry = orig_prev;

	while (entry && offset < entry->start)
		entry = prev_state(entry);
	*prev_ret = entry;

	return NULL;
}

/*
 * Inexact rb-tree search, return the next entry if @offset is not found
 */
static inline struct extent_state *tree_search(struct extent_io_tree *tree, u64 offset)
{
	return tree_search_for_insert(tree, offset, NULL, NULL);
}

static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
{
	btrfs_panic(tree->fs_info, err,
		    "locking error: extent tree was modified by another thread while locked");
}

static void merge_prev_state(struct extent_io_tree *tree, struct extent_state *state)
{
	struct extent_state *prev;

	prev = prev_state(state);
	if (prev && prev->end == state->start - 1 && prev->state == state->state) {
		if (tree->inode)
			btrfs_merge_delalloc_extent(tree->inode, state, prev);
		state->start = prev->start;
		rb_erase(&prev->rb_node, &tree->state);
		RB_CLEAR_NODE(&prev->rb_node);
		free_extent_state(prev);
	}
}

static void merge_next_state(struct extent_io_tree *tree, struct extent_state *state)
{
	struct extent_state *next;

	next = next_state(state);
	if (next && next->start == state->end + 1 && next->state == state->state) {
		if (tree->inode)
			btrfs_merge_delalloc_extent(tree->inode, state, next);
		state->end = next->end;
		rb_erase(&next->rb_node, &tree->state);
		RB_CLEAR_NODE(&next->rb_node);
		free_extent_state(next);
	}
}

/*
 * Utility function to look for merge candidates inside a given range. Any
 * extents with matching state are merged together into a single extent in the
 * tree. Extents with EXTENT_LOCKED or EXTENT_BOUNDARY in their state field
 * are not merged because the end_io handlers need to be able to do operations
 * on them without sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
{
	if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
		return;

	merge_prev_state(tree, state);
	merge_next_state(tree, state);
}
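/*
 * Worked example (illustrative only, not part of the original file): if the
 * tree holds [0, 4095] and [8192, 12287], both with only EXTENT_DELALLOC set,
 * and [4096, 8191] is then added with the same bits, merge_state() (via
 * merge_prev_state() and merge_next_state()) collapses all three records into
 * a single [0, 12287] entry and frees the two absorbed extent states.
 */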
static void set_state_bits(struct extent_io_tree *tree,
			   struct extent_state *state,
			   u32 bits, struct extent_changeset *changeset)
{
	u32 bits_to_set = bits & ~EXTENT_CTLBITS;
	int ret;

	if (tree->inode)
		btrfs_set_delalloc_extent(tree->inode, state, bits);

	ret = add_extent_changeset(state, bits_to_set, changeset, 1);
	BUG_ON(ret < 0);
	state->state |= bits_to_set;
}

/*
 * Insert an extent_state struct into the tree. 'bits' are set on the
 * struct before it is inserted.
 *
 * Returns a pointer to the struct extent_state record containing the range
 * requested for insertion, which may be the same as the given struct or it
 * may be an existing record in the tree that was expanded to accommodate the
 * requested range. In case of an extent_state different from the one that was
 * given, the latter can be freed or reused by the caller.
 *
 * On error it returns an error pointer.
 *
 * The tree lock is not taken internally. This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static struct extent_state *insert_state(struct extent_io_tree *tree,
					 struct extent_state *state,
					 u32 bits,
					 struct extent_changeset *changeset)
{
	struct rb_node **node;
	struct rb_node *parent = NULL;
	const u64 start = state->start - 1;
	const u64 end = state->end + 1;
	const bool try_merge = !(bits & (EXTENT_LOCKED | EXTENT_BOUNDARY));

	set_state_bits(tree, state, bits, changeset);

	node = &tree->state.rb_node;
	while (*node) {
		struct extent_state *entry;

		parent = *node;
		entry = rb_entry(parent, struct extent_state, rb_node);

		if (state->end < entry->start) {
			if (try_merge && end == entry->start &&
			    state->state == entry->state) {
				if (tree->inode)
					btrfs_merge_delalloc_extent(tree->inode,
								    state, entry);
				entry->start = state->start;
				merge_prev_state(tree, entry);
				state->state = 0;
				return entry;
			}
			node = &(*node)->rb_left;
		} else if (state->end > entry->end) {
			if (try_merge && entry->end == start &&
			    state->state == entry->state) {
				if (tree->inode)
					btrfs_merge_delalloc_extent(tree->inode,
								    state, entry);
				entry->end = state->end;
				merge_next_state(tree, entry);
				state->state = 0;
				return entry;
			}
			node = &(*node)->rb_right;
		} else {
			btrfs_err(tree->fs_info,
				  "found node %llu %llu on insert of %llu %llu",
				  entry->start, entry->end, state->start, state->end);
			return ERR_PTR(-EEXIST);
		}
	}

	rb_link_node(&state->rb_node, parent, node);
	rb_insert_color(&state->rb_node, &tree->state);

	return state;
}

/*
 * Insert state to @tree to the location given by @node and @parent.
 */
static void insert_state_fast(struct extent_io_tree *tree,
			      struct extent_state *state, struct rb_node **node,
			      struct rb_node *parent, unsigned bits,
			      struct extent_changeset *changeset)
{
	set_state_bits(tree, state, bits, changeset);
	rb_link_node(&state->rb_node, parent, node);
	rb_insert_color(&state->rb_node, &tree->state);
	merge_state(tree, state);
}
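/*
 * Usage sketch (illustrative only, not part of the original file): callers of
 * insert_state() must check whether the returned record is the one they
 * passed in, exactly as __set_extent_bit() does further below:
 *
 *	inserted_state = insert_state(tree, prealloc, bits, changeset);
 *	if (IS_ERR(inserted_state)) {
 *		err = PTR_ERR(inserted_state);
 *		// handle the error
 *	}
 *	if (inserted_state == prealloc)
 *		prealloc = NULL;	// now owned by the tree
 *	// else @prealloc was merged into an existing record and may be
 *	// reused or freed by the caller
 */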
/*
 * Split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half. 'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling, the tree has 'orig' at [orig->start, orig->end]. After
 * calling, there are two extent state structs in the tree:
 *   prealloc: [orig->start, split - 1]
 *   orig:     [split, orig->end]
 *
 * The tree locks are not taken by this function. They need to be held
 * by the caller.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *parent = NULL;
	struct rb_node **node;

	if (tree->inode)
		btrfs_split_delalloc_extent(tree->inode, orig, split);

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;

	parent = &orig->rb_node;
	node = &parent;
	while (*node) {
		struct extent_state *entry;

		parent = *node;
		entry = rb_entry(parent, struct extent_state, rb_node);

		if (prealloc->end < entry->start) {
			node = &(*node)->rb_left;
		} else if (prealloc->end > entry->end) {
			node = &(*node)->rb_right;
		} else {
			free_extent_state(prealloc);
			return -EEXIST;
		}
	}

	rb_link_node(&prealloc->rb_node, parent, node);
	rb_insert_color(&prealloc->rb_node, &tree->state);

	return 0;
}

/*
 * Utility function to clear some bits in an extent state struct. It will
 * optionally wake up anyone waiting on this state (wake == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree.
 */
static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
					    struct extent_state *state,
					    u32 bits, int wake,
					    struct extent_changeset *changeset)
{
	struct extent_state *next;
	u32 bits_to_clear = bits & ~EXTENT_CTLBITS;
	int ret;

	if (tree->inode)
		btrfs_clear_delalloc_extent(tree->inode, state, bits);

	ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
	BUG_ON(ret < 0);
	state->state &= ~bits_to_clear;
	if (wake)
		wake_up(&state->wq);
	if (state->state == 0) {
		next = next_state(state);
		if (extent_state_in_tree(state)) {
			rb_erase(&state->rb_node, &tree->state);
			RB_CLEAR_NODE(&state->rb_node);
			free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
		next = next_state(state);
	}
	return next;
}

/*
 * Detect if extent bits request NOWAIT semantics, set the gfp mask
 * accordingly and clear the EXTENT_NOWAIT bit.
 */
static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask)
{
	*mask = (*bits & EXTENT_NOWAIT ? GFP_NOWAIT : GFP_NOFS);
	*bits &= EXTENT_NOWAIT - 1;
}
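/*
 * Worked example (illustrative only, not part of the original file): a caller
 * passing bits = EXTENT_DELALLOC | EXTENT_NOWAIT ends up with
 * mask = GFP_NOWAIT and bits = EXTENT_DELALLOC, since EXTENT_NOWAIT - 1 is a
 * mask of all the bits below EXTENT_NOWAIT. Without EXTENT_NOWAIT, the mask
 * defaults to GFP_NOFS and the bits are left untouched.
 */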
/*
 * Clear some bits on a range in the tree. This may require splitting or
 * inserting elements in the tree, so the gfp mask is used to indicate which
 * allocations or sleeping are allowed.
 *
 * Pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove the given
 * range from the tree regardless of state (ie for truncate).
 *
 * The range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns 0 on success and < 0 on error.
 */
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		       u32 bits, struct extent_state **cached_state,
		       struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *cached;
	struct extent_state *prealloc = NULL;
	u64 last_end;
	int err;
	int clear = 0;
	int wake;
	int delete = (bits & EXTENT_CLEAR_ALL_BITS);
	gfp_t mask;

	set_gfp_mask_from_bits(&bits, &mask);
	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);

	if (delete)
		bits |= ~EXTENT_CTLBITS;

	if (bits & EXTENT_DELALLOC)
		bits |= EXTENT_NORESERVE;

	wake = (bits & EXTENT_LOCKED) ? 1 : 0;
	if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
		clear = 1;
again:
	if (!prealloc) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range as
		 * well. If we end up needing a new extent state we allocate
		 * it later.
		 */
		prealloc = alloc_extent_state(mask);
	}

	spin_lock(&tree->lock);
	if (cached_state) {
		cached = *cached_state;

		if (clear) {
			*cached_state = NULL;
			cached_state = NULL;
		}

		if (cached && extent_state_in_tree(cached) &&
		    cached->start <= start && cached->end > start) {
			if (clear)
				refcount_dec(&cached->refs);
			state = cached;
			goto hit_next;
		}
		if (clear)
			free_extent_state(cached);
	}

	/* This search will find the extents that end after our range starts. */
	state = tree_search(tree, start);
	if (!state)
		goto out;
hit_next:
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);
	last_end = state->end;

	/* The state doesn't have the wanted bits, go ahead. */
	if (!(state->state & bits)) {
		state = next_state(state);
		goto next;
	}

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we clear the desired bit
	 * on it.
	 */

	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);

		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state = clear_state_bit(tree, state, bits, wake, changeset);
			goto next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit on the first half.
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		if (wake)
			wake_up(&state->wq);

		clear_state_bit(tree, prealloc, bits, wake, changeset);

		prealloc = NULL;
		goto out;
	}

	state = clear_state_bit(tree, state, bits, wake, changeset);
next:
	if (last_end == (u64)-1)
		goto out;
	start = last_end + 1;
	if (start <= end && state && !need_resched())
		goto hit_next;

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return 0;
}
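/*
 * Usage sketch (illustrative only, not part of the original file): most
 * callers go through the clear_extent_bit() wrapper (declared in
 * extent-io-tree.h and used by try_lock_extent()/lock_extent() below),
 * passing no changeset:
 *
 *	struct extent_state *cached = NULL;
 *
 *	// ... cached may have been filled by a previous set/lock call ...
 *	clear_extent_bit(&inode->io_tree, start, end, EXTENT_DELALLOC, &cached);
 */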
/*
 * Wait for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function.
 */
static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			    u32 bits, struct extent_state **cached_state)
{
	struct extent_state *state;

	btrfs_debug_check_extent_io_range(tree, start, end);

	spin_lock(&tree->lock);
again:
	/*
	 * Maintain cached_state, as we may not remove it from the tree if there
	 * are more bits than the bits we're waiting on set on this state.
	 */
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (extent_state_in_tree(state) &&
		    state->start <= start && start < state->end)
			goto process_node;
	}
	while (1) {
		/*
		 * This search will find all the extents that end after our
		 * range starts.
		 */
		state = tree_search(tree, start);
process_node:
		if (!state)
			break;
		if (state->start > end)
			goto out;

		if (state->state & bits) {
			DEFINE_WAIT(wait);

			start = state->start;
			refcount_inc(&state->refs);
			prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
			spin_unlock(&tree->lock);
			schedule();
			spin_lock(&tree->lock);
			finish_wait(&state->wq, &wait);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (!cond_resched_lock(&tree->lock)) {
			state = next_state(state);
			goto process_node;
		}
	}
out:
	/* This state is no longer useful, clear it and free it up. */
	if (cached_state && *cached_state) {
		state = *cached_state;
		*cached_state = NULL;
		free_extent_state(state);
	}
	spin_unlock(&tree->lock);
}

static void cache_state_if_flags(struct extent_state *state,
				 struct extent_state **cached_ptr,
				 unsigned flags)
{
	if (cached_ptr && !(*cached_ptr)) {
		if (!flags || (state->state & flags)) {
			*cached_ptr = state;
			refcount_inc(&state->refs);
		}
	}
}

static void cache_state(struct extent_state *state,
			struct extent_state **cached_ptr)
{
	return cache_state_if_flags(state, cached_ptr,
				    EXTENT_LOCKED | EXTENT_BOUNDARY);
}
/*
 * Find the first state struct with 'bits' set after 'start', and return it.
 * tree->lock must be held. NULL will be returned if nothing was found after
 * 'start'.
 */
static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
							u64 start, u32 bits)
{
	struct extent_state *state;

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search(tree, start);
	while (state) {
		if (state->end >= start && (state->state & bits))
			return state;
		state = next_state(state);
	}
	return NULL;
}

/*
 * Find the first offset in the io tree with one or more @bits set.
 *
 * Note: If there are multiple bits set in @bits, any of them will match.
 *
 * Return true if we find something, and update @start_ret and @end_ret.
 * Return false if we found nothing.
 */
bool find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			   u64 *start_ret, u64 *end_ret, u32 bits,
			   struct extent_state **cached_state)
{
	struct extent_state *state;
	bool ret = false;

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->end == start - 1 && extent_state_in_tree(state)) {
			while ((state = next_state(state)) != NULL) {
				if (state->state & bits)
					break;
			}
			/*
			 * If we found the next extent state, clear cached_state
			 * so that we can cache the next extent state below and
			 * avoid future calls going over the same extent state
			 * again. If we haven't found any, clear as well since
			 * it's now useless.
			 */
			free_extent_state(*cached_state);
			*cached_state = NULL;
			if (state)
				goto got_it;
			goto out;
		}
		free_extent_state(*cached_state);
		*cached_state = NULL;
	}

	state = find_first_extent_bit_state(tree, start, bits);
got_it:
	if (state) {
		cache_state_if_flags(state, cached_state, 0);
		*start_ret = state->start;
		*end_ret = state->end;
		ret = true;
	}
out:
	spin_unlock(&tree->lock);
	return ret;
}
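/*
 * Usage sketch (illustrative only, not part of the original file): walking all
 * ranges with a bit set, feeding each call's end back in as the next start so
 * the cached state short-circuits the tree search:
 *
 *	struct extent_state *cached = NULL;
 *	u64 start = 0, found_start, found_end;
 *
 *	while (find_first_extent_bit(tree, start, &found_start, &found_end,
 *				     EXTENT_DIRTY, &cached)) {
 *		// process [found_start, found_end] ...
 *		start = found_end + 1;
 *	}
 *	free_extent_state(cached);
 */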
/*
 * Find a contiguous area of bits
 *
 * @tree:      io tree to check
 * @start:     offset to start the search from
 * @start_ret: the first offset we found with the bits set
 * @end_ret:   the final contiguous range of the bits that were set
 * @bits:      bits to look for
 *
 * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges
 * to set bits appropriately, and then merge them again. During this time it
 * will drop the tree->lock, so use this helper if you want to find the actual
 * contiguous area for given bits. We will search to the first bit we find, and
 * then walk down the tree until we find a non-contiguous area. The area
 * returned will be the full contiguous area with the bits set.
 */
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
			       u64 *start_ret, u64 *end_ret, u32 bits)
{
	struct extent_state *state;
	int ret = 1;

	spin_lock(&tree->lock);
	state = find_first_extent_bit_state(tree, start, bits);
	if (state) {
		*start_ret = state->start;
		*end_ret = state->end;
		while ((state = next_state(state)) != NULL) {
			if (state->start > (*end_ret + 1))
				break;
			*end_ret = state->end;
		}
		ret = 0;
	}
	spin_unlock(&tree->lock);
	return ret;
}

/*
 * Find a contiguous range of bytes in the file marked as delalloc, not more
 * than 'max_bytes'. start and end are used to return the range.
 *
 * True is returned if we find something, false if nothing was in the tree.
 */
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
			       u64 *end, u64 max_bytes,
			       struct extent_state **cached_state)
{
	struct extent_state *state;
	u64 cur_start = *start;
	bool found = false;
	u64 total_bytes = 0;

	spin_lock(&tree->lock);

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search(tree, cur_start);
	if (!state) {
		*end = (u64)-1;
		goto out;
	}

	while (state) {
		if (found && (state->start != cur_start ||
			      (state->state & EXTENT_BOUNDARY))) {
			goto out;
		}
		if (!(state->state & EXTENT_DELALLOC)) {
			if (!found)
				*end = state->end;
			goto out;
		}
		if (!found) {
			*start = state->start;
			*cached_state = state;
			refcount_inc(&state->refs);
		}
		found = true;
		*end = state->end;
		cur_start = state->end + 1;
		total_bytes += state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
		state = next_state(state);
	}
out:
	spin_unlock(&tree->lock);
	return found;
}
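/*
 * Usage sketch (illustrative only, not part of the original file): the
 * writeback path probes for a delalloc run starting at the current offset,
 * capping how much it grabs in one go. BTRFS_MAX_EXTENT_SIZE is a real
 * constant; the starting point is an assumption:
 *
 *	struct extent_state *cached = NULL;
 *	u64 delalloc_start = page_offset;	// hypothetical starting point
 *	u64 delalloc_end;
 *
 *	if (btrfs_find_delalloc_range(&inode->io_tree, &delalloc_start,
 *				      &delalloc_end, BTRFS_MAX_EXTENT_SIZE,
 *				      &cached)) {
 *		// [delalloc_start, delalloc_end] is delalloc and @cached
 *		// holds a reference to the first state record
 *	}
 *	free_extent_state(cached);
 */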
/*
 * Set some bits on a range in the tree. This may require allocations or
 * sleeping. By default all allocations use GFP_NOFS, use EXTENT_NOWAIT for
 * GFP_NOWAIT.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if some
 * part of the range already has the desired bits set. The extent_state of the
 * existing range is returned in failed_state in this case, and the start of
 * the existing range is returned in failed_start. failed_state is used as an
 * optimization for wait_extent_bit, failed_start must be used as the source of
 * truth as failed_state may have changed since we returned.
 *
 * The range [start, end] is inclusive. This takes the tree lock.
 */
static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			    u32 bits, u64 *failed_start,
			    struct extent_state **failed_state,
			    struct extent_state **cached_state,
			    struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node **p = NULL;
	struct rb_node *parent = NULL;
	int err = 0;
	u64 last_start;
	u64 last_end;
	u32 exclusive_bits = (bits & EXTENT_LOCKED);
	gfp_t mask;

	set_gfp_mask_from_bits(&bits, &mask);
	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);

	if (exclusive_bits)
		ASSERT(failed_start);
	else
		ASSERT(failed_start == NULL && failed_state == NULL);
again:
	if (!prealloc) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range as
		 * well. If we end up needing a new extent state we allocate
		 * it later.
		 */
		prealloc = alloc_extent_state(mask);
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state))
			goto hit_next;
	}
	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search_for_insert(tree, start, &p, &parent);
	if (!state) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		prealloc->start = start;
		prealloc->end = end;
		insert_state_fast(tree, prealloc, p, parent, bits, changeset);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		if (state->state & exclusive_bits) {
			*failed_start = state->start;
			cache_state(state, failed_state);
			err = -EEXIST;
			goto out;
		}

		set_state_bits(tree, state, bits, changeset);
		cache_state(state, cached_state);
		merge_state(tree, state);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		state = next_state(state);
		if (start < end && state && state->start == start &&
		    !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired bit
	 * on it.
	 */
	if (state->start < start) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			cache_state(state, failed_state);
			err = -EEXIST;
			goto out;
		}

		/*
		 * If this extent already has all the bits we want set, then
		 * skip it, not necessary to split it or do anything with it.
		 */
		if ((state->state & bits) == bits) {
			start = state->end + 1;
			cache_state(state, cached_state);
			goto search_again;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);

		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, bits, changeset);
			cache_state(state, cached_state);
			merge_state(tree, state);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
			state = next_state(state);
			if (start < end && state && state->start == start &&
			    !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		struct extent_state *inserted_state;

		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;

		/*
		 * Avoid freeing 'prealloc' if it can be merged with the later
		 * extent.
		 */
		prealloc->start = start;
		prealloc->end = this_end;
		inserted_state = insert_state(tree, prealloc, bits, changeset);
		if (IS_ERR(inserted_state)) {
			err = PTR_ERR(inserted_state);
			extent_io_tree_panic(tree, err);
		}

		cache_state(inserted_state, cached_state);
		if (inserted_state == prealloc)
			prealloc = NULL;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 *     | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half
	 */
	if (state->start <= end && state->end > end) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			cache_state(state, failed_state);
			err = -EEXIST;
			goto out;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		set_state_bits(tree, prealloc, bits, changeset);
		cache_state(prealloc, cached_state);
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;
}
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   u32 bits, struct extent_state **cached_state)
{
	return __set_extent_bit(tree, start, end, bits, NULL, NULL,
				cached_state, NULL);
}
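/*
 * Usage sketch (illustrative only, not part of the original file): marking a
 * byte range delalloc on an inode's io tree, keeping a cached state so a
 * follow-up clear on the same range avoids the tree search:
 *
 *	struct extent_state *cached = NULL;
 *
 *	set_extent_bit(&inode->io_tree, start, end, EXTENT_DELALLOC, &cached);
 *	// ... later ...
 *	clear_extent_bit(&inode->io_tree, start, end, EXTENT_DELALLOC, &cached);
 */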
/*
 * Convert all bits in a given range from one bit to another
 *
 * @tree:         the io tree to search
 * @start:        the start offset in bytes
 * @end:          the end offset in bytes (inclusive)
 * @bits:         the bits to set in this range
 * @clear_bits:   the bits to clear in this range
 * @cached_state: state that we're going to cache
 *
 * This will go through and set bits for the given range. If any states exist
 * already in this range they are set with the given bit and cleared of the
 * clear_bits. This is only meant to be used by things that are mergeable, ie.
 * converting from say DELALLOC to DIRTY. This is not meant to be used with
 * boundary bits like LOCK.
 *
 * All allocations are done with GFP_NOFS.
 */
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		       u32 bits, u32 clear_bits,
		       struct extent_state **cached_state)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node **p = NULL;
	struct rb_node *parent = NULL;
	int err = 0;
	u64 last_start;
	u64 last_end;
	bool first_iteration = true;

	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
				       clear_bits);

again:
	if (!prealloc) {
		/*
		 * Best effort, don't worry if extent state allocation fails
		 * here for the first iteration. We might have a cached state
		 * that matches exactly the target range, in which case no
		 * extent state allocations are needed. We'll only know this
		 * after locking the tree.
		 */
		prealloc = alloc_extent_state(GFP_NOFS);
		if (!prealloc && !first_iteration)
			return -ENOMEM;
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state))
			goto hit_next;
	}

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search_for_insert(tree, start, &p, &parent);
	if (!state) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}
		prealloc->start = start;
		prealloc->end = end;
		insert_state_fast(tree, prealloc, p, parent, bits, NULL);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going.
	 */
	if (state->start == start && state->end <= end) {
		set_state_bits(tree, state, bits, NULL);
		cache_state(state, cached_state);
		state = clear_state_bit(tree, state, clear_bits, 0, NULL);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		if (start < end && state && state->start == start &&
		    !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired bit
	 * on it.
	 */
	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, bits, NULL);
			cache_state(state, cached_state);
			state = clear_state_bit(tree, state, clear_bits, 0, NULL);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
			if (start < end && state && state->start == start &&
			    !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		struct extent_state *inserted_state;

		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}

		/*
		 * Avoid freeing 'prealloc' if it can be merged with the later
		 * extent.
		 */
		prealloc->start = start;
		prealloc->end = this_end;
		inserted_state = insert_state(tree, prealloc, bits, NULL);
		if (IS_ERR(inserted_state)) {
			err = PTR_ERR(inserted_state);
			extent_io_tree_panic(tree, err);
		}
		cache_state(inserted_state, cached_state);
		if (inserted_state == prealloc)
			prealloc = NULL;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 *     | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half.
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}

		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		set_state_bits(tree, prealloc, bits, NULL);
		cache_state(prealloc, cached_state);
		clear_state_bit(tree, prealloc, clear_bits, 0, NULL);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	cond_resched();
	first_iteration = false;
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;
}
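/*
 * Usage sketch (illustrative only, not part of the original file): converting
 * every range in a tree from one mergeable bit to another, e.g. re-tagging
 * dirty ranges so a later pass can wait on them (the bit names are real, the
 * surrounding flow is an assumption):
 *
 *	struct extent_state *cached = NULL;
 *	int ret;
 *
 *	ret = convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT,
 *				 EXTENT_DIRTY, &cached);
 *	free_extent_state(cached);
 */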
/*
 * Find the first range that has @bits not set. This range could start before
 * @start.
 *
 * @tree:      the tree to search
 * @start:     offset at/after which the found extent should start
 * @start_ret: records the beginning of the range
 * @end_ret:   records the end of the range (inclusive)
 * @bits:      the set of bits which must be unset
 *
 * Since an unallocated range is also considered one which doesn't have the
 * bits set, it's possible that @end_ret contains -1; this happens in case the
 * range spans (last_range_end, end of device]. In this case it's up to the
 * caller to trim @end_ret to the appropriate size.
 */
void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
				 u64 *start_ret, u64 *end_ret, u32 bits)
{
	struct extent_state *state;
	struct extent_state *prev = NULL, *next = NULL;

	spin_lock(&tree->lock);

	/* Find first extent with bits cleared */
	while (1) {
		state = tree_search_prev_next(tree, start, &prev, &next);
		if (!state && !next && !prev) {
			/*
			 * Tree is completely empty, send full range and let
			 * caller deal with it
			 */
			*start_ret = 0;
			*end_ret = -1;
			goto out;
		} else if (!state && !next) {
			/*
			 * We are past the last allocated chunk, set start at
			 * the end of the last extent.
			 */
			*start_ret = prev->end + 1;
			*end_ret = -1;
			goto out;
		} else if (!state) {
			state = next;
		}

		/*
		 * At this point 'state' either contains 'start' or start is
		 * before 'state'
		 */
		if (in_range(start, state->start, state->end - state->start + 1)) {
			if (state->state & bits) {
				/*
				 * |--range with bits sets--|
				 *          |
				 *          start
				 */
				start = state->end + 1;
			} else {
				/*
				 * 'start' falls within a range that doesn't
				 * have the bits set, so take its start as the
				 * beginning of the desired range
				 *
				 * |--range with bits cleared----|
				 *          |
				 *          start
				 */
				*start_ret = state->start;
				break;
			}
		} else {
			/*
			 * |---prev range---|---hole/unset---|---node range---|
			 *                          |
			 *                          start
			 *
			 *                        or
			 *
			 * |---hole/unset--||--first node--|
			 *        0   |
			 *            start
			 */
			if (prev)
				*start_ret = prev->end + 1;
			else
				*start_ret = 0;
			break;
		}
	}

	/*
	 * Find the longest stretch from start until an entry which has the
	 * bits set
	 */
	while (state) {
		if (state->end >= start && !(state->state & bits)) {
			*end_ret = state->end;
		} else {
			*end_ret = state->start - 1;
			break;
		}
		state = next_state(state);
	}
out:
	spin_unlock(&tree->lock);
}
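/*
 * Usage sketch (illustrative only, not part of the original file): searching
 * a device's allocation state for a hole, then trimming the open-ended tail
 * as the comment above requires. CHUNK_ALLOCATED is a real bit; the
 * surrounding fields are assumptions:
 *
 *	u64 hole_start, hole_end;
 *
 *	find_first_clear_extent_bit(&device->alloc_state, search_start,
 *				    &hole_start, &hole_end, CHUNK_ALLOCATED);
 *	hole_end = min(hole_end, device->total_bytes - 1);
 */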
/*
 * Count the number of bytes in the tree that have a given bit(s) set for a
 * given range.
 *
 * @tree:         The io tree to search.
 * @start:        The start offset of the range. This value is updated to the
 *                offset of the first byte found with the given bit(s), so it
 *                can end up being bigger than the initial value.
 * @search_end:   The end offset (inclusive value) of the search range.
 * @max_bytes:    The maximum byte count we are interested in. The search stops
 *                once it reaches this count.
 * @bits:         The bits the range must have in order to be accounted for.
 *                If multiple bits are set, then only subranges that have all
 *                the bits set are accounted for.
 * @contig:       Indicate if we should ignore holes in the range or not. If
 *                this is true, then stop once we find a hole.
 * @cached_state: A cached state to be used across multiple calls to this
 *                function in order to speed up searches. Use NULL if this is
 *                called only once or if each call does not start where the
 *                previous one ended.
 *
 * Returns the total number of bytes found within the given range that have
 * all given bits set. If the returned number of bytes is greater than zero
 * then @start is updated with the offset of the first byte with the bits set.
 */
u64 count_range_bits(struct extent_io_tree *tree,
		     u64 *start, u64 search_end, u64 max_bytes,
		     u32 bits, int contig,
		     struct extent_state **cached_state)
{
	struct extent_state *state = NULL;
	struct extent_state *cached;
	u64 cur_start = *start;
	u64 total_bytes = 0;
	u64 last = 0;
	int found = 0;

	if (WARN_ON(search_end < cur_start))
		return 0;

	spin_lock(&tree->lock);

	if (!cached_state || !*cached_state)
		goto search;

	cached = *cached_state;

	if (!extent_state_in_tree(cached))
		goto search;

	if (cached->start <= cur_start && cur_start <= cached->end) {
		state = cached;
	} else if (cached->start > cur_start) {
		struct extent_state *prev;

		/*
		 * The cached state starts after our search range's start. Check
		 * if the previous state record starts at or before the range we
		 * are looking for, and if so, use it - this is a common case
		 * when there are holes between records in the tree. If there is
		 * no previous state record, we can start from our cached state.
		 */
		prev = prev_state(cached);
		if (!prev)
			state = cached;
		else if (prev->start <= cur_start && cur_start <= prev->end)
			state = prev;
	}

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
search:
	if (!state)
		state = tree_search(tree, cur_start);

	while (state) {
		if (state->start > search_end)
			break;
		if (contig && found && state->start > last + 1)
			break;
		if (state->end >= cur_start && (state->state & bits) == bits) {
			total_bytes += min(search_end, state->end) + 1 -
				       max(cur_start, state->start);
			if (total_bytes >= max_bytes)
				break;
			if (!found) {
				*start = max(cur_start, state->start);
				found = 1;
			}
			last = state->end;
		} else if (contig && found) {
			break;
		}
		state = next_state(state);
	}

	if (cached_state) {
		free_extent_state(*cached_state);
		*cached_state = state;
		if (state)
			refcount_inc(&state->refs);
	}

	spin_unlock(&tree->lock);

	return total_bytes;
}
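/*
 * Usage sketch (illustrative only, not part of the original file): counting
 * how many delalloc bytes exist in a file range, with no contiguity
 * requirement and no byte cap:
 *
 *	u64 range_start = start;
 *	u64 delalloc_bytes;
 *
 *	delalloc_bytes = count_range_bits(&inode->io_tree, &range_start, end,
 *					  (u64)-1, EXTENT_DELALLOC, 0, NULL);
 *	// on return, range_start points at the first delalloc byte (if any)
 */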
/*
 * Check if the single @bit exists in the given range.
 */
bool test_range_bit_exists(struct extent_io_tree *tree, u64 start, u64 end, u32 bit)
{
	struct extent_state *state = NULL;
	bool bitset = false;

	ASSERT(is_power_of_2(bit));

	spin_lock(&tree->lock);
	state = tree_search(tree, start);
	while (state && start <= end) {
		if (state->start > end)
			break;

		if (state->state & bit) {
			bitset = true;
			break;
		}

		/* If state->end is (u64)-1, start will overflow to 0 */
		start = state->end + 1;
		if (start > end || start == 0)
			break;
		state = next_state(state);
	}
	spin_unlock(&tree->lock);
	return bitset;
}

/*
 * Check if the whole range [@start, @end] (inclusive) contains the single
 * @bit set.
 */
bool test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bit,
		    struct extent_state *cached)
{
	struct extent_state *state = NULL;
	bool bitset = true;

	ASSERT(is_power_of_2(bit));

	spin_lock(&tree->lock);
	if (cached && extent_state_in_tree(cached) && cached->start <= start &&
	    cached->end > start)
		state = cached;
	else
		state = tree_search(tree, start);
	while (state && start <= end) {
		if (state->start > start) {
			bitset = false;
			break;
		}

		if (state->start > end)
			break;

		if ((state->state & bit) == 0) {
			bitset = false;
			break;
		}

		if (state->end == (u64)-1)
			break;

		/*
		 * Last entry (if state->end is (u64)-1 and overflow happens),
		 * or next entry starts after the range.
		 */
		start = state->end + 1;
		if (start > end || start == 0)
			break;
		state = next_state(state);
	}

	/* We ran out of states and were still inside of our range. */
	if (!state)
		bitset = false;
	spin_unlock(&tree->lock);
	return bitset;
}

/* Wrappers around set/clear extent bit */
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
			   u32 bits, struct extent_changeset *changeset)
{
	/*
	 * We don't support EXTENT_LOCKED yet, as current changeset will
	 * record any bits changed, so for EXTENT_LOCKED case, it will
	 * either fail with -EEXIST or changeset will record the whole
	 * range.
	 */
	ASSERT(!(bits & EXTENT_LOCKED));

	return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset);
}

int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
			     u32 bits, struct extent_changeset *changeset)
{
	/*
	 * Don't support EXTENT_LOCKED case, same reason as
	 * set_record_extent_bits().
	 */
	ASSERT(!(bits & EXTENT_LOCKED));

	return __clear_extent_bit(tree, start, end, bits, NULL, changeset);
}

int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
		    struct extent_state **cached)
{
	int err;
	u64 failed_start;

	err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
			       NULL, cached, NULL);
	if (err == -EEXIST) {
		if (failed_start > start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, cached);
		return 0;
	}
	return 1;
}
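/*
 * Usage sketch (illustrative only, not part of the original file): a
 * nonblocking path tries the lock first and bails out to a blocking context
 * on contention, the same pattern nowait callers use:
 *
 *	struct extent_state *cached = NULL;
 *
 *	if (!try_lock_extent(&inode->io_tree, start, end, &cached))
 *		return -EAGAIN;	// somebody else holds part of the range
 *	// range is locked; unlock_extent() (declared in extent-io-tree.h)
 *	// releases it when done
 */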
/*
 * Either insert or lock state struct between start and end. Blocks until the
 * lock is acquired, waiting on any conflicting EXTENT_LOCKED range.
 */
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
		struct extent_state **cached_state)
{
	struct extent_state *failed_state = NULL;
	int err;
	u64 failed_start;

	err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
			       &failed_state, cached_state, NULL);
	while (err == -EEXIST) {
		if (failed_start != start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, cached_state);

		wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED,
				&failed_state);
		err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
				       &failed_start, &failed_state,
				       cached_state, NULL);
	}
	return err;
}

void __cold extent_state_free_cachep(void)
{
	btrfs_extent_state_leak_debug_check();
	kmem_cache_destroy(extent_state_cache);
}

int __init extent_state_init_cachep(void)
{
	extent_state_cache = kmem_cache_create("btrfs_extent_state",
					       sizeof(struct extent_state), 0,
					       SLAB_MEM_SPREAD, NULL);
	if (!extent_state_cache)
		return -ENOMEM;

	return 0;
}