// SPDX-License-Identifier: GPL-2.0

#include <linux/slab.h>
#include <trace/events/btrfs.h>
#include "messages.h"
#include "ctree.h"
#include "extent-io-tree.h"
#include "btrfs_inode.h"
#include "misc.h"

static struct kmem_cache *extent_state_cache;

static inline bool extent_state_in_tree(const struct extent_state *state)
{
	return !RB_EMPTY_NODE(&state->rb_node);
}

#ifdef CONFIG_BTRFS_DEBUG
static LIST_HEAD(states);
static DEFINE_SPINLOCK(leak_lock);

static inline void btrfs_leak_debug_add_state(struct extent_state *state)
{
	unsigned long flags;

	spin_lock_irqsave(&leak_lock, flags);
	list_add(&state->leak_list, &states);
	spin_unlock_irqrestore(&leak_lock, flags);
}

static inline void btrfs_leak_debug_del_state(struct extent_state *state)
{
	unsigned long flags;

	spin_lock_irqsave(&leak_lock, flags);
	list_del(&state->leak_list);
	spin_unlock_irqrestore(&leak_lock, flags);
}

static inline void btrfs_extent_state_leak_debug_check(void)
{
	struct extent_state *state;

	while (!list_empty(&states)) {
		state = list_entry(states.next, struct extent_state, leak_list);
		pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
		       state->start, state->end, state->state,
		       extent_state_in_tree(state),
		       refcount_read(&state->refs));
		list_del(&state->leak_list);
		kmem_cache_free(extent_state_cache, state);
	}
}

#define btrfs_debug_check_extent_io_range(tree, start, end)		\
	__btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
						       struct extent_io_tree *tree,
						       u64 start, u64 end)
{
	const struct btrfs_inode *inode;
	u64 isize;

	if (tree->owner != IO_TREE_INODE_IO)
		return;

	inode = extent_io_tree_to_inode_const(tree);
	isize = i_size_read(&inode->vfs_inode);
	if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
		btrfs_debug_rl(inode->root->fs_info,
			       "%s: ino %llu isize %llu odd range [%llu,%llu]",
			       caller, btrfs_ino(inode), isize, start, end);
	}
}
#else
#define btrfs_leak_debug_add_state(state)		do {} while (0)
#define btrfs_leak_debug_del_state(state)		do {} while (0)
#define btrfs_extent_state_leak_debug_check()		do {} while (0)
#define btrfs_debug_check_extent_io_range(c, s, e)	do {} while (0)
#endif

/*
 * The only tree allowed to set the inode is IO_TREE_INODE_IO.
 */
static bool is_inode_io_tree(const struct extent_io_tree *tree)
{
	return tree->owner == IO_TREE_INODE_IO;
}

/* Return the inode if it's valid for the given tree, otherwise NULL. */
struct btrfs_inode *extent_io_tree_to_inode(struct extent_io_tree *tree)
{
	if (tree->owner == IO_TREE_INODE_IO)
		return tree->inode;
	return NULL;
}

/* Read-only access to the inode. */
const struct btrfs_inode *extent_io_tree_to_inode_const(const struct extent_io_tree *tree)
{
	if (tree->owner == IO_TREE_INODE_IO)
		return tree->inode;
	return NULL;
}

/*
 * For read-only access to fs_info.
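 *
 * Works for any tree owner: trees attached to an inode resolve fs_info
 * through the inode's root, while all other trees carry their own fs_info
 * pointer set at extent_io_tree_init() time.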
 */
const struct btrfs_fs_info *extent_io_tree_to_fs_info(const struct extent_io_tree *tree)
{
	if (tree->owner == IO_TREE_INODE_IO)
		return tree->inode->root->fs_info;
	return tree->fs_info;
}

void extent_io_tree_init(struct btrfs_fs_info *fs_info,
			 struct extent_io_tree *tree, unsigned int owner)
{
	tree->state = RB_ROOT;
	spin_lock_init(&tree->lock);
	tree->fs_info = fs_info;
	tree->owner = owner;
}

/*
 * Empty an io tree, removing and freeing every extent state record from the
 * tree. This should be called once we are sure no other task can access the
 * tree anymore, so no tree updates happen after we empty the tree and there
 * aren't any waiters on any extent state record (EXTENT_LOCKED bit is never
 * set on any extent state when calling this function).
 */
void extent_io_tree_release(struct extent_io_tree *tree)
{
	struct rb_root root;
	struct extent_state *state;
	struct extent_state *tmp;

	spin_lock(&tree->lock);
	root = tree->state;
	tree->state = RB_ROOT;
	rbtree_postorder_for_each_entry_safe(state, tmp, &root, rb_node) {
		/* Clear node to keep free_extent_state() happy. */
		RB_CLEAR_NODE(&state->rb_node);
		ASSERT(!(state->state & EXTENT_LOCKED));
		/*
		 * No need for a memory barrier here, as we are holding the tree
		 * lock and we only change the waitqueue while holding that lock
		 * (see wait_extent_bit()).
		 */
		ASSERT(!waitqueue_active(&state->wq));
		free_extent_state(state);
		cond_resched_lock(&tree->lock);
	}
	/*
	 * Should still be empty even after a reschedule, no other task should
	 * be accessing the tree anymore.
	 */
	ASSERT(RB_EMPTY_ROOT(&tree->state));
	spin_unlock(&tree->lock);
}

static struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;

	/*
	 * The given mask might be not appropriate for the slab allocator,
	 * drop the unsupported bits.
	 */
	mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state)
		return state;
	state->state = 0;
	RB_CLEAR_NODE(&state->rb_node);
	btrfs_leak_debug_add_state(state);
	refcount_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	trace_alloc_extent_state(state, mask, _RET_IP_);
	return state;
}

static struct extent_state *alloc_extent_state_atomic(struct extent_state *prealloc)
{
	if (!prealloc)
		prealloc = alloc_extent_state(GFP_ATOMIC);

	return prealloc;
}

void free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (refcount_dec_and_test(&state->refs)) {
		WARN_ON(extent_state_in_tree(state));
		btrfs_leak_debug_del_state(state);
		trace_free_extent_state(state, _RET_IP_);
		kmem_cache_free(extent_state_cache, state);
	}
}

static int add_extent_changeset(struct extent_state *state, u32 bits,
				struct extent_changeset *changeset,
				int set)
{
	int ret;

	if (!changeset)
		return 0;
	if (set && (state->state & bits) == bits)
		return 0;
	if (!set && (state->state & bits) == 0)
		return 0;
	changeset->bytes_changed += state->end - state->start + 1;
	ret = ulist_add(&changeset->range_changed, state->start, state->end,
			GFP_ATOMIC);
	return ret;
}

static inline struct extent_state *next_state(struct extent_state *state)
{
	struct rb_node *next = rb_next(&state->rb_node);

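	/* rb_next() returns NULL when @state is the right-most entry in the tree. */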
	if (next)
		return rb_entry(next, struct extent_state, rb_node);
	else
		return NULL;
}

static inline struct extent_state *prev_state(struct extent_state *state)
{
	struct rb_node *next = rb_prev(&state->rb_node);

	if (next)
		return rb_entry(next, struct extent_state, rb_node);
	else
		return NULL;
}

/*
 * Search @tree for an entry that contains @offset. Such entry would have
 * entry->start <= offset && entry->end >= offset.
 *
 * @tree:       the tree to search
 * @offset:     offset that should fall within an entry in @tree
 * @node_ret:   pointer where new node should be anchored (used when inserting an
 *              entry in the tree)
 * @parent_ret: points to entry which would have been the parent of the entry,
 *              containing @offset
 *
 * Return a pointer to the entry that contains @offset byte address and don't
 * change @node_ret and @parent_ret.
 *
 * If no such entry exists, return the first entry that starts after @offset
 * (or NULL if there is none) and fill the parameters @node_ret and
 * @parent_ret, which can then be used for inserting a new entry.
 */
static inline struct extent_state *tree_search_for_insert(struct extent_io_tree *tree,
							  u64 offset,
							  struct rb_node ***node_ret,
							  struct rb_node **parent_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node **node = &root->rb_node;
	struct rb_node *prev = NULL;
	struct extent_state *entry = NULL;

	while (*node) {
		prev = *node;
		entry = rb_entry(prev, struct extent_state, rb_node);

		if (offset < entry->start)
			node = &(*node)->rb_left;
		else if (offset > entry->end)
			node = &(*node)->rb_right;
		else
			return entry;
	}

	if (node_ret)
		*node_ret = node;
	if (parent_ret)
		*parent_ret = prev;

	/* Search neighbors until we find the first one past the end. */
	while (entry && offset > entry->end)
		entry = next_state(entry);

	return entry;
}

/*
 * Search offset in the tree or fill neighbor rbtree node pointers.
 *
 * @tree:     the tree to search
 * @offset:   offset that should fall within an entry in @tree
 * @next_ret: pointer to the first entry whose range ends after @offset
 * @prev_ret: pointer to the first entry whose range begins before @offset
 *
 * Return a pointer to the entry that contains @offset byte address. If no
 * such entry exists, then return NULL and fill @prev_ret and @next_ret.
 * Otherwise return the found entry and other pointers are left untouched.
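 *
 * Both @prev_ret and @next_ret must be valid pointers (this is asserted
 * below).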
302 */ 303 static struct extent_state *tree_search_prev_next(struct extent_io_tree *tree, 304 u64 offset, 305 struct extent_state **prev_ret, 306 struct extent_state **next_ret) 307 { 308 struct rb_root *root = &tree->state; 309 struct rb_node **node = &root->rb_node; 310 struct extent_state *orig_prev; 311 struct extent_state *entry = NULL; 312 313 ASSERT(prev_ret); 314 ASSERT(next_ret); 315 316 while (*node) { 317 entry = rb_entry(*node, struct extent_state, rb_node); 318 319 if (offset < entry->start) 320 node = &(*node)->rb_left; 321 else if (offset > entry->end) 322 node = &(*node)->rb_right; 323 else 324 return entry; 325 } 326 327 orig_prev = entry; 328 while (entry && offset > entry->end) 329 entry = next_state(entry); 330 *next_ret = entry; 331 entry = orig_prev; 332 333 while (entry && offset < entry->start) 334 entry = prev_state(entry); 335 *prev_ret = entry; 336 337 return NULL; 338 } 339 340 /* 341 * Inexact rb-tree search, return the next entry if @offset is not found 342 */ 343 static inline struct extent_state *tree_search(struct extent_io_tree *tree, u64 offset) 344 { 345 return tree_search_for_insert(tree, offset, NULL, NULL); 346 } 347 348 static void extent_io_tree_panic(const struct extent_io_tree *tree, 349 const struct extent_state *state, 350 const char *opname, 351 int err) 352 { 353 btrfs_panic(extent_io_tree_to_fs_info(tree), err, 354 "extent io tree error on %s state start %llu end %llu", 355 opname, state->start, state->end); 356 } 357 358 static void merge_prev_state(struct extent_io_tree *tree, struct extent_state *state) 359 { 360 struct extent_state *prev; 361 362 prev = prev_state(state); 363 if (prev && prev->end == state->start - 1 && prev->state == state->state) { 364 if (is_inode_io_tree(tree)) 365 btrfs_merge_delalloc_extent(extent_io_tree_to_inode(tree), 366 state, prev); 367 state->start = prev->start; 368 rb_erase(&prev->rb_node, &tree->state); 369 RB_CLEAR_NODE(&prev->rb_node); 370 free_extent_state(prev); 371 } 372 } 373 374 static void merge_next_state(struct extent_io_tree *tree, struct extent_state *state) 375 { 376 struct extent_state *next; 377 378 next = next_state(state); 379 if (next && next->start == state->end + 1 && next->state == state->state) { 380 if (is_inode_io_tree(tree)) 381 btrfs_merge_delalloc_extent(extent_io_tree_to_inode(tree), 382 state, next); 383 state->end = next->end; 384 rb_erase(&next->rb_node, &tree->state); 385 RB_CLEAR_NODE(&next->rb_node); 386 free_extent_state(next); 387 } 388 } 389 390 /* 391 * Utility function to look for merge candidates inside a given range. Any 392 * extents with matching state are merged together into a single extent in the 393 * tree. Extents with EXTENT_IO in their state field are not merged because 394 * the end_io handlers need to be able to do operations on them without 395 * sleeping (or doing allocations/splits). 396 * 397 * This should be called with the tree lock held. 
398 */ 399 static void merge_state(struct extent_io_tree *tree, struct extent_state *state) 400 { 401 if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY)) 402 return; 403 404 merge_prev_state(tree, state); 405 merge_next_state(tree, state); 406 } 407 408 static void set_state_bits(struct extent_io_tree *tree, 409 struct extent_state *state, 410 u32 bits, struct extent_changeset *changeset) 411 { 412 u32 bits_to_set = bits & ~EXTENT_CTLBITS; 413 int ret; 414 415 if (is_inode_io_tree(tree)) 416 btrfs_set_delalloc_extent(extent_io_tree_to_inode(tree), state, bits); 417 418 ret = add_extent_changeset(state, bits_to_set, changeset, 1); 419 BUG_ON(ret < 0); 420 state->state |= bits_to_set; 421 } 422 423 /* 424 * Insert an extent_state struct into the tree. 'bits' are set on the 425 * struct before it is inserted. 426 * 427 * Returns a pointer to the struct extent_state record containing the range 428 * requested for insertion, which may be the same as the given struct or it 429 * may be an existing record in the tree that was expanded to accommodate the 430 * requested range. In case of an extent_state different from the one that was 431 * given, the later can be freed or reused by the caller. 432 * 433 * On error it returns an error pointer. 434 * 435 * The tree lock is not taken internally. This is a utility function and 436 * probably isn't what you want to call (see set/clear_extent_bit). 437 */ 438 static struct extent_state *insert_state(struct extent_io_tree *tree, 439 struct extent_state *state, 440 u32 bits, 441 struct extent_changeset *changeset) 442 { 443 struct rb_node **node; 444 struct rb_node *parent = NULL; 445 const u64 start = state->start - 1; 446 const u64 end = state->end + 1; 447 const bool try_merge = !(bits & (EXTENT_LOCKED | EXTENT_BOUNDARY)); 448 449 set_state_bits(tree, state, bits, changeset); 450 451 node = &tree->state.rb_node; 452 while (*node) { 453 struct extent_state *entry; 454 455 parent = *node; 456 entry = rb_entry(parent, struct extent_state, rb_node); 457 458 if (state->end < entry->start) { 459 if (try_merge && end == entry->start && 460 state->state == entry->state) { 461 if (is_inode_io_tree(tree)) 462 btrfs_merge_delalloc_extent( 463 extent_io_tree_to_inode(tree), 464 state, entry); 465 entry->start = state->start; 466 merge_prev_state(tree, entry); 467 state->state = 0; 468 return entry; 469 } 470 node = &(*node)->rb_left; 471 } else if (state->end > entry->end) { 472 if (try_merge && entry->end == start && 473 state->state == entry->state) { 474 if (is_inode_io_tree(tree)) 475 btrfs_merge_delalloc_extent( 476 extent_io_tree_to_inode(tree), 477 state, entry); 478 entry->end = state->end; 479 merge_next_state(tree, entry); 480 state->state = 0; 481 return entry; 482 } 483 node = &(*node)->rb_right; 484 } else { 485 return ERR_PTR(-EEXIST); 486 } 487 } 488 489 rb_link_node(&state->rb_node, parent, node); 490 rb_insert_color(&state->rb_node, &tree->state); 491 492 return state; 493 } 494 495 /* 496 * Insert state to @tree to the location given by @node and @parent. 
497 */ 498 static void insert_state_fast(struct extent_io_tree *tree, 499 struct extent_state *state, struct rb_node **node, 500 struct rb_node *parent, unsigned bits, 501 struct extent_changeset *changeset) 502 { 503 set_state_bits(tree, state, bits, changeset); 504 rb_link_node(&state->rb_node, parent, node); 505 rb_insert_color(&state->rb_node, &tree->state); 506 merge_state(tree, state); 507 } 508 509 /* 510 * Split a given extent state struct in two, inserting the preallocated 511 * struct 'prealloc' as the newly created second half. 'split' indicates an 512 * offset inside 'orig' where it should be split. 513 * 514 * Before calling, 515 * the tree has 'orig' at [orig->start, orig->end]. After calling, there 516 * are two extent state structs in the tree: 517 * prealloc: [orig->start, split - 1] 518 * orig: [ split, orig->end ] 519 * 520 * The tree locks are not taken by this function. They need to be held 521 * by the caller. 522 */ 523 static int split_state(struct extent_io_tree *tree, struct extent_state *orig, 524 struct extent_state *prealloc, u64 split) 525 { 526 struct rb_node *parent = NULL; 527 struct rb_node **node; 528 529 if (is_inode_io_tree(tree)) 530 btrfs_split_delalloc_extent(extent_io_tree_to_inode(tree), orig, 531 split); 532 533 prealloc->start = orig->start; 534 prealloc->end = split - 1; 535 prealloc->state = orig->state; 536 orig->start = split; 537 538 parent = &orig->rb_node; 539 node = &parent; 540 while (*node) { 541 struct extent_state *entry; 542 543 parent = *node; 544 entry = rb_entry(parent, struct extent_state, rb_node); 545 546 if (prealloc->end < entry->start) { 547 node = &(*node)->rb_left; 548 } else if (prealloc->end > entry->end) { 549 node = &(*node)->rb_right; 550 } else { 551 free_extent_state(prealloc); 552 return -EEXIST; 553 } 554 } 555 556 rb_link_node(&prealloc->rb_node, parent, node); 557 rb_insert_color(&prealloc->rb_node, &tree->state); 558 559 return 0; 560 } 561 562 /* 563 * Utility function to clear some bits in an extent state struct. It will 564 * optionally wake up anyone waiting on this state (wake == 1). 565 * 566 * If no bits are set on the state struct after clearing things, the 567 * struct is freed and removed from the tree 568 */ 569 static struct extent_state *clear_state_bit(struct extent_io_tree *tree, 570 struct extent_state *state, 571 u32 bits, int wake, 572 struct extent_changeset *changeset) 573 { 574 struct extent_state *next; 575 u32 bits_to_clear = bits & ~EXTENT_CTLBITS; 576 int ret; 577 578 if (is_inode_io_tree(tree)) 579 btrfs_clear_delalloc_extent(extent_io_tree_to_inode(tree), state, 580 bits); 581 582 ret = add_extent_changeset(state, bits_to_clear, changeset, 0); 583 BUG_ON(ret < 0); 584 state->state &= ~bits_to_clear; 585 if (wake) 586 wake_up(&state->wq); 587 if (state->state == 0) { 588 next = next_state(state); 589 if (extent_state_in_tree(state)) { 590 rb_erase(&state->rb_node, &tree->state); 591 RB_CLEAR_NODE(&state->rb_node); 592 free_extent_state(state); 593 } else { 594 WARN_ON(1); 595 } 596 } else { 597 merge_state(tree, state); 598 next = next_state(state); 599 } 600 return next; 601 } 602 603 /* 604 * Detect if extent bits request NOWAIT semantics and set the gfp mask accordingly, 605 * unset the EXTENT_NOWAIT bit. 606 */ 607 static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask) 608 { 609 *mask = (*bits & EXTENT_NOWAIT ? GFP_NOWAIT : GFP_NOFS); 610 *bits &= EXTENT_NOWAIT - 1; 611 } 612 613 /* 614 * Clear some bits on a range in the tree. 
 * This may require splitting or inserting elements in the tree, so the gfp
 * mask is used to indicate which allocations or sleeping are allowed.
 *
 * Waiters on the range are woken when EXTENT_LOCKED is among the cleared bits.
 * Passing EXTENT_CLEAR_ALL_BITS removes the given range from the tree
 * regardless of which bits are set (ie for truncate).
 *
 * The range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns 0 on success and < 0 on error.
 */
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		       u32 bits, struct extent_state **cached_state,
		       struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *cached;
	struct extent_state *prealloc = NULL;
	u64 last_end;
	int err;
	int clear = 0;
	int wake;
	int delete = (bits & EXTENT_CLEAR_ALL_BITS);
	gfp_t mask;

	set_gfp_mask_from_bits(&bits, &mask);
	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);

	if (delete)
		bits |= ~EXTENT_CTLBITS;

	if (bits & EXTENT_DELALLOC)
		bits |= EXTENT_NORESERVE;

	wake = (bits & EXTENT_LOCKED) ? 1 : 0;
	if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
		clear = 1;
again:
	if (!prealloc) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}

	spin_lock(&tree->lock);
	if (cached_state) {
		cached = *cached_state;

		if (clear) {
			*cached_state = NULL;
			cached_state = NULL;
		}

		if (cached && extent_state_in_tree(cached) &&
		    cached->start <= start && cached->end > start) {
			if (clear)
				refcount_dec(&cached->refs);
			state = cached;
			goto hit_next;
		}
		if (clear)
			free_extent_state(cached);
	}

	/* This search will find the extents that end after our range starts. */
	state = tree_search(tree, start);
	if (!state)
		goto out;
hit_next:
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);
	last_end = state->end;

	/* The state doesn't have the wanted bits, go ahead. */
	if (!(state->state & bits)) {
		state = next_state(state);
		goto next;
	}

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we clear the desired bit
	 * on it.
	 */

	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, state, "split", err);

		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state = clear_state_bit(tree, state, bits, wake, changeset);
			goto next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit on the first half.
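	 *
	 * After splitting at @end + 1 the preallocated state covers the part
	 * of the range up to @end and the original state keeps everything
	 * beyond it, so only the former gets the bits cleared.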
736 */ 737 if (state->start <= end && state->end > end) { 738 prealloc = alloc_extent_state_atomic(prealloc); 739 if (!prealloc) 740 goto search_again; 741 err = split_state(tree, state, prealloc, end + 1); 742 if (err) 743 extent_io_tree_panic(tree, state, "split", err); 744 745 if (wake) 746 wake_up(&state->wq); 747 748 clear_state_bit(tree, prealloc, bits, wake, changeset); 749 750 prealloc = NULL; 751 goto out; 752 } 753 754 state = clear_state_bit(tree, state, bits, wake, changeset); 755 next: 756 if (last_end == (u64)-1) 757 goto out; 758 start = last_end + 1; 759 if (start <= end && state && !need_resched()) 760 goto hit_next; 761 762 search_again: 763 if (start > end) 764 goto out; 765 spin_unlock(&tree->lock); 766 if (gfpflags_allow_blocking(mask)) 767 cond_resched(); 768 goto again; 769 770 out: 771 spin_unlock(&tree->lock); 772 if (prealloc) 773 free_extent_state(prealloc); 774 775 return 0; 776 777 } 778 779 /* 780 * Wait for one or more bits to clear on a range in the state tree. 781 * The range [start, end] is inclusive. 782 * The tree lock is taken by this function 783 */ 784 static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 785 u32 bits, struct extent_state **cached_state) 786 { 787 struct extent_state *state; 788 789 btrfs_debug_check_extent_io_range(tree, start, end); 790 791 spin_lock(&tree->lock); 792 again: 793 /* 794 * Maintain cached_state, as we may not remove it from the tree if there 795 * are more bits than the bits we're waiting on set on this state. 796 */ 797 if (cached_state && *cached_state) { 798 state = *cached_state; 799 if (extent_state_in_tree(state) && 800 state->start <= start && start < state->end) 801 goto process_node; 802 } 803 while (1) { 804 /* 805 * This search will find all the extents that end after our 806 * range starts. 807 */ 808 state = tree_search(tree, start); 809 process_node: 810 if (!state) 811 break; 812 if (state->start > end) 813 goto out; 814 815 if (state->state & bits) { 816 DEFINE_WAIT(wait); 817 818 start = state->start; 819 refcount_inc(&state->refs); 820 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); 821 spin_unlock(&tree->lock); 822 schedule(); 823 spin_lock(&tree->lock); 824 finish_wait(&state->wq, &wait); 825 free_extent_state(state); 826 goto again; 827 } 828 start = state->end + 1; 829 830 if (start > end) 831 break; 832 833 if (!cond_resched_lock(&tree->lock)) { 834 state = next_state(state); 835 goto process_node; 836 } 837 } 838 out: 839 /* This state is no longer useful, clear it and free it up. */ 840 if (cached_state && *cached_state) { 841 state = *cached_state; 842 *cached_state = NULL; 843 free_extent_state(state); 844 } 845 spin_unlock(&tree->lock); 846 } 847 848 static void cache_state_if_flags(struct extent_state *state, 849 struct extent_state **cached_ptr, 850 unsigned flags) 851 { 852 if (cached_ptr && !(*cached_ptr)) { 853 if (!flags || (state->state & flags)) { 854 *cached_ptr = state; 855 refcount_inc(&state->refs); 856 } 857 } 858 } 859 860 static void cache_state(struct extent_state *state, 861 struct extent_state **cached_ptr) 862 { 863 return cache_state_if_flags(state, cached_ptr, 864 EXTENT_LOCKED | EXTENT_BOUNDARY); 865 } 866 867 /* 868 * Find the first state struct with 'bits' set after 'start', and return it. 869 * tree->lock must be held. NULL will returned if nothing was found after 870 * 'start'. 
871 */ 872 static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, 873 u64 start, u32 bits) 874 { 875 struct extent_state *state; 876 877 /* 878 * This search will find all the extents that end after our range 879 * starts. 880 */ 881 state = tree_search(tree, start); 882 while (state) { 883 if (state->end >= start && (state->state & bits)) 884 return state; 885 state = next_state(state); 886 } 887 return NULL; 888 } 889 890 /* 891 * Find the first offset in the io tree with one or more @bits set. 892 * 893 * Note: If there are multiple bits set in @bits, any of them will match. 894 * 895 * Return true if we find something, and update @start_ret and @end_ret. 896 * Return false if we found nothing. 897 */ 898 bool find_first_extent_bit(struct extent_io_tree *tree, u64 start, 899 u64 *start_ret, u64 *end_ret, u32 bits, 900 struct extent_state **cached_state) 901 { 902 struct extent_state *state; 903 bool ret = false; 904 905 spin_lock(&tree->lock); 906 if (cached_state && *cached_state) { 907 state = *cached_state; 908 if (state->end == start - 1 && extent_state_in_tree(state)) { 909 while ((state = next_state(state)) != NULL) { 910 if (state->state & bits) 911 break; 912 } 913 /* 914 * If we found the next extent state, clear cached_state 915 * so that we can cache the next extent state below and 916 * avoid future calls going over the same extent state 917 * again. If we haven't found any, clear as well since 918 * it's now useless. 919 */ 920 free_extent_state(*cached_state); 921 *cached_state = NULL; 922 if (state) 923 goto got_it; 924 goto out; 925 } 926 free_extent_state(*cached_state); 927 *cached_state = NULL; 928 } 929 930 state = find_first_extent_bit_state(tree, start, bits); 931 got_it: 932 if (state) { 933 cache_state_if_flags(state, cached_state, 0); 934 *start_ret = state->start; 935 *end_ret = state->end; 936 ret = true; 937 } 938 out: 939 spin_unlock(&tree->lock); 940 return ret; 941 } 942 943 /* 944 * Find a contiguous area of bits 945 * 946 * @tree: io tree to check 947 * @start: offset to start the search from 948 * @start_ret: the first offset we found with the bits set 949 * @end_ret: the final contiguous range of the bits that were set 950 * @bits: bits to look for 951 * 952 * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges 953 * to set bits appropriately, and then merge them again. During this time it 954 * will drop the tree->lock, so use this helper if you want to find the actual 955 * contiguous area for given bits. We will search to the first bit we find, and 956 * then walk down the tree until we find a non-contiguous area. The area 957 * returned will be the full contiguous area with the bits set. 958 */ 959 int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start, 960 u64 *start_ret, u64 *end_ret, u32 bits) 961 { 962 struct extent_state *state; 963 int ret = 1; 964 965 ASSERT(!btrfs_fs_incompat(extent_io_tree_to_fs_info(tree), NO_HOLES)); 966 967 spin_lock(&tree->lock); 968 state = find_first_extent_bit_state(tree, start, bits); 969 if (state) { 970 *start_ret = state->start; 971 *end_ret = state->end; 972 while ((state = next_state(state)) != NULL) { 973 if (state->start > (*end_ret + 1)) 974 break; 975 *end_ret = state->end; 976 } 977 ret = 0; 978 } 979 spin_unlock(&tree->lock); 980 return ret; 981 } 982 983 /* 984 * Find a contiguous range of bytes in the file marked as delalloc, not more 985 * than 'max_bytes'. 
 * start and end are used to return the range.
 *
 * True is returned if we find something, false if nothing was in the tree.
 */
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
			       u64 *end, u64 max_bytes,
			       struct extent_state **cached_state)
{
	struct extent_state *state;
	u64 cur_start = *start;
	bool found = false;
	u64 total_bytes = 0;

	spin_lock(&tree->lock);

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search(tree, cur_start);
	if (!state) {
		*end = (u64)-1;
		goto out;
	}

	while (state) {
		if (found && (state->start != cur_start ||
			      (state->state & EXTENT_BOUNDARY))) {
			goto out;
		}
		if (!(state->state & EXTENT_DELALLOC)) {
			if (!found)
				*end = state->end;
			goto out;
		}
		if (!found) {
			*start = state->start;
			*cached_state = state;
			refcount_inc(&state->refs);
		}
		found = true;
		*end = state->end;
		cur_start = state->end + 1;
		total_bytes += state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
		state = next_state(state);
	}
out:
	spin_unlock(&tree->lock);
	return found;
}

/*
 * Set some bits on a range in the tree. This may require allocations or
 * sleeping. By default all allocations use GFP_NOFS, use EXTENT_NOWAIT for
 * GFP_NOWAIT.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if some
 * part of the range already has the desired bits set. The extent_state of the
 * existing range is returned in failed_state in this case, and the start of
 * the existing range is returned in failed_start. failed_state is used as an
 * optimization for wait_extent_bit, failed_start must be used as the source of
 * truth as failed_state may have changed since we returned.
 *
 * [start, end] is inclusive. This takes the tree lock.
 */
static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			    u32 bits, u64 *failed_start,
			    struct extent_state **failed_state,
			    struct extent_state **cached_state,
			    struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node **p = NULL;
	struct rb_node *parent = NULL;
	int err = 0;
	u64 last_start;
	u64 last_end;
	u32 exclusive_bits = (bits & EXTENT_LOCKED);
	gfp_t mask;

	set_gfp_mask_from_bits(&bits, &mask);
	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);

	if (exclusive_bits)
		ASSERT(failed_start);
	else
		ASSERT(failed_start == NULL && failed_state == NULL);
again:
	if (!prealloc) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range.
		 * If we end up needing a new extent state we allocate it later.
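		 *
		 * The gfp mask was derived from the passed in bits by
		 * set_gfp_mask_from_bits(): GFP_NOWAIT if EXTENT_NOWAIT was
		 * set, GFP_NOFS otherwise.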
1084 */ 1085 prealloc = alloc_extent_state(mask); 1086 } 1087 1088 spin_lock(&tree->lock); 1089 if (cached_state && *cached_state) { 1090 state = *cached_state; 1091 if (state->start <= start && state->end > start && 1092 extent_state_in_tree(state)) 1093 goto hit_next; 1094 } 1095 /* 1096 * This search will find all the extents that end after our range 1097 * starts. 1098 */ 1099 state = tree_search_for_insert(tree, start, &p, &parent); 1100 if (!state) { 1101 prealloc = alloc_extent_state_atomic(prealloc); 1102 if (!prealloc) 1103 goto search_again; 1104 prealloc->start = start; 1105 prealloc->end = end; 1106 insert_state_fast(tree, prealloc, p, parent, bits, changeset); 1107 cache_state(prealloc, cached_state); 1108 prealloc = NULL; 1109 goto out; 1110 } 1111 hit_next: 1112 last_start = state->start; 1113 last_end = state->end; 1114 1115 /* 1116 * | ---- desired range ---- | 1117 * | state | 1118 * 1119 * Just lock what we found and keep going 1120 */ 1121 if (state->start == start && state->end <= end) { 1122 if (state->state & exclusive_bits) { 1123 *failed_start = state->start; 1124 cache_state(state, failed_state); 1125 err = -EEXIST; 1126 goto out; 1127 } 1128 1129 set_state_bits(tree, state, bits, changeset); 1130 cache_state(state, cached_state); 1131 merge_state(tree, state); 1132 if (last_end == (u64)-1) 1133 goto out; 1134 start = last_end + 1; 1135 state = next_state(state); 1136 if (start < end && state && state->start == start && 1137 !need_resched()) 1138 goto hit_next; 1139 goto search_again; 1140 } 1141 1142 /* 1143 * | ---- desired range ---- | 1144 * | state | 1145 * or 1146 * | ------------- state -------------- | 1147 * 1148 * We need to split the extent we found, and may flip bits on second 1149 * half. 1150 * 1151 * If the extent we found extends past our range, we just split and 1152 * search again. It'll get split again the next time though. 1153 * 1154 * If the extent we found is inside our range, we set the desired bit 1155 * on it. 1156 */ 1157 if (state->start < start) { 1158 if (state->state & exclusive_bits) { 1159 *failed_start = start; 1160 cache_state(state, failed_state); 1161 err = -EEXIST; 1162 goto out; 1163 } 1164 1165 /* 1166 * If this extent already has all the bits we want set, then 1167 * skip it, not necessary to split it or do anything with it. 1168 */ 1169 if ((state->state & bits) == bits) { 1170 start = state->end + 1; 1171 cache_state(state, cached_state); 1172 goto search_again; 1173 } 1174 1175 prealloc = alloc_extent_state_atomic(prealloc); 1176 if (!prealloc) 1177 goto search_again; 1178 err = split_state(tree, state, prealloc, start); 1179 if (err) 1180 extent_io_tree_panic(tree, state, "split", err); 1181 1182 prealloc = NULL; 1183 if (err) 1184 goto out; 1185 if (state->end <= end) { 1186 set_state_bits(tree, state, bits, changeset); 1187 cache_state(state, cached_state); 1188 merge_state(tree, state); 1189 if (last_end == (u64)-1) 1190 goto out; 1191 start = last_end + 1; 1192 state = next_state(state); 1193 if (start < end && state && state->start == start && 1194 !need_resched()) 1195 goto hit_next; 1196 } 1197 goto search_again; 1198 } 1199 /* 1200 * | ---- desired range ---- | 1201 * | state | or | state | 1202 * 1203 * There's a hole, we need to insert something in it and ignore the 1204 * extent we found. 
1205 */ 1206 if (state->start > start) { 1207 u64 this_end; 1208 struct extent_state *inserted_state; 1209 1210 if (end < last_start) 1211 this_end = end; 1212 else 1213 this_end = last_start - 1; 1214 1215 prealloc = alloc_extent_state_atomic(prealloc); 1216 if (!prealloc) 1217 goto search_again; 1218 1219 /* 1220 * Avoid to free 'prealloc' if it can be merged with the later 1221 * extent. 1222 */ 1223 prealloc->start = start; 1224 prealloc->end = this_end; 1225 inserted_state = insert_state(tree, prealloc, bits, changeset); 1226 if (IS_ERR(inserted_state)) { 1227 err = PTR_ERR(inserted_state); 1228 extent_io_tree_panic(tree, prealloc, "insert", err); 1229 } 1230 1231 cache_state(inserted_state, cached_state); 1232 if (inserted_state == prealloc) 1233 prealloc = NULL; 1234 start = this_end + 1; 1235 goto search_again; 1236 } 1237 /* 1238 * | ---- desired range ---- | 1239 * | state | 1240 * 1241 * We need to split the extent, and set the bit on the first half 1242 */ 1243 if (state->start <= end && state->end > end) { 1244 if (state->state & exclusive_bits) { 1245 *failed_start = start; 1246 cache_state(state, failed_state); 1247 err = -EEXIST; 1248 goto out; 1249 } 1250 1251 prealloc = alloc_extent_state_atomic(prealloc); 1252 if (!prealloc) 1253 goto search_again; 1254 err = split_state(tree, state, prealloc, end + 1); 1255 if (err) 1256 extent_io_tree_panic(tree, state, "split", err); 1257 1258 set_state_bits(tree, prealloc, bits, changeset); 1259 cache_state(prealloc, cached_state); 1260 merge_state(tree, prealloc); 1261 prealloc = NULL; 1262 goto out; 1263 } 1264 1265 search_again: 1266 if (start > end) 1267 goto out; 1268 spin_unlock(&tree->lock); 1269 if (gfpflags_allow_blocking(mask)) 1270 cond_resched(); 1271 goto again; 1272 1273 out: 1274 spin_unlock(&tree->lock); 1275 if (prealloc) 1276 free_extent_state(prealloc); 1277 1278 return err; 1279 1280 } 1281 1282 int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 1283 u32 bits, struct extent_state **cached_state) 1284 { 1285 return __set_extent_bit(tree, start, end, bits, NULL, NULL, 1286 cached_state, NULL); 1287 } 1288 1289 /* 1290 * Convert all bits in a given range from one bit to another 1291 * 1292 * @tree: the io tree to search 1293 * @start: the start offset in bytes 1294 * @end: the end offset in bytes (inclusive) 1295 * @bits: the bits to set in this range 1296 * @clear_bits: the bits to clear in this range 1297 * @cached_state: state that we're going to cache 1298 * 1299 * This will go through and set bits for the given range. If any states exist 1300 * already in this range they are set with the given bit and cleared of the 1301 * clear_bits. This is only meant to be used by things that are mergeable, ie. 1302 * converting from say DELALLOC to DIRTY. This is not meant to be used with 1303 * boundary bits like LOCK. 1304 * 1305 * All allocations are done with GFP_NOFS. 
1306 */ 1307 int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 1308 u32 bits, u32 clear_bits, 1309 struct extent_state **cached_state) 1310 { 1311 struct extent_state *state; 1312 struct extent_state *prealloc = NULL; 1313 struct rb_node **p = NULL; 1314 struct rb_node *parent = NULL; 1315 int err = 0; 1316 u64 last_start; 1317 u64 last_end; 1318 bool first_iteration = true; 1319 1320 btrfs_debug_check_extent_io_range(tree, start, end); 1321 trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits, 1322 clear_bits); 1323 1324 again: 1325 if (!prealloc) { 1326 /* 1327 * Best effort, don't worry if extent state allocation fails 1328 * here for the first iteration. We might have a cached state 1329 * that matches exactly the target range, in which case no 1330 * extent state allocations are needed. We'll only know this 1331 * after locking the tree. 1332 */ 1333 prealloc = alloc_extent_state(GFP_NOFS); 1334 if (!prealloc && !first_iteration) 1335 return -ENOMEM; 1336 } 1337 1338 spin_lock(&tree->lock); 1339 if (cached_state && *cached_state) { 1340 state = *cached_state; 1341 if (state->start <= start && state->end > start && 1342 extent_state_in_tree(state)) 1343 goto hit_next; 1344 } 1345 1346 /* 1347 * This search will find all the extents that end after our range 1348 * starts. 1349 */ 1350 state = tree_search_for_insert(tree, start, &p, &parent); 1351 if (!state) { 1352 prealloc = alloc_extent_state_atomic(prealloc); 1353 if (!prealloc) { 1354 err = -ENOMEM; 1355 goto out; 1356 } 1357 prealloc->start = start; 1358 prealloc->end = end; 1359 insert_state_fast(tree, prealloc, p, parent, bits, NULL); 1360 cache_state(prealloc, cached_state); 1361 prealloc = NULL; 1362 goto out; 1363 } 1364 hit_next: 1365 last_start = state->start; 1366 last_end = state->end; 1367 1368 /* 1369 * | ---- desired range ---- | 1370 * | state | 1371 * 1372 * Just lock what we found and keep going. 1373 */ 1374 if (state->start == start && state->end <= end) { 1375 set_state_bits(tree, state, bits, NULL); 1376 cache_state(state, cached_state); 1377 state = clear_state_bit(tree, state, clear_bits, 0, NULL); 1378 if (last_end == (u64)-1) 1379 goto out; 1380 start = last_end + 1; 1381 if (start < end && state && state->start == start && 1382 !need_resched()) 1383 goto hit_next; 1384 goto search_again; 1385 } 1386 1387 /* 1388 * | ---- desired range ---- | 1389 * | state | 1390 * or 1391 * | ------------- state -------------- | 1392 * 1393 * We need to split the extent we found, and may flip bits on second 1394 * half. 1395 * 1396 * If the extent we found extends past our range, we just split and 1397 * search again. It'll get split again the next time though. 1398 * 1399 * If the extent we found is inside our range, we set the desired bit 1400 * on it. 
1401 */ 1402 if (state->start < start) { 1403 prealloc = alloc_extent_state_atomic(prealloc); 1404 if (!prealloc) { 1405 err = -ENOMEM; 1406 goto out; 1407 } 1408 err = split_state(tree, state, prealloc, start); 1409 if (err) 1410 extent_io_tree_panic(tree, state, "split", err); 1411 prealloc = NULL; 1412 if (err) 1413 goto out; 1414 if (state->end <= end) { 1415 set_state_bits(tree, state, bits, NULL); 1416 cache_state(state, cached_state); 1417 state = clear_state_bit(tree, state, clear_bits, 0, NULL); 1418 if (last_end == (u64)-1) 1419 goto out; 1420 start = last_end + 1; 1421 if (start < end && state && state->start == start && 1422 !need_resched()) 1423 goto hit_next; 1424 } 1425 goto search_again; 1426 } 1427 /* 1428 * | ---- desired range ---- | 1429 * | state | or | state | 1430 * 1431 * There's a hole, we need to insert something in it and ignore the 1432 * extent we found. 1433 */ 1434 if (state->start > start) { 1435 u64 this_end; 1436 struct extent_state *inserted_state; 1437 1438 if (end < last_start) 1439 this_end = end; 1440 else 1441 this_end = last_start - 1; 1442 1443 prealloc = alloc_extent_state_atomic(prealloc); 1444 if (!prealloc) { 1445 err = -ENOMEM; 1446 goto out; 1447 } 1448 1449 /* 1450 * Avoid to free 'prealloc' if it can be merged with the later 1451 * extent. 1452 */ 1453 prealloc->start = start; 1454 prealloc->end = this_end; 1455 inserted_state = insert_state(tree, prealloc, bits, NULL); 1456 if (IS_ERR(inserted_state)) { 1457 err = PTR_ERR(inserted_state); 1458 extent_io_tree_panic(tree, prealloc, "insert", err); 1459 } 1460 cache_state(inserted_state, cached_state); 1461 if (inserted_state == prealloc) 1462 prealloc = NULL; 1463 start = this_end + 1; 1464 goto search_again; 1465 } 1466 /* 1467 * | ---- desired range ---- | 1468 * | state | 1469 * 1470 * We need to split the extent, and set the bit on the first half. 1471 */ 1472 if (state->start <= end && state->end > end) { 1473 prealloc = alloc_extent_state_atomic(prealloc); 1474 if (!prealloc) { 1475 err = -ENOMEM; 1476 goto out; 1477 } 1478 1479 err = split_state(tree, state, prealloc, end + 1); 1480 if (err) 1481 extent_io_tree_panic(tree, state, "split", err); 1482 1483 set_state_bits(tree, prealloc, bits, NULL); 1484 cache_state(prealloc, cached_state); 1485 clear_state_bit(tree, prealloc, clear_bits, 0, NULL); 1486 prealloc = NULL; 1487 goto out; 1488 } 1489 1490 search_again: 1491 if (start > end) 1492 goto out; 1493 spin_unlock(&tree->lock); 1494 cond_resched(); 1495 first_iteration = false; 1496 goto again; 1497 1498 out: 1499 spin_unlock(&tree->lock); 1500 if (prealloc) 1501 free_extent_state(prealloc); 1502 1503 return err; 1504 } 1505 1506 /* 1507 * Find the first range that has @bits not set. This range could start before 1508 * @start. 1509 * 1510 * @tree: the tree to search 1511 * @start: offset at/after which the found extent should start 1512 * @start_ret: records the beginning of the range 1513 * @end_ret: records the end of the range (inclusive) 1514 * @bits: the set of bits which must be unset 1515 * 1516 * Since unallocated range is also considered one which doesn't have the bits 1517 * set it's possible that @end_ret contains -1, this happens in case the range 1518 * spans (last_range_end, end of device]. In this case it's up to the caller to 1519 * trim @end_ret to the appropriate size. 
1520 */ 1521 void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start, 1522 u64 *start_ret, u64 *end_ret, u32 bits) 1523 { 1524 struct extent_state *state; 1525 struct extent_state *prev = NULL, *next = NULL; 1526 1527 spin_lock(&tree->lock); 1528 1529 /* Find first extent with bits cleared */ 1530 while (1) { 1531 state = tree_search_prev_next(tree, start, &prev, &next); 1532 if (!state && !next && !prev) { 1533 /* 1534 * Tree is completely empty, send full range and let 1535 * caller deal with it 1536 */ 1537 *start_ret = 0; 1538 *end_ret = -1; 1539 goto out; 1540 } else if (!state && !next) { 1541 /* 1542 * We are past the last allocated chunk, set start at 1543 * the end of the last extent. 1544 */ 1545 *start_ret = prev->end + 1; 1546 *end_ret = -1; 1547 goto out; 1548 } else if (!state) { 1549 state = next; 1550 } 1551 1552 /* 1553 * At this point 'state' either contains 'start' or start is 1554 * before 'state' 1555 */ 1556 if (in_range(start, state->start, state->end - state->start + 1)) { 1557 if (state->state & bits) { 1558 /* 1559 * |--range with bits sets--| 1560 * | 1561 * start 1562 */ 1563 start = state->end + 1; 1564 } else { 1565 /* 1566 * 'start' falls within a range that doesn't 1567 * have the bits set, so take its start as the 1568 * beginning of the desired range 1569 * 1570 * |--range with bits cleared----| 1571 * | 1572 * start 1573 */ 1574 *start_ret = state->start; 1575 break; 1576 } 1577 } else { 1578 /* 1579 * |---prev range---|---hole/unset---|---node range---| 1580 * | 1581 * start 1582 * 1583 * or 1584 * 1585 * |---hole/unset--||--first node--| 1586 * 0 | 1587 * start 1588 */ 1589 if (prev) 1590 *start_ret = prev->end + 1; 1591 else 1592 *start_ret = 0; 1593 break; 1594 } 1595 } 1596 1597 /* 1598 * Find the longest stretch from start until an entry which has the 1599 * bits set 1600 */ 1601 while (state) { 1602 if (state->end >= start && !(state->state & bits)) { 1603 *end_ret = state->end; 1604 } else { 1605 *end_ret = state->start - 1; 1606 break; 1607 } 1608 state = next_state(state); 1609 } 1610 out: 1611 spin_unlock(&tree->lock); 1612 } 1613 1614 /* 1615 * Count the number of bytes in the tree that have a given bit(s) set for a 1616 * given range. 1617 * 1618 * @tree: The io tree to search. 1619 * @start: The start offset of the range. This value is updated to the 1620 * offset of the first byte found with the given bit(s), so it 1621 * can end up being bigger than the initial value. 1622 * @search_end: The end offset (inclusive value) of the search range. 1623 * @max_bytes: The maximum byte count we are interested. The search stops 1624 * once it reaches this count. 1625 * @bits: The bits the range must have in order to be accounted for. 1626 * If multiple bits are set, then only subranges that have all 1627 * the bits set are accounted for. 1628 * @contig: Indicate if we should ignore holes in the range or not. If 1629 * this is true, then stop once we find a hole. 1630 * @cached_state: A cached state to be used across multiple calls to this 1631 * function in order to speedup searches. Use NULL if this is 1632 * called only once or if each call does not start where the 1633 * previous one ended. 1634 * 1635 * Returns the total number of bytes found within the given range that have 1636 * all given bits set. If the returned number of bytes is greater than zero 1637 * then @start is updated with the offset of the first byte with the bits set. 
1638 */ 1639 u64 count_range_bits(struct extent_io_tree *tree, 1640 u64 *start, u64 search_end, u64 max_bytes, 1641 u32 bits, int contig, 1642 struct extent_state **cached_state) 1643 { 1644 struct extent_state *state = NULL; 1645 struct extent_state *cached; 1646 u64 cur_start = *start; 1647 u64 total_bytes = 0; 1648 u64 last = 0; 1649 int found = 0; 1650 1651 if (WARN_ON(search_end < cur_start)) 1652 return 0; 1653 1654 spin_lock(&tree->lock); 1655 1656 if (!cached_state || !*cached_state) 1657 goto search; 1658 1659 cached = *cached_state; 1660 1661 if (!extent_state_in_tree(cached)) 1662 goto search; 1663 1664 if (cached->start <= cur_start && cur_start <= cached->end) { 1665 state = cached; 1666 } else if (cached->start > cur_start) { 1667 struct extent_state *prev; 1668 1669 /* 1670 * The cached state starts after our search range's start. Check 1671 * if the previous state record starts at or before the range we 1672 * are looking for, and if so, use it - this is a common case 1673 * when there are holes between records in the tree. If there is 1674 * no previous state record, we can start from our cached state. 1675 */ 1676 prev = prev_state(cached); 1677 if (!prev) 1678 state = cached; 1679 else if (prev->start <= cur_start && cur_start <= prev->end) 1680 state = prev; 1681 } 1682 1683 /* 1684 * This search will find all the extents that end after our range 1685 * starts. 1686 */ 1687 search: 1688 if (!state) 1689 state = tree_search(tree, cur_start); 1690 1691 while (state) { 1692 if (state->start > search_end) 1693 break; 1694 if (contig && found && state->start > last + 1) 1695 break; 1696 if (state->end >= cur_start && (state->state & bits) == bits) { 1697 total_bytes += min(search_end, state->end) + 1 - 1698 max(cur_start, state->start); 1699 if (total_bytes >= max_bytes) 1700 break; 1701 if (!found) { 1702 *start = max(cur_start, state->start); 1703 found = 1; 1704 } 1705 last = state->end; 1706 } else if (contig && found) { 1707 break; 1708 } 1709 state = next_state(state); 1710 } 1711 1712 if (cached_state) { 1713 free_extent_state(*cached_state); 1714 *cached_state = state; 1715 if (state) 1716 refcount_inc(&state->refs); 1717 } 1718 1719 spin_unlock(&tree->lock); 1720 1721 return total_bytes; 1722 } 1723 1724 /* 1725 * Check if the single @bit exists in the given range. 1726 */ 1727 bool test_range_bit_exists(struct extent_io_tree *tree, u64 start, u64 end, u32 bit) 1728 { 1729 struct extent_state *state = NULL; 1730 bool bitset = false; 1731 1732 ASSERT(is_power_of_2(bit)); 1733 1734 spin_lock(&tree->lock); 1735 state = tree_search(tree, start); 1736 while (state && start <= end) { 1737 if (state->start > end) 1738 break; 1739 1740 if (state->state & bit) { 1741 bitset = true; 1742 break; 1743 } 1744 1745 /* If state->end is (u64)-1, start will overflow to 0 */ 1746 start = state->end + 1; 1747 if (start > end || start == 0) 1748 break; 1749 state = next_state(state); 1750 } 1751 spin_unlock(&tree->lock); 1752 return bitset; 1753 } 1754 1755 /* 1756 * Check if the whole range [@start,@end) contains the single @bit set. 
1757 */ 1758 bool test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bit, 1759 struct extent_state *cached) 1760 { 1761 struct extent_state *state = NULL; 1762 bool bitset = true; 1763 1764 ASSERT(is_power_of_2(bit)); 1765 1766 spin_lock(&tree->lock); 1767 if (cached && extent_state_in_tree(cached) && cached->start <= start && 1768 cached->end > start) 1769 state = cached; 1770 else 1771 state = tree_search(tree, start); 1772 while (state && start <= end) { 1773 if (state->start > start) { 1774 bitset = false; 1775 break; 1776 } 1777 1778 if (state->start > end) 1779 break; 1780 1781 if ((state->state & bit) == 0) { 1782 bitset = false; 1783 break; 1784 } 1785 1786 if (state->end == (u64)-1) 1787 break; 1788 1789 /* 1790 * Last entry (if state->end is (u64)-1 and overflow happens), 1791 * or next entry starts after the range. 1792 */ 1793 start = state->end + 1; 1794 if (start > end || start == 0) 1795 break; 1796 state = next_state(state); 1797 } 1798 1799 /* We ran out of states and were still inside of our range. */ 1800 if (!state) 1801 bitset = false; 1802 spin_unlock(&tree->lock); 1803 return bitset; 1804 } 1805 1806 /* Wrappers around set/clear extent bit */ 1807 int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 1808 u32 bits, struct extent_changeset *changeset) 1809 { 1810 /* 1811 * We don't support EXTENT_LOCKED yet, as current changeset will 1812 * record any bits changed, so for EXTENT_LOCKED case, it will 1813 * either fail with -EEXIST or changeset will record the whole 1814 * range. 1815 */ 1816 ASSERT(!(bits & EXTENT_LOCKED)); 1817 1818 return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset); 1819 } 1820 1821 int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 1822 u32 bits, struct extent_changeset *changeset) 1823 { 1824 /* 1825 * Don't support EXTENT_LOCKED case, same reason as 1826 * set_record_extent_bits(). 1827 */ 1828 ASSERT(!(bits & EXTENT_LOCKED)); 1829 1830 return __clear_extent_bit(tree, start, end, bits, NULL, changeset); 1831 } 1832 1833 int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 1834 struct extent_state **cached) 1835 { 1836 int err; 1837 u64 failed_start; 1838 1839 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start, 1840 NULL, cached, NULL); 1841 if (err == -EEXIST) { 1842 if (failed_start > start) 1843 clear_extent_bit(tree, start, failed_start - 1, 1844 EXTENT_LOCKED, cached); 1845 return 0; 1846 } 1847 return 1; 1848 } 1849 1850 /* 1851 * Either insert or lock state struct between start and end use mask to tell 1852 * us if waiting is desired. 
1853 */ 1854 int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 1855 struct extent_state **cached_state) 1856 { 1857 struct extent_state *failed_state = NULL; 1858 int err; 1859 u64 failed_start; 1860 1861 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start, 1862 &failed_state, cached_state, NULL); 1863 while (err == -EEXIST) { 1864 if (failed_start != start) 1865 clear_extent_bit(tree, start, failed_start - 1, 1866 EXTENT_LOCKED, cached_state); 1867 1868 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED, 1869 &failed_state); 1870 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, 1871 &failed_start, &failed_state, 1872 cached_state, NULL); 1873 } 1874 return err; 1875 } 1876 1877 void __cold extent_state_free_cachep(void) 1878 { 1879 btrfs_extent_state_leak_debug_check(); 1880 kmem_cache_destroy(extent_state_cache); 1881 } 1882 1883 int __init extent_state_init_cachep(void) 1884 { 1885 extent_state_cache = kmem_cache_create("btrfs_extent_state", 1886 sizeof(struct extent_state), 0, 1887 SLAB_MEM_SPREAD, NULL); 1888 if (!extent_state_cache) 1889 return -ENOMEM; 1890 1891 return 0; 1892 } 1893