1 // SPDX-License-Identifier: GPL-2.0 2 3 #include <linux/slab.h> 4 #include <trace/events/btrfs.h> 5 #include "messages.h" 6 #include "ctree.h" 7 #include "extent_io.h" 8 #include "extent-io-tree.h" 9 #include "btrfs_inode.h" 10 11 static struct kmem_cache *extent_state_cache; 12 13 static inline bool extent_state_in_tree(const struct extent_state *state) 14 { 15 return !RB_EMPTY_NODE(&state->rb_node); 16 } 17 18 #ifdef CONFIG_BTRFS_DEBUG 19 static LIST_HEAD(states); 20 static DEFINE_SPINLOCK(leak_lock); 21 22 static inline void btrfs_leak_debug_add_state(struct extent_state *state) 23 { 24 unsigned long flags; 25 26 spin_lock_irqsave(&leak_lock, flags); 27 list_add(&state->leak_list, &states); 28 spin_unlock_irqrestore(&leak_lock, flags); 29 } 30 31 static inline void btrfs_leak_debug_del_state(struct extent_state *state) 32 { 33 unsigned long flags; 34 35 spin_lock_irqsave(&leak_lock, flags); 36 list_del(&state->leak_list); 37 spin_unlock_irqrestore(&leak_lock, flags); 38 } 39 40 static inline void btrfs_extent_state_leak_debug_check(void) 41 { 42 struct extent_state *state; 43 44 while (!list_empty(&states)) { 45 state = list_entry(states.next, struct extent_state, leak_list); 46 pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n", 47 state->start, state->end, state->state, 48 extent_state_in_tree(state), 49 refcount_read(&state->refs)); 50 list_del(&state->leak_list); 51 WARN_ON_ONCE(1); 52 kmem_cache_free(extent_state_cache, state); 53 } 54 } 55 56 #define btrfs_debug_check_extent_io_range(tree, start, end) \ 57 __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end)) 58 static inline void __btrfs_debug_check_extent_io_range(const char *caller, 59 struct extent_io_tree *tree, 60 u64 start, u64 end) 61 { 62 const struct btrfs_inode *inode; 63 u64 isize; 64 65 if (tree->owner != IO_TREE_INODE_IO) 66 return; 67 68 inode = extent_io_tree_to_inode_const(tree); 69 isize = i_size_read(&inode->vfs_inode); 70 if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { 71 btrfs_debug_rl(inode->root->fs_info, 72 "%s: ino %llu isize %llu odd range [%llu,%llu]", 73 caller, btrfs_ino(inode), isize, start, end); 74 } 75 } 76 #else 77 #define btrfs_leak_debug_add_state(state) do {} while (0) 78 #define btrfs_leak_debug_del_state(state) do {} while (0) 79 #define btrfs_extent_state_leak_debug_check() do {} while (0) 80 #define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0) 81 #endif 82 83 84 /* 85 * The only tree allowed to set the inode is IO_TREE_INODE_IO. 86 */ 87 static bool is_inode_io_tree(const struct extent_io_tree *tree) 88 { 89 return tree->owner == IO_TREE_INODE_IO; 90 } 91 92 /* Return the inode if it's valid for the given tree, otherwise NULL. */ 93 struct btrfs_inode *extent_io_tree_to_inode(struct extent_io_tree *tree) 94 { 95 if (tree->owner == IO_TREE_INODE_IO) 96 return tree->inode; 97 return NULL; 98 } 99 100 /* Read-only access to the inode. */ 101 const struct btrfs_inode *extent_io_tree_to_inode_const(const struct extent_io_tree *tree) 102 { 103 if (tree->owner == IO_TREE_INODE_IO) 104 return tree->inode; 105 return NULL; 106 } 107 108 /* For read-only access to fs_info. */ 109 const struct btrfs_fs_info *extent_io_tree_to_fs_info(const struct extent_io_tree *tree) 110 { 111 if (tree->owner == IO_TREE_INODE_IO) 112 return tree->inode->root->fs_info; 113 return tree->fs_info; 114 } 115 116 void extent_io_tree_init(struct btrfs_fs_info *fs_info, 117 struct extent_io_tree *tree, unsigned int owner) 118 { 119 tree->state = RB_ROOT; 120 spin_lock_init(&tree->lock); 121 tree->fs_info = fs_info; 122 tree->owner = owner; 123 } 124 125 /* 126 * Empty an io tree, removing and freeing every extent state record from the 127 * tree. This should be called once we are sure no other task can access the 128 * tree anymore, so no tree updates happen after we empty the tree and there 129 * aren't any waiters on any extent state record (EXTENT_LOCK_BITS are never 130 * set on any extent state when calling this function). 131 */ 132 void extent_io_tree_release(struct extent_io_tree *tree) 133 { 134 struct rb_root root; 135 struct extent_state *state; 136 struct extent_state *tmp; 137 138 spin_lock(&tree->lock); 139 root = tree->state; 140 tree->state = RB_ROOT; 141 rbtree_postorder_for_each_entry_safe(state, tmp, &root, rb_node) { 142 /* Clear node to keep free_extent_state() happy. */ 143 RB_CLEAR_NODE(&state->rb_node); 144 ASSERT(!(state->state & EXTENT_LOCK_BITS)); 145 /* 146 * No need for a memory barrier here, as we are holding the tree 147 * lock and we only change the waitqueue while holding that lock 148 * (see wait_extent_bit()). 149 */ 150 ASSERT(!waitqueue_active(&state->wq)); 151 free_extent_state(state); 152 cond_resched_lock(&tree->lock); 153 } 154 /* 155 * Should still be empty even after a reschedule, no other task should 156 * be accessing the tree anymore. 157 */ 158 ASSERT(RB_EMPTY_ROOT(&tree->state)); 159 spin_unlock(&tree->lock); 160 } 161 162 static struct extent_state *alloc_extent_state(gfp_t mask) 163 { 164 struct extent_state *state; 165 166 /* 167 * The given mask might be not appropriate for the slab allocator, 168 * drop the unsupported bits 169 */ 170 mask &= ~(__GFP_DMA32|__GFP_HIGHMEM); 171 state = kmem_cache_alloc(extent_state_cache, mask); 172 if (!state) 173 return state; 174 state->state = 0; 175 RB_CLEAR_NODE(&state->rb_node); 176 btrfs_leak_debug_add_state(state); 177 refcount_set(&state->refs, 1); 178 init_waitqueue_head(&state->wq); 179 trace_alloc_extent_state(state, mask, _RET_IP_); 180 return state; 181 } 182 183 static struct extent_state *alloc_extent_state_atomic(struct extent_state *prealloc) 184 { 185 if (!prealloc) 186 prealloc = alloc_extent_state(GFP_ATOMIC); 187 188 return prealloc; 189 } 190 191 void free_extent_state(struct extent_state *state) 192 { 193 if (!state) 194 return; 195 if (refcount_dec_and_test(&state->refs)) { 196 WARN_ON(extent_state_in_tree(state)); 197 btrfs_leak_debug_del_state(state); 198 trace_free_extent_state(state, _RET_IP_); 199 kmem_cache_free(extent_state_cache, state); 200 } 201 } 202 203 static int add_extent_changeset(struct extent_state *state, u32 bits, 204 struct extent_changeset *changeset, 205 int set) 206 { 207 int ret; 208 209 if (!changeset) 210 return 0; 211 if (set && (state->state & bits) == bits) 212 return 0; 213 if (!set && (state->state & bits) == 0) 214 return 0; 215 changeset->bytes_changed += state->end - state->start + 1; 216 ret = ulist_add(&changeset->range_changed, state->start, state->end, 217 GFP_ATOMIC); 218 return ret; 219 } 220 221 static inline struct extent_state *next_state(struct extent_state *state) 222 { 223 struct rb_node *next = rb_next(&state->rb_node); 224 225 if (next) 226 return rb_entry(next, struct extent_state, rb_node); 227 else 228 return NULL; 229 } 230 231 static inline struct extent_state *prev_state(struct extent_state *state) 232 { 233 struct rb_node *next = rb_prev(&state->rb_node); 234 235 if (next) 236 return rb_entry(next, struct extent_state, rb_node); 237 else 238 return NULL; 239 } 240 241 /* 242 * Search @tree for an entry that contains @offset. Such entry would have 243 * entry->start <= offset && entry->end >= offset. 244 * 245 * @tree: the tree to search 246 * @offset: offset that should fall within an entry in @tree 247 * @node_ret: pointer where new node should be anchored (used when inserting an 248 * entry in the tree) 249 * @parent_ret: points to entry which would have been the parent of the entry, 250 * containing @offset 251 * 252 * Return a pointer to the entry that contains @offset byte address and don't change 253 * @node_ret and @parent_ret. 254 * 255 * If no such entry exists, return pointer to entry that ends before @offset 256 * and fill parameters @node_ret and @parent_ret, ie. does not return NULL. 257 */ 258 static inline struct extent_state *tree_search_for_insert(struct extent_io_tree *tree, 259 u64 offset, 260 struct rb_node ***node_ret, 261 struct rb_node **parent_ret) 262 { 263 struct rb_root *root = &tree->state; 264 struct rb_node **node = &root->rb_node; 265 struct rb_node *prev = NULL; 266 struct extent_state *entry = NULL; 267 268 while (*node) { 269 prev = *node; 270 entry = rb_entry(prev, struct extent_state, rb_node); 271 272 if (offset < entry->start) 273 node = &(*node)->rb_left; 274 else if (offset > entry->end) 275 node = &(*node)->rb_right; 276 else 277 return entry; 278 } 279 280 if (node_ret) 281 *node_ret = node; 282 if (parent_ret) 283 *parent_ret = prev; 284 285 /* Search neighbors until we find the first one past the end */ 286 while (entry && offset > entry->end) 287 entry = next_state(entry); 288 289 return entry; 290 } 291 292 /* 293 * Search offset in the tree or fill neighbor rbtree node pointers. 294 * 295 * @tree: the tree to search 296 * @offset: offset that should fall within an entry in @tree 297 * @next_ret: pointer to the first entry whose range ends after @offset 298 * @prev_ret: pointer to the first entry whose range begins before @offset 299 * 300 * Return a pointer to the entry that contains @offset byte address. If no 301 * such entry exists, then return NULL and fill @prev_ret and @next_ret. 302 * Otherwise return the found entry and other pointers are left untouched. 303 */ 304 static struct extent_state *tree_search_prev_next(struct extent_io_tree *tree, 305 u64 offset, 306 struct extent_state **prev_ret, 307 struct extent_state **next_ret) 308 { 309 struct rb_root *root = &tree->state; 310 struct rb_node **node = &root->rb_node; 311 struct extent_state *orig_prev; 312 struct extent_state *entry = NULL; 313 314 ASSERT(prev_ret); 315 ASSERT(next_ret); 316 317 while (*node) { 318 entry = rb_entry(*node, struct extent_state, rb_node); 319 320 if (offset < entry->start) 321 node = &(*node)->rb_left; 322 else if (offset > entry->end) 323 node = &(*node)->rb_right; 324 else 325 return entry; 326 } 327 328 orig_prev = entry; 329 while (entry && offset > entry->end) 330 entry = next_state(entry); 331 *next_ret = entry; 332 entry = orig_prev; 333 334 while (entry && offset < entry->start) 335 entry = prev_state(entry); 336 *prev_ret = entry; 337 338 return NULL; 339 } 340 341 /* 342 * Inexact rb-tree search, return the next entry if @offset is not found 343 */ 344 static inline struct extent_state *tree_search(struct extent_io_tree *tree, u64 offset) 345 { 346 return tree_search_for_insert(tree, offset, NULL, NULL); 347 } 348 349 static void extent_io_tree_panic(const struct extent_io_tree *tree, 350 const struct extent_state *state, 351 const char *opname, 352 int err) 353 { 354 btrfs_panic(extent_io_tree_to_fs_info(tree), err, 355 "extent io tree error on %s state start %llu end %llu", 356 opname, state->start, state->end); 357 } 358 359 static void merge_prev_state(struct extent_io_tree *tree, struct extent_state *state) 360 { 361 struct extent_state *prev; 362 363 prev = prev_state(state); 364 if (prev && prev->end == state->start - 1 && prev->state == state->state) { 365 if (is_inode_io_tree(tree)) 366 btrfs_merge_delalloc_extent(extent_io_tree_to_inode(tree), 367 state, prev); 368 state->start = prev->start; 369 rb_erase(&prev->rb_node, &tree->state); 370 RB_CLEAR_NODE(&prev->rb_node); 371 free_extent_state(prev); 372 } 373 } 374 375 static void merge_next_state(struct extent_io_tree *tree, struct extent_state *state) 376 { 377 struct extent_state *next; 378 379 next = next_state(state); 380 if (next && next->start == state->end + 1 && next->state == state->state) { 381 if (is_inode_io_tree(tree)) 382 btrfs_merge_delalloc_extent(extent_io_tree_to_inode(tree), 383 state, next); 384 state->end = next->end; 385 rb_erase(&next->rb_node, &tree->state); 386 RB_CLEAR_NODE(&next->rb_node); 387 free_extent_state(next); 388 } 389 } 390 391 /* 392 * Utility function to look for merge candidates inside a given range. Any 393 * extents with matching state are merged together into a single extent in the 394 * tree. Extents with EXTENT_IO in their state field are not merged because 395 * the end_io handlers need to be able to do operations on them without 396 * sleeping (or doing allocations/splits). 397 * 398 * This should be called with the tree lock held. 399 */ 400 static void merge_state(struct extent_io_tree *tree, struct extent_state *state) 401 { 402 if (state->state & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY)) 403 return; 404 405 merge_prev_state(tree, state); 406 merge_next_state(tree, state); 407 } 408 409 static void set_state_bits(struct extent_io_tree *tree, 410 struct extent_state *state, 411 u32 bits, struct extent_changeset *changeset) 412 { 413 u32 bits_to_set = bits & ~EXTENT_CTLBITS; 414 int ret; 415 416 if (is_inode_io_tree(tree)) 417 btrfs_set_delalloc_extent(extent_io_tree_to_inode(tree), state, bits); 418 419 ret = add_extent_changeset(state, bits_to_set, changeset, 1); 420 BUG_ON(ret < 0); 421 state->state |= bits_to_set; 422 } 423 424 /* 425 * Insert an extent_state struct into the tree. 'bits' are set on the 426 * struct before it is inserted. 427 * 428 * Returns a pointer to the struct extent_state record containing the range 429 * requested for insertion, which may be the same as the given struct or it 430 * may be an existing record in the tree that was expanded to accommodate the 431 * requested range. In case of an extent_state different from the one that was 432 * given, the later can be freed or reused by the caller. 433 * 434 * On error it returns an error pointer. 435 * 436 * The tree lock is not taken internally. This is a utility function and 437 * probably isn't what you want to call (see set/clear_extent_bit). 438 */ 439 static struct extent_state *insert_state(struct extent_io_tree *tree, 440 struct extent_state *state, 441 u32 bits, 442 struct extent_changeset *changeset) 443 { 444 struct rb_node **node; 445 struct rb_node *parent = NULL; 446 const u64 start = state->start - 1; 447 const u64 end = state->end + 1; 448 const bool try_merge = !(bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY)); 449 450 set_state_bits(tree, state, bits, changeset); 451 452 node = &tree->state.rb_node; 453 while (*node) { 454 struct extent_state *entry; 455 456 parent = *node; 457 entry = rb_entry(parent, struct extent_state, rb_node); 458 459 if (state->end < entry->start) { 460 if (try_merge && end == entry->start && 461 state->state == entry->state) { 462 if (is_inode_io_tree(tree)) 463 btrfs_merge_delalloc_extent( 464 extent_io_tree_to_inode(tree), 465 state, entry); 466 entry->start = state->start; 467 merge_prev_state(tree, entry); 468 state->state = 0; 469 return entry; 470 } 471 node = &(*node)->rb_left; 472 } else if (state->end > entry->end) { 473 if (try_merge && entry->end == start && 474 state->state == entry->state) { 475 if (is_inode_io_tree(tree)) 476 btrfs_merge_delalloc_extent( 477 extent_io_tree_to_inode(tree), 478 state, entry); 479 entry->end = state->end; 480 merge_next_state(tree, entry); 481 state->state = 0; 482 return entry; 483 } 484 node = &(*node)->rb_right; 485 } else { 486 return ERR_PTR(-EEXIST); 487 } 488 } 489 490 rb_link_node(&state->rb_node, parent, node); 491 rb_insert_color(&state->rb_node, &tree->state); 492 493 return state; 494 } 495 496 /* 497 * Insert state to @tree to the location given by @node and @parent. 498 */ 499 static void insert_state_fast(struct extent_io_tree *tree, 500 struct extent_state *state, struct rb_node **node, 501 struct rb_node *parent, unsigned bits, 502 struct extent_changeset *changeset) 503 { 504 set_state_bits(tree, state, bits, changeset); 505 rb_link_node(&state->rb_node, parent, node); 506 rb_insert_color(&state->rb_node, &tree->state); 507 merge_state(tree, state); 508 } 509 510 /* 511 * Split a given extent state struct in two, inserting the preallocated 512 * struct 'prealloc' as the newly created second half. 'split' indicates an 513 * offset inside 'orig' where it should be split. 514 * 515 * Before calling, 516 * the tree has 'orig' at [orig->start, orig->end]. After calling, there 517 * are two extent state structs in the tree: 518 * prealloc: [orig->start, split - 1] 519 * orig: [ split, orig->end ] 520 * 521 * The tree locks are not taken by this function. They need to be held 522 * by the caller. 523 */ 524 static int split_state(struct extent_io_tree *tree, struct extent_state *orig, 525 struct extent_state *prealloc, u64 split) 526 { 527 struct rb_node *parent = NULL; 528 struct rb_node **node; 529 530 if (is_inode_io_tree(tree)) 531 btrfs_split_delalloc_extent(extent_io_tree_to_inode(tree), orig, 532 split); 533 534 prealloc->start = orig->start; 535 prealloc->end = split - 1; 536 prealloc->state = orig->state; 537 orig->start = split; 538 539 parent = &orig->rb_node; 540 node = &parent; 541 while (*node) { 542 struct extent_state *entry; 543 544 parent = *node; 545 entry = rb_entry(parent, struct extent_state, rb_node); 546 547 if (prealloc->end < entry->start) { 548 node = &(*node)->rb_left; 549 } else if (prealloc->end > entry->end) { 550 node = &(*node)->rb_right; 551 } else { 552 free_extent_state(prealloc); 553 return -EEXIST; 554 } 555 } 556 557 rb_link_node(&prealloc->rb_node, parent, node); 558 rb_insert_color(&prealloc->rb_node, &tree->state); 559 560 return 0; 561 } 562 563 /* 564 * Utility function to clear some bits in an extent state struct. It will 565 * optionally wake up anyone waiting on this state (wake == 1). 566 * 567 * If no bits are set on the state struct after clearing things, the 568 * struct is freed and removed from the tree 569 */ 570 static struct extent_state *clear_state_bit(struct extent_io_tree *tree, 571 struct extent_state *state, 572 u32 bits, int wake, 573 struct extent_changeset *changeset) 574 { 575 struct extent_state *next; 576 u32 bits_to_clear = bits & ~EXTENT_CTLBITS; 577 int ret; 578 579 if (is_inode_io_tree(tree)) 580 btrfs_clear_delalloc_extent(extent_io_tree_to_inode(tree), state, 581 bits); 582 583 ret = add_extent_changeset(state, bits_to_clear, changeset, 0); 584 BUG_ON(ret < 0); 585 state->state &= ~bits_to_clear; 586 if (wake) 587 wake_up(&state->wq); 588 if (state->state == 0) { 589 next = next_state(state); 590 if (extent_state_in_tree(state)) { 591 rb_erase(&state->rb_node, &tree->state); 592 RB_CLEAR_NODE(&state->rb_node); 593 free_extent_state(state); 594 } else { 595 WARN_ON(1); 596 } 597 } else { 598 merge_state(tree, state); 599 next = next_state(state); 600 } 601 return next; 602 } 603 604 /* 605 * Detect if extent bits request NOWAIT semantics and set the gfp mask accordingly, 606 * unset the EXTENT_NOWAIT bit. 607 */ 608 static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask) 609 { 610 *mask = (*bits & EXTENT_NOWAIT ? GFP_NOWAIT : GFP_NOFS); 611 *bits &= EXTENT_NOWAIT - 1; 612 } 613 614 /* 615 * Clear some bits on a range in the tree. This may require splitting or 616 * inserting elements in the tree, so the gfp mask is used to indicate which 617 * allocations or sleeping are allowed. 618 * 619 * The range [start, end] is inclusive. 620 * 621 * This takes the tree lock, and returns 0 on success and < 0 on error. 622 */ 623 int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 624 u32 bits, struct extent_state **cached_state, 625 struct extent_changeset *changeset) 626 { 627 struct extent_state *state; 628 struct extent_state *cached; 629 struct extent_state *prealloc = NULL; 630 u64 last_end; 631 int err; 632 int clear = 0; 633 int wake; 634 int delete = (bits & EXTENT_CLEAR_ALL_BITS); 635 gfp_t mask; 636 637 set_gfp_mask_from_bits(&bits, &mask); 638 btrfs_debug_check_extent_io_range(tree, start, end); 639 trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits); 640 641 if (delete) 642 bits |= ~EXTENT_CTLBITS; 643 644 if (bits & EXTENT_DELALLOC) 645 bits |= EXTENT_NORESERVE; 646 647 wake = ((bits & EXTENT_LOCK_BITS) ? 1 : 0); 648 if (bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY)) 649 clear = 1; 650 again: 651 if (!prealloc) { 652 /* 653 * Don't care for allocation failure here because we might end 654 * up not needing the pre-allocated extent state at all, which 655 * is the case if we only have in the tree extent states that 656 * cover our input range and don't cover too any other range. 657 * If we end up needing a new extent state we allocate it later. 658 */ 659 prealloc = alloc_extent_state(mask); 660 } 661 662 spin_lock(&tree->lock); 663 if (cached_state) { 664 cached = *cached_state; 665 666 if (clear) { 667 *cached_state = NULL; 668 cached_state = NULL; 669 } 670 671 if (cached && extent_state_in_tree(cached) && 672 cached->start <= start && cached->end > start) { 673 if (clear) 674 refcount_dec(&cached->refs); 675 state = cached; 676 goto hit_next; 677 } 678 if (clear) 679 free_extent_state(cached); 680 } 681 682 /* This search will find the extents that end after our range starts. */ 683 state = tree_search(tree, start); 684 if (!state) 685 goto out; 686 hit_next: 687 if (state->start > end) 688 goto out; 689 WARN_ON(state->end < start); 690 last_end = state->end; 691 692 /* The state doesn't have the wanted bits, go ahead. */ 693 if (!(state->state & bits)) { 694 state = next_state(state); 695 goto next; 696 } 697 698 /* 699 * | ---- desired range ---- | 700 * | state | or 701 * | ------------- state -------------- | 702 * 703 * We need to split the extent we found, and may flip bits on second 704 * half. 705 * 706 * If the extent we found extends past our range, we just split and 707 * search again. It'll get split again the next time though. 708 * 709 * If the extent we found is inside our range, we clear the desired bit 710 * on it. 711 */ 712 713 if (state->start < start) { 714 prealloc = alloc_extent_state_atomic(prealloc); 715 if (!prealloc) 716 goto search_again; 717 err = split_state(tree, state, prealloc, start); 718 if (err) 719 extent_io_tree_panic(tree, state, "split", err); 720 721 prealloc = NULL; 722 if (err) 723 goto out; 724 if (state->end <= end) { 725 state = clear_state_bit(tree, state, bits, wake, changeset); 726 goto next; 727 } 728 goto search_again; 729 } 730 /* 731 * | ---- desired range ---- | 732 * | state | 733 * We need to split the extent, and clear the bit on the first half. 734 */ 735 if (state->start <= end && state->end > end) { 736 prealloc = alloc_extent_state_atomic(prealloc); 737 if (!prealloc) 738 goto search_again; 739 err = split_state(tree, state, prealloc, end + 1); 740 if (err) 741 extent_io_tree_panic(tree, state, "split", err); 742 743 if (wake) 744 wake_up(&state->wq); 745 746 clear_state_bit(tree, prealloc, bits, wake, changeset); 747 748 prealloc = NULL; 749 goto out; 750 } 751 752 state = clear_state_bit(tree, state, bits, wake, changeset); 753 next: 754 if (last_end == (u64)-1) 755 goto out; 756 start = last_end + 1; 757 if (start <= end && state && !need_resched()) 758 goto hit_next; 759 760 search_again: 761 if (start > end) 762 goto out; 763 spin_unlock(&tree->lock); 764 if (gfpflags_allow_blocking(mask)) 765 cond_resched(); 766 goto again; 767 768 out: 769 spin_unlock(&tree->lock); 770 if (prealloc) 771 free_extent_state(prealloc); 772 773 return 0; 774 775 } 776 777 /* 778 * Wait for one or more bits to clear on a range in the state tree. 779 * The range [start, end] is inclusive. 780 * The tree lock is taken by this function 781 */ 782 static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 783 u32 bits, struct extent_state **cached_state) 784 { 785 struct extent_state *state; 786 787 btrfs_debug_check_extent_io_range(tree, start, end); 788 789 spin_lock(&tree->lock); 790 again: 791 /* 792 * Maintain cached_state, as we may not remove it from the tree if there 793 * are more bits than the bits we're waiting on set on this state. 794 */ 795 if (cached_state && *cached_state) { 796 state = *cached_state; 797 if (extent_state_in_tree(state) && 798 state->start <= start && start < state->end) 799 goto process_node; 800 } 801 while (1) { 802 /* 803 * This search will find all the extents that end after our 804 * range starts. 805 */ 806 state = tree_search(tree, start); 807 process_node: 808 if (!state) 809 break; 810 if (state->start > end) 811 goto out; 812 813 if (state->state & bits) { 814 DEFINE_WAIT(wait); 815 816 start = state->start; 817 refcount_inc(&state->refs); 818 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); 819 spin_unlock(&tree->lock); 820 schedule(); 821 spin_lock(&tree->lock); 822 finish_wait(&state->wq, &wait); 823 free_extent_state(state); 824 goto again; 825 } 826 start = state->end + 1; 827 828 if (start > end) 829 break; 830 831 if (!cond_resched_lock(&tree->lock)) { 832 state = next_state(state); 833 goto process_node; 834 } 835 } 836 out: 837 /* This state is no longer useful, clear it and free it up. */ 838 if (cached_state && *cached_state) { 839 state = *cached_state; 840 *cached_state = NULL; 841 free_extent_state(state); 842 } 843 spin_unlock(&tree->lock); 844 } 845 846 static void cache_state_if_flags(struct extent_state *state, 847 struct extent_state **cached_ptr, 848 unsigned flags) 849 { 850 if (cached_ptr && !(*cached_ptr)) { 851 if (!flags || (state->state & flags)) { 852 *cached_ptr = state; 853 refcount_inc(&state->refs); 854 } 855 } 856 } 857 858 static void cache_state(struct extent_state *state, 859 struct extent_state **cached_ptr) 860 { 861 return cache_state_if_flags(state, cached_ptr, EXTENT_LOCK_BITS | EXTENT_BOUNDARY); 862 } 863 864 /* 865 * Find the first state struct with 'bits' set after 'start', and return it. 866 * tree->lock must be held. NULL will returned if nothing was found after 867 * 'start'. 868 */ 869 static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, 870 u64 start, u32 bits) 871 { 872 struct extent_state *state; 873 874 /* 875 * This search will find all the extents that end after our range 876 * starts. 877 */ 878 state = tree_search(tree, start); 879 while (state) { 880 if (state->end >= start && (state->state & bits)) 881 return state; 882 state = next_state(state); 883 } 884 return NULL; 885 } 886 887 /* 888 * Find the first offset in the io tree with one or more @bits set. 889 * 890 * Note: If there are multiple bits set in @bits, any of them will match. 891 * 892 * Return true if we find something, and update @start_ret and @end_ret. 893 * Return false if we found nothing. 894 */ 895 bool find_first_extent_bit(struct extent_io_tree *tree, u64 start, 896 u64 *start_ret, u64 *end_ret, u32 bits, 897 struct extent_state **cached_state) 898 { 899 struct extent_state *state; 900 bool ret = false; 901 902 spin_lock(&tree->lock); 903 if (cached_state && *cached_state) { 904 state = *cached_state; 905 if (state->end == start - 1 && extent_state_in_tree(state)) { 906 while ((state = next_state(state)) != NULL) { 907 if (state->state & bits) 908 break; 909 } 910 /* 911 * If we found the next extent state, clear cached_state 912 * so that we can cache the next extent state below and 913 * avoid future calls going over the same extent state 914 * again. If we haven't found any, clear as well since 915 * it's now useless. 916 */ 917 free_extent_state(*cached_state); 918 *cached_state = NULL; 919 if (state) 920 goto got_it; 921 goto out; 922 } 923 free_extent_state(*cached_state); 924 *cached_state = NULL; 925 } 926 927 state = find_first_extent_bit_state(tree, start, bits); 928 got_it: 929 if (state) { 930 cache_state_if_flags(state, cached_state, 0); 931 *start_ret = state->start; 932 *end_ret = state->end; 933 ret = true; 934 } 935 out: 936 spin_unlock(&tree->lock); 937 return ret; 938 } 939 940 /* 941 * Find a contiguous area of bits 942 * 943 * @tree: io tree to check 944 * @start: offset to start the search from 945 * @start_ret: the first offset we found with the bits set 946 * @end_ret: the final contiguous range of the bits that were set 947 * @bits: bits to look for 948 * 949 * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges 950 * to set bits appropriately, and then merge them again. During this time it 951 * will drop the tree->lock, so use this helper if you want to find the actual 952 * contiguous area for given bits. We will search to the first bit we find, and 953 * then walk down the tree until we find a non-contiguous area. The area 954 * returned will be the full contiguous area with the bits set. 955 */ 956 int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start, 957 u64 *start_ret, u64 *end_ret, u32 bits) 958 { 959 struct extent_state *state; 960 int ret = 1; 961 962 ASSERT(!btrfs_fs_incompat(extent_io_tree_to_fs_info(tree), NO_HOLES)); 963 964 spin_lock(&tree->lock); 965 state = find_first_extent_bit_state(tree, start, bits); 966 if (state) { 967 *start_ret = state->start; 968 *end_ret = state->end; 969 while ((state = next_state(state)) != NULL) { 970 if (state->start > (*end_ret + 1)) 971 break; 972 *end_ret = state->end; 973 } 974 ret = 0; 975 } 976 spin_unlock(&tree->lock); 977 return ret; 978 } 979 980 /* 981 * Find a contiguous range of bytes in the file marked as delalloc, not more 982 * than 'max_bytes'. start and end are used to return the range, 983 * 984 * True is returned if we find something, false if nothing was in the tree. 985 */ 986 bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start, 987 u64 *end, u64 max_bytes, 988 struct extent_state **cached_state) 989 { 990 struct extent_state *state; 991 u64 cur_start = *start; 992 bool found = false; 993 u64 total_bytes = 0; 994 995 spin_lock(&tree->lock); 996 997 /* 998 * This search will find all the extents that end after our range 999 * starts. 1000 */ 1001 state = tree_search(tree, cur_start); 1002 if (!state) { 1003 *end = (u64)-1; 1004 goto out; 1005 } 1006 1007 while (state) { 1008 if (found && (state->start != cur_start || 1009 (state->state & EXTENT_BOUNDARY))) { 1010 goto out; 1011 } 1012 if (!(state->state & EXTENT_DELALLOC)) { 1013 if (!found) 1014 *end = state->end; 1015 goto out; 1016 } 1017 if (!found) { 1018 *start = state->start; 1019 *cached_state = state; 1020 refcount_inc(&state->refs); 1021 } 1022 found = true; 1023 *end = state->end; 1024 cur_start = state->end + 1; 1025 total_bytes += state->end - state->start + 1; 1026 if (total_bytes >= max_bytes) 1027 break; 1028 state = next_state(state); 1029 } 1030 out: 1031 spin_unlock(&tree->lock); 1032 return found; 1033 } 1034 1035 /* 1036 * Set some bits on a range in the tree. This may require allocations or 1037 * sleeping. By default all allocations use GFP_NOFS, use EXTENT_NOWAIT for 1038 * GFP_NOWAIT. 1039 * 1040 * If any of the exclusive bits are set, this will fail with -EEXIST if some 1041 * part of the range already has the desired bits set. The extent_state of the 1042 * existing range is returned in failed_state in this case, and the start of the 1043 * existing range is returned in failed_start. failed_state is used as an 1044 * optimization for wait_extent_bit, failed_start must be used as the source of 1045 * truth as failed_state may have changed since we returned. 1046 * 1047 * [start, end] is inclusive This takes the tree lock. 1048 */ 1049 static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 1050 u32 bits, u64 *failed_start, 1051 struct extent_state **failed_state, 1052 struct extent_state **cached_state, 1053 struct extent_changeset *changeset) 1054 { 1055 struct extent_state *state; 1056 struct extent_state *prealloc = NULL; 1057 struct rb_node **p = NULL; 1058 struct rb_node *parent = NULL; 1059 int ret = 0; 1060 u64 last_start; 1061 u64 last_end; 1062 u32 exclusive_bits = (bits & EXTENT_LOCK_BITS); 1063 gfp_t mask; 1064 1065 set_gfp_mask_from_bits(&bits, &mask); 1066 btrfs_debug_check_extent_io_range(tree, start, end); 1067 trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits); 1068 1069 if (exclusive_bits) 1070 ASSERT(failed_start); 1071 else 1072 ASSERT(failed_start == NULL && failed_state == NULL); 1073 again: 1074 if (!prealloc) { 1075 /* 1076 * Don't care for allocation failure here because we might end 1077 * up not needing the pre-allocated extent state at all, which 1078 * is the case if we only have in the tree extent states that 1079 * cover our input range and don't cover too any other range. 1080 * If we end up needing a new extent state we allocate it later. 1081 */ 1082 prealloc = alloc_extent_state(mask); 1083 } 1084 /* Optimistically preallocate the extent changeset ulist node. */ 1085 if (changeset) 1086 extent_changeset_prealloc(changeset, mask); 1087 1088 spin_lock(&tree->lock); 1089 if (cached_state && *cached_state) { 1090 state = *cached_state; 1091 if (state->start <= start && state->end > start && 1092 extent_state_in_tree(state)) 1093 goto hit_next; 1094 } 1095 /* 1096 * This search will find all the extents that end after our range 1097 * starts. 1098 */ 1099 state = tree_search_for_insert(tree, start, &p, &parent); 1100 if (!state) { 1101 prealloc = alloc_extent_state_atomic(prealloc); 1102 if (!prealloc) 1103 goto search_again; 1104 prealloc->start = start; 1105 prealloc->end = end; 1106 insert_state_fast(tree, prealloc, p, parent, bits, changeset); 1107 cache_state(prealloc, cached_state); 1108 prealloc = NULL; 1109 goto out; 1110 } 1111 hit_next: 1112 last_start = state->start; 1113 last_end = state->end; 1114 1115 /* 1116 * | ---- desired range ---- | 1117 * | state | 1118 * 1119 * Just lock what we found and keep going 1120 */ 1121 if (state->start == start && state->end <= end) { 1122 if (state->state & exclusive_bits) { 1123 *failed_start = state->start; 1124 cache_state(state, failed_state); 1125 ret = -EEXIST; 1126 goto out; 1127 } 1128 1129 set_state_bits(tree, state, bits, changeset); 1130 cache_state(state, cached_state); 1131 merge_state(tree, state); 1132 if (last_end == (u64)-1) 1133 goto out; 1134 start = last_end + 1; 1135 state = next_state(state); 1136 if (start < end && state && state->start == start && 1137 !need_resched()) 1138 goto hit_next; 1139 goto search_again; 1140 } 1141 1142 /* 1143 * | ---- desired range ---- | 1144 * | state | 1145 * or 1146 * | ------------- state -------------- | 1147 * 1148 * We need to split the extent we found, and may flip bits on second 1149 * half. 1150 * 1151 * If the extent we found extends past our range, we just split and 1152 * search again. It'll get split again the next time though. 1153 * 1154 * If the extent we found is inside our range, we set the desired bit 1155 * on it. 1156 */ 1157 if (state->start < start) { 1158 if (state->state & exclusive_bits) { 1159 *failed_start = start; 1160 cache_state(state, failed_state); 1161 ret = -EEXIST; 1162 goto out; 1163 } 1164 1165 /* 1166 * If this extent already has all the bits we want set, then 1167 * skip it, not necessary to split it or do anything with it. 1168 */ 1169 if ((state->state & bits) == bits) { 1170 start = state->end + 1; 1171 cache_state(state, cached_state); 1172 goto search_again; 1173 } 1174 1175 prealloc = alloc_extent_state_atomic(prealloc); 1176 if (!prealloc) 1177 goto search_again; 1178 ret = split_state(tree, state, prealloc, start); 1179 if (ret) 1180 extent_io_tree_panic(tree, state, "split", ret); 1181 1182 prealloc = NULL; 1183 if (ret) 1184 goto out; 1185 if (state->end <= end) { 1186 set_state_bits(tree, state, bits, changeset); 1187 cache_state(state, cached_state); 1188 merge_state(tree, state); 1189 if (last_end == (u64)-1) 1190 goto out; 1191 start = last_end + 1; 1192 state = next_state(state); 1193 if (start < end && state && state->start == start && 1194 !need_resched()) 1195 goto hit_next; 1196 } 1197 goto search_again; 1198 } 1199 /* 1200 * | ---- desired range ---- | 1201 * | state | or | state | 1202 * 1203 * There's a hole, we need to insert something in it and ignore the 1204 * extent we found. 1205 */ 1206 if (state->start > start) { 1207 u64 this_end; 1208 struct extent_state *inserted_state; 1209 1210 if (end < last_start) 1211 this_end = end; 1212 else 1213 this_end = last_start - 1; 1214 1215 prealloc = alloc_extent_state_atomic(prealloc); 1216 if (!prealloc) 1217 goto search_again; 1218 1219 /* 1220 * Avoid to free 'prealloc' if it can be merged with the later 1221 * extent. 1222 */ 1223 prealloc->start = start; 1224 prealloc->end = this_end; 1225 inserted_state = insert_state(tree, prealloc, bits, changeset); 1226 if (IS_ERR(inserted_state)) { 1227 ret = PTR_ERR(inserted_state); 1228 extent_io_tree_panic(tree, prealloc, "insert", ret); 1229 } 1230 1231 cache_state(inserted_state, cached_state); 1232 if (inserted_state == prealloc) 1233 prealloc = NULL; 1234 start = this_end + 1; 1235 goto search_again; 1236 } 1237 /* 1238 * | ---- desired range ---- | 1239 * | state | 1240 * 1241 * We need to split the extent, and set the bit on the first half 1242 */ 1243 if (state->start <= end && state->end > end) { 1244 if (state->state & exclusive_bits) { 1245 *failed_start = start; 1246 cache_state(state, failed_state); 1247 ret = -EEXIST; 1248 goto out; 1249 } 1250 1251 prealloc = alloc_extent_state_atomic(prealloc); 1252 if (!prealloc) 1253 goto search_again; 1254 ret = split_state(tree, state, prealloc, end + 1); 1255 if (ret) 1256 extent_io_tree_panic(tree, state, "split", ret); 1257 1258 set_state_bits(tree, prealloc, bits, changeset); 1259 cache_state(prealloc, cached_state); 1260 merge_state(tree, prealloc); 1261 prealloc = NULL; 1262 goto out; 1263 } 1264 1265 search_again: 1266 if (start > end) 1267 goto out; 1268 spin_unlock(&tree->lock); 1269 if (gfpflags_allow_blocking(mask)) 1270 cond_resched(); 1271 goto again; 1272 1273 out: 1274 spin_unlock(&tree->lock); 1275 if (prealloc) 1276 free_extent_state(prealloc); 1277 1278 return ret; 1279 1280 } 1281 1282 int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 1283 u32 bits, struct extent_state **cached_state) 1284 { 1285 return __set_extent_bit(tree, start, end, bits, NULL, NULL, 1286 cached_state, NULL); 1287 } 1288 1289 /* 1290 * Convert all bits in a given range from one bit to another 1291 * 1292 * @tree: the io tree to search 1293 * @start: the start offset in bytes 1294 * @end: the end offset in bytes (inclusive) 1295 * @bits: the bits to set in this range 1296 * @clear_bits: the bits to clear in this range 1297 * @cached_state: state that we're going to cache 1298 * 1299 * This will go through and set bits for the given range. If any states exist 1300 * already in this range they are set with the given bit and cleared of the 1301 * clear_bits. This is only meant to be used by things that are mergeable, ie. 1302 * converting from say DELALLOC to DIRTY. This is not meant to be used with 1303 * boundary bits like LOCK. 1304 * 1305 * All allocations are done with GFP_NOFS. 1306 */ 1307 int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 1308 u32 bits, u32 clear_bits, 1309 struct extent_state **cached_state) 1310 { 1311 struct extent_state *state; 1312 struct extent_state *prealloc = NULL; 1313 struct rb_node **p = NULL; 1314 struct rb_node *parent = NULL; 1315 int ret = 0; 1316 u64 last_start; 1317 u64 last_end; 1318 bool first_iteration = true; 1319 1320 btrfs_debug_check_extent_io_range(tree, start, end); 1321 trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits, 1322 clear_bits); 1323 1324 again: 1325 if (!prealloc) { 1326 /* 1327 * Best effort, don't worry if extent state allocation fails 1328 * here for the first iteration. We might have a cached state 1329 * that matches exactly the target range, in which case no 1330 * extent state allocations are needed. We'll only know this 1331 * after locking the tree. 1332 */ 1333 prealloc = alloc_extent_state(GFP_NOFS); 1334 if (!prealloc && !first_iteration) 1335 return -ENOMEM; 1336 } 1337 1338 spin_lock(&tree->lock); 1339 if (cached_state && *cached_state) { 1340 state = *cached_state; 1341 if (state->start <= start && state->end > start && 1342 extent_state_in_tree(state)) 1343 goto hit_next; 1344 } 1345 1346 /* 1347 * This search will find all the extents that end after our range 1348 * starts. 1349 */ 1350 state = tree_search_for_insert(tree, start, &p, &parent); 1351 if (!state) { 1352 prealloc = alloc_extent_state_atomic(prealloc); 1353 if (!prealloc) { 1354 ret = -ENOMEM; 1355 goto out; 1356 } 1357 prealloc->start = start; 1358 prealloc->end = end; 1359 insert_state_fast(tree, prealloc, p, parent, bits, NULL); 1360 cache_state(prealloc, cached_state); 1361 prealloc = NULL; 1362 goto out; 1363 } 1364 hit_next: 1365 last_start = state->start; 1366 last_end = state->end; 1367 1368 /* 1369 * | ---- desired range ---- | 1370 * | state | 1371 * 1372 * Just lock what we found and keep going. 1373 */ 1374 if (state->start == start && state->end <= end) { 1375 set_state_bits(tree, state, bits, NULL); 1376 cache_state(state, cached_state); 1377 state = clear_state_bit(tree, state, clear_bits, 0, NULL); 1378 if (last_end == (u64)-1) 1379 goto out; 1380 start = last_end + 1; 1381 if (start < end && state && state->start == start && 1382 !need_resched()) 1383 goto hit_next; 1384 goto search_again; 1385 } 1386 1387 /* 1388 * | ---- desired range ---- | 1389 * | state | 1390 * or 1391 * | ------------- state -------------- | 1392 * 1393 * We need to split the extent we found, and may flip bits on second 1394 * half. 1395 * 1396 * If the extent we found extends past our range, we just split and 1397 * search again. It'll get split again the next time though. 1398 * 1399 * If the extent we found is inside our range, we set the desired bit 1400 * on it. 1401 */ 1402 if (state->start < start) { 1403 prealloc = alloc_extent_state_atomic(prealloc); 1404 if (!prealloc) { 1405 ret = -ENOMEM; 1406 goto out; 1407 } 1408 ret = split_state(tree, state, prealloc, start); 1409 if (ret) 1410 extent_io_tree_panic(tree, state, "split", ret); 1411 prealloc = NULL; 1412 if (ret) 1413 goto out; 1414 if (state->end <= end) { 1415 set_state_bits(tree, state, bits, NULL); 1416 cache_state(state, cached_state); 1417 state = clear_state_bit(tree, state, clear_bits, 0, NULL); 1418 if (last_end == (u64)-1) 1419 goto out; 1420 start = last_end + 1; 1421 if (start < end && state && state->start == start && 1422 !need_resched()) 1423 goto hit_next; 1424 } 1425 goto search_again; 1426 } 1427 /* 1428 * | ---- desired range ---- | 1429 * | state | or | state | 1430 * 1431 * There's a hole, we need to insert something in it and ignore the 1432 * extent we found. 1433 */ 1434 if (state->start > start) { 1435 u64 this_end; 1436 struct extent_state *inserted_state; 1437 1438 if (end < last_start) 1439 this_end = end; 1440 else 1441 this_end = last_start - 1; 1442 1443 prealloc = alloc_extent_state_atomic(prealloc); 1444 if (!prealloc) { 1445 ret = -ENOMEM; 1446 goto out; 1447 } 1448 1449 /* 1450 * Avoid to free 'prealloc' if it can be merged with the later 1451 * extent. 1452 */ 1453 prealloc->start = start; 1454 prealloc->end = this_end; 1455 inserted_state = insert_state(tree, prealloc, bits, NULL); 1456 if (IS_ERR(inserted_state)) { 1457 ret = PTR_ERR(inserted_state); 1458 extent_io_tree_panic(tree, prealloc, "insert", ret); 1459 } 1460 cache_state(inserted_state, cached_state); 1461 if (inserted_state == prealloc) 1462 prealloc = NULL; 1463 start = this_end + 1; 1464 goto search_again; 1465 } 1466 /* 1467 * | ---- desired range ---- | 1468 * | state | 1469 * 1470 * We need to split the extent, and set the bit on the first half. 1471 */ 1472 if (state->start <= end && state->end > end) { 1473 prealloc = alloc_extent_state_atomic(prealloc); 1474 if (!prealloc) { 1475 ret = -ENOMEM; 1476 goto out; 1477 } 1478 1479 ret = split_state(tree, state, prealloc, end + 1); 1480 if (ret) 1481 extent_io_tree_panic(tree, state, "split", ret); 1482 1483 set_state_bits(tree, prealloc, bits, NULL); 1484 cache_state(prealloc, cached_state); 1485 clear_state_bit(tree, prealloc, clear_bits, 0, NULL); 1486 prealloc = NULL; 1487 goto out; 1488 } 1489 1490 search_again: 1491 if (start > end) 1492 goto out; 1493 spin_unlock(&tree->lock); 1494 cond_resched(); 1495 first_iteration = false; 1496 goto again; 1497 1498 out: 1499 spin_unlock(&tree->lock); 1500 if (prealloc) 1501 free_extent_state(prealloc); 1502 1503 return ret; 1504 } 1505 1506 /* 1507 * Find the first range that has @bits not set. This range could start before 1508 * @start. 1509 * 1510 * @tree: the tree to search 1511 * @start: offset at/after which the found extent should start 1512 * @start_ret: records the beginning of the range 1513 * @end_ret: records the end of the range (inclusive) 1514 * @bits: the set of bits which must be unset 1515 * 1516 * Since unallocated range is also considered one which doesn't have the bits 1517 * set it's possible that @end_ret contains -1, this happens in case the range 1518 * spans (last_range_end, end of device]. In this case it's up to the caller to 1519 * trim @end_ret to the appropriate size. 1520 */ 1521 void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start, 1522 u64 *start_ret, u64 *end_ret, u32 bits) 1523 { 1524 struct extent_state *state; 1525 struct extent_state *prev = NULL, *next = NULL; 1526 1527 spin_lock(&tree->lock); 1528 1529 /* Find first extent with bits cleared */ 1530 while (1) { 1531 state = tree_search_prev_next(tree, start, &prev, &next); 1532 if (!state && !next && !prev) { 1533 /* 1534 * Tree is completely empty, send full range and let 1535 * caller deal with it 1536 */ 1537 *start_ret = 0; 1538 *end_ret = -1; 1539 goto out; 1540 } else if (!state && !next) { 1541 /* 1542 * We are past the last allocated chunk, set start at 1543 * the end of the last extent. 1544 */ 1545 *start_ret = prev->end + 1; 1546 *end_ret = -1; 1547 goto out; 1548 } else if (!state) { 1549 state = next; 1550 } 1551 1552 /* 1553 * At this point 'state' either contains 'start' or start is 1554 * before 'state' 1555 */ 1556 if (in_range(start, state->start, state->end - state->start + 1)) { 1557 if (state->state & bits) { 1558 /* 1559 * |--range with bits sets--| 1560 * | 1561 * start 1562 */ 1563 start = state->end + 1; 1564 } else { 1565 /* 1566 * 'start' falls within a range that doesn't 1567 * have the bits set, so take its start as the 1568 * beginning of the desired range 1569 * 1570 * |--range with bits cleared----| 1571 * | 1572 * start 1573 */ 1574 *start_ret = state->start; 1575 break; 1576 } 1577 } else { 1578 /* 1579 * |---prev range---|---hole/unset---|---node range---| 1580 * | 1581 * start 1582 * 1583 * or 1584 * 1585 * |---hole/unset--||--first node--| 1586 * 0 | 1587 * start 1588 */ 1589 if (prev) 1590 *start_ret = prev->end + 1; 1591 else 1592 *start_ret = 0; 1593 break; 1594 } 1595 } 1596 1597 /* 1598 * Find the longest stretch from start until an entry which has the 1599 * bits set 1600 */ 1601 while (state) { 1602 if (state->end >= start && !(state->state & bits)) { 1603 *end_ret = state->end; 1604 } else { 1605 *end_ret = state->start - 1; 1606 break; 1607 } 1608 state = next_state(state); 1609 } 1610 out: 1611 spin_unlock(&tree->lock); 1612 } 1613 1614 /* 1615 * Count the number of bytes in the tree that have a given bit(s) set for a 1616 * given range. 1617 * 1618 * @tree: The io tree to search. 1619 * @start: The start offset of the range. This value is updated to the 1620 * offset of the first byte found with the given bit(s), so it 1621 * can end up being bigger than the initial value. 1622 * @search_end: The end offset (inclusive value) of the search range. 1623 * @max_bytes: The maximum byte count we are interested. The search stops 1624 * once it reaches this count. 1625 * @bits: The bits the range must have in order to be accounted for. 1626 * If multiple bits are set, then only subranges that have all 1627 * the bits set are accounted for. 1628 * @contig: Indicate if we should ignore holes in the range or not. If 1629 * this is true, then stop once we find a hole. 1630 * @cached_state: A cached state to be used across multiple calls to this 1631 * function in order to speedup searches. Use NULL if this is 1632 * called only once or if each call does not start where the 1633 * previous one ended. 1634 * 1635 * Returns the total number of bytes found within the given range that have 1636 * all given bits set. If the returned number of bytes is greater than zero 1637 * then @start is updated with the offset of the first byte with the bits set. 1638 */ 1639 u64 count_range_bits(struct extent_io_tree *tree, 1640 u64 *start, u64 search_end, u64 max_bytes, 1641 u32 bits, int contig, 1642 struct extent_state **cached_state) 1643 { 1644 struct extent_state *state = NULL; 1645 struct extent_state *cached; 1646 u64 cur_start = *start; 1647 u64 total_bytes = 0; 1648 u64 last = 0; 1649 int found = 0; 1650 1651 if (WARN_ON(search_end < cur_start)) 1652 return 0; 1653 1654 spin_lock(&tree->lock); 1655 1656 if (!cached_state || !*cached_state) 1657 goto search; 1658 1659 cached = *cached_state; 1660 1661 if (!extent_state_in_tree(cached)) 1662 goto search; 1663 1664 if (cached->start <= cur_start && cur_start <= cached->end) { 1665 state = cached; 1666 } else if (cached->start > cur_start) { 1667 struct extent_state *prev; 1668 1669 /* 1670 * The cached state starts after our search range's start. Check 1671 * if the previous state record starts at or before the range we 1672 * are looking for, and if so, use it - this is a common case 1673 * when there are holes between records in the tree. If there is 1674 * no previous state record, we can start from our cached state. 1675 */ 1676 prev = prev_state(cached); 1677 if (!prev) 1678 state = cached; 1679 else if (prev->start <= cur_start && cur_start <= prev->end) 1680 state = prev; 1681 } 1682 1683 /* 1684 * This search will find all the extents that end after our range 1685 * starts. 1686 */ 1687 search: 1688 if (!state) 1689 state = tree_search(tree, cur_start); 1690 1691 while (state) { 1692 if (state->start > search_end) 1693 break; 1694 if (contig && found && state->start > last + 1) 1695 break; 1696 if (state->end >= cur_start && (state->state & bits) == bits) { 1697 total_bytes += min(search_end, state->end) + 1 - 1698 max(cur_start, state->start); 1699 if (total_bytes >= max_bytes) 1700 break; 1701 if (!found) { 1702 *start = max(cur_start, state->start); 1703 found = 1; 1704 } 1705 last = state->end; 1706 } else if (contig && found) { 1707 break; 1708 } 1709 state = next_state(state); 1710 } 1711 1712 if (cached_state) { 1713 free_extent_state(*cached_state); 1714 *cached_state = state; 1715 if (state) 1716 refcount_inc(&state->refs); 1717 } 1718 1719 spin_unlock(&tree->lock); 1720 1721 return total_bytes; 1722 } 1723 1724 /* 1725 * Check if the single @bit exists in the given range. 1726 */ 1727 bool test_range_bit_exists(struct extent_io_tree *tree, u64 start, u64 end, u32 bit) 1728 { 1729 struct extent_state *state = NULL; 1730 bool bitset = false; 1731 1732 ASSERT(is_power_of_2(bit)); 1733 1734 spin_lock(&tree->lock); 1735 state = tree_search(tree, start); 1736 while (state && start <= end) { 1737 if (state->start > end) 1738 break; 1739 1740 if (state->state & bit) { 1741 bitset = true; 1742 break; 1743 } 1744 1745 /* If state->end is (u64)-1, start will overflow to 0 */ 1746 start = state->end + 1; 1747 if (start > end || start == 0) 1748 break; 1749 state = next_state(state); 1750 } 1751 spin_unlock(&tree->lock); 1752 return bitset; 1753 } 1754 1755 /* 1756 * Check if the whole range [@start,@end) contains the single @bit set. 1757 */ 1758 bool test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bit, 1759 struct extent_state *cached) 1760 { 1761 struct extent_state *state = NULL; 1762 bool bitset = true; 1763 1764 ASSERT(is_power_of_2(bit)); 1765 1766 spin_lock(&tree->lock); 1767 if (cached && extent_state_in_tree(cached) && cached->start <= start && 1768 cached->end > start) 1769 state = cached; 1770 else 1771 state = tree_search(tree, start); 1772 while (state && start <= end) { 1773 if (state->start > start) { 1774 bitset = false; 1775 break; 1776 } 1777 1778 if (state->start > end) 1779 break; 1780 1781 if ((state->state & bit) == 0) { 1782 bitset = false; 1783 break; 1784 } 1785 1786 if (state->end == (u64)-1) 1787 break; 1788 1789 /* 1790 * Last entry (if state->end is (u64)-1 and overflow happens), 1791 * or next entry starts after the range. 1792 */ 1793 start = state->end + 1; 1794 if (start > end || start == 0) 1795 break; 1796 state = next_state(state); 1797 } 1798 1799 /* We ran out of states and were still inside of our range. */ 1800 if (!state) 1801 bitset = false; 1802 spin_unlock(&tree->lock); 1803 return bitset; 1804 } 1805 1806 /* Wrappers around set/clear extent bit */ 1807 int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 1808 u32 bits, struct extent_changeset *changeset) 1809 { 1810 /* 1811 * We don't support EXTENT_LOCK_BITS yet, as current changeset will 1812 * record any bits changed, so for EXTENT_LOCK_BITS case, it will either 1813 * fail with -EEXIST or changeset will record the whole range. 1814 */ 1815 ASSERT(!(bits & EXTENT_LOCK_BITS)); 1816 1817 return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset); 1818 } 1819 1820 int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 1821 u32 bits, struct extent_changeset *changeset) 1822 { 1823 /* 1824 * Don't support EXTENT_LOCK_BITS case, same reason as 1825 * set_record_extent_bits(). 1826 */ 1827 ASSERT(!(bits & EXTENT_LOCK_BITS)); 1828 1829 return __clear_extent_bit(tree, start, end, bits, NULL, changeset); 1830 } 1831 1832 bool __try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, u32 bits, 1833 struct extent_state **cached) 1834 { 1835 int err; 1836 u64 failed_start; 1837 1838 err = __set_extent_bit(tree, start, end, bits, &failed_start, 1839 NULL, cached, NULL); 1840 if (err == -EEXIST) { 1841 if (failed_start > start) 1842 clear_extent_bit(tree, start, failed_start - 1, bits, cached); 1843 return 0; 1844 } 1845 return 1; 1846 } 1847 1848 /* 1849 * Either insert or lock state struct between start and end use mask to tell 1850 * us if waiting is desired. 1851 */ 1852 int __lock_extent(struct extent_io_tree *tree, u64 start, u64 end, u32 bits, 1853 struct extent_state **cached_state) 1854 { 1855 struct extent_state *failed_state = NULL; 1856 int err; 1857 u64 failed_start; 1858 1859 err = __set_extent_bit(tree, start, end, bits, &failed_start, 1860 &failed_state, cached_state, NULL); 1861 while (err == -EEXIST) { 1862 if (failed_start != start) 1863 clear_extent_bit(tree, start, failed_start - 1, 1864 bits, cached_state); 1865 1866 wait_extent_bit(tree, failed_start, end, bits, &failed_state); 1867 err = __set_extent_bit(tree, start, end, bits, 1868 &failed_start, &failed_state, 1869 cached_state, NULL); 1870 } 1871 return err; 1872 } 1873 1874 void __cold extent_state_free_cachep(void) 1875 { 1876 btrfs_extent_state_leak_debug_check(); 1877 kmem_cache_destroy(extent_state_cache); 1878 } 1879 1880 int __init extent_state_init_cachep(void) 1881 { 1882 extent_state_cache = kmem_cache_create("btrfs_extent_state", 1883 sizeof(struct extent_state), 0, 0, 1884 NULL); 1885 if (!extent_state_cache) 1886 return -ENOMEM; 1887 1888 return 0; 1889 } 1890