// SPDX-License-Identifier: GPL-2.0

#include <linux/slab.h>
#include <trace/events/btrfs.h>
#include "messages.h"
#include "ctree.h"
#include "extent_io.h"
#include "extent-io-tree.h"
#include "btrfs_inode.h"

static struct kmem_cache *extent_state_cache;

static inline bool extent_state_in_tree(const struct extent_state *state)
{
	return !RB_EMPTY_NODE(&state->rb_node);
}

#ifdef CONFIG_BTRFS_DEBUG
static LIST_HEAD(states);
static DEFINE_SPINLOCK(leak_lock);

static inline void btrfs_leak_debug_add_state(struct extent_state *state)
{
	unsigned long flags;

	spin_lock_irqsave(&leak_lock, flags);
	list_add(&state->leak_list, &states);
	spin_unlock_irqrestore(&leak_lock, flags);
}

static inline void btrfs_leak_debug_del_state(struct extent_state *state)
{
	unsigned long flags;

	spin_lock_irqsave(&leak_lock, flags);
	list_del(&state->leak_list);
	spin_unlock_irqrestore(&leak_lock, flags);
}

static inline void btrfs_extent_state_leak_debug_check(void)
{
	struct extent_state *state;

	while (!list_empty(&states)) {
		state = list_entry(states.next, struct extent_state, leak_list);
		pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
		       state->start, state->end, state->state,
		       extent_state_in_tree(state),
		       refcount_read(&state->refs));
		list_del(&state->leak_list);
		WARN_ON_ONCE(1);
		kmem_cache_free(extent_state_cache, state);
	}
}

#define btrfs_debug_check_extent_io_range(tree, start, end)		\
	__btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
						       struct extent_io_tree *tree,
						       u64 start, u64 end)
{
	const struct btrfs_inode *inode;
	u64 isize;

	if (tree->owner != IO_TREE_INODE_IO)
		return;

	inode = extent_io_tree_to_inode_const(tree);
	isize = i_size_read(&inode->vfs_inode);
	if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
		btrfs_debug_rl(inode->root->fs_info,
			       "%s: ino %llu isize %llu odd range [%llu,%llu]",
			       caller, btrfs_ino(inode), isize, start, end);
	}
}
#else
#define btrfs_leak_debug_add_state(state)		do {} while (0)
#define btrfs_leak_debug_del_state(state)		do {} while (0)
#define btrfs_extent_state_leak_debug_check()		do {} while (0)
#define btrfs_debug_check_extent_io_range(c, s, e)	do {} while (0)
#endif

/*
 * The only tree allowed to set the inode is IO_TREE_INODE_IO.
 */
static bool is_inode_io_tree(const struct extent_io_tree *tree)
{
	return tree->owner == IO_TREE_INODE_IO;
}

/* Return the inode if it's valid for the given tree, otherwise NULL. */
struct btrfs_inode *extent_io_tree_to_inode(struct extent_io_tree *tree)
{
	if (tree->owner == IO_TREE_INODE_IO)
		return tree->inode;
	return NULL;
}

/* Read-only access to the inode. */
const struct btrfs_inode *extent_io_tree_to_inode_const(const struct extent_io_tree *tree)
{
	if (tree->owner == IO_TREE_INODE_IO)
		return tree->inode;
	return NULL;
}

/* For read-only access to fs_info. */
const struct btrfs_fs_info *extent_io_tree_to_fs_info(const struct extent_io_tree *tree)
{
	if (tree->owner == IO_TREE_INODE_IO)
		return tree->inode->root->fs_info;
	return tree->fs_info;
}

void extent_io_tree_init(struct btrfs_fs_info *fs_info,
			 struct extent_io_tree *tree, unsigned int owner)
{
	tree->state = RB_ROOT;
	spin_lock_init(&tree->lock);
	tree->fs_info = fs_info;
	tree->owner = owner;
}

/*
 * Empty an io tree, removing and freeing every extent state record from the
 * tree. This should be called once we are sure no other task can access the
 * tree anymore, so no tree updates happen after we empty the tree and there
 * aren't any waiters on any extent state record (EXTENT_LOCKED bit is never
 * set on any extent state when calling this function).
 */
void extent_io_tree_release(struct extent_io_tree *tree)
{
	struct rb_root root;
	struct extent_state *state;
	struct extent_state *tmp;

	spin_lock(&tree->lock);
	root = tree->state;
	tree->state = RB_ROOT;
	rbtree_postorder_for_each_entry_safe(state, tmp, &root, rb_node) {
		/* Clear node to keep free_extent_state() happy. */
		RB_CLEAR_NODE(&state->rb_node);
		ASSERT(!(state->state & EXTENT_LOCKED));
		/*
		 * No need for a memory barrier here, as we are holding the tree
		 * lock and we only change the waitqueue while holding that lock
		 * (see wait_extent_bit()).
		 */
		ASSERT(!waitqueue_active(&state->wq));
		free_extent_state(state);
		cond_resched_lock(&tree->lock);
	}
	/*
	 * Should still be empty even after a reschedule, no other task should
	 * be accessing the tree anymore.
	 */
	ASSERT(RB_EMPTY_ROOT(&tree->state));
	spin_unlock(&tree->lock);
}

static struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;

	/*
	 * The given mask might not be appropriate for the slab allocator,
	 * drop the unsupported bits.
	 */
	mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state)
		return state;
	state->state = 0;
	RB_CLEAR_NODE(&state->rb_node);
	btrfs_leak_debug_add_state(state);
	refcount_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	trace_alloc_extent_state(state, mask, _RET_IP_);
	return state;
}

static struct extent_state *alloc_extent_state_atomic(struct extent_state *prealloc)
{
	if (!prealloc)
		prealloc = alloc_extent_state(GFP_ATOMIC);

	return prealloc;
}

void free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (refcount_dec_and_test(&state->refs)) {
		WARN_ON(extent_state_in_tree(state));
		btrfs_leak_debug_del_state(state);
		trace_free_extent_state(state, _RET_IP_);
		kmem_cache_free(extent_state_cache, state);
	}
}

static int add_extent_changeset(struct extent_state *state, u32 bits,
				struct extent_changeset *changeset,
				int set)
{
	int ret;

	if (!changeset)
		return 0;
	if (set && (state->state & bits) == bits)
		return 0;
	if (!set && (state->state & bits) == 0)
		return 0;
	changeset->bytes_changed += state->end - state->start + 1;
	ret = ulist_add(&changeset->range_changed, state->start, state->end,
			GFP_ATOMIC);
	return ret;
}
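
/*
 * Illustrative sketch (not from the original file): how the changeset
 * accounting above behaves. If a state covering [0, 8191] has none of the
 * requested bits set and add_extent_changeset() is called with set == 1,
 * changeset->bytes_changed grows by 8192 and the ulist records the range
 * [0, 8191]. A later call for a state that already has all the requested
 * bits is a no-op, so a changeset only counts bytes whose bits actually
 * changed.
 */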

static inline struct extent_state *next_state(struct extent_state *state)
{
	struct rb_node *next = rb_next(&state->rb_node);

	if (next)
		return rb_entry(next, struct extent_state, rb_node);
	else
		return NULL;
}

static inline struct extent_state *prev_state(struct extent_state *state)
{
	struct rb_node *next = rb_prev(&state->rb_node);

	if (next)
		return rb_entry(next, struct extent_state, rb_node);
	else
		return NULL;
}

/*
 * Search @tree for an entry that contains @offset. Such entry would have
 * entry->start <= offset && entry->end >= offset.
 *
 * @tree:       the tree to search
 * @offset:     offset that should fall within an entry in @tree
 * @node_ret:   pointer where new node should be anchored (used when inserting an
 *              entry in the tree)
 * @parent_ret: points to entry which would have been the parent of the entry,
 *              containing @offset
 *
 * Return a pointer to the entry that contains @offset byte address and don't
 * change @node_ret and @parent_ret.
 *
 * If no such entry exists, return a pointer to the first entry that starts
 * after @offset (or NULL if there is none) and fill @node_ret and @parent_ret.
 */
static inline struct extent_state *tree_search_for_insert(struct extent_io_tree *tree,
							  u64 offset,
							  struct rb_node ***node_ret,
							  struct rb_node **parent_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node **node = &root->rb_node;
	struct rb_node *prev = NULL;
	struct extent_state *entry = NULL;

	while (*node) {
		prev = *node;
		entry = rb_entry(prev, struct extent_state, rb_node);

		if (offset < entry->start)
			node = &(*node)->rb_left;
		else if (offset > entry->end)
			node = &(*node)->rb_right;
		else
			return entry;
	}

	if (node_ret)
		*node_ret = node;
	if (parent_ret)
		*parent_ret = prev;

	/* Search neighbors until we find the first one past the end */
	while (entry && offset > entry->end)
		entry = next_state(entry);

	return entry;
}
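
/*
 * Illustrative sketch (not from the original file): with states [0, 4095]
 * and [8192, 12287] in the tree, tree_search_for_insert(tree, 5000, ...)
 * finds no containing entry, fills @node_ret/@parent_ret with the link an
 * insertion at 5000 would use, and returns the [8192, 12287] entry (the
 * first one ending past the offset). An offset beyond 12287 returns NULL.
 */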

/*
 * Search offset in the tree or fill neighbor rbtree node pointers.
 *
 * @tree:      the tree to search
 * @offset:    offset that should fall within an entry in @tree
 * @next_ret:  pointer to the first entry whose range ends after @offset
 * @prev_ret:  pointer to the first entry whose range begins before @offset
 *
 * Return a pointer to the entry that contains @offset byte address. If no
 * such entry exists, then return NULL and fill @prev_ret and @next_ret.
 * Otherwise return the found entry and other pointers are left untouched.
 */
static struct extent_state *tree_search_prev_next(struct extent_io_tree *tree,
						  u64 offset,
						  struct extent_state **prev_ret,
						  struct extent_state **next_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node **node = &root->rb_node;
	struct extent_state *orig_prev;
	struct extent_state *entry = NULL;

	ASSERT(prev_ret);
	ASSERT(next_ret);

	while (*node) {
		entry = rb_entry(*node, struct extent_state, rb_node);

		if (offset < entry->start)
			node = &(*node)->rb_left;
		else if (offset > entry->end)
			node = &(*node)->rb_right;
		else
			return entry;
	}

	orig_prev = entry;
	while (entry && offset > entry->end)
		entry = next_state(entry);
	*next_ret = entry;
	entry = orig_prev;

	while (entry && offset < entry->start)
		entry = prev_state(entry);
	*prev_ret = entry;

	return NULL;
}

/*
 * Inexact rb-tree search, return the next entry if @offset is not found
 */
static inline struct extent_state *tree_search(struct extent_io_tree *tree, u64 offset)
{
	return tree_search_for_insert(tree, offset, NULL, NULL);
}

static void extent_io_tree_panic(const struct extent_io_tree *tree,
				 const struct extent_state *state,
				 const char *opname,
				 int err)
{
	btrfs_panic(extent_io_tree_to_fs_info(tree), err,
		    "extent io tree error on %s state start %llu end %llu",
		    opname, state->start, state->end);
}

static void merge_prev_state(struct extent_io_tree *tree, struct extent_state *state)
{
	struct extent_state *prev;

	prev = prev_state(state);
	if (prev && prev->end == state->start - 1 && prev->state == state->state) {
		if (is_inode_io_tree(tree))
			btrfs_merge_delalloc_extent(extent_io_tree_to_inode(tree),
						    state, prev);
		state->start = prev->start;
		rb_erase(&prev->rb_node, &tree->state);
		RB_CLEAR_NODE(&prev->rb_node);
		free_extent_state(prev);
	}
}

static void merge_next_state(struct extent_io_tree *tree, struct extent_state *state)
{
	struct extent_state *next;

	next = next_state(state);
	if (next && next->start == state->end + 1 && next->state == state->state) {
		if (is_inode_io_tree(tree))
			btrfs_merge_delalloc_extent(extent_io_tree_to_inode(tree),
						    state, next);
		state->end = next->end;
		rb_erase(&next->rb_node, &tree->state);
		RB_CLEAR_NODE(&next->rb_node);
		free_extent_state(next);
	}
}
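
/*
 * Illustrative sketch (not from the original file): if the tree holds
 * [0, 4095] and [4096, 8191] and both carry exactly EXTENT_DIRTY, merging
 * the second state via merge_prev_state() erases the first record and
 * leaves a single [0, 8191] state. States whose bits differ, or that are
 * not physically adjacent, are left untouched.
 */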

/*
 * Utility function to look for merge candidates inside a given range. Any
 * extents with matching state are merged together into a single extent in the
 * tree. Extents with EXTENT_IO in their state field are not merged because
 * the end_io handlers need to be able to do operations on them without
 * sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
{
	if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
		return;

	merge_prev_state(tree, state);
	merge_next_state(tree, state);
}

static void set_state_bits(struct extent_io_tree *tree,
			   struct extent_state *state,
			   u32 bits, struct extent_changeset *changeset)
{
	u32 bits_to_set = bits & ~EXTENT_CTLBITS;
	int ret;

	if (is_inode_io_tree(tree))
		btrfs_set_delalloc_extent(extent_io_tree_to_inode(tree), state, bits);

	ret = add_extent_changeset(state, bits_to_set, changeset, 1);
	BUG_ON(ret < 0);
	state->state |= bits_to_set;
}

/*
 * Insert an extent_state struct into the tree. 'bits' are set on the
 * struct before it is inserted.
 *
 * Returns a pointer to the struct extent_state record containing the range
 * requested for insertion, which may be the same as the given struct or it
 * may be an existing record in the tree that was expanded to accommodate the
 * requested range. In case of an extent_state different from the one that was
 * given, the latter can be freed or reused by the caller.
 *
 * On error it returns an error pointer.
 *
 * The tree lock is not taken internally. This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static struct extent_state *insert_state(struct extent_io_tree *tree,
					 struct extent_state *state,
					 u32 bits,
					 struct extent_changeset *changeset)
{
	struct rb_node **node;
	struct rb_node *parent = NULL;
	const u64 start = state->start - 1;
	const u64 end = state->end + 1;
	const bool try_merge = !(bits & (EXTENT_LOCKED | EXTENT_BOUNDARY));

	set_state_bits(tree, state, bits, changeset);

	node = &tree->state.rb_node;
	while (*node) {
		struct extent_state *entry;

		parent = *node;
		entry = rb_entry(parent, struct extent_state, rb_node);

		if (state->end < entry->start) {
			if (try_merge && end == entry->start &&
			    state->state == entry->state) {
				if (is_inode_io_tree(tree))
					btrfs_merge_delalloc_extent(
							extent_io_tree_to_inode(tree),
							state, entry);
				entry->start = state->start;
				merge_prev_state(tree, entry);
				state->state = 0;
				return entry;
			}
			node = &(*node)->rb_left;
		} else if (state->end > entry->end) {
			if (try_merge && entry->end == start &&
			    state->state == entry->state) {
				if (is_inode_io_tree(tree))
					btrfs_merge_delalloc_extent(
							extent_io_tree_to_inode(tree),
							state, entry);
				entry->end = state->end;
				merge_next_state(tree, entry);
				state->state = 0;
				return entry;
			}
			node = &(*node)->rb_right;
		} else {
			return ERR_PTR(-EEXIST);
		}
	}

	rb_link_node(&state->rb_node, parent, node);
	rb_insert_color(&state->rb_node, &tree->state);

	return state;
}

/*
 * Insert state to @tree to the location given by @node and @parent.
 */
static void insert_state_fast(struct extent_io_tree *tree,
			      struct extent_state *state, struct rb_node **node,
			      struct rb_node *parent, unsigned bits,
			      struct extent_changeset *changeset)
{
	set_state_bits(tree, state, bits, changeset);
	rb_link_node(&state->rb_node, parent, node);
	rb_insert_color(&state->rb_node, &tree->state);
	merge_state(tree, state);
}

/*
 * Split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half. 'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling, the tree has 'orig' at [orig->start, orig->end]. After
 * calling, there are two extent state structs in the tree:
 *   prealloc: [orig->start, split - 1]
 *   orig:     [split, orig->end]
 *
 * The tree locks are not taken by this function. They need to be held
 * by the caller.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *parent = NULL;
	struct rb_node **node;

	if (is_inode_io_tree(tree))
		btrfs_split_delalloc_extent(extent_io_tree_to_inode(tree), orig,
					    split);

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;

	parent = &orig->rb_node;
	node = &parent;
	while (*node) {
		struct extent_state *entry;

		parent = *node;
		entry = rb_entry(parent, struct extent_state, rb_node);

		if (prealloc->end < entry->start) {
			node = &(*node)->rb_left;
		} else if (prealloc->end > entry->end) {
			node = &(*node)->rb_right;
		} else {
			free_extent_state(prealloc);
			return -EEXIST;
		}
	}

	rb_link_node(&prealloc->rb_node, parent, node);
	rb_insert_color(&prealloc->rb_node, &tree->state);

	return 0;
}

/*
 * Utility function to clear some bits in an extent state struct. It will
 * optionally wake up anyone waiting on this state (wake == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree.
 */
static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
					    struct extent_state *state,
					    u32 bits, int wake,
					    struct extent_changeset *changeset)
{
	struct extent_state *next;
	u32 bits_to_clear = bits & ~EXTENT_CTLBITS;
	int ret;

	if (is_inode_io_tree(tree))
		btrfs_clear_delalloc_extent(extent_io_tree_to_inode(tree), state,
					    bits);

	ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
	BUG_ON(ret < 0);
	state->state &= ~bits_to_clear;
	if (wake)
		wake_up(&state->wq);
	if (state->state == 0) {
		next = next_state(state);
		if (extent_state_in_tree(state)) {
			rb_erase(&state->rb_node, &tree->state);
			RB_CLEAR_NODE(&state->rb_node);
			free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
		next = next_state(state);
	}
	return next;
}

/*
 * Detect if extent bits request NOWAIT semantics and set the gfp mask accordingly,
 * unset the EXTENT_NOWAIT bit.
 */
static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask)
{
	*mask = (*bits & EXTENT_NOWAIT ? GFP_NOWAIT : GFP_NOFS);
	*bits &= EXTENT_NOWAIT - 1;
}
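
/*
 * Illustrative sketch (not from the original file): passing
 * bits = EXTENT_DELALLOC | EXTENT_NOWAIT into set_gfp_mask_from_bits()
 * yields *mask == GFP_NOWAIT, and the masking with (EXTENT_NOWAIT - 1)
 * drops EXTENT_NOWAIT so that *bits == EXTENT_DELALLOC (assuming, as in
 * extent-io-tree.h, that EXTENT_DELALLOC is a lower bit than EXTENT_NOWAIT).
 * Without EXTENT_NOWAIT the mask defaults to GFP_NOFS.
 */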

/*
 * Clear some bits on a range in the tree. This may require splitting or
 * inserting elements in the tree, so the gfp mask is used to indicate which
 * allocations or sleeping are allowed.
 *
 * Pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove the given
 * range from the tree regardless of state (ie for truncate).
 *
 * The range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns 0 on success and < 0 on error.
 */
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		       u32 bits, struct extent_state **cached_state,
		       struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *cached;
	struct extent_state *prealloc = NULL;
	u64 last_end;
	int err;
	int clear = 0;
	int wake;
	int delete = (bits & EXTENT_CLEAR_ALL_BITS);
	gfp_t mask;

	set_gfp_mask_from_bits(&bits, &mask);
	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);

	if (delete)
		bits |= ~EXTENT_CTLBITS;

	if (bits & EXTENT_DELALLOC)
		bits |= EXTENT_NORESERVE;

	wake = (bits & EXTENT_LOCKED) ? 1 : 0;
	if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
		clear = 1;
again:
	if (!prealloc) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't also cover any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}

	spin_lock(&tree->lock);
	if (cached_state) {
		cached = *cached_state;

		if (clear) {
			*cached_state = NULL;
			cached_state = NULL;
		}

		if (cached && extent_state_in_tree(cached) &&
		    cached->start <= start && cached->end > start) {
			if (clear)
				refcount_dec(&cached->refs);
			state = cached;
			goto hit_next;
		}
		if (clear)
			free_extent_state(cached);
	}

	/* This search will find the extents that end after our range starts. */
	state = tree_search(tree, start);
	if (!state)
		goto out;
hit_next:
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);
	last_end = state->end;

	/* The state doesn't have the wanted bits, go ahead. */
	if (!(state->state & bits)) {
		state = next_state(state);
		goto next;
	}

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we clear the desired bit
	 * on it.
	 */

	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, state, "split", err);

		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state = clear_state_bit(tree, state, bits, wake, changeset);
			goto next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit on the first half.
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, state, "split", err);

		if (wake)
			wake_up(&state->wq);

		clear_state_bit(tree, prealloc, bits, wake, changeset);

		prealloc = NULL;
		goto out;
	}

	state = clear_state_bit(tree, state, bits, wake, changeset);
next:
	if (last_end == (u64)-1)
		goto out;
	start = last_end + 1;
	if (start <= end && state && !need_resched())
		goto hit_next;

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return 0;

}

/*
 * Wait for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function.
 */
static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			    u32 bits, struct extent_state **cached_state)
{
	struct extent_state *state;

	btrfs_debug_check_extent_io_range(tree, start, end);

	spin_lock(&tree->lock);
again:
	/*
	 * Maintain cached_state, as we may not remove it from the tree if there
	 * are more bits than the bits we're waiting on set on this state.
	 */
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (extent_state_in_tree(state) &&
		    state->start <= start && start < state->end)
			goto process_node;
	}
	while (1) {
		/*
		 * This search will find all the extents that end after our
		 * range starts.
		 */
		state = tree_search(tree, start);
process_node:
		if (!state)
			break;
		if (state->start > end)
			goto out;

		if (state->state & bits) {
			DEFINE_WAIT(wait);

			start = state->start;
			refcount_inc(&state->refs);
			prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
			spin_unlock(&tree->lock);
			schedule();
			spin_lock(&tree->lock);
			finish_wait(&state->wq, &wait);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (!cond_resched_lock(&tree->lock)) {
			state = next_state(state);
			goto process_node;
		}
	}
out:
	/* This state is no longer useful, clear it and free it up. */
	if (cached_state && *cached_state) {
		state = *cached_state;
		*cached_state = NULL;
		free_extent_state(state);
	}
	spin_unlock(&tree->lock);
}

static void cache_state_if_flags(struct extent_state *state,
				 struct extent_state **cached_ptr,
				 unsigned flags)
{
	if (cached_ptr && !(*cached_ptr)) {
		if (!flags || (state->state & flags)) {
			*cached_ptr = state;
			refcount_inc(&state->refs);
		}
	}
}

static void cache_state(struct extent_state *state,
			struct extent_state **cached_ptr)
{
	return cache_state_if_flags(state, cached_ptr,
				    EXTENT_LOCKED | EXTENT_BOUNDARY);
}
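
/*
 * Illustrative sketch (not from the original file): cache_state() only caches
 * EXTENT_LOCKED/EXTENT_BOUNDARY states, while cache_state_if_flags(state,
 * &cached, 0) caches unconditionally. Either way the caller ends up owning a
 * reference and must drop it, e.g.:
 *
 *	struct extent_state *cached = NULL;
 *
 *	(an operation that fills 'cached', such as lock_extent())
 *	...
 *	free_extent_state(cached);
 *
 * free_extent_state(NULL) is a no-op, so the cleanup is safe even when
 * nothing was cached.
 */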

/*
 * Find the first state struct with 'bits' set after 'start', and return it.
 * tree->lock must be held. NULL will be returned if nothing was found after
 * 'start'.
 */
static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
							 u64 start, u32 bits)
{
	struct extent_state *state;

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search(tree, start);
	while (state) {
		if (state->end >= start && (state->state & bits))
			return state;
		state = next_state(state);
	}
	return NULL;
}

/*
 * Find the first offset in the io tree with one or more @bits set.
 *
 * Note: If there are multiple bits set in @bits, any of them will match.
 *
 * Return true if we find something, and update @start_ret and @end_ret.
 * Return false if we found nothing.
 */
bool find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			   u64 *start_ret, u64 *end_ret, u32 bits,
			   struct extent_state **cached_state)
{
	struct extent_state *state;
	bool ret = false;

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->end == start - 1 && extent_state_in_tree(state)) {
			while ((state = next_state(state)) != NULL) {
				if (state->state & bits)
					break;
			}
			/*
			 * If we found the next extent state, clear cached_state
			 * so that we can cache the next extent state below and
			 * avoid future calls going over the same extent state
			 * again. If we haven't found any, clear as well since
			 * it's now useless.
			 */
			free_extent_state(*cached_state);
			*cached_state = NULL;
			if (state)
				goto got_it;
			goto out;
		}
		free_extent_state(*cached_state);
		*cached_state = NULL;
	}

	state = find_first_extent_bit_state(tree, start, bits);
got_it:
	if (state) {
		cache_state_if_flags(state, cached_state, 0);
		*start_ret = state->start;
		*end_ret = state->end;
		ret = true;
	}
out:
	spin_unlock(&tree->lock);
	return ret;
}

/*
 * Find a contiguous area of bits
 *
 * @tree:      io tree to check
 * @start:     offset to start the search from
 * @start_ret: the first offset we found with the bits set
 * @end_ret:   the final contiguous range of the bits that were set
 * @bits:      bits to look for
 *
 * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges
 * to set bits appropriately, and then merge them again. During this time it
 * will drop the tree->lock, so use this helper if you want to find the actual
 * contiguous area for given bits. We will search to the first bit we find, and
 * then walk down the tree until we find a non-contiguous area. The area
 * returned will be the full contiguous area with the bits set.
 */
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
			       u64 *start_ret, u64 *end_ret, u32 bits)
{
	struct extent_state *state;
	int ret = 1;

	ASSERT(!btrfs_fs_incompat(extent_io_tree_to_fs_info(tree), NO_HOLES));

	spin_lock(&tree->lock);
	state = find_first_extent_bit_state(tree, start, bits);
	if (state) {
		*start_ret = state->start;
		*end_ret = state->end;
		while ((state = next_state(state)) != NULL) {
			if (state->start > (*end_ret + 1))
				break;
			*end_ret = state->end;
		}
		ret = 0;
	}
	spin_unlock(&tree->lock);
	return ret;
}
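
/*
 * Illustrative sketch (not from the original file): walking every range with
 * a given bit set by repeatedly calling find_first_extent_bit():
 *
 *	struct extent_state *cached = NULL;
 *	u64 found_start, found_end;
 *
 *	while (find_first_extent_bit(tree, start, &found_start, &found_end,
 *				     EXTENT_DIRTY, &cached)) {
 *		(process [found_start, found_end] here)
 *		start = found_end + 1;
 *	}
 *	free_extent_state(cached);
 *
 * Unlike find_contiguous_extent_bit(), each iteration reports a single state
 * record, not the merged run of adjacent records.
 */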

/*
 * Find a contiguous range of bytes in the file marked as delalloc, not more
 * than 'max_bytes'. start and end are used to return the range.
 *
 * True is returned if we find something, false if nothing was in the tree.
 */
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
			       u64 *end, u64 max_bytes,
			       struct extent_state **cached_state)
{
	struct extent_state *state;
	u64 cur_start = *start;
	bool found = false;
	u64 total_bytes = 0;

	spin_lock(&tree->lock);

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search(tree, cur_start);
	if (!state) {
		*end = (u64)-1;
		goto out;
	}

	while (state) {
		if (found && (state->start != cur_start ||
			      (state->state & EXTENT_BOUNDARY))) {
			goto out;
		}
		if (!(state->state & EXTENT_DELALLOC)) {
			if (!found)
				*end = state->end;
			goto out;
		}
		if (!found) {
			*start = state->start;
			*cached_state = state;
			refcount_inc(&state->refs);
		}
		found = true;
		*end = state->end;
		cur_start = state->end + 1;
		total_bytes += state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
		state = next_state(state);
	}
out:
	spin_unlock(&tree->lock);
	return found;
}

/*
 * Set some bits on a range in the tree. This may require allocations or
 * sleeping. By default all allocations use GFP_NOFS, use EXTENT_NOWAIT for
 * GFP_NOWAIT.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if some
 * part of the range already has the desired bits set. The extent_state of the
 * existing range is returned in failed_state in this case, and the start of the
 * existing range is returned in failed_start. failed_state is used as an
 * optimization for wait_extent_bit, failed_start must be used as the source of
 * truth as failed_state may have changed since we returned.
 *
 * [start, end] is inclusive. This takes the tree lock.
 */
static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			    u32 bits, u64 *failed_start,
			    struct extent_state **failed_state,
			    struct extent_state **cached_state,
			    struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node **p = NULL;
	struct rb_node *parent = NULL;
	int ret = 0;
	u64 last_start;
	u64 last_end;
	u32 exclusive_bits = (bits & EXTENT_LOCKED);
	gfp_t mask;

	set_gfp_mask_from_bits(&bits, &mask);
	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);

	if (exclusive_bits)
		ASSERT(failed_start);
	else
		ASSERT(failed_start == NULL && failed_state == NULL);
again:
	if (!prealloc) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't also cover any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}
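
	/*
	 * The preallocations above and below are done before taking tree->lock
	 * so that a sleeping GFP_NOFS allocation is possible; once the lock is
	 * held, only the GFP_ATOMIC path in alloc_extent_state_atomic() is used.
	 */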
	/* Optimistically preallocate the extent changeset ulist node. */
	if (changeset)
		extent_changeset_prealloc(changeset, mask);

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state))
			goto hit_next;
	}
	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search_for_insert(tree, start, &p, &parent);
	if (!state) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		prealloc->start = start;
		prealloc->end = end;
		insert_state_fast(tree, prealloc, p, parent, bits, changeset);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		if (state->state & exclusive_bits) {
			*failed_start = state->start;
			cache_state(state, failed_state);
			ret = -EEXIST;
			goto out;
		}

		set_state_bits(tree, state, bits, changeset);
		cache_state(state, cached_state);
		merge_state(tree, state);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		state = next_state(state);
		if (start < end && state && state->start == start &&
		    !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired bit
	 * on it.
	 */
	if (state->start < start) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			cache_state(state, failed_state);
			ret = -EEXIST;
			goto out;
		}

		/*
		 * If this extent already has all the bits we want set, then
		 * skip it, not necessary to split it or do anything with it.
		 */
		if ((state->state & bits) == bits) {
			start = state->end + 1;
			cache_state(state, cached_state);
			goto search_again;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		ret = split_state(tree, state, prealloc, start);
		if (ret)
			extent_io_tree_panic(tree, state, "split", ret);

		prealloc = NULL;
		if (ret)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, bits, changeset);
			cache_state(state, cached_state);
			merge_state(tree, state);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
			state = next_state(state);
			if (start < end && state && state->start == start &&
			    !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		struct extent_state *inserted_state;

		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;

		/*
		 * Avoid freeing 'prealloc' if it can be merged with the later
		 * extent.
		 */
		prealloc->start = start;
		prealloc->end = this_end;
		inserted_state = insert_state(tree, prealloc, bits, changeset);
		if (IS_ERR(inserted_state)) {
			ret = PTR_ERR(inserted_state);
			extent_io_tree_panic(tree, prealloc, "insert", ret);
		}

		cache_state(inserted_state, cached_state);
		if (inserted_state == prealloc)
			prealloc = NULL;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half
	 */
	if (state->start <= end && state->end > end) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			cache_state(state, failed_state);
			ret = -EEXIST;
			goto out;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		ret = split_state(tree, state, prealloc, end + 1);
		if (ret)
			extent_io_tree_panic(tree, state, "split", ret);

		set_state_bits(tree, prealloc, bits, changeset);
		cache_state(prealloc, cached_state);
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return ret;

}

int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   u32 bits, struct extent_state **cached_state)
{
	return __set_extent_bit(tree, start, end, bits, NULL, NULL,
				cached_state, NULL);
}
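
/*
 * Illustrative sketch (not from the original file): a typical set/clear pair
 * reusing one cached state for both calls:
 *
 *	struct extent_state *cached = NULL;
 *
 *	set_extent_bit(tree, start, end, EXTENT_DIRTY, &cached);
 *	...
 *	clear_extent_bit(tree, start, end, EXTENT_DIRTY, &cached);
 *	free_extent_state(cached);
 *
 * clear_extent_bit() is assumed to be the extent-io-tree.h wrapper around
 * __clear_extent_bit() with a NULL changeset.
 */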

/*
 * Convert all bits in a given range from one bit to another
 *
 * @tree:         the io tree to search
 * @start:        the start offset in bytes
 * @end:          the end offset in bytes (inclusive)
 * @bits:         the bits to set in this range
 * @clear_bits:   the bits to clear in this range
 * @cached_state: state that we're going to cache
 *
 * This will go through and set bits for the given range. If any states exist
 * already in this range they are set with the given bit and cleared of the
 * clear_bits. This is only meant to be used by things that are mergeable, ie.
 * converting from say DELALLOC to DIRTY. This is not meant to be used with
 * boundary bits like LOCK.
 *
 * All allocations are done with GFP_NOFS.
 */
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		       u32 bits, u32 clear_bits,
		       struct extent_state **cached_state)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node **p = NULL;
	struct rb_node *parent = NULL;
	int ret = 0;
	u64 last_start;
	u64 last_end;
	bool first_iteration = true;

	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
				       clear_bits);

again:
	if (!prealloc) {
		/*
		 * Best effort, don't worry if extent state allocation fails
		 * here for the first iteration. We might have a cached state
		 * that matches exactly the target range, in which case no
		 * extent state allocations are needed. We'll only know this
		 * after locking the tree.
		 */
		prealloc = alloc_extent_state(GFP_NOFS);
		if (!prealloc && !first_iteration)
			return -ENOMEM;
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state))
			goto hit_next;
	}

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search_for_insert(tree, start, &p, &parent);
	if (!state) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			ret = -ENOMEM;
			goto out;
		}
		prealloc->start = start;
		prealloc->end = end;
		insert_state_fast(tree, prealloc, p, parent, bits, NULL);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going.
	 */
	if (state->start == start && state->end <= end) {
		set_state_bits(tree, state, bits, NULL);
		cache_state(state, cached_state);
		state = clear_state_bit(tree, state, clear_bits, 0, NULL);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		if (start < end && state && state->start == start &&
		    !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired bit
	 * on it.
	 */
	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			ret = -ENOMEM;
			goto out;
		}
		ret = split_state(tree, state, prealloc, start);
		if (ret)
			extent_io_tree_panic(tree, state, "split", ret);
		prealloc = NULL;
		if (ret)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, bits, NULL);
			cache_state(state, cached_state);
			state = clear_state_bit(tree, state, clear_bits, 0, NULL);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
			if (start < end && state && state->start == start &&
			    !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		struct extent_state *inserted_state;

		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			ret = -ENOMEM;
			goto out;
		}

		/*
		 * Avoid freeing 'prealloc' if it can be merged with the later
		 * extent.
		 */
		prealloc->start = start;
		prealloc->end = this_end;
		inserted_state = insert_state(tree, prealloc, bits, NULL);
		if (IS_ERR(inserted_state)) {
			ret = PTR_ERR(inserted_state);
			extent_io_tree_panic(tree, prealloc, "insert", ret);
		}
		cache_state(inserted_state, cached_state);
		if (inserted_state == prealloc)
			prealloc = NULL;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half.
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			ret = -ENOMEM;
			goto out;
		}

		ret = split_state(tree, state, prealloc, end + 1);
		if (ret)
			extent_io_tree_panic(tree, state, "split", ret);

		set_state_bits(tree, prealloc, bits, NULL);
		cache_state(prealloc, cached_state);
		clear_state_bit(tree, prealloc, clear_bits, 0, NULL);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	cond_resched();
	first_iteration = false;
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return ret;
}
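
/*
 * Illustrative sketch (not from the original file): converting every state in
 * a range from one mergeable bit to another in a single pass:
 *
 *	struct extent_state *cached = NULL;
 *
 *	convert_extent_bit(tree, start, end, EXTENT_NEED_WAIT, EXTENT_DIRTY,
 *			   &cached);
 *	free_extent_state(cached);
 *
 * The EXTENT_NEED_WAIT/EXTENT_DIRTY pairing is only an assumed example of two
 * non-exclusive bits; the function itself works for any mergeable bits.
 */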

/*
 * Find the first range that has @bits not set. This range could start before
 * @start.
 *
 * @tree:      the tree to search
 * @start:     offset at/after which the found extent should start
 * @start_ret: records the beginning of the range
 * @end_ret:   records the end of the range (inclusive)
 * @bits:      the set of bits which must be unset
 *
 * Since unallocated range is also considered one which doesn't have the bits
 * set it's possible that @end_ret contains -1, this happens in case the range
 * spans (last_range_end, end of device]. In this case it's up to the caller to
 * trim @end_ret to the appropriate size.
 */
void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
				 u64 *start_ret, u64 *end_ret, u32 bits)
{
	struct extent_state *state;
	struct extent_state *prev = NULL, *next = NULL;

	spin_lock(&tree->lock);

	/* Find first extent with bits cleared */
	while (1) {
		state = tree_search_prev_next(tree, start, &prev, &next);
		if (!state && !next && !prev) {
			/*
			 * Tree is completely empty, send full range and let
			 * caller deal with it
			 */
			*start_ret = 0;
			*end_ret = -1;
			goto out;
		} else if (!state && !next) {
			/*
			 * We are past the last allocated chunk, set start at
			 * the end of the last extent.
			 */
			*start_ret = prev->end + 1;
			*end_ret = -1;
			goto out;
		} else if (!state) {
			state = next;
		}

		/*
		 * At this point 'state' either contains 'start' or start is
		 * before 'state'
		 */
		if (in_range(start, state->start, state->end - state->start + 1)) {
			if (state->state & bits) {
				/*
				 * |--range with bits sets--|
				 *    |
				 *    start
				 */
				start = state->end + 1;
			} else {
				/*
				 * 'start' falls within a range that doesn't
				 * have the bits set, so take its start as the
				 * beginning of the desired range
				 *
				 * |--range with bits cleared----|
				 *      |
				 *      start
				 */
				*start_ret = state->start;
				break;
			}
		} else {
			/*
			 * |---prev range---|---hole/unset---|---node range---|
			 *                          |
			 *                        start
			 *
			 * or
			 *
			 * |---hole/unset--||--first node--|
			 *        0   |
			 *            start
			 */
			if (prev)
				*start_ret = prev->end + 1;
			else
				*start_ret = 0;
			break;
		}
	}

	/*
	 * Find the longest stretch from start until an entry which has the
	 * bits set
	 */
	while (state) {
		if (state->end >= start && !(state->state & bits)) {
			*end_ret = state->end;
		} else {
			*end_ret = state->start - 1;
			break;
		}
		state = next_state(state);
	}
out:
	spin_unlock(&tree->lock);
}
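
/*
 * Illustrative sketch (not from the original file): with only [4096, 8191]
 * carrying @bits in the tree, find_first_clear_extent_bit(tree, 0, ...)
 * reports the hole before it as [0, 4095], while a @start inside the set
 * range (say 5000) skips past it and reports [8192, -1], leaving the caller
 * to trim the open-ended tail.
 */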

/*
 * Count the number of bytes in the tree that have a given bit(s) set for a
 * given range.
 *
 * @tree:         The io tree to search.
 * @start:        The start offset of the range. This value is updated to the
 *                offset of the first byte found with the given bit(s), so it
 *                can end up being bigger than the initial value.
 * @search_end:   The end offset (inclusive value) of the search range.
 * @max_bytes:    The maximum byte count we are interested in. The search stops
 *                once it reaches this count.
 * @bits:         The bits the range must have in order to be accounted for.
 *                If multiple bits are set, then only subranges that have all
 *                the bits set are accounted for.
 * @contig:       Indicate if we should ignore holes in the range or not. If
 *                this is true, then stop once we find a hole.
 * @cached_state: A cached state to be used across multiple calls to this
 *                function in order to speedup searches. Use NULL if this is
 *                called only once or if each call does not start where the
 *                previous one ended.
 *
 * Returns the total number of bytes found within the given range that have
 * all given bits set. If the returned number of bytes is greater than zero
 * then @start is updated with the offset of the first byte with the bits set.
 */
u64 count_range_bits(struct extent_io_tree *tree,
		     u64 *start, u64 search_end, u64 max_bytes,
		     u32 bits, int contig,
		     struct extent_state **cached_state)
{
	struct extent_state *state = NULL;
	struct extent_state *cached;
	u64 cur_start = *start;
	u64 total_bytes = 0;
	u64 last = 0;
	int found = 0;

	if (WARN_ON(search_end < cur_start))
		return 0;

	spin_lock(&tree->lock);

	if (!cached_state || !*cached_state)
		goto search;

	cached = *cached_state;

	if (!extent_state_in_tree(cached))
		goto search;

	if (cached->start <= cur_start && cur_start <= cached->end) {
		state = cached;
	} else if (cached->start > cur_start) {
		struct extent_state *prev;

		/*
		 * The cached state starts after our search range's start. Check
		 * if the previous state record starts at or before the range we
		 * are looking for, and if so, use it - this is a common case
		 * when there are holes between records in the tree. If there is
		 * no previous state record, we can start from our cached state.
		 */
		prev = prev_state(cached);
		if (!prev)
			state = cached;
		else if (prev->start <= cur_start && cur_start <= prev->end)
			state = prev;
	}

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
search:
	if (!state)
		state = tree_search(tree, cur_start);

	while (state) {
		if (state->start > search_end)
			break;
		if (contig && found && state->start > last + 1)
			break;
		if (state->end >= cur_start && (state->state & bits) == bits) {
			total_bytes += min(search_end, state->end) + 1 -
				       max(cur_start, state->start);
			if (total_bytes >= max_bytes)
				break;
			if (!found) {
				*start = max(cur_start, state->start);
				found = 1;
			}
			last = state->end;
		} else if (contig && found) {
			break;
		}
		state = next_state(state);
	}

	if (cached_state) {
		free_extent_state(*cached_state);
		*cached_state = state;
		if (state)
			refcount_inc(&state->refs);
	}

	spin_unlock(&tree->lock);

	return total_bytes;
}

/*
 * Check if the single @bit exists in the given range.
 */
bool test_range_bit_exists(struct extent_io_tree *tree, u64 start, u64 end, u32 bit)
{
	struct extent_state *state = NULL;
	bool bitset = false;

	ASSERT(is_power_of_2(bit));

	spin_lock(&tree->lock);
	state = tree_search(tree, start);
	while (state && start <= end) {
		if (state->start > end)
			break;

		if (state->state & bit) {
			bitset = true;
			break;
		}

		/* If state->end is (u64)-1, start will overflow to 0 */
		start = state->end + 1;
		if (start > end || start == 0)
			break;
		state = next_state(state);
	}
	spin_unlock(&tree->lock);
	return bitset;
}
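
/*
 * Illustrative sketch (not from the original file): with EXTENT_UPTODATE set
 * only on [0, 4095] of a file range [0, 8191],
 * test_range_bit_exists(tree, 0, 8191, EXTENT_UPTODATE) returns true (some
 * byte carries the bit), while test_range_bit(tree, 0, 8191, EXTENT_UPTODATE,
 * NULL) below returns false because the bit does not cover the whole range.
 */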

/*
 * Check if the whole range [@start, @end] contains the single @bit set.
 */
bool test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bit,
		    struct extent_state *cached)
{
	struct extent_state *state = NULL;
	bool bitset = true;

	ASSERT(is_power_of_2(bit));

	spin_lock(&tree->lock);
	if (cached && extent_state_in_tree(cached) && cached->start <= start &&
	    cached->end > start)
		state = cached;
	else
		state = tree_search(tree, start);
	while (state && start <= end) {
		if (state->start > start) {
			bitset = false;
			break;
		}

		if (state->start > end)
			break;

		if ((state->state & bit) == 0) {
			bitset = false;
			break;
		}

		if (state->end == (u64)-1)
			break;

		/*
		 * Last entry (if state->end is (u64)-1 and overflow happens),
		 * or next entry starts after the range.
		 */
		start = state->end + 1;
		if (start > end || start == 0)
			break;
		state = next_state(state);
	}

	/* We ran out of states and were still inside of our range. */
	if (!state)
		bitset = false;
	spin_unlock(&tree->lock);
	return bitset;
}

/* Wrappers around set/clear extent bit */
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
			   u32 bits, struct extent_changeset *changeset)
{
	/*
	 * We don't support EXTENT_LOCKED yet, as current changeset will
	 * record any bits changed, so for EXTENT_LOCKED case, it will
	 * either fail with -EEXIST or changeset will record the whole
	 * range.
	 */
	ASSERT(!(bits & EXTENT_LOCKED));

	return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset);
}

int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
			     u32 bits, struct extent_changeset *changeset)
{
	/*
	 * Don't support EXTENT_LOCKED case, same reason as
	 * set_record_extent_bits().
	 */
	ASSERT(!(bits & EXTENT_LOCKED));

	return __clear_extent_bit(tree, start, end, bits, NULL, changeset);
}

int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
		    struct extent_state **cached)
{
	int err;
	u64 failed_start;

	err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
			       NULL, cached, NULL);
	if (err == -EEXIST) {
		if (failed_start > start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, cached);
		return 0;
	}
	return 1;
}
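
/*
 * Illustrative sketch (not from the original file): try_lock_extent() returns
 * 1 when the whole range was locked and 0 on contention, so a non-blocking
 * caller typically does:
 *
 *	if (!try_lock_extent(tree, start, end, &cached))
 *		return -EAGAIN;
 *	...
 *	unlock_extent(tree, start, end, &cached);
 *
 * On failure any partially locked prefix has already been unlocked by the
 * clear_extent_bit() call above, so nothing is left locked. unlock_extent()
 * is assumed to be the extent-io-tree.h helper that clears EXTENT_LOCKED.
 */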

/*
 * Either insert or lock the state struct between start and end. Use mask to
 * tell us if waiting is desired.
 */
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
		struct extent_state **cached_state)
{
	struct extent_state *failed_state = NULL;
	int err;
	u64 failed_start;

	err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
			       &failed_state, cached_state, NULL);
	while (err == -EEXIST) {
		if (failed_start != start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, cached_state);

		wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED,
				&failed_state);
		err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
				       &failed_start, &failed_state,
				       cached_state, NULL);
	}
	return err;
}

void __cold extent_state_free_cachep(void)
{
	btrfs_extent_state_leak_debug_check();
	kmem_cache_destroy(extent_state_cache);
}

int __init extent_state_init_cachep(void)
{
	extent_state_cache = kmem_cache_create("btrfs_extent_state",
					       sizeof(struct extent_state), 0, 0,
					       NULL);
	if (!extent_state_cache)
		return -ENOMEM;

	return 0;
}
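
/*
 * Illustrative sketch (not from the original file): the cache above is meant
 * to be created once at module load and torn down at unload, e.g.:
 *
 *	ret = extent_state_init_cachep();
 *	if (ret)
 *		return ret;
 *	...
 *	extent_state_free_cachep();
 *
 * extent_state_free_cachep() runs the leak check first, so any extent_state
 * still linked on the debug list is reported before the kmem_cache is
 * destroyed.
 */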