// SPDX-License-Identifier: GPL-2.0

#include <linux/slab.h>
#include <trace/events/btrfs.h>
#include "messages.h"
#include "ctree.h"
#include "extent_io.h"
#include "extent-io-tree.h"
#include "btrfs_inode.h"

static struct kmem_cache *extent_state_cache;

static inline bool extent_state_in_tree(const struct extent_state *state)
{
	return !RB_EMPTY_NODE(&state->rb_node);
}

#ifdef CONFIG_BTRFS_DEBUG
static LIST_HEAD(states);
static DEFINE_SPINLOCK(leak_lock);

static inline void btrfs_leak_debug_add_state(struct extent_state *state)
{
	unsigned long flags;

	spin_lock_irqsave(&leak_lock, flags);
	list_add(&state->leak_list, &states);
	spin_unlock_irqrestore(&leak_lock, flags);
}

static inline void btrfs_leak_debug_del_state(struct extent_state *state)
{
	unsigned long flags;

	spin_lock_irqsave(&leak_lock, flags);
	list_del(&state->leak_list);
	spin_unlock_irqrestore(&leak_lock, flags);
}

static inline void btrfs_extent_state_leak_debug_check(void)
{
	struct extent_state *state;

	while (!list_empty(&states)) {
		state = list_first_entry(&states, struct extent_state, leak_list);
		pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
		       state->start, state->end, state->state,
		       extent_state_in_tree(state),
		       refcount_read(&state->refs));
		list_del(&state->leak_list);
		WARN_ON_ONCE(1);
		kmem_cache_free(extent_state_cache, state);
	}
}

#define btrfs_debug_check_extent_io_range(tree, start, end)	\
	__btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
						       struct extent_io_tree *tree,
						       u64 start, u64 end)
{
	const struct btrfs_inode *inode = tree->inode;
	u64 isize;

	if (tree->owner != IO_TREE_INODE_IO)
		return;

	isize = i_size_read(&inode->vfs_inode);
	if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
		btrfs_debug_rl(inode->root->fs_info,
			       "%s: ino %llu isize %llu odd range [%llu,%llu]",
			       caller, btrfs_ino(inode), isize, start, end);
	}
}
#else
#define btrfs_leak_debug_add_state(state)		do {} while (0)
#define btrfs_leak_debug_del_state(state)		do {} while (0)
#define btrfs_extent_state_leak_debug_check()		do {} while (0)
#define btrfs_debug_check_extent_io_range(c, s, e)	do {} while (0)
#endif

/* Read-only access to the inode. */
const struct btrfs_inode *btrfs_extent_io_tree_to_inode(const struct extent_io_tree *tree)
{
	if (tree->owner == IO_TREE_INODE_IO)
		return tree->inode;
	return NULL;
}

/* For read-only access to fs_info. */
const struct btrfs_fs_info *btrfs_extent_io_tree_to_fs_info(const struct extent_io_tree *tree)
{
	if (tree->owner == IO_TREE_INODE_IO)
		return tree->inode->root->fs_info;
	return tree->fs_info;
}

void btrfs_extent_io_tree_init(struct btrfs_fs_info *fs_info,
			       struct extent_io_tree *tree, unsigned int owner)
{
	tree->state = RB_ROOT;
	spin_lock_init(&tree->lock);
	tree->fs_info = fs_info;
	tree->owner = owner;
}

/*
 * Empty an io tree, removing and freeing every extent state record from the
 * tree. This should be called once we are sure no other task can access the
 * tree anymore, so no tree updates happen after we empty the tree and there
 * aren't any waiters on any extent state record (EXTENT_LOCK_BITS are never
 * set on any extent state when calling this function).
 */
void btrfs_extent_io_tree_release(struct extent_io_tree *tree)
{
	struct rb_root root;
	struct extent_state *state;
	struct extent_state *tmp;

	spin_lock(&tree->lock);
	root = tree->state;
	tree->state = RB_ROOT;
	rbtree_postorder_for_each_entry_safe(state, tmp, &root, rb_node) {
		/* Clear node to keep free_extent_state() happy. */
		RB_CLEAR_NODE(&state->rb_node);
		ASSERT(!(state->state & EXTENT_LOCK_BITS));
		/*
		 * No need for a memory barrier here, as we are holding the tree
		 * lock and we only change the waitqueue while holding that lock
		 * (see wait_extent_bit()).
		 */
		ASSERT(!waitqueue_active(&state->wq));
		btrfs_free_extent_state(state);
		cond_resched_lock(&tree->lock);
	}
	/*
	 * Should still be empty even after a reschedule, no other task should
	 * be accessing the tree anymore.
	 */
	ASSERT(RB_EMPTY_ROOT(&tree->state));
	spin_unlock(&tree->lock);
}

static struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;

	/*
	 * The given mask might not be appropriate for the slab allocator,
	 * drop the unsupported bits.
	 */
	mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state)
		return state;
	state->state = 0;
	RB_CLEAR_NODE(&state->rb_node);
	btrfs_leak_debug_add_state(state);
	refcount_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	trace_btrfs_alloc_extent_state(state, mask, _RET_IP_);
	return state;
}

static struct extent_state *alloc_extent_state_atomic(struct extent_state *prealloc)
{
	if (!prealloc)
		prealloc = alloc_extent_state(GFP_ATOMIC);

	return prealloc;
}

void btrfs_free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (refcount_dec_and_test(&state->refs)) {
		WARN_ON(extent_state_in_tree(state));
		btrfs_leak_debug_del_state(state);
		trace_btrfs_free_extent_state(state, _RET_IP_);
		kmem_cache_free(extent_state_cache, state);
	}
}

static int add_extent_changeset(struct extent_state *state, u32 bits,
				struct extent_changeset *changeset,
				int set)
{
	int ret;

	if (!changeset)
		return 0;
	if (set && (state->state & bits) == bits)
		return 0;
	if (!set && (state->state & bits) == 0)
		return 0;
	changeset->bytes_changed += state->end - state->start + 1;
	ret = ulist_add(&changeset->range_changed, state->start, state->end,
			GFP_ATOMIC);
	return ret;
}

static inline struct extent_state *next_state(struct extent_state *state)
{
	struct rb_node *next = rb_next(&state->rb_node);

	return rb_entry_safe(next, struct extent_state, rb_node);
}

static inline struct extent_state *prev_state(struct extent_state *state)
{
	struct rb_node *next = rb_prev(&state->rb_node);

	return rb_entry_safe(next, struct extent_state, rb_node);
}

/*
 * Search @tree for an entry that contains @offset or if none exists for the
 * first entry that starts and ends after that offset.
 *
 * @tree:       the tree to search
 * @offset:     search offset
 * @node_ret:   pointer where a new node should be anchored (used when inserting
 *              an entry in the tree)
 * @parent_ret: points to the entry that would have been the parent of an entry
 *              containing @offset
 *
 * Return a pointer to the entry that contains @offset byte address.
 *
 * If no such entry exists, return the first entry that starts and ends after
 * @offset if one exists, otherwise NULL.
 *
 * If the returned entry starts at @offset, then @node_ret and @parent_ret
 * aren't changed.
 */
static inline struct extent_state *tree_search_for_insert(struct extent_io_tree *tree,
							   u64 offset,
							   struct rb_node ***node_ret,
							   struct rb_node **parent_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node **node = &root->rb_node;
	struct rb_node *prev = NULL;
	struct extent_state *entry = NULL;

	while (*node) {
		prev = *node;
		entry = rb_entry(prev, struct extent_state, rb_node);

		if (offset < entry->start)
			node = &(*node)->rb_left;
		else if (offset > entry->end)
			node = &(*node)->rb_right;
		else
			return entry;
	}

	if (node_ret)
		*node_ret = node;
	if (parent_ret)
		*parent_ret = prev;

	/*
	 * Return either the current entry if it contains offset (it ends after
	 * or at offset) or the first entry that starts and ends after offset if
	 * one exists, or NULL.
	 */
	while (entry && offset > entry->end)
		entry = next_state(entry);

	return entry;
}

/*
 * Search offset in the tree or fill neighbor rbtree node pointers.
 *
 * @tree:     the tree to search
 * @offset:   offset that should fall within an entry in @tree
 * @next_ret: pointer to the first entry whose range ends after @offset
 * @prev_ret: pointer to the first entry whose range begins before @offset
 *
 * Return a pointer to the entry that contains @offset byte address. If no
 * such entry exists, then return NULL and fill @prev_ret and @next_ret.
 * Otherwise return the found entry and other pointers are left untouched.
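 *
 * For example, with entries [0, 4095] and [8192, 12287] in the tree, a search
 * for offset 5000 returns NULL and sets *prev_ret to the first entry and
 * *next_ret to the second one.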
 */
static struct extent_state *tree_search_prev_next(struct extent_io_tree *tree,
						  u64 offset,
						  struct extent_state **prev_ret,
						  struct extent_state **next_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node **node = &root->rb_node;
	struct extent_state *orig_prev;
	struct extent_state *entry = NULL;

	ASSERT(prev_ret);
	ASSERT(next_ret);

	while (*node) {
		entry = rb_entry(*node, struct extent_state, rb_node);

		if (offset < entry->start)
			node = &(*node)->rb_left;
		else if (offset > entry->end)
			node = &(*node)->rb_right;
		else
			return entry;
	}

	orig_prev = entry;
	while (entry && offset > entry->end)
		entry = next_state(entry);
	*next_ret = entry;
	entry = orig_prev;

	while (entry && offset < entry->start)
		entry = prev_state(entry);
	*prev_ret = entry;

	return NULL;
}

/*
 * Inexact rb-tree search, return the next entry if @offset is not found
 */
static inline struct extent_state *tree_search(struct extent_io_tree *tree, u64 offset)
{
	return tree_search_for_insert(tree, offset, NULL, NULL);
}

static void __cold extent_io_tree_panic(const struct extent_io_tree *tree,
					const struct extent_state *state,
					const char *opname,
					int err)
{
	btrfs_panic(btrfs_extent_io_tree_to_fs_info(tree), err,
		    "extent io tree error on %s state start %llu end %llu",
		    opname, state->start, state->end);
}

static void merge_prev_state(struct extent_io_tree *tree, struct extent_state *state)
{
	struct extent_state *prev;

	prev = prev_state(state);
	if (prev && prev->end == state->start - 1 && prev->state == state->state) {
		if (tree->owner == IO_TREE_INODE_IO)
			btrfs_merge_delalloc_extent(tree->inode, state, prev);
		state->start = prev->start;
		rb_erase(&prev->rb_node, &tree->state);
		RB_CLEAR_NODE(&prev->rb_node);
		btrfs_free_extent_state(prev);
	}
}

static void merge_next_state(struct extent_io_tree *tree, struct extent_state *state)
{
	struct extent_state *next;

	next = next_state(state);
	if (next && next->start == state->end + 1 && next->state == state->state) {
		if (tree->owner == IO_TREE_INODE_IO)
			btrfs_merge_delalloc_extent(tree->inode, state, next);
		state->end = next->end;
		rb_erase(&next->rb_node, &tree->state);
		RB_CLEAR_NODE(&next->rb_node);
		btrfs_free_extent_state(next);
	}
}

/*
 * Utility function to look for merge candidates inside a given range. Any
 * extents with matching state are merged together into a single extent in the
 * tree. Extents with EXTENT_IO in their state field are not merged because
 * the end_io handlers need to be able to do operations on them without
 * sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
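 *
 * For example, two adjacent states [0, 4095] and [4096, 8191] with the same
 * bits (and none of the lock or boundary bits) are collapsed into a single
 * [0, 8191] record.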
 */
static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
{
	if (state->state & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY))
		return;

	merge_prev_state(tree, state);
	merge_next_state(tree, state);
}

static void set_state_bits(struct extent_io_tree *tree,
			   struct extent_state *state,
			   u32 bits, struct extent_changeset *changeset)
{
	u32 bits_to_set = bits & ~EXTENT_CTLBITS;
	int ret;

	if (tree->owner == IO_TREE_INODE_IO)
		btrfs_set_delalloc_extent(tree->inode, state, bits);

	ret = add_extent_changeset(state, bits_to_set, changeset, 1);
	BUG_ON(ret < 0);
	state->state |= bits_to_set;
}

/*
 * Insert an extent_state struct into the tree. 'bits' are set on the
 * struct before it is inserted.
 *
 * Returns a pointer to the struct extent_state record containing the range
 * requested for insertion, which may be the same as the given struct or it
 * may be an existing record in the tree that was expanded to accommodate the
 * requested range. In case of an extent_state different from the one that was
 * given, the latter can be freed or reused by the caller.
 *
 * On error it returns an error pointer.
 *
 * The tree lock is not taken internally. This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static struct extent_state *insert_state(struct extent_io_tree *tree,
					 struct extent_state *state,
					 u32 bits,
					 struct extent_changeset *changeset)
{
	struct rb_node **node;
	struct rb_node *parent = NULL;
	const u64 start = state->start - 1;
	const u64 end = state->end + 1;
	const bool try_merge = !(bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY));

	set_state_bits(tree, state, bits, changeset);

	node = &tree->state.rb_node;
	while (*node) {
		struct extent_state *entry;

		parent = *node;
		entry = rb_entry(parent, struct extent_state, rb_node);

		if (state->end < entry->start) {
			if (try_merge && end == entry->start &&
			    state->state == entry->state) {
				if (tree->owner == IO_TREE_INODE_IO)
					btrfs_merge_delalloc_extent(tree->inode,
								    state, entry);
				entry->start = state->start;
				merge_prev_state(tree, entry);
				state->state = 0;
				return entry;
			}
			node = &(*node)->rb_left;
		} else if (state->end > entry->end) {
			if (try_merge && entry->end == start &&
			    state->state == entry->state) {
				if (tree->owner == IO_TREE_INODE_IO)
					btrfs_merge_delalloc_extent(tree->inode,
								    state, entry);
				entry->end = state->end;
				merge_next_state(tree, entry);
				state->state = 0;
				return entry;
			}
			node = &(*node)->rb_right;
		} else {
			return ERR_PTR(-EEXIST);
		}
	}

	rb_link_node(&state->rb_node, parent, node);
	rb_insert_color(&state->rb_node, &tree->state);

	return state;
}

/*
 * Insert state to @tree to the location given by @node and @parent.
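 *
 * This is the fast path variant of insert_state(): the caller has already done
 * the rbtree search (see tree_search_for_insert()), so the link and parent
 * nodes are known and no further search is needed.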
 */
static void insert_state_fast(struct extent_io_tree *tree,
			      struct extent_state *state, struct rb_node **node,
			      struct rb_node *parent, unsigned bits,
			      struct extent_changeset *changeset)
{
	set_state_bits(tree, state, bits, changeset);
	rb_link_node(&state->rb_node, parent, node);
	rb_insert_color(&state->rb_node, &tree->state);
	merge_state(tree, state);
}

/*
 * Split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half. 'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling, the tree has 'orig' at [orig->start, orig->end]. After
 * calling, there are two extent state structs in the tree:
 *   prealloc: [orig->start, split - 1]
 *   orig:     [split, orig->end]
 *
 * The tree locks are not taken by this function. They need to be held
 * by the caller.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *parent = NULL;
	struct rb_node **node;

	if (tree->owner == IO_TREE_INODE_IO)
		btrfs_split_delalloc_extent(tree->inode, orig, split);

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;

	parent = &orig->rb_node;
	node = &parent;
	while (*node) {
		struct extent_state *entry;

		parent = *node;
		entry = rb_entry(parent, struct extent_state, rb_node);

		if (prealloc->end < entry->start) {
			node = &(*node)->rb_left;
		} else if (prealloc->end > entry->end) {
			node = &(*node)->rb_right;
		} else {
			btrfs_free_extent_state(prealloc);
			return -EEXIST;
		}
	}

	rb_link_node(&prealloc->rb_node, parent, node);
	rb_insert_color(&prealloc->rb_node, &tree->state);

	return 0;
}

/*
 * Use this during tree iteration to avoid doing next node searches when it's
 * not needed (the current record ends at or after the target range's end).
 */
static inline struct extent_state *next_search_state(struct extent_state *state, u64 end)
{
	if (state->end < end)
		return next_state(state);

	return NULL;
}

/*
 * Utility function to clear some bits in an extent state struct. It will
 * optionally wake up anyone waiting on this state (wake == 1).
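 * The @end parameter is only used to decide, via next_search_state(), whether
 * the next state should be returned to the caller so a range iteration can
 * continue without an extra rbtree lookup.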
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree.
 */
static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
					    struct extent_state *state,
					    u32 bits, int wake, u64 end,
					    struct extent_changeset *changeset)
{
	struct extent_state *next;
	u32 bits_to_clear = bits & ~EXTENT_CTLBITS;
	int ret;

	if (tree->owner == IO_TREE_INODE_IO)
		btrfs_clear_delalloc_extent(tree->inode, state, bits);

	ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
	BUG_ON(ret < 0);
	state->state &= ~bits_to_clear;
	if (wake)
		wake_up(&state->wq);
	if (state->state == 0) {
		next = next_search_state(state, end);
		if (extent_state_in_tree(state)) {
			rb_erase(&state->rb_node, &tree->state);
			RB_CLEAR_NODE(&state->rb_node);
			btrfs_free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
		next = next_search_state(state, end);
	}
	return next;
}

/*
 * Detect if extent bits request NOWAIT semantics and set the gfp mask accordingly,
 * unset the EXTENT_NOWAIT bit.
 */
static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask)
{
	*mask = (*bits & EXTENT_NOWAIT ? GFP_NOWAIT : GFP_NOFS);
	*bits &= EXTENT_NOWAIT - 1;
}

/*
 * Clear some bits on a range in the tree. This may require splitting or
 * inserting elements in the tree, so the gfp mask is used to indicate which
 * allocations or sleeping are allowed.
 *
 * The range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns 0 on success and < 0 on error.
 */
int btrfs_clear_extent_bit_changeset(struct extent_io_tree *tree, u64 start, u64 end,
				     u32 bits, struct extent_state **cached_state,
				     struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *cached;
	struct extent_state *prealloc = NULL;
	u64 last_end;
	int ret = 0;
	bool clear;
	bool wake;
	const bool delete = (bits & EXTENT_CLEAR_ALL_BITS);
	gfp_t mask;

	set_gfp_mask_from_bits(&bits, &mask);
	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);

	if (delete)
		bits |= ~EXTENT_CTLBITS;

	if (bits & EXTENT_DELALLOC)
		bits |= EXTENT_NORESERVE;

	wake = (bits & EXTENT_LOCK_BITS);
	clear = (bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY));
again:
	if (!prealloc) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}

	spin_lock(&tree->lock);
	if (cached_state) {
		cached = *cached_state;

		if (clear) {
			*cached_state = NULL;
			cached_state = NULL;
		}

		if (cached && extent_state_in_tree(cached) &&
		    cached->start <= start && cached->end > start) {
			if (clear)
				refcount_dec(&cached->refs);
			state = cached;
			goto hit_next;
		}
		if (clear)
			btrfs_free_extent_state(cached);
	}

	/* This search will find the extents that end after our range starts. */
	state = tree_search(tree, start);
	if (!state)
		goto out;
hit_next:
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);
	last_end = state->end;

	/* The state doesn't have the wanted bits, go ahead. */
	if (!(state->state & bits)) {
		state = next_search_state(state, end);
		goto next;
	}

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we clear the desired bit
	 * on it.
	 */

	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		ret = split_state(tree, state, prealloc, start);
		prealloc = NULL;
		if (ret) {
			extent_io_tree_panic(tree, state, "split", ret);
			goto out;
		}
		if (state->end <= end) {
			state = clear_state_bit(tree, state, bits, wake, end,
						changeset);
			goto next;
		}
		if (need_resched())
			goto search_again;
		/*
		 * Fallthrough and try atomic extent state allocation if needed.
		 * If it fails we'll jump to 'search_again', retry the allocation
		 * in non-atomic mode and start the search again.
		 */
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit on the first half.
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		ret = split_state(tree, state, prealloc, end + 1);
		if (ret) {
			extent_io_tree_panic(tree, state, "split", ret);
			prealloc = NULL;
			goto out;
		}

		if (wake)
			wake_up(&state->wq);

		clear_state_bit(tree, prealloc, bits, wake, end, changeset);

		prealloc = NULL;
		goto out;
	}

	state = clear_state_bit(tree, state, bits, wake, end, changeset);
next:
	if (last_end >= end)
		goto out;
	start = last_end + 1;
	if (state && !need_resched())
		goto hit_next;

search_again:
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	btrfs_free_extent_state(prealloc);

	return ret;
}

/*
 * Wait for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function.
 */
static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			    u32 bits, struct extent_state **cached_state)
{
	struct extent_state *state;

	btrfs_debug_check_extent_io_range(tree, start, end);

	spin_lock(&tree->lock);
again:
	/*
	 * Maintain cached_state, as we may not remove it from the tree if there
	 * are more bits than the bits we're waiting on set on this state.
	 */
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (extent_state_in_tree(state) &&
		    state->start <= start && start < state->end)
			goto process_node;
	}
	while (1) {
		/*
		 * This search will find all the extents that end after our
		 * range starts.
		 */
		state = tree_search(tree, start);
process_node:
		if (!state)
			break;
		if (state->start > end)
			goto out;

		if (state->state & bits) {
			DEFINE_WAIT(wait);

			start = state->start;
			refcount_inc(&state->refs);
			prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
			spin_unlock(&tree->lock);
			schedule();
			spin_lock(&tree->lock);
			finish_wait(&state->wq, &wait);
			btrfs_free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (!cond_resched_lock(&tree->lock)) {
			state = next_state(state);
			goto process_node;
		}
	}
out:
	/* This state is no longer useful, clear it and free it up. */
	if (cached_state && *cached_state) {
		state = *cached_state;
		*cached_state = NULL;
		btrfs_free_extent_state(state);
	}
	spin_unlock(&tree->lock);
}

static void cache_state_if_flags(struct extent_state *state,
				 struct extent_state **cached_ptr,
				 unsigned flags)
{
	if (cached_ptr && !(*cached_ptr)) {
		if (!flags || (state->state & flags)) {
			*cached_ptr = state;
			refcount_inc(&state->refs);
		}
	}
}

static void cache_state(struct extent_state *state,
			struct extent_state **cached_ptr)
{
	return cache_state_if_flags(state, cached_ptr, EXTENT_LOCK_BITS | EXTENT_BOUNDARY);
}

/*
 * Find the first state struct with 'bits' set after 'start', and return it.
 * tree->lock must be held. NULL will be returned if nothing was found after
 * 'start'.
 */
static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
							u64 start, u32 bits)
{
	struct extent_state *state;

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search(tree, start);
	while (state) {
		if (state->state & bits)
			return state;
		state = next_state(state);
	}
	return NULL;
}

/*
 * Find the first offset in the io tree with one or more @bits set.
 *
 * Note: If there are multiple bits set in @bits, any of them will match.
 *
 * Return true if we find something, and update @start_ret and @end_ret.
 * Return false if we found nothing.
 */
bool btrfs_find_first_extent_bit(struct extent_io_tree *tree, u64 start,
				 u64 *start_ret, u64 *end_ret, u32 bits,
				 struct extent_state **cached_state)
{
	struct extent_state *state;
	bool ret = false;

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->end == start - 1 && extent_state_in_tree(state)) {
			while ((state = next_state(state)) != NULL) {
				if (state->state & bits)
					break;
			}
			/*
			 * If we found the next extent state, clear cached_state
			 * so that we can cache the next extent state below and
			 * avoid future calls going over the same extent state
			 * again. If we haven't found any, clear as well since
			 * it's now useless.
			 */
			btrfs_free_extent_state(*cached_state);
			*cached_state = NULL;
			if (state)
				goto got_it;
			goto out;
		}
		btrfs_free_extent_state(*cached_state);
		*cached_state = NULL;
	}

	state = find_first_extent_bit_state(tree, start, bits);
got_it:
	if (state) {
		cache_state_if_flags(state, cached_state, 0);
		*start_ret = state->start;
		*end_ret = state->end;
		ret = true;
	}
out:
	spin_unlock(&tree->lock);
	return ret;
}

/*
 * Find a contiguous area of bits
 *
 * @tree:      io tree to check
 * @start:     offset to start the search from
 * @start_ret: the first offset we found with the bits set
 * @end_ret:   the final contiguous range of the bits that were set
 * @bits:      bits to look for
 *
 * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges
 * to set bits appropriately, and then merge them again. During this time it
 * will drop the tree->lock, so use this helper if you want to find the actual
 * contiguous area for given bits. We will search to the first bit we find, and
 * then walk down the tree until we find a non-contiguous area. The area
 * returned will be the full contiguous area with the bits set.
 *
 * Returns true if we found a range with the given bits set, in which case
 * @start_ret and @end_ret are updated, or false if no range was found.
 */
bool btrfs_find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
				      u64 *start_ret, u64 *end_ret, u32 bits)
{
	struct extent_state *state;
	bool ret = false;

	ASSERT(!btrfs_fs_incompat(btrfs_extent_io_tree_to_fs_info(tree), NO_HOLES));

	spin_lock(&tree->lock);
	state = find_first_extent_bit_state(tree, start, bits);
	if (state) {
		*start_ret = state->start;
		*end_ret = state->end;
		while ((state = next_state(state)) != NULL) {
			if (state->start > (*end_ret + 1))
				break;
			*end_ret = state->end;
		}
		ret = true;
	}
	spin_unlock(&tree->lock);
	return ret;
}

/*
 * Find a contiguous range of bytes in the file marked as delalloc, not more
 * than 'max_bytes'. start and end are used to return the range.
 *
 * True is returned if we find something, false if nothing was in the tree.
 */
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
			       u64 *end, u64 max_bytes,
			       struct extent_state **cached_state)
{
	struct extent_state *state;
	u64 cur_start = *start;
	bool found = false;
	u64 total_bytes = 0;

	spin_lock(&tree->lock);

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search(tree, cur_start);
	if (!state) {
		*end = (u64)-1;
		goto out;
	}

	while (state) {
		if (found && (state->start != cur_start ||
			      (state->state & EXTENT_BOUNDARY))) {
			goto out;
		}
		if (!(state->state & EXTENT_DELALLOC)) {
			if (!found)
				*end = state->end;
			goto out;
		}
		if (!found) {
			*start = state->start;
			*cached_state = state;
			refcount_inc(&state->refs);
		}
		found = true;
		*end = state->end;
		cur_start = state->end + 1;
		total_bytes += state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
		state = next_state(state);
	}
out:
	spin_unlock(&tree->lock);
	return found;
}

/*
 * Set some bits on a range in the tree.
 * This may require allocations or sleeping. By default all allocations use
 * GFP_NOFS, use EXTENT_NOWAIT for GFP_NOWAIT.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if some
 * part of the range already has the desired bits set. The extent_state of the
 * existing range is returned in failed_state in this case, and the start of the
 * existing range is returned in failed_start. failed_state is used as an
 * optimization for wait_extent_bit; failed_start must be used as the source of
 * truth as failed_state may have changed since we returned.
 *
 * [start, end] is inclusive. This takes the tree lock.
 */
static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			  u32 bits, u64 *failed_start,
			  struct extent_state **failed_state,
			  struct extent_state **cached_state,
			  struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node **p = NULL;
	struct rb_node *parent = NULL;
	int ret = 0;
	u64 last_start;
	u64 last_end;
	u32 exclusive_bits = (bits & EXTENT_LOCK_BITS);
	gfp_t mask;

	set_gfp_mask_from_bits(&bits, &mask);
	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);

	if (exclusive_bits)
		ASSERT(failed_start);
	else
		ASSERT(failed_start == NULL && failed_state == NULL);
again:
	if (!prealloc) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}
	/* Optimistically preallocate the extent changeset ulist node. */
	if (changeset)
		extent_changeset_prealloc(changeset, mask);

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state))
			goto hit_next;
	}
	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search_for_insert(tree, start, &p, &parent);
	if (!state) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		prealloc->start = start;
		prealloc->end = end;
		insert_state_fast(tree, prealloc, p, parent, bits, changeset);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		if (state->state & exclusive_bits) {
			*failed_start = state->start;
			cache_state(state, failed_state);
			ret = -EEXIST;
			goto out;
		}

		set_state_bits(tree, state, bits, changeset);
		cache_state(state, cached_state);
		merge_state(tree, state);
		if (last_end >= end)
			goto out;
		start = last_end + 1;
		state = next_state(state);
		if (state && state->start == start && !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired bit
	 * on it.
	 */
	if (state->start < start) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			cache_state(state, failed_state);
			ret = -EEXIST;
			goto out;
		}

		/*
		 * If this extent already has all the bits we want set, then
		 * skip it, not necessary to split it or do anything with it.
		 */
		if ((state->state & bits) == bits) {
			start = state->end + 1;
			cache_state(state, cached_state);
			goto search_again;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		ret = split_state(tree, state, prealloc, start);
		if (ret)
			extent_io_tree_panic(tree, state, "split", ret);

		prealloc = NULL;
		if (ret)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, bits, changeset);
			cache_state(state, cached_state);
			merge_state(tree, state);
			if (last_end >= end)
				goto out;
			start = last_end + 1;
			state = next_state(state);
			if (state && state->start == start && !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
	if (state->start > start) {
		struct extent_state *inserted_state;

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;

		/*
		 * Avoid freeing 'prealloc' if it can be merged with the next
		 * extent.
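		 * The new state covers only the hole: either [start, end] if
		 * the range ends before the next extent (end < last_start), or
		 * [start, last_start - 1] otherwise.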
		 */
		prealloc->start = start;
		if (end < last_start)
			prealloc->end = end;
		else
			prealloc->end = last_start - 1;

		inserted_state = insert_state(tree, prealloc, bits, changeset);
		if (IS_ERR(inserted_state)) {
			ret = PTR_ERR(inserted_state);
			extent_io_tree_panic(tree, prealloc, "insert", ret);
			goto out;
		}

		cache_state(inserted_state, cached_state);
		if (inserted_state == prealloc)
			prealloc = NULL;
		start = inserted_state->end + 1;

		/* Beyond target range, stop. */
		if (start > end)
			goto out;

		if (need_resched())
			goto search_again;

		state = next_search_state(inserted_state, end);
		/*
		 * If there's a next state, whether contiguous or not, we don't
		 * need to unlock and start search again. If it's not contiguous
		 * we will end up here and try to allocate a prealloc state and insert.
		 */
		if (state)
			goto hit_next;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half.
	 */
	if (state->start <= end && state->end > end) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			cache_state(state, failed_state);
			ret = -EEXIST;
			goto out;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		ret = split_state(tree, state, prealloc, end + 1);
		if (ret) {
			extent_io_tree_panic(tree, state, "split", ret);
			prealloc = NULL;
			goto out;
		}

		set_state_bits(tree, prealloc, bits, changeset);
		cache_state(prealloc, cached_state);
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	btrfs_free_extent_state(prealloc);

	return ret;
}

int btrfs_set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			 u32 bits, struct extent_state **cached_state)
{
	return set_extent_bit(tree, start, end, bits, NULL, NULL, cached_state, NULL);
}

/*
 * Convert all bits in a given range from one bit to another
 *
 * @tree:         the io tree to search
 * @start:        the start offset in bytes
 * @end:          the end offset in bytes (inclusive)
 * @bits:         the bits to set in this range
 * @clear_bits:   the bits to clear in this range
 * @cached_state: state that we're going to cache
 *
 * This will go through and set bits for the given range. If any states exist
 * already in this range they are set with the given bit and cleared of the
 * clear_bits. This is only meant to be used by things that are mergeable, i.e.
 * converting from say DELALLOC to DIRTY. This is not meant to be used with
 * boundary bits like LOCK.
 *
 * All allocations are done with GFP_NOFS.
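 *
 * The cached state, if provided, is used the same way as in set_extent_bit():
 * it lets the function skip the initial tree search when the cached state
 * still covers @start.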
 */
int btrfs_convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			     u32 bits, u32 clear_bits,
			     struct extent_state **cached_state)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node **p = NULL;
	struct rb_node *parent = NULL;
	int ret = 0;
	u64 last_start;
	u64 last_end;
	bool first_iteration = true;

	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
				       clear_bits);

again:
	if (!prealloc) {
		/*
		 * Best effort, don't worry if extent state allocation fails
		 * here for the first iteration. We might have a cached state
		 * that matches exactly the target range, in which case no
		 * extent state allocations are needed. We'll only know this
		 * after locking the tree.
		 */
		prealloc = alloc_extent_state(GFP_NOFS);
		if (!prealloc && !first_iteration)
			return -ENOMEM;
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state))
			goto hit_next;
	}

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search_for_insert(tree, start, &p, &parent);
	if (!state) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			ret = -ENOMEM;
			goto out;
		}
		prealloc->start = start;
		prealloc->end = end;
		insert_state_fast(tree, prealloc, p, parent, bits, NULL);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going.
	 */
	if (state->start == start && state->end <= end) {
		set_state_bits(tree, state, bits, NULL);
		cache_state(state, cached_state);
		state = clear_state_bit(tree, state, clear_bits, 0, end, NULL);
		if (last_end >= end)
			goto out;
		start = last_end + 1;
		if (state && state->start == start && !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired bit
	 * on it.
	 */
	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			ret = -ENOMEM;
			goto out;
		}
		ret = split_state(tree, state, prealloc, start);
		prealloc = NULL;
		if (ret) {
			extent_io_tree_panic(tree, state, "split", ret);
			goto out;
		}
		if (state->end <= end) {
			set_state_bits(tree, state, bits, NULL);
			cache_state(state, cached_state);
			state = clear_state_bit(tree, state, clear_bits, 0, end, NULL);
			if (last_end >= end)
				goto out;
			start = last_end + 1;
			if (state && state->start == start && !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
	if (state->start > start) {
		struct extent_state *inserted_state;

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			ret = -ENOMEM;
			goto out;
		}

		/*
		 * Avoid freeing 'prealloc' if it can be merged with the next
		 * extent.
		 */
		prealloc->start = start;
		if (end < last_start)
			prealloc->end = end;
		else
			prealloc->end = last_start - 1;

		inserted_state = insert_state(tree, prealloc, bits, NULL);
		if (IS_ERR(inserted_state)) {
			ret = PTR_ERR(inserted_state);
			extent_io_tree_panic(tree, prealloc, "insert", ret);
			goto out;
		}
		cache_state(inserted_state, cached_state);
		if (inserted_state == prealloc)
			prealloc = NULL;
		start = inserted_state->end + 1;

		/* Beyond target range, stop. */
		if (start > end)
			goto out;

		if (need_resched())
			goto search_again;

		state = next_search_state(inserted_state, end);
		/*
		 * If there's a next state, whether contiguous or not, we don't
		 * need to unlock and start search again. If it's not contiguous
		 * we will end up here and try to allocate a prealloc state and insert.
		 */
		if (state)
			goto hit_next;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half.
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			ret = -ENOMEM;
			goto out;
		}

		ret = split_state(tree, state, prealloc, end + 1);
		if (ret) {
			extent_io_tree_panic(tree, state, "split", ret);
			prealloc = NULL;
			goto out;
		}

		set_state_bits(tree, prealloc, bits, NULL);
		cache_state(prealloc, cached_state);
		clear_state_bit(tree, prealloc, clear_bits, 0, end, NULL);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	cond_resched();
	first_iteration = false;
	goto again;

out:
	spin_unlock(&tree->lock);
	btrfs_free_extent_state(prealloc);

	return ret;
}

/*
 * Find the first range that has @bits not set. This range could start before
 * @start.
 *
 * @tree:      the tree to search
 * @start:     offset at/after which the found extent should start
 * @start_ret: records the beginning of the range
 * @end_ret:   records the end of the range (inclusive)
 * @bits:      the set of bits which must be unset
 *
 * Since an unallocated range is also considered one which doesn't have the bits
 * set, it's possible that @end_ret contains -1. This happens in case the range
 * spans (last_range_end, end of device]. In this case it's up to the caller to
 * trim @end_ret to the appropriate size.
 */
void btrfs_find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
				       u64 *start_ret, u64 *end_ret, u32 bits)
{
	struct extent_state *state;
	struct extent_state *prev = NULL, *next = NULL;

	spin_lock(&tree->lock);

	/* Find first extent with bits cleared */
	while (1) {
		state = tree_search_prev_next(tree, start, &prev, &next);
		if (!state && !next && !prev) {
			/*
			 * Tree is completely empty, send full range and let
			 * caller deal with it
			 */
			*start_ret = 0;
			*end_ret = -1;
			goto out;
		} else if (!state && !next) {
			/*
			 * We are past the last allocated chunk, set start at
			 * the end of the last extent.
			 */
			*start_ret = prev->end + 1;
			*end_ret = -1;
			goto out;
		} else if (!state) {
			state = next;
		}

		/*
		 * At this point 'state' either contains 'start' or start is
		 * before 'state'
		 */
		if (in_range(start, state->start, state->end - state->start + 1)) {
			if (state->state & bits) {
				/*
				 * |--range with bits set--|
				 *        |
				 *        start
				 */
				start = state->end + 1;
			} else {
				/*
				 * 'start' falls within a range that doesn't
				 * have the bits set, so take its start as the
				 * beginning of the desired range
				 *
				 * |--range with bits cleared----|
				 *        |
				 *        start
				 */
				*start_ret = state->start;
				break;
			}
		} else {
			/*
			 * |---prev range---|---hole/unset---|---node range---|
			 *                          |
			 *                        start
			 *
			 * or
			 *
			 * |---hole/unset--||--first node--|
			 * 0        |
			 *          start
			 */
			if (prev)
				*start_ret = prev->end + 1;
			else
				*start_ret = 0;
			break;
		}
	}

	/*
	 * Find the longest stretch from start until an entry which has the
	 * bits set
	 */
	while (state) {
		if (state->end >= start && !(state->state & bits)) {
			*end_ret = state->end;
		} else {
			*end_ret = state->start - 1;
			break;
		}
		state = next_state(state);
	}
out:
	spin_unlock(&tree->lock);
}

/*
 * Count the number of bytes in the tree that have a given bit(s) set for a
 * given range.
 *
 * @tree:         The io tree to search.
 * @start:        The start offset of the range. This value is updated to the
 *                offset of the first byte found with the given bit(s), so it
 *                can end up being bigger than the initial value.
 * @search_end:   The end offset (inclusive value) of the search range.
 * @max_bytes:    The maximum byte count we are interested in. The search stops
 *                once it reaches this count.
 * @bits:         The bits the range must have in order to be accounted for.
 *                If multiple bits are set, then only subranges that have all
 *                the bits set are accounted for.
 * @contig:       Indicate if we should ignore holes in the range or not. If
 *                this is true, then stop once we find a hole.
 * @cached_state: A cached state to be used across multiple calls to this
 *                function in order to speed up searches. Use NULL if this is
 *                called only once or if each call does not start where the
 *                previous one ended.
 *
 * Returns the total number of bytes found within the given range that have
 * all given bits set. If the returned number of bytes is greater than zero
 * then @start is updated with the offset of the first byte with the bits set.
 */
u64 btrfs_count_range_bits(struct extent_io_tree *tree,
			   u64 *start, u64 search_end, u64 max_bytes,
			   u32 bits, int contig,
			   struct extent_state **cached_state)
{
	struct extent_state *state = NULL;
	struct extent_state *cached;
	u64 cur_start = *start;
	u64 total_bytes = 0;
	u64 last = 0;
	int found = 0;

	if (WARN_ON(search_end < cur_start))
		return 0;

	spin_lock(&tree->lock);

	if (!cached_state || !*cached_state)
		goto search;

	cached = *cached_state;

	if (!extent_state_in_tree(cached))
		goto search;

	if (cached->start <= cur_start && cur_start <= cached->end) {
		state = cached;
	} else if (cached->start > cur_start) {
		struct extent_state *prev;

		/*
		 * The cached state starts after our search range's start. Check
		 * if the previous state record starts at or before the range we
		 * are looking for, and if so, use it - this is a common case
		 * when there are holes between records in the tree. If there is
		 * no previous state record, we can start from our cached state.
		 */
		prev = prev_state(cached);
		if (!prev)
			state = cached;
		else if (prev->start <= cur_start && cur_start <= prev->end)
			state = prev;
	}

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
search:
	if (!state)
		state = tree_search(tree, cur_start);

	while (state) {
		if (state->start > search_end)
			break;
		if (contig && found && state->start > last + 1)
			break;
		if (state->end >= cur_start && (state->state & bits) == bits) {
			total_bytes += min(search_end, state->end) + 1 -
				       max(cur_start, state->start);
			if (total_bytes >= max_bytes)
				break;
			if (!found) {
				*start = max(cur_start, state->start);
				found = 1;
			}
			last = state->end;
		} else if (contig && found) {
			break;
		}
		state = next_state(state);
	}

	if (cached_state) {
		btrfs_free_extent_state(*cached_state);
		*cached_state = state;
		if (state)
			refcount_inc(&state->refs);
	}

	spin_unlock(&tree->lock);

	return total_bytes;
}

/*
 * Check if the single @bit exists in the given range.
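 * Unlike btrfs_test_range_bit(), this returns true if any part of the range
 * has @bit set, even if the whole range does not.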
 */
bool btrfs_test_range_bit_exists(struct extent_io_tree *tree, u64 start, u64 end, u32 bit)
{
	struct extent_state *state;
	bool bitset = false;

	ASSERT(is_power_of_2(bit));

	spin_lock(&tree->lock);
	state = tree_search(tree, start);
	while (state) {
		if (state->start > end)
			break;

		if (state->state & bit) {
			bitset = true;
			break;
		}

		if (state->end >= end)
			break;
		state = next_state(state);
	}
	spin_unlock(&tree->lock);
	return bitset;
}

void btrfs_get_range_bits(struct extent_io_tree *tree, u64 start, u64 end, u32 *bits,
			  struct extent_state **cached_state)
{
	struct extent_state *state;

	/*
	 * The cached state is currently mandatory and not used to start the
	 * search, only to cache the first state record found in the range.
	 */
	ASSERT(cached_state != NULL);
	ASSERT(*cached_state == NULL);

	*bits = 0;

	spin_lock(&tree->lock);
	state = tree_search(tree, start);
	if (state && state->start < end) {
		*cached_state = state;
		refcount_inc(&state->refs);
	}
	while (state) {
		if (state->start > end)
			break;

		*bits |= state->state;

		if (state->end >= end)
			break;

		state = next_state(state);
	}
	spin_unlock(&tree->lock);
}

/*
 * Check if the whole range [@start, @end] (inclusive) contains the single @bit set.
 */
bool btrfs_test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bit,
			  struct extent_state *cached)
{
	struct extent_state *state;
	bool bitset = true;

	ASSERT(is_power_of_2(bit));
	ASSERT(start < end);

	spin_lock(&tree->lock);
	if (cached && extent_state_in_tree(cached) && cached->start <= start &&
	    cached->end > start)
		state = cached;
	else
		state = tree_search(tree, start);
	while (state) {
		if (state->start > start) {
			bitset = false;
			break;
		}

		if ((state->state & bit) == 0) {
			bitset = false;
			break;
		}

		if (state->end >= end)
			break;

		/* Next state must start where this one ends. */
		start = state->end + 1;
		state = next_state(state);
	}

	/* We ran out of states and were still inside of our range. */
	if (!state)
		bitset = false;
	spin_unlock(&tree->lock);
	return bitset;
}

/* Wrappers around set/clear extent bit */
int btrfs_set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
				 u32 bits, struct extent_changeset *changeset)
{
	/*
	 * We don't support EXTENT_LOCK_BITS yet, as the current changeset will
	 * record any bits changed, so for the EXTENT_LOCK_BITS case it will
	 * either fail with -EEXIST or the changeset will record the whole range.
	 */
	ASSERT(!(bits & EXTENT_LOCK_BITS));

	return set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset);
}

int btrfs_clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
				   u32 bits, struct extent_changeset *changeset)
{
	/*
	 * Don't support the EXTENT_LOCK_BITS case, same reason as
	 * btrfs_set_record_extent_bits().
	 */
	ASSERT(!(bits & EXTENT_LOCK_BITS));

	return btrfs_clear_extent_bit_changeset(tree, start, end, bits, NULL, changeset);
}

bool btrfs_try_lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
				u32 bits, struct extent_state **cached)
{
	int err;
	u64 failed_start;

	err = set_extent_bit(tree, start, end, bits, &failed_start, NULL,
			     cached, NULL);
	if (err == -EEXIST) {
		if (failed_start > start)
			btrfs_clear_extent_bit(tree, start, failed_start - 1,
					       bits, cached);
		return false;
	}
	return true;
}

/*
 * Either insert or lock the state struct between start and end. If part of the
 * range is already locked, unlock what we managed to lock, wait for the
 * conflicting bits to be cleared and retry.
 */
int btrfs_lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
			   struct extent_state **cached_state)
{
	struct extent_state *failed_state = NULL;
	int err;
	u64 failed_start;

	err = set_extent_bit(tree, start, end, bits, &failed_start,
			     &failed_state, cached_state, NULL);
	while (err == -EEXIST) {
		if (failed_start != start)
			btrfs_clear_extent_bit(tree, start, failed_start - 1,
					       bits, cached_state);

		wait_extent_bit(tree, failed_start, end, bits, &failed_state);
		err = set_extent_bit(tree, start, end, bits, &failed_start,
				     &failed_state, cached_state, NULL);
	}
	return err;
}

/*
 * Get the extent state that follows the given extent state.
 * This is meant to be used in a context where we know no other tasks can
 * concurrently modify the tree.
 */
struct extent_state *btrfs_next_extent_state(struct extent_io_tree *tree,
					     struct extent_state *state)
{
	struct extent_state *next;

	spin_lock(&tree->lock);
	ASSERT(extent_state_in_tree(state));
	next = next_state(state);
	if (next)
		refcount_inc(&next->refs);
	spin_unlock(&tree->lock);

	return next;
}

void __cold btrfs_extent_state_free_cachep(void)
{
	btrfs_extent_state_leak_debug_check();
	kmem_cache_destroy(extent_state_cache);
}

int __init btrfs_extent_state_init_cachep(void)
{
	extent_state_cache = kmem_cache_create("btrfs_extent_state",
					       sizeof(struct extent_state), 0, 0,
					       NULL);
	if (!extent_state_cache)
		return -ENOMEM;

	return 0;
}