// SPDX-License-Identifier: GPL-2.0

#include <linux/slab.h>
#include <trace/events/btrfs.h>
#include "messages.h"
#include "ctree.h"
#include "extent_io.h"
#include "extent-io-tree.h"
#include "btrfs_inode.h"

static struct kmem_cache *extent_state_cache;

static inline bool extent_state_in_tree(const struct extent_state *state)
{
	return !RB_EMPTY_NODE(&state->rb_node);
}

#ifdef CONFIG_BTRFS_DEBUG
static LIST_HEAD(states);
static DEFINE_SPINLOCK(leak_lock);

static inline void btrfs_leak_debug_add_state(struct extent_state *state)
{
	unsigned long flags;

	spin_lock_irqsave(&leak_lock, flags);
	list_add(&state->leak_list, &states);
	spin_unlock_irqrestore(&leak_lock, flags);
}

static inline void btrfs_leak_debug_del_state(struct extent_state *state)
{
	unsigned long flags;

	spin_lock_irqsave(&leak_lock, flags);
	list_del(&state->leak_list);
	spin_unlock_irqrestore(&leak_lock, flags);
}

static inline void btrfs_extent_state_leak_debug_check(void)
{
	struct extent_state *state;

	while (!list_empty(&states)) {
		state = list_first_entry(&states, struct extent_state, leak_list);
		btrfs_err(NULL,
			  "state leak: start %llu end %llu state %u in tree %d refs %d",
			  state->start, state->end, state->state,
			  extent_state_in_tree(state),
			  refcount_read(&state->refs));
		list_del(&state->leak_list);
		WARN_ON_ONCE(1);
		kmem_cache_free(extent_state_cache, state);
	}
}

#define btrfs_debug_check_extent_io_range(tree, start, end)		\
	__btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
						       struct extent_io_tree *tree,
						       u64 start, u64 end)
{
	const struct btrfs_inode *inode = tree->inode;
	u64 isize;

	if (tree->owner != IO_TREE_INODE_IO)
		return;

	isize = i_size_read(&inode->vfs_inode);
	if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
		btrfs_debug_rl(inode->root->fs_info,
			       "%s: ino %llu isize %llu odd range [%llu,%llu]",
			       caller, btrfs_ino(inode), isize, start, end);
	}
}
#else
#define btrfs_leak_debug_add_state(state)		do {} while (0)
#define btrfs_leak_debug_del_state(state)		do {} while (0)
#define btrfs_extent_state_leak_debug_check()		do {} while (0)
#define btrfs_debug_check_extent_io_range(c, s, e)	do {} while (0)
#endif

/* Read-only access to the inode. */
const struct btrfs_inode *btrfs_extent_io_tree_to_inode(const struct extent_io_tree *tree)
{
	if (tree->owner == IO_TREE_INODE_IO)
		return tree->inode;
	return NULL;
}

/* For read-only access to fs_info. */
const struct btrfs_fs_info *btrfs_extent_io_tree_to_fs_info(const struct extent_io_tree *tree)
{
	if (tree->owner == IO_TREE_INODE_IO)
		return tree->inode->root->fs_info;
	return tree->fs_info;
}

void btrfs_extent_io_tree_init(struct btrfs_fs_info *fs_info,
			       struct extent_io_tree *tree, unsigned int owner)
{
	tree->state = RB_ROOT;
	spin_lock_init(&tree->lock);
	tree->fs_info = fs_info;
	tree->owner = owner;
}

/*
 * Empty an io tree, removing and freeing every extent state record from the
 * tree. This should be called once we are sure no other task can access the
 * tree anymore, so no tree updates happen after we empty the tree and there
 * aren't any waiters on any extent state record (EXTENT_LOCK_BITS are never
 * set on any extent state when calling this function).
 */
void btrfs_extent_io_tree_release(struct extent_io_tree *tree)
{
	struct rb_root root;
	struct extent_state *state;
	struct extent_state *tmp;

	spin_lock(&tree->lock);
	root = tree->state;
	tree->state = RB_ROOT;
	rbtree_postorder_for_each_entry_safe(state, tmp, &root, rb_node) {
		/* Clear node to keep free_extent_state() happy. */
		RB_CLEAR_NODE(&state->rb_node);
		ASSERT(!(state->state & EXTENT_LOCK_BITS));
		/*
		 * No need for a memory barrier here, as we are holding the tree
		 * lock and we only change the waitqueue while holding that lock
		 * (see wait_extent_bit()).
		 */
		ASSERT(!waitqueue_active(&state->wq));
		btrfs_free_extent_state(state);
		cond_resched_lock(&tree->lock);
	}
	/*
	 * Should still be empty even after a reschedule, no other task should
	 * be accessing the tree anymore.
	 */
	ASSERT(RB_EMPTY_ROOT(&tree->state));
	spin_unlock(&tree->lock);
}
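
/*
 * Illustrative lifecycle sketch, not taken from real callers: an owner embeds
 * an extent_io_tree, initializes it once, marks and clears ranges while it is
 * live, and releases it only when no other task can reach it anymore. The
 * structure below and the IO_TREE_SELFTEST owner value (from the owner enum
 * in extent-io-tree.h) are placeholders for illustration only:
 *
 *	struct my_owner {
 *		struct btrfs_fs_info *fs_info;
 *		struct extent_io_tree io_tree;
 *	};
 *
 *	static void my_owner_init(struct my_owner *o, struct btrfs_fs_info *fs_info)
 *	{
 *		btrfs_extent_io_tree_init(fs_info, &o->io_tree, IO_TREE_SELFTEST);
 *	}
 *
 *	static void my_owner_exit(struct my_owner *o)
 *	{
 *		btrfs_extent_io_tree_release(&o->io_tree);
 *	}
 */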

static struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;

	/*
	 * The given mask might not be appropriate for the slab allocator,
	 * drop the unsupported bits.
	 */
	mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state)
		return state;
	state->state = 0;
	RB_CLEAR_NODE(&state->rb_node);
	btrfs_leak_debug_add_state(state);
	refcount_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	trace_btrfs_alloc_extent_state(state, mask, _RET_IP_);
	return state;
}

static struct extent_state *alloc_extent_state_atomic(struct extent_state *prealloc)
{
	if (!prealloc)
		prealloc = alloc_extent_state(GFP_ATOMIC);

	return prealloc;
}

void btrfs_free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (refcount_dec_and_test(&state->refs)) {
		WARN_ON(extent_state_in_tree(state));
		btrfs_leak_debug_del_state(state);
		trace_btrfs_free_extent_state(state, _RET_IP_);
		kmem_cache_free(extent_state_cache, state);
	}
}

static int add_extent_changeset(struct extent_state *state, u32 bits,
				struct extent_changeset *changeset,
				int set)
{
	if (!changeset)
		return 0;
	if (set && (state->state & bits) == bits)
		return 0;
	if (!set && (state->state & bits) == 0)
		return 0;
	changeset->bytes_changed += state->end - state->start + 1;

	return ulist_add(&changeset->range_changed, state->start, state->end, GFP_ATOMIC);
}

static inline struct extent_state *next_state(struct extent_state *state)
{
	struct rb_node *next = rb_next(&state->rb_node);

	return rb_entry_safe(next, struct extent_state, rb_node);
}

static inline struct extent_state *prev_state(struct extent_state *state)
{
	struct rb_node *next = rb_prev(&state->rb_node);

	return rb_entry_safe(next, struct extent_state, rb_node);
}

/*
 * Search @tree for an entry that contains @offset or, if none exists, for the
 * first entry that starts and ends after that offset.
 *
 * @tree:       the tree to search
 * @offset:     search offset
 * @node_ret:   pointer where new node should be anchored (used when inserting an
 *              entry in the tree)
 * @parent_ret: points to the entry that would have been the parent of the
 *              entry containing @offset
 *
 * Return a pointer to the entry that contains @offset byte address.
 *
 * If no such entry exists, return the first entry that starts and ends after
 * @offset if one exists, otherwise NULL.
 *
 * If the returned entry starts at @offset, then @node_ret and @parent_ret
 * aren't changed.
 */
static inline struct extent_state *tree_search_for_insert(struct extent_io_tree *tree,
							  u64 offset,
							  struct rb_node ***node_ret,
							  struct rb_node **parent_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node **node = &root->rb_node;
	struct rb_node *prev = NULL;
	struct extent_state *entry = NULL;

	while (*node) {
		prev = *node;
		entry = rb_entry(prev, struct extent_state, rb_node);

		if (offset < entry->start)
			node = &(*node)->rb_left;
		else if (offset > entry->end)
			node = &(*node)->rb_right;
		else
			return entry;
	}

	if (node_ret)
		*node_ret = node;
	if (parent_ret)
		*parent_ret = prev;

	/*
	 * Return either the current entry if it contains offset (it ends after
	 * or at offset) or the first entry that starts and ends after offset if
	 * one exists, or NULL.
	 */
	while (entry && offset > entry->end)
		entry = next_state(entry);

	return entry;
}
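
/*
 * Illustrative example of the contract above (the ranges are made up): with
 * records [0, 4095] and [16384, 20479] in the tree,
 * tree_search_for_insert(tree, 2048, NULL, NULL) returns the [0, 4095] record,
 * while tree_search_for_insert(tree, 8192, &node, &parent) returns the
 * [16384, 20479] record and points @node and @parent at the slot where a new
 * record starting at 8192 would be linked.
 */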

/*
 * Search offset in the tree or fill neighbor rbtree node pointers.
 *
 * @tree:      the tree to search
 * @offset:    offset that should fall within an entry in @tree
 * @next_ret:  pointer to the first entry whose range ends after @offset
 * @prev_ret:  pointer to the first entry whose range begins before @offset
 *
 * Return a pointer to the entry that contains @offset byte address. If no
 * such entry exists, then return NULL and fill @prev_ret and @next_ret.
 * Otherwise return the found entry and other pointers are left untouched.
 */
static struct extent_state *tree_search_prev_next(struct extent_io_tree *tree,
						  u64 offset,
						  struct extent_state **prev_ret,
						  struct extent_state **next_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node **node = &root->rb_node;
	struct extent_state *orig_prev;
	struct extent_state *entry = NULL;

	ASSERT(prev_ret);
	ASSERT(next_ret);

	while (*node) {
		entry = rb_entry(*node, struct extent_state, rb_node);

		if (offset < entry->start)
			node = &(*node)->rb_left;
		else if (offset > entry->end)
			node = &(*node)->rb_right;
		else
			return entry;
	}

	orig_prev = entry;
	while (entry && offset > entry->end)
		entry = next_state(entry);
	*next_ret = entry;
	entry = orig_prev;

	while (entry && offset < entry->start)
		entry = prev_state(entry);
	*prev_ret = entry;

	return NULL;
}

/*
 * Inexact rb-tree search, return the next entry if @offset is not found.
 */
static inline struct extent_state *tree_search(struct extent_io_tree *tree, u64 offset)
{
	return tree_search_for_insert(tree, offset, NULL, NULL);
}

static void __cold extent_io_tree_panic(const struct extent_io_tree *tree,
					const struct extent_state *state,
					const char *opname,
					int err)
{
	btrfs_panic(btrfs_extent_io_tree_to_fs_info(tree), err,
		    "extent io tree error on %s state start %llu end %llu",
		    opname, state->start, state->end);
}

static void merge_prev_state(struct extent_io_tree *tree, struct extent_state *state)
{
	struct extent_state *prev;

	prev = prev_state(state);
	if (prev && prev->end == state->start - 1 && prev->state == state->state) {
		if (tree->owner == IO_TREE_INODE_IO)
			btrfs_merge_delalloc_extent(tree->inode, state, prev);
		state->start = prev->start;
		rb_erase(&prev->rb_node, &tree->state);
		RB_CLEAR_NODE(&prev->rb_node);
		btrfs_free_extent_state(prev);
	}
}

static void merge_next_state(struct extent_io_tree *tree, struct extent_state *state)
{
	struct extent_state *next;

	next = next_state(state);
	if (next && next->start == state->end + 1 && next->state == state->state) {
		if (tree->owner == IO_TREE_INODE_IO)
			btrfs_merge_delalloc_extent(tree->inode, state, next);
		state->end = next->end;
		rb_erase(&next->rb_node, &tree->state);
		RB_CLEAR_NODE(&next->rb_node);
		btrfs_free_extent_state(next);
	}
}
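
/*
 * Illustrative example (the ranges are made up): if the tree holds [0, 4095]
 * and [8192, 12287], both with exactly the same bits set, and a record
 * [4096, 8191] with those same bits ends up between them, the two helpers
 * above collapse all three into a single [0, 12287] record and free the
 * absorbed extent states.
 */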

/*
 * Utility function to look for merge candidates inside a given range. Any
 * extents with matching state are merged together into a single extent in the
 * tree. Extents with EXTENT_IO in their state field are not merged because
 * the end_io handlers need to be able to do operations on them without
 * sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
{
	if (state->state & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY))
		return;

	merge_prev_state(tree, state);
	merge_next_state(tree, state);
}

static void set_state_bits(struct extent_io_tree *tree,
			   struct extent_state *state,
			   u32 bits, struct extent_changeset *changeset)
{
	u32 bits_to_set = bits & ~EXTENT_CTLBITS;
	int ret;

	if (tree->owner == IO_TREE_INODE_IO)
		btrfs_set_delalloc_extent(tree->inode, state, bits);

	ret = add_extent_changeset(state, bits_to_set, changeset, 1);
	BUG_ON(ret < 0);
	state->state |= bits_to_set;
}

/*
 * Insert an extent_state struct into the tree. 'bits' are set on the
 * struct before it is inserted.
 *
 * Returns a pointer to the struct extent_state record containing the range
 * requested for insertion, which may be the same as the given struct or it
 * may be an existing record in the tree that was expanded to accommodate the
 * requested range. In case of an extent_state different from the one that was
 * given, the latter can be freed or reused by the caller.
 *
 * On error it returns an error pointer.
 *
 * The tree lock is not taken internally. This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static struct extent_state *insert_state(struct extent_io_tree *tree,
					 struct extent_state *state,
					 u32 bits,
					 struct extent_changeset *changeset)
{
	struct rb_node **node;
	struct rb_node *parent = NULL;
	const u64 start = state->start - 1;
	const u64 end = state->end + 1;
	const bool try_merge = !(bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY));

	set_state_bits(tree, state, bits, changeset);

	node = &tree->state.rb_node;
	while (*node) {
		struct extent_state *entry;

		parent = *node;
		entry = rb_entry(parent, struct extent_state, rb_node);

		if (state->end < entry->start) {
			if (try_merge && end == entry->start &&
			    state->state == entry->state) {
				if (tree->owner == IO_TREE_INODE_IO)
					btrfs_merge_delalloc_extent(tree->inode,
								    state, entry);
				entry->start = state->start;
				merge_prev_state(tree, entry);
				state->state = 0;
				return entry;
			}
			node = &(*node)->rb_left;
		} else if (state->end > entry->end) {
			if (try_merge && entry->end == start &&
			    state->state == entry->state) {
				if (tree->owner == IO_TREE_INODE_IO)
					btrfs_merge_delalloc_extent(tree->inode,
								    state, entry);
				entry->end = state->end;
				merge_next_state(tree, entry);
				state->state = 0;
				return entry;
			}
			node = &(*node)->rb_right;
		} else {
			return ERR_PTR(-EEXIST);
		}
	}

	rb_link_node(&state->rb_node, parent, node);
	rb_insert_color(&state->rb_node, &tree->state);

	return state;
}

/*
 * Insert state to @tree to the location given by @node and @parent.
 */
static void insert_state_fast(struct extent_io_tree *tree,
			      struct extent_state *state, struct rb_node **node,
			      struct rb_node *parent, unsigned bits,
			      struct extent_changeset *changeset)
{
	set_state_bits(tree, state, bits, changeset);
	rb_link_node(&state->rb_node, parent, node);
	rb_insert_color(&state->rb_node, &tree->state);
	merge_state(tree, state);
}

/*
 * Split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half. 'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling, the tree has 'orig' at [orig->start, orig->end]. After
 * calling, there are two extent state structs in the tree:
 *
 * prealloc: [orig->start, split - 1]
 * orig:     [split, orig->end]
 *
 * The tree locks are not taken by this function. They need to be held by the
 * caller.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *parent = NULL;
	struct rb_node **node;

	if (tree->owner == IO_TREE_INODE_IO)
		btrfs_split_delalloc_extent(tree->inode, orig, split);

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;

	parent = &orig->rb_node;
	node = &parent;
	while (*node) {
		struct extent_state *entry;

		parent = *node;
		entry = rb_entry(parent, struct extent_state, rb_node);

		if (prealloc->end < entry->start) {
			node = &(*node)->rb_left;
		} else if (prealloc->end > entry->end) {
			node = &(*node)->rb_right;
		} else {
			btrfs_free_extent_state(prealloc);
			return -EEXIST;
		}
	}

	rb_link_node(&prealloc->rb_node, parent, node);
	rb_insert_color(&prealloc->rb_node, &tree->state);

	return 0;
}

/*
 * Use this during tree iteration to avoid doing next node searches when it's
 * not needed (the current record ends at or after the target range's end).
 */
static inline struct extent_state *next_search_state(struct extent_state *state, u64 end)
{
	if (state->end < end)
		return next_state(state);

	return NULL;
}
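
/*
 * Illustrative example (the offsets are made up): splitting an existing
 * [0, 16383] record at offset 8192 leaves 'orig' covering [8192, 16383] and
 * links the preallocated state as [0, 8191], both carrying the same bits, so
 * callers can then set or clear bits on just one half.
 */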

/*
 * Utility function to clear some bits in an extent state struct. It will
 * optionally wake up anyone waiting on this state (wake == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree.
 */
static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
					    struct extent_state *state,
					    u32 bits, int wake, u64 end,
					    struct extent_changeset *changeset)
{
	struct extent_state *next;
	u32 bits_to_clear = bits & ~EXTENT_CTLBITS;
	int ret;

	if (tree->owner == IO_TREE_INODE_IO)
		btrfs_clear_delalloc_extent(tree->inode, state, bits);

	ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
	BUG_ON(ret < 0);
	state->state &= ~bits_to_clear;
	if (wake)
		wake_up(&state->wq);
	if (state->state == 0) {
		next = next_search_state(state, end);
		if (extent_state_in_tree(state)) {
			rb_erase(&state->rb_node, &tree->state);
			RB_CLEAR_NODE(&state->rb_node);
			btrfs_free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
		next = next_search_state(state, end);
	}
	return next;
}

/*
 * Detect if extent bits request NOWAIT semantics and set the gfp mask
 * accordingly, unsetting the EXTENT_NOWAIT bit.
 */
static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask)
{
	*mask = (*bits & EXTENT_NOWAIT ? GFP_NOWAIT : GFP_NOFS);
	*bits &= EXTENT_NOWAIT - 1;
}
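
/*
 * Illustrative usage sketch: callers that must not block on reclaim can OR
 * EXTENT_NOWAIT into the bits passed to the set/clear helpers below, e.g.
 *
 *	btrfs_set_extent_bit(tree, start, end,
 *			     EXTENT_DELALLOC | EXTENT_NOWAIT, NULL);
 *
 * which makes the internal extent state allocations use GFP_NOWAIT instead of
 * GFP_NOFS (the EXTENT_NOWAIT bit itself never reaches the tree).
 */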

/*
 * Clear some bits on a range in the tree. This may require splitting or
 * inserting elements in the tree, so the gfp mask is used to indicate which
 * allocations or sleeping are allowed.
 *
 * The range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns 0 on success and < 0 on error.
 */
int btrfs_clear_extent_bit_changeset(struct extent_io_tree *tree, u64 start, u64 end,
				     u32 bits, struct extent_state **cached_state,
				     struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *cached;
	struct extent_state *prealloc = NULL;
	u64 last_end;
	int ret = 0;
	bool clear;
	bool wake;
	const bool delete = (bits & EXTENT_CLEAR_ALL_BITS);
	gfp_t mask;

	set_gfp_mask_from_bits(&bits, &mask);
	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);

	if (delete)
		bits |= ~EXTENT_CTLBITS;

	if (bits & EXTENT_DELALLOC)
		bits |= EXTENT_NORESERVE;

	wake = (bits & EXTENT_LOCK_BITS);
	clear = (bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY));
again:
	if (!prealloc) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}

	spin_lock(&tree->lock);
	if (cached_state) {
		cached = *cached_state;

		if (clear) {
			*cached_state = NULL;
			cached_state = NULL;
		}

		if (cached && extent_state_in_tree(cached) &&
		    cached->start <= start && cached->end > start) {
			if (clear)
				refcount_dec(&cached->refs);
			state = cached;
			goto hit_next;
		}
		if (clear)
			btrfs_free_extent_state(cached);
	}

	/* This search will find the extents that end after our range starts. */
	state = tree_search(tree, start);
	if (!state)
		goto out;
hit_next:
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);
	last_end = state->end;

	/* The state doesn't have the wanted bits, go ahead. */
	if (!(state->state & bits)) {
		state = next_search_state(state, end);
		goto next;
	}

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we clear the desired bit
	 * on it.
	 */

	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		ret = split_state(tree, state, prealloc, start);
		prealloc = NULL;
		if (ret) {
			extent_io_tree_panic(tree, state, "split", ret);
			goto out;
		}
		if (state->end <= end) {
			state = clear_state_bit(tree, state, bits, wake, end,
						changeset);
			goto next;
		}
		if (need_resched())
			goto search_again;
		/*
		 * Fallthrough and try atomic extent state allocation if needed.
		 * If it fails we'll jump to 'search_again', retry the allocation
		 * in non-atomic mode and start the search again.
		 */
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit on the first half.
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		ret = split_state(tree, state, prealloc, end + 1);
		if (ret) {
			extent_io_tree_panic(tree, state, "split", ret);
			prealloc = NULL;
			goto out;
		}

		if (wake)
			wake_up(&state->wq);

		clear_state_bit(tree, prealloc, bits, wake, end, changeset);

		prealloc = NULL;
		goto out;
	}

	state = clear_state_bit(tree, state, bits, wake, end, changeset);
next:
	if (last_end >= end)
		goto out;
	start = last_end + 1;
	if (state && !need_resched())
		goto hit_next;

search_again:
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	btrfs_free_extent_state(prealloc);

	return ret;

}
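
/*
 * Illustrative usage sketch (the range is made up; btrfs_clear_extent_bit()
 * is the thin wrapper declared in extent-io-tree.h around
 * btrfs_clear_extent_bit_changeset()):
 *
 *	btrfs_set_extent_bit(tree, 0, SZ_1M - 1, EXTENT_DELALLOC, NULL);
 *	(the first megabyte is now tracked as delalloc)
 *	btrfs_clear_extent_bit(tree, 0, SZ_1M - 1, EXTENT_DELALLOC, NULL);
 *
 * Both offsets are inclusive in all of these helpers.
 */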

/*
 * Wait for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function.
 */
static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			    u32 bits, struct extent_state **cached_state)
{
	struct extent_state *state;

	btrfs_debug_check_extent_io_range(tree, start, end);

	spin_lock(&tree->lock);
again:
	/*
	 * Maintain cached_state, as we may not remove it from the tree if there
	 * are more bits than the bits we're waiting on set on this state.
	 */
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (extent_state_in_tree(state) &&
		    state->start <= start && start < state->end)
			goto process_node;
	}
	while (1) {
		/*
		 * This search will find all the extents that end after our
		 * range starts.
		 */
		state = tree_search(tree, start);
process_node:
		if (!state)
			break;
		if (state->start > end)
			goto out;

		if (state->state & bits) {
			DEFINE_WAIT(wait);

			start = state->start;
			refcount_inc(&state->refs);
			prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
			spin_unlock(&tree->lock);
			schedule();
			spin_lock(&tree->lock);
			finish_wait(&state->wq, &wait);
			btrfs_free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (!cond_resched_lock(&tree->lock)) {
			state = next_state(state);
			goto process_node;
		}
	}
out:
	/* This state is no longer useful, clear it and free it up. */
	if (cached_state && *cached_state) {
		state = *cached_state;
		*cached_state = NULL;
		btrfs_free_extent_state(state);
	}
	spin_unlock(&tree->lock);
}

static void cache_state_if_flags(struct extent_state *state,
				 struct extent_state **cached_ptr,
				 unsigned flags)
{
	if (cached_ptr && !(*cached_ptr)) {
		if (!flags || (state->state & flags)) {
			*cached_ptr = state;
			refcount_inc(&state->refs);
		}
	}
}

static void cache_state(struct extent_state *state,
			struct extent_state **cached_ptr)
{
	return cache_state_if_flags(state, cached_ptr, EXTENT_LOCK_BITS | EXTENT_BOUNDARY);
}

/*
 * Find the first state struct with 'bits' set after 'start', and return it.
 * tree->lock must be held. NULL will be returned if nothing was found after
 * 'start'.
 */
static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
							u64 start, u32 bits)
{
	struct extent_state *state;

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search(tree, start);
	while (state) {
		if (state->state & bits)
			return state;
		state = next_state(state);
	}
	return NULL;
}

/*
 * Find the first offset in the io tree with one or more @bits set.
 *
 * Note: If there are multiple bits set in @bits, any of them will match.
 *
 * Return true if we find something, and update @start_ret and @end_ret.
 * Return false if we found nothing.
 */
bool btrfs_find_first_extent_bit(struct extent_io_tree *tree, u64 start,
				 u64 *start_ret, u64 *end_ret, u32 bits,
				 struct extent_state **cached_state)
{
	struct extent_state *state;
	bool ret = false;

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->end == start - 1 && extent_state_in_tree(state)) {
			while ((state = next_state(state)) != NULL) {
				if (state->state & bits)
					break;
			}
			/*
			 * If we found the next extent state, clear cached_state
			 * so that we can cache the next extent state below and
			 * avoid future calls going over the same extent state
			 * again. If we haven't found any, clear as well since
			 * it's now useless.
			 */
			btrfs_free_extent_state(*cached_state);
			*cached_state = NULL;
			if (state)
				goto got_it;
			goto out;
		}
		btrfs_free_extent_state(*cached_state);
		*cached_state = NULL;
	}

	state = find_first_extent_bit_state(tree, start, bits);
got_it:
	if (state) {
		cache_state_if_flags(state, cached_state, 0);
		*start_ret = state->start;
		*end_ret = state->end;
		ret = true;
	}
out:
	spin_unlock(&tree->lock);
	return ret;
}
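
/*
 * Illustrative iteration sketch (the bit is a placeholder, anything tracked
 * in the tree works): walk every range that currently has the bit set,
 * reusing @cached so consecutive calls do not re-search from the root:
 *
 *	struct extent_state *cached = NULL;
 *	u64 cur = 0, found_start, found_end;
 *
 *	while (btrfs_find_first_extent_bit(tree, cur, &found_start, &found_end,
 *					   EXTENT_DELALLOC, &cached)) {
 *		(process [found_start, found_end])
 *		cur = found_end + 1;
 *	}
 *	btrfs_free_extent_state(cached);
 */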

/*
 * Find a contiguous area of bits
 *
 * @tree:      io tree to check
 * @start:     offset to start the search from
 * @start_ret: the first offset we found with the bits set
 * @end_ret:   the final contiguous range of the bits that were set
 * @bits:      bits to look for
 *
 * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges
 * to set bits appropriately, and then merge them again. During this time it
 * will drop the tree->lock, so use this helper if you want to find the actual
 * contiguous area for given bits. We will search to the first bit we find, and
 * then walk down the tree until we find a non-contiguous area. The area
 * returned will be the full contiguous area with the bits set.
 *
 * Returns true if we found a range with the given bits set, in which case
 * @start_ret and @end_ret are updated, or false if no range was found.
 */
bool btrfs_find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
				      u64 *start_ret, u64 *end_ret, u32 bits)
{
	struct extent_state *state;
	bool ret = false;

	ASSERT(!btrfs_fs_incompat(btrfs_extent_io_tree_to_fs_info(tree), NO_HOLES));

	spin_lock(&tree->lock);
	state = find_first_extent_bit_state(tree, start, bits);
	if (state) {
		*start_ret = state->start;
		*end_ret = state->end;
		while ((state = next_state(state)) != NULL) {
			if (state->start > (*end_ret + 1))
				break;
			*end_ret = state->end;
		}
		ret = true;
	}
	spin_unlock(&tree->lock);
	return ret;
}

/*
 * Find a contiguous range of bytes in the file marked as delalloc, not more
 * than 'max_bytes'. start and end are used to return the range.
 *
 * True is returned if we find something, false if nothing was in the tree.
 */
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
			       u64 *end, u64 max_bytes,
			       struct extent_state **cached_state)
{
	struct extent_state *state;
	u64 cur_start = *start;
	bool found = false;
	u64 total_bytes = 0;

	spin_lock(&tree->lock);

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search(tree, cur_start);
	if (!state) {
		*end = (u64)-1;
		goto out;
	}

	while (state) {
		if (found && (state->start != cur_start ||
			      (state->state & EXTENT_BOUNDARY))) {
			goto out;
		}
		if (!(state->state & EXTENT_DELALLOC)) {
			if (!found)
				*end = state->end;
			goto out;
		}
		if (!found) {
			*start = state->start;
			*cached_state = state;
			refcount_inc(&state->refs);
		}
		found = true;
		*end = state->end;
		cur_start = state->end + 1;
		total_bytes += state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
		state = next_state(state);
	}
out:
	spin_unlock(&tree->lock);
	return found;
}
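
/*
 * Illustrative example (the ranges are made up): with records [0, 4095],
 * [4096, 8191] and [16384, 20479] all having @bits set,
 * btrfs_find_contiguous_extent_bit(tree, 0, &start, &end, bits) reports the
 * merged area [0, 8191] and stops at the hole before 16384, while
 * btrfs_find_first_extent_bit() would only report the first record it finds.
 */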

/*
 * Set some bits on a range in the tree. This may require allocations or
 * sleeping. By default all allocations use GFP_NOFS, use EXTENT_NOWAIT for
 * GFP_NOWAIT.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if some
 * part of the range already has the desired bits set. The extent_state of the
 * existing range is returned in failed_state in this case, and the start of
 * the existing range is returned in failed_start. failed_state is used as an
 * optimization for wait_extent_bit; failed_start must be used as the source
 * of truth as failed_state may have changed since we returned.
 *
 * [start, end] is inclusive. This takes the tree lock.
 */
static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			  u32 bits, u64 *failed_start,
			  struct extent_state **failed_state,
			  struct extent_state **cached_state,
			  struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node **p = NULL;
	struct rb_node *parent = NULL;
	int ret = 0;
	u64 last_start;
	u64 last_end;
	u32 exclusive_bits = (bits & EXTENT_LOCK_BITS);
	gfp_t mask;

	set_gfp_mask_from_bits(&bits, &mask);
	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);

	if (exclusive_bits)
		ASSERT(failed_start);
	else
		ASSERT(failed_start == NULL && failed_state == NULL);
again:
	if (!prealloc) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}
	/* Optimistically preallocate the extent changeset ulist node. */
	if (changeset)
		extent_changeset_prealloc(changeset, mask);

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state))
			goto hit_next;
	}
	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search_for_insert(tree, start, &p, &parent);
	if (!state) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		prealloc->start = start;
		prealloc->end = end;
		insert_state_fast(tree, prealloc, p, parent, bits, changeset);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		if (state->state & exclusive_bits) {
			*failed_start = state->start;
			cache_state(state, failed_state);
			ret = -EEXIST;
			goto out;
		}

		set_state_bits(tree, state, bits, changeset);
		cache_state(state, cached_state);
		merge_state(tree, state);
		if (last_end >= end)
			goto out;
		start = last_end + 1;
		state = next_state(state);
		if (state && state->start == start && !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired bit
	 * on it.
	 */
	if (state->start < start) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			cache_state(state, failed_state);
			ret = -EEXIST;
			goto out;
		}

		/*
		 * If this extent already has all the bits we want set, then
		 * skip it, not necessary to split it or do anything with it.
		 */
		if ((state->state & bits) == bits) {
			start = state->end + 1;
			cache_state(state, cached_state);
			goto search_again;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		ret = split_state(tree, state, prealloc, start);
		if (ret)
			extent_io_tree_panic(tree, state, "split", ret);

		prealloc = NULL;
		if (ret)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, bits, changeset);
			cache_state(state, cached_state);
			merge_state(tree, state);
			if (last_end >= end)
				goto out;
			start = last_end + 1;
			state = next_state(state);
			if (state && state->start == start && !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
	if (state->start > start) {
		struct extent_state *inserted_state;

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;

		/*
		 * Avoid freeing 'prealloc' if it can be merged with the next
		 * extent.
		 */
		prealloc->start = start;
		if (end < last_start)
			prealloc->end = end;
		else
			prealloc->end = last_start - 1;

		inserted_state = insert_state(tree, prealloc, bits, changeset);
		if (IS_ERR(inserted_state)) {
			ret = PTR_ERR(inserted_state);
			extent_io_tree_panic(tree, prealloc, "insert", ret);
			goto out;
		}

		cache_state(inserted_state, cached_state);
		if (inserted_state == prealloc)
			prealloc = NULL;
		start = inserted_state->end + 1;

		/* Beyond target range, stop. */
		if (start > end)
			goto out;

		if (need_resched())
			goto search_again;

		state = next_search_state(inserted_state, end);
		/*
		 * If there's a next state, whether contiguous or not, we don't
		 * need to unlock and start search again. If it's not contiguous
		 * we will end up here and try to allocate a prealloc state and
		 * insert.
		 */
		if (state)
			goto hit_next;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half
	 */
	if (state->start <= end && state->end > end) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			cache_state(state, failed_state);
			ret = -EEXIST;
			goto out;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		ret = split_state(tree, state, prealloc, end + 1);
		if (ret) {
			extent_io_tree_panic(tree, state, "split", ret);
			prealloc = NULL;
			goto out;
		}

		set_state_bits(tree, prealloc, bits, changeset);
		cache_state(prealloc, cached_state);
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	btrfs_free_extent_state(prealloc);

	return ret;

}

int btrfs_set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			 u32 bits, struct extent_state **cached_state)
{
	return set_extent_bit(tree, start, end, bits, NULL, NULL, cached_state, NULL);
}
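
/*
 * Illustrative example of the exclusive bit contract described in the comment
 * above set_extent_bit() (offsets made up): if [0, 4095] is already locked and
 * a caller tries to lock [0, 8191], the call fails with -EEXIST and
 * *failed_start == 0, meaning nothing was locked. If instead [4096, 8191] is
 * already locked, the call still fails but *failed_start == 4096 and the
 * caller now owns [0, 4095], which it must clear before waiting, which is
 * exactly what btrfs_try_lock_extent_bits() and btrfs_lock_extent_bits()
 * below do.
 */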

/*
 * Convert all bits in a given range from one bit to another
 *
 * @tree:         the io tree to search
 * @start:        the start offset in bytes
 * @end:          the end offset in bytes (inclusive)
 * @bits:         the bits to set in this range
 * @clear_bits:   the bits to clear in this range
 * @cached_state: state that we're going to cache
 *
 * This will go through and set bits for the given range. If any states exist
 * already in this range they are set with the given bit and cleared of the
 * clear_bits. This is only meant to be used by things that are mergeable, ie.
 * converting from say DELALLOC to DIRTY. This is not meant to be used with
 * boundary bits like LOCK.
 *
 * All allocations are done with GFP_NOFS.
 */
int btrfs_convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			     u32 bits, u32 clear_bits,
			     struct extent_state **cached_state)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node **p = NULL;
	struct rb_node *parent = NULL;
	int ret = 0;
	u64 last_start;
	u64 last_end;
	bool first_iteration = true;

	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
				       clear_bits);

again:
	if (!prealloc) {
		/*
		 * Best effort, don't worry if extent state allocation fails
		 * here for the first iteration. We might have a cached state
		 * that matches exactly the target range, in which case no
		 * extent state allocations are needed. We'll only know this
		 * after locking the tree.
		 */
		prealloc = alloc_extent_state(GFP_NOFS);
		if (!prealloc && !first_iteration)
			return -ENOMEM;
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state))
			goto hit_next;
	}

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search_for_insert(tree, start, &p, &parent);
	if (!state) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			ret = -ENOMEM;
			goto out;
		}
		prealloc->start = start;
		prealloc->end = end;
		insert_state_fast(tree, prealloc, p, parent, bits, NULL);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going.
	 */
	if (state->start == start && state->end <= end) {
		set_state_bits(tree, state, bits, NULL);
		cache_state(state, cached_state);
		state = clear_state_bit(tree, state, clear_bits, 0, end, NULL);
		if (last_end >= end)
			goto out;
		start = last_end + 1;
		if (state && state->start == start && !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired bit
	 * on it.
	 */
	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			ret = -ENOMEM;
			goto out;
		}
		ret = split_state(tree, state, prealloc, start);
		prealloc = NULL;
		if (ret) {
			extent_io_tree_panic(tree, state, "split", ret);
			goto out;
		}
		if (state->end <= end) {
			set_state_bits(tree, state, bits, NULL);
			cache_state(state, cached_state);
			state = clear_state_bit(tree, state, clear_bits, 0, end, NULL);
			if (last_end >= end)
				goto out;
			start = last_end + 1;
			if (state && state->start == start && !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
	if (state->start > start) {
		struct extent_state *inserted_state;

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			ret = -ENOMEM;
			goto out;
		}

		/*
		 * Avoid freeing 'prealloc' if it can be merged with the next
		 * extent.
		 */
		prealloc->start = start;
		if (end < last_start)
			prealloc->end = end;
		else
			prealloc->end = last_start - 1;

		inserted_state = insert_state(tree, prealloc, bits, NULL);
		if (IS_ERR(inserted_state)) {
			ret = PTR_ERR(inserted_state);
			extent_io_tree_panic(tree, prealloc, "insert", ret);
			goto out;
		}
		cache_state(inserted_state, cached_state);
		if (inserted_state == prealloc)
			prealloc = NULL;
		start = inserted_state->end + 1;

		/* Beyond target range, stop. */
		if (start > end)
			goto out;

		if (need_resched())
			goto search_again;

		state = next_search_state(inserted_state, end);
		/*
		 * If there's a next state, whether contiguous or not, we don't
		 * need to unlock and start search again. If it's not contiguous
		 * we will end up here and try to allocate a prealloc state and
		 * insert.
		 */
		if (state)
			goto hit_next;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half.
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			ret = -ENOMEM;
			goto out;
		}

		ret = split_state(tree, state, prealloc, end + 1);
		if (ret) {
			extent_io_tree_panic(tree, state, "split", ret);
			prealloc = NULL;
			goto out;
		}

		set_state_bits(tree, prealloc, bits, NULL);
		cache_state(prealloc, cached_state);
		clear_state_bit(tree, prealloc, clear_bits, 0, end, NULL);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	cond_resched();
	first_iteration = false;
	goto again;

out:
	spin_unlock(&tree->lock);
	btrfs_free_extent_state(prealloc);

	return ret;
}
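
/*
 * Illustrative usage sketch (both bits come from extent-io-tree.h; the range
 * and cached state are placeholders): atomically replace one tracked bit with
 * another over a range, for example:
 *
 *	btrfs_convert_extent_bit(tree, start, end, EXTENT_NEED_WAIT,
 *				 EXTENT_DIRTY, &cached);
 *
 * which sets EXTENT_NEED_WAIT and clears EXTENT_DIRTY on every state record
 * in [start, end], splitting records at the range boundaries as needed.
 */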

/*
 * Find the first range that has @bits not set. This range could start before
 * @start.
 *
 * @tree:      the tree to search
 * @start:     offset at/after which the found extent should start
 * @start_ret: records the beginning of the range
 * @end_ret:   records the end of the range (inclusive)
 * @bits:      the set of bits which must be unset
 *
 * Since unallocated range is also considered one which doesn't have the bits
 * set it's possible that @end_ret contains -1, this happens in case the range
 * spans (last_range_end, end of device]. In this case it's up to the caller to
 * trim @end_ret to the appropriate size.
 */
void btrfs_find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
				       u64 *start_ret, u64 *end_ret, u32 bits)
{
	struct extent_state *state;
	struct extent_state *prev = NULL, *next = NULL;

	spin_lock(&tree->lock);

	/* Find first extent with bits cleared */
	while (1) {
		state = tree_search_prev_next(tree, start, &prev, &next);
		if (!state && !next && !prev) {
			/*
			 * Tree is completely empty, send full range and let
			 * caller deal with it
			 */
			*start_ret = 0;
			*end_ret = -1;
			goto out;
		} else if (!state && !next) {
			/*
			 * We are past the last allocated chunk, set start at
			 * the end of the last extent.
			 */
			*start_ret = prev->end + 1;
			*end_ret = -1;
			goto out;
		} else if (!state) {
			state = next;
		}

		/*
		 * At this point 'state' either contains 'start' or start is
		 * before 'state'
		 */
		if (in_range(start, state->start, state->end - state->start + 1)) {
			if (state->state & bits) {
				/*
				 * |--range with bits set--|
				 *    |
				 *    start
				 */
				start = state->end + 1;
			} else {
				/*
				 * 'start' falls within a range that doesn't
				 * have the bits set, so take its start as the
				 * beginning of the desired range
				 *
				 * |--range with bits cleared----|
				 *      |
				 *      start
				 */
				*start_ret = state->start;
				break;
			}
		} else {
			/*
			 * |---prev range---|---hole/unset---|---node range---|
			 *                          |
			 *                        start
			 *
			 * or
			 *
			 * |---hole/unset--||--first node--|
			 *     0   |
			 *         start
			 */
			if (prev)
				*start_ret = prev->end + 1;
			else
				*start_ret = 0;
			break;
		}
	}

	/*
	 * Find the longest stretch from start until an entry which has the
	 * bits set
	 */
	while (state) {
		if (state->end >= start && !(state->state & bits)) {
			*end_ret = state->end;
		} else {
			*end_ret = state->start - 1;
			break;
		}
		state = next_state(state);
	}
out:
	spin_unlock(&tree->lock);
}

/*
 * Count the number of bytes in the tree that have a given bit(s) set for a
 * given range.
 *
 * @tree:         The io tree to search.
 * @start:        The start offset of the range. This value is updated to the
 *                offset of the first byte found with the given bit(s), so it
 *                can end up being bigger than the initial value.
 * @search_end:   The end offset (inclusive value) of the search range.
 * @max_bytes:    The maximum byte count we are interested in. The search stops
 *                once it reaches this count.
 * @bits:         The bits the range must have in order to be accounted for.
 *                If multiple bits are set, then only subranges that have all
 *                the bits set are accounted for.
 * @contig:       Indicate if we should ignore holes in the range or not. If
 *                this is true, then stop once we find a hole.
 * @cached_state: A cached state to be used across multiple calls to this
 *                function in order to speed up searches. Use NULL if this is
 *                called only once or if each call does not start where the
 *                previous one ended.
 *
 * Returns the total number of bytes found within the given range that have
 * all given bits set. If the returned number of bytes is greater than zero
 * then @start is updated with the offset of the first byte with the bits set.
 */
u64 btrfs_count_range_bits(struct extent_io_tree *tree,
			   u64 *start, u64 search_end, u64 max_bytes,
			   u32 bits, bool contig,
			   struct extent_state **cached_state)
{
	struct extent_state *state = NULL;
	struct extent_state *cached;
	u64 cur_start = *start;
	u64 total_bytes = 0;
	u64 last = 0;
	int found = 0;

	if (WARN_ON(search_end < cur_start))
		return 0;

	spin_lock(&tree->lock);

	if (!cached_state || !*cached_state)
		goto search;

	cached = *cached_state;

	if (!extent_state_in_tree(cached))
		goto search;

	if (cached->start <= cur_start && cur_start <= cached->end) {
		state = cached;
	} else if (cached->start > cur_start) {
		struct extent_state *prev;

		/*
		 * The cached state starts after our search range's start. Check
		 * if the previous state record starts at or before the range we
		 * are looking for, and if so, use it - this is a common case
		 * when there are holes between records in the tree. If there is
		 * no previous state record, we can start from our cached state.
		 */
		prev = prev_state(cached);
		if (!prev)
			state = cached;
		else if (prev->start <= cur_start && cur_start <= prev->end)
			state = prev;
	}

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
search:
	if (!state)
		state = tree_search(tree, cur_start);

	while (state) {
		if (state->start > search_end)
			break;
		if (contig && found && state->start > last + 1)
			break;
		if (state->end >= cur_start && (state->state & bits) == bits) {
			total_bytes += min(search_end, state->end) + 1 -
				       max(cur_start, state->start);
			if (total_bytes >= max_bytes)
				break;
			if (!found) {
				*start = max(cur_start, state->start);
				found = 1;
			}
			last = state->end;
		} else if (contig && found) {
			break;
		}
		state = next_state(state);
	}

	if (cached_state) {
		btrfs_free_extent_state(*cached_state);
		*cached_state = state;
		if (state)
			refcount_inc(&state->refs);
	}

	spin_unlock(&tree->lock);

	return total_bytes;
}
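
/*
 * Illustrative usage sketch (the range is made up): count how many bytes with
 * EXTENT_DELALLOC set exist in the first megabyte tracked by the tree, and
 * learn where the first such byte is:
 *
 *	u64 delalloc_start = 0;
 *	u64 bytes;
 *
 *	bytes = btrfs_count_range_bits(tree, &delalloc_start, SZ_1M - 1,
 *				       (u64)-1, EXTENT_DELALLOC, false, NULL);
 */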

/*
 * Check if the single @bit exists in the given range.
 */
bool btrfs_test_range_bit_exists(struct extent_io_tree *tree, u64 start, u64 end, u32 bit)
{
	struct extent_state *state;
	bool bitset = false;

	ASSERT(is_power_of_2(bit));

	spin_lock(&tree->lock);
	state = tree_search(tree, start);
	while (state) {
		if (state->start > end)
			break;

		if (state->state & bit) {
			bitset = true;
			break;
		}

		if (state->end >= end)
			break;
		state = next_state(state);
	}
	spin_unlock(&tree->lock);
	return bitset;
}

void btrfs_get_range_bits(struct extent_io_tree *tree, u64 start, u64 end, u32 *bits,
			  struct extent_state **cached_state)
{
	struct extent_state *state;

	/*
	 * The cached state is currently mandatory and not used to start the
	 * search, only to cache the first state record found in the range.
	 */
	ASSERT(cached_state != NULL);
	ASSERT(*cached_state == NULL);

	*bits = 0;

	spin_lock(&tree->lock);
	state = tree_search(tree, start);
	if (state && state->start < end) {
		*cached_state = state;
		refcount_inc(&state->refs);
	}
	while (state) {
		if (state->start > end)
			break;

		*bits |= state->state;

		if (state->end >= end)
			break;

		state = next_state(state);
	}
	spin_unlock(&tree->lock);
}

/*
 * Check if the whole range [@start,@end) contains the single @bit set.
 */
bool btrfs_test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bit,
			  struct extent_state *cached)
{
	struct extent_state *state;
	bool bitset = true;

	ASSERT(is_power_of_2(bit));
	ASSERT(start < end);

	spin_lock(&tree->lock);
	if (cached && extent_state_in_tree(cached) && cached->start <= start &&
	    cached->end > start)
		state = cached;
	else
		state = tree_search(tree, start);
	while (state) {
		if (state->start > start) {
			bitset = false;
			break;
		}

		if ((state->state & bit) == 0) {
			bitset = false;
			break;
		}

		if (state->end >= end)
			break;

		/* Next state must start where this one ends. */
		start = state->end + 1;
		state = next_state(state);
	}

	/* We ran out of states and were still inside of our range. */
	if (!state)
		bitset = false;
	spin_unlock(&tree->lock);
	return bitset;
}

/* Wrappers around set/clear extent bit */
int btrfs_set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
				 u32 bits, struct extent_changeset *changeset)
{
	/*
	 * We don't support EXTENT_LOCK_BITS yet, as current changeset will
	 * record any bits changed, so for EXTENT_LOCK_BITS case, it will either
	 * fail with -EEXIST or changeset will record the whole range.
	 */
	ASSERT(!(bits & EXTENT_LOCK_BITS));

	return set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset);
}

int btrfs_clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
				   u32 bits, struct extent_changeset *changeset)
{
	/*
	 * Don't support EXTENT_LOCK_BITS case, same reason as
	 * set_record_extent_bits().
	 */
	ASSERT(!(bits & EXTENT_LOCK_BITS));

	return btrfs_clear_extent_bit_changeset(tree, start, end, bits, NULL, changeset);
}

bool btrfs_try_lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
				u32 bits, struct extent_state **cached)
{
	int ret;
	u64 failed_start;

	ret = set_extent_bit(tree, start, end, bits, &failed_start, NULL, cached, NULL);
	if (ret == -EEXIST) {
		if (failed_start > start)
			btrfs_clear_extent_bit(tree, start, failed_start - 1,
					       bits, cached);
		return 0;
	}
	return 1;
}

/*
 * Either insert or lock the state struct between start and end, waiting until
 * the whole range is locked.
 */
int btrfs_lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
			   struct extent_state **cached_state)
{
	struct extent_state *failed_state = NULL;
	int ret;
	u64 failed_start;

	ret = set_extent_bit(tree, start, end, bits, &failed_start,
			     &failed_state, cached_state, NULL);
	while (ret == -EEXIST) {
		if (failed_start != start)
			btrfs_clear_extent_bit(tree, start, failed_start - 1,
					       bits, cached_state);

		wait_extent_bit(tree, failed_start, end, bits, &failed_state);
		ret = set_extent_bit(tree, start, end, bits, &failed_start,
				     &failed_state, cached_state, NULL);
	}
	return ret;
}
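
/*
 * Illustrative locking sketch (EXTENT_LOCKED comes from extent-io-tree.h and
 * btrfs_clear_extent_bit() is declared there as well):
 *
 *	struct extent_state *cached = NULL;
 *
 *	btrfs_lock_extent_bits(tree, start, end, EXTENT_LOCKED, &cached);
 *	(the range is now exclusively held, do the work)
 *	btrfs_clear_extent_bit(tree, start, end, EXTENT_LOCKED, &cached);
 *
 * Clearing a lock bit wakes any tasks sleeping in wait_extent_bit() and also
 * drops the reference held by @cached.
 */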

/*
 * Get the extent state that follows the given extent state.
 * This is meant to be used in a context where we know no other tasks can
 * concurrently modify the tree.
 */
struct extent_state *btrfs_next_extent_state(struct extent_io_tree *tree,
					     struct extent_state *state)
{
	struct extent_state *next;

	spin_lock(&tree->lock);
	ASSERT(extent_state_in_tree(state));
	next = next_state(state);
	if (next)
		refcount_inc(&next->refs);
	spin_unlock(&tree->lock);

	return next;
}

void __cold btrfs_extent_state_free_cachep(void)
{
	btrfs_extent_state_leak_debug_check();
	kmem_cache_destroy(extent_state_cache);
}

int __init btrfs_extent_state_init_cachep(void)
{
	extent_state_cache = kmem_cache_create("btrfs_extent_state",
					       sizeof(struct extent_state), 0, 0,
					       NULL);
	if (!extent_state_cache)
		return -ENOMEM;

	return 0;
}