1 // SPDX-License-Identifier: GPL-2.0 2 3 #include <linux/slab.h> 4 #include <trace/events/btrfs.h> 5 #include "messages.h" 6 #include "ctree.h" 7 #include "extent_io.h" 8 #include "extent-io-tree.h" 9 #include "btrfs_inode.h" 10 11 static struct kmem_cache *extent_state_cache; 12 13 static inline bool extent_state_in_tree(const struct extent_state *state) 14 { 15 return !RB_EMPTY_NODE(&state->rb_node); 16 } 17 18 #ifdef CONFIG_BTRFS_DEBUG 19 static LIST_HEAD(states); 20 static DEFINE_SPINLOCK(leak_lock); 21 22 static inline void btrfs_leak_debug_add_state(struct extent_state *state) 23 { 24 unsigned long flags; 25 26 spin_lock_irqsave(&leak_lock, flags); 27 list_add(&state->leak_list, &states); 28 spin_unlock_irqrestore(&leak_lock, flags); 29 } 30 31 static inline void btrfs_leak_debug_del_state(struct extent_state *state) 32 { 33 unsigned long flags; 34 35 spin_lock_irqsave(&leak_lock, flags); 36 list_del(&state->leak_list); 37 spin_unlock_irqrestore(&leak_lock, flags); 38 } 39 40 static inline void btrfs_extent_state_leak_debug_check(void) 41 { 42 struct extent_state *state; 43 44 while (!list_empty(&states)) { 45 state = list_first_entry(&states, struct extent_state, leak_list); 46 btrfs_err(NULL, 47 "state leak: start %llu end %llu state %u in tree %d refs %d", 48 state->start, state->end, state->state, 49 extent_state_in_tree(state), 50 refcount_read(&state->refs)); 51 list_del(&state->leak_list); 52 WARN_ON_ONCE(1); 53 kmem_cache_free(extent_state_cache, state); 54 } 55 } 56 57 #define btrfs_debug_check_extent_io_range(tree, start, end) \ 58 __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end)) 59 static inline void __btrfs_debug_check_extent_io_range(const char *caller, 60 struct extent_io_tree *tree, 61 u64 start, u64 end) 62 { 63 const struct btrfs_inode *inode = tree->inode; 64 u64 isize; 65 66 if (tree->owner != IO_TREE_INODE_IO) 67 return; 68 69 isize = i_size_read(&inode->vfs_inode); 70 if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { 71 btrfs_debug_rl(inode->root->fs_info, 72 "%s: ino %llu isize %llu odd range [%llu,%llu]", 73 caller, btrfs_ino(inode), isize, start, end); 74 } 75 } 76 #else 77 #define btrfs_leak_debug_add_state(state) do {} while (0) 78 #define btrfs_leak_debug_del_state(state) do {} while (0) 79 #define btrfs_extent_state_leak_debug_check() do {} while (0) 80 #define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0) 81 #endif 82 83 /* Read-only access to the inode. */ 84 const struct btrfs_inode *btrfs_extent_io_tree_to_inode(const struct extent_io_tree *tree) 85 { 86 if (tree->owner == IO_TREE_INODE_IO) 87 return tree->inode; 88 return NULL; 89 } 90 91 /* For read-only access to fs_info. */ 92 const struct btrfs_fs_info *btrfs_extent_io_tree_to_fs_info(const struct extent_io_tree *tree) 93 { 94 if (tree->owner == IO_TREE_INODE_IO) 95 return tree->inode->root->fs_info; 96 return tree->fs_info; 97 } 98 99 void btrfs_extent_io_tree_init(struct btrfs_fs_info *fs_info, 100 struct extent_io_tree *tree, unsigned int owner) 101 { 102 tree->state = RB_ROOT; 103 spin_lock_init(&tree->lock); 104 tree->fs_info = fs_info; 105 tree->owner = owner; 106 } 107 108 /* 109 * Empty an io tree, removing and freeing every extent state record from the 110 * tree. This should be called once we are sure no other task can access the 111 * tree anymore, so no tree updates happen after we empty the tree and there 112 * aren't any waiters on any extent state record (EXTENT_LOCK_BITS are never 113 * set on any extent state when calling this function). 114 */ 115 void btrfs_extent_io_tree_release(struct extent_io_tree *tree) 116 { 117 struct rb_root root; 118 struct extent_state *state; 119 struct extent_state *tmp; 120 121 spin_lock(&tree->lock); 122 root = tree->state; 123 tree->state = RB_ROOT; 124 rbtree_postorder_for_each_entry_safe(state, tmp, &root, rb_node) { 125 /* Clear node to keep free_extent_state() happy. */ 126 RB_CLEAR_NODE(&state->rb_node); 127 ASSERT(!(state->state & EXTENT_LOCK_BITS)); 128 /* 129 * No need for a memory barrier here, as we are holding the tree 130 * lock and we only change the waitqueue while holding that lock 131 * (see wait_extent_bit()). 132 */ 133 ASSERT(!waitqueue_active(&state->wq)); 134 btrfs_free_extent_state(state); 135 cond_resched_lock(&tree->lock); 136 } 137 /* 138 * Should still be empty even after a reschedule, no other task should 139 * be accessing the tree anymore. 140 */ 141 ASSERT(RB_EMPTY_ROOT(&tree->state)); 142 spin_unlock(&tree->lock); 143 } 144 145 static struct extent_state *alloc_extent_state(gfp_t mask) 146 { 147 struct extent_state *state; 148 149 /* 150 * The given mask might be not appropriate for the slab allocator, 151 * drop the unsupported bits 152 */ 153 mask &= ~(__GFP_DMA32|__GFP_HIGHMEM); 154 state = kmem_cache_alloc(extent_state_cache, mask); 155 if (!state) 156 return state; 157 state->state = 0; 158 RB_CLEAR_NODE(&state->rb_node); 159 btrfs_leak_debug_add_state(state); 160 refcount_set(&state->refs, 1); 161 init_waitqueue_head(&state->wq); 162 trace_btrfs_alloc_extent_state(state, mask, _RET_IP_); 163 return state; 164 } 165 166 static struct extent_state *alloc_extent_state_atomic(struct extent_state *prealloc) 167 { 168 if (!prealloc) 169 prealloc = alloc_extent_state(GFP_ATOMIC); 170 171 return prealloc; 172 } 173 174 void btrfs_free_extent_state(struct extent_state *state) 175 { 176 if (!state) 177 return; 178 if (refcount_dec_and_test(&state->refs)) { 179 WARN_ON(extent_state_in_tree(state)); 180 btrfs_leak_debug_del_state(state); 181 trace_btrfs_free_extent_state(state, _RET_IP_); 182 kmem_cache_free(extent_state_cache, state); 183 } 184 } 185 186 static int add_extent_changeset(struct extent_state *state, u32 bits, 187 struct extent_changeset *changeset, 188 int set) 189 { 190 int ret; 191 192 if (!changeset) 193 return 0; 194 if (set && (state->state & bits) == bits) 195 return 0; 196 if (!set && (state->state & bits) == 0) 197 return 0; 198 changeset->bytes_changed += state->end - state->start + 1; 199 ret = ulist_add(&changeset->range_changed, state->start, state->end, 200 GFP_ATOMIC); 201 return ret; 202 } 203 204 static inline struct extent_state *next_state(struct extent_state *state) 205 { 206 struct rb_node *next = rb_next(&state->rb_node); 207 208 return rb_entry_safe(next, struct extent_state, rb_node); 209 } 210 211 static inline struct extent_state *prev_state(struct extent_state *state) 212 { 213 struct rb_node *next = rb_prev(&state->rb_node); 214 215 return rb_entry_safe(next, struct extent_state, rb_node); 216 } 217 218 /* 219 * Search @tree for an entry that contains @offset or if none exists for the 220 * first entry that starts and ends after that offset. 221 * 222 * @tree: the tree to search 223 * @offset: search offset 224 * @node_ret: pointer where new node should be anchored (used when inserting an 225 * entry in the tree) 226 * @parent_ret: points to entry which would have been the parent of the entry, 227 * containing @offset 228 * 229 * Return a pointer to the entry that contains @offset byte address. 230 * 231 * If no such entry exists, return the first entry that starts and ends after 232 * @offset if one exists, otherwise NULL. 233 * 234 * If the returned entry starts at @offset, then @node_ret and @parent_ret 235 * aren't changed. 236 */ 237 static inline struct extent_state *tree_search_for_insert(struct extent_io_tree *tree, 238 u64 offset, 239 struct rb_node ***node_ret, 240 struct rb_node **parent_ret) 241 { 242 struct rb_root *root = &tree->state; 243 struct rb_node **node = &root->rb_node; 244 struct rb_node *prev = NULL; 245 struct extent_state *entry = NULL; 246 247 while (*node) { 248 prev = *node; 249 entry = rb_entry(prev, struct extent_state, rb_node); 250 251 if (offset < entry->start) 252 node = &(*node)->rb_left; 253 else if (offset > entry->end) 254 node = &(*node)->rb_right; 255 else 256 return entry; 257 } 258 259 if (node_ret) 260 *node_ret = node; 261 if (parent_ret) 262 *parent_ret = prev; 263 264 /* 265 * Return either the current entry if it contains offset (it ends after 266 * or at offset) or the first entry that starts and ends after offset if 267 * one exists, or NULL. 268 */ 269 while (entry && offset > entry->end) 270 entry = next_state(entry); 271 272 return entry; 273 } 274 275 /* 276 * Search offset in the tree or fill neighbor rbtree node pointers. 277 * 278 * @tree: the tree to search 279 * @offset: offset that should fall within an entry in @tree 280 * @next_ret: pointer to the first entry whose range ends after @offset 281 * @prev_ret: pointer to the first entry whose range begins before @offset 282 * 283 * Return a pointer to the entry that contains @offset byte address. If no 284 * such entry exists, then return NULL and fill @prev_ret and @next_ret. 285 * Otherwise return the found entry and other pointers are left untouched. 286 */ 287 static struct extent_state *tree_search_prev_next(struct extent_io_tree *tree, 288 u64 offset, 289 struct extent_state **prev_ret, 290 struct extent_state **next_ret) 291 { 292 struct rb_root *root = &tree->state; 293 struct rb_node **node = &root->rb_node; 294 struct extent_state *orig_prev; 295 struct extent_state *entry = NULL; 296 297 ASSERT(prev_ret); 298 ASSERT(next_ret); 299 300 while (*node) { 301 entry = rb_entry(*node, struct extent_state, rb_node); 302 303 if (offset < entry->start) 304 node = &(*node)->rb_left; 305 else if (offset > entry->end) 306 node = &(*node)->rb_right; 307 else 308 return entry; 309 } 310 311 orig_prev = entry; 312 while (entry && offset > entry->end) 313 entry = next_state(entry); 314 *next_ret = entry; 315 entry = orig_prev; 316 317 while (entry && offset < entry->start) 318 entry = prev_state(entry); 319 *prev_ret = entry; 320 321 return NULL; 322 } 323 324 /* 325 * Inexact rb-tree search, return the next entry if @offset is not found 326 */ 327 static inline struct extent_state *tree_search(struct extent_io_tree *tree, u64 offset) 328 { 329 return tree_search_for_insert(tree, offset, NULL, NULL); 330 } 331 332 static void __cold extent_io_tree_panic(const struct extent_io_tree *tree, 333 const struct extent_state *state, 334 const char *opname, 335 int err) 336 { 337 btrfs_panic(btrfs_extent_io_tree_to_fs_info(tree), err, 338 "extent io tree error on %s state start %llu end %llu", 339 opname, state->start, state->end); 340 } 341 342 static void merge_prev_state(struct extent_io_tree *tree, struct extent_state *state) 343 { 344 struct extent_state *prev; 345 346 prev = prev_state(state); 347 if (prev && prev->end == state->start - 1 && prev->state == state->state) { 348 if (tree->owner == IO_TREE_INODE_IO) 349 btrfs_merge_delalloc_extent(tree->inode, state, prev); 350 state->start = prev->start; 351 rb_erase(&prev->rb_node, &tree->state); 352 RB_CLEAR_NODE(&prev->rb_node); 353 btrfs_free_extent_state(prev); 354 } 355 } 356 357 static void merge_next_state(struct extent_io_tree *tree, struct extent_state *state) 358 { 359 struct extent_state *next; 360 361 next = next_state(state); 362 if (next && next->start == state->end + 1 && next->state == state->state) { 363 if (tree->owner == IO_TREE_INODE_IO) 364 btrfs_merge_delalloc_extent(tree->inode, state, next); 365 state->end = next->end; 366 rb_erase(&next->rb_node, &tree->state); 367 RB_CLEAR_NODE(&next->rb_node); 368 btrfs_free_extent_state(next); 369 } 370 } 371 372 /* 373 * Utility function to look for merge candidates inside a given range. Any 374 * extents with matching state are merged together into a single extent in the 375 * tree. Extents with EXTENT_IO in their state field are not merged because 376 * the end_io handlers need to be able to do operations on them without 377 * sleeping (or doing allocations/splits). 378 * 379 * This should be called with the tree lock held. 380 */ 381 static void merge_state(struct extent_io_tree *tree, struct extent_state *state) 382 { 383 if (state->state & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY)) 384 return; 385 386 merge_prev_state(tree, state); 387 merge_next_state(tree, state); 388 } 389 390 static void set_state_bits(struct extent_io_tree *tree, 391 struct extent_state *state, 392 u32 bits, struct extent_changeset *changeset) 393 { 394 u32 bits_to_set = bits & ~EXTENT_CTLBITS; 395 int ret; 396 397 if (tree->owner == IO_TREE_INODE_IO) 398 btrfs_set_delalloc_extent(tree->inode, state, bits); 399 400 ret = add_extent_changeset(state, bits_to_set, changeset, 1); 401 BUG_ON(ret < 0); 402 state->state |= bits_to_set; 403 } 404 405 /* 406 * Insert an extent_state struct into the tree. 'bits' are set on the 407 * struct before it is inserted. 408 * 409 * Returns a pointer to the struct extent_state record containing the range 410 * requested for insertion, which may be the same as the given struct or it 411 * may be an existing record in the tree that was expanded to accommodate the 412 * requested range. In case of an extent_state different from the one that was 413 * given, the later can be freed or reused by the caller. 414 * 415 * On error it returns an error pointer. 416 * 417 * The tree lock is not taken internally. This is a utility function and 418 * probably isn't what you want to call (see set/clear_extent_bit). 419 */ 420 static struct extent_state *insert_state(struct extent_io_tree *tree, 421 struct extent_state *state, 422 u32 bits, 423 struct extent_changeset *changeset) 424 { 425 struct rb_node **node; 426 struct rb_node *parent = NULL; 427 const u64 start = state->start - 1; 428 const u64 end = state->end + 1; 429 const bool try_merge = !(bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY)); 430 431 set_state_bits(tree, state, bits, changeset); 432 433 node = &tree->state.rb_node; 434 while (*node) { 435 struct extent_state *entry; 436 437 parent = *node; 438 entry = rb_entry(parent, struct extent_state, rb_node); 439 440 if (state->end < entry->start) { 441 if (try_merge && end == entry->start && 442 state->state == entry->state) { 443 if (tree->owner == IO_TREE_INODE_IO) 444 btrfs_merge_delalloc_extent(tree->inode, 445 state, entry); 446 entry->start = state->start; 447 merge_prev_state(tree, entry); 448 state->state = 0; 449 return entry; 450 } 451 node = &(*node)->rb_left; 452 } else if (state->end > entry->end) { 453 if (try_merge && entry->end == start && 454 state->state == entry->state) { 455 if (tree->owner == IO_TREE_INODE_IO) 456 btrfs_merge_delalloc_extent(tree->inode, 457 state, entry); 458 entry->end = state->end; 459 merge_next_state(tree, entry); 460 state->state = 0; 461 return entry; 462 } 463 node = &(*node)->rb_right; 464 } else { 465 return ERR_PTR(-EEXIST); 466 } 467 } 468 469 rb_link_node(&state->rb_node, parent, node); 470 rb_insert_color(&state->rb_node, &tree->state); 471 472 return state; 473 } 474 475 /* 476 * Insert state to @tree to the location given by @node and @parent. 477 */ 478 static void insert_state_fast(struct extent_io_tree *tree, 479 struct extent_state *state, struct rb_node **node, 480 struct rb_node *parent, unsigned bits, 481 struct extent_changeset *changeset) 482 { 483 set_state_bits(tree, state, bits, changeset); 484 rb_link_node(&state->rb_node, parent, node); 485 rb_insert_color(&state->rb_node, &tree->state); 486 merge_state(tree, state); 487 } 488 489 /* 490 * Split a given extent state struct in two, inserting the preallocated 491 * struct 'prealloc' as the newly created second half. 'split' indicates an 492 * offset inside 'orig' where it should be split. 493 * 494 * Before calling, 495 * the tree has 'orig' at [orig->start, orig->end]. After calling, there 496 * are two extent state structs in the tree: 497 * prealloc: [orig->start, split - 1] 498 * orig: [ split, orig->end ] 499 * 500 * The tree locks are not taken by this function. They need to be held 501 * by the caller. 502 */ 503 static int split_state(struct extent_io_tree *tree, struct extent_state *orig, 504 struct extent_state *prealloc, u64 split) 505 { 506 struct rb_node *parent = NULL; 507 struct rb_node **node; 508 509 if (tree->owner == IO_TREE_INODE_IO) 510 btrfs_split_delalloc_extent(tree->inode, orig, split); 511 512 prealloc->start = orig->start; 513 prealloc->end = split - 1; 514 prealloc->state = orig->state; 515 orig->start = split; 516 517 parent = &orig->rb_node; 518 node = &parent; 519 while (*node) { 520 struct extent_state *entry; 521 522 parent = *node; 523 entry = rb_entry(parent, struct extent_state, rb_node); 524 525 if (prealloc->end < entry->start) { 526 node = &(*node)->rb_left; 527 } else if (prealloc->end > entry->end) { 528 node = &(*node)->rb_right; 529 } else { 530 btrfs_free_extent_state(prealloc); 531 return -EEXIST; 532 } 533 } 534 535 rb_link_node(&prealloc->rb_node, parent, node); 536 rb_insert_color(&prealloc->rb_node, &tree->state); 537 538 return 0; 539 } 540 541 /* 542 * Use this during tree iteration to avoid doing next node searches when it's 543 * not needed (the current record ends at or after the target range's end). 544 */ 545 static inline struct extent_state *next_search_state(struct extent_state *state, u64 end) 546 { 547 if (state->end < end) 548 return next_state(state); 549 550 return NULL; 551 } 552 553 /* 554 * Utility function to clear some bits in an extent state struct. It will 555 * optionally wake up anyone waiting on this state (wake == 1). 556 * 557 * If no bits are set on the state struct after clearing things, the 558 * struct is freed and removed from the tree 559 */ 560 static struct extent_state *clear_state_bit(struct extent_io_tree *tree, 561 struct extent_state *state, 562 u32 bits, int wake, u64 end, 563 struct extent_changeset *changeset) 564 { 565 struct extent_state *next; 566 u32 bits_to_clear = bits & ~EXTENT_CTLBITS; 567 int ret; 568 569 if (tree->owner == IO_TREE_INODE_IO) 570 btrfs_clear_delalloc_extent(tree->inode, state, bits); 571 572 ret = add_extent_changeset(state, bits_to_clear, changeset, 0); 573 BUG_ON(ret < 0); 574 state->state &= ~bits_to_clear; 575 if (wake) 576 wake_up(&state->wq); 577 if (state->state == 0) { 578 next = next_search_state(state, end); 579 if (extent_state_in_tree(state)) { 580 rb_erase(&state->rb_node, &tree->state); 581 RB_CLEAR_NODE(&state->rb_node); 582 btrfs_free_extent_state(state); 583 } else { 584 WARN_ON(1); 585 } 586 } else { 587 merge_state(tree, state); 588 next = next_search_state(state, end); 589 } 590 return next; 591 } 592 593 /* 594 * Detect if extent bits request NOWAIT semantics and set the gfp mask accordingly, 595 * unset the EXTENT_NOWAIT bit. 596 */ 597 static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask) 598 { 599 *mask = (*bits & EXTENT_NOWAIT ? GFP_NOWAIT : GFP_NOFS); 600 *bits &= EXTENT_NOWAIT - 1; 601 } 602 603 /* 604 * Clear some bits on a range in the tree. This may require splitting or 605 * inserting elements in the tree, so the gfp mask is used to indicate which 606 * allocations or sleeping are allowed. 607 * 608 * The range [start, end] is inclusive. 609 * 610 * This takes the tree lock, and returns 0 on success and < 0 on error. 611 */ 612 int btrfs_clear_extent_bit_changeset(struct extent_io_tree *tree, u64 start, u64 end, 613 u32 bits, struct extent_state **cached_state, 614 struct extent_changeset *changeset) 615 { 616 struct extent_state *state; 617 struct extent_state *cached; 618 struct extent_state *prealloc = NULL; 619 u64 last_end; 620 int ret = 0; 621 bool clear; 622 bool wake; 623 const bool delete = (bits & EXTENT_CLEAR_ALL_BITS); 624 gfp_t mask; 625 626 set_gfp_mask_from_bits(&bits, &mask); 627 btrfs_debug_check_extent_io_range(tree, start, end); 628 trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits); 629 630 if (delete) 631 bits |= ~EXTENT_CTLBITS; 632 633 if (bits & EXTENT_DELALLOC) 634 bits |= EXTENT_NORESERVE; 635 636 wake = (bits & EXTENT_LOCK_BITS); 637 clear = (bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY)); 638 again: 639 if (!prealloc) { 640 /* 641 * Don't care for allocation failure here because we might end 642 * up not needing the pre-allocated extent state at all, which 643 * is the case if we only have in the tree extent states that 644 * cover our input range and don't cover too any other range. 645 * If we end up needing a new extent state we allocate it later. 646 */ 647 prealloc = alloc_extent_state(mask); 648 } 649 650 spin_lock(&tree->lock); 651 if (cached_state) { 652 cached = *cached_state; 653 654 if (clear) { 655 *cached_state = NULL; 656 cached_state = NULL; 657 } 658 659 if (cached && extent_state_in_tree(cached) && 660 cached->start <= start && cached->end > start) { 661 if (clear) 662 refcount_dec(&cached->refs); 663 state = cached; 664 goto hit_next; 665 } 666 if (clear) 667 btrfs_free_extent_state(cached); 668 } 669 670 /* This search will find the extents that end after our range starts. */ 671 state = tree_search(tree, start); 672 if (!state) 673 goto out; 674 hit_next: 675 if (state->start > end) 676 goto out; 677 WARN_ON(state->end < start); 678 last_end = state->end; 679 680 /* The state doesn't have the wanted bits, go ahead. */ 681 if (!(state->state & bits)) { 682 state = next_search_state(state, end); 683 goto next; 684 } 685 686 /* 687 * | ---- desired range ---- | 688 * | state | or 689 * | ------------- state -------------- | 690 * 691 * We need to split the extent we found, and may flip bits on second 692 * half. 693 * 694 * If the extent we found extends past our range, we just split and 695 * search again. It'll get split again the next time though. 696 * 697 * If the extent we found is inside our range, we clear the desired bit 698 * on it. 699 */ 700 701 if (state->start < start) { 702 prealloc = alloc_extent_state_atomic(prealloc); 703 if (!prealloc) 704 goto search_again; 705 ret = split_state(tree, state, prealloc, start); 706 prealloc = NULL; 707 if (ret) { 708 extent_io_tree_panic(tree, state, "split", ret); 709 goto out; 710 } 711 if (state->end <= end) { 712 state = clear_state_bit(tree, state, bits, wake, end, 713 changeset); 714 goto next; 715 } 716 if (need_resched()) 717 goto search_again; 718 /* 719 * Fallthrough and try atomic extent state allocation if needed. 720 * If it fails we'll jump to 'search_again' retry the allocation 721 * in non-atomic mode and start the search again. 722 */ 723 } 724 /* 725 * | ---- desired range ---- | 726 * | state | 727 * We need to split the extent, and clear the bit on the first half. 728 */ 729 if (state->start <= end && state->end > end) { 730 prealloc = alloc_extent_state_atomic(prealloc); 731 if (!prealloc) 732 goto search_again; 733 ret = split_state(tree, state, prealloc, end + 1); 734 if (ret) { 735 extent_io_tree_panic(tree, state, "split", ret); 736 prealloc = NULL; 737 goto out; 738 } 739 740 if (wake) 741 wake_up(&state->wq); 742 743 clear_state_bit(tree, prealloc, bits, wake, end, changeset); 744 745 prealloc = NULL; 746 goto out; 747 } 748 749 state = clear_state_bit(tree, state, bits, wake, end, changeset); 750 next: 751 if (last_end >= end) 752 goto out; 753 start = last_end + 1; 754 if (state && !need_resched()) 755 goto hit_next; 756 757 search_again: 758 spin_unlock(&tree->lock); 759 if (gfpflags_allow_blocking(mask)) 760 cond_resched(); 761 goto again; 762 763 out: 764 spin_unlock(&tree->lock); 765 btrfs_free_extent_state(prealloc); 766 767 return ret; 768 769 } 770 771 /* 772 * Wait for one or more bits to clear on a range in the state tree. 773 * The range [start, end] is inclusive. 774 * The tree lock is taken by this function 775 */ 776 static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 777 u32 bits, struct extent_state **cached_state) 778 { 779 struct extent_state *state; 780 781 btrfs_debug_check_extent_io_range(tree, start, end); 782 783 spin_lock(&tree->lock); 784 again: 785 /* 786 * Maintain cached_state, as we may not remove it from the tree if there 787 * are more bits than the bits we're waiting on set on this state. 788 */ 789 if (cached_state && *cached_state) { 790 state = *cached_state; 791 if (extent_state_in_tree(state) && 792 state->start <= start && start < state->end) 793 goto process_node; 794 } 795 while (1) { 796 /* 797 * This search will find all the extents that end after our 798 * range starts. 799 */ 800 state = tree_search(tree, start); 801 process_node: 802 if (!state) 803 break; 804 if (state->start > end) 805 goto out; 806 807 if (state->state & bits) { 808 DEFINE_WAIT(wait); 809 810 start = state->start; 811 refcount_inc(&state->refs); 812 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); 813 spin_unlock(&tree->lock); 814 schedule(); 815 spin_lock(&tree->lock); 816 finish_wait(&state->wq, &wait); 817 btrfs_free_extent_state(state); 818 goto again; 819 } 820 start = state->end + 1; 821 822 if (start > end) 823 break; 824 825 if (!cond_resched_lock(&tree->lock)) { 826 state = next_state(state); 827 goto process_node; 828 } 829 } 830 out: 831 /* This state is no longer useful, clear it and free it up. */ 832 if (cached_state && *cached_state) { 833 state = *cached_state; 834 *cached_state = NULL; 835 btrfs_free_extent_state(state); 836 } 837 spin_unlock(&tree->lock); 838 } 839 840 static void cache_state_if_flags(struct extent_state *state, 841 struct extent_state **cached_ptr, 842 unsigned flags) 843 { 844 if (cached_ptr && !(*cached_ptr)) { 845 if (!flags || (state->state & flags)) { 846 *cached_ptr = state; 847 refcount_inc(&state->refs); 848 } 849 } 850 } 851 852 static void cache_state(struct extent_state *state, 853 struct extent_state **cached_ptr) 854 { 855 return cache_state_if_flags(state, cached_ptr, EXTENT_LOCK_BITS | EXTENT_BOUNDARY); 856 } 857 858 /* 859 * Find the first state struct with 'bits' set after 'start', and return it. 860 * tree->lock must be held. NULL will returned if nothing was found after 861 * 'start'. 862 */ 863 static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, 864 u64 start, u32 bits) 865 { 866 struct extent_state *state; 867 868 /* 869 * This search will find all the extents that end after our range 870 * starts. 871 */ 872 state = tree_search(tree, start); 873 while (state) { 874 if (state->state & bits) 875 return state; 876 state = next_state(state); 877 } 878 return NULL; 879 } 880 881 /* 882 * Find the first offset in the io tree with one or more @bits set. 883 * 884 * Note: If there are multiple bits set in @bits, any of them will match. 885 * 886 * Return true if we find something, and update @start_ret and @end_ret. 887 * Return false if we found nothing. 888 */ 889 bool btrfs_find_first_extent_bit(struct extent_io_tree *tree, u64 start, 890 u64 *start_ret, u64 *end_ret, u32 bits, 891 struct extent_state **cached_state) 892 { 893 struct extent_state *state; 894 bool ret = false; 895 896 spin_lock(&tree->lock); 897 if (cached_state && *cached_state) { 898 state = *cached_state; 899 if (state->end == start - 1 && extent_state_in_tree(state)) { 900 while ((state = next_state(state)) != NULL) { 901 if (state->state & bits) 902 break; 903 } 904 /* 905 * If we found the next extent state, clear cached_state 906 * so that we can cache the next extent state below and 907 * avoid future calls going over the same extent state 908 * again. If we haven't found any, clear as well since 909 * it's now useless. 910 */ 911 btrfs_free_extent_state(*cached_state); 912 *cached_state = NULL; 913 if (state) 914 goto got_it; 915 goto out; 916 } 917 btrfs_free_extent_state(*cached_state); 918 *cached_state = NULL; 919 } 920 921 state = find_first_extent_bit_state(tree, start, bits); 922 got_it: 923 if (state) { 924 cache_state_if_flags(state, cached_state, 0); 925 *start_ret = state->start; 926 *end_ret = state->end; 927 ret = true; 928 } 929 out: 930 spin_unlock(&tree->lock); 931 return ret; 932 } 933 934 /* 935 * Find a contiguous area of bits 936 * 937 * @tree: io tree to check 938 * @start: offset to start the search from 939 * @start_ret: the first offset we found with the bits set 940 * @end_ret: the final contiguous range of the bits that were set 941 * @bits: bits to look for 942 * 943 * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges 944 * to set bits appropriately, and then merge them again. During this time it 945 * will drop the tree->lock, so use this helper if you want to find the actual 946 * contiguous area for given bits. We will search to the first bit we find, and 947 * then walk down the tree until we find a non-contiguous area. The area 948 * returned will be the full contiguous area with the bits set. 949 * 950 * Returns true if we found a range with the given bits set, in which case 951 * @start_ret and @end_ret are updated, or false if no range was found. 952 */ 953 bool btrfs_find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start, 954 u64 *start_ret, u64 *end_ret, u32 bits) 955 { 956 struct extent_state *state; 957 bool ret = false; 958 959 ASSERT(!btrfs_fs_incompat(btrfs_extent_io_tree_to_fs_info(tree), NO_HOLES)); 960 961 spin_lock(&tree->lock); 962 state = find_first_extent_bit_state(tree, start, bits); 963 if (state) { 964 *start_ret = state->start; 965 *end_ret = state->end; 966 while ((state = next_state(state)) != NULL) { 967 if (state->start > (*end_ret + 1)) 968 break; 969 *end_ret = state->end; 970 } 971 ret = true; 972 } 973 spin_unlock(&tree->lock); 974 return ret; 975 } 976 977 /* 978 * Find a contiguous range of bytes in the file marked as delalloc, not more 979 * than 'max_bytes'. start and end are used to return the range, 980 * 981 * True is returned if we find something, false if nothing was in the tree. 982 */ 983 bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start, 984 u64 *end, u64 max_bytes, 985 struct extent_state **cached_state) 986 { 987 struct extent_state *state; 988 u64 cur_start = *start; 989 bool found = false; 990 u64 total_bytes = 0; 991 992 spin_lock(&tree->lock); 993 994 /* 995 * This search will find all the extents that end after our range 996 * starts. 997 */ 998 state = tree_search(tree, cur_start); 999 if (!state) { 1000 *end = (u64)-1; 1001 goto out; 1002 } 1003 1004 while (state) { 1005 if (found && (state->start != cur_start || 1006 (state->state & EXTENT_BOUNDARY))) { 1007 goto out; 1008 } 1009 if (!(state->state & EXTENT_DELALLOC)) { 1010 if (!found) 1011 *end = state->end; 1012 goto out; 1013 } 1014 if (!found) { 1015 *start = state->start; 1016 *cached_state = state; 1017 refcount_inc(&state->refs); 1018 } 1019 found = true; 1020 *end = state->end; 1021 cur_start = state->end + 1; 1022 total_bytes += state->end - state->start + 1; 1023 if (total_bytes >= max_bytes) 1024 break; 1025 state = next_state(state); 1026 } 1027 out: 1028 spin_unlock(&tree->lock); 1029 return found; 1030 } 1031 1032 /* 1033 * Set some bits on a range in the tree. This may require allocations or 1034 * sleeping. By default all allocations use GFP_NOFS, use EXTENT_NOWAIT for 1035 * GFP_NOWAIT. 1036 * 1037 * If any of the exclusive bits are set, this will fail with -EEXIST if some 1038 * part of the range already has the desired bits set. The extent_state of the 1039 * existing range is returned in failed_state in this case, and the start of the 1040 * existing range is returned in failed_start. failed_state is used as an 1041 * optimization for wait_extent_bit, failed_start must be used as the source of 1042 * truth as failed_state may have changed since we returned. 1043 * 1044 * [start, end] is inclusive This takes the tree lock. 1045 */ 1046 static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 1047 u32 bits, u64 *failed_start, 1048 struct extent_state **failed_state, 1049 struct extent_state **cached_state, 1050 struct extent_changeset *changeset) 1051 { 1052 struct extent_state *state; 1053 struct extent_state *prealloc = NULL; 1054 struct rb_node **p = NULL; 1055 struct rb_node *parent = NULL; 1056 int ret = 0; 1057 u64 last_start; 1058 u64 last_end; 1059 u32 exclusive_bits = (bits & EXTENT_LOCK_BITS); 1060 gfp_t mask; 1061 1062 set_gfp_mask_from_bits(&bits, &mask); 1063 btrfs_debug_check_extent_io_range(tree, start, end); 1064 trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits); 1065 1066 if (exclusive_bits) 1067 ASSERT(failed_start); 1068 else 1069 ASSERT(failed_start == NULL && failed_state == NULL); 1070 again: 1071 if (!prealloc) { 1072 /* 1073 * Don't care for allocation failure here because we might end 1074 * up not needing the pre-allocated extent state at all, which 1075 * is the case if we only have in the tree extent states that 1076 * cover our input range and don't cover too any other range. 1077 * If we end up needing a new extent state we allocate it later. 1078 */ 1079 prealloc = alloc_extent_state(mask); 1080 } 1081 /* Optimistically preallocate the extent changeset ulist node. */ 1082 if (changeset) 1083 extent_changeset_prealloc(changeset, mask); 1084 1085 spin_lock(&tree->lock); 1086 if (cached_state && *cached_state) { 1087 state = *cached_state; 1088 if (state->start <= start && state->end > start && 1089 extent_state_in_tree(state)) 1090 goto hit_next; 1091 } 1092 /* 1093 * This search will find all the extents that end after our range 1094 * starts. 1095 */ 1096 state = tree_search_for_insert(tree, start, &p, &parent); 1097 if (!state) { 1098 prealloc = alloc_extent_state_atomic(prealloc); 1099 if (!prealloc) 1100 goto search_again; 1101 prealloc->start = start; 1102 prealloc->end = end; 1103 insert_state_fast(tree, prealloc, p, parent, bits, changeset); 1104 cache_state(prealloc, cached_state); 1105 prealloc = NULL; 1106 goto out; 1107 } 1108 hit_next: 1109 last_start = state->start; 1110 last_end = state->end; 1111 1112 /* 1113 * | ---- desired range ---- | 1114 * | state | 1115 * 1116 * Just lock what we found and keep going 1117 */ 1118 if (state->start == start && state->end <= end) { 1119 if (state->state & exclusive_bits) { 1120 *failed_start = state->start; 1121 cache_state(state, failed_state); 1122 ret = -EEXIST; 1123 goto out; 1124 } 1125 1126 set_state_bits(tree, state, bits, changeset); 1127 cache_state(state, cached_state); 1128 merge_state(tree, state); 1129 if (last_end >= end) 1130 goto out; 1131 start = last_end + 1; 1132 state = next_state(state); 1133 if (state && state->start == start && !need_resched()) 1134 goto hit_next; 1135 goto search_again; 1136 } 1137 1138 /* 1139 * | ---- desired range ---- | 1140 * | state | 1141 * or 1142 * | ------------- state -------------- | 1143 * 1144 * We need to split the extent we found, and may flip bits on second 1145 * half. 1146 * 1147 * If the extent we found extends past our range, we just split and 1148 * search again. It'll get split again the next time though. 1149 * 1150 * If the extent we found is inside our range, we set the desired bit 1151 * on it. 1152 */ 1153 if (state->start < start) { 1154 if (state->state & exclusive_bits) { 1155 *failed_start = start; 1156 cache_state(state, failed_state); 1157 ret = -EEXIST; 1158 goto out; 1159 } 1160 1161 /* 1162 * If this extent already has all the bits we want set, then 1163 * skip it, not necessary to split it or do anything with it. 1164 */ 1165 if ((state->state & bits) == bits) { 1166 start = state->end + 1; 1167 cache_state(state, cached_state); 1168 goto search_again; 1169 } 1170 1171 prealloc = alloc_extent_state_atomic(prealloc); 1172 if (!prealloc) 1173 goto search_again; 1174 ret = split_state(tree, state, prealloc, start); 1175 if (ret) 1176 extent_io_tree_panic(tree, state, "split", ret); 1177 1178 prealloc = NULL; 1179 if (ret) 1180 goto out; 1181 if (state->end <= end) { 1182 set_state_bits(tree, state, bits, changeset); 1183 cache_state(state, cached_state); 1184 merge_state(tree, state); 1185 if (last_end >= end) 1186 goto out; 1187 start = last_end + 1; 1188 state = next_state(state); 1189 if (state && state->start == start && !need_resched()) 1190 goto hit_next; 1191 } 1192 goto search_again; 1193 } 1194 /* 1195 * | ---- desired range ---- | 1196 * | state | or | state | 1197 * 1198 * There's a hole, we need to insert something in it and ignore the 1199 * extent we found. 1200 */ 1201 if (state->start > start) { 1202 struct extent_state *inserted_state; 1203 1204 prealloc = alloc_extent_state_atomic(prealloc); 1205 if (!prealloc) 1206 goto search_again; 1207 1208 /* 1209 * Avoid to free 'prealloc' if it can be merged with the later 1210 * extent. 1211 */ 1212 prealloc->start = start; 1213 if (end < last_start) 1214 prealloc->end = end; 1215 else 1216 prealloc->end = last_start - 1; 1217 1218 inserted_state = insert_state(tree, prealloc, bits, changeset); 1219 if (IS_ERR(inserted_state)) { 1220 ret = PTR_ERR(inserted_state); 1221 extent_io_tree_panic(tree, prealloc, "insert", ret); 1222 goto out; 1223 } 1224 1225 cache_state(inserted_state, cached_state); 1226 if (inserted_state == prealloc) 1227 prealloc = NULL; 1228 start = inserted_state->end + 1; 1229 1230 /* Beyond target range, stop. */ 1231 if (start > end) 1232 goto out; 1233 1234 if (need_resched()) 1235 goto search_again; 1236 1237 state = next_search_state(inserted_state, end); 1238 /* 1239 * If there's a next state, whether contiguous or not, we don't 1240 * need to unlock and start search agian. If it's not contiguous 1241 * we will end up here and try to allocate a prealloc state and insert. 1242 */ 1243 if (state) 1244 goto hit_next; 1245 goto search_again; 1246 } 1247 /* 1248 * | ---- desired range ---- | 1249 * | state | 1250 * 1251 * We need to split the extent, and set the bit on the first half 1252 */ 1253 if (state->start <= end && state->end > end) { 1254 if (state->state & exclusive_bits) { 1255 *failed_start = start; 1256 cache_state(state, failed_state); 1257 ret = -EEXIST; 1258 goto out; 1259 } 1260 1261 prealloc = alloc_extent_state_atomic(prealloc); 1262 if (!prealloc) 1263 goto search_again; 1264 ret = split_state(tree, state, prealloc, end + 1); 1265 if (ret) { 1266 extent_io_tree_panic(tree, state, "split", ret); 1267 prealloc = NULL; 1268 goto out; 1269 } 1270 1271 set_state_bits(tree, prealloc, bits, changeset); 1272 cache_state(prealloc, cached_state); 1273 merge_state(tree, prealloc); 1274 prealloc = NULL; 1275 goto out; 1276 } 1277 1278 search_again: 1279 if (start > end) 1280 goto out; 1281 spin_unlock(&tree->lock); 1282 if (gfpflags_allow_blocking(mask)) 1283 cond_resched(); 1284 goto again; 1285 1286 out: 1287 spin_unlock(&tree->lock); 1288 btrfs_free_extent_state(prealloc); 1289 1290 return ret; 1291 1292 } 1293 1294 int btrfs_set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 1295 u32 bits, struct extent_state **cached_state) 1296 { 1297 return set_extent_bit(tree, start, end, bits, NULL, NULL, cached_state, NULL); 1298 } 1299 1300 /* 1301 * Convert all bits in a given range from one bit to another 1302 * 1303 * @tree: the io tree to search 1304 * @start: the start offset in bytes 1305 * @end: the end offset in bytes (inclusive) 1306 * @bits: the bits to set in this range 1307 * @clear_bits: the bits to clear in this range 1308 * @cached_state: state that we're going to cache 1309 * 1310 * This will go through and set bits for the given range. If any states exist 1311 * already in this range they are set with the given bit and cleared of the 1312 * clear_bits. This is only meant to be used by things that are mergeable, ie. 1313 * converting from say DELALLOC to DIRTY. This is not meant to be used with 1314 * boundary bits like LOCK. 1315 * 1316 * All allocations are done with GFP_NOFS. 1317 */ 1318 int btrfs_convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 1319 u32 bits, u32 clear_bits, 1320 struct extent_state **cached_state) 1321 { 1322 struct extent_state *state; 1323 struct extent_state *prealloc = NULL; 1324 struct rb_node **p = NULL; 1325 struct rb_node *parent = NULL; 1326 int ret = 0; 1327 u64 last_start; 1328 u64 last_end; 1329 bool first_iteration = true; 1330 1331 btrfs_debug_check_extent_io_range(tree, start, end); 1332 trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits, 1333 clear_bits); 1334 1335 again: 1336 if (!prealloc) { 1337 /* 1338 * Best effort, don't worry if extent state allocation fails 1339 * here for the first iteration. We might have a cached state 1340 * that matches exactly the target range, in which case no 1341 * extent state allocations are needed. We'll only know this 1342 * after locking the tree. 1343 */ 1344 prealloc = alloc_extent_state(GFP_NOFS); 1345 if (!prealloc && !first_iteration) 1346 return -ENOMEM; 1347 } 1348 1349 spin_lock(&tree->lock); 1350 if (cached_state && *cached_state) { 1351 state = *cached_state; 1352 if (state->start <= start && state->end > start && 1353 extent_state_in_tree(state)) 1354 goto hit_next; 1355 } 1356 1357 /* 1358 * This search will find all the extents that end after our range 1359 * starts. 1360 */ 1361 state = tree_search_for_insert(tree, start, &p, &parent); 1362 if (!state) { 1363 prealloc = alloc_extent_state_atomic(prealloc); 1364 if (!prealloc) { 1365 ret = -ENOMEM; 1366 goto out; 1367 } 1368 prealloc->start = start; 1369 prealloc->end = end; 1370 insert_state_fast(tree, prealloc, p, parent, bits, NULL); 1371 cache_state(prealloc, cached_state); 1372 prealloc = NULL; 1373 goto out; 1374 } 1375 hit_next: 1376 last_start = state->start; 1377 last_end = state->end; 1378 1379 /* 1380 * | ---- desired range ---- | 1381 * | state | 1382 * 1383 * Just lock what we found and keep going. 1384 */ 1385 if (state->start == start && state->end <= end) { 1386 set_state_bits(tree, state, bits, NULL); 1387 cache_state(state, cached_state); 1388 state = clear_state_bit(tree, state, clear_bits, 0, end, NULL); 1389 if (last_end >= end) 1390 goto out; 1391 start = last_end + 1; 1392 if (state && state->start == start && !need_resched()) 1393 goto hit_next; 1394 goto search_again; 1395 } 1396 1397 /* 1398 * | ---- desired range ---- | 1399 * | state | 1400 * or 1401 * | ------------- state -------------- | 1402 * 1403 * We need to split the extent we found, and may flip bits on second 1404 * half. 1405 * 1406 * If the extent we found extends past our range, we just split and 1407 * search again. It'll get split again the next time though. 1408 * 1409 * If the extent we found is inside our range, we set the desired bit 1410 * on it. 1411 */ 1412 if (state->start < start) { 1413 prealloc = alloc_extent_state_atomic(prealloc); 1414 if (!prealloc) { 1415 ret = -ENOMEM; 1416 goto out; 1417 } 1418 ret = split_state(tree, state, prealloc, start); 1419 prealloc = NULL; 1420 if (ret) { 1421 extent_io_tree_panic(tree, state, "split", ret); 1422 goto out; 1423 } 1424 if (state->end <= end) { 1425 set_state_bits(tree, state, bits, NULL); 1426 cache_state(state, cached_state); 1427 state = clear_state_bit(tree, state, clear_bits, 0, end, NULL); 1428 if (last_end >= end) 1429 goto out; 1430 start = last_end + 1; 1431 if (state && state->start == start && !need_resched()) 1432 goto hit_next; 1433 } 1434 goto search_again; 1435 } 1436 /* 1437 * | ---- desired range ---- | 1438 * | state | or | state | 1439 * 1440 * There's a hole, we need to insert something in it and ignore the 1441 * extent we found. 1442 */ 1443 if (state->start > start) { 1444 struct extent_state *inserted_state; 1445 1446 prealloc = alloc_extent_state_atomic(prealloc); 1447 if (!prealloc) { 1448 ret = -ENOMEM; 1449 goto out; 1450 } 1451 1452 /* 1453 * Avoid to free 'prealloc' if it can be merged with the later 1454 * extent. 1455 */ 1456 prealloc->start = start; 1457 if (end < last_start) 1458 prealloc->end = end; 1459 else 1460 prealloc->end = last_start - 1; 1461 1462 inserted_state = insert_state(tree, prealloc, bits, NULL); 1463 if (IS_ERR(inserted_state)) { 1464 ret = PTR_ERR(inserted_state); 1465 extent_io_tree_panic(tree, prealloc, "insert", ret); 1466 goto out; 1467 } 1468 cache_state(inserted_state, cached_state); 1469 if (inserted_state == prealloc) 1470 prealloc = NULL; 1471 start = inserted_state->end + 1; 1472 1473 /* Beyond target range, stop. */ 1474 if (start > end) 1475 goto out; 1476 1477 if (need_resched()) 1478 goto search_again; 1479 1480 state = next_search_state(inserted_state, end); 1481 /* 1482 * If there's a next state, whether contiguous or not, we don't 1483 * need to unlock and start search again. If it's not contiguous 1484 * we will end up here and try to allocate a prealloc state and insert. 1485 */ 1486 if (state) 1487 goto hit_next; 1488 goto search_again; 1489 } 1490 /* 1491 * | ---- desired range ---- | 1492 * | state | 1493 * 1494 * We need to split the extent, and set the bit on the first half. 1495 */ 1496 if (state->start <= end && state->end > end) { 1497 prealloc = alloc_extent_state_atomic(prealloc); 1498 if (!prealloc) { 1499 ret = -ENOMEM; 1500 goto out; 1501 } 1502 1503 ret = split_state(tree, state, prealloc, end + 1); 1504 if (ret) { 1505 extent_io_tree_panic(tree, state, "split", ret); 1506 prealloc = NULL; 1507 goto out; 1508 } 1509 1510 set_state_bits(tree, prealloc, bits, NULL); 1511 cache_state(prealloc, cached_state); 1512 clear_state_bit(tree, prealloc, clear_bits, 0, end, NULL); 1513 prealloc = NULL; 1514 goto out; 1515 } 1516 1517 search_again: 1518 if (start > end) 1519 goto out; 1520 spin_unlock(&tree->lock); 1521 cond_resched(); 1522 first_iteration = false; 1523 goto again; 1524 1525 out: 1526 spin_unlock(&tree->lock); 1527 btrfs_free_extent_state(prealloc); 1528 1529 return ret; 1530 } 1531 1532 /* 1533 * Find the first range that has @bits not set. This range could start before 1534 * @start. 1535 * 1536 * @tree: the tree to search 1537 * @start: offset at/after which the found extent should start 1538 * @start_ret: records the beginning of the range 1539 * @end_ret: records the end of the range (inclusive) 1540 * @bits: the set of bits which must be unset 1541 * 1542 * Since unallocated range is also considered one which doesn't have the bits 1543 * set it's possible that @end_ret contains -1, this happens in case the range 1544 * spans (last_range_end, end of device]. In this case it's up to the caller to 1545 * trim @end_ret to the appropriate size. 1546 */ 1547 void btrfs_find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start, 1548 u64 *start_ret, u64 *end_ret, u32 bits) 1549 { 1550 struct extent_state *state; 1551 struct extent_state *prev = NULL, *next = NULL; 1552 1553 spin_lock(&tree->lock); 1554 1555 /* Find first extent with bits cleared */ 1556 while (1) { 1557 state = tree_search_prev_next(tree, start, &prev, &next); 1558 if (!state && !next && !prev) { 1559 /* 1560 * Tree is completely empty, send full range and let 1561 * caller deal with it 1562 */ 1563 *start_ret = 0; 1564 *end_ret = -1; 1565 goto out; 1566 } else if (!state && !next) { 1567 /* 1568 * We are past the last allocated chunk, set start at 1569 * the end of the last extent. 1570 */ 1571 *start_ret = prev->end + 1; 1572 *end_ret = -1; 1573 goto out; 1574 } else if (!state) { 1575 state = next; 1576 } 1577 1578 /* 1579 * At this point 'state' either contains 'start' or start is 1580 * before 'state' 1581 */ 1582 if (in_range(start, state->start, state->end - state->start + 1)) { 1583 if (state->state & bits) { 1584 /* 1585 * |--range with bits sets--| 1586 * | 1587 * start 1588 */ 1589 start = state->end + 1; 1590 } else { 1591 /* 1592 * 'start' falls within a range that doesn't 1593 * have the bits set, so take its start as the 1594 * beginning of the desired range 1595 * 1596 * |--range with bits cleared----| 1597 * | 1598 * start 1599 */ 1600 *start_ret = state->start; 1601 break; 1602 } 1603 } else { 1604 /* 1605 * |---prev range---|---hole/unset---|---node range---| 1606 * | 1607 * start 1608 * 1609 * or 1610 * 1611 * |---hole/unset--||--first node--| 1612 * 0 | 1613 * start 1614 */ 1615 if (prev) 1616 *start_ret = prev->end + 1; 1617 else 1618 *start_ret = 0; 1619 break; 1620 } 1621 } 1622 1623 /* 1624 * Find the longest stretch from start until an entry which has the 1625 * bits set 1626 */ 1627 while (state) { 1628 if (state->end >= start && !(state->state & bits)) { 1629 *end_ret = state->end; 1630 } else { 1631 *end_ret = state->start - 1; 1632 break; 1633 } 1634 state = next_state(state); 1635 } 1636 out: 1637 spin_unlock(&tree->lock); 1638 } 1639 1640 /* 1641 * Count the number of bytes in the tree that have a given bit(s) set for a 1642 * given range. 1643 * 1644 * @tree: The io tree to search. 1645 * @start: The start offset of the range. This value is updated to the 1646 * offset of the first byte found with the given bit(s), so it 1647 * can end up being bigger than the initial value. 1648 * @search_end: The end offset (inclusive value) of the search range. 1649 * @max_bytes: The maximum byte count we are interested. The search stops 1650 * once it reaches this count. 1651 * @bits: The bits the range must have in order to be accounted for. 1652 * If multiple bits are set, then only subranges that have all 1653 * the bits set are accounted for. 1654 * @contig: Indicate if we should ignore holes in the range or not. If 1655 * this is true, then stop once we find a hole. 1656 * @cached_state: A cached state to be used across multiple calls to this 1657 * function in order to speedup searches. Use NULL if this is 1658 * called only once or if each call does not start where the 1659 * previous one ended. 1660 * 1661 * Returns the total number of bytes found within the given range that have 1662 * all given bits set. If the returned number of bytes is greater than zero 1663 * then @start is updated with the offset of the first byte with the bits set. 1664 */ 1665 u64 btrfs_count_range_bits(struct extent_io_tree *tree, 1666 u64 *start, u64 search_end, u64 max_bytes, 1667 u32 bits, int contig, 1668 struct extent_state **cached_state) 1669 { 1670 struct extent_state *state = NULL; 1671 struct extent_state *cached; 1672 u64 cur_start = *start; 1673 u64 total_bytes = 0; 1674 u64 last = 0; 1675 int found = 0; 1676 1677 if (WARN_ON(search_end < cur_start)) 1678 return 0; 1679 1680 spin_lock(&tree->lock); 1681 1682 if (!cached_state || !*cached_state) 1683 goto search; 1684 1685 cached = *cached_state; 1686 1687 if (!extent_state_in_tree(cached)) 1688 goto search; 1689 1690 if (cached->start <= cur_start && cur_start <= cached->end) { 1691 state = cached; 1692 } else if (cached->start > cur_start) { 1693 struct extent_state *prev; 1694 1695 /* 1696 * The cached state starts after our search range's start. Check 1697 * if the previous state record starts at or before the range we 1698 * are looking for, and if so, use it - this is a common case 1699 * when there are holes between records in the tree. If there is 1700 * no previous state record, we can start from our cached state. 1701 */ 1702 prev = prev_state(cached); 1703 if (!prev) 1704 state = cached; 1705 else if (prev->start <= cur_start && cur_start <= prev->end) 1706 state = prev; 1707 } 1708 1709 /* 1710 * This search will find all the extents that end after our range 1711 * starts. 1712 */ 1713 search: 1714 if (!state) 1715 state = tree_search(tree, cur_start); 1716 1717 while (state) { 1718 if (state->start > search_end) 1719 break; 1720 if (contig && found && state->start > last + 1) 1721 break; 1722 if (state->end >= cur_start && (state->state & bits) == bits) { 1723 total_bytes += min(search_end, state->end) + 1 - 1724 max(cur_start, state->start); 1725 if (total_bytes >= max_bytes) 1726 break; 1727 if (!found) { 1728 *start = max(cur_start, state->start); 1729 found = 1; 1730 } 1731 last = state->end; 1732 } else if (contig && found) { 1733 break; 1734 } 1735 state = next_state(state); 1736 } 1737 1738 if (cached_state) { 1739 btrfs_free_extent_state(*cached_state); 1740 *cached_state = state; 1741 if (state) 1742 refcount_inc(&state->refs); 1743 } 1744 1745 spin_unlock(&tree->lock); 1746 1747 return total_bytes; 1748 } 1749 1750 /* 1751 * Check if the single @bit exists in the given range. 1752 */ 1753 bool btrfs_test_range_bit_exists(struct extent_io_tree *tree, u64 start, u64 end, u32 bit) 1754 { 1755 struct extent_state *state; 1756 bool bitset = false; 1757 1758 ASSERT(is_power_of_2(bit)); 1759 1760 spin_lock(&tree->lock); 1761 state = tree_search(tree, start); 1762 while (state) { 1763 if (state->start > end) 1764 break; 1765 1766 if (state->state & bit) { 1767 bitset = true; 1768 break; 1769 } 1770 1771 if (state->end >= end) 1772 break; 1773 state = next_state(state); 1774 } 1775 spin_unlock(&tree->lock); 1776 return bitset; 1777 } 1778 1779 void btrfs_get_range_bits(struct extent_io_tree *tree, u64 start, u64 end, u32 *bits, 1780 struct extent_state **cached_state) 1781 { 1782 struct extent_state *state; 1783 1784 /* 1785 * The cached state is currently mandatory and not used to start the 1786 * search, only to cache the first state record found in the range. 1787 */ 1788 ASSERT(cached_state != NULL); 1789 ASSERT(*cached_state == NULL); 1790 1791 *bits = 0; 1792 1793 spin_lock(&tree->lock); 1794 state = tree_search(tree, start); 1795 if (state && state->start < end) { 1796 *cached_state = state; 1797 refcount_inc(&state->refs); 1798 } 1799 while (state) { 1800 if (state->start > end) 1801 break; 1802 1803 *bits |= state->state; 1804 1805 if (state->end >= end) 1806 break; 1807 1808 state = next_state(state); 1809 } 1810 spin_unlock(&tree->lock); 1811 } 1812 1813 /* 1814 * Check if the whole range [@start,@end) contains the single @bit set. 1815 */ 1816 bool btrfs_test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bit, 1817 struct extent_state *cached) 1818 { 1819 struct extent_state *state; 1820 bool bitset = true; 1821 1822 ASSERT(is_power_of_2(bit)); 1823 ASSERT(start < end); 1824 1825 spin_lock(&tree->lock); 1826 if (cached && extent_state_in_tree(cached) && cached->start <= start && 1827 cached->end > start) 1828 state = cached; 1829 else 1830 state = tree_search(tree, start); 1831 while (state) { 1832 if (state->start > start) { 1833 bitset = false; 1834 break; 1835 } 1836 1837 if ((state->state & bit) == 0) { 1838 bitset = false; 1839 break; 1840 } 1841 1842 if (state->end >= end) 1843 break; 1844 1845 /* Next state must start where this one ends. */ 1846 start = state->end + 1; 1847 state = next_state(state); 1848 } 1849 1850 /* We ran out of states and were still inside of our range. */ 1851 if (!state) 1852 bitset = false; 1853 spin_unlock(&tree->lock); 1854 return bitset; 1855 } 1856 1857 /* Wrappers around set/clear extent bit */ 1858 int btrfs_set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 1859 u32 bits, struct extent_changeset *changeset) 1860 { 1861 /* 1862 * We don't support EXTENT_LOCK_BITS yet, as current changeset will 1863 * record any bits changed, so for EXTENT_LOCK_BITS case, it will either 1864 * fail with -EEXIST or changeset will record the whole range. 1865 */ 1866 ASSERT(!(bits & EXTENT_LOCK_BITS)); 1867 1868 return set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset); 1869 } 1870 1871 int btrfs_clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 1872 u32 bits, struct extent_changeset *changeset) 1873 { 1874 /* 1875 * Don't support EXTENT_LOCK_BITS case, same reason as 1876 * set_record_extent_bits(). 1877 */ 1878 ASSERT(!(bits & EXTENT_LOCK_BITS)); 1879 1880 return btrfs_clear_extent_bit_changeset(tree, start, end, bits, NULL, changeset); 1881 } 1882 1883 bool btrfs_try_lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 1884 u32 bits, struct extent_state **cached) 1885 { 1886 int ret; 1887 u64 failed_start; 1888 1889 ret = set_extent_bit(tree, start, end, bits, &failed_start, NULL, cached, NULL); 1890 if (ret == -EEXIST) { 1891 if (failed_start > start) 1892 btrfs_clear_extent_bit(tree, start, failed_start - 1, 1893 bits, cached); 1894 return 0; 1895 } 1896 return 1; 1897 } 1898 1899 /* 1900 * Either insert or lock state struct between start and end use mask to tell 1901 * us if waiting is desired. 1902 */ 1903 int btrfs_lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, u32 bits, 1904 struct extent_state **cached_state) 1905 { 1906 struct extent_state *failed_state = NULL; 1907 int ret; 1908 u64 failed_start; 1909 1910 ret = set_extent_bit(tree, start, end, bits, &failed_start, 1911 &failed_state, cached_state, NULL); 1912 while (ret == -EEXIST) { 1913 if (failed_start != start) 1914 btrfs_clear_extent_bit(tree, start, failed_start - 1, 1915 bits, cached_state); 1916 1917 wait_extent_bit(tree, failed_start, end, bits, &failed_state); 1918 ret = set_extent_bit(tree, start, end, bits, &failed_start, 1919 &failed_state, cached_state, NULL); 1920 } 1921 return ret; 1922 } 1923 1924 /* 1925 * Get the extent state that follows the given extent state. 1926 * This is meant to be used in a context where we know no other tasks can 1927 * concurrently modify the tree. 1928 */ 1929 struct extent_state *btrfs_next_extent_state(struct extent_io_tree *tree, 1930 struct extent_state *state) 1931 { 1932 struct extent_state *next; 1933 1934 spin_lock(&tree->lock); 1935 ASSERT(extent_state_in_tree(state)); 1936 next = next_state(state); 1937 if (next) 1938 refcount_inc(&next->refs); 1939 spin_unlock(&tree->lock); 1940 1941 return next; 1942 } 1943 1944 void __cold btrfs_extent_state_free_cachep(void) 1945 { 1946 btrfs_extent_state_leak_debug_check(); 1947 kmem_cache_destroy(extent_state_cache); 1948 } 1949 1950 int __init btrfs_extent_state_init_cachep(void) 1951 { 1952 extent_state_cache = kmem_cache_create("btrfs_extent_state", 1953 sizeof(struct extent_state), 0, 0, 1954 NULL); 1955 if (!extent_state_cache) 1956 return -ENOMEM; 1957 1958 return 0; 1959 } 1960