// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
#include <linux/sched/mm.h>
#include "messages.h"
#include "misc.h"
#include "ctree.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "extent_io.h"
#include "disk-io.h"
#include "compression.h"
#include "delalloc-space.h"
#include "qgroup.h"
#include "subpage.h"
#include "file.h"
#include "block-group.h"

static struct kmem_cache *btrfs_ordered_extent_cache;

static u64 entry_end(struct btrfs_ordered_extent *entry)
{
	if (entry->file_offset + entry->num_bytes < entry->file_offset)
		return (u64)-1;
	return entry->file_offset + entry->num_bytes;
}

/*
 * Returns NULL if the insertion worked, or the node that was found in the
 * tree at the conflicting offset otherwise.
 */
static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
				   struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_ordered_extent *entry;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node);

		if (file_offset < entry->file_offset)
			p = &(*p)->rb_left;
		else if (file_offset >= entry_end(entry))
			p = &(*p)->rb_right;
		else
			return parent;
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

/*
 * Look for a given offset in the tree, and if it can't be found return the
 * first lesser offset.
 */
static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
				     struct rb_node **prev_ret)
{
	struct rb_node *n = root->rb_node;
	struct rb_node *prev = NULL;
	struct rb_node *test;
	struct btrfs_ordered_extent *entry;
	struct btrfs_ordered_extent *prev_entry = NULL;

	while (n) {
		entry = rb_entry(n, struct btrfs_ordered_extent, rb_node);
		prev = n;
		prev_entry = entry;

		if (file_offset < entry->file_offset)
			n = n->rb_left;
		else if (file_offset >= entry_end(entry))
			n = n->rb_right;
		else
			return n;
	}
	if (!prev_ret)
		return NULL;

	while (prev && file_offset >= entry_end(prev_entry)) {
		test = rb_next(prev);
		if (!test)
			break;
		prev_entry = rb_entry(test, struct btrfs_ordered_extent,
				      rb_node);
		if (file_offset < entry_end(prev_entry))
			break;

		prev = test;
	}
	if (prev)
		prev_entry = rb_entry(prev, struct btrfs_ordered_extent,
				      rb_node);
	while (prev && file_offset < entry_end(prev_entry)) {
		test = rb_prev(prev);
		if (!test)
			break;
		prev_entry = rb_entry(test, struct btrfs_ordered_extent,
				      rb_node);
		prev = test;
	}
	*prev_ret = prev;
	return NULL;
}

static int btrfs_range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset,
				u64 len)
{
	if (file_offset + len <= entry->file_offset ||
	    entry->file_offset + entry->num_bytes <= file_offset)
		return 0;
	return 1;
}

/*
 * Look for the first ordered struct that has this offset, otherwise the
 * first one less than this offset.
 */
static inline struct rb_node *ordered_tree_search(struct btrfs_inode *inode,
						  u64 file_offset)
{
	struct rb_node *prev = NULL;
	struct rb_node *ret;
	struct btrfs_ordered_extent *entry;

	if (inode->ordered_tree_last) {
		entry = rb_entry(inode->ordered_tree_last, struct btrfs_ordered_extent,
				 rb_node);
		if (in_range(file_offset, entry->file_offset, entry->num_bytes))
			return inode->ordered_tree_last;
	}
	ret = __tree_search(&inode->ordered_tree, file_offset, &prev);
	if (!ret)
		ret = prev;
	if (ret)
		inode->ordered_tree_last = ret;
	return ret;
}

static struct btrfs_ordered_extent *alloc_ordered_extent(
			struct btrfs_inode *inode, u64 file_offset, u64 num_bytes,
			u64 ram_bytes, u64 disk_bytenr, u64 disk_num_bytes,
			u64 offset, unsigned long flags, int compress_type)
{
	struct btrfs_ordered_extent *entry;
	int ret;
	u64 qgroup_rsv = 0;
	const bool is_nocow = (flags &
			       ((1U << BTRFS_ORDERED_NOCOW) | (1U << BTRFS_ORDERED_PREALLOC)));

	/*
	 * For a NOCOW write we can free the qgroup reserve right now. For a COW
	 * one we transfer the reserved space from the inode's iotree into the
	 * ordered extent by calling btrfs_qgroup_release_data() and tracking
	 * the qgroup reserved amount in the ordered extent, so that later after
	 * completing the ordered extent, when running the data delayed ref it
	 * creates, we free the reserved data with btrfs_qgroup_free_refroot().
	 */
	if (is_nocow)
		ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes, &qgroup_rsv);
	else
		ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes, &qgroup_rsv);

	if (ret < 0)
		return ERR_PTR(ret);

	entry = kmem_cache_zalloc(btrfs_ordered_extent_cache, GFP_NOFS);
	if (!entry) {
		entry = ERR_PTR(-ENOMEM);
		goto out;
	}

	entry->file_offset = file_offset;
	entry->num_bytes = num_bytes;
	entry->ram_bytes = ram_bytes;
	entry->disk_bytenr = disk_bytenr;
	entry->disk_num_bytes = disk_num_bytes;
	entry->offset = offset;
	entry->bytes_left = num_bytes;
	if (WARN_ON_ONCE(!igrab(&inode->vfs_inode))) {
		kmem_cache_free(btrfs_ordered_extent_cache, entry);
		entry = ERR_PTR(-ESTALE);
		goto out;
	}
	entry->inode = inode;
	entry->compress_type = compress_type;
	entry->truncated_len = (u64)-1;
	entry->qgroup_rsv = qgroup_rsv;
	entry->flags = flags;
	refcount_set(&entry->refs, 1);
	init_waitqueue_head(&entry->wait);
	INIT_LIST_HEAD(&entry->list);
	INIT_LIST_HEAD(&entry->log_list);
	INIT_LIST_HEAD(&entry->root_extent_list);
	INIT_LIST_HEAD(&entry->work_list);
	INIT_LIST_HEAD(&entry->bioc_list);
	init_completion(&entry->completion);

	/*
	 * We don't need the count_max_extents here, we can assume that all of
	 * that work has been done at higher layers, so this is truly the
	 * smallest the extent is going to get.
	 */
	spin_lock(&inode->lock);
	btrfs_mod_outstanding_extents(inode, 1);
	spin_unlock(&inode->lock);

out:
	if (IS_ERR(entry) && !is_nocow)
		btrfs_qgroup_free_refroot(inode->root->fs_info,
					  btrfs_root_id(inode->root),
					  qgroup_rsv, BTRFS_QGROUP_RSV_DATA);

	return entry;
}

static void insert_ordered_extent(struct btrfs_ordered_extent *entry)
{
	struct btrfs_inode *inode = entry->inode;
	struct btrfs_root *root = inode->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct rb_node *node;

	trace_btrfs_ordered_extent_add(inode, entry);

	percpu_counter_add_batch(&fs_info->ordered_bytes, entry->num_bytes,
				 fs_info->delalloc_batch);

	/* One ref for the tree. */
	refcount_inc(&entry->refs);

	spin_lock(&inode->ordered_tree_lock);
	node = tree_insert(&inode->ordered_tree, entry->file_offset,
			   &entry->rb_node);
	if (unlikely(node))
		btrfs_panic(fs_info, -EEXIST,
			    "inconsistency in ordered tree at offset %llu",
			    entry->file_offset);
	spin_unlock(&inode->ordered_tree_lock);

	spin_lock(&root->ordered_extent_lock);
	list_add_tail(&entry->root_extent_list,
		      &root->ordered_extents);
	root->nr_ordered_extents++;
	if (root->nr_ordered_extents == 1) {
		spin_lock(&fs_info->ordered_root_lock);
		BUG_ON(!list_empty(&root->ordered_root));
		list_add_tail(&root->ordered_root, &fs_info->ordered_roots);
		spin_unlock(&fs_info->ordered_root_lock);
	}
	spin_unlock(&root->ordered_extent_lock);
}

/*
 * Add an ordered extent to the per-inode tree.
 *
 * @inode:           Inode that this extent is for.
 * @file_offset:     Logical offset in file where the extent starts.
 * @num_bytes:       Logical length of extent in file.
 * @ram_bytes:       Full length of unencoded data.
 * @disk_bytenr:     Offset of extent on disk.
 * @disk_num_bytes:  Size of extent on disk.
 * @offset:          Offset into unencoded data where file data starts.
 * @flags:           Flags specifying type of extent (1U << BTRFS_ORDERED_*).
 * @compress_type:   Compression algorithm used for data.
 *
 * Most of these parameters correspond to &struct btrfs_file_extent_item. The
 * tree is given a single reference on the ordered extent that was inserted, and
 * the returned pointer is given a second reference.
 *
 * Return: the new ordered extent or error pointer.
 */
struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
			struct btrfs_inode *inode, u64 file_offset,
			const struct btrfs_file_extent *file_extent, unsigned long flags)
{
	struct btrfs_ordered_extent *entry;

	ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);

	/*
	 * For regular writes, we just use the members in @file_extent.
	 *
	 * For NOCOW, we don't really care about the numbers except @start and
	 * file_extent->num_bytes, as we won't insert a file extent item at all.
	 *
	 * For PREALLOC, we do not use ordered extent members, but
	 * btrfs_mark_extent_written() handles everything.
	 *
	 * So here we always pass 0 as offset for NOCOW/PREALLOC ordered extents,
	 * or btrfs_split_ordered_extent() cannot handle it correctly.
	 */
	if (flags & ((1U << BTRFS_ORDERED_NOCOW) | (1U << BTRFS_ORDERED_PREALLOC)))
		entry = alloc_ordered_extent(inode, file_offset,
					     file_extent->num_bytes,
					     file_extent->num_bytes,
					     file_extent->disk_bytenr + file_extent->offset,
					     file_extent->num_bytes, 0, flags,
					     file_extent->compression);
	else
		entry = alloc_ordered_extent(inode, file_offset,
					     file_extent->num_bytes,
					     file_extent->ram_bytes,
					     file_extent->disk_bytenr,
					     file_extent->disk_num_bytes,
					     file_extent->offset, flags,
					     file_extent->compression);
	if (!IS_ERR(entry))
		insert_ordered_extent(entry);
	return entry;
}

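/*
 * Illustrative sketch (not lifted from a specific caller): how a write path is
 * expected to use the helper above.  The ordered extent comes back with two
 * references, one owned by the tree and one by the caller, so the caller drops
 * its reference with btrfs_put_ordered_extent() once it no longer needs the
 * pointer.  The local @file_extent and the REGULAR flag are just example
 * inputs.
 *
 *	struct btrfs_ordered_extent *ordered;
 *
 *	ordered = btrfs_alloc_ordered_extent(inode, file_offset, &file_extent,
 *					     1U << BTRFS_ORDERED_REGULAR);
 *	if (IS_ERR(ordered))
 *		return PTR_ERR(ordered);
 *	... submit the IO covering the range of the ordered extent ...
 *	btrfs_put_ordered_extent(ordered);
 */
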
/*
 * Add a struct btrfs_ordered_sum into the list of checksums to be inserted
 * when an ordered extent is finished.  If the list covers more than one
 * ordered extent, it is split across multiples.
 */
void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
			   struct btrfs_ordered_sum *sum)
{
	struct btrfs_inode *inode = entry->inode;

	spin_lock(&inode->ordered_tree_lock);
	list_add_tail(&sum->list, &entry->list);
	spin_unlock(&inode->ordered_tree_lock);
}

void btrfs_mark_ordered_extent_error(struct btrfs_ordered_extent *ordered)
{
	if (!test_and_set_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
		mapping_set_error(ordered->inode->vfs_inode.i_mapping, -EIO);
}

static void finish_ordered_fn(struct btrfs_work *work)
{
	struct btrfs_ordered_extent *ordered_extent;

	ordered_extent = container_of(work, struct btrfs_ordered_extent, work);
	btrfs_finish_ordered_io(ordered_extent);
}

static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
				      struct folio *folio, u64 file_offset,
				      u64 len, bool uptodate)
{
	struct btrfs_inode *inode = ordered->inode;
	struct btrfs_fs_info *fs_info = inode->root->fs_info;

	lockdep_assert_held(&inode->ordered_tree_lock);

	if (folio) {
		ASSERT(folio->mapping);
		ASSERT(folio_pos(folio) <= file_offset);
		ASSERT(file_offset + len <= folio_next_pos(folio));

		/*
		 * The ordered flag indicates whether we still have pending IO
		 * unfinished for the ordered extent.
		 *
		 * If it's not set, we need to skip to the next range.
		 */
		if (!btrfs_folio_test_ordered(fs_info, folio, file_offset, len))
			return false;
		btrfs_folio_clear_ordered(fs_info, folio, file_offset, len);
	}

	/* Now we're fine to update the accounting. */
	if (WARN_ON_ONCE(len > ordered->bytes_left)) {
		btrfs_crit(fs_info,
			   "bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu to_dec=%llu left=%llu",
			   btrfs_root_id(inode->root), btrfs_ino(inode),
			   ordered->file_offset, ordered->num_bytes,
			   len, ordered->bytes_left);
		ordered->bytes_left = 0;
	} else {
		ordered->bytes_left -= len;
	}

	if (!uptodate)
		set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);

	if (ordered->bytes_left)
		return false;

	/*
	 * All the IO of the ordered extent is finished, we need to queue
	 * the finish_func to be executed.
	 */
	set_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags);
	cond_wake_up(&ordered->wait);
	refcount_inc(&ordered->refs);
	trace_btrfs_ordered_extent_mark_finished(inode, ordered);
	return true;
}

static void btrfs_queue_ordered_fn(struct btrfs_ordered_extent *ordered)
{
	struct btrfs_inode *inode = ordered->inode;
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct btrfs_workqueue *wq = btrfs_is_free_space_inode(inode) ?
		fs_info->endio_freespace_worker : fs_info->endio_write_workers;

	btrfs_init_work(&ordered->work, finish_ordered_fn, NULL);
	btrfs_queue_work(wq, &ordered->work);
}

void btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
				 struct folio *folio, u64 file_offset, u64 len,
				 bool uptodate)
{
	struct btrfs_inode *inode = ordered->inode;
	bool ret;

	trace_btrfs_finish_ordered_extent(inode, file_offset, len, uptodate);

	spin_lock(&inode->ordered_tree_lock);
	ret = can_finish_ordered_extent(ordered, folio, file_offset, len,
					uptodate);
	spin_unlock(&inode->ordered_tree_lock);

	/*
	 * If this is a COW write it means we created new extent maps for the
	 * range and they point to unwritten locations if we got an error either
	 * before submitting a bio or during IO.
	 *
	 * We have marked the ordered extent with BTRFS_ORDERED_IOERR, and we
	 * are queuing its completion below. During completion, at
	 * btrfs_finish_one_ordered(), we will drop the extent maps for the
	 * unwritten extents.
	 *
	 * However because completion runs in a work queue we can end up having
	 * a fast fsync running before that. In the case of direct IO, once we
	 * unlock the inode the fsync might start, and we queue the completion
	 * before unlocking the inode. In the case of buffered IO when writeback
	 * finishes (end_bbio_data_write()) we queue the completion, so if the
	 * writeback was triggered by a fast fsync, the fsync might start
	 * logging before ordered extent completion runs in the work queue.
	 *
	 * The fast fsync will log file extent items based on the extent maps it
	 * finds, so if by the time it collects extent maps the ordered extent
	 * completion didn't happen yet, it will log file extent items that
	 * point to unwritten extents, resulting in a corruption if a crash
	 * happens and the log tree is replayed. Note that a fast fsync does not
	 * wait for completion of ordered extents in order to reduce latency.
	 *
	 * Set a flag in the inode so that the next fast fsync will wait for
	 * ordered extents to complete before starting to log.
	 */
	if (!uptodate && !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
		set_bit(BTRFS_INODE_COW_WRITE_ERROR, &inode->runtime_flags);

	if (ret)
		btrfs_queue_ordered_fn(ordered);
}

/*
 * Mark all ordered extents io inside the specified range finished.
 *
 * @folio:	 The involved folio for the operation.
 *		 For uncompressed buffered IO, the folio status also needs to be
 *		 updated to indicate whether the pending ordered io is finished.
 *		 Can be NULL for direct IO and compressed write.
 *		 For these cases, callers are ensured they won't execute the
 *		 endio function twice.
 *
 * This function is called for endio, thus the range must have ordered
 * extent(s) covering it.
 */
void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
				    struct folio *folio, u64 file_offset,
				    u64 num_bytes, bool uptodate)
{
	struct rb_node *node;
	struct btrfs_ordered_extent *entry = NULL;
	u64 cur = file_offset;
	const u64 end = file_offset + num_bytes;

	trace_btrfs_writepage_end_io_hook(inode, file_offset, end - 1, uptodate);

	spin_lock(&inode->ordered_tree_lock);
	while (cur < end) {
		u64 entry_end;
		u64 this_end;
		u64 len;

		node = ordered_tree_search(inode, cur);
		/* No ordered extents at all */
		if (!node)
			break;

		entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
		entry_end = entry->file_offset + entry->num_bytes;
		/*
		 * |<-- OE --->|  |
		 *		cur
		 * Go to next OE.
		 */
		if (cur >= entry_end) {
			node = rb_next(node);
			/* No more ordered extents, exit */
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_ordered_extent,
					 rb_node);

			/* Go to next ordered extent and continue */
			cur = entry->file_offset;
			continue;
		}
		/*
		 * |	|<--- OE --->|
		 * cur
		 * Go to the start of OE.
		 */
		if (cur < entry->file_offset) {
			cur = entry->file_offset;
			continue;
		}

		/*
		 * Now we are definitely inside one ordered extent.
		 *
		 * |<--- OE --->|
		 *	|
		 *	cur
		 */
		this_end = min(entry_end, end);
		len = this_end - cur;
		ASSERT(len < U32_MAX);

		if (can_finish_ordered_extent(entry, folio, cur, len, uptodate)) {
			spin_unlock(&inode->ordered_tree_lock);
			btrfs_queue_ordered_fn(entry);
			spin_lock(&inode->ordered_tree_lock);
		}
		cur += len;
	}
	spin_unlock(&inode->ordered_tree_lock);
}

/*
 * Finish IO for one ordered extent across a given range.  The range can only
 * contain one ordered extent.
 *
 * @cached:	 The cached ordered extent. If not NULL, we can skip the tree
 *		 search and use the ordered extent directly.
 *		 Will also be used to store the finished ordered extent.
 * @file_offset: File offset for the finished IO
 * @io_size:	 Length of the finished IO range
 *
 * Return true if the ordered extent is finished in the range, and update
 * @cached.
 * Return false otherwise.
 *
 * NOTE: The range can NOT cross multiple ordered extents.
 * Thus the caller should ensure the range doesn't cross ordered extents.
 */
bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
				    struct btrfs_ordered_extent **cached,
				    u64 file_offset, u64 io_size)
{
	struct rb_node *node;
	struct btrfs_ordered_extent *entry = NULL;
	bool finished = false;

	spin_lock(&inode->ordered_tree_lock);
	if (cached && *cached) {
		entry = *cached;
		goto have_entry;
	}

	node = ordered_tree_search(inode, file_offset);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
have_entry:
	if (!in_range(file_offset, entry->file_offset, entry->num_bytes))
		goto out;

	if (io_size > entry->bytes_left)
		btrfs_crit(inode->root->fs_info,
			   "bad ordered accounting left %llu size %llu",
			   entry->bytes_left, io_size);

	entry->bytes_left -= io_size;

	if (entry->bytes_left == 0) {
		/*
		 * Ensure only one caller can set the flag and finished_ret
		 * accordingly.
		 */
		finished = !test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
		/* test_and_set_bit implies a barrier */
		cond_wake_up_nomb(&entry->wait);
	}
out:
	if (finished && cached && entry) {
		*cached = entry;
		refcount_inc(&entry->refs);
		trace_btrfs_ordered_extent_dec_test_pending(inode, entry);
	}
	spin_unlock(&inode->ordered_tree_lock);
	return finished;
}

/*
 * Drop a reference on an ordered extent.  This will free the extent if the
 * last reference is dropped.
 */
void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
{
	trace_btrfs_ordered_extent_put(entry->inode, entry);

	if (refcount_dec_and_test(&entry->refs)) {
		struct btrfs_ordered_sum *sum;
		struct btrfs_ordered_sum *tmp;

		ASSERT(list_empty(&entry->root_extent_list));
		ASSERT(list_empty(&entry->log_list));
		ASSERT(RB_EMPTY_NODE(&entry->rb_node));
		btrfs_add_delayed_iput(entry->inode);
		list_for_each_entry_safe(sum, tmp, &entry->list, list)
			kvfree(sum);
		kmem_cache_free(btrfs_ordered_extent_cache, entry);
	}
}

/*
 * Remove an ordered extent from the tree.  No references are dropped and
 * waiters are woken up.
 */
void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
				 struct btrfs_ordered_extent *entry)
{
	struct btrfs_root *root = btrfs_inode->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct rb_node *node;
	bool pending;
	bool freespace_inode;

	/*
	 * If this is a free space inode the thread has not acquired the ordered
	 * extents lockdep map.
	 */
	freespace_inode = btrfs_is_free_space_inode(btrfs_inode);

	btrfs_lockdep_acquire(fs_info, btrfs_trans_pending_ordered);
	/* This is paired with alloc_ordered_extent(). */
	spin_lock(&btrfs_inode->lock);
	btrfs_mod_outstanding_extents(btrfs_inode, -1);
	spin_unlock(&btrfs_inode->lock);
	if (root != fs_info->tree_root) {
		u64 release;

		if (test_bit(BTRFS_ORDERED_ENCODED, &entry->flags))
			release = entry->disk_num_bytes;
		else
			release = entry->num_bytes;
		btrfs_delalloc_release_metadata(btrfs_inode, release,
						test_bit(BTRFS_ORDERED_IOERR,
							 &entry->flags));
	}

	percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes,
				 fs_info->delalloc_batch);

	spin_lock(&btrfs_inode->ordered_tree_lock);
	node = &entry->rb_node;
	rb_erase(node, &btrfs_inode->ordered_tree);
	RB_CLEAR_NODE(node);
	if (btrfs_inode->ordered_tree_last == node)
		btrfs_inode->ordered_tree_last = NULL;
	set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
	pending = test_and_clear_bit(BTRFS_ORDERED_PENDING, &entry->flags);
	spin_unlock(&btrfs_inode->ordered_tree_lock);

	/*
	 * The current running transaction is waiting on us, we need to let it
	 * know that we're complete and wake it up.
	 */
	if (pending) {
		struct btrfs_transaction *trans;

		/*
		 * The checks for trans are just a formality, it should be set,
		 * but if it isn't we don't want to deref/assert under the spin
		 * lock, so be nice and check if trans is set, but ASSERT() so
		 * if it isn't set a developer will notice.
		 */
		spin_lock(&fs_info->trans_lock);
		trans = fs_info->running_transaction;
		if (trans)
			refcount_inc(&trans->use_count);
		spin_unlock(&fs_info->trans_lock);

		ASSERT(trans || BTRFS_FS_ERROR(fs_info));
		if (trans) {
			if (atomic_dec_and_test(&trans->pending_ordered))
				wake_up(&trans->pending_wait);
			btrfs_put_transaction(trans);
		}
	}

	btrfs_lockdep_release(fs_info, btrfs_trans_pending_ordered);

	spin_lock(&root->ordered_extent_lock);
	list_del_init(&entry->root_extent_list);
	root->nr_ordered_extents--;

	trace_btrfs_ordered_extent_remove(btrfs_inode, entry);

	if (!root->nr_ordered_extents) {
		spin_lock(&fs_info->ordered_root_lock);
		BUG_ON(list_empty(&root->ordered_root));
		list_del_init(&root->ordered_root);
		spin_unlock(&fs_info->ordered_root_lock);
	}
	spin_unlock(&root->ordered_extent_lock);
	wake_up(&entry->wait);
	if (!freespace_inode)
		btrfs_lockdep_release(fs_info, btrfs_ordered_extent);
}

static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
{
	struct btrfs_ordered_extent *ordered;

	ordered = container_of(work, struct btrfs_ordered_extent, flush_work);
	btrfs_start_ordered_extent(ordered);
	complete(&ordered->completion);
}

/*
 * Wait for all the ordered extents in a root that intersect @bg, or in the
 * whole range if @bg is NULL.
 */
u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
			       const struct btrfs_block_group *bg)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	LIST_HEAD(splice);
	LIST_HEAD(skipped);
	LIST_HEAD(works);
	struct btrfs_ordered_extent *ordered, *next;
	u64 count = 0;
	u64 range_start, range_len;
	u64 range_end;

	if (bg) {
		range_start = bg->start;
		range_len = bg->length;
	} else {
		range_start = 0;
		range_len = U64_MAX;
	}
	range_end = range_start + range_len;

	mutex_lock(&root->ordered_extent_mutex);
	spin_lock(&root->ordered_extent_lock);
	list_splice_init(&root->ordered_extents, &splice);
	while (!list_empty(&splice) && nr) {
		ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
					   root_extent_list);

		if (range_end <= ordered->disk_bytenr ||
		    ordered->disk_bytenr + ordered->disk_num_bytes <= range_start) {
			list_move_tail(&ordered->root_extent_list, &skipped);
			cond_resched_lock(&root->ordered_extent_lock);
			continue;
		}

		list_move_tail(&ordered->root_extent_list,
			       &root->ordered_extents);
		refcount_inc(&ordered->refs);
		spin_unlock(&root->ordered_extent_lock);

		btrfs_init_work(&ordered->flush_work,
				btrfs_run_ordered_extent_work, NULL);
		list_add_tail(&ordered->work_list, &works);
		btrfs_queue_work(fs_info->flush_workers, &ordered->flush_work);

		cond_resched();
		if (nr != U64_MAX)
			nr--;
		count++;
		spin_lock(&root->ordered_extent_lock);
	}
	list_splice_tail(&skipped, &root->ordered_extents);
	list_splice_tail(&splice, &root->ordered_extents);
	spin_unlock(&root->ordered_extent_lock);

	list_for_each_entry_safe(ordered, next, &works, work_list) {
		list_del_init(&ordered->work_list);
		wait_for_completion(&ordered->completion);
		btrfs_put_ordered_extent(ordered);
		cond_resched();
	}
	mutex_unlock(&root->ordered_extent_mutex);

	return count;
}

/*
 * Wait for @nr ordered extents that intersect the @bg, or the whole range of
 * the filesystem if @bg is NULL.
 */
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
			      const struct btrfs_block_group *bg)
{
	struct btrfs_root *root;
	LIST_HEAD(splice);
	u64 done;

	mutex_lock(&fs_info->ordered_operations_mutex);
	spin_lock(&fs_info->ordered_root_lock);
	list_splice_init(&fs_info->ordered_roots, &splice);
	while (!list_empty(&splice) && nr) {
		root = list_first_entry(&splice, struct btrfs_root,
					ordered_root);
		root = btrfs_grab_root(root);
		BUG_ON(!root);
		list_move_tail(&root->ordered_root,
			       &fs_info->ordered_roots);
		spin_unlock(&fs_info->ordered_root_lock);

		done = btrfs_wait_ordered_extents(root, nr, bg);
		btrfs_put_root(root);

		if (nr != U64_MAX)
			nr -= done;

		spin_lock(&fs_info->ordered_root_lock);
	}
	list_splice_tail(&splice, &fs_info->ordered_roots);
	spin_unlock(&fs_info->ordered_root_lock);
	mutex_unlock(&fs_info->ordered_operations_mutex);
}

/*
 * Start IO and wait for a given ordered extent to finish.
 *
 * Wait on page writeback for all the pages in the extent but not in
 * [@nowriteback_start, @nowriteback_start + @nowriteback_len) and the
 * IO completion code to insert metadata into the btree corresponding to the
 * extent.
 */
void btrfs_start_ordered_extent_nowriteback(struct btrfs_ordered_extent *entry,
					    u64 nowriteback_start, u32 nowriteback_len)
{
	u64 start = entry->file_offset;
	u64 end = start + entry->num_bytes - 1;
	struct btrfs_inode *inode = entry->inode;
	bool freespace_inode;

	trace_btrfs_ordered_extent_start(inode, entry);

	/*
	 * If this is a free space inode do not take the ordered extents lockdep
	 * map.
	 */
	freespace_inode = btrfs_is_free_space_inode(inode);

	/*
	 * Pages in the range can be dirty, clean or writeback.  We start IO on
	 * any dirty ones so the wait doesn't stall waiting for the flusher
	 * thread to find them.
	 */
	if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags)) {
		if (!nowriteback_len) {
			filemap_fdatawrite_range(inode->vfs_inode.i_mapping, start, end);
		} else {
			if (start < nowriteback_start)
				filemap_fdatawrite_range(inode->vfs_inode.i_mapping, start,
							 nowriteback_start - 1);
			if (nowriteback_start + nowriteback_len < end)
				filemap_fdatawrite_range(inode->vfs_inode.i_mapping,
							 nowriteback_start + nowriteback_len,
							 end);
		}
	}

	if (!freespace_inode)
		btrfs_might_wait_for_event(inode->root->fs_info, btrfs_ordered_extent);
	wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags));
}

/*
 * Used to wait on ordered extents across a large range of bytes.
 */
int btrfs_wait_ordered_range(struct btrfs_inode *inode, u64 start, u64 len)
{
	int ret = 0;
	int ret_wb = 0;
	u64 end;
	u64 orig_end;
	struct btrfs_ordered_extent *ordered;

	if (start + len < start) {
		orig_end = OFFSET_MAX;
	} else {
		orig_end = start + len - 1;
		if (orig_end > OFFSET_MAX)
			orig_end = OFFSET_MAX;
	}

	/*
	 * Start IO across the range first to instantiate any delalloc extents.
	 */
	ret = btrfs_fdatawrite_range(inode, start, orig_end);
	if (ret)
		return ret;

	/*
	 * If we have a writeback error don't return immediately. Wait first
	 * for any ordered extents that haven't completed yet. This is to make
	 * sure no one can dirty the same page ranges and call writepages()
	 * before the ordered extents complete - to avoid failures (-EEXIST)
	 * when adding the new ordered extents to the ordered tree.
	 */
	ret_wb = filemap_fdatawait_range(inode->vfs_inode.i_mapping, start, orig_end);

	end = orig_end;
	while (1) {
		ordered = btrfs_lookup_first_ordered_extent(inode, end);
		if (!ordered)
			break;
		if (ordered->file_offset > orig_end) {
			btrfs_put_ordered_extent(ordered);
			break;
		}
		if (ordered->file_offset + ordered->num_bytes <= start) {
			btrfs_put_ordered_extent(ordered);
			break;
		}
		btrfs_start_ordered_extent(ordered);
		end = ordered->file_offset;
		/*
		 * If the ordered extent had an error save the error but don't
		 * exit without waiting first for all other ordered extents in
		 * the range to complete.
		 */
		if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
			ret = -EIO;
		btrfs_put_ordered_extent(ordered);
		if (end == 0 || end == start)
			break;
		end--;
	}
	return ret_wb ? ret_wb : ret;
}

/*
 * Find an ordered extent corresponding to @file_offset.  Return NULL if
 * nothing is found, otherwise take a reference on the extent and return it.
 */
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
							 u64 file_offset)
{
	struct rb_node *node;
	struct btrfs_ordered_extent *entry = NULL;

	spin_lock(&inode->ordered_tree_lock);
	node = ordered_tree_search(inode, file_offset);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
	if (!in_range(file_offset, entry->file_offset, entry->num_bytes))
		entry = NULL;
	if (entry) {
		refcount_inc(&entry->refs);
		trace_btrfs_ordered_extent_lookup(inode, entry);
	}
out:
	spin_unlock(&inode->ordered_tree_lock);
	return entry;
}

/*
 * Since the DIO code tries to lock a wide area we need to look for any ordered
 * extents that exist in the range, rather than just the start of the range.
 */
struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
		struct btrfs_inode *inode, u64 file_offset, u64 len)
{
	struct rb_node *node;
	struct btrfs_ordered_extent *entry = NULL;

	spin_lock(&inode->ordered_tree_lock);
	node = ordered_tree_search(inode, file_offset);
	if (!node) {
		node = ordered_tree_search(inode, file_offset + len);
		if (!node)
			goto out;
	}

	while (1) {
		entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
		if (btrfs_range_overlaps(entry, file_offset, len))
			break;

		if (entry->file_offset >= file_offset + len) {
			entry = NULL;
			break;
		}
		entry = NULL;
		node = rb_next(node);
		if (!node)
			break;
	}
out:
	if (entry) {
		refcount_inc(&entry->refs);
		trace_btrfs_ordered_extent_lookup_range(inode, entry);
	}
	spin_unlock(&inode->ordered_tree_lock);
	return entry;
}

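/*
 * Illustrative sketch (not lifted from a specific caller): a path that must
 * not proceed while ordered IO is pending on a range typically loops until the
 * lookup comes back empty, waiting for each extent it finds.  Real callers
 * usually also serialize with the io tree lock, as
 * btrfs_lock_and_flush_ordered_range() below does; this only shows the lookup,
 * wait and reference-drop pattern:
 *
 *	while ((ordered = btrfs_lookup_ordered_range(inode, pos, len))) {
 *		btrfs_start_ordered_extent(ordered);
 *		btrfs_put_ordered_extent(ordered);
 *	}
 */
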
/*
 * Adds all ordered extents to the given list. The list ends up sorted by the
 * file_offset of the ordered extents.
 */
void btrfs_get_ordered_extents_for_logging(struct btrfs_inode *inode,
					   struct list_head *list)
{
	struct rb_node *n;

	btrfs_assert_inode_locked(inode);

	spin_lock(&inode->ordered_tree_lock);
	for (n = rb_first(&inode->ordered_tree); n; n = rb_next(n)) {
		struct btrfs_ordered_extent *ordered;

		ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);

		if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
			continue;

		ASSERT(list_empty(&ordered->log_list));
		list_add_tail(&ordered->log_list, list);
		refcount_inc(&ordered->refs);
		trace_btrfs_ordered_extent_lookup_for_logging(inode, ordered);
	}
	spin_unlock(&inode->ordered_tree_lock);
}

/*
 * Lookup and return any ordered extent before @file_offset.  NULL is returned
 * if none is found.
 */
struct btrfs_ordered_extent *
btrfs_lookup_first_ordered_extent(struct btrfs_inode *inode, u64 file_offset)
{
	struct rb_node *node;
	struct btrfs_ordered_extent *entry = NULL;

	spin_lock(&inode->ordered_tree_lock);
	node = ordered_tree_search(inode, file_offset);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
	refcount_inc(&entry->refs);
	trace_btrfs_ordered_extent_lookup_first(inode, entry);
out:
	spin_unlock(&inode->ordered_tree_lock);
	return entry;
}

/*
 * Lookup the first ordered extent that overlaps the range
 * [@file_offset, @file_offset + @len).
 *
 * The difference between this and btrfs_lookup_first_ordered_extent() is
 * that this one won't return any ordered extent that does not overlap the range.
 * And the difference against btrfs_lookup_ordered_extent() is, this function
 * ensures the first ordered extent gets returned.
 */
struct btrfs_ordered_extent *btrfs_lookup_first_ordered_range(
			struct btrfs_inode *inode, u64 file_offset, u64 len)
{
	struct rb_node *node;
	struct rb_node *cur;
	struct rb_node *prev;
	struct rb_node *next;
	struct btrfs_ordered_extent *entry = NULL;

	spin_lock(&inode->ordered_tree_lock);
	node = inode->ordered_tree.rb_node;
	/*
	 * Here we don't want to use tree_search() which will use tree->last
	 * and screw up the search order.
	 * And __tree_search() can't return the adjacent ordered extents
	 * either, thus here we do our own search.
	 */
	while (node) {
		entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);

		if (file_offset < entry->file_offset) {
			node = node->rb_left;
		} else if (file_offset >= entry_end(entry)) {
			node = node->rb_right;
		} else {
			/*
			 * Direct hit, got an ordered extent that starts at
			 * @file_offset.
			 */
			goto out;
		}
	}
	if (!entry) {
		/* Empty tree */
		goto out;
	}

	cur = &entry->rb_node;
	/* We got an entry around @file_offset, check adjacent entries */
	if (entry->file_offset < file_offset) {
		prev = cur;
		next = rb_next(cur);
	} else {
		prev = rb_prev(cur);
		next = cur;
	}
	if (prev) {
		entry = rb_entry(prev, struct btrfs_ordered_extent, rb_node);
		if (btrfs_range_overlaps(entry, file_offset, len))
			goto out;
	}
	if (next) {
		entry = rb_entry(next, struct btrfs_ordered_extent, rb_node);
		if (btrfs_range_overlaps(entry, file_offset, len))
			goto out;
	}
	/* No ordered extent in the range */
	entry = NULL;
out:
	if (entry) {
		refcount_inc(&entry->refs);
		trace_btrfs_ordered_extent_lookup_first_range(inode, entry);
	}

	spin_unlock(&inode->ordered_tree_lock);
	return entry;
}

/*
 * Lock the passed range and ensure all pending ordered extents in it are run
 * to completion.
 *
 * @inode:        Inode whose ordered tree is to be searched
 * @start:        Beginning of range to flush
 * @end:          Last byte of range to lock
 * @cached_state: If passed, will return the extent state responsible for the
 *                locked range. It's the caller's responsibility to free the
 *                cached state.
 *
 * Always return with the given range locked, ensuring that after it's called
 * no ordered extent can be pending.
 */
void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
					u64 end,
					struct extent_state **cached_state)
{
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cache = NULL;
	struct extent_state **cachedp = &cache;

	if (cached_state)
		cachedp = cached_state;

	while (1) {
		btrfs_lock_extent(&inode->io_tree, start, end, cachedp);
		ordered = btrfs_lookup_ordered_range(inode, start,
						     end - start + 1);
		if (!ordered) {
			/*
			 * If no external cached_state has been passed then
			 * decrement the extra ref taken for cachedp since we
			 * aren't exposing it outside of this function.
			 */
			if (!cached_state)
				refcount_dec(&cache->refs);
			break;
		}
		btrfs_unlock_extent(&inode->io_tree, start, end, cachedp);
		btrfs_start_ordered_extent(ordered);
		btrfs_put_ordered_extent(ordered);
	}
}

/*
 * Lock the passed range and ensure all pending ordered extents in it are run
 * to completion in nowait mode.
 *
 * Return true if the lookup does not return any ordered extents, otherwise
 * false.
 */
bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end,
				  struct extent_state **cached_state)
{
	struct btrfs_ordered_extent *ordered;

	if (!btrfs_try_lock_extent(&inode->io_tree, start, end, cached_state))
		return false;

	ordered = btrfs_lookup_ordered_range(inode, start, end - start + 1);
	if (!ordered)
		return true;

	btrfs_put_ordered_extent(ordered);
	btrfs_unlock_extent(&inode->io_tree, start, end, cached_state);

	return false;
}

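/*
 * Illustrative sketch: a nowait write path (for example an IOCB_NOWAIT style
 * caller, the surrounding names here are hypothetical) is expected to bail out
 * with -EAGAIN instead of blocking when the range cannot be locked without
 * waiting, and to unlock the extent range itself once done:
 *
 *	if (!btrfs_try_lock_ordered_range(inode, start, end, &cached_state))
 *		return -EAGAIN;
 *	... do the nowait work ...
 *	btrfs_unlock_extent(&inode->io_tree, start, end, &cached_state);
 */
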
/* Split out a new ordered extent for the first @len bytes of @ordered. */
struct btrfs_ordered_extent *btrfs_split_ordered_extent(
			struct btrfs_ordered_extent *ordered, u64 len)
{
	struct btrfs_inode *inode = ordered->inode;
	struct btrfs_root *root = inode->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 file_offset = ordered->file_offset;
	u64 disk_bytenr = ordered->disk_bytenr;
	unsigned long flags = ordered->flags;
	struct btrfs_ordered_sum *sum, *tmpsum;
	struct btrfs_ordered_extent *new;
	struct rb_node *node;
	u64 offset = 0;

	trace_btrfs_ordered_extent_split(inode, ordered);

	ASSERT(!(flags & (1U << BTRFS_ORDERED_COMPRESSED)));

	/*
	 * The entire bio must be covered by the ordered extent, but we can't
	 * reduce the original extent to a zero length either.
	 */
	if (WARN_ON_ONCE(len >= ordered->num_bytes))
		return ERR_PTR(-EINVAL);
	/*
	 * If our ordered extent had an error there's no point in continuing.
	 * The error may have come from a transaction abort done either by this
	 * task or some other concurrent task, and the transaction abort path
	 * iterates over all existing ordered extents and sets the flag
	 * BTRFS_ORDERED_IOERR on them.
	 */
	if (unlikely(flags & (1U << BTRFS_ORDERED_IOERR))) {
		const int fs_error = BTRFS_FS_ERROR(fs_info);

		return fs_error ? ERR_PTR(fs_error) : ERR_PTR(-EIO);
	}
	/* We cannot split partially completed ordered extents. */
	if (ordered->bytes_left) {
		ASSERT(!(flags & ~BTRFS_ORDERED_TYPE_FLAGS));
		if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes))
			return ERR_PTR(-EINVAL);
	}
	/* We cannot split a compressed ordered extent. */
	if (WARN_ON_ONCE(ordered->disk_num_bytes != ordered->num_bytes))
		return ERR_PTR(-EINVAL);

	new = alloc_ordered_extent(inode, file_offset, len, len, disk_bytenr,
				   len, 0, flags, ordered->compress_type);
	if (IS_ERR(new))
		return new;

	/* One ref for the tree. */
	refcount_inc(&new->refs);

	/*
	 * Take the root's ordered_extent_lock to avoid a race with
	 * btrfs_wait_ordered_extents() when updating the disk_bytenr and
	 * disk_num_bytes fields of the ordered extent below.
	 *
	 * There's no concern about a previous caller of
	 * btrfs_wait_ordered_extents() getting the trimmed ordered extent
	 * before we insert the new one, because even if it gets the ordered
	 * extent before it's trimmed and the new one inserted, right before it
	 * uses it or during its use, the ordered extent might have been
	 * trimmed in the meanwhile, and it missed the new ordered extent.
	 * There's no way around this and it's harmless for current use cases,
	 * so we take the root's ordered_extent_lock to fix that race during
	 * trimming and silence tools like KCSAN.
	 */
	spin_lock_irq(&root->ordered_extent_lock);
	spin_lock(&inode->ordered_tree_lock);

	/*
	 * We don't have overlapping ordered extents (that would imply double
	 * allocation of extents) and we checked above that the split length
	 * does not cross the ordered extent's num_bytes field, so there's
	 * no need to remove it and re-insert it in the tree.
	 */
	ordered->file_offset += len;
	ordered->disk_bytenr += len;
	ordered->num_bytes -= len;
	ordered->disk_num_bytes -= len;
	ordered->ram_bytes -= len;

	if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) {
		ASSERT(ordered->bytes_left == 0);
		new->bytes_left = 0;
	} else {
		ordered->bytes_left -= len;
	}

	if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags)) {
		if (ordered->truncated_len > len) {
			ordered->truncated_len -= len;
		} else {
			new->truncated_len = ordered->truncated_len;
			ordered->truncated_len = 0;
		}
	}

	list_for_each_entry_safe(sum, tmpsum, &ordered->list, list) {
		if (offset == len)
			break;
		list_move_tail(&sum->list, &new->list);
		offset += sum->len;
	}

	node = tree_insert(&inode->ordered_tree, new->file_offset, &new->rb_node);
	if (unlikely(node))
		btrfs_panic(fs_info, -EEXIST,
			    "inconsistency in ordered tree at offset %llu after split",
			    new->file_offset);
	spin_unlock(&inode->ordered_tree_lock);

	list_add_tail(&new->root_extent_list, &root->ordered_extents);
	root->nr_ordered_extents++;
	spin_unlock_irq(&root->ordered_extent_lock);
	return new;
}

int __init ordered_data_init(void)
{
	btrfs_ordered_extent_cache = KMEM_CACHE(btrfs_ordered_extent, 0);
	if (!btrfs_ordered_extent_cache)
		return -ENOMEM;

	return 0;
}

void __cold ordered_data_exit(void)
{
	kmem_cache_destroy(btrfs_ordered_extent_cache);
}