#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include "extent_io.h"
#include "extent_map.h"
#include "compat.h"
#include "ctree.h"
#include "btrfs_inode.h"

static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;

static LIST_HEAD(buffers);
static LIST_HEAD(states);

#define LEAK_DEBUG 0
#if LEAK_DEBUG
static DEFINE_SPINLOCK(leak_lock);
#endif

#define BUFFER_LRU_MAX 64

struct tree_entry {
	u64 start;
	u64 end;
	struct rb_node rb_node;
};

struct extent_page_data {
	struct bio *bio;
	struct extent_io_tree *tree;
	get_extent_t *get_extent;

	/* tells writepage not to lock the state bits for this range
	 * it still does the unlocking
	 */
	unsigned int extent_locked:1;

	/* tells the submit_bio code to use a WRITE_SYNC */
	unsigned int sync_io:1;
};

int __init extent_io_init(void)
{
	extent_state_cache = kmem_cache_create("extent_state",
			sizeof(struct extent_state), 0,
			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
	if (!extent_state_cache)
		return -ENOMEM;

	extent_buffer_cache = kmem_cache_create("extent_buffers",
			sizeof(struct extent_buffer), 0,
			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
	if (!extent_buffer_cache)
		goto free_state_cache;
	return 0;

free_state_cache:
	kmem_cache_destroy(extent_state_cache);
	return -ENOMEM;
}

void extent_io_exit(void)
{
	struct extent_state *state;
	struct extent_buffer *eb;

	while (!list_empty(&states)) {
		state = list_entry(states.next, struct extent_state, leak_list);
		printk(KERN_ERR "btrfs state leak: start %llu end %llu "
		       "state %lu in tree %p refs %d\n",
		       (unsigned long long)state->start,
		       (unsigned long long)state->end,
		       state->state, state->tree, atomic_read(&state->refs));
		list_del(&state->leak_list);
		kmem_cache_free(extent_state_cache, state);
	}

	while (!list_empty(&buffers)) {
		eb = list_entry(buffers.next, struct extent_buffer, leak_list);
		printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
		       "refs %d\n", (unsigned long long)eb->start,
		       eb->len, atomic_read(&eb->refs));
		list_del(&eb->leak_list);
		kmem_cache_free(extent_buffer_cache, eb);
	}
	if (extent_state_cache)
		kmem_cache_destroy(extent_state_cache);
	if (extent_buffer_cache)
		kmem_cache_destroy(extent_buffer_cache);
}

void extent_io_tree_init(struct extent_io_tree *tree,
			 struct address_space *mapping, gfp_t mask)
{
	tree->state.rb_node = NULL;
	tree->buffer.rb_node = NULL;
	tree->ops = NULL;
	tree->dirty_bytes = 0;
	spin_lock_init(&tree->lock);
	spin_lock_init(&tree->buffer_lock);
	tree->mapping = mapping;
}
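/*
 * Example (illustrative only, not part of the original file): callers are
 * expected to embed an extent_io_tree in their per-inode state and call
 * extent_io_tree_init() once when that inode is set up.  A minimal sketch,
 * assuming a btrfs-style io_tree field reachable through BTRFS_I(); the
 * helper name below is hypothetical:
 *
 *	static void example_init_io_tree(struct inode *inode)
 *	{
 *		extent_io_tree_init(&BTRFS_I(inode)->io_tree,
 *				    inode->i_mapping, GFP_NOFS);
 *	}
 */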
static struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;
#if LEAK_DEBUG
	unsigned long flags;
#endif

	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state)
		return state;
	state->state = 0;
	state->private = 0;
	state->tree = NULL;
#if LEAK_DEBUG
	spin_lock_irqsave(&leak_lock, flags);
	list_add(&state->leak_list, &states);
	spin_unlock_irqrestore(&leak_lock, flags);
#endif
	atomic_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	return state;
}

static void free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (atomic_dec_and_test(&state->refs)) {
#if LEAK_DEBUG
		unsigned long flags;
#endif
		WARN_ON(state->tree);
#if LEAK_DEBUG
		spin_lock_irqsave(&leak_lock, flags);
		list_del(&state->leak_list);
		spin_unlock_irqrestore(&leak_lock, flags);
#endif
		kmem_cache_free(extent_state_cache, state);
	}
}

static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
				   struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct tree_entry *entry;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct tree_entry, rb_node);

		if (offset < entry->start)
			p = &(*p)->rb_left;
		else if (offset > entry->end)
			p = &(*p)->rb_right;
		else
			return parent;
	}

	entry = rb_entry(node, struct tree_entry, rb_node);
	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
				      struct rb_node **prev_ret,
				      struct rb_node **next_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node *n = root->rb_node;
	struct rb_node *prev = NULL;
	struct rb_node *orig_prev = NULL;
	struct tree_entry *entry;
	struct tree_entry *prev_entry = NULL;

	while (n) {
		entry = rb_entry(n, struct tree_entry, rb_node);
		prev = n;
		prev_entry = entry;

		if (offset < entry->start)
			n = n->rb_left;
		else if (offset > entry->end)
			n = n->rb_right;
		else
			return n;
	}

	if (prev_ret) {
		orig_prev = prev;
		while (prev && offset > prev_entry->end) {
			prev = rb_next(prev);
			prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		}
		*prev_ret = prev;
		prev = orig_prev;
	}

	if (next_ret) {
		prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		while (prev && offset < prev_entry->start) {
			prev = rb_prev(prev);
			prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		}
		*next_ret = prev;
	}
	return NULL;
}

static inline struct rb_node *tree_search(struct extent_io_tree *tree,
					  u64 offset)
{
	struct rb_node *prev = NULL;
	struct rb_node *ret;

	ret = __etree_search(tree, offset, &prev, NULL);
	if (!ret)
		return prev;
	return ret;
}

static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
						u64 offset, struct rb_node *node)
{
	struct rb_root *root = &tree->buffer;
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct extent_buffer *eb;

	while (*p) {
		parent = *p;
		eb = rb_entry(parent, struct extent_buffer, rb_node);

		if (offset < eb->start)
			p = &(*p)->rb_left;
		else if (offset > eb->start)
			p = &(*p)->rb_right;
		else
			return eb;
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
					   u64 offset)
{
	struct rb_root *root = &tree->buffer;
	struct rb_node *n = root->rb_node;
	struct extent_buffer *eb;

	while (n) {
		eb = rb_entry(n, struct extent_buffer, rb_node);
		if (offset < eb->start)
			n = n->rb_left;
		else if (offset > eb->start)
			n = n->rb_right;
		else
			return eb;
	}
	return NULL;
}

/*
 * utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree.  Extents with EXTENT_IO in their state field
 * are not merged because the end_io handlers need to be able to do
 * operations on them without sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static int merge_state(struct extent_io_tree *tree,
		       struct extent_state *state)
{
	struct extent_state *other;
	struct rb_node *other_node;

	if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
		return 0;

	other_node = rb_prev(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			state->start = other->start;
			other->tree = NULL;
			rb_erase(&other->rb_node, &tree->state);
			free_extent_state(other);
		}
	}
	other_node = rb_next(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			other->start = state->start;
			state->tree = NULL;
			rb_erase(&state->rb_node, &tree->state);
			free_extent_state(state);
		}
	}
	return 0;
}

static void set_state_cb(struct extent_io_tree *tree,
			 struct extent_state *state,
			 unsigned long bits)
{
	if (tree->ops && tree->ops->set_bit_hook) {
		tree->ops->set_bit_hook(tree->mapping->host, state->start,
					state->end, state->state, bits);
	}
}

static void clear_state_cb(struct extent_io_tree *tree,
			   struct extent_state *state,
			   unsigned long bits)
{
	if (tree->ops && tree->ops->clear_bit_hook) {
		tree->ops->clear_bit_hook(tree->mapping->host, state->start,
					  state->end, state->state, bits);
	}
}

/*
 * insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case the
 * state struct is freed.
 *
 * The tree lock is not taken internally.  This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static int insert_state(struct extent_io_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			int bits)
{
	struct rb_node *node;

	if (end < start) {
		printk(KERN_ERR "btrfs end < start %llu %llu\n",
		       (unsigned long long)end,
		       (unsigned long long)start);
		WARN_ON(1);
	}
	if (bits & EXTENT_DIRTY)
		tree->dirty_bytes += end - start + 1;
	set_state_cb(tree, state, bits);
	state->state |= bits;
	state->start = start;
	state->end = end;
	node = tree_insert(&tree->state, end, &state->rb_node);
	if (node) {
		struct extent_state *found;
		found = rb_entry(node, struct extent_state, rb_node);
		printk(KERN_ERR "btrfs found node %llu %llu on insert of "
		       "%llu %llu\n", (unsigned long long)found->start,
		       (unsigned long long)found->end,
		       (unsigned long long)start, (unsigned long long)end);
		free_extent_state(state);
		return -EEXIST;
	}
	state->tree = tree;
	merge_state(tree, state);
	return 0;
}

/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half.  'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling,
 * the tree has 'orig' at [orig->start, orig->end].  After calling, there
 * are two extent state structs in the tree:
 * prealloc: [orig->start, split - 1]
 * orig: [ split, orig->end ]
 *
 * The tree locks are not taken by this function.  They need to be held
 * by the caller.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *node;
	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;

	node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
	if (node) {
		free_extent_state(prealloc);
		return -EEXIST;
	}
	prealloc->tree = tree;
	return 0;
}

/*
 * utility function to clear some bits in an extent state struct.
 * it will optionally wake up any one waiting on this state (wake == 1), or
 * forcibly remove the state from the tree (delete == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree
 */
static int clear_state_bit(struct extent_io_tree *tree,
			   struct extent_state *state, int bits, int wake,
			   int delete)
{
	int ret = state->state & bits;

	if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
		u64 range = state->end - state->start + 1;
		WARN_ON(range > tree->dirty_bytes);
		tree->dirty_bytes -= range;
	}
	clear_state_cb(tree, state, bits);
	state->state &= ~bits;
	if (wake)
		wake_up(&state->wq);
	if (delete || state->state == 0) {
		if (state->tree) {
			clear_state_cb(tree, state, state->state);
			rb_erase(&state->rb_node, &tree->state);
			state->tree = NULL;
			free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
	}
	return ret;
}

/*
 * clear some bits on a range in the tree.  This may require splitting
 * or inserting elements in the tree, so the gfp mask is used to
 * indicate which allocations or sleeping are allowed.
 *
 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
 * the given range from the tree regardless of state (ie for truncate).
 *
 * the range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns < 0 on error, > 0 if any of the
 * bits were already set, or zero if none of the bits were already set.
 */
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		     int bits, int wake, int delete, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	int err;
	int set = 0;

again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	spin_lock(&tree->lock);
	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = tree_search(tree, start);
	if (!node)
		goto out;
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again.  It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */

	if (state->start < start) {
		if (!prealloc)
			prealloc = alloc_extent_state(GFP_ATOMIC);
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			start = state->end + 1;
			set |= clear_state_bit(tree, state, bits,
					       wake, delete);
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		if (!prealloc)
			prealloc = alloc_extent_state(GFP_ATOMIC);
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		if (wake)
			wake_up(&state->wq);
		set |= clear_state_bit(tree, prealloc, bits,
				       wake, delete);
		prealloc = NULL;
		goto out;
	}

	start = state->end + 1;
	set |= clear_state_bit(tree, state, bits, wake, delete);
	goto search_again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return set;

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}

static int wait_on_state(struct extent_io_tree *tree,
			 struct extent_state *state)
		__releases(tree->lock)
		__acquires(tree->lock)
{
	DEFINE_WAIT(wait);
	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
	spin_unlock(&tree->lock);
	schedule();
	spin_lock(&tree->lock);
	finish_wait(&state->wq, &wait);
	return 0;
}

/*
 * waits for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function
 */
int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
{
	struct extent_state *state;
	struct rb_node *node;

	spin_lock(&tree->lock);
again:
	while (1) {
		/*
		 * this search will find all the extents that end after
		 * our range starts
		 */
		node = tree_search(tree, start);
		if (!node)
			break;

		state = rb_entry(node, struct extent_state, rb_node);

		if (state->start > end)
			goto out;

		if (state->state & bits) {
			start = state->start;
			atomic_inc(&state->refs);
			wait_on_state(tree, state);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (need_resched()) {
			spin_unlock(&tree->lock);
			cond_resched();
			spin_lock(&tree->lock);
		}
	}
out:
	spin_unlock(&tree->lock);
	return 0;
}

static void set_state_bits(struct extent_io_tree *tree,
			   struct extent_state *state,
			   int bits)
{
	if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
		u64 range = state->end - state->start + 1;
		tree->dirty_bytes += range;
	}
	set_state_cb(tree, state, bits);
	state->state |= bits;
}

/*
 * set some bits on a range in the tree.  This may require allocations
 * or sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
 * range already has the desired bits set.  The start of the existing
 * range is returned in failed_start in this case.
 *
 * [start, end] is inclusive
 * This takes the tree lock.
 */
static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			  int bits, int exclusive, u64 *failed_start,
			  gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	int err = 0;
	int set;
	u64 last_start;
	u64 last_end;
again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	spin_lock(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node) {
		err = insert_state(tree, prealloc, start, end, bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		goto out;
	}

	state = rb_entry(node, struct extent_state, rb_node);
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		set = state->state & bits;
		if (set && exclusive) {
			*failed_start = state->start;
			err = -EEXIST;
			goto out;
		}
		set_state_bits(tree, state, bits);
		start = state->end + 1;
		merge_state(tree, state);
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		set = state->state & bits;
		if (exclusive && set) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, bits);
			start = state->end + 1;
			merge_state(tree, state);
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;
		err = insert_state(tree, prealloc, start, this_end,
				   bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		if (err)
			goto out;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		set = state->state & bits;
		if (exclusive && set) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		set_state_bits(tree, prealloc, bits);
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

	goto search_again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}

/* wrappers around set/clear extent bit */
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
			      mask);
}

int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
}

int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
		    int bits, gfp_t mask)
{
	return set_extent_bit(tree, start, end, bits, 0, NULL,
			      mask);
}

int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
		      int bits, gfp_t mask)
{
	return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
}

int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
			gfp_t mask)
{
	return set_extent_bit(tree, start, end,
			      EXTENT_DELALLOC | EXTENT_DIRTY,
			      0, NULL, mask);
}

int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return clear_extent_bit(tree, start, end,
				EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
}

int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
			 gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
}

int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
		   gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
			      mask);
}

static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
			    gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0,
				mask);
}
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
			gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
			      mask);
}

static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
				 u64 end, gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
}

static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
				gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
			      0, NULL, mask);
}

static int clear_extent_writeback(struct extent_io_tree *tree, u64 start,
				  u64 end, gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
}

int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
	return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
}

/*
 * either insert or lock state struct between start and end.  Use mask to
 * tell us if waiting is desired.
 */
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
{
	int err;
	u64 failed_start;
	while (1) {
		err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
				     &failed_start, mask);
		if (err == -EEXIST && (mask & __GFP_WAIT)) {
			wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
			start = failed_start;
		} else {
			break;
		}
		WARN_ON(start > end);
	}
	return err;
}

int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
		    gfp_t mask)
{
	int err;
	u64 failed_start;

	err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
			     &failed_start, mask);
	if (err == -EEXIST) {
		if (failed_start > start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, 1, 0, mask);
		return 0;
	}
	return 1;
}

int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
		  gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
}
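/*
 * Example (illustrative only, not part of the original file): a typical
 * caller brackets work on an inclusive byte range with lock_extent() and
 * unlock_extent(), letting concurrent users of the same range sleep in
 * wait_extent_bit() until the EXTENT_LOCKED bit is cleared.  A minimal
 * sketch; the function name is hypothetical:
 *
 *	static void example_with_range_locked(struct extent_io_tree *tree,
 *					      u64 start, u64 end)
 *	{
 *		lock_extent(tree, start, end, GFP_NOFS);
 *		... operate on the pages covering [start, end] ...
 *		unlock_extent(tree, start, end, GFP_NOFS);
 *	}
 */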
/*
 * helper function to set pages and extents in the tree dirty
 */
int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		__set_page_dirty_nobuffers(page);
		page_cache_release(page);
		index++;
	}
	set_extent_dirty(tree, start, end, GFP_NOFS);
	return 0;
}

/*
 * helper function to set both pages and extents in the tree writeback
 */
static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		set_page_writeback(page);
		page_cache_release(page);
		index++;
	}
	set_extent_writeback(tree, start, end, GFP_NOFS);
	return 0;
}

/*
 * find the first offset in the io tree with 'bits' set.  zero is
 * returned if we find something, and *start_ret and *end_ret are
 * set to reflect the state struct that was found.
 *
 * If nothing was found, 1 is returned, < 0 on error
 */
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, int bits)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 1;

	spin_lock(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node)
		goto out;

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->end >= start && (state->state & bits)) {
			*start_ret = state->start;
			*end_ret = state->end;
			ret = 0;
			break;
		}
		node = rb_next(node);
		if (!node)
			break;
	}
out:
	spin_unlock(&tree->lock);
	return ret;
}

/* find the first state struct with 'bits' set after 'start', and
 * return it.  tree->lock must be held.  NULL will be returned if
 * nothing was found after 'start'
 */
struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
						 u64 start, int bits)
{
	struct rb_node *node;
	struct extent_state *state;

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node)
		goto out;

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->end >= start && (state->state & bits))
			return state;

		node = rb_next(node);
		if (!node)
			break;
	}
out:
	return NULL;
}
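/*
 * Example (illustrative only, not part of the original file): walking every
 * range that has a given bit set by repeatedly calling
 * find_first_extent_bit() and advancing past the returned range.  A minimal
 * sketch; the function name is hypothetical:
 *
 *	static void example_walk_dirty_ranges(struct extent_io_tree *tree)
 *	{
 *		u64 found_start, found_end;
 *		u64 start = 0;
 *
 *		while (!find_first_extent_bit(tree, start, &found_start,
 *					      &found_end, EXTENT_DIRTY)) {
 *			... [found_start, found_end] has EXTENT_DIRTY set ...
 *			start = found_end + 1;
 *		}
 *	}
 */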
/*
 * find a contiguous range of bytes in the file marked as delalloc, not
 * more than 'max_bytes'.  start and end are used to return the range,
 *
 * 1 is returned if we find something, 0 if nothing was in the tree
 */
static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
					u64 *start, u64 *end, u64 max_bytes)
{
	struct rb_node *node;
	struct extent_state *state;
	u64 cur_start = *start;
	u64 found = 0;
	u64 total_bytes = 0;

	spin_lock(&tree->lock);

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, cur_start);
	if (!node) {
		if (!found)
			*end = (u64)-1;
		goto out;
	}

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (found && (state->start != cur_start ||
			      (state->state & EXTENT_BOUNDARY))) {
			goto out;
		}
		if (!(state->state & EXTENT_DELALLOC)) {
			if (!found)
				*end = state->end;
			goto out;
		}
		if (!found)
			*start = state->start;
		found++;
		*end = state->end;
		cur_start = state->end + 1;
		node = rb_next(node);
		if (!node)
			break;
		total_bytes += state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
	}
out:
	spin_unlock(&tree->lock);
	return found;
}

static noinline int __unlock_for_delalloc(struct inode *inode,
					  struct page *locked_page,
					  u64 start, u64 end)
{
	int ret;
	struct page *pages[16];
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	unsigned long nr_pages = end_index - index + 1;
	int i;

	if (index == locked_page->index && end_index == index)
		return 0;

	while (nr_pages > 0) {
		ret = find_get_pages_contig(inode->i_mapping, index,
					    min_t(unsigned long, nr_pages,
					    ARRAY_SIZE(pages)), pages);
		for (i = 0; i < ret; i++) {
			if (pages[i] != locked_page)
				unlock_page(pages[i]);
			page_cache_release(pages[i]);
		}
		nr_pages -= ret;
		index += ret;
		cond_resched();
	}
	return 0;
}

static noinline int lock_delalloc_pages(struct inode *inode,
					struct page *locked_page,
					u64 delalloc_start,
					u64 delalloc_end)
{
	unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT;
	unsigned long start_index = index;
	unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT;
	unsigned long pages_locked = 0;
	struct page *pages[16];
	unsigned long nrpages;
	int ret;
	int i;

	/* the caller is responsible for locking the start index */
	if (index == locked_page->index && index == end_index)
		return 0;

	/* skip the page at the start index */
	nrpages = end_index - index + 1;
	while (nrpages > 0) {
		ret = find_get_pages_contig(inode->i_mapping, index,
					    min_t(unsigned long,
					    nrpages, ARRAY_SIZE(pages)), pages);
		if (ret == 0) {
			ret = -EAGAIN;
			goto done;
		}
		/* now we have an array of pages, lock them all */
		for (i = 0; i < ret; i++) {
			/*
			 * the caller is taking responsibility for
			 * locked_page
			 */
			if (pages[i] != locked_page) {
				lock_page(pages[i]);
				if (!PageDirty(pages[i]) ||
				    pages[i]->mapping != inode->i_mapping) {
					ret = -EAGAIN;
					unlock_page(pages[i]);
					page_cache_release(pages[i]);
					goto done;
				}
			}
			page_cache_release(pages[i]);
			pages_locked++;
		}
		nrpages -= ret;
		index += ret;
		cond_resched();
	}
	ret = 0;
done:
	if (ret && pages_locked) {
		__unlock_for_delalloc(inode, locked_page,
			      delalloc_start,
			      ((u64)(start_index + pages_locked - 1)) <<
			      PAGE_CACHE_SHIFT);
	}
	return ret;
}

/*
 * find a contiguous range of bytes in the file marked as delalloc, not
 * more than 'max_bytes'.  start and end are used to return the range,
 *
 * 1 is returned if we find something, 0 if nothing was in the tree
 */
static noinline u64 find_lock_delalloc_range(struct inode *inode,
					     struct extent_io_tree *tree,
					     struct page *locked_page,
					     u64 *start, u64 *end,
					     u64 max_bytes)
{
	u64 delalloc_start;
	u64 delalloc_end;
	u64 found;
	int ret;
	int loops = 0;

again:
	/* step one, find a bunch of delalloc bytes starting at start */
	delalloc_start = *start;
	delalloc_end = 0;
	found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
				    max_bytes);
	if (!found || delalloc_end <= *start) {
		*start = delalloc_start;
		*end = delalloc_end;
		return found;
	}

	/*
	 * start comes from the offset of locked_page.  We have to lock
	 * pages in order, so we can't process delalloc bytes before
	 * locked_page
	 */
	if (delalloc_start < *start)
		delalloc_start = *start;

	/*
	 * make sure to limit the number of pages we try to lock down
	 * if we're looping.
	 */
	if (delalloc_end + 1 - delalloc_start > max_bytes && loops)
		delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1;

	/* step two, lock all the pages after the page that has start */
	ret = lock_delalloc_pages(inode, locked_page,
				  delalloc_start, delalloc_end);
	if (ret == -EAGAIN) {
		/* some of the pages are gone, lets avoid looping by
		 * shortening the size of the delalloc range we're searching
		 */
		if (!loops) {
			unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
			max_bytes = PAGE_CACHE_SIZE - offset;
			loops = 1;
			goto again;
		} else {
			found = 0;
			goto out_failed;
		}
	}
	BUG_ON(ret);

	/* step three, lock the state bits for the whole range */
	lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);

	/* then test to make sure it is all still delalloc */
	ret = test_range_bit(tree, delalloc_start, delalloc_end,
			     EXTENT_DELALLOC, 1);
	if (!ret) {
		unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
		__unlock_for_delalloc(inode, locked_page,
				      delalloc_start, delalloc_end);
		cond_resched();
		goto again;
	}
	*start = delalloc_start;
	*end = delalloc_end;
out_failed:
	return found;
}
int extent_clear_unlock_delalloc(struct inode *inode,
				 struct extent_io_tree *tree,
				 u64 start, u64 end, struct page *locked_page,
				 int unlock_pages,
				 int clear_unlock,
				 int clear_delalloc, int clear_dirty,
				 int set_writeback,
				 int end_writeback)
{
	int ret;
	struct page *pages[16];
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	unsigned long nr_pages = end_index - index + 1;
	int i;
	int clear_bits = 0;

	if (clear_unlock)
		clear_bits |= EXTENT_LOCKED;
	if (clear_dirty)
		clear_bits |= EXTENT_DIRTY;

	if (clear_delalloc)
		clear_bits |= EXTENT_DELALLOC;

	clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS);
	if (!(unlock_pages || clear_dirty || set_writeback || end_writeback))
		return 0;

	while (nr_pages > 0) {
		ret = find_get_pages_contig(inode->i_mapping, index,
					    min_t(unsigned long,
					    nr_pages, ARRAY_SIZE(pages)), pages);
		for (i = 0; i < ret; i++) {
			if (pages[i] == locked_page) {
				page_cache_release(pages[i]);
				continue;
			}
			if (clear_dirty)
				clear_page_dirty_for_io(pages[i]);
			if (set_writeback)
				set_page_writeback(pages[i]);
			if (end_writeback)
				end_page_writeback(pages[i]);
			if (unlock_pages)
				unlock_page(pages[i]);
			page_cache_release(pages[i]);
		}
		nr_pages -= ret;
		index += ret;
		cond_resched();
	}
	return 0;
}

/*
 * count the number of bytes in the tree that have a given bit(s)
 * set.  This can be fairly slow, except for EXTENT_DIRTY which is
 * cached.  The total number found is returned.
 */
u64 count_range_bits(struct extent_io_tree *tree,
		     u64 *start, u64 search_end, u64 max_bytes,
		     unsigned long bits)
{
	struct rb_node *node;
	struct extent_state *state;
	u64 cur_start = *start;
	u64 total_bytes = 0;
	int found = 0;

	if (search_end <= cur_start) {
		WARN_ON(1);
		return 0;
	}

	spin_lock(&tree->lock);
	if (cur_start == 0 && bits == EXTENT_DIRTY) {
		total_bytes = tree->dirty_bytes;
		goto out;
	}
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, cur_start);
	if (!node)
		goto out;

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->start > search_end)
			break;
		if (state->end >= cur_start && (state->state & bits)) {
			total_bytes += min(search_end, state->end) + 1 -
				       max(cur_start, state->start);
			if (total_bytes >= max_bytes)
				break;
			if (!found) {
				*start = state->start;
				found = 1;
			}
		}
		node = rb_next(node);
		if (!node)
			break;
	}
out:
	spin_unlock(&tree->lock);
	return total_bytes;
}

/*
 * set the private field for a given byte offset in the tree.  If there isn't
 * an extent_state there already, this does nothing.
 */
int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 0;

	spin_lock(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	state->private = private;
out:
	spin_unlock(&tree->lock);
	return ret;
}

int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 0;

	spin_lock(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	*private = state->private;
out:
	spin_unlock(&tree->lock);
	return ret;
}
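/*
 * Example (illustrative only, not part of the original file): the private
 * field stashes one u64 per extent_state, keyed by the state's exact start
 * offset, so it must be stored and read back with the same 'start' value.
 * A minimal sketch with a made-up value:
 *
 *	u64 stashed;
 *
 *	if (!set_state_private(tree, start, 0x1234ULL) &&
 *	    !get_state_private(tree, start, &stashed))
 *		WARN_ON(stashed != 0x1234ULL);
 */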
/*
 * searches a range in the state tree for a given mask.
 * If 'filled' == 1, this returns 1 only if every extent in the tree
 * has the bits set.  Otherwise, 1 is returned if any bit in the
 * range is found set.
 */
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   int bits, int filled)
{
	struct extent_state *state = NULL;
	struct rb_node *node;
	int bitset = 0;

	spin_lock(&tree->lock);
	node = tree_search(tree, start);
	while (node && start <= end) {
		state = rb_entry(node, struct extent_state, rb_node);

		if (filled && state->start > start) {
			bitset = 0;
			break;
		}

		if (state->start > end)
			break;

		if (state->state & bits) {
			bitset = 1;
			if (!filled)
				break;
		} else if (filled) {
			bitset = 0;
			break;
		}
		start = state->end + 1;
		if (start > end)
			break;
		node = rb_next(node);
		if (!node) {
			if (filled)
				bitset = 0;
			break;
		}
	}
	spin_unlock(&tree->lock);
	return bitset;
}

/*
 * helper function to set a given page up to date if all the
 * extents in the tree for that page are up to date
 */
static int check_page_uptodate(struct extent_io_tree *tree,
			       struct page *page)
{
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;
	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
		SetPageUptodate(page);
	return 0;
}

/*
 * helper function to unlock a page if all the extents in the tree
 * for that page are unlocked
 */
static int check_page_locked(struct extent_io_tree *tree,
			     struct page *page)
{
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;
	if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
		unlock_page(page);
	return 0;
}

/*
 * helper function to end page writeback if all the extents
 * in the tree for that page are done with writeback
 */
static int check_page_writeback(struct extent_io_tree *tree,
				struct page *page)
{
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;
	if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
		end_page_writeback(page);
	return 0;
}

/* lots and lots of room for performance fixes in the end_bio funcs */

/*
 * after a writepage IO is done, we need to:
 * clear the uptodate bits on error
 * clear the writeback bits in the extent tree for this IO
 * end_page_writeback if the page has no more pending IO
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
static void end_bio_extent_writepage(struct bio *bio, int err)
{
	int uptodate = err == 0;
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_io_tree *tree;
	u64 start;
	u64 end;
	int whole_page;
	int ret;

	do {
		struct page *page = bvec->bv_page;
		tree = &BTRFS_I(page->mapping->host)->io_tree;

		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
			 bvec->bv_offset;
		end = start + bvec->bv_len - 1;

		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
			whole_page = 1;
		else
			whole_page = 0;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);
		if (tree->ops && tree->ops->writepage_end_io_hook) {
			ret = tree->ops->writepage_end_io_hook(page, start,
						       end, NULL, uptodate);
			if (ret)
				uptodate = 0;
		}

		if (!uptodate && tree->ops &&
		    tree->ops->writepage_io_failed_hook) {
			ret = tree->ops->writepage_io_failed_hook(bio, page,
							 start, end, NULL);
			if (ret == 0) {
				uptodate = (err == 0);
				continue;
			}
		}

		if (!uptodate) {
			clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
			ClearPageUptodate(page);
			SetPageError(page);
		}

		clear_extent_writeback(tree, start, end, GFP_ATOMIC);

		if (whole_page)
			end_page_writeback(page);
		else
			check_page_writeback(tree, page);
	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
}

/*
 * after a readpage IO is done, we need to:
 * clear the uptodate bits on error
 * set the uptodate bits if things worked
 * set the page up to date if all extents in the tree are uptodate
 * clear the lock bit in the extent tree
 * unlock the page if there are no other extents locked for it
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
static void end_bio_extent_readpage(struct bio *bio, int err)
{
	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_io_tree *tree;
	u64 start;
	u64 end;
	int whole_page;
	int ret;

	if (err)
		uptodate = 0;

	do {
		struct page *page = bvec->bv_page;
		tree = &BTRFS_I(page->mapping->host)->io_tree;

		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
			bvec->bv_offset;
		end = start + bvec->bv_len - 1;

		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
			whole_page = 1;
		else
			whole_page = 0;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
			ret = tree->ops->readpage_end_io_hook(page, start, end,
							      NULL);
			if (ret)
				uptodate = 0;
		}
		if (!uptodate && tree->ops &&
		    tree->ops->readpage_io_failed_hook) {
			ret = tree->ops->readpage_io_failed_hook(bio, page,
							 start, end, NULL);
			if (ret == 0) {
				uptodate =
					test_bit(BIO_UPTODATE, &bio->bi_flags);
				if (err)
					uptodate = 0;
				continue;
			}
		}

		if (uptodate) {
			set_extent_uptodate(tree, start, end,
					    GFP_ATOMIC);
		}
		unlock_extent(tree, start, end, GFP_ATOMIC);

		if (whole_page) {
			if (uptodate) {
				SetPageUptodate(page);
			} else {
				ClearPageUptodate(page);
				SetPageError(page);
			}
			unlock_page(page);
		} else {
			if (uptodate) {
				check_page_uptodate(tree, page);
			} else {
				ClearPageUptodate(page);
				SetPageError(page);
			}
			check_page_locked(tree, page);
		}
	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
}

/*
 * IO done from prepare_write is pretty simple, we just unlock
 * the structs in the extent tree when done, and set the uptodate bits
 * as appropriate.
 */
static void end_bio_extent_preparewrite(struct bio *bio, int err)
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_io_tree *tree;
	u64 start;
	u64 end;

	do {
		struct page *page = bvec->bv_page;
		tree = &BTRFS_I(page->mapping->host)->io_tree;

		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
			bvec->bv_offset;
		end = start + bvec->bv_len - 1;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (uptodate) {
			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}

		unlock_extent(tree, start, end, GFP_ATOMIC);

	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
}

static struct bio *
extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
		 gfp_t gfp_flags)
{
	struct bio *bio;

	bio = bio_alloc(gfp_flags, nr_vecs);

	if (bio == NULL && (current->flags & PF_MEMALLOC)) {
		while (!bio && (nr_vecs /= 2))
			bio = bio_alloc(gfp_flags, nr_vecs);
	}

	if (bio) {
		bio->bi_size = 0;
		bio->bi_bdev = bdev;
		bio->bi_sector = first_sector;
	}
	return bio;
}

static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
			  unsigned long bio_flags)
{
	int ret = 0;
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct page *page = bvec->bv_page;
	struct extent_io_tree *tree = bio->bi_private;
	u64 start;
	u64 end;

	start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
	end = start + bvec->bv_len - 1;

	bio->bi_private = NULL;

	bio_get(bio);

	if (tree->ops && tree->ops->submit_bio_hook)
		tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
					   mirror_num, bio_flags);
	else
		submit_bio(rw, bio);
	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;
	bio_put(bio);
	return ret;
}
static int submit_extent_page(int rw, struct extent_io_tree *tree,
			      struct page *page, sector_t sector,
			      size_t size, unsigned long offset,
			      struct block_device *bdev,
			      struct bio **bio_ret,
			      unsigned long max_pages,
			      bio_end_io_t end_io_func,
			      int mirror_num,
			      unsigned long prev_bio_flags,
			      unsigned long bio_flags)
{
	int ret = 0;
	struct bio *bio;
	int nr;
	int contig = 0;
	int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED;
	int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
	size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE);

	if (bio_ret && *bio_ret) {
		bio = *bio_ret;
		if (old_compressed)
			contig = bio->bi_sector == sector;
		else
			contig = bio->bi_sector + (bio->bi_size >> 9) ==
				sector;

		if (prev_bio_flags != bio_flags || !contig ||
		    (tree->ops && tree->ops->merge_bio_hook &&
		     tree->ops->merge_bio_hook(page, offset, page_size, bio,
					       bio_flags)) ||
		    bio_add_page(bio, page, page_size, offset) < page_size) {
			ret = submit_one_bio(rw, bio, mirror_num,
					     prev_bio_flags);
			bio = NULL;
		} else {
			return 0;
		}
	}
	if (this_compressed)
		nr = BIO_MAX_PAGES;
	else
		nr = bio_get_nr_vecs(bdev);

	bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);

	bio_add_page(bio, page, page_size, offset);
	bio->bi_end_io = end_io_func;
	bio->bi_private = tree;

	if (bio_ret)
		*bio_ret = bio;
	else
		ret = submit_one_bio(rw, bio, mirror_num, bio_flags);

	return ret;
}

void set_page_extent_mapped(struct page *page)
{
	if (!PagePrivate(page)) {
		SetPagePrivate(page);
		page_cache_get(page);
		set_page_private(page, EXTENT_PAGE_PRIVATE);
	}
}

static void set_page_extent_head(struct page *page, unsigned long len)
{
	set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
}

/*
 * basic readpage implementation.  Locked extent state structs are inserted
 * into the tree that are removed when the IO is done (by the end_io
 * handlers)
 */
static int __extent_read_full_page(struct extent_io_tree *tree,
				   struct page *page,
				   get_extent_t *get_extent,
				   struct bio **bio, int mirror_num,
				   unsigned long *bio_flags)
{
	struct inode *inode = page->mapping->host;
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 page_end = start + PAGE_CACHE_SIZE - 1;
	u64 end;
	u64 cur = start;
	u64 extent_offset;
	u64 last_byte = i_size_read(inode);
	u64 block_start;
	u64 cur_end;
	sector_t sector;
	struct extent_map *em;
	struct block_device *bdev;
	int ret;
	int nr = 0;
	size_t page_offset = 0;
	size_t iosize;
	size_t disk_io_size;
	size_t blocksize = inode->i_sb->s_blocksize;
	unsigned long this_bio_flag = 0;

	set_page_extent_mapped(page);

	end = page_end;
	lock_extent(tree, start, end, GFP_NOFS);

	if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
		char *userpage;
		size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);

		if (zero_offset) {
			iosize = PAGE_CACHE_SIZE - zero_offset;
			userpage = kmap_atomic(page, KM_USER0);
			memset(userpage + zero_offset, 0, iosize);
			flush_dcache_page(page);
			kunmap_atomic(userpage, KM_USER0);
		}
	}
	while (cur <= end) {
		if (cur >= last_byte) {
			char *userpage;
			iosize = PAGE_CACHE_SIZE - page_offset;
			userpage = kmap_atomic(page, KM_USER0);
			memset(userpage + page_offset, 0, iosize);
			flush_dcache_page(page);
			kunmap_atomic(userpage, KM_USER0);
			set_extent_uptodate(tree, cur, cur + iosize - 1,
					    GFP_NOFS);
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			break;
		}
		em = get_extent(inode, page, page_offset, cur,
				end - cur + 1, 0);
		if (IS_ERR(em) || !em) {
			SetPageError(page);
			unlock_extent(tree, cur, end, GFP_NOFS);
			break;
		}
		extent_offset = cur - em->start;
		BUG_ON(extent_map_end(em) <= cur);
		BUG_ON(end < cur);

		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
			this_bio_flag = EXTENT_BIO_COMPRESSED;

		iosize = min(extent_map_end(em) - cur, end - cur + 1);
		cur_end = min(extent_map_end(em) - 1, end);
		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
		if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
			disk_io_size = em->block_len;
			sector = em->block_start >> 9;
		} else {
			sector = (em->block_start + extent_offset) >> 9;
			disk_io_size = iosize;
		}
		bdev = em->bdev;
		block_start = em->block_start;
		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
			block_start = EXTENT_MAP_HOLE;
		free_extent_map(em);
		em = NULL;

		/* we've found a hole, just zero and go on */
		if (block_start == EXTENT_MAP_HOLE) {
			char *userpage;
			userpage = kmap_atomic(page, KM_USER0);
			memset(userpage + page_offset, 0, iosize);
			flush_dcache_page(page);
			kunmap_atomic(userpage, KM_USER0);

			set_extent_uptodate(tree, cur, cur + iosize - 1,
					    GFP_NOFS);
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}
		/* the get_extent function already copied into the page */
		if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
			check_page_uptodate(tree, page);
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}
		/* we have an inline extent but it didn't get marked up
		 * to date.  Error out
		 */
		if (block_start == EXTENT_MAP_INLINE) {
			SetPageError(page);
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}

		ret = 0;
		if (tree->ops && tree->ops->readpage_io_hook) {
			ret = tree->ops->readpage_io_hook(page, cur,
							  cur + iosize - 1);
		}
		if (!ret) {
			unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
			pnr -= page->index;
			ret = submit_extent_page(READ, tree, page,
					 sector, disk_io_size, page_offset,
					 bdev, bio, pnr,
					 end_bio_extent_readpage, mirror_num,
					 *bio_flags,
					 this_bio_flag);
			nr++;
			*bio_flags = this_bio_flag;
		}
		if (ret)
			SetPageError(page);
		cur = cur + iosize;
		page_offset += iosize;
	}
	if (!nr) {
		if (!PageError(page))
			SetPageUptodate(page);
		unlock_page(page);
	}
	return 0;
}

int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
			  get_extent_t *get_extent)
{
	struct bio *bio = NULL;
	unsigned long bio_flags = 0;
	int ret;

	ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
				      &bio_flags);
	if (bio)
		submit_one_bio(READ, bio, 0, bio_flags);
	return ret;
}
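/*
 * Example (illustrative only, not part of the original file):
 * extent_read_full_page() is meant to back an address_space readpage
 * operation.  A minimal sketch of a filesystem wrapper, assuming a
 * btrfs-style io_tree and a get_extent_t callback; the names other than
 * extent_read_full_page() are hypothetical:
 *
 *	static int example_readpage(struct file *file, struct page *page)
 *	{
 *		struct extent_io_tree *tree =
 *			&BTRFS_I(page->mapping->host)->io_tree;
 *
 *		return extent_read_full_page(tree, page, example_get_extent);
 *	}
 */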
Then the lock bits are removed 2052 * and the end_io handler clears the writeback ranges 2053 */ 2054 static int __extent_writepage(struct page *page, struct writeback_control *wbc, 2055 void *data) 2056 { 2057 struct inode *inode = page->mapping->host; 2058 struct extent_page_data *epd = data; 2059 struct extent_io_tree *tree = epd->tree; 2060 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 2061 u64 delalloc_start; 2062 u64 page_end = start + PAGE_CACHE_SIZE - 1; 2063 u64 end; 2064 u64 cur = start; 2065 u64 extent_offset; 2066 u64 last_byte = i_size_read(inode); 2067 u64 block_start; 2068 u64 iosize; 2069 u64 unlock_start; 2070 sector_t sector; 2071 struct extent_map *em; 2072 struct block_device *bdev; 2073 int ret; 2074 int nr = 0; 2075 size_t pg_offset = 0; 2076 size_t blocksize; 2077 loff_t i_size = i_size_read(inode); 2078 unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; 2079 u64 nr_delalloc; 2080 u64 delalloc_end; 2081 int page_started; 2082 int compressed; 2083 int write_flags; 2084 unsigned long nr_written = 0; 2085 2086 if (wbc->sync_mode == WB_SYNC_ALL) 2087 write_flags = WRITE_SYNC_PLUG; 2088 else 2089 write_flags = WRITE; 2090 2091 WARN_ON(!PageLocked(page)); 2092 pg_offset = i_size & (PAGE_CACHE_SIZE - 1); 2093 if (page->index > end_index || 2094 (page->index == end_index && !pg_offset)) { 2095 page->mapping->a_ops->invalidatepage(page, 0); 2096 unlock_page(page); 2097 return 0; 2098 } 2099 2100 if (page->index == end_index) { 2101 char *userpage; 2102 2103 userpage = kmap_atomic(page, KM_USER0); 2104 memset(userpage + pg_offset, 0, 2105 PAGE_CACHE_SIZE - pg_offset); 2106 kunmap_atomic(userpage, KM_USER0); 2107 flush_dcache_page(page); 2108 } 2109 pg_offset = 0; 2110 2111 set_page_extent_mapped(page); 2112 2113 delalloc_start = start; 2114 delalloc_end = 0; 2115 page_started = 0; 2116 if (!epd->extent_locked) { 2117 /* 2118 * make sure the wbc mapping index is at least updated 2119 * to this page. 2120 */ 2121 update_nr_written(page, wbc, 0); 2122 2123 while (delalloc_end < page_end) { 2124 nr_delalloc = find_lock_delalloc_range(inode, tree, 2125 page, 2126 &delalloc_start, 2127 &delalloc_end, 2128 128 * 1024 * 1024); 2129 if (nr_delalloc == 0) { 2130 delalloc_start = delalloc_end + 1; 2131 continue; 2132 } 2133 tree->ops->fill_delalloc(inode, page, delalloc_start, 2134 delalloc_end, &page_started, 2135 &nr_written); 2136 delalloc_start = delalloc_end + 1; 2137 } 2138 2139 /* did the fill delalloc function already unlock and start 2140 * the IO? 2141 */ 2142 if (page_started) { 2143 ret = 0; 2144 /* 2145 * we've unlocked the page, so we can't update 2146 * the mapping's writeback index, just update 2147 * nr_to_write. 
2148 */ 2149 wbc->nr_to_write -= nr_written; 2150 goto done_unlocked; 2151 } 2152 } 2153 lock_extent(tree, start, page_end, GFP_NOFS); 2154 2155 unlock_start = start; 2156 2157 if (tree->ops && tree->ops->writepage_start_hook) { 2158 ret = tree->ops->writepage_start_hook(page, start, 2159 page_end); 2160 if (ret == -EAGAIN) { 2161 unlock_extent(tree, start, page_end, GFP_NOFS); 2162 redirty_page_for_writepage(wbc, page); 2163 update_nr_written(page, wbc, nr_written); 2164 unlock_page(page); 2165 ret = 0; 2166 goto done_unlocked; 2167 } 2168 } 2169 2170 /* 2171 * we don't want to touch the inode after unlocking the page, 2172 * so we update the mapping writeback index now 2173 */ 2174 update_nr_written(page, wbc, nr_written + 1); 2175 2176 end = page_end; 2177 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) 2178 printk(KERN_ERR "btrfs delalloc bits after lock_extent\n"); 2179 2180 if (last_byte <= start) { 2181 clear_extent_dirty(tree, start, page_end, GFP_NOFS); 2182 unlock_extent(tree, start, page_end, GFP_NOFS); 2183 if (tree->ops && tree->ops->writepage_end_io_hook) 2184 tree->ops->writepage_end_io_hook(page, start, 2185 page_end, NULL, 1); 2186 unlock_start = page_end + 1; 2187 goto done; 2188 } 2189 2190 set_extent_uptodate(tree, start, page_end, GFP_NOFS); 2191 blocksize = inode->i_sb->s_blocksize; 2192 2193 while (cur <= end) { 2194 if (cur >= last_byte) { 2195 clear_extent_dirty(tree, cur, page_end, GFP_NOFS); 2196 unlock_extent(tree, unlock_start, page_end, GFP_NOFS); 2197 if (tree->ops && tree->ops->writepage_end_io_hook) 2198 tree->ops->writepage_end_io_hook(page, cur, 2199 page_end, NULL, 1); 2200 unlock_start = page_end + 1; 2201 break; 2202 } 2203 em = epd->get_extent(inode, page, pg_offset, cur, 2204 end - cur + 1, 1); 2205 if (IS_ERR(em) || !em) { 2206 SetPageError(page); 2207 break; 2208 } 2209 2210 extent_offset = cur - em->start; 2211 BUG_ON(extent_map_end(em) <= cur); 2212 BUG_ON(end < cur); 2213 iosize = min(extent_map_end(em) - cur, end - cur + 1); 2214 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); 2215 sector = (em->block_start + extent_offset) >> 9; 2216 bdev = em->bdev; 2217 block_start = em->block_start; 2218 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 2219 free_extent_map(em); 2220 em = NULL; 2221 2222 /* 2223 * compressed and inline extents are written through other 2224 * paths in the FS 2225 */ 2226 if (compressed || block_start == EXTENT_MAP_HOLE || 2227 block_start == EXTENT_MAP_INLINE) { 2228 clear_extent_dirty(tree, cur, 2229 cur + iosize - 1, GFP_NOFS); 2230 2231 unlock_extent(tree, unlock_start, cur + iosize - 1, 2232 GFP_NOFS); 2233 2234 /* 2235 * end_io notification does not happen here for 2236 * compressed extents 2237 */ 2238 if (!compressed && tree->ops && 2239 tree->ops->writepage_end_io_hook) 2240 tree->ops->writepage_end_io_hook(page, cur, 2241 cur + iosize - 1, 2242 NULL, 1); 2243 else if (compressed) { 2244 /* we don't want to end_page_writeback on 2245 * a compressed extent. 
this happens 2246 * elsewhere 2247 */ 2248 nr++; 2249 } 2250 2251 cur += iosize; 2252 pg_offset += iosize; 2253 unlock_start = cur; 2254 continue; 2255 } 2256 /* leave this out until we have a page_mkwrite call */ 2257 if (0 && !test_range_bit(tree, cur, cur + iosize - 1, 2258 EXTENT_DIRTY, 0)) { 2259 cur = cur + iosize; 2260 pg_offset += iosize; 2261 continue; 2262 } 2263 2264 clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); 2265 if (tree->ops && tree->ops->writepage_io_hook) { 2266 ret = tree->ops->writepage_io_hook(page, cur, 2267 cur + iosize - 1); 2268 } else { 2269 ret = 0; 2270 } 2271 if (ret) { 2272 SetPageError(page); 2273 } else { 2274 unsigned long max_nr = end_index + 1; 2275 2276 set_range_writeback(tree, cur, cur + iosize - 1); 2277 if (!PageWriteback(page)) { 2278 printk(KERN_ERR "btrfs warning page %lu not " 2279 "writeback, cur %llu end %llu\n", 2280 page->index, (unsigned long long)cur, 2281 (unsigned long long)end); 2282 } 2283 2284 ret = submit_extent_page(write_flags, tree, page, 2285 sector, iosize, pg_offset, 2286 bdev, &epd->bio, max_nr, 2287 end_bio_extent_writepage, 2288 0, 0, 0); 2289 if (ret) 2290 SetPageError(page); 2291 } 2292 cur = cur + iosize; 2293 pg_offset += iosize; 2294 nr++; 2295 } 2296 done: 2297 if (nr == 0) { 2298 /* make sure the mapping tag for page dirty gets cleared */ 2299 set_page_writeback(page); 2300 end_page_writeback(page); 2301 } 2302 if (unlock_start <= page_end) 2303 unlock_extent(tree, unlock_start, page_end, GFP_NOFS); 2304 unlock_page(page); 2305 2306 done_unlocked: 2307 2308 return 0; 2309 } 2310 2311 /** 2312 * extent_write_cache_pages - walk the list of dirty pages of the given address space and write all of them. 2313 * @mapping: address space structure to write 2314 * @wbc: subtract the number of written pages from *@wbc->nr_to_write 2315 * @writepage: function called for each page 2316 * @data: data passed to writepage function 2317 * 2318 * If a page is already under I/O, extent_write_cache_pages() skips it, even 2319 * if it's dirty. This is desirable behaviour for memory-cleaning writeback, 2320 * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() 2321 * and msync() need to guarantee that all the data which was dirty at the time 2322 * the call was made get new I/O started against them. If wbc->sync_mode is 2323 * WB_SYNC_ALL then we were called for data integrity and we must wait for 2324 * existing IO to complete.
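*
* This is essentially a copy of write_cache_pages() from mm/page-writeback.c, with two btrfs-specific additions: an optional per-page lock hook (write_cache_pages_lock_hook) and a flush_fn callback, so any extent bio we have queued up can be submitted before we wait on a page that is already under writeback.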
2325 */ 2326 static int extent_write_cache_pages(struct extent_io_tree *tree, 2327 struct address_space *mapping, 2328 struct writeback_control *wbc, 2329 writepage_t writepage, void *data, 2330 void (*flush_fn)(void *)) 2331 { 2332 struct backing_dev_info *bdi = mapping->backing_dev_info; 2333 int ret = 0; 2334 int done = 0; 2335 struct pagevec pvec; 2336 int nr_pages; 2337 pgoff_t index; 2338 pgoff_t end; /* Inclusive */ 2339 int scanned = 0; 2340 int range_whole = 0; 2341 2342 pagevec_init(&pvec, 0); 2343 if (wbc->range_cyclic) { 2344 index = mapping->writeback_index; /* Start from prev offset */ 2345 end = -1; 2346 } else { 2347 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2348 end = wbc->range_end >> PAGE_CACHE_SHIFT; 2349 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 2350 range_whole = 1; 2351 scanned = 1; 2352 } 2353 retry: 2354 while (!done && (index <= end) && 2355 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, 2356 PAGECACHE_TAG_DIRTY, min(end - index, 2357 (pgoff_t)PAGEVEC_SIZE-1) + 1))) { 2358 unsigned i; 2359 2360 scanned = 1; 2361 for (i = 0; i < nr_pages; i++) { 2362 struct page *page = pvec.pages[i]; 2363 2364 /* 2365 * At this point we hold neither mapping->tree_lock nor 2366 * lock on the page itself: the page may be truncated or 2367 * invalidated (changing page->mapping to NULL), or even 2368 * swizzled back from swapper_space to tmpfs file 2369 * mapping 2370 */ 2371 if (tree->ops && tree->ops->write_cache_pages_lock_hook) 2372 tree->ops->write_cache_pages_lock_hook(page); 2373 else 2374 lock_page(page); 2375 2376 if (unlikely(page->mapping != mapping)) { 2377 unlock_page(page); 2378 continue; 2379 } 2380 2381 if (!wbc->range_cyclic && page->index > end) { 2382 done = 1; 2383 unlock_page(page); 2384 continue; 2385 } 2386 2387 if (wbc->sync_mode != WB_SYNC_NONE) { 2388 if (PageWriteback(page)) 2389 flush_fn(data); 2390 wait_on_page_writeback(page); 2391 } 2392 2393 if (PageWriteback(page) || 2394 !clear_page_dirty_for_io(page)) { 2395 unlock_page(page); 2396 continue; 2397 } 2398 2399 ret = (*writepage)(page, wbc, data); 2400 2401 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { 2402 unlock_page(page); 2403 ret = 0; 2404 } 2405 if (ret || wbc->nr_to_write <= 0) 2406 done = 1; 2407 if (wbc->nonblocking && bdi_write_congested(bdi)) { 2408 wbc->encountered_congestion = 1; 2409 done = 1; 2410 } 2411 } 2412 pagevec_release(&pvec); 2413 cond_resched(); 2414 } 2415 if (!scanned && !done) { 2416 /* 2417 * We hit the last page and there is more work to be done: wrap 2418 * back to the start of the file 2419 */ 2420 scanned = 1; 2421 index = 0; 2422 goto retry; 2423 } 2424 return ret; 2425 } 2426 2427 static void flush_epd_write_bio(struct extent_page_data *epd) 2428 { 2429 if (epd->bio) { 2430 if (epd->sync_io) 2431 submit_one_bio(WRITE_SYNC, epd->bio, 0, 0); 2432 else 2433 submit_one_bio(WRITE, epd->bio, 0, 0); 2434 epd->bio = NULL; 2435 } 2436 } 2437 2438 static noinline void flush_write_bio(void *data) 2439 { 2440 struct extent_page_data *epd = data; 2441 flush_epd_write_bio(epd); 2442 } 2443 2444 int extent_write_full_page(struct extent_io_tree *tree, struct page *page, 2445 get_extent_t *get_extent, 2446 struct writeback_control *wbc) 2447 { 2448 int ret; 2449 struct address_space *mapping = page->mapping; 2450 struct extent_page_data epd = { 2451 .bio = NULL, 2452 .tree = tree, 2453 .get_extent = get_extent, 2454 .extent_locked = 0, 2455 .sync_io = wbc->sync_mode == WB_SYNC_ALL, 2456 }; 2457 struct writeback_control wbc_writepages = { 2458 .bdi = 
wbc->bdi, 2459 .sync_mode = wbc->sync_mode, 2460 .older_than_this = NULL, 2461 .nr_to_write = 64, 2462 .range_start = page_offset(page) + PAGE_CACHE_SIZE, 2463 .range_end = (loff_t)-1, 2464 }; 2465 2466 ret = __extent_writepage(page, wbc, &epd); 2467 2468 extent_write_cache_pages(tree, mapping, &wbc_writepages, 2469 __extent_writepage, &epd, flush_write_bio); 2470 flush_epd_write_bio(&epd); 2471 return ret; 2472 } 2473 2474 int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, 2475 u64 start, u64 end, get_extent_t *get_extent, 2476 int mode) 2477 { 2478 int ret = 0; 2479 struct address_space *mapping = inode->i_mapping; 2480 struct page *page; 2481 unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >> 2482 PAGE_CACHE_SHIFT; 2483 2484 struct extent_page_data epd = { 2485 .bio = NULL, 2486 .tree = tree, 2487 .get_extent = get_extent, 2488 .extent_locked = 1, 2489 .sync_io = mode == WB_SYNC_ALL, 2490 }; 2491 struct writeback_control wbc_writepages = { 2492 .bdi = inode->i_mapping->backing_dev_info, 2493 .sync_mode = mode, 2494 .older_than_this = NULL, 2495 .nr_to_write = nr_pages * 2, 2496 .range_start = start, 2497 .range_end = end + 1, 2498 }; 2499 2500 while (start <= end) { 2501 page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); 2502 if (clear_page_dirty_for_io(page)) 2503 ret = __extent_writepage(page, &wbc_writepages, &epd); 2504 else { 2505 if (tree->ops && tree->ops->writepage_end_io_hook) 2506 tree->ops->writepage_end_io_hook(page, start, 2507 start + PAGE_CACHE_SIZE - 1, 2508 NULL, 1); 2509 unlock_page(page); 2510 } 2511 page_cache_release(page); 2512 start += PAGE_CACHE_SIZE; 2513 } 2514 2515 flush_epd_write_bio(&epd); 2516 return ret; 2517 } 2518 2519 int extent_writepages(struct extent_io_tree *tree, 2520 struct address_space *mapping, 2521 get_extent_t *get_extent, 2522 struct writeback_control *wbc) 2523 { 2524 int ret = 0; 2525 struct extent_page_data epd = { 2526 .bio = NULL, 2527 .tree = tree, 2528 .get_extent = get_extent, 2529 .extent_locked = 0, 2530 .sync_io = wbc->sync_mode == WB_SYNC_ALL, 2531 }; 2532 2533 ret = extent_write_cache_pages(tree, mapping, wbc, 2534 __extent_writepage, &epd, 2535 flush_write_bio); 2536 flush_epd_write_bio(&epd); 2537 return ret; 2538 } 2539 2540 int extent_readpages(struct extent_io_tree *tree, 2541 struct address_space *mapping, 2542 struct list_head *pages, unsigned nr_pages, 2543 get_extent_t get_extent) 2544 { 2545 struct bio *bio = NULL; 2546 unsigned page_idx; 2547 struct pagevec pvec; 2548 unsigned long bio_flags = 0; 2549 2550 pagevec_init(&pvec, 0); 2551 for (page_idx = 0; page_idx < nr_pages; page_idx++) { 2552 struct page *page = list_entry(pages->prev, struct page, lru); 2553 2554 prefetchw(&page->flags); 2555 list_del(&page->lru); 2556 /* 2557 * what we want to do here is call add_to_page_cache_lru, 2558 * but that isn't exported, so we reproduce it here 2559 */ 2560 if (!add_to_page_cache(page, mapping, 2561 page->index, GFP_KERNEL)) { 2562 2563 /* open coding of lru_cache_add, also not exported */ 2564 page_cache_get(page); 2565 if (!pagevec_add(&pvec, page)) 2566 __pagevec_lru_add_file(&pvec); 2567 __extent_read_full_page(tree, page, get_extent, 2568 &bio, 0, &bio_flags); 2569 } 2570 page_cache_release(page); 2571 } 2572 if (pagevec_count(&pvec)) 2573 __pagevec_lru_add_file(&pvec); 2574 BUG_ON(!list_empty(pages)); 2575 if (bio) 2576 submit_one_bio(READ, bio, 0, bio_flags); 2577 return 0; 2578 } 2579 2580 /* 2581 * basic invalidatepage code, this waits on any locked or writeback 2582 * 
ranges corresponding to the page, and then deletes any extent state 2583 * records from the tree 2584 */ 2585 int extent_invalidatepage(struct extent_io_tree *tree, 2586 struct page *page, unsigned long offset) 2587 { 2588 u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); 2589 u64 end = start + PAGE_CACHE_SIZE - 1; 2590 size_t blocksize = page->mapping->host->i_sb->s_blocksize; 2591 2592 start += (offset + blocksize - 1) & ~(blocksize - 1); 2593 if (start > end) 2594 return 0; 2595 2596 lock_extent(tree, start, end, GFP_NOFS); 2597 wait_on_extent_writeback(tree, start, end); 2598 clear_extent_bit(tree, start, end, 2599 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, 2600 1, 1, GFP_NOFS); 2601 return 0; 2602 } 2603 2604 /* 2605 * simple commit_write call, set_range_dirty is used to mark both 2606 * the pages and the extent records as dirty 2607 */ 2608 int extent_commit_write(struct extent_io_tree *tree, 2609 struct inode *inode, struct page *page, 2610 unsigned from, unsigned to) 2611 { 2612 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; 2613 2614 set_page_extent_mapped(page); 2615 set_page_dirty(page); 2616 2617 if (pos > inode->i_size) { 2618 i_size_write(inode, pos); 2619 mark_inode_dirty(inode); 2620 } 2621 return 0; 2622 } 2623 2624 int extent_prepare_write(struct extent_io_tree *tree, 2625 struct inode *inode, struct page *page, 2626 unsigned from, unsigned to, get_extent_t *get_extent) 2627 { 2628 u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 2629 u64 page_end = page_start + PAGE_CACHE_SIZE - 1; 2630 u64 block_start; 2631 u64 orig_block_start; 2632 u64 block_end; 2633 u64 cur_end; 2634 struct extent_map *em; 2635 unsigned blocksize = 1 << inode->i_blkbits; 2636 size_t page_offset = 0; 2637 size_t block_off_start; 2638 size_t block_off_end; 2639 int err = 0; 2640 int iocount = 0; 2641 int ret = 0; 2642 int isnew; 2643 2644 set_page_extent_mapped(page); 2645 2646 block_start = (page_start + from) & ~((u64)blocksize - 1); 2647 block_end = (page_start + to - 1) | (blocksize - 1); 2648 orig_block_start = block_start; 2649 2650 lock_extent(tree, page_start, page_end, GFP_NOFS); 2651 while (block_start <= block_end) { 2652 em = get_extent(inode, page, page_offset, block_start, 2653 block_end - block_start + 1, 1); 2654 if (IS_ERR(em) || !em) 2655 goto err; 2656 2657 cur_end = min(block_end, extent_map_end(em) - 1); 2658 block_off_start = block_start & (PAGE_CACHE_SIZE - 1); 2659 block_off_end = block_off_start + blocksize; 2660 isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS); 2661 2662 if (!PageUptodate(page) && isnew && 2663 (block_off_end > to || block_off_start < from)) { 2664 void *kaddr; 2665 2666 kaddr = kmap_atomic(page, KM_USER0); 2667 if (block_off_end > to) 2668 memset(kaddr + to, 0, block_off_end - to); 2669 if (block_off_start < from) 2670 memset(kaddr + block_off_start, 0, 2671 from - block_off_start); 2672 flush_dcache_page(page); 2673 kunmap_atomic(kaddr, KM_USER0); 2674 } 2675 if ((em->block_start != EXTENT_MAP_HOLE && 2676 em->block_start != EXTENT_MAP_INLINE) && 2677 !isnew && !PageUptodate(page) && 2678 (block_off_end > to || block_off_start < from) && 2679 !test_range_bit(tree, block_start, cur_end, 2680 EXTENT_UPTODATE, 1)) { 2681 u64 sector; 2682 u64 extent_offset = block_start - em->start; 2683 size_t iosize; 2684 sector = (em->block_start + extent_offset) >> 9; 2685 iosize = (cur_end - block_start + blocksize) & 2686 ~((u64)blocksize - 1); 2687 /* 2688 * we've already got the extent locked, but we 2689 * need to split the 
state such that our end_bio 2690 * handler can clear the lock. 2691 */ 2692 set_extent_bit(tree, block_start, 2693 block_start + iosize - 1, 2694 EXTENT_LOCKED, 0, NULL, GFP_NOFS); 2695 ret = submit_extent_page(READ, tree, page, 2696 sector, iosize, page_offset, em->bdev, 2697 NULL, 1, 2698 end_bio_extent_preparewrite, 0, 2699 0, 0); 2700 iocount++; 2701 block_start = block_start + iosize; 2702 } else { 2703 set_extent_uptodate(tree, block_start, cur_end, 2704 GFP_NOFS); 2705 unlock_extent(tree, block_start, cur_end, GFP_NOFS); 2706 block_start = cur_end + 1; 2707 } 2708 page_offset = block_start & (PAGE_CACHE_SIZE - 1); 2709 free_extent_map(em); 2710 } 2711 if (iocount) { 2712 wait_extent_bit(tree, orig_block_start, 2713 block_end, EXTENT_LOCKED); 2714 } 2715 check_page_uptodate(tree, page); 2716 err: 2717 /* FIXME, zero out newly allocated blocks on error */ 2718 return err; 2719 } 2720 2721 /* 2722 * a helper for releasepage, this tests for areas of the page that 2723 * are locked or under IO and drops the related state bits if it is safe 2724 * to drop the page. 2725 */ 2726 int try_release_extent_state(struct extent_map_tree *map, 2727 struct extent_io_tree *tree, struct page *page, 2728 gfp_t mask) 2729 { 2730 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 2731 u64 end = start + PAGE_CACHE_SIZE - 1; 2732 int ret = 1; 2733 2734 if (test_range_bit(tree, start, end, 2735 EXTENT_IOBITS | EXTENT_ORDERED, 0)) 2736 ret = 0; 2737 else { 2738 if ((mask & GFP_NOFS) == GFP_NOFS) 2739 mask = GFP_NOFS; 2740 clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 2741 1, 1, mask); 2742 } 2743 return ret; 2744 } 2745 2746 /* 2747 * a helper for releasepage. As long as there are no locked extents 2748 * in the range corresponding to the page, both state records and extent 2749 * map records are removed 2750 */ 2751 int try_release_extent_mapping(struct extent_map_tree *map, 2752 struct extent_io_tree *tree, struct page *page, 2753 gfp_t mask) 2754 { 2755 struct extent_map *em; 2756 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 2757 u64 end = start + PAGE_CACHE_SIZE - 1; 2758 2759 if ((mask & __GFP_WAIT) && 2760 page->mapping->host->i_size > 16 * 1024 * 1024) { 2761 u64 len; 2762 while (start <= end) { 2763 len = end - start + 1; 2764 spin_lock(&map->lock); 2765 em = lookup_extent_mapping(map, start, len); 2766 if (!em || IS_ERR(em)) { 2767 spin_unlock(&map->lock); 2768 break; 2769 } 2770 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || 2771 em->start != start) { 2772 spin_unlock(&map->lock); 2773 free_extent_map(em); 2774 break; 2775 } 2776 if (!test_range_bit(tree, em->start, 2777 extent_map_end(em) - 1, 2778 EXTENT_LOCKED | EXTENT_WRITEBACK | 2779 EXTENT_ORDERED, 2780 0)) { 2781 remove_extent_mapping(map, em); 2782 /* once for the rb tree */ 2783 free_extent_map(em); 2784 } 2785 start = extent_map_end(em); 2786 spin_unlock(&map->lock); 2787 2788 /* once for us */ 2789 free_extent_map(em); 2790 } 2791 } 2792 return try_release_extent_state(map, tree, page, mask); 2793 } 2794 2795 sector_t extent_bmap(struct address_space *mapping, sector_t iblock, 2796 get_extent_t *get_extent) 2797 { 2798 struct inode *inode = mapping->host; 2799 u64 start = iblock << inode->i_blkbits; 2800 sector_t sector = 0; 2801 size_t blksize = (1 << inode->i_blkbits); 2802 struct extent_map *em; 2803 2804 lock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1, 2805 GFP_NOFS); 2806 em = get_extent(inode, NULL, 0, start, blksize, 0); 2807 unlock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1, 2808 
GFP_NOFS); 2809 if (!em || IS_ERR(em)) 2810 return 0; 2811 2812 if (em->block_start > EXTENT_MAP_LAST_BYTE) 2813 goto out; 2814 2815 sector = (em->block_start + start - em->start) >> inode->i_blkbits; 2816 out: 2817 free_extent_map(em); 2818 return sector; 2819 } 2820 2821 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2822 __u64 start, __u64 len, get_extent_t *get_extent) 2823 { 2824 int ret; 2825 u64 off = start; 2826 u64 max = start + len; 2827 u32 flags = 0; 2828 u64 disko = 0; 2829 struct extent_map *em = NULL; 2830 int end = 0; 2831 u64 em_start = 0, em_len = 0; 2832 unsigned long emflags; 2833 ret = 0; 2834 2835 if (len == 0) 2836 return -EINVAL; 2837 2838 lock_extent(&BTRFS_I(inode)->io_tree, start, start + len, 2839 GFP_NOFS); 2840 em = get_extent(inode, NULL, 0, off, max - off, 0); 2841 if (!em) 2842 goto out; 2843 if (IS_ERR(em)) { 2844 ret = PTR_ERR(em); 2845 goto out; 2846 } 2847 while (!end) { 2848 off = em->start + em->len; 2849 if (off >= max) 2850 end = 1; 2851 2852 em_start = em->start; 2853 em_len = em->len; 2854 2855 disko = 0; 2856 flags = 0; 2857 2858 if (em->block_start == EXTENT_MAP_LAST_BYTE) { 2859 end = 1; 2860 flags |= FIEMAP_EXTENT_LAST; 2861 } else if (em->block_start == EXTENT_MAP_HOLE) { 2862 flags |= FIEMAP_EXTENT_UNWRITTEN; 2863 } else if (em->block_start == EXTENT_MAP_INLINE) { 2864 flags |= (FIEMAP_EXTENT_DATA_INLINE | 2865 FIEMAP_EXTENT_NOT_ALIGNED); 2866 } else if (em->block_start == EXTENT_MAP_DELALLOC) { 2867 flags |= (FIEMAP_EXTENT_DELALLOC | 2868 FIEMAP_EXTENT_UNKNOWN); 2869 } else { 2870 disko = em->block_start; 2871 } 2872 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 2873 flags |= FIEMAP_EXTENT_ENCODED; 2874 2875 emflags = em->flags; 2876 free_extent_map(em); 2877 em = NULL; 2878 2879 if (!end) { 2880 em = get_extent(inode, NULL, 0, off, max - off, 0); 2881 if (!em) 2882 goto out; 2883 if (IS_ERR(em)) { 2884 ret = PTR_ERR(em); 2885 goto out; 2886 } 2887 emflags = em->flags; 2888 } 2889 if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { 2890 flags |= FIEMAP_EXTENT_LAST; 2891 end = 1; 2892 } 2893 2894 ret = fiemap_fill_next_extent(fieinfo, em_start, disko, 2895 em_len, flags); 2896 if (ret) 2897 goto out_free; 2898 } 2899 out_free: 2900 free_extent_map(em); 2901 out: 2902 unlock_extent(&BTRFS_I(inode)->io_tree, start, start + len, 2903 GFP_NOFS); 2904 return ret; 2905 } 2906 2907 static inline struct page *extent_buffer_page(struct extent_buffer *eb, 2908 unsigned long i) 2909 { 2910 struct page *p; 2911 struct address_space *mapping; 2912 2913 if (i == 0) 2914 return eb->first_page; 2915 i += eb->start >> PAGE_CACHE_SHIFT; 2916 mapping = eb->first_page->mapping; 2917 if (!mapping) 2918 return NULL; 2919 2920 /* 2921 * extent_buffer_page is only called after pinning the page 2922 * by increasing the reference count. So we know the page must 2923 * be in the radix tree. 
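*
* The pages backing an extent buffer occupy consecutive indexes in the mapping, so page i of the buffer lives at pagecache index (eb->start >> PAGE_CACHE_SHIFT) + i, which is exactly the index computed below.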
2924 */ 2925 rcu_read_lock(); 2926 p = radix_tree_lookup(&mapping->page_tree, i); 2927 rcu_read_unlock(); 2928 2929 return p; 2930 } 2931 2932 static inline unsigned long num_extent_pages(u64 start, u64 len) 2933 { 2934 return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - 2935 (start >> PAGE_CACHE_SHIFT); 2936 } 2937 2938 static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, 2939 u64 start, 2940 unsigned long len, 2941 gfp_t mask) 2942 { 2943 struct extent_buffer *eb = NULL; 2944 #if LEAK_DEBUG 2945 unsigned long flags; 2946 #endif 2947 2948 eb = kmem_cache_zalloc(extent_buffer_cache, mask); 2949 eb->start = start; 2950 eb->len = len; 2951 spin_lock_init(&eb->lock); 2952 init_waitqueue_head(&eb->lock_wq); 2953 2954 #if LEAK_DEBUG 2955 spin_lock_irqsave(&leak_lock, flags); 2956 list_add(&eb->leak_list, &buffers); 2957 spin_unlock_irqrestore(&leak_lock, flags); 2958 #endif 2959 atomic_set(&eb->refs, 1); 2960 2961 return eb; 2962 } 2963 2964 static void __free_extent_buffer(struct extent_buffer *eb) 2965 { 2966 #if LEAK_DEBUG 2967 unsigned long flags; 2968 spin_lock_irqsave(&leak_lock, flags); 2969 list_del(&eb->leak_list); 2970 spin_unlock_irqrestore(&leak_lock, flags); 2971 #endif 2972 kmem_cache_free(extent_buffer_cache, eb); 2973 } 2974 2975 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 2976 u64 start, unsigned long len, 2977 struct page *page0, 2978 gfp_t mask) 2979 { 2980 unsigned long num_pages = num_extent_pages(start, len); 2981 unsigned long i; 2982 unsigned long index = start >> PAGE_CACHE_SHIFT; 2983 struct extent_buffer *eb; 2984 struct extent_buffer *exists = NULL; 2985 struct page *p; 2986 struct address_space *mapping = tree->mapping; 2987 int uptodate = 1; 2988 2989 spin_lock(&tree->buffer_lock); 2990 eb = buffer_search(tree, start); 2991 if (eb) { 2992 atomic_inc(&eb->refs); 2993 spin_unlock(&tree->buffer_lock); 2994 mark_page_accessed(eb->first_page); 2995 return eb; 2996 } 2997 spin_unlock(&tree->buffer_lock); 2998 2999 eb = __alloc_extent_buffer(tree, start, len, mask); 3000 if (!eb) 3001 return NULL; 3002 3003 if (page0) { 3004 eb->first_page = page0; 3005 i = 1; 3006 index++; 3007 page_cache_get(page0); 3008 mark_page_accessed(page0); 3009 set_page_extent_mapped(page0); 3010 set_page_extent_head(page0, len); 3011 uptodate = PageUptodate(page0); 3012 } else { 3013 i = 0; 3014 } 3015 for (; i < num_pages; i++, index++) { 3016 p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); 3017 if (!p) { 3018 WARN_ON(1); 3019 goto free_eb; 3020 } 3021 set_page_extent_mapped(p); 3022 mark_page_accessed(p); 3023 if (i == 0) { 3024 eb->first_page = p; 3025 set_page_extent_head(p, len); 3026 } else { 3027 set_page_private(p, EXTENT_PAGE_PRIVATE); 3028 } 3029 if (!PageUptodate(p)) 3030 uptodate = 0; 3031 unlock_page(p); 3032 } 3033 if (uptodate) 3034 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3035 3036 spin_lock(&tree->buffer_lock); 3037 exists = buffer_tree_insert(tree, start, &eb->rb_node); 3038 if (exists) { 3039 /* add one reference for the caller */ 3040 atomic_inc(&exists->refs); 3041 spin_unlock(&tree->buffer_lock); 3042 goto free_eb; 3043 } 3044 spin_unlock(&tree->buffer_lock); 3045 3046 /* add one reference for the tree */ 3047 atomic_inc(&eb->refs); 3048 return eb; 3049 3050 free_eb: 3051 if (!atomic_dec_and_test(&eb->refs)) 3052 return exists; 3053 for (index = 1; index < i; index++) 3054 page_cache_release(extent_buffer_page(eb, index)); 3055 page_cache_release(extent_buffer_page(eb, 0)); 3056 
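/* the page references taken above have been dropped, free the buffer struct itself */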
__free_extent_buffer(eb); 3057 return exists; 3058 } 3059 3060 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, 3061 u64 start, unsigned long len, 3062 gfp_t mask) 3063 { 3064 struct extent_buffer *eb; 3065 3066 spin_lock(&tree->buffer_lock); 3067 eb = buffer_search(tree, start); 3068 if (eb) 3069 atomic_inc(&eb->refs); 3070 spin_unlock(&tree->buffer_lock); 3071 3072 if (eb) 3073 mark_page_accessed(eb->first_page); 3074 3075 return eb; 3076 } 3077 3078 void free_extent_buffer(struct extent_buffer *eb) 3079 { 3080 if (!eb) 3081 return; 3082 3083 if (!atomic_dec_and_test(&eb->refs)) 3084 return; 3085 3086 WARN_ON(1); 3087 } 3088 3089 int clear_extent_buffer_dirty(struct extent_io_tree *tree, 3090 struct extent_buffer *eb) 3091 { 3092 unsigned long i; 3093 unsigned long num_pages; 3094 struct page *page; 3095 3096 num_pages = num_extent_pages(eb->start, eb->len); 3097 3098 for (i = 0; i < num_pages; i++) { 3099 page = extent_buffer_page(eb, i); 3100 if (!PageDirty(page)) 3101 continue; 3102 3103 lock_page(page); 3104 if (i == 0) 3105 set_page_extent_head(page, eb->len); 3106 else 3107 set_page_private(page, EXTENT_PAGE_PRIVATE); 3108 3109 clear_page_dirty_for_io(page); 3110 spin_lock_irq(&page->mapping->tree_lock); 3111 if (!PageDirty(page)) { 3112 radix_tree_tag_clear(&page->mapping->page_tree, 3113 page_index(page), 3114 PAGECACHE_TAG_DIRTY); 3115 } 3116 spin_unlock_irq(&page->mapping->tree_lock); 3117 unlock_page(page); 3118 } 3119 return 0; 3120 } 3121 3122 int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, 3123 struct extent_buffer *eb) 3124 { 3125 return wait_on_extent_writeback(tree, eb->start, 3126 eb->start + eb->len - 1); 3127 } 3128 3129 int set_extent_buffer_dirty(struct extent_io_tree *tree, 3130 struct extent_buffer *eb) 3131 { 3132 unsigned long i; 3133 unsigned long num_pages; 3134 int was_dirty = 0; 3135 3136 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); 3137 num_pages = num_extent_pages(eb->start, eb->len); 3138 for (i = 0; i < num_pages; i++) 3139 __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); 3140 return was_dirty; 3141 } 3142 3143 int clear_extent_buffer_uptodate(struct extent_io_tree *tree, 3144 struct extent_buffer *eb) 3145 { 3146 unsigned long i; 3147 struct page *page; 3148 unsigned long num_pages; 3149 3150 num_pages = num_extent_pages(eb->start, eb->len); 3151 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3152 3153 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3154 GFP_NOFS); 3155 for (i = 0; i < num_pages; i++) { 3156 page = extent_buffer_page(eb, i); 3157 if (page) 3158 ClearPageUptodate(page); 3159 } 3160 return 0; 3161 } 3162 3163 int set_extent_buffer_uptodate(struct extent_io_tree *tree, 3164 struct extent_buffer *eb) 3165 { 3166 unsigned long i; 3167 struct page *page; 3168 unsigned long num_pages; 3169 3170 num_pages = num_extent_pages(eb->start, eb->len); 3171 3172 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3173 GFP_NOFS); 3174 for (i = 0; i < num_pages; i++) { 3175 page = extent_buffer_page(eb, i); 3176 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || 3177 ((i == num_pages - 1) && 3178 ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { 3179 check_page_uptodate(tree, page); 3180 continue; 3181 } 3182 SetPageUptodate(page); 3183 } 3184 return 0; 3185 } 3186 3187 int extent_range_uptodate(struct extent_io_tree *tree, 3188 u64 start, u64 end) 3189 { 3190 struct page *page; 3191 int ret; 3192 int pg_uptodate = 1; 3193 int uptodate; 3194 unsigned long 
index; 3195 3196 ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1); 3197 if (ret) 3198 return 1; 3199 while (start <= end) { 3200 index = start >> PAGE_CACHE_SHIFT; 3201 page = find_get_page(tree->mapping, index); 3202 uptodate = PageUptodate(page); 3203 page_cache_release(page); 3204 if (!uptodate) { 3205 pg_uptodate = 0; 3206 break; 3207 } 3208 start += PAGE_CACHE_SIZE; 3209 } 3210 return pg_uptodate; 3211 } 3212 3213 int extent_buffer_uptodate(struct extent_io_tree *tree, 3214 struct extent_buffer *eb) 3215 { 3216 int ret = 0; 3217 unsigned long num_pages; 3218 unsigned long i; 3219 struct page *page; 3220 int pg_uptodate = 1; 3221 3222 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) 3223 return 1; 3224 3225 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, 3226 EXTENT_UPTODATE, 1); 3227 if (ret) 3228 return ret; 3229 3230 num_pages = num_extent_pages(eb->start, eb->len); 3231 for (i = 0; i < num_pages; i++) { 3232 page = extent_buffer_page(eb, i); 3233 if (!PageUptodate(page)) { 3234 pg_uptodate = 0; 3235 break; 3236 } 3237 } 3238 return pg_uptodate; 3239 } 3240 3241 int read_extent_buffer_pages(struct extent_io_tree *tree, 3242 struct extent_buffer *eb, 3243 u64 start, int wait, 3244 get_extent_t *get_extent, int mirror_num) 3245 { 3246 unsigned long i; 3247 unsigned long start_i; 3248 struct page *page; 3249 int err; 3250 int ret = 0; 3251 int locked_pages = 0; 3252 int all_uptodate = 1; 3253 int inc_all_pages = 0; 3254 unsigned long num_pages; 3255 struct bio *bio = NULL; 3256 unsigned long bio_flags = 0; 3257 3258 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) 3259 return 0; 3260 3261 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, 3262 EXTENT_UPTODATE, 1)) { 3263 return 0; 3264 } 3265 3266 if (start) { 3267 WARN_ON(start < eb->start); 3268 start_i = (start >> PAGE_CACHE_SHIFT) - 3269 (eb->start >> PAGE_CACHE_SHIFT); 3270 } else { 3271 start_i = 0; 3272 } 3273 3274 num_pages = num_extent_pages(eb->start, eb->len); 3275 for (i = start_i; i < num_pages; i++) { 3276 page = extent_buffer_page(eb, i); 3277 if (!wait) { 3278 if (!trylock_page(page)) 3279 goto unlock_exit; 3280 } else { 3281 lock_page(page); 3282 } 3283 locked_pages++; 3284 if (!PageUptodate(page)) 3285 all_uptodate = 0; 3286 } 3287 if (all_uptodate) { 3288 if (start_i == 0) 3289 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3290 goto unlock_exit; 3291 } 3292 3293 for (i = start_i; i < num_pages; i++) { 3294 page = extent_buffer_page(eb, i); 3295 if (inc_all_pages) 3296 page_cache_get(page); 3297 if (!PageUptodate(page)) { 3298 if (start_i == 0) 3299 inc_all_pages = 1; 3300 ClearPageError(page); 3301 err = __extent_read_full_page(tree, page, 3302 get_extent, &bio, 3303 mirror_num, &bio_flags); 3304 if (err) 3305 ret = err; 3306 } else { 3307 unlock_page(page); 3308 } 3309 } 3310 3311 if (bio) 3312 submit_one_bio(READ, bio, mirror_num, bio_flags); 3313 3314 if (ret || !wait) 3315 return ret; 3316 3317 for (i = start_i; i < num_pages; i++) { 3318 page = extent_buffer_page(eb, i); 3319 wait_on_page_locked(page); 3320 if (!PageUptodate(page)) 3321 ret = -EIO; 3322 } 3323 3324 if (!ret) 3325 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3326 return ret; 3327 3328 unlock_exit: 3329 i = start_i; 3330 while (locked_pages > 0) { 3331 page = extent_buffer_page(eb, i); 3332 i++; 3333 unlock_page(page); 3334 locked_pages--; 3335 } 3336 return ret; 3337 } 3338 3339 void read_extent_buffer(struct extent_buffer *eb, void *dstv, 3340 unsigned long start, 3341 unsigned long len) 3342 { 3343 size_t 
cur; 3344 size_t offset; 3345 struct page *page; 3346 char *kaddr; 3347 char *dst = (char *)dstv; 3348 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 3349 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 3350 3351 WARN_ON(start > eb->len); 3352 WARN_ON(start + len > eb->start + eb->len); 3353 3354 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 3355 3356 while (len > 0) { 3357 page = extent_buffer_page(eb, i); 3358 3359 cur = min(len, (PAGE_CACHE_SIZE - offset)); 3360 kaddr = kmap_atomic(page, KM_USER1); 3361 memcpy(dst, kaddr + offset, cur); 3362 kunmap_atomic(kaddr, KM_USER1); 3363 3364 dst += cur; 3365 len -= cur; 3366 offset = 0; 3367 i++; 3368 } 3369 } 3370 3371 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, 3372 unsigned long min_len, char **token, char **map, 3373 unsigned long *map_start, 3374 unsigned long *map_len, int km) 3375 { 3376 size_t offset = start & (PAGE_CACHE_SIZE - 1); 3377 char *kaddr; 3378 struct page *p; 3379 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 3380 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 3381 unsigned long end_i = (start_offset + start + min_len - 1) >> 3382 PAGE_CACHE_SHIFT; 3383 3384 if (i != end_i) 3385 return -EINVAL; 3386 3387 if (i == 0) { 3388 offset = start_offset; 3389 *map_start = 0; 3390 } else { 3391 offset = 0; 3392 *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset; 3393 } 3394 3395 if (start + min_len > eb->len) { 3396 printk(KERN_ERR "btrfs bad mapping eb start %llu len %lu, " 3397 "wanted %lu %lu\n", (unsigned long long)eb->start, 3398 eb->len, start, min_len); 3399 WARN_ON(1); 3400 } 3401 3402 p = extent_buffer_page(eb, i); 3403 kaddr = kmap_atomic(p, km); 3404 *token = kaddr; 3405 *map = kaddr + offset; 3406 *map_len = PAGE_CACHE_SIZE - offset; 3407 return 0; 3408 } 3409 3410 int map_extent_buffer(struct extent_buffer *eb, unsigned long start, 3411 unsigned long min_len, 3412 char **token, char **map, 3413 unsigned long *map_start, 3414 unsigned long *map_len, int km) 3415 { 3416 int err; 3417 int save = 0; 3418 if (eb->map_token) { 3419 unmap_extent_buffer(eb, eb->map_token, km); 3420 eb->map_token = NULL; 3421 save = 1; 3422 } 3423 err = map_private_extent_buffer(eb, start, min_len, token, map, 3424 map_start, map_len, km); 3425 if (!err && save) { 3426 eb->map_token = *token; 3427 eb->kaddr = *map; 3428 eb->map_start = *map_start; 3429 eb->map_len = *map_len; 3430 } 3431 return err; 3432 } 3433 3434 void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) 3435 { 3436 kunmap_atomic(token, km); 3437 } 3438 3439 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, 3440 unsigned long start, 3441 unsigned long len) 3442 { 3443 size_t cur; 3444 size_t offset; 3445 struct page *page; 3446 char *kaddr; 3447 char *ptr = (char *)ptrv; 3448 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 3449 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 3450 int ret = 0; 3451 3452 WARN_ON(start > eb->len); 3453 WARN_ON(start + len > eb->start + eb->len); 3454 3455 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 3456 3457 while (len > 0) { 3458 page = extent_buffer_page(eb, i); 3459 3460 cur = min(len, (PAGE_CACHE_SIZE - offset)); 3461 3462 kaddr = kmap_atomic(page, KM_USER0); 3463 ret = memcmp(ptr, kaddr + offset, cur); 3464 kunmap_atomic(kaddr, KM_USER0); 3465 if (ret) 3466 break; 3467 3468 ptr += cur; 3469 len -= cur; 3470 offset = 0; 3471 i++; 3472 
} 3473 return ret; 3474 } 3475 3476 void write_extent_buffer(struct extent_buffer *eb, const void *srcv, 3477 unsigned long start, unsigned long len) 3478 { 3479 size_t cur; 3480 size_t offset; 3481 struct page *page; 3482 char *kaddr; 3483 char *src = (char *)srcv; 3484 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 3485 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 3486 3487 WARN_ON(start > eb->len); 3488 WARN_ON(start + len > eb->start + eb->len); 3489 3490 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 3491 3492 while (len > 0) { 3493 page = extent_buffer_page(eb, i); 3494 WARN_ON(!PageUptodate(page)); 3495 3496 cur = min(len, PAGE_CACHE_SIZE - offset); 3497 kaddr = kmap_atomic(page, KM_USER1); 3498 memcpy(kaddr + offset, src, cur); 3499 kunmap_atomic(kaddr, KM_USER1); 3500 3501 src += cur; 3502 len -= cur; 3503 offset = 0; 3504 i++; 3505 } 3506 } 3507 3508 void memset_extent_buffer(struct extent_buffer *eb, char c, 3509 unsigned long start, unsigned long len) 3510 { 3511 size_t cur; 3512 size_t offset; 3513 struct page *page; 3514 char *kaddr; 3515 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 3516 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 3517 3518 WARN_ON(start > eb->len); 3519 WARN_ON(start + len > eb->start + eb->len); 3520 3521 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 3522 3523 while (len > 0) { 3524 page = extent_buffer_page(eb, i); 3525 WARN_ON(!PageUptodate(page)); 3526 3527 cur = min(len, PAGE_CACHE_SIZE - offset); 3528 kaddr = kmap_atomic(page, KM_USER0); 3529 memset(kaddr + offset, c, cur); 3530 kunmap_atomic(kaddr, KM_USER0); 3531 3532 len -= cur; 3533 offset = 0; 3534 i++; 3535 } 3536 } 3537 3538 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, 3539 unsigned long dst_offset, unsigned long src_offset, 3540 unsigned long len) 3541 { 3542 u64 dst_len = dst->len; 3543 size_t cur; 3544 size_t offset; 3545 struct page *page; 3546 char *kaddr; 3547 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); 3548 unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; 3549 3550 WARN_ON(src->len != dst_len); 3551 3552 offset = (start_offset + dst_offset) & 3553 ((unsigned long)PAGE_CACHE_SIZE - 1); 3554 3555 while (len > 0) { 3556 page = extent_buffer_page(dst, i); 3557 WARN_ON(!PageUptodate(page)); 3558 3559 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); 3560 3561 kaddr = kmap_atomic(page, KM_USER0); 3562 read_extent_buffer(src, kaddr + offset, src_offset, cur); 3563 kunmap_atomic(kaddr, KM_USER0); 3564 3565 src_offset += cur; 3566 len -= cur; 3567 offset = 0; 3568 i++; 3569 } 3570 } 3571 3572 static void move_pages(struct page *dst_page, struct page *src_page, 3573 unsigned long dst_off, unsigned long src_off, 3574 unsigned long len) 3575 { 3576 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); 3577 if (dst_page == src_page) { 3578 memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); 3579 } else { 3580 char *src_kaddr = kmap_atomic(src_page, KM_USER1); 3581 char *p = dst_kaddr + dst_off + len; 3582 char *s = src_kaddr + src_off + len; 3583 3584 while (len--) 3585 *--p = *--s; 3586 3587 kunmap_atomic(src_kaddr, KM_USER1); 3588 } 3589 kunmap_atomic(dst_kaddr, KM_USER0); 3590 } 3591 3592 static void copy_pages(struct page *dst_page, struct page *src_page, 3593 unsigned long dst_off, unsigned long src_off, 3594 unsigned long len) 3595 { 3596 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); 3597 char 
*src_kaddr; 3598 3599 if (dst_page != src_page) 3600 src_kaddr = kmap_atomic(src_page, KM_USER1); 3601 else 3602 src_kaddr = dst_kaddr; 3603 3604 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); 3605 kunmap_atomic(dst_kaddr, KM_USER0); 3606 if (dst_page != src_page) 3607 kunmap_atomic(src_kaddr, KM_USER1); 3608 } 3609 3610 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, 3611 unsigned long src_offset, unsigned long len) 3612 { 3613 size_t cur; 3614 size_t dst_off_in_page; 3615 size_t src_off_in_page; 3616 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); 3617 unsigned long dst_i; 3618 unsigned long src_i; 3619 3620 if (src_offset + len > dst->len) { 3621 printk(KERN_ERR "btrfs memmove bogus src_offset %lu move " 3622 "len %lu dst len %lu\n", src_offset, len, dst->len); 3623 BUG_ON(1); 3624 } 3625 if (dst_offset + len > dst->len) { 3626 printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move " 3627 "len %lu dst len %lu\n", dst_offset, len, dst->len); 3628 BUG_ON(1); 3629 } 3630 3631 while (len > 0) { 3632 dst_off_in_page = (start_offset + dst_offset) & 3633 ((unsigned long)PAGE_CACHE_SIZE - 1); 3634 src_off_in_page = (start_offset + src_offset) & 3635 ((unsigned long)PAGE_CACHE_SIZE - 1); 3636 3637 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; 3638 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; 3639 3640 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - 3641 src_off_in_page)); 3642 cur = min_t(unsigned long, cur, 3643 (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); 3644 3645 copy_pages(extent_buffer_page(dst, dst_i), 3646 extent_buffer_page(dst, src_i), 3647 dst_off_in_page, src_off_in_page, cur); 3648 3649 src_offset += cur; 3650 dst_offset += cur; 3651 len -= cur; 3652 } 3653 } 3654 3655 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, 3656 unsigned long src_offset, unsigned long len) 3657 { 3658 size_t cur; 3659 size_t dst_off_in_page; 3660 size_t src_off_in_page; 3661 unsigned long dst_end = dst_offset + len - 1; 3662 unsigned long src_end = src_offset + len - 1; 3663 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); 3664 unsigned long dst_i; 3665 unsigned long src_i; 3666 3667 if (src_offset + len > dst->len) { 3668 printk(KERN_ERR "btrfs memmove bogus src_offset %lu move " 3669 "len %lu len %lu\n", src_offset, len, dst->len); 3670 BUG_ON(1); 3671 } 3672 if (dst_offset + len > dst->len) { 3673 printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move " 3674 "len %lu len %lu\n", dst_offset, len, dst->len); 3675 BUG_ON(1); 3676 } 3677 if (dst_offset < src_offset) { 3678 memcpy_extent_buffer(dst, dst_offset, src_offset, len); 3679 return; 3680 } 3681 while (len > 0) { 3682 dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; 3683 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; 3684 3685 dst_off_in_page = (start_offset + dst_end) & 3686 ((unsigned long)PAGE_CACHE_SIZE - 1); 3687 src_off_in_page = (start_offset + src_end) & 3688 ((unsigned long)PAGE_CACHE_SIZE - 1); 3689 3690 cur = min_t(unsigned long, len, src_off_in_page + 1); 3691 cur = min(cur, dst_off_in_page + 1); 3692 move_pages(extent_buffer_page(dst, dst_i), 3693 extent_buffer_page(dst, src_i), 3694 dst_off_in_page - cur + 1, 3695 src_off_in_page - cur + 1, cur); 3696 3697 dst_end -= cur; 3698 src_end -= cur; 3699 len -= cur; 3700 } 3701 } 3702 3703 int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) 3704 { 3705 u64 start = page_offset(page); 3706 struct extent_buffer *eb; 3707 int 
ret = 1; 3708 unsigned long i; 3709 unsigned long num_pages; 3710 3711 spin_lock(&tree->buffer_lock); 3712 eb = buffer_search(tree, start); 3713 if (!eb) 3714 goto out; 3715 3716 if (atomic_read(&eb->refs) > 1) { 3717 ret = 0; 3718 goto out; 3719 } 3720 if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { 3721 ret = 0; 3722 goto out; 3723 } 3724 /* at this point we can safely release the extent buffer */ 3725 num_pages = num_extent_pages(eb->start, eb->len); 3726 for (i = 0; i < num_pages; i++) 3727 page_cache_release(extent_buffer_page(eb, i)); 3728 rb_erase(&eb->rb_node, &tree->buffer); 3729 __free_extent_buffer(eb); 3730 out: 3731 spin_unlock(&tree->buffer_lock); 3732 return ret; 3733 } 3734
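/*
 * Illustrative sketch only (not part of the build): how a caller is
 * expected to use the extent_buffer API above.  io_tree, bytenr,
 * blocksize and my_get_extent are hypothetical caller-provided values,
 * not names defined in this file.
 *
 *	struct extent_buffer *eb;
 *	u64 word;
 *	int err;
 *
 *	eb = alloc_extent_buffer(io_tree, bytenr, blocksize, NULL, GFP_NOFS);
 *	if (!eb)
 *		return -ENOMEM;
 *	(wait == 1: block until the reads on all pages complete)
 *	err = read_extent_buffer_pages(io_tree, eb, 0, 1, my_get_extent, 0);
 *	if (!err)
 *		read_extent_buffer(eb, &word, 0, sizeof(word));
 *	(drops the caller's reference; the tree still caches the buffer)
 *	free_extent_buffer(eb);
 *	return err;
 */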