1 #include <linux/bitops.h> 2 #include <linux/slab.h> 3 #include <linux/bio.h> 4 #include <linux/mm.h> 5 #include <linux/gfp.h> 6 #include <linux/pagemap.h> 7 #include <linux/page-flags.h> 8 #include <linux/module.h> 9 #include <linux/spinlock.h> 10 #include <linux/blkdev.h> 11 #include <linux/swap.h> 12 #include <linux/writeback.h> 13 #include <linux/pagevec.h> 14 #include "extent_io.h" 15 #include "extent_map.h" 16 #include "compat.h" 17 #include "ctree.h" 18 #include "btrfs_inode.h" 19 20 /* temporary define until extent_map moves out of btrfs */ 21 struct kmem_cache *btrfs_cache_create(const char *name, size_t size, 22 unsigned long extra_flags, 23 void (*ctor)(void *, struct kmem_cache *, 24 unsigned long)); 25 26 static struct kmem_cache *extent_state_cache; 27 static struct kmem_cache *extent_buffer_cache; 28 29 static LIST_HEAD(buffers); 30 static LIST_HEAD(states); 31 32 #define LEAK_DEBUG 0 33 #if LEAK_DEBUG 34 static DEFINE_SPINLOCK(leak_lock); 35 #endif 36 37 #define BUFFER_LRU_MAX 64 38 39 struct tree_entry { 40 u64 start; 41 u64 end; 42 struct rb_node rb_node; 43 }; 44 45 struct extent_page_data { 46 struct bio *bio; 47 struct extent_io_tree *tree; 48 get_extent_t *get_extent; 49 50 /* tells writepage not to lock the state bits for this range 51 * it still does the unlocking 52 */ 53 int extent_locked; 54 }; 55 56 int __init extent_io_init(void) 57 { 58 extent_state_cache = btrfs_cache_create("extent_state", 59 sizeof(struct extent_state), 0, 60 NULL); 61 if (!extent_state_cache) 62 return -ENOMEM; 63 64 extent_buffer_cache = btrfs_cache_create("extent_buffers", 65 sizeof(struct extent_buffer), 0, 66 NULL); 67 if (!extent_buffer_cache) 68 goto free_state_cache; 69 return 0; 70 71 free_state_cache: 72 kmem_cache_destroy(extent_state_cache); 73 return -ENOMEM; 74 } 75 76 void extent_io_exit(void) 77 { 78 struct extent_state *state; 79 struct extent_buffer *eb; 80 81 while (!list_empty(&states)) { 82 state = list_entry(states.next, struct extent_state, leak_list); 83 printk(KERN_ERR "btrfs state leak: start %llu end %llu " 84 "state %lu in tree %p refs %d\n", 85 (unsigned long long)state->start, 86 (unsigned long long)state->end, 87 state->state, state->tree, atomic_read(&state->refs)); 88 list_del(&state->leak_list); 89 kmem_cache_free(extent_state_cache, state); 90 91 } 92 93 while (!list_empty(&buffers)) { 94 eb = list_entry(buffers.next, struct extent_buffer, leak_list); 95 printk(KERN_ERR "btrfs buffer leak start %llu len %lu " 96 "refs %d\n", (unsigned long long)eb->start, 97 eb->len, atomic_read(&eb->refs)); 98 list_del(&eb->leak_list); 99 kmem_cache_free(extent_buffer_cache, eb); 100 } 101 if (extent_state_cache) 102 kmem_cache_destroy(extent_state_cache); 103 if (extent_buffer_cache) 104 kmem_cache_destroy(extent_buffer_cache); 105 } 106 107 void extent_io_tree_init(struct extent_io_tree *tree, 108 struct address_space *mapping, gfp_t mask) 109 { 110 tree->state.rb_node = NULL; 111 tree->buffer.rb_node = NULL; 112 tree->ops = NULL; 113 tree->dirty_bytes = 0; 114 spin_lock_init(&tree->lock); 115 spin_lock_init(&tree->buffer_lock); 116 tree->mapping = mapping; 117 } 118 119 static struct extent_state *alloc_extent_state(gfp_t mask) 120 { 121 struct extent_state *state; 122 #if LEAK_DEBUG 123 unsigned long flags; 124 #endif 125 126 state = kmem_cache_alloc(extent_state_cache, mask); 127 if (!state) 128 return state; 129 state->state = 0; 130 state->private = 0; 131 state->tree = NULL; 132 #if LEAK_DEBUG 133 spin_lock_irqsave(&leak_lock, flags); 134 
list_add(&state->leak_list, &states); 135 spin_unlock_irqrestore(&leak_lock, flags); 136 #endif 137 atomic_set(&state->refs, 1); 138 init_waitqueue_head(&state->wq); 139 return state; 140 } 141 142 static void free_extent_state(struct extent_state *state) 143 { 144 if (!state) 145 return; 146 if (atomic_dec_and_test(&state->refs)) { 147 #if LEAK_DEBUG 148 unsigned long flags; 149 #endif 150 WARN_ON(state->tree); 151 #if LEAK_DEBUG 152 spin_lock_irqsave(&leak_lock, flags); 153 list_del(&state->leak_list); 154 spin_unlock_irqrestore(&leak_lock, flags); 155 #endif 156 kmem_cache_free(extent_state_cache, state); 157 } 158 } 159 160 static struct rb_node *tree_insert(struct rb_root *root, u64 offset, 161 struct rb_node *node) 162 { 163 struct rb_node **p = &root->rb_node; 164 struct rb_node *parent = NULL; 165 struct tree_entry *entry; 166 167 while (*p) { 168 parent = *p; 169 entry = rb_entry(parent, struct tree_entry, rb_node); 170 171 if (offset < entry->start) 172 p = &(*p)->rb_left; 173 else if (offset > entry->end) 174 p = &(*p)->rb_right; 175 else 176 return parent; 177 } 178 179 entry = rb_entry(node, struct tree_entry, rb_node); 180 rb_link_node(node, parent, p); 181 rb_insert_color(node, root); 182 return NULL; 183 } 184 185 static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, 186 struct rb_node **prev_ret, 187 struct rb_node **next_ret) 188 { 189 struct rb_root *root = &tree->state; 190 struct rb_node *n = root->rb_node; 191 struct rb_node *prev = NULL; 192 struct rb_node *orig_prev = NULL; 193 struct tree_entry *entry; 194 struct tree_entry *prev_entry = NULL; 195 196 while (n) { 197 entry = rb_entry(n, struct tree_entry, rb_node); 198 prev = n; 199 prev_entry = entry; 200 201 if (offset < entry->start) 202 n = n->rb_left; 203 else if (offset > entry->end) 204 n = n->rb_right; 205 else 206 return n; 207 } 208 209 if (prev_ret) { 210 orig_prev = prev; 211 while (prev && offset > prev_entry->end) { 212 prev = rb_next(prev); 213 prev_entry = rb_entry(prev, struct tree_entry, rb_node); 214 } 215 *prev_ret = prev; 216 prev = orig_prev; 217 } 218 219 if (next_ret) { 220 prev_entry = rb_entry(prev, struct tree_entry, rb_node); 221 while (prev && offset < prev_entry->start) { 222 prev = rb_prev(prev); 223 prev_entry = rb_entry(prev, struct tree_entry, rb_node); 224 } 225 *next_ret = prev; 226 } 227 return NULL; 228 } 229 230 static inline struct rb_node *tree_search(struct extent_io_tree *tree, 231 u64 offset) 232 { 233 struct rb_node *prev = NULL; 234 struct rb_node *ret; 235 236 ret = __etree_search(tree, offset, &prev, NULL); 237 if (!ret) 238 return prev; 239 return ret; 240 } 241 242 static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree, 243 u64 offset, struct rb_node *node) 244 { 245 struct rb_root *root = &tree->buffer; 246 struct rb_node **p = &root->rb_node; 247 struct rb_node *parent = NULL; 248 struct extent_buffer *eb; 249 250 while (*p) { 251 parent = *p; 252 eb = rb_entry(parent, struct extent_buffer, rb_node); 253 254 if (offset < eb->start) 255 p = &(*p)->rb_left; 256 else if (offset > eb->start) 257 p = &(*p)->rb_right; 258 else 259 return eb; 260 } 261 262 rb_link_node(node, parent, p); 263 rb_insert_color(node, root); 264 return NULL; 265 } 266 267 static struct extent_buffer *buffer_search(struct extent_io_tree *tree, 268 u64 offset) 269 { 270 struct rb_root *root = &tree->buffer; 271 struct rb_node *n = root->rb_node; 272 struct extent_buffer *eb; 273 274 while (n) { 275 eb = rb_entry(n, struct extent_buffer, rb_node); 276 
if (offset < eb->start) 277 n = n->rb_left; 278 else if (offset > eb->start) 279 n = n->rb_right; 280 else 281 return eb; 282 } 283 return NULL; 284 } 285 286 /* 287 * utility function to look for merge candidates inside a given range. 288 * Any extents with matching state are merged together into a single 289 * extent in the tree. Extents with EXTENT_IO in their state field 290 * are not merged because the end_io handlers need to be able to do 291 * operations on them without sleeping (or doing allocations/splits). 292 * 293 * This should be called with the tree lock held. 294 */ 295 static int merge_state(struct extent_io_tree *tree, 296 struct extent_state *state) 297 { 298 struct extent_state *other; 299 struct rb_node *other_node; 300 301 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) 302 return 0; 303 304 other_node = rb_prev(&state->rb_node); 305 if (other_node) { 306 other = rb_entry(other_node, struct extent_state, rb_node); 307 if (other->end == state->start - 1 && 308 other->state == state->state) { 309 state->start = other->start; 310 other->tree = NULL; 311 rb_erase(&other->rb_node, &tree->state); 312 free_extent_state(other); 313 } 314 } 315 other_node = rb_next(&state->rb_node); 316 if (other_node) { 317 other = rb_entry(other_node, struct extent_state, rb_node); 318 if (other->start == state->end + 1 && 319 other->state == state->state) { 320 other->start = state->start; 321 state->tree = NULL; 322 rb_erase(&state->rb_node, &tree->state); 323 free_extent_state(state); 324 } 325 } 326 return 0; 327 } 328 329 static void set_state_cb(struct extent_io_tree *tree, 330 struct extent_state *state, 331 unsigned long bits) 332 { 333 if (tree->ops && tree->ops->set_bit_hook) { 334 tree->ops->set_bit_hook(tree->mapping->host, state->start, 335 state->end, state->state, bits); 336 } 337 } 338 339 static void clear_state_cb(struct extent_io_tree *tree, 340 struct extent_state *state, 341 unsigned long bits) 342 { 343 if (tree->ops && tree->ops->clear_bit_hook) { 344 tree->ops->clear_bit_hook(tree->mapping->host, state->start, 345 state->end, state->state, bits); 346 } 347 } 348 349 /* 350 * insert an extent_state struct into the tree. 'bits' are set on the 351 * struct before it is inserted. 352 * 353 * This may return -EEXIST if the extent is already there, in which case the 354 * state struct is freed. 355 * 356 * The tree lock is not taken internally. This is a utility function and 357 * probably isn't what you want to call (see set/clear_extent_bit). 
358 */ 359 static int insert_state(struct extent_io_tree *tree, 360 struct extent_state *state, u64 start, u64 end, 361 int bits) 362 { 363 struct rb_node *node; 364 365 if (end < start) { 366 printk(KERN_ERR "btrfs end < start %llu %llu\n", 367 (unsigned long long)end, 368 (unsigned long long)start); 369 WARN_ON(1); 370 } 371 if (bits & EXTENT_DIRTY) 372 tree->dirty_bytes += end - start + 1; 373 set_state_cb(tree, state, bits); 374 state->state |= bits; 375 state->start = start; 376 state->end = end; 377 node = tree_insert(&tree->state, end, &state->rb_node); 378 if (node) { 379 struct extent_state *found; 380 found = rb_entry(node, struct extent_state, rb_node); 381 printk(KERN_ERR "btrfs found node %llu %llu on insert of " 382 "%llu %llu\n", (unsigned long long)found->start, 383 (unsigned long long)found->end, 384 (unsigned long long)start, (unsigned long long)end); 385 free_extent_state(state); 386 return -EEXIST; 387 } 388 state->tree = tree; 389 merge_state(tree, state); 390 return 0; 391 } 392 393 /* 394 * split a given extent state struct in two, inserting the preallocated 395 * struct 'prealloc' as the newly created second half. 'split' indicates an 396 * offset inside 'orig' where it should be split. 397 * 398 * Before calling, 399 * the tree has 'orig' at [orig->start, orig->end]. After calling, there 400 * are two extent state structs in the tree: 401 * prealloc: [orig->start, split - 1] 402 * orig: [ split, orig->end ] 403 * 404 * The tree locks are not taken by this function. They need to be held 405 * by the caller. 406 */ 407 static int split_state(struct extent_io_tree *tree, struct extent_state *orig, 408 struct extent_state *prealloc, u64 split) 409 { 410 struct rb_node *node; 411 prealloc->start = orig->start; 412 prealloc->end = split - 1; 413 prealloc->state = orig->state; 414 orig->start = split; 415 416 node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); 417 if (node) { 418 struct extent_state *found; 419 found = rb_entry(node, struct extent_state, rb_node); 420 free_extent_state(prealloc); 421 return -EEXIST; 422 } 423 prealloc->tree = tree; 424 return 0; 425 } 426 427 /* 428 * utility function to clear some bits in an extent state struct. 429 * it will optionally wake up any one waiting on this state (wake == 1), or 430 * forcibly remove the state from the tree (delete == 1). 431 * 432 * If no bits are set on the state struct after clearing things, the 433 * struct is freed and removed from the tree 434 */ 435 static int clear_state_bit(struct extent_io_tree *tree, 436 struct extent_state *state, int bits, int wake, 437 int delete) 438 { 439 int ret = state->state & bits; 440 441 if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { 442 u64 range = state->end - state->start + 1; 443 WARN_ON(range > tree->dirty_bytes); 444 tree->dirty_bytes -= range; 445 } 446 clear_state_cb(tree, state, bits); 447 state->state &= ~bits; 448 if (wake) 449 wake_up(&state->wq); 450 if (delete || state->state == 0) { 451 if (state->tree) { 452 clear_state_cb(tree, state, state->state); 453 rb_erase(&state->rb_node, &tree->state); 454 state->tree = NULL; 455 free_extent_state(state); 456 } else { 457 WARN_ON(1); 458 } 459 } else { 460 merge_state(tree, state); 461 } 462 return ret; 463 } 464 465 /* 466 * clear some bits on a range in the tree. This may require splitting 467 * or inserting elements in the tree, so the gfp mask is used to 468 * indicate which allocations or sleeping are allowed. 
469 * 470 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove 471 * the given range from the tree regardless of state (ie for truncate). 472 * 473 * the range [start, end] is inclusive. 474 * 475 * This takes the tree lock, and returns < 0 on error, > 0 if any of the 476 * bits were already set, or zero if none of the bits were already set. 477 */ 478 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 479 int bits, int wake, int delete, gfp_t mask) 480 { 481 struct extent_state *state; 482 struct extent_state *prealloc = NULL; 483 struct rb_node *node; 484 int err; 485 int set = 0; 486 487 again: 488 if (!prealloc && (mask & __GFP_WAIT)) { 489 prealloc = alloc_extent_state(mask); 490 if (!prealloc) 491 return -ENOMEM; 492 } 493 494 spin_lock(&tree->lock); 495 /* 496 * this search will find the extents that end after 497 * our range starts 498 */ 499 node = tree_search(tree, start); 500 if (!node) 501 goto out; 502 state = rb_entry(node, struct extent_state, rb_node); 503 if (state->start > end) 504 goto out; 505 WARN_ON(state->end < start); 506 507 /* 508 * | ---- desired range ---- | 509 * | state | or 510 * | ------------- state -------------- | 511 * 512 * We need to split the extent we found, and may flip 513 * bits on second half. 514 * 515 * If the extent we found extends past our range, we 516 * just split and search again. It'll get split again 517 * the next time though. 518 * 519 * If the extent we found is inside our range, we clear 520 * the desired bit on it. 521 */ 522 523 if (state->start < start) { 524 if (!prealloc) 525 prealloc = alloc_extent_state(GFP_ATOMIC); 526 err = split_state(tree, state, prealloc, start); 527 BUG_ON(err == -EEXIST); 528 prealloc = NULL; 529 if (err) 530 goto out; 531 if (state->end <= end) { 532 start = state->end + 1; 533 set |= clear_state_bit(tree, state, bits, 534 wake, delete); 535 } else { 536 start = state->start; 537 } 538 goto search_again; 539 } 540 /* 541 * | ---- desired range ---- | 542 * | state | 543 * We need to split the extent, and clear the bit 544 * on the first half 545 */ 546 if (state->start <= end && state->end > end) { 547 if (!prealloc) 548 prealloc = alloc_extent_state(GFP_ATOMIC); 549 err = split_state(tree, state, prealloc, end + 1); 550 BUG_ON(err == -EEXIST); 551 552 if (wake) 553 wake_up(&state->wq); 554 set |= clear_state_bit(tree, prealloc, bits, 555 wake, delete); 556 prealloc = NULL; 557 goto out; 558 } 559 560 start = state->end + 1; 561 set |= clear_state_bit(tree, state, bits, wake, delete); 562 goto search_again; 563 564 out: 565 spin_unlock(&tree->lock); 566 if (prealloc) 567 free_extent_state(prealloc); 568 569 return set; 570 571 search_again: 572 if (start > end) 573 goto out; 574 spin_unlock(&tree->lock); 575 if (mask & __GFP_WAIT) 576 cond_resched(); 577 goto again; 578 } 579 580 static int wait_on_state(struct extent_io_tree *tree, 581 struct extent_state *state) 582 __releases(tree->lock) 583 __acquires(tree->lock) 584 { 585 DEFINE_WAIT(wait); 586 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); 587 spin_unlock(&tree->lock); 588 schedule(); 589 spin_lock(&tree->lock); 590 finish_wait(&state->wq, &wait); 591 return 0; 592 } 593 594 /* 595 * waits for one or more bits to clear on a range in the state tree. 596 * The range [start, end] is inclusive. 
597 * The tree lock is taken by this function 598 */ 599 int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) 600 { 601 struct extent_state *state; 602 struct rb_node *node; 603 604 spin_lock(&tree->lock); 605 again: 606 while (1) { 607 /* 608 * this search will find all the extents that end after 609 * our range starts 610 */ 611 node = tree_search(tree, start); 612 if (!node) 613 break; 614 615 state = rb_entry(node, struct extent_state, rb_node); 616 617 if (state->start > end) 618 goto out; 619 620 if (state->state & bits) { 621 start = state->start; 622 atomic_inc(&state->refs); 623 wait_on_state(tree, state); 624 free_extent_state(state); 625 goto again; 626 } 627 start = state->end + 1; 628 629 if (start > end) 630 break; 631 632 if (need_resched()) { 633 spin_unlock(&tree->lock); 634 cond_resched(); 635 spin_lock(&tree->lock); 636 } 637 } 638 out: 639 spin_unlock(&tree->lock); 640 return 0; 641 } 642 643 static void set_state_bits(struct extent_io_tree *tree, 644 struct extent_state *state, 645 int bits) 646 { 647 if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { 648 u64 range = state->end - state->start + 1; 649 tree->dirty_bytes += range; 650 } 651 set_state_cb(tree, state, bits); 652 state->state |= bits; 653 } 654 655 /* 656 * set some bits on a range in the tree. This may require allocations 657 * or sleeping, so the gfp mask is used to indicate what is allowed. 658 * 659 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the 660 * range already has the desired bits set. The start of the existing 661 * range is returned in failed_start in this case. 662 * 663 * [start, end] is inclusive 664 * This takes the tree lock. 665 */ 666 static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 667 int bits, int exclusive, u64 *failed_start, 668 gfp_t mask) 669 { 670 struct extent_state *state; 671 struct extent_state *prealloc = NULL; 672 struct rb_node *node; 673 int err = 0; 674 int set; 675 u64 last_start; 676 u64 last_end; 677 again: 678 if (!prealloc && (mask & __GFP_WAIT)) { 679 prealloc = alloc_extent_state(mask); 680 if (!prealloc) 681 return -ENOMEM; 682 } 683 684 spin_lock(&tree->lock); 685 /* 686 * this search will find all the extents that end after 687 * our range starts. 688 */ 689 node = tree_search(tree, start); 690 if (!node) { 691 err = insert_state(tree, prealloc, start, end, bits); 692 prealloc = NULL; 693 BUG_ON(err == -EEXIST); 694 goto out; 695 } 696 697 state = rb_entry(node, struct extent_state, rb_node); 698 last_start = state->start; 699 last_end = state->end; 700 701 /* 702 * | ---- desired range ---- | 703 * | state | 704 * 705 * Just lock what we found and keep going 706 */ 707 if (state->start == start && state->end <= end) { 708 set = state->state & bits; 709 if (set && exclusive) { 710 *failed_start = state->start; 711 err = -EEXIST; 712 goto out; 713 } 714 set_state_bits(tree, state, bits); 715 start = state->end + 1; 716 merge_state(tree, state); 717 goto search_again; 718 } 719 720 /* 721 * | ---- desired range ---- | 722 * | state | 723 * or 724 * | ------------- state -------------- | 725 * 726 * We need to split the extent we found, and may flip bits on 727 * second half. 728 * 729 * If the extent we found extends past our 730 * range, we just split and search again. It'll get split 731 * again the next time though. 732 * 733 * If the extent we found is inside our range, we set the 734 * desired bit on it. 
735 */ 736 if (state->start < start) { 737 set = state->state & bits; 738 if (exclusive && set) { 739 *failed_start = start; 740 err = -EEXIST; 741 goto out; 742 } 743 err = split_state(tree, state, prealloc, start); 744 BUG_ON(err == -EEXIST); 745 prealloc = NULL; 746 if (err) 747 goto out; 748 if (state->end <= end) { 749 set_state_bits(tree, state, bits); 750 start = state->end + 1; 751 merge_state(tree, state); 752 } else { 753 start = state->start; 754 } 755 goto search_again; 756 } 757 /* 758 * | ---- desired range ---- | 759 * | state | or | state | 760 * 761 * There's a hole, we need to insert something in it and 762 * ignore the extent we found. 763 */ 764 if (state->start > start) { 765 u64 this_end; 766 if (end < last_start) 767 this_end = end; 768 else 769 this_end = last_start - 1; 770 err = insert_state(tree, prealloc, start, this_end, 771 bits); 772 prealloc = NULL; 773 BUG_ON(err == -EEXIST); 774 if (err) 775 goto out; 776 start = this_end + 1; 777 goto search_again; 778 } 779 /* 780 * | ---- desired range ---- | 781 * | state | 782 * We need to split the extent, and set the bit 783 * on the first half 784 */ 785 if (state->start <= end && state->end > end) { 786 set = state->state & bits; 787 if (exclusive && set) { 788 *failed_start = start; 789 err = -EEXIST; 790 goto out; 791 } 792 err = split_state(tree, state, prealloc, end + 1); 793 BUG_ON(err == -EEXIST); 794 795 set_state_bits(tree, prealloc, bits); 796 merge_state(tree, prealloc); 797 prealloc = NULL; 798 goto out; 799 } 800 801 goto search_again; 802 803 out: 804 spin_unlock(&tree->lock); 805 if (prealloc) 806 free_extent_state(prealloc); 807 808 return err; 809 810 search_again: 811 if (start > end) 812 goto out; 813 spin_unlock(&tree->lock); 814 if (mask & __GFP_WAIT) 815 cond_resched(); 816 goto again; 817 } 818 819 /* wrappers around set/clear extent bit */ 820 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 821 gfp_t mask) 822 { 823 return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, 824 mask); 825 } 826 827 int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, 828 gfp_t mask) 829 { 830 return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask); 831 } 832 833 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 834 int bits, gfp_t mask) 835 { 836 return set_extent_bit(tree, start, end, bits, 0, NULL, 837 mask); 838 } 839 840 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 841 int bits, gfp_t mask) 842 { 843 return clear_extent_bit(tree, start, end, bits, 0, 0, mask); 844 } 845 846 int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, 847 gfp_t mask) 848 { 849 return set_extent_bit(tree, start, end, 850 EXTENT_DELALLOC | EXTENT_DIRTY, 851 0, NULL, mask); 852 } 853 854 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 855 gfp_t mask) 856 { 857 return clear_extent_bit(tree, start, end, 858 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); 859 } 860 861 int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, 862 gfp_t mask) 863 { 864 return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask); 865 } 866 867 int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 868 gfp_t mask) 869 { 870 return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, 871 mask); 872 } 873 874 static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 875 gfp_t mask) 876 { 877 return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, 
mask); 878 } 879 880 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 881 gfp_t mask) 882 { 883 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, 884 mask); 885 } 886 887 static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, 888 u64 end, gfp_t mask) 889 { 890 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); 891 } 892 893 static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, 894 gfp_t mask) 895 { 896 return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, 897 0, NULL, mask); 898 } 899 900 static int clear_extent_writeback(struct extent_io_tree *tree, u64 start, 901 u64 end, gfp_t mask) 902 { 903 return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); 904 } 905 906 int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) 907 { 908 return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); 909 } 910 911 /* 912 * either insert or lock state struct between start and end use mask to tell 913 * us if waiting is desired. 914 */ 915 int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) 916 { 917 int err; 918 u64 failed_start; 919 while (1) { 920 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 921 &failed_start, mask); 922 if (err == -EEXIST && (mask & __GFP_WAIT)) { 923 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); 924 start = failed_start; 925 } else { 926 break; 927 } 928 WARN_ON(start > end); 929 } 930 return err; 931 } 932 933 int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 934 gfp_t mask) 935 { 936 int err; 937 u64 failed_start; 938 939 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 940 &failed_start, mask); 941 if (err == -EEXIST) { 942 if (failed_start > start) 943 clear_extent_bit(tree, start, failed_start - 1, 944 EXTENT_LOCKED, 1, 0, mask); 945 return 0; 946 } 947 return 1; 948 } 949 950 int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, 951 gfp_t mask) 952 { 953 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); 954 } 955 956 /* 957 * helper function to set pages and extents in the tree dirty 958 */ 959 int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end) 960 { 961 unsigned long index = start >> PAGE_CACHE_SHIFT; 962 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 963 struct page *page; 964 965 while (index <= end_index) { 966 page = find_get_page(tree->mapping, index); 967 BUG_ON(!page); 968 __set_page_dirty_nobuffers(page); 969 page_cache_release(page); 970 index++; 971 } 972 set_extent_dirty(tree, start, end, GFP_NOFS); 973 return 0; 974 } 975 976 /* 977 * helper function to set both pages and extents in the tree writeback 978 */ 979 static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) 980 { 981 unsigned long index = start >> PAGE_CACHE_SHIFT; 982 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 983 struct page *page; 984 985 while (index <= end_index) { 986 page = find_get_page(tree->mapping, index); 987 BUG_ON(!page); 988 set_page_writeback(page); 989 page_cache_release(page); 990 index++; 991 } 992 set_extent_writeback(tree, start, end, GFP_NOFS); 993 return 0; 994 } 995 996 /* 997 * find the first offset in the io tree with 'bits' set. zero is 998 * returned if we find something, and *start_ret and *end_ret are 999 * set to reflect the state struct that was found. 
1000 * 1001 * If nothing was found, 1 is returned, < 0 on error 1002 */ 1003 int find_first_extent_bit(struct extent_io_tree *tree, u64 start, 1004 u64 *start_ret, u64 *end_ret, int bits) 1005 { 1006 struct rb_node *node; 1007 struct extent_state *state; 1008 int ret = 1; 1009 1010 spin_lock(&tree->lock); 1011 /* 1012 * this search will find all the extents that end after 1013 * our range starts. 1014 */ 1015 node = tree_search(tree, start); 1016 if (!node) 1017 goto out; 1018 1019 while (1) { 1020 state = rb_entry(node, struct extent_state, rb_node); 1021 if (state->end >= start && (state->state & bits)) { 1022 *start_ret = state->start; 1023 *end_ret = state->end; 1024 ret = 0; 1025 break; 1026 } 1027 node = rb_next(node); 1028 if (!node) 1029 break; 1030 } 1031 out: 1032 spin_unlock(&tree->lock); 1033 return ret; 1034 } 1035 1036 /* find the first state struct with 'bits' set after 'start', and 1037 * return it. tree->lock must be held. NULL will returned if 1038 * nothing was found after 'start' 1039 */ 1040 struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, 1041 u64 start, int bits) 1042 { 1043 struct rb_node *node; 1044 struct extent_state *state; 1045 1046 /* 1047 * this search will find all the extents that end after 1048 * our range starts. 1049 */ 1050 node = tree_search(tree, start); 1051 if (!node) 1052 goto out; 1053 1054 while (1) { 1055 state = rb_entry(node, struct extent_state, rb_node); 1056 if (state->end >= start && (state->state & bits)) 1057 return state; 1058 1059 node = rb_next(node); 1060 if (!node) 1061 break; 1062 } 1063 out: 1064 return NULL; 1065 } 1066 1067 /* 1068 * find a contiguous range of bytes in the file marked as delalloc, not 1069 * more than 'max_bytes'. start and end are used to return the range, 1070 * 1071 * 1 is returned if we find something, 0 if nothing was in the tree 1072 */ 1073 static noinline u64 find_delalloc_range(struct extent_io_tree *tree, 1074 u64 *start, u64 *end, u64 max_bytes) 1075 { 1076 struct rb_node *node; 1077 struct extent_state *state; 1078 u64 cur_start = *start; 1079 u64 found = 0; 1080 u64 total_bytes = 0; 1081 1082 spin_lock(&tree->lock); 1083 1084 /* 1085 * this search will find all the extents that end after 1086 * our range starts. 
1087 */ 1088 node = tree_search(tree, cur_start); 1089 if (!node) { 1090 if (!found) 1091 *end = (u64)-1; 1092 goto out; 1093 } 1094 1095 while (1) { 1096 state = rb_entry(node, struct extent_state, rb_node); 1097 if (found && (state->start != cur_start || 1098 (state->state & EXTENT_BOUNDARY))) { 1099 goto out; 1100 } 1101 if (!(state->state & EXTENT_DELALLOC)) { 1102 if (!found) 1103 *end = state->end; 1104 goto out; 1105 } 1106 if (!found) 1107 *start = state->start; 1108 found++; 1109 *end = state->end; 1110 cur_start = state->end + 1; 1111 node = rb_next(node); 1112 if (!node) 1113 break; 1114 total_bytes += state->end - state->start + 1; 1115 if (total_bytes >= max_bytes) 1116 break; 1117 } 1118 out: 1119 spin_unlock(&tree->lock); 1120 return found; 1121 } 1122 1123 static noinline int __unlock_for_delalloc(struct inode *inode, 1124 struct page *locked_page, 1125 u64 start, u64 end) 1126 { 1127 int ret; 1128 struct page *pages[16]; 1129 unsigned long index = start >> PAGE_CACHE_SHIFT; 1130 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 1131 unsigned long nr_pages = end_index - index + 1; 1132 int i; 1133 1134 if (index == locked_page->index && end_index == index) 1135 return 0; 1136 1137 while (nr_pages > 0) { 1138 ret = find_get_pages_contig(inode->i_mapping, index, 1139 min_t(unsigned long, nr_pages, 1140 ARRAY_SIZE(pages)), pages); 1141 for (i = 0; i < ret; i++) { 1142 if (pages[i] != locked_page) 1143 unlock_page(pages[i]); 1144 page_cache_release(pages[i]); 1145 } 1146 nr_pages -= ret; 1147 index += ret; 1148 cond_resched(); 1149 } 1150 return 0; 1151 } 1152 1153 static noinline int lock_delalloc_pages(struct inode *inode, 1154 struct page *locked_page, 1155 u64 delalloc_start, 1156 u64 delalloc_end) 1157 { 1158 unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT; 1159 unsigned long start_index = index; 1160 unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT; 1161 unsigned long pages_locked = 0; 1162 struct page *pages[16]; 1163 unsigned long nrpages; 1164 int ret; 1165 int i; 1166 1167 /* the caller is responsible for locking the start index */ 1168 if (index == locked_page->index && index == end_index) 1169 return 0; 1170 1171 /* skip the page at the start index */ 1172 nrpages = end_index - index + 1; 1173 while (nrpages > 0) { 1174 ret = find_get_pages_contig(inode->i_mapping, index, 1175 min_t(unsigned long, 1176 nrpages, ARRAY_SIZE(pages)), pages); 1177 if (ret == 0) { 1178 ret = -EAGAIN; 1179 goto done; 1180 } 1181 /* now we have an array of pages, lock them all */ 1182 for (i = 0; i < ret; i++) { 1183 /* 1184 * the caller is taking responsibility for 1185 * locked_page 1186 */ 1187 if (pages[i] != locked_page) { 1188 lock_page(pages[i]); 1189 if (!PageDirty(pages[i]) || 1190 pages[i]->mapping != inode->i_mapping) { 1191 ret = -EAGAIN; 1192 unlock_page(pages[i]); 1193 page_cache_release(pages[i]); 1194 goto done; 1195 } 1196 } 1197 page_cache_release(pages[i]); 1198 pages_locked++; 1199 } 1200 nrpages -= ret; 1201 index += ret; 1202 cond_resched(); 1203 } 1204 ret = 0; 1205 done: 1206 if (ret && pages_locked) { 1207 __unlock_for_delalloc(inode, locked_page, 1208 delalloc_start, 1209 ((u64)(start_index + pages_locked - 1)) << 1210 PAGE_CACHE_SHIFT); 1211 } 1212 return ret; 1213 } 1214 1215 /* 1216 * find a contiguous range of bytes in the file marked as delalloc, not 1217 * more than 'max_bytes'. 
start and end are used to return the range, 1218 * 1219 * 1 is returned if we find something, 0 if nothing was in the tree 1220 */ 1221 static noinline u64 find_lock_delalloc_range(struct inode *inode, 1222 struct extent_io_tree *tree, 1223 struct page *locked_page, 1224 u64 *start, u64 *end, 1225 u64 max_bytes) 1226 { 1227 u64 delalloc_start; 1228 u64 delalloc_end; 1229 u64 found; 1230 int ret; 1231 int loops = 0; 1232 1233 again: 1234 /* step one, find a bunch of delalloc bytes starting at start */ 1235 delalloc_start = *start; 1236 delalloc_end = 0; 1237 found = find_delalloc_range(tree, &delalloc_start, &delalloc_end, 1238 max_bytes); 1239 if (!found || delalloc_end <= *start) { 1240 *start = delalloc_start; 1241 *end = delalloc_end; 1242 return found; 1243 } 1244 1245 /* 1246 * start comes from the offset of locked_page. We have to lock 1247 * pages in order, so we can't process delalloc bytes before 1248 * locked_page 1249 */ 1250 if (delalloc_start < *start) 1251 delalloc_start = *start; 1252 1253 /* 1254 * make sure to limit the number of pages we try to lock down 1255 * if we're looping. 1256 */ 1257 if (delalloc_end + 1 - delalloc_start > max_bytes && loops) 1258 delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1; 1259 1260 /* step two, lock all the pages after the page that has start */ 1261 ret = lock_delalloc_pages(inode, locked_page, 1262 delalloc_start, delalloc_end); 1263 if (ret == -EAGAIN) { 1264 /* some of the pages are gone, lets avoid looping by 1265 * shortening the size of the delalloc range we're searching 1266 */ 1267 if (!loops) { 1268 unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); 1269 max_bytes = PAGE_CACHE_SIZE - offset; 1270 loops = 1; 1271 goto again; 1272 } else { 1273 found = 0; 1274 goto out_failed; 1275 } 1276 } 1277 BUG_ON(ret); 1278 1279 /* step three, lock the state bits for the whole range */ 1280 lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); 1281 1282 /* then test to make sure it is all still delalloc */ 1283 ret = test_range_bit(tree, delalloc_start, delalloc_end, 1284 EXTENT_DELALLOC, 1); 1285 if (!ret) { 1286 unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); 1287 __unlock_for_delalloc(inode, locked_page, 1288 delalloc_start, delalloc_end); 1289 cond_resched(); 1290 goto again; 1291 } 1292 *start = delalloc_start; 1293 *end = delalloc_end; 1294 out_failed: 1295 return found; 1296 } 1297 1298 int extent_clear_unlock_delalloc(struct inode *inode, 1299 struct extent_io_tree *tree, 1300 u64 start, u64 end, struct page *locked_page, 1301 int unlock_pages, 1302 int clear_unlock, 1303 int clear_delalloc, int clear_dirty, 1304 int set_writeback, 1305 int end_writeback) 1306 { 1307 int ret; 1308 struct page *pages[16]; 1309 unsigned long index = start >> PAGE_CACHE_SHIFT; 1310 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 1311 unsigned long nr_pages = end_index - index + 1; 1312 int i; 1313 int clear_bits = 0; 1314 1315 if (clear_unlock) 1316 clear_bits |= EXTENT_LOCKED; 1317 if (clear_dirty) 1318 clear_bits |= EXTENT_DIRTY; 1319 1320 if (clear_delalloc) 1321 clear_bits |= EXTENT_DELALLOC; 1322 1323 clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS); 1324 if (!(unlock_pages || clear_dirty || set_writeback || end_writeback)) 1325 return 0; 1326 1327 while (nr_pages > 0) { 1328 ret = find_get_pages_contig(inode->i_mapping, index, 1329 min_t(unsigned long, 1330 nr_pages, ARRAY_SIZE(pages)), pages); 1331 for (i = 0; i < ret; i++) { 1332 if (pages[i] == locked_page) { 1333 page_cache_release(pages[i]); 1334 
continue; 1335 } 1336 if (clear_dirty) 1337 clear_page_dirty_for_io(pages[i]); 1338 if (set_writeback) 1339 set_page_writeback(pages[i]); 1340 if (end_writeback) 1341 end_page_writeback(pages[i]); 1342 if (unlock_pages) 1343 unlock_page(pages[i]); 1344 page_cache_release(pages[i]); 1345 } 1346 nr_pages -= ret; 1347 index += ret; 1348 cond_resched(); 1349 } 1350 return 0; 1351 } 1352 1353 /* 1354 * count the number of bytes in the tree that have a given bit(s) 1355 * set. This can be fairly slow, except for EXTENT_DIRTY which is 1356 * cached. The total number found is returned. 1357 */ 1358 u64 count_range_bits(struct extent_io_tree *tree, 1359 u64 *start, u64 search_end, u64 max_bytes, 1360 unsigned long bits) 1361 { 1362 struct rb_node *node; 1363 struct extent_state *state; 1364 u64 cur_start = *start; 1365 u64 total_bytes = 0; 1366 int found = 0; 1367 1368 if (search_end <= cur_start) { 1369 WARN_ON(1); 1370 return 0; 1371 } 1372 1373 spin_lock(&tree->lock); 1374 if (cur_start == 0 && bits == EXTENT_DIRTY) { 1375 total_bytes = tree->dirty_bytes; 1376 goto out; 1377 } 1378 /* 1379 * this search will find all the extents that end after 1380 * our range starts. 1381 */ 1382 node = tree_search(tree, cur_start); 1383 if (!node) 1384 goto out; 1385 1386 while (1) { 1387 state = rb_entry(node, struct extent_state, rb_node); 1388 if (state->start > search_end) 1389 break; 1390 if (state->end >= cur_start && (state->state & bits)) { 1391 total_bytes += min(search_end, state->end) + 1 - 1392 max(cur_start, state->start); 1393 if (total_bytes >= max_bytes) 1394 break; 1395 if (!found) { 1396 *start = state->start; 1397 found = 1; 1398 } 1399 } 1400 node = rb_next(node); 1401 if (!node) 1402 break; 1403 } 1404 out: 1405 spin_unlock(&tree->lock); 1406 return total_bytes; 1407 } 1408 1409 #if 0 1410 /* 1411 * helper function to lock both pages and extents in the tree. 1412 * pages must be locked first. 1413 */ 1414 static int lock_range(struct extent_io_tree *tree, u64 start, u64 end) 1415 { 1416 unsigned long index = start >> PAGE_CACHE_SHIFT; 1417 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 1418 struct page *page; 1419 int err; 1420 1421 while (index <= end_index) { 1422 page = grab_cache_page(tree->mapping, index); 1423 if (!page) { 1424 err = -ENOMEM; 1425 goto failed; 1426 } 1427 if (IS_ERR(page)) { 1428 err = PTR_ERR(page); 1429 goto failed; 1430 } 1431 index++; 1432 } 1433 lock_extent(tree, start, end, GFP_NOFS); 1434 return 0; 1435 1436 failed: 1437 /* 1438 * we failed above in getting the page at 'index', so we undo here 1439 * up to but not including the page at 'index' 1440 */ 1441 end_index = index; 1442 index = start >> PAGE_CACHE_SHIFT; 1443 while (index < end_index) { 1444 page = find_get_page(tree->mapping, index); 1445 unlock_page(page); 1446 page_cache_release(page); 1447 index++; 1448 } 1449 return err; 1450 } 1451 1452 /* 1453 * helper function to unlock both pages and extents in the tree. 1454 */ 1455 static int unlock_range(struct extent_io_tree *tree, u64 start, u64 end) 1456 { 1457 unsigned long index = start >> PAGE_CACHE_SHIFT; 1458 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 1459 struct page *page; 1460 1461 while (index <= end_index) { 1462 page = find_get_page(tree->mapping, index); 1463 unlock_page(page); 1464 page_cache_release(page); 1465 index++; 1466 } 1467 unlock_extent(tree, start, end, GFP_NOFS); 1468 return 0; 1469 } 1470 #endif 1471 1472 /* 1473 * set the private field for a given byte offset in the tree. 
If there isn't 1474 * an extent_state there already, this does nothing. 1475 */ 1476 int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) 1477 { 1478 struct rb_node *node; 1479 struct extent_state *state; 1480 int ret = 0; 1481 1482 spin_lock(&tree->lock); 1483 /* 1484 * this search will find all the extents that end after 1485 * our range starts. 1486 */ 1487 node = tree_search(tree, start); 1488 if (!node) { 1489 ret = -ENOENT; 1490 goto out; 1491 } 1492 state = rb_entry(node, struct extent_state, rb_node); 1493 if (state->start != start) { 1494 ret = -ENOENT; 1495 goto out; 1496 } 1497 state->private = private; 1498 out: 1499 spin_unlock(&tree->lock); 1500 return ret; 1501 } 1502 1503 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) 1504 { 1505 struct rb_node *node; 1506 struct extent_state *state; 1507 int ret = 0; 1508 1509 spin_lock(&tree->lock); 1510 /* 1511 * this search will find all the extents that end after 1512 * our range starts. 1513 */ 1514 node = tree_search(tree, start); 1515 if (!node) { 1516 ret = -ENOENT; 1517 goto out; 1518 } 1519 state = rb_entry(node, struct extent_state, rb_node); 1520 if (state->start != start) { 1521 ret = -ENOENT; 1522 goto out; 1523 } 1524 *private = state->private; 1525 out: 1526 spin_unlock(&tree->lock); 1527 return ret; 1528 } 1529 1530 /* 1531 * searches a range in the state tree for a given mask. 1532 * If 'filled' == 1, this returns 1 only if every extent in the tree 1533 * has the bits set. Otherwise, 1 is returned if any bit in the 1534 * range is found set. 1535 */ 1536 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 1537 int bits, int filled) 1538 { 1539 struct extent_state *state = NULL; 1540 struct rb_node *node; 1541 int bitset = 0; 1542 1543 spin_lock(&tree->lock); 1544 node = tree_search(tree, start); 1545 while (node && start <= end) { 1546 state = rb_entry(node, struct extent_state, rb_node); 1547 1548 if (filled && state->start > start) { 1549 bitset = 0; 1550 break; 1551 } 1552 1553 if (state->start > end) 1554 break; 1555 1556 if (state->state & bits) { 1557 bitset = 1; 1558 if (!filled) 1559 break; 1560 } else if (filled) { 1561 bitset = 0; 1562 break; 1563 } 1564 start = state->end + 1; 1565 if (start > end) 1566 break; 1567 node = rb_next(node); 1568 if (!node) { 1569 if (filled) 1570 bitset = 0; 1571 break; 1572 } 1573 } 1574 spin_unlock(&tree->lock); 1575 return bitset; 1576 } 1577 1578 /* 1579 * helper function to set a given page up to date if all the 1580 * extents in the tree for that page are up to date 1581 */ 1582 static int check_page_uptodate(struct extent_io_tree *tree, 1583 struct page *page) 1584 { 1585 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1586 u64 end = start + PAGE_CACHE_SIZE - 1; 1587 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) 1588 SetPageUptodate(page); 1589 return 0; 1590 } 1591 1592 /* 1593 * helper function to unlock a page if all the extents in the tree 1594 * for that page are unlocked 1595 */ 1596 static int check_page_locked(struct extent_io_tree *tree, 1597 struct page *page) 1598 { 1599 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1600 u64 end = start + PAGE_CACHE_SIZE - 1; 1601 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) 1602 unlock_page(page); 1603 return 0; 1604 } 1605 1606 /* 1607 * helper function to end page writeback if all the extents 1608 * in the tree for that page are done with writeback 1609 */ 1610 static int check_page_writeback(struct extent_io_tree *tree, 1611 struct 
page *page) 1612 { 1613 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1614 u64 end = start + PAGE_CACHE_SIZE - 1; 1615 if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) 1616 end_page_writeback(page); 1617 return 0; 1618 } 1619 1620 /* lots and lots of room for performance fixes in the end_bio funcs */ 1621 1622 /* 1623 * after a writepage IO is done, we need to: 1624 * clear the uptodate bits on error 1625 * clear the writeback bits in the extent tree for this IO 1626 * end_page_writeback if the page has no more pending IO 1627 * 1628 * Scheduling is not allowed, so the extent state tree is expected 1629 * to have one and only one object corresponding to this IO. 1630 */ 1631 static void end_bio_extent_writepage(struct bio *bio, int err) 1632 { 1633 int uptodate = err == 0; 1634 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 1635 struct extent_io_tree *tree; 1636 u64 start; 1637 u64 end; 1638 int whole_page; 1639 int ret; 1640 1641 do { 1642 struct page *page = bvec->bv_page; 1643 tree = &BTRFS_I(page->mapping->host)->io_tree; 1644 1645 start = ((u64)page->index << PAGE_CACHE_SHIFT) + 1646 bvec->bv_offset; 1647 end = start + bvec->bv_len - 1; 1648 1649 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) 1650 whole_page = 1; 1651 else 1652 whole_page = 0; 1653 1654 if (--bvec >= bio->bi_io_vec) 1655 prefetchw(&bvec->bv_page->flags); 1656 if (tree->ops && tree->ops->writepage_end_io_hook) { 1657 ret = tree->ops->writepage_end_io_hook(page, start, 1658 end, NULL, uptodate); 1659 if (ret) 1660 uptodate = 0; 1661 } 1662 1663 if (!uptodate && tree->ops && 1664 tree->ops->writepage_io_failed_hook) { 1665 ret = tree->ops->writepage_io_failed_hook(bio, page, 1666 start, end, NULL); 1667 if (ret == 0) { 1668 uptodate = (err == 0); 1669 continue; 1670 } 1671 } 1672 1673 if (!uptodate) { 1674 clear_extent_uptodate(tree, start, end, GFP_ATOMIC); 1675 ClearPageUptodate(page); 1676 SetPageError(page); 1677 } 1678 1679 clear_extent_writeback(tree, start, end, GFP_ATOMIC); 1680 1681 if (whole_page) 1682 end_page_writeback(page); 1683 else 1684 check_page_writeback(tree, page); 1685 } while (bvec >= bio->bi_io_vec); 1686 1687 bio_put(bio); 1688 } 1689 1690 /* 1691 * after a readpage IO is done, we need to: 1692 * clear the uptodate bits on error 1693 * set the uptodate bits if things worked 1694 * set the page up to date if all extents in the tree are uptodate 1695 * clear the lock bit in the extent tree 1696 * unlock the page if there are no other extents locked for it 1697 * 1698 * Scheduling is not allowed, so the extent state tree is expected 1699 * to have one and only one object corresponding to this IO. 
1700 */ 1701 static void end_bio_extent_readpage(struct bio *bio, int err) 1702 { 1703 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 1704 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 1705 struct extent_io_tree *tree; 1706 u64 start; 1707 u64 end; 1708 int whole_page; 1709 int ret; 1710 1711 if (err) 1712 uptodate = 0; 1713 1714 do { 1715 struct page *page = bvec->bv_page; 1716 tree = &BTRFS_I(page->mapping->host)->io_tree; 1717 1718 start = ((u64)page->index << PAGE_CACHE_SHIFT) + 1719 bvec->bv_offset; 1720 end = start + bvec->bv_len - 1; 1721 1722 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) 1723 whole_page = 1; 1724 else 1725 whole_page = 0; 1726 1727 if (--bvec >= bio->bi_io_vec) 1728 prefetchw(&bvec->bv_page->flags); 1729 1730 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { 1731 ret = tree->ops->readpage_end_io_hook(page, start, end, 1732 NULL); 1733 if (ret) 1734 uptodate = 0; 1735 } 1736 if (!uptodate && tree->ops && 1737 tree->ops->readpage_io_failed_hook) { 1738 ret = tree->ops->readpage_io_failed_hook(bio, page, 1739 start, end, NULL); 1740 if (ret == 0) { 1741 uptodate = 1742 test_bit(BIO_UPTODATE, &bio->bi_flags); 1743 if (err) 1744 uptodate = 0; 1745 continue; 1746 } 1747 } 1748 1749 if (uptodate) { 1750 set_extent_uptodate(tree, start, end, 1751 GFP_ATOMIC); 1752 } 1753 unlock_extent(tree, start, end, GFP_ATOMIC); 1754 1755 if (whole_page) { 1756 if (uptodate) { 1757 SetPageUptodate(page); 1758 } else { 1759 ClearPageUptodate(page); 1760 SetPageError(page); 1761 } 1762 unlock_page(page); 1763 } else { 1764 if (uptodate) { 1765 check_page_uptodate(tree, page); 1766 } else { 1767 ClearPageUptodate(page); 1768 SetPageError(page); 1769 } 1770 check_page_locked(tree, page); 1771 } 1772 } while (bvec >= bio->bi_io_vec); 1773 1774 bio_put(bio); 1775 } 1776 1777 /* 1778 * IO done from prepare_write is pretty simple, we just unlock 1779 * the structs in the extent tree when done, and set the uptodate bits 1780 * as appropriate. 
1781 */ 1782 static void end_bio_extent_preparewrite(struct bio *bio, int err) 1783 { 1784 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 1785 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 1786 struct extent_io_tree *tree; 1787 u64 start; 1788 u64 end; 1789 1790 do { 1791 struct page *page = bvec->bv_page; 1792 tree = &BTRFS_I(page->mapping->host)->io_tree; 1793 1794 start = ((u64)page->index << PAGE_CACHE_SHIFT) + 1795 bvec->bv_offset; 1796 end = start + bvec->bv_len - 1; 1797 1798 if (--bvec >= bio->bi_io_vec) 1799 prefetchw(&bvec->bv_page->flags); 1800 1801 if (uptodate) { 1802 set_extent_uptodate(tree, start, end, GFP_ATOMIC); 1803 } else { 1804 ClearPageUptodate(page); 1805 SetPageError(page); 1806 } 1807 1808 unlock_extent(tree, start, end, GFP_ATOMIC); 1809 1810 } while (bvec >= bio->bi_io_vec); 1811 1812 bio_put(bio); 1813 } 1814 1815 static struct bio * 1816 extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 1817 gfp_t gfp_flags) 1818 { 1819 struct bio *bio; 1820 1821 bio = bio_alloc(gfp_flags, nr_vecs); 1822 1823 if (bio == NULL && (current->flags & PF_MEMALLOC)) { 1824 while (!bio && (nr_vecs /= 2)) 1825 bio = bio_alloc(gfp_flags, nr_vecs); 1826 } 1827 1828 if (bio) { 1829 bio->bi_size = 0; 1830 bio->bi_bdev = bdev; 1831 bio->bi_sector = first_sector; 1832 } 1833 return bio; 1834 } 1835 1836 static int submit_one_bio(int rw, struct bio *bio, int mirror_num, 1837 unsigned long bio_flags) 1838 { 1839 int ret = 0; 1840 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 1841 struct page *page = bvec->bv_page; 1842 struct extent_io_tree *tree = bio->bi_private; 1843 u64 start; 1844 u64 end; 1845 1846 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; 1847 end = start + bvec->bv_len - 1; 1848 1849 bio->bi_private = NULL; 1850 1851 bio_get(bio); 1852 1853 if (tree->ops && tree->ops->submit_bio_hook) 1854 tree->ops->submit_bio_hook(page->mapping->host, rw, bio, 1855 mirror_num, bio_flags); 1856 else 1857 submit_bio(rw, bio); 1858 if (bio_flagged(bio, BIO_EOPNOTSUPP)) 1859 ret = -EOPNOTSUPP; 1860 bio_put(bio); 1861 return ret; 1862 } 1863 1864 static int submit_extent_page(int rw, struct extent_io_tree *tree, 1865 struct page *page, sector_t sector, 1866 size_t size, unsigned long offset, 1867 struct block_device *bdev, 1868 struct bio **bio_ret, 1869 unsigned long max_pages, 1870 bio_end_io_t end_io_func, 1871 int mirror_num, 1872 unsigned long prev_bio_flags, 1873 unsigned long bio_flags) 1874 { 1875 int ret = 0; 1876 struct bio *bio; 1877 int nr; 1878 int contig = 0; 1879 int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED; 1880 int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED; 1881 size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE); 1882 1883 if (bio_ret && *bio_ret) { 1884 bio = *bio_ret; 1885 if (old_compressed) 1886 contig = bio->bi_sector == sector; 1887 else 1888 contig = bio->bi_sector + (bio->bi_size >> 9) == 1889 sector; 1890 1891 if (prev_bio_flags != bio_flags || !contig || 1892 (tree->ops && tree->ops->merge_bio_hook && 1893 tree->ops->merge_bio_hook(page, offset, page_size, bio, 1894 bio_flags)) || 1895 bio_add_page(bio, page, page_size, offset) < page_size) { 1896 ret = submit_one_bio(rw, bio, mirror_num, 1897 prev_bio_flags); 1898 bio = NULL; 1899 } else { 1900 return 0; 1901 } 1902 } 1903 if (this_compressed) 1904 nr = BIO_MAX_PAGES; 1905 else 1906 nr = bio_get_nr_vecs(bdev); 1907 1908 bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); 1909 1910 bio_add_page(bio, 
page, page_size, offset); 1911 bio->bi_end_io = end_io_func; 1912 bio->bi_private = tree; 1913 1914 if (bio_ret) 1915 *bio_ret = bio; 1916 else 1917 ret = submit_one_bio(rw, bio, mirror_num, bio_flags); 1918 1919 return ret; 1920 } 1921 1922 void set_page_extent_mapped(struct page *page) 1923 { 1924 if (!PagePrivate(page)) { 1925 SetPagePrivate(page); 1926 page_cache_get(page); 1927 set_page_private(page, EXTENT_PAGE_PRIVATE); 1928 } 1929 } 1930 1931 static void set_page_extent_head(struct page *page, unsigned long len) 1932 { 1933 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); 1934 } 1935 1936 /* 1937 * basic readpage implementation. Locked extent state structs are inserted 1938 * into the tree that are removed when the IO is done (by the end_io 1939 * handlers) 1940 */ 1941 static int __extent_read_full_page(struct extent_io_tree *tree, 1942 struct page *page, 1943 get_extent_t *get_extent, 1944 struct bio **bio, int mirror_num, 1945 unsigned long *bio_flags) 1946 { 1947 struct inode *inode = page->mapping->host; 1948 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1949 u64 page_end = start + PAGE_CACHE_SIZE - 1; 1950 u64 end; 1951 u64 cur = start; 1952 u64 extent_offset; 1953 u64 last_byte = i_size_read(inode); 1954 u64 block_start; 1955 u64 cur_end; 1956 sector_t sector; 1957 struct extent_map *em; 1958 struct block_device *bdev; 1959 int ret; 1960 int nr = 0; 1961 size_t page_offset = 0; 1962 size_t iosize; 1963 size_t disk_io_size; 1964 size_t blocksize = inode->i_sb->s_blocksize; 1965 unsigned long this_bio_flag = 0; 1966 1967 set_page_extent_mapped(page); 1968 1969 end = page_end; 1970 lock_extent(tree, start, end, GFP_NOFS); 1971 1972 if (page->index == last_byte >> PAGE_CACHE_SHIFT) { 1973 char *userpage; 1974 size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1); 1975 1976 if (zero_offset) { 1977 iosize = PAGE_CACHE_SIZE - zero_offset; 1978 userpage = kmap_atomic(page, KM_USER0); 1979 memset(userpage + zero_offset, 0, iosize); 1980 flush_dcache_page(page); 1981 kunmap_atomic(userpage, KM_USER0); 1982 } 1983 } 1984 while (cur <= end) { 1985 if (cur >= last_byte) { 1986 char *userpage; 1987 iosize = PAGE_CACHE_SIZE - page_offset; 1988 userpage = kmap_atomic(page, KM_USER0); 1989 memset(userpage + page_offset, 0, iosize); 1990 flush_dcache_page(page); 1991 kunmap_atomic(userpage, KM_USER0); 1992 set_extent_uptodate(tree, cur, cur + iosize - 1, 1993 GFP_NOFS); 1994 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 1995 break; 1996 } 1997 em = get_extent(inode, page, page_offset, cur, 1998 end - cur + 1, 0); 1999 if (IS_ERR(em) || !em) { 2000 SetPageError(page); 2001 unlock_extent(tree, cur, end, GFP_NOFS); 2002 break; 2003 } 2004 extent_offset = cur - em->start; 2005 BUG_ON(extent_map_end(em) <= cur); 2006 BUG_ON(end < cur); 2007 2008 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 2009 this_bio_flag = EXTENT_BIO_COMPRESSED; 2010 2011 iosize = min(extent_map_end(em) - cur, end - cur + 1); 2012 cur_end = min(extent_map_end(em) - 1, end); 2013 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); 2014 if (this_bio_flag & EXTENT_BIO_COMPRESSED) { 2015 disk_io_size = em->block_len; 2016 sector = em->block_start >> 9; 2017 } else { 2018 sector = (em->block_start + extent_offset) >> 9; 2019 disk_io_size = iosize; 2020 } 2021 bdev = em->bdev; 2022 block_start = em->block_start; 2023 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 2024 block_start = EXTENT_MAP_HOLE; 2025 free_extent_map(em); 2026 em = NULL; 2027 2028 /* we've found a hole, just zero and go 
on */ 2029 if (block_start == EXTENT_MAP_HOLE) { 2030 char *userpage; 2031 userpage = kmap_atomic(page, KM_USER0); 2032 memset(userpage + page_offset, 0, iosize); 2033 flush_dcache_page(page); 2034 kunmap_atomic(userpage, KM_USER0); 2035 2036 set_extent_uptodate(tree, cur, cur + iosize - 1, 2037 GFP_NOFS); 2038 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2039 cur = cur + iosize; 2040 page_offset += iosize; 2041 continue; 2042 } 2043 /* the get_extent function already copied into the page */ 2044 if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { 2045 check_page_uptodate(tree, page); 2046 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2047 cur = cur + iosize; 2048 page_offset += iosize; 2049 continue; 2050 } 2051 /* we have an inline extent but it didn't get marked up 2052 * to date. Error out 2053 */ 2054 if (block_start == EXTENT_MAP_INLINE) { 2055 SetPageError(page); 2056 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2057 cur = cur + iosize; 2058 page_offset += iosize; 2059 continue; 2060 } 2061 2062 ret = 0; 2063 if (tree->ops && tree->ops->readpage_io_hook) { 2064 ret = tree->ops->readpage_io_hook(page, cur, 2065 cur + iosize - 1); 2066 } 2067 if (!ret) { 2068 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; 2069 pnr -= page->index; 2070 ret = submit_extent_page(READ, tree, page, 2071 sector, disk_io_size, page_offset, 2072 bdev, bio, pnr, 2073 end_bio_extent_readpage, mirror_num, 2074 *bio_flags, 2075 this_bio_flag); 2076 nr++; 2077 *bio_flags = this_bio_flag; 2078 } 2079 if (ret) 2080 SetPageError(page); 2081 cur = cur + iosize; 2082 page_offset += iosize; 2083 } 2084 if (!nr) { 2085 if (!PageError(page)) 2086 SetPageUptodate(page); 2087 unlock_page(page); 2088 } 2089 return 0; 2090 } 2091 2092 int extent_read_full_page(struct extent_io_tree *tree, struct page *page, 2093 get_extent_t *get_extent) 2094 { 2095 struct bio *bio = NULL; 2096 unsigned long bio_flags = 0; 2097 int ret; 2098 2099 ret = __extent_read_full_page(tree, page, get_extent, &bio, 0, 2100 &bio_flags); 2101 if (bio) 2102 submit_one_bio(READ, bio, 0, bio_flags); 2103 return ret; 2104 } 2105 2106 /* 2107 * the writepage semantics are similar to regular writepage. extent 2108 * records are inserted to lock ranges in the tree, and as dirty areas 2109 * are found, they are marked writeback. 
Then the lock bits are removed 2110 * and the end_io handler clears the writeback ranges 2111 */ 2112 static int __extent_writepage(struct page *page, struct writeback_control *wbc, 2113 void *data) 2114 { 2115 struct inode *inode = page->mapping->host; 2116 struct extent_page_data *epd = data; 2117 struct extent_io_tree *tree = epd->tree; 2118 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 2119 u64 delalloc_start; 2120 u64 page_end = start + PAGE_CACHE_SIZE - 1; 2121 u64 end; 2122 u64 cur = start; 2123 u64 extent_offset; 2124 u64 last_byte = i_size_read(inode); 2125 u64 block_start; 2126 u64 iosize; 2127 u64 unlock_start; 2128 sector_t sector; 2129 struct extent_map *em; 2130 struct block_device *bdev; 2131 int ret; 2132 int nr = 0; 2133 size_t pg_offset = 0; 2134 size_t blocksize; 2135 loff_t i_size = i_size_read(inode); 2136 unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; 2137 u64 nr_delalloc; 2138 u64 delalloc_end; 2139 int page_started; 2140 int compressed; 2141 unsigned long nr_written = 0; 2142 2143 WARN_ON(!PageLocked(page)); 2144 pg_offset = i_size & (PAGE_CACHE_SIZE - 1); 2145 if (page->index > end_index || 2146 (page->index == end_index && !pg_offset)) { 2147 page->mapping->a_ops->invalidatepage(page, 0); 2148 unlock_page(page); 2149 return 0; 2150 } 2151 2152 if (page->index == end_index) { 2153 char *userpage; 2154 2155 userpage = kmap_atomic(page, KM_USER0); 2156 memset(userpage + pg_offset, 0, 2157 PAGE_CACHE_SIZE - pg_offset); 2158 kunmap_atomic(userpage, KM_USER0); 2159 flush_dcache_page(page); 2160 } 2161 pg_offset = 0; 2162 2163 set_page_extent_mapped(page); 2164 2165 delalloc_start = start; 2166 delalloc_end = 0; 2167 page_started = 0; 2168 if (!epd->extent_locked) { 2169 while (delalloc_end < page_end) { 2170 nr_delalloc = find_lock_delalloc_range(inode, tree, 2171 page, 2172 &delalloc_start, 2173 &delalloc_end, 2174 128 * 1024 * 1024); 2175 if (nr_delalloc == 0) { 2176 delalloc_start = delalloc_end + 1; 2177 continue; 2178 } 2179 tree->ops->fill_delalloc(inode, page, delalloc_start, 2180 delalloc_end, &page_started, 2181 &nr_written); 2182 delalloc_start = delalloc_end + 1; 2183 } 2184 2185 /* did the fill delalloc function already unlock and start 2186 * the IO? 
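 * If so, the delalloc code now owns the page: it will unlock it and
 * issue the IO itself, so we must not touch the page again here and
 * all that is left to do is update the writeback accounting.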
2187 */ 2188 if (page_started) { 2189 ret = 0; 2190 goto update_nr_written; 2191 } 2192 } 2193 lock_extent(tree, start, page_end, GFP_NOFS); 2194 2195 unlock_start = start; 2196 2197 if (tree->ops && tree->ops->writepage_start_hook) { 2198 ret = tree->ops->writepage_start_hook(page, start, 2199 page_end); 2200 if (ret == -EAGAIN) { 2201 unlock_extent(tree, start, page_end, GFP_NOFS); 2202 redirty_page_for_writepage(wbc, page); 2203 unlock_page(page); 2204 ret = 0; 2205 goto update_nr_written; 2206 } 2207 } 2208 2209 nr_written++; 2210 2211 end = page_end; 2212 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) 2213 printk(KERN_ERR "btrfs delalloc bits after lock_extent\n"); 2214 2215 if (last_byte <= start) { 2216 clear_extent_dirty(tree, start, page_end, GFP_NOFS); 2217 unlock_extent(tree, start, page_end, GFP_NOFS); 2218 if (tree->ops && tree->ops->writepage_end_io_hook) 2219 tree->ops->writepage_end_io_hook(page, start, 2220 page_end, NULL, 1); 2221 unlock_start = page_end + 1; 2222 goto done; 2223 } 2224 2225 set_extent_uptodate(tree, start, page_end, GFP_NOFS); 2226 blocksize = inode->i_sb->s_blocksize; 2227 2228 while (cur <= end) { 2229 if (cur >= last_byte) { 2230 clear_extent_dirty(tree, cur, page_end, GFP_NOFS); 2231 unlock_extent(tree, unlock_start, page_end, GFP_NOFS); 2232 if (tree->ops && tree->ops->writepage_end_io_hook) 2233 tree->ops->writepage_end_io_hook(page, cur, 2234 page_end, NULL, 1); 2235 unlock_start = page_end + 1; 2236 break; 2237 } 2238 em = epd->get_extent(inode, page, pg_offset, cur, 2239 end - cur + 1, 1); 2240 if (IS_ERR(em) || !em) { 2241 SetPageError(page); 2242 break; 2243 } 2244 2245 extent_offset = cur - em->start; 2246 BUG_ON(extent_map_end(em) <= cur); 2247 BUG_ON(end < cur); 2248 iosize = min(extent_map_end(em) - cur, end - cur + 1); 2249 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); 2250 sector = (em->block_start + extent_offset) >> 9; 2251 bdev = em->bdev; 2252 block_start = em->block_start; 2253 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 2254 free_extent_map(em); 2255 em = NULL; 2256 2257 /* 2258 * compressed and inline extents are written through other 2259 * paths in the FS 2260 */ 2261 if (compressed || block_start == EXTENT_MAP_HOLE || 2262 block_start == EXTENT_MAP_INLINE) { 2263 clear_extent_dirty(tree, cur, 2264 cur + iosize - 1, GFP_NOFS); 2265 2266 unlock_extent(tree, unlock_start, cur + iosize - 1, 2267 GFP_NOFS); 2268 2269 /* 2270 * end_io notification does not happen here for 2271 * compressed extents 2272 */ 2273 if (!compressed && tree->ops && 2274 tree->ops->writepage_end_io_hook) 2275 tree->ops->writepage_end_io_hook(page, cur, 2276 cur + iosize - 1, 2277 NULL, 1); 2278 else if (compressed) { 2279 /* we don't want to end_page_writeback on 2280 * a compressed extent. 
this happens 2281 * elsewhere 2282 */ 2283 nr++; 2284 } 2285 2286 cur += iosize; 2287 pg_offset += iosize; 2288 unlock_start = cur; 2289 continue; 2290 } 2291 /* leave this out until we have a page_mkwrite call */ 2292 if (0 && !test_range_bit(tree, cur, cur + iosize - 1, 2293 EXTENT_DIRTY, 0)) { 2294 cur = cur + iosize; 2295 pg_offset += iosize; 2296 continue; 2297 } 2298 2299 clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); 2300 if (tree->ops && tree->ops->writepage_io_hook) { 2301 ret = tree->ops->writepage_io_hook(page, cur, 2302 cur + iosize - 1); 2303 } else { 2304 ret = 0; 2305 } 2306 if (ret) { 2307 SetPageError(page); 2308 } else { 2309 unsigned long max_nr = end_index + 1; 2310 2311 set_range_writeback(tree, cur, cur + iosize - 1); 2312 if (!PageWriteback(page)) { 2313 printk(KERN_ERR "btrfs warning page %lu not " 2314 "writeback, cur %llu end %llu\n", 2315 page->index, (unsigned long long)cur, 2316 (unsigned long long)end); 2317 } 2318 2319 ret = submit_extent_page(WRITE, tree, page, sector, 2320 iosize, pg_offset, bdev, 2321 &epd->bio, max_nr, 2322 end_bio_extent_writepage, 2323 0, 0, 0); 2324 if (ret) 2325 SetPageError(page); 2326 } 2327 cur = cur + iosize; 2328 pg_offset += iosize; 2329 nr++; 2330 } 2331 done: 2332 if (nr == 0) { 2333 /* make sure the mapping tag for page dirty gets cleared */ 2334 set_page_writeback(page); 2335 end_page_writeback(page); 2336 } 2337 if (unlock_start <= page_end) 2338 unlock_extent(tree, unlock_start, page_end, GFP_NOFS); 2339 unlock_page(page); 2340 2341 update_nr_written: 2342 wbc->nr_to_write -= nr_written; 2343 if (wbc->range_cyclic || (wbc->nr_to_write > 0 && 2344 wbc->range_start == 0 && wbc->range_end == LLONG_MAX)) 2345 page->mapping->writeback_index = page->index + nr_written; 2346 return 0; 2347 } 2348 2349 /** 2350 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them. 2351 * @mapping: address space structure to write 2352 * @wbc: subtract the number of written pages from *@wbc->nr_to_write 2353 * @writepage: function called for each page 2354 * @data: data passed to writepage function 2355 * 2356 * If a page is already under I/O, write_cache_pages() skips it, even 2357 * if it's dirty. This is desirable behaviour for memory-cleaning writeback, 2358 * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() 2359 * and msync() need to guarantee that all the data which was dirty at the time 2360 * the call was made get new I/O started against them. If wbc->sync_mode is 2361 * WB_SYNC_ALL then we were called for data integrity and we must wait for 2362 * existing IO to complete. 
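 *
 * This is essentially a private copy of write_cache_pages(): the tree's
 * write_cache_pages_lock_hook (when set) is used in place of lock_page(),
 * and flush_fn is called to submit any bio built up so far before we
 * block waiting for writeback on a page.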
2363 */ 2364 static int extent_write_cache_pages(struct extent_io_tree *tree, 2365 struct address_space *mapping, 2366 struct writeback_control *wbc, 2367 writepage_t writepage, void *data, 2368 void (*flush_fn)(void *)) 2369 { 2370 struct backing_dev_info *bdi = mapping->backing_dev_info; 2371 int ret = 0; 2372 int done = 0; 2373 struct pagevec pvec; 2374 int nr_pages; 2375 pgoff_t index; 2376 pgoff_t end; /* Inclusive */ 2377 int scanned = 0; 2378 int range_whole = 0; 2379 2380 pagevec_init(&pvec, 0); 2381 if (wbc->range_cyclic) { 2382 index = mapping->writeback_index; /* Start from prev offset */ 2383 end = -1; 2384 } else { 2385 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2386 end = wbc->range_end >> PAGE_CACHE_SHIFT; 2387 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 2388 range_whole = 1; 2389 scanned = 1; 2390 } 2391 retry: 2392 while (!done && (index <= end) && 2393 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, 2394 PAGECACHE_TAG_DIRTY, min(end - index, 2395 (pgoff_t)PAGEVEC_SIZE-1) + 1))) { 2396 unsigned i; 2397 2398 scanned = 1; 2399 for (i = 0; i < nr_pages; i++) { 2400 struct page *page = pvec.pages[i]; 2401 2402 /* 2403 * At this point we hold neither mapping->tree_lock nor 2404 * lock on the page itself: the page may be truncated or 2405 * invalidated (changing page->mapping to NULL), or even 2406 * swizzled back from swapper_space to tmpfs file 2407 * mapping 2408 */ 2409 if (tree->ops && tree->ops->write_cache_pages_lock_hook) 2410 tree->ops->write_cache_pages_lock_hook(page); 2411 else 2412 lock_page(page); 2413 2414 if (unlikely(page->mapping != mapping)) { 2415 unlock_page(page); 2416 continue; 2417 } 2418 2419 if (!wbc->range_cyclic && page->index > end) { 2420 done = 1; 2421 unlock_page(page); 2422 continue; 2423 } 2424 2425 if (wbc->sync_mode != WB_SYNC_NONE) { 2426 if (PageWriteback(page)) 2427 flush_fn(data); 2428 wait_on_page_writeback(page); 2429 } 2430 2431 if (PageWriteback(page) || 2432 !clear_page_dirty_for_io(page)) { 2433 unlock_page(page); 2434 continue; 2435 } 2436 2437 ret = (*writepage)(page, wbc, data); 2438 2439 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { 2440 unlock_page(page); 2441 ret = 0; 2442 } 2443 if (ret || wbc->nr_to_write <= 0) 2444 done = 1; 2445 if (wbc->nonblocking && bdi_write_congested(bdi)) { 2446 wbc->encountered_congestion = 1; 2447 done = 1; 2448 } 2449 } 2450 pagevec_release(&pvec); 2451 cond_resched(); 2452 } 2453 if (!scanned && !done) { 2454 /* 2455 * We hit the last page and there is more work to be done: wrap 2456 * back to the start of the file 2457 */ 2458 scanned = 1; 2459 index = 0; 2460 goto retry; 2461 } 2462 return ret; 2463 } 2464 2465 static noinline void flush_write_bio(void *data) 2466 { 2467 struct extent_page_data *epd = data; 2468 if (epd->bio) { 2469 submit_one_bio(WRITE, epd->bio, 0, 0); 2470 epd->bio = NULL; 2471 } 2472 } 2473 2474 int extent_write_full_page(struct extent_io_tree *tree, struct page *page, 2475 get_extent_t *get_extent, 2476 struct writeback_control *wbc) 2477 { 2478 int ret; 2479 struct address_space *mapping = page->mapping; 2480 struct extent_page_data epd = { 2481 .bio = NULL, 2482 .tree = tree, 2483 .get_extent = get_extent, 2484 .extent_locked = 0, 2485 }; 2486 struct writeback_control wbc_writepages = { 2487 .bdi = wbc->bdi, 2488 .sync_mode = WB_SYNC_NONE, 2489 .older_than_this = NULL, 2490 .nr_to_write = 64, 2491 .range_start = page_offset(page) + PAGE_CACHE_SIZE, 2492 .range_end = (loff_t)-1, 2493 }; 2494 2495 2496 ret = __extent_writepage(page, wbc, &epd); 2497 
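	/*
	 * The caller's page has been run through __extent_writepage() with
	 * the original wbc above; the call below then opportunistically
	 * writes up to 64 more dirty pages that follow it in the file
	 * (wbc_writepages.range_start points just past this page), so they
	 * can be batched into the same bio where possible.
	 *
	 * Illustrative only, not part of the original file: a filesystem
	 * normally reaches this function from its ->writepage callback,
	 * roughly like
	 *
	 *	static int btrfs_writepage(struct page *page,
	 *				   struct writeback_control *wbc)
	 *	{
	 *		struct extent_io_tree *tree =
	 *			&BTRFS_I(page->mapping->host)->io_tree;
	 *		return extent_write_full_page(tree, page,
	 *					      btrfs_get_extent, wbc);
	 *	}
	 */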
2498 extent_write_cache_pages(tree, mapping, &wbc_writepages, 2499 __extent_writepage, &epd, flush_write_bio); 2500 if (epd.bio) 2501 submit_one_bio(WRITE, epd.bio, 0, 0); 2502 return ret; 2503 } 2504 2505 int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, 2506 u64 start, u64 end, get_extent_t *get_extent, 2507 int mode) 2508 { 2509 int ret = 0; 2510 struct address_space *mapping = inode->i_mapping; 2511 struct page *page; 2512 unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >> 2513 PAGE_CACHE_SHIFT; 2514 2515 struct extent_page_data epd = { 2516 .bio = NULL, 2517 .tree = tree, 2518 .get_extent = get_extent, 2519 .extent_locked = 1, 2520 }; 2521 struct writeback_control wbc_writepages = { 2522 .bdi = inode->i_mapping->backing_dev_info, 2523 .sync_mode = mode, 2524 .older_than_this = NULL, 2525 .nr_to_write = nr_pages * 2, 2526 .range_start = start, 2527 .range_end = end + 1, 2528 }; 2529 2530 while (start <= end) { 2531 page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); 2532 if (clear_page_dirty_for_io(page)) 2533 ret = __extent_writepage(page, &wbc_writepages, &epd); 2534 else { 2535 if (tree->ops && tree->ops->writepage_end_io_hook) 2536 tree->ops->writepage_end_io_hook(page, start, 2537 start + PAGE_CACHE_SIZE - 1, 2538 NULL, 1); 2539 unlock_page(page); 2540 } 2541 page_cache_release(page); 2542 start += PAGE_CACHE_SIZE; 2543 } 2544 2545 if (epd.bio) 2546 submit_one_bio(WRITE, epd.bio, 0, 0); 2547 return ret; 2548 } 2549 2550 int extent_writepages(struct extent_io_tree *tree, 2551 struct address_space *mapping, 2552 get_extent_t *get_extent, 2553 struct writeback_control *wbc) 2554 { 2555 int ret = 0; 2556 struct extent_page_data epd = { 2557 .bio = NULL, 2558 .tree = tree, 2559 .get_extent = get_extent, 2560 .extent_locked = 0, 2561 }; 2562 2563 ret = extent_write_cache_pages(tree, mapping, wbc, 2564 __extent_writepage, &epd, 2565 flush_write_bio); 2566 if (epd.bio) 2567 submit_one_bio(WRITE, epd.bio, 0, 0); 2568 return ret; 2569 } 2570 2571 int extent_readpages(struct extent_io_tree *tree, 2572 struct address_space *mapping, 2573 struct list_head *pages, unsigned nr_pages, 2574 get_extent_t get_extent) 2575 { 2576 struct bio *bio = NULL; 2577 unsigned page_idx; 2578 struct pagevec pvec; 2579 unsigned long bio_flags = 0; 2580 2581 pagevec_init(&pvec, 0); 2582 for (page_idx = 0; page_idx < nr_pages; page_idx++) { 2583 struct page *page = list_entry(pages->prev, struct page, lru); 2584 2585 prefetchw(&page->flags); 2586 list_del(&page->lru); 2587 /* 2588 * what we want to do here is call add_to_page_cache_lru, 2589 * but that isn't exported, so we reproduce it here 2590 */ 2591 if (!add_to_page_cache(page, mapping, 2592 page->index, GFP_KERNEL)) { 2593 2594 /* open coding of lru_cache_add, also not exported */ 2595 page_cache_get(page); 2596 if (!pagevec_add(&pvec, page)) 2597 __pagevec_lru_add_file(&pvec); 2598 __extent_read_full_page(tree, page, get_extent, 2599 &bio, 0, &bio_flags); 2600 } 2601 page_cache_release(page); 2602 } 2603 if (pagevec_count(&pvec)) 2604 __pagevec_lru_add_file(&pvec); 2605 BUG_ON(!list_empty(pages)); 2606 if (bio) 2607 submit_one_bio(READ, bio, 0, bio_flags); 2608 return 0; 2609 } 2610 2611 /* 2612 * basic invalidatepage code, this waits on any locked or writeback 2613 * ranges corresponding to the page, and then deletes any extent state 2614 * records from the tree 2615 */ 2616 int extent_invalidatepage(struct extent_io_tree *tree, 2617 struct page *page, unsigned long offset) 2618 { 2619 u64 start = 
((u64)page->index << PAGE_CACHE_SHIFT); 2620 u64 end = start + PAGE_CACHE_SIZE - 1; 2621 size_t blocksize = page->mapping->host->i_sb->s_blocksize; 2622 2623 start += (offset + blocksize - 1) & ~(blocksize - 1); 2624 if (start > end) 2625 return 0; 2626 2627 lock_extent(tree, start, end, GFP_NOFS); 2628 wait_on_extent_writeback(tree, start, end); 2629 clear_extent_bit(tree, start, end, 2630 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, 2631 1, 1, GFP_NOFS); 2632 return 0; 2633 } 2634 2635 /* 2636 * simple commit_write call, set_range_dirty is used to mark both 2637 * the pages and the extent records as dirty 2638 */ 2639 int extent_commit_write(struct extent_io_tree *tree, 2640 struct inode *inode, struct page *page, 2641 unsigned from, unsigned to) 2642 { 2643 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; 2644 2645 set_page_extent_mapped(page); 2646 set_page_dirty(page); 2647 2648 if (pos > inode->i_size) { 2649 i_size_write(inode, pos); 2650 mark_inode_dirty(inode); 2651 } 2652 return 0; 2653 } 2654 2655 int extent_prepare_write(struct extent_io_tree *tree, 2656 struct inode *inode, struct page *page, 2657 unsigned from, unsigned to, get_extent_t *get_extent) 2658 { 2659 u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 2660 u64 page_end = page_start + PAGE_CACHE_SIZE - 1; 2661 u64 block_start; 2662 u64 orig_block_start; 2663 u64 block_end; 2664 u64 cur_end; 2665 struct extent_map *em; 2666 unsigned blocksize = 1 << inode->i_blkbits; 2667 size_t page_offset = 0; 2668 size_t block_off_start; 2669 size_t block_off_end; 2670 int err = 0; 2671 int iocount = 0; 2672 int ret = 0; 2673 int isnew; 2674 2675 set_page_extent_mapped(page); 2676 2677 block_start = (page_start + from) & ~((u64)blocksize - 1); 2678 block_end = (page_start + to - 1) | (blocksize - 1); 2679 orig_block_start = block_start; 2680 2681 lock_extent(tree, page_start, page_end, GFP_NOFS); 2682 while (block_start <= block_end) { 2683 em = get_extent(inode, page, page_offset, block_start, 2684 block_end - block_start + 1, 1); 2685 if (IS_ERR(em) || !em) 2686 goto err; 2687 2688 cur_end = min(block_end, extent_map_end(em) - 1); 2689 block_off_start = block_start & (PAGE_CACHE_SIZE - 1); 2690 block_off_end = block_off_start + blocksize; 2691 isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS); 2692 2693 if (!PageUptodate(page) && isnew && 2694 (block_off_end > to || block_off_start < from)) { 2695 void *kaddr; 2696 2697 kaddr = kmap_atomic(page, KM_USER0); 2698 if (block_off_end > to) 2699 memset(kaddr + to, 0, block_off_end - to); 2700 if (block_off_start < from) 2701 memset(kaddr + block_off_start, 0, 2702 from - block_off_start); 2703 flush_dcache_page(page); 2704 kunmap_atomic(kaddr, KM_USER0); 2705 } 2706 if ((em->block_start != EXTENT_MAP_HOLE && 2707 em->block_start != EXTENT_MAP_INLINE) && 2708 !isnew && !PageUptodate(page) && 2709 (block_off_end > to || block_off_start < from) && 2710 !test_range_bit(tree, block_start, cur_end, 2711 EXTENT_UPTODATE, 1)) { 2712 u64 sector; 2713 u64 extent_offset = block_start - em->start; 2714 size_t iosize; 2715 sector = (em->block_start + extent_offset) >> 9; 2716 iosize = (cur_end - block_start + blocksize) & 2717 ~((u64)blocksize - 1); 2718 /* 2719 * we've already got the extent locked, but we 2720 * need to split the state such that our end_bio 2721 * handler can clear the lock. 
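 * Re-setting EXTENT_LOCKED on just this block range below forces the
 * state records to be split at its boundaries, so the end_io handler
 * can unlock exactly the range it read.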
2722 */ 2723 set_extent_bit(tree, block_start, 2724 block_start + iosize - 1, 2725 EXTENT_LOCKED, 0, NULL, GFP_NOFS); 2726 ret = submit_extent_page(READ, tree, page, 2727 sector, iosize, page_offset, em->bdev, 2728 NULL, 1, 2729 end_bio_extent_preparewrite, 0, 2730 0, 0); 2731 iocount++; 2732 block_start = block_start + iosize; 2733 } else { 2734 set_extent_uptodate(tree, block_start, cur_end, 2735 GFP_NOFS); 2736 unlock_extent(tree, block_start, cur_end, GFP_NOFS); 2737 block_start = cur_end + 1; 2738 } 2739 page_offset = block_start & (PAGE_CACHE_SIZE - 1); 2740 free_extent_map(em); 2741 } 2742 if (iocount) { 2743 wait_extent_bit(tree, orig_block_start, 2744 block_end, EXTENT_LOCKED); 2745 } 2746 check_page_uptodate(tree, page); 2747 err: 2748 /* FIXME, zero out newly allocated blocks on error */ 2749 return err; 2750 } 2751 2752 /* 2753 * a helper for releasepage, this tests for areas of the page that 2754 * are locked or under IO and drops the related state bits if it is safe 2755 * to drop the page. 2756 */ 2757 int try_release_extent_state(struct extent_map_tree *map, 2758 struct extent_io_tree *tree, struct page *page, 2759 gfp_t mask) 2760 { 2761 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 2762 u64 end = start + PAGE_CACHE_SIZE - 1; 2763 int ret = 1; 2764 2765 if (test_range_bit(tree, start, end, 2766 EXTENT_IOBITS | EXTENT_ORDERED, 0)) 2767 ret = 0; 2768 else { 2769 if ((mask & GFP_NOFS) == GFP_NOFS) 2770 mask = GFP_NOFS; 2771 clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 2772 1, 1, mask); 2773 } 2774 return ret; 2775 } 2776 2777 /* 2778 * a helper for releasepage. As long as there are no locked extents 2779 * in the range corresponding to the page, both state records and extent 2780 * map records are removed 2781 */ 2782 int try_release_extent_mapping(struct extent_map_tree *map, 2783 struct extent_io_tree *tree, struct page *page, 2784 gfp_t mask) 2785 { 2786 struct extent_map *em; 2787 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 2788 u64 end = start + PAGE_CACHE_SIZE - 1; 2789 2790 if ((mask & __GFP_WAIT) && 2791 page->mapping->host->i_size > 16 * 1024 * 1024) { 2792 u64 len; 2793 while (start <= end) { 2794 len = end - start + 1; 2795 spin_lock(&map->lock); 2796 em = lookup_extent_mapping(map, start, len); 2797 if (!em || IS_ERR(em)) { 2798 spin_unlock(&map->lock); 2799 break; 2800 } 2801 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || 2802 em->start != start) { 2803 spin_unlock(&map->lock); 2804 free_extent_map(em); 2805 break; 2806 } 2807 if (!test_range_bit(tree, em->start, 2808 extent_map_end(em) - 1, 2809 EXTENT_LOCKED | EXTENT_WRITEBACK | 2810 EXTENT_ORDERED, 2811 0)) { 2812 remove_extent_mapping(map, em); 2813 /* once for the rb tree */ 2814 free_extent_map(em); 2815 } 2816 start = extent_map_end(em); 2817 spin_unlock(&map->lock); 2818 2819 /* once for us */ 2820 free_extent_map(em); 2821 } 2822 } 2823 return try_release_extent_state(map, tree, page, mask); 2824 } 2825 2826 sector_t extent_bmap(struct address_space *mapping, sector_t iblock, 2827 get_extent_t *get_extent) 2828 { 2829 struct inode *inode = mapping->host; 2830 u64 start = iblock << inode->i_blkbits; 2831 sector_t sector = 0; 2832 size_t blksize = (1 << inode->i_blkbits); 2833 struct extent_map *em; 2834 2835 lock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1, 2836 GFP_NOFS); 2837 em = get_extent(inode, NULL, 0, start, blksize, 0); 2838 unlock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1, 2839 GFP_NOFS); 2840 if (!em || IS_ERR(em)) 2841 return 0; 2842 2843 if 
(em->block_start > EXTENT_MAP_LAST_BYTE) 2844 goto out; 2845 2846 sector = (em->block_start + start - em->start) >> inode->i_blkbits; 2847 out: 2848 free_extent_map(em); 2849 return sector; 2850 } 2851 2852 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2853 __u64 start, __u64 len, get_extent_t *get_extent) 2854 { 2855 int ret; 2856 u64 off = start; 2857 u64 max = start + len; 2858 u32 flags = 0; 2859 u64 disko = 0; 2860 struct extent_map *em = NULL; 2861 int end = 0; 2862 u64 em_start = 0, em_len = 0; 2863 unsigned long emflags; 2864 ret = 0; 2865 2866 if (len == 0) 2867 return -EINVAL; 2868 2869 lock_extent(&BTRFS_I(inode)->io_tree, start, start + len, 2870 GFP_NOFS); 2871 em = get_extent(inode, NULL, 0, off, max - off, 0); 2872 if (!em) 2873 goto out; 2874 if (IS_ERR(em)) { 2875 ret = PTR_ERR(em); 2876 goto out; 2877 } 2878 while (!end) { 2879 off = em->start + em->len; 2880 if (off >= max) 2881 end = 1; 2882 2883 em_start = em->start; 2884 em_len = em->len; 2885 2886 disko = 0; 2887 flags = 0; 2888 2889 switch (em->block_start) { 2890 case EXTENT_MAP_LAST_BYTE: 2891 end = 1; 2892 flags |= FIEMAP_EXTENT_LAST; 2893 break; 2894 case EXTENT_MAP_HOLE: 2895 flags |= FIEMAP_EXTENT_UNWRITTEN; 2896 break; 2897 case EXTENT_MAP_INLINE: 2898 flags |= (FIEMAP_EXTENT_DATA_INLINE | 2899 FIEMAP_EXTENT_NOT_ALIGNED); 2900 break; 2901 case EXTENT_MAP_DELALLOC: 2902 flags |= (FIEMAP_EXTENT_DELALLOC | 2903 FIEMAP_EXTENT_UNKNOWN); 2904 break; 2905 default: 2906 disko = em->block_start; 2907 break; 2908 } 2909 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 2910 flags |= FIEMAP_EXTENT_ENCODED; 2911 2912 emflags = em->flags; 2913 free_extent_map(em); 2914 em = NULL; 2915 2916 if (!end) { 2917 em = get_extent(inode, NULL, 0, off, max - off, 0); 2918 if (!em) 2919 goto out; 2920 if (IS_ERR(em)) { 2921 ret = PTR_ERR(em); 2922 goto out; 2923 } 2924 emflags = em->flags; 2925 } 2926 if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { 2927 flags |= FIEMAP_EXTENT_LAST; 2928 end = 1; 2929 } 2930 2931 ret = fiemap_fill_next_extent(fieinfo, em_start, disko, 2932 em_len, flags); 2933 if (ret) 2934 goto out_free; 2935 } 2936 out_free: 2937 free_extent_map(em); 2938 out: 2939 unlock_extent(&BTRFS_I(inode)->io_tree, start, start + len, 2940 GFP_NOFS); 2941 return ret; 2942 } 2943 2944 static inline struct page *extent_buffer_page(struct extent_buffer *eb, 2945 unsigned long i) 2946 { 2947 struct page *p; 2948 struct address_space *mapping; 2949 2950 if (i == 0) 2951 return eb->first_page; 2952 i += eb->start >> PAGE_CACHE_SHIFT; 2953 mapping = eb->first_page->mapping; 2954 if (!mapping) 2955 return NULL; 2956 2957 /* 2958 * extent_buffer_page is only called after pinning the page 2959 * by increasing the reference count. So we know the page must 2960 * be in the radix tree. 
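 * The rcu_read_lock() below only protects the radix tree walk itself;
 * it is the caller's reference that keeps the page from being freed
 * while we use it.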
2961 */ 2962 rcu_read_lock(); 2963 p = radix_tree_lookup(&mapping->page_tree, i); 2964 rcu_read_unlock(); 2965 2966 return p; 2967 } 2968 2969 static inline unsigned long num_extent_pages(u64 start, u64 len) 2970 { 2971 return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - 2972 (start >> PAGE_CACHE_SHIFT); 2973 } 2974 2975 static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, 2976 u64 start, 2977 unsigned long len, 2978 gfp_t mask) 2979 { 2980 struct extent_buffer *eb = NULL; 2981 #if LEAK_DEBUG 2982 unsigned long flags; 2983 #endif 2984 2985 eb = kmem_cache_zalloc(extent_buffer_cache, mask); 2986 eb->start = start; 2987 eb->len = len; 2988 spin_lock_init(&eb->lock); 2989 init_waitqueue_head(&eb->lock_wq); 2990 2991 #if LEAK_DEBUG 2992 spin_lock_irqsave(&leak_lock, flags); 2993 list_add(&eb->leak_list, &buffers); 2994 spin_unlock_irqrestore(&leak_lock, flags); 2995 #endif 2996 atomic_set(&eb->refs, 1); 2997 2998 return eb; 2999 } 3000 3001 static void __free_extent_buffer(struct extent_buffer *eb) 3002 { 3003 #if LEAK_DEBUG 3004 unsigned long flags; 3005 spin_lock_irqsave(&leak_lock, flags); 3006 list_del(&eb->leak_list); 3007 spin_unlock_irqrestore(&leak_lock, flags); 3008 #endif 3009 kmem_cache_free(extent_buffer_cache, eb); 3010 } 3011 3012 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 3013 u64 start, unsigned long len, 3014 struct page *page0, 3015 gfp_t mask) 3016 { 3017 unsigned long num_pages = num_extent_pages(start, len); 3018 unsigned long i; 3019 unsigned long index = start >> PAGE_CACHE_SHIFT; 3020 struct extent_buffer *eb; 3021 struct extent_buffer *exists = NULL; 3022 struct page *p; 3023 struct address_space *mapping = tree->mapping; 3024 int uptodate = 1; 3025 3026 spin_lock(&tree->buffer_lock); 3027 eb = buffer_search(tree, start); 3028 if (eb) { 3029 atomic_inc(&eb->refs); 3030 spin_unlock(&tree->buffer_lock); 3031 mark_page_accessed(eb->first_page); 3032 return eb; 3033 } 3034 spin_unlock(&tree->buffer_lock); 3035 3036 eb = __alloc_extent_buffer(tree, start, len, mask); 3037 if (!eb) 3038 return NULL; 3039 3040 if (page0) { 3041 eb->first_page = page0; 3042 i = 1; 3043 index++; 3044 page_cache_get(page0); 3045 mark_page_accessed(page0); 3046 set_page_extent_mapped(page0); 3047 set_page_extent_head(page0, len); 3048 uptodate = PageUptodate(page0); 3049 } else { 3050 i = 0; 3051 } 3052 for (; i < num_pages; i++, index++) { 3053 p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); 3054 if (!p) { 3055 WARN_ON(1); 3056 goto free_eb; 3057 } 3058 set_page_extent_mapped(p); 3059 mark_page_accessed(p); 3060 if (i == 0) { 3061 eb->first_page = p; 3062 set_page_extent_head(p, len); 3063 } else { 3064 set_page_private(p, EXTENT_PAGE_PRIVATE); 3065 } 3066 if (!PageUptodate(p)) 3067 uptodate = 0; 3068 unlock_page(p); 3069 } 3070 if (uptodate) 3071 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3072 3073 spin_lock(&tree->buffer_lock); 3074 exists = buffer_tree_insert(tree, start, &eb->rb_node); 3075 if (exists) { 3076 /* add one reference for the caller */ 3077 atomic_inc(&exists->refs); 3078 spin_unlock(&tree->buffer_lock); 3079 goto free_eb; 3080 } 3081 spin_unlock(&tree->buffer_lock); 3082 3083 /* add one reference for the tree */ 3084 atomic_inc(&eb->refs); 3085 return eb; 3086 3087 free_eb: 3088 if (!atomic_dec_and_test(&eb->refs)) 3089 return exists; 3090 for (index = 1; index < i; index++) 3091 page_cache_release(extent_buffer_page(eb, index)); 3092 page_cache_release(extent_buffer_page(eb, 0)); 3093 
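	/* all of the page references taken above have been dropped; now
	 * free the extent_buffer struct itself
	 */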
__free_extent_buffer(eb); 3094 return exists; 3095 } 3096 3097 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, 3098 u64 start, unsigned long len, 3099 gfp_t mask) 3100 { 3101 struct extent_buffer *eb; 3102 3103 spin_lock(&tree->buffer_lock); 3104 eb = buffer_search(tree, start); 3105 if (eb) 3106 atomic_inc(&eb->refs); 3107 spin_unlock(&tree->buffer_lock); 3108 3109 if (eb) 3110 mark_page_accessed(eb->first_page); 3111 3112 return eb; 3113 } 3114 3115 void free_extent_buffer(struct extent_buffer *eb) 3116 { 3117 if (!eb) 3118 return; 3119 3120 if (!atomic_dec_and_test(&eb->refs)) 3121 return; 3122 3123 WARN_ON(1); 3124 } 3125 3126 int clear_extent_buffer_dirty(struct extent_io_tree *tree, 3127 struct extent_buffer *eb) 3128 { 3129 int set; 3130 unsigned long i; 3131 unsigned long num_pages; 3132 struct page *page; 3133 3134 u64 start = eb->start; 3135 u64 end = start + eb->len - 1; 3136 3137 set = clear_extent_dirty(tree, start, end, GFP_NOFS); 3138 num_pages = num_extent_pages(eb->start, eb->len); 3139 3140 for (i = 0; i < num_pages; i++) { 3141 page = extent_buffer_page(eb, i); 3142 if (!set && !PageDirty(page)) 3143 continue; 3144 3145 lock_page(page); 3146 if (i == 0) 3147 set_page_extent_head(page, eb->len); 3148 else 3149 set_page_private(page, EXTENT_PAGE_PRIVATE); 3150 3151 /* 3152 * if we're on the last page or the first page and the 3153 * block isn't aligned on a page boundary, do extra checks 3154 * to make sure we don't clean page that is partially dirty 3155 */ 3156 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || 3157 ((i == num_pages - 1) && 3158 ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { 3159 start = (u64)page->index << PAGE_CACHE_SHIFT; 3160 end = start + PAGE_CACHE_SIZE - 1; 3161 if (test_range_bit(tree, start, end, 3162 EXTENT_DIRTY, 0)) { 3163 unlock_page(page); 3164 continue; 3165 } 3166 } 3167 clear_page_dirty_for_io(page); 3168 spin_lock_irq(&page->mapping->tree_lock); 3169 if (!PageDirty(page)) { 3170 radix_tree_tag_clear(&page->mapping->page_tree, 3171 page_index(page), 3172 PAGECACHE_TAG_DIRTY); 3173 } 3174 spin_unlock_irq(&page->mapping->tree_lock); 3175 unlock_page(page); 3176 } 3177 return 0; 3178 } 3179 3180 int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, 3181 struct extent_buffer *eb) 3182 { 3183 return wait_on_extent_writeback(tree, eb->start, 3184 eb->start + eb->len - 1); 3185 } 3186 3187 int set_extent_buffer_dirty(struct extent_io_tree *tree, 3188 struct extent_buffer *eb) 3189 { 3190 unsigned long i; 3191 unsigned long num_pages; 3192 3193 num_pages = num_extent_pages(eb->start, eb->len); 3194 for (i = 0; i < num_pages; i++) { 3195 struct page *page = extent_buffer_page(eb, i); 3196 /* writepage may need to do something special for the 3197 * first page, we have to make sure page->private is 3198 * properly set. releasepage may drop page->private 3199 * on us if the page isn't already dirty. 
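 * The head marker on the first page is therefore re-established below,
 * right before the page is dirtied.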
3200 */ 3201 lock_page(page); 3202 if (i == 0) { 3203 set_page_extent_head(page, eb->len); 3204 } else if (PagePrivate(page) && 3205 page->private != EXTENT_PAGE_PRIVATE) { 3206 set_page_extent_mapped(page); 3207 } 3208 __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); 3209 set_extent_dirty(tree, page_offset(page), 3210 page_offset(page) + PAGE_CACHE_SIZE - 1, 3211 GFP_NOFS); 3212 unlock_page(page); 3213 } 3214 return 0; 3215 } 3216 3217 int clear_extent_buffer_uptodate(struct extent_io_tree *tree, 3218 struct extent_buffer *eb) 3219 { 3220 unsigned long i; 3221 struct page *page; 3222 unsigned long num_pages; 3223 3224 num_pages = num_extent_pages(eb->start, eb->len); 3225 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3226 3227 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3228 GFP_NOFS); 3229 for (i = 0; i < num_pages; i++) { 3230 page = extent_buffer_page(eb, i); 3231 if (page) 3232 ClearPageUptodate(page); 3233 } 3234 return 0; 3235 } 3236 3237 int set_extent_buffer_uptodate(struct extent_io_tree *tree, 3238 struct extent_buffer *eb) 3239 { 3240 unsigned long i; 3241 struct page *page; 3242 unsigned long num_pages; 3243 3244 num_pages = num_extent_pages(eb->start, eb->len); 3245 3246 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3247 GFP_NOFS); 3248 for (i = 0; i < num_pages; i++) { 3249 page = extent_buffer_page(eb, i); 3250 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || 3251 ((i == num_pages - 1) && 3252 ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { 3253 check_page_uptodate(tree, page); 3254 continue; 3255 } 3256 SetPageUptodate(page); 3257 } 3258 return 0; 3259 } 3260 3261 int extent_range_uptodate(struct extent_io_tree *tree, 3262 u64 start, u64 end) 3263 { 3264 struct page *page; 3265 int ret; 3266 int pg_uptodate = 1; 3267 int uptodate; 3268 unsigned long index; 3269 3270 ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1); 3271 if (ret) 3272 return 1; 3273 while (start <= end) { 3274 index = start >> PAGE_CACHE_SHIFT; 3275 page = find_get_page(tree->mapping, index); 3276 uptodate = PageUptodate(page); 3277 page_cache_release(page); 3278 if (!uptodate) { 3279 pg_uptodate = 0; 3280 break; 3281 } 3282 start += PAGE_CACHE_SIZE; 3283 } 3284 return pg_uptodate; 3285 } 3286 3287 int extent_buffer_uptodate(struct extent_io_tree *tree, 3288 struct extent_buffer *eb) 3289 { 3290 int ret = 0; 3291 unsigned long num_pages; 3292 unsigned long i; 3293 struct page *page; 3294 int pg_uptodate = 1; 3295 3296 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) 3297 return 1; 3298 3299 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, 3300 EXTENT_UPTODATE, 1); 3301 if (ret) 3302 return ret; 3303 3304 num_pages = num_extent_pages(eb->start, eb->len); 3305 for (i = 0; i < num_pages; i++) { 3306 page = extent_buffer_page(eb, i); 3307 if (!PageUptodate(page)) { 3308 pg_uptodate = 0; 3309 break; 3310 } 3311 } 3312 return pg_uptodate; 3313 } 3314 3315 int read_extent_buffer_pages(struct extent_io_tree *tree, 3316 struct extent_buffer *eb, 3317 u64 start, int wait, 3318 get_extent_t *get_extent, int mirror_num) 3319 { 3320 unsigned long i; 3321 unsigned long start_i; 3322 struct page *page; 3323 int err; 3324 int ret = 0; 3325 int locked_pages = 0; 3326 int all_uptodate = 1; 3327 int inc_all_pages = 0; 3328 unsigned long num_pages; 3329 struct bio *bio = NULL; 3330 unsigned long bio_flags = 0; 3331 3332 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) 3333 return 0; 3334 3335 if (test_range_bit(tree, eb->start, eb->start + 
eb->len - 1, 3336 EXTENT_UPTODATE, 1)) { 3337 return 0; 3338 } 3339 3340 if (start) { 3341 WARN_ON(start < eb->start); 3342 start_i = (start >> PAGE_CACHE_SHIFT) - 3343 (eb->start >> PAGE_CACHE_SHIFT); 3344 } else { 3345 start_i = 0; 3346 } 3347 3348 num_pages = num_extent_pages(eb->start, eb->len); 3349 for (i = start_i; i < num_pages; i++) { 3350 page = extent_buffer_page(eb, i); 3351 if (!wait) { 3352 if (!trylock_page(page)) 3353 goto unlock_exit; 3354 } else { 3355 lock_page(page); 3356 } 3357 locked_pages++; 3358 if (!PageUptodate(page)) 3359 all_uptodate = 0; 3360 } 3361 if (all_uptodate) { 3362 if (start_i == 0) 3363 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3364 goto unlock_exit; 3365 } 3366 3367 for (i = start_i; i < num_pages; i++) { 3368 page = extent_buffer_page(eb, i); 3369 if (inc_all_pages) 3370 page_cache_get(page); 3371 if (!PageUptodate(page)) { 3372 if (start_i == 0) 3373 inc_all_pages = 1; 3374 ClearPageError(page); 3375 err = __extent_read_full_page(tree, page, 3376 get_extent, &bio, 3377 mirror_num, &bio_flags); 3378 if (err) 3379 ret = err; 3380 } else { 3381 unlock_page(page); 3382 } 3383 } 3384 3385 if (bio) 3386 submit_one_bio(READ, bio, mirror_num, bio_flags); 3387 3388 if (ret || !wait) 3389 return ret; 3390 3391 for (i = start_i; i < num_pages; i++) { 3392 page = extent_buffer_page(eb, i); 3393 wait_on_page_locked(page); 3394 if (!PageUptodate(page)) 3395 ret = -EIO; 3396 } 3397 3398 if (!ret) 3399 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3400 return ret; 3401 3402 unlock_exit: 3403 i = start_i; 3404 while (locked_pages > 0) { 3405 page = extent_buffer_page(eb, i); 3406 i++; 3407 unlock_page(page); 3408 locked_pages--; 3409 } 3410 return ret; 3411 } 3412 3413 void read_extent_buffer(struct extent_buffer *eb, void *dstv, 3414 unsigned long start, 3415 unsigned long len) 3416 { 3417 size_t cur; 3418 size_t offset; 3419 struct page *page; 3420 char *kaddr; 3421 char *dst = (char *)dstv; 3422 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 3423 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 3424 3425 WARN_ON(start > eb->len); 3426 WARN_ON(start + len > eb->start + eb->len); 3427 3428 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 3429 3430 while (len > 0) { 3431 page = extent_buffer_page(eb, i); 3432 3433 cur = min(len, (PAGE_CACHE_SIZE - offset)); 3434 kaddr = kmap_atomic(page, KM_USER1); 3435 memcpy(dst, kaddr + offset, cur); 3436 kunmap_atomic(kaddr, KM_USER1); 3437 3438 dst += cur; 3439 len -= cur; 3440 offset = 0; 3441 i++; 3442 } 3443 } 3444 3445 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, 3446 unsigned long min_len, char **token, char **map, 3447 unsigned long *map_start, 3448 unsigned long *map_len, int km) 3449 { 3450 size_t offset = start & (PAGE_CACHE_SIZE - 1); 3451 char *kaddr; 3452 struct page *p; 3453 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 3454 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 3455 unsigned long end_i = (start_offset + start + min_len - 1) >> 3456 PAGE_CACHE_SHIFT; 3457 3458 if (i != end_i) 3459 return -EINVAL; 3460 3461 if (i == 0) { 3462 offset = start_offset; 3463 *map_start = 0; 3464 } else { 3465 offset = 0; 3466 *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset; 3467 } 3468 3469 if (start + min_len > eb->len) { 3470 printk(KERN_ERR "btrfs bad mapping eb start %llu len %lu, " 3471 "wanted %lu %lu\n", (unsigned long long)eb->start, 3472 eb->len, start, min_len); 3473 WARN_ON(1); 3474 } 3475 
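	/*
	 * The requested range now lies entirely within page i.  On return,
	 * *map points at buffer offset *map_start inside that kmapped page
	 * and *map_len bytes are addressable from there, so callers reach
	 * byte `start' of the buffer at *map + (start - *map_start).
	 *
	 * Illustrative caller pattern, not part of the original file
	 * (roughly what the ctree accessor helpers do):
	 *
	 *	err = map_extent_buffer(eb, off, sizeof(__le64), &token,
	 *				&kaddr, &map_start, &map_len,
	 *				KM_USER1);
	 *	if (!err) {
	 *		val = le64_to_cpu(*(__le64 *)(kaddr + off - map_start));
	 *		unmap_extent_buffer(eb, token, KM_USER1);
	 *	}
	 */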
3476 p = extent_buffer_page(eb, i); 3477 kaddr = kmap_atomic(p, km); 3478 *token = kaddr; 3479 *map = kaddr + offset; 3480 *map_len = PAGE_CACHE_SIZE - offset; 3481 return 0; 3482 } 3483 3484 int map_extent_buffer(struct extent_buffer *eb, unsigned long start, 3485 unsigned long min_len, 3486 char **token, char **map, 3487 unsigned long *map_start, 3488 unsigned long *map_len, int km) 3489 { 3490 int err; 3491 int save = 0; 3492 if (eb->map_token) { 3493 unmap_extent_buffer(eb, eb->map_token, km); 3494 eb->map_token = NULL; 3495 save = 1; 3496 } 3497 err = map_private_extent_buffer(eb, start, min_len, token, map, 3498 map_start, map_len, km); 3499 if (!err && save) { 3500 eb->map_token = *token; 3501 eb->kaddr = *map; 3502 eb->map_start = *map_start; 3503 eb->map_len = *map_len; 3504 } 3505 return err; 3506 } 3507 3508 void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) 3509 { 3510 kunmap_atomic(token, km); 3511 } 3512 3513 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, 3514 unsigned long start, 3515 unsigned long len) 3516 { 3517 size_t cur; 3518 size_t offset; 3519 struct page *page; 3520 char *kaddr; 3521 char *ptr = (char *)ptrv; 3522 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 3523 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 3524 int ret = 0; 3525 3526 WARN_ON(start > eb->len); 3527 WARN_ON(start + len > eb->start + eb->len); 3528 3529 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 3530 3531 while (len > 0) { 3532 page = extent_buffer_page(eb, i); 3533 3534 cur = min(len, (PAGE_CACHE_SIZE - offset)); 3535 3536 kaddr = kmap_atomic(page, KM_USER0); 3537 ret = memcmp(ptr, kaddr + offset, cur); 3538 kunmap_atomic(kaddr, KM_USER0); 3539 if (ret) 3540 break; 3541 3542 ptr += cur; 3543 len -= cur; 3544 offset = 0; 3545 i++; 3546 } 3547 return ret; 3548 } 3549 3550 void write_extent_buffer(struct extent_buffer *eb, const void *srcv, 3551 unsigned long start, unsigned long len) 3552 { 3553 size_t cur; 3554 size_t offset; 3555 struct page *page; 3556 char *kaddr; 3557 char *src = (char *)srcv; 3558 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 3559 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 3560 3561 WARN_ON(start > eb->len); 3562 WARN_ON(start + len > eb->start + eb->len); 3563 3564 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 3565 3566 while (len > 0) { 3567 page = extent_buffer_page(eb, i); 3568 WARN_ON(!PageUptodate(page)); 3569 3570 cur = min(len, PAGE_CACHE_SIZE - offset); 3571 kaddr = kmap_atomic(page, KM_USER1); 3572 memcpy(kaddr + offset, src, cur); 3573 kunmap_atomic(kaddr, KM_USER1); 3574 3575 src += cur; 3576 len -= cur; 3577 offset = 0; 3578 i++; 3579 } 3580 } 3581 3582 void memset_extent_buffer(struct extent_buffer *eb, char c, 3583 unsigned long start, unsigned long len) 3584 { 3585 size_t cur; 3586 size_t offset; 3587 struct page *page; 3588 char *kaddr; 3589 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 3590 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 3591 3592 WARN_ON(start > eb->len); 3593 WARN_ON(start + len > eb->start + eb->len); 3594 3595 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 3596 3597 while (len > 0) { 3598 page = extent_buffer_page(eb, i); 3599 WARN_ON(!PageUptodate(page)); 3600 3601 cur = min(len, PAGE_CACHE_SIZE - offset); 3602 kaddr = kmap_atomic(page, KM_USER0); 3603 memset(kaddr + offset, c, cur); 3604 kunmap_atomic(kaddr, KM_USER0); 
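		/* advance to the next page of the buffer; only the first
		 * mapped page starts at a non-zero offset
		 */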
3605 3606 len -= cur; 3607 offset = 0; 3608 i++; 3609 } 3610 } 3611 3612 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, 3613 unsigned long dst_offset, unsigned long src_offset, 3614 unsigned long len) 3615 { 3616 u64 dst_len = dst->len; 3617 size_t cur; 3618 size_t offset; 3619 struct page *page; 3620 char *kaddr; 3621 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); 3622 unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; 3623 3624 WARN_ON(src->len != dst_len); 3625 3626 offset = (start_offset + dst_offset) & 3627 ((unsigned long)PAGE_CACHE_SIZE - 1); 3628 3629 while (len > 0) { 3630 page = extent_buffer_page(dst, i); 3631 WARN_ON(!PageUptodate(page)); 3632 3633 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); 3634 3635 kaddr = kmap_atomic(page, KM_USER0); 3636 read_extent_buffer(src, kaddr + offset, src_offset, cur); 3637 kunmap_atomic(kaddr, KM_USER0); 3638 3639 src_offset += cur; 3640 len -= cur; 3641 offset = 0; 3642 i++; 3643 } 3644 } 3645 3646 static void move_pages(struct page *dst_page, struct page *src_page, 3647 unsigned long dst_off, unsigned long src_off, 3648 unsigned long len) 3649 { 3650 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); 3651 if (dst_page == src_page) { 3652 memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); 3653 } else { 3654 char *src_kaddr = kmap_atomic(src_page, KM_USER1); 3655 char *p = dst_kaddr + dst_off + len; 3656 char *s = src_kaddr + src_off + len; 3657 3658 while (len--) 3659 *--p = *--s; 3660 3661 kunmap_atomic(src_kaddr, KM_USER1); 3662 } 3663 kunmap_atomic(dst_kaddr, KM_USER0); 3664 } 3665 3666 static void copy_pages(struct page *dst_page, struct page *src_page, 3667 unsigned long dst_off, unsigned long src_off, 3668 unsigned long len) 3669 { 3670 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); 3671 char *src_kaddr; 3672 3673 if (dst_page != src_page) 3674 src_kaddr = kmap_atomic(src_page, KM_USER1); 3675 else 3676 src_kaddr = dst_kaddr; 3677 3678 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); 3679 kunmap_atomic(dst_kaddr, KM_USER0); 3680 if (dst_page != src_page) 3681 kunmap_atomic(src_kaddr, KM_USER1); 3682 } 3683 3684 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, 3685 unsigned long src_offset, unsigned long len) 3686 { 3687 size_t cur; 3688 size_t dst_off_in_page; 3689 size_t src_off_in_page; 3690 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); 3691 unsigned long dst_i; 3692 unsigned long src_i; 3693 3694 if (src_offset + len > dst->len) { 3695 printk(KERN_ERR "btrfs memmove bogus src_offset %lu move " 3696 "len %lu dst len %lu\n", src_offset, len, dst->len); 3697 BUG_ON(1); 3698 } 3699 if (dst_offset + len > dst->len) { 3700 printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move " 3701 "len %lu dst len %lu\n", dst_offset, len, dst->len); 3702 BUG_ON(1); 3703 } 3704 3705 while (len > 0) { 3706 dst_off_in_page = (start_offset + dst_offset) & 3707 ((unsigned long)PAGE_CACHE_SIZE - 1); 3708 src_off_in_page = (start_offset + src_offset) & 3709 ((unsigned long)PAGE_CACHE_SIZE - 1); 3710 3711 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; 3712 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; 3713 3714 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - 3715 src_off_in_page)); 3716 cur = min_t(unsigned long, cur, 3717 (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); 3718 3719 copy_pages(extent_buffer_page(dst, dst_i), 3720 extent_buffer_page(dst, src_i), 3721 dst_off_in_page, src_off_in_page, 
cur); 3722 3723 src_offset += cur; 3724 dst_offset += cur; 3725 len -= cur; 3726 } 3727 } 3728 3729 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, 3730 unsigned long src_offset, unsigned long len) 3731 { 3732 size_t cur; 3733 size_t dst_off_in_page; 3734 size_t src_off_in_page; 3735 unsigned long dst_end = dst_offset + len - 1; 3736 unsigned long src_end = src_offset + len - 1; 3737 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); 3738 unsigned long dst_i; 3739 unsigned long src_i; 3740 3741 if (src_offset + len > dst->len) { 3742 printk(KERN_ERR "btrfs memmove bogus src_offset %lu move " 3743 "len %lu len %lu\n", src_offset, len, dst->len); 3744 BUG_ON(1); 3745 } 3746 if (dst_offset + len > dst->len) { 3747 printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move " 3748 "len %lu len %lu\n", dst_offset, len, dst->len); 3749 BUG_ON(1); 3750 } 3751 if (dst_offset < src_offset) { 3752 memcpy_extent_buffer(dst, dst_offset, src_offset, len); 3753 return; 3754 } 3755 while (len > 0) { 3756 dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; 3757 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; 3758 3759 dst_off_in_page = (start_offset + dst_end) & 3760 ((unsigned long)PAGE_CACHE_SIZE - 1); 3761 src_off_in_page = (start_offset + src_end) & 3762 ((unsigned long)PAGE_CACHE_SIZE - 1); 3763 3764 cur = min_t(unsigned long, len, src_off_in_page + 1); 3765 cur = min(cur, dst_off_in_page + 1); 3766 move_pages(extent_buffer_page(dst, dst_i), 3767 extent_buffer_page(dst, src_i), 3768 dst_off_in_page - cur + 1, 3769 src_off_in_page - cur + 1, cur); 3770 3771 dst_end -= cur; 3772 src_end -= cur; 3773 len -= cur; 3774 } 3775 } 3776 3777 int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) 3778 { 3779 u64 start = page_offset(page); 3780 struct extent_buffer *eb; 3781 int ret = 1; 3782 unsigned long i; 3783 unsigned long num_pages; 3784 3785 spin_lock(&tree->buffer_lock); 3786 eb = buffer_search(tree, start); 3787 if (!eb) 3788 goto out; 3789 3790 if (atomic_read(&eb->refs) > 1) { 3791 ret = 0; 3792 goto out; 3793 } 3794 /* at this point we can safely release the extent buffer */ 3795 num_pages = num_extent_pages(eb->start, eb->len); 3796 for (i = 0; i < num_pages; i++) 3797 page_cache_release(extent_buffer_page(eb, i)); 3798 rb_erase(&eb->rb_node, &tree->buffer); 3799 __free_extent_buffer(eb); 3800 out: 3801 spin_unlock(&tree->buffer_lock); 3802 return ret; 3803 } 3804