// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2010 Red Hat, Inc.
 * Copyright (c) 2016-2018 Christoph Hellwig.
 */
#include <linux/module.h>
#include <linux/compiler.h>
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
#include <linux/buffer_head.h>
#include <linux/dax.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/sched/signal.h>
#include <linux/migrate.h>

#include "../internal.h"

static struct iomap_page *
iomap_page_create(struct inode *inode, struct page *page)
{
	struct iomap_page *iop = to_iomap_page(page);

	if (iop || i_blocksize(inode) == PAGE_SIZE)
		return iop;

	iop = kmalloc(sizeof(*iop), GFP_NOFS | __GFP_NOFAIL);
	atomic_set(&iop->read_count, 0);
	atomic_set(&iop->write_count, 0);
	bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);

	/*
	 * migrate_page_move_mapping() assumes that pages with private data have
	 * their count elevated by 1.
	 */
	get_page(page);
	set_page_private(page, (unsigned long)iop);
	SetPagePrivate(page);
	return iop;
}

static void
iomap_page_release(struct page *page)
{
	struct iomap_page *iop = to_iomap_page(page);

	if (!iop)
		return;
	WARN_ON_ONCE(atomic_read(&iop->read_count));
	WARN_ON_ONCE(atomic_read(&iop->write_count));
	ClearPagePrivate(page);
	set_page_private(page, 0);
	put_page(page);
	kfree(iop);
}

/*
 * Calculate the range inside the page that we actually need to read.
 */
static void
iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
		loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp)
{
	loff_t orig_pos = *pos;
	loff_t isize = i_size_read(inode);
	unsigned block_bits = inode->i_blkbits;
	unsigned block_size = (1 << block_bits);
	unsigned poff = offset_in_page(*pos);
	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
	unsigned first = poff >> block_bits;
	unsigned last = (poff + plen - 1) >> block_bits;

	/*
	 * If the block size is smaller than the page size we need to check the
	 * per-block uptodate status and adjust the offset and length if needed
	 * to avoid reading in already uptodate ranges.
	 */
	if (iop) {
		unsigned int i;

		/* move forward for each leading block marked uptodate */
		for (i = first; i <= last; i++) {
			if (!test_bit(i, iop->uptodate))
				break;
			*pos += block_size;
			poff += block_size;
			plen -= block_size;
			first++;
		}

		/* truncate len if we find any trailing uptodate block(s) */
		for ( ; i <= last; i++) {
			if (test_bit(i, iop->uptodate)) {
				plen -= (last - i + 1) * block_size;
				last = i - 1;
				break;
			}
		}
	}

	/*
	 * If the extent spans the block that contains the i_size we need to
	 * handle both halves separately so that we properly zero data in the
	 * page cache for blocks that are entirely outside of i_size.
	 */
	if (orig_pos <= isize && orig_pos + length > isize) {
		unsigned end = offset_in_page(isize - 1) >> block_bits;

		if (first <= end && last > end)
			plen -= (last - end) * block_size;
	}

	*offp = poff;
	*lenp = plen;
}

static void
iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len)
{
	struct iomap_page *iop = to_iomap_page(page);
	struct inode *inode = page->mapping->host;
	unsigned first = off >> inode->i_blkbits;
	unsigned last = (off + len - 1) >> inode->i_blkbits;
	unsigned int i;
	bool uptodate = true;

	if (iop) {
		for (i = 0; i < PAGE_SIZE / i_blocksize(inode); i++) {
			if (i >= first && i <= last)
				set_bit(i, iop->uptodate);
			else if (!test_bit(i, iop->uptodate))
				uptodate = false;
		}
	}

	if (uptodate && !PageError(page))
		SetPageUptodate(page);
}

static void
iomap_read_finish(struct iomap_page *iop, struct page *page)
{
	if (!iop || atomic_dec_and_test(&iop->read_count))
		unlock_page(page);
}

static void
iomap_read_page_end_io(struct bio_vec *bvec, int error)
{
	struct page *page = bvec->bv_page;
	struct iomap_page *iop = to_iomap_page(page);

	if (unlikely(error)) {
		ClearPageUptodate(page);
		SetPageError(page);
	} else {
		iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len);
	}

	iomap_read_finish(iop, page);
}

static void
iomap_read_end_io(struct bio *bio)
{
	int error = blk_status_to_errno(bio->bi_status);
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all)
		iomap_read_page_end_io(bvec, error);
	bio_put(bio);
}

struct iomap_readpage_ctx {
	struct page		*cur_page;
	bool			cur_page_in_bio;
	bool			is_readahead;
	struct bio		*bio;
	struct list_head	*pages;
};

static void
iomap_read_inline_data(struct inode *inode, struct page *page,
		struct iomap *iomap)
{
	size_t size = i_size_read(inode);
	void *addr;

	if (PageUptodate(page))
		return;

	BUG_ON(page->index);
	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));

	addr = kmap_atomic(page);
	memcpy(addr, iomap->inline_data, size);
	memset(addr + size, 0, PAGE_SIZE - size);
	kunmap_atomic(addr);
	SetPageUptodate(page);
}

static loff_t
iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
		struct iomap *iomap)
{
	struct iomap_readpage_ctx *ctx = data;
	struct page *page = ctx->cur_page;
	struct iomap_page *iop = iomap_page_create(inode, page);
	bool same_page = false, is_contig = false;
	loff_t orig_pos = pos;
	unsigned poff, plen;
	sector_t sector;

	if (iomap->type == IOMAP_INLINE) {
		WARN_ON_ONCE(pos);
		iomap_read_inline_data(inode, page, iomap);
		return PAGE_SIZE;
	}

	/* zero post-eof blocks as the page may be mapped */
	iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
	if (plen == 0)
		goto done;

	if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) {
		zero_user(page, poff, plen);
		iomap_set_range_uptodate(page, poff, plen);
		goto done;
	}

	ctx->cur_page_in_bio = true;

	/*
	 * Try to merge into a previous segment if we can.
	 */
	sector = iomap_sector(iomap, pos);
	if (ctx->bio && bio_end_sector(ctx->bio) == sector)
		is_contig = true;

	if (is_contig &&
	    __bio_try_merge_page(ctx->bio, page, plen, poff, &same_page)) {
		if (!same_page && iop)
			atomic_inc(&iop->read_count);
		goto done;
	}

	/*
	 * If we start a new segment we need to increase the read count, and we
	 * need to do so before submitting any previous full bio to make sure
	 * that we don't prematurely unlock the page.
	 */
	if (iop)
		atomic_inc(&iop->read_count);

	if (!ctx->bio || !is_contig || bio_full(ctx->bio, plen)) {
		gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
		int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;

		if (ctx->bio)
			submit_bio(ctx->bio);

		if (ctx->is_readahead) /* same as readahead_gfp_mask */
			gfp |= __GFP_NORETRY | __GFP_NOWARN;
		ctx->bio = bio_alloc(gfp, min(BIO_MAX_PAGES, nr_vecs));
		ctx->bio->bi_opf = REQ_OP_READ;
		if (ctx->is_readahead)
			ctx->bio->bi_opf |= REQ_RAHEAD;
		ctx->bio->bi_iter.bi_sector = sector;
		bio_set_dev(ctx->bio, iomap->bdev);
		ctx->bio->bi_end_io = iomap_read_end_io;
	}

	bio_add_page(ctx->bio, page, plen, poff);
done:
	/*
	 * Move the caller beyond our range so that it keeps making progress.
	 * For that we have to include any leading non-uptodate ranges, but
	 * we can skip trailing ones as they will be handled in the next
	 * iteration.
	 */
	return pos - orig_pos + plen;
}

int
iomap_readpage(struct page *page, const struct iomap_ops *ops)
{
	struct iomap_readpage_ctx ctx = { .cur_page = page };
	struct inode *inode = page->mapping->host;
	unsigned poff;
	loff_t ret;

	for (poff = 0; poff < PAGE_SIZE; poff += ret) {
		ret = iomap_apply(inode, page_offset(page) + poff,
				PAGE_SIZE - poff, 0, ops, &ctx,
				iomap_readpage_actor);
		if (ret <= 0) {
			WARN_ON_ONCE(ret == 0);
			SetPageError(page);
			break;
		}
	}

	if (ctx.bio) {
		submit_bio(ctx.bio);
		WARN_ON_ONCE(!ctx.cur_page_in_bio);
	} else {
		WARN_ON_ONCE(ctx.cur_page_in_bio);
		unlock_page(page);
	}

	/*
	 * Just like mpage_readpages and block_read_full_page we always
	 * return 0 and just mark the page as PageError on errors.  This
	 * should be cleaned up all through the stack eventually.
	 */
	return 0;
}
EXPORT_SYMBOL_GPL(iomap_readpage);

static struct page *
iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
		loff_t length, loff_t *done)
{
	while (!list_empty(pages)) {
		struct page *page = lru_to_page(pages);

		if (page_offset(page) >= (u64)pos + length)
			break;

		list_del(&page->lru);
		if (!add_to_page_cache_lru(page, inode->i_mapping, page->index,
				GFP_NOFS))
			return page;

		/*
		 * If we already have a page in the page cache at index we are
		 * done.  Upper layers don't care if it is uptodate after the
		 * readpages call itself as every page gets checked again once
		 * actually needed.
		 */
		*done += PAGE_SIZE;
		put_page(page);
	}

	return NULL;
}

static loff_t
iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
		void *data, struct iomap *iomap)
{
	struct iomap_readpage_ctx *ctx = data;
	loff_t done, ret;

	for (done = 0; done < length; done += ret) {
		if (ctx->cur_page && offset_in_page(pos + done) == 0) {
			if (!ctx->cur_page_in_bio)
				unlock_page(ctx->cur_page);
			put_page(ctx->cur_page);
			ctx->cur_page = NULL;
		}
		if (!ctx->cur_page) {
			ctx->cur_page = iomap_next_page(inode, ctx->pages,
					pos, length, &done);
			if (!ctx->cur_page)
				break;
			ctx->cur_page_in_bio = false;
		}
		ret = iomap_readpage_actor(inode, pos + done, length - done,
				ctx, iomap);
	}

	return done;
}

int
iomap_readpages(struct address_space *mapping, struct list_head *pages,
		unsigned nr_pages, const struct iomap_ops *ops)
{
	struct iomap_readpage_ctx ctx = {
		.pages		= pages,
		.is_readahead	= true,
	};
	loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
	loff_t last = page_offset(list_entry(pages->next, struct page, lru));
	loff_t length = last - pos + PAGE_SIZE, ret = 0;

	while (length > 0) {
		ret = iomap_apply(mapping->host, pos, length, 0, ops,
				&ctx, iomap_readpages_actor);
		if (ret <= 0) {
			WARN_ON_ONCE(ret == 0);
			goto done;
		}
		pos += ret;
		length -= ret;
	}
	ret = 0;
done:
	if (ctx.bio)
		submit_bio(ctx.bio);
	if (ctx.cur_page) {
		if (!ctx.cur_page_in_bio)
			unlock_page(ctx.cur_page);
		put_page(ctx.cur_page);
	}

	/*
	 * Check that we didn't lose a page due to the arcane calling
	 * conventions.
	 */
	WARN_ON_ONCE(!ret && !list_empty(ctx.pages));
	return ret;
}
EXPORT_SYMBOL_GPL(iomap_readpages);

/*
 * iomap_is_partially_uptodate checks whether blocks within a page are
 * uptodate or not.
 *
 * Returns true if all blocks which correspond to a file portion
 * we want to read within the page are uptodate.
 */
int
iomap_is_partially_uptodate(struct page *page, unsigned long from,
		unsigned long count)
{
	struct iomap_page *iop = to_iomap_page(page);
	struct inode *inode = page->mapping->host;
	unsigned len, first, last;
	unsigned i;

	/* Limit range to one page */
	len = min_t(unsigned, PAGE_SIZE - from, count);

	/* First and last blocks in range within page */
	first = from >> inode->i_blkbits;
	last = (from + len - 1) >> inode->i_blkbits;

	if (iop) {
		for (i = first; i <= last; i++)
			if (!test_bit(i, iop->uptodate))
				return 0;
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);

int
iomap_releasepage(struct page *page, gfp_t gfp_mask)
{
	/*
	 * mm accommodates an old ext3 case where clean pages might not have had
	 * the dirty bit cleared.  Thus, it can send actual dirty pages to
	 * ->releasepage() via shrink_active_list(), skip those here.
	 */
	if (PageDirty(page) || PageWriteback(page))
		return 0;
	iomap_page_release(page);
	return 1;
}
EXPORT_SYMBOL_GPL(iomap_releasepage);

void
iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len)
{
	/*
	 * If we are invalidating the entire page, clear the dirty state from it
	 * and release it to avoid unnecessary buildup of the LRU.
	 */
	if (offset == 0 && len == PAGE_SIZE) {
		WARN_ON_ONCE(PageWriteback(page));
		cancel_dirty_page(page);
		iomap_page_release(page);
	}
}
EXPORT_SYMBOL_GPL(iomap_invalidatepage);

#ifdef CONFIG_MIGRATION
int
iomap_migrate_page(struct address_space *mapping, struct page *newpage,
		struct page *page, enum migrate_mode mode)
{
	int ret;

	ret = migrate_page_move_mapping(mapping, newpage, page, 0);
	if (ret != MIGRATEPAGE_SUCCESS)
		return ret;

	if (page_has_private(page)) {
		ClearPagePrivate(page);
		get_page(newpage);
		set_page_private(newpage, page_private(page));
		set_page_private(page, 0);
		put_page(page);
		SetPagePrivate(newpage);
	}

	if (mode != MIGRATE_SYNC_NO_COPY)
		migrate_page_copy(newpage, page);
	else
		migrate_page_states(newpage, page);
	return MIGRATEPAGE_SUCCESS;
}
EXPORT_SYMBOL_GPL(iomap_migrate_page);
#endif /* CONFIG_MIGRATION */

static void
iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
{
	loff_t i_size = i_size_read(inode);

	/*
	 * Only truncate newly allocated pages beyond EOF, even if the
	 * write started inside the existing inode size.
	 */
	if (pos + len > i_size)
		truncate_pagecache_range(inode, max(pos, i_size), pos + len);
}

static int
iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page,
		unsigned poff, unsigned plen, unsigned from, unsigned to,
		struct iomap *iomap)
{
	struct bio_vec bvec;
	struct bio bio;

	if (iomap->type != IOMAP_MAPPED || block_start >= i_size_read(inode)) {
		zero_user_segments(page, poff, from, to, poff + plen);
		iomap_set_range_uptodate(page, poff, plen);
		return 0;
	}

	bio_init(&bio, &bvec, 1);
	bio.bi_opf = REQ_OP_READ;
	bio.bi_iter.bi_sector = iomap_sector(iomap, block_start);
	bio_set_dev(&bio, iomap->bdev);
	__bio_add_page(&bio, page, plen, poff);
	return submit_bio_wait(&bio);
}

static int
__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
		struct page *page, struct iomap *iomap)
{
	struct iomap_page *iop = iomap_page_create(inode, page);
	loff_t block_size = i_blocksize(inode);
	loff_t block_start = pos & ~(block_size - 1);
	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
	unsigned from = offset_in_page(pos), to = from + len, poff, plen;
	int status = 0;

	if (PageUptodate(page))
		return 0;

	do {
		iomap_adjust_read_range(inode, iop, &block_start,
				block_end - block_start, &poff, &plen);
		if (plen == 0)
			break;

		if ((from > poff && from < poff + plen) ||
		    (to > poff && to < poff + plen)) {
			status = iomap_read_page_sync(inode, block_start, page,
					poff, plen, from, to, iomap);
			if (status)
				break;
		}

	} while ((block_start += plen) < block_end);

	return status;
}

static int
iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, struct iomap *iomap)
{
	const struct iomap_page_ops *page_ops = iomap->page_ops;
	pgoff_t index = pos >> PAGE_SHIFT;
	struct page *page;
	int status = 0;

	BUG_ON(pos + len > iomap->offset + iomap->length);

	if (fatal_signal_pending(current))
		return -EINTR;

	if (page_ops && page_ops->page_prepare) {
		status = page_ops->page_prepare(inode, pos, len, iomap);
		if (status)
			return status;
	}

	page = grab_cache_page_write_begin(inode->i_mapping, index, flags);
	if (!page) {
		status = -ENOMEM;
		goto out_no_page;
	}

	if (iomap->type == IOMAP_INLINE)
		iomap_read_inline_data(inode, page, iomap);
	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
		status = __block_write_begin_int(page, pos, len, NULL, iomap);
	else
		status = __iomap_write_begin(inode, pos, len, page, iomap);

	if (unlikely(status))
		goto out_unlock;

	*pagep = page;
	return 0;

out_unlock:
	unlock_page(page);
	put_page(page);
	iomap_write_failed(inode, pos, len);

out_no_page:
	if (page_ops && page_ops->page_done)
		page_ops->page_done(inode, pos, 0, NULL, iomap);
	return status;
}

int
iomap_set_page_dirty(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int newly_dirty;

	if (unlikely(!mapping))
		return !TestSetPageDirty(page);

	/*
	 * Lock out page->mem_cgroup migration to keep PageDirty
	 * synchronized with per-memcg dirty page counters.
	 */
	lock_page_memcg(page);
	newly_dirty = !TestSetPageDirty(page);
	if (newly_dirty)
		__set_page_dirty(page, mapping, 0);
	unlock_page_memcg(page);

	if (newly_dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
	return newly_dirty;
}
EXPORT_SYMBOL_GPL(iomap_set_page_dirty);

static int
__iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
		unsigned copied, struct page *page, struct iomap *iomap)
{
	flush_dcache_page(page);

	/*
	 * The blocks that were entirely written will now be uptodate, so we
	 * don't have to worry about a readpage reading them and overwriting a
	 * partial write.  However, if we have encountered a short write and
	 * only partially written into a block, it will not be marked uptodate,
	 * so a readpage might come in and destroy our partial write.
	 *
	 * Do the simplest thing, and just treat any short write to a
	 * non-uptodate page as a zero-length write, and force the caller to
	 * redo the whole thing.
	 */
	if (unlikely(copied < len && !PageUptodate(page)))
		return 0;
	iomap_set_range_uptodate(page, offset_in_page(pos), len);
	iomap_set_page_dirty(page);
	return copied;
}

static int
iomap_write_end_inline(struct inode *inode, struct page *page,
		struct iomap *iomap, loff_t pos, unsigned copied)
{
	void *addr;

	WARN_ON_ONCE(!PageUptodate(page));
	BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));

	addr = kmap_atomic(page);
	memcpy(iomap->inline_data + pos, addr + pos, copied);
	kunmap_atomic(addr);

	mark_inode_dirty(inode);
	return copied;
}

static int
iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
		unsigned copied, struct page *page, struct iomap *iomap)
{
	const struct iomap_page_ops *page_ops = iomap->page_ops;
	loff_t old_size = inode->i_size;
	int ret;

	if (iomap->type == IOMAP_INLINE) {
		ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
	} else if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
		ret = block_write_end(NULL, inode->i_mapping, pos, len, copied,
				page, NULL);
	} else {
		ret = __iomap_write_end(inode, pos, len, copied, page, iomap);
	}

	/*
	 * Update the in-memory inode size after copying the data into the page
	 * cache.  It's up to the file system to write the updated size to disk,
	 * preferably after I/O completion so that no stale data is exposed.
	 */
	if (pos + ret > old_size) {
		i_size_write(inode, pos + ret);
		iomap->flags |= IOMAP_F_SIZE_CHANGED;
	}
	unlock_page(page);

	if (old_size < pos)
		pagecache_isize_extended(inode, old_size, pos);
	if (page_ops && page_ops->page_done)
		page_ops->page_done(inode, pos, ret, page, iomap);
	put_page(page);

	if (ret < len)
		iomap_write_failed(inode, pos, len);
	return ret;
}

static loff_t
iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
		struct iomap *iomap)
{
	struct iov_iter *i = data;
	long status = 0;
	ssize_t written = 0;
	unsigned int flags = AOP_FLAG_NOFS;

	do {
		struct page *page;
		unsigned long offset;	/* Offset into pagecache page */
		unsigned long bytes;	/* Bytes to write to page */
		size_t copied;		/* Bytes copied from user */

		offset = offset_in_page(pos);
		bytes = min_t(unsigned long, PAGE_SIZE - offset,
						iov_iter_count(i));
again:
		if (bytes > length)
			bytes = length;

		/*
		 * Bring in the user page that we will copy from _first_.
		 * Otherwise there's a nasty deadlock on copying from the
		 * same page as we're writing to, without it being marked
		 * up-to-date.
		 *
		 * Not only is this an optimisation, but it is also required
		 * to check that the address is actually valid, when atomic
		 * usercopies are used, below.
		 */
		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
			status = -EFAULT;
			break;
		}

		status = iomap_write_begin(inode, pos, bytes, flags, &page,
				iomap);
		if (unlikely(status))
			break;

		if (mapping_writably_mapped(inode->i_mapping))
			flush_dcache_page(page);

		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);

		flush_dcache_page(page);

		status = iomap_write_end(inode, pos, bytes, copied, page,
				iomap);
		if (unlikely(status < 0))
			break;
		copied = status;

		cond_resched();

		iov_iter_advance(i, copied);
		if (unlikely(copied == 0)) {
			/*
			 * If we were unable to copy any data at all, we must
			 * fall back to a single segment length write.
			 *
			 * If we didn't fall back here, we could livelock
			 * because not all segments in the iov can be copied at
			 * once without a pagefault.
			 */
			bytes = min_t(unsigned long, PAGE_SIZE - offset,
						iov_iter_single_seg_count(i));
			goto again;
		}
		pos += copied;
		written += copied;
		length -= copied;

		balance_dirty_pages_ratelimited(inode->i_mapping);
	} while (iov_iter_count(i) && length);

	return written ? written : status;
}

ssize_t
iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *iter,
		const struct iomap_ops *ops)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;
	loff_t pos = iocb->ki_pos, ret = 0, written = 0;

	while (iov_iter_count(iter)) {
		ret = iomap_apply(inode, pos, iov_iter_count(iter),
				IOMAP_WRITE, ops, iter, iomap_write_actor);
		if (ret <= 0)
			break;
		pos += ret;
		written += ret;
	}

	return written ? written : ret;
}
EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
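
/*
 * Illustrative sketch (not part of this file): one way a filesystem's
 * ->write_iter method might drive iomap_file_buffered_write().  The
 * example_iomap_ops structure, the example_ function name, and the plain
 * inode_lock() serialization shown here are hypothetical placeholders; a
 * real filesystem supplies its own iomap_begin/iomap_end callbacks and its
 * own locking (compare the XFS buffered write path).
 *
 *	static ssize_t
 *	example_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 *	{
 *		struct inode *inode = file_inode(iocb->ki_filp);
 *		ssize_t ret;
 *
 *		inode_lock(inode);
 *		ret = generic_write_checks(iocb, from);
 *		if (ret > 0)
 *			ret = iomap_file_buffered_write(iocb, from,
 *					&example_iomap_ops);
 *		inode_unlock(inode);
 *
 *		if (ret > 0) {
 *			// iomap_file_buffered_write() does not advance
 *			// iocb->ki_pos itself, so the caller must.
 *			iocb->ki_pos += ret;
 *			ret = generic_write_sync(iocb, ret);
 *		}
 *		return ret;
 *	}
 */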

static struct page *
__iomap_read_page(struct inode *inode, loff_t offset)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;

	page = read_mapping_page(mapping, offset >> PAGE_SHIFT, NULL);
	if (IS_ERR(page))
		return page;
	if (!PageUptodate(page)) {
		put_page(page);
		return ERR_PTR(-EIO);
	}
	return page;
}

static loff_t
iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
		struct iomap *iomap)
{
	long status = 0;
	ssize_t written = 0;

	do {
		struct page *page, *rpage;
		unsigned long offset;	/* Offset into pagecache page */
		unsigned long bytes;	/* Bytes to write to page */

		offset = offset_in_page(pos);
		bytes = min_t(loff_t, PAGE_SIZE - offset, length);

		rpage = __iomap_read_page(inode, pos);
		if (IS_ERR(rpage))
			return PTR_ERR(rpage);

		status = iomap_write_begin(inode, pos, bytes,
					   AOP_FLAG_NOFS, &page, iomap);
		put_page(rpage);
		if (unlikely(status))
			return status;

		WARN_ON_ONCE(!PageUptodate(page));

		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap);
		if (unlikely(status <= 0)) {
			if (WARN_ON_ONCE(status == 0))
				return -EIO;
			return status;
		}

		cond_resched();

		pos += status;
		written += status;
		length -= status;

		balance_dirty_pages_ratelimited(inode->i_mapping);
	} while (length);

	return written;
}

int
iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
		const struct iomap_ops *ops)
{
	loff_t ret;

	while (len) {
		ret = iomap_apply(inode, pos, len, IOMAP_WRITE, ops, NULL,
				iomap_dirty_actor);
		if (ret <= 0)
			return ret;
		pos += ret;
		len -= ret;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(iomap_file_dirty);

static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
		unsigned bytes, struct iomap *iomap)
{
	struct page *page;
	int status;

	status = iomap_write_begin(inode, pos, bytes, AOP_FLAG_NOFS, &page,
				   iomap);
	if (status)
		return status;

	zero_user(page, offset, bytes);
	mark_page_accessed(page);

	return iomap_write_end(inode, pos, bytes, bytes, page, iomap);
}

static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
		struct iomap *iomap)
{
	return __dax_zero_page_range(iomap->bdev, iomap->dax_dev,
			iomap_sector(iomap, pos & PAGE_MASK), offset, bytes);
}

static loff_t
iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
		void *data, struct iomap *iomap)
{
	bool *did_zero = data;
	loff_t written = 0;
	int status;

	/* already zeroed?  we're done. */
	if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
		return count;

	do {
		unsigned offset, bytes;

		offset = offset_in_page(pos);
		bytes = min_t(loff_t, PAGE_SIZE - offset, count);

		if (IS_DAX(inode))
			status = iomap_dax_zero(pos, offset, bytes, iomap);
		else
			status = iomap_zero(inode, pos, offset, bytes, iomap);
		if (status < 0)
			return status;

		pos += bytes;
		count -= bytes;
		written += bytes;
		if (did_zero)
			*did_zero = true;
	} while (count > 0);

	return written;
}

int
iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
		const struct iomap_ops *ops)
{
	loff_t ret;

	while (len > 0) {
		ret = iomap_apply(inode, pos, len, IOMAP_ZERO,
				ops, did_zero, iomap_zero_range_actor);
		if (ret <= 0)
			return ret;

		pos += ret;
		len -= ret;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(iomap_zero_range);

int
iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
		const struct iomap_ops *ops)
{
	unsigned int blocksize = i_blocksize(inode);
	unsigned int off = pos & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (!off)
		return 0;
	return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops);
}
EXPORT_SYMBOL_GPL(iomap_truncate_page);

static loff_t
iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
		void *data, struct iomap *iomap)
{
	struct page *page = data;
	int ret;

	if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
		ret = __block_write_begin_int(page, pos, length, NULL, iomap);
		if (ret)
			return ret;
		block_commit_write(page, 0, length);
	} else {
		WARN_ON_ONCE(!PageUptodate(page));
		iomap_page_create(inode, page);
		set_page_dirty(page);
	}

	return length;
}

vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vmf->vma->vm_file);
	unsigned long length;
	loff_t offset, size;
	ssize_t ret;

	lock_page(page);
	size = i_size_read(inode);
	if ((page->mapping != inode->i_mapping) ||
	    (page_offset(page) > size)) {
		/* We overload EFAULT to mean page got truncated */
		ret = -EFAULT;
		goto out_unlock;
	}

	/* page is wholly or partially inside EOF */
	if (((page->index + 1) << PAGE_SHIFT) > size)
		length = offset_in_page(size);
	else
		length = PAGE_SIZE;

	offset = page_offset(page);
	while (length > 0) {
		ret = iomap_apply(inode, offset, length,
				IOMAP_WRITE | IOMAP_FAULT, ops, page,
				iomap_page_mkwrite_actor);
		if (unlikely(ret <= 0))
			goto out_unlock;
		offset += ret;
		length -= ret;
	}

	wait_for_stable_page(page);
	return VM_FAULT_LOCKED;
out_unlock:
	unlock_page(page);
	return block_page_mkwrite_return(ret);
}
EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
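
/*
 * Illustrative sketch (not part of this file): how a filesystem might wire
 * the helpers above into its address_space_operations.  iomap_readpage() and
 * iomap_readpages() take an extra iomap_ops argument, so they need thin
 * wrappers; the other helpers match the method signatures and can be used
 * directly.  The "example_" names and example_iomap_ops are hypothetical
 * placeholders, loosely modeled on the XFS wiring; iomap_page_mkwrite() is
 * hooked up through the filesystem's vm_operations_struct ->page_mkwrite
 * instead.
 *
 *	static int
 *	example_readpage(struct file *unused, struct page *page)
 *	{
 *		return iomap_readpage(page, &example_iomap_ops);
 *	}
 *
 *	static int
 *	example_readpages(struct file *unused, struct address_space *mapping,
 *			struct list_head *pages, unsigned nr_pages)
 *	{
 *		return iomap_readpages(mapping, pages, nr_pages,
 *				&example_iomap_ops);
 *	}
 *
 *	static const struct address_space_operations example_aops = {
 *		.readpage		= example_readpage,
 *		.readpages		= example_readpages,
 *		.set_page_dirty		= iomap_set_page_dirty,
 *		.releasepage		= iomap_releasepage,
 *		.invalidatepage		= iomap_invalidatepage,
 *		.is_partially_uptodate	= iomap_is_partially_uptodate,
 *	#ifdef CONFIG_MIGRATION
 *		.migratepage		= iomap_migrate_page,
 *	#endif
 *	};
 */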