// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2010 Red Hat, Inc.
 * Copyright (C) 2016-2023 Christoph Hellwig.
 */
#include <linux/iomap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/migrate.h>
#include "internal.h"
#include "trace.h"

#include "../internal.h"

/*
 * Structure allocated for each folio to track per-block uptodate, dirty state
 * and I/O completions.
 */
struct iomap_folio_state {
	/* Taken around updates of @state and @read_bytes_pending below. */
	spinlock_t		state_lock;
	/* Bytes of read I/O queued against the folio and not yet completed. */
	unsigned int		read_bytes_pending;
	/* Only checked (must be zero) when the structure is freed in this chunk. */
	atomic_t		write_bytes_pending;

	/*
	 * Each block has two bits in this bitmap:
	 * Bits [0..blocks_per_folio) has the uptodate status.
	 * Bits [b_p_f...(2*b_p_f))   has the dirty status.
	 */
	unsigned long		state[];
};

/*
 * Return true if the uptodate bit of every block in @folio is set, i.e. the
 * first i_blocks_per_folio() bits of ifs->state are all ones.
 */
static inline bool ifs_is_fully_uptodate(struct folio *folio,
		struct iomap_folio_state *ifs)
{
	struct inode *inode = folio->mapping->host;

	return bitmap_full(ifs->state, i_blocks_per_folio(inode, folio));
}

/* Test the uptodate bit of block number @block (uptodate bits come first). */
static inline bool ifs_block_is_uptodate(struct iomap_folio_state *ifs,
		unsigned int block)
{
	return test_bit(block, ifs->state);
}

/*
 * Set the uptodate bit of every block touched by the byte range
 * [@off, @off + @len) of @folio and return whether the whole folio is now
 * uptodate.  Takes no lock itself; the callers visible here hold
 * ifs->state_lock around the call.
 */
static bool ifs_set_range_uptodate(struct folio *folio,
		struct iomap_folio_state *ifs, size_t off, size_t len)
{
	struct inode *inode = folio->mapping->host;
	unsigned int first_blk = off >> inode->i_blkbits;
	unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
	unsigned int nr_blks = last_blk - first_blk + 1;

	bitmap_set(ifs->state, first_blk, nr_blks);
	return ifs_is_fully_uptodate(folio, ifs);
}

/*
 * Mark the byte range [@off, @off + @len) of @folio uptodate.  With per-block
 * state attached, the folio itself is only marked uptodate once every block
 * is; without it the folio is marked uptodate unconditionally.
 */
static void iomap_set_range_uptodate(struct folio *folio, size_t off,
		size_t len)
{
	struct iomap_folio_state *ifs = folio->private;
	unsigned long flags;
	bool uptodate = true;

	if (ifs) {
		spin_lock_irqsave(&ifs->state_lock, flags);
		uptodate = ifs_set_range_uptodate(folio, ifs, off, len);
		spin_unlock_irqrestore(&ifs->state_lock, flags);
	}

	if (uptodate)
		folio_mark_uptodate(folio);
}

/*
 * Test the dirty bit of block number @block.  The dirty bits are stored after
 * the uptodate bits, hence the blocks-per-folio offset into the bitmap.
 */
static inline bool ifs_block_is_dirty(struct folio *folio,
		struct iomap_folio_state *ifs, int block)
{
	struct inode *inode = folio->mapping->host;
	unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);

	return test_bit(block + blks_per_folio, ifs->state);
}

/*
 * Find the first run of consecutive dirty blocks in @folio, starting the
 * search at the block containing *@range_start and stopping before the end
 * block derived from @range_end.
 *
 * On success *@range_start is updated to the file position of the first dirty
 * block and the length of the run in bytes is returned.  Returns 0 when no
 * dirty block is found before the end block.
 */
static unsigned ifs_find_dirty_range(struct folio *folio,
		struct iomap_folio_state *ifs, u64 *range_start, u64 range_end)
{
	struct inode *inode = folio->mapping->host;
	unsigned start_blk =
		offset_in_folio(folio, *range_start) >> inode->i_blkbits;
	/*
	 * A @range_end whose in-folio block index is zero falls back to the
	 * number of blocks in the folio -- presumably to cover a range that
	 * ends exactly at the folio end; confirm against callers.
	 */
	unsigned end_blk = min_not_zero(
		offset_in_folio(folio, range_end) >> inode->i_blkbits,
		i_blocks_per_folio(inode, folio));
	unsigned nblks = 1;

	/* skip leading clean blocks */
	while (!ifs_block_is_dirty(folio, ifs, start_blk))
		if (++start_blk == end_blk)
			return 0;

	/* extend the run over the following dirty blocks */
	while (start_blk + nblks < end_blk) {
		if (!ifs_block_is_dirty(folio, ifs, start_blk + nblks))
			break;
		nblks++;
	}

	*range_start = folio_pos(folio) + (start_blk << inode->i_blkbits);
	return nblks << inode->i_blkbits;
}

/*
 * Find the next dirty range in @folio between *@range_start and @range_end.
 * Returns 0 for an empty range.  Without per-block state the whole remaining
 * range is reported; otherwise see ifs_find_dirty_range().
 */
static unsigned iomap_find_dirty_range(struct folio *folio, u64 *range_start,
		u64 range_end)
{
	struct iomap_folio_state *ifs = folio->private;

	if (*range_start >= range_end)
		return 0;

	if (ifs)
		return ifs_find_dirty_range(folio, ifs, range_start, range_end);
	return range_end - *range_start;
}

/*
 * Clear the dirty bit of every block touched by the byte range
 * [@off, @off + @len) of @folio, under ifs->state_lock.
 */
static void ifs_clear_range_dirty(struct folio *folio,
		struct iomap_folio_state *ifs, size_t off, size_t len)
{
	struct inode *inode = folio->mapping->host;
	unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);
	unsigned int first_blk = (off >> inode->i_blkbits);
	unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
	unsigned int nr_blks = last_blk - first_blk + 1;
	unsigned long flags;

	spin_lock_irqsave(&ifs->state_lock, flags);
	bitmap_clear(ifs->state, first_blk + blks_per_folio, nr_blks);
	spin_unlock_irqrestore(&ifs->state_lock, flags);
}

/* Clear per-block dirty state for a range; a no-op without attached state. */
static void iomap_clear_range_dirty(struct folio *folio, size_t off, size_t len)
{
	struct iomap_folio_state *ifs = folio->private;

	if (ifs)
		ifs_clear_range_dirty(folio, ifs, off, len);
}

/*
 * Set the dirty bit of every block touched by the byte range
 * [@off, @off + @len) of @folio, under ifs->state_lock.
 */
static void ifs_set_range_dirty(struct folio *folio,
		struct iomap_folio_state *ifs, size_t off, size_t len)
{
	struct inode *inode = folio->mapping->host;
	unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);
	unsigned int first_blk = (off >> inode->i_blkbits);
	unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
	unsigned int nr_blks = last_blk - first_blk + 1;
	unsigned long flags;

	spin_lock_irqsave(&ifs->state_lock, flags);
	bitmap_set(ifs->state, first_blk + blks_per_folio, nr_blks);
	spin_unlock_irqrestore(&ifs->state_lock, flags);
}

/* Set per-block dirty state for a range; a no-op without attached state. */
static void iomap_set_range_dirty(struct folio *folio, size_t off, size_t len)
{
	struct iomap_folio_state *ifs = folio->private;

	if (ifs)
		ifs_set_range_dirty(folio, ifs, off, len);
}

/*
 * Return the per-block state of @folio, allocating and attaching it first if
 * needed.
 *
 * Returns the existing state if there is one.  Returns NULL when the folio
 * holds at most one block (no per-block tracking is needed) or when the
 * allocation fails, which callers in this file only expect for IOMAP_NOWAIT.
 * A new structure starts with all uptodate/dirty bits set if the folio itself
 * is uptodate/dirty.
 */
static struct iomap_folio_state *ifs_alloc(struct inode *inode,
		struct folio *folio, unsigned int flags)
{
	struct iomap_folio_state *ifs = folio->private;
	unsigned int nr_blocks = i_blocks_per_folio(inode, folio);
	gfp_t gfp;

	if (ifs || nr_blocks <= 1)
		return ifs;

	if (flags & IOMAP_NOWAIT)
		gfp = GFP_NOWAIT;
	else
		gfp = GFP_NOFS | __GFP_NOFAIL;

	/*
	 * ifs->state tracks two sets of state flags when the
	 * filesystem block size is smaller than the folio size.
	 * The first state tracks per-block uptodate and the
	 * second tracks per-block dirty state.
	 */
	ifs = kzalloc(struct_size(ifs, state,
		      BITS_TO_LONGS(2 * nr_blocks)), gfp);
	if (!ifs)
		return ifs;

	spin_lock_init(&ifs->state_lock);
	if (folio_test_uptodate(folio))
		bitmap_set(ifs->state, 0, nr_blocks);
	if (folio_test_dirty(folio))
		bitmap_set(ifs->state, nr_blocks, nr_blocks);
	folio_attach_private(folio, ifs);

	return ifs;
}

/*
 * Detach and free the per-block state of @folio, if any.  Warns once if I/O
 * is still accounted as pending or if the per-block uptodate bits disagree
 * with the folio's uptodate flag.
 */
static void ifs_free(struct folio *folio)
{
	struct iomap_folio_state *ifs = folio_detach_private(folio);

	if (!ifs)
		return;
	WARN_ON_ONCE(ifs->read_bytes_pending != 0);
	WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending));
	WARN_ON_ONCE(ifs_is_fully_uptodate(folio, ifs) !=
			folio_test_uptodate(folio));
	kfree(ifs);
}

/*
 * Calculate the range inside the folio that we actually need to read.
 *
 * @pos is advanced past any leading blocks that are already uptodate.  The
 * resulting in-folio offset and length are returned through @offp and @lenp;
 * *@lenp may be zero when nothing needs to be read.
 */
static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
		loff_t *pos, loff_t length, size_t *offp, size_t *lenp)
{
	struct iomap_folio_state *ifs = folio->private;
	loff_t orig_pos = *pos;
	loff_t isize = i_size_read(inode);
	unsigned block_bits = inode->i_blkbits;
	unsigned block_size = (1 << block_bits);
	size_t poff = offset_in_folio(folio, *pos);
	size_t plen = min_t(loff_t, folio_size(folio) - poff, length);
	size_t orig_plen = plen;
	unsigned first = poff >> block_bits;
	unsigned last = (poff + plen - 1) >> block_bits;

	/*
	 * If the block size is smaller than the page size, we need to check the
	 * per-block uptodate status and adjust the offset and length if needed
	 * to avoid reading in already uptodate ranges.
	 */
	if (ifs) {
		unsigned int i;

		/* move forward for each leading block marked uptodate */
		for (i = first; i <= last; i++) {
			if (!ifs_block_is_uptodate(ifs, i))
				break;
			*pos += block_size;
			poff += block_size;
			plen -= block_size;
			first++;
		}

		/* truncate len if we find any trailing uptodate block(s) */
		while (++i <= last) {
			if (ifs_block_is_uptodate(ifs, i)) {
				plen -= (last - i + 1) * block_size;
				last = i - 1;
				break;
			}
		}
	}

	/*
	 * If the extent spans the block that contains the i_size, we need to
	 * handle both halves separately so that we properly zero data in the
	 * page cache for blocks that are entirely outside of i_size.
	 */
	if (orig_pos <= isize && orig_pos + orig_plen > isize) {
		unsigned end = offset_in_folio(folio, isize - 1) >> block_bits;

		if (first <= end && last > end)
			plen -= (last - end) * block_size;
	}

	*offp = poff;
	*lenp = plen;
}

/*
 * Account the completion of a read of @len bytes at offset @off in @folio.
 *
 * With per-block state, the range is marked uptodate on success, @len is
 * subtracted from read_bytes_pending and the folio read is only ended once
 * nothing is pending any more.  Without it the folio read is ended right
 * away, uptodate unless @error is set.
 */
static void iomap_finish_folio_read(struct folio *folio, size_t off,
		size_t len, int error)
{
	struct iomap_folio_state *ifs = folio->private;
	bool uptodate = !error;
	bool finished = true;

	if (ifs) {
		unsigned long flags;

		spin_lock_irqsave(&ifs->state_lock, flags);
		if (!error)
			uptodate = ifs_set_range_uptodate(folio, ifs, off, len);
		ifs->read_bytes_pending -= len;
		finished = !ifs->read_bytes_pending;
		spin_unlock_irqrestore(&ifs->state_lock, flags);
	}

	if (finished)
		folio_end_read(folio, uptodate);
}

/*
 * Read bio completion handler: finish the read of every folio segment in the
 * bio with the bio's status converted to an errno, then drop the bio.
 */
static void iomap_read_end_io(struct bio *bio)
{
	int error = blk_status_to_errno(bio->bi_status);
	struct folio_iter fi;

	bio_for_each_folio_all(fi, bio)
		iomap_finish_folio_read(fi.folio, fi.offset, fi.length, error);
	bio_put(bio);
}

/* State carried across iterations of a read_folio or readahead operation. */
struct iomap_readpage_ctx {
	/* folio currently being filled */
	struct folio		*cur_folio;
	/*
	 * Set once part of @cur_folio has been queued in a bio; the callers
	 * below then leave unlocking the folio to I/O completion.
	 */
	bool			cur_folio_in_bio;
	/* bio being built, submitted when it cannot be extended or at the end */
	struct bio		*bio;
	/* readahead control; left NULL by iomap_read_folio() */
	struct readahead_control *rac;
};

/**
 * iomap_read_inline_data - copy inline data into the page cache
 * @iter: iteration structure
 * @folio: folio to copy to
 *
 * Copy the inline data in @iter into @folio and zero out the rest of the folio.
 * Only a single IOMAP_INLINE extent is allowed at the end of each file.
 * Returns zero for success to complete the read, or the usual negative errno.
 */
static int iomap_read_inline_data(const struct iomap_iter *iter,
		struct folio *folio)
{
	const struct iomap *iomap = iomap_iter_srcmap(iter);
	size_t size = i_size_read(iter->inode) - iomap->offset;
	size_t offset = offset_in_folio(folio, iomap->offset);

	/* nothing to do if the folio already holds valid data */
	if (folio_test_uptodate(folio))
		return 0;

	/* the inline data must cover everything up to i_size */
	if (WARN_ON_ONCE(size > iomap->length))
		return -EIO;
	/* only part of the folio is filled here, so track it per block */
	if (offset > 0)
		ifs_alloc(iter->inode, folio, iter->flags);

	folio_fill_tail(folio, offset, iomap->inline_data, size);
	iomap_set_range_uptodate(folio, offset, folio_size(folio) - offset);
	return 0;
}

/*
 * Return true if the block at @pos has to be zeroed in the page cache rather
 * than read: the source mapping is not IOMAP_MAPPED, it is flagged
 * IOMAP_F_NEW, or @pos is at or beyond i_size.
 */
static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter,
		loff_t pos)
{
	const struct iomap *srcmap = iomap_iter_srcmap(iter);

	return srcmap->type != IOMAP_MAPPED ||
		(srcmap->flags & IOMAP_F_NEW) ||
		pos >= i_size_read(iter->inode);
}

/*
 * Process one step of a read into ctx->cur_folio at iter->pos: copy inline
 * data, zero blocks that need no I/O, or queue the not yet uptodate range in
 * ctx->bio (submitting the previous bio and allocating a new one when the
 * range cannot be appended).  Advances @iter past the range handled and
 * returns the result of that advance, or a negative errno from the inline
 * data copy.
 */
static int iomap_readpage_iter(struct iomap_iter *iter,
		struct iomap_readpage_ctx *ctx)
{
	const struct iomap *iomap = &iter->iomap;
	loff_t pos = iter->pos;
	loff_t length = iomap_length(iter);
	struct folio *folio = ctx->cur_folio;
	struct iomap_folio_state *ifs;
	size_t poff, plen;
	sector_t sector;
	int ret;

	if (iomap->type == IOMAP_INLINE) {
		ret = iomap_read_inline_data(iter, folio);
		if (ret)
			return ret;
		return iomap_iter_advance(iter, &length);
	}

	/* zero post-eof blocks as the page may be mapped */
	ifs = ifs_alloc(iter->inode, folio, iter->flags);
	iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen);
	if (plen == 0)
		goto done;

	if (iomap_block_needs_zeroing(iter, pos)) {
		folio_zero_range(folio, poff, plen);
		iomap_set_range_uptodate(folio, poff, plen);
		goto done;
	}

	/* account the read as pending before it can possibly complete */
	ctx->cur_folio_in_bio = true;
	if (ifs) {
		spin_lock_irq(&ifs->state_lock);
		ifs->read_bytes_pending += plen;
		spin_unlock_irq(&ifs->state_lock);
	}

	/* start a new bio unless the range extends the current one on disk */
	sector = iomap_sector(iomap, pos);
	if (!ctx->bio ||
	    bio_end_sector(ctx->bio) != sector ||
	    !bio_add_folio(ctx->bio, folio, plen, poff)) {
		gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL);
		gfp_t orig_gfp = gfp;
		unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE);

		if (ctx->bio)
			submit_bio(ctx->bio);

		if (ctx->rac) /* same as readahead_gfp_mask */
			gfp |= __GFP_NORETRY | __GFP_NOWARN;
		ctx->bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs),
				     REQ_OP_READ, gfp);
		/*
		 * If the bio_alloc fails, try it again for a single page to
		 * avoid having to deal with partial page reads.  This emulates
		 * what do_mpage_read_folio does.
		 */
		if (!ctx->bio) {
			ctx->bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ,
					     orig_gfp);
		}
		if (ctx->rac)
			ctx->bio->bi_opf |= REQ_RAHEAD;
		ctx->bio->bi_iter.bi_sector = sector;
		ctx->bio->bi_end_io = iomap_read_end_io;
		bio_add_folio_nofail(ctx->bio, folio, plen, poff);
	}

done:
	/*
	 * Move the caller beyond our range so that it keeps making progress.
	 * For that, we have to include any leading non-uptodate ranges, but
	 * we can skip trailing ones as they will be handled in the next
	 * iteration.
	 */
	length = pos - iter->pos + plen;
	return iomap_iter_advance(iter, &length);
}

/*
 * Run iomap_readpage_iter() until the current mapping is consumed.  Returns 0
 * or the first non-zero value returned by iomap_readpage_iter().
 */
static int iomap_read_folio_iter(struct iomap_iter *iter,
		struct iomap_readpage_ctx *ctx)
{
	int ret;

	while (iomap_length(iter)) {
		ret = iomap_readpage_iter(iter, ctx);
		if (ret)
			return ret;
	}

	return 0;
}

/*
 * Read the whole of @folio using the mappings provided by @ops.  Any bio that
 * was built is submitted; if no bio was built the folio is unlocked here.
 * Always returns 0.
 */
int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
{
	struct iomap_iter iter = {
		.inode		= folio->mapping->host,
		.pos		= folio_pos(folio),
		.len		= folio_size(folio),
	};
	struct iomap_readpage_ctx ctx = {
		.cur_folio	= folio,
	};
	int ret;

	trace_iomap_readpage(iter.inode, 1);

	while ((ret = iomap_iter(&iter, ops)) > 0)
		iter.status = iomap_read_folio_iter(&iter, &ctx);

	if (ctx.bio) {
		submit_bio(ctx.bio);
		WARN_ON_ONCE(!ctx.cur_folio_in_bio);
	} else {
		WARN_ON_ONCE(ctx.cur_folio_in_bio);
		folio_unlock(folio);
	}

	/*
	 * Just like mpage_readahead and block_read_full_folio, we always
	 * return 0 and just set the folio error flag on errors.  This
	 * should be cleaned up throughout the stack eventually.
	 */
	return 0;
}
EXPORT_SYMBOL_GPL(iomap_read_folio);

/*
 * Readahead counterpart of iomap_read_folio_iter(): additionally moves on to
 * the next readahead folio whenever iter->pos reaches a folio boundary,
 * unlocking the previous folio unless part of it was queued in a bio.
 */
static int iomap_readahead_iter(struct iomap_iter *iter,
		struct iomap_readpage_ctx *ctx)
{
	int ret;

	while (iomap_length(iter)) {
		/* done with the current folio once pos wraps to its start */
		if (ctx->cur_folio &&
		    offset_in_folio(ctx->cur_folio, iter->pos) == 0) {
			if (!ctx->cur_folio_in_bio)
				folio_unlock(ctx->cur_folio);
			ctx->cur_folio = NULL;
		}
		if (!ctx->cur_folio) {
			ctx->cur_folio = readahead_folio(ctx->rac);
			ctx->cur_folio_in_bio = false;
		}
		ret = iomap_readpage_iter(iter, ctx);
		if (ret)
			return ret;
	}

	return 0;
}

/**
 * iomap_readahead - Attempt to read pages from a file.
 * @rac: Describes the pages to be read.
 * @ops: The operations vector for the filesystem.
 *
 * This function is for filesystems to call to implement their readahead
 * address_space operation.
 *
 * Context: The @ops callbacks may submit I/O (eg to read the addresses of
 * blocks from disc), and may wait for it.  The caller may be trying to
 * access a different page, and so sleeping excessively should be avoided.
 * It may allocate memory, but should avoid costly allocations.  This
 * function is called with memalloc_nofs set, so allocations will not cause
 * the filesystem to be reentered.
 */
void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
{
	struct iomap_iter iter = {
		.inode	= rac->mapping->host,
		.pos	= readahead_pos(rac),
		.len	= readahead_length(rac),
	};
	struct iomap_readpage_ctx ctx = {
		.rac	= rac,
	};

	trace_iomap_readahead(rac->mapping->host, readahead_count(rac));

	while (iomap_iter(&iter, ops) > 0)
		iter.status = iomap_readahead_iter(&iter, &ctx);

	if (ctx.bio)
		submit_bio(ctx.bio);
	/* a last folio that never made it into a bio is unlocked here */
	if (ctx.cur_folio) {
		if (!ctx.cur_folio_in_bio)
			folio_unlock(ctx.cur_folio);
	}
}
EXPORT_SYMBOL_GPL(iomap_readahead);

/*
 * iomap_is_partially_uptodate checks whether blocks within a folio are
 * uptodate or not.
 *
 * Returns true if all blocks which correspond to the specified part
 * of the folio are uptodate.  Returns false if the folio has no per-block
 * state attached.
 */
bool iomap_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
{
	struct iomap_folio_state *ifs = folio->private;
	struct inode *inode = folio->mapping->host;
	unsigned first, last, i;

	if (!ifs)
		return false;

	/* Caller's range may extend past the end of this folio */
	count = min(folio_size(folio) - from, count);

	/* First and last blocks in range within folio */
	first = from >> inode->i_blkbits;
	last = (from + count - 1) >> inode->i_blkbits;

	for (i = first; i <= last; i++)
		if (!ifs_block_is_uptodate(ifs, i))
			return false;
	return true;
}
EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);

/**
 * iomap_get_folio - get a folio reference for writing
 * @iter: iteration structure
 * @pos: start offset of write
 * @len: Suggested size of folio to create.
 *
 * Returns a locked reference to the folio at @pos, or an error pointer if the
 * folio could not be obtained.
 */
struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len)
{
	fgf_t fgp = FGP_WRITEBEGIN | FGP_NOFS;

	/* translate the iomap flags into the matching page cache flags */
	if (iter->flags & IOMAP_NOWAIT)
		fgp |= FGP_NOWAIT;
	if (iter->flags & IOMAP_DONTCACHE)
		fgp |= FGP_DONTCACHE;
	fgp |= fgf_set_order(len);

	return __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT,
			fgp, mapping_gfp_mask(iter->inode->i_mapping));
}
EXPORT_SYMBOL_GPL(iomap_get_folio);

/*
 * Free the per-block state of @folio so that the folio can be released.
 * Returns false, leaving the state attached, if the folio is dirty; returns
 * true otherwise.  @gfp_flags is not used.
 */
bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags)
{
	trace_iomap_release_folio(folio->mapping->host, folio_pos(folio),
			folio_size(folio));

	/*
	 * If the folio is dirty, we refuse to release our metadata because
	 * it may be partially dirty and freeing the ifs would discard the
	 * per-block dirty bits.
	 */
	if (folio_test_dirty(folio))
		return false;
	ifs_free(folio);
	return true;
}
EXPORT_SYMBOL_GPL(iomap_release_folio);

/*
 * Invalidate the byte range [@offset, @offset + @len) of @folio.  Only a
 * range covering the whole folio has any effect here: the dirty state is
 * cancelled and the per-block state is freed.
 */
void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len)
{
	trace_iomap_invalidate_folio(folio->mapping->host,
					folio_pos(folio) + offset, len);

	/*
	 * If we're invalidating the entire folio, clear the dirty state
	 * from it and release it to avoid unnecessary buildup of the LRU.
	 */
	if (offset == 0 && len == folio_size(folio)) {
		WARN_ON_ONCE(folio_test_writeback(folio));
		folio_cancel_dirty(folio);
		ifs_free(folio);
	}
}
EXPORT_SYMBOL_GPL(iomap_invalidate_folio);

/*
 * Dirty the whole of @folio: make sure per-block state exists where it is
 * needed, set the dirty bit of every block and then dirty the folio itself.
 * Returns the result of filemap_dirty_folio().
 */
bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio)
{
	struct inode *inode = mapping->host;
	size_t len = folio_size(folio);

	ifs_alloc(inode, folio, 0);
	iomap_set_range_dirty(folio, 0, len);
	return filemap_dirty_folio(mapping, folio);
}
EXPORT_SYMBOL_GPL(iomap_dirty_folio);

/*
 * Clean up the page cache after a failed write of @len bytes at @pos by
 * truncating the part of the range that lies beyond i_size.
 */
static void
iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
{
	loff_t i_size = i_size_read(inode);

	/*
	 * Only truncate newly allocated pages beyond EOF, even if the
	 * write started inside the existing inode size.
	 */
	if (pos + len > i_size)
		truncate_pagecache_range(inode, max(pos, i_size),
					 pos + len - 1);
}

/*
 * Synchronously read @plen bytes into @folio at offset @poff from the device
 * sector that @iomap maps @block_start to, using an on-stack bio.  Returns
 * the result of submit_bio_wait().
 */
static int iomap_read_folio_sync(loff_t block_start, struct folio *folio,
		size_t poff, size_t plen, const struct iomap *iomap)
{
	struct bio_vec bvec;
	struct bio bio;

	bio_init(&bio, iomap->bdev, &bvec, 1, REQ_OP_READ);
	bio.bi_iter.bi_sector = iomap_sector(iomap, block_start);
	bio_add_folio_nofail(&bio, folio, plen, poff);
	return submit_bio_wait(&bio);
}

/*
 * Prepare @folio for a write of @len bytes at iter->pos: blocks that are only
 * partially covered by the write and are not uptodate yet are either zeroed
 * or read in synchronously and then marked uptodate.
 *
 * Returns 0 on success, -EAGAIN if an IOMAP_NOWAIT operation would have to
 * allocate per-block state that failed to allocate or to read from disk,
 * -EIO if an unshare operation hits a block that would need zeroing, or the
 * error of the synchronous read.
 */
static int __iomap_write_begin(const struct iomap_iter *iter, size_t len,
		struct folio *folio)
{
	const struct iomap *srcmap = iomap_iter_srcmap(iter);
	struct iomap_folio_state *ifs;
	loff_t pos = iter->pos;
	loff_t block_size = i_blocksize(iter->inode);
	loff_t block_start = round_down(pos, block_size);
	loff_t block_end = round_up(pos + len, block_size);
	unsigned int nr_blocks = i_blocks_per_folio(iter->inode, folio);
	size_t from = offset_in_folio(folio, pos), to = from + len;
	size_t poff, plen;

	/*
	 * If the write or zeroing completely overlaps the current folio, then
	 * entire folio will be dirtied so there is no need for
	 * per-block state tracking structures to be attached to this folio.
	 * For the unshare case, we must read in the ondisk contents because we
	 * are not changing pagecache contents.
	 */
	if (!(iter->flags & IOMAP_UNSHARE) && pos <= folio_pos(folio) &&
	    pos + len >= folio_pos(folio) + folio_size(folio))
		return 0;

	ifs = ifs_alloc(iter->inode, folio, iter->flags);
	/* a NULL ifs is only fine for folios made up of a single block */
	if ((iter->flags & IOMAP_NOWAIT) && !ifs && nr_blocks > 1)
		return -EAGAIN;

	if (folio_test_uptodate(folio))
		return 0;

	do {
		iomap_adjust_read_range(iter->inode, folio, &block_start,
				block_end - block_start, &poff, &plen);
		if (plen == 0)
			break;

		/*
		 * Skip ranges in which neither end of the write falls
		 * strictly inside: they are either overwritten completely
		 * or not touched at all.
		 */
		if (!(iter->flags & IOMAP_UNSHARE) &&
		    (from <= poff || from >= poff + plen) &&
		    (to <= poff || to >= poff + plen))
			continue;

		if (iomap_block_needs_zeroing(iter, block_start)) {
			if (WARN_ON_ONCE(iter->flags & IOMAP_UNSHARE))
				return -EIO;
			folio_zero_segments(folio, poff, from, to, poff + plen);
		} else {
			int status;

			if (iter->flags & IOMAP_NOWAIT)
				return -EAGAIN;

			status = iomap_read_folio_sync(block_start, folio,
					poff, plen, srcmap);
			if (status)
				return status;
		}
		iomap_set_range_uptodate(folio, poff, plen);
	} while ((block_start += plen) < block_end);

	return 0;
}

/*
 * Get the folio for a write of up to @len bytes at iter->pos, through the
 * mapping's ->get_folio folio op if it has one and iomap_get_folio()
 * otherwise.  Without large folio support @len is capped at the end of the
 * page containing iter->pos.
 */
static struct folio *__iomap_get_folio(struct iomap_iter *iter, size_t len)
{
	const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
	loff_t pos = iter->pos;

	if (!mapping_large_folio_support(iter->inode->i_mapping))
		len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));

	if (folio_ops && folio_ops->get_folio)
		return folio_ops->get_folio(iter, pos, len);
	else
		return iomap_get_folio(iter, pos, len);
}

/*
 * Release a folio obtained by __iomap_get_folio(): hand it to the mapping's
 * ->put_folio folio op together with the byte count @ret if there is one,
 * otherwise unlock it and drop the reference.
 */
static void __iomap_put_folio(struct iomap_iter *iter, size_t ret,
		struct folio *folio)
{
	const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
	loff_t pos = iter->pos;

	if (folio_ops && folio_ops->put_folio) {
		folio_ops->put_folio(iter->inode, pos, ret, folio);
	} else {
		folio_unlock(folio);
		folio_put(folio);
	}
}

/* trim pos and bytes to within a given folio */
static loff_t iomap_trim_folio_range(struct iomap_iter *iter,
		struct folio *folio, size_t *offset, u64 *bytes)
{
	loff_t pos = iter->pos;
	size_t fsize = folio_size(folio);

	/* iter->pos is expected to lie inside @folio */
	WARN_ON_ONCE(pos < folio_pos(folio));
	WARN_ON_ONCE(pos >= folio_pos(folio) + fsize);

	*offset = offset_in_folio(folio, pos);
	*bytes = min(*bytes, fsize - *offset);

	return pos;
}

/*
 * Prepare @folio for a write into an IOMAP_INLINE mapping by copying the
 * inline data into it.  Returns -EIO for a source mapping that does not start
 * at offset 0.
 */
static int iomap_write_begin_inline(const struct iomap_iter *iter,
		struct folio *folio)
{
	/* needs more work for the tailpacking case; disable for now */
	if (WARN_ON_ONCE(iomap_iter_srcmap(iter)->offset != 0))
		return -EIO;
	return iomap_read_inline_data(iter, folio);
}

/*
 * Grab and prepare a folio for write based on iter state.  Returns the folio,
 * offset, and length.  Callers can optionally pass a max length *plen,
 * otherwise init to zero.
 *
 * Returns 0 on success or a negative errno.  If the cached iomap turns out to
 * be stale, IOMAP_F_STALE is set in iter->iomap.flags, the folio is released
 * and 0 is returned without setting *@foliop, so callers have to check for
 * that flag.
 */
static int iomap_write_begin(struct iomap_iter *iter, struct folio **foliop,
		size_t *poffset, u64 *plen)
{
	const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
	const struct iomap *srcmap = iomap_iter_srcmap(iter);
	loff_t pos = iter->pos;
	u64 len = min_t(u64, SIZE_MAX, iomap_length(iter));
	struct folio *folio;
	int status = 0;

	len = min_not_zero(len, *plen);
	BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
	if (srcmap != &iter->iomap)
		BUG_ON(pos + len > srcmap->offset + srcmap->length);

	if (fatal_signal_pending(current))
		return -EINTR;

	folio = __iomap_get_folio(iter, len);
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	/*
	 * Now we have a locked folio, before we do anything with it we need to
	 * check that the iomap we have cached is not stale.  The inode extent
	 * mapping can change due to concurrent IO in flight (e.g.
	 * IOMAP_UNWRITTEN state can change and memory reclaim could have
	 * reclaimed a previously partially written page at this index after IO
	 * completion before this write reaches this file offset) and hence we
	 * could do the wrong thing here (zero a page range incorrectly or fail
	 * to zero) and corrupt data.
	 */
	if (folio_ops && folio_ops->iomap_valid) {
		bool iomap_valid = folio_ops->iomap_valid(iter->inode,
							 &iter->iomap);
		if (!iomap_valid) {
			iter->iomap.flags |= IOMAP_F_STALE;
			status = 0;
			goto out_unlock;
		}
	}

	pos = iomap_trim_folio_range(iter, folio, poffset, &len);

	if (srcmap->type == IOMAP_INLINE)
		status = iomap_write_begin_inline(iter, folio);
	else if (srcmap->flags & IOMAP_F_BUFFER_HEAD)
		status = __block_write_begin_int(folio, pos, len, NULL, srcmap);
	else
		status = __iomap_write_begin(iter, len, folio);

	if (unlikely(status))
		goto out_unlock;

	*foliop = folio;
	*plen = len;
	return 0;

out_unlock:
	__iomap_put_folio(iter, 0, folio);

	return status;
}

/*
 * Finish a write of @copied out of @len bytes at @pos into @folio: mark the
 * range uptodate and dirty and dirty the folio.  Returns false, changing no
 * state, for a short copy into a folio that is not uptodate.
 */
static bool __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
		size_t copied, struct folio *folio)
{
	flush_dcache_folio(folio);

	/*
	 * The blocks that were entirely written will now be uptodate, so we
	 * don't have to worry about a read_folio reading them and overwriting a
	 * partial write.  However, if we've encountered a short write and only
	 * partially written into a block, it will not be marked uptodate, so a
	 * read_folio might come in and destroy our partial write.
	 *
	 * Do the simplest thing and just treat any short write to a
	 * non-uptodate page as a zero-length write, and force the caller to
	 * redo the whole thing.
	 */
	if (unlikely(copied < len && !folio_test_uptodate(folio)))
		return false;
	iomap_set_range_uptodate(folio, offset_in_folio(folio, pos), len);
	iomap_set_range_dirty(folio, offset_in_folio(folio, pos), copied);
	filemap_dirty_folio(inode->i_mapping, folio);
	return true;
}

/*
 * Finish a write into an IOMAP_INLINE mapping: copy the @copied bytes at @pos
 * from @folio back into the inline data area and mark the inode dirty.
 */
static void iomap_write_end_inline(const struct iomap_iter *iter,
		struct folio *folio, loff_t pos, size_t copied)
{
	const struct iomap *iomap = &iter->iomap;
	void *addr;

	WARN_ON_ONCE(!folio_test_uptodate(folio));
	BUG_ON(!iomap_inline_data_valid(iomap));

	flush_dcache_folio(folio);
	addr = kmap_local_folio(folio, pos);
	memcpy(iomap_inline_data(iomap, pos), addr, copied);
	kunmap_local(addr);

	mark_inode_dirty(iter->inode);
}

/*
 * Returns true if all copied bytes have been written to the pagecache,
 * otherwise return false.
 */
static bool iomap_write_end(struct iomap_iter *iter, size_t len, size_t copied,
		struct folio *folio)
{
	const struct iomap *srcmap = iomap_iter_srcmap(iter);
	loff_t pos = iter->pos;

	if (srcmap->type == IOMAP_INLINE) {
		iomap_write_end_inline(iter, folio, pos, copied);
		return true;
	}

	if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
		size_t bh_written;

		bh_written = block_write_end(NULL, iter->inode->i_mapping, pos,
					len, copied, folio, NULL);
		/* block_write_end() is expected to accept all or nothing */
		WARN_ON_ONCE(bh_written != copied && bh_written != 0);
		return bh_written == copied;
	}

	return __iomap_write_end(iter->inode, pos, len, copied, folio);
}

/*
 * Copy data from @i into the page cache for the mapping currently held by
 * @iter, one folio-sized chunk at a time, advancing @iter by what was
 * written.  Returns 0 if anything was written, otherwise the last status
 * (0 or a negative errno).
 */
static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
{
	ssize_t total_written = 0;
	int status = 0;
	struct address_space *mapping = iter->inode->i_mapping;
	size_t chunk = mapping_max_folio_size(mapping);
	unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0;

	do {
		struct folio *folio;
		loff_t old_size;
		size_t offset;		/* Offset into folio */
		u64 bytes;		/* Bytes to write to folio */
		size_t copied;		/* Bytes copied from user */
		u64 written;		/* Bytes have been written */
		loff_t pos;

		bytes = iov_iter_count(i);
retry:
		/* never cross a @chunk aligned boundary in a single step */
		offset = iter->pos & (chunk - 1);
		bytes = min(chunk - offset, bytes);
		status = balance_dirty_pages_ratelimited_flags(mapping,
							       bdp_flags);
		if (unlikely(status))
			break;

		if (bytes > iomap_length(iter))
			bytes = iomap_length(iter);

		/*
		 * Bring in the user page that we'll copy from _first_.
		 * Otherwise there's a nasty deadlock on copying from the
		 * same page as we're writing to, without it being marked
		 * up-to-date.
		 *
		 * For async buffered writes the assumption is that the user
		 * page has already been faulted in.  This can be optimized by
		 * faulting the user page.
		 */
		/*
		 * NOTE(review): a return value equal to @bytes presumably
		 * means that nothing at all could be faulted in -- confirm
		 * against fault_in_iov_iter_readable().
		 */
		if (unlikely(fault_in_iov_iter_readable(i, bytes) == bytes)) {
			status = -EFAULT;
			break;
		}

		status = iomap_write_begin(iter, &folio, &offset, &bytes);
		if (unlikely(status)) {
			iomap_write_failed(iter->inode, iter->pos, bytes);
			break;
		}
		/* stale mapping: no folio was returned, get a new mapping */
		if (iter->iomap.flags & IOMAP_F_STALE)
			break;

		pos = iter->pos;

		if (mapping_writably_mapped(mapping))
			flush_dcache_folio(folio);

		copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
		written = iomap_write_end(iter, bytes, copied, folio) ?
			  copied : 0;

		/*
		 * Update the in-memory inode size after copying the data into
		 * the page cache.  It's up to the file system to write the
		 * updated size to disk, preferably after I/O completion so that
		 * no stale data is exposed.  Only once that's done can we
		 * unlock and release the folio.
		 */
		old_size = iter->inode->i_size;
		if (pos + written > old_size) {
			i_size_write(iter->inode, pos + written);
			iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
		}
		__iomap_put_folio(iter, written, folio);

		if (old_size < pos)
			pagecache_isize_extended(iter->inode, old_size, pos);

		cond_resched();
		if (unlikely(written == 0)) {
			/*
			 * A short copy made iomap_write_end() reject the
			 * thing entirely.  Might be memory poisoning
			 * halfway through, might be a race with munmap,
			 * might be severe memory pressure.
			 */
			iomap_write_failed(iter->inode, pos, bytes);
			iov_iter_revert(i, copied);

			/* retry with a smaller chunk, down to PAGE_SIZE */
			if (chunk > PAGE_SIZE)
				chunk /= 2;
			if (copied) {
				bytes = copied;
				goto retry;
			}
		} else {
			total_written += written;
			iomap_iter_advance(iter, &written);
		}
	} while (iov_iter_count(i) && iomap_length(iter));

	return total_written ? 0 : status;
}

/*
 * Buffered write of the data in @i at iocb->ki_pos, using the mappings
 * provided by @ops.  @private is stored in the iterator's ->private field.
 *
 * Returns the number of bytes written and advances iocb->ki_pos by it.  If
 * nothing was written, the final iomap_iter() result (zero or a negative
 * errno) is returned instead.
 */
ssize_t
iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
		const struct iomap_ops *ops, void *private)
{
	struct iomap_iter iter = {
		.inode		= iocb->ki_filp->f_mapping->host,
		.pos		= iocb->ki_pos,
		.len		= iov_iter_count(i),
		.flags		= IOMAP_WRITE,
		.private	= private,
	};
	ssize_t ret;

	if (iocb->ki_flags & IOCB_NOWAIT)
		iter.flags |= IOMAP_NOWAIT;
	if (iocb->ki_flags & IOCB_DONTCACHE)
		iter.flags |= IOMAP_DONTCACHE;

	while ((ret = iomap_iter(&iter, ops)) > 0)
		iter.status = iomap_write_iter(&iter, i);

	/* no progress at all: report the iteration result */
	if (unlikely(iter.pos == iocb->ki_pos))
		return ret;
	ret = iter.pos - iocb->ki_pos;
	iocb->ki_pos = iter.pos;
	return ret;
}
EXPORT_SYMBOL_GPL(iomap_file_buffered_write);

/*
 * Call @punch for every block of @folio inside [@start_byte, @end_byte) whose
 * per-block dirty bit is clear.  Does nothing for folios without per-block
 * state.
 */
static void iomap_write_delalloc_ifs_punch(struct inode *inode,
		struct folio *folio, loff_t start_byte, loff_t end_byte,
		struct iomap *iomap, iomap_punch_t punch)
{
	unsigned int first_blk, last_blk, i;
	loff_t last_byte;
	u8 blkbits = inode->i_blkbits;
	struct iomap_folio_state *ifs;

	/*
	 * When we have per-block dirty tracking, there can be
	 * blocks within a folio which are marked uptodate
	 * but not dirty. In that case it is necessary to punch
	 * out such blocks to avoid leaking any delalloc blocks.
	 */
	ifs = folio->private;
	if (!ifs)
		return;

	/* clamp the inclusive end of the range to the end of the folio */
	last_byte = min_t(loff_t, end_byte - 1,
			folio_pos(folio) + folio_size(folio) - 1);
	first_blk = offset_in_folio(folio, start_byte) >> blkbits;
	last_blk = offset_in_folio(folio, last_byte) >> blkbits;
	for (i = first_blk; i <= last_blk; i++) {
		if (!ifs_block_is_dirty(folio, ifs, i))
			punch(inode, folio_pos(folio) + (i << blkbits),
				    1 << blkbits, iomap);
	}
}

/*
 * Handle one folio found by the delalloc scan.  Clean folios are skipped
 * without touching *@punch_start_byte.  For a dirty folio, punch the range
 * from *@punch_start_byte up to @start_byte, punch the clean blocks inside
 * the folio, and move *@punch_start_byte past the part of the folio that
 * lies in the range.
 */
static void iomap_write_delalloc_punch(struct inode *inode, struct folio *folio,
		loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte,
		struct iomap *iomap, iomap_punch_t punch)
{
	if (!folio_test_dirty(folio))
		return;

	/* if dirty, punch up to offset */
	if (start_byte > *punch_start_byte) {
		punch(inode, *punch_start_byte, start_byte - *punch_start_byte,
				iomap);
	}

	/* Punch non-dirty blocks within folio */
	iomap_write_delalloc_ifs_punch(inode, folio, start_byte, end_byte,
			iomap, punch);

	/*
	 * Make sure the next punch start is correctly bound to
	 * the end of this data range, not the end of the folio.
	 */
	*punch_start_byte = min_t(loff_t, end_byte,
				folio_pos(folio) + folio_size(folio));
}

/*
 * Scan the data range passed to us for dirty page cache folios. If we find a
 * dirty folio, punch out the preceding range and update the offset from which
 * the next punch will start from.
 *
 * We can punch out storage reservations under clean pages because they either
 * contain data that has been written back - in which case the delalloc punch
 * over that range is a no-op - or they have been read faults in which case they
 * contain zeroes and we can remove the delalloc backing range and any new
 * writes to those pages will do the normal hole filling operation...
 *
 * This makes the logic simple: we only need to keep the delalloc extents only
 * over the dirty ranges of the page cache.
 *
 * This function uses [start_byte, end_byte) intervals (i.e. open ended) to
 * simplify range iterations.
 */
static void iomap_write_delalloc_scan(struct inode *inode,
		loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte,
		struct iomap *iomap, iomap_punch_t punch)
{
	while (start_byte < end_byte) {
		struct folio	*folio;

		/* grab locked page */
		folio = filemap_lock_folio(inode->i_mapping,
				start_byte >> PAGE_SHIFT);
		if (IS_ERR(folio)) {
			/* no folio here: continue at the next page boundary */
			start_byte = ALIGN_DOWN(start_byte, PAGE_SIZE) +
					PAGE_SIZE;
			continue;
		}

		iomap_write_delalloc_punch(inode, folio, punch_start_byte,
				start_byte, end_byte, iomap, punch);

		/* move offset to start of next folio in range */
		start_byte = folio_pos(folio) + folio_size(folio);
		folio_unlock(folio);
		folio_put(folio);
	}
}

/*
 * When a short write occurs, the filesystem might need to use ->iomap_end
 * to remove space reservations created in ->iomap_begin.
 *
 * For filesystems that use delayed allocation, there can be dirty pages over
 * the delalloc extent outside the range of a short write but still within the
 * delalloc extent allocated for this iomap if the write raced with page
 * faults.
1166 * 1167 * Punch out all the delalloc blocks in the range given except for those that 1168 * have dirty data still pending in the page cache - those are going to be 1169 * written and so must still retain the delalloc backing for writeback. 1170 * 1171 * The punch() callback *must* only punch delalloc extents in the range passed 1172 * to it. It must skip over all other types of extents in the range and leave 1173 * them completely unchanged. It must do this punch atomically with respect to 1174 * other extent modifications. 1175 * 1176 * The punch() callback may be called with a folio locked to prevent writeback 1177 * extent allocation racing at the edge of the range we are currently punching. 1178 * The locked folio may or may not cover the range being punched, so it is not 1179 * safe for the punch() callback to lock folios itself. 1180 * 1181 * Lock order is: 1182 * 1183 * inode->i_rwsem (shared or exclusive) 1184 * inode->i_mapping->invalidate_lock (exclusive) 1185 * folio_lock() 1186 * ->punch 1187 * internal filesystem allocation lock 1188 * 1189 * As we are scanning the page cache for data, we don't need to reimplement the 1190 * wheel - mapping_seek_hole_data() does exactly what we need to identify the 1191 * start and end of data ranges correctly even for sub-folio block sizes. This 1192 * byte range based iteration is especially convenient because it means we 1193 * don't have to care about variable size folios, nor where the start or end of 1194 * the data range lies within a folio, if they lie within the same folio or even 1195 * if there are multiple discontiguous data ranges within the folio. 1196 * 1197 * It should be noted that mapping_seek_hole_data() is not aware of EOF, and so 1198 * can return data ranges that exist in the cache beyond EOF. e.g. a page fault 1199 * spanning EOF will initialise the post-EOF data to zeroes and mark it up to 1200 * date. A write page fault can then mark it dirty. 
If we then fail a write() 1201 * beyond EOF into that up to date cached range, we allocate a delalloc block 1202 * beyond EOF and then have to punch it out. Because the range is up to date, 1203 * mapping_seek_hole_data() will return it, and we will skip the punch because 1204 * the folio is dirty. THis is incorrect - we always need to punch out delalloc 1205 * beyond EOF in this case as writeback will never write back and covert that 1206 * delalloc block beyond EOF. Hence we limit the cached data scan range to EOF, 1207 * resulting in always punching out the range from the EOF to the end of the 1208 * range the iomap spans. 1209 * 1210 * Intervals are of the form [start_byte, end_byte) (i.e. open ended) because it 1211 * matches the intervals returned by mapping_seek_hole_data(). i.e. SEEK_DATA 1212 * returns the start of a data range (start_byte), and SEEK_HOLE(start_byte) 1213 * returns the end of the data range (data_end). Using closed intervals would 1214 * require sprinkling this code with magic "+ 1" and "- 1" arithmetic and expose 1215 * the code to subtle off-by-one bugs.... 1216 */ 1217 void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte, 1218 loff_t end_byte, unsigned flags, struct iomap *iomap, 1219 iomap_punch_t punch) 1220 { 1221 loff_t punch_start_byte = start_byte; 1222 loff_t scan_end_byte = min(i_size_read(inode), end_byte); 1223 1224 /* 1225 * The caller must hold invalidate_lock to avoid races with page faults 1226 * re-instantiating folios and dirtying them via ->page_mkwrite whilst 1227 * we walk the cache and perform delalloc extent removal. Failing to do 1228 * this can leave dirty pages with no space reservation in the cache. 
1229 */ 1230 lockdep_assert_held_write(&inode->i_mapping->invalidate_lock); 1231 1232 while (start_byte < scan_end_byte) { 1233 loff_t data_end; 1234 1235 start_byte = mapping_seek_hole_data(inode->i_mapping, 1236 start_byte, scan_end_byte, SEEK_DATA); 1237 /* 1238 * If there is no more data to scan, all that is left is to 1239 * punch out the remaining range. 1240 * 1241 * Note that mapping_seek_hole_data is only supposed to return 1242 * either an offset or -ENXIO, so WARN on any other error as 1243 * that would be an API change without updating the callers. 1244 */ 1245 if (start_byte == -ENXIO || start_byte == scan_end_byte) 1246 break; 1247 if (WARN_ON_ONCE(start_byte < 0)) 1248 return; 1249 WARN_ON_ONCE(start_byte < punch_start_byte); 1250 WARN_ON_ONCE(start_byte > scan_end_byte); 1251 1252 /* 1253 * We find the end of this contiguous cached data range by 1254 * seeking from start_byte to the beginning of the next hole. 1255 */ 1256 data_end = mapping_seek_hole_data(inode->i_mapping, start_byte, 1257 scan_end_byte, SEEK_HOLE); 1258 if (WARN_ON_ONCE(data_end < 0)) 1259 return; 1260 1261 /* 1262 * If we race with post-direct I/O invalidation of the page cache, 1263 * there might be no data left at start_byte. 1264 */ 1265 if (data_end == start_byte) 1266 continue; 1267 1268 WARN_ON_ONCE(data_end < start_byte); 1269 WARN_ON_ONCE(data_end > scan_end_byte); 1270 1271 iomap_write_delalloc_scan(inode, &punch_start_byte, start_byte, 1272 data_end, iomap, punch); 1273 1274 /* The next data search starts at the end of this one. 
*/ 1275 start_byte = data_end; 1276 } 1277 1278 if (punch_start_byte < end_byte) 1279 punch(inode, punch_start_byte, end_byte - punch_start_byte, 1280 iomap); 1281 } 1282 EXPORT_SYMBOL_GPL(iomap_write_delalloc_release); 1283 1284 static int iomap_unshare_iter(struct iomap_iter *iter) 1285 { 1286 struct iomap *iomap = &iter->iomap; 1287 u64 bytes = iomap_length(iter); 1288 int status; 1289 1290 if (!iomap_want_unshare_iter(iter)) 1291 return iomap_iter_advance(iter, &bytes); 1292 1293 do { 1294 struct folio *folio; 1295 size_t offset; 1296 bool ret; 1297 1298 bytes = min_t(u64, SIZE_MAX, bytes); 1299 status = iomap_write_begin(iter, &folio, &offset, &bytes); 1300 if (unlikely(status)) 1301 return status; 1302 if (iomap->flags & IOMAP_F_STALE) 1303 break; 1304 1305 ret = iomap_write_end(iter, bytes, bytes, folio); 1306 __iomap_put_folio(iter, bytes, folio); 1307 if (WARN_ON_ONCE(!ret)) 1308 return -EIO; 1309 1310 cond_resched(); 1311 1312 balance_dirty_pages_ratelimited(iter->inode->i_mapping); 1313 1314 status = iomap_iter_advance(iter, &bytes); 1315 if (status) 1316 break; 1317 } while (bytes > 0); 1318 1319 return status; 1320 } 1321 1322 int 1323 iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, 1324 const struct iomap_ops *ops) 1325 { 1326 struct iomap_iter iter = { 1327 .inode = inode, 1328 .pos = pos, 1329 .flags = IOMAP_WRITE | IOMAP_UNSHARE, 1330 }; 1331 loff_t size = i_size_read(inode); 1332 int ret; 1333 1334 if (pos < 0 || pos >= size) 1335 return 0; 1336 1337 iter.len = min(len, size - pos); 1338 while ((ret = iomap_iter(&iter, ops)) > 0) 1339 iter.status = iomap_unshare_iter(&iter); 1340 return ret; 1341 } 1342 EXPORT_SYMBOL_GPL(iomap_file_unshare); 1343 1344 /* 1345 * Flush the remaining range of the iter and mark the current mapping stale. 1346 * This is used when zero range sees an unwritten mapping that may have had 1347 * dirty pagecache over it. 
1348 */ 1349 static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i) 1350 { 1351 struct address_space *mapping = i->inode->i_mapping; 1352 loff_t end = i->pos + i->len - 1; 1353 1354 i->iomap.flags |= IOMAP_F_STALE; 1355 return filemap_write_and_wait_range(mapping, i->pos, end); 1356 } 1357 1358 static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) 1359 { 1360 u64 bytes = iomap_length(iter); 1361 int status; 1362 1363 do { 1364 struct folio *folio; 1365 size_t offset; 1366 bool ret; 1367 1368 bytes = min_t(u64, SIZE_MAX, bytes); 1369 status = iomap_write_begin(iter, &folio, &offset, &bytes); 1370 if (status) 1371 return status; 1372 if (iter->iomap.flags & IOMAP_F_STALE) 1373 break; 1374 1375 /* warn about zeroing folios beyond eof that won't write back */ 1376 WARN_ON_ONCE(folio_pos(folio) > iter->inode->i_size); 1377 1378 folio_zero_range(folio, offset, bytes); 1379 folio_mark_accessed(folio); 1380 1381 ret = iomap_write_end(iter, bytes, bytes, folio); 1382 __iomap_put_folio(iter, bytes, folio); 1383 if (WARN_ON_ONCE(!ret)) 1384 return -EIO; 1385 1386 status = iomap_iter_advance(iter, &bytes); 1387 if (status) 1388 break; 1389 } while (bytes > 0); 1390 1391 if (did_zero) 1392 *did_zero = true; 1393 return status; 1394 } 1395 1396 int 1397 iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, 1398 const struct iomap_ops *ops, void *private) 1399 { 1400 struct iomap_iter iter = { 1401 .inode = inode, 1402 .pos = pos, 1403 .len = len, 1404 .flags = IOMAP_ZERO, 1405 .private = private, 1406 }; 1407 struct address_space *mapping = inode->i_mapping; 1408 unsigned int blocksize = i_blocksize(inode); 1409 unsigned int off = pos & (blocksize - 1); 1410 loff_t plen = min_t(loff_t, len, blocksize - off); 1411 int ret; 1412 bool range_dirty; 1413 1414 /* 1415 * Zero range can skip mappings that are zero on disk so long as 1416 * pagecache is clean. 
If pagecache was dirty prior to zero range, the 1417 * mapping converts on writeback completion and so must be zeroed. 1418 * 1419 * The simplest way to deal with this across a range is to flush 1420 * pagecache and process the updated mappings. To avoid excessive 1421 * flushing on partial eof zeroing, special case it to zero the 1422 * unaligned start portion if already dirty in pagecache. 1423 */ 1424 if (off && 1425 filemap_range_needs_writeback(mapping, pos, pos + plen - 1)) { 1426 iter.len = plen; 1427 while ((ret = iomap_iter(&iter, ops)) > 0) 1428 iter.status = iomap_zero_iter(&iter, did_zero); 1429 1430 iter.len = len - (iter.pos - pos); 1431 if (ret || !iter.len) 1432 return ret; 1433 } 1434 1435 /* 1436 * To avoid an unconditional flush, check pagecache state and only flush 1437 * if dirty and the fs returns a mapping that might convert on 1438 * writeback. 1439 */ 1440 range_dirty = filemap_range_needs_writeback(inode->i_mapping, 1441 iter.pos, iter.pos + iter.len - 1); 1442 while ((ret = iomap_iter(&iter, ops)) > 0) { 1443 const struct iomap *srcmap = iomap_iter_srcmap(&iter); 1444 1445 if (srcmap->type == IOMAP_HOLE || 1446 srcmap->type == IOMAP_UNWRITTEN) { 1447 s64 status; 1448 1449 if (range_dirty) { 1450 range_dirty = false; 1451 status = iomap_zero_iter_flush_and_stale(&iter); 1452 } else { 1453 status = iomap_iter_advance_full(&iter); 1454 } 1455 iter.status = status; 1456 continue; 1457 } 1458 1459 iter.status = iomap_zero_iter(&iter, did_zero); 1460 } 1461 return ret; 1462 } 1463 EXPORT_SYMBOL_GPL(iomap_zero_range); 1464 1465 int 1466 iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, 1467 const struct iomap_ops *ops, void *private) 1468 { 1469 unsigned int blocksize = i_blocksize(inode); 1470 unsigned int off = pos & (blocksize - 1); 1471 1472 /* Block boundary? 
Nothing to do */ 1473 if (!off) 1474 return 0; 1475 return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops, 1476 private); 1477 } 1478 EXPORT_SYMBOL_GPL(iomap_truncate_page); 1479 1480 static int iomap_folio_mkwrite_iter(struct iomap_iter *iter, 1481 struct folio *folio) 1482 { 1483 loff_t length = iomap_length(iter); 1484 int ret; 1485 1486 if (iter->iomap.flags & IOMAP_F_BUFFER_HEAD) { 1487 ret = __block_write_begin_int(folio, iter->pos, length, NULL, 1488 &iter->iomap); 1489 if (ret) 1490 return ret; 1491 block_commit_write(folio, 0, length); 1492 } else { 1493 WARN_ON_ONCE(!folio_test_uptodate(folio)); 1494 folio_mark_dirty(folio); 1495 } 1496 1497 return iomap_iter_advance(iter, &length); 1498 } 1499 1500 vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops, 1501 void *private) 1502 { 1503 struct iomap_iter iter = { 1504 .inode = file_inode(vmf->vma->vm_file), 1505 .flags = IOMAP_WRITE | IOMAP_FAULT, 1506 .private = private, 1507 }; 1508 struct folio *folio = page_folio(vmf->page); 1509 ssize_t ret; 1510 1511 folio_lock(folio); 1512 ret = folio_mkwrite_check_truncate(folio, iter.inode); 1513 if (ret < 0) 1514 goto out_unlock; 1515 iter.pos = folio_pos(folio); 1516 iter.len = ret; 1517 while ((ret = iomap_iter(&iter, ops)) > 0) 1518 iter.status = iomap_folio_mkwrite_iter(&iter, folio); 1519 1520 if (ret < 0) 1521 goto out_unlock; 1522 folio_wait_stable(folio); 1523 return VM_FAULT_LOCKED; 1524 out_unlock: 1525 folio_unlock(folio); 1526 return vmf_fs_error(ret); 1527 } 1528 EXPORT_SYMBOL_GPL(iomap_page_mkwrite); 1529 1530 static void iomap_finish_folio_write(struct inode *inode, struct folio *folio, 1531 size_t len) 1532 { 1533 struct iomap_folio_state *ifs = folio->private; 1534 1535 WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !ifs); 1536 WARN_ON_ONCE(ifs && atomic_read(&ifs->write_bytes_pending) <= 0); 1537 1538 if (!ifs || atomic_sub_and_test(len, &ifs->write_bytes_pending)) 1539 folio_end_writeback(folio); 1540 
} 1541 1542 /* 1543 * We're now finished for good with this ioend structure. Update the page 1544 * state, release holds on bios, and finally free up memory. Do not use the 1545 * ioend after this. 1546 */ 1547 u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend) 1548 { 1549 struct inode *inode = ioend->io_inode; 1550 struct bio *bio = &ioend->io_bio; 1551 struct folio_iter fi; 1552 u32 folio_count = 0; 1553 1554 if (ioend->io_error) { 1555 mapping_set_error(inode->i_mapping, ioend->io_error); 1556 if (!bio_flagged(bio, BIO_QUIET)) { 1557 pr_err_ratelimited( 1558 "%s: writeback error on inode %lu, offset %lld, sector %llu", 1559 inode->i_sb->s_id, inode->i_ino, 1560 ioend->io_offset, ioend->io_sector); 1561 } 1562 } 1563 1564 /* walk all folios in bio, ending page IO on them */ 1565 bio_for_each_folio_all(fi, bio) { 1566 iomap_finish_folio_write(inode, fi.folio, fi.length); 1567 folio_count++; 1568 } 1569 1570 bio_put(bio); /* frees the ioend */ 1571 return folio_count; 1572 } 1573 1574 static void iomap_writepage_end_bio(struct bio *bio) 1575 { 1576 struct iomap_ioend *ioend = iomap_ioend_from_bio(bio); 1577 1578 ioend->io_error = blk_status_to_errno(bio->bi_status); 1579 iomap_finish_ioend_buffered(ioend); 1580 } 1581 1582 /* 1583 * Submit an ioend. 1584 * 1585 * If @error is non-zero, it means that we have a situation where some part of 1586 * the submission process has failed after we've marked pages for writeback. 1587 * We cannot cancel ioend directly in that case, so call the bio end I/O handler 1588 * with the error status here to run the normal I/O completion handler to clear 1589 * the writeback bit and let the file system proess the errors. 1590 */ 1591 static int iomap_submit_ioend(struct iomap_writepage_ctx *wpc, int error) 1592 { 1593 if (!wpc->ioend) 1594 return error; 1595 1596 /* 1597 * Let the file systems prepare the I/O submission and hook in an I/O 1598 * comletion handler. 
This also needs to happen in case after a 1599 * failure happened so that the file system end I/O handler gets called 1600 * to clean up. 1601 */ 1602 if (wpc->ops->submit_ioend) { 1603 error = wpc->ops->submit_ioend(wpc, error); 1604 } else { 1605 if (WARN_ON_ONCE(wpc->iomap.flags & IOMAP_F_ANON_WRITE)) 1606 error = -EIO; 1607 if (!error) 1608 submit_bio(&wpc->ioend->io_bio); 1609 } 1610 1611 if (error) { 1612 wpc->ioend->io_bio.bi_status = errno_to_blk_status(error); 1613 bio_endio(&wpc->ioend->io_bio); 1614 } 1615 1616 wpc->ioend = NULL; 1617 return error; 1618 } 1619 1620 static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc, 1621 loff_t pos, u16 ioend_flags) 1622 { 1623 struct bio *bio; 1624 1625 bio = bio_alloc_bioset(wpc->iomap.bdev, BIO_MAX_VECS, 1626 REQ_OP_WRITE | wbc_to_write_flags(wpc->wbc), 1627 GFP_NOFS, &iomap_ioend_bioset); 1628 bio->bi_iter.bi_sector = iomap_sector(&wpc->iomap, pos); 1629 bio->bi_end_io = iomap_writepage_end_bio; 1630 bio->bi_write_hint = wpc->inode->i_write_hint; 1631 wbc_init_bio(wpc->wbc, bio); 1632 wpc->nr_folios = 0; 1633 return iomap_init_ioend(wpc->inode, bio, pos, ioend_flags); 1634 } 1635 1636 static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos, 1637 u16 ioend_flags) 1638 { 1639 if (ioend_flags & IOMAP_IOEND_BOUNDARY) 1640 return false; 1641 if ((ioend_flags & IOMAP_IOEND_NOMERGE_FLAGS) != 1642 (wpc->ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS)) 1643 return false; 1644 if (pos != wpc->ioend->io_offset + wpc->ioend->io_size) 1645 return false; 1646 if (!(wpc->iomap.flags & IOMAP_F_ANON_WRITE) && 1647 iomap_sector(&wpc->iomap, pos) != 1648 bio_end_sector(&wpc->ioend->io_bio)) 1649 return false; 1650 /* 1651 * Limit ioend bio chain lengths to minimise IO completion latency. This 1652 * also prevents long tight loops ending page writeback on all the 1653 * folios in the ioend. 
1654 */ 1655 if (wpc->nr_folios >= IOEND_BATCH_SIZE) 1656 return false; 1657 return true; 1658 } 1659 1660 /* 1661 * Test to see if we have an existing ioend structure that we could append to 1662 * first; otherwise finish off the current ioend and start another. 1663 * 1664 * If a new ioend is created and cached, the old ioend is submitted to the block 1665 * layer instantly. Batching optimisations are provided by higher level block 1666 * plugging. 1667 * 1668 * At the end of a writeback pass, there will be a cached ioend remaining on the 1669 * writepage context that the caller will need to submit. 1670 */ 1671 ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio, 1672 loff_t pos, loff_t end_pos, unsigned int dirty_len) 1673 { 1674 struct iomap_folio_state *ifs = folio->private; 1675 size_t poff = offset_in_folio(folio, pos); 1676 unsigned int ioend_flags = 0; 1677 unsigned int map_len = min_t(u64, dirty_len, 1678 wpc->iomap.offset + wpc->iomap.length - pos); 1679 int error; 1680 1681 trace_iomap_add_to_ioend(wpc->inode, pos, dirty_len, &wpc->iomap); 1682 1683 WARN_ON_ONCE(!folio->private && map_len < dirty_len); 1684 1685 switch (wpc->iomap.type) { 1686 case IOMAP_INLINE: 1687 WARN_ON_ONCE(1); 1688 return -EIO; 1689 case IOMAP_HOLE: 1690 return map_len; 1691 default: 1692 break; 1693 } 1694 1695 if (wpc->iomap.type == IOMAP_UNWRITTEN) 1696 ioend_flags |= IOMAP_IOEND_UNWRITTEN; 1697 if (wpc->iomap.flags & IOMAP_F_SHARED) 1698 ioend_flags |= IOMAP_IOEND_SHARED; 1699 if (folio_test_dropbehind(folio)) 1700 ioend_flags |= IOMAP_IOEND_DONTCACHE; 1701 if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY)) 1702 ioend_flags |= IOMAP_IOEND_BOUNDARY; 1703 1704 if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos, ioend_flags)) { 1705 new_ioend: 1706 error = iomap_submit_ioend(wpc, 0); 1707 if (error) 1708 return error; 1709 wpc->ioend = iomap_alloc_ioend(wpc, pos, ioend_flags); 1710 } 1711 1712 if 
(!bio_add_folio(&wpc->ioend->io_bio, folio, map_len, poff)) 1713 goto new_ioend; 1714 1715 if (ifs) 1716 atomic_add(map_len, &ifs->write_bytes_pending); 1717 1718 /* 1719 * Clamp io_offset and io_size to the incore EOF so that ondisk 1720 * file size updates in the ioend completion are byte-accurate. 1721 * This avoids recovering files with zeroed tail regions when 1722 * writeback races with appending writes: 1723 * 1724 * Thread 1: Thread 2: 1725 * ------------ ----------- 1726 * write [A, A+B] 1727 * update inode size to A+B 1728 * submit I/O [A, A+BS] 1729 * write [A+B, A+B+C] 1730 * update inode size to A+B+C 1731 * <I/O completes, updates disk size to min(A+B+C, A+BS)> 1732 * <power failure> 1733 * 1734 * After reboot: 1735 * 1) with A+B+C < A+BS, the file has zero padding in range 1736 * [A+B, A+B+C] 1737 * 1738 * |< Block Size (BS) >| 1739 * |DDDDDDDDDDDD0000000000000| 1740 * ^ ^ ^ 1741 * A A+B A+B+C 1742 * (EOF) 1743 * 1744 * 2) with A+B+C > A+BS, the file has zero padding in range 1745 * [A+B, A+BS] 1746 * 1747 * |< Block Size (BS) >|< Block Size (BS) >| 1748 * |DDDDDDDDDDDD0000000000000|00000000000000000000000000| 1749 * ^ ^ ^ ^ 1750 * A A+B A+BS A+B+C 1751 * (EOF) 1752 * 1753 * D = Valid Data 1754 * 0 = Zero Padding 1755 * 1756 * Note that this defeats the ability to chain the ioends of 1757 * appending writes. 
1758 */ 1759 wpc->ioend->io_size += map_len; 1760 if (wpc->ioend->io_offset + wpc->ioend->io_size > end_pos) 1761 wpc->ioend->io_size = end_pos - wpc->ioend->io_offset; 1762 1763 wbc_account_cgroup_owner(wpc->wbc, folio, map_len); 1764 return map_len; 1765 } 1766 EXPORT_SYMBOL_GPL(iomap_add_to_ioend); 1767 1768 static int iomap_writeback_range(struct iomap_writepage_ctx *wpc, 1769 struct folio *folio, u64 pos, u32 rlen, u64 end_pos, 1770 bool *wb_pending) 1771 { 1772 do { 1773 ssize_t ret; 1774 1775 ret = wpc->ops->writeback_range(wpc, folio, pos, rlen, end_pos); 1776 if (WARN_ON_ONCE(ret == 0 || ret > rlen)) 1777 return -EIO; 1778 if (ret < 0) 1779 return ret; 1780 rlen -= ret; 1781 pos += ret; 1782 1783 /* 1784 * Holes are not be written back by ->writeback_range, so track 1785 * if we did handle anything that is not a hole here. 1786 */ 1787 if (wpc->iomap.type != IOMAP_HOLE) 1788 *wb_pending = true; 1789 } while (rlen); 1790 1791 return 0; 1792 } 1793 1794 /* 1795 * Check interaction of the folio with the file end. 1796 * 1797 * If the folio is entirely beyond i_size, return false. If it straddles 1798 * i_size, adjust end_pos and zero all data beyond i_size. 1799 */ 1800 static bool iomap_writepage_handle_eof(struct folio *folio, struct inode *inode, 1801 u64 *end_pos) 1802 { 1803 u64 isize = i_size_read(inode); 1804 1805 if (*end_pos > isize) { 1806 size_t poff = offset_in_folio(folio, isize); 1807 pgoff_t end_index = isize >> PAGE_SHIFT; 1808 1809 /* 1810 * If the folio is entirely ouside of i_size, skip it. 1811 * 1812 * This can happen due to a truncate operation that is in 1813 * progress and in that case truncate will finish it off once 1814 * we've dropped the folio lock. 1815 * 1816 * Note that the pgoff_t used for end_index is an unsigned long. 
1817 * If the given offset is greater than 16TB on a 32-bit system, 1818 * then if we checked if the folio is fully outside i_size with 1819 * "if (folio->index >= end_index + 1)", "end_index + 1" would 1820 * overflow and evaluate to 0. Hence this folio would be 1821 * redirtied and written out repeatedly, which would result in 1822 * an infinite loop; the user program performing this operation 1823 * would hang. Instead, we can detect this situation by 1824 * checking if the folio is totally beyond i_size or if its 1825 * offset is just equal to the EOF. 1826 */ 1827 if (folio->index > end_index || 1828 (folio->index == end_index && poff == 0)) 1829 return false; 1830 1831 /* 1832 * The folio straddles i_size. 1833 * 1834 * It must be zeroed out on each and every writepage invocation 1835 * because it may be mmapped: 1836 * 1837 * A file is mapped in multiples of the page size. For a 1838 * file that is not a multiple of the page size, the 1839 * remaining memory is zeroed when mapped, and writes to that 1840 * region are not written out to the file. 1841 * 1842 * Also adjust the end_pos to the end of file and skip writeback 1843 * for all blocks entirely beyond i_size. 
1844 */ 1845 folio_zero_segment(folio, poff, folio_size(folio)); 1846 *end_pos = isize; 1847 } 1848 1849 return true; 1850 } 1851 1852 static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, 1853 struct folio *folio) 1854 { 1855 struct iomap_folio_state *ifs = folio->private; 1856 struct inode *inode = wpc->inode; 1857 u64 pos = folio_pos(folio); 1858 u64 end_pos = pos + folio_size(folio); 1859 u64 end_aligned = 0; 1860 bool wb_pending = false; 1861 int error = 0; 1862 u32 rlen; 1863 1864 WARN_ON_ONCE(!folio_test_locked(folio)); 1865 WARN_ON_ONCE(folio_test_dirty(folio)); 1866 WARN_ON_ONCE(folio_test_writeback(folio)); 1867 1868 trace_iomap_writepage(inode, pos, folio_size(folio)); 1869 1870 if (!iomap_writepage_handle_eof(folio, inode, &end_pos)) { 1871 folio_unlock(folio); 1872 return 0; 1873 } 1874 WARN_ON_ONCE(end_pos <= pos); 1875 1876 if (i_blocks_per_folio(inode, folio) > 1) { 1877 if (!ifs) { 1878 ifs = ifs_alloc(inode, folio, 0); 1879 iomap_set_range_dirty(folio, 0, end_pos - pos); 1880 } 1881 1882 /* 1883 * Keep the I/O completion handler from clearing the writeback 1884 * bit until we have submitted all blocks by adding a bias to 1885 * ifs->write_bytes_pending, which is dropped after submitting 1886 * all blocks. 1887 */ 1888 WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending) != 0); 1889 atomic_inc(&ifs->write_bytes_pending); 1890 } 1891 1892 /* 1893 * Set the writeback bit ASAP, as the I/O completion for the single 1894 * block per folio case happen hit as soon as we're submitting the bio. 1895 */ 1896 folio_start_writeback(folio); 1897 1898 /* 1899 * Walk through the folio to find dirty areas to write back. 
1900 */ 1901 end_aligned = round_up(end_pos, i_blocksize(inode)); 1902 while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) { 1903 error = iomap_writeback_range(wpc, folio, pos, rlen, end_pos, 1904 &wb_pending); 1905 if (error) 1906 break; 1907 pos += rlen; 1908 } 1909 1910 if (wb_pending) 1911 wpc->nr_folios++; 1912 1913 /* 1914 * We can have dirty bits set past end of file in page_mkwrite path 1915 * while mapping the last partial folio. Hence it's better to clear 1916 * all the dirty bits in the folio here. 1917 */ 1918 iomap_clear_range_dirty(folio, 0, folio_size(folio)); 1919 1920 /* 1921 * Usually the writeback bit is cleared by the I/O completion handler. 1922 * But we may end up either not actually writing any blocks, or (when 1923 * there are multiple blocks in a folio) all I/O might have finished 1924 * already at this point. In that case we need to clear the writeback 1925 * bit ourselves right after unlocking the page. 1926 */ 1927 folio_unlock(folio); 1928 if (ifs) { 1929 if (atomic_dec_and_test(&ifs->write_bytes_pending)) 1930 folio_end_writeback(folio); 1931 } else { 1932 if (!wb_pending) 1933 folio_end_writeback(folio); 1934 } 1935 mapping_set_error(inode->i_mapping, error); 1936 return error; 1937 } 1938 1939 int 1940 iomap_writepages(struct iomap_writepage_ctx *wpc) 1941 { 1942 struct address_space *mapping = wpc->inode->i_mapping; 1943 struct folio *folio = NULL; 1944 int error; 1945 1946 /* 1947 * Writeback from reclaim context should never happen except in the case 1948 * of a VM regression so warn about it and refuse to write the data. 1949 */ 1950 if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC | PF_KSWAPD)) == 1951 PF_MEMALLOC)) 1952 return -EIO; 1953 1954 while ((folio = writeback_iter(mapping, wpc->wbc, folio, &error))) 1955 error = iomap_writepage_map(wpc, folio); 1956 return iomap_submit_ioend(wpc, error); 1957 } 1958 EXPORT_SYMBOL_GPL(iomap_writepages); 1959