1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2010 Red Hat, Inc. 4 * Copyright (C) 2016-2023 Christoph Hellwig. 5 */ 6 #include <linux/module.h> 7 #include <linux/compiler.h> 8 #include <linux/fs.h> 9 #include <linux/iomap.h> 10 #include <linux/pagemap.h> 11 #include <linux/uio.h> 12 #include <linux/buffer_head.h> 13 #include <linux/dax.h> 14 #include <linux/writeback.h> 15 #include <linux/swap.h> 16 #include <linux/bio.h> 17 #include <linux/sched/signal.h> 18 #include <linux/migrate.h> 19 #include "internal.h" 20 #include "trace.h" 21 22 #include "../internal.h" 23 24 /* 25 * Structure allocated for each folio to track per-block uptodate, dirty state 26 * and I/O completions. 27 */ 28 struct iomap_folio_state { 29 spinlock_t state_lock; 30 unsigned int read_bytes_pending; 31 atomic_t write_bytes_pending; 32 33 /* 34 * Each block has two bits in this bitmap: 35 * Bits [0..blocks_per_folio) has the uptodate status. 36 * Bits [b_p_f...(2*b_p_f)) has the dirty status. 37 */ 38 unsigned long state[]; 39 }; 40 41 static inline bool ifs_is_fully_uptodate(struct folio *folio, 42 struct iomap_folio_state *ifs) 43 { 44 struct inode *inode = folio->mapping->host; 45 46 return bitmap_full(ifs->state, i_blocks_per_folio(inode, folio)); 47 } 48 49 static inline bool ifs_block_is_uptodate(struct iomap_folio_state *ifs, 50 unsigned int block) 51 { 52 return test_bit(block, ifs->state); 53 } 54 55 static bool ifs_set_range_uptodate(struct folio *folio, 56 struct iomap_folio_state *ifs, size_t off, size_t len) 57 { 58 struct inode *inode = folio->mapping->host; 59 unsigned int first_blk = off >> inode->i_blkbits; 60 unsigned int last_blk = (off + len - 1) >> inode->i_blkbits; 61 unsigned int nr_blks = last_blk - first_blk + 1; 62 63 bitmap_set(ifs->state, first_blk, nr_blks); 64 return ifs_is_fully_uptodate(folio, ifs); 65 } 66 67 static void iomap_set_range_uptodate(struct folio *folio, size_t off, 68 size_t len) 69 { 70 struct iomap_folio_state *ifs = folio->private; 71 unsigned long flags; 72 bool uptodate = true; 73 74 if (folio_test_uptodate(folio)) 75 return; 76 77 if (ifs) { 78 spin_lock_irqsave(&ifs->state_lock, flags); 79 uptodate = ifs_set_range_uptodate(folio, ifs, off, len); 80 spin_unlock_irqrestore(&ifs->state_lock, flags); 81 } 82 83 if (uptodate) 84 folio_mark_uptodate(folio); 85 } 86 87 static inline bool ifs_block_is_dirty(struct folio *folio, 88 struct iomap_folio_state *ifs, int block) 89 { 90 struct inode *inode = folio->mapping->host; 91 unsigned int blks_per_folio = i_blocks_per_folio(inode, folio); 92 93 return test_bit(block + blks_per_folio, ifs->state); 94 } 95 96 static unsigned ifs_find_dirty_range(struct folio *folio, 97 struct iomap_folio_state *ifs, u64 *range_start, u64 range_end) 98 { 99 struct inode *inode = folio->mapping->host; 100 unsigned start_blk = 101 offset_in_folio(folio, *range_start) >> inode->i_blkbits; 102 unsigned end_blk = min_not_zero( 103 offset_in_folio(folio, range_end) >> inode->i_blkbits, 104 i_blocks_per_folio(inode, folio)); 105 unsigned nblks = 1; 106 107 while (!ifs_block_is_dirty(folio, ifs, start_blk)) 108 if (++start_blk == end_blk) 109 return 0; 110 111 while (start_blk + nblks < end_blk) { 112 if (!ifs_block_is_dirty(folio, ifs, start_blk + nblks)) 113 break; 114 nblks++; 115 } 116 117 *range_start = folio_pos(folio) + (start_blk << inode->i_blkbits); 118 return nblks << inode->i_blkbits; 119 } 120 121 static unsigned iomap_find_dirty_range(struct folio *folio, u64 *range_start, 122 u64 range_end) 123 { 124 struct 
iomap_folio_state *ifs = folio->private; 125 126 if (*range_start >= range_end) 127 return 0; 128 129 if (ifs) 130 return ifs_find_dirty_range(folio, ifs, range_start, range_end); 131 return range_end - *range_start; 132 } 133 134 static void ifs_clear_range_dirty(struct folio *folio, 135 struct iomap_folio_state *ifs, size_t off, size_t len) 136 { 137 struct inode *inode = folio->mapping->host; 138 unsigned int blks_per_folio = i_blocks_per_folio(inode, folio); 139 unsigned int first_blk = (off >> inode->i_blkbits); 140 unsigned int last_blk = (off + len - 1) >> inode->i_blkbits; 141 unsigned int nr_blks = last_blk - first_blk + 1; 142 unsigned long flags; 143 144 spin_lock_irqsave(&ifs->state_lock, flags); 145 bitmap_clear(ifs->state, first_blk + blks_per_folio, nr_blks); 146 spin_unlock_irqrestore(&ifs->state_lock, flags); 147 } 148 149 static void iomap_clear_range_dirty(struct folio *folio, size_t off, size_t len) 150 { 151 struct iomap_folio_state *ifs = folio->private; 152 153 if (ifs) 154 ifs_clear_range_dirty(folio, ifs, off, len); 155 } 156 157 static void ifs_set_range_dirty(struct folio *folio, 158 struct iomap_folio_state *ifs, size_t off, size_t len) 159 { 160 struct inode *inode = folio->mapping->host; 161 unsigned int blks_per_folio = i_blocks_per_folio(inode, folio); 162 unsigned int first_blk = (off >> inode->i_blkbits); 163 unsigned int last_blk = (off + len - 1) >> inode->i_blkbits; 164 unsigned int nr_blks = last_blk - first_blk + 1; 165 unsigned long flags; 166 167 spin_lock_irqsave(&ifs->state_lock, flags); 168 bitmap_set(ifs->state, first_blk + blks_per_folio, nr_blks); 169 spin_unlock_irqrestore(&ifs->state_lock, flags); 170 } 171 172 static void iomap_set_range_dirty(struct folio *folio, size_t off, size_t len) 173 { 174 struct iomap_folio_state *ifs = folio->private; 175 176 if (ifs) 177 ifs_set_range_dirty(folio, ifs, off, len); 178 } 179 180 static struct iomap_folio_state *ifs_alloc(struct inode *inode, 181 struct folio *folio, unsigned int flags) 182 { 183 struct iomap_folio_state *ifs = folio->private; 184 unsigned int nr_blocks = i_blocks_per_folio(inode, folio); 185 gfp_t gfp; 186 187 if (ifs || nr_blocks <= 1) 188 return ifs; 189 190 if (flags & IOMAP_NOWAIT) 191 gfp = GFP_NOWAIT; 192 else 193 gfp = GFP_NOFS | __GFP_NOFAIL; 194 195 /* 196 * ifs->state tracks two sets of state flags when the 197 * filesystem block size is smaller than the folio size. 198 * The first state tracks per-block uptodate and the 199 * second tracks per-block dirty state. 200 */ 201 ifs = kzalloc(struct_size(ifs, state, 202 BITS_TO_LONGS(2 * nr_blocks)), gfp); 203 if (!ifs) 204 return ifs; 205 206 spin_lock_init(&ifs->state_lock); 207 if (folio_test_uptodate(folio)) 208 bitmap_set(ifs->state, 0, nr_blocks); 209 if (folio_test_dirty(folio)) 210 bitmap_set(ifs->state, nr_blocks, nr_blocks); 211 folio_attach_private(folio, ifs); 212 213 return ifs; 214 } 215 216 static void ifs_free(struct folio *folio) 217 { 218 struct iomap_folio_state *ifs = folio_detach_private(folio); 219 220 if (!ifs) 221 return; 222 WARN_ON_ONCE(ifs->read_bytes_pending != 0); 223 WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending)); 224 WARN_ON_ONCE(ifs_is_fully_uptodate(folio, ifs) != 225 folio_test_uptodate(folio)); 226 kfree(ifs); 227 } 228 229 /* 230 * Calculate the range inside the folio that we actually need to read. 
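 *
 * A worked example (illustrative numbers): with 1k blocks in a 4k folio
 * where block 0 is already uptodate, a request covering the whole folio is
 * trimmed to poff = 1024 and plen = 3072, so the uptodate block is not read
 * again.  Trailing uptodate blocks shorten plen in the same way.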
231 */ 232 static void iomap_adjust_read_range(struct inode *inode, struct folio *folio, 233 loff_t *pos, loff_t length, size_t *offp, size_t *lenp) 234 { 235 struct iomap_folio_state *ifs = folio->private; 236 loff_t orig_pos = *pos; 237 loff_t isize = i_size_read(inode); 238 unsigned block_bits = inode->i_blkbits; 239 unsigned block_size = (1 << block_bits); 240 size_t poff = offset_in_folio(folio, *pos); 241 size_t plen = min_t(loff_t, folio_size(folio) - poff, length); 242 size_t orig_plen = plen; 243 unsigned first = poff >> block_bits; 244 unsigned last = (poff + plen - 1) >> block_bits; 245 246 /* 247 * If the block size is smaller than the page size, we need to check the 248 * per-block uptodate status and adjust the offset and length if needed 249 * to avoid reading in already uptodate ranges. 250 */ 251 if (ifs) { 252 unsigned int i; 253 254 /* move forward for each leading block marked uptodate */ 255 for (i = first; i <= last; i++) { 256 if (!ifs_block_is_uptodate(ifs, i)) 257 break; 258 *pos += block_size; 259 poff += block_size; 260 plen -= block_size; 261 first++; 262 } 263 264 /* truncate len if we find any trailing uptodate block(s) */ 265 while (++i <= last) { 266 if (ifs_block_is_uptodate(ifs, i)) { 267 plen -= (last - i + 1) * block_size; 268 last = i - 1; 269 break; 270 } 271 } 272 } 273 274 /* 275 * If the extent spans the block that contains the i_size, we need to 276 * handle both halves separately so that we properly zero data in the 277 * page cache for blocks that are entirely outside of i_size. 278 */ 279 if (orig_pos <= isize && orig_pos + orig_plen > isize) { 280 unsigned end = offset_in_folio(folio, isize - 1) >> block_bits; 281 282 if (first <= end && last > end) 283 plen -= (last - end) * block_size; 284 } 285 286 *offp = poff; 287 *lenp = plen; 288 } 289 290 static void iomap_finish_folio_read(struct folio *folio, size_t off, 291 size_t len, int error) 292 { 293 struct iomap_folio_state *ifs = folio->private; 294 bool uptodate = !error; 295 bool finished = true; 296 297 if (ifs) { 298 unsigned long flags; 299 300 spin_lock_irqsave(&ifs->state_lock, flags); 301 if (!error) 302 uptodate = ifs_set_range_uptodate(folio, ifs, off, len); 303 ifs->read_bytes_pending -= len; 304 finished = !ifs->read_bytes_pending; 305 spin_unlock_irqrestore(&ifs->state_lock, flags); 306 } 307 308 if (finished) 309 folio_end_read(folio, uptodate); 310 } 311 312 static void iomap_read_end_io(struct bio *bio) 313 { 314 int error = blk_status_to_errno(bio->bi_status); 315 struct folio_iter fi; 316 317 bio_for_each_folio_all(fi, bio) 318 iomap_finish_folio_read(fi.folio, fi.offset, fi.length, error); 319 bio_put(bio); 320 } 321 322 struct iomap_readpage_ctx { 323 struct folio *cur_folio; 324 bool cur_folio_in_bio; 325 struct bio *bio; 326 struct readahead_control *rac; 327 }; 328 329 /** 330 * iomap_read_inline_data - copy inline data into the page cache 331 * @iter: iteration structure 332 * @folio: folio to copy to 333 * 334 * Copy the inline data in @iter into @folio and zero out the rest of the folio. 335 * Only a single IOMAP_INLINE extent is allowed at the end of each file. 336 * Returns zero for success to complete the read, or the usual negative errno. 
337 */ 338 static int iomap_read_inline_data(const struct iomap_iter *iter, 339 struct folio *folio) 340 { 341 const struct iomap *iomap = iomap_iter_srcmap(iter); 342 size_t size = i_size_read(iter->inode) - iomap->offset; 343 size_t offset = offset_in_folio(folio, iomap->offset); 344 345 if (folio_test_uptodate(folio)) 346 return 0; 347 348 if (WARN_ON_ONCE(size > iomap->length)) 349 return -EIO; 350 if (offset > 0) 351 ifs_alloc(iter->inode, folio, iter->flags); 352 353 folio_fill_tail(folio, offset, iomap->inline_data, size); 354 iomap_set_range_uptodate(folio, offset, folio_size(folio) - offset); 355 return 0; 356 } 357 358 static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter, 359 loff_t pos) 360 { 361 const struct iomap *srcmap = iomap_iter_srcmap(iter); 362 363 return srcmap->type != IOMAP_MAPPED || 364 (srcmap->flags & IOMAP_F_NEW) || 365 pos >= i_size_read(iter->inode); 366 } 367 368 static int iomap_readpage_iter(struct iomap_iter *iter, 369 struct iomap_readpage_ctx *ctx) 370 { 371 const struct iomap *iomap = &iter->iomap; 372 loff_t pos = iter->pos; 373 loff_t length = iomap_length(iter); 374 struct folio *folio = ctx->cur_folio; 375 struct iomap_folio_state *ifs; 376 size_t poff, plen; 377 sector_t sector; 378 int ret; 379 380 if (iomap->type == IOMAP_INLINE) { 381 ret = iomap_read_inline_data(iter, folio); 382 if (ret) 383 return ret; 384 return iomap_iter_advance(iter, &length); 385 } 386 387 /* zero post-eof blocks as the page may be mapped */ 388 ifs = ifs_alloc(iter->inode, folio, iter->flags); 389 iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen); 390 if (plen == 0) 391 goto done; 392 393 if (iomap_block_needs_zeroing(iter, pos)) { 394 folio_zero_range(folio, poff, plen); 395 iomap_set_range_uptodate(folio, poff, plen); 396 goto done; 397 } 398 399 ctx->cur_folio_in_bio = true; 400 if (ifs) { 401 spin_lock_irq(&ifs->state_lock); 402 ifs->read_bytes_pending += plen; 403 spin_unlock_irq(&ifs->state_lock); 404 } 405 406 sector = iomap_sector(iomap, pos); 407 if (!ctx->bio || 408 bio_end_sector(ctx->bio) != sector || 409 !bio_add_folio(ctx->bio, folio, plen, poff)) { 410 gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL); 411 gfp_t orig_gfp = gfp; 412 unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE); 413 414 if (ctx->bio) 415 submit_bio(ctx->bio); 416 417 if (ctx->rac) /* same as readahead_gfp_mask */ 418 gfp |= __GFP_NORETRY | __GFP_NOWARN; 419 ctx->bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs), 420 REQ_OP_READ, gfp); 421 /* 422 * If the bio_alloc fails, try it again for a single page to 423 * avoid having to deal with partial page reads. This emulates 424 * what do_mpage_read_folio does. 425 */ 426 if (!ctx->bio) { 427 ctx->bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ, 428 orig_gfp); 429 } 430 if (ctx->rac) 431 ctx->bio->bi_opf |= REQ_RAHEAD; 432 ctx->bio->bi_iter.bi_sector = sector; 433 ctx->bio->bi_end_io = iomap_read_end_io; 434 bio_add_folio_nofail(ctx->bio, folio, plen, poff); 435 } 436 437 done: 438 /* 439 * Move the caller beyond our range so that it keeps making progress. 440 * For that, we have to include any leading non-uptodate ranges, but 441 * we can skip trailing ones as they will be handled in the next 442 * iteration. 
443 */ 444 length = pos - iter->pos + plen; 445 return iomap_iter_advance(iter, &length); 446 } 447 448 static int iomap_read_folio_iter(struct iomap_iter *iter, 449 struct iomap_readpage_ctx *ctx) 450 { 451 int ret; 452 453 while (iomap_length(iter)) { 454 ret = iomap_readpage_iter(iter, ctx); 455 if (ret) 456 return ret; 457 } 458 459 return 0; 460 } 461 462 int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) 463 { 464 struct iomap_iter iter = { 465 .inode = folio->mapping->host, 466 .pos = folio_pos(folio), 467 .len = folio_size(folio), 468 }; 469 struct iomap_readpage_ctx ctx = { 470 .cur_folio = folio, 471 }; 472 int ret; 473 474 trace_iomap_readpage(iter.inode, 1); 475 476 while ((ret = iomap_iter(&iter, ops)) > 0) 477 iter.status = iomap_read_folio_iter(&iter, &ctx); 478 479 if (ctx.bio) { 480 submit_bio(ctx.bio); 481 WARN_ON_ONCE(!ctx.cur_folio_in_bio); 482 } else { 483 WARN_ON_ONCE(ctx.cur_folio_in_bio); 484 folio_unlock(folio); 485 } 486 487 /* 488 * Just like mpage_readahead and block_read_full_folio, we always 489 * return 0 and just set the folio error flag on errors. This 490 * should be cleaned up throughout the stack eventually. 491 */ 492 return 0; 493 } 494 EXPORT_SYMBOL_GPL(iomap_read_folio); 495 496 static int iomap_readahead_iter(struct iomap_iter *iter, 497 struct iomap_readpage_ctx *ctx) 498 { 499 int ret; 500 501 while (iomap_length(iter)) { 502 if (ctx->cur_folio && 503 offset_in_folio(ctx->cur_folio, iter->pos) == 0) { 504 if (!ctx->cur_folio_in_bio) 505 folio_unlock(ctx->cur_folio); 506 ctx->cur_folio = NULL; 507 } 508 if (!ctx->cur_folio) { 509 ctx->cur_folio = readahead_folio(ctx->rac); 510 ctx->cur_folio_in_bio = false; 511 } 512 ret = iomap_readpage_iter(iter, ctx); 513 if (ret) 514 return ret; 515 } 516 517 return 0; 518 } 519 520 /** 521 * iomap_readahead - Attempt to read pages from a file. 522 * @rac: Describes the pages to be read. 523 * @ops: The operations vector for the filesystem. 524 * 525 * This function is for filesystems to call to implement their readahead 526 * address_space operation. 527 * 528 * Context: The @ops callbacks may submit I/O (eg to read the addresses of 529 * blocks from disc), and may wait for it. The caller may be trying to 530 * access a different page, and so sleeping excessively should be avoided. 531 * It may allocate memory, but should avoid costly allocations. This 532 * function is called with memalloc_nofs set, so allocations will not cause 533 * the filesystem to be reentered. 534 */ 535 void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) 536 { 537 struct iomap_iter iter = { 538 .inode = rac->mapping->host, 539 .pos = readahead_pos(rac), 540 .len = readahead_length(rac), 541 }; 542 struct iomap_readpage_ctx ctx = { 543 .rac = rac, 544 }; 545 546 trace_iomap_readahead(rac->mapping->host, readahead_count(rac)); 547 548 while (iomap_iter(&iter, ops) > 0) 549 iter.status = iomap_readahead_iter(&iter, &ctx); 550 551 if (ctx.bio) 552 submit_bio(ctx.bio); 553 if (ctx.cur_folio) { 554 if (!ctx.cur_folio_in_bio) 555 folio_unlock(ctx.cur_folio); 556 } 557 } 558 EXPORT_SYMBOL_GPL(iomap_readahead); 559 560 /* 561 * iomap_is_partially_uptodate checks whether blocks within a folio are 562 * uptodate or not. 563 * 564 * Returns true if all blocks which correspond to the specified part 565 * of the folio are uptodate. 
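 *
 * Filesystems using iomap typically wire this up directly in their
 * address_space operations, for example (illustrative):
 *
 *	.is_partially_uptodate	= iomap_is_partially_uptodate,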
 */
bool iomap_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
{
	struct iomap_folio_state *ifs = folio->private;
	struct inode *inode = folio->mapping->host;
	unsigned first, last, i;

	if (!ifs)
		return false;

	/* Caller's range may extend past the end of this folio */
	count = min(folio_size(folio) - from, count);

	/* First and last blocks in range within folio */
	first = from >> inode->i_blkbits;
	last = (from + count - 1) >> inode->i_blkbits;

	for (i = first; i <= last; i++)
		if (!ifs_block_is_uptodate(ifs, i))
			return false;
	return true;
}
EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);

/**
 * iomap_get_folio - get a folio reference for writing
 * @iter: iteration structure
 * @pos: start offset of write
 * @len: Suggested size of folio to create.
 *
 * Returns a locked reference to the folio at @pos, or an error pointer if the
 * folio could not be obtained.
 */
struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len)
{
	fgf_t fgp = FGP_WRITEBEGIN | FGP_NOFS;

	if (iter->flags & IOMAP_NOWAIT)
		fgp |= FGP_NOWAIT;
	if (iter->flags & IOMAP_DONTCACHE)
		fgp |= FGP_DONTCACHE;
	fgp |= fgf_set_order(len);

	return __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT,
			fgp, mapping_gfp_mask(iter->inode->i_mapping));
}
EXPORT_SYMBOL_GPL(iomap_get_folio);

bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags)
{
	trace_iomap_release_folio(folio->mapping->host, folio_pos(folio),
			folio_size(folio));

	/*
	 * If the folio is dirty, we refuse to release our metadata because
	 * it may be partially dirty.  Once we track per-block dirty state,
	 * we can release the metadata if every block is dirty.
	 */
	if (folio_test_dirty(folio))
		return false;
	ifs_free(folio);
	return true;
}
EXPORT_SYMBOL_GPL(iomap_release_folio);

void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len)
{
	trace_iomap_invalidate_folio(folio->mapping->host,
			folio_pos(folio) + offset, len);

	/*
	 * If we're invalidating the entire folio, clear the dirty state
	 * from it and release it to avoid unnecessary buildup of the LRU.
	 */
	if (offset == 0 && len == folio_size(folio)) {
		WARN_ON_ONCE(folio_test_writeback(folio));
		folio_cancel_dirty(folio);
		ifs_free(folio);
	}
}
EXPORT_SYMBOL_GPL(iomap_invalidate_folio);

bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio)
{
	struct inode *inode = mapping->host;
	size_t len = folio_size(folio);

	ifs_alloc(inode, folio, 0);
	iomap_set_range_dirty(folio, 0, len);
	return filemap_dirty_folio(mapping, folio);
}
EXPORT_SYMBOL_GPL(iomap_dirty_folio);

static void
iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
{
	loff_t i_size = i_size_read(inode);

	/*
	 * Only truncate newly allocated pages beyond EOF, even if the
	 * write started inside the existing inode size.
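	 *
	 * For example (illustrative sizes): if a write at pos 10k for 6k
	 * fails against a file whose i_size is 12k, only the page cache over
	 * [12k, 16k) is truncated; the existing data below i_size is left
	 * alone.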
667 */ 668 if (pos + len > i_size) 669 truncate_pagecache_range(inode, max(pos, i_size), 670 pos + len - 1); 671 } 672 673 static int iomap_read_folio_sync(loff_t block_start, struct folio *folio, 674 size_t poff, size_t plen, const struct iomap *iomap) 675 { 676 struct bio_vec bvec; 677 struct bio bio; 678 679 bio_init(&bio, iomap->bdev, &bvec, 1, REQ_OP_READ); 680 bio.bi_iter.bi_sector = iomap_sector(iomap, block_start); 681 bio_add_folio_nofail(&bio, folio, plen, poff); 682 return submit_bio_wait(&bio); 683 } 684 685 static int __iomap_write_begin(const struct iomap_iter *iter, size_t len, 686 struct folio *folio) 687 { 688 const struct iomap *srcmap = iomap_iter_srcmap(iter); 689 struct iomap_folio_state *ifs; 690 loff_t pos = iter->pos; 691 loff_t block_size = i_blocksize(iter->inode); 692 loff_t block_start = round_down(pos, block_size); 693 loff_t block_end = round_up(pos + len, block_size); 694 unsigned int nr_blocks = i_blocks_per_folio(iter->inode, folio); 695 size_t from = offset_in_folio(folio, pos), to = from + len; 696 size_t poff, plen; 697 698 /* 699 * If the write or zeroing completely overlaps the current folio, then 700 * entire folio will be dirtied so there is no need for 701 * per-block state tracking structures to be attached to this folio. 702 * For the unshare case, we must read in the ondisk contents because we 703 * are not changing pagecache contents. 704 */ 705 if (!(iter->flags & IOMAP_UNSHARE) && pos <= folio_pos(folio) && 706 pos + len >= folio_pos(folio) + folio_size(folio)) 707 return 0; 708 709 ifs = ifs_alloc(iter->inode, folio, iter->flags); 710 if ((iter->flags & IOMAP_NOWAIT) && !ifs && nr_blocks > 1) 711 return -EAGAIN; 712 713 if (folio_test_uptodate(folio)) 714 return 0; 715 716 do { 717 iomap_adjust_read_range(iter->inode, folio, &block_start, 718 block_end - block_start, &poff, &plen); 719 if (plen == 0) 720 break; 721 722 if (!(iter->flags & IOMAP_UNSHARE) && 723 (from <= poff || from >= poff + plen) && 724 (to <= poff || to >= poff + plen)) 725 continue; 726 727 if (iomap_block_needs_zeroing(iter, block_start)) { 728 if (WARN_ON_ONCE(iter->flags & IOMAP_UNSHARE)) 729 return -EIO; 730 folio_zero_segments(folio, poff, from, to, poff + plen); 731 } else { 732 int status; 733 734 if (iter->flags & IOMAP_NOWAIT) 735 return -EAGAIN; 736 737 status = iomap_read_folio_sync(block_start, folio, 738 poff, plen, srcmap); 739 if (status) 740 return status; 741 } 742 iomap_set_range_uptodate(folio, poff, plen); 743 } while ((block_start += plen) < block_end); 744 745 return 0; 746 } 747 748 static struct folio *__iomap_get_folio(struct iomap_iter *iter, size_t len) 749 { 750 const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops; 751 loff_t pos = iter->pos; 752 753 if (!mapping_large_folio_support(iter->inode->i_mapping)) 754 len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos)); 755 756 if (folio_ops && folio_ops->get_folio) 757 return folio_ops->get_folio(iter, pos, len); 758 else 759 return iomap_get_folio(iter, pos, len); 760 } 761 762 static void __iomap_put_folio(struct iomap_iter *iter, size_t ret, 763 struct folio *folio) 764 { 765 const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops; 766 loff_t pos = iter->pos; 767 768 if (folio_ops && folio_ops->put_folio) { 769 folio_ops->put_folio(iter->inode, pos, ret, folio); 770 } else { 771 folio_unlock(folio); 772 folio_put(folio); 773 } 774 } 775 776 /* trim pos and bytes to within a given folio */ 777 static loff_t iomap_trim_folio_range(struct iomap_iter *iter, 778 struct folio 
*folio, size_t *offset, u64 *bytes) 779 { 780 loff_t pos = iter->pos; 781 size_t fsize = folio_size(folio); 782 783 WARN_ON_ONCE(pos < folio_pos(folio)); 784 WARN_ON_ONCE(pos >= folio_pos(folio) + fsize); 785 786 *offset = offset_in_folio(folio, pos); 787 *bytes = min(*bytes, fsize - *offset); 788 789 return pos; 790 } 791 792 static int iomap_write_begin_inline(const struct iomap_iter *iter, 793 struct folio *folio) 794 { 795 /* needs more work for the tailpacking case; disable for now */ 796 if (WARN_ON_ONCE(iomap_iter_srcmap(iter)->offset != 0)) 797 return -EIO; 798 return iomap_read_inline_data(iter, folio); 799 } 800 801 /* 802 * Grab and prepare a folio for write based on iter state. Returns the folio, 803 * offset, and length. Callers can optionally pass a max length *plen, 804 * otherwise init to zero. 805 */ 806 static int iomap_write_begin(struct iomap_iter *iter, struct folio **foliop, 807 size_t *poffset, u64 *plen) 808 { 809 const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops; 810 const struct iomap *srcmap = iomap_iter_srcmap(iter); 811 loff_t pos = iter->pos; 812 u64 len = min_t(u64, SIZE_MAX, iomap_length(iter)); 813 struct folio *folio; 814 int status = 0; 815 816 len = min_not_zero(len, *plen); 817 BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length); 818 if (srcmap != &iter->iomap) 819 BUG_ON(pos + len > srcmap->offset + srcmap->length); 820 821 if (fatal_signal_pending(current)) 822 return -EINTR; 823 824 folio = __iomap_get_folio(iter, len); 825 if (IS_ERR(folio)) 826 return PTR_ERR(folio); 827 828 /* 829 * Now we have a locked folio, before we do anything with it we need to 830 * check that the iomap we have cached is not stale. The inode extent 831 * mapping can change due to concurrent IO in flight (e.g. 832 * IOMAP_UNWRITTEN state can change and memory reclaim could have 833 * reclaimed a previously partially written page at this index after IO 834 * completion before this write reaches this file offset) and hence we 835 * could do the wrong thing here (zero a page range incorrectly or fail 836 * to zero) and corrupt data. 837 */ 838 if (folio_ops && folio_ops->iomap_valid) { 839 bool iomap_valid = folio_ops->iomap_valid(iter->inode, 840 &iter->iomap); 841 if (!iomap_valid) { 842 iter->iomap.flags |= IOMAP_F_STALE; 843 status = 0; 844 goto out_unlock; 845 } 846 } 847 848 pos = iomap_trim_folio_range(iter, folio, poffset, &len); 849 850 if (srcmap->type == IOMAP_INLINE) 851 status = iomap_write_begin_inline(iter, folio); 852 else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) 853 status = __block_write_begin_int(folio, pos, len, NULL, srcmap); 854 else 855 status = __iomap_write_begin(iter, len, folio); 856 857 if (unlikely(status)) 858 goto out_unlock; 859 860 *foliop = folio; 861 *plen = len; 862 return 0; 863 864 out_unlock: 865 __iomap_put_folio(iter, 0, folio); 866 867 return status; 868 } 869 870 static bool __iomap_write_end(struct inode *inode, loff_t pos, size_t len, 871 size_t copied, struct folio *folio) 872 { 873 flush_dcache_folio(folio); 874 875 /* 876 * The blocks that were entirely written will now be uptodate, so we 877 * don't have to worry about a read_folio reading them and overwriting a 878 * partial write. However, if we've encountered a short write and only 879 * partially written into a block, it will not be marked uptodate, so a 880 * read_folio might come in and destroy our partial write. 
881 * 882 * Do the simplest thing and just treat any short write to a 883 * non-uptodate page as a zero-length write, and force the caller to 884 * redo the whole thing. 885 */ 886 if (unlikely(copied < len && !folio_test_uptodate(folio))) 887 return false; 888 iomap_set_range_uptodate(folio, offset_in_folio(folio, pos), len); 889 iomap_set_range_dirty(folio, offset_in_folio(folio, pos), copied); 890 filemap_dirty_folio(inode->i_mapping, folio); 891 return true; 892 } 893 894 static void iomap_write_end_inline(const struct iomap_iter *iter, 895 struct folio *folio, loff_t pos, size_t copied) 896 { 897 const struct iomap *iomap = &iter->iomap; 898 void *addr; 899 900 WARN_ON_ONCE(!folio_test_uptodate(folio)); 901 BUG_ON(!iomap_inline_data_valid(iomap)); 902 903 flush_dcache_folio(folio); 904 addr = kmap_local_folio(folio, pos); 905 memcpy(iomap_inline_data(iomap, pos), addr, copied); 906 kunmap_local(addr); 907 908 mark_inode_dirty(iter->inode); 909 } 910 911 /* 912 * Returns true if all copied bytes have been written to the pagecache, 913 * otherwise return false. 914 */ 915 static bool iomap_write_end(struct iomap_iter *iter, size_t len, size_t copied, 916 struct folio *folio) 917 { 918 const struct iomap *srcmap = iomap_iter_srcmap(iter); 919 loff_t pos = iter->pos; 920 921 if (srcmap->type == IOMAP_INLINE) { 922 iomap_write_end_inline(iter, folio, pos, copied); 923 return true; 924 } 925 926 if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { 927 size_t bh_written; 928 929 bh_written = block_write_end(NULL, iter->inode->i_mapping, pos, 930 len, copied, folio, NULL); 931 WARN_ON_ONCE(bh_written != copied && bh_written != 0); 932 return bh_written == copied; 933 } 934 935 return __iomap_write_end(iter->inode, pos, len, copied, folio); 936 } 937 938 static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) 939 { 940 ssize_t total_written = 0; 941 int status = 0; 942 struct address_space *mapping = iter->inode->i_mapping; 943 size_t chunk = mapping_max_folio_size(mapping); 944 unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0; 945 946 do { 947 struct folio *folio; 948 loff_t old_size; 949 size_t offset; /* Offset into folio */ 950 u64 bytes; /* Bytes to write to folio */ 951 size_t copied; /* Bytes copied from user */ 952 u64 written; /* Bytes have been written */ 953 loff_t pos; 954 955 bytes = iov_iter_count(i); 956 retry: 957 offset = iter->pos & (chunk - 1); 958 bytes = min(chunk - offset, bytes); 959 status = balance_dirty_pages_ratelimited_flags(mapping, 960 bdp_flags); 961 if (unlikely(status)) 962 break; 963 964 if (bytes > iomap_length(iter)) 965 bytes = iomap_length(iter); 966 967 /* 968 * Bring in the user page that we'll copy from _first_. 969 * Otherwise there's a nasty deadlock on copying from the 970 * same page as we're writing to, without it being marked 971 * up-to-date. 972 * 973 * For async buffered writes the assumption is that the user 974 * page has already been faulted in. This can be optimized by 975 * faulting the user page. 
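		 *
		 * The classic case (illustrative) is a write(2) whose source
		 * buffer is an mmap of the same not-yet-uptodate folio that
		 * we are about to lock: without this pre-fault, the atomic
		 * copy below could never make progress.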
976 */ 977 if (unlikely(fault_in_iov_iter_readable(i, bytes) == bytes)) { 978 status = -EFAULT; 979 break; 980 } 981 982 status = iomap_write_begin(iter, &folio, &offset, &bytes); 983 if (unlikely(status)) { 984 iomap_write_failed(iter->inode, iter->pos, bytes); 985 break; 986 } 987 if (iter->iomap.flags & IOMAP_F_STALE) 988 break; 989 990 pos = iter->pos; 991 992 if (mapping_writably_mapped(mapping)) 993 flush_dcache_folio(folio); 994 995 copied = copy_folio_from_iter_atomic(folio, offset, bytes, i); 996 written = iomap_write_end(iter, bytes, copied, folio) ? 997 copied : 0; 998 999 /* 1000 * Update the in-memory inode size after copying the data into 1001 * the page cache. It's up to the file system to write the 1002 * updated size to disk, preferably after I/O completion so that 1003 * no stale data is exposed. Only once that's done can we 1004 * unlock and release the folio. 1005 */ 1006 old_size = iter->inode->i_size; 1007 if (pos + written > old_size) { 1008 i_size_write(iter->inode, pos + written); 1009 iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; 1010 } 1011 __iomap_put_folio(iter, written, folio); 1012 1013 if (old_size < pos) 1014 pagecache_isize_extended(iter->inode, old_size, pos); 1015 1016 cond_resched(); 1017 if (unlikely(written == 0)) { 1018 /* 1019 * A short copy made iomap_write_end() reject the 1020 * thing entirely. Might be memory poisoning 1021 * halfway through, might be a race with munmap, 1022 * might be severe memory pressure. 1023 */ 1024 iomap_write_failed(iter->inode, pos, bytes); 1025 iov_iter_revert(i, copied); 1026 1027 if (chunk > PAGE_SIZE) 1028 chunk /= 2; 1029 if (copied) { 1030 bytes = copied; 1031 goto retry; 1032 } 1033 } else { 1034 total_written += written; 1035 iomap_iter_advance(iter, &written); 1036 } 1037 } while (iov_iter_count(i) && iomap_length(iter)); 1038 1039 return total_written ? 0 : status; 1040 } 1041 1042 ssize_t 1043 iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, 1044 const struct iomap_ops *ops, void *private) 1045 { 1046 struct iomap_iter iter = { 1047 .inode = iocb->ki_filp->f_mapping->host, 1048 .pos = iocb->ki_pos, 1049 .len = iov_iter_count(i), 1050 .flags = IOMAP_WRITE, 1051 .private = private, 1052 }; 1053 ssize_t ret; 1054 1055 if (iocb->ki_flags & IOCB_NOWAIT) 1056 iter.flags |= IOMAP_NOWAIT; 1057 if (iocb->ki_flags & IOCB_DONTCACHE) 1058 iter.flags |= IOMAP_DONTCACHE; 1059 1060 while ((ret = iomap_iter(&iter, ops)) > 0) 1061 iter.status = iomap_write_iter(&iter, i); 1062 1063 if (unlikely(iter.pos == iocb->ki_pos)) 1064 return ret; 1065 ret = iter.pos - iocb->ki_pos; 1066 iocb->ki_pos = iter.pos; 1067 return ret; 1068 } 1069 EXPORT_SYMBOL_GPL(iomap_file_buffered_write); 1070 1071 static void iomap_write_delalloc_ifs_punch(struct inode *inode, 1072 struct folio *folio, loff_t start_byte, loff_t end_byte, 1073 struct iomap *iomap, iomap_punch_t punch) 1074 { 1075 unsigned int first_blk, last_blk, i; 1076 loff_t last_byte; 1077 u8 blkbits = inode->i_blkbits; 1078 struct iomap_folio_state *ifs; 1079 1080 /* 1081 * When we have per-block dirty tracking, there can be 1082 * blocks within a folio which are marked uptodate 1083 * but not dirty. In that case it is necessary to punch 1084 * out such blocks to avoid leaking any delalloc blocks. 
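	 *
	 * For example (illustrative): a folio that was fully read in (all
	 * blocks uptodate) and then had a single block dirtied by a short
	 * write is dirty as a whole, but only that one block will be written
	 * back; the delalloc reservation behind the remaining clean blocks
	 * would otherwise be leaked.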
1085 */ 1086 ifs = folio->private; 1087 if (!ifs) 1088 return; 1089 1090 last_byte = min_t(loff_t, end_byte - 1, 1091 folio_pos(folio) + folio_size(folio) - 1); 1092 first_blk = offset_in_folio(folio, start_byte) >> blkbits; 1093 last_blk = offset_in_folio(folio, last_byte) >> blkbits; 1094 for (i = first_blk; i <= last_blk; i++) { 1095 if (!ifs_block_is_dirty(folio, ifs, i)) 1096 punch(inode, folio_pos(folio) + (i << blkbits), 1097 1 << blkbits, iomap); 1098 } 1099 } 1100 1101 static void iomap_write_delalloc_punch(struct inode *inode, struct folio *folio, 1102 loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte, 1103 struct iomap *iomap, iomap_punch_t punch) 1104 { 1105 if (!folio_test_dirty(folio)) 1106 return; 1107 1108 /* if dirty, punch up to offset */ 1109 if (start_byte > *punch_start_byte) { 1110 punch(inode, *punch_start_byte, start_byte - *punch_start_byte, 1111 iomap); 1112 } 1113 1114 /* Punch non-dirty blocks within folio */ 1115 iomap_write_delalloc_ifs_punch(inode, folio, start_byte, end_byte, 1116 iomap, punch); 1117 1118 /* 1119 * Make sure the next punch start is correctly bound to 1120 * the end of this data range, not the end of the folio. 1121 */ 1122 *punch_start_byte = min_t(loff_t, end_byte, 1123 folio_pos(folio) + folio_size(folio)); 1124 } 1125 1126 /* 1127 * Scan the data range passed to us for dirty page cache folios. If we find a 1128 * dirty folio, punch out the preceding range and update the offset from which 1129 * the next punch will start from. 1130 * 1131 * We can punch out storage reservations under clean pages because they either 1132 * contain data that has been written back - in which case the delalloc punch 1133 * over that range is a no-op - or they have been read faults in which case they 1134 * contain zeroes and we can remove the delalloc backing range and any new 1135 * writes to those pages will do the normal hole filling operation... 1136 * 1137 * This makes the logic simple: we only need to keep the delalloc extents only 1138 * over the dirty ranges of the page cache. 1139 * 1140 * This function uses [start_byte, end_byte) intervals (i.e. open ended) to 1141 * simplify range iterations. 1142 */ 1143 static void iomap_write_delalloc_scan(struct inode *inode, 1144 loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte, 1145 struct iomap *iomap, iomap_punch_t punch) 1146 { 1147 while (start_byte < end_byte) { 1148 struct folio *folio; 1149 1150 /* grab locked page */ 1151 folio = filemap_lock_folio(inode->i_mapping, 1152 start_byte >> PAGE_SHIFT); 1153 if (IS_ERR(folio)) { 1154 start_byte = ALIGN_DOWN(start_byte, PAGE_SIZE) + 1155 PAGE_SIZE; 1156 continue; 1157 } 1158 1159 iomap_write_delalloc_punch(inode, folio, punch_start_byte, 1160 start_byte, end_byte, iomap, punch); 1161 1162 /* move offset to start of next folio in range */ 1163 start_byte = folio_pos(folio) + folio_size(folio); 1164 folio_unlock(folio); 1165 folio_put(folio); 1166 } 1167 } 1168 1169 /* 1170 * When a short write occurs, the filesystem might need to use ->iomap_end 1171 * to remove space reservations created in ->iomap_begin. 1172 * 1173 * For filesystems that use delayed allocation, there can be dirty pages over 1174 * the delalloc extent outside the range of a short write but still within the 1175 * delalloc extent allocated for this iomap if the write raced with page 1176 * faults. 
 *
 * Punch out all the delalloc blocks in the range given except for those that
 * have dirty data still pending in the page cache - those are going to be
 * written and so must still retain the delalloc backing for writeback.
 *
 * The punch() callback *must* only punch delalloc extents in the range passed
 * to it. It must skip over all other types of extents in the range and leave
 * them completely unchanged. It must do this punch atomically with respect to
 * other extent modifications.
 *
 * The punch() callback may be called with a folio locked to prevent writeback
 * extent allocation racing at the edge of the range we are currently punching.
 * The locked folio may or may not cover the range being punched, so it is not
 * safe for the punch() callback to lock folios itself.
 *
 * Lock order is:
 *
 *	inode->i_rwsem (shared or exclusive)
 *	  inode->i_mapping->invalidate_lock (exclusive)
 *	    folio_lock()
 *	      ->punch
 *	        internal filesystem allocation lock
 *
 * As we are scanning the page cache for data, we don't need to reimplement the
 * wheel - mapping_seek_hole_data() does exactly what we need to identify the
 * start and end of data ranges correctly even for sub-folio block sizes. This
 * byte range based iteration is especially convenient because it means we
 * don't have to care about variable size folios, nor where the start or end of
 * the data range lies within a folio, if they lie within the same folio or even
 * if there are multiple discontiguous data ranges within the folio.
 *
 * It should be noted that mapping_seek_hole_data() is not aware of EOF, and so
 * can return data ranges that exist in the cache beyond EOF. e.g. a page fault
 * spanning EOF will initialise the post-EOF data to zeroes and mark it up to
 * date. A write page fault can then mark it dirty. If we then fail a write()
 * beyond EOF into that up to date cached range, we allocate a delalloc block
 * beyond EOF and then have to punch it out. Because the range is up to date,
 * mapping_seek_hole_data() will return it, and we will skip the punch because
 * the folio is dirty. This is incorrect - we always need to punch out delalloc
 * beyond EOF in this case as writeback will never write back and convert that
 * delalloc block beyond EOF. Hence we limit the cached data scan range to EOF,
 * resulting in always punching out the range from the EOF to the end of the
 * range the iomap spans.
 *
 * Intervals are of the form [start_byte, end_byte) (i.e. open ended) because it
 * matches the intervals returned by mapping_seek_hole_data(). i.e. SEEK_DATA
 * returns the start of a data range (start_byte), and SEEK_HOLE(start_byte)
 * returns the end of the data range (data_end). Using closed intervals would
 * require sprinkling this code with magic "+ 1" and "- 1" arithmetic and expose
 * the code to subtle off-by-one bugs....
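 *
 * A worked example (illustrative offsets): if SEEK_DATA from byte 0 returns
 * 8192 and SEEK_HOLE from 8192 returns 16384, the folio scan runs over the
 * data range [8192, 16384); the preceding range [0, 8192) holds no cached
 * data at all and is punched out when the first dirty folio is found (or by
 * the final punch call if none is).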
1227 */ 1228 void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte, 1229 loff_t end_byte, unsigned flags, struct iomap *iomap, 1230 iomap_punch_t punch) 1231 { 1232 loff_t punch_start_byte = start_byte; 1233 loff_t scan_end_byte = min(i_size_read(inode), end_byte); 1234 1235 /* 1236 * The caller must hold invalidate_lock to avoid races with page faults 1237 * re-instantiating folios and dirtying them via ->page_mkwrite whilst 1238 * we walk the cache and perform delalloc extent removal. Failing to do 1239 * this can leave dirty pages with no space reservation in the cache. 1240 */ 1241 lockdep_assert_held_write(&inode->i_mapping->invalidate_lock); 1242 1243 while (start_byte < scan_end_byte) { 1244 loff_t data_end; 1245 1246 start_byte = mapping_seek_hole_data(inode->i_mapping, 1247 start_byte, scan_end_byte, SEEK_DATA); 1248 /* 1249 * If there is no more data to scan, all that is left is to 1250 * punch out the remaining range. 1251 * 1252 * Note that mapping_seek_hole_data is only supposed to return 1253 * either an offset or -ENXIO, so WARN on any other error as 1254 * that would be an API change without updating the callers. 1255 */ 1256 if (start_byte == -ENXIO || start_byte == scan_end_byte) 1257 break; 1258 if (WARN_ON_ONCE(start_byte < 0)) 1259 return; 1260 WARN_ON_ONCE(start_byte < punch_start_byte); 1261 WARN_ON_ONCE(start_byte > scan_end_byte); 1262 1263 /* 1264 * We find the end of this contiguous cached data range by 1265 * seeking from start_byte to the beginning of the next hole. 1266 */ 1267 data_end = mapping_seek_hole_data(inode->i_mapping, start_byte, 1268 scan_end_byte, SEEK_HOLE); 1269 if (WARN_ON_ONCE(data_end < 0)) 1270 return; 1271 1272 /* 1273 * If we race with post-direct I/O invalidation of the page cache, 1274 * there might be no data left at start_byte. 1275 */ 1276 if (data_end == start_byte) 1277 continue; 1278 1279 WARN_ON_ONCE(data_end < start_byte); 1280 WARN_ON_ONCE(data_end > scan_end_byte); 1281 1282 iomap_write_delalloc_scan(inode, &punch_start_byte, start_byte, 1283 data_end, iomap, punch); 1284 1285 /* The next data search starts at the end of this one. 
*/ 1286 start_byte = data_end; 1287 } 1288 1289 if (punch_start_byte < end_byte) 1290 punch(inode, punch_start_byte, end_byte - punch_start_byte, 1291 iomap); 1292 } 1293 EXPORT_SYMBOL_GPL(iomap_write_delalloc_release); 1294 1295 static int iomap_unshare_iter(struct iomap_iter *iter) 1296 { 1297 struct iomap *iomap = &iter->iomap; 1298 u64 bytes = iomap_length(iter); 1299 int status; 1300 1301 if (!iomap_want_unshare_iter(iter)) 1302 return iomap_iter_advance(iter, &bytes); 1303 1304 do { 1305 struct folio *folio; 1306 size_t offset; 1307 bool ret; 1308 1309 bytes = min_t(u64, SIZE_MAX, bytes); 1310 status = iomap_write_begin(iter, &folio, &offset, &bytes); 1311 if (unlikely(status)) 1312 return status; 1313 if (iomap->flags & IOMAP_F_STALE) 1314 break; 1315 1316 ret = iomap_write_end(iter, bytes, bytes, folio); 1317 __iomap_put_folio(iter, bytes, folio); 1318 if (WARN_ON_ONCE(!ret)) 1319 return -EIO; 1320 1321 cond_resched(); 1322 1323 balance_dirty_pages_ratelimited(iter->inode->i_mapping); 1324 1325 status = iomap_iter_advance(iter, &bytes); 1326 if (status) 1327 break; 1328 } while (bytes > 0); 1329 1330 return status; 1331 } 1332 1333 int 1334 iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, 1335 const struct iomap_ops *ops) 1336 { 1337 struct iomap_iter iter = { 1338 .inode = inode, 1339 .pos = pos, 1340 .flags = IOMAP_WRITE | IOMAP_UNSHARE, 1341 }; 1342 loff_t size = i_size_read(inode); 1343 int ret; 1344 1345 if (pos < 0 || pos >= size) 1346 return 0; 1347 1348 iter.len = min(len, size - pos); 1349 while ((ret = iomap_iter(&iter, ops)) > 0) 1350 iter.status = iomap_unshare_iter(&iter); 1351 return ret; 1352 } 1353 EXPORT_SYMBOL_GPL(iomap_file_unshare); 1354 1355 /* 1356 * Flush the remaining range of the iter and mark the current mapping stale. 1357 * This is used when zero range sees an unwritten mapping that may have had 1358 * dirty pagecache over it. 
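 *
 * For example (illustrative): a buffered write into an unwritten extent
 * followed by a zero range over the same bytes must not simply skip the
 * "unwritten" mapping - once writeback completes, the extent converts to
 * written and the data that should have been zeroed becomes the on-disk
 * contents.  Flushing first and then operating on the updated mapping
 * avoids that.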
1359 */ 1360 static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i) 1361 { 1362 struct address_space *mapping = i->inode->i_mapping; 1363 loff_t end = i->pos + i->len - 1; 1364 1365 i->iomap.flags |= IOMAP_F_STALE; 1366 return filemap_write_and_wait_range(mapping, i->pos, end); 1367 } 1368 1369 static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) 1370 { 1371 u64 bytes = iomap_length(iter); 1372 int status; 1373 1374 do { 1375 struct folio *folio; 1376 size_t offset; 1377 bool ret; 1378 1379 bytes = min_t(u64, SIZE_MAX, bytes); 1380 status = iomap_write_begin(iter, &folio, &offset, &bytes); 1381 if (status) 1382 return status; 1383 if (iter->iomap.flags & IOMAP_F_STALE) 1384 break; 1385 1386 /* warn about zeroing folios beyond eof that won't write back */ 1387 WARN_ON_ONCE(folio_pos(folio) > iter->inode->i_size); 1388 1389 folio_zero_range(folio, offset, bytes); 1390 folio_mark_accessed(folio); 1391 1392 ret = iomap_write_end(iter, bytes, bytes, folio); 1393 __iomap_put_folio(iter, bytes, folio); 1394 if (WARN_ON_ONCE(!ret)) 1395 return -EIO; 1396 1397 status = iomap_iter_advance(iter, &bytes); 1398 if (status) 1399 break; 1400 } while (bytes > 0); 1401 1402 if (did_zero) 1403 *did_zero = true; 1404 return status; 1405 } 1406 1407 int 1408 iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, 1409 const struct iomap_ops *ops, void *private) 1410 { 1411 struct iomap_iter iter = { 1412 .inode = inode, 1413 .pos = pos, 1414 .len = len, 1415 .flags = IOMAP_ZERO, 1416 .private = private, 1417 }; 1418 struct address_space *mapping = inode->i_mapping; 1419 unsigned int blocksize = i_blocksize(inode); 1420 unsigned int off = pos & (blocksize - 1); 1421 loff_t plen = min_t(loff_t, len, blocksize - off); 1422 int ret; 1423 bool range_dirty; 1424 1425 /* 1426 * Zero range can skip mappings that are zero on disk so long as 1427 * pagecache is clean. If pagecache was dirty prior to zero range, the 1428 * mapping converts on writeback completion and so must be zeroed. 1429 * 1430 * The simplest way to deal with this across a range is to flush 1431 * pagecache and process the updated mappings. To avoid excessive 1432 * flushing on partial eof zeroing, special case it to zero the 1433 * unaligned start portion if already dirty in pagecache. 1434 */ 1435 if (off && 1436 filemap_range_needs_writeback(mapping, pos, pos + plen - 1)) { 1437 iter.len = plen; 1438 while ((ret = iomap_iter(&iter, ops)) > 0) 1439 iter.status = iomap_zero_iter(&iter, did_zero); 1440 1441 iter.len = len - (iter.pos - pos); 1442 if (ret || !iter.len) 1443 return ret; 1444 } 1445 1446 /* 1447 * To avoid an unconditional flush, check pagecache state and only flush 1448 * if dirty and the fs returns a mapping that might convert on 1449 * writeback. 
1450 */ 1451 range_dirty = filemap_range_needs_writeback(inode->i_mapping, 1452 iter.pos, iter.pos + iter.len - 1); 1453 while ((ret = iomap_iter(&iter, ops)) > 0) { 1454 const struct iomap *srcmap = iomap_iter_srcmap(&iter); 1455 1456 if (srcmap->type == IOMAP_HOLE || 1457 srcmap->type == IOMAP_UNWRITTEN) { 1458 s64 status; 1459 1460 if (range_dirty) { 1461 range_dirty = false; 1462 status = iomap_zero_iter_flush_and_stale(&iter); 1463 } else { 1464 status = iomap_iter_advance_full(&iter); 1465 } 1466 iter.status = status; 1467 continue; 1468 } 1469 1470 iter.status = iomap_zero_iter(&iter, did_zero); 1471 } 1472 return ret; 1473 } 1474 EXPORT_SYMBOL_GPL(iomap_zero_range); 1475 1476 int 1477 iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, 1478 const struct iomap_ops *ops, void *private) 1479 { 1480 unsigned int blocksize = i_blocksize(inode); 1481 unsigned int off = pos & (blocksize - 1); 1482 1483 /* Block boundary? Nothing to do */ 1484 if (!off) 1485 return 0; 1486 return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops, 1487 private); 1488 } 1489 EXPORT_SYMBOL_GPL(iomap_truncate_page); 1490 1491 static int iomap_folio_mkwrite_iter(struct iomap_iter *iter, 1492 struct folio *folio) 1493 { 1494 loff_t length = iomap_length(iter); 1495 int ret; 1496 1497 if (iter->iomap.flags & IOMAP_F_BUFFER_HEAD) { 1498 ret = __block_write_begin_int(folio, iter->pos, length, NULL, 1499 &iter->iomap); 1500 if (ret) 1501 return ret; 1502 block_commit_write(folio, 0, length); 1503 } else { 1504 WARN_ON_ONCE(!folio_test_uptodate(folio)); 1505 folio_mark_dirty(folio); 1506 } 1507 1508 return iomap_iter_advance(iter, &length); 1509 } 1510 1511 vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops, 1512 void *private) 1513 { 1514 struct iomap_iter iter = { 1515 .inode = file_inode(vmf->vma->vm_file), 1516 .flags = IOMAP_WRITE | IOMAP_FAULT, 1517 .private = private, 1518 }; 1519 struct folio *folio = page_folio(vmf->page); 1520 ssize_t ret; 1521 1522 folio_lock(folio); 1523 ret = folio_mkwrite_check_truncate(folio, iter.inode); 1524 if (ret < 0) 1525 goto out_unlock; 1526 iter.pos = folio_pos(folio); 1527 iter.len = ret; 1528 while ((ret = iomap_iter(&iter, ops)) > 0) 1529 iter.status = iomap_folio_mkwrite_iter(&iter, folio); 1530 1531 if (ret < 0) 1532 goto out_unlock; 1533 folio_wait_stable(folio); 1534 return VM_FAULT_LOCKED; 1535 out_unlock: 1536 folio_unlock(folio); 1537 return vmf_fs_error(ret); 1538 } 1539 EXPORT_SYMBOL_GPL(iomap_page_mkwrite); 1540 1541 static void iomap_finish_folio_write(struct inode *inode, struct folio *folio, 1542 size_t len) 1543 { 1544 struct iomap_folio_state *ifs = folio->private; 1545 1546 WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !ifs); 1547 WARN_ON_ONCE(ifs && atomic_read(&ifs->write_bytes_pending) <= 0); 1548 1549 if (!ifs || atomic_sub_and_test(len, &ifs->write_bytes_pending)) 1550 folio_end_writeback(folio); 1551 } 1552 1553 /* 1554 * We're now finished for good with this ioend structure. Update the page 1555 * state, release holds on bios, and finally free up memory. Do not use the 1556 * ioend after this. 
 */
u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend)
{
	struct inode *inode = ioend->io_inode;
	struct bio *bio = &ioend->io_bio;
	struct folio_iter fi;
	u32 folio_count = 0;

	if (ioend->io_error) {
		mapping_set_error(inode->i_mapping, ioend->io_error);
		if (!bio_flagged(bio, BIO_QUIET)) {
			pr_err_ratelimited(
"%s: writeback error on inode %lu, offset %lld, sector %llu",
				inode->i_sb->s_id, inode->i_ino,
				ioend->io_offset, ioend->io_sector);
		}
	}

	/* walk all folios in bio, ending page IO on them */
	bio_for_each_folio_all(fi, bio) {
		iomap_finish_folio_write(inode, fi.folio, fi.length);
		folio_count++;
	}

	bio_put(bio);	/* frees the ioend */
	return folio_count;
}

static void iomap_writepage_end_bio(struct bio *bio)
{
	struct iomap_ioend *ioend = iomap_ioend_from_bio(bio);

	ioend->io_error = blk_status_to_errno(bio->bi_status);
	iomap_finish_ioend_buffered(ioend);
}

/*
 * Submit an ioend.
 *
 * If @error is non-zero, it means that we have a situation where some part of
 * the submission process has failed after we've marked pages for writeback.
 * We cannot cancel the ioend directly in that case, so call the bio end I/O
 * handler with the error status here to run the normal I/O completion handler
 * to clear the writeback bit and let the file system process the errors.
 */
static int iomap_submit_ioend(struct iomap_writepage_ctx *wpc, int error)
{
	if (!wpc->ioend)
		return error;

	/*
	 * Let the file systems prepare the I/O submission and hook in an I/O
	 * completion handler.  This also needs to happen after a failure so
	 * that the file system end I/O handler gets called to clean up.
1612 */ 1613 if (wpc->ops->submit_ioend) { 1614 error = wpc->ops->submit_ioend(wpc, error); 1615 } else { 1616 if (WARN_ON_ONCE(wpc->iomap.flags & IOMAP_F_ANON_WRITE)) 1617 error = -EIO; 1618 if (!error) 1619 submit_bio(&wpc->ioend->io_bio); 1620 } 1621 1622 if (error) { 1623 wpc->ioend->io_bio.bi_status = errno_to_blk_status(error); 1624 bio_endio(&wpc->ioend->io_bio); 1625 } 1626 1627 wpc->ioend = NULL; 1628 return error; 1629 } 1630 1631 static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc, 1632 struct writeback_control *wbc, struct inode *inode, loff_t pos, 1633 u16 ioend_flags) 1634 { 1635 struct bio *bio; 1636 1637 bio = bio_alloc_bioset(wpc->iomap.bdev, BIO_MAX_VECS, 1638 REQ_OP_WRITE | wbc_to_write_flags(wbc), 1639 GFP_NOFS, &iomap_ioend_bioset); 1640 bio->bi_iter.bi_sector = iomap_sector(&wpc->iomap, pos); 1641 bio->bi_end_io = iomap_writepage_end_bio; 1642 bio->bi_write_hint = inode->i_write_hint; 1643 wbc_init_bio(wbc, bio); 1644 wpc->nr_folios = 0; 1645 return iomap_init_ioend(inode, bio, pos, ioend_flags); 1646 } 1647 1648 static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos, 1649 u16 ioend_flags) 1650 { 1651 if (ioend_flags & IOMAP_IOEND_BOUNDARY) 1652 return false; 1653 if ((ioend_flags & IOMAP_IOEND_NOMERGE_FLAGS) != 1654 (wpc->ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS)) 1655 return false; 1656 if (pos != wpc->ioend->io_offset + wpc->ioend->io_size) 1657 return false; 1658 if (!(wpc->iomap.flags & IOMAP_F_ANON_WRITE) && 1659 iomap_sector(&wpc->iomap, pos) != 1660 bio_end_sector(&wpc->ioend->io_bio)) 1661 return false; 1662 /* 1663 * Limit ioend bio chain lengths to minimise IO completion latency. This 1664 * also prevents long tight loops ending page writeback on all the 1665 * folios in the ioend. 1666 */ 1667 if (wpc->nr_folios >= IOEND_BATCH_SIZE) 1668 return false; 1669 return true; 1670 } 1671 1672 /* 1673 * Test to see if we have an existing ioend structure that we could append to 1674 * first; otherwise finish off the current ioend and start another. 1675 * 1676 * If a new ioend is created and cached, the old ioend is submitted to the block 1677 * layer instantly. Batching optimisations are provided by higher level block 1678 * plugging. 1679 * 1680 * At the end of a writeback pass, there will be a cached ioend remaining on the 1681 * writepage context that the caller will need to submit. 
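 *
 * For example (illustrative): if two consecutive dirty blocks of a folio map
 * to physically discontiguous extents, the second block cannot be appended to
 * the current ioend (the sector check in iomap_can_add_to_ioend() fails), so
 * the cached ioend is submitted and a new one is started for the second block.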
1682 */ 1683 static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, 1684 struct writeback_control *wbc, struct folio *folio, 1685 struct inode *inode, loff_t pos, loff_t end_pos, 1686 unsigned len) 1687 { 1688 struct iomap_folio_state *ifs = folio->private; 1689 size_t poff = offset_in_folio(folio, pos); 1690 unsigned int ioend_flags = 0; 1691 int error; 1692 1693 if (wpc->iomap.type == IOMAP_UNWRITTEN) 1694 ioend_flags |= IOMAP_IOEND_UNWRITTEN; 1695 if (wpc->iomap.flags & IOMAP_F_SHARED) 1696 ioend_flags |= IOMAP_IOEND_SHARED; 1697 if (folio_test_dropbehind(folio)) 1698 ioend_flags |= IOMAP_IOEND_DONTCACHE; 1699 if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY)) 1700 ioend_flags |= IOMAP_IOEND_BOUNDARY; 1701 1702 if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos, ioend_flags)) { 1703 new_ioend: 1704 error = iomap_submit_ioend(wpc, 0); 1705 if (error) 1706 return error; 1707 wpc->ioend = iomap_alloc_ioend(wpc, wbc, inode, pos, 1708 ioend_flags); 1709 } 1710 1711 if (!bio_add_folio(&wpc->ioend->io_bio, folio, len, poff)) 1712 goto new_ioend; 1713 1714 if (ifs) 1715 atomic_add(len, &ifs->write_bytes_pending); 1716 1717 /* 1718 * Clamp io_offset and io_size to the incore EOF so that ondisk 1719 * file size updates in the ioend completion are byte-accurate. 1720 * This avoids recovering files with zeroed tail regions when 1721 * writeback races with appending writes: 1722 * 1723 * Thread 1: Thread 2: 1724 * ------------ ----------- 1725 * write [A, A+B] 1726 * update inode size to A+B 1727 * submit I/O [A, A+BS] 1728 * write [A+B, A+B+C] 1729 * update inode size to A+B+C 1730 * <I/O completes, updates disk size to min(A+B+C, A+BS)> 1731 * <power failure> 1732 * 1733 * After reboot: 1734 * 1) with A+B+C < A+BS, the file has zero padding in range 1735 * [A+B, A+B+C] 1736 * 1737 * |< Block Size (BS) >| 1738 * |DDDDDDDDDDDD0000000000000| 1739 * ^ ^ ^ 1740 * A A+B A+B+C 1741 * (EOF) 1742 * 1743 * 2) with A+B+C > A+BS, the file has zero padding in range 1744 * [A+B, A+BS] 1745 * 1746 * |< Block Size (BS) >|< Block Size (BS) >| 1747 * |DDDDDDDDDDDD0000000000000|00000000000000000000000000| 1748 * ^ ^ ^ ^ 1749 * A A+B A+BS A+B+C 1750 * (EOF) 1751 * 1752 * D = Valid Data 1753 * 0 = Zero Padding 1754 * 1755 * Note that this defeats the ability to chain the ioends of 1756 * appending writes. 
 */
	wpc->ioend->io_size += len;
	if (wpc->ioend->io_offset + wpc->ioend->io_size > end_pos)
		wpc->ioend->io_size = end_pos - wpc->ioend->io_offset;

	wbc_account_cgroup_owner(wbc, folio, len);
	return 0;
}

static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
		struct writeback_control *wbc, struct folio *folio,
		struct inode *inode, u64 pos, u64 end_pos,
		unsigned dirty_len, unsigned *count)
{
	int error;

	do {
		unsigned map_len;

		error = wpc->ops->map_blocks(wpc, inode, pos, dirty_len);
		if (error)
			break;
		trace_iomap_writepage_map(inode, pos, dirty_len, &wpc->iomap);

		map_len = min_t(u64, dirty_len,
			wpc->iomap.offset + wpc->iomap.length - pos);
		WARN_ON_ONCE(!folio->private && map_len < dirty_len);

		switch (wpc->iomap.type) {
		case IOMAP_INLINE:
			WARN_ON_ONCE(1);
			error = -EIO;
			break;
		case IOMAP_HOLE:
			break;
		default:
			error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos,
					end_pos, map_len);
			if (!error)
				(*count)++;
			break;
		}
		dirty_len -= map_len;
		pos += map_len;
	} while (dirty_len && !error);

	/*
	 * We cannot cancel the ioend directly here on error.  We may have
	 * already set other pages under writeback and hence we have to run I/O
	 * completion to mark the error state of the pages under writeback
	 * appropriately.
	 *
	 * Just let the file system know what portion of the folio failed to
	 * map.
	 */
	if (error && wpc->ops->discard_folio)
		wpc->ops->discard_folio(folio, pos);
	return error;
}

/*
 * Check interaction of the folio with the file end.
 *
 * If the folio is entirely beyond i_size, return false.  If it straddles
 * i_size, adjust end_pos and zero all data beyond i_size.
 */
static bool iomap_writepage_handle_eof(struct folio *folio, struct inode *inode,
		u64 *end_pos)
{
	u64 isize = i_size_read(inode);

	if (*end_pos > isize) {
		size_t poff = offset_in_folio(folio, isize);
		pgoff_t end_index = isize >> PAGE_SHIFT;

		/*
		 * If the folio is entirely outside of i_size, skip it.
		 *
		 * This can happen due to a truncate operation that is in
		 * progress and in that case truncate will finish it off once
		 * we've dropped the folio lock.
		 *
		 * Note that the pgoff_t used for end_index is an unsigned long.
		 * If the given offset is greater than 16TB on a 32-bit system,
		 * then if we checked if the folio is fully outside i_size with
		 * "if (folio->index >= end_index + 1)", "end_index + 1" would
		 * overflow and evaluate to 0.  Hence this folio would be
		 * redirtied and written out repeatedly, which would result in
		 * an infinite loop; the user program performing this operation
		 * would hang.  Instead, we can detect this situation by
		 * checking if the folio is totally beyond i_size or if its
		 * offset is just equal to the EOF.
		 */
		if (folio->index > end_index ||
		    (folio->index == end_index && poff == 0))
			return false;

		/*
		 * The folio straddles i_size.
		 *
		 * It must be zeroed out on each and every writepage invocation
		 * because it may be mmapped:
		 *
		 *    A file is mapped in multiples of the page size.  For a
		 *    file that is not a multiple of the page size, the
		 *    remaining memory is zeroed when mapped, and writes to
		 *    that region are not written out to the file.
		 *
		 * Also adjust the end_pos to the end of file and skip writeback
		 * for all blocks entirely beyond i_size.
		 */
		folio_zero_segment(folio, poff, folio_size(folio));
		*end_pos = isize;
	}

	return true;
}

static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
		struct writeback_control *wbc, struct folio *folio)
{
	struct iomap_folio_state *ifs = folio->private;
	struct inode *inode = folio->mapping->host;
	u64 pos = folio_pos(folio);
	u64 end_pos = pos + folio_size(folio);
	u64 end_aligned = 0;
	unsigned count = 0;
	int error = 0;
	u32 rlen;

	WARN_ON_ONCE(!folio_test_locked(folio));
	WARN_ON_ONCE(folio_test_dirty(folio));
	WARN_ON_ONCE(folio_test_writeback(folio));

	trace_iomap_writepage(inode, pos, folio_size(folio));

	if (!iomap_writepage_handle_eof(folio, inode, &end_pos)) {
		folio_unlock(folio);
		return 0;
	}
	WARN_ON_ONCE(end_pos <= pos);

	if (i_blocks_per_folio(inode, folio) > 1) {
		if (!ifs) {
			ifs = ifs_alloc(inode, folio, 0);
			iomap_set_range_dirty(folio, 0, end_pos - pos);
		}

		/*
		 * Keep the I/O completion handler from clearing the writeback
		 * bit until we have submitted all blocks by adding a bias to
		 * ifs->write_bytes_pending, which is dropped after submitting
		 * all blocks.
		 */
		WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending) != 0);
		atomic_inc(&ifs->write_bytes_pending);
	}

	/*
	 * Set the writeback bit ASAP, as the I/O completion for the single
	 * block per folio case can happen as soon as we're submitting the bio.
	 */
	folio_start_writeback(folio);

	/*
	 * Walk through the folio to find dirty areas to write back.
	 */
	end_aligned = round_up(end_pos, i_blocksize(inode));
	while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) {
		error = iomap_writepage_map_blocks(wpc, wbc, folio, inode,
				pos, end_pos, rlen, &count);
		if (error)
			break;
		pos += rlen;
	}

	if (count)
		wpc->nr_folios++;

	/*
	 * We can have dirty bits set past end of file in the page_mkwrite path
	 * while mapping the last partial folio.  Hence it's better to clear
	 * all the dirty bits in the folio here.
	 */
	iomap_clear_range_dirty(folio, 0, folio_size(folio));

	/*
	 * Usually the writeback bit is cleared by the I/O completion handler.
	 * But we may end up either not actually writing any blocks, or (when
	 * there are multiple blocks in a folio) all I/O might have finished
	 * already at this point.  In that case we need to clear the writeback
	 * bit ourselves right after unlocking the page.
1949 */ 1950 folio_unlock(folio); 1951 if (ifs) { 1952 if (atomic_dec_and_test(&ifs->write_bytes_pending)) 1953 folio_end_writeback(folio); 1954 } else { 1955 if (!count) 1956 folio_end_writeback(folio); 1957 } 1958 mapping_set_error(inode->i_mapping, error); 1959 return error; 1960 } 1961 1962 int 1963 iomap_writepages(struct address_space *mapping, struct writeback_control *wbc, 1964 struct iomap_writepage_ctx *wpc, 1965 const struct iomap_writeback_ops *ops) 1966 { 1967 struct folio *folio = NULL; 1968 int error; 1969 1970 /* 1971 * Writeback from reclaim context should never happen except in the case 1972 * of a VM regression so warn about it and refuse to write the data. 1973 */ 1974 if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC | PF_KSWAPD)) == 1975 PF_MEMALLOC)) 1976 return -EIO; 1977 1978 wpc->ops = ops; 1979 while ((folio = writeback_iter(mapping, wbc, folio, &error))) 1980 error = iomap_writepage_map(wpc, wbc, folio); 1981 return iomap_submit_ioend(wpc, error); 1982 } 1983 EXPORT_SYMBOL_GPL(iomap_writepages); 1984