1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright (c) 2016-2025 Christoph Hellwig.
28 * Used for sub-block zeroing in iomap_dio_zero()
38 loff_t size; member
62 if (dio->dops && dio->dops->bio_set) in iomap_dio_alloc_bio()
63 return bio_alloc_bioset(iter->iomap.bdev, nr_vecs, opf, in iomap_dio_alloc_bio()
64 GFP_KERNEL, dio->dops->bio_set); in iomap_dio_alloc_bio()
65 return bio_alloc(iter->iomap.bdev, nr_vecs, opf, GFP_KERNEL); in iomap_dio_alloc_bio()
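/*
 * Illustrative sketch (not from this file): the dops->bio_set hook used above
 * lets a filesystem front-pad the bios allocated for its direct I/O so each
 * bio lives inside a larger per-I/O container.  All "myfs_*" names are
 * hypothetical; bioset_init(), BIO_POOL_SIZE and BIOSET_NEED_BVECS are real
 * block-layer APIs.
 */
#include <linux/bio.h>
#include <linux/iomap.h>

struct myfs_dio_private {
	u64		bytes_pending;	/* whatever per-bio state the fs needs */
	struct bio	bio;		/* must be last: the front-padded bio */
};

static struct bio_set myfs_dio_bioset;

static int __init myfs_dio_init(void)
{
	/* front_pad makes container_of(bio, struct myfs_dio_private, bio) valid */
	return bioset_init(&myfs_dio_bioset, BIO_POOL_SIZE,
			   offsetof(struct myfs_dio_private, bio),
			   BIOSET_NEED_BVECS);
}

static const struct iomap_dio_ops myfs_dio_write_ops = {
	.bio_set	= &myfs_dio_bioset,	/* picked up by iomap_dio_alloc_bio() */
};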
71 struct kiocb *iocb = dio->iocb; in iomap_dio_submit_bio()
73 atomic_inc(&dio->ref); in iomap_dio_submit_bio()
76 if ((iocb->ki_flags & IOCB_HIPRI) && !is_sync_kiocb(iocb)) { in iomap_dio_submit_bio()
78 WRITE_ONCE(iocb->private, bio); in iomap_dio_submit_bio()
81 if (dio->dops && dio->dops->submit_io) { in iomap_dio_submit_bio()
82 dio->dops->submit_io(iter, bio, pos); in iomap_dio_submit_bio()
84 WARN_ON_ONCE(iter->iomap.flags & IOMAP_F_ANON_WRITE); in iomap_dio_submit_bio()
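/*
 * Hedged sketch of a dops->submit_io hook as invoked above: when a filesystem
 * provides one, the hook (not the iomap code) is responsible for actually
 * submitting the bio.  myfs_prepare_bio() is a hypothetical helper; the hook
 * signature follows struct iomap_dio_ops.
 */
static void myfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio,
		loff_t file_offset)
{
	myfs_prepare_bio(bio, file_offset);	/* hypothetical per-bio setup */
	submit_bio(bio);
}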
91 const struct iomap_dio_ops *dops = dio->dops; in iomap_dio_complete()
92 struct kiocb *iocb = dio->iocb; in iomap_dio_complete()
93 loff_t offset = iocb->ki_pos; in iomap_dio_complete()
94 ssize_t ret = dio->error; in iomap_dio_complete()
96 if (dops && dops->end_io) in iomap_dio_complete()
97 ret = dops->end_io(iocb, dio->size, ret, dio->flags); in iomap_dio_complete()
100 ret = dio->size; in iomap_dio_complete()
102 if (offset + ret > dio->i_size && in iomap_dio_complete()
103 !(dio->flags & IOMAP_DIO_WRITE)) in iomap_dio_complete()
104 ret = dio->i_size - offset; in iomap_dio_complete()
109 * non-direct readahead, or faulted in by get_user_pages() if the source in iomap_dio_complete()
114 * And this page cache invalidation has to be after ->end_io(), as some in iomap_dio_complete()
116 * ->end_io() when necessary, otherwise a racing buffer read would cache in iomap_dio_complete()
119 if (!dio->error && dio->size && (dio->flags & IOMAP_DIO_WRITE) && in iomap_dio_complete()
120 !(dio->flags & IOMAP_DIO_NO_INVALIDATE)) in iomap_dio_complete()
121 kiocb_invalidate_post_direct_write(iocb, dio->size); in iomap_dio_complete()
123 inode_dio_end(file_inode(iocb->ki_filp)); in iomap_dio_complete()
126 iocb->ki_pos += ret; in iomap_dio_complete()
132 if (dio->flags & IOMAP_DIO_NEED_SYNC) in iomap_dio_complete()
135 ret += dio->done_before; in iomap_dio_complete()
137 trace_iomap_dio_complete(iocb, dio->error, ret); in iomap_dio_complete()
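/*
 * Shape of a dops->end_io handler as called from iomap_dio_complete() above:
 * @size is the amount of completed I/O, @error the errno so far, and @flags
 * carries the IOMAP_DIO_* state (e.g. UNWRITTEN, COW) recorded at submission.
 * A minimal hedged sketch; myfs_convert_unwritten() and myfs_end_cow() are
 * hypothetical helpers, not in-tree functions.
 */
static int myfs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
		unsigned int flags)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	if (error)
		return error;
	if (!size)
		return 0;

	if (flags & IOMAP_DIO_UNWRITTEN)
		return myfs_convert_unwritten(inode, iocb->ki_pos, size);
	if (flags & IOMAP_DIO_COW)
		return myfs_end_cow(inode, iocb->ki_pos, size);
	return 0;
}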
151 struct kiocb *iocb = dio->iocb; in iomap_dio_complete_work()
153 iocb->ki_complete(iocb, iomap_dio_complete(dio)); in iomap_dio_complete_work()
163 cmpxchg(&dio->error, 0, ret); in iomap_dio_set_error()
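/*
 * The cmpxchg() above gives "first error wins" semantics: dio->error is only
 * written while it still holds 0, so later failing bios cannot overwrite the
 * errno recorded by the first failure.  Equivalent illustration (hypothetical
 * helper, same idea):
 */
static void record_first_error(int *error, int ret)
{
	/* store ret only if *error is still 0, otherwise keep the earlier one */
	cmpxchg(error, 0, ret);
}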
167 * Called when dio->ref reaches zero from an I/O completion.
171 struct kiocb *iocb = dio->iocb; in iomap_dio_done()
173 if (dio->wait_for_completion) { in iomap_dio_done()
175 * Synchronous I/O, task itself will handle any completion work in iomap_dio_done()
178 struct task_struct *waiter = dio->submit.waiter; in iomap_dio_done()
180 WRITE_ONCE(dio->submit.waiter, NULL); in iomap_dio_done()
182 } else if (dio->flags & IOMAP_DIO_INLINE_COMP) { in iomap_dio_done()
183 WRITE_ONCE(iocb->private, NULL); in iomap_dio_done()
184 iomap_dio_complete_work(&dio->aio.work); in iomap_dio_done()
185 } else if (dio->flags & IOMAP_DIO_CALLER_COMP) { in iomap_dio_done()
192 iocb->private = dio; in iomap_dio_done()
193 iocb->dio_complete = iomap_dio_deferred_complete; in iomap_dio_done()
196 * Invoke ->ki_complete() directly. We've assigned our in iomap_dio_done()
199 * notice ->dio_complete being set and will defer calling that in iomap_dio_done()
207 iocb->ki_complete(iocb, 0); in iomap_dio_done()
209 struct inode *inode = file_inode(iocb->ki_filp); in iomap_dio_done()
217 INIT_WORK(&dio->aio.work, iomap_dio_complete_work); in iomap_dio_done()
218 queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work); in iomap_dio_done()
224 struct iomap_dio *dio = bio->bi_private; in iomap_dio_bio_end_io()
225 bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY); in iomap_dio_bio_end_io()
227 if (bio->bi_status) in iomap_dio_bio_end_io()
228 iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status)); in iomap_dio_bio_end_io()
230 if (atomic_dec_and_test(&dio->ref)) in iomap_dio_bio_end_io()
244 struct iomap_dio *dio = ioend->io_bio.bi_private; in iomap_finish_ioend_direct()
245 bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY); in iomap_finish_ioend_direct()
246 u32 vec_count = ioend->io_bio.bi_vcnt; in iomap_finish_ioend_direct()
248 if (ioend->io_error) in iomap_finish_ioend_direct()
249 iomap_dio_set_error(dio, ioend->io_error); in iomap_finish_ioend_direct()
251 if (atomic_dec_and_test(&dio->ref)) { in iomap_finish_ioend_direct()
256 * avoid deadlocks with buffered I/O completions. Tough luck if in iomap_finish_ioend_direct()
260 if (!dio->iocb->ki_filp->f_mapping->nrpages) { in iomap_finish_ioend_direct()
261 dio->flags |= IOMAP_DIO_INLINE_COMP; in iomap_finish_ioend_direct()
262 dio->flags |= IOMAP_DIO_NO_INVALIDATE; in iomap_finish_ioend_direct()
264 dio->flags &= ~IOMAP_DIO_CALLER_COMP; in iomap_finish_ioend_direct()
269 bio_check_pages_dirty(&ioend->io_bio); in iomap_finish_ioend_direct()
271 bio_release_pages(&ioend->io_bio, false); in iomap_finish_ioend_direct()
272 bio_put(&ioend->io_bio); in iomap_finish_ioend_direct()
276 * Return the number of bvecs completed as even direct I/O completions in iomap_finish_ioend_direct()
277 * do significant per-folio work and we'll still want to give up the in iomap_finish_ioend_direct()
286 struct inode *inode = file_inode(dio->iocb->ki_filp); in iomap_dio_zero()
292 * Max block size supported is 64k in iomap_dio_zero()
295 return -EINVAL; in iomap_dio_zero()
298 fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits, in iomap_dio_zero()
300 bio->bi_iter.bi_sector = iomap_sector(&iter->iomap, pos); in iomap_dio_zero()
301 bio->bi_private = dio; in iomap_dio_zero()
302 bio->bi_end_io = iomap_dio_bio_end_io; in iomap_dio_zero()
310 * Use a FUA write if we need datasync semantics and this is a pure data I/O
311 * that doesn't require any metadata updates (including after I/O completion
313 * doesn't have a volatile write cache or supports FUA.
314 * This allows us to avoid cache flushes on I/O completion.
319 if (iomap->flags & (IOMAP_F_SHARED | IOMAP_F_DIRTY)) in iomap_dio_can_use_fua()
321 if (!(dio->flags & IOMAP_DIO_WRITE_THROUGH)) in iomap_dio_can_use_fua()
323 return !bdev_write_cache(iomap->bdev) || bdev_fua(iomap->bdev); in iomap_dio_can_use_fua()
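/*
 * How the helper above is typically consumed when assembling the write bio's
 * op flags (illustrative only, not a verbatim copy of code elided from this
 * listing): either mark the bio FUA, or give up on the write-through
 * optimisation so that completion issues a cache flush.
 */
static blk_opf_t demo_write_opf(const struct iomap *iomap, struct iomap_dio *dio)
{
	blk_opf_t bio_opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;

	if (iomap_dio_can_use_fua(iomap, dio))
		bio_opf |= REQ_FUA;
	else
		dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
	return bio_opf;
}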
328 const struct iomap *iomap = &iter->iomap; in iomap_dio_bio_iter()
329 struct inode *inode = iter->inode; in iomap_dio_bio_iter()
332 loff_t pos = iter->pos; in iomap_dio_bio_iter()
340 if ((pos | length) & (bdev_logical_block_size(iomap->bdev) - 1)) in iomap_dio_bio_iter()
341 return -EINVAL; in iomap_dio_bio_iter()
343 if (dio->flags & IOMAP_DIO_WRITE) { in iomap_dio_bio_iter()
346 if (iomap->flags & IOMAP_F_ATOMIC_BIO) { in iomap_dio_bio_iter()
352 if (length != iter->len) in iomap_dio_bio_iter()
353 return -EINVAL; in iomap_dio_bio_iter()
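/*
 * Userspace view of the atomic-write path referenced above, as a hedged
 * sketch: the application requests an untorn write with RWF_ATOMIC on an
 * O_DIRECT fd, the kernel turns that into IOCB_ATOMIC, and with
 * IOMAP_F_ATOMIC_BIO the whole write must fit in a single bio (the length
 * check above).  Assumes a filesystem and device that support atomic writes
 * for this size and alignment, and uapi headers new enough to define
 * RWF_ATOMIC.
 */
#define _GNU_SOURCE
#include <sys/uio.h>
#include <linux/fs.h>	/* RWF_ATOMIC on sufficiently new headers */

static ssize_t atomic_dio_write(int fd, void *buf, size_t len, off_t off)
{
	struct iovec iov = { .iov_base = buf, .iov_len = len };

	/* fails if the length/alignment cannot be written untorn */
	return pwritev2(fd, &iov, 1, off, RWF_ATOMIC);
}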
357 if (iomap->type == IOMAP_UNWRITTEN) { in iomap_dio_bio_iter()
358 dio->flags |= IOMAP_DIO_UNWRITTEN; in iomap_dio_bio_iter()
362 if (iomap->flags & IOMAP_F_SHARED) in iomap_dio_bio_iter()
363 dio->flags |= IOMAP_DIO_COW; in iomap_dio_bio_iter()
365 if (iomap->flags & IOMAP_F_NEW) in iomap_dio_bio_iter()
367 else if (iomap->type == IOMAP_MAPPED && in iomap_dio_bio_iter()
372 dio->flags &= ~IOMAP_DIO_WRITE_THROUGH; in iomap_dio_bio_iter()
376 * don't require additional I/O at completion time. in iomap_dio_bio_iter()
379 * extend the file size, or issue metadata I/O or cache flushes in iomap_dio_bio_iter()
383 ((dio->flags & IOMAP_DIO_NEED_SYNC) && in iomap_dio_bio_iter()
385 dio->flags &= ~IOMAP_DIO_CALLER_COMP; in iomap_dio_bio_iter()
392 * are operating on right now. The iter will be re-expanded once in iomap_dio_bio_iter()
395 orig_count = iov_iter_count(dio->submit.iter); in iomap_dio_bio_iter()
396 iov_iter_truncate(dio->submit.iter, length); in iomap_dio_bio_iter()
398 if (!iov_iter_count(dio->submit.iter)) in iomap_dio_bio_iter()
406 if (!(dio->flags & (IOMAP_DIO_INLINE_COMP|IOMAP_DIO_CALLER_COMP))) in iomap_dio_bio_iter()
407 dio->iocb->ki_flags &= ~IOCB_HIPRI; in iomap_dio_bio_iter()
410 /* zero out from the start of the block to the write offset */ in iomap_dio_bio_iter()
411 pad = pos & (fs_block_size - 1); in iomap_dio_bio_iter()
413 ret = iomap_dio_zero(iter, dio, pos - pad, pad); in iomap_dio_bio_iter()
418 nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS); in iomap_dio_bio_iter()
421 if (dio->error) { in iomap_dio_bio_iter()
422 iov_iter_revert(dio->submit.iter, copied); in iomap_dio_bio_iter()
428 fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits, in iomap_dio_bio_iter()
430 bio->bi_iter.bi_sector = iomap_sector(iomap, pos); in iomap_dio_bio_iter()
431 bio->bi_write_hint = inode->i_write_hint; in iomap_dio_bio_iter()
432 bio->bi_ioprio = dio->iocb->ki_ioprio; in iomap_dio_bio_iter()
433 bio->bi_private = dio; in iomap_dio_bio_iter()
434 bio->bi_end_io = iomap_dio_bio_end_io; in iomap_dio_bio_iter()
436 ret = bio_iov_iter_get_pages(bio, dio->submit.iter, in iomap_dio_bio_iter()
437 bdev_logical_block_size(iomap->bdev) - 1); in iomap_dio_bio_iter()
441 * through to the sub-block tail zeroing here, otherwise in iomap_dio_bio_iter()
443 * the block we haven't written data to. in iomap_dio_bio_iter()
449 n = bio->bi_iter.bi_size; in iomap_dio_bio_iter()
454 * the tail (complete FS block), similar to when in iomap_dio_bio_iter()
457 ret = -EINVAL; in iomap_dio_bio_iter()
461 if (dio->flags & IOMAP_DIO_WRITE) in iomap_dio_bio_iter()
463 else if (dio->flags & IOMAP_DIO_DIRTY) in iomap_dio_bio_iter()
466 dio->size += n; in iomap_dio_bio_iter()
469 nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, in iomap_dio_bio_iter()
472 * We can only poll for single bio I/Os. in iomap_dio_bio_iter()
475 dio->iocb->ki_flags &= ~IOCB_HIPRI; in iomap_dio_bio_iter()
481 * We need to zero out the tail of a sub-block write if the extent type in iomap_dio_bio_iter()
483 * the block tail in the latter case, we can expose stale data via mmap in iomap_dio_bio_iter()
484 * reads of the EOF block. in iomap_dio_bio_iter()
488 ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) { in iomap_dio_bio_iter()
489 /* zero out from the end of the write to the end of the block */ in iomap_dio_bio_iter()
490 pad = pos & (fs_block_size - 1); in iomap_dio_bio_iter()
493 fs_block_size - pad); in iomap_dio_bio_iter()
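/*
 * Worked example of the head/tail padding math above (plain C, runnable in
 * userspace): with a 4096-byte filesystem block, a direct write of 1000 bytes
 * at offset 6144 zeroes 2048 bytes in front of the data ([4096, 6144)) and
 * 1048 bytes behind it ([7144, 8192)), so newly allocated blocks never expose
 * stale contents.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long fs_block_size = 4096;
	unsigned long long pos = 6144, len = 1000;
	unsigned long long end = pos + len;

	unsigned long long head_pad = pos & (fs_block_size - 1);
	unsigned long long tail_off = end & (fs_block_size - 1);
	unsigned long long tail_pad = tail_off ? fs_block_size - tail_off : 0;

	printf("zero [%llu, %llu) before the write\n", pos - head_pad, pos);
	printf("zero [%llu, %llu) after the write\n", end, end + tail_pad);
	return 0;
}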
497 iov_iter_reexpand(dio->submit.iter, orig_count - copied); in iomap_dio_bio_iter()
505 loff_t length = iov_iter_zero(iomap_length(iter), dio->submit.iter); in iomap_dio_hole_iter()
507 dio->size += length; in iomap_dio_hole_iter()
509 return -EFAULT; in iomap_dio_hole_iter()
515 const struct iomap *iomap = &iomi->iomap; in iomap_dio_inline_iter()
516 struct iov_iter *iter = dio->submit.iter; in iomap_dio_inline_iter()
517 void *inline_data = iomap_inline_data(iomap, iomi->pos); in iomap_dio_inline_iter()
519 loff_t pos = iomi->pos; in iomap_dio_inline_iter()
523 return -EIO; in iomap_dio_inline_iter()
526 return -EIO; in iomap_dio_inline_iter()
528 if (dio->flags & IOMAP_DIO_WRITE) { in iomap_dio_inline_iter()
529 loff_t size = iomi->inode->i_size; in iomap_dio_inline_iter() local
531 if (pos > size) in iomap_dio_inline_iter()
532 memset(iomap_inline_data(iomap, size), 0, pos - size); in iomap_dio_inline_iter()
535 if (pos + copied > size) in iomap_dio_inline_iter()
536 i_size_write(iomi->inode, pos + copied); in iomap_dio_inline_iter()
537 mark_inode_dirty(iomi->inode); in iomap_dio_inline_iter()
542 dio->size += copied; in iomap_dio_inline_iter()
544 return -EFAULT; in iomap_dio_inline_iter()
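/*
 * For the inline path above to run, the filesystem's ->iomap_begin has to
 * report an IOMAP_INLINE mapping whose inline_data points at the in-core copy
 * of the file contents.  Minimal hedged sketch; myfs_inline_buf() is a
 * hypothetical accessor and error handling is omitted.
 */
static int myfs_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
	iomap->type = IOMAP_INLINE;
	iomap->inline_data = myfs_inline_buf(inode);	/* hypothetical */
	iomap->offset = 0;
	iomap->length = i_size_read(inode);
	return 0;
}

static const struct iomap_ops myfs_iomap_ops = {
	.iomap_begin	= myfs_iomap_begin,
};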
550 switch (iter->iomap.type) { in iomap_dio_iter()
552 if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE)) in iomap_dio_iter()
553 return -EIO; in iomap_dio_iter()
556 if (!(dio->flags & IOMAP_DIO_WRITE)) in iomap_dio_iter()
568 * DELALLOC block that the page-mkwrite allocated. in iomap_dio_iter()
570 pr_warn_ratelimited("Direct I/O collision with buffered writes! File: %pD4 Comm: %.20s\n", in iomap_dio_iter()
571 dio->iocb->ki_filp, current->comm); in iomap_dio_iter()
572 return -EIO; in iomap_dio_iter()
575 return -EIO; in iomap_dio_iter()
589 * __iomap_dio_rw can return a partial result if it encounters a non-resident
590 * page in @iter after preparing a transfer. In that case, the non-resident
596 * Returns -ENOTBLK in case of a page invalidation failure for
597 * writes. The caller needs to fall back to buffered I/O in this case.
604 struct inode *inode = file_inode(iocb->ki_filp); in __iomap_dio_rw()
607 .pos = iocb->ki_pos, in __iomap_dio_rw()
625 return ERR_PTR(-ENOMEM); in __iomap_dio_rw()
627 dio->iocb = iocb; in __iomap_dio_rw()
628 atomic_set(&dio->ref, 1); in __iomap_dio_rw()
629 dio->size = 0; in __iomap_dio_rw()
630 dio->i_size = i_size_read(inode); in __iomap_dio_rw()
631 dio->dops = dops; in __iomap_dio_rw()
632 dio->error = 0; in __iomap_dio_rw()
633 dio->flags = 0; in __iomap_dio_rw()
634 dio->done_before = done_before; in __iomap_dio_rw()
636 dio->submit.iter = iter; in __iomap_dio_rw()
637 dio->submit.waiter = current; in __iomap_dio_rw()
639 if (iocb->ki_flags & IOCB_NOWAIT) in __iomap_dio_rw()
644 dio->flags |= IOMAP_DIO_INLINE_COMP; in __iomap_dio_rw()
646 if (iomi.pos >= dio->i_size) in __iomap_dio_rw()
650 dio->flags |= IOMAP_DIO_DIRTY; in __iomap_dio_rw()
657 dio->flags |= IOMAP_DIO_WRITE; in __iomap_dio_rw()
665 if (iocb->ki_flags & IOCB_DIO_CALLER_COMP) in __iomap_dio_rw()
666 dio->flags |= IOMAP_DIO_CALLER_COMP; in __iomap_dio_rw()
669 ret = -EAGAIN; in __iomap_dio_rw()
670 if (iomi.pos >= dio->i_size || in __iomap_dio_rw()
671 iomi.pos + iomi.len > dio->i_size) in __iomap_dio_rw()
676 if (iocb->ki_flags & IOCB_ATOMIC) in __iomap_dio_rw()
681 dio->flags |= IOMAP_DIO_NEED_SYNC; in __iomap_dio_rw()
686 * FUA writes through the device's write cache, or a in __iomap_dio_rw()
688 * cache. For the former, any non-FUA write that occurs in __iomap_dio_rw()
690 * whether a cache flush is necessary. in __iomap_dio_rw()
692 if (!(iocb->ki_flags & IOCB_SYNC)) in __iomap_dio_rw()
693 dio->flags |= IOMAP_DIO_WRITE_THROUGH; in __iomap_dio_rw()
697 * Try to invalidate cache pages for the range we are writing. in __iomap_dio_rw()
699 * buffered I/O. in __iomap_dio_rw()
703 if (ret != -EAGAIN) { in __iomap_dio_rw()
706 if (iocb->ki_flags & IOCB_ATOMIC) { in __iomap_dio_rw()
712 ret = -EAGAIN; in __iomap_dio_rw()
715 ret = -ENOTBLK; in __iomap_dio_rw()
721 if (!wait_for_completion && !inode->i_sb->s_dio_done_wq) { in __iomap_dio_rw()
722 ret = sb_init_dio_done_wq(inode->i_sb); in __iomap_dio_rw()
735 * We can only poll for single bio I/Os. in __iomap_dio_rw()
737 iocb->ki_flags &= ~IOCB_HIPRI; in __iomap_dio_rw()
747 if (iov_iter_rw(iter) == READ && iomi.pos >= dio->i_size) in __iomap_dio_rw()
748 iov_iter_revert(iter, iomi.pos - dio->i_size); in __iomap_dio_rw()
750 if (ret == -EFAULT && dio->size && (dio_flags & IOMAP_DIO_PARTIAL)) { in __iomap_dio_rw()
751 if (!(iocb->ki_flags & IOCB_NOWAIT)) in __iomap_dio_rw()
756 /* magic error code to fall back to buffered I/O */ in __iomap_dio_rw()
757 if (ret == -ENOTBLK) { in __iomap_dio_rw()
766 * media, we don't need to flush the cache on IO completion. Clear the in __iomap_dio_rw()
769 if (dio->flags & IOMAP_DIO_WRITE_THROUGH) in __iomap_dio_rw()
770 dio->flags &= ~IOMAP_DIO_NEED_SYNC; in __iomap_dio_rw()
781 * I/O completion handler will complete and free it. in __iomap_dio_rw()
783 * iocb, the I/O completion handler will wake us up on the drop in __iomap_dio_rw()
785 * after we got woken by the I/O completion handler. in __iomap_dio_rw()
787 dio->wait_for_completion = wait_for_completion; in __iomap_dio_rw()
788 if (!atomic_dec_and_test(&dio->ref)) { in __iomap_dio_rw()
791 return ERR_PTR(-EIOCBQUEUED); in __iomap_dio_rw()
796 if (!READ_ONCE(dio->submit.waiter)) in __iomap_dio_rw()
835 return -ENOMEM; in iomap_dio_init()
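/*
 * Putting it together from the caller's side: a hedged sketch of how a
 * filesystem's ->write_iter might drive the machinery above through
 * iomap_dio_rw().  myfs_iomap_ops / myfs_dio_write_ops are the hypothetical
 * ops sketched earlier; locking and generic write checks are simplified, and
 * myfs_buffered_write_fallback() is a hypothetical helper.
 */
static ssize_t myfs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!inode_trylock(inode))
			return -EAGAIN;
	} else {
		inode_lock(inode);
	}

	ret = iomap_dio_rw(iocb, from, &myfs_iomap_ops, &myfs_dio_write_ops,
			   0, NULL, 0);

	inode_unlock(inode);

	/*
	 * -ENOTBLK is the "magic" code mentioned above: page cache
	 * invalidation failed, so the write must be redone buffered.
	 */
	if (ret == -ENOTBLK)
		ret = myfs_buffered_write_fallback(iocb, from);
	return ret;
}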