11c6fdbd8SKent Overstreet // SPDX-License-Identifier: GPL-2.0 21c6fdbd8SKent Overstreet #ifndef NO_BCACHEFS_FS 31c6fdbd8SKent Overstreet 41c6fdbd8SKent Overstreet #include "bcachefs.h" 57b3f84eaSKent Overstreet #include "alloc_foreground.h" 607a1006aSKent Overstreet #include "bkey_buf.h" 71c6fdbd8SKent Overstreet #include "btree_update.h" 81c6fdbd8SKent Overstreet #include "buckets.h" 91c6fdbd8SKent Overstreet #include "clock.h" 101c6fdbd8SKent Overstreet #include "error.h" 11e2d9912cSKent Overstreet #include "extents.h" 1208c07feaSKent Overstreet #include "extent_update.h" 131c6fdbd8SKent Overstreet #include "fs.h" 141c6fdbd8SKent Overstreet #include "fs-io.h" 151c6fdbd8SKent Overstreet #include "fsck.h" 161c6fdbd8SKent Overstreet #include "inode.h" 171c6fdbd8SKent Overstreet #include "journal.h" 181c6fdbd8SKent Overstreet #include "io.h" 191c6fdbd8SKent Overstreet #include "keylist.h" 201c6fdbd8SKent Overstreet #include "quota.h" 2176426098SKent Overstreet #include "reflink.h" 221c6fdbd8SKent Overstreet #include "trace.h" 231c6fdbd8SKent Overstreet 241c6fdbd8SKent Overstreet #include <linux/aio.h> 251c6fdbd8SKent Overstreet #include <linux/backing-dev.h> 261c6fdbd8SKent Overstreet #include <linux/falloc.h> 271c6fdbd8SKent Overstreet #include <linux/migrate.h> 281c6fdbd8SKent Overstreet #include <linux/mmu_context.h> 291c6fdbd8SKent Overstreet #include <linux/pagevec.h> 309ba2eb25SKent Overstreet #include <linux/rmap.h> 311c6fdbd8SKent Overstreet #include <linux/sched/signal.h> 321c6fdbd8SKent Overstreet #include <linux/task_io_accounting_ops.h> 331c6fdbd8SKent Overstreet #include <linux/uio.h> 341c6fdbd8SKent Overstreet #include <linux/writeback.h> 351c6fdbd8SKent Overstreet 361c6fdbd8SKent Overstreet #include <trace/events/writeback.h> 371c6fdbd8SKent Overstreet 389567413cSKent Overstreet struct folio_vec { 399567413cSKent Overstreet struct folio *fv_folio; 409567413cSKent Overstreet size_t fv_offset; 419567413cSKent Overstreet size_t fv_len; 429567413cSKent Overstreet }; 439567413cSKent Overstreet 449567413cSKent Overstreet static inline struct folio_vec biovec_to_foliovec(struct bio_vec bv) 459567413cSKent Overstreet { 469567413cSKent Overstreet 479567413cSKent Overstreet struct folio *folio = page_folio(bv.bv_page); 489567413cSKent Overstreet size_t offset = (folio_page_idx(folio, bv.bv_page) << PAGE_SHIFT) + 499567413cSKent Overstreet bv.bv_offset; 509567413cSKent Overstreet size_t len = min_t(size_t, folio_size(folio) - offset, bv.bv_len); 519567413cSKent Overstreet 529567413cSKent Overstreet return (struct folio_vec) { 539567413cSKent Overstreet .fv_folio = folio, 549567413cSKent Overstreet .fv_offset = offset, 559567413cSKent Overstreet .fv_len = len, 569567413cSKent Overstreet }; 579567413cSKent Overstreet } 589567413cSKent Overstreet 599567413cSKent Overstreet static inline struct folio_vec bio_iter_iovec_folio(struct bio *bio, 609567413cSKent Overstreet struct bvec_iter iter) 619567413cSKent Overstreet { 629567413cSKent Overstreet return biovec_to_foliovec(bio_iter_iovec(bio, iter)); 639567413cSKent Overstreet } 649567413cSKent Overstreet 659567413cSKent Overstreet #define __bio_for_each_folio(bvl, bio, iter, start) \ 669567413cSKent Overstreet for (iter = (start); \ 679567413cSKent Overstreet (iter).bi_size && \ 689567413cSKent Overstreet ((bvl = bio_iter_iovec_folio((bio), (iter))), 1); \ 699567413cSKent Overstreet bio_advance_iter_single((bio), &(iter), (bvl).fv_len)) 709567413cSKent Overstreet 719567413cSKent Overstreet /** 729567413cSKent Overstreet * bio_for_each_folio - iterate over folios within a bio 739567413cSKent Overstreet * 749567413cSKent Overstreet * Like other non-_all versions, this iterates over what bio->bi_iter currently 759567413cSKent Overstreet * points to. This version is for drivers, where the bio may have previously 769567413cSKent Overstreet * been split or cloned. 779567413cSKent Overstreet */ 789567413cSKent Overstreet #define bio_for_each_folio(bvl, bio, iter) \ 799567413cSKent Overstreet __bio_for_each_folio(bvl, bio, iter, (bio)->bi_iter) 809567413cSKent Overstreet 816b9857b2SBrian Foster /* 826b9857b2SBrian Foster * Use u64 for the end pos and sector helpers because if the folio covers the 836b9857b2SBrian Foster * max supported range of the mapping, the start offset of the next folio 846b9857b2SBrian Foster * overflows loff_t. This breaks much of the range based processing in the 856b9857b2SBrian Foster * buffered write path. 866b9857b2SBrian Foster */ 876b9857b2SBrian Foster static inline u64 folio_end_pos(struct folio *folio) 8830bff594SKent Overstreet { 8930bff594SKent Overstreet return folio_pos(folio) + folio_size(folio); 9030bff594SKent Overstreet } 9130bff594SKent Overstreet 9230bff594SKent Overstreet static inline size_t folio_sectors(struct folio *folio) 9330bff594SKent Overstreet { 9430bff594SKent Overstreet return PAGE_SECTORS << folio_order(folio); 9530bff594SKent Overstreet } 9630bff594SKent Overstreet 9730bff594SKent Overstreet static inline loff_t folio_sector(struct folio *folio) 9830bff594SKent Overstreet { 9930bff594SKent Overstreet return folio_pos(folio) >> 9; 10030bff594SKent Overstreet } 10130bff594SKent Overstreet 1026b9857b2SBrian Foster static inline u64 folio_end_sector(struct folio *folio) 10330bff594SKent Overstreet { 10430bff594SKent Overstreet return folio_end_pos(folio) >> 9; 10530bff594SKent Overstreet } 10630bff594SKent Overstreet 10740022c01SKent Overstreet typedef DARRAY(struct folio *) folios; 10840022c01SKent Overstreet 10940022c01SKent Overstreet static int filemap_get_contig_folios_d(struct address_space *mapping, 1106b9857b2SBrian Foster loff_t start, u64 end, 11140022c01SKent Overstreet int fgp_flags, gfp_t gfp, 11240022c01SKent Overstreet folios *folios) 11340022c01SKent Overstreet { 11440022c01SKent Overstreet struct folio *f; 1156b9857b2SBrian Foster u64 pos = start; 11640022c01SKent Overstreet int ret = 0; 11740022c01SKent Overstreet 11840022c01SKent Overstreet while (pos < end) { 11940022c01SKent Overstreet if ((u64) pos >= (u64) start + (1ULL << 20)) 12040022c01SKent Overstreet fgp_flags &= ~FGP_CREAT; 12140022c01SKent Overstreet 12240022c01SKent Overstreet ret = darray_make_room_gfp(folios, 1, gfp & GFP_KERNEL); 12340022c01SKent Overstreet if (ret) 12440022c01SKent Overstreet break; 12540022c01SKent Overstreet 12640022c01SKent Overstreet f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp); 127b6898917SKent Overstreet if (IS_ERR_OR_NULL(f)) 12840022c01SKent Overstreet break; 12940022c01SKent Overstreet 13040022c01SKent Overstreet BUG_ON(folios->nr && folio_pos(f) != pos); 13140022c01SKent Overstreet 13240022c01SKent Overstreet pos = folio_end_pos(f); 13340022c01SKent Overstreet darray_push(folios, f); 13440022c01SKent Overstreet } 13540022c01SKent Overstreet 13640022c01SKent Overstreet if (!folios->nr && !ret && (fgp_flags & FGP_CREAT)) 13740022c01SKent Overstreet ret = -ENOMEM; 13840022c01SKent Overstreet 13940022c01SKent Overstreet return folios->nr ? 0 : ret; 14040022c01SKent Overstreet } 14140022c01SKent Overstreet 142a8b3a677SKent Overstreet struct nocow_flush { 143a8b3a677SKent Overstreet struct closure *cl; 144a8b3a677SKent Overstreet struct bch_dev *ca; 145a8b3a677SKent Overstreet struct bio bio; 146a8b3a677SKent Overstreet }; 147a8b3a677SKent Overstreet 148a8b3a677SKent Overstreet static void nocow_flush_endio(struct bio *_bio) 149a8b3a677SKent Overstreet { 150a8b3a677SKent Overstreet 151a8b3a677SKent Overstreet struct nocow_flush *bio = container_of(_bio, struct nocow_flush, bio); 152a8b3a677SKent Overstreet 153a8b3a677SKent Overstreet closure_put(bio->cl); 154a8b3a677SKent Overstreet percpu_ref_put(&bio->ca->io_ref); 155a8b3a677SKent Overstreet bio_put(&bio->bio); 156a8b3a677SKent Overstreet } 157a8b3a677SKent Overstreet 158a8b3a677SKent Overstreet static void bch2_inode_flush_nocow_writes_async(struct bch_fs *c, 159a8b3a677SKent Overstreet struct bch_inode_info *inode, 160a8b3a677SKent Overstreet struct closure *cl) 161a8b3a677SKent Overstreet { 162a8b3a677SKent Overstreet struct nocow_flush *bio; 163a8b3a677SKent Overstreet struct bch_dev *ca; 164a8b3a677SKent Overstreet struct bch_devs_mask devs; 165a8b3a677SKent Overstreet unsigned dev; 166a8b3a677SKent Overstreet 167a8b3a677SKent Overstreet dev = find_first_bit(inode->ei_devs_need_flush.d, BCH_SB_MEMBERS_MAX); 168a8b3a677SKent Overstreet if (dev == BCH_SB_MEMBERS_MAX) 169a8b3a677SKent Overstreet return; 170a8b3a677SKent Overstreet 171a8b3a677SKent Overstreet devs = inode->ei_devs_need_flush; 172a8b3a677SKent Overstreet memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush)); 173a8b3a677SKent Overstreet 174a8b3a677SKent Overstreet for_each_set_bit(dev, devs.d, BCH_SB_MEMBERS_MAX) { 175a8b3a677SKent Overstreet rcu_read_lock(); 176a8b3a677SKent Overstreet ca = rcu_dereference(c->devs[dev]); 177a8b3a677SKent Overstreet if (ca && !percpu_ref_tryget(&ca->io_ref)) 178a8b3a677SKent Overstreet ca = NULL; 179a8b3a677SKent Overstreet rcu_read_unlock(); 180a8b3a677SKent Overstreet 181a8b3a677SKent Overstreet if (!ca) 182a8b3a677SKent Overstreet continue; 183a8b3a677SKent Overstreet 184a8b3a677SKent Overstreet bio = container_of(bio_alloc_bioset(ca->disk_sb.bdev, 0, 185a8b3a677SKent Overstreet REQ_OP_FLUSH, 186a8b3a677SKent Overstreet GFP_KERNEL, 187a8b3a677SKent Overstreet &c->nocow_flush_bioset), 188a8b3a677SKent Overstreet struct nocow_flush, bio); 189a8b3a677SKent Overstreet bio->cl = cl; 190a8b3a677SKent Overstreet bio->ca = ca; 191a8b3a677SKent Overstreet bio->bio.bi_end_io = nocow_flush_endio; 192a8b3a677SKent Overstreet closure_bio_submit(&bio->bio, cl); 193a8b3a677SKent Overstreet } 194a8b3a677SKent Overstreet } 195a8b3a677SKent Overstreet 196a8b3a677SKent Overstreet static int bch2_inode_flush_nocow_writes(struct bch_fs *c, 197a8b3a677SKent Overstreet struct bch_inode_info *inode) 198a8b3a677SKent Overstreet { 199a8b3a677SKent Overstreet struct closure cl; 200a8b3a677SKent Overstreet 201a8b3a677SKent Overstreet closure_init_stack(&cl); 202a8b3a677SKent Overstreet bch2_inode_flush_nocow_writes_async(c, inode, &cl); 203a8b3a677SKent Overstreet closure_sync(&cl); 204a8b3a677SKent Overstreet 205a8b3a677SKent Overstreet return 0; 206a8b3a677SKent Overstreet } 207a8b3a677SKent Overstreet 2087f5e31e1SKent Overstreet static inline bool bio_full(struct bio *bio, unsigned len) 2097f5e31e1SKent Overstreet { 2107f5e31e1SKent Overstreet if (bio->bi_vcnt >= bio->bi_max_vecs) 2117f5e31e1SKent Overstreet return true; 2127f5e31e1SKent Overstreet if (bio->bi_iter.bi_size > UINT_MAX - len) 2137f5e31e1SKent Overstreet return true; 2147f5e31e1SKent Overstreet return false; 2157f5e31e1SKent Overstreet } 2167f5e31e1SKent Overstreet 217eb8e6e9cSKent Overstreet static inline struct address_space *faults_disabled_mapping(void) 218eb8e6e9cSKent Overstreet { 219eb8e6e9cSKent Overstreet return (void *) (((unsigned long) current->faults_disabled_mapping) & ~1UL); 220eb8e6e9cSKent Overstreet } 221eb8e6e9cSKent Overstreet 222eb8e6e9cSKent Overstreet static inline void set_fdm_dropped_locks(void) 223eb8e6e9cSKent Overstreet { 224eb8e6e9cSKent Overstreet current->faults_disabled_mapping = 225eb8e6e9cSKent Overstreet (void *) (((unsigned long) current->faults_disabled_mapping)|1); 226eb8e6e9cSKent Overstreet } 227eb8e6e9cSKent Overstreet 228eb8e6e9cSKent Overstreet static inline bool fdm_dropped_locks(void) 229eb8e6e9cSKent Overstreet { 230eb8e6e9cSKent Overstreet return ((unsigned long) current->faults_disabled_mapping) & 1; 231eb8e6e9cSKent Overstreet } 232eb8e6e9cSKent Overstreet 2331c6fdbd8SKent Overstreet struct quota_res { 2341c6fdbd8SKent Overstreet u64 sectors; 2351c6fdbd8SKent Overstreet }; 2361c6fdbd8SKent Overstreet 2379a3df993SKent Overstreet struct bch_writepage_io { 2381c6fdbd8SKent Overstreet struct bch_inode_info *inode; 2391c6fdbd8SKent Overstreet 2401c6fdbd8SKent Overstreet /* must be last: */ 2411c6fdbd8SKent Overstreet struct bch_write_op op; 2421c6fdbd8SKent Overstreet }; 2431c6fdbd8SKent Overstreet 2441c6fdbd8SKent Overstreet struct dio_write { 2451c6fdbd8SKent Overstreet struct kiocb *req; 246182c7bbfSKent Overstreet struct address_space *mapping; 247182c7bbfSKent Overstreet struct bch_inode_info *inode; 248ed484030SKent Overstreet struct mm_struct *mm; 2491c6fdbd8SKent Overstreet unsigned loop:1, 2506b1b186aSKent Overstreet extending:1, 2511c6fdbd8SKent Overstreet sync:1, 252a1ee777bSKent Overstreet flush:1, 2531c6fdbd8SKent Overstreet free_iov:1; 2541c6fdbd8SKent Overstreet struct quota_res quota_res; 255042a1f26SKent Overstreet u64 written; 2561c6fdbd8SKent Overstreet 2571c6fdbd8SKent Overstreet struct iov_iter iter; 2581c6fdbd8SKent Overstreet struct iovec inline_vecs[2]; 2591c6fdbd8SKent Overstreet 2601c6fdbd8SKent Overstreet /* must be last: */ 2619a3df993SKent Overstreet struct bch_write_op op; 2621c6fdbd8SKent Overstreet }; 2631c6fdbd8SKent Overstreet 2641c6fdbd8SKent Overstreet struct dio_read { 2651c6fdbd8SKent Overstreet struct closure cl; 2661c6fdbd8SKent Overstreet struct kiocb *req; 2671c6fdbd8SKent Overstreet long ret; 268b4725cc1SKent Overstreet bool should_dirty; 2691c6fdbd8SKent Overstreet struct bch_read_bio rbio; 2701c6fdbd8SKent Overstreet }; 2711c6fdbd8SKent Overstreet 2721c6fdbd8SKent Overstreet /* pagecache_block must be held */ 273a023127aSKent Overstreet static noinline int write_invalidate_inode_pages_range(struct address_space *mapping, 2741c6fdbd8SKent Overstreet loff_t start, loff_t end) 2751c6fdbd8SKent Overstreet { 2761c6fdbd8SKent Overstreet int ret; 2771c6fdbd8SKent Overstreet 2781c6fdbd8SKent Overstreet /* 2791c6fdbd8SKent Overstreet * XXX: the way this is currently implemented, we can spin if a process 2801c6fdbd8SKent Overstreet * is continually redirtying a specific page 2811c6fdbd8SKent Overstreet */ 2821c6fdbd8SKent Overstreet do { 2831c6fdbd8SKent Overstreet if (!mapping->nrpages) 2841c6fdbd8SKent Overstreet return 0; 2851c6fdbd8SKent Overstreet 2861c6fdbd8SKent Overstreet ret = filemap_write_and_wait_range(mapping, start, end); 2871c6fdbd8SKent Overstreet if (ret) 2881c6fdbd8SKent Overstreet break; 2891c6fdbd8SKent Overstreet 2901c6fdbd8SKent Overstreet if (!mapping->nrpages) 2911c6fdbd8SKent Overstreet return 0; 2921c6fdbd8SKent Overstreet 2931c6fdbd8SKent Overstreet ret = invalidate_inode_pages2_range(mapping, 2941c6fdbd8SKent Overstreet start >> PAGE_SHIFT, 2951c6fdbd8SKent Overstreet end >> PAGE_SHIFT); 2961c6fdbd8SKent Overstreet } while (ret == -EBUSY); 2971c6fdbd8SKent Overstreet 2981c6fdbd8SKent Overstreet return ret; 2991c6fdbd8SKent Overstreet } 3001c6fdbd8SKent Overstreet 3011c6fdbd8SKent Overstreet /* quotas */ 3021c6fdbd8SKent Overstreet 3031c6fdbd8SKent Overstreet #ifdef CONFIG_BCACHEFS_QUOTA 3041c6fdbd8SKent Overstreet 3056b1b186aSKent Overstreet static void __bch2_quota_reservation_put(struct bch_fs *c, 3061c6fdbd8SKent Overstreet struct bch_inode_info *inode, 3071c6fdbd8SKent Overstreet struct quota_res *res) 3081c6fdbd8SKent Overstreet { 3091c6fdbd8SKent Overstreet BUG_ON(res->sectors > inode->ei_quota_reserved); 3101c6fdbd8SKent Overstreet 3111c6fdbd8SKent Overstreet bch2_quota_acct(c, inode->ei_qid, Q_SPC, 31226609b61SKent Overstreet -((s64) res->sectors), KEY_TYPE_QUOTA_PREALLOC); 3131c6fdbd8SKent Overstreet inode->ei_quota_reserved -= res->sectors; 3141c6fdbd8SKent Overstreet res->sectors = 0; 3151c6fdbd8SKent Overstreet } 3161c6fdbd8SKent Overstreet 3176b1b186aSKent Overstreet static void bch2_quota_reservation_put(struct bch_fs *c, 3186b1b186aSKent Overstreet struct bch_inode_info *inode, 3196b1b186aSKent Overstreet struct quota_res *res) 3206b1b186aSKent Overstreet { 3216b1b186aSKent Overstreet if (res->sectors) { 3226b1b186aSKent Overstreet mutex_lock(&inode->ei_quota_lock); 3236b1b186aSKent Overstreet __bch2_quota_reservation_put(c, inode, res); 3246b1b186aSKent Overstreet mutex_unlock(&inode->ei_quota_lock); 3256b1b186aSKent Overstreet } 3266b1b186aSKent Overstreet } 3276b1b186aSKent Overstreet 3281c6fdbd8SKent Overstreet static int bch2_quota_reservation_add(struct bch_fs *c, 3291c6fdbd8SKent Overstreet struct bch_inode_info *inode, 3301c6fdbd8SKent Overstreet struct quota_res *res, 331e8540e56SKent Overstreet u64 sectors, 3321c6fdbd8SKent Overstreet bool check_enospc) 3331c6fdbd8SKent Overstreet { 3341c6fdbd8SKent Overstreet int ret; 3351c6fdbd8SKent Overstreet 336cb1b479dSKent Overstreet if (test_bit(EI_INODE_SNAPSHOT, &inode->ei_flags)) 337cb1b479dSKent Overstreet return 0; 338cb1b479dSKent Overstreet 3391c6fdbd8SKent Overstreet mutex_lock(&inode->ei_quota_lock); 3401c6fdbd8SKent Overstreet ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, 34126609b61SKent Overstreet check_enospc ? KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK); 3421c6fdbd8SKent Overstreet if (likely(!ret)) { 3431c6fdbd8SKent Overstreet inode->ei_quota_reserved += sectors; 3441c6fdbd8SKent Overstreet res->sectors += sectors; 3451c6fdbd8SKent Overstreet } 3461c6fdbd8SKent Overstreet mutex_unlock(&inode->ei_quota_lock); 3471c6fdbd8SKent Overstreet 3481c6fdbd8SKent Overstreet return ret; 3491c6fdbd8SKent Overstreet } 3501c6fdbd8SKent Overstreet 3511c6fdbd8SKent Overstreet #else 3521c6fdbd8SKent Overstreet 3536b1b186aSKent Overstreet static void __bch2_quota_reservation_put(struct bch_fs *c, 3546b1b186aSKent Overstreet struct bch_inode_info *inode, 3556b1b186aSKent Overstreet struct quota_res *res) {} 3566b1b186aSKent Overstreet 3571c6fdbd8SKent Overstreet static void bch2_quota_reservation_put(struct bch_fs *c, 3581c6fdbd8SKent Overstreet struct bch_inode_info *inode, 3596b1b186aSKent Overstreet struct quota_res *res) {} 3601c6fdbd8SKent Overstreet 3611c6fdbd8SKent Overstreet static int bch2_quota_reservation_add(struct bch_fs *c, 3621c6fdbd8SKent Overstreet struct bch_inode_info *inode, 3631c6fdbd8SKent Overstreet struct quota_res *res, 3641c6fdbd8SKent Overstreet unsigned sectors, 3651c6fdbd8SKent Overstreet bool check_enospc) 3661c6fdbd8SKent Overstreet { 3671c6fdbd8SKent Overstreet return 0; 3681c6fdbd8SKent Overstreet } 3691c6fdbd8SKent Overstreet 3701c6fdbd8SKent Overstreet #endif 3711c6fdbd8SKent Overstreet 3721c6fdbd8SKent Overstreet /* i_size updates: */ 3731c6fdbd8SKent Overstreet 3742ea90048SKent Overstreet struct inode_new_size { 3752ea90048SKent Overstreet loff_t new_size; 3762ea90048SKent Overstreet u64 now; 3772ea90048SKent Overstreet unsigned fields; 3782ea90048SKent Overstreet }; 3792ea90048SKent Overstreet 3801c6fdbd8SKent Overstreet static int inode_set_size(struct bch_inode_info *inode, 3811c6fdbd8SKent Overstreet struct bch_inode_unpacked *bi, 3821c6fdbd8SKent Overstreet void *p) 3831c6fdbd8SKent Overstreet { 3842ea90048SKent Overstreet struct inode_new_size *s = p; 3851c6fdbd8SKent Overstreet 3862ea90048SKent Overstreet bi->bi_size = s->new_size; 3872ea90048SKent Overstreet if (s->fields & ATTR_ATIME) 3882ea90048SKent Overstreet bi->bi_atime = s->now; 3892ea90048SKent Overstreet if (s->fields & ATTR_MTIME) 3902ea90048SKent Overstreet bi->bi_mtime = s->now; 3912ea90048SKent Overstreet if (s->fields & ATTR_CTIME) 3922ea90048SKent Overstreet bi->bi_ctime = s->now; 3931c6fdbd8SKent Overstreet 3941c6fdbd8SKent Overstreet return 0; 3951c6fdbd8SKent Overstreet } 3961c6fdbd8SKent Overstreet 39776426098SKent Overstreet int __must_check bch2_write_inode_size(struct bch_fs *c, 3981c6fdbd8SKent Overstreet struct bch_inode_info *inode, 3992ea90048SKent Overstreet loff_t new_size, unsigned fields) 4001c6fdbd8SKent Overstreet { 4012ea90048SKent Overstreet struct inode_new_size s = { 4022ea90048SKent Overstreet .new_size = new_size, 4032ea90048SKent Overstreet .now = bch2_current_time(c), 4042ea90048SKent Overstreet .fields = fields, 4052ea90048SKent Overstreet }; 4062ea90048SKent Overstreet 4072ea90048SKent Overstreet return bch2_write_inode(c, inode, inode_set_size, &s, fields); 4081c6fdbd8SKent Overstreet } 4091c6fdbd8SKent Overstreet 4106b1b186aSKent Overstreet static void __i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, 411190fa7afSKent Overstreet struct quota_res *quota_res, s64 sectors) 4121c6fdbd8SKent Overstreet { 413b33bf1bcSKent Overstreet bch2_fs_inconsistent_on((s64) inode->v.i_blocks + sectors < 0, c, 414b33bf1bcSKent Overstreet "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", 415b33bf1bcSKent Overstreet inode->v.i_ino, (u64) inode->v.i_blocks, sectors, 416b33bf1bcSKent Overstreet inode->ei_inode.bi_sectors); 417b44a66a6SKent Overstreet inode->v.i_blocks += sectors; 418b44a66a6SKent Overstreet 4191c6fdbd8SKent Overstreet #ifdef CONFIG_BCACHEFS_QUOTA 420cb1b479dSKent Overstreet if (quota_res && 421cb1b479dSKent Overstreet !test_bit(EI_INODE_SNAPSHOT, &inode->ei_flags) && 422cb1b479dSKent Overstreet sectors > 0) { 4231c6fdbd8SKent Overstreet BUG_ON(sectors > quota_res->sectors); 4241c6fdbd8SKent Overstreet BUG_ON(sectors > inode->ei_quota_reserved); 4251c6fdbd8SKent Overstreet 4261c6fdbd8SKent Overstreet quota_res->sectors -= sectors; 4271c6fdbd8SKent Overstreet inode->ei_quota_reserved -= sectors; 4281c6fdbd8SKent Overstreet } else { 42926609b61SKent Overstreet bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN); 4301c6fdbd8SKent Overstreet } 4311c6fdbd8SKent Overstreet #endif 4326b1b186aSKent Overstreet } 4336b1b186aSKent Overstreet 4346b1b186aSKent Overstreet static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, 4356b1b186aSKent Overstreet struct quota_res *quota_res, s64 sectors) 4366b1b186aSKent Overstreet { 4376b1b186aSKent Overstreet if (sectors) { 4386b1b186aSKent Overstreet mutex_lock(&inode->ei_quota_lock); 4396b1b186aSKent Overstreet __i_sectors_acct(c, inode, quota_res, sectors); 4401c6fdbd8SKent Overstreet mutex_unlock(&inode->ei_quota_lock); 4411c6fdbd8SKent Overstreet } 4426b1b186aSKent Overstreet } 4431c6fdbd8SKent Overstreet 4441c6fdbd8SKent Overstreet /* page state: */ 4451c6fdbd8SKent Overstreet 4461c6fdbd8SKent Overstreet /* stored in page->private: */ 4471c6fdbd8SKent Overstreet 448a1774a05SKent Overstreet #define BCH_FOLIO_SECTOR_STATE() \ 449a1774a05SKent Overstreet x(unallocated) \ 450a1774a05SKent Overstreet x(reserved) \ 451a1774a05SKent Overstreet x(dirty) \ 452a1774a05SKent Overstreet x(dirty_reserved) \ 453a1774a05SKent Overstreet x(allocated) 454a1774a05SKent Overstreet 455a1774a05SKent Overstreet enum bch_folio_sector_state { 456a1774a05SKent Overstreet #define x(n) SECTOR_##n, 457a1774a05SKent Overstreet BCH_FOLIO_SECTOR_STATE() 458a1774a05SKent Overstreet #undef x 459a1774a05SKent Overstreet }; 460a1774a05SKent Overstreet 461*73bd774dSKent Overstreet static const char * const bch2_folio_sector_states[] = { 462a1774a05SKent Overstreet #define x(n) #n, 463a1774a05SKent Overstreet BCH_FOLIO_SECTOR_STATE() 464a1774a05SKent Overstreet #undef x 465a1774a05SKent Overstreet NULL 466a1774a05SKent Overstreet }; 467a1774a05SKent Overstreet 468a1774a05SKent Overstreet static inline enum bch_folio_sector_state 469a1774a05SKent Overstreet folio_sector_dirty(enum bch_folio_sector_state state) 470a1774a05SKent Overstreet { 471a1774a05SKent Overstreet switch (state) { 472a1774a05SKent Overstreet case SECTOR_unallocated: 473a1774a05SKent Overstreet return SECTOR_dirty; 474a1774a05SKent Overstreet case SECTOR_reserved: 475a1774a05SKent Overstreet return SECTOR_dirty_reserved; 476a1774a05SKent Overstreet default: 477a1774a05SKent Overstreet return state; 478a1774a05SKent Overstreet } 479a1774a05SKent Overstreet } 480a1774a05SKent Overstreet 481a1774a05SKent Overstreet static inline enum bch_folio_sector_state 482a1774a05SKent Overstreet folio_sector_undirty(enum bch_folio_sector_state state) 483a1774a05SKent Overstreet { 484a1774a05SKent Overstreet switch (state) { 485a1774a05SKent Overstreet case SECTOR_dirty: 486a1774a05SKent Overstreet return SECTOR_unallocated; 487a1774a05SKent Overstreet case SECTOR_dirty_reserved: 488a1774a05SKent Overstreet return SECTOR_reserved; 489a1774a05SKent Overstreet default: 490a1774a05SKent Overstreet return state; 491a1774a05SKent Overstreet } 492a1774a05SKent Overstreet } 493a1774a05SKent Overstreet 494a1774a05SKent Overstreet static inline enum bch_folio_sector_state 495a1774a05SKent Overstreet folio_sector_reserve(enum bch_folio_sector_state state) 496a1774a05SKent Overstreet { 497a1774a05SKent Overstreet switch (state) { 498a1774a05SKent Overstreet case SECTOR_unallocated: 499a1774a05SKent Overstreet return SECTOR_reserved; 500a1774a05SKent Overstreet case SECTOR_dirty: 501a1774a05SKent Overstreet return SECTOR_dirty_reserved; 502a1774a05SKent Overstreet default: 503a1774a05SKent Overstreet return state; 504a1774a05SKent Overstreet } 505a1774a05SKent Overstreet } 506a1774a05SKent Overstreet 5073342ac13SKent Overstreet struct bch_folio_sector { 508b44a66a6SKent Overstreet /* Uncompressed, fully allocated replicas (or on disk reservation): */ 509b44a66a6SKent Overstreet unsigned nr_replicas:4; 510f81b648dSKent Overstreet 511b44a66a6SKent Overstreet /* Owns PAGE_SECTORS * replicas_reserved sized in memory reservation: */ 512b44a66a6SKent Overstreet unsigned replicas_reserved:4; 5131c6fdbd8SKent Overstreet 514f57a6a5dSKent Overstreet /* i_sectors: */ 515a1774a05SKent Overstreet enum bch_folio_sector_state state:8; 5161c6fdbd8SKent Overstreet }; 5171c6fdbd8SKent Overstreet 5183342ac13SKent Overstreet struct bch_folio { 5193826ee0bSKent Overstreet spinlock_t lock; 5207f5e31e1SKent Overstreet atomic_t write_count; 5213342ac13SKent Overstreet /* 5223342ac13SKent Overstreet * Is the sector state up to date with the btree? 5233342ac13SKent Overstreet * (Not the data itself) 5243342ac13SKent Overstreet */ 525e6ec361fSKent Overstreet bool uptodate; 52649fe78ffSKent Overstreet struct bch_folio_sector s[]; 527f57a6a5dSKent Overstreet }; 528f57a6a5dSKent Overstreet 529a1774a05SKent Overstreet static inline void folio_sector_set(struct folio *folio, 530a1774a05SKent Overstreet struct bch_folio *s, 531a1774a05SKent Overstreet unsigned i, unsigned n) 532a1774a05SKent Overstreet { 533a1774a05SKent Overstreet s->s[i].state = n; 534a1774a05SKent Overstreet } 535a1774a05SKent Overstreet 536bf98ee10SBrian Foster /* file offset (to folio offset) to bch_folio_sector index */ 537bf98ee10SBrian Foster static inline int folio_pos_to_s(struct folio *folio, loff_t pos) 538bf98ee10SBrian Foster { 539bf98ee10SBrian Foster u64 f_offset = pos - folio_pos(folio); 540bf98ee10SBrian Foster BUG_ON(pos < folio_pos(folio) || pos >= folio_end_pos(folio)); 541bf98ee10SBrian Foster return f_offset >> SECTOR_SHIFT; 542bf98ee10SBrian Foster } 543bf98ee10SBrian Foster 54430bff594SKent Overstreet static inline struct bch_folio *__bch2_folio(struct folio *folio) 5451c6fdbd8SKent Overstreet { 54630bff594SKent Overstreet return folio_has_private(folio) 54730bff594SKent Overstreet ? (struct bch_folio *) folio_get_private(folio) 548f57a6a5dSKent Overstreet : NULL; 549f57a6a5dSKent Overstreet } 5501c6fdbd8SKent Overstreet 55130bff594SKent Overstreet static inline struct bch_folio *bch2_folio(struct folio *folio) 552f57a6a5dSKent Overstreet { 55330bff594SKent Overstreet EBUG_ON(!folio_test_locked(folio)); 5541c6fdbd8SKent Overstreet 55530bff594SKent Overstreet return __bch2_folio(folio); 556f57a6a5dSKent Overstreet } 557f57a6a5dSKent Overstreet 55830bff594SKent Overstreet /* for newly allocated folios: */ 55930bff594SKent Overstreet static void __bch2_folio_release(struct folio *folio) 560f57a6a5dSKent Overstreet { 56130bff594SKent Overstreet kfree(folio_detach_private(folio)); 562f57a6a5dSKent Overstreet } 563f57a6a5dSKent Overstreet 56430bff594SKent Overstreet static void bch2_folio_release(struct folio *folio) 565f57a6a5dSKent Overstreet { 56630bff594SKent Overstreet EBUG_ON(!folio_test_locked(folio)); 56730bff594SKent Overstreet __bch2_folio_release(folio); 568f57a6a5dSKent Overstreet } 569f57a6a5dSKent Overstreet 57030bff594SKent Overstreet /* for newly allocated folios: */ 57130bff594SKent Overstreet static struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp) 572f57a6a5dSKent Overstreet { 5733342ac13SKent Overstreet struct bch_folio *s; 574f57a6a5dSKent Overstreet 57549fe78ffSKent Overstreet s = kzalloc(sizeof(*s) + 57649fe78ffSKent Overstreet sizeof(struct bch_folio_sector) * 57770d41c9eSKent Overstreet folio_sectors(folio), gfp); 578f57a6a5dSKent Overstreet if (!s) 579f57a6a5dSKent Overstreet return NULL; 580f57a6a5dSKent Overstreet 5813826ee0bSKent Overstreet spin_lock_init(&s->lock); 58230bff594SKent Overstreet folio_attach_private(folio, s); 5831c6fdbd8SKent Overstreet return s; 5841c6fdbd8SKent Overstreet } 5851c6fdbd8SKent Overstreet 58630bff594SKent Overstreet static struct bch_folio *bch2_folio_create(struct folio *folio, gfp_t gfp) 587f57a6a5dSKent Overstreet { 58830bff594SKent Overstreet return bch2_folio(folio) ?: __bch2_folio_create(folio, gfp); 589f57a6a5dSKent Overstreet } 590f57a6a5dSKent Overstreet 59179203111SKent Overstreet static unsigned bkey_to_sector_state(struct bkey_s_c k) 592b44a66a6SKent Overstreet { 59379203111SKent Overstreet if (bkey_extent_is_reservation(k)) 594a1774a05SKent Overstreet return SECTOR_reserved; 59579203111SKent Overstreet if (bkey_extent_is_allocation(k.k)) 596a1774a05SKent Overstreet return SECTOR_allocated; 597a1774a05SKent Overstreet return SECTOR_unallocated; 598b44a66a6SKent Overstreet } 599b44a66a6SKent Overstreet 60030bff594SKent Overstreet static void __bch2_folio_set(struct folio *folio, 601e6ec361fSKent Overstreet unsigned pg_offset, unsigned pg_len, 602e6ec361fSKent Overstreet unsigned nr_ptrs, unsigned state) 603e6ec361fSKent Overstreet { 60470d41c9eSKent Overstreet struct bch_folio *s = bch2_folio(folio); 60533e2eb96SKent Overstreet unsigned i, sectors = folio_sectors(folio); 606e6ec361fSKent Overstreet 60733e2eb96SKent Overstreet BUG_ON(pg_offset >= sectors); 60833e2eb96SKent Overstreet BUG_ON(pg_offset + pg_len > sectors); 609e6ec361fSKent Overstreet 610e6ec361fSKent Overstreet spin_lock(&s->lock); 611e6ec361fSKent Overstreet 612e6ec361fSKent Overstreet for (i = pg_offset; i < pg_offset + pg_len; i++) { 613e6ec361fSKent Overstreet s->s[i].nr_replicas = nr_ptrs; 614a1774a05SKent Overstreet folio_sector_set(folio, s, i, state); 615e6ec361fSKent Overstreet } 616e6ec361fSKent Overstreet 61733e2eb96SKent Overstreet if (i == sectors) 618e6ec361fSKent Overstreet s->uptodate = true; 619e6ec361fSKent Overstreet 620e6ec361fSKent Overstreet spin_unlock(&s->lock); 621e6ec361fSKent Overstreet } 622e6ec361fSKent Overstreet 6233342ac13SKent Overstreet /* 6243342ac13SKent Overstreet * Initialize bch_folio state (allocated/unallocated, nr_replicas) from the 6253342ac13SKent Overstreet * extents btree: 6263342ac13SKent Overstreet */ 6273342ac13SKent Overstreet static int bch2_folio_set(struct bch_fs *c, subvol_inum inum, 62830bff594SKent Overstreet struct folio **folios, unsigned nr_folios) 629e6ec361fSKent Overstreet { 630e6ec361fSKent Overstreet struct btree_trans trans; 631e6ec361fSKent Overstreet struct btree_iter iter; 632e6ec361fSKent Overstreet struct bkey_s_c k; 63370d41c9eSKent Overstreet struct bch_folio *s; 63430bff594SKent Overstreet u64 offset = folio_sector(folios[0]); 63570d41c9eSKent Overstreet unsigned folio_idx; 636e6ec361fSKent Overstreet u32 snapshot; 63770d41c9eSKent Overstreet bool need_set = false; 638e6ec361fSKent Overstreet int ret; 639e6ec361fSKent Overstreet 64070d41c9eSKent Overstreet for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) { 64170d41c9eSKent Overstreet s = bch2_folio_create(folios[folio_idx], GFP_KERNEL); 64270d41c9eSKent Overstreet if (!s) 64370d41c9eSKent Overstreet return -ENOMEM; 64470d41c9eSKent Overstreet 64570d41c9eSKent Overstreet need_set |= !s->uptodate; 64670d41c9eSKent Overstreet } 64770d41c9eSKent Overstreet 64870d41c9eSKent Overstreet if (!need_set) 64970d41c9eSKent Overstreet return 0; 65070d41c9eSKent Overstreet 65170d41c9eSKent Overstreet folio_idx = 0; 652e6ec361fSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 653e6ec361fSKent Overstreet retry: 654e6ec361fSKent Overstreet bch2_trans_begin(&trans); 655e6ec361fSKent Overstreet 656e6ec361fSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 657e6ec361fSKent Overstreet if (ret) 658e6ec361fSKent Overstreet goto err; 659e6ec361fSKent Overstreet 660e6ec361fSKent Overstreet for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, 661e6ec361fSKent Overstreet SPOS(inum.inum, offset, snapshot), 662e6ec361fSKent Overstreet BTREE_ITER_SLOTS, k, ret) { 663e6ec361fSKent Overstreet unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k); 66479203111SKent Overstreet unsigned state = bkey_to_sector_state(k); 665e6ec361fSKent Overstreet 66630bff594SKent Overstreet while (folio_idx < nr_folios) { 66730bff594SKent Overstreet struct folio *folio = folios[folio_idx]; 66830bff594SKent Overstreet u64 folio_start = folio_sector(folio); 66930bff594SKent Overstreet u64 folio_end = folio_end_sector(folio); 67030bff594SKent Overstreet unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) - folio_start; 67130bff594SKent Overstreet unsigned folio_len = min(k.k->p.offset, folio_end) - folio_offset - folio_start; 672e6ec361fSKent Overstreet 67330bff594SKent Overstreet BUG_ON(k.k->p.offset < folio_start); 67430bff594SKent Overstreet BUG_ON(bkey_start_offset(k.k) > folio_end); 675e6ec361fSKent Overstreet 67670d41c9eSKent Overstreet if (!bch2_folio(folio)->uptodate) 67730bff594SKent Overstreet __bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state); 678e6ec361fSKent Overstreet 67930bff594SKent Overstreet if (k.k->p.offset < folio_end) 680e6ec361fSKent Overstreet break; 68130bff594SKent Overstreet folio_idx++; 682e6ec361fSKent Overstreet } 683e6ec361fSKent Overstreet 68430bff594SKent Overstreet if (folio_idx == nr_folios) 685e6ec361fSKent Overstreet break; 686e6ec361fSKent Overstreet } 687e6ec361fSKent Overstreet 688e6ec361fSKent Overstreet offset = iter.pos.offset; 689e6ec361fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 690e6ec361fSKent Overstreet err: 691549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 692e6ec361fSKent Overstreet goto retry; 693e6ec361fSKent Overstreet bch2_trans_exit(&trans); 694e6ec361fSKent Overstreet 695e6ec361fSKent Overstreet return ret; 696e6ec361fSKent Overstreet } 697e6ec361fSKent Overstreet 698b44a66a6SKent Overstreet static void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k) 699b44a66a6SKent Overstreet { 700b44a66a6SKent Overstreet struct bvec_iter iter; 7019567413cSKent Overstreet struct folio_vec fv; 702b44a66a6SKent Overstreet unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v 703b44a66a6SKent Overstreet ? 0 : bch2_bkey_nr_ptrs_fully_allocated(k); 70479203111SKent Overstreet unsigned state = bkey_to_sector_state(k); 705b44a66a6SKent Overstreet 7069567413cSKent Overstreet bio_for_each_folio(fv, bio, iter) 7079567413cSKent Overstreet __bch2_folio_set(fv.fv_folio, 7089567413cSKent Overstreet fv.fv_offset >> 9, 7099567413cSKent Overstreet fv.fv_len >> 9, 7109567413cSKent Overstreet nr_ptrs, state); 711b44a66a6SKent Overstreet } 712b44a66a6SKent Overstreet 713dcfc593fSKent Overstreet static void mark_pagecache_unallocated(struct bch_inode_info *inode, 714dcfc593fSKent Overstreet u64 start, u64 end) 715dcfc593fSKent Overstreet { 716dcfc593fSKent Overstreet pgoff_t index = start >> PAGE_SECTORS_SHIFT; 717dcfc593fSKent Overstreet pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT; 718dcfc593fSKent Overstreet struct folio_batch fbatch; 719dcfc593fSKent Overstreet unsigned i, j; 720dcfc593fSKent Overstreet 721dcfc593fSKent Overstreet if (end <= start) 722dcfc593fSKent Overstreet return; 723dcfc593fSKent Overstreet 724dcfc593fSKent Overstreet folio_batch_init(&fbatch); 725dcfc593fSKent Overstreet 726dcfc593fSKent Overstreet while (filemap_get_folios(inode->v.i_mapping, 727dcfc593fSKent Overstreet &index, end_index, &fbatch)) { 728dcfc593fSKent Overstreet for (i = 0; i < folio_batch_count(&fbatch); i++) { 729dcfc593fSKent Overstreet struct folio *folio = fbatch.folios[i]; 73033e2eb96SKent Overstreet u64 folio_start = folio_sector(folio); 73133e2eb96SKent Overstreet u64 folio_end = folio_end_sector(folio); 73230bff594SKent Overstreet unsigned folio_offset = max(start, folio_start) - folio_start; 73330bff594SKent Overstreet unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; 7343342ac13SKent Overstreet struct bch_folio *s; 735dcfc593fSKent Overstreet 73630bff594SKent Overstreet BUG_ON(end <= folio_start); 737dcfc593fSKent Overstreet 738dcfc593fSKent Overstreet folio_lock(folio); 73930bff594SKent Overstreet s = bch2_folio(folio); 740dcfc593fSKent Overstreet 741dcfc593fSKent Overstreet if (s) { 742dcfc593fSKent Overstreet spin_lock(&s->lock); 74330bff594SKent Overstreet for (j = folio_offset; j < folio_offset + folio_len; j++) 744dcfc593fSKent Overstreet s->s[j].nr_replicas = 0; 745dcfc593fSKent Overstreet spin_unlock(&s->lock); 746dcfc593fSKent Overstreet } 747dcfc593fSKent Overstreet 748dcfc593fSKent Overstreet folio_unlock(folio); 749dcfc593fSKent Overstreet } 750dcfc593fSKent Overstreet folio_batch_release(&fbatch); 751dcfc593fSKent Overstreet cond_resched(); 752dcfc593fSKent Overstreet } 753dcfc593fSKent Overstreet } 754dcfc593fSKent Overstreet 755dcfc593fSKent Overstreet static void mark_pagecache_reserved(struct bch_inode_info *inode, 756dcfc593fSKent Overstreet u64 start, u64 end) 757dcfc593fSKent Overstreet { 758dcfc593fSKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 759dcfc593fSKent Overstreet pgoff_t index = start >> PAGE_SECTORS_SHIFT; 760dcfc593fSKent Overstreet pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT; 761dcfc593fSKent Overstreet struct folio_batch fbatch; 762dcfc593fSKent Overstreet s64 i_sectors_delta = 0; 763dcfc593fSKent Overstreet unsigned i, j; 764dcfc593fSKent Overstreet 765dcfc593fSKent Overstreet if (end <= start) 766dcfc593fSKent Overstreet return; 767dcfc593fSKent Overstreet 768dcfc593fSKent Overstreet folio_batch_init(&fbatch); 769dcfc593fSKent Overstreet 770dcfc593fSKent Overstreet while (filemap_get_folios(inode->v.i_mapping, 771dcfc593fSKent Overstreet &index, end_index, &fbatch)) { 772dcfc593fSKent Overstreet for (i = 0; i < folio_batch_count(&fbatch); i++) { 773dcfc593fSKent Overstreet struct folio *folio = fbatch.folios[i]; 77433e2eb96SKent Overstreet u64 folio_start = folio_sector(folio); 77533e2eb96SKent Overstreet u64 folio_end = folio_end_sector(folio); 77630bff594SKent Overstreet unsigned folio_offset = max(start, folio_start) - folio_start; 77730bff594SKent Overstreet unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; 7783342ac13SKent Overstreet struct bch_folio *s; 779dcfc593fSKent Overstreet 78030bff594SKent Overstreet BUG_ON(end <= folio_start); 781dcfc593fSKent Overstreet 782dcfc593fSKent Overstreet folio_lock(folio); 78330bff594SKent Overstreet s = bch2_folio(folio); 784dcfc593fSKent Overstreet 785dcfc593fSKent Overstreet if (s) { 786dcfc593fSKent Overstreet spin_lock(&s->lock); 787a1774a05SKent Overstreet for (j = folio_offset; j < folio_offset + folio_len; j++) { 788a1774a05SKent Overstreet i_sectors_delta -= s->s[j].state == SECTOR_dirty; 789a1774a05SKent Overstreet folio_sector_set(folio, s, j, folio_sector_reserve(s->s[j].state)); 790dcfc593fSKent Overstreet } 791dcfc593fSKent Overstreet spin_unlock(&s->lock); 792dcfc593fSKent Overstreet } 793dcfc593fSKent Overstreet 794dcfc593fSKent Overstreet folio_unlock(folio); 795dcfc593fSKent Overstreet } 796dcfc593fSKent Overstreet folio_batch_release(&fbatch); 797dcfc593fSKent Overstreet cond_resched(); 798dcfc593fSKent Overstreet } 799dcfc593fSKent Overstreet 800dcfc593fSKent Overstreet i_sectors_acct(c, inode, NULL, i_sectors_delta); 801dcfc593fSKent Overstreet } 802dcfc593fSKent Overstreet 803e1036a2aSKent Overstreet static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode) 804e1036a2aSKent Overstreet { 805e1036a2aSKent Overstreet /* XXX: this should not be open coded */ 806e1036a2aSKent Overstreet return inode->ei_inode.bi_data_replicas 807e1036a2aSKent Overstreet ? inode->ei_inode.bi_data_replicas - 1 808e1036a2aSKent Overstreet : c->opts.data_replicas; 809e1036a2aSKent Overstreet } 810e1036a2aSKent Overstreet 8113342ac13SKent Overstreet static inline unsigned sectors_to_reserve(struct bch_folio_sector *s, 812f57a6a5dSKent Overstreet unsigned nr_replicas) 813f57a6a5dSKent Overstreet { 814f57a6a5dSKent Overstreet return max(0, (int) nr_replicas - 815f57a6a5dSKent Overstreet s->nr_replicas - 816f57a6a5dSKent Overstreet s->replicas_reserved); 817f57a6a5dSKent Overstreet } 818f57a6a5dSKent Overstreet 81930bff594SKent Overstreet static int bch2_get_folio_disk_reservation(struct bch_fs *c, 820f57a6a5dSKent Overstreet struct bch_inode_info *inode, 82130bff594SKent Overstreet struct folio *folio, bool check_enospc) 8221c6fdbd8SKent Overstreet { 82330bff594SKent Overstreet struct bch_folio *s = bch2_folio_create(folio, 0); 824e1036a2aSKent Overstreet unsigned nr_replicas = inode_nr_replicas(c, inode); 825f57a6a5dSKent Overstreet struct disk_reservation disk_res = { 0 }; 82633e2eb96SKent Overstreet unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0; 827f81b648dSKent Overstreet int ret; 8281c6fdbd8SKent Overstreet 829f57a6a5dSKent Overstreet if (!s) 830f57a6a5dSKent Overstreet return -ENOMEM; 8311c6fdbd8SKent Overstreet 83233e2eb96SKent Overstreet for (i = 0; i < sectors; i++) 833f57a6a5dSKent Overstreet disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas); 834f57a6a5dSKent Overstreet 835f57a6a5dSKent Overstreet if (!disk_res_sectors) 836f57a6a5dSKent Overstreet return 0; 837f57a6a5dSKent Overstreet 838f57a6a5dSKent Overstreet ret = bch2_disk_reservation_get(c, &disk_res, 839f57a6a5dSKent Overstreet disk_res_sectors, 1, 840f57a6a5dSKent Overstreet !check_enospc 841f57a6a5dSKent Overstreet ? BCH_DISK_RESERVATION_NOFAIL 842f57a6a5dSKent Overstreet : 0); 8431c6fdbd8SKent Overstreet if (unlikely(ret)) 844f81b648dSKent Overstreet return ret; 845f81b648dSKent Overstreet 84633e2eb96SKent Overstreet for (i = 0; i < sectors; i++) 847f57a6a5dSKent Overstreet s->s[i].replicas_reserved += 848f57a6a5dSKent Overstreet sectors_to_reserve(&s->s[i], nr_replicas); 849f57a6a5dSKent Overstreet 850f57a6a5dSKent Overstreet return 0; 8511c6fdbd8SKent Overstreet } 8521c6fdbd8SKent Overstreet 85330bff594SKent Overstreet struct bch2_folio_reservation { 854d1542e03SKent Overstreet struct disk_reservation disk; 855d1542e03SKent Overstreet struct quota_res quota; 856d1542e03SKent Overstreet }; 857d1542e03SKent Overstreet 85830bff594SKent Overstreet static void bch2_folio_reservation_init(struct bch_fs *c, 859f57a6a5dSKent Overstreet struct bch_inode_info *inode, 86030bff594SKent Overstreet struct bch2_folio_reservation *res) 861d1542e03SKent Overstreet { 862d1542e03SKent Overstreet memset(res, 0, sizeof(*res)); 863d1542e03SKent Overstreet 864d1542e03SKent Overstreet res->disk.nr_replicas = inode_nr_replicas(c, inode); 865d1542e03SKent Overstreet } 866d1542e03SKent Overstreet 86730bff594SKent Overstreet static void bch2_folio_reservation_put(struct bch_fs *c, 868d1542e03SKent Overstreet struct bch_inode_info *inode, 86930bff594SKent Overstreet struct bch2_folio_reservation *res) 870d1542e03SKent Overstreet { 871d1542e03SKent Overstreet bch2_disk_reservation_put(c, &res->disk); 872d1542e03SKent Overstreet bch2_quota_reservation_put(c, inode, &res->quota); 873d1542e03SKent Overstreet } 874d1542e03SKent Overstreet 87530bff594SKent Overstreet static int bch2_folio_reservation_get(struct bch_fs *c, 87630bff594SKent Overstreet struct bch_inode_info *inode, 87730bff594SKent Overstreet struct folio *folio, 87830bff594SKent Overstreet struct bch2_folio_reservation *res, 879bd954215SKent Overstreet unsigned offset, unsigned len) 880f57a6a5dSKent Overstreet { 88130bff594SKent Overstreet struct bch_folio *s = bch2_folio_create(folio, 0); 882d1542e03SKent Overstreet unsigned i, disk_sectors = 0, quota_sectors = 0; 883f57a6a5dSKent Overstreet int ret; 884f57a6a5dSKent Overstreet 885f57a6a5dSKent Overstreet if (!s) 886f57a6a5dSKent Overstreet return -ENOMEM; 887f57a6a5dSKent Overstreet 888e6ec361fSKent Overstreet BUG_ON(!s->uptodate); 889e6ec361fSKent Overstreet 8904b0a66d5SKent Overstreet for (i = round_down(offset, block_bytes(c)) >> 9; 8914b0a66d5SKent Overstreet i < round_up(offset + len, block_bytes(c)) >> 9; 892d1542e03SKent Overstreet i++) { 893d1542e03SKent Overstreet disk_sectors += sectors_to_reserve(&s->s[i], 894d1542e03SKent Overstreet res->disk.nr_replicas); 895a1774a05SKent Overstreet quota_sectors += s->s[i].state == SECTOR_unallocated; 8961c6fdbd8SKent Overstreet } 8971c6fdbd8SKent Overstreet 898d1542e03SKent Overstreet if (disk_sectors) { 899bd954215SKent Overstreet ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0); 900d1542e03SKent Overstreet if (unlikely(ret)) 901d1542e03SKent Overstreet return ret; 902d1542e03SKent Overstreet } 903d1542e03SKent Overstreet 904d1542e03SKent Overstreet if (quota_sectors) { 905d1542e03SKent Overstreet ret = bch2_quota_reservation_add(c, inode, &res->quota, 906bd954215SKent Overstreet quota_sectors, true); 907d1542e03SKent Overstreet if (unlikely(ret)) { 908d1542e03SKent Overstreet struct disk_reservation tmp = { 909d1542e03SKent Overstreet .sectors = disk_sectors 910d1542e03SKent Overstreet }; 911d1542e03SKent Overstreet 912d1542e03SKent Overstreet bch2_disk_reservation_put(c, &tmp); 913d1542e03SKent Overstreet res->disk.sectors -= disk_sectors; 914d1542e03SKent Overstreet return ret; 915d1542e03SKent Overstreet } 916d1542e03SKent Overstreet } 917d1542e03SKent Overstreet 918d1542e03SKent Overstreet return 0; 919f57a6a5dSKent Overstreet } 920f57a6a5dSKent Overstreet 92130bff594SKent Overstreet static void bch2_clear_folio_bits(struct folio *folio) 9221c6fdbd8SKent Overstreet { 92330bff594SKent Overstreet struct bch_inode_info *inode = to_bch_ei(folio->mapping->host); 9241c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 92530bff594SKent Overstreet struct bch_folio *s = bch2_folio(folio); 926d1542e03SKent Overstreet struct disk_reservation disk_res = { 0 }; 92733e2eb96SKent Overstreet int i, sectors = folio_sectors(folio), dirty_sectors = 0; 9281c6fdbd8SKent Overstreet 929f57a6a5dSKent Overstreet if (!s) 9301c6fdbd8SKent Overstreet return; 9311c6fdbd8SKent Overstreet 93230bff594SKent Overstreet EBUG_ON(!folio_test_locked(folio)); 93330bff594SKent Overstreet EBUG_ON(folio_test_writeback(folio)); 9343826ee0bSKent Overstreet 93533e2eb96SKent Overstreet for (i = 0; i < sectors; i++) { 936d1542e03SKent Overstreet disk_res.sectors += s->s[i].replicas_reserved; 937d1542e03SKent Overstreet s->s[i].replicas_reserved = 0; 938d1542e03SKent Overstreet 939a1774a05SKent Overstreet dirty_sectors -= s->s[i].state == SECTOR_dirty; 940a1774a05SKent Overstreet folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state)); 941f57a6a5dSKent Overstreet } 942adfcfaf0SKent Overstreet 943d1542e03SKent Overstreet bch2_disk_reservation_put(c, &disk_res); 944d1542e03SKent Overstreet 945b44a66a6SKent Overstreet i_sectors_acct(c, inode, NULL, dirty_sectors); 946adfcfaf0SKent Overstreet 94730bff594SKent Overstreet bch2_folio_release(folio); 9481c6fdbd8SKent Overstreet } 9491c6fdbd8SKent Overstreet 95030bff594SKent Overstreet static void bch2_set_folio_dirty(struct bch_fs *c, 95130bff594SKent Overstreet struct bch_inode_info *inode, 95230bff594SKent Overstreet struct folio *folio, 95330bff594SKent Overstreet struct bch2_folio_reservation *res, 954d1542e03SKent Overstreet unsigned offset, unsigned len) 9551c6fdbd8SKent Overstreet { 95630bff594SKent Overstreet struct bch_folio *s = bch2_folio(folio); 957f57a6a5dSKent Overstreet unsigned i, dirty_sectors = 0; 9581c6fdbd8SKent Overstreet 95930bff594SKent Overstreet WARN_ON((u64) folio_pos(folio) + offset + len > 960877dfb34SKent Overstreet round_up((u64) i_size_read(&inode->v), block_bytes(c))); 961fb472ac5SKent Overstreet 96234fdcf06SKent Overstreet BUG_ON(!s->uptodate); 96334fdcf06SKent Overstreet 9643826ee0bSKent Overstreet spin_lock(&s->lock); 9653826ee0bSKent Overstreet 9664b0a66d5SKent Overstreet for (i = round_down(offset, block_bytes(c)) >> 9; 9674b0a66d5SKent Overstreet i < round_up(offset + len, block_bytes(c)) >> 9; 968d1542e03SKent Overstreet i++) { 969d1542e03SKent Overstreet unsigned sectors = sectors_to_reserve(&s->s[i], 970d1542e03SKent Overstreet res->disk.nr_replicas); 9711c6fdbd8SKent Overstreet 972406d6d5aSKent Overstreet /* 973406d6d5aSKent Overstreet * This can happen if we race with the error path in 974406d6d5aSKent Overstreet * bch2_writepage_io_done(): 975406d6d5aSKent Overstreet */ 976406d6d5aSKent Overstreet sectors = min_t(unsigned, sectors, res->disk.sectors); 977406d6d5aSKent Overstreet 978d1542e03SKent Overstreet s->s[i].replicas_reserved += sectors; 979d1542e03SKent Overstreet res->disk.sectors -= sectors; 980adfcfaf0SKent Overstreet 981a1774a05SKent Overstreet dirty_sectors += s->s[i].state == SECTOR_unallocated; 982a1774a05SKent Overstreet 983a1774a05SKent Overstreet folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state)); 984f57a6a5dSKent Overstreet } 985f57a6a5dSKent Overstreet 9863826ee0bSKent Overstreet spin_unlock(&s->lock); 9873826ee0bSKent Overstreet 988d1542e03SKent Overstreet i_sectors_acct(c, inode, &res->quota, dirty_sectors); 9891c6fdbd8SKent Overstreet 99030bff594SKent Overstreet if (!folio_test_dirty(folio)) 99130bff594SKent Overstreet filemap_dirty_folio(inode->v.i_mapping, folio); 9921c6fdbd8SKent Overstreet } 9931c6fdbd8SKent Overstreet 9941c6fdbd8SKent Overstreet vm_fault_t bch2_page_fault(struct vm_fault *vmf) 9951c6fdbd8SKent Overstreet { 9961c6fdbd8SKent Overstreet struct file *file = vmf->vma->vm_file; 997eb8e6e9cSKent Overstreet struct address_space *mapping = file->f_mapping; 998eb8e6e9cSKent Overstreet struct address_space *fdm = faults_disabled_mapping(); 9991c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 1000*73bd774dSKent Overstreet vm_fault_t ret; 10011c6fdbd8SKent Overstreet 1002eb8e6e9cSKent Overstreet if (fdm == mapping) 1003eb8e6e9cSKent Overstreet return VM_FAULT_SIGBUS; 1004eb8e6e9cSKent Overstreet 1005eb8e6e9cSKent Overstreet /* Lock ordering: */ 1006eb8e6e9cSKent Overstreet if (fdm > mapping) { 1007eb8e6e9cSKent Overstreet struct bch_inode_info *fdm_host = to_bch_ei(fdm->host); 1008eb8e6e9cSKent Overstreet 1009a7ecd30cSKent Overstreet if (bch2_pagecache_add_tryget(inode)) 1010eb8e6e9cSKent Overstreet goto got_lock; 1011eb8e6e9cSKent Overstreet 1012a7ecd30cSKent Overstreet bch2_pagecache_block_put(fdm_host); 1013eb8e6e9cSKent Overstreet 1014a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 1015a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 1016eb8e6e9cSKent Overstreet 1017a7ecd30cSKent Overstreet bch2_pagecache_block_get(fdm_host); 1018eb8e6e9cSKent Overstreet 1019eb8e6e9cSKent Overstreet /* Signal that lock has been dropped: */ 1020eb8e6e9cSKent Overstreet set_fdm_dropped_locks(); 1021eb8e6e9cSKent Overstreet return VM_FAULT_SIGBUS; 1022eb8e6e9cSKent Overstreet } 1023eb8e6e9cSKent Overstreet 1024a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 1025eb8e6e9cSKent Overstreet got_lock: 10261c6fdbd8SKent Overstreet ret = filemap_fault(vmf); 1027a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 10281c6fdbd8SKent Overstreet 10291c6fdbd8SKent Overstreet return ret; 10301c6fdbd8SKent Overstreet } 10311c6fdbd8SKent Overstreet 10321c6fdbd8SKent Overstreet vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) 10331c6fdbd8SKent Overstreet { 103430bff594SKent Overstreet struct folio *folio = page_folio(vmf->page); 10351c6fdbd8SKent Overstreet struct file *file = vmf->vma->vm_file; 10361c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 10371c6fdbd8SKent Overstreet struct address_space *mapping = file->f_mapping; 10381c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 103930bff594SKent Overstreet struct bch2_folio_reservation res; 10406cc3535dSKent Overstreet unsigned len; 10416cc3535dSKent Overstreet loff_t isize; 1042*73bd774dSKent Overstreet vm_fault_t ret; 10431c6fdbd8SKent Overstreet 104430bff594SKent Overstreet bch2_folio_reservation_init(c, inode, &res); 1045d1542e03SKent Overstreet 10461c6fdbd8SKent Overstreet sb_start_pagefault(inode->v.i_sb); 10471c6fdbd8SKent Overstreet file_update_time(file); 10481c6fdbd8SKent Overstreet 10491c6fdbd8SKent Overstreet /* 10501c6fdbd8SKent Overstreet * Not strictly necessary, but helps avoid dio writes livelocking in 10511c6fdbd8SKent Overstreet * write_invalidate_inode_pages_range() - can drop this if/when we get 10521c6fdbd8SKent Overstreet * a write_invalidate_inode_pages_range() that works without dropping 10531c6fdbd8SKent Overstreet * page lock before invalidating page 10541c6fdbd8SKent Overstreet */ 1055a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 10561c6fdbd8SKent Overstreet 105730bff594SKent Overstreet folio_lock(folio); 10586cc3535dSKent Overstreet isize = i_size_read(&inode->v); 10596cc3535dSKent Overstreet 106030bff594SKent Overstreet if (folio->mapping != mapping || folio_pos(folio) >= isize) { 106130bff594SKent Overstreet folio_unlock(folio); 10621c6fdbd8SKent Overstreet ret = VM_FAULT_NOPAGE; 10631c6fdbd8SKent Overstreet goto out; 10641c6fdbd8SKent Overstreet } 10651c6fdbd8SKent Overstreet 106633e2eb96SKent Overstreet len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio)); 10676cc3535dSKent Overstreet 106870d41c9eSKent Overstreet if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?: 106970d41c9eSKent Overstreet bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) { 107030bff594SKent Overstreet folio_unlock(folio); 10711c6fdbd8SKent Overstreet ret = VM_FAULT_SIGBUS; 10721c6fdbd8SKent Overstreet goto out; 10731c6fdbd8SKent Overstreet } 10741c6fdbd8SKent Overstreet 107530bff594SKent Overstreet bch2_set_folio_dirty(c, inode, folio, &res, 0, len); 107630bff594SKent Overstreet bch2_folio_reservation_put(c, inode, &res); 10771b783a69SKent Overstreet 107830bff594SKent Overstreet folio_wait_stable(folio); 1079e6ec361fSKent Overstreet ret = VM_FAULT_LOCKED; 10801c6fdbd8SKent Overstreet out: 1081a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 10821c6fdbd8SKent Overstreet sb_end_pagefault(inode->v.i_sb); 1083d1542e03SKent Overstreet 10841c6fdbd8SKent Overstreet return ret; 10851c6fdbd8SKent Overstreet } 10861c6fdbd8SKent Overstreet 10871c6fdbd8SKent Overstreet void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length) 10881c6fdbd8SKent Overstreet { 10891c6fdbd8SKent Overstreet if (offset || length < folio_size(folio)) 10901c6fdbd8SKent Overstreet return; 10911c6fdbd8SKent Overstreet 109230bff594SKent Overstreet bch2_clear_folio_bits(folio); 10931c6fdbd8SKent Overstreet } 10941c6fdbd8SKent Overstreet 10951c6fdbd8SKent Overstreet bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask) 10961c6fdbd8SKent Overstreet { 1097a6d90385SKent Overstreet if (folio_test_dirty(folio) || folio_test_writeback(folio)) 10981c6fdbd8SKent Overstreet return false; 10991c6fdbd8SKent Overstreet 110030bff594SKent Overstreet bch2_clear_folio_bits(folio); 11011c6fdbd8SKent Overstreet return true; 11021c6fdbd8SKent Overstreet } 11031c6fdbd8SKent Overstreet 11041c6fdbd8SKent Overstreet /* readpage(s): */ 11051c6fdbd8SKent Overstreet 11061c6fdbd8SKent Overstreet static void bch2_readpages_end_io(struct bio *bio) 11071c6fdbd8SKent Overstreet { 110830bff594SKent Overstreet struct folio_iter fi; 11091c6fdbd8SKent Overstreet 111030bff594SKent Overstreet bio_for_each_folio_all(fi, bio) { 11111c6fdbd8SKent Overstreet if (!bio->bi_status) { 111230bff594SKent Overstreet folio_mark_uptodate(fi.folio); 11131c6fdbd8SKent Overstreet } else { 111430bff594SKent Overstreet folio_clear_uptodate(fi.folio); 111530bff594SKent Overstreet folio_set_error(fi.folio); 11161c6fdbd8SKent Overstreet } 111730bff594SKent Overstreet folio_unlock(fi.folio); 11181c6fdbd8SKent Overstreet } 11191c6fdbd8SKent Overstreet 11201c6fdbd8SKent Overstreet bio_put(bio); 11211c6fdbd8SKent Overstreet } 11221c6fdbd8SKent Overstreet 11231c6fdbd8SKent Overstreet struct readpages_iter { 11241c6fdbd8SKent Overstreet struct address_space *mapping; 11251c6fdbd8SKent Overstreet unsigned idx; 11269567413cSKent Overstreet folios folios; 11271c6fdbd8SKent Overstreet }; 11281c6fdbd8SKent Overstreet 11291c6fdbd8SKent Overstreet static int readpages_iter_init(struct readpages_iter *iter, 11301c6fdbd8SKent Overstreet struct readahead_control *ractl) 11311c6fdbd8SKent Overstreet { 11329567413cSKent Overstreet struct folio **fi; 11339567413cSKent Overstreet int ret; 11341c6fdbd8SKent Overstreet 11351c6fdbd8SKent Overstreet memset(iter, 0, sizeof(*iter)); 11361c6fdbd8SKent Overstreet 11371c6fdbd8SKent Overstreet iter->mapping = ractl->mapping; 11381c6fdbd8SKent Overstreet 11399567413cSKent Overstreet ret = filemap_get_contig_folios_d(iter->mapping, 11409567413cSKent Overstreet ractl->_index << PAGE_SHIFT, 11419567413cSKent Overstreet (ractl->_index + ractl->_nr_pages) << PAGE_SHIFT, 11429567413cSKent Overstreet 0, mapping_gfp_mask(iter->mapping), 11439567413cSKent Overstreet &iter->folios); 11449567413cSKent Overstreet if (ret) 11459567413cSKent Overstreet return ret; 11461c6fdbd8SKent Overstreet 11479567413cSKent Overstreet darray_for_each(iter->folios, fi) { 11489567413cSKent Overstreet ractl->_nr_pages -= 1U << folio_order(*fi); 114970d41c9eSKent Overstreet __bch2_folio_create(*fi, __GFP_NOFAIL|GFP_KERNEL); 11509567413cSKent Overstreet folio_put(*fi); 11519567413cSKent Overstreet folio_put(*fi); 11521c6fdbd8SKent Overstreet } 11531c6fdbd8SKent Overstreet 11541c6fdbd8SKent Overstreet return 0; 11551c6fdbd8SKent Overstreet } 11561c6fdbd8SKent Overstreet 11579567413cSKent Overstreet static inline struct folio *readpage_iter_peek(struct readpages_iter *iter) 11581c6fdbd8SKent Overstreet { 11599567413cSKent Overstreet if (iter->idx >= iter->folios.nr) 11601c6fdbd8SKent Overstreet return NULL; 11619567413cSKent Overstreet return iter->folios.data[iter->idx]; 11629567413cSKent Overstreet } 11631c6fdbd8SKent Overstreet 11649567413cSKent Overstreet static inline void readpage_iter_advance(struct readpages_iter *iter) 11659567413cSKent Overstreet { 11669567413cSKent Overstreet iter->idx++; 11671c6fdbd8SKent Overstreet } 11681c6fdbd8SKent Overstreet 116935189e09SKent Overstreet static bool extent_partial_reads_expensive(struct bkey_s_c k) 117035189e09SKent Overstreet { 117135189e09SKent Overstreet struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 117235189e09SKent Overstreet struct bch_extent_crc_unpacked crc; 117335189e09SKent Overstreet const union bch_extent_entry *i; 117435189e09SKent Overstreet 117535189e09SKent Overstreet bkey_for_each_crc(k.k, ptrs, crc, i) 117635189e09SKent Overstreet if (crc.csum_type || crc.compression_type) 117735189e09SKent Overstreet return true; 117835189e09SKent Overstreet return false; 117935189e09SKent Overstreet } 118035189e09SKent Overstreet 118170d41c9eSKent Overstreet static int readpage_bio_extend(struct btree_trans *trans, 118270d41c9eSKent Overstreet struct readpages_iter *iter, 118376426098SKent Overstreet struct bio *bio, 118476426098SKent Overstreet unsigned sectors_this_extent, 11851c6fdbd8SKent Overstreet bool get_more) 11861c6fdbd8SKent Overstreet { 118770d41c9eSKent Overstreet /* Don't hold btree locks while allocating memory: */ 118870d41c9eSKent Overstreet bch2_trans_unlock(trans); 118970d41c9eSKent Overstreet 119076426098SKent Overstreet while (bio_sectors(bio) < sectors_this_extent && 11911c6fdbd8SKent Overstreet bio->bi_vcnt < bio->bi_max_vecs) { 11929567413cSKent Overstreet struct folio *folio = readpage_iter_peek(iter); 11931c6fdbd8SKent Overstreet int ret; 11941c6fdbd8SKent Overstreet 119530bff594SKent Overstreet if (folio) { 11969567413cSKent Overstreet readpage_iter_advance(iter); 11971c6fdbd8SKent Overstreet } else { 11989567413cSKent Overstreet pgoff_t folio_offset = bio_end_sector(bio) >> PAGE_SECTORS_SHIFT; 11999567413cSKent Overstreet 12001c6fdbd8SKent Overstreet if (!get_more) 12011c6fdbd8SKent Overstreet break; 12021c6fdbd8SKent Overstreet 120330bff594SKent Overstreet folio = xa_load(&iter->mapping->i_pages, folio_offset); 120430bff594SKent Overstreet if (folio && !xa_is_value(folio)) 12051c6fdbd8SKent Overstreet break; 12061c6fdbd8SKent Overstreet 120730bff594SKent Overstreet folio = filemap_alloc_folio(readahead_gfp_mask(iter->mapping), 0); 120830bff594SKent Overstreet if (!folio) 12091c6fdbd8SKent Overstreet break; 12101c6fdbd8SKent Overstreet 121170d41c9eSKent Overstreet if (!__bch2_folio_create(folio, GFP_KERNEL)) { 121230bff594SKent Overstreet folio_put(folio); 1213f57a6a5dSKent Overstreet break; 1214f57a6a5dSKent Overstreet } 12151c6fdbd8SKent Overstreet 121670d41c9eSKent Overstreet ret = filemap_add_folio(iter->mapping, folio, folio_offset, GFP_KERNEL); 12171c6fdbd8SKent Overstreet if (ret) { 121830bff594SKent Overstreet __bch2_folio_release(folio); 121930bff594SKent Overstreet folio_put(folio); 12201c6fdbd8SKent Overstreet break; 12211c6fdbd8SKent Overstreet } 12221c6fdbd8SKent Overstreet 122330bff594SKent Overstreet folio_put(folio); 12241c6fdbd8SKent Overstreet } 12251c6fdbd8SKent Overstreet 12269567413cSKent Overstreet BUG_ON(folio_sector(folio) != bio_end_sector(bio)); 12279567413cSKent Overstreet 122830bff594SKent Overstreet BUG_ON(!bio_add_folio(bio, folio, folio_size(folio), 0)); 12291c6fdbd8SKent Overstreet } 123070d41c9eSKent Overstreet 123170d41c9eSKent Overstreet return bch2_trans_relock(trans); 12321c6fdbd8SKent Overstreet } 12331c6fdbd8SKent Overstreet 12348c6d298aSKent Overstreet static void bchfs_read(struct btree_trans *trans, 12358c6d298aSKent Overstreet struct bch_read_bio *rbio, 12368c6d298aSKent Overstreet subvol_inum inum, 12371c6fdbd8SKent Overstreet struct readpages_iter *readpages_iter) 12381c6fdbd8SKent Overstreet { 12390f238367SKent Overstreet struct bch_fs *c = trans->c; 12408c6d298aSKent Overstreet struct btree_iter iter; 124107a1006aSKent Overstreet struct bkey_buf sk; 12421c6fdbd8SKent Overstreet int flags = BCH_READ_RETRY_IF_STALE| 12431c6fdbd8SKent Overstreet BCH_READ_MAY_PROMOTE; 12448c6d298aSKent Overstreet u32 snapshot; 124576426098SKent Overstreet int ret = 0; 12461c6fdbd8SKent Overstreet 12471c6fdbd8SKent Overstreet rbio->c = c; 12481c6fdbd8SKent Overstreet rbio->start_time = local_clock(); 12498c6d298aSKent Overstreet rbio->subvol = inum.subvol; 125035189e09SKent Overstreet 125107a1006aSKent Overstreet bch2_bkey_buf_init(&sk); 125276426098SKent Overstreet retry: 1253700c25b3SKent Overstreet bch2_trans_begin(trans); 12548c6d298aSKent Overstreet iter = (struct btree_iter) { NULL }; 1255700c25b3SKent Overstreet 12568c6d298aSKent Overstreet ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); 12578c6d298aSKent Overstreet if (ret) 12588c6d298aSKent Overstreet goto err; 12598c6d298aSKent Overstreet 12608c6d298aSKent Overstreet bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, 12618c6d298aSKent Overstreet SPOS(inum.inum, rbio->bio.bi_iter.bi_sector, snapshot), 126257cfdd8bSKent Overstreet BTREE_ITER_SLOTS); 12631c6fdbd8SKent Overstreet while (1) { 12641c6fdbd8SKent Overstreet struct bkey_s_c k; 126576426098SKent Overstreet unsigned bytes, sectors, offset_into_extent; 12665ff75ccbSKent Overstreet enum btree_id data_btree = BTREE_ID_extents; 12671c6fdbd8SKent Overstreet 12683737e0ddSKent Overstreet /* 12693737e0ddSKent Overstreet * read_extent -> io_time_reset may cause a transaction restart 12703737e0ddSKent Overstreet * without returning an error, we need to check for that here: 12713737e0ddSKent Overstreet */ 1272549d173cSKent Overstreet ret = bch2_trans_relock(trans); 1273549d173cSKent Overstreet if (ret) 12743737e0ddSKent Overstreet break; 12753737e0ddSKent Overstreet 12768c6d298aSKent Overstreet bch2_btree_iter_set_pos(&iter, 12778c6d298aSKent Overstreet POS(inum.inum, rbio->bio.bi_iter.bi_sector)); 12781c6fdbd8SKent Overstreet 12798c6d298aSKent Overstreet k = bch2_btree_iter_peek_slot(&iter); 128076426098SKent Overstreet ret = bkey_err(k); 128176426098SKent Overstreet if (ret) 128276426098SKent Overstreet break; 12831c6fdbd8SKent Overstreet 12848c6d298aSKent Overstreet offset_into_extent = iter.pos.offset - 128506ed8558SKent Overstreet bkey_start_offset(k.k); 128676426098SKent Overstreet sectors = k.k->size - offset_into_extent; 128776426098SKent Overstreet 128807a1006aSKent Overstreet bch2_bkey_buf_reassemble(&sk, c, k); 128913dcd4abSKent Overstreet 12905ff75ccbSKent Overstreet ret = bch2_read_indirect_extent(trans, &data_btree, 129122d8a33dSYuxuan Shui &offset_into_extent, &sk); 129276426098SKent Overstreet if (ret) 129376426098SKent Overstreet break; 129476426098SKent Overstreet 129513dcd4abSKent Overstreet k = bkey_i_to_s_c(sk.k); 129613dcd4abSKent Overstreet 129776426098SKent Overstreet sectors = min(sectors, k.k->size - offset_into_extent); 129876426098SKent Overstreet 129970d41c9eSKent Overstreet if (readpages_iter) { 130070d41c9eSKent Overstreet ret = readpage_bio_extend(trans, readpages_iter, &rbio->bio, sectors, 130135189e09SKent Overstreet extent_partial_reads_expensive(k)); 130270d41c9eSKent Overstreet if (ret) 130370d41c9eSKent Overstreet break; 130470d41c9eSKent Overstreet } 13051c6fdbd8SKent Overstreet 130676426098SKent Overstreet bytes = min(sectors, bio_sectors(&rbio->bio)) << 9; 130706ed8558SKent Overstreet swap(rbio->bio.bi_iter.bi_size, bytes); 13081c6fdbd8SKent Overstreet 130906ed8558SKent Overstreet if (rbio->bio.bi_iter.bi_size == bytes) 13101c6fdbd8SKent Overstreet flags |= BCH_READ_LAST_FRAGMENT; 13111c6fdbd8SKent Overstreet 1312b44a66a6SKent Overstreet bch2_bio_page_state_set(&rbio->bio, k); 13131c6fdbd8SKent Overstreet 13148c6d298aSKent Overstreet bch2_read_extent(trans, rbio, iter.pos, 13155ff75ccbSKent Overstreet data_btree, k, offset_into_extent, flags); 13161c6fdbd8SKent Overstreet 13171c6fdbd8SKent Overstreet if (flags & BCH_READ_LAST_FRAGMENT) 131835189e09SKent Overstreet break; 13191c6fdbd8SKent Overstreet 132006ed8558SKent Overstreet swap(rbio->bio.bi_iter.bi_size, bytes); 132106ed8558SKent Overstreet bio_advance(&rbio->bio, bytes); 1322084d42bbSKent Overstreet 1323084d42bbSKent Overstreet ret = btree_trans_too_many_iters(trans); 1324084d42bbSKent Overstreet if (ret) 1325084d42bbSKent Overstreet break; 13261c6fdbd8SKent Overstreet } 13278c6d298aSKent Overstreet err: 13288c6d298aSKent Overstreet bch2_trans_iter_exit(trans, &iter); 132976426098SKent Overstreet 1330549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 133176426098SKent Overstreet goto retry; 133276426098SKent Overstreet 133335189e09SKent Overstreet if (ret) { 13347fec8266SKent Overstreet bch_err_inum_offset_ratelimited(c, 13357fec8266SKent Overstreet iter.pos.inode, 13367fec8266SKent Overstreet iter.pos.offset << 9, 13370fefe8d8SKent Overstreet "read error %i from btree lookup", ret); 13380fefe8d8SKent Overstreet rbio->bio.bi_status = BLK_STS_IOERR; 133976426098SKent Overstreet bio_endio(&rbio->bio); 13401c6fdbd8SKent Overstreet } 13411c6fdbd8SKent Overstreet 134207a1006aSKent Overstreet bch2_bkey_buf_exit(&sk, c); 134335189e09SKent Overstreet } 134435189e09SKent Overstreet 13451c6fdbd8SKent Overstreet void bch2_readahead(struct readahead_control *ractl) 13461c6fdbd8SKent Overstreet { 13471c6fdbd8SKent Overstreet struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host); 13481c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 134901ad6737SKent Overstreet struct bch_io_opts opts; 1350424eb881SKent Overstreet struct btree_trans trans; 135130bff594SKent Overstreet struct folio *folio; 13521c6fdbd8SKent Overstreet struct readpages_iter readpages_iter; 13531c6fdbd8SKent Overstreet int ret; 13541c6fdbd8SKent Overstreet 135501ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 135601ad6737SKent Overstreet 13571c6fdbd8SKent Overstreet ret = readpages_iter_init(&readpages_iter, ractl); 13581c6fdbd8SKent Overstreet BUG_ON(ret); 13591c6fdbd8SKent Overstreet 136020bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 13611c6fdbd8SKent Overstreet 1362a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 13631c6fdbd8SKent Overstreet 13649567413cSKent Overstreet while ((folio = readpage_iter_peek(&readpages_iter))) { 13651c6fdbd8SKent Overstreet unsigned n = min_t(unsigned, 13669567413cSKent Overstreet readpages_iter.folios.nr - 13671c6fdbd8SKent Overstreet readpages_iter.idx, 13681c6fdbd8SKent Overstreet BIO_MAX_VECS); 13691c6fdbd8SKent Overstreet struct bch_read_bio *rbio = 13701c6fdbd8SKent Overstreet rbio_init(bio_alloc_bioset(NULL, n, REQ_OP_READ, 13715718fda0SKent Overstreet GFP_KERNEL, &c->bio_read), 13721c6fdbd8SKent Overstreet opts); 13731c6fdbd8SKent Overstreet 13749567413cSKent Overstreet readpage_iter_advance(&readpages_iter); 13751c6fdbd8SKent Overstreet 13769567413cSKent Overstreet rbio->bio.bi_iter.bi_sector = folio_sector(folio); 13771c6fdbd8SKent Overstreet rbio->bio.bi_end_io = bch2_readpages_end_io; 137830bff594SKent Overstreet BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); 13791c6fdbd8SKent Overstreet 13808c6d298aSKent Overstreet bchfs_read(&trans, rbio, inode_inum(inode), 13810f238367SKent Overstreet &readpages_iter); 13825718fda0SKent Overstreet bch2_trans_unlock(&trans); 13831c6fdbd8SKent Overstreet } 13841c6fdbd8SKent Overstreet 1385a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 1386424eb881SKent Overstreet 1387424eb881SKent Overstreet bch2_trans_exit(&trans); 13889567413cSKent Overstreet darray_exit(&readpages_iter.folios); 13891c6fdbd8SKent Overstreet } 13901c6fdbd8SKent Overstreet 139130bff594SKent Overstreet static void __bchfs_readfolio(struct bch_fs *c, struct bch_read_bio *rbio, 139230bff594SKent Overstreet subvol_inum inum, struct folio *folio) 13931c6fdbd8SKent Overstreet { 1394424eb881SKent Overstreet struct btree_trans trans; 13951c6fdbd8SKent Overstreet 139630bff594SKent Overstreet bch2_folio_create(folio, __GFP_NOFAIL); 13971c6fdbd8SKent Overstreet 13981c6fdbd8SKent Overstreet rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC; 139930bff594SKent Overstreet rbio->bio.bi_iter.bi_sector = folio_sector(folio); 140030bff594SKent Overstreet BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); 14011c6fdbd8SKent Overstreet 140220bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 14038c6d298aSKent Overstreet bchfs_read(&trans, rbio, inum, NULL); 1404424eb881SKent Overstreet bch2_trans_exit(&trans); 14051c6fdbd8SKent Overstreet } 14061c6fdbd8SKent Overstreet 140730bff594SKent Overstreet static void bch2_read_single_folio_end_io(struct bio *bio) 14081c6fdbd8SKent Overstreet { 14091c6fdbd8SKent Overstreet complete(bio->bi_private); 14101c6fdbd8SKent Overstreet } 14111c6fdbd8SKent Overstreet 141230bff594SKent Overstreet static int bch2_read_single_folio(struct folio *folio, 14131c6fdbd8SKent Overstreet struct address_space *mapping) 14141c6fdbd8SKent Overstreet { 14151c6fdbd8SKent Overstreet struct bch_inode_info *inode = to_bch_ei(mapping->host); 14161c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 14171c6fdbd8SKent Overstreet struct bch_read_bio *rbio; 141801ad6737SKent Overstreet struct bch_io_opts opts; 14191c6fdbd8SKent Overstreet int ret; 14201c6fdbd8SKent Overstreet DECLARE_COMPLETION_ONSTACK(done); 14211c6fdbd8SKent Overstreet 142201ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 142301ad6737SKent Overstreet 14245718fda0SKent Overstreet rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_KERNEL, &c->bio_read), 142501ad6737SKent Overstreet opts); 14261c6fdbd8SKent Overstreet rbio->bio.bi_private = &done; 142730bff594SKent Overstreet rbio->bio.bi_end_io = bch2_read_single_folio_end_io; 14281c6fdbd8SKent Overstreet 142930bff594SKent Overstreet __bchfs_readfolio(c, rbio, inode_inum(inode), folio); 14301c6fdbd8SKent Overstreet wait_for_completion(&done); 14311c6fdbd8SKent Overstreet 14321c6fdbd8SKent Overstreet ret = blk_status_to_errno(rbio->bio.bi_status); 14331c6fdbd8SKent Overstreet bio_put(&rbio->bio); 14341c6fdbd8SKent Overstreet 14351c6fdbd8SKent Overstreet if (ret < 0) 14361c6fdbd8SKent Overstreet return ret; 14371c6fdbd8SKent Overstreet 143830bff594SKent Overstreet folio_mark_uptodate(folio); 14391c6fdbd8SKent Overstreet return 0; 14401c6fdbd8SKent Overstreet } 14411c6fdbd8SKent Overstreet 14421c6fdbd8SKent Overstreet int bch2_read_folio(struct file *file, struct folio *folio) 14431c6fdbd8SKent Overstreet { 14441c6fdbd8SKent Overstreet int ret; 14451c6fdbd8SKent Overstreet 144630bff594SKent Overstreet ret = bch2_read_single_folio(folio, folio->mapping); 14471c6fdbd8SKent Overstreet folio_unlock(folio); 14485c1ef830SKent Overstreet return bch2_err_class(ret); 14491c6fdbd8SKent Overstreet } 14501c6fdbd8SKent Overstreet 14511c6fdbd8SKent Overstreet /* writepages: */ 14521c6fdbd8SKent Overstreet 14531c6fdbd8SKent Overstreet struct bch_writepage_state { 14541c6fdbd8SKent Overstreet struct bch_writepage_io *io; 14551c6fdbd8SKent Overstreet struct bch_io_opts opts; 145649fe78ffSKent Overstreet struct bch_folio_sector *tmp; 145749fe78ffSKent Overstreet unsigned tmp_sectors; 14581c6fdbd8SKent Overstreet }; 14591c6fdbd8SKent Overstreet 14601c6fdbd8SKent Overstreet static inline struct bch_writepage_state bch_writepage_state_init(struct bch_fs *c, 14611c6fdbd8SKent Overstreet struct bch_inode_info *inode) 14621c6fdbd8SKent Overstreet { 146301ad6737SKent Overstreet struct bch_writepage_state ret = { 0 }; 146401ad6737SKent Overstreet 146501ad6737SKent Overstreet bch2_inode_opts_get(&ret.opts, c, &inode->ei_inode); 146601ad6737SKent Overstreet return ret; 14671c6fdbd8SKent Overstreet } 14681c6fdbd8SKent Overstreet 14699f311f21SKent Overstreet static void bch2_writepage_io_done(struct bch_write_op *op) 14701c6fdbd8SKent Overstreet { 14719f311f21SKent Overstreet struct bch_writepage_io *io = 14729f311f21SKent Overstreet container_of(op, struct bch_writepage_io, op); 14739a3df993SKent Overstreet struct bch_fs *c = io->op.c; 14749a3df993SKent Overstreet struct bio *bio = &io->op.wbio.bio; 1475ff9c301fSKent Overstreet struct folio_iter fi; 1476b3fce09cSKent Overstreet unsigned i; 14771c6fdbd8SKent Overstreet 14789a3df993SKent Overstreet if (io->op.error) { 147933c74e41SKent Overstreet set_bit(EI_INODE_ERROR, &io->inode->ei_flags); 148033c74e41SKent Overstreet 1481ff9c301fSKent Overstreet bio_for_each_folio_all(fi, bio) { 14823342ac13SKent Overstreet struct bch_folio *s; 1483b3fce09cSKent Overstreet 1484ff9c301fSKent Overstreet folio_set_error(fi.folio); 1485ff9c301fSKent Overstreet mapping_set_error(fi.folio->mapping, -EIO); 1486b3fce09cSKent Overstreet 1487ff9c301fSKent Overstreet s = __bch2_folio(fi.folio); 14883826ee0bSKent Overstreet spin_lock(&s->lock); 1489ff9c301fSKent Overstreet for (i = 0; i < folio_sectors(fi.folio); i++) 1490b3fce09cSKent Overstreet s->s[i].nr_replicas = 0; 14913826ee0bSKent Overstreet spin_unlock(&s->lock); 149275812e70SKent Overstreet } 14931c6fdbd8SKent Overstreet } 14941c6fdbd8SKent Overstreet 14954be1a412SKent Overstreet if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) { 1496ff9c301fSKent Overstreet bio_for_each_folio_all(fi, bio) { 14973342ac13SKent Overstreet struct bch_folio *s; 14984be1a412SKent Overstreet 1499ff9c301fSKent Overstreet s = __bch2_folio(fi.folio); 15004be1a412SKent Overstreet spin_lock(&s->lock); 1501ff9c301fSKent Overstreet for (i = 0; i < folio_sectors(fi.folio); i++) 15024be1a412SKent Overstreet s->s[i].nr_replicas = 0; 15034be1a412SKent Overstreet spin_unlock(&s->lock); 15044be1a412SKent Overstreet } 15054be1a412SKent Overstreet } 15064be1a412SKent Overstreet 15071c6fdbd8SKent Overstreet /* 15081c6fdbd8SKent Overstreet * racing with fallocate can cause us to add fewer sectors than 15091c6fdbd8SKent Overstreet * expected - but we shouldn't add more sectors than expected: 15101c6fdbd8SKent Overstreet */ 1511f8494d25SKent Overstreet WARN_ON_ONCE(io->op.i_sectors_delta > 0); 15121c6fdbd8SKent Overstreet 15131c6fdbd8SKent Overstreet /* 15141c6fdbd8SKent Overstreet * (error (due to going RO) halfway through a page can screw that up 15151c6fdbd8SKent Overstreet * slightly) 15161c6fdbd8SKent Overstreet * XXX wtf? 15179a3df993SKent Overstreet BUG_ON(io->op.op.i_sectors_delta >= PAGE_SECTORS); 15181c6fdbd8SKent Overstreet */ 15191c6fdbd8SKent Overstreet 15201c6fdbd8SKent Overstreet /* 15211c6fdbd8SKent Overstreet * PageWriteback is effectively our ref on the inode - fixup i_blocks 15221c6fdbd8SKent Overstreet * before calling end_page_writeback: 15231c6fdbd8SKent Overstreet */ 15249a3df993SKent Overstreet i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta); 15251c6fdbd8SKent Overstreet 1526ff9c301fSKent Overstreet bio_for_each_folio_all(fi, bio) { 1527ff9c301fSKent Overstreet struct bch_folio *s = __bch2_folio(fi.folio); 15287f5e31e1SKent Overstreet 15297f5e31e1SKent Overstreet if (atomic_dec_and_test(&s->write_count)) 1530ff9c301fSKent Overstreet folio_end_writeback(fi.folio); 15317f5e31e1SKent Overstreet } 15321c6fdbd8SKent Overstreet 15339f311f21SKent Overstreet bio_put(&io->op.wbio.bio); 15341c6fdbd8SKent Overstreet } 15351c6fdbd8SKent Overstreet 15361c6fdbd8SKent Overstreet static void bch2_writepage_do_io(struct bch_writepage_state *w) 15371c6fdbd8SKent Overstreet { 15381c6fdbd8SKent Overstreet struct bch_writepage_io *io = w->io; 15391c6fdbd8SKent Overstreet 15401c6fdbd8SKent Overstreet w->io = NULL; 15419f311f21SKent Overstreet closure_call(&io->op.cl, bch2_write, NULL, NULL); 15421c6fdbd8SKent Overstreet } 15431c6fdbd8SKent Overstreet 15441c6fdbd8SKent Overstreet /* 15451c6fdbd8SKent Overstreet * Get a bch_writepage_io and add @page to it - appending to an existing one if 15461c6fdbd8SKent Overstreet * possible, else allocating a new one: 15471c6fdbd8SKent Overstreet */ 15481c6fdbd8SKent Overstreet static void bch2_writepage_io_alloc(struct bch_fs *c, 154950fe5bd6SKent Overstreet struct writeback_control *wbc, 15501c6fdbd8SKent Overstreet struct bch_writepage_state *w, 15511c6fdbd8SKent Overstreet struct bch_inode_info *inode, 15527f5e31e1SKent Overstreet u64 sector, 15531c6fdbd8SKent Overstreet unsigned nr_replicas) 15541c6fdbd8SKent Overstreet { 15551c6fdbd8SKent Overstreet struct bch_write_op *op; 15561c6fdbd8SKent Overstreet 15571c6fdbd8SKent Overstreet w->io = container_of(bio_alloc_bioset(NULL, BIO_MAX_VECS, 15581c6fdbd8SKent Overstreet REQ_OP_WRITE, 15595718fda0SKent Overstreet GFP_KERNEL, 15601c6fdbd8SKent Overstreet &c->writepage_bioset), 15619a3df993SKent Overstreet struct bch_writepage_io, op.wbio.bio); 15621c6fdbd8SKent Overstreet 15639a3df993SKent Overstreet w->io->inode = inode; 15649a3df993SKent Overstreet op = &w->io->op; 15659a3df993SKent Overstreet bch2_write_op_init(op, c, w->opts); 15669a3df993SKent Overstreet op->target = w->opts.foreground_target; 15671c6fdbd8SKent Overstreet op->nr_replicas = nr_replicas; 15681c6fdbd8SKent Overstreet op->res.nr_replicas = nr_replicas; 15691c6fdbd8SKent Overstreet op->write_point = writepoint_hashed(inode->ei_last_dirtied); 15708c6d298aSKent Overstreet op->subvol = inode->ei_subvol; 15717f5e31e1SKent Overstreet op->pos = POS(inode->v.i_ino, sector); 15729f311f21SKent Overstreet op->end_io = bch2_writepage_io_done; 1573a8b3a677SKent Overstreet op->devs_need_flush = &inode->ei_devs_need_flush; 15747f5e31e1SKent Overstreet op->wbio.bio.bi_iter.bi_sector = sector; 157550fe5bd6SKent Overstreet op->wbio.bio.bi_opf = wbc_to_write_flags(wbc); 15761c6fdbd8SKent Overstreet } 15771c6fdbd8SKent Overstreet 15781c6fdbd8SKent Overstreet static int __bch2_writepage(struct folio *folio, 15791c6fdbd8SKent Overstreet struct writeback_control *wbc, 15801c6fdbd8SKent Overstreet void *data) 15811c6fdbd8SKent Overstreet { 158230bff594SKent Overstreet struct bch_inode_info *inode = to_bch_ei(folio->mapping->host); 15831c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 15841c6fdbd8SKent Overstreet struct bch_writepage_state *w = data; 158549fe78ffSKent Overstreet struct bch_folio *s; 158630bff594SKent Overstreet unsigned i, offset, f_sectors, nr_replicas_this_write = U32_MAX; 15871c6fdbd8SKent Overstreet loff_t i_size = i_size_read(&inode->v); 1588e1036a2aSKent Overstreet int ret; 15891c6fdbd8SKent Overstreet 159030bff594SKent Overstreet EBUG_ON(!folio_test_uptodate(folio)); 15911c6fdbd8SKent Overstreet 159230bff594SKent Overstreet /* Is the folio fully inside i_size? */ 159333e2eb96SKent Overstreet if (folio_end_pos(folio) <= i_size) 15941c6fdbd8SKent Overstreet goto do_io; 15951c6fdbd8SKent Overstreet 159630bff594SKent Overstreet /* Is the folio fully outside i_size? (truncate in progress) */ 159733e2eb96SKent Overstreet if (folio_pos(folio) >= i_size) { 159830bff594SKent Overstreet folio_unlock(folio); 15991c6fdbd8SKent Overstreet return 0; 16001c6fdbd8SKent Overstreet } 16011c6fdbd8SKent Overstreet 16021c6fdbd8SKent Overstreet /* 160330bff594SKent Overstreet * The folio straddles i_size. It must be zeroed out on each and every 16041c6fdbd8SKent Overstreet * writepage invocation because it may be mmapped. "A file is mapped 160530bff594SKent Overstreet * in multiples of the folio size. For a file that is not a multiple of 160630bff594SKent Overstreet * the folio size, the remaining memory is zeroed when mapped, and 16071c6fdbd8SKent Overstreet * writes to that region are not written out to the file." 16081c6fdbd8SKent Overstreet */ 160933e2eb96SKent Overstreet folio_zero_segment(folio, 161033e2eb96SKent Overstreet i_size - folio_pos(folio), 161133e2eb96SKent Overstreet folio_size(folio)); 16121c6fdbd8SKent Overstreet do_io: 161330bff594SKent Overstreet f_sectors = folio_sectors(folio); 161470d41c9eSKent Overstreet s = bch2_folio(folio); 1615f81b648dSKent Overstreet 161649fe78ffSKent Overstreet if (f_sectors > w->tmp_sectors) { 161749fe78ffSKent Overstreet kfree(w->tmp); 161849fe78ffSKent Overstreet w->tmp = kzalloc(sizeof(struct bch_folio_sector) * 161949fe78ffSKent Overstreet f_sectors, __GFP_NOFAIL); 162049fe78ffSKent Overstreet w->tmp_sectors = f_sectors; 162149fe78ffSKent Overstreet } 162249fe78ffSKent Overstreet 1623f74a5051SKent Overstreet /* 1624f74a5051SKent Overstreet * Things get really hairy with errors during writeback: 1625f74a5051SKent Overstreet */ 162630bff594SKent Overstreet ret = bch2_get_folio_disk_reservation(c, inode, folio, false); 1627f74a5051SKent Overstreet BUG_ON(ret); 1628e1036a2aSKent Overstreet 16297f5e31e1SKent Overstreet /* Before unlocking the page, get copy of reservations: */ 1630f74a5051SKent Overstreet spin_lock(&s->lock); 163149fe78ffSKent Overstreet memcpy(w->tmp, s->s, sizeof(struct bch_folio_sector) * f_sectors); 16327f5e31e1SKent Overstreet 163330bff594SKent Overstreet for (i = 0; i < f_sectors; i++) { 1634a1774a05SKent Overstreet if (s->s[i].state < SECTOR_dirty) 16357f5e31e1SKent Overstreet continue; 16367f5e31e1SKent Overstreet 1637f81b648dSKent Overstreet nr_replicas_this_write = 1638f57a6a5dSKent Overstreet min_t(unsigned, nr_replicas_this_write, 1639f57a6a5dSKent Overstreet s->s[i].nr_replicas + 1640f57a6a5dSKent Overstreet s->s[i].replicas_reserved); 16417f5e31e1SKent Overstreet } 16421c6fdbd8SKent Overstreet 164330bff594SKent Overstreet for (i = 0; i < f_sectors; i++) { 1644a1774a05SKent Overstreet if (s->s[i].state < SECTOR_dirty) 16457f5e31e1SKent Overstreet continue; 16467f5e31e1SKent Overstreet 1647f57a6a5dSKent Overstreet s->s[i].nr_replicas = w->opts.compression 1648f57a6a5dSKent Overstreet ? 0 : nr_replicas_this_write; 1649e1036a2aSKent Overstreet 1650f57a6a5dSKent Overstreet s->s[i].replicas_reserved = 0; 1651a1774a05SKent Overstreet folio_sector_set(folio, s, i, SECTOR_allocated); 1652f57a6a5dSKent Overstreet } 1653a1774a05SKent Overstreet spin_unlock(&s->lock); 16541c6fdbd8SKent Overstreet 16557f5e31e1SKent Overstreet BUG_ON(atomic_read(&s->write_count)); 16567f5e31e1SKent Overstreet atomic_set(&s->write_count, 1); 16577f5e31e1SKent Overstreet 165830bff594SKent Overstreet BUG_ON(folio_test_writeback(folio)); 165930bff594SKent Overstreet folio_start_writeback(folio); 16607f5e31e1SKent Overstreet 166130bff594SKent Overstreet folio_unlock(folio); 16621c6fdbd8SKent Overstreet 16637f5e31e1SKent Overstreet offset = 0; 16647f5e31e1SKent Overstreet while (1) { 1665f74a5051SKent Overstreet unsigned sectors = 0, dirty_sectors = 0, reserved_sectors = 0; 16667f5e31e1SKent Overstreet u64 sector; 16677f5e31e1SKent Overstreet 166830bff594SKent Overstreet while (offset < f_sectors && 1669a1774a05SKent Overstreet w->tmp[offset].state < SECTOR_dirty) 16707f5e31e1SKent Overstreet offset++; 16717f5e31e1SKent Overstreet 167230bff594SKent Overstreet if (offset == f_sectors) 16737f5e31e1SKent Overstreet break; 16747f5e31e1SKent Overstreet 167530bff594SKent Overstreet while (offset + sectors < f_sectors && 1676a1774a05SKent Overstreet w->tmp[offset + sectors].state >= SECTOR_dirty) { 167749fe78ffSKent Overstreet reserved_sectors += w->tmp[offset + sectors].replicas_reserved; 1678a1774a05SKent Overstreet dirty_sectors += w->tmp[offset + sectors].state == SECTOR_dirty; 16797f5e31e1SKent Overstreet sectors++; 16807f5e31e1SKent Overstreet } 1681f74a5051SKent Overstreet BUG_ON(!sectors); 1682f74a5051SKent Overstreet 168330bff594SKent Overstreet sector = folio_sector(folio) + offset; 16847f5e31e1SKent Overstreet 16851c6fdbd8SKent Overstreet if (w->io && 16869a3df993SKent Overstreet (w->io->op.res.nr_replicas != nr_replicas_this_write || 168733e2eb96SKent Overstreet bio_full(&w->io->op.wbio.bio, sectors << 9) || 1688f59b3464SKent Overstreet w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >= 1689f59b3464SKent Overstreet (BIO_MAX_VECS * PAGE_SIZE) || 16909a3df993SKent Overstreet bio_end_sector(&w->io->op.wbio.bio) != sector)) 16911c6fdbd8SKent Overstreet bch2_writepage_do_io(w); 16921c6fdbd8SKent Overstreet 16931c6fdbd8SKent Overstreet if (!w->io) 169450fe5bd6SKent Overstreet bch2_writepage_io_alloc(c, wbc, w, inode, sector, 1695f81b648dSKent Overstreet nr_replicas_this_write); 16961c6fdbd8SKent Overstreet 16977f5e31e1SKent Overstreet atomic_inc(&s->write_count); 16987f5e31e1SKent Overstreet 16999a3df993SKent Overstreet BUG_ON(inode != w->io->inode); 170030bff594SKent Overstreet BUG_ON(!bio_add_folio(&w->io->op.wbio.bio, folio, 17017f5e31e1SKent Overstreet sectors << 9, offset << 9)); 17021c6fdbd8SKent Overstreet 17036cc3535dSKent Overstreet /* Check for writing past i_size: */ 17048eb71e9eSKent Overstreet WARN_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) > 170580fe580cSKent Overstreet round_up(i_size, block_bytes(c)) && 17068eb71e9eSKent Overstreet !test_bit(BCH_FS_EMERGENCY_RO, &c->flags), 17078eb71e9eSKent Overstreet "writing past i_size: %llu > %llu (unrounded %llu)\n", 17088eb71e9eSKent Overstreet bio_end_sector(&w->io->op.wbio.bio) << 9, 17098eb71e9eSKent Overstreet round_up(i_size, block_bytes(c)), 17108eb71e9eSKent Overstreet i_size); 17116cc3535dSKent Overstreet 17129a3df993SKent Overstreet w->io->op.res.sectors += reserved_sectors; 17139a3df993SKent Overstreet w->io->op.i_sectors_delta -= dirty_sectors; 17141c6fdbd8SKent Overstreet w->io->op.new_i_size = i_size; 17151c6fdbd8SKent Overstreet 17167f5e31e1SKent Overstreet offset += sectors; 17177f5e31e1SKent Overstreet } 17187f5e31e1SKent Overstreet 17197f5e31e1SKent Overstreet if (atomic_dec_and_test(&s->write_count)) 172030bff594SKent Overstreet folio_end_writeback(folio); 17217f5e31e1SKent Overstreet 17221c6fdbd8SKent Overstreet return 0; 17231c6fdbd8SKent Overstreet } 17241c6fdbd8SKent Overstreet 17251c6fdbd8SKent Overstreet int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc) 17261c6fdbd8SKent Overstreet { 17271c6fdbd8SKent Overstreet struct bch_fs *c = mapping->host->i_sb->s_fs_info; 17281c6fdbd8SKent Overstreet struct bch_writepage_state w = 17291c6fdbd8SKent Overstreet bch_writepage_state_init(c, to_bch_ei(mapping->host)); 17301c6fdbd8SKent Overstreet struct blk_plug plug; 17311c6fdbd8SKent Overstreet int ret; 17321c6fdbd8SKent Overstreet 17331c6fdbd8SKent Overstreet blk_start_plug(&plug); 17341c6fdbd8SKent Overstreet ret = write_cache_pages(mapping, wbc, __bch2_writepage, &w); 17351c6fdbd8SKent Overstreet if (w.io) 17361c6fdbd8SKent Overstreet bch2_writepage_do_io(&w); 17371c6fdbd8SKent Overstreet blk_finish_plug(&plug); 173849fe78ffSKent Overstreet kfree(w.tmp); 17395c1ef830SKent Overstreet return bch2_err_class(ret); 17401c6fdbd8SKent Overstreet } 17411c6fdbd8SKent Overstreet 17421c6fdbd8SKent Overstreet /* buffered writes: */ 17431c6fdbd8SKent Overstreet 17441c6fdbd8SKent Overstreet int bch2_write_begin(struct file *file, struct address_space *mapping, 17451c6fdbd8SKent Overstreet loff_t pos, unsigned len, 17461c6fdbd8SKent Overstreet struct page **pagep, void **fsdata) 17471c6fdbd8SKent Overstreet { 17481c6fdbd8SKent Overstreet struct bch_inode_info *inode = to_bch_ei(mapping->host); 17491c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 175030bff594SKent Overstreet struct bch2_folio_reservation *res; 175130bff594SKent Overstreet struct folio *folio; 175233e2eb96SKent Overstreet unsigned offset; 17531c6fdbd8SKent Overstreet int ret = -ENOMEM; 17541c6fdbd8SKent Overstreet 1755d1542e03SKent Overstreet res = kmalloc(sizeof(*res), GFP_KERNEL); 1756d1542e03SKent Overstreet if (!res) 1757d1542e03SKent Overstreet return -ENOMEM; 1758d1542e03SKent Overstreet 175930bff594SKent Overstreet bch2_folio_reservation_init(c, inode, res); 1760d1542e03SKent Overstreet *fsdata = res; 17611c6fdbd8SKent Overstreet 1762a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 17631c6fdbd8SKent Overstreet 176433e2eb96SKent Overstreet folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, 176530bff594SKent Overstreet FGP_LOCK|FGP_WRITE|FGP_CREAT|FGP_STABLE, 176630bff594SKent Overstreet mapping_gfp_mask(mapping)); 1767b6898917SKent Overstreet if (IS_ERR_OR_NULL(folio)) 17681c6fdbd8SKent Overstreet goto err_unlock; 17691c6fdbd8SKent Overstreet 177030bff594SKent Overstreet if (folio_test_uptodate(folio)) 17711c6fdbd8SKent Overstreet goto out; 17721c6fdbd8SKent Overstreet 177333e2eb96SKent Overstreet offset = pos - folio_pos(folio); 177433e2eb96SKent Overstreet len = min_t(size_t, len, folio_end_pos(folio) - pos); 177533e2eb96SKent Overstreet 177630bff594SKent Overstreet /* If we're writing entire folio, don't need to read it in first: */ 177733e2eb96SKent Overstreet if (!offset && len == folio_size(folio)) 17781c6fdbd8SKent Overstreet goto out; 17791c6fdbd8SKent Overstreet 17801c6fdbd8SKent Overstreet if (!offset && pos + len >= inode->v.i_size) { 178130bff594SKent Overstreet folio_zero_segment(folio, len, folio_size(folio)); 178230bff594SKent Overstreet flush_dcache_folio(folio); 17831c6fdbd8SKent Overstreet goto out; 17841c6fdbd8SKent Overstreet } 17851c6fdbd8SKent Overstreet 178633e2eb96SKent Overstreet if (folio_pos(folio) >= inode->v.i_size) { 178730bff594SKent Overstreet folio_zero_segments(folio, 0, offset, offset + len, folio_size(folio)); 178830bff594SKent Overstreet flush_dcache_folio(folio); 17891c6fdbd8SKent Overstreet goto out; 17901c6fdbd8SKent Overstreet } 17911c6fdbd8SKent Overstreet readpage: 179230bff594SKent Overstreet ret = bch2_read_single_folio(folio, mapping); 17931c6fdbd8SKent Overstreet if (ret) 17941c6fdbd8SKent Overstreet goto err; 17951c6fdbd8SKent Overstreet out: 179630bff594SKent Overstreet ret = bch2_folio_set(c, inode_inum(inode), &folio, 1); 1797e6ec361fSKent Overstreet if (ret) 17983a4d3656SKent Overstreet goto err; 1799e6ec361fSKent Overstreet 180030bff594SKent Overstreet ret = bch2_folio_reservation_get(c, inode, folio, res, offset, len); 18011c6fdbd8SKent Overstreet if (ret) { 180230bff594SKent Overstreet if (!folio_test_uptodate(folio)) { 18031c6fdbd8SKent Overstreet /* 180430bff594SKent Overstreet * If the folio hasn't been read in, we won't know if we 18051c6fdbd8SKent Overstreet * actually need a reservation - we don't actually need 180630bff594SKent Overstreet * to read here, we just need to check if the folio is 18071c6fdbd8SKent Overstreet * fully backed by uncompressed data: 18081c6fdbd8SKent Overstreet */ 18091c6fdbd8SKent Overstreet goto readpage; 18101c6fdbd8SKent Overstreet } 18111c6fdbd8SKent Overstreet 18121c6fdbd8SKent Overstreet goto err; 18131c6fdbd8SKent Overstreet } 18141c6fdbd8SKent Overstreet 181530bff594SKent Overstreet *pagep = &folio->page; 18161c6fdbd8SKent Overstreet return 0; 18171c6fdbd8SKent Overstreet err: 181830bff594SKent Overstreet folio_unlock(folio); 181930bff594SKent Overstreet folio_put(folio); 18201c6fdbd8SKent Overstreet *pagep = NULL; 18211c6fdbd8SKent Overstreet err_unlock: 1822a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 1823d1542e03SKent Overstreet kfree(res); 1824d1542e03SKent Overstreet *fsdata = NULL; 18255c1ef830SKent Overstreet return bch2_err_class(ret); 18261c6fdbd8SKent Overstreet } 18271c6fdbd8SKent Overstreet 18281c6fdbd8SKent Overstreet int bch2_write_end(struct file *file, struct address_space *mapping, 18291c6fdbd8SKent Overstreet loff_t pos, unsigned len, unsigned copied, 18301c6fdbd8SKent Overstreet struct page *page, void *fsdata) 18311c6fdbd8SKent Overstreet { 18321c6fdbd8SKent Overstreet struct bch_inode_info *inode = to_bch_ei(mapping->host); 18331c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 183430bff594SKent Overstreet struct bch2_folio_reservation *res = fsdata; 183530bff594SKent Overstreet struct folio *folio = page_folio(page); 183633e2eb96SKent Overstreet unsigned offset = pos - folio_pos(folio); 18371c6fdbd8SKent Overstreet 18381c6fdbd8SKent Overstreet lockdep_assert_held(&inode->v.i_rwsem); 183933e2eb96SKent Overstreet BUG_ON(offset + copied > folio_size(folio)); 18401c6fdbd8SKent Overstreet 184130bff594SKent Overstreet if (unlikely(copied < len && !folio_test_uptodate(folio))) { 18421c6fdbd8SKent Overstreet /* 184330bff594SKent Overstreet * The folio needs to be read in, but that would destroy 18441c6fdbd8SKent Overstreet * our partial write - simplest thing is to just force 18451c6fdbd8SKent Overstreet * userspace to redo the write: 18461c6fdbd8SKent Overstreet */ 184730bff594SKent Overstreet folio_zero_range(folio, 0, folio_size(folio)); 184830bff594SKent Overstreet flush_dcache_folio(folio); 18491c6fdbd8SKent Overstreet copied = 0; 18501c6fdbd8SKent Overstreet } 18511c6fdbd8SKent Overstreet 18521c6fdbd8SKent Overstreet spin_lock(&inode->v.i_lock); 18531c6fdbd8SKent Overstreet if (pos + copied > inode->v.i_size) 18541c6fdbd8SKent Overstreet i_size_write(&inode->v, pos + copied); 18551c6fdbd8SKent Overstreet spin_unlock(&inode->v.i_lock); 18561c6fdbd8SKent Overstreet 18571c6fdbd8SKent Overstreet if (copied) { 185830bff594SKent Overstreet if (!folio_test_uptodate(folio)) 185930bff594SKent Overstreet folio_mark_uptodate(folio); 1860d1542e03SKent Overstreet 186130bff594SKent Overstreet bch2_set_folio_dirty(c, inode, folio, res, offset, copied); 18621c6fdbd8SKent Overstreet 18631c6fdbd8SKent Overstreet inode->ei_last_dirtied = (unsigned long) current; 18641c6fdbd8SKent Overstreet } 18651c6fdbd8SKent Overstreet 186630bff594SKent Overstreet folio_unlock(folio); 186730bff594SKent Overstreet folio_put(folio); 1868a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 18691c6fdbd8SKent Overstreet 187030bff594SKent Overstreet bch2_folio_reservation_put(c, inode, res); 1871d1542e03SKent Overstreet kfree(res); 1872d1542e03SKent Overstreet 18731c6fdbd8SKent Overstreet return copied; 18741c6fdbd8SKent Overstreet } 18751c6fdbd8SKent Overstreet 1876c42b57c4SKent Overstreet static noinline void folios_trunc(folios *folios, struct folio **fi) 1877c42b57c4SKent Overstreet { 1878c42b57c4SKent Overstreet while (folios->data + folios->nr > fi) { 1879c42b57c4SKent Overstreet struct folio *f = darray_pop(folios); 1880c42b57c4SKent Overstreet 1881c42b57c4SKent Overstreet folio_unlock(f); 1882c42b57c4SKent Overstreet folio_put(f); 1883c42b57c4SKent Overstreet } 1884c42b57c4SKent Overstreet } 18851c6fdbd8SKent Overstreet 18861c6fdbd8SKent Overstreet static int __bch2_buffered_write(struct bch_inode_info *inode, 18871c6fdbd8SKent Overstreet struct address_space *mapping, 18881c6fdbd8SKent Overstreet struct iov_iter *iter, 18891c6fdbd8SKent Overstreet loff_t pos, unsigned len) 18901c6fdbd8SKent Overstreet { 18911c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 189230bff594SKent Overstreet struct bch2_folio_reservation res; 1893c42b57c4SKent Overstreet folios folios; 1894c42b57c4SKent Overstreet struct folio **fi, *f; 1895c42b57c4SKent Overstreet unsigned copied = 0, f_offset; 18966b9857b2SBrian Foster u64 end = pos + len, f_pos; 1897335f7d4fSBrian Foster loff_t last_folio_pos = inode->v.i_size; 18981c6fdbd8SKent Overstreet int ret = 0; 18991c6fdbd8SKent Overstreet 19001c6fdbd8SKent Overstreet BUG_ON(!len); 19011c6fdbd8SKent Overstreet 190230bff594SKent Overstreet bch2_folio_reservation_init(c, inode, &res); 1903c42b57c4SKent Overstreet darray_init(&folios); 1904d1542e03SKent Overstreet 190540022c01SKent Overstreet ret = filemap_get_contig_folios_d(mapping, pos, end, 190640022c01SKent Overstreet FGP_LOCK|FGP_WRITE|FGP_STABLE|FGP_CREAT, 190740022c01SKent Overstreet mapping_gfp_mask(mapping), 190840022c01SKent Overstreet &folios); 190940022c01SKent Overstreet if (ret) 19101c6fdbd8SKent Overstreet goto out; 191140022c01SKent Overstreet 191240022c01SKent Overstreet BUG_ON(!folios.nr); 191340022c01SKent Overstreet 1914c42b57c4SKent Overstreet f = darray_first(folios); 1915c42b57c4SKent Overstreet if (pos != folio_pos(f) && !folio_test_uptodate(f)) { 1916c42b57c4SKent Overstreet ret = bch2_read_single_folio(f, mapping); 19171c6fdbd8SKent Overstreet if (ret) 19181c6fdbd8SKent Overstreet goto out; 19191c6fdbd8SKent Overstreet } 19201c6fdbd8SKent Overstreet 1921c42b57c4SKent Overstreet f = darray_last(folios); 1922335f7d4fSBrian Foster end = min(end, folio_end_pos(f)); 1923335f7d4fSBrian Foster last_folio_pos = folio_pos(f); 1924c42b57c4SKent Overstreet if (end != folio_end_pos(f) && !folio_test_uptodate(f)) { 1925c42b57c4SKent Overstreet if (end >= inode->v.i_size) { 1926c42b57c4SKent Overstreet folio_zero_range(f, 0, folio_size(f)); 19271c6fdbd8SKent Overstreet } else { 1928c42b57c4SKent Overstreet ret = bch2_read_single_folio(f, mapping); 19291c6fdbd8SKent Overstreet if (ret) 19301c6fdbd8SKent Overstreet goto out; 19311c6fdbd8SKent Overstreet } 19321c6fdbd8SKent Overstreet } 19331c6fdbd8SKent Overstreet 193470d41c9eSKent Overstreet ret = bch2_folio_set(c, inode_inum(inode), folios.data, folios.nr); 193570d41c9eSKent Overstreet if (ret) 193670d41c9eSKent Overstreet goto out; 193770d41c9eSKent Overstreet 1938c42b57c4SKent Overstreet f_pos = pos; 1939c42b57c4SKent Overstreet f_offset = pos - folio_pos(darray_first(folios)); 1940c42b57c4SKent Overstreet darray_for_each(folios, fi) { 1941c42b57c4SKent Overstreet struct folio *f = *fi; 19426b9857b2SBrian Foster u64 f_len = min(end, folio_end_pos(f)) - f_pos; 19431c6fdbd8SKent Overstreet 1944353448f3SKent Overstreet /* 1945353448f3SKent Overstreet * XXX: per POSIX and fstests generic/275, on -ENOSPC we're 1946353448f3SKent Overstreet * supposed to write as much as we have disk space for. 1947353448f3SKent Overstreet * 1948353448f3SKent Overstreet * On failure here we should still write out a partial page if 1949353448f3SKent Overstreet * we aren't completely out of disk space - we don't do that 1950353448f3SKent Overstreet * yet: 1951353448f3SKent Overstreet */ 1952c42b57c4SKent Overstreet ret = bch2_folio_reservation_get(c, inode, f, &res, f_offset, f_len); 1953353448f3SKent Overstreet if (unlikely(ret)) { 1954c42b57c4SKent Overstreet folios_trunc(&folios, fi); 1955c42b57c4SKent Overstreet if (!folios.nr) 19561c6fdbd8SKent Overstreet goto out; 1957c42b57c4SKent Overstreet 1958c42b57c4SKent Overstreet end = min(end, folio_end_pos(darray_last(folios))); 1959353448f3SKent Overstreet break; 1960353448f3SKent Overstreet } 1961d1542e03SKent Overstreet 1962c42b57c4SKent Overstreet f_pos = folio_end_pos(f); 1963c42b57c4SKent Overstreet f_offset = 0; 19641c6fdbd8SKent Overstreet } 19651c6fdbd8SKent Overstreet 19661c6fdbd8SKent Overstreet if (mapping_writably_mapped(mapping)) 1967c42b57c4SKent Overstreet darray_for_each(folios, fi) 1968c42b57c4SKent Overstreet flush_dcache_folio(*fi); 19691c6fdbd8SKent Overstreet 1970c42b57c4SKent Overstreet f_pos = pos; 1971c42b57c4SKent Overstreet f_offset = pos - folio_pos(darray_first(folios)); 1972c42b57c4SKent Overstreet darray_for_each(folios, fi) { 1973c42b57c4SKent Overstreet struct folio *f = *fi; 19746b9857b2SBrian Foster u64 f_len = min(end, folio_end_pos(f)) - f_pos; 1975c42b57c4SKent Overstreet unsigned f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter); 1976d1542e03SKent Overstreet 1977c42b57c4SKent Overstreet if (!f_copied) { 1978c42b57c4SKent Overstreet folios_trunc(&folios, fi); 1979912bdf17SKent Overstreet break; 1980912bdf17SKent Overstreet } 1981912bdf17SKent Overstreet 1982c42b57c4SKent Overstreet if (!folio_test_uptodate(f) && 1983c42b57c4SKent Overstreet f_copied != folio_size(f) && 1984c42b57c4SKent Overstreet pos + copied + f_copied < inode->v.i_size) { 1985c42b57c4SKent Overstreet folio_zero_range(f, 0, folio_size(f)); 1986c42b57c4SKent Overstreet folios_trunc(&folios, fi); 1987912bdf17SKent Overstreet break; 19881c6fdbd8SKent Overstreet } 19891c6fdbd8SKent Overstreet 1990c42b57c4SKent Overstreet flush_dcache_folio(f); 1991c42b57c4SKent Overstreet copied += f_copied; 1992c42b57c4SKent Overstreet 1993c42b57c4SKent Overstreet if (f_copied != f_len) { 1994c42b57c4SKent Overstreet folios_trunc(&folios, fi + 1); 1995c42b57c4SKent Overstreet break; 1996c42b57c4SKent Overstreet } 1997c42b57c4SKent Overstreet 1998c42b57c4SKent Overstreet f_pos = folio_end_pos(f); 1999c42b57c4SKent Overstreet f_offset = 0; 2000c42b57c4SKent Overstreet } 2001c42b57c4SKent Overstreet 20021c6fdbd8SKent Overstreet if (!copied) 20031c6fdbd8SKent Overstreet goto out; 20041c6fdbd8SKent Overstreet 2005c42b57c4SKent Overstreet end = pos + copied; 2006c42b57c4SKent Overstreet 2007877dfb34SKent Overstreet spin_lock(&inode->v.i_lock); 2008c42b57c4SKent Overstreet if (end > inode->v.i_size) 2009c42b57c4SKent Overstreet i_size_write(&inode->v, end); 2010877dfb34SKent Overstreet spin_unlock(&inode->v.i_lock); 2011877dfb34SKent Overstreet 2012c42b57c4SKent Overstreet f_pos = pos; 2013c42b57c4SKent Overstreet f_offset = pos - folio_pos(darray_first(folios)); 2014c42b57c4SKent Overstreet darray_for_each(folios, fi) { 2015c42b57c4SKent Overstreet struct folio *f = *fi; 20166b9857b2SBrian Foster u64 f_len = min(end, folio_end_pos(f)) - f_pos; 2017d1542e03SKent Overstreet 2018c42b57c4SKent Overstreet if (!folio_test_uptodate(f)) 2019c42b57c4SKent Overstreet folio_mark_uptodate(f); 2020d1542e03SKent Overstreet 2021c42b57c4SKent Overstreet bch2_set_folio_dirty(c, inode, f, &res, f_offset, f_len); 2022d1542e03SKent Overstreet 2023c42b57c4SKent Overstreet f_pos = folio_end_pos(f); 2024c42b57c4SKent Overstreet f_offset = 0; 2025d1542e03SKent Overstreet } 2026877dfb34SKent Overstreet 2027877dfb34SKent Overstreet inode->ei_last_dirtied = (unsigned long) current; 20281c6fdbd8SKent Overstreet out: 2029c42b57c4SKent Overstreet darray_for_each(folios, fi) { 2030c42b57c4SKent Overstreet folio_unlock(*fi); 2031c42b57c4SKent Overstreet folio_put(*fi); 20321c6fdbd8SKent Overstreet } 20331c6fdbd8SKent Overstreet 2034335f7d4fSBrian Foster /* 2035335f7d4fSBrian Foster * If the last folio added to the mapping starts beyond current EOF, we 2036335f7d4fSBrian Foster * performed a short write but left around at least one post-EOF folio. 2037335f7d4fSBrian Foster * Clean up the mapping before we return. 2038335f7d4fSBrian Foster */ 2039335f7d4fSBrian Foster if (last_folio_pos >= inode->v.i_size) 2040335f7d4fSBrian Foster truncate_pagecache(&inode->v, inode->v.i_size); 2041335f7d4fSBrian Foster 2042c42b57c4SKent Overstreet darray_exit(&folios); 204330bff594SKent Overstreet bch2_folio_reservation_put(c, inode, &res); 20441c6fdbd8SKent Overstreet 20451c6fdbd8SKent Overstreet return copied ?: ret; 20461c6fdbd8SKent Overstreet } 20471c6fdbd8SKent Overstreet 20481c6fdbd8SKent Overstreet static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter) 20491c6fdbd8SKent Overstreet { 20501c6fdbd8SKent Overstreet struct file *file = iocb->ki_filp; 20511c6fdbd8SKent Overstreet struct address_space *mapping = file->f_mapping; 20521c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 20531c6fdbd8SKent Overstreet loff_t pos = iocb->ki_pos; 20541c6fdbd8SKent Overstreet ssize_t written = 0; 20551c6fdbd8SKent Overstreet int ret = 0; 20561c6fdbd8SKent Overstreet 2057a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 20581c6fdbd8SKent Overstreet 20591c6fdbd8SKent Overstreet do { 20601c6fdbd8SKent Overstreet unsigned offset = pos & (PAGE_SIZE - 1); 2061c42b57c4SKent Overstreet unsigned bytes = iov_iter_count(iter); 20621c6fdbd8SKent Overstreet again: 20631c6fdbd8SKent Overstreet /* 20641c6fdbd8SKent Overstreet * Bring in the user page that we will copy from _first_. 20651c6fdbd8SKent Overstreet * Otherwise there's a nasty deadlock on copying from the 20661c6fdbd8SKent Overstreet * same page as we're writing to, without it being marked 20671c6fdbd8SKent Overstreet * up-to-date. 20681c6fdbd8SKent Overstreet * 20691c6fdbd8SKent Overstreet * Not only is this an optimisation, but it is also required 20701c6fdbd8SKent Overstreet * to check that the address is actually valid, when atomic 20711c6fdbd8SKent Overstreet * usercopies are used, below. 20721c6fdbd8SKent Overstreet */ 20731c6fdbd8SKent Overstreet if (unlikely(fault_in_iov_iter_readable(iter, bytes))) { 20741c6fdbd8SKent Overstreet bytes = min_t(unsigned long, iov_iter_count(iter), 20751c6fdbd8SKent Overstreet PAGE_SIZE - offset); 20761c6fdbd8SKent Overstreet 20771c6fdbd8SKent Overstreet if (unlikely(fault_in_iov_iter_readable(iter, bytes))) { 20781c6fdbd8SKent Overstreet ret = -EFAULT; 20791c6fdbd8SKent Overstreet break; 20801c6fdbd8SKent Overstreet } 20811c6fdbd8SKent Overstreet } 20821c6fdbd8SKent Overstreet 20831c6fdbd8SKent Overstreet if (unlikely(fatal_signal_pending(current))) { 20841c6fdbd8SKent Overstreet ret = -EINTR; 20851c6fdbd8SKent Overstreet break; 20861c6fdbd8SKent Overstreet } 20871c6fdbd8SKent Overstreet 20881c6fdbd8SKent Overstreet ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes); 20891c6fdbd8SKent Overstreet if (unlikely(ret < 0)) 20901c6fdbd8SKent Overstreet break; 20911c6fdbd8SKent Overstreet 20921c6fdbd8SKent Overstreet cond_resched(); 20931c6fdbd8SKent Overstreet 20941c6fdbd8SKent Overstreet if (unlikely(ret == 0)) { 20951c6fdbd8SKent Overstreet /* 20961c6fdbd8SKent Overstreet * If we were unable to copy any data at all, we must 20971c6fdbd8SKent Overstreet * fall back to a single segment length write. 20981c6fdbd8SKent Overstreet * 20991c6fdbd8SKent Overstreet * If we didn't fallback here, we could livelock 21001c6fdbd8SKent Overstreet * because not all segments in the iov can be copied at 21011c6fdbd8SKent Overstreet * once without a pagefault. 21021c6fdbd8SKent Overstreet */ 21031c6fdbd8SKent Overstreet bytes = min_t(unsigned long, PAGE_SIZE - offset, 21041c6fdbd8SKent Overstreet iov_iter_single_seg_count(iter)); 21051c6fdbd8SKent Overstreet goto again; 21061c6fdbd8SKent Overstreet } 21071c6fdbd8SKent Overstreet pos += ret; 21081c6fdbd8SKent Overstreet written += ret; 2109912bdf17SKent Overstreet ret = 0; 21101c6fdbd8SKent Overstreet 21111c6fdbd8SKent Overstreet balance_dirty_pages_ratelimited(mapping); 21121c6fdbd8SKent Overstreet } while (iov_iter_count(iter)); 21131c6fdbd8SKent Overstreet 2114a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 21151c6fdbd8SKent Overstreet 21161c6fdbd8SKent Overstreet return written ? written : ret; 21171c6fdbd8SKent Overstreet } 21181c6fdbd8SKent Overstreet 21191c6fdbd8SKent Overstreet /* O_DIRECT reads */ 21201c6fdbd8SKent Overstreet 2121b4725cc1SKent Overstreet static void bio_check_or_release(struct bio *bio, bool check_dirty) 2122b4725cc1SKent Overstreet { 2123b4725cc1SKent Overstreet if (check_dirty) { 2124b4725cc1SKent Overstreet bio_check_pages_dirty(bio); 2125b4725cc1SKent Overstreet } else { 2126b4725cc1SKent Overstreet bio_release_pages(bio, false); 2127b4725cc1SKent Overstreet bio_put(bio); 2128b4725cc1SKent Overstreet } 2129b4725cc1SKent Overstreet } 2130b4725cc1SKent Overstreet 21311c6fdbd8SKent Overstreet static void bch2_dio_read_complete(struct closure *cl) 21321c6fdbd8SKent Overstreet { 21331c6fdbd8SKent Overstreet struct dio_read *dio = container_of(cl, struct dio_read, cl); 21341c6fdbd8SKent Overstreet 21351c6fdbd8SKent Overstreet dio->req->ki_complete(dio->req, dio->ret); 2136b4725cc1SKent Overstreet bio_check_or_release(&dio->rbio.bio, dio->should_dirty); 21371c6fdbd8SKent Overstreet } 21381c6fdbd8SKent Overstreet 21391c6fdbd8SKent Overstreet static void bch2_direct_IO_read_endio(struct bio *bio) 21401c6fdbd8SKent Overstreet { 21411c6fdbd8SKent Overstreet struct dio_read *dio = bio->bi_private; 21421c6fdbd8SKent Overstreet 21431c6fdbd8SKent Overstreet if (bio->bi_status) 21441c6fdbd8SKent Overstreet dio->ret = blk_status_to_errno(bio->bi_status); 21451c6fdbd8SKent Overstreet 21461c6fdbd8SKent Overstreet closure_put(&dio->cl); 21471c6fdbd8SKent Overstreet } 21481c6fdbd8SKent Overstreet 21491c6fdbd8SKent Overstreet static void bch2_direct_IO_read_split_endio(struct bio *bio) 21501c6fdbd8SKent Overstreet { 2151b4725cc1SKent Overstreet struct dio_read *dio = bio->bi_private; 2152b4725cc1SKent Overstreet bool should_dirty = dio->should_dirty; 2153b4725cc1SKent Overstreet 21541c6fdbd8SKent Overstreet bch2_direct_IO_read_endio(bio); 2155b4725cc1SKent Overstreet bio_check_or_release(bio, should_dirty); 21561c6fdbd8SKent Overstreet } 21571c6fdbd8SKent Overstreet 21581c6fdbd8SKent Overstreet static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) 21591c6fdbd8SKent Overstreet { 21601c6fdbd8SKent Overstreet struct file *file = req->ki_filp; 21611c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 21621c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 216301ad6737SKent Overstreet struct bch_io_opts opts; 21641c6fdbd8SKent Overstreet struct dio_read *dio; 21651c6fdbd8SKent Overstreet struct bio *bio; 21661c6fdbd8SKent Overstreet loff_t offset = req->ki_pos; 21671c6fdbd8SKent Overstreet bool sync = is_sync_kiocb(req); 21681c6fdbd8SKent Overstreet size_t shorten; 21691c6fdbd8SKent Overstreet ssize_t ret; 21701c6fdbd8SKent Overstreet 217101ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 217201ad6737SKent Overstreet 21731c6fdbd8SKent Overstreet if ((offset|iter->count) & (block_bytes(c) - 1)) 21741c6fdbd8SKent Overstreet return -EINVAL; 21751c6fdbd8SKent Overstreet 21761c6fdbd8SKent Overstreet ret = min_t(loff_t, iter->count, 21771c6fdbd8SKent Overstreet max_t(loff_t, 0, i_size_read(&inode->v) - offset)); 21781c6fdbd8SKent Overstreet 21791c6fdbd8SKent Overstreet if (!ret) 21801c6fdbd8SKent Overstreet return ret; 21811c6fdbd8SKent Overstreet 21821c6fdbd8SKent Overstreet shorten = iov_iter_count(iter) - round_up(ret, block_bytes(c)); 21831c6fdbd8SKent Overstreet iter->count -= shorten; 21841c6fdbd8SKent Overstreet 21851c6fdbd8SKent Overstreet bio = bio_alloc_bioset(NULL, 21864d126dc8SKent Overstreet bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), 21871c6fdbd8SKent Overstreet REQ_OP_READ, 21881c6fdbd8SKent Overstreet GFP_KERNEL, 21891c6fdbd8SKent Overstreet &c->dio_read_bioset); 21901c6fdbd8SKent Overstreet 21911c6fdbd8SKent Overstreet bio->bi_end_io = bch2_direct_IO_read_endio; 21921c6fdbd8SKent Overstreet 21931c6fdbd8SKent Overstreet dio = container_of(bio, struct dio_read, rbio.bio); 21941c6fdbd8SKent Overstreet closure_init(&dio->cl, NULL); 21951c6fdbd8SKent Overstreet 21961c6fdbd8SKent Overstreet /* 21971c6fdbd8SKent Overstreet * this is a _really_ horrible hack just to avoid an atomic sub at the 21981c6fdbd8SKent Overstreet * end: 21991c6fdbd8SKent Overstreet */ 22001c6fdbd8SKent Overstreet if (!sync) { 22011c6fdbd8SKent Overstreet set_closure_fn(&dio->cl, bch2_dio_read_complete, NULL); 22021c6fdbd8SKent Overstreet atomic_set(&dio->cl.remaining, 22031c6fdbd8SKent Overstreet CLOSURE_REMAINING_INITIALIZER - 22041c6fdbd8SKent Overstreet CLOSURE_RUNNING + 22051c6fdbd8SKent Overstreet CLOSURE_DESTRUCTOR); 22061c6fdbd8SKent Overstreet } else { 22071c6fdbd8SKent Overstreet atomic_set(&dio->cl.remaining, 22081c6fdbd8SKent Overstreet CLOSURE_REMAINING_INITIALIZER + 1); 22091c6fdbd8SKent Overstreet } 22101c6fdbd8SKent Overstreet 22111c6fdbd8SKent Overstreet dio->req = req; 22121c6fdbd8SKent Overstreet dio->ret = ret; 2213b4725cc1SKent Overstreet /* 2214b4725cc1SKent Overstreet * This is one of the sketchier things I've encountered: we have to skip 2215b4725cc1SKent Overstreet * the dirtying of requests that are internal from the kernel (i.e. from 2216b4725cc1SKent Overstreet * loopback), because we'll deadlock on page_lock. 2217b4725cc1SKent Overstreet */ 2218b4725cc1SKent Overstreet dio->should_dirty = iter_is_iovec(iter); 22191c6fdbd8SKent Overstreet 22201c6fdbd8SKent Overstreet goto start; 22211c6fdbd8SKent Overstreet while (iter->count) { 22221c6fdbd8SKent Overstreet bio = bio_alloc_bioset(NULL, 22234d126dc8SKent Overstreet bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), 22241c6fdbd8SKent Overstreet REQ_OP_READ, 22251c6fdbd8SKent Overstreet GFP_KERNEL, 22261c6fdbd8SKent Overstreet &c->bio_read); 22271c6fdbd8SKent Overstreet bio->bi_end_io = bch2_direct_IO_read_split_endio; 22281c6fdbd8SKent Overstreet start: 22291c6fdbd8SKent Overstreet bio->bi_opf = REQ_OP_READ|REQ_SYNC; 22301c6fdbd8SKent Overstreet bio->bi_iter.bi_sector = offset >> 9; 22311c6fdbd8SKent Overstreet bio->bi_private = dio; 22321c6fdbd8SKent Overstreet 22331c6fdbd8SKent Overstreet ret = bio_iov_iter_get_pages(bio, iter); 22341c6fdbd8SKent Overstreet if (ret < 0) { 22351c6fdbd8SKent Overstreet /* XXX: fault inject this path */ 22361c6fdbd8SKent Overstreet bio->bi_status = BLK_STS_RESOURCE; 22371c6fdbd8SKent Overstreet bio_endio(bio); 22381c6fdbd8SKent Overstreet break; 22391c6fdbd8SKent Overstreet } 22401c6fdbd8SKent Overstreet 22411c6fdbd8SKent Overstreet offset += bio->bi_iter.bi_size; 2242b4725cc1SKent Overstreet 2243b4725cc1SKent Overstreet if (dio->should_dirty) 22441c6fdbd8SKent Overstreet bio_set_pages_dirty(bio); 22451c6fdbd8SKent Overstreet 22461c6fdbd8SKent Overstreet if (iter->count) 22471c6fdbd8SKent Overstreet closure_get(&dio->cl); 22481c6fdbd8SKent Overstreet 22498c6d298aSKent Overstreet bch2_read(c, rbio_init(bio, opts), inode_inum(inode)); 22501c6fdbd8SKent Overstreet } 22511c6fdbd8SKent Overstreet 22521c6fdbd8SKent Overstreet iter->count += shorten; 22531c6fdbd8SKent Overstreet 22541c6fdbd8SKent Overstreet if (sync) { 22551c6fdbd8SKent Overstreet closure_sync(&dio->cl); 22561c6fdbd8SKent Overstreet closure_debug_destroy(&dio->cl); 22571c6fdbd8SKent Overstreet ret = dio->ret; 2258b4725cc1SKent Overstreet bio_check_or_release(&dio->rbio.bio, dio->should_dirty); 22591c6fdbd8SKent Overstreet return ret; 22601c6fdbd8SKent Overstreet } else { 22611c6fdbd8SKent Overstreet return -EIOCBQUEUED; 22621c6fdbd8SKent Overstreet } 22631c6fdbd8SKent Overstreet } 22641c6fdbd8SKent Overstreet 22651c6fdbd8SKent Overstreet ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter) 22661c6fdbd8SKent Overstreet { 22671c6fdbd8SKent Overstreet struct file *file = iocb->ki_filp; 22681c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 22691c6fdbd8SKent Overstreet struct address_space *mapping = file->f_mapping; 22701c6fdbd8SKent Overstreet size_t count = iov_iter_count(iter); 22711c6fdbd8SKent Overstreet ssize_t ret; 22721c6fdbd8SKent Overstreet 22731c6fdbd8SKent Overstreet if (!count) 22741c6fdbd8SKent Overstreet return 0; /* skip atime */ 22751c6fdbd8SKent Overstreet 22761c6fdbd8SKent Overstreet if (iocb->ki_flags & IOCB_DIRECT) { 22771c6fdbd8SKent Overstreet struct blk_plug plug; 22781c6fdbd8SKent Overstreet 2279a023127aSKent Overstreet if (unlikely(mapping->nrpages)) { 22801c6fdbd8SKent Overstreet ret = filemap_write_and_wait_range(mapping, 22811c6fdbd8SKent Overstreet iocb->ki_pos, 22821c6fdbd8SKent Overstreet iocb->ki_pos + count - 1); 22831c6fdbd8SKent Overstreet if (ret < 0) 22845c1ef830SKent Overstreet goto out; 2285a023127aSKent Overstreet } 22861c6fdbd8SKent Overstreet 22871c6fdbd8SKent Overstreet file_accessed(file); 22881c6fdbd8SKent Overstreet 22891c6fdbd8SKent Overstreet blk_start_plug(&plug); 22901c6fdbd8SKent Overstreet ret = bch2_direct_IO_read(iocb, iter); 22911c6fdbd8SKent Overstreet blk_finish_plug(&plug); 22921c6fdbd8SKent Overstreet 22931c6fdbd8SKent Overstreet if (ret >= 0) 22941c6fdbd8SKent Overstreet iocb->ki_pos += ret; 22951c6fdbd8SKent Overstreet } else { 2296a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 22971c6fdbd8SKent Overstreet ret = generic_file_read_iter(iocb, iter); 2298a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 22991c6fdbd8SKent Overstreet } 23005c1ef830SKent Overstreet out: 23015c1ef830SKent Overstreet return bch2_err_class(ret); 23021c6fdbd8SKent Overstreet } 23031c6fdbd8SKent Overstreet 23041c6fdbd8SKent Overstreet /* O_DIRECT writes */ 23051c6fdbd8SKent Overstreet 23066fed42bbSKent Overstreet static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum, 23076fed42bbSKent Overstreet u64 offset, u64 size, 23086fed42bbSKent Overstreet unsigned nr_replicas, bool compressed) 23096fed42bbSKent Overstreet { 23106fed42bbSKent Overstreet struct btree_trans trans; 23116fed42bbSKent Overstreet struct btree_iter iter; 23126fed42bbSKent Overstreet struct bkey_s_c k; 23136fed42bbSKent Overstreet u64 end = offset + size; 23146fed42bbSKent Overstreet u32 snapshot; 23156fed42bbSKent Overstreet bool ret = true; 23166fed42bbSKent Overstreet int err; 23176fed42bbSKent Overstreet 23186fed42bbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 23196fed42bbSKent Overstreet retry: 23206fed42bbSKent Overstreet bch2_trans_begin(&trans); 23216fed42bbSKent Overstreet 23226fed42bbSKent Overstreet err = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 23236fed42bbSKent Overstreet if (err) 23246fed42bbSKent Overstreet goto err; 23256fed42bbSKent Overstreet 2326e5fa91d7SKent Overstreet for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, 23276fed42bbSKent Overstreet SPOS(inum.inum, offset, snapshot), 23286fed42bbSKent Overstreet BTREE_ITER_SLOTS, k, err) { 2329e88a75ebSKent Overstreet if (bkey_ge(bkey_start_pos(k.k), POS(inum.inum, end))) 23306fed42bbSKent Overstreet break; 23316fed42bbSKent Overstreet 23328c6d298aSKent Overstreet if (k.k->p.snapshot != snapshot || 23338c6d298aSKent Overstreet nr_replicas > bch2_bkey_replicas(c, k) || 23346fed42bbSKent Overstreet (!compressed && bch2_bkey_sectors_compressed(k))) { 23356fed42bbSKent Overstreet ret = false; 23366fed42bbSKent Overstreet break; 23376fed42bbSKent Overstreet } 23386fed42bbSKent Overstreet } 23396fed42bbSKent Overstreet 23406fed42bbSKent Overstreet offset = iter.pos.offset; 23416fed42bbSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 23426fed42bbSKent Overstreet err: 2343549d173cSKent Overstreet if (bch2_err_matches(err, BCH_ERR_transaction_restart)) 23446fed42bbSKent Overstreet goto retry; 23456fed42bbSKent Overstreet bch2_trans_exit(&trans); 23466fed42bbSKent Overstreet 23476fed42bbSKent Overstreet return err ? false : ret; 23486fed42bbSKent Overstreet } 23496fed42bbSKent Overstreet 2350182c7bbfSKent Overstreet static noinline bool bch2_dio_write_check_allocated(struct dio_write *dio) 2351182c7bbfSKent Overstreet { 2352182c7bbfSKent Overstreet struct bch_fs *c = dio->op.c; 2353182c7bbfSKent Overstreet struct bch_inode_info *inode = dio->inode; 2354182c7bbfSKent Overstreet struct bio *bio = &dio->op.wbio.bio; 2355182c7bbfSKent Overstreet 2356182c7bbfSKent Overstreet return bch2_check_range_allocated(c, inode_inum(inode), 2357182c7bbfSKent Overstreet dio->op.pos.offset, bio_sectors(bio), 2358182c7bbfSKent Overstreet dio->op.opts.data_replicas, 2359182c7bbfSKent Overstreet dio->op.opts.compression != 0); 2360182c7bbfSKent Overstreet } 2361182c7bbfSKent Overstreet 2362a1ee777bSKent Overstreet static void bch2_dio_write_loop_async(struct bch_write_op *); 2363a1ee777bSKent Overstreet static __always_inline long bch2_dio_write_done(struct dio_write *dio); 2364a1ee777bSKent Overstreet 23651c6fdbd8SKent Overstreet /* 23661c6fdbd8SKent Overstreet * We're going to return -EIOCBQUEUED, but we haven't finished consuming the 23671c6fdbd8SKent Overstreet * iov_iter yet, so we need to stash a copy of the iovec: it might be on the 23681c6fdbd8SKent Overstreet * caller's stack, we're not guaranteed that it will live for the duration of 23691c6fdbd8SKent Overstreet * the IO: 23701c6fdbd8SKent Overstreet */ 23711c6fdbd8SKent Overstreet static noinline int bch2_dio_write_copy_iov(struct dio_write *dio) 23721c6fdbd8SKent Overstreet { 23731c6fdbd8SKent Overstreet struct iovec *iov = dio->inline_vecs; 23741c6fdbd8SKent Overstreet 23751c6fdbd8SKent Overstreet /* 23761c6fdbd8SKent Overstreet * iov_iter has a single embedded iovec - nothing to do: 23771c6fdbd8SKent Overstreet */ 23781c6fdbd8SKent Overstreet if (iter_is_ubuf(&dio->iter)) 23791c6fdbd8SKent Overstreet return 0; 23801c6fdbd8SKent Overstreet 23811c6fdbd8SKent Overstreet /* 23821c6fdbd8SKent Overstreet * We don't currently handle non-iovec iov_iters here - return an error, 23831c6fdbd8SKent Overstreet * and we'll fall back to doing the IO synchronously: 23841c6fdbd8SKent Overstreet */ 23851c6fdbd8SKent Overstreet if (!iter_is_iovec(&dio->iter)) 23861c6fdbd8SKent Overstreet return -1; 23871c6fdbd8SKent Overstreet 23881c6fdbd8SKent Overstreet if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) { 23891c6fdbd8SKent Overstreet iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov), 23901c6fdbd8SKent Overstreet GFP_KERNEL); 23911c6fdbd8SKent Overstreet if (unlikely(!iov)) 23921c6fdbd8SKent Overstreet return -ENOMEM; 23931c6fdbd8SKent Overstreet 23941c6fdbd8SKent Overstreet dio->free_iov = true; 23951c6fdbd8SKent Overstreet } 23961c6fdbd8SKent Overstreet 23971c6fdbd8SKent Overstreet memcpy(iov, dio->iter.__iov, dio->iter.nr_segs * sizeof(*iov)); 23981c6fdbd8SKent Overstreet dio->iter.__iov = iov; 23991c6fdbd8SKent Overstreet return 0; 24001c6fdbd8SKent Overstreet } 24011c6fdbd8SKent Overstreet 2402a1ee777bSKent Overstreet static void bch2_dio_write_flush_done(struct closure *cl) 2403a1ee777bSKent Overstreet { 2404a1ee777bSKent Overstreet struct dio_write *dio = container_of(cl, struct dio_write, op.cl); 2405a1ee777bSKent Overstreet struct bch_fs *c = dio->op.c; 2406a1ee777bSKent Overstreet 2407a1ee777bSKent Overstreet closure_debug_destroy(cl); 2408a1ee777bSKent Overstreet 2409a1ee777bSKent Overstreet dio->op.error = bch2_journal_error(&c->journal); 2410a1ee777bSKent Overstreet 2411a1ee777bSKent Overstreet bch2_dio_write_done(dio); 2412a1ee777bSKent Overstreet } 2413a1ee777bSKent Overstreet 2414a1ee777bSKent Overstreet static noinline void bch2_dio_write_flush(struct dio_write *dio) 2415a1ee777bSKent Overstreet { 2416a1ee777bSKent Overstreet struct bch_fs *c = dio->op.c; 2417a1ee777bSKent Overstreet struct bch_inode_unpacked inode; 2418a1ee777bSKent Overstreet int ret; 2419a1ee777bSKent Overstreet 2420a1ee777bSKent Overstreet dio->flush = 0; 2421a1ee777bSKent Overstreet 2422a1ee777bSKent Overstreet closure_init(&dio->op.cl, NULL); 2423a1ee777bSKent Overstreet 2424a1ee777bSKent Overstreet if (!dio->op.error) { 2425a1ee777bSKent Overstreet ret = bch2_inode_find_by_inum(c, inode_inum(dio->inode), &inode); 2426a8b3a677SKent Overstreet if (ret) { 2427a1ee777bSKent Overstreet dio->op.error = ret; 2428a8b3a677SKent Overstreet } else { 2429a1ee777bSKent Overstreet bch2_journal_flush_seq_async(&c->journal, inode.bi_journal_seq, &dio->op.cl); 2430a8b3a677SKent Overstreet bch2_inode_flush_nocow_writes_async(c, dio->inode, &dio->op.cl); 2431a8b3a677SKent Overstreet } 2432a1ee777bSKent Overstreet } 2433a1ee777bSKent Overstreet 2434a1ee777bSKent Overstreet if (dio->sync) { 2435a1ee777bSKent Overstreet closure_sync(&dio->op.cl); 2436a1ee777bSKent Overstreet closure_debug_destroy(&dio->op.cl); 2437a1ee777bSKent Overstreet } else { 2438a1ee777bSKent Overstreet continue_at(&dio->op.cl, bch2_dio_write_flush_done, NULL); 2439a1ee777bSKent Overstreet } 2440a1ee777bSKent Overstreet } 2441042a1f26SKent Overstreet 2442182c7bbfSKent Overstreet static __always_inline long bch2_dio_write_done(struct dio_write *dio) 2443182c7bbfSKent Overstreet { 2444182c7bbfSKent Overstreet struct kiocb *req = dio->req; 2445182c7bbfSKent Overstreet struct bch_inode_info *inode = dio->inode; 2446182c7bbfSKent Overstreet bool sync = dio->sync; 2447a1ee777bSKent Overstreet long ret; 2448a1ee777bSKent Overstreet 2449a1ee777bSKent Overstreet if (unlikely(dio->flush)) { 2450a1ee777bSKent Overstreet bch2_dio_write_flush(dio); 2451a1ee777bSKent Overstreet if (!sync) 2452a1ee777bSKent Overstreet return -EIOCBQUEUED; 2453a1ee777bSKent Overstreet } 2454182c7bbfSKent Overstreet 2455a7ecd30cSKent Overstreet bch2_pagecache_block_put(inode); 2456182c7bbfSKent Overstreet 2457182c7bbfSKent Overstreet if (dio->free_iov) 2458182c7bbfSKent Overstreet kfree(dio->iter.__iov); 2459a1ee777bSKent Overstreet 2460a1ee777bSKent Overstreet ret = dio->op.error ?: ((long) dio->written << 9); 2461182c7bbfSKent Overstreet bio_put(&dio->op.wbio.bio); 2462182c7bbfSKent Overstreet 2463182c7bbfSKent Overstreet /* inode->i_dio_count is our ref on inode and thus bch_fs */ 2464182c7bbfSKent Overstreet inode_dio_end(&inode->v); 2465182c7bbfSKent Overstreet 2466182c7bbfSKent Overstreet if (ret < 0) 2467182c7bbfSKent Overstreet ret = bch2_err_class(ret); 2468182c7bbfSKent Overstreet 2469182c7bbfSKent Overstreet if (!sync) { 2470182c7bbfSKent Overstreet req->ki_complete(req, ret); 2471182c7bbfSKent Overstreet ret = -EIOCBQUEUED; 2472182c7bbfSKent Overstreet } 2473182c7bbfSKent Overstreet return ret; 2474182c7bbfSKent Overstreet } 2475182c7bbfSKent Overstreet 2476182c7bbfSKent Overstreet static __always_inline void bch2_dio_write_end(struct dio_write *dio) 2477182c7bbfSKent Overstreet { 2478182c7bbfSKent Overstreet struct bch_fs *c = dio->op.c; 2479182c7bbfSKent Overstreet struct kiocb *req = dio->req; 2480182c7bbfSKent Overstreet struct bch_inode_info *inode = dio->inode; 2481182c7bbfSKent Overstreet struct bio *bio = &dio->op.wbio.bio; 2482182c7bbfSKent Overstreet 2483182c7bbfSKent Overstreet req->ki_pos += (u64) dio->op.written << 9; 2484182c7bbfSKent Overstreet dio->written += dio->op.written; 2485182c7bbfSKent Overstreet 24866b1b186aSKent Overstreet if (dio->extending) { 2487182c7bbfSKent Overstreet spin_lock(&inode->v.i_lock); 2488182c7bbfSKent Overstreet if (req->ki_pos > inode->v.i_size) 2489182c7bbfSKent Overstreet i_size_write(&inode->v, req->ki_pos); 2490182c7bbfSKent Overstreet spin_unlock(&inode->v.i_lock); 24916b1b186aSKent Overstreet } 24926b1b186aSKent Overstreet 24936b1b186aSKent Overstreet if (dio->op.i_sectors_delta || dio->quota_res.sectors) { 24946b1b186aSKent Overstreet mutex_lock(&inode->ei_quota_lock); 24956b1b186aSKent Overstreet __i_sectors_acct(c, inode, &dio->quota_res, dio->op.i_sectors_delta); 24966b1b186aSKent Overstreet __bch2_quota_reservation_put(c, inode, &dio->quota_res); 24976b1b186aSKent Overstreet mutex_unlock(&inode->ei_quota_lock); 24986b1b186aSKent Overstreet } 2499182c7bbfSKent Overstreet 2500182c7bbfSKent Overstreet bio_release_pages(bio, false); 2501182c7bbfSKent Overstreet 2502182c7bbfSKent Overstreet if (unlikely(dio->op.error)) 2503182c7bbfSKent Overstreet set_bit(EI_INODE_ERROR, &inode->ei_flags); 2504182c7bbfSKent Overstreet } 2505182c7bbfSKent Overstreet 25064d868d18SKent Overstreet static __always_inline long bch2_dio_write_loop(struct dio_write *dio) 25071c6fdbd8SKent Overstreet { 2508182c7bbfSKent Overstreet struct bch_fs *c = dio->op.c; 25091c6fdbd8SKent Overstreet struct kiocb *req = dio->req; 2510182c7bbfSKent Overstreet struct address_space *mapping = dio->mapping; 2511182c7bbfSKent Overstreet struct bch_inode_info *inode = dio->inode; 251201ad6737SKent Overstreet struct bch_io_opts opts; 25139a3df993SKent Overstreet struct bio *bio = &dio->op.wbio.bio; 2514eb8e6e9cSKent Overstreet unsigned unaligned, iter_count; 2515eb8e6e9cSKent Overstreet bool sync = dio->sync, dropped_locks; 25161c6fdbd8SKent Overstreet long ret; 25171c6fdbd8SKent Overstreet 251801ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 251901ad6737SKent Overstreet 25201c6fdbd8SKent Overstreet while (1) { 2521eb8e6e9cSKent Overstreet iter_count = dio->iter.count; 2522eb8e6e9cSKent Overstreet 2523182c7bbfSKent Overstreet EBUG_ON(current->faults_disabled_mapping); 25241c6fdbd8SKent Overstreet current->faults_disabled_mapping = mapping; 25251c6fdbd8SKent Overstreet 25261c6fdbd8SKent Overstreet ret = bio_iov_iter_get_pages(bio, &dio->iter); 25271c6fdbd8SKent Overstreet 2528eb8e6e9cSKent Overstreet dropped_locks = fdm_dropped_locks(); 2529eb8e6e9cSKent Overstreet 25301c6fdbd8SKent Overstreet current->faults_disabled_mapping = NULL; 25311c6fdbd8SKent Overstreet 2532eb8e6e9cSKent Overstreet /* 2533eb8e6e9cSKent Overstreet * If the fault handler returned an error but also signalled 2534eb8e6e9cSKent Overstreet * that it dropped & retook ei_pagecache_lock, we just need to 2535eb8e6e9cSKent Overstreet * re-shoot down the page cache and retry: 2536eb8e6e9cSKent Overstreet */ 2537eb8e6e9cSKent Overstreet if (dropped_locks && ret) 2538eb8e6e9cSKent Overstreet ret = 0; 2539eb8e6e9cSKent Overstreet 25401c6fdbd8SKent Overstreet if (unlikely(ret < 0)) 25411c6fdbd8SKent Overstreet goto err; 25421c6fdbd8SKent Overstreet 2543eb8e6e9cSKent Overstreet if (unlikely(dropped_locks)) { 2544eb8e6e9cSKent Overstreet ret = write_invalidate_inode_pages_range(mapping, 2545eb8e6e9cSKent Overstreet req->ki_pos, 2546eb8e6e9cSKent Overstreet req->ki_pos + iter_count - 1); 2547eb8e6e9cSKent Overstreet if (unlikely(ret)) 2548eb8e6e9cSKent Overstreet goto err; 2549eb8e6e9cSKent Overstreet 2550eb8e6e9cSKent Overstreet if (!bio->bi_iter.bi_size) 2551eb8e6e9cSKent Overstreet continue; 2552eb8e6e9cSKent Overstreet } 2553eb8e6e9cSKent Overstreet 25540a426c32SKent Overstreet unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1); 25550a426c32SKent Overstreet bio->bi_iter.bi_size -= unaligned; 25560a426c32SKent Overstreet iov_iter_revert(&dio->iter, unaligned); 25570a426c32SKent Overstreet 25580a426c32SKent Overstreet if (!bio->bi_iter.bi_size) { 25590a426c32SKent Overstreet /* 25600a426c32SKent Overstreet * bio_iov_iter_get_pages was only able to get < 25610a426c32SKent Overstreet * blocksize worth of pages: 25620a426c32SKent Overstreet */ 25630a426c32SKent Overstreet ret = -EFAULT; 25640a426c32SKent Overstreet goto err; 25650a426c32SKent Overstreet } 25660a426c32SKent Overstreet 256701ad6737SKent Overstreet bch2_write_op_init(&dio->op, c, opts); 2568182c7bbfSKent Overstreet dio->op.end_io = sync 2569182c7bbfSKent Overstreet ? NULL 2570182c7bbfSKent Overstreet : bch2_dio_write_loop_async; 2571042a1f26SKent Overstreet dio->op.target = dio->op.opts.foreground_target; 2572042a1f26SKent Overstreet dio->op.write_point = writepoint_hashed((unsigned long) current); 2573042a1f26SKent Overstreet dio->op.nr_replicas = dio->op.opts.data_replicas; 25748c6d298aSKent Overstreet dio->op.subvol = inode->ei_subvol; 2575042a1f26SKent Overstreet dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9); 2576a8b3a677SKent Overstreet dio->op.devs_need_flush = &inode->ei_devs_need_flush; 2577042a1f26SKent Overstreet 25781df3e199SKent Overstreet if (sync) 25791df3e199SKent Overstreet dio->op.flags |= BCH_WRITE_SYNC; 2580a6336910SKent Overstreet dio->op.flags |= BCH_WRITE_CHECK_ENOSPC; 2581042a1f26SKent Overstreet 25826b1b186aSKent Overstreet ret = bch2_quota_reservation_add(c, inode, &dio->quota_res, 25836b1b186aSKent Overstreet bio_sectors(bio), true); 25846b1b186aSKent Overstreet if (unlikely(ret)) 25856b1b186aSKent Overstreet goto err; 25866b1b186aSKent Overstreet 2587042a1f26SKent Overstreet ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio), 2588042a1f26SKent Overstreet dio->op.opts.data_replicas, 0); 2589042a1f26SKent Overstreet if (unlikely(ret) && 2590182c7bbfSKent Overstreet !bch2_dio_write_check_allocated(dio)) 2591042a1f26SKent Overstreet goto err; 25921c6fdbd8SKent Overstreet 25931c6fdbd8SKent Overstreet task_io_account_write(bio->bi_iter.bi_size); 25941c6fdbd8SKent Overstreet 2595182c7bbfSKent Overstreet if (unlikely(dio->iter.count) && 2596182c7bbfSKent Overstreet !dio->sync && 2597182c7bbfSKent Overstreet !dio->loop && 2598182c7bbfSKent Overstreet bch2_dio_write_copy_iov(dio)) 2599286d8ad0SKent Overstreet dio->sync = sync = true; 2600182c7bbfSKent Overstreet 26011c6fdbd8SKent Overstreet dio->loop = true; 2602f8f30863SKent Overstreet closure_call(&dio->op.cl, bch2_write, NULL, NULL); 26031c6fdbd8SKent Overstreet 2604182c7bbfSKent Overstreet if (!sync) 26051c6fdbd8SKent Overstreet return -EIOCBQUEUED; 26069a3df993SKent Overstreet 2607182c7bbfSKent Overstreet bch2_dio_write_end(dio); 26089a3df993SKent Overstreet 2609182c7bbfSKent Overstreet if (likely(!dio->iter.count) || dio->op.error) 26101c6fdbd8SKent Overstreet break; 2611f8f30863SKent Overstreet 26121c6fdbd8SKent Overstreet bio_reset(bio, NULL, REQ_OP_WRITE); 26131c6fdbd8SKent Overstreet } 2614182c7bbfSKent Overstreet out: 2615182c7bbfSKent Overstreet return bch2_dio_write_done(dio); 26161c6fdbd8SKent Overstreet err: 2617182c7bbfSKent Overstreet dio->op.error = ret; 26181c6fdbd8SKent Overstreet 26195468f119SKent Overstreet bio_release_pages(bio, false); 26206b1b186aSKent Overstreet 26216b1b186aSKent Overstreet bch2_quota_reservation_put(c, inode, &dio->quota_res); 2622182c7bbfSKent Overstreet goto out; 26231c6fdbd8SKent Overstreet } 26241c6fdbd8SKent Overstreet 26254d868d18SKent Overstreet static noinline __cold void bch2_dio_write_continue(struct dio_write *dio) 26261c6fdbd8SKent Overstreet { 2627182c7bbfSKent Overstreet struct mm_struct *mm = dio->mm; 26281c6fdbd8SKent Overstreet 2629182c7bbfSKent Overstreet bio_reset(&dio->op.wbio.bio, NULL, REQ_OP_WRITE); 2630182c7bbfSKent Overstreet 2631182c7bbfSKent Overstreet if (mm) 2632182c7bbfSKent Overstreet kthread_use_mm(mm); 26331c6fdbd8SKent Overstreet bch2_dio_write_loop(dio); 2634182c7bbfSKent Overstreet if (mm) 2635182c7bbfSKent Overstreet kthread_unuse_mm(mm); 26361c6fdbd8SKent Overstreet } 26371c6fdbd8SKent Overstreet 26384d868d18SKent Overstreet static void bch2_dio_write_loop_async(struct bch_write_op *op) 26394d868d18SKent Overstreet { 26404d868d18SKent Overstreet struct dio_write *dio = container_of(op, struct dio_write, op); 26414d868d18SKent Overstreet 26424d868d18SKent Overstreet bch2_dio_write_end(dio); 26434d868d18SKent Overstreet 26444d868d18SKent Overstreet if (likely(!dio->iter.count) || dio->op.error) 26454d868d18SKent Overstreet bch2_dio_write_done(dio); 26464d868d18SKent Overstreet else 26474d868d18SKent Overstreet bch2_dio_write_continue(dio); 26484d868d18SKent Overstreet } 26494d868d18SKent Overstreet 26501c6fdbd8SKent Overstreet static noinline 26511c6fdbd8SKent Overstreet ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) 26521c6fdbd8SKent Overstreet { 26531c6fdbd8SKent Overstreet struct file *file = req->ki_filp; 265454847d25SKent Overstreet struct address_space *mapping = file->f_mapping; 26551c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 26561c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 26571c6fdbd8SKent Overstreet struct dio_write *dio; 26581c6fdbd8SKent Overstreet struct bio *bio; 26597edcfbfeSKent Overstreet bool locked = true, extending; 26601c6fdbd8SKent Overstreet ssize_t ret; 26611c6fdbd8SKent Overstreet 26627edcfbfeSKent Overstreet prefetch(&c->opts); 26637edcfbfeSKent Overstreet prefetch((void *) &c->opts + 64); 26647edcfbfeSKent Overstreet prefetch(&inode->ei_inode); 26657edcfbfeSKent Overstreet prefetch((void *) &inode->ei_inode + 64); 26661c6fdbd8SKent Overstreet 26677edcfbfeSKent Overstreet inode_lock(&inode->v); 26687edcfbfeSKent Overstreet 26697edcfbfeSKent Overstreet ret = generic_write_checks(req, iter); 26707edcfbfeSKent Overstreet if (unlikely(ret <= 0)) 26717edcfbfeSKent Overstreet goto err; 26727edcfbfeSKent Overstreet 26737edcfbfeSKent Overstreet ret = file_remove_privs(file); 26747edcfbfeSKent Overstreet if (unlikely(ret)) 26757edcfbfeSKent Overstreet goto err; 26767edcfbfeSKent Overstreet 26777edcfbfeSKent Overstreet ret = file_update_time(file); 26787edcfbfeSKent Overstreet if (unlikely(ret)) 26797edcfbfeSKent Overstreet goto err; 26801c6fdbd8SKent Overstreet 2681919dbbd1SKent Overstreet if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) 26827edcfbfeSKent Overstreet goto err; 26837edcfbfeSKent Overstreet 26847edcfbfeSKent Overstreet inode_dio_begin(&inode->v); 2685a7ecd30cSKent Overstreet bch2_pagecache_block_get(inode); 26867edcfbfeSKent Overstreet 26877edcfbfeSKent Overstreet extending = req->ki_pos + iter->count > inode->v.i_size; 26887edcfbfeSKent Overstreet if (!extending) { 26897edcfbfeSKent Overstreet inode_unlock(&inode->v); 26907edcfbfeSKent Overstreet locked = false; 26917edcfbfeSKent Overstreet } 26921c6fdbd8SKent Overstreet 26931c6fdbd8SKent Overstreet bio = bio_alloc_bioset(NULL, 26944d126dc8SKent Overstreet bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), 26951c6fdbd8SKent Overstreet REQ_OP_WRITE, 26961c6fdbd8SKent Overstreet GFP_KERNEL, 26971c6fdbd8SKent Overstreet &c->dio_write_bioset); 26989a3df993SKent Overstreet dio = container_of(bio, struct dio_write, op.wbio.bio); 26991c6fdbd8SKent Overstreet dio->req = req; 2700182c7bbfSKent Overstreet dio->mapping = mapping; 2701182c7bbfSKent Overstreet dio->inode = inode; 2702ed484030SKent Overstreet dio->mm = current->mm; 27031c6fdbd8SKent Overstreet dio->loop = false; 27046b1b186aSKent Overstreet dio->extending = extending; 27057edcfbfeSKent Overstreet dio->sync = is_sync_kiocb(req) || extending; 2706a1ee777bSKent Overstreet dio->flush = iocb_is_dsync(req) && !c->opts.journal_flush_disabled; 27071c6fdbd8SKent Overstreet dio->free_iov = false; 27081c6fdbd8SKent Overstreet dio->quota_res.sectors = 0; 2709042a1f26SKent Overstreet dio->written = 0; 27101c6fdbd8SKent Overstreet dio->iter = *iter; 2711182c7bbfSKent Overstreet dio->op.c = c; 27129a3df993SKent Overstreet 2713a023127aSKent Overstreet if (unlikely(mapping->nrpages)) { 271454847d25SKent Overstreet ret = write_invalidate_inode_pages_range(mapping, 271554847d25SKent Overstreet req->ki_pos, 271654847d25SKent Overstreet req->ki_pos + iter->count - 1); 271754847d25SKent Overstreet if (unlikely(ret)) 271854847d25SKent Overstreet goto err_put_bio; 2719a023127aSKent Overstreet } 272054847d25SKent Overstreet 27217edcfbfeSKent Overstreet ret = bch2_dio_write_loop(dio); 27221c6fdbd8SKent Overstreet err: 27237edcfbfeSKent Overstreet if (locked) 27247edcfbfeSKent Overstreet inode_unlock(&inode->v); 27257edcfbfeSKent Overstreet return ret; 27267edcfbfeSKent Overstreet err_put_bio: 2727a7ecd30cSKent Overstreet bch2_pagecache_block_put(inode); 27281c6fdbd8SKent Overstreet bio_put(bio); 27297edcfbfeSKent Overstreet inode_dio_end(&inode->v); 27307edcfbfeSKent Overstreet goto err; 27311c6fdbd8SKent Overstreet } 27321c6fdbd8SKent Overstreet 27337edcfbfeSKent Overstreet ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from) 27341c6fdbd8SKent Overstreet { 27351c6fdbd8SKent Overstreet struct file *file = iocb->ki_filp; 27367edcfbfeSKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 27371c6fdbd8SKent Overstreet ssize_t ret; 27381c6fdbd8SKent Overstreet 27395c1ef830SKent Overstreet if (iocb->ki_flags & IOCB_DIRECT) { 27405c1ef830SKent Overstreet ret = bch2_direct_write(iocb, from); 27415c1ef830SKent Overstreet goto out; 27425c1ef830SKent Overstreet } 27431c6fdbd8SKent Overstreet 27447edcfbfeSKent Overstreet inode_lock(&inode->v); 27457edcfbfeSKent Overstreet 27467edcfbfeSKent Overstreet ret = generic_write_checks(iocb, from); 27477edcfbfeSKent Overstreet if (ret <= 0) 27487edcfbfeSKent Overstreet goto unlock; 27497edcfbfeSKent Overstreet 27501c6fdbd8SKent Overstreet ret = file_remove_privs(file); 27511c6fdbd8SKent Overstreet if (ret) 27527edcfbfeSKent Overstreet goto unlock; 27531c6fdbd8SKent Overstreet 27541c6fdbd8SKent Overstreet ret = file_update_time(file); 27551c6fdbd8SKent Overstreet if (ret) 27567edcfbfeSKent Overstreet goto unlock; 27571c6fdbd8SKent Overstreet 27587edcfbfeSKent Overstreet ret = bch2_buffered_write(iocb, from); 27591c6fdbd8SKent Overstreet if (likely(ret > 0)) 27601c6fdbd8SKent Overstreet iocb->ki_pos += ret; 27617edcfbfeSKent Overstreet unlock: 27621c6fdbd8SKent Overstreet inode_unlock(&inode->v); 27631c6fdbd8SKent Overstreet 27647edcfbfeSKent Overstreet if (ret > 0) 27651c6fdbd8SKent Overstreet ret = generic_write_sync(iocb, ret); 27665c1ef830SKent Overstreet out: 27675c1ef830SKent Overstreet return bch2_err_class(ret); 27681c6fdbd8SKent Overstreet } 27691c6fdbd8SKent Overstreet 27701c6fdbd8SKent Overstreet /* fsync: */ 27711c6fdbd8SKent Overstreet 277268a2054dSKent Overstreet /* 277368a2054dSKent Overstreet * inode->ei_inode.bi_journal_seq won't be up to date since it's set in an 277468a2054dSKent Overstreet * insert trigger: look up the btree inode instead 277568a2054dSKent Overstreet */ 2776a8b3a677SKent Overstreet static int bch2_flush_inode(struct bch_fs *c, 2777a8b3a677SKent Overstreet struct bch_inode_info *inode) 277868a2054dSKent Overstreet { 2779a8b3a677SKent Overstreet struct bch_inode_unpacked u; 278068a2054dSKent Overstreet int ret; 278168a2054dSKent Overstreet 278268a2054dSKent Overstreet if (c->opts.journal_flush_disabled) 278368a2054dSKent Overstreet return 0; 278468a2054dSKent Overstreet 2785a8b3a677SKent Overstreet ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u); 278668a2054dSKent Overstreet if (ret) 278768a2054dSKent Overstreet return ret; 278868a2054dSKent Overstreet 2789a8b3a677SKent Overstreet return bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?: 2790a8b3a677SKent Overstreet bch2_inode_flush_nocow_writes(c, inode); 279168a2054dSKent Overstreet } 279268a2054dSKent Overstreet 27931c6fdbd8SKent Overstreet int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync) 27941c6fdbd8SKent Overstreet { 27951c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 27961c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 279768a2054dSKent Overstreet int ret, ret2, ret3; 27981c6fdbd8SKent Overstreet 27991c6fdbd8SKent Overstreet ret = file_write_and_wait_range(file, start, end); 280068a2054dSKent Overstreet ret2 = sync_inode_metadata(&inode->v, 1); 2801a8b3a677SKent Overstreet ret3 = bch2_flush_inode(c, inode); 28021c6fdbd8SKent Overstreet 28035c1ef830SKent Overstreet return bch2_err_class(ret ?: ret2 ?: ret3); 28041c6fdbd8SKent Overstreet } 28051c6fdbd8SKent Overstreet 28061c6fdbd8SKent Overstreet /* truncate: */ 28071c6fdbd8SKent Overstreet 28086fed42bbSKent Overstreet static inline int range_has_data(struct bch_fs *c, u32 subvol, 28091c6fdbd8SKent Overstreet struct bpos start, 28101c6fdbd8SKent Overstreet struct bpos end) 28111c6fdbd8SKent Overstreet { 2812424eb881SKent Overstreet struct btree_trans trans; 281367e0dd8fSKent Overstreet struct btree_iter iter; 28141c6fdbd8SKent Overstreet struct bkey_s_c k; 28151c6fdbd8SKent Overstreet int ret = 0; 28161c6fdbd8SKent Overstreet 281720bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 28186fed42bbSKent Overstreet retry: 28196fed42bbSKent Overstreet bch2_trans_begin(&trans); 28206fed42bbSKent Overstreet 28216fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, subvol, &start.snapshot); 28226fed42bbSKent Overstreet if (ret) 28236fed42bbSKent Overstreet goto err; 2824424eb881SKent Overstreet 2825c72f687aSKent Overstreet for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_extents, start, end, 0, k, ret) 28264ad6aa46SBrian Foster if (bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k)) { 28271c6fdbd8SKent Overstreet ret = 1; 28281c6fdbd8SKent Overstreet break; 28291c6fdbd8SKent Overstreet } 28306fed42bbSKent Overstreet start = iter.pos; 283167e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 28326fed42bbSKent Overstreet err: 2833549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 28346fed42bbSKent Overstreet goto retry; 28351c6fdbd8SKent Overstreet 28369a796fdbSKent Overstreet bch2_trans_exit(&trans); 28379a796fdbSKent Overstreet return ret; 28381c6fdbd8SKent Overstreet } 28391c6fdbd8SKent Overstreet 2840959f7368SKent Overstreet static int __bch2_truncate_folio(struct bch_inode_info *inode, 28411c6fdbd8SKent Overstreet pgoff_t index, loff_t start, loff_t end) 28421c6fdbd8SKent Overstreet { 28431c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 28441c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 28453342ac13SKent Overstreet struct bch_folio *s; 28461c6fdbd8SKent Overstreet unsigned start_offset = start & (PAGE_SIZE - 1); 28471c6fdbd8SKent Overstreet unsigned end_offset = ((end - 1) & (PAGE_SIZE - 1)) + 1; 2848a99b1cafSKent Overstreet unsigned i; 284930bff594SKent Overstreet struct folio *folio; 2850b19d307dSKent Overstreet s64 i_sectors_delta = 0; 28511c6fdbd8SKent Overstreet int ret = 0; 28526b9857b2SBrian Foster u64 end_pos; 28531c6fdbd8SKent Overstreet 285430bff594SKent Overstreet folio = filemap_lock_folio(mapping, index); 2855b6898917SKent Overstreet if (IS_ERR_OR_NULL(folio)) { 28561c6fdbd8SKent Overstreet /* 28571c6fdbd8SKent Overstreet * XXX: we're doing two index lookups when we end up reading the 285830bff594SKent Overstreet * folio 28591c6fdbd8SKent Overstreet */ 28606fed42bbSKent Overstreet ret = range_has_data(c, inode->ei_subvol, 2861c72f687aSKent Overstreet POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT)), 2862c72f687aSKent Overstreet POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT) + PAGE_SECTORS)); 28631c6fdbd8SKent Overstreet if (ret <= 0) 28641c6fdbd8SKent Overstreet return ret; 28651c6fdbd8SKent Overstreet 286630bff594SKent Overstreet folio = __filemap_get_folio(mapping, index, 286730bff594SKent Overstreet FGP_LOCK|FGP_CREAT, GFP_KERNEL); 2868b6898917SKent Overstreet if (unlikely(IS_ERR_OR_NULL(folio))) { 28691c6fdbd8SKent Overstreet ret = -ENOMEM; 28701c6fdbd8SKent Overstreet goto out; 28711c6fdbd8SKent Overstreet } 28721c6fdbd8SKent Overstreet } 28731c6fdbd8SKent Overstreet 2874959f7368SKent Overstreet BUG_ON(start >= folio_end_pos(folio)); 2875959f7368SKent Overstreet BUG_ON(end <= folio_pos(folio)); 2876959f7368SKent Overstreet 2877959f7368SKent Overstreet start_offset = max(start, folio_pos(folio)) - folio_pos(folio); 28786b9857b2SBrian Foster end_offset = min_t(u64, end, folio_end_pos(folio)) - folio_pos(folio); 2879959f7368SKent Overstreet 2880959f7368SKent Overstreet /* Folio boundary? Nothing to do */ 2881959f7368SKent Overstreet if (start_offset == 0 && 2882959f7368SKent Overstreet end_offset == folio_size(folio)) { 2883959f7368SKent Overstreet ret = 0; 2884959f7368SKent Overstreet goto unlock; 2885959f7368SKent Overstreet } 2886959f7368SKent Overstreet 288730bff594SKent Overstreet s = bch2_folio_create(folio, 0); 2888a99b1cafSKent Overstreet if (!s) { 2889a99b1cafSKent Overstreet ret = -ENOMEM; 2890a99b1cafSKent Overstreet goto unlock; 2891a99b1cafSKent Overstreet } 2892a99b1cafSKent Overstreet 289330bff594SKent Overstreet if (!folio_test_uptodate(folio)) { 289430bff594SKent Overstreet ret = bch2_read_single_folio(folio, mapping); 28951c6fdbd8SKent Overstreet if (ret) 28961c6fdbd8SKent Overstreet goto unlock; 28971c6fdbd8SKent Overstreet } 28981c6fdbd8SKent Overstreet 289934fdcf06SKent Overstreet ret = bch2_folio_set(c, inode_inum(inode), &folio, 1); 290034fdcf06SKent Overstreet if (ret) 290134fdcf06SKent Overstreet goto unlock; 2902c437e153SKent Overstreet 2903a99b1cafSKent Overstreet for (i = round_up(start_offset, block_bytes(c)) >> 9; 2904a99b1cafSKent Overstreet i < round_down(end_offset, block_bytes(c)) >> 9; 2905a99b1cafSKent Overstreet i++) { 2906a99b1cafSKent Overstreet s->s[i].nr_replicas = 0; 2907a1774a05SKent Overstreet 2908a1774a05SKent Overstreet i_sectors_delta -= s->s[i].state == SECTOR_dirty; 2909a1774a05SKent Overstreet folio_sector_set(folio, s, i, SECTOR_unallocated); 2910a99b1cafSKent Overstreet } 2911a99b1cafSKent Overstreet 2912b19d307dSKent Overstreet i_sectors_acct(c, inode, NULL, i_sectors_delta); 2913b19d307dSKent Overstreet 291474163da7SKent Overstreet /* 291530bff594SKent Overstreet * Caller needs to know whether this folio will be written out by 291674163da7SKent Overstreet * writeback - doing an i_size update if necessary - or whether it will 29174ad6aa46SBrian Foster * be responsible for the i_size update. 29184ad6aa46SBrian Foster * 29194ad6aa46SBrian Foster * Note that we shouldn't ever see a folio beyond EOF, but check and 29204ad6aa46SBrian Foster * warn if so. This has been observed by failure to clean up folios 29214ad6aa46SBrian Foster * after a short write and there's still a chance reclaim will fix 29224ad6aa46SBrian Foster * things up. 292374163da7SKent Overstreet */ 29244ad6aa46SBrian Foster WARN_ON_ONCE(folio_pos(folio) >= inode->v.i_size); 29254ad6aa46SBrian Foster end_pos = folio_end_pos(folio); 29264ad6aa46SBrian Foster if (inode->v.i_size > folio_pos(folio)) 29276b9857b2SBrian Foster end_pos = min_t(u64, inode->v.i_size, end_pos); 2928bf98ee10SBrian Foster ret = s->s[folio_pos_to_s(folio, end_pos - 1)].state >= SECTOR_dirty; 292974163da7SKent Overstreet 293030bff594SKent Overstreet folio_zero_segment(folio, start_offset, end_offset); 2931a99b1cafSKent Overstreet 29321c6fdbd8SKent Overstreet /* 29331c6fdbd8SKent Overstreet * Bit of a hack - we don't want truncate to fail due to -ENOSPC. 29341c6fdbd8SKent Overstreet * 293530bff594SKent Overstreet * XXX: because we aren't currently tracking whether the folio has actual 29361c6fdbd8SKent Overstreet * data in it (vs. just 0s, or only partially written) this wrong. ick. 29371c6fdbd8SKent Overstreet */ 293830bff594SKent Overstreet BUG_ON(bch2_get_folio_disk_reservation(c, inode, folio, false)); 29391c6fdbd8SKent Overstreet 29409ba2eb25SKent Overstreet /* 29419ba2eb25SKent Overstreet * This removes any writeable userspace mappings; we need to force 29429ba2eb25SKent Overstreet * .page_mkwrite to be called again before any mmapped writes, to 29439ba2eb25SKent Overstreet * redirty the full page: 29449ba2eb25SKent Overstreet */ 294530bff594SKent Overstreet folio_mkclean(folio); 294630bff594SKent Overstreet filemap_dirty_folio(mapping, folio); 29471c6fdbd8SKent Overstreet unlock: 294830bff594SKent Overstreet folio_unlock(folio); 294930bff594SKent Overstreet folio_put(folio); 29501c6fdbd8SKent Overstreet out: 29511c6fdbd8SKent Overstreet return ret; 29521c6fdbd8SKent Overstreet } 29531c6fdbd8SKent Overstreet 2954959f7368SKent Overstreet static int bch2_truncate_folio(struct bch_inode_info *inode, loff_t from) 29551c6fdbd8SKent Overstreet { 2956959f7368SKent Overstreet return __bch2_truncate_folio(inode, from >> PAGE_SHIFT, 2957959f7368SKent Overstreet from, ANYSINT_MAX(loff_t)); 29581c6fdbd8SKent Overstreet } 29591c6fdbd8SKent Overstreet 2960959f7368SKent Overstreet static int bch2_truncate_folios(struct bch_inode_info *inode, 296174163da7SKent Overstreet loff_t start, loff_t end) 296274163da7SKent Overstreet { 2963959f7368SKent Overstreet int ret = __bch2_truncate_folio(inode, start >> PAGE_SHIFT, 296474163da7SKent Overstreet start, end); 296574163da7SKent Overstreet 296674163da7SKent Overstreet if (ret >= 0 && 296774163da7SKent Overstreet start >> PAGE_SHIFT != end >> PAGE_SHIFT) 2968959f7368SKent Overstreet ret = __bch2_truncate_folio(inode, 2969959f7368SKent Overstreet (end - 1) >> PAGE_SHIFT, 297074163da7SKent Overstreet start, end); 297174163da7SKent Overstreet return ret; 297274163da7SKent Overstreet } 297374163da7SKent Overstreet 297468a507a2SKent Overstreet static int bch2_extend(struct mnt_idmap *idmap, 297568a507a2SKent Overstreet struct bch_inode_info *inode, 2976e0541a93SKent Overstreet struct bch_inode_unpacked *inode_u, 2977e0541a93SKent Overstreet struct iattr *iattr) 29781c6fdbd8SKent Overstreet { 29791c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 29801c6fdbd8SKent Overstreet int ret; 29811c6fdbd8SKent Overstreet 2982e0541a93SKent Overstreet /* 2983e0541a93SKent Overstreet * sync appends: 29842925fc49SKent Overstreet * 29852925fc49SKent Overstreet * this has to be done _before_ extending i_size: 2986e0541a93SKent Overstreet */ 2987e0541a93SKent Overstreet ret = filemap_write_and_wait_range(mapping, inode_u->bi_size, S64_MAX); 29881c6fdbd8SKent Overstreet if (ret) 29891c6fdbd8SKent Overstreet return ret; 29901c6fdbd8SKent Overstreet 29911c6fdbd8SKent Overstreet truncate_setsize(&inode->v, iattr->ia_size); 29921c6fdbd8SKent Overstreet 299368a507a2SKent Overstreet return bch2_setattr_nonsize(idmap, inode, iattr); 29941c6fdbd8SKent Overstreet } 29951c6fdbd8SKent Overstreet 299654e2264eSKent Overstreet static int bch2_truncate_finish_fn(struct bch_inode_info *inode, 299754e2264eSKent Overstreet struct bch_inode_unpacked *bi, 299854e2264eSKent Overstreet void *p) 299954e2264eSKent Overstreet { 300054e2264eSKent Overstreet bi->bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; 300154e2264eSKent Overstreet return 0; 300254e2264eSKent Overstreet } 300354e2264eSKent Overstreet 300454e2264eSKent Overstreet static int bch2_truncate_start_fn(struct bch_inode_info *inode, 300554e2264eSKent Overstreet struct bch_inode_unpacked *bi, void *p) 300654e2264eSKent Overstreet { 300754e2264eSKent Overstreet u64 *new_i_size = p; 300854e2264eSKent Overstreet 300954e2264eSKent Overstreet bi->bi_flags |= BCH_INODE_I_SIZE_DIRTY; 301054e2264eSKent Overstreet bi->bi_size = *new_i_size; 301154e2264eSKent Overstreet return 0; 301254e2264eSKent Overstreet } 301354e2264eSKent Overstreet 301468a507a2SKent Overstreet int bch2_truncate(struct mnt_idmap *idmap, 301568a507a2SKent Overstreet struct bch_inode_info *inode, struct iattr *iattr) 30161c6fdbd8SKent Overstreet { 30171c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 30181c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 3019e0541a93SKent Overstreet struct bch_inode_unpacked inode_u; 302054e2264eSKent Overstreet u64 new_i_size = iattr->ia_size; 30212e87eae1SKent Overstreet s64 i_sectors_delta = 0; 30221c6fdbd8SKent Overstreet int ret = 0; 30231c6fdbd8SKent Overstreet 302468a507a2SKent Overstreet /* 302578d66ab1SDan Robertson * If the truncate call with change the size of the file, the 302678d66ab1SDan Robertson * cmtimes should be updated. If the size will not change, we 302778d66ab1SDan Robertson * do not need to update the cmtimes. 302868a507a2SKent Overstreet */ 302978d66ab1SDan Robertson if (iattr->ia_size != inode->v.i_size) { 303068a507a2SKent Overstreet if (!(iattr->ia_valid & ATTR_MTIME)) 303168a507a2SKent Overstreet ktime_get_coarse_real_ts64(&iattr->ia_mtime); 303268a507a2SKent Overstreet if (!(iattr->ia_valid & ATTR_CTIME)) 303368a507a2SKent Overstreet ktime_get_coarse_real_ts64(&iattr->ia_ctime); 303468a507a2SKent Overstreet iattr->ia_valid |= ATTR_MTIME|ATTR_CTIME; 303578d66ab1SDan Robertson } 303668a507a2SKent Overstreet 30371c6fdbd8SKent Overstreet inode_dio_wait(&inode->v); 3038a7ecd30cSKent Overstreet bch2_pagecache_block_get(inode); 30391c6fdbd8SKent Overstreet 30406fed42bbSKent Overstreet ret = bch2_inode_find_by_inum(c, inode_inum(inode), &inode_u); 3041e0541a93SKent Overstreet if (ret) 3042e0541a93SKent Overstreet goto err; 30431c6fdbd8SKent Overstreet 3044c45d473dSKent Overstreet /* 3045c45d473dSKent Overstreet * check this before next assertion; on filesystem error our normal 3046c45d473dSKent Overstreet * invariants are a bit broken (truncate has to truncate the page cache 3047c45d473dSKent Overstreet * before the inode). 3048c45d473dSKent Overstreet */ 3049c45d473dSKent Overstreet ret = bch2_journal_error(&c->journal); 3050c45d473dSKent Overstreet if (ret) 3051c45d473dSKent Overstreet goto err; 3052c45d473dSKent Overstreet 30538eb71e9eSKent Overstreet WARN_ONCE(!test_bit(EI_INODE_ERROR, &inode->ei_flags) && 30548eb71e9eSKent Overstreet inode->v.i_size < inode_u.bi_size, 30558eb71e9eSKent Overstreet "truncate spotted in mem i_size < btree i_size: %llu < %llu\n", 30568eb71e9eSKent Overstreet (u64) inode->v.i_size, inode_u.bi_size); 3057e0541a93SKent Overstreet 3058e0541a93SKent Overstreet if (iattr->ia_size > inode->v.i_size) { 305968a507a2SKent Overstreet ret = bch2_extend(idmap, inode, &inode_u, iattr); 306054e2264eSKent Overstreet goto err; 30611c6fdbd8SKent Overstreet } 30621c6fdbd8SKent Overstreet 306368a507a2SKent Overstreet iattr->ia_valid &= ~ATTR_SIZE; 306468a507a2SKent Overstreet 3065959f7368SKent Overstreet ret = bch2_truncate_folio(inode, iattr->ia_size); 306674163da7SKent Overstreet if (unlikely(ret < 0)) 306754e2264eSKent Overstreet goto err; 30681c6fdbd8SKent Overstreet 30696cc3535dSKent Overstreet /* 30706cc3535dSKent Overstreet * When extending, we're going to write the new i_size to disk 30716cc3535dSKent Overstreet * immediately so we need to flush anything above the current on disk 30726cc3535dSKent Overstreet * i_size first: 30736cc3535dSKent Overstreet * 30746cc3535dSKent Overstreet * Also, when extending we need to flush the page that i_size currently 30756cc3535dSKent Overstreet * straddles - if it's mapped to userspace, we need to ensure that 30766cc3535dSKent Overstreet * userspace has to redirty it and call .mkwrite -> set_page_dirty 30776cc3535dSKent Overstreet * again to allocate the part of the page that was extended. 30786cc3535dSKent Overstreet */ 3079e0541a93SKent Overstreet if (iattr->ia_size > inode_u.bi_size) 30801c6fdbd8SKent Overstreet ret = filemap_write_and_wait_range(mapping, 3081e0541a93SKent Overstreet inode_u.bi_size, 30821c6fdbd8SKent Overstreet iattr->ia_size - 1); 30831c6fdbd8SKent Overstreet else if (iattr->ia_size & (PAGE_SIZE - 1)) 30841c6fdbd8SKent Overstreet ret = filemap_write_and_wait_range(mapping, 30851c6fdbd8SKent Overstreet round_down(iattr->ia_size, PAGE_SIZE), 30861c6fdbd8SKent Overstreet iattr->ia_size - 1); 30871c6fdbd8SKent Overstreet if (ret) 308854e2264eSKent Overstreet goto err; 30891c6fdbd8SKent Overstreet 309054e2264eSKent Overstreet mutex_lock(&inode->ei_update_lock); 309154e2264eSKent Overstreet ret = bch2_write_inode(c, inode, bch2_truncate_start_fn, 309254e2264eSKent Overstreet &new_i_size, 0); 309354e2264eSKent Overstreet mutex_unlock(&inode->ei_update_lock); 30941c6fdbd8SKent Overstreet 30951c6fdbd8SKent Overstreet if (unlikely(ret)) 309654e2264eSKent Overstreet goto err; 30971c6fdbd8SKent Overstreet 30981c6fdbd8SKent Overstreet truncate_setsize(&inode->v, iattr->ia_size); 30991c6fdbd8SKent Overstreet 31008c6d298aSKent Overstreet ret = bch2_fpunch(c, inode_inum(inode), 3101a99b1cafSKent Overstreet round_up(iattr->ia_size, block_bytes(c)) >> 9, 310268a2054dSKent Overstreet U64_MAX, &i_sectors_delta); 31032e87eae1SKent Overstreet i_sectors_acct(c, inode, NULL, i_sectors_delta); 31042e87eae1SKent Overstreet 3105b33bf1bcSKent Overstreet bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks && 3106b33bf1bcSKent Overstreet !bch2_journal_error(&c->journal), c, 3107b33bf1bcSKent Overstreet "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", 3108b33bf1bcSKent Overstreet inode->v.i_ino, (u64) inode->v.i_blocks, 3109b33bf1bcSKent Overstreet inode->ei_inode.bi_sectors); 31101c6fdbd8SKent Overstreet if (unlikely(ret)) 311154e2264eSKent Overstreet goto err; 31121c6fdbd8SKent Overstreet 311354e2264eSKent Overstreet mutex_lock(&inode->ei_update_lock); 311468a507a2SKent Overstreet ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL, 0); 311554e2264eSKent Overstreet mutex_unlock(&inode->ei_update_lock); 311668a507a2SKent Overstreet 311768a507a2SKent Overstreet ret = bch2_setattr_nonsize(idmap, inode, iattr); 311854e2264eSKent Overstreet err: 3119a7ecd30cSKent Overstreet bch2_pagecache_block_put(inode); 31205c1ef830SKent Overstreet return bch2_err_class(ret); 31211c6fdbd8SKent Overstreet } 31221c6fdbd8SKent Overstreet 31231c6fdbd8SKent Overstreet /* fallocate: */ 31241c6fdbd8SKent Overstreet 3125050197b1SKent Overstreet static int inode_update_times_fn(struct bch_inode_info *inode, 3126050197b1SKent Overstreet struct bch_inode_unpacked *bi, void *p) 3127050197b1SKent Overstreet { 3128050197b1SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 3129050197b1SKent Overstreet 3130050197b1SKent Overstreet bi->bi_mtime = bi->bi_ctime = bch2_current_time(c); 3131050197b1SKent Overstreet return 0; 3132050197b1SKent Overstreet } 3133050197b1SKent Overstreet 31342e87eae1SKent Overstreet static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len) 31351c6fdbd8SKent Overstreet { 31361c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 313774163da7SKent Overstreet u64 end = offset + len; 313874163da7SKent Overstreet u64 block_start = round_up(offset, block_bytes(c)); 313974163da7SKent Overstreet u64 block_end = round_down(end, block_bytes(c)); 314074163da7SKent Overstreet bool truncated_last_page; 31411c6fdbd8SKent Overstreet int ret = 0; 31421c6fdbd8SKent Overstreet 3143959f7368SKent Overstreet ret = bch2_truncate_folios(inode, offset, end); 314474163da7SKent Overstreet if (unlikely(ret < 0)) 31451c6fdbd8SKent Overstreet goto err; 31461c6fdbd8SKent Overstreet 314774163da7SKent Overstreet truncated_last_page = ret; 31481c6fdbd8SKent Overstreet 314974163da7SKent Overstreet truncate_pagecache_range(&inode->v, offset, end - 1); 31501c6fdbd8SKent Overstreet 315174163da7SKent Overstreet if (block_start < block_end) { 31522e87eae1SKent Overstreet s64 i_sectors_delta = 0; 31532e87eae1SKent Overstreet 31548c6d298aSKent Overstreet ret = bch2_fpunch(c, inode_inum(inode), 315574163da7SKent Overstreet block_start >> 9, block_end >> 9, 31562e87eae1SKent Overstreet &i_sectors_delta); 31572e87eae1SKent Overstreet i_sectors_acct(c, inode, NULL, i_sectors_delta); 31582e87eae1SKent Overstreet } 3159050197b1SKent Overstreet 3160050197b1SKent Overstreet mutex_lock(&inode->ei_update_lock); 316174163da7SKent Overstreet if (end >= inode->v.i_size && !truncated_last_page) { 316274163da7SKent Overstreet ret = bch2_write_inode_size(c, inode, inode->v.i_size, 316374163da7SKent Overstreet ATTR_MTIME|ATTR_CTIME); 316474163da7SKent Overstreet } else { 3165050197b1SKent Overstreet ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, 316674163da7SKent Overstreet ATTR_MTIME|ATTR_CTIME); 316774163da7SKent Overstreet } 3168050197b1SKent Overstreet mutex_unlock(&inode->ei_update_lock); 31691c6fdbd8SKent Overstreet err: 31701c6fdbd8SKent Overstreet return ret; 31711c6fdbd8SKent Overstreet } 31721c6fdbd8SKent Overstreet 31732e87eae1SKent Overstreet static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, 31745f786787SKent Overstreet loff_t offset, loff_t len, 31755f786787SKent Overstreet bool insert) 31761c6fdbd8SKent Overstreet { 31771c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 31781c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 317907a1006aSKent Overstreet struct bkey_buf copy; 3180d69f41d6SKent Overstreet struct btree_trans trans; 318167e0dd8fSKent Overstreet struct btree_iter src, dst, del; 31825f786787SKent Overstreet loff_t shift, new_size; 31835f786787SKent Overstreet u64 src_start; 318450dc0f69SKent Overstreet int ret = 0; 31851c6fdbd8SKent Overstreet 31861c6fdbd8SKent Overstreet if ((offset | len) & (block_bytes(c) - 1)) 31871c6fdbd8SKent Overstreet return -EINVAL; 31881c6fdbd8SKent Overstreet 31895f786787SKent Overstreet if (insert) { 31905f786787SKent Overstreet if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len) 319174163da7SKent Overstreet return -EFBIG; 31925f786787SKent Overstreet 31935f786787SKent Overstreet if (offset >= inode->v.i_size) 319474163da7SKent Overstreet return -EINVAL; 31955f786787SKent Overstreet 31965f786787SKent Overstreet src_start = U64_MAX; 31975f786787SKent Overstreet shift = len; 31985f786787SKent Overstreet } else { 31991c6fdbd8SKent Overstreet if (offset + len >= inode->v.i_size) 320074163da7SKent Overstreet return -EINVAL; 32011c6fdbd8SKent Overstreet 32025f786787SKent Overstreet src_start = offset + len; 32035f786787SKent Overstreet shift = -len; 32045f786787SKent Overstreet } 32051c6fdbd8SKent Overstreet 32065f786787SKent Overstreet new_size = inode->v.i_size + shift; 32071c6fdbd8SKent Overstreet 32081c6fdbd8SKent Overstreet ret = write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX); 32091c6fdbd8SKent Overstreet if (ret) 321074163da7SKent Overstreet return ret; 32111c6fdbd8SKent Overstreet 32125f786787SKent Overstreet if (insert) { 32135f786787SKent Overstreet i_size_write(&inode->v, new_size); 32145f786787SKent Overstreet mutex_lock(&inode->ei_update_lock); 32155f786787SKent Overstreet ret = bch2_write_inode_size(c, inode, new_size, 32165f786787SKent Overstreet ATTR_MTIME|ATTR_CTIME); 32175f786787SKent Overstreet mutex_unlock(&inode->ei_update_lock); 32185f786787SKent Overstreet } else { 32192e87eae1SKent Overstreet s64 i_sectors_delta = 0; 32202e87eae1SKent Overstreet 32218c6d298aSKent Overstreet ret = bch2_fpunch(c, inode_inum(inode), 32222e87eae1SKent Overstreet offset >> 9, (offset + len) >> 9, 32232e87eae1SKent Overstreet &i_sectors_delta); 32242e87eae1SKent Overstreet i_sectors_acct(c, inode, NULL, i_sectors_delta); 32252e87eae1SKent Overstreet 322663095894SKent Overstreet if (ret) 322774163da7SKent Overstreet return ret; 32285f786787SKent Overstreet } 32298ef231bdSKent Overstreet 323050dc0f69SKent Overstreet bch2_bkey_buf_init(©); 3231f7beb4caSKent Overstreet bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); 323267e0dd8fSKent Overstreet bch2_trans_iter_init(&trans, &src, BTREE_ID_extents, 32335f786787SKent Overstreet POS(inode->v.i_ino, src_start >> 9), 323463095894SKent Overstreet BTREE_ITER_INTENT); 323567e0dd8fSKent Overstreet bch2_trans_copy_iter(&dst, &src); 323667e0dd8fSKent Overstreet bch2_trans_copy_iter(&del, &src); 32375f786787SKent Overstreet 3238549d173cSKent Overstreet while (ret == 0 || 3239549d173cSKent Overstreet bch2_err_matches(ret, BCH_ERR_transaction_restart)) { 324063095894SKent Overstreet struct disk_reservation disk_res = 324163095894SKent Overstreet bch2_disk_reservation_init(c, 0); 324263095894SKent Overstreet struct bkey_i delete; 324363095894SKent Overstreet struct bkey_s_c k; 324463095894SKent Overstreet struct bpos next_pos; 32455f786787SKent Overstreet struct bpos move_pos = POS(inode->v.i_ino, offset >> 9); 32465f786787SKent Overstreet struct bpos atomic_end; 32472d594dfbSKent Overstreet unsigned trigger_flags = 0; 32486fed42bbSKent Overstreet u32 snapshot; 32496fed42bbSKent Overstreet 32506fed42bbSKent Overstreet bch2_trans_begin(&trans); 32516fed42bbSKent Overstreet 32526fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, 32536fed42bbSKent Overstreet inode->ei_subvol, &snapshot); 32546fed42bbSKent Overstreet if (ret) 32556fed42bbSKent Overstreet continue; 32566fed42bbSKent Overstreet 32576fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&src, snapshot); 32586fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&dst, snapshot); 32596fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&del, snapshot); 326063095894SKent Overstreet 3261700c25b3SKent Overstreet bch2_trans_begin(&trans); 3262700c25b3SKent Overstreet 32635f786787SKent Overstreet k = insert 326467e0dd8fSKent Overstreet ? bch2_btree_iter_peek_prev(&src) 3265c72f687aSKent Overstreet : bch2_btree_iter_peek_upto(&src, POS(inode->v.i_ino, U64_MAX)); 326663095894SKent Overstreet if ((ret = bkey_err(k))) 326750dc0f69SKent Overstreet continue; 326863095894SKent Overstreet 326963095894SKent Overstreet if (!k.k || k.k->p.inode != inode->v.i_ino) 327063095894SKent Overstreet break; 327163095894SKent Overstreet 32725f786787SKent Overstreet if (insert && 3273e88a75ebSKent Overstreet bkey_le(k.k->p, POS(inode->v.i_ino, offset >> 9))) 32745f786787SKent Overstreet break; 32755f786787SKent Overstreet reassemble: 327607a1006aSKent Overstreet bch2_bkey_buf_reassemble(©, c, k); 32775f786787SKent Overstreet 32785f786787SKent Overstreet if (insert && 3279e88a75ebSKent Overstreet bkey_lt(bkey_start_pos(k.k), move_pos)) 328035189e09SKent Overstreet bch2_cut_front(move_pos, copy.k); 32815f786787SKent Overstreet 328235189e09SKent Overstreet copy.k->k.p.offset += shift >> 9; 328367e0dd8fSKent Overstreet bch2_btree_iter_set_pos(&dst, bkey_start_pos(©.k->k)); 32841c6fdbd8SKent Overstreet 328567e0dd8fSKent Overstreet ret = bch2_extent_atomic_end(&trans, &dst, copy.k, &atomic_end); 32863c7f3b7aSKent Overstreet if (ret) 328750dc0f69SKent Overstreet continue; 3288e2d9912cSKent Overstreet 3289e88a75ebSKent Overstreet if (!bkey_eq(atomic_end, copy.k->k.p)) { 32905f786787SKent Overstreet if (insert) { 32915f786787SKent Overstreet move_pos = atomic_end; 32925f786787SKent Overstreet move_pos.offset -= shift >> 9; 32935f786787SKent Overstreet goto reassemble; 32945f786787SKent Overstreet } else { 3295085ab693SKent Overstreet bch2_cut_back(atomic_end, copy.k); 32965f786787SKent Overstreet } 32975f786787SKent Overstreet } 32985f786787SKent Overstreet 329963095894SKent Overstreet bkey_init(&delete.k); 3300283eda57SKent Overstreet delete.k.p = copy.k->k.p; 3301283eda57SKent Overstreet delete.k.size = copy.k->k.size; 3302283eda57SKent Overstreet delete.k.p.offset -= shift >> 9; 330367e0dd8fSKent Overstreet bch2_btree_iter_set_pos(&del, bkey_start_pos(&delete.k)); 33041c6fdbd8SKent Overstreet 33055f786787SKent Overstreet next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p; 330663095894SKent Overstreet 33077c4ca54aSKent Overstreet if (copy.k->k.size != k.k->size) { 330863095894SKent Overstreet /* We might end up splitting compressed extents: */ 330963095894SKent Overstreet unsigned nr_ptrs = 33104de77495SKent Overstreet bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k)); 331163095894SKent Overstreet 331263095894SKent Overstreet ret = bch2_disk_reservation_get(c, &disk_res, 331335189e09SKent Overstreet copy.k->k.size, nr_ptrs, 33141c6fdbd8SKent Overstreet BCH_DISK_RESERVATION_NOFAIL); 33151c6fdbd8SKent Overstreet BUG_ON(ret); 331663095894SKent Overstreet } 33171c6fdbd8SKent Overstreet 331867e0dd8fSKent Overstreet ret = bch2_btree_iter_traverse(&del) ?: 331967e0dd8fSKent Overstreet bch2_trans_update(&trans, &del, &delete, trigger_flags) ?: 332067e0dd8fSKent Overstreet bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?: 332168a2054dSKent Overstreet bch2_trans_commit(&trans, &disk_res, NULL, 33222d594dfbSKent Overstreet BTREE_INSERT_NOFAIL); 33231c6fdbd8SKent Overstreet bch2_disk_reservation_put(c, &disk_res); 332450dc0f69SKent Overstreet 332563095894SKent Overstreet if (!ret) 332667e0dd8fSKent Overstreet bch2_btree_iter_set_pos(&src, next_pos); 332750dc0f69SKent Overstreet } 332867e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &del); 332967e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &dst); 333067e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &src); 333150dc0f69SKent Overstreet bch2_trans_exit(&trans); 333250dc0f69SKent Overstreet bch2_bkey_buf_exit(©, c); 333363095894SKent Overstreet 33348ef231bdSKent Overstreet if (ret) 333574163da7SKent Overstreet return ret; 33361c6fdbd8SKent Overstreet 333774163da7SKent Overstreet mutex_lock(&inode->ei_update_lock); 33385f786787SKent Overstreet if (!insert) { 33398ef231bdSKent Overstreet i_size_write(&inode->v, new_size); 33408ef231bdSKent Overstreet ret = bch2_write_inode_size(c, inode, new_size, 33418ef231bdSKent Overstreet ATTR_MTIME|ATTR_CTIME); 334274163da7SKent Overstreet } else { 334374163da7SKent Overstreet /* We need an inode update to update bi_journal_seq for fsync: */ 334474163da7SKent Overstreet ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, 334574163da7SKent Overstreet ATTR_MTIME|ATTR_CTIME); 33465f786787SKent Overstreet } 334774163da7SKent Overstreet mutex_unlock(&inode->ei_update_lock); 33481c6fdbd8SKent Overstreet return ret; 33491c6fdbd8SKent Overstreet } 33501c6fdbd8SKent Overstreet 3351694015c2SKent Overstreet static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, 3352694015c2SKent Overstreet u64 start_sector, u64 end_sector) 33531c6fdbd8SKent Overstreet { 33541c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 3355190fa7afSKent Overstreet struct btree_trans trans; 335667e0dd8fSKent Overstreet struct btree_iter iter; 3357694015c2SKent Overstreet struct bpos end_pos = POS(inode->v.i_ino, end_sector); 335801ad6737SKent Overstreet struct bch_io_opts opts; 3359694015c2SKent Overstreet int ret = 0; 33601c6fdbd8SKent Overstreet 336101ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 3362f7beb4caSKent Overstreet bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512); 33631c6fdbd8SKent Overstreet 336467e0dd8fSKent Overstreet bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, 3365694015c2SKent Overstreet POS(inode->v.i_ino, start_sector), 3366190fa7afSKent Overstreet BTREE_ITER_SLOTS|BTREE_ITER_INTENT); 33671c6fdbd8SKent Overstreet 3368e88a75ebSKent Overstreet while (!ret && bkey_lt(iter.pos, end_pos)) { 33692e87eae1SKent Overstreet s64 i_sectors_delta = 0; 3370190fa7afSKent Overstreet struct quota_res quota_res = { 0 }; 33711c6fdbd8SKent Overstreet struct bkey_s_c k; 3372694015c2SKent Overstreet unsigned sectors; 33736fed42bbSKent Overstreet u32 snapshot; 33741c6fdbd8SKent Overstreet 3375163e885aSKent Overstreet bch2_trans_begin(&trans); 3376a8abd3a7SKent Overstreet 33776fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, 33786fed42bbSKent Overstreet inode->ei_subvol, &snapshot); 33796fed42bbSKent Overstreet if (ret) 33806fed42bbSKent Overstreet goto bkey_err; 33816fed42bbSKent Overstreet 33826fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&iter, snapshot); 33836fed42bbSKent Overstreet 338467e0dd8fSKent Overstreet k = bch2_btree_iter_peek_slot(&iter); 33850f238367SKent Overstreet if ((ret = bkey_err(k))) 33860f238367SKent Overstreet goto bkey_err; 33871c6fdbd8SKent Overstreet 33881c6fdbd8SKent Overstreet /* already reserved */ 338979203111SKent Overstreet if (bkey_extent_is_reservation(k) && 339079203111SKent Overstreet bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) { 339167e0dd8fSKent Overstreet bch2_btree_iter_advance(&iter); 33921c6fdbd8SKent Overstreet continue; 33931c6fdbd8SKent Overstreet } 33941c6fdbd8SKent Overstreet 3395190fa7afSKent Overstreet if (bkey_extent_is_data(k.k) && 3396190fa7afSKent Overstreet !(mode & FALLOC_FL_ZERO_RANGE)) { 339767e0dd8fSKent Overstreet bch2_btree_iter_advance(&iter); 33981c6fdbd8SKent Overstreet continue; 33991c6fdbd8SKent Overstreet } 34001c6fdbd8SKent Overstreet 3401a8b3a677SKent Overstreet /* 3402a8b3a677SKent Overstreet * XXX: for nocow mode, we should promote shared extents to 3403a8b3a677SKent Overstreet * unshared here 3404a8b3a677SKent Overstreet */ 3405a8b3a677SKent Overstreet 340670de7a47SKent Overstreet sectors = bpos_min(k.k->p, end_pos).offset - iter.pos.offset; 34071c6fdbd8SKent Overstreet 34081c6fdbd8SKent Overstreet if (!bkey_extent_is_allocation(k.k)) { 34091c6fdbd8SKent Overstreet ret = bch2_quota_reservation_add(c, inode, 3410190fa7afSKent Overstreet "a_res, 34111c6fdbd8SKent Overstreet sectors, true); 34121c6fdbd8SKent Overstreet if (unlikely(ret)) 34130f238367SKent Overstreet goto bkey_err; 34141c6fdbd8SKent Overstreet } 34151c6fdbd8SKent Overstreet 341670de7a47SKent Overstreet ret = bch2_extent_fallocate(&trans, inode_inum(inode), &iter, 341770de7a47SKent Overstreet sectors, opts, &i_sectors_delta, 341870de7a47SKent Overstreet writepoint_hashed((unsigned long) current)); 34198810386fSKent Overstreet if (ret) 34208810386fSKent Overstreet goto bkey_err; 342170de7a47SKent Overstreet 34222e87eae1SKent Overstreet i_sectors_acct(c, inode, "a_res, i_sectors_delta); 34230f238367SKent Overstreet bkey_err: 3424190fa7afSKent Overstreet bch2_quota_reservation_put(c, inode, "a_res); 3425549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 34261c6fdbd8SKent Overstreet ret = 0; 342750dc0f69SKent Overstreet } 342874163da7SKent Overstreet 3429dcfc593fSKent Overstreet bch2_trans_unlock(&trans); /* lock ordering, before taking pagecache locks: */ 3430dcfc593fSKent Overstreet mark_pagecache_reserved(inode, start_sector, iter.pos.offset); 3431dcfc593fSKent Overstreet 3432098ef98dSKent Overstreet if (bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)) { 343374163da7SKent Overstreet struct quota_res quota_res = { 0 }; 343474163da7SKent Overstreet s64 i_sectors_delta = 0; 343574163da7SKent Overstreet 343674163da7SKent Overstreet bch2_fpunch_at(&trans, &iter, inode_inum(inode), 343774163da7SKent Overstreet end_sector, &i_sectors_delta); 343874163da7SKent Overstreet i_sectors_acct(c, inode, "a_res, i_sectors_delta); 343974163da7SKent Overstreet bch2_quota_reservation_put(c, inode, "a_res); 344074163da7SKent Overstreet } 344174163da7SKent Overstreet 344267e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 3443694015c2SKent Overstreet bch2_trans_exit(&trans); 3444694015c2SKent Overstreet return ret; 3445694015c2SKent Overstreet } 344650dc0f69SKent Overstreet 3447694015c2SKent Overstreet static long bchfs_fallocate(struct bch_inode_info *inode, int mode, 3448694015c2SKent Overstreet loff_t offset, loff_t len) 3449694015c2SKent Overstreet { 3450694015c2SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 345174163da7SKent Overstreet u64 end = offset + len; 345274163da7SKent Overstreet u64 block_start = round_down(offset, block_bytes(c)); 345374163da7SKent Overstreet u64 block_end = round_up(end, block_bytes(c)); 345474163da7SKent Overstreet bool truncated_last_page = false; 345574163da7SKent Overstreet int ret, ret2 = 0; 3456694015c2SKent Overstreet 3457694015c2SKent Overstreet if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) { 3458694015c2SKent Overstreet ret = inode_newsize_ok(&inode->v, end); 3459694015c2SKent Overstreet if (ret) 346074163da7SKent Overstreet return ret; 3461694015c2SKent Overstreet } 3462694015c2SKent Overstreet 3463694015c2SKent Overstreet if (mode & FALLOC_FL_ZERO_RANGE) { 3464959f7368SKent Overstreet ret = bch2_truncate_folios(inode, offset, end); 346574163da7SKent Overstreet if (unlikely(ret < 0)) 346674163da7SKent Overstreet return ret; 3467694015c2SKent Overstreet 346874163da7SKent Overstreet truncated_last_page = ret; 3469694015c2SKent Overstreet 3470694015c2SKent Overstreet truncate_pagecache_range(&inode->v, offset, end - 1); 347174163da7SKent Overstreet 347274163da7SKent Overstreet block_start = round_up(offset, block_bytes(c)); 347374163da7SKent Overstreet block_end = round_down(end, block_bytes(c)); 3474694015c2SKent Overstreet } 3475694015c2SKent Overstreet 3476694015c2SKent Overstreet ret = __bchfs_fallocate(inode, mode, block_start >> 9, block_end >> 9); 3477e0541a93SKent Overstreet 3478e0541a93SKent Overstreet /* 347974163da7SKent Overstreet * On -ENOSPC in ZERO_RANGE mode, we still want to do the inode update, 348074163da7SKent Overstreet * so that the VFS cache i_size is consistent with the btree i_size: 3481e0541a93SKent Overstreet */ 348274163da7SKent Overstreet if (ret && 3483098ef98dSKent Overstreet !(bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE))) 348474163da7SKent Overstreet return ret; 34851c6fdbd8SKent Overstreet 348674163da7SKent Overstreet if (mode & FALLOC_FL_KEEP_SIZE && end > inode->v.i_size) 3487e0541a93SKent Overstreet end = inode->v.i_size; 348874163da7SKent Overstreet 348974163da7SKent Overstreet if (end >= inode->v.i_size && 349074163da7SKent Overstreet (((mode & FALLOC_FL_ZERO_RANGE) && !truncated_last_page) || 349174163da7SKent Overstreet !(mode & FALLOC_FL_KEEP_SIZE))) { 349274163da7SKent Overstreet spin_lock(&inode->v.i_lock); 3493e0541a93SKent Overstreet i_size_write(&inode->v, end); 349474163da7SKent Overstreet spin_unlock(&inode->v.i_lock); 3495e0541a93SKent Overstreet 34961c6fdbd8SKent Overstreet mutex_lock(&inode->ei_update_lock); 349774163da7SKent Overstreet ret2 = bch2_write_inode_size(c, inode, end, 0); 34981c6fdbd8SKent Overstreet mutex_unlock(&inode->ei_update_lock); 34991c6fdbd8SKent Overstreet } 350074163da7SKent Overstreet 350174163da7SKent Overstreet return ret ?: ret2; 35021c6fdbd8SKent Overstreet } 35031c6fdbd8SKent Overstreet 35041c6fdbd8SKent Overstreet long bch2_fallocate_dispatch(struct file *file, int mode, 35051c6fdbd8SKent Overstreet loff_t offset, loff_t len) 35061c6fdbd8SKent Overstreet { 35071c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 35082a9101a9SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 35092a9101a9SKent Overstreet long ret; 35102a9101a9SKent Overstreet 3511d94189adSKent Overstreet if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fallocate)) 35122a9101a9SKent Overstreet return -EROFS; 35131c6fdbd8SKent Overstreet 351474163da7SKent Overstreet inode_lock(&inode->v); 351574163da7SKent Overstreet inode_dio_wait(&inode->v); 3516a7ecd30cSKent Overstreet bch2_pagecache_block_get(inode); 351774163da7SKent Overstreet 351807bfcc0bSKent Overstreet ret = file_modified(file); 351907bfcc0bSKent Overstreet if (ret) 352007bfcc0bSKent Overstreet goto err; 352107bfcc0bSKent Overstreet 35221c6fdbd8SKent Overstreet if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE))) 35232a9101a9SKent Overstreet ret = bchfs_fallocate(inode, mode, offset, len); 35242a9101a9SKent Overstreet else if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE)) 35252a9101a9SKent Overstreet ret = bchfs_fpunch(inode, offset, len); 35262a9101a9SKent Overstreet else if (mode == FALLOC_FL_INSERT_RANGE) 35272a9101a9SKent Overstreet ret = bchfs_fcollapse_finsert(inode, offset, len, true); 35282a9101a9SKent Overstreet else if (mode == FALLOC_FL_COLLAPSE_RANGE) 35292a9101a9SKent Overstreet ret = bchfs_fcollapse_finsert(inode, offset, len, false); 35302a9101a9SKent Overstreet else 35312a9101a9SKent Overstreet ret = -EOPNOTSUPP; 353207bfcc0bSKent Overstreet err: 3533a7ecd30cSKent Overstreet bch2_pagecache_block_put(inode); 353474163da7SKent Overstreet inode_unlock(&inode->v); 3535d94189adSKent Overstreet bch2_write_ref_put(c, BCH_WRITE_REF_fallocate); 35361c6fdbd8SKent Overstreet 35375c1ef830SKent Overstreet return bch2_err_class(ret); 35381c6fdbd8SKent Overstreet } 35391c6fdbd8SKent Overstreet 3540c72f687aSKent Overstreet /* 3541c72f687aSKent Overstreet * Take a quota reservation for unallocated blocks in a given file range 3542c72f687aSKent Overstreet * Does not check pagecache 3543c72f687aSKent Overstreet */ 3544e8540e56SKent Overstreet static int quota_reserve_range(struct bch_inode_info *inode, 3545e8540e56SKent Overstreet struct quota_res *res, 3546e8540e56SKent Overstreet u64 start, u64 end) 3547e8540e56SKent Overstreet { 3548e8540e56SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 3549e8540e56SKent Overstreet struct btree_trans trans; 3550e8540e56SKent Overstreet struct btree_iter iter; 3551e8540e56SKent Overstreet struct bkey_s_c k; 3552e8540e56SKent Overstreet u32 snapshot; 3553e8540e56SKent Overstreet u64 sectors = end - start; 3554e8540e56SKent Overstreet u64 pos = start; 3555e8540e56SKent Overstreet int ret; 3556e8540e56SKent Overstreet 3557e8540e56SKent Overstreet bch2_trans_init(&trans, c, 0, 0); 3558e8540e56SKent Overstreet retry: 3559e8540e56SKent Overstreet bch2_trans_begin(&trans); 3560e8540e56SKent Overstreet 3561e8540e56SKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, inode->ei_subvol, &snapshot); 3562e8540e56SKent Overstreet if (ret) 3563e8540e56SKent Overstreet goto err; 3564e8540e56SKent Overstreet 3565e8540e56SKent Overstreet bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, 3566e8540e56SKent Overstreet SPOS(inode->v.i_ino, pos, snapshot), 0); 3567e8540e56SKent Overstreet 3568e8540e56SKent Overstreet while (!(ret = btree_trans_too_many_iters(&trans)) && 3569e8540e56SKent Overstreet (k = bch2_btree_iter_peek_upto(&iter, POS(inode->v.i_ino, end - 1))).k && 3570e8540e56SKent Overstreet !(ret = bkey_err(k))) { 3571e8540e56SKent Overstreet if (bkey_extent_is_allocation(k.k)) { 3572e8540e56SKent Overstreet u64 s = min(end, k.k->p.offset) - 3573e8540e56SKent Overstreet max(start, bkey_start_offset(k.k)); 3574e8540e56SKent Overstreet BUG_ON(s > sectors); 3575e8540e56SKent Overstreet sectors -= s; 3576e8540e56SKent Overstreet } 3577e8540e56SKent Overstreet bch2_btree_iter_advance(&iter); 3578e8540e56SKent Overstreet } 3579e8540e56SKent Overstreet pos = iter.pos.offset; 3580e8540e56SKent Overstreet bch2_trans_iter_exit(&trans, &iter); 3581e8540e56SKent Overstreet err: 3582e8540e56SKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 3583e8540e56SKent Overstreet goto retry; 3584e8540e56SKent Overstreet 3585e8540e56SKent Overstreet bch2_trans_exit(&trans); 3586e8540e56SKent Overstreet 3587e8540e56SKent Overstreet if (ret) 3588e8540e56SKent Overstreet return ret; 3589e8540e56SKent Overstreet 3590e8540e56SKent Overstreet return bch2_quota_reservation_add(c, inode, res, sectors, true); 3591e8540e56SKent Overstreet } 3592e8540e56SKent Overstreet 359376426098SKent Overstreet loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, 359476426098SKent Overstreet struct file *file_dst, loff_t pos_dst, 359576426098SKent Overstreet loff_t len, unsigned remap_flags) 359676426098SKent Overstreet { 359776426098SKent Overstreet struct bch_inode_info *src = file_bch_inode(file_src); 359876426098SKent Overstreet struct bch_inode_info *dst = file_bch_inode(file_dst); 359976426098SKent Overstreet struct bch_fs *c = src->v.i_sb->s_fs_info; 3600e8540e56SKent Overstreet struct quota_res quota_res = { 0 }; 36012e87eae1SKent Overstreet s64 i_sectors_delta = 0; 3602677fc056SKent Overstreet u64 aligned_len; 360376426098SKent Overstreet loff_t ret = 0; 360476426098SKent Overstreet 360576426098SKent Overstreet if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY)) 360676426098SKent Overstreet return -EINVAL; 360776426098SKent Overstreet 360876426098SKent Overstreet if (remap_flags & REMAP_FILE_DEDUP) 360976426098SKent Overstreet return -EOPNOTSUPP; 361076426098SKent Overstreet 361176426098SKent Overstreet if ((pos_src & (block_bytes(c) - 1)) || 361276426098SKent Overstreet (pos_dst & (block_bytes(c) - 1))) 361376426098SKent Overstreet return -EINVAL; 361476426098SKent Overstreet 361576426098SKent Overstreet if (src == dst && 361676426098SKent Overstreet abs(pos_src - pos_dst) < len) 361776426098SKent Overstreet return -EINVAL; 361876426098SKent Overstreet 361976426098SKent Overstreet bch2_lock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); 362076426098SKent Overstreet 362176426098SKent Overstreet inode_dio_wait(&src->v); 362276426098SKent Overstreet inode_dio_wait(&dst->v); 362376426098SKent Overstreet 362476426098SKent Overstreet ret = generic_remap_file_range_prep(file_src, pos_src, 362576426098SKent Overstreet file_dst, pos_dst, 362676426098SKent Overstreet &len, remap_flags); 362776426098SKent Overstreet if (ret < 0 || len == 0) 36282e87eae1SKent Overstreet goto err; 362976426098SKent Overstreet 3630677fc056SKent Overstreet aligned_len = round_up((u64) len, block_bytes(c)); 363176426098SKent Overstreet 363276426098SKent Overstreet ret = write_invalidate_inode_pages_range(dst->v.i_mapping, 3633677fc056SKent Overstreet pos_dst, pos_dst + len - 1); 363476426098SKent Overstreet if (ret) 36352e87eae1SKent Overstreet goto err; 363676426098SKent Overstreet 3637e8540e56SKent Overstreet ret = quota_reserve_range(dst, "a_res, pos_dst >> 9, 3638e8540e56SKent Overstreet (pos_dst + aligned_len) >> 9); 3639e8540e56SKent Overstreet if (ret) 3640e8540e56SKent Overstreet goto err; 3641e8540e56SKent Overstreet 3642e8540e56SKent Overstreet file_update_time(file_dst); 3643e8540e56SKent Overstreet 3644dcfc593fSKent Overstreet mark_pagecache_unallocated(src, pos_src >> 9, 3645dcfc593fSKent Overstreet (pos_src + aligned_len) >> 9); 364676426098SKent Overstreet 36472e87eae1SKent Overstreet ret = bch2_remap_range(c, 36486fed42bbSKent Overstreet inode_inum(dst), pos_dst >> 9, 36496fed42bbSKent Overstreet inode_inum(src), pos_src >> 9, 365076426098SKent Overstreet aligned_len >> 9, 36512e87eae1SKent Overstreet pos_dst + len, &i_sectors_delta); 36522e87eae1SKent Overstreet if (ret < 0) 36532e87eae1SKent Overstreet goto err; 365476426098SKent Overstreet 36552e87eae1SKent Overstreet /* 36562e87eae1SKent Overstreet * due to alignment, we might have remapped slightly more than requsted 36572e87eae1SKent Overstreet */ 3658677fc056SKent Overstreet ret = min((u64) ret << 9, (u64) len); 36592e87eae1SKent Overstreet 3660e8540e56SKent Overstreet i_sectors_acct(c, dst, "a_res, i_sectors_delta); 36612e87eae1SKent Overstreet 36622e87eae1SKent Overstreet spin_lock(&dst->v.i_lock); 3663677fc056SKent Overstreet if (pos_dst + ret > dst->v.i_size) 3664677fc056SKent Overstreet i_size_write(&dst->v, pos_dst + ret); 36652e87eae1SKent Overstreet spin_unlock(&dst->v.i_lock); 3666e7084c9cSKent Overstreet 366768a2054dSKent Overstreet if ((file_dst->f_flags & (__O_SYNC | O_DSYNC)) || 366868a2054dSKent Overstreet IS_SYNC(file_inode(file_dst))) 3669a8b3a677SKent Overstreet ret = bch2_flush_inode(c, dst); 36702e87eae1SKent Overstreet err: 3671e8540e56SKent Overstreet bch2_quota_reservation_put(c, dst, "a_res); 367276426098SKent Overstreet bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); 367376426098SKent Overstreet 36745c1ef830SKent Overstreet return bch2_err_class(ret); 367576426098SKent Overstreet } 367676426098SKent Overstreet 36771c6fdbd8SKent Overstreet /* fseek: */ 36781c6fdbd8SKent Overstreet 3679bf98ee10SBrian Foster static int folio_data_offset(struct folio *folio, loff_t pos) 36801c6fdbd8SKent Overstreet { 368130bff594SKent Overstreet struct bch_folio *s = bch2_folio(folio); 3682a86a92cbSKent Overstreet unsigned i, sectors = folio_sectors(folio); 3683f81b648dSKent Overstreet 3684543ef2ebSKent Overstreet if (s) 3685bf98ee10SBrian Foster for (i = folio_pos_to_s(folio, pos); i < sectors; i++) 3686a1774a05SKent Overstreet if (s->s[i].state >= SECTOR_dirty) 3687bf98ee10SBrian Foster return i << SECTOR_SHIFT; 3688f57a6a5dSKent Overstreet 3689543ef2ebSKent Overstreet return -1; 36901c6fdbd8SKent Overstreet } 36911c6fdbd8SKent Overstreet 3692543ef2ebSKent Overstreet static loff_t bch2_seek_pagecache_data(struct inode *vinode, 36931c6fdbd8SKent Overstreet loff_t start_offset, 36941c6fdbd8SKent Overstreet loff_t end_offset) 36951c6fdbd8SKent Overstreet { 36961c6fdbd8SKent Overstreet struct folio_batch fbatch; 36971c6fdbd8SKent Overstreet pgoff_t start_index = start_offset >> PAGE_SHIFT; 36981c6fdbd8SKent Overstreet pgoff_t end_index = end_offset >> PAGE_SHIFT; 36991c6fdbd8SKent Overstreet pgoff_t index = start_index; 37001c6fdbd8SKent Overstreet unsigned i; 3701543ef2ebSKent Overstreet loff_t ret; 3702543ef2ebSKent Overstreet int offset; 37031c6fdbd8SKent Overstreet 37041c6fdbd8SKent Overstreet folio_batch_init(&fbatch); 37051c6fdbd8SKent Overstreet 37061c6fdbd8SKent Overstreet while (filemap_get_folios(vinode->i_mapping, 37071c6fdbd8SKent Overstreet &index, end_index, &fbatch)) { 37081c6fdbd8SKent Overstreet for (i = 0; i < folio_batch_count(&fbatch); i++) { 37091c6fdbd8SKent Overstreet struct folio *folio = fbatch.folios[i]; 37101c6fdbd8SKent Overstreet 37111c6fdbd8SKent Overstreet folio_lock(folio); 3712543ef2ebSKent Overstreet offset = folio_data_offset(folio, 3713bf98ee10SBrian Foster max(folio_pos(folio), start_offset)); 3714543ef2ebSKent Overstreet if (offset >= 0) { 3715a86a92cbSKent Overstreet ret = clamp(folio_pos(folio) + offset, 3716543ef2ebSKent Overstreet start_offset, end_offset); 37171c6fdbd8SKent Overstreet folio_unlock(folio); 37181c6fdbd8SKent Overstreet folio_batch_release(&fbatch); 3719543ef2ebSKent Overstreet return ret; 37201c6fdbd8SKent Overstreet } 37211c6fdbd8SKent Overstreet folio_unlock(folio); 37221c6fdbd8SKent Overstreet } 37231c6fdbd8SKent Overstreet folio_batch_release(&fbatch); 37241c6fdbd8SKent Overstreet cond_resched(); 37251c6fdbd8SKent Overstreet } 37261c6fdbd8SKent Overstreet 37271c6fdbd8SKent Overstreet return end_offset; 37281c6fdbd8SKent Overstreet } 37291c6fdbd8SKent Overstreet 37301c6fdbd8SKent Overstreet static loff_t bch2_seek_data(struct file *file, u64 offset) 37311c6fdbd8SKent Overstreet { 37321c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 37331c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 3734424eb881SKent Overstreet struct btree_trans trans; 373567e0dd8fSKent Overstreet struct btree_iter iter; 37361c6fdbd8SKent Overstreet struct bkey_s_c k; 37376fed42bbSKent Overstreet subvol_inum inum = inode_inum(inode); 37381c6fdbd8SKent Overstreet u64 isize, next_data = MAX_LFS_FILESIZE; 37396fed42bbSKent Overstreet u32 snapshot; 37401c6fdbd8SKent Overstreet int ret; 37411c6fdbd8SKent Overstreet 37421c6fdbd8SKent Overstreet isize = i_size_read(&inode->v); 37431c6fdbd8SKent Overstreet if (offset >= isize) 37441c6fdbd8SKent Overstreet return -ENXIO; 37451c6fdbd8SKent Overstreet 374620bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 37476fed42bbSKent Overstreet retry: 37486fed42bbSKent Overstreet bch2_trans_begin(&trans); 37496fed42bbSKent Overstreet 37506fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 37516fed42bbSKent Overstreet if (ret) 37526fed42bbSKent Overstreet goto err; 3753424eb881SKent Overstreet 3754c72f687aSKent Overstreet for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_extents, 3755c72f687aSKent Overstreet SPOS(inode->v.i_ino, offset >> 9, snapshot), 3756c72f687aSKent Overstreet POS(inode->v.i_ino, U64_MAX), 3757c72f687aSKent Overstreet 0, k, ret) { 3758c72f687aSKent Overstreet if (bkey_extent_is_data(k.k)) { 37591c6fdbd8SKent Overstreet next_data = max(offset, bkey_start_offset(k.k) << 9); 37601c6fdbd8SKent Overstreet break; 37611c6fdbd8SKent Overstreet } else if (k.k->p.offset >> 9 > isize) 37621c6fdbd8SKent Overstreet break; 37631c6fdbd8SKent Overstreet } 376467e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 37656fed42bbSKent Overstreet err: 3766549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 37676fed42bbSKent Overstreet goto retry; 37681c6fdbd8SKent Overstreet 37699a796fdbSKent Overstreet bch2_trans_exit(&trans); 37701c6fdbd8SKent Overstreet if (ret) 37711c6fdbd8SKent Overstreet return ret; 37721c6fdbd8SKent Overstreet 37731c6fdbd8SKent Overstreet if (next_data > offset) 3774543ef2ebSKent Overstreet next_data = bch2_seek_pagecache_data(&inode->v, 37751c6fdbd8SKent Overstreet offset, next_data); 37761c6fdbd8SKent Overstreet 3777e10d3094SKent Overstreet if (next_data >= isize) 37781c6fdbd8SKent Overstreet return -ENXIO; 37791c6fdbd8SKent Overstreet 37801c6fdbd8SKent Overstreet return vfs_setpos(file, next_data, MAX_LFS_FILESIZE); 37811c6fdbd8SKent Overstreet } 37821c6fdbd8SKent Overstreet 3783e8d28c3eSKent Overstreet static bool folio_hole_offset(struct address_space *mapping, loff_t *offset) 37841c6fdbd8SKent Overstreet { 3785e8d28c3eSKent Overstreet struct folio *folio; 3786e8d28c3eSKent Overstreet struct bch_folio *s; 3787bf98ee10SBrian Foster unsigned i, sectors; 3788e8d28c3eSKent Overstreet bool ret = true; 3789543ef2ebSKent Overstreet 3790e8d28c3eSKent Overstreet folio = filemap_lock_folio(mapping, *offset >> PAGE_SHIFT); 3791b6898917SKent Overstreet if (IS_ERR_OR_NULL(folio)) 3792e8d28c3eSKent Overstreet return true; 3793e8d28c3eSKent Overstreet 3794e8d28c3eSKent Overstreet s = bch2_folio(folio); 3795543ef2ebSKent Overstreet if (!s) 3796e8d28c3eSKent Overstreet goto unlock; 3797543ef2ebSKent Overstreet 3798e8d28c3eSKent Overstreet sectors = folio_sectors(folio); 3799bf98ee10SBrian Foster for (i = folio_pos_to_s(folio, *offset); i < sectors; i++) 3800a1774a05SKent Overstreet if (s->s[i].state < SECTOR_dirty) { 3801bf98ee10SBrian Foster *offset = max(*offset, 3802bf98ee10SBrian Foster folio_pos(folio) + (i << SECTOR_SHIFT)); 3803e8d28c3eSKent Overstreet goto unlock; 3804543ef2ebSKent Overstreet } 3805543ef2ebSKent Overstreet 3806e8d28c3eSKent Overstreet *offset = folio_end_pos(folio); 3807e8d28c3eSKent Overstreet ret = false; 3808e8d28c3eSKent Overstreet unlock: 380930bff594SKent Overstreet folio_unlock(folio); 38101c6fdbd8SKent Overstreet return ret; 38111c6fdbd8SKent Overstreet } 38121c6fdbd8SKent Overstreet 3813543ef2ebSKent Overstreet static loff_t bch2_seek_pagecache_hole(struct inode *vinode, 38141c6fdbd8SKent Overstreet loff_t start_offset, 38151c6fdbd8SKent Overstreet loff_t end_offset) 38161c6fdbd8SKent Overstreet { 38171c6fdbd8SKent Overstreet struct address_space *mapping = vinode->i_mapping; 3818e8d28c3eSKent Overstreet loff_t offset = start_offset; 38191c6fdbd8SKent Overstreet 3820e8d28c3eSKent Overstreet while (offset < end_offset && 3821e8d28c3eSKent Overstreet !folio_hole_offset(mapping, &offset)) 3822e8d28c3eSKent Overstreet ; 3823543ef2ebSKent Overstreet 3824e8d28c3eSKent Overstreet return min(offset, end_offset); 38251c6fdbd8SKent Overstreet } 38261c6fdbd8SKent Overstreet 38271c6fdbd8SKent Overstreet static loff_t bch2_seek_hole(struct file *file, u64 offset) 38281c6fdbd8SKent Overstreet { 38291c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 38301c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 3831424eb881SKent Overstreet struct btree_trans trans; 383267e0dd8fSKent Overstreet struct btree_iter iter; 38331c6fdbd8SKent Overstreet struct bkey_s_c k; 38346fed42bbSKent Overstreet subvol_inum inum = inode_inum(inode); 38351c6fdbd8SKent Overstreet u64 isize, next_hole = MAX_LFS_FILESIZE; 38366fed42bbSKent Overstreet u32 snapshot; 38371c6fdbd8SKent Overstreet int ret; 38381c6fdbd8SKent Overstreet 38391c6fdbd8SKent Overstreet isize = i_size_read(&inode->v); 38401c6fdbd8SKent Overstreet if (offset >= isize) 38411c6fdbd8SKent Overstreet return -ENXIO; 38421c6fdbd8SKent Overstreet 384320bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 38446fed42bbSKent Overstreet retry: 38456fed42bbSKent Overstreet bch2_trans_begin(&trans); 38466fed42bbSKent Overstreet 38476fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 38486fed42bbSKent Overstreet if (ret) 38496fed42bbSKent Overstreet goto err; 3850424eb881SKent Overstreet 3851e5fa91d7SKent Overstreet for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, 38526fed42bbSKent Overstreet SPOS(inode->v.i_ino, offset >> 9, snapshot), 385394f651e2SKent Overstreet BTREE_ITER_SLOTS, k, ret) { 38541c6fdbd8SKent Overstreet if (k.k->p.inode != inode->v.i_ino) { 3855543ef2ebSKent Overstreet next_hole = bch2_seek_pagecache_hole(&inode->v, 38561c6fdbd8SKent Overstreet offset, MAX_LFS_FILESIZE); 38571c6fdbd8SKent Overstreet break; 38581c6fdbd8SKent Overstreet } else if (!bkey_extent_is_data(k.k)) { 3859543ef2ebSKent Overstreet next_hole = bch2_seek_pagecache_hole(&inode->v, 38601c6fdbd8SKent Overstreet max(offset, bkey_start_offset(k.k) << 9), 38611c6fdbd8SKent Overstreet k.k->p.offset << 9); 38621c6fdbd8SKent Overstreet 38631c6fdbd8SKent Overstreet if (next_hole < k.k->p.offset << 9) 38641c6fdbd8SKent Overstreet break; 38651c6fdbd8SKent Overstreet } else { 38661c6fdbd8SKent Overstreet offset = max(offset, bkey_start_offset(k.k) << 9); 38671c6fdbd8SKent Overstreet } 38681c6fdbd8SKent Overstreet } 386967e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 38706fed42bbSKent Overstreet err: 3871549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 38726fed42bbSKent Overstreet goto retry; 38731c6fdbd8SKent Overstreet 38749a796fdbSKent Overstreet bch2_trans_exit(&trans); 38751c6fdbd8SKent Overstreet if (ret) 38761c6fdbd8SKent Overstreet return ret; 38771c6fdbd8SKent Overstreet 38781c6fdbd8SKent Overstreet if (next_hole > isize) 38791c6fdbd8SKent Overstreet next_hole = isize; 38801c6fdbd8SKent Overstreet 38811c6fdbd8SKent Overstreet return vfs_setpos(file, next_hole, MAX_LFS_FILESIZE); 38821c6fdbd8SKent Overstreet } 38831c6fdbd8SKent Overstreet 38841c6fdbd8SKent Overstreet loff_t bch2_llseek(struct file *file, loff_t offset, int whence) 38851c6fdbd8SKent Overstreet { 38865c1ef830SKent Overstreet loff_t ret; 38875c1ef830SKent Overstreet 38881c6fdbd8SKent Overstreet switch (whence) { 38891c6fdbd8SKent Overstreet case SEEK_SET: 38901c6fdbd8SKent Overstreet case SEEK_CUR: 38911c6fdbd8SKent Overstreet case SEEK_END: 38925c1ef830SKent Overstreet ret = generic_file_llseek(file, offset, whence); 38935c1ef830SKent Overstreet break; 38941c6fdbd8SKent Overstreet case SEEK_DATA: 38955c1ef830SKent Overstreet ret = bch2_seek_data(file, offset); 38965c1ef830SKent Overstreet break; 38971c6fdbd8SKent Overstreet case SEEK_HOLE: 38985c1ef830SKent Overstreet ret = bch2_seek_hole(file, offset); 38995c1ef830SKent Overstreet break; 39005c1ef830SKent Overstreet default: 39015c1ef830SKent Overstreet ret = -EINVAL; 39025c1ef830SKent Overstreet break; 39031c6fdbd8SKent Overstreet } 39041c6fdbd8SKent Overstreet 39055c1ef830SKent Overstreet return bch2_err_class(ret); 39061c6fdbd8SKent Overstreet } 39071c6fdbd8SKent Overstreet 39081c6fdbd8SKent Overstreet void bch2_fs_fsio_exit(struct bch_fs *c) 39091c6fdbd8SKent Overstreet { 3910a8b3a677SKent Overstreet bioset_exit(&c->nocow_flush_bioset); 39111c6fdbd8SKent Overstreet bioset_exit(&c->dio_write_bioset); 39121c6fdbd8SKent Overstreet bioset_exit(&c->dio_read_bioset); 39131c6fdbd8SKent Overstreet bioset_exit(&c->writepage_bioset); 39141c6fdbd8SKent Overstreet } 39151c6fdbd8SKent Overstreet 39161c6fdbd8SKent Overstreet int bch2_fs_fsio_init(struct bch_fs *c) 39171c6fdbd8SKent Overstreet { 39181c6fdbd8SKent Overstreet int ret = 0; 39191c6fdbd8SKent Overstreet 39201c6fdbd8SKent Overstreet pr_verbose_init(c->opts, ""); 39211c6fdbd8SKent Overstreet 39221c6fdbd8SKent Overstreet if (bioset_init(&c->writepage_bioset, 39239a3df993SKent Overstreet 4, offsetof(struct bch_writepage_io, op.wbio.bio), 392465d48e35SKent Overstreet BIOSET_NEED_BVECS)) 392565d48e35SKent Overstreet return -BCH_ERR_ENOMEM_writepage_bioset_init; 392665d48e35SKent Overstreet 392765d48e35SKent Overstreet if (bioset_init(&c->dio_read_bioset, 39281c6fdbd8SKent Overstreet 4, offsetof(struct dio_read, rbio.bio), 392965d48e35SKent Overstreet BIOSET_NEED_BVECS)) 393065d48e35SKent Overstreet return -BCH_ERR_ENOMEM_dio_read_bioset_init; 393165d48e35SKent Overstreet 393265d48e35SKent Overstreet if (bioset_init(&c->dio_write_bioset, 39339a3df993SKent Overstreet 4, offsetof(struct dio_write, op.wbio.bio), 393465d48e35SKent Overstreet BIOSET_NEED_BVECS)) 393565d48e35SKent Overstreet return -BCH_ERR_ENOMEM_dio_write_bioset_init; 393665d48e35SKent Overstreet 393765d48e35SKent Overstreet if (bioset_init(&c->nocow_flush_bioset, 3938a8b3a677SKent Overstreet 1, offsetof(struct nocow_flush, bio), 0)) 393965d48e35SKent Overstreet return -BCH_ERR_ENOMEM_nocow_flush_bioset_init; 39401c6fdbd8SKent Overstreet 39411c6fdbd8SKent Overstreet pr_verbose_init(c->opts, "ret %i", ret); 39421c6fdbd8SKent Overstreet return ret; 39431c6fdbd8SKent Overstreet } 39441c6fdbd8SKent Overstreet 39451c6fdbd8SKent Overstreet #endif /* NO_BCACHEFS_FS */ 3946