11c6fdbd8SKent Overstreet // SPDX-License-Identifier: GPL-2.0 21c6fdbd8SKent Overstreet #ifndef NO_BCACHEFS_FS 31c6fdbd8SKent Overstreet 41c6fdbd8SKent Overstreet #include "bcachefs.h" 57b3f84eaSKent Overstreet #include "alloc_foreground.h" 607a1006aSKent Overstreet #include "bkey_buf.h" 71c6fdbd8SKent Overstreet #include "btree_update.h" 81c6fdbd8SKent Overstreet #include "buckets.h" 91c6fdbd8SKent Overstreet #include "clock.h" 101c6fdbd8SKent Overstreet #include "error.h" 11e2d9912cSKent Overstreet #include "extents.h" 1208c07feaSKent Overstreet #include "extent_update.h" 131c6fdbd8SKent Overstreet #include "fs.h" 141c6fdbd8SKent Overstreet #include "fs-io.h" 151c6fdbd8SKent Overstreet #include "fsck.h" 161c6fdbd8SKent Overstreet #include "inode.h" 171c6fdbd8SKent Overstreet #include "journal.h" 181c6fdbd8SKent Overstreet #include "io.h" 191c6fdbd8SKent Overstreet #include "keylist.h" 201c6fdbd8SKent Overstreet #include "quota.h" 2176426098SKent Overstreet #include "reflink.h" 221c6fdbd8SKent Overstreet #include "trace.h" 231c6fdbd8SKent Overstreet 241c6fdbd8SKent Overstreet #include <linux/aio.h> 251c6fdbd8SKent Overstreet #include <linux/backing-dev.h> 261c6fdbd8SKent Overstreet #include <linux/falloc.h> 271c6fdbd8SKent Overstreet #include <linux/migrate.h> 281c6fdbd8SKent Overstreet #include <linux/mmu_context.h> 291c6fdbd8SKent Overstreet #include <linux/pagevec.h> 309ba2eb25SKent Overstreet #include <linux/rmap.h> 311c6fdbd8SKent Overstreet #include <linux/sched/signal.h> 321c6fdbd8SKent Overstreet #include <linux/task_io_accounting_ops.h> 331c6fdbd8SKent Overstreet #include <linux/uio.h> 341c6fdbd8SKent Overstreet #include <linux/writeback.h> 351c6fdbd8SKent Overstreet 361c6fdbd8SKent Overstreet #include <trace/events/writeback.h> 371c6fdbd8SKent Overstreet 38*4198bf03SKent Overstreet static int bch2_clamp_data_hole(struct inode *, u64 *, u64 *, unsigned, bool); 39a09818c7SKent Overstreet 409567413cSKent Overstreet struct folio_vec { 419567413cSKent Overstreet struct folio *fv_folio; 429567413cSKent Overstreet size_t fv_offset; 439567413cSKent Overstreet size_t fv_len; 449567413cSKent Overstreet }; 459567413cSKent Overstreet 469567413cSKent Overstreet static inline struct folio_vec biovec_to_foliovec(struct bio_vec bv) 479567413cSKent Overstreet { 489567413cSKent Overstreet 499567413cSKent Overstreet struct folio *folio = page_folio(bv.bv_page); 509567413cSKent Overstreet size_t offset = (folio_page_idx(folio, bv.bv_page) << PAGE_SHIFT) + 519567413cSKent Overstreet bv.bv_offset; 529567413cSKent Overstreet size_t len = min_t(size_t, folio_size(folio) - offset, bv.bv_len); 539567413cSKent Overstreet 549567413cSKent Overstreet return (struct folio_vec) { 559567413cSKent Overstreet .fv_folio = folio, 569567413cSKent Overstreet .fv_offset = offset, 579567413cSKent Overstreet .fv_len = len, 589567413cSKent Overstreet }; 599567413cSKent Overstreet } 609567413cSKent Overstreet 619567413cSKent Overstreet static inline struct folio_vec bio_iter_iovec_folio(struct bio *bio, 629567413cSKent Overstreet struct bvec_iter iter) 639567413cSKent Overstreet { 649567413cSKent Overstreet return biovec_to_foliovec(bio_iter_iovec(bio, iter)); 659567413cSKent Overstreet } 669567413cSKent Overstreet 679567413cSKent Overstreet #define __bio_for_each_folio(bvl, bio, iter, start) \ 689567413cSKent Overstreet for (iter = (start); \ 699567413cSKent Overstreet (iter).bi_size && \ 709567413cSKent Overstreet ((bvl = bio_iter_iovec_folio((bio), (iter))), 1); \ 719567413cSKent Overstreet bio_advance_iter_single((bio), &(iter), (bvl).fv_len)) 729567413cSKent Overstreet 739567413cSKent Overstreet /** 749567413cSKent Overstreet * bio_for_each_folio - iterate over folios within a bio 759567413cSKent Overstreet * 769567413cSKent Overstreet * Like other non-_all versions, this iterates over what bio->bi_iter currently 779567413cSKent Overstreet * points to. This version is for drivers, where the bio may have previously 789567413cSKent Overstreet * been split or cloned. 799567413cSKent Overstreet */ 809567413cSKent Overstreet #define bio_for_each_folio(bvl, bio, iter) \ 819567413cSKent Overstreet __bio_for_each_folio(bvl, bio, iter, (bio)->bi_iter) 829567413cSKent Overstreet 836b9857b2SBrian Foster /* 846b9857b2SBrian Foster * Use u64 for the end pos and sector helpers because if the folio covers the 856b9857b2SBrian Foster * max supported range of the mapping, the start offset of the next folio 866b9857b2SBrian Foster * overflows loff_t. This breaks much of the range based processing in the 876b9857b2SBrian Foster * buffered write path. 886b9857b2SBrian Foster */ 896b9857b2SBrian Foster static inline u64 folio_end_pos(struct folio *folio) 9030bff594SKent Overstreet { 9130bff594SKent Overstreet return folio_pos(folio) + folio_size(folio); 9230bff594SKent Overstreet } 9330bff594SKent Overstreet 9430bff594SKent Overstreet static inline size_t folio_sectors(struct folio *folio) 9530bff594SKent Overstreet { 9630bff594SKent Overstreet return PAGE_SECTORS << folio_order(folio); 9730bff594SKent Overstreet } 9830bff594SKent Overstreet 9930bff594SKent Overstreet static inline loff_t folio_sector(struct folio *folio) 10030bff594SKent Overstreet { 10130bff594SKent Overstreet return folio_pos(folio) >> 9; 10230bff594SKent Overstreet } 10330bff594SKent Overstreet 1046b9857b2SBrian Foster static inline u64 folio_end_sector(struct folio *folio) 10530bff594SKent Overstreet { 10630bff594SKent Overstreet return folio_end_pos(folio) >> 9; 10730bff594SKent Overstreet } 10830bff594SKent Overstreet 10940022c01SKent Overstreet typedef DARRAY(struct folio *) folios; 11040022c01SKent Overstreet 11140022c01SKent Overstreet static int filemap_get_contig_folios_d(struct address_space *mapping, 1126b9857b2SBrian Foster loff_t start, u64 end, 11340022c01SKent Overstreet int fgp_flags, gfp_t gfp, 11440022c01SKent Overstreet folios *folios) 11540022c01SKent Overstreet { 11640022c01SKent Overstreet struct folio *f; 1176b9857b2SBrian Foster u64 pos = start; 11840022c01SKent Overstreet int ret = 0; 11940022c01SKent Overstreet 12040022c01SKent Overstreet while (pos < end) { 12140022c01SKent Overstreet if ((u64) pos >= (u64) start + (1ULL << 20)) 12240022c01SKent Overstreet fgp_flags &= ~FGP_CREAT; 12340022c01SKent Overstreet 12440022c01SKent Overstreet ret = darray_make_room_gfp(folios, 1, gfp & GFP_KERNEL); 12540022c01SKent Overstreet if (ret) 12640022c01SKent Overstreet break; 12740022c01SKent Overstreet 12840022c01SKent Overstreet f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp); 129b6898917SKent Overstreet if (IS_ERR_OR_NULL(f)) 13040022c01SKent Overstreet break; 13140022c01SKent Overstreet 13240022c01SKent Overstreet BUG_ON(folios->nr && folio_pos(f) != pos); 13340022c01SKent Overstreet 13440022c01SKent Overstreet pos = folio_end_pos(f); 13540022c01SKent Overstreet darray_push(folios, f); 13640022c01SKent Overstreet } 13740022c01SKent Overstreet 13840022c01SKent Overstreet if (!folios->nr && !ret && (fgp_flags & FGP_CREAT)) 13940022c01SKent Overstreet ret = -ENOMEM; 14040022c01SKent Overstreet 14140022c01SKent Overstreet return folios->nr ? 0 : ret; 14240022c01SKent Overstreet } 14340022c01SKent Overstreet 144a8b3a677SKent Overstreet struct nocow_flush { 145a8b3a677SKent Overstreet struct closure *cl; 146a8b3a677SKent Overstreet struct bch_dev *ca; 147a8b3a677SKent Overstreet struct bio bio; 148a8b3a677SKent Overstreet }; 149a8b3a677SKent Overstreet 150a8b3a677SKent Overstreet static void nocow_flush_endio(struct bio *_bio) 151a8b3a677SKent Overstreet { 152a8b3a677SKent Overstreet 153a8b3a677SKent Overstreet struct nocow_flush *bio = container_of(_bio, struct nocow_flush, bio); 154a8b3a677SKent Overstreet 155a8b3a677SKent Overstreet closure_put(bio->cl); 156a8b3a677SKent Overstreet percpu_ref_put(&bio->ca->io_ref); 157a8b3a677SKent Overstreet bio_put(&bio->bio); 158a8b3a677SKent Overstreet } 159a8b3a677SKent Overstreet 160a8b3a677SKent Overstreet static void bch2_inode_flush_nocow_writes_async(struct bch_fs *c, 161a8b3a677SKent Overstreet struct bch_inode_info *inode, 162a8b3a677SKent Overstreet struct closure *cl) 163a8b3a677SKent Overstreet { 164a8b3a677SKent Overstreet struct nocow_flush *bio; 165a8b3a677SKent Overstreet struct bch_dev *ca; 166a8b3a677SKent Overstreet struct bch_devs_mask devs; 167a8b3a677SKent Overstreet unsigned dev; 168a8b3a677SKent Overstreet 169a8b3a677SKent Overstreet dev = find_first_bit(inode->ei_devs_need_flush.d, BCH_SB_MEMBERS_MAX); 170a8b3a677SKent Overstreet if (dev == BCH_SB_MEMBERS_MAX) 171a8b3a677SKent Overstreet return; 172a8b3a677SKent Overstreet 173a8b3a677SKent Overstreet devs = inode->ei_devs_need_flush; 174a8b3a677SKent Overstreet memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush)); 175a8b3a677SKent Overstreet 176a8b3a677SKent Overstreet for_each_set_bit(dev, devs.d, BCH_SB_MEMBERS_MAX) { 177a8b3a677SKent Overstreet rcu_read_lock(); 178a8b3a677SKent Overstreet ca = rcu_dereference(c->devs[dev]); 179a8b3a677SKent Overstreet if (ca && !percpu_ref_tryget(&ca->io_ref)) 180a8b3a677SKent Overstreet ca = NULL; 181a8b3a677SKent Overstreet rcu_read_unlock(); 182a8b3a677SKent Overstreet 183a8b3a677SKent Overstreet if (!ca) 184a8b3a677SKent Overstreet continue; 185a8b3a677SKent Overstreet 186a8b3a677SKent Overstreet bio = container_of(bio_alloc_bioset(ca->disk_sb.bdev, 0, 187a8b3a677SKent Overstreet REQ_OP_FLUSH, 188a8b3a677SKent Overstreet GFP_KERNEL, 189a8b3a677SKent Overstreet &c->nocow_flush_bioset), 190a8b3a677SKent Overstreet struct nocow_flush, bio); 191a8b3a677SKent Overstreet bio->cl = cl; 192a8b3a677SKent Overstreet bio->ca = ca; 193a8b3a677SKent Overstreet bio->bio.bi_end_io = nocow_flush_endio; 194a8b3a677SKent Overstreet closure_bio_submit(&bio->bio, cl); 195a8b3a677SKent Overstreet } 196a8b3a677SKent Overstreet } 197a8b3a677SKent Overstreet 198a8b3a677SKent Overstreet static int bch2_inode_flush_nocow_writes(struct bch_fs *c, 199a8b3a677SKent Overstreet struct bch_inode_info *inode) 200a8b3a677SKent Overstreet { 201a8b3a677SKent Overstreet struct closure cl; 202a8b3a677SKent Overstreet 203a8b3a677SKent Overstreet closure_init_stack(&cl); 204a8b3a677SKent Overstreet bch2_inode_flush_nocow_writes_async(c, inode, &cl); 205a8b3a677SKent Overstreet closure_sync(&cl); 206a8b3a677SKent Overstreet 207a8b3a677SKent Overstreet return 0; 208a8b3a677SKent Overstreet } 209a8b3a677SKent Overstreet 2107f5e31e1SKent Overstreet static inline bool bio_full(struct bio *bio, unsigned len) 2117f5e31e1SKent Overstreet { 2127f5e31e1SKent Overstreet if (bio->bi_vcnt >= bio->bi_max_vecs) 2137f5e31e1SKent Overstreet return true; 2147f5e31e1SKent Overstreet if (bio->bi_iter.bi_size > UINT_MAX - len) 2157f5e31e1SKent Overstreet return true; 2167f5e31e1SKent Overstreet return false; 2177f5e31e1SKent Overstreet } 2187f5e31e1SKent Overstreet 219eb8e6e9cSKent Overstreet static inline struct address_space *faults_disabled_mapping(void) 220eb8e6e9cSKent Overstreet { 221eb8e6e9cSKent Overstreet return (void *) (((unsigned long) current->faults_disabled_mapping) & ~1UL); 222eb8e6e9cSKent Overstreet } 223eb8e6e9cSKent Overstreet 224eb8e6e9cSKent Overstreet static inline void set_fdm_dropped_locks(void) 225eb8e6e9cSKent Overstreet { 226eb8e6e9cSKent Overstreet current->faults_disabled_mapping = 227eb8e6e9cSKent Overstreet (void *) (((unsigned long) current->faults_disabled_mapping)|1); 228eb8e6e9cSKent Overstreet } 229eb8e6e9cSKent Overstreet 230eb8e6e9cSKent Overstreet static inline bool fdm_dropped_locks(void) 231eb8e6e9cSKent Overstreet { 232eb8e6e9cSKent Overstreet return ((unsigned long) current->faults_disabled_mapping) & 1; 233eb8e6e9cSKent Overstreet } 234eb8e6e9cSKent Overstreet 2351c6fdbd8SKent Overstreet struct quota_res { 2361c6fdbd8SKent Overstreet u64 sectors; 2371c6fdbd8SKent Overstreet }; 2381c6fdbd8SKent Overstreet 2399a3df993SKent Overstreet struct bch_writepage_io { 2401c6fdbd8SKent Overstreet struct bch_inode_info *inode; 2411c6fdbd8SKent Overstreet 2421c6fdbd8SKent Overstreet /* must be last: */ 2431c6fdbd8SKent Overstreet struct bch_write_op op; 2441c6fdbd8SKent Overstreet }; 2451c6fdbd8SKent Overstreet 2461c6fdbd8SKent Overstreet struct dio_write { 2471c6fdbd8SKent Overstreet struct kiocb *req; 248182c7bbfSKent Overstreet struct address_space *mapping; 249182c7bbfSKent Overstreet struct bch_inode_info *inode; 250ed484030SKent Overstreet struct mm_struct *mm; 2511c6fdbd8SKent Overstreet unsigned loop:1, 2526b1b186aSKent Overstreet extending:1, 2531c6fdbd8SKent Overstreet sync:1, 254a1ee777bSKent Overstreet flush:1, 2551c6fdbd8SKent Overstreet free_iov:1; 2561c6fdbd8SKent Overstreet struct quota_res quota_res; 257042a1f26SKent Overstreet u64 written; 2581c6fdbd8SKent Overstreet 2591c6fdbd8SKent Overstreet struct iov_iter iter; 2601c6fdbd8SKent Overstreet struct iovec inline_vecs[2]; 2611c6fdbd8SKent Overstreet 2621c6fdbd8SKent Overstreet /* must be last: */ 2639a3df993SKent Overstreet struct bch_write_op op; 2641c6fdbd8SKent Overstreet }; 2651c6fdbd8SKent Overstreet 2661c6fdbd8SKent Overstreet struct dio_read { 2671c6fdbd8SKent Overstreet struct closure cl; 2681c6fdbd8SKent Overstreet struct kiocb *req; 2691c6fdbd8SKent Overstreet long ret; 270b4725cc1SKent Overstreet bool should_dirty; 2711c6fdbd8SKent Overstreet struct bch_read_bio rbio; 2721c6fdbd8SKent Overstreet }; 2731c6fdbd8SKent Overstreet 2741c6fdbd8SKent Overstreet /* pagecache_block must be held */ 275a023127aSKent Overstreet static noinline int write_invalidate_inode_pages_range(struct address_space *mapping, 2761c6fdbd8SKent Overstreet loff_t start, loff_t end) 2771c6fdbd8SKent Overstreet { 2781c6fdbd8SKent Overstreet int ret; 2791c6fdbd8SKent Overstreet 2801c6fdbd8SKent Overstreet /* 2811c6fdbd8SKent Overstreet * XXX: the way this is currently implemented, we can spin if a process 2821c6fdbd8SKent Overstreet * is continually redirtying a specific page 2831c6fdbd8SKent Overstreet */ 2841c6fdbd8SKent Overstreet do { 2851c6fdbd8SKent Overstreet if (!mapping->nrpages) 2861c6fdbd8SKent Overstreet return 0; 2871c6fdbd8SKent Overstreet 2881c6fdbd8SKent Overstreet ret = filemap_write_and_wait_range(mapping, start, end); 2891c6fdbd8SKent Overstreet if (ret) 2901c6fdbd8SKent Overstreet break; 2911c6fdbd8SKent Overstreet 2921c6fdbd8SKent Overstreet if (!mapping->nrpages) 2931c6fdbd8SKent Overstreet return 0; 2941c6fdbd8SKent Overstreet 2951c6fdbd8SKent Overstreet ret = invalidate_inode_pages2_range(mapping, 2961c6fdbd8SKent Overstreet start >> PAGE_SHIFT, 2971c6fdbd8SKent Overstreet end >> PAGE_SHIFT); 2981c6fdbd8SKent Overstreet } while (ret == -EBUSY); 2991c6fdbd8SKent Overstreet 3001c6fdbd8SKent Overstreet return ret; 3011c6fdbd8SKent Overstreet } 3021c6fdbd8SKent Overstreet 3031c6fdbd8SKent Overstreet /* quotas */ 3041c6fdbd8SKent Overstreet 3051c6fdbd8SKent Overstreet #ifdef CONFIG_BCACHEFS_QUOTA 3061c6fdbd8SKent Overstreet 3076b1b186aSKent Overstreet static void __bch2_quota_reservation_put(struct bch_fs *c, 3081c6fdbd8SKent Overstreet struct bch_inode_info *inode, 3091c6fdbd8SKent Overstreet struct quota_res *res) 3101c6fdbd8SKent Overstreet { 3111c6fdbd8SKent Overstreet BUG_ON(res->sectors > inode->ei_quota_reserved); 3121c6fdbd8SKent Overstreet 3131c6fdbd8SKent Overstreet bch2_quota_acct(c, inode->ei_qid, Q_SPC, 31426609b61SKent Overstreet -((s64) res->sectors), KEY_TYPE_QUOTA_PREALLOC); 3151c6fdbd8SKent Overstreet inode->ei_quota_reserved -= res->sectors; 3161c6fdbd8SKent Overstreet res->sectors = 0; 3171c6fdbd8SKent Overstreet } 3181c6fdbd8SKent Overstreet 3196b1b186aSKent Overstreet static void bch2_quota_reservation_put(struct bch_fs *c, 3206b1b186aSKent Overstreet struct bch_inode_info *inode, 3216b1b186aSKent Overstreet struct quota_res *res) 3226b1b186aSKent Overstreet { 3236b1b186aSKent Overstreet if (res->sectors) { 3246b1b186aSKent Overstreet mutex_lock(&inode->ei_quota_lock); 3256b1b186aSKent Overstreet __bch2_quota_reservation_put(c, inode, res); 3266b1b186aSKent Overstreet mutex_unlock(&inode->ei_quota_lock); 3276b1b186aSKent Overstreet } 3286b1b186aSKent Overstreet } 3296b1b186aSKent Overstreet 3301c6fdbd8SKent Overstreet static int bch2_quota_reservation_add(struct bch_fs *c, 3311c6fdbd8SKent Overstreet struct bch_inode_info *inode, 3321c6fdbd8SKent Overstreet struct quota_res *res, 333e8540e56SKent Overstreet u64 sectors, 3341c6fdbd8SKent Overstreet bool check_enospc) 3351c6fdbd8SKent Overstreet { 3361c6fdbd8SKent Overstreet int ret; 3371c6fdbd8SKent Overstreet 338cb1b479dSKent Overstreet if (test_bit(EI_INODE_SNAPSHOT, &inode->ei_flags)) 339cb1b479dSKent Overstreet return 0; 340cb1b479dSKent Overstreet 3411c6fdbd8SKent Overstreet mutex_lock(&inode->ei_quota_lock); 3421c6fdbd8SKent Overstreet ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, 34326609b61SKent Overstreet check_enospc ? KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK); 3441c6fdbd8SKent Overstreet if (likely(!ret)) { 3451c6fdbd8SKent Overstreet inode->ei_quota_reserved += sectors; 3461c6fdbd8SKent Overstreet res->sectors += sectors; 3471c6fdbd8SKent Overstreet } 3481c6fdbd8SKent Overstreet mutex_unlock(&inode->ei_quota_lock); 3491c6fdbd8SKent Overstreet 3501c6fdbd8SKent Overstreet return ret; 3511c6fdbd8SKent Overstreet } 3521c6fdbd8SKent Overstreet 3531c6fdbd8SKent Overstreet #else 3541c6fdbd8SKent Overstreet 3556b1b186aSKent Overstreet static void __bch2_quota_reservation_put(struct bch_fs *c, 3566b1b186aSKent Overstreet struct bch_inode_info *inode, 3576b1b186aSKent Overstreet struct quota_res *res) {} 3586b1b186aSKent Overstreet 3591c6fdbd8SKent Overstreet static void bch2_quota_reservation_put(struct bch_fs *c, 3601c6fdbd8SKent Overstreet struct bch_inode_info *inode, 3616b1b186aSKent Overstreet struct quota_res *res) {} 3621c6fdbd8SKent Overstreet 3631c6fdbd8SKent Overstreet static int bch2_quota_reservation_add(struct bch_fs *c, 3641c6fdbd8SKent Overstreet struct bch_inode_info *inode, 3651c6fdbd8SKent Overstreet struct quota_res *res, 3661c6fdbd8SKent Overstreet unsigned sectors, 3671c6fdbd8SKent Overstreet bool check_enospc) 3681c6fdbd8SKent Overstreet { 3691c6fdbd8SKent Overstreet return 0; 3701c6fdbd8SKent Overstreet } 3711c6fdbd8SKent Overstreet 3721c6fdbd8SKent Overstreet #endif 3731c6fdbd8SKent Overstreet 3741c6fdbd8SKent Overstreet /* i_size updates: */ 3751c6fdbd8SKent Overstreet 3762ea90048SKent Overstreet struct inode_new_size { 3772ea90048SKent Overstreet loff_t new_size; 3782ea90048SKent Overstreet u64 now; 3792ea90048SKent Overstreet unsigned fields; 3802ea90048SKent Overstreet }; 3812ea90048SKent Overstreet 3821c6fdbd8SKent Overstreet static int inode_set_size(struct bch_inode_info *inode, 3831c6fdbd8SKent Overstreet struct bch_inode_unpacked *bi, 3841c6fdbd8SKent Overstreet void *p) 3851c6fdbd8SKent Overstreet { 3862ea90048SKent Overstreet struct inode_new_size *s = p; 3871c6fdbd8SKent Overstreet 3882ea90048SKent Overstreet bi->bi_size = s->new_size; 3892ea90048SKent Overstreet if (s->fields & ATTR_ATIME) 3902ea90048SKent Overstreet bi->bi_atime = s->now; 3912ea90048SKent Overstreet if (s->fields & ATTR_MTIME) 3922ea90048SKent Overstreet bi->bi_mtime = s->now; 3932ea90048SKent Overstreet if (s->fields & ATTR_CTIME) 3942ea90048SKent Overstreet bi->bi_ctime = s->now; 3951c6fdbd8SKent Overstreet 3961c6fdbd8SKent Overstreet return 0; 3971c6fdbd8SKent Overstreet } 3981c6fdbd8SKent Overstreet 39976426098SKent Overstreet int __must_check bch2_write_inode_size(struct bch_fs *c, 4001c6fdbd8SKent Overstreet struct bch_inode_info *inode, 4012ea90048SKent Overstreet loff_t new_size, unsigned fields) 4021c6fdbd8SKent Overstreet { 4032ea90048SKent Overstreet struct inode_new_size s = { 4042ea90048SKent Overstreet .new_size = new_size, 4052ea90048SKent Overstreet .now = bch2_current_time(c), 4062ea90048SKent Overstreet .fields = fields, 4072ea90048SKent Overstreet }; 4082ea90048SKent Overstreet 4092ea90048SKent Overstreet return bch2_write_inode(c, inode, inode_set_size, &s, fields); 4101c6fdbd8SKent Overstreet } 4111c6fdbd8SKent Overstreet 4126b1b186aSKent Overstreet static void __i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, 413190fa7afSKent Overstreet struct quota_res *quota_res, s64 sectors) 4141c6fdbd8SKent Overstreet { 415b33bf1bcSKent Overstreet bch2_fs_inconsistent_on((s64) inode->v.i_blocks + sectors < 0, c, 416b33bf1bcSKent Overstreet "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", 417b33bf1bcSKent Overstreet inode->v.i_ino, (u64) inode->v.i_blocks, sectors, 418b33bf1bcSKent Overstreet inode->ei_inode.bi_sectors); 419b44a66a6SKent Overstreet inode->v.i_blocks += sectors; 420b44a66a6SKent Overstreet 4211c6fdbd8SKent Overstreet #ifdef CONFIG_BCACHEFS_QUOTA 422cb1b479dSKent Overstreet if (quota_res && 423cb1b479dSKent Overstreet !test_bit(EI_INODE_SNAPSHOT, &inode->ei_flags) && 424cb1b479dSKent Overstreet sectors > 0) { 4251c6fdbd8SKent Overstreet BUG_ON(sectors > quota_res->sectors); 4261c6fdbd8SKent Overstreet BUG_ON(sectors > inode->ei_quota_reserved); 4271c6fdbd8SKent Overstreet 4281c6fdbd8SKent Overstreet quota_res->sectors -= sectors; 4291c6fdbd8SKent Overstreet inode->ei_quota_reserved -= sectors; 4301c6fdbd8SKent Overstreet } else { 43126609b61SKent Overstreet bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN); 4321c6fdbd8SKent Overstreet } 4331c6fdbd8SKent Overstreet #endif 4346b1b186aSKent Overstreet } 4356b1b186aSKent Overstreet 4366b1b186aSKent Overstreet static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, 4376b1b186aSKent Overstreet struct quota_res *quota_res, s64 sectors) 4386b1b186aSKent Overstreet { 4396b1b186aSKent Overstreet if (sectors) { 4406b1b186aSKent Overstreet mutex_lock(&inode->ei_quota_lock); 4416b1b186aSKent Overstreet __i_sectors_acct(c, inode, quota_res, sectors); 4421c6fdbd8SKent Overstreet mutex_unlock(&inode->ei_quota_lock); 4431c6fdbd8SKent Overstreet } 4446b1b186aSKent Overstreet } 4451c6fdbd8SKent Overstreet 4461c6fdbd8SKent Overstreet /* page state: */ 4471c6fdbd8SKent Overstreet 4481c6fdbd8SKent Overstreet /* stored in page->private: */ 4491c6fdbd8SKent Overstreet 450a1774a05SKent Overstreet #define BCH_FOLIO_SECTOR_STATE() \ 451a1774a05SKent Overstreet x(unallocated) \ 452a1774a05SKent Overstreet x(reserved) \ 453a1774a05SKent Overstreet x(dirty) \ 454a1774a05SKent Overstreet x(dirty_reserved) \ 455a1774a05SKent Overstreet x(allocated) 456a1774a05SKent Overstreet 457a1774a05SKent Overstreet enum bch_folio_sector_state { 458a1774a05SKent Overstreet #define x(n) SECTOR_##n, 459a1774a05SKent Overstreet BCH_FOLIO_SECTOR_STATE() 460a1774a05SKent Overstreet #undef x 461a1774a05SKent Overstreet }; 462a1774a05SKent Overstreet 46373bd774dSKent Overstreet static const char * const bch2_folio_sector_states[] = { 464a1774a05SKent Overstreet #define x(n) #n, 465a1774a05SKent Overstreet BCH_FOLIO_SECTOR_STATE() 466a1774a05SKent Overstreet #undef x 467a1774a05SKent Overstreet NULL 468a1774a05SKent Overstreet }; 469a1774a05SKent Overstreet 470a1774a05SKent Overstreet static inline enum bch_folio_sector_state 471a1774a05SKent Overstreet folio_sector_dirty(enum bch_folio_sector_state state) 472a1774a05SKent Overstreet { 473a1774a05SKent Overstreet switch (state) { 474a1774a05SKent Overstreet case SECTOR_unallocated: 475a1774a05SKent Overstreet return SECTOR_dirty; 476a1774a05SKent Overstreet case SECTOR_reserved: 477a1774a05SKent Overstreet return SECTOR_dirty_reserved; 478a1774a05SKent Overstreet default: 479a1774a05SKent Overstreet return state; 480a1774a05SKent Overstreet } 481a1774a05SKent Overstreet } 482a1774a05SKent Overstreet 483a1774a05SKent Overstreet static inline enum bch_folio_sector_state 484a1774a05SKent Overstreet folio_sector_undirty(enum bch_folio_sector_state state) 485a1774a05SKent Overstreet { 486a1774a05SKent Overstreet switch (state) { 487a1774a05SKent Overstreet case SECTOR_dirty: 488a1774a05SKent Overstreet return SECTOR_unallocated; 489a1774a05SKent Overstreet case SECTOR_dirty_reserved: 490a1774a05SKent Overstreet return SECTOR_reserved; 491a1774a05SKent Overstreet default: 492a1774a05SKent Overstreet return state; 493a1774a05SKent Overstreet } 494a1774a05SKent Overstreet } 495a1774a05SKent Overstreet 496a1774a05SKent Overstreet static inline enum bch_folio_sector_state 497a1774a05SKent Overstreet folio_sector_reserve(enum bch_folio_sector_state state) 498a1774a05SKent Overstreet { 499a1774a05SKent Overstreet switch (state) { 500a1774a05SKent Overstreet case SECTOR_unallocated: 501a1774a05SKent Overstreet return SECTOR_reserved; 502a1774a05SKent Overstreet case SECTOR_dirty: 503a1774a05SKent Overstreet return SECTOR_dirty_reserved; 504a1774a05SKent Overstreet default: 505a1774a05SKent Overstreet return state; 506a1774a05SKent Overstreet } 507a1774a05SKent Overstreet } 508a1774a05SKent Overstreet 5093342ac13SKent Overstreet struct bch_folio_sector { 510b44a66a6SKent Overstreet /* Uncompressed, fully allocated replicas (or on disk reservation): */ 511b44a66a6SKent Overstreet unsigned nr_replicas:4; 512f81b648dSKent Overstreet 513b44a66a6SKent Overstreet /* Owns PAGE_SECTORS * replicas_reserved sized in memory reservation: */ 514b44a66a6SKent Overstreet unsigned replicas_reserved:4; 5151c6fdbd8SKent Overstreet 516f57a6a5dSKent Overstreet /* i_sectors: */ 517a1774a05SKent Overstreet enum bch_folio_sector_state state:8; 5181c6fdbd8SKent Overstreet }; 5191c6fdbd8SKent Overstreet 5203342ac13SKent Overstreet struct bch_folio { 5213826ee0bSKent Overstreet spinlock_t lock; 5227f5e31e1SKent Overstreet atomic_t write_count; 5233342ac13SKent Overstreet /* 5243342ac13SKent Overstreet * Is the sector state up to date with the btree? 5253342ac13SKent Overstreet * (Not the data itself) 5263342ac13SKent Overstreet */ 527e6ec361fSKent Overstreet bool uptodate; 52849fe78ffSKent Overstreet struct bch_folio_sector s[]; 529f57a6a5dSKent Overstreet }; 530f57a6a5dSKent Overstreet 531a1774a05SKent Overstreet static inline void folio_sector_set(struct folio *folio, 532a1774a05SKent Overstreet struct bch_folio *s, 533a1774a05SKent Overstreet unsigned i, unsigned n) 534a1774a05SKent Overstreet { 535a1774a05SKent Overstreet s->s[i].state = n; 536a1774a05SKent Overstreet } 537a1774a05SKent Overstreet 538bf98ee10SBrian Foster /* file offset (to folio offset) to bch_folio_sector index */ 539bf98ee10SBrian Foster static inline int folio_pos_to_s(struct folio *folio, loff_t pos) 540bf98ee10SBrian Foster { 541bf98ee10SBrian Foster u64 f_offset = pos - folio_pos(folio); 542bf98ee10SBrian Foster BUG_ON(pos < folio_pos(folio) || pos >= folio_end_pos(folio)); 543bf98ee10SBrian Foster return f_offset >> SECTOR_SHIFT; 544bf98ee10SBrian Foster } 545bf98ee10SBrian Foster 54630bff594SKent Overstreet static inline struct bch_folio *__bch2_folio(struct folio *folio) 5471c6fdbd8SKent Overstreet { 54830bff594SKent Overstreet return folio_has_private(folio) 54930bff594SKent Overstreet ? (struct bch_folio *) folio_get_private(folio) 550f57a6a5dSKent Overstreet : NULL; 551f57a6a5dSKent Overstreet } 5521c6fdbd8SKent Overstreet 55330bff594SKent Overstreet static inline struct bch_folio *bch2_folio(struct folio *folio) 554f57a6a5dSKent Overstreet { 55530bff594SKent Overstreet EBUG_ON(!folio_test_locked(folio)); 5561c6fdbd8SKent Overstreet 55730bff594SKent Overstreet return __bch2_folio(folio); 558f57a6a5dSKent Overstreet } 559f57a6a5dSKent Overstreet 56030bff594SKent Overstreet /* for newly allocated folios: */ 56130bff594SKent Overstreet static void __bch2_folio_release(struct folio *folio) 562f57a6a5dSKent Overstreet { 56330bff594SKent Overstreet kfree(folio_detach_private(folio)); 564f57a6a5dSKent Overstreet } 565f57a6a5dSKent Overstreet 56630bff594SKent Overstreet static void bch2_folio_release(struct folio *folio) 567f57a6a5dSKent Overstreet { 56830bff594SKent Overstreet EBUG_ON(!folio_test_locked(folio)); 56930bff594SKent Overstreet __bch2_folio_release(folio); 570f57a6a5dSKent Overstreet } 571f57a6a5dSKent Overstreet 57230bff594SKent Overstreet /* for newly allocated folios: */ 57330bff594SKent Overstreet static struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp) 574f57a6a5dSKent Overstreet { 5753342ac13SKent Overstreet struct bch_folio *s; 576f57a6a5dSKent Overstreet 57749fe78ffSKent Overstreet s = kzalloc(sizeof(*s) + 57849fe78ffSKent Overstreet sizeof(struct bch_folio_sector) * 57970d41c9eSKent Overstreet folio_sectors(folio), gfp); 580f57a6a5dSKent Overstreet if (!s) 581f57a6a5dSKent Overstreet return NULL; 582f57a6a5dSKent Overstreet 5833826ee0bSKent Overstreet spin_lock_init(&s->lock); 58430bff594SKent Overstreet folio_attach_private(folio, s); 5851c6fdbd8SKent Overstreet return s; 5861c6fdbd8SKent Overstreet } 5871c6fdbd8SKent Overstreet 58830bff594SKent Overstreet static struct bch_folio *bch2_folio_create(struct folio *folio, gfp_t gfp) 589f57a6a5dSKent Overstreet { 59030bff594SKent Overstreet return bch2_folio(folio) ?: __bch2_folio_create(folio, gfp); 591f57a6a5dSKent Overstreet } 592f57a6a5dSKent Overstreet 59379203111SKent Overstreet static unsigned bkey_to_sector_state(struct bkey_s_c k) 594b44a66a6SKent Overstreet { 59579203111SKent Overstreet if (bkey_extent_is_reservation(k)) 596a1774a05SKent Overstreet return SECTOR_reserved; 59779203111SKent Overstreet if (bkey_extent_is_allocation(k.k)) 598a1774a05SKent Overstreet return SECTOR_allocated; 599a1774a05SKent Overstreet return SECTOR_unallocated; 600b44a66a6SKent Overstreet } 601b44a66a6SKent Overstreet 60230bff594SKent Overstreet static void __bch2_folio_set(struct folio *folio, 603e6ec361fSKent Overstreet unsigned pg_offset, unsigned pg_len, 604e6ec361fSKent Overstreet unsigned nr_ptrs, unsigned state) 605e6ec361fSKent Overstreet { 60670d41c9eSKent Overstreet struct bch_folio *s = bch2_folio(folio); 60733e2eb96SKent Overstreet unsigned i, sectors = folio_sectors(folio); 608e6ec361fSKent Overstreet 60933e2eb96SKent Overstreet BUG_ON(pg_offset >= sectors); 61033e2eb96SKent Overstreet BUG_ON(pg_offset + pg_len > sectors); 611e6ec361fSKent Overstreet 612e6ec361fSKent Overstreet spin_lock(&s->lock); 613e6ec361fSKent Overstreet 614e6ec361fSKent Overstreet for (i = pg_offset; i < pg_offset + pg_len; i++) { 615e6ec361fSKent Overstreet s->s[i].nr_replicas = nr_ptrs; 616a1774a05SKent Overstreet folio_sector_set(folio, s, i, state); 617e6ec361fSKent Overstreet } 618e6ec361fSKent Overstreet 61933e2eb96SKent Overstreet if (i == sectors) 620e6ec361fSKent Overstreet s->uptodate = true; 621e6ec361fSKent Overstreet 622e6ec361fSKent Overstreet spin_unlock(&s->lock); 623e6ec361fSKent Overstreet } 624e6ec361fSKent Overstreet 6253342ac13SKent Overstreet /* 6263342ac13SKent Overstreet * Initialize bch_folio state (allocated/unallocated, nr_replicas) from the 6273342ac13SKent Overstreet * extents btree: 6283342ac13SKent Overstreet */ 6293342ac13SKent Overstreet static int bch2_folio_set(struct bch_fs *c, subvol_inum inum, 63030bff594SKent Overstreet struct folio **folios, unsigned nr_folios) 631e6ec361fSKent Overstreet { 632e6ec361fSKent Overstreet struct btree_trans trans; 633e6ec361fSKent Overstreet struct btree_iter iter; 634e6ec361fSKent Overstreet struct bkey_s_c k; 63570d41c9eSKent Overstreet struct bch_folio *s; 63630bff594SKent Overstreet u64 offset = folio_sector(folios[0]); 63770d41c9eSKent Overstreet unsigned folio_idx; 638e6ec361fSKent Overstreet u32 snapshot; 63970d41c9eSKent Overstreet bool need_set = false; 640e6ec361fSKent Overstreet int ret; 641e6ec361fSKent Overstreet 64270d41c9eSKent Overstreet for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) { 64370d41c9eSKent Overstreet s = bch2_folio_create(folios[folio_idx], GFP_KERNEL); 64470d41c9eSKent Overstreet if (!s) 64570d41c9eSKent Overstreet return -ENOMEM; 64670d41c9eSKent Overstreet 64770d41c9eSKent Overstreet need_set |= !s->uptodate; 64870d41c9eSKent Overstreet } 64970d41c9eSKent Overstreet 65070d41c9eSKent Overstreet if (!need_set) 65170d41c9eSKent Overstreet return 0; 65270d41c9eSKent Overstreet 65370d41c9eSKent Overstreet folio_idx = 0; 654e6ec361fSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 655e6ec361fSKent Overstreet retry: 656e6ec361fSKent Overstreet bch2_trans_begin(&trans); 657e6ec361fSKent Overstreet 658e6ec361fSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 659e6ec361fSKent Overstreet if (ret) 660e6ec361fSKent Overstreet goto err; 661e6ec361fSKent Overstreet 662e6ec361fSKent Overstreet for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, 663e6ec361fSKent Overstreet SPOS(inum.inum, offset, snapshot), 664e6ec361fSKent Overstreet BTREE_ITER_SLOTS, k, ret) { 665e6ec361fSKent Overstreet unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k); 66679203111SKent Overstreet unsigned state = bkey_to_sector_state(k); 667e6ec361fSKent Overstreet 66830bff594SKent Overstreet while (folio_idx < nr_folios) { 66930bff594SKent Overstreet struct folio *folio = folios[folio_idx]; 67030bff594SKent Overstreet u64 folio_start = folio_sector(folio); 67130bff594SKent Overstreet u64 folio_end = folio_end_sector(folio); 67230bff594SKent Overstreet unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) - folio_start; 67330bff594SKent Overstreet unsigned folio_len = min(k.k->p.offset, folio_end) - folio_offset - folio_start; 674e6ec361fSKent Overstreet 67530bff594SKent Overstreet BUG_ON(k.k->p.offset < folio_start); 67630bff594SKent Overstreet BUG_ON(bkey_start_offset(k.k) > folio_end); 677e6ec361fSKent Overstreet 67870d41c9eSKent Overstreet if (!bch2_folio(folio)->uptodate) 67930bff594SKent Overstreet __bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state); 680e6ec361fSKent Overstreet 68130bff594SKent Overstreet if (k.k->p.offset < folio_end) 682e6ec361fSKent Overstreet break; 68330bff594SKent Overstreet folio_idx++; 684e6ec361fSKent Overstreet } 685e6ec361fSKent Overstreet 68630bff594SKent Overstreet if (folio_idx == nr_folios) 687e6ec361fSKent Overstreet break; 688e6ec361fSKent Overstreet } 689e6ec361fSKent Overstreet 690e6ec361fSKent Overstreet offset = iter.pos.offset; 691e6ec361fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 692e6ec361fSKent Overstreet err: 693549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 694e6ec361fSKent Overstreet goto retry; 695e6ec361fSKent Overstreet bch2_trans_exit(&trans); 696e6ec361fSKent Overstreet 697e6ec361fSKent Overstreet return ret; 698e6ec361fSKent Overstreet } 699e6ec361fSKent Overstreet 700b44a66a6SKent Overstreet static void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k) 701b44a66a6SKent Overstreet { 702b44a66a6SKent Overstreet struct bvec_iter iter; 7039567413cSKent Overstreet struct folio_vec fv; 704b44a66a6SKent Overstreet unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v 705b44a66a6SKent Overstreet ? 0 : bch2_bkey_nr_ptrs_fully_allocated(k); 70679203111SKent Overstreet unsigned state = bkey_to_sector_state(k); 707b44a66a6SKent Overstreet 7089567413cSKent Overstreet bio_for_each_folio(fv, bio, iter) 7099567413cSKent Overstreet __bch2_folio_set(fv.fv_folio, 7109567413cSKent Overstreet fv.fv_offset >> 9, 7119567413cSKent Overstreet fv.fv_len >> 9, 7129567413cSKent Overstreet nr_ptrs, state); 713b44a66a6SKent Overstreet } 714b44a66a6SKent Overstreet 715dcfc593fSKent Overstreet static void mark_pagecache_unallocated(struct bch_inode_info *inode, 716dcfc593fSKent Overstreet u64 start, u64 end) 717dcfc593fSKent Overstreet { 718dcfc593fSKent Overstreet pgoff_t index = start >> PAGE_SECTORS_SHIFT; 719dcfc593fSKent Overstreet pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT; 720dcfc593fSKent Overstreet struct folio_batch fbatch; 721dcfc593fSKent Overstreet unsigned i, j; 722dcfc593fSKent Overstreet 723dcfc593fSKent Overstreet if (end <= start) 724dcfc593fSKent Overstreet return; 725dcfc593fSKent Overstreet 726dcfc593fSKent Overstreet folio_batch_init(&fbatch); 727dcfc593fSKent Overstreet 728dcfc593fSKent Overstreet while (filemap_get_folios(inode->v.i_mapping, 729dcfc593fSKent Overstreet &index, end_index, &fbatch)) { 730dcfc593fSKent Overstreet for (i = 0; i < folio_batch_count(&fbatch); i++) { 731dcfc593fSKent Overstreet struct folio *folio = fbatch.folios[i]; 73233e2eb96SKent Overstreet u64 folio_start = folio_sector(folio); 73333e2eb96SKent Overstreet u64 folio_end = folio_end_sector(folio); 73430bff594SKent Overstreet unsigned folio_offset = max(start, folio_start) - folio_start; 73530bff594SKent Overstreet unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; 7363342ac13SKent Overstreet struct bch_folio *s; 737dcfc593fSKent Overstreet 73830bff594SKent Overstreet BUG_ON(end <= folio_start); 739dcfc593fSKent Overstreet 740dcfc593fSKent Overstreet folio_lock(folio); 74130bff594SKent Overstreet s = bch2_folio(folio); 742dcfc593fSKent Overstreet 743dcfc593fSKent Overstreet if (s) { 744dcfc593fSKent Overstreet spin_lock(&s->lock); 74530bff594SKent Overstreet for (j = folio_offset; j < folio_offset + folio_len; j++) 746dcfc593fSKent Overstreet s->s[j].nr_replicas = 0; 747dcfc593fSKent Overstreet spin_unlock(&s->lock); 748dcfc593fSKent Overstreet } 749dcfc593fSKent Overstreet 750dcfc593fSKent Overstreet folio_unlock(folio); 751dcfc593fSKent Overstreet } 752dcfc593fSKent Overstreet folio_batch_release(&fbatch); 753dcfc593fSKent Overstreet cond_resched(); 754dcfc593fSKent Overstreet } 755dcfc593fSKent Overstreet } 756dcfc593fSKent Overstreet 757dcfc593fSKent Overstreet static void mark_pagecache_reserved(struct bch_inode_info *inode, 758dcfc593fSKent Overstreet u64 start, u64 end) 759dcfc593fSKent Overstreet { 760dcfc593fSKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 761dcfc593fSKent Overstreet pgoff_t index = start >> PAGE_SECTORS_SHIFT; 762dcfc593fSKent Overstreet pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT; 763dcfc593fSKent Overstreet struct folio_batch fbatch; 764dcfc593fSKent Overstreet s64 i_sectors_delta = 0; 765dcfc593fSKent Overstreet unsigned i, j; 766dcfc593fSKent Overstreet 767dcfc593fSKent Overstreet if (end <= start) 768dcfc593fSKent Overstreet return; 769dcfc593fSKent Overstreet 770dcfc593fSKent Overstreet folio_batch_init(&fbatch); 771dcfc593fSKent Overstreet 772dcfc593fSKent Overstreet while (filemap_get_folios(inode->v.i_mapping, 773dcfc593fSKent Overstreet &index, end_index, &fbatch)) { 774dcfc593fSKent Overstreet for (i = 0; i < folio_batch_count(&fbatch); i++) { 775dcfc593fSKent Overstreet struct folio *folio = fbatch.folios[i]; 77633e2eb96SKent Overstreet u64 folio_start = folio_sector(folio); 77733e2eb96SKent Overstreet u64 folio_end = folio_end_sector(folio); 77830bff594SKent Overstreet unsigned folio_offset = max(start, folio_start) - folio_start; 77930bff594SKent Overstreet unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; 7803342ac13SKent Overstreet struct bch_folio *s; 781dcfc593fSKent Overstreet 78230bff594SKent Overstreet BUG_ON(end <= folio_start); 783dcfc593fSKent Overstreet 784dcfc593fSKent Overstreet folio_lock(folio); 78530bff594SKent Overstreet s = bch2_folio(folio); 786dcfc593fSKent Overstreet 787dcfc593fSKent Overstreet if (s) { 788dcfc593fSKent Overstreet spin_lock(&s->lock); 789a1774a05SKent Overstreet for (j = folio_offset; j < folio_offset + folio_len; j++) { 790a1774a05SKent Overstreet i_sectors_delta -= s->s[j].state == SECTOR_dirty; 791a1774a05SKent Overstreet folio_sector_set(folio, s, j, folio_sector_reserve(s->s[j].state)); 792dcfc593fSKent Overstreet } 793dcfc593fSKent Overstreet spin_unlock(&s->lock); 794dcfc593fSKent Overstreet } 795dcfc593fSKent Overstreet 796dcfc593fSKent Overstreet folio_unlock(folio); 797dcfc593fSKent Overstreet } 798dcfc593fSKent Overstreet folio_batch_release(&fbatch); 799dcfc593fSKent Overstreet cond_resched(); 800dcfc593fSKent Overstreet } 801dcfc593fSKent Overstreet 802dcfc593fSKent Overstreet i_sectors_acct(c, inode, NULL, i_sectors_delta); 803dcfc593fSKent Overstreet } 804dcfc593fSKent Overstreet 805e1036a2aSKent Overstreet static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode) 806e1036a2aSKent Overstreet { 807e1036a2aSKent Overstreet /* XXX: this should not be open coded */ 808e1036a2aSKent Overstreet return inode->ei_inode.bi_data_replicas 809e1036a2aSKent Overstreet ? inode->ei_inode.bi_data_replicas - 1 810e1036a2aSKent Overstreet : c->opts.data_replicas; 811e1036a2aSKent Overstreet } 812e1036a2aSKent Overstreet 8133342ac13SKent Overstreet static inline unsigned sectors_to_reserve(struct bch_folio_sector *s, 814f57a6a5dSKent Overstreet unsigned nr_replicas) 815f57a6a5dSKent Overstreet { 816f57a6a5dSKent Overstreet return max(0, (int) nr_replicas - 817f57a6a5dSKent Overstreet s->nr_replicas - 818f57a6a5dSKent Overstreet s->replicas_reserved); 819f57a6a5dSKent Overstreet } 820f57a6a5dSKent Overstreet 82130bff594SKent Overstreet static int bch2_get_folio_disk_reservation(struct bch_fs *c, 822f57a6a5dSKent Overstreet struct bch_inode_info *inode, 82330bff594SKent Overstreet struct folio *folio, bool check_enospc) 8241c6fdbd8SKent Overstreet { 82530bff594SKent Overstreet struct bch_folio *s = bch2_folio_create(folio, 0); 826e1036a2aSKent Overstreet unsigned nr_replicas = inode_nr_replicas(c, inode); 827f57a6a5dSKent Overstreet struct disk_reservation disk_res = { 0 }; 82833e2eb96SKent Overstreet unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0; 829f81b648dSKent Overstreet int ret; 8301c6fdbd8SKent Overstreet 831f57a6a5dSKent Overstreet if (!s) 832f57a6a5dSKent Overstreet return -ENOMEM; 8331c6fdbd8SKent Overstreet 83433e2eb96SKent Overstreet for (i = 0; i < sectors; i++) 835f57a6a5dSKent Overstreet disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas); 836f57a6a5dSKent Overstreet 837f57a6a5dSKent Overstreet if (!disk_res_sectors) 838f57a6a5dSKent Overstreet return 0; 839f57a6a5dSKent Overstreet 840f57a6a5dSKent Overstreet ret = bch2_disk_reservation_get(c, &disk_res, 841f57a6a5dSKent Overstreet disk_res_sectors, 1, 842f57a6a5dSKent Overstreet !check_enospc 843f57a6a5dSKent Overstreet ? BCH_DISK_RESERVATION_NOFAIL 844f57a6a5dSKent Overstreet : 0); 8451c6fdbd8SKent Overstreet if (unlikely(ret)) 846f81b648dSKent Overstreet return ret; 847f81b648dSKent Overstreet 84833e2eb96SKent Overstreet for (i = 0; i < sectors; i++) 849f57a6a5dSKent Overstreet s->s[i].replicas_reserved += 850f57a6a5dSKent Overstreet sectors_to_reserve(&s->s[i], nr_replicas); 851f57a6a5dSKent Overstreet 852f57a6a5dSKent Overstreet return 0; 8531c6fdbd8SKent Overstreet } 8541c6fdbd8SKent Overstreet 85530bff594SKent Overstreet struct bch2_folio_reservation { 856d1542e03SKent Overstreet struct disk_reservation disk; 857d1542e03SKent Overstreet struct quota_res quota; 858d1542e03SKent Overstreet }; 859d1542e03SKent Overstreet 86030bff594SKent Overstreet static void bch2_folio_reservation_init(struct bch_fs *c, 861f57a6a5dSKent Overstreet struct bch_inode_info *inode, 86230bff594SKent Overstreet struct bch2_folio_reservation *res) 863d1542e03SKent Overstreet { 864d1542e03SKent Overstreet memset(res, 0, sizeof(*res)); 865d1542e03SKent Overstreet 866d1542e03SKent Overstreet res->disk.nr_replicas = inode_nr_replicas(c, inode); 867d1542e03SKent Overstreet } 868d1542e03SKent Overstreet 86930bff594SKent Overstreet static void bch2_folio_reservation_put(struct bch_fs *c, 870d1542e03SKent Overstreet struct bch_inode_info *inode, 87130bff594SKent Overstreet struct bch2_folio_reservation *res) 872d1542e03SKent Overstreet { 873d1542e03SKent Overstreet bch2_disk_reservation_put(c, &res->disk); 874d1542e03SKent Overstreet bch2_quota_reservation_put(c, inode, &res->quota); 875d1542e03SKent Overstreet } 876d1542e03SKent Overstreet 87730bff594SKent Overstreet static int bch2_folio_reservation_get(struct bch_fs *c, 87830bff594SKent Overstreet struct bch_inode_info *inode, 87930bff594SKent Overstreet struct folio *folio, 88030bff594SKent Overstreet struct bch2_folio_reservation *res, 881bd954215SKent Overstreet unsigned offset, unsigned len) 882f57a6a5dSKent Overstreet { 88330bff594SKent Overstreet struct bch_folio *s = bch2_folio_create(folio, 0); 884d1542e03SKent Overstreet unsigned i, disk_sectors = 0, quota_sectors = 0; 885f57a6a5dSKent Overstreet int ret; 886f57a6a5dSKent Overstreet 887f57a6a5dSKent Overstreet if (!s) 888f57a6a5dSKent Overstreet return -ENOMEM; 889f57a6a5dSKent Overstreet 890e6ec361fSKent Overstreet BUG_ON(!s->uptodate); 891e6ec361fSKent Overstreet 8924b0a66d5SKent Overstreet for (i = round_down(offset, block_bytes(c)) >> 9; 8934b0a66d5SKent Overstreet i < round_up(offset + len, block_bytes(c)) >> 9; 894d1542e03SKent Overstreet i++) { 895d1542e03SKent Overstreet disk_sectors += sectors_to_reserve(&s->s[i], 896d1542e03SKent Overstreet res->disk.nr_replicas); 897a1774a05SKent Overstreet quota_sectors += s->s[i].state == SECTOR_unallocated; 8981c6fdbd8SKent Overstreet } 8991c6fdbd8SKent Overstreet 900d1542e03SKent Overstreet if (disk_sectors) { 901bd954215SKent Overstreet ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0); 902d1542e03SKent Overstreet if (unlikely(ret)) 903d1542e03SKent Overstreet return ret; 904d1542e03SKent Overstreet } 905d1542e03SKent Overstreet 906d1542e03SKent Overstreet if (quota_sectors) { 907d1542e03SKent Overstreet ret = bch2_quota_reservation_add(c, inode, &res->quota, 908bd954215SKent Overstreet quota_sectors, true); 909d1542e03SKent Overstreet if (unlikely(ret)) { 910d1542e03SKent Overstreet struct disk_reservation tmp = { 911d1542e03SKent Overstreet .sectors = disk_sectors 912d1542e03SKent Overstreet }; 913d1542e03SKent Overstreet 914d1542e03SKent Overstreet bch2_disk_reservation_put(c, &tmp); 915d1542e03SKent Overstreet res->disk.sectors -= disk_sectors; 916d1542e03SKent Overstreet return ret; 917d1542e03SKent Overstreet } 918d1542e03SKent Overstreet } 919d1542e03SKent Overstreet 920d1542e03SKent Overstreet return 0; 921f57a6a5dSKent Overstreet } 922f57a6a5dSKent Overstreet 92330bff594SKent Overstreet static void bch2_clear_folio_bits(struct folio *folio) 9241c6fdbd8SKent Overstreet { 92530bff594SKent Overstreet struct bch_inode_info *inode = to_bch_ei(folio->mapping->host); 9261c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 92730bff594SKent Overstreet struct bch_folio *s = bch2_folio(folio); 928d1542e03SKent Overstreet struct disk_reservation disk_res = { 0 }; 92933e2eb96SKent Overstreet int i, sectors = folio_sectors(folio), dirty_sectors = 0; 9301c6fdbd8SKent Overstreet 931f57a6a5dSKent Overstreet if (!s) 9321c6fdbd8SKent Overstreet return; 9331c6fdbd8SKent Overstreet 93430bff594SKent Overstreet EBUG_ON(!folio_test_locked(folio)); 93530bff594SKent Overstreet EBUG_ON(folio_test_writeback(folio)); 9363826ee0bSKent Overstreet 93733e2eb96SKent Overstreet for (i = 0; i < sectors; i++) { 938d1542e03SKent Overstreet disk_res.sectors += s->s[i].replicas_reserved; 939d1542e03SKent Overstreet s->s[i].replicas_reserved = 0; 940d1542e03SKent Overstreet 941a1774a05SKent Overstreet dirty_sectors -= s->s[i].state == SECTOR_dirty; 942a1774a05SKent Overstreet folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state)); 943f57a6a5dSKent Overstreet } 944adfcfaf0SKent Overstreet 945d1542e03SKent Overstreet bch2_disk_reservation_put(c, &disk_res); 946d1542e03SKent Overstreet 947b44a66a6SKent Overstreet i_sectors_acct(c, inode, NULL, dirty_sectors); 948adfcfaf0SKent Overstreet 94930bff594SKent Overstreet bch2_folio_release(folio); 9501c6fdbd8SKent Overstreet } 9511c6fdbd8SKent Overstreet 95230bff594SKent Overstreet static void bch2_set_folio_dirty(struct bch_fs *c, 95330bff594SKent Overstreet struct bch_inode_info *inode, 95430bff594SKent Overstreet struct folio *folio, 95530bff594SKent Overstreet struct bch2_folio_reservation *res, 956d1542e03SKent Overstreet unsigned offset, unsigned len) 9571c6fdbd8SKent Overstreet { 95830bff594SKent Overstreet struct bch_folio *s = bch2_folio(folio); 959f57a6a5dSKent Overstreet unsigned i, dirty_sectors = 0; 9601c6fdbd8SKent Overstreet 96130bff594SKent Overstreet WARN_ON((u64) folio_pos(folio) + offset + len > 962877dfb34SKent Overstreet round_up((u64) i_size_read(&inode->v), block_bytes(c))); 963fb472ac5SKent Overstreet 96434fdcf06SKent Overstreet BUG_ON(!s->uptodate); 96534fdcf06SKent Overstreet 9663826ee0bSKent Overstreet spin_lock(&s->lock); 9673826ee0bSKent Overstreet 9684b0a66d5SKent Overstreet for (i = round_down(offset, block_bytes(c)) >> 9; 9694b0a66d5SKent Overstreet i < round_up(offset + len, block_bytes(c)) >> 9; 970d1542e03SKent Overstreet i++) { 971d1542e03SKent Overstreet unsigned sectors = sectors_to_reserve(&s->s[i], 972d1542e03SKent Overstreet res->disk.nr_replicas); 9731c6fdbd8SKent Overstreet 974406d6d5aSKent Overstreet /* 975406d6d5aSKent Overstreet * This can happen if we race with the error path in 976406d6d5aSKent Overstreet * bch2_writepage_io_done(): 977406d6d5aSKent Overstreet */ 978406d6d5aSKent Overstreet sectors = min_t(unsigned, sectors, res->disk.sectors); 979406d6d5aSKent Overstreet 980d1542e03SKent Overstreet s->s[i].replicas_reserved += sectors; 981d1542e03SKent Overstreet res->disk.sectors -= sectors; 982adfcfaf0SKent Overstreet 983a1774a05SKent Overstreet dirty_sectors += s->s[i].state == SECTOR_unallocated; 984a1774a05SKent Overstreet 985a1774a05SKent Overstreet folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state)); 986f57a6a5dSKent Overstreet } 987f57a6a5dSKent Overstreet 9883826ee0bSKent Overstreet spin_unlock(&s->lock); 9893826ee0bSKent Overstreet 990d1542e03SKent Overstreet i_sectors_acct(c, inode, &res->quota, dirty_sectors); 9911c6fdbd8SKent Overstreet 99230bff594SKent Overstreet if (!folio_test_dirty(folio)) 99330bff594SKent Overstreet filemap_dirty_folio(inode->v.i_mapping, folio); 9941c6fdbd8SKent Overstreet } 9951c6fdbd8SKent Overstreet 9961c6fdbd8SKent Overstreet vm_fault_t bch2_page_fault(struct vm_fault *vmf) 9971c6fdbd8SKent Overstreet { 9981c6fdbd8SKent Overstreet struct file *file = vmf->vma->vm_file; 999eb8e6e9cSKent Overstreet struct address_space *mapping = file->f_mapping; 1000eb8e6e9cSKent Overstreet struct address_space *fdm = faults_disabled_mapping(); 10011c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 100273bd774dSKent Overstreet vm_fault_t ret; 10031c6fdbd8SKent Overstreet 1004eb8e6e9cSKent Overstreet if (fdm == mapping) 1005eb8e6e9cSKent Overstreet return VM_FAULT_SIGBUS; 1006eb8e6e9cSKent Overstreet 1007eb8e6e9cSKent Overstreet /* Lock ordering: */ 1008eb8e6e9cSKent Overstreet if (fdm > mapping) { 1009eb8e6e9cSKent Overstreet struct bch_inode_info *fdm_host = to_bch_ei(fdm->host); 1010eb8e6e9cSKent Overstreet 1011a7ecd30cSKent Overstreet if (bch2_pagecache_add_tryget(inode)) 1012eb8e6e9cSKent Overstreet goto got_lock; 1013eb8e6e9cSKent Overstreet 1014a7ecd30cSKent Overstreet bch2_pagecache_block_put(fdm_host); 1015eb8e6e9cSKent Overstreet 1016a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 1017a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 1018eb8e6e9cSKent Overstreet 1019a7ecd30cSKent Overstreet bch2_pagecache_block_get(fdm_host); 1020eb8e6e9cSKent Overstreet 1021eb8e6e9cSKent Overstreet /* Signal that lock has been dropped: */ 1022eb8e6e9cSKent Overstreet set_fdm_dropped_locks(); 1023eb8e6e9cSKent Overstreet return VM_FAULT_SIGBUS; 1024eb8e6e9cSKent Overstreet } 1025eb8e6e9cSKent Overstreet 1026a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 1027eb8e6e9cSKent Overstreet got_lock: 10281c6fdbd8SKent Overstreet ret = filemap_fault(vmf); 1029a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 10301c6fdbd8SKent Overstreet 10311c6fdbd8SKent Overstreet return ret; 10321c6fdbd8SKent Overstreet } 10331c6fdbd8SKent Overstreet 10341c6fdbd8SKent Overstreet vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) 10351c6fdbd8SKent Overstreet { 103630bff594SKent Overstreet struct folio *folio = page_folio(vmf->page); 10371c6fdbd8SKent Overstreet struct file *file = vmf->vma->vm_file; 10381c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 10391c6fdbd8SKent Overstreet struct address_space *mapping = file->f_mapping; 10401c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 104130bff594SKent Overstreet struct bch2_folio_reservation res; 10426cc3535dSKent Overstreet unsigned len; 10436cc3535dSKent Overstreet loff_t isize; 104473bd774dSKent Overstreet vm_fault_t ret; 10451c6fdbd8SKent Overstreet 104630bff594SKent Overstreet bch2_folio_reservation_init(c, inode, &res); 1047d1542e03SKent Overstreet 10481c6fdbd8SKent Overstreet sb_start_pagefault(inode->v.i_sb); 10491c6fdbd8SKent Overstreet file_update_time(file); 10501c6fdbd8SKent Overstreet 10511c6fdbd8SKent Overstreet /* 10521c6fdbd8SKent Overstreet * Not strictly necessary, but helps avoid dio writes livelocking in 10531c6fdbd8SKent Overstreet * write_invalidate_inode_pages_range() - can drop this if/when we get 10541c6fdbd8SKent Overstreet * a write_invalidate_inode_pages_range() that works without dropping 10551c6fdbd8SKent Overstreet * page lock before invalidating page 10561c6fdbd8SKent Overstreet */ 1057a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 10581c6fdbd8SKent Overstreet 105930bff594SKent Overstreet folio_lock(folio); 10606cc3535dSKent Overstreet isize = i_size_read(&inode->v); 10616cc3535dSKent Overstreet 106230bff594SKent Overstreet if (folio->mapping != mapping || folio_pos(folio) >= isize) { 106330bff594SKent Overstreet folio_unlock(folio); 10641c6fdbd8SKent Overstreet ret = VM_FAULT_NOPAGE; 10651c6fdbd8SKent Overstreet goto out; 10661c6fdbd8SKent Overstreet } 10671c6fdbd8SKent Overstreet 106833e2eb96SKent Overstreet len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio)); 10696cc3535dSKent Overstreet 107070d41c9eSKent Overstreet if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?: 107170d41c9eSKent Overstreet bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) { 107230bff594SKent Overstreet folio_unlock(folio); 10731c6fdbd8SKent Overstreet ret = VM_FAULT_SIGBUS; 10741c6fdbd8SKent Overstreet goto out; 10751c6fdbd8SKent Overstreet } 10761c6fdbd8SKent Overstreet 107730bff594SKent Overstreet bch2_set_folio_dirty(c, inode, folio, &res, 0, len); 107830bff594SKent Overstreet bch2_folio_reservation_put(c, inode, &res); 10791b783a69SKent Overstreet 108030bff594SKent Overstreet folio_wait_stable(folio); 1081e6ec361fSKent Overstreet ret = VM_FAULT_LOCKED; 10821c6fdbd8SKent Overstreet out: 1083a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 10841c6fdbd8SKent Overstreet sb_end_pagefault(inode->v.i_sb); 1085d1542e03SKent Overstreet 10861c6fdbd8SKent Overstreet return ret; 10871c6fdbd8SKent Overstreet } 10881c6fdbd8SKent Overstreet 10891c6fdbd8SKent Overstreet void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length) 10901c6fdbd8SKent Overstreet { 10911c6fdbd8SKent Overstreet if (offset || length < folio_size(folio)) 10921c6fdbd8SKent Overstreet return; 10931c6fdbd8SKent Overstreet 109430bff594SKent Overstreet bch2_clear_folio_bits(folio); 10951c6fdbd8SKent Overstreet } 10961c6fdbd8SKent Overstreet 10971c6fdbd8SKent Overstreet bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask) 10981c6fdbd8SKent Overstreet { 1099a6d90385SKent Overstreet if (folio_test_dirty(folio) || folio_test_writeback(folio)) 11001c6fdbd8SKent Overstreet return false; 11011c6fdbd8SKent Overstreet 110230bff594SKent Overstreet bch2_clear_folio_bits(folio); 11031c6fdbd8SKent Overstreet return true; 11041c6fdbd8SKent Overstreet } 11051c6fdbd8SKent Overstreet 11061c6fdbd8SKent Overstreet /* readpage(s): */ 11071c6fdbd8SKent Overstreet 11081c6fdbd8SKent Overstreet static void bch2_readpages_end_io(struct bio *bio) 11091c6fdbd8SKent Overstreet { 111030bff594SKent Overstreet struct folio_iter fi; 11111c6fdbd8SKent Overstreet 111230bff594SKent Overstreet bio_for_each_folio_all(fi, bio) { 11131c6fdbd8SKent Overstreet if (!bio->bi_status) { 111430bff594SKent Overstreet folio_mark_uptodate(fi.folio); 11151c6fdbd8SKent Overstreet } else { 111630bff594SKent Overstreet folio_clear_uptodate(fi.folio); 111730bff594SKent Overstreet folio_set_error(fi.folio); 11181c6fdbd8SKent Overstreet } 111930bff594SKent Overstreet folio_unlock(fi.folio); 11201c6fdbd8SKent Overstreet } 11211c6fdbd8SKent Overstreet 11221c6fdbd8SKent Overstreet bio_put(bio); 11231c6fdbd8SKent Overstreet } 11241c6fdbd8SKent Overstreet 11251c6fdbd8SKent Overstreet struct readpages_iter { 11261c6fdbd8SKent Overstreet struct address_space *mapping; 11271c6fdbd8SKent Overstreet unsigned idx; 11289567413cSKent Overstreet folios folios; 11291c6fdbd8SKent Overstreet }; 11301c6fdbd8SKent Overstreet 11311c6fdbd8SKent Overstreet static int readpages_iter_init(struct readpages_iter *iter, 11321c6fdbd8SKent Overstreet struct readahead_control *ractl) 11331c6fdbd8SKent Overstreet { 11349567413cSKent Overstreet struct folio **fi; 11359567413cSKent Overstreet int ret; 11361c6fdbd8SKent Overstreet 11371c6fdbd8SKent Overstreet memset(iter, 0, sizeof(*iter)); 11381c6fdbd8SKent Overstreet 11391c6fdbd8SKent Overstreet iter->mapping = ractl->mapping; 11401c6fdbd8SKent Overstreet 11419567413cSKent Overstreet ret = filemap_get_contig_folios_d(iter->mapping, 11429567413cSKent Overstreet ractl->_index << PAGE_SHIFT, 11439567413cSKent Overstreet (ractl->_index + ractl->_nr_pages) << PAGE_SHIFT, 11449567413cSKent Overstreet 0, mapping_gfp_mask(iter->mapping), 11459567413cSKent Overstreet &iter->folios); 11469567413cSKent Overstreet if (ret) 11479567413cSKent Overstreet return ret; 11481c6fdbd8SKent Overstreet 11499567413cSKent Overstreet darray_for_each(iter->folios, fi) { 11509567413cSKent Overstreet ractl->_nr_pages -= 1U << folio_order(*fi); 115170d41c9eSKent Overstreet __bch2_folio_create(*fi, __GFP_NOFAIL|GFP_KERNEL); 11529567413cSKent Overstreet folio_put(*fi); 11539567413cSKent Overstreet folio_put(*fi); 11541c6fdbd8SKent Overstreet } 11551c6fdbd8SKent Overstreet 11561c6fdbd8SKent Overstreet return 0; 11571c6fdbd8SKent Overstreet } 11581c6fdbd8SKent Overstreet 11599567413cSKent Overstreet static inline struct folio *readpage_iter_peek(struct readpages_iter *iter) 11601c6fdbd8SKent Overstreet { 11619567413cSKent Overstreet if (iter->idx >= iter->folios.nr) 11621c6fdbd8SKent Overstreet return NULL; 11639567413cSKent Overstreet return iter->folios.data[iter->idx]; 11649567413cSKent Overstreet } 11651c6fdbd8SKent Overstreet 11669567413cSKent Overstreet static inline void readpage_iter_advance(struct readpages_iter *iter) 11679567413cSKent Overstreet { 11689567413cSKent Overstreet iter->idx++; 11691c6fdbd8SKent Overstreet } 11701c6fdbd8SKent Overstreet 117135189e09SKent Overstreet static bool extent_partial_reads_expensive(struct bkey_s_c k) 117235189e09SKent Overstreet { 117335189e09SKent Overstreet struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 117435189e09SKent Overstreet struct bch_extent_crc_unpacked crc; 117535189e09SKent Overstreet const union bch_extent_entry *i; 117635189e09SKent Overstreet 117735189e09SKent Overstreet bkey_for_each_crc(k.k, ptrs, crc, i) 117835189e09SKent Overstreet if (crc.csum_type || crc.compression_type) 117935189e09SKent Overstreet return true; 118035189e09SKent Overstreet return false; 118135189e09SKent Overstreet } 118235189e09SKent Overstreet 118370d41c9eSKent Overstreet static int readpage_bio_extend(struct btree_trans *trans, 118470d41c9eSKent Overstreet struct readpages_iter *iter, 118576426098SKent Overstreet struct bio *bio, 118676426098SKent Overstreet unsigned sectors_this_extent, 11871c6fdbd8SKent Overstreet bool get_more) 11881c6fdbd8SKent Overstreet { 118970d41c9eSKent Overstreet /* Don't hold btree locks while allocating memory: */ 119070d41c9eSKent Overstreet bch2_trans_unlock(trans); 119170d41c9eSKent Overstreet 119276426098SKent Overstreet while (bio_sectors(bio) < sectors_this_extent && 11931c6fdbd8SKent Overstreet bio->bi_vcnt < bio->bi_max_vecs) { 11949567413cSKent Overstreet struct folio *folio = readpage_iter_peek(iter); 11951c6fdbd8SKent Overstreet int ret; 11961c6fdbd8SKent Overstreet 119730bff594SKent Overstreet if (folio) { 11989567413cSKent Overstreet readpage_iter_advance(iter); 11991c6fdbd8SKent Overstreet } else { 12009567413cSKent Overstreet pgoff_t folio_offset = bio_end_sector(bio) >> PAGE_SECTORS_SHIFT; 12019567413cSKent Overstreet 12021c6fdbd8SKent Overstreet if (!get_more) 12031c6fdbd8SKent Overstreet break; 12041c6fdbd8SKent Overstreet 120530bff594SKent Overstreet folio = xa_load(&iter->mapping->i_pages, folio_offset); 120630bff594SKent Overstreet if (folio && !xa_is_value(folio)) 12071c6fdbd8SKent Overstreet break; 12081c6fdbd8SKent Overstreet 120930bff594SKent Overstreet folio = filemap_alloc_folio(readahead_gfp_mask(iter->mapping), 0); 121030bff594SKent Overstreet if (!folio) 12111c6fdbd8SKent Overstreet break; 12121c6fdbd8SKent Overstreet 121370d41c9eSKent Overstreet if (!__bch2_folio_create(folio, GFP_KERNEL)) { 121430bff594SKent Overstreet folio_put(folio); 1215f57a6a5dSKent Overstreet break; 1216f57a6a5dSKent Overstreet } 12171c6fdbd8SKent Overstreet 121870d41c9eSKent Overstreet ret = filemap_add_folio(iter->mapping, folio, folio_offset, GFP_KERNEL); 12191c6fdbd8SKent Overstreet if (ret) { 122030bff594SKent Overstreet __bch2_folio_release(folio); 122130bff594SKent Overstreet folio_put(folio); 12221c6fdbd8SKent Overstreet break; 12231c6fdbd8SKent Overstreet } 12241c6fdbd8SKent Overstreet 122530bff594SKent Overstreet folio_put(folio); 12261c6fdbd8SKent Overstreet } 12271c6fdbd8SKent Overstreet 12289567413cSKent Overstreet BUG_ON(folio_sector(folio) != bio_end_sector(bio)); 12299567413cSKent Overstreet 123030bff594SKent Overstreet BUG_ON(!bio_add_folio(bio, folio, folio_size(folio), 0)); 12311c6fdbd8SKent Overstreet } 123270d41c9eSKent Overstreet 123370d41c9eSKent Overstreet return bch2_trans_relock(trans); 12341c6fdbd8SKent Overstreet } 12351c6fdbd8SKent Overstreet 12368c6d298aSKent Overstreet static void bchfs_read(struct btree_trans *trans, 12378c6d298aSKent Overstreet struct bch_read_bio *rbio, 12388c6d298aSKent Overstreet subvol_inum inum, 12391c6fdbd8SKent Overstreet struct readpages_iter *readpages_iter) 12401c6fdbd8SKent Overstreet { 12410f238367SKent Overstreet struct bch_fs *c = trans->c; 12428c6d298aSKent Overstreet struct btree_iter iter; 124307a1006aSKent Overstreet struct bkey_buf sk; 12441c6fdbd8SKent Overstreet int flags = BCH_READ_RETRY_IF_STALE| 12451c6fdbd8SKent Overstreet BCH_READ_MAY_PROMOTE; 12468c6d298aSKent Overstreet u32 snapshot; 124776426098SKent Overstreet int ret = 0; 12481c6fdbd8SKent Overstreet 12491c6fdbd8SKent Overstreet rbio->c = c; 12501c6fdbd8SKent Overstreet rbio->start_time = local_clock(); 12518c6d298aSKent Overstreet rbio->subvol = inum.subvol; 125235189e09SKent Overstreet 125307a1006aSKent Overstreet bch2_bkey_buf_init(&sk); 125476426098SKent Overstreet retry: 1255700c25b3SKent Overstreet bch2_trans_begin(trans); 12568c6d298aSKent Overstreet iter = (struct btree_iter) { NULL }; 1257700c25b3SKent Overstreet 12588c6d298aSKent Overstreet ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); 12598c6d298aSKent Overstreet if (ret) 12608c6d298aSKent Overstreet goto err; 12618c6d298aSKent Overstreet 12628c6d298aSKent Overstreet bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, 12638c6d298aSKent Overstreet SPOS(inum.inum, rbio->bio.bi_iter.bi_sector, snapshot), 126457cfdd8bSKent Overstreet BTREE_ITER_SLOTS); 12651c6fdbd8SKent Overstreet while (1) { 12661c6fdbd8SKent Overstreet struct bkey_s_c k; 126776426098SKent Overstreet unsigned bytes, sectors, offset_into_extent; 12685ff75ccbSKent Overstreet enum btree_id data_btree = BTREE_ID_extents; 12691c6fdbd8SKent Overstreet 12703737e0ddSKent Overstreet /* 12713737e0ddSKent Overstreet * read_extent -> io_time_reset may cause a transaction restart 12723737e0ddSKent Overstreet * without returning an error, we need to check for that here: 12733737e0ddSKent Overstreet */ 1274549d173cSKent Overstreet ret = bch2_trans_relock(trans); 1275549d173cSKent Overstreet if (ret) 12763737e0ddSKent Overstreet break; 12773737e0ddSKent Overstreet 12788c6d298aSKent Overstreet bch2_btree_iter_set_pos(&iter, 12798c6d298aSKent Overstreet POS(inum.inum, rbio->bio.bi_iter.bi_sector)); 12801c6fdbd8SKent Overstreet 12818c6d298aSKent Overstreet k = bch2_btree_iter_peek_slot(&iter); 128276426098SKent Overstreet ret = bkey_err(k); 128376426098SKent Overstreet if (ret) 128476426098SKent Overstreet break; 12851c6fdbd8SKent Overstreet 12868c6d298aSKent Overstreet offset_into_extent = iter.pos.offset - 128706ed8558SKent Overstreet bkey_start_offset(k.k); 128876426098SKent Overstreet sectors = k.k->size - offset_into_extent; 128976426098SKent Overstreet 129007a1006aSKent Overstreet bch2_bkey_buf_reassemble(&sk, c, k); 129113dcd4abSKent Overstreet 12925ff75ccbSKent Overstreet ret = bch2_read_indirect_extent(trans, &data_btree, 129322d8a33dSYuxuan Shui &offset_into_extent, &sk); 129476426098SKent Overstreet if (ret) 129576426098SKent Overstreet break; 129676426098SKent Overstreet 129713dcd4abSKent Overstreet k = bkey_i_to_s_c(sk.k); 129813dcd4abSKent Overstreet 129976426098SKent Overstreet sectors = min(sectors, k.k->size - offset_into_extent); 130076426098SKent Overstreet 130170d41c9eSKent Overstreet if (readpages_iter) { 130270d41c9eSKent Overstreet ret = readpage_bio_extend(trans, readpages_iter, &rbio->bio, sectors, 130335189e09SKent Overstreet extent_partial_reads_expensive(k)); 130470d41c9eSKent Overstreet if (ret) 130570d41c9eSKent Overstreet break; 130670d41c9eSKent Overstreet } 13071c6fdbd8SKent Overstreet 130876426098SKent Overstreet bytes = min(sectors, bio_sectors(&rbio->bio)) << 9; 130906ed8558SKent Overstreet swap(rbio->bio.bi_iter.bi_size, bytes); 13101c6fdbd8SKent Overstreet 131106ed8558SKent Overstreet if (rbio->bio.bi_iter.bi_size == bytes) 13121c6fdbd8SKent Overstreet flags |= BCH_READ_LAST_FRAGMENT; 13131c6fdbd8SKent Overstreet 1314b44a66a6SKent Overstreet bch2_bio_page_state_set(&rbio->bio, k); 13151c6fdbd8SKent Overstreet 13168c6d298aSKent Overstreet bch2_read_extent(trans, rbio, iter.pos, 13175ff75ccbSKent Overstreet data_btree, k, offset_into_extent, flags); 13181c6fdbd8SKent Overstreet 13191c6fdbd8SKent Overstreet if (flags & BCH_READ_LAST_FRAGMENT) 132035189e09SKent Overstreet break; 13211c6fdbd8SKent Overstreet 132206ed8558SKent Overstreet swap(rbio->bio.bi_iter.bi_size, bytes); 132306ed8558SKent Overstreet bio_advance(&rbio->bio, bytes); 1324084d42bbSKent Overstreet 1325084d42bbSKent Overstreet ret = btree_trans_too_many_iters(trans); 1326084d42bbSKent Overstreet if (ret) 1327084d42bbSKent Overstreet break; 13281c6fdbd8SKent Overstreet } 13298c6d298aSKent Overstreet err: 13308c6d298aSKent Overstreet bch2_trans_iter_exit(trans, &iter); 133176426098SKent Overstreet 1332549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 133376426098SKent Overstreet goto retry; 133476426098SKent Overstreet 133535189e09SKent Overstreet if (ret) { 13367fec8266SKent Overstreet bch_err_inum_offset_ratelimited(c, 13377fec8266SKent Overstreet iter.pos.inode, 13387fec8266SKent Overstreet iter.pos.offset << 9, 13390fefe8d8SKent Overstreet "read error %i from btree lookup", ret); 13400fefe8d8SKent Overstreet rbio->bio.bi_status = BLK_STS_IOERR; 134176426098SKent Overstreet bio_endio(&rbio->bio); 13421c6fdbd8SKent Overstreet } 13431c6fdbd8SKent Overstreet 134407a1006aSKent Overstreet bch2_bkey_buf_exit(&sk, c); 134535189e09SKent Overstreet } 134635189e09SKent Overstreet 13471c6fdbd8SKent Overstreet void bch2_readahead(struct readahead_control *ractl) 13481c6fdbd8SKent Overstreet { 13491c6fdbd8SKent Overstreet struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host); 13501c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 135101ad6737SKent Overstreet struct bch_io_opts opts; 1352424eb881SKent Overstreet struct btree_trans trans; 135330bff594SKent Overstreet struct folio *folio; 13541c6fdbd8SKent Overstreet struct readpages_iter readpages_iter; 13551c6fdbd8SKent Overstreet int ret; 13561c6fdbd8SKent Overstreet 135701ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 135801ad6737SKent Overstreet 13591c6fdbd8SKent Overstreet ret = readpages_iter_init(&readpages_iter, ractl); 13601c6fdbd8SKent Overstreet BUG_ON(ret); 13611c6fdbd8SKent Overstreet 136220bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 13631c6fdbd8SKent Overstreet 1364a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 13651c6fdbd8SKent Overstreet 13669567413cSKent Overstreet while ((folio = readpage_iter_peek(&readpages_iter))) { 13671c6fdbd8SKent Overstreet unsigned n = min_t(unsigned, 13689567413cSKent Overstreet readpages_iter.folios.nr - 13691c6fdbd8SKent Overstreet readpages_iter.idx, 13701c6fdbd8SKent Overstreet BIO_MAX_VECS); 13711c6fdbd8SKent Overstreet struct bch_read_bio *rbio = 13721c6fdbd8SKent Overstreet rbio_init(bio_alloc_bioset(NULL, n, REQ_OP_READ, 13735718fda0SKent Overstreet GFP_KERNEL, &c->bio_read), 13741c6fdbd8SKent Overstreet opts); 13751c6fdbd8SKent Overstreet 13769567413cSKent Overstreet readpage_iter_advance(&readpages_iter); 13771c6fdbd8SKent Overstreet 13789567413cSKent Overstreet rbio->bio.bi_iter.bi_sector = folio_sector(folio); 13791c6fdbd8SKent Overstreet rbio->bio.bi_end_io = bch2_readpages_end_io; 138030bff594SKent Overstreet BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); 13811c6fdbd8SKent Overstreet 13828c6d298aSKent Overstreet bchfs_read(&trans, rbio, inode_inum(inode), 13830f238367SKent Overstreet &readpages_iter); 13845718fda0SKent Overstreet bch2_trans_unlock(&trans); 13851c6fdbd8SKent Overstreet } 13861c6fdbd8SKent Overstreet 1387a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 1388424eb881SKent Overstreet 1389424eb881SKent Overstreet bch2_trans_exit(&trans); 13909567413cSKent Overstreet darray_exit(&readpages_iter.folios); 13911c6fdbd8SKent Overstreet } 13921c6fdbd8SKent Overstreet 139330bff594SKent Overstreet static void __bchfs_readfolio(struct bch_fs *c, struct bch_read_bio *rbio, 139430bff594SKent Overstreet subvol_inum inum, struct folio *folio) 13951c6fdbd8SKent Overstreet { 1396424eb881SKent Overstreet struct btree_trans trans; 13971c6fdbd8SKent Overstreet 139830bff594SKent Overstreet bch2_folio_create(folio, __GFP_NOFAIL); 13991c6fdbd8SKent Overstreet 14001c6fdbd8SKent Overstreet rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC; 140130bff594SKent Overstreet rbio->bio.bi_iter.bi_sector = folio_sector(folio); 140230bff594SKent Overstreet BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); 14031c6fdbd8SKent Overstreet 140420bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 14058c6d298aSKent Overstreet bchfs_read(&trans, rbio, inum, NULL); 1406424eb881SKent Overstreet bch2_trans_exit(&trans); 14071c6fdbd8SKent Overstreet } 14081c6fdbd8SKent Overstreet 140930bff594SKent Overstreet static void bch2_read_single_folio_end_io(struct bio *bio) 14101c6fdbd8SKent Overstreet { 14111c6fdbd8SKent Overstreet complete(bio->bi_private); 14121c6fdbd8SKent Overstreet } 14131c6fdbd8SKent Overstreet 141430bff594SKent Overstreet static int bch2_read_single_folio(struct folio *folio, 14151c6fdbd8SKent Overstreet struct address_space *mapping) 14161c6fdbd8SKent Overstreet { 14171c6fdbd8SKent Overstreet struct bch_inode_info *inode = to_bch_ei(mapping->host); 14181c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 14191c6fdbd8SKent Overstreet struct bch_read_bio *rbio; 142001ad6737SKent Overstreet struct bch_io_opts opts; 14211c6fdbd8SKent Overstreet int ret; 14221c6fdbd8SKent Overstreet DECLARE_COMPLETION_ONSTACK(done); 14231c6fdbd8SKent Overstreet 142401ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 142501ad6737SKent Overstreet 14265718fda0SKent Overstreet rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_KERNEL, &c->bio_read), 142701ad6737SKent Overstreet opts); 14281c6fdbd8SKent Overstreet rbio->bio.bi_private = &done; 142930bff594SKent Overstreet rbio->bio.bi_end_io = bch2_read_single_folio_end_io; 14301c6fdbd8SKent Overstreet 143130bff594SKent Overstreet __bchfs_readfolio(c, rbio, inode_inum(inode), folio); 14321c6fdbd8SKent Overstreet wait_for_completion(&done); 14331c6fdbd8SKent Overstreet 14341c6fdbd8SKent Overstreet ret = blk_status_to_errno(rbio->bio.bi_status); 14351c6fdbd8SKent Overstreet bio_put(&rbio->bio); 14361c6fdbd8SKent Overstreet 14371c6fdbd8SKent Overstreet if (ret < 0) 14381c6fdbd8SKent Overstreet return ret; 14391c6fdbd8SKent Overstreet 144030bff594SKent Overstreet folio_mark_uptodate(folio); 14411c6fdbd8SKent Overstreet return 0; 14421c6fdbd8SKent Overstreet } 14431c6fdbd8SKent Overstreet 14441c6fdbd8SKent Overstreet int bch2_read_folio(struct file *file, struct folio *folio) 14451c6fdbd8SKent Overstreet { 14461c6fdbd8SKent Overstreet int ret; 14471c6fdbd8SKent Overstreet 144830bff594SKent Overstreet ret = bch2_read_single_folio(folio, folio->mapping); 14491c6fdbd8SKent Overstreet folio_unlock(folio); 14505c1ef830SKent Overstreet return bch2_err_class(ret); 14511c6fdbd8SKent Overstreet } 14521c6fdbd8SKent Overstreet 14531c6fdbd8SKent Overstreet /* writepages: */ 14541c6fdbd8SKent Overstreet 14551c6fdbd8SKent Overstreet struct bch_writepage_state { 14561c6fdbd8SKent Overstreet struct bch_writepage_io *io; 14571c6fdbd8SKent Overstreet struct bch_io_opts opts; 145849fe78ffSKent Overstreet struct bch_folio_sector *tmp; 145949fe78ffSKent Overstreet unsigned tmp_sectors; 14601c6fdbd8SKent Overstreet }; 14611c6fdbd8SKent Overstreet 14621c6fdbd8SKent Overstreet static inline struct bch_writepage_state bch_writepage_state_init(struct bch_fs *c, 14631c6fdbd8SKent Overstreet struct bch_inode_info *inode) 14641c6fdbd8SKent Overstreet { 146501ad6737SKent Overstreet struct bch_writepage_state ret = { 0 }; 146601ad6737SKent Overstreet 146701ad6737SKent Overstreet bch2_inode_opts_get(&ret.opts, c, &inode->ei_inode); 146801ad6737SKent Overstreet return ret; 14691c6fdbd8SKent Overstreet } 14701c6fdbd8SKent Overstreet 14719f311f21SKent Overstreet static void bch2_writepage_io_done(struct bch_write_op *op) 14721c6fdbd8SKent Overstreet { 14739f311f21SKent Overstreet struct bch_writepage_io *io = 14749f311f21SKent Overstreet container_of(op, struct bch_writepage_io, op); 14759a3df993SKent Overstreet struct bch_fs *c = io->op.c; 14769a3df993SKent Overstreet struct bio *bio = &io->op.wbio.bio; 1477ff9c301fSKent Overstreet struct folio_iter fi; 1478b3fce09cSKent Overstreet unsigned i; 14791c6fdbd8SKent Overstreet 14809a3df993SKent Overstreet if (io->op.error) { 148133c74e41SKent Overstreet set_bit(EI_INODE_ERROR, &io->inode->ei_flags); 148233c74e41SKent Overstreet 1483ff9c301fSKent Overstreet bio_for_each_folio_all(fi, bio) { 14843342ac13SKent Overstreet struct bch_folio *s; 1485b3fce09cSKent Overstreet 1486ff9c301fSKent Overstreet folio_set_error(fi.folio); 1487ff9c301fSKent Overstreet mapping_set_error(fi.folio->mapping, -EIO); 1488b3fce09cSKent Overstreet 1489ff9c301fSKent Overstreet s = __bch2_folio(fi.folio); 14903826ee0bSKent Overstreet spin_lock(&s->lock); 1491ff9c301fSKent Overstreet for (i = 0; i < folio_sectors(fi.folio); i++) 1492b3fce09cSKent Overstreet s->s[i].nr_replicas = 0; 14933826ee0bSKent Overstreet spin_unlock(&s->lock); 149475812e70SKent Overstreet } 14951c6fdbd8SKent Overstreet } 14961c6fdbd8SKent Overstreet 14974be1a412SKent Overstreet if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) { 1498ff9c301fSKent Overstreet bio_for_each_folio_all(fi, bio) { 14993342ac13SKent Overstreet struct bch_folio *s; 15004be1a412SKent Overstreet 1501ff9c301fSKent Overstreet s = __bch2_folio(fi.folio); 15024be1a412SKent Overstreet spin_lock(&s->lock); 1503ff9c301fSKent Overstreet for (i = 0; i < folio_sectors(fi.folio); i++) 15044be1a412SKent Overstreet s->s[i].nr_replicas = 0; 15054be1a412SKent Overstreet spin_unlock(&s->lock); 15064be1a412SKent Overstreet } 15074be1a412SKent Overstreet } 15084be1a412SKent Overstreet 15091c6fdbd8SKent Overstreet /* 15101c6fdbd8SKent Overstreet * racing with fallocate can cause us to add fewer sectors than 15111c6fdbd8SKent Overstreet * expected - but we shouldn't add more sectors than expected: 15121c6fdbd8SKent Overstreet */ 1513f8494d25SKent Overstreet WARN_ON_ONCE(io->op.i_sectors_delta > 0); 15141c6fdbd8SKent Overstreet 15151c6fdbd8SKent Overstreet /* 15161c6fdbd8SKent Overstreet * (error (due to going RO) halfway through a page can screw that up 15171c6fdbd8SKent Overstreet * slightly) 15181c6fdbd8SKent Overstreet * XXX wtf? 15199a3df993SKent Overstreet BUG_ON(io->op.op.i_sectors_delta >= PAGE_SECTORS); 15201c6fdbd8SKent Overstreet */ 15211c6fdbd8SKent Overstreet 15221c6fdbd8SKent Overstreet /* 15231c6fdbd8SKent Overstreet * PageWriteback is effectively our ref on the inode - fixup i_blocks 15241c6fdbd8SKent Overstreet * before calling end_page_writeback: 15251c6fdbd8SKent Overstreet */ 15269a3df993SKent Overstreet i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta); 15271c6fdbd8SKent Overstreet 1528ff9c301fSKent Overstreet bio_for_each_folio_all(fi, bio) { 1529ff9c301fSKent Overstreet struct bch_folio *s = __bch2_folio(fi.folio); 15307f5e31e1SKent Overstreet 15317f5e31e1SKent Overstreet if (atomic_dec_and_test(&s->write_count)) 1532ff9c301fSKent Overstreet folio_end_writeback(fi.folio); 15337f5e31e1SKent Overstreet } 15341c6fdbd8SKent Overstreet 15359f311f21SKent Overstreet bio_put(&io->op.wbio.bio); 15361c6fdbd8SKent Overstreet } 15371c6fdbd8SKent Overstreet 15381c6fdbd8SKent Overstreet static void bch2_writepage_do_io(struct bch_writepage_state *w) 15391c6fdbd8SKent Overstreet { 15401c6fdbd8SKent Overstreet struct bch_writepage_io *io = w->io; 15411c6fdbd8SKent Overstreet 15421c6fdbd8SKent Overstreet w->io = NULL; 15439f311f21SKent Overstreet closure_call(&io->op.cl, bch2_write, NULL, NULL); 15441c6fdbd8SKent Overstreet } 15451c6fdbd8SKent Overstreet 15461c6fdbd8SKent Overstreet /* 15471c6fdbd8SKent Overstreet * Get a bch_writepage_io and add @page to it - appending to an existing one if 15481c6fdbd8SKent Overstreet * possible, else allocating a new one: 15491c6fdbd8SKent Overstreet */ 15501c6fdbd8SKent Overstreet static void bch2_writepage_io_alloc(struct bch_fs *c, 155150fe5bd6SKent Overstreet struct writeback_control *wbc, 15521c6fdbd8SKent Overstreet struct bch_writepage_state *w, 15531c6fdbd8SKent Overstreet struct bch_inode_info *inode, 15547f5e31e1SKent Overstreet u64 sector, 15551c6fdbd8SKent Overstreet unsigned nr_replicas) 15561c6fdbd8SKent Overstreet { 15571c6fdbd8SKent Overstreet struct bch_write_op *op; 15581c6fdbd8SKent Overstreet 15591c6fdbd8SKent Overstreet w->io = container_of(bio_alloc_bioset(NULL, BIO_MAX_VECS, 15601c6fdbd8SKent Overstreet REQ_OP_WRITE, 15615718fda0SKent Overstreet GFP_KERNEL, 15621c6fdbd8SKent Overstreet &c->writepage_bioset), 15639a3df993SKent Overstreet struct bch_writepage_io, op.wbio.bio); 15641c6fdbd8SKent Overstreet 15659a3df993SKent Overstreet w->io->inode = inode; 15669a3df993SKent Overstreet op = &w->io->op; 15679a3df993SKent Overstreet bch2_write_op_init(op, c, w->opts); 15689a3df993SKent Overstreet op->target = w->opts.foreground_target; 15691c6fdbd8SKent Overstreet op->nr_replicas = nr_replicas; 15701c6fdbd8SKent Overstreet op->res.nr_replicas = nr_replicas; 15711c6fdbd8SKent Overstreet op->write_point = writepoint_hashed(inode->ei_last_dirtied); 15728c6d298aSKent Overstreet op->subvol = inode->ei_subvol; 15737f5e31e1SKent Overstreet op->pos = POS(inode->v.i_ino, sector); 15749f311f21SKent Overstreet op->end_io = bch2_writepage_io_done; 1575a8b3a677SKent Overstreet op->devs_need_flush = &inode->ei_devs_need_flush; 15767f5e31e1SKent Overstreet op->wbio.bio.bi_iter.bi_sector = sector; 157750fe5bd6SKent Overstreet op->wbio.bio.bi_opf = wbc_to_write_flags(wbc); 15781c6fdbd8SKent Overstreet } 15791c6fdbd8SKent Overstreet 15801c6fdbd8SKent Overstreet static int __bch2_writepage(struct folio *folio, 15811c6fdbd8SKent Overstreet struct writeback_control *wbc, 15821c6fdbd8SKent Overstreet void *data) 15831c6fdbd8SKent Overstreet { 158430bff594SKent Overstreet struct bch_inode_info *inode = to_bch_ei(folio->mapping->host); 15851c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 15861c6fdbd8SKent Overstreet struct bch_writepage_state *w = data; 158749fe78ffSKent Overstreet struct bch_folio *s; 158830bff594SKent Overstreet unsigned i, offset, f_sectors, nr_replicas_this_write = U32_MAX; 15891c6fdbd8SKent Overstreet loff_t i_size = i_size_read(&inode->v); 1590e1036a2aSKent Overstreet int ret; 15911c6fdbd8SKent Overstreet 159230bff594SKent Overstreet EBUG_ON(!folio_test_uptodate(folio)); 15931c6fdbd8SKent Overstreet 159430bff594SKent Overstreet /* Is the folio fully inside i_size? */ 159533e2eb96SKent Overstreet if (folio_end_pos(folio) <= i_size) 15961c6fdbd8SKent Overstreet goto do_io; 15971c6fdbd8SKent Overstreet 159830bff594SKent Overstreet /* Is the folio fully outside i_size? (truncate in progress) */ 159933e2eb96SKent Overstreet if (folio_pos(folio) >= i_size) { 160030bff594SKent Overstreet folio_unlock(folio); 16011c6fdbd8SKent Overstreet return 0; 16021c6fdbd8SKent Overstreet } 16031c6fdbd8SKent Overstreet 16041c6fdbd8SKent Overstreet /* 160530bff594SKent Overstreet * The folio straddles i_size. It must be zeroed out on each and every 16061c6fdbd8SKent Overstreet * writepage invocation because it may be mmapped. "A file is mapped 160730bff594SKent Overstreet * in multiples of the folio size. For a file that is not a multiple of 160830bff594SKent Overstreet * the folio size, the remaining memory is zeroed when mapped, and 16091c6fdbd8SKent Overstreet * writes to that region are not written out to the file." 16101c6fdbd8SKent Overstreet */ 161133e2eb96SKent Overstreet folio_zero_segment(folio, 161233e2eb96SKent Overstreet i_size - folio_pos(folio), 161333e2eb96SKent Overstreet folio_size(folio)); 16141c6fdbd8SKent Overstreet do_io: 161530bff594SKent Overstreet f_sectors = folio_sectors(folio); 161670d41c9eSKent Overstreet s = bch2_folio(folio); 1617f81b648dSKent Overstreet 161849fe78ffSKent Overstreet if (f_sectors > w->tmp_sectors) { 161949fe78ffSKent Overstreet kfree(w->tmp); 162049fe78ffSKent Overstreet w->tmp = kzalloc(sizeof(struct bch_folio_sector) * 162149fe78ffSKent Overstreet f_sectors, __GFP_NOFAIL); 162249fe78ffSKent Overstreet w->tmp_sectors = f_sectors; 162349fe78ffSKent Overstreet } 162449fe78ffSKent Overstreet 1625f74a5051SKent Overstreet /* 1626f74a5051SKent Overstreet * Things get really hairy with errors during writeback: 1627f74a5051SKent Overstreet */ 162830bff594SKent Overstreet ret = bch2_get_folio_disk_reservation(c, inode, folio, false); 1629f74a5051SKent Overstreet BUG_ON(ret); 1630e1036a2aSKent Overstreet 16317f5e31e1SKent Overstreet /* Before unlocking the page, get copy of reservations: */ 1632f74a5051SKent Overstreet spin_lock(&s->lock); 163349fe78ffSKent Overstreet memcpy(w->tmp, s->s, sizeof(struct bch_folio_sector) * f_sectors); 16347f5e31e1SKent Overstreet 163530bff594SKent Overstreet for (i = 0; i < f_sectors; i++) { 1636a1774a05SKent Overstreet if (s->s[i].state < SECTOR_dirty) 16377f5e31e1SKent Overstreet continue; 16387f5e31e1SKent Overstreet 1639f81b648dSKent Overstreet nr_replicas_this_write = 1640f57a6a5dSKent Overstreet min_t(unsigned, nr_replicas_this_write, 1641f57a6a5dSKent Overstreet s->s[i].nr_replicas + 1642f57a6a5dSKent Overstreet s->s[i].replicas_reserved); 16437f5e31e1SKent Overstreet } 16441c6fdbd8SKent Overstreet 164530bff594SKent Overstreet for (i = 0; i < f_sectors; i++) { 1646a1774a05SKent Overstreet if (s->s[i].state < SECTOR_dirty) 16477f5e31e1SKent Overstreet continue; 16487f5e31e1SKent Overstreet 1649f57a6a5dSKent Overstreet s->s[i].nr_replicas = w->opts.compression 1650f57a6a5dSKent Overstreet ? 0 : nr_replicas_this_write; 1651e1036a2aSKent Overstreet 1652f57a6a5dSKent Overstreet s->s[i].replicas_reserved = 0; 1653a1774a05SKent Overstreet folio_sector_set(folio, s, i, SECTOR_allocated); 1654f57a6a5dSKent Overstreet } 1655a1774a05SKent Overstreet spin_unlock(&s->lock); 16561c6fdbd8SKent Overstreet 16577f5e31e1SKent Overstreet BUG_ON(atomic_read(&s->write_count)); 16587f5e31e1SKent Overstreet atomic_set(&s->write_count, 1); 16597f5e31e1SKent Overstreet 166030bff594SKent Overstreet BUG_ON(folio_test_writeback(folio)); 166130bff594SKent Overstreet folio_start_writeback(folio); 16627f5e31e1SKent Overstreet 166330bff594SKent Overstreet folio_unlock(folio); 16641c6fdbd8SKent Overstreet 16657f5e31e1SKent Overstreet offset = 0; 16667f5e31e1SKent Overstreet while (1) { 1667f74a5051SKent Overstreet unsigned sectors = 0, dirty_sectors = 0, reserved_sectors = 0; 16687f5e31e1SKent Overstreet u64 sector; 16697f5e31e1SKent Overstreet 167030bff594SKent Overstreet while (offset < f_sectors && 1671a1774a05SKent Overstreet w->tmp[offset].state < SECTOR_dirty) 16727f5e31e1SKent Overstreet offset++; 16737f5e31e1SKent Overstreet 167430bff594SKent Overstreet if (offset == f_sectors) 16757f5e31e1SKent Overstreet break; 16767f5e31e1SKent Overstreet 167730bff594SKent Overstreet while (offset + sectors < f_sectors && 1678a1774a05SKent Overstreet w->tmp[offset + sectors].state >= SECTOR_dirty) { 167949fe78ffSKent Overstreet reserved_sectors += w->tmp[offset + sectors].replicas_reserved; 1680a1774a05SKent Overstreet dirty_sectors += w->tmp[offset + sectors].state == SECTOR_dirty; 16817f5e31e1SKent Overstreet sectors++; 16827f5e31e1SKent Overstreet } 1683f74a5051SKent Overstreet BUG_ON(!sectors); 1684f74a5051SKent Overstreet 168530bff594SKent Overstreet sector = folio_sector(folio) + offset; 16867f5e31e1SKent Overstreet 16871c6fdbd8SKent Overstreet if (w->io && 16889a3df993SKent Overstreet (w->io->op.res.nr_replicas != nr_replicas_this_write || 168933e2eb96SKent Overstreet bio_full(&w->io->op.wbio.bio, sectors << 9) || 1690f59b3464SKent Overstreet w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >= 1691f59b3464SKent Overstreet (BIO_MAX_VECS * PAGE_SIZE) || 16929a3df993SKent Overstreet bio_end_sector(&w->io->op.wbio.bio) != sector)) 16931c6fdbd8SKent Overstreet bch2_writepage_do_io(w); 16941c6fdbd8SKent Overstreet 16951c6fdbd8SKent Overstreet if (!w->io) 169650fe5bd6SKent Overstreet bch2_writepage_io_alloc(c, wbc, w, inode, sector, 1697f81b648dSKent Overstreet nr_replicas_this_write); 16981c6fdbd8SKent Overstreet 16997f5e31e1SKent Overstreet atomic_inc(&s->write_count); 17007f5e31e1SKent Overstreet 17019a3df993SKent Overstreet BUG_ON(inode != w->io->inode); 170230bff594SKent Overstreet BUG_ON(!bio_add_folio(&w->io->op.wbio.bio, folio, 17037f5e31e1SKent Overstreet sectors << 9, offset << 9)); 17041c6fdbd8SKent Overstreet 17056cc3535dSKent Overstreet /* Check for writing past i_size: */ 17068eb71e9eSKent Overstreet WARN_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) > 170780fe580cSKent Overstreet round_up(i_size, block_bytes(c)) && 17088eb71e9eSKent Overstreet !test_bit(BCH_FS_EMERGENCY_RO, &c->flags), 17098eb71e9eSKent Overstreet "writing past i_size: %llu > %llu (unrounded %llu)\n", 17108eb71e9eSKent Overstreet bio_end_sector(&w->io->op.wbio.bio) << 9, 17118eb71e9eSKent Overstreet round_up(i_size, block_bytes(c)), 17128eb71e9eSKent Overstreet i_size); 17136cc3535dSKent Overstreet 17149a3df993SKent Overstreet w->io->op.res.sectors += reserved_sectors; 17159a3df993SKent Overstreet w->io->op.i_sectors_delta -= dirty_sectors; 17161c6fdbd8SKent Overstreet w->io->op.new_i_size = i_size; 17171c6fdbd8SKent Overstreet 17187f5e31e1SKent Overstreet offset += sectors; 17197f5e31e1SKent Overstreet } 17207f5e31e1SKent Overstreet 17217f5e31e1SKent Overstreet if (atomic_dec_and_test(&s->write_count)) 172230bff594SKent Overstreet folio_end_writeback(folio); 17237f5e31e1SKent Overstreet 17241c6fdbd8SKent Overstreet return 0; 17251c6fdbd8SKent Overstreet } 17261c6fdbd8SKent Overstreet 17271c6fdbd8SKent Overstreet int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc) 17281c6fdbd8SKent Overstreet { 17291c6fdbd8SKent Overstreet struct bch_fs *c = mapping->host->i_sb->s_fs_info; 17301c6fdbd8SKent Overstreet struct bch_writepage_state w = 17311c6fdbd8SKent Overstreet bch_writepage_state_init(c, to_bch_ei(mapping->host)); 17321c6fdbd8SKent Overstreet struct blk_plug plug; 17331c6fdbd8SKent Overstreet int ret; 17341c6fdbd8SKent Overstreet 17351c6fdbd8SKent Overstreet blk_start_plug(&plug); 17361c6fdbd8SKent Overstreet ret = write_cache_pages(mapping, wbc, __bch2_writepage, &w); 17371c6fdbd8SKent Overstreet if (w.io) 17381c6fdbd8SKent Overstreet bch2_writepage_do_io(&w); 17391c6fdbd8SKent Overstreet blk_finish_plug(&plug); 174049fe78ffSKent Overstreet kfree(w.tmp); 17415c1ef830SKent Overstreet return bch2_err_class(ret); 17421c6fdbd8SKent Overstreet } 17431c6fdbd8SKent Overstreet 17441c6fdbd8SKent Overstreet /* buffered writes: */ 17451c6fdbd8SKent Overstreet 17461c6fdbd8SKent Overstreet int bch2_write_begin(struct file *file, struct address_space *mapping, 17471c6fdbd8SKent Overstreet loff_t pos, unsigned len, 17481c6fdbd8SKent Overstreet struct page **pagep, void **fsdata) 17491c6fdbd8SKent Overstreet { 17501c6fdbd8SKent Overstreet struct bch_inode_info *inode = to_bch_ei(mapping->host); 17511c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 175230bff594SKent Overstreet struct bch2_folio_reservation *res; 175330bff594SKent Overstreet struct folio *folio; 175433e2eb96SKent Overstreet unsigned offset; 17551c6fdbd8SKent Overstreet int ret = -ENOMEM; 17561c6fdbd8SKent Overstreet 1757d1542e03SKent Overstreet res = kmalloc(sizeof(*res), GFP_KERNEL); 1758d1542e03SKent Overstreet if (!res) 1759d1542e03SKent Overstreet return -ENOMEM; 1760d1542e03SKent Overstreet 176130bff594SKent Overstreet bch2_folio_reservation_init(c, inode, res); 1762d1542e03SKent Overstreet *fsdata = res; 17631c6fdbd8SKent Overstreet 1764a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 17651c6fdbd8SKent Overstreet 176633e2eb96SKent Overstreet folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, 176730bff594SKent Overstreet FGP_LOCK|FGP_WRITE|FGP_CREAT|FGP_STABLE, 176830bff594SKent Overstreet mapping_gfp_mask(mapping)); 1769b6898917SKent Overstreet if (IS_ERR_OR_NULL(folio)) 17701c6fdbd8SKent Overstreet goto err_unlock; 17711c6fdbd8SKent Overstreet 177230bff594SKent Overstreet if (folio_test_uptodate(folio)) 17731c6fdbd8SKent Overstreet goto out; 17741c6fdbd8SKent Overstreet 177533e2eb96SKent Overstreet offset = pos - folio_pos(folio); 177633e2eb96SKent Overstreet len = min_t(size_t, len, folio_end_pos(folio) - pos); 177733e2eb96SKent Overstreet 177830bff594SKent Overstreet /* If we're writing entire folio, don't need to read it in first: */ 177933e2eb96SKent Overstreet if (!offset && len == folio_size(folio)) 17801c6fdbd8SKent Overstreet goto out; 17811c6fdbd8SKent Overstreet 17821c6fdbd8SKent Overstreet if (!offset && pos + len >= inode->v.i_size) { 178330bff594SKent Overstreet folio_zero_segment(folio, len, folio_size(folio)); 178430bff594SKent Overstreet flush_dcache_folio(folio); 17851c6fdbd8SKent Overstreet goto out; 17861c6fdbd8SKent Overstreet } 17871c6fdbd8SKent Overstreet 178833e2eb96SKent Overstreet if (folio_pos(folio) >= inode->v.i_size) { 178930bff594SKent Overstreet folio_zero_segments(folio, 0, offset, offset + len, folio_size(folio)); 179030bff594SKent Overstreet flush_dcache_folio(folio); 17911c6fdbd8SKent Overstreet goto out; 17921c6fdbd8SKent Overstreet } 17931c6fdbd8SKent Overstreet readpage: 179430bff594SKent Overstreet ret = bch2_read_single_folio(folio, mapping); 17951c6fdbd8SKent Overstreet if (ret) 17961c6fdbd8SKent Overstreet goto err; 17971c6fdbd8SKent Overstreet out: 179830bff594SKent Overstreet ret = bch2_folio_set(c, inode_inum(inode), &folio, 1); 1799e6ec361fSKent Overstreet if (ret) 18003a4d3656SKent Overstreet goto err; 1801e6ec361fSKent Overstreet 180230bff594SKent Overstreet ret = bch2_folio_reservation_get(c, inode, folio, res, offset, len); 18031c6fdbd8SKent Overstreet if (ret) { 180430bff594SKent Overstreet if (!folio_test_uptodate(folio)) { 18051c6fdbd8SKent Overstreet /* 180630bff594SKent Overstreet * If the folio hasn't been read in, we won't know if we 18071c6fdbd8SKent Overstreet * actually need a reservation - we don't actually need 180830bff594SKent Overstreet * to read here, we just need to check if the folio is 18091c6fdbd8SKent Overstreet * fully backed by uncompressed data: 18101c6fdbd8SKent Overstreet */ 18111c6fdbd8SKent Overstreet goto readpage; 18121c6fdbd8SKent Overstreet } 18131c6fdbd8SKent Overstreet 18141c6fdbd8SKent Overstreet goto err; 18151c6fdbd8SKent Overstreet } 18161c6fdbd8SKent Overstreet 181730bff594SKent Overstreet *pagep = &folio->page; 18181c6fdbd8SKent Overstreet return 0; 18191c6fdbd8SKent Overstreet err: 182030bff594SKent Overstreet folio_unlock(folio); 182130bff594SKent Overstreet folio_put(folio); 18221c6fdbd8SKent Overstreet *pagep = NULL; 18231c6fdbd8SKent Overstreet err_unlock: 1824a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 1825d1542e03SKent Overstreet kfree(res); 1826d1542e03SKent Overstreet *fsdata = NULL; 18275c1ef830SKent Overstreet return bch2_err_class(ret); 18281c6fdbd8SKent Overstreet } 18291c6fdbd8SKent Overstreet 18301c6fdbd8SKent Overstreet int bch2_write_end(struct file *file, struct address_space *mapping, 18311c6fdbd8SKent Overstreet loff_t pos, unsigned len, unsigned copied, 18321c6fdbd8SKent Overstreet struct page *page, void *fsdata) 18331c6fdbd8SKent Overstreet { 18341c6fdbd8SKent Overstreet struct bch_inode_info *inode = to_bch_ei(mapping->host); 18351c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 183630bff594SKent Overstreet struct bch2_folio_reservation *res = fsdata; 183730bff594SKent Overstreet struct folio *folio = page_folio(page); 183833e2eb96SKent Overstreet unsigned offset = pos - folio_pos(folio); 18391c6fdbd8SKent Overstreet 18401c6fdbd8SKent Overstreet lockdep_assert_held(&inode->v.i_rwsem); 184133e2eb96SKent Overstreet BUG_ON(offset + copied > folio_size(folio)); 18421c6fdbd8SKent Overstreet 184330bff594SKent Overstreet if (unlikely(copied < len && !folio_test_uptodate(folio))) { 18441c6fdbd8SKent Overstreet /* 184530bff594SKent Overstreet * The folio needs to be read in, but that would destroy 18461c6fdbd8SKent Overstreet * our partial write - simplest thing is to just force 18471c6fdbd8SKent Overstreet * userspace to redo the write: 18481c6fdbd8SKent Overstreet */ 184930bff594SKent Overstreet folio_zero_range(folio, 0, folio_size(folio)); 185030bff594SKent Overstreet flush_dcache_folio(folio); 18511c6fdbd8SKent Overstreet copied = 0; 18521c6fdbd8SKent Overstreet } 18531c6fdbd8SKent Overstreet 18541c6fdbd8SKent Overstreet spin_lock(&inode->v.i_lock); 18551c6fdbd8SKent Overstreet if (pos + copied > inode->v.i_size) 18561c6fdbd8SKent Overstreet i_size_write(&inode->v, pos + copied); 18571c6fdbd8SKent Overstreet spin_unlock(&inode->v.i_lock); 18581c6fdbd8SKent Overstreet 18591c6fdbd8SKent Overstreet if (copied) { 186030bff594SKent Overstreet if (!folio_test_uptodate(folio)) 186130bff594SKent Overstreet folio_mark_uptodate(folio); 1862d1542e03SKent Overstreet 186330bff594SKent Overstreet bch2_set_folio_dirty(c, inode, folio, res, offset, copied); 18641c6fdbd8SKent Overstreet 18651c6fdbd8SKent Overstreet inode->ei_last_dirtied = (unsigned long) current; 18661c6fdbd8SKent Overstreet } 18671c6fdbd8SKent Overstreet 186830bff594SKent Overstreet folio_unlock(folio); 186930bff594SKent Overstreet folio_put(folio); 1870a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 18711c6fdbd8SKent Overstreet 187230bff594SKent Overstreet bch2_folio_reservation_put(c, inode, res); 1873d1542e03SKent Overstreet kfree(res); 1874d1542e03SKent Overstreet 18751c6fdbd8SKent Overstreet return copied; 18761c6fdbd8SKent Overstreet } 18771c6fdbd8SKent Overstreet 1878c42b57c4SKent Overstreet static noinline void folios_trunc(folios *folios, struct folio **fi) 1879c42b57c4SKent Overstreet { 1880c42b57c4SKent Overstreet while (folios->data + folios->nr > fi) { 1881c42b57c4SKent Overstreet struct folio *f = darray_pop(folios); 1882c42b57c4SKent Overstreet 1883c42b57c4SKent Overstreet folio_unlock(f); 1884c42b57c4SKent Overstreet folio_put(f); 1885c42b57c4SKent Overstreet } 1886c42b57c4SKent Overstreet } 18871c6fdbd8SKent Overstreet 18881c6fdbd8SKent Overstreet static int __bch2_buffered_write(struct bch_inode_info *inode, 18891c6fdbd8SKent Overstreet struct address_space *mapping, 18901c6fdbd8SKent Overstreet struct iov_iter *iter, 18911c6fdbd8SKent Overstreet loff_t pos, unsigned len) 18921c6fdbd8SKent Overstreet { 18931c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 189430bff594SKent Overstreet struct bch2_folio_reservation res; 1895c42b57c4SKent Overstreet folios folios; 1896c42b57c4SKent Overstreet struct folio **fi, *f; 1897c42b57c4SKent Overstreet unsigned copied = 0, f_offset; 18986b9857b2SBrian Foster u64 end = pos + len, f_pos; 1899335f7d4fSBrian Foster loff_t last_folio_pos = inode->v.i_size; 19001c6fdbd8SKent Overstreet int ret = 0; 19011c6fdbd8SKent Overstreet 19021c6fdbd8SKent Overstreet BUG_ON(!len); 19031c6fdbd8SKent Overstreet 190430bff594SKent Overstreet bch2_folio_reservation_init(c, inode, &res); 1905c42b57c4SKent Overstreet darray_init(&folios); 1906d1542e03SKent Overstreet 190740022c01SKent Overstreet ret = filemap_get_contig_folios_d(mapping, pos, end, 190840022c01SKent Overstreet FGP_LOCK|FGP_WRITE|FGP_STABLE|FGP_CREAT, 190940022c01SKent Overstreet mapping_gfp_mask(mapping), 191040022c01SKent Overstreet &folios); 191140022c01SKent Overstreet if (ret) 19121c6fdbd8SKent Overstreet goto out; 191340022c01SKent Overstreet 191440022c01SKent Overstreet BUG_ON(!folios.nr); 191540022c01SKent Overstreet 1916c42b57c4SKent Overstreet f = darray_first(folios); 1917c42b57c4SKent Overstreet if (pos != folio_pos(f) && !folio_test_uptodate(f)) { 1918c42b57c4SKent Overstreet ret = bch2_read_single_folio(f, mapping); 19191c6fdbd8SKent Overstreet if (ret) 19201c6fdbd8SKent Overstreet goto out; 19211c6fdbd8SKent Overstreet } 19221c6fdbd8SKent Overstreet 1923c42b57c4SKent Overstreet f = darray_last(folios); 1924335f7d4fSBrian Foster end = min(end, folio_end_pos(f)); 1925335f7d4fSBrian Foster last_folio_pos = folio_pos(f); 1926c42b57c4SKent Overstreet if (end != folio_end_pos(f) && !folio_test_uptodate(f)) { 1927c42b57c4SKent Overstreet if (end >= inode->v.i_size) { 1928c42b57c4SKent Overstreet folio_zero_range(f, 0, folio_size(f)); 19291c6fdbd8SKent Overstreet } else { 1930c42b57c4SKent Overstreet ret = bch2_read_single_folio(f, mapping); 19311c6fdbd8SKent Overstreet if (ret) 19321c6fdbd8SKent Overstreet goto out; 19331c6fdbd8SKent Overstreet } 19341c6fdbd8SKent Overstreet } 19351c6fdbd8SKent Overstreet 193670d41c9eSKent Overstreet ret = bch2_folio_set(c, inode_inum(inode), folios.data, folios.nr); 193770d41c9eSKent Overstreet if (ret) 193870d41c9eSKent Overstreet goto out; 193970d41c9eSKent Overstreet 1940c42b57c4SKent Overstreet f_pos = pos; 1941c42b57c4SKent Overstreet f_offset = pos - folio_pos(darray_first(folios)); 1942c42b57c4SKent Overstreet darray_for_each(folios, fi) { 1943c42b57c4SKent Overstreet struct folio *f = *fi; 19446b9857b2SBrian Foster u64 f_len = min(end, folio_end_pos(f)) - f_pos; 19451c6fdbd8SKent Overstreet 1946353448f3SKent Overstreet /* 1947353448f3SKent Overstreet * XXX: per POSIX and fstests generic/275, on -ENOSPC we're 1948353448f3SKent Overstreet * supposed to write as much as we have disk space for. 1949353448f3SKent Overstreet * 1950353448f3SKent Overstreet * On failure here we should still write out a partial page if 1951353448f3SKent Overstreet * we aren't completely out of disk space - we don't do that 1952353448f3SKent Overstreet * yet: 1953353448f3SKent Overstreet */ 1954c42b57c4SKent Overstreet ret = bch2_folio_reservation_get(c, inode, f, &res, f_offset, f_len); 1955353448f3SKent Overstreet if (unlikely(ret)) { 1956c42b57c4SKent Overstreet folios_trunc(&folios, fi); 1957c42b57c4SKent Overstreet if (!folios.nr) 19581c6fdbd8SKent Overstreet goto out; 1959c42b57c4SKent Overstreet 1960c42b57c4SKent Overstreet end = min(end, folio_end_pos(darray_last(folios))); 1961353448f3SKent Overstreet break; 1962353448f3SKent Overstreet } 1963d1542e03SKent Overstreet 1964c42b57c4SKent Overstreet f_pos = folio_end_pos(f); 1965c42b57c4SKent Overstreet f_offset = 0; 19661c6fdbd8SKent Overstreet } 19671c6fdbd8SKent Overstreet 19681c6fdbd8SKent Overstreet if (mapping_writably_mapped(mapping)) 1969c42b57c4SKent Overstreet darray_for_each(folios, fi) 1970c42b57c4SKent Overstreet flush_dcache_folio(*fi); 19711c6fdbd8SKent Overstreet 1972c42b57c4SKent Overstreet f_pos = pos; 1973c42b57c4SKent Overstreet f_offset = pos - folio_pos(darray_first(folios)); 1974c42b57c4SKent Overstreet darray_for_each(folios, fi) { 1975c42b57c4SKent Overstreet struct folio *f = *fi; 19766b9857b2SBrian Foster u64 f_len = min(end, folio_end_pos(f)) - f_pos; 1977c42b57c4SKent Overstreet unsigned f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter); 1978d1542e03SKent Overstreet 1979c42b57c4SKent Overstreet if (!f_copied) { 1980c42b57c4SKent Overstreet folios_trunc(&folios, fi); 1981912bdf17SKent Overstreet break; 1982912bdf17SKent Overstreet } 1983912bdf17SKent Overstreet 1984c42b57c4SKent Overstreet if (!folio_test_uptodate(f) && 1985c42b57c4SKent Overstreet f_copied != folio_size(f) && 1986c42b57c4SKent Overstreet pos + copied + f_copied < inode->v.i_size) { 1987c42b57c4SKent Overstreet folio_zero_range(f, 0, folio_size(f)); 1988c42b57c4SKent Overstreet folios_trunc(&folios, fi); 1989912bdf17SKent Overstreet break; 19901c6fdbd8SKent Overstreet } 19911c6fdbd8SKent Overstreet 1992c42b57c4SKent Overstreet flush_dcache_folio(f); 1993c42b57c4SKent Overstreet copied += f_copied; 1994c42b57c4SKent Overstreet 1995c42b57c4SKent Overstreet if (f_copied != f_len) { 1996c42b57c4SKent Overstreet folios_trunc(&folios, fi + 1); 1997c42b57c4SKent Overstreet break; 1998c42b57c4SKent Overstreet } 1999c42b57c4SKent Overstreet 2000c42b57c4SKent Overstreet f_pos = folio_end_pos(f); 2001c42b57c4SKent Overstreet f_offset = 0; 2002c42b57c4SKent Overstreet } 2003c42b57c4SKent Overstreet 20041c6fdbd8SKent Overstreet if (!copied) 20051c6fdbd8SKent Overstreet goto out; 20061c6fdbd8SKent Overstreet 2007c42b57c4SKent Overstreet end = pos + copied; 2008c42b57c4SKent Overstreet 2009877dfb34SKent Overstreet spin_lock(&inode->v.i_lock); 2010c42b57c4SKent Overstreet if (end > inode->v.i_size) 2011c42b57c4SKent Overstreet i_size_write(&inode->v, end); 2012877dfb34SKent Overstreet spin_unlock(&inode->v.i_lock); 2013877dfb34SKent Overstreet 2014c42b57c4SKent Overstreet f_pos = pos; 2015c42b57c4SKent Overstreet f_offset = pos - folio_pos(darray_first(folios)); 2016c42b57c4SKent Overstreet darray_for_each(folios, fi) { 2017c42b57c4SKent Overstreet struct folio *f = *fi; 20186b9857b2SBrian Foster u64 f_len = min(end, folio_end_pos(f)) - f_pos; 2019d1542e03SKent Overstreet 2020c42b57c4SKent Overstreet if (!folio_test_uptodate(f)) 2021c42b57c4SKent Overstreet folio_mark_uptodate(f); 2022d1542e03SKent Overstreet 2023c42b57c4SKent Overstreet bch2_set_folio_dirty(c, inode, f, &res, f_offset, f_len); 2024d1542e03SKent Overstreet 2025c42b57c4SKent Overstreet f_pos = folio_end_pos(f); 2026c42b57c4SKent Overstreet f_offset = 0; 2027d1542e03SKent Overstreet } 2028877dfb34SKent Overstreet 2029877dfb34SKent Overstreet inode->ei_last_dirtied = (unsigned long) current; 20301c6fdbd8SKent Overstreet out: 2031c42b57c4SKent Overstreet darray_for_each(folios, fi) { 2032c42b57c4SKent Overstreet folio_unlock(*fi); 2033c42b57c4SKent Overstreet folio_put(*fi); 20341c6fdbd8SKent Overstreet } 20351c6fdbd8SKent Overstreet 2036335f7d4fSBrian Foster /* 2037335f7d4fSBrian Foster * If the last folio added to the mapping starts beyond current EOF, we 2038335f7d4fSBrian Foster * performed a short write but left around at least one post-EOF folio. 2039335f7d4fSBrian Foster * Clean up the mapping before we return. 2040335f7d4fSBrian Foster */ 2041335f7d4fSBrian Foster if (last_folio_pos >= inode->v.i_size) 2042335f7d4fSBrian Foster truncate_pagecache(&inode->v, inode->v.i_size); 2043335f7d4fSBrian Foster 2044c42b57c4SKent Overstreet darray_exit(&folios); 204530bff594SKent Overstreet bch2_folio_reservation_put(c, inode, &res); 20461c6fdbd8SKent Overstreet 20471c6fdbd8SKent Overstreet return copied ?: ret; 20481c6fdbd8SKent Overstreet } 20491c6fdbd8SKent Overstreet 20501c6fdbd8SKent Overstreet static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter) 20511c6fdbd8SKent Overstreet { 20521c6fdbd8SKent Overstreet struct file *file = iocb->ki_filp; 20531c6fdbd8SKent Overstreet struct address_space *mapping = file->f_mapping; 20541c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 20551c6fdbd8SKent Overstreet loff_t pos = iocb->ki_pos; 20561c6fdbd8SKent Overstreet ssize_t written = 0; 20571c6fdbd8SKent Overstreet int ret = 0; 20581c6fdbd8SKent Overstreet 2059a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 20601c6fdbd8SKent Overstreet 20611c6fdbd8SKent Overstreet do { 20621c6fdbd8SKent Overstreet unsigned offset = pos & (PAGE_SIZE - 1); 2063c42b57c4SKent Overstreet unsigned bytes = iov_iter_count(iter); 20641c6fdbd8SKent Overstreet again: 20651c6fdbd8SKent Overstreet /* 20661c6fdbd8SKent Overstreet * Bring in the user page that we will copy from _first_. 20671c6fdbd8SKent Overstreet * Otherwise there's a nasty deadlock on copying from the 20681c6fdbd8SKent Overstreet * same page as we're writing to, without it being marked 20691c6fdbd8SKent Overstreet * up-to-date. 20701c6fdbd8SKent Overstreet * 20711c6fdbd8SKent Overstreet * Not only is this an optimisation, but it is also required 20721c6fdbd8SKent Overstreet * to check that the address is actually valid, when atomic 20731c6fdbd8SKent Overstreet * usercopies are used, below. 20741c6fdbd8SKent Overstreet */ 20751c6fdbd8SKent Overstreet if (unlikely(fault_in_iov_iter_readable(iter, bytes))) { 20761c6fdbd8SKent Overstreet bytes = min_t(unsigned long, iov_iter_count(iter), 20771c6fdbd8SKent Overstreet PAGE_SIZE - offset); 20781c6fdbd8SKent Overstreet 20791c6fdbd8SKent Overstreet if (unlikely(fault_in_iov_iter_readable(iter, bytes))) { 20801c6fdbd8SKent Overstreet ret = -EFAULT; 20811c6fdbd8SKent Overstreet break; 20821c6fdbd8SKent Overstreet } 20831c6fdbd8SKent Overstreet } 20841c6fdbd8SKent Overstreet 20851c6fdbd8SKent Overstreet if (unlikely(fatal_signal_pending(current))) { 20861c6fdbd8SKent Overstreet ret = -EINTR; 20871c6fdbd8SKent Overstreet break; 20881c6fdbd8SKent Overstreet } 20891c6fdbd8SKent Overstreet 20901c6fdbd8SKent Overstreet ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes); 20911c6fdbd8SKent Overstreet if (unlikely(ret < 0)) 20921c6fdbd8SKent Overstreet break; 20931c6fdbd8SKent Overstreet 20941c6fdbd8SKent Overstreet cond_resched(); 20951c6fdbd8SKent Overstreet 20961c6fdbd8SKent Overstreet if (unlikely(ret == 0)) { 20971c6fdbd8SKent Overstreet /* 20981c6fdbd8SKent Overstreet * If we were unable to copy any data at all, we must 20991c6fdbd8SKent Overstreet * fall back to a single segment length write. 21001c6fdbd8SKent Overstreet * 21011c6fdbd8SKent Overstreet * If we didn't fallback here, we could livelock 21021c6fdbd8SKent Overstreet * because not all segments in the iov can be copied at 21031c6fdbd8SKent Overstreet * once without a pagefault. 21041c6fdbd8SKent Overstreet */ 21051c6fdbd8SKent Overstreet bytes = min_t(unsigned long, PAGE_SIZE - offset, 21061c6fdbd8SKent Overstreet iov_iter_single_seg_count(iter)); 21071c6fdbd8SKent Overstreet goto again; 21081c6fdbd8SKent Overstreet } 21091c6fdbd8SKent Overstreet pos += ret; 21101c6fdbd8SKent Overstreet written += ret; 2111912bdf17SKent Overstreet ret = 0; 21121c6fdbd8SKent Overstreet 21131c6fdbd8SKent Overstreet balance_dirty_pages_ratelimited(mapping); 21141c6fdbd8SKent Overstreet } while (iov_iter_count(iter)); 21151c6fdbd8SKent Overstreet 2116a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 21171c6fdbd8SKent Overstreet 21181c6fdbd8SKent Overstreet return written ? written : ret; 21191c6fdbd8SKent Overstreet } 21201c6fdbd8SKent Overstreet 21211c6fdbd8SKent Overstreet /* O_DIRECT reads */ 21221c6fdbd8SKent Overstreet 2123b4725cc1SKent Overstreet static void bio_check_or_release(struct bio *bio, bool check_dirty) 2124b4725cc1SKent Overstreet { 2125b4725cc1SKent Overstreet if (check_dirty) { 2126b4725cc1SKent Overstreet bio_check_pages_dirty(bio); 2127b4725cc1SKent Overstreet } else { 2128b4725cc1SKent Overstreet bio_release_pages(bio, false); 2129b4725cc1SKent Overstreet bio_put(bio); 2130b4725cc1SKent Overstreet } 2131b4725cc1SKent Overstreet } 2132b4725cc1SKent Overstreet 21331c6fdbd8SKent Overstreet static void bch2_dio_read_complete(struct closure *cl) 21341c6fdbd8SKent Overstreet { 21351c6fdbd8SKent Overstreet struct dio_read *dio = container_of(cl, struct dio_read, cl); 21361c6fdbd8SKent Overstreet 21371c6fdbd8SKent Overstreet dio->req->ki_complete(dio->req, dio->ret); 2138b4725cc1SKent Overstreet bio_check_or_release(&dio->rbio.bio, dio->should_dirty); 21391c6fdbd8SKent Overstreet } 21401c6fdbd8SKent Overstreet 21411c6fdbd8SKent Overstreet static void bch2_direct_IO_read_endio(struct bio *bio) 21421c6fdbd8SKent Overstreet { 21431c6fdbd8SKent Overstreet struct dio_read *dio = bio->bi_private; 21441c6fdbd8SKent Overstreet 21451c6fdbd8SKent Overstreet if (bio->bi_status) 21461c6fdbd8SKent Overstreet dio->ret = blk_status_to_errno(bio->bi_status); 21471c6fdbd8SKent Overstreet 21481c6fdbd8SKent Overstreet closure_put(&dio->cl); 21491c6fdbd8SKent Overstreet } 21501c6fdbd8SKent Overstreet 21511c6fdbd8SKent Overstreet static void bch2_direct_IO_read_split_endio(struct bio *bio) 21521c6fdbd8SKent Overstreet { 2153b4725cc1SKent Overstreet struct dio_read *dio = bio->bi_private; 2154b4725cc1SKent Overstreet bool should_dirty = dio->should_dirty; 2155b4725cc1SKent Overstreet 21561c6fdbd8SKent Overstreet bch2_direct_IO_read_endio(bio); 2157b4725cc1SKent Overstreet bio_check_or_release(bio, should_dirty); 21581c6fdbd8SKent Overstreet } 21591c6fdbd8SKent Overstreet 21601c6fdbd8SKent Overstreet static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) 21611c6fdbd8SKent Overstreet { 21621c6fdbd8SKent Overstreet struct file *file = req->ki_filp; 21631c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 21641c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 216501ad6737SKent Overstreet struct bch_io_opts opts; 21661c6fdbd8SKent Overstreet struct dio_read *dio; 21671c6fdbd8SKent Overstreet struct bio *bio; 21681c6fdbd8SKent Overstreet loff_t offset = req->ki_pos; 21691c6fdbd8SKent Overstreet bool sync = is_sync_kiocb(req); 21701c6fdbd8SKent Overstreet size_t shorten; 21711c6fdbd8SKent Overstreet ssize_t ret; 21721c6fdbd8SKent Overstreet 217301ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 217401ad6737SKent Overstreet 21751c6fdbd8SKent Overstreet if ((offset|iter->count) & (block_bytes(c) - 1)) 21761c6fdbd8SKent Overstreet return -EINVAL; 21771c6fdbd8SKent Overstreet 21781c6fdbd8SKent Overstreet ret = min_t(loff_t, iter->count, 21791c6fdbd8SKent Overstreet max_t(loff_t, 0, i_size_read(&inode->v) - offset)); 21801c6fdbd8SKent Overstreet 21811c6fdbd8SKent Overstreet if (!ret) 21821c6fdbd8SKent Overstreet return ret; 21831c6fdbd8SKent Overstreet 21841c6fdbd8SKent Overstreet shorten = iov_iter_count(iter) - round_up(ret, block_bytes(c)); 21851c6fdbd8SKent Overstreet iter->count -= shorten; 21861c6fdbd8SKent Overstreet 21871c6fdbd8SKent Overstreet bio = bio_alloc_bioset(NULL, 21884d126dc8SKent Overstreet bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), 21891c6fdbd8SKent Overstreet REQ_OP_READ, 21901c6fdbd8SKent Overstreet GFP_KERNEL, 21911c6fdbd8SKent Overstreet &c->dio_read_bioset); 21921c6fdbd8SKent Overstreet 21931c6fdbd8SKent Overstreet bio->bi_end_io = bch2_direct_IO_read_endio; 21941c6fdbd8SKent Overstreet 21951c6fdbd8SKent Overstreet dio = container_of(bio, struct dio_read, rbio.bio); 21961c6fdbd8SKent Overstreet closure_init(&dio->cl, NULL); 21971c6fdbd8SKent Overstreet 21981c6fdbd8SKent Overstreet /* 21991c6fdbd8SKent Overstreet * this is a _really_ horrible hack just to avoid an atomic sub at the 22001c6fdbd8SKent Overstreet * end: 22011c6fdbd8SKent Overstreet */ 22021c6fdbd8SKent Overstreet if (!sync) { 22031c6fdbd8SKent Overstreet set_closure_fn(&dio->cl, bch2_dio_read_complete, NULL); 22041c6fdbd8SKent Overstreet atomic_set(&dio->cl.remaining, 22051c6fdbd8SKent Overstreet CLOSURE_REMAINING_INITIALIZER - 22061c6fdbd8SKent Overstreet CLOSURE_RUNNING + 22071c6fdbd8SKent Overstreet CLOSURE_DESTRUCTOR); 22081c6fdbd8SKent Overstreet } else { 22091c6fdbd8SKent Overstreet atomic_set(&dio->cl.remaining, 22101c6fdbd8SKent Overstreet CLOSURE_REMAINING_INITIALIZER + 1); 22111c6fdbd8SKent Overstreet } 22121c6fdbd8SKent Overstreet 22131c6fdbd8SKent Overstreet dio->req = req; 22141c6fdbd8SKent Overstreet dio->ret = ret; 2215b4725cc1SKent Overstreet /* 2216b4725cc1SKent Overstreet * This is one of the sketchier things I've encountered: we have to skip 2217b4725cc1SKent Overstreet * the dirtying of requests that are internal from the kernel (i.e. from 2218b4725cc1SKent Overstreet * loopback), because we'll deadlock on page_lock. 2219b4725cc1SKent Overstreet */ 2220b4725cc1SKent Overstreet dio->should_dirty = iter_is_iovec(iter); 22211c6fdbd8SKent Overstreet 22221c6fdbd8SKent Overstreet goto start; 22231c6fdbd8SKent Overstreet while (iter->count) { 22241c6fdbd8SKent Overstreet bio = bio_alloc_bioset(NULL, 22254d126dc8SKent Overstreet bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), 22261c6fdbd8SKent Overstreet REQ_OP_READ, 22271c6fdbd8SKent Overstreet GFP_KERNEL, 22281c6fdbd8SKent Overstreet &c->bio_read); 22291c6fdbd8SKent Overstreet bio->bi_end_io = bch2_direct_IO_read_split_endio; 22301c6fdbd8SKent Overstreet start: 22311c6fdbd8SKent Overstreet bio->bi_opf = REQ_OP_READ|REQ_SYNC; 22321c6fdbd8SKent Overstreet bio->bi_iter.bi_sector = offset >> 9; 22331c6fdbd8SKent Overstreet bio->bi_private = dio; 22341c6fdbd8SKent Overstreet 22351c6fdbd8SKent Overstreet ret = bio_iov_iter_get_pages(bio, iter); 22361c6fdbd8SKent Overstreet if (ret < 0) { 22371c6fdbd8SKent Overstreet /* XXX: fault inject this path */ 22381c6fdbd8SKent Overstreet bio->bi_status = BLK_STS_RESOURCE; 22391c6fdbd8SKent Overstreet bio_endio(bio); 22401c6fdbd8SKent Overstreet break; 22411c6fdbd8SKent Overstreet } 22421c6fdbd8SKent Overstreet 22431c6fdbd8SKent Overstreet offset += bio->bi_iter.bi_size; 2244b4725cc1SKent Overstreet 2245b4725cc1SKent Overstreet if (dio->should_dirty) 22461c6fdbd8SKent Overstreet bio_set_pages_dirty(bio); 22471c6fdbd8SKent Overstreet 22481c6fdbd8SKent Overstreet if (iter->count) 22491c6fdbd8SKent Overstreet closure_get(&dio->cl); 22501c6fdbd8SKent Overstreet 22518c6d298aSKent Overstreet bch2_read(c, rbio_init(bio, opts), inode_inum(inode)); 22521c6fdbd8SKent Overstreet } 22531c6fdbd8SKent Overstreet 22541c6fdbd8SKent Overstreet iter->count += shorten; 22551c6fdbd8SKent Overstreet 22561c6fdbd8SKent Overstreet if (sync) { 22571c6fdbd8SKent Overstreet closure_sync(&dio->cl); 22581c6fdbd8SKent Overstreet closure_debug_destroy(&dio->cl); 22591c6fdbd8SKent Overstreet ret = dio->ret; 2260b4725cc1SKent Overstreet bio_check_or_release(&dio->rbio.bio, dio->should_dirty); 22611c6fdbd8SKent Overstreet return ret; 22621c6fdbd8SKent Overstreet } else { 22631c6fdbd8SKent Overstreet return -EIOCBQUEUED; 22641c6fdbd8SKent Overstreet } 22651c6fdbd8SKent Overstreet } 22661c6fdbd8SKent Overstreet 22671c6fdbd8SKent Overstreet ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter) 22681c6fdbd8SKent Overstreet { 22691c6fdbd8SKent Overstreet struct file *file = iocb->ki_filp; 22701c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 22711c6fdbd8SKent Overstreet struct address_space *mapping = file->f_mapping; 22721c6fdbd8SKent Overstreet size_t count = iov_iter_count(iter); 22731c6fdbd8SKent Overstreet ssize_t ret; 22741c6fdbd8SKent Overstreet 22751c6fdbd8SKent Overstreet if (!count) 22761c6fdbd8SKent Overstreet return 0; /* skip atime */ 22771c6fdbd8SKent Overstreet 22781c6fdbd8SKent Overstreet if (iocb->ki_flags & IOCB_DIRECT) { 22791c6fdbd8SKent Overstreet struct blk_plug plug; 22801c6fdbd8SKent Overstreet 2281a023127aSKent Overstreet if (unlikely(mapping->nrpages)) { 22821c6fdbd8SKent Overstreet ret = filemap_write_and_wait_range(mapping, 22831c6fdbd8SKent Overstreet iocb->ki_pos, 22841c6fdbd8SKent Overstreet iocb->ki_pos + count - 1); 22851c6fdbd8SKent Overstreet if (ret < 0) 22865c1ef830SKent Overstreet goto out; 2287a023127aSKent Overstreet } 22881c6fdbd8SKent Overstreet 22891c6fdbd8SKent Overstreet file_accessed(file); 22901c6fdbd8SKent Overstreet 22911c6fdbd8SKent Overstreet blk_start_plug(&plug); 22921c6fdbd8SKent Overstreet ret = bch2_direct_IO_read(iocb, iter); 22931c6fdbd8SKent Overstreet blk_finish_plug(&plug); 22941c6fdbd8SKent Overstreet 22951c6fdbd8SKent Overstreet if (ret >= 0) 22961c6fdbd8SKent Overstreet iocb->ki_pos += ret; 22971c6fdbd8SKent Overstreet } else { 2298a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 22991c6fdbd8SKent Overstreet ret = generic_file_read_iter(iocb, iter); 2300a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 23011c6fdbd8SKent Overstreet } 23025c1ef830SKent Overstreet out: 23035c1ef830SKent Overstreet return bch2_err_class(ret); 23041c6fdbd8SKent Overstreet } 23051c6fdbd8SKent Overstreet 23061c6fdbd8SKent Overstreet /* O_DIRECT writes */ 23071c6fdbd8SKent Overstreet 23086fed42bbSKent Overstreet static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum, 23096fed42bbSKent Overstreet u64 offset, u64 size, 23106fed42bbSKent Overstreet unsigned nr_replicas, bool compressed) 23116fed42bbSKent Overstreet { 23126fed42bbSKent Overstreet struct btree_trans trans; 23136fed42bbSKent Overstreet struct btree_iter iter; 23146fed42bbSKent Overstreet struct bkey_s_c k; 23156fed42bbSKent Overstreet u64 end = offset + size; 23166fed42bbSKent Overstreet u32 snapshot; 23176fed42bbSKent Overstreet bool ret = true; 23186fed42bbSKent Overstreet int err; 23196fed42bbSKent Overstreet 23206fed42bbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 23216fed42bbSKent Overstreet retry: 23226fed42bbSKent Overstreet bch2_trans_begin(&trans); 23236fed42bbSKent Overstreet 23246fed42bbSKent Overstreet err = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 23256fed42bbSKent Overstreet if (err) 23266fed42bbSKent Overstreet goto err; 23276fed42bbSKent Overstreet 2328e5fa91d7SKent Overstreet for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, 23296fed42bbSKent Overstreet SPOS(inum.inum, offset, snapshot), 23306fed42bbSKent Overstreet BTREE_ITER_SLOTS, k, err) { 2331e88a75ebSKent Overstreet if (bkey_ge(bkey_start_pos(k.k), POS(inum.inum, end))) 23326fed42bbSKent Overstreet break; 23336fed42bbSKent Overstreet 23348c6d298aSKent Overstreet if (k.k->p.snapshot != snapshot || 23358c6d298aSKent Overstreet nr_replicas > bch2_bkey_replicas(c, k) || 23366fed42bbSKent Overstreet (!compressed && bch2_bkey_sectors_compressed(k))) { 23376fed42bbSKent Overstreet ret = false; 23386fed42bbSKent Overstreet break; 23396fed42bbSKent Overstreet } 23406fed42bbSKent Overstreet } 23416fed42bbSKent Overstreet 23426fed42bbSKent Overstreet offset = iter.pos.offset; 23436fed42bbSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 23446fed42bbSKent Overstreet err: 2345549d173cSKent Overstreet if (bch2_err_matches(err, BCH_ERR_transaction_restart)) 23466fed42bbSKent Overstreet goto retry; 23476fed42bbSKent Overstreet bch2_trans_exit(&trans); 23486fed42bbSKent Overstreet 23496fed42bbSKent Overstreet return err ? false : ret; 23506fed42bbSKent Overstreet } 23516fed42bbSKent Overstreet 2352182c7bbfSKent Overstreet static noinline bool bch2_dio_write_check_allocated(struct dio_write *dio) 2353182c7bbfSKent Overstreet { 2354182c7bbfSKent Overstreet struct bch_fs *c = dio->op.c; 2355182c7bbfSKent Overstreet struct bch_inode_info *inode = dio->inode; 2356182c7bbfSKent Overstreet struct bio *bio = &dio->op.wbio.bio; 2357182c7bbfSKent Overstreet 2358182c7bbfSKent Overstreet return bch2_check_range_allocated(c, inode_inum(inode), 2359182c7bbfSKent Overstreet dio->op.pos.offset, bio_sectors(bio), 2360182c7bbfSKent Overstreet dio->op.opts.data_replicas, 2361182c7bbfSKent Overstreet dio->op.opts.compression != 0); 2362182c7bbfSKent Overstreet } 2363182c7bbfSKent Overstreet 2364a1ee777bSKent Overstreet static void bch2_dio_write_loop_async(struct bch_write_op *); 2365a1ee777bSKent Overstreet static __always_inline long bch2_dio_write_done(struct dio_write *dio); 2366a1ee777bSKent Overstreet 23671c6fdbd8SKent Overstreet /* 23681c6fdbd8SKent Overstreet * We're going to return -EIOCBQUEUED, but we haven't finished consuming the 23691c6fdbd8SKent Overstreet * iov_iter yet, so we need to stash a copy of the iovec: it might be on the 23701c6fdbd8SKent Overstreet * caller's stack, we're not guaranteed that it will live for the duration of 23711c6fdbd8SKent Overstreet * the IO: 23721c6fdbd8SKent Overstreet */ 23731c6fdbd8SKent Overstreet static noinline int bch2_dio_write_copy_iov(struct dio_write *dio) 23741c6fdbd8SKent Overstreet { 23751c6fdbd8SKent Overstreet struct iovec *iov = dio->inline_vecs; 23761c6fdbd8SKent Overstreet 23771c6fdbd8SKent Overstreet /* 23781c6fdbd8SKent Overstreet * iov_iter has a single embedded iovec - nothing to do: 23791c6fdbd8SKent Overstreet */ 23801c6fdbd8SKent Overstreet if (iter_is_ubuf(&dio->iter)) 23811c6fdbd8SKent Overstreet return 0; 23821c6fdbd8SKent Overstreet 23831c6fdbd8SKent Overstreet /* 23841c6fdbd8SKent Overstreet * We don't currently handle non-iovec iov_iters here - return an error, 23851c6fdbd8SKent Overstreet * and we'll fall back to doing the IO synchronously: 23861c6fdbd8SKent Overstreet */ 23871c6fdbd8SKent Overstreet if (!iter_is_iovec(&dio->iter)) 23881c6fdbd8SKent Overstreet return -1; 23891c6fdbd8SKent Overstreet 23901c6fdbd8SKent Overstreet if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) { 23911c6fdbd8SKent Overstreet iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov), 23921c6fdbd8SKent Overstreet GFP_KERNEL); 23931c6fdbd8SKent Overstreet if (unlikely(!iov)) 23941c6fdbd8SKent Overstreet return -ENOMEM; 23951c6fdbd8SKent Overstreet 23961c6fdbd8SKent Overstreet dio->free_iov = true; 23971c6fdbd8SKent Overstreet } 23981c6fdbd8SKent Overstreet 23991c6fdbd8SKent Overstreet memcpy(iov, dio->iter.__iov, dio->iter.nr_segs * sizeof(*iov)); 24001c6fdbd8SKent Overstreet dio->iter.__iov = iov; 24011c6fdbd8SKent Overstreet return 0; 24021c6fdbd8SKent Overstreet } 24031c6fdbd8SKent Overstreet 2404a1ee777bSKent Overstreet static void bch2_dio_write_flush_done(struct closure *cl) 2405a1ee777bSKent Overstreet { 2406a1ee777bSKent Overstreet struct dio_write *dio = container_of(cl, struct dio_write, op.cl); 2407a1ee777bSKent Overstreet struct bch_fs *c = dio->op.c; 2408a1ee777bSKent Overstreet 2409a1ee777bSKent Overstreet closure_debug_destroy(cl); 2410a1ee777bSKent Overstreet 2411a1ee777bSKent Overstreet dio->op.error = bch2_journal_error(&c->journal); 2412a1ee777bSKent Overstreet 2413a1ee777bSKent Overstreet bch2_dio_write_done(dio); 2414a1ee777bSKent Overstreet } 2415a1ee777bSKent Overstreet 2416a1ee777bSKent Overstreet static noinline void bch2_dio_write_flush(struct dio_write *dio) 2417a1ee777bSKent Overstreet { 2418a1ee777bSKent Overstreet struct bch_fs *c = dio->op.c; 2419a1ee777bSKent Overstreet struct bch_inode_unpacked inode; 2420a1ee777bSKent Overstreet int ret; 2421a1ee777bSKent Overstreet 2422a1ee777bSKent Overstreet dio->flush = 0; 2423a1ee777bSKent Overstreet 2424a1ee777bSKent Overstreet closure_init(&dio->op.cl, NULL); 2425a1ee777bSKent Overstreet 2426a1ee777bSKent Overstreet if (!dio->op.error) { 2427a1ee777bSKent Overstreet ret = bch2_inode_find_by_inum(c, inode_inum(dio->inode), &inode); 2428a8b3a677SKent Overstreet if (ret) { 2429a1ee777bSKent Overstreet dio->op.error = ret; 2430a8b3a677SKent Overstreet } else { 2431a1ee777bSKent Overstreet bch2_journal_flush_seq_async(&c->journal, inode.bi_journal_seq, &dio->op.cl); 2432a8b3a677SKent Overstreet bch2_inode_flush_nocow_writes_async(c, dio->inode, &dio->op.cl); 2433a8b3a677SKent Overstreet } 2434a1ee777bSKent Overstreet } 2435a1ee777bSKent Overstreet 2436a1ee777bSKent Overstreet if (dio->sync) { 2437a1ee777bSKent Overstreet closure_sync(&dio->op.cl); 2438a1ee777bSKent Overstreet closure_debug_destroy(&dio->op.cl); 2439a1ee777bSKent Overstreet } else { 2440a1ee777bSKent Overstreet continue_at(&dio->op.cl, bch2_dio_write_flush_done, NULL); 2441a1ee777bSKent Overstreet } 2442a1ee777bSKent Overstreet } 2443042a1f26SKent Overstreet 2444182c7bbfSKent Overstreet static __always_inline long bch2_dio_write_done(struct dio_write *dio) 2445182c7bbfSKent Overstreet { 2446182c7bbfSKent Overstreet struct kiocb *req = dio->req; 2447182c7bbfSKent Overstreet struct bch_inode_info *inode = dio->inode; 2448182c7bbfSKent Overstreet bool sync = dio->sync; 2449a1ee777bSKent Overstreet long ret; 2450a1ee777bSKent Overstreet 2451a1ee777bSKent Overstreet if (unlikely(dio->flush)) { 2452a1ee777bSKent Overstreet bch2_dio_write_flush(dio); 2453a1ee777bSKent Overstreet if (!sync) 2454a1ee777bSKent Overstreet return -EIOCBQUEUED; 2455a1ee777bSKent Overstreet } 2456182c7bbfSKent Overstreet 2457a7ecd30cSKent Overstreet bch2_pagecache_block_put(inode); 2458182c7bbfSKent Overstreet 2459182c7bbfSKent Overstreet if (dio->free_iov) 2460182c7bbfSKent Overstreet kfree(dio->iter.__iov); 2461a1ee777bSKent Overstreet 2462a1ee777bSKent Overstreet ret = dio->op.error ?: ((long) dio->written << 9); 2463182c7bbfSKent Overstreet bio_put(&dio->op.wbio.bio); 2464182c7bbfSKent Overstreet 2465182c7bbfSKent Overstreet /* inode->i_dio_count is our ref on inode and thus bch_fs */ 2466182c7bbfSKent Overstreet inode_dio_end(&inode->v); 2467182c7bbfSKent Overstreet 2468182c7bbfSKent Overstreet if (ret < 0) 2469182c7bbfSKent Overstreet ret = bch2_err_class(ret); 2470182c7bbfSKent Overstreet 2471182c7bbfSKent Overstreet if (!sync) { 2472182c7bbfSKent Overstreet req->ki_complete(req, ret); 2473182c7bbfSKent Overstreet ret = -EIOCBQUEUED; 2474182c7bbfSKent Overstreet } 2475182c7bbfSKent Overstreet return ret; 2476182c7bbfSKent Overstreet } 2477182c7bbfSKent Overstreet 2478182c7bbfSKent Overstreet static __always_inline void bch2_dio_write_end(struct dio_write *dio) 2479182c7bbfSKent Overstreet { 2480182c7bbfSKent Overstreet struct bch_fs *c = dio->op.c; 2481182c7bbfSKent Overstreet struct kiocb *req = dio->req; 2482182c7bbfSKent Overstreet struct bch_inode_info *inode = dio->inode; 2483182c7bbfSKent Overstreet struct bio *bio = &dio->op.wbio.bio; 2484182c7bbfSKent Overstreet 2485182c7bbfSKent Overstreet req->ki_pos += (u64) dio->op.written << 9; 2486182c7bbfSKent Overstreet dio->written += dio->op.written; 2487182c7bbfSKent Overstreet 24886b1b186aSKent Overstreet if (dio->extending) { 2489182c7bbfSKent Overstreet spin_lock(&inode->v.i_lock); 2490182c7bbfSKent Overstreet if (req->ki_pos > inode->v.i_size) 2491182c7bbfSKent Overstreet i_size_write(&inode->v, req->ki_pos); 2492182c7bbfSKent Overstreet spin_unlock(&inode->v.i_lock); 24936b1b186aSKent Overstreet } 24946b1b186aSKent Overstreet 24956b1b186aSKent Overstreet if (dio->op.i_sectors_delta || dio->quota_res.sectors) { 24966b1b186aSKent Overstreet mutex_lock(&inode->ei_quota_lock); 24976b1b186aSKent Overstreet __i_sectors_acct(c, inode, &dio->quota_res, dio->op.i_sectors_delta); 24986b1b186aSKent Overstreet __bch2_quota_reservation_put(c, inode, &dio->quota_res); 24996b1b186aSKent Overstreet mutex_unlock(&inode->ei_quota_lock); 25006b1b186aSKent Overstreet } 2501182c7bbfSKent Overstreet 2502182c7bbfSKent Overstreet bio_release_pages(bio, false); 2503182c7bbfSKent Overstreet 2504182c7bbfSKent Overstreet if (unlikely(dio->op.error)) 2505182c7bbfSKent Overstreet set_bit(EI_INODE_ERROR, &inode->ei_flags); 2506182c7bbfSKent Overstreet } 2507182c7bbfSKent Overstreet 25084d868d18SKent Overstreet static __always_inline long bch2_dio_write_loop(struct dio_write *dio) 25091c6fdbd8SKent Overstreet { 2510182c7bbfSKent Overstreet struct bch_fs *c = dio->op.c; 25111c6fdbd8SKent Overstreet struct kiocb *req = dio->req; 2512182c7bbfSKent Overstreet struct address_space *mapping = dio->mapping; 2513182c7bbfSKent Overstreet struct bch_inode_info *inode = dio->inode; 251401ad6737SKent Overstreet struct bch_io_opts opts; 25159a3df993SKent Overstreet struct bio *bio = &dio->op.wbio.bio; 2516eb8e6e9cSKent Overstreet unsigned unaligned, iter_count; 2517eb8e6e9cSKent Overstreet bool sync = dio->sync, dropped_locks; 25181c6fdbd8SKent Overstreet long ret; 25191c6fdbd8SKent Overstreet 252001ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 252101ad6737SKent Overstreet 25221c6fdbd8SKent Overstreet while (1) { 2523eb8e6e9cSKent Overstreet iter_count = dio->iter.count; 2524eb8e6e9cSKent Overstreet 2525182c7bbfSKent Overstreet EBUG_ON(current->faults_disabled_mapping); 25261c6fdbd8SKent Overstreet current->faults_disabled_mapping = mapping; 25271c6fdbd8SKent Overstreet 25281c6fdbd8SKent Overstreet ret = bio_iov_iter_get_pages(bio, &dio->iter); 25291c6fdbd8SKent Overstreet 2530eb8e6e9cSKent Overstreet dropped_locks = fdm_dropped_locks(); 2531eb8e6e9cSKent Overstreet 25321c6fdbd8SKent Overstreet current->faults_disabled_mapping = NULL; 25331c6fdbd8SKent Overstreet 2534eb8e6e9cSKent Overstreet /* 2535eb8e6e9cSKent Overstreet * If the fault handler returned an error but also signalled 2536eb8e6e9cSKent Overstreet * that it dropped & retook ei_pagecache_lock, we just need to 2537eb8e6e9cSKent Overstreet * re-shoot down the page cache and retry: 2538eb8e6e9cSKent Overstreet */ 2539eb8e6e9cSKent Overstreet if (dropped_locks && ret) 2540eb8e6e9cSKent Overstreet ret = 0; 2541eb8e6e9cSKent Overstreet 25421c6fdbd8SKent Overstreet if (unlikely(ret < 0)) 25431c6fdbd8SKent Overstreet goto err; 25441c6fdbd8SKent Overstreet 2545eb8e6e9cSKent Overstreet if (unlikely(dropped_locks)) { 2546eb8e6e9cSKent Overstreet ret = write_invalidate_inode_pages_range(mapping, 2547eb8e6e9cSKent Overstreet req->ki_pos, 2548eb8e6e9cSKent Overstreet req->ki_pos + iter_count - 1); 2549eb8e6e9cSKent Overstreet if (unlikely(ret)) 2550eb8e6e9cSKent Overstreet goto err; 2551eb8e6e9cSKent Overstreet 2552eb8e6e9cSKent Overstreet if (!bio->bi_iter.bi_size) 2553eb8e6e9cSKent Overstreet continue; 2554eb8e6e9cSKent Overstreet } 2555eb8e6e9cSKent Overstreet 25560a426c32SKent Overstreet unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1); 25570a426c32SKent Overstreet bio->bi_iter.bi_size -= unaligned; 25580a426c32SKent Overstreet iov_iter_revert(&dio->iter, unaligned); 25590a426c32SKent Overstreet 25600a426c32SKent Overstreet if (!bio->bi_iter.bi_size) { 25610a426c32SKent Overstreet /* 25620a426c32SKent Overstreet * bio_iov_iter_get_pages was only able to get < 25630a426c32SKent Overstreet * blocksize worth of pages: 25640a426c32SKent Overstreet */ 25650a426c32SKent Overstreet ret = -EFAULT; 25660a426c32SKent Overstreet goto err; 25670a426c32SKent Overstreet } 25680a426c32SKent Overstreet 256901ad6737SKent Overstreet bch2_write_op_init(&dio->op, c, opts); 2570182c7bbfSKent Overstreet dio->op.end_io = sync 2571182c7bbfSKent Overstreet ? NULL 2572182c7bbfSKent Overstreet : bch2_dio_write_loop_async; 2573042a1f26SKent Overstreet dio->op.target = dio->op.opts.foreground_target; 2574042a1f26SKent Overstreet dio->op.write_point = writepoint_hashed((unsigned long) current); 2575042a1f26SKent Overstreet dio->op.nr_replicas = dio->op.opts.data_replicas; 25768c6d298aSKent Overstreet dio->op.subvol = inode->ei_subvol; 2577042a1f26SKent Overstreet dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9); 2578a8b3a677SKent Overstreet dio->op.devs_need_flush = &inode->ei_devs_need_flush; 2579042a1f26SKent Overstreet 25801df3e199SKent Overstreet if (sync) 25811df3e199SKent Overstreet dio->op.flags |= BCH_WRITE_SYNC; 2582a6336910SKent Overstreet dio->op.flags |= BCH_WRITE_CHECK_ENOSPC; 2583042a1f26SKent Overstreet 25846b1b186aSKent Overstreet ret = bch2_quota_reservation_add(c, inode, &dio->quota_res, 25856b1b186aSKent Overstreet bio_sectors(bio), true); 25866b1b186aSKent Overstreet if (unlikely(ret)) 25876b1b186aSKent Overstreet goto err; 25886b1b186aSKent Overstreet 2589042a1f26SKent Overstreet ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio), 2590042a1f26SKent Overstreet dio->op.opts.data_replicas, 0); 2591042a1f26SKent Overstreet if (unlikely(ret) && 2592182c7bbfSKent Overstreet !bch2_dio_write_check_allocated(dio)) 2593042a1f26SKent Overstreet goto err; 25941c6fdbd8SKent Overstreet 25951c6fdbd8SKent Overstreet task_io_account_write(bio->bi_iter.bi_size); 25961c6fdbd8SKent Overstreet 2597182c7bbfSKent Overstreet if (unlikely(dio->iter.count) && 2598182c7bbfSKent Overstreet !dio->sync && 2599182c7bbfSKent Overstreet !dio->loop && 2600182c7bbfSKent Overstreet bch2_dio_write_copy_iov(dio)) 2601286d8ad0SKent Overstreet dio->sync = sync = true; 2602182c7bbfSKent Overstreet 26031c6fdbd8SKent Overstreet dio->loop = true; 2604f8f30863SKent Overstreet closure_call(&dio->op.cl, bch2_write, NULL, NULL); 26051c6fdbd8SKent Overstreet 2606182c7bbfSKent Overstreet if (!sync) 26071c6fdbd8SKent Overstreet return -EIOCBQUEUED; 26089a3df993SKent Overstreet 2609182c7bbfSKent Overstreet bch2_dio_write_end(dio); 26109a3df993SKent Overstreet 2611182c7bbfSKent Overstreet if (likely(!dio->iter.count) || dio->op.error) 26121c6fdbd8SKent Overstreet break; 2613f8f30863SKent Overstreet 26141c6fdbd8SKent Overstreet bio_reset(bio, NULL, REQ_OP_WRITE); 26151c6fdbd8SKent Overstreet } 2616182c7bbfSKent Overstreet out: 2617182c7bbfSKent Overstreet return bch2_dio_write_done(dio); 26181c6fdbd8SKent Overstreet err: 2619182c7bbfSKent Overstreet dio->op.error = ret; 26201c6fdbd8SKent Overstreet 26215468f119SKent Overstreet bio_release_pages(bio, false); 26226b1b186aSKent Overstreet 26236b1b186aSKent Overstreet bch2_quota_reservation_put(c, inode, &dio->quota_res); 2624182c7bbfSKent Overstreet goto out; 26251c6fdbd8SKent Overstreet } 26261c6fdbd8SKent Overstreet 26274d868d18SKent Overstreet static noinline __cold void bch2_dio_write_continue(struct dio_write *dio) 26281c6fdbd8SKent Overstreet { 2629182c7bbfSKent Overstreet struct mm_struct *mm = dio->mm; 26301c6fdbd8SKent Overstreet 2631182c7bbfSKent Overstreet bio_reset(&dio->op.wbio.bio, NULL, REQ_OP_WRITE); 2632182c7bbfSKent Overstreet 2633182c7bbfSKent Overstreet if (mm) 2634182c7bbfSKent Overstreet kthread_use_mm(mm); 26351c6fdbd8SKent Overstreet bch2_dio_write_loop(dio); 2636182c7bbfSKent Overstreet if (mm) 2637182c7bbfSKent Overstreet kthread_unuse_mm(mm); 26381c6fdbd8SKent Overstreet } 26391c6fdbd8SKent Overstreet 26404d868d18SKent Overstreet static void bch2_dio_write_loop_async(struct bch_write_op *op) 26414d868d18SKent Overstreet { 26424d868d18SKent Overstreet struct dio_write *dio = container_of(op, struct dio_write, op); 26434d868d18SKent Overstreet 26444d868d18SKent Overstreet bch2_dio_write_end(dio); 26454d868d18SKent Overstreet 26464d868d18SKent Overstreet if (likely(!dio->iter.count) || dio->op.error) 26474d868d18SKent Overstreet bch2_dio_write_done(dio); 26484d868d18SKent Overstreet else 26494d868d18SKent Overstreet bch2_dio_write_continue(dio); 26504d868d18SKent Overstreet } 26514d868d18SKent Overstreet 26521c6fdbd8SKent Overstreet static noinline 26531c6fdbd8SKent Overstreet ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) 26541c6fdbd8SKent Overstreet { 26551c6fdbd8SKent Overstreet struct file *file = req->ki_filp; 265654847d25SKent Overstreet struct address_space *mapping = file->f_mapping; 26571c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 26581c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 26591c6fdbd8SKent Overstreet struct dio_write *dio; 26601c6fdbd8SKent Overstreet struct bio *bio; 26617edcfbfeSKent Overstreet bool locked = true, extending; 26621c6fdbd8SKent Overstreet ssize_t ret; 26631c6fdbd8SKent Overstreet 26647edcfbfeSKent Overstreet prefetch(&c->opts); 26657edcfbfeSKent Overstreet prefetch((void *) &c->opts + 64); 26667edcfbfeSKent Overstreet prefetch(&inode->ei_inode); 26677edcfbfeSKent Overstreet prefetch((void *) &inode->ei_inode + 64); 26681c6fdbd8SKent Overstreet 26697edcfbfeSKent Overstreet inode_lock(&inode->v); 26707edcfbfeSKent Overstreet 26717edcfbfeSKent Overstreet ret = generic_write_checks(req, iter); 26727edcfbfeSKent Overstreet if (unlikely(ret <= 0)) 26737edcfbfeSKent Overstreet goto err; 26747edcfbfeSKent Overstreet 26757edcfbfeSKent Overstreet ret = file_remove_privs(file); 26767edcfbfeSKent Overstreet if (unlikely(ret)) 26777edcfbfeSKent Overstreet goto err; 26787edcfbfeSKent Overstreet 26797edcfbfeSKent Overstreet ret = file_update_time(file); 26807edcfbfeSKent Overstreet if (unlikely(ret)) 26817edcfbfeSKent Overstreet goto err; 26821c6fdbd8SKent Overstreet 2683919dbbd1SKent Overstreet if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) 26847edcfbfeSKent Overstreet goto err; 26857edcfbfeSKent Overstreet 26867edcfbfeSKent Overstreet inode_dio_begin(&inode->v); 2687a7ecd30cSKent Overstreet bch2_pagecache_block_get(inode); 26887edcfbfeSKent Overstreet 26897edcfbfeSKent Overstreet extending = req->ki_pos + iter->count > inode->v.i_size; 26907edcfbfeSKent Overstreet if (!extending) { 26917edcfbfeSKent Overstreet inode_unlock(&inode->v); 26927edcfbfeSKent Overstreet locked = false; 26937edcfbfeSKent Overstreet } 26941c6fdbd8SKent Overstreet 26951c6fdbd8SKent Overstreet bio = bio_alloc_bioset(NULL, 26964d126dc8SKent Overstreet bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), 26971c6fdbd8SKent Overstreet REQ_OP_WRITE, 26981c6fdbd8SKent Overstreet GFP_KERNEL, 26991c6fdbd8SKent Overstreet &c->dio_write_bioset); 27009a3df993SKent Overstreet dio = container_of(bio, struct dio_write, op.wbio.bio); 27011c6fdbd8SKent Overstreet dio->req = req; 2702182c7bbfSKent Overstreet dio->mapping = mapping; 2703182c7bbfSKent Overstreet dio->inode = inode; 2704ed484030SKent Overstreet dio->mm = current->mm; 27051c6fdbd8SKent Overstreet dio->loop = false; 27066b1b186aSKent Overstreet dio->extending = extending; 27077edcfbfeSKent Overstreet dio->sync = is_sync_kiocb(req) || extending; 2708a1ee777bSKent Overstreet dio->flush = iocb_is_dsync(req) && !c->opts.journal_flush_disabled; 27091c6fdbd8SKent Overstreet dio->free_iov = false; 27101c6fdbd8SKent Overstreet dio->quota_res.sectors = 0; 2711042a1f26SKent Overstreet dio->written = 0; 27121c6fdbd8SKent Overstreet dio->iter = *iter; 2713182c7bbfSKent Overstreet dio->op.c = c; 27149a3df993SKent Overstreet 2715a023127aSKent Overstreet if (unlikely(mapping->nrpages)) { 271654847d25SKent Overstreet ret = write_invalidate_inode_pages_range(mapping, 271754847d25SKent Overstreet req->ki_pos, 271854847d25SKent Overstreet req->ki_pos + iter->count - 1); 271954847d25SKent Overstreet if (unlikely(ret)) 272054847d25SKent Overstreet goto err_put_bio; 2721a023127aSKent Overstreet } 272254847d25SKent Overstreet 27237edcfbfeSKent Overstreet ret = bch2_dio_write_loop(dio); 27241c6fdbd8SKent Overstreet err: 27257edcfbfeSKent Overstreet if (locked) 27267edcfbfeSKent Overstreet inode_unlock(&inode->v); 27277edcfbfeSKent Overstreet return ret; 27287edcfbfeSKent Overstreet err_put_bio: 2729a7ecd30cSKent Overstreet bch2_pagecache_block_put(inode); 27301c6fdbd8SKent Overstreet bio_put(bio); 27317edcfbfeSKent Overstreet inode_dio_end(&inode->v); 27327edcfbfeSKent Overstreet goto err; 27331c6fdbd8SKent Overstreet } 27341c6fdbd8SKent Overstreet 27357edcfbfeSKent Overstreet ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from) 27361c6fdbd8SKent Overstreet { 27371c6fdbd8SKent Overstreet struct file *file = iocb->ki_filp; 27387edcfbfeSKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 27391c6fdbd8SKent Overstreet ssize_t ret; 27401c6fdbd8SKent Overstreet 27415c1ef830SKent Overstreet if (iocb->ki_flags & IOCB_DIRECT) { 27425c1ef830SKent Overstreet ret = bch2_direct_write(iocb, from); 27435c1ef830SKent Overstreet goto out; 27445c1ef830SKent Overstreet } 27451c6fdbd8SKent Overstreet 27467edcfbfeSKent Overstreet inode_lock(&inode->v); 27477edcfbfeSKent Overstreet 27487edcfbfeSKent Overstreet ret = generic_write_checks(iocb, from); 27497edcfbfeSKent Overstreet if (ret <= 0) 27507edcfbfeSKent Overstreet goto unlock; 27517edcfbfeSKent Overstreet 27521c6fdbd8SKent Overstreet ret = file_remove_privs(file); 27531c6fdbd8SKent Overstreet if (ret) 27547edcfbfeSKent Overstreet goto unlock; 27551c6fdbd8SKent Overstreet 27561c6fdbd8SKent Overstreet ret = file_update_time(file); 27571c6fdbd8SKent Overstreet if (ret) 27587edcfbfeSKent Overstreet goto unlock; 27591c6fdbd8SKent Overstreet 27607edcfbfeSKent Overstreet ret = bch2_buffered_write(iocb, from); 27611c6fdbd8SKent Overstreet if (likely(ret > 0)) 27621c6fdbd8SKent Overstreet iocb->ki_pos += ret; 27637edcfbfeSKent Overstreet unlock: 27641c6fdbd8SKent Overstreet inode_unlock(&inode->v); 27651c6fdbd8SKent Overstreet 27667edcfbfeSKent Overstreet if (ret > 0) 27671c6fdbd8SKent Overstreet ret = generic_write_sync(iocb, ret); 27685c1ef830SKent Overstreet out: 27695c1ef830SKent Overstreet return bch2_err_class(ret); 27701c6fdbd8SKent Overstreet } 27711c6fdbd8SKent Overstreet 27721c6fdbd8SKent Overstreet /* fsync: */ 27731c6fdbd8SKent Overstreet 277468a2054dSKent Overstreet /* 277568a2054dSKent Overstreet * inode->ei_inode.bi_journal_seq won't be up to date since it's set in an 277668a2054dSKent Overstreet * insert trigger: look up the btree inode instead 277768a2054dSKent Overstreet */ 2778a8b3a677SKent Overstreet static int bch2_flush_inode(struct bch_fs *c, 2779a8b3a677SKent Overstreet struct bch_inode_info *inode) 278068a2054dSKent Overstreet { 2781a8b3a677SKent Overstreet struct bch_inode_unpacked u; 278268a2054dSKent Overstreet int ret; 278368a2054dSKent Overstreet 278468a2054dSKent Overstreet if (c->opts.journal_flush_disabled) 278568a2054dSKent Overstreet return 0; 278668a2054dSKent Overstreet 2787a8b3a677SKent Overstreet ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u); 278868a2054dSKent Overstreet if (ret) 278968a2054dSKent Overstreet return ret; 279068a2054dSKent Overstreet 2791a8b3a677SKent Overstreet return bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?: 2792a8b3a677SKent Overstreet bch2_inode_flush_nocow_writes(c, inode); 279368a2054dSKent Overstreet } 279468a2054dSKent Overstreet 27951c6fdbd8SKent Overstreet int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync) 27961c6fdbd8SKent Overstreet { 27971c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 27981c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 279968a2054dSKent Overstreet int ret, ret2, ret3; 28001c6fdbd8SKent Overstreet 28011c6fdbd8SKent Overstreet ret = file_write_and_wait_range(file, start, end); 280268a2054dSKent Overstreet ret2 = sync_inode_metadata(&inode->v, 1); 2803a8b3a677SKent Overstreet ret3 = bch2_flush_inode(c, inode); 28041c6fdbd8SKent Overstreet 28055c1ef830SKent Overstreet return bch2_err_class(ret ?: ret2 ?: ret3); 28061c6fdbd8SKent Overstreet } 28071c6fdbd8SKent Overstreet 28081c6fdbd8SKent Overstreet /* truncate: */ 28091c6fdbd8SKent Overstreet 28106fed42bbSKent Overstreet static inline int range_has_data(struct bch_fs *c, u32 subvol, 28111c6fdbd8SKent Overstreet struct bpos start, 28121c6fdbd8SKent Overstreet struct bpos end) 28131c6fdbd8SKent Overstreet { 2814424eb881SKent Overstreet struct btree_trans trans; 281567e0dd8fSKent Overstreet struct btree_iter iter; 28161c6fdbd8SKent Overstreet struct bkey_s_c k; 28171c6fdbd8SKent Overstreet int ret = 0; 28181c6fdbd8SKent Overstreet 281920bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 28206fed42bbSKent Overstreet retry: 28216fed42bbSKent Overstreet bch2_trans_begin(&trans); 28226fed42bbSKent Overstreet 28236fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, subvol, &start.snapshot); 28246fed42bbSKent Overstreet if (ret) 28256fed42bbSKent Overstreet goto err; 2826424eb881SKent Overstreet 2827c72f687aSKent Overstreet for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_extents, start, end, 0, k, ret) 28284ad6aa46SBrian Foster if (bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k)) { 28291c6fdbd8SKent Overstreet ret = 1; 28301c6fdbd8SKent Overstreet break; 28311c6fdbd8SKent Overstreet } 28326fed42bbSKent Overstreet start = iter.pos; 283367e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 28346fed42bbSKent Overstreet err: 2835549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 28366fed42bbSKent Overstreet goto retry; 28371c6fdbd8SKent Overstreet 28389a796fdbSKent Overstreet bch2_trans_exit(&trans); 28399a796fdbSKent Overstreet return ret; 28401c6fdbd8SKent Overstreet } 28411c6fdbd8SKent Overstreet 2842959f7368SKent Overstreet static int __bch2_truncate_folio(struct bch_inode_info *inode, 28431c6fdbd8SKent Overstreet pgoff_t index, loff_t start, loff_t end) 28441c6fdbd8SKent Overstreet { 28451c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 28461c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 28473342ac13SKent Overstreet struct bch_folio *s; 28481c6fdbd8SKent Overstreet unsigned start_offset = start & (PAGE_SIZE - 1); 28491c6fdbd8SKent Overstreet unsigned end_offset = ((end - 1) & (PAGE_SIZE - 1)) + 1; 2850a99b1cafSKent Overstreet unsigned i; 285130bff594SKent Overstreet struct folio *folio; 2852b19d307dSKent Overstreet s64 i_sectors_delta = 0; 28531c6fdbd8SKent Overstreet int ret = 0; 28546b9857b2SBrian Foster u64 end_pos; 28551c6fdbd8SKent Overstreet 285630bff594SKent Overstreet folio = filemap_lock_folio(mapping, index); 2857b6898917SKent Overstreet if (IS_ERR_OR_NULL(folio)) { 28581c6fdbd8SKent Overstreet /* 28591c6fdbd8SKent Overstreet * XXX: we're doing two index lookups when we end up reading the 286030bff594SKent Overstreet * folio 28611c6fdbd8SKent Overstreet */ 28626fed42bbSKent Overstreet ret = range_has_data(c, inode->ei_subvol, 2863c72f687aSKent Overstreet POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT)), 2864c72f687aSKent Overstreet POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT) + PAGE_SECTORS)); 28651c6fdbd8SKent Overstreet if (ret <= 0) 28661c6fdbd8SKent Overstreet return ret; 28671c6fdbd8SKent Overstreet 286830bff594SKent Overstreet folio = __filemap_get_folio(mapping, index, 286930bff594SKent Overstreet FGP_LOCK|FGP_CREAT, GFP_KERNEL); 2870b6898917SKent Overstreet if (unlikely(IS_ERR_OR_NULL(folio))) { 28711c6fdbd8SKent Overstreet ret = -ENOMEM; 28721c6fdbd8SKent Overstreet goto out; 28731c6fdbd8SKent Overstreet } 28741c6fdbd8SKent Overstreet } 28751c6fdbd8SKent Overstreet 2876959f7368SKent Overstreet BUG_ON(start >= folio_end_pos(folio)); 2877959f7368SKent Overstreet BUG_ON(end <= folio_pos(folio)); 2878959f7368SKent Overstreet 2879959f7368SKent Overstreet start_offset = max(start, folio_pos(folio)) - folio_pos(folio); 28806b9857b2SBrian Foster end_offset = min_t(u64, end, folio_end_pos(folio)) - folio_pos(folio); 2881959f7368SKent Overstreet 2882959f7368SKent Overstreet /* Folio boundary? Nothing to do */ 2883959f7368SKent Overstreet if (start_offset == 0 && 2884959f7368SKent Overstreet end_offset == folio_size(folio)) { 2885959f7368SKent Overstreet ret = 0; 2886959f7368SKent Overstreet goto unlock; 2887959f7368SKent Overstreet } 2888959f7368SKent Overstreet 288930bff594SKent Overstreet s = bch2_folio_create(folio, 0); 2890a99b1cafSKent Overstreet if (!s) { 2891a99b1cafSKent Overstreet ret = -ENOMEM; 2892a99b1cafSKent Overstreet goto unlock; 2893a99b1cafSKent Overstreet } 2894a99b1cafSKent Overstreet 289530bff594SKent Overstreet if (!folio_test_uptodate(folio)) { 289630bff594SKent Overstreet ret = bch2_read_single_folio(folio, mapping); 28971c6fdbd8SKent Overstreet if (ret) 28981c6fdbd8SKent Overstreet goto unlock; 28991c6fdbd8SKent Overstreet } 29001c6fdbd8SKent Overstreet 290134fdcf06SKent Overstreet ret = bch2_folio_set(c, inode_inum(inode), &folio, 1); 290234fdcf06SKent Overstreet if (ret) 290334fdcf06SKent Overstreet goto unlock; 2904c437e153SKent Overstreet 2905a99b1cafSKent Overstreet for (i = round_up(start_offset, block_bytes(c)) >> 9; 2906a99b1cafSKent Overstreet i < round_down(end_offset, block_bytes(c)) >> 9; 2907a99b1cafSKent Overstreet i++) { 2908a99b1cafSKent Overstreet s->s[i].nr_replicas = 0; 2909a1774a05SKent Overstreet 2910a1774a05SKent Overstreet i_sectors_delta -= s->s[i].state == SECTOR_dirty; 2911a1774a05SKent Overstreet folio_sector_set(folio, s, i, SECTOR_unallocated); 2912a99b1cafSKent Overstreet } 2913a99b1cafSKent Overstreet 2914b19d307dSKent Overstreet i_sectors_acct(c, inode, NULL, i_sectors_delta); 2915b19d307dSKent Overstreet 291674163da7SKent Overstreet /* 291730bff594SKent Overstreet * Caller needs to know whether this folio will be written out by 291874163da7SKent Overstreet * writeback - doing an i_size update if necessary - or whether it will 29194ad6aa46SBrian Foster * be responsible for the i_size update. 29204ad6aa46SBrian Foster * 29214ad6aa46SBrian Foster * Note that we shouldn't ever see a folio beyond EOF, but check and 29224ad6aa46SBrian Foster * warn if so. This has been observed by failure to clean up folios 29234ad6aa46SBrian Foster * after a short write and there's still a chance reclaim will fix 29244ad6aa46SBrian Foster * things up. 292574163da7SKent Overstreet */ 29264ad6aa46SBrian Foster WARN_ON_ONCE(folio_pos(folio) >= inode->v.i_size); 29274ad6aa46SBrian Foster end_pos = folio_end_pos(folio); 29284ad6aa46SBrian Foster if (inode->v.i_size > folio_pos(folio)) 29296b9857b2SBrian Foster end_pos = min_t(u64, inode->v.i_size, end_pos); 2930bf98ee10SBrian Foster ret = s->s[folio_pos_to_s(folio, end_pos - 1)].state >= SECTOR_dirty; 293174163da7SKent Overstreet 293230bff594SKent Overstreet folio_zero_segment(folio, start_offset, end_offset); 2933a99b1cafSKent Overstreet 29341c6fdbd8SKent Overstreet /* 29351c6fdbd8SKent Overstreet * Bit of a hack - we don't want truncate to fail due to -ENOSPC. 29361c6fdbd8SKent Overstreet * 293730bff594SKent Overstreet * XXX: because we aren't currently tracking whether the folio has actual 29381c6fdbd8SKent Overstreet * data in it (vs. just 0s, or only partially written) this wrong. ick. 29391c6fdbd8SKent Overstreet */ 294030bff594SKent Overstreet BUG_ON(bch2_get_folio_disk_reservation(c, inode, folio, false)); 29411c6fdbd8SKent Overstreet 29429ba2eb25SKent Overstreet /* 29439ba2eb25SKent Overstreet * This removes any writeable userspace mappings; we need to force 29449ba2eb25SKent Overstreet * .page_mkwrite to be called again before any mmapped writes, to 29459ba2eb25SKent Overstreet * redirty the full page: 29469ba2eb25SKent Overstreet */ 294730bff594SKent Overstreet folio_mkclean(folio); 294830bff594SKent Overstreet filemap_dirty_folio(mapping, folio); 29491c6fdbd8SKent Overstreet unlock: 295030bff594SKent Overstreet folio_unlock(folio); 295130bff594SKent Overstreet folio_put(folio); 29521c6fdbd8SKent Overstreet out: 29531c6fdbd8SKent Overstreet return ret; 29541c6fdbd8SKent Overstreet } 29551c6fdbd8SKent Overstreet 2956959f7368SKent Overstreet static int bch2_truncate_folio(struct bch_inode_info *inode, loff_t from) 29571c6fdbd8SKent Overstreet { 2958959f7368SKent Overstreet return __bch2_truncate_folio(inode, from >> PAGE_SHIFT, 2959959f7368SKent Overstreet from, ANYSINT_MAX(loff_t)); 29601c6fdbd8SKent Overstreet } 29611c6fdbd8SKent Overstreet 2962959f7368SKent Overstreet static int bch2_truncate_folios(struct bch_inode_info *inode, 296374163da7SKent Overstreet loff_t start, loff_t end) 296474163da7SKent Overstreet { 2965959f7368SKent Overstreet int ret = __bch2_truncate_folio(inode, start >> PAGE_SHIFT, 296674163da7SKent Overstreet start, end); 296774163da7SKent Overstreet 296874163da7SKent Overstreet if (ret >= 0 && 296974163da7SKent Overstreet start >> PAGE_SHIFT != end >> PAGE_SHIFT) 2970959f7368SKent Overstreet ret = __bch2_truncate_folio(inode, 2971959f7368SKent Overstreet (end - 1) >> PAGE_SHIFT, 297274163da7SKent Overstreet start, end); 297374163da7SKent Overstreet return ret; 297474163da7SKent Overstreet } 297574163da7SKent Overstreet 297668a507a2SKent Overstreet static int bch2_extend(struct mnt_idmap *idmap, 297768a507a2SKent Overstreet struct bch_inode_info *inode, 2978e0541a93SKent Overstreet struct bch_inode_unpacked *inode_u, 2979e0541a93SKent Overstreet struct iattr *iattr) 29801c6fdbd8SKent Overstreet { 29811c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 29821c6fdbd8SKent Overstreet int ret; 29831c6fdbd8SKent Overstreet 2984e0541a93SKent Overstreet /* 2985e0541a93SKent Overstreet * sync appends: 29862925fc49SKent Overstreet * 29872925fc49SKent Overstreet * this has to be done _before_ extending i_size: 2988e0541a93SKent Overstreet */ 2989e0541a93SKent Overstreet ret = filemap_write_and_wait_range(mapping, inode_u->bi_size, S64_MAX); 29901c6fdbd8SKent Overstreet if (ret) 29911c6fdbd8SKent Overstreet return ret; 29921c6fdbd8SKent Overstreet 29931c6fdbd8SKent Overstreet truncate_setsize(&inode->v, iattr->ia_size); 29941c6fdbd8SKent Overstreet 299568a507a2SKent Overstreet return bch2_setattr_nonsize(idmap, inode, iattr); 29961c6fdbd8SKent Overstreet } 29971c6fdbd8SKent Overstreet 299854e2264eSKent Overstreet static int bch2_truncate_finish_fn(struct bch_inode_info *inode, 299954e2264eSKent Overstreet struct bch_inode_unpacked *bi, 300054e2264eSKent Overstreet void *p) 300154e2264eSKent Overstreet { 300254e2264eSKent Overstreet bi->bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; 300354e2264eSKent Overstreet return 0; 300454e2264eSKent Overstreet } 300554e2264eSKent Overstreet 300654e2264eSKent Overstreet static int bch2_truncate_start_fn(struct bch_inode_info *inode, 300754e2264eSKent Overstreet struct bch_inode_unpacked *bi, void *p) 300854e2264eSKent Overstreet { 300954e2264eSKent Overstreet u64 *new_i_size = p; 301054e2264eSKent Overstreet 301154e2264eSKent Overstreet bi->bi_flags |= BCH_INODE_I_SIZE_DIRTY; 301254e2264eSKent Overstreet bi->bi_size = *new_i_size; 301354e2264eSKent Overstreet return 0; 301454e2264eSKent Overstreet } 301554e2264eSKent Overstreet 301668a507a2SKent Overstreet int bch2_truncate(struct mnt_idmap *idmap, 301768a507a2SKent Overstreet struct bch_inode_info *inode, struct iattr *iattr) 30181c6fdbd8SKent Overstreet { 30191c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 30201c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 3021e0541a93SKent Overstreet struct bch_inode_unpacked inode_u; 302254e2264eSKent Overstreet u64 new_i_size = iattr->ia_size; 30232e87eae1SKent Overstreet s64 i_sectors_delta = 0; 30241c6fdbd8SKent Overstreet int ret = 0; 30251c6fdbd8SKent Overstreet 302668a507a2SKent Overstreet /* 302778d66ab1SDan Robertson * If the truncate call with change the size of the file, the 302878d66ab1SDan Robertson * cmtimes should be updated. If the size will not change, we 302978d66ab1SDan Robertson * do not need to update the cmtimes. 303068a507a2SKent Overstreet */ 303178d66ab1SDan Robertson if (iattr->ia_size != inode->v.i_size) { 303268a507a2SKent Overstreet if (!(iattr->ia_valid & ATTR_MTIME)) 303368a507a2SKent Overstreet ktime_get_coarse_real_ts64(&iattr->ia_mtime); 303468a507a2SKent Overstreet if (!(iattr->ia_valid & ATTR_CTIME)) 303568a507a2SKent Overstreet ktime_get_coarse_real_ts64(&iattr->ia_ctime); 303668a507a2SKent Overstreet iattr->ia_valid |= ATTR_MTIME|ATTR_CTIME; 303778d66ab1SDan Robertson } 303868a507a2SKent Overstreet 30391c6fdbd8SKent Overstreet inode_dio_wait(&inode->v); 3040a7ecd30cSKent Overstreet bch2_pagecache_block_get(inode); 30411c6fdbd8SKent Overstreet 30426fed42bbSKent Overstreet ret = bch2_inode_find_by_inum(c, inode_inum(inode), &inode_u); 3043e0541a93SKent Overstreet if (ret) 3044e0541a93SKent Overstreet goto err; 30451c6fdbd8SKent Overstreet 3046c45d473dSKent Overstreet /* 3047c45d473dSKent Overstreet * check this before next assertion; on filesystem error our normal 3048c45d473dSKent Overstreet * invariants are a bit broken (truncate has to truncate the page cache 3049c45d473dSKent Overstreet * before the inode). 3050c45d473dSKent Overstreet */ 3051c45d473dSKent Overstreet ret = bch2_journal_error(&c->journal); 3052c45d473dSKent Overstreet if (ret) 3053c45d473dSKent Overstreet goto err; 3054c45d473dSKent Overstreet 30558eb71e9eSKent Overstreet WARN_ONCE(!test_bit(EI_INODE_ERROR, &inode->ei_flags) && 30568eb71e9eSKent Overstreet inode->v.i_size < inode_u.bi_size, 30578eb71e9eSKent Overstreet "truncate spotted in mem i_size < btree i_size: %llu < %llu\n", 30588eb71e9eSKent Overstreet (u64) inode->v.i_size, inode_u.bi_size); 3059e0541a93SKent Overstreet 3060e0541a93SKent Overstreet if (iattr->ia_size > inode->v.i_size) { 306168a507a2SKent Overstreet ret = bch2_extend(idmap, inode, &inode_u, iattr); 306254e2264eSKent Overstreet goto err; 30631c6fdbd8SKent Overstreet } 30641c6fdbd8SKent Overstreet 306568a507a2SKent Overstreet iattr->ia_valid &= ~ATTR_SIZE; 306668a507a2SKent Overstreet 3067959f7368SKent Overstreet ret = bch2_truncate_folio(inode, iattr->ia_size); 306874163da7SKent Overstreet if (unlikely(ret < 0)) 306954e2264eSKent Overstreet goto err; 30701c6fdbd8SKent Overstreet 30716cc3535dSKent Overstreet /* 30726cc3535dSKent Overstreet * When extending, we're going to write the new i_size to disk 30736cc3535dSKent Overstreet * immediately so we need to flush anything above the current on disk 30746cc3535dSKent Overstreet * i_size first: 30756cc3535dSKent Overstreet * 30766cc3535dSKent Overstreet * Also, when extending we need to flush the page that i_size currently 30776cc3535dSKent Overstreet * straddles - if it's mapped to userspace, we need to ensure that 30786cc3535dSKent Overstreet * userspace has to redirty it and call .mkwrite -> set_page_dirty 30796cc3535dSKent Overstreet * again to allocate the part of the page that was extended. 30806cc3535dSKent Overstreet */ 3081e0541a93SKent Overstreet if (iattr->ia_size > inode_u.bi_size) 30821c6fdbd8SKent Overstreet ret = filemap_write_and_wait_range(mapping, 3083e0541a93SKent Overstreet inode_u.bi_size, 30841c6fdbd8SKent Overstreet iattr->ia_size - 1); 30851c6fdbd8SKent Overstreet else if (iattr->ia_size & (PAGE_SIZE - 1)) 30861c6fdbd8SKent Overstreet ret = filemap_write_and_wait_range(mapping, 30871c6fdbd8SKent Overstreet round_down(iattr->ia_size, PAGE_SIZE), 30881c6fdbd8SKent Overstreet iattr->ia_size - 1); 30891c6fdbd8SKent Overstreet if (ret) 309054e2264eSKent Overstreet goto err; 30911c6fdbd8SKent Overstreet 309254e2264eSKent Overstreet mutex_lock(&inode->ei_update_lock); 309354e2264eSKent Overstreet ret = bch2_write_inode(c, inode, bch2_truncate_start_fn, 309454e2264eSKent Overstreet &new_i_size, 0); 309554e2264eSKent Overstreet mutex_unlock(&inode->ei_update_lock); 30961c6fdbd8SKent Overstreet 30971c6fdbd8SKent Overstreet if (unlikely(ret)) 309854e2264eSKent Overstreet goto err; 30991c6fdbd8SKent Overstreet 31001c6fdbd8SKent Overstreet truncate_setsize(&inode->v, iattr->ia_size); 31011c6fdbd8SKent Overstreet 31028c6d298aSKent Overstreet ret = bch2_fpunch(c, inode_inum(inode), 3103a99b1cafSKent Overstreet round_up(iattr->ia_size, block_bytes(c)) >> 9, 310468a2054dSKent Overstreet U64_MAX, &i_sectors_delta); 31052e87eae1SKent Overstreet i_sectors_acct(c, inode, NULL, i_sectors_delta); 31062e87eae1SKent Overstreet 3107b33bf1bcSKent Overstreet bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks && 3108b33bf1bcSKent Overstreet !bch2_journal_error(&c->journal), c, 3109b33bf1bcSKent Overstreet "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", 3110b33bf1bcSKent Overstreet inode->v.i_ino, (u64) inode->v.i_blocks, 3111b33bf1bcSKent Overstreet inode->ei_inode.bi_sectors); 31121c6fdbd8SKent Overstreet if (unlikely(ret)) 311354e2264eSKent Overstreet goto err; 31141c6fdbd8SKent Overstreet 311554e2264eSKent Overstreet mutex_lock(&inode->ei_update_lock); 311668a507a2SKent Overstreet ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL, 0); 311754e2264eSKent Overstreet mutex_unlock(&inode->ei_update_lock); 311868a507a2SKent Overstreet 311968a507a2SKent Overstreet ret = bch2_setattr_nonsize(idmap, inode, iattr); 312054e2264eSKent Overstreet err: 3121a7ecd30cSKent Overstreet bch2_pagecache_block_put(inode); 31225c1ef830SKent Overstreet return bch2_err_class(ret); 31231c6fdbd8SKent Overstreet } 31241c6fdbd8SKent Overstreet 31251c6fdbd8SKent Overstreet /* fallocate: */ 31261c6fdbd8SKent Overstreet 3127050197b1SKent Overstreet static int inode_update_times_fn(struct bch_inode_info *inode, 3128050197b1SKent Overstreet struct bch_inode_unpacked *bi, void *p) 3129050197b1SKent Overstreet { 3130050197b1SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 3131050197b1SKent Overstreet 3132050197b1SKent Overstreet bi->bi_mtime = bi->bi_ctime = bch2_current_time(c); 3133050197b1SKent Overstreet return 0; 3134050197b1SKent Overstreet } 3135050197b1SKent Overstreet 31362e87eae1SKent Overstreet static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len) 31371c6fdbd8SKent Overstreet { 31381c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 313974163da7SKent Overstreet u64 end = offset + len; 314074163da7SKent Overstreet u64 block_start = round_up(offset, block_bytes(c)); 314174163da7SKent Overstreet u64 block_end = round_down(end, block_bytes(c)); 314274163da7SKent Overstreet bool truncated_last_page; 31431c6fdbd8SKent Overstreet int ret = 0; 31441c6fdbd8SKent Overstreet 3145959f7368SKent Overstreet ret = bch2_truncate_folios(inode, offset, end); 314674163da7SKent Overstreet if (unlikely(ret < 0)) 31471c6fdbd8SKent Overstreet goto err; 31481c6fdbd8SKent Overstreet 314974163da7SKent Overstreet truncated_last_page = ret; 31501c6fdbd8SKent Overstreet 315174163da7SKent Overstreet truncate_pagecache_range(&inode->v, offset, end - 1); 31521c6fdbd8SKent Overstreet 315374163da7SKent Overstreet if (block_start < block_end) { 31542e87eae1SKent Overstreet s64 i_sectors_delta = 0; 31552e87eae1SKent Overstreet 31568c6d298aSKent Overstreet ret = bch2_fpunch(c, inode_inum(inode), 315774163da7SKent Overstreet block_start >> 9, block_end >> 9, 31582e87eae1SKent Overstreet &i_sectors_delta); 31592e87eae1SKent Overstreet i_sectors_acct(c, inode, NULL, i_sectors_delta); 31602e87eae1SKent Overstreet } 3161050197b1SKent Overstreet 3162050197b1SKent Overstreet mutex_lock(&inode->ei_update_lock); 316374163da7SKent Overstreet if (end >= inode->v.i_size && !truncated_last_page) { 316474163da7SKent Overstreet ret = bch2_write_inode_size(c, inode, inode->v.i_size, 316574163da7SKent Overstreet ATTR_MTIME|ATTR_CTIME); 316674163da7SKent Overstreet } else { 3167050197b1SKent Overstreet ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, 316874163da7SKent Overstreet ATTR_MTIME|ATTR_CTIME); 316974163da7SKent Overstreet } 3170050197b1SKent Overstreet mutex_unlock(&inode->ei_update_lock); 31711c6fdbd8SKent Overstreet err: 31721c6fdbd8SKent Overstreet return ret; 31731c6fdbd8SKent Overstreet } 31741c6fdbd8SKent Overstreet 31752e87eae1SKent Overstreet static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, 31765f786787SKent Overstreet loff_t offset, loff_t len, 31775f786787SKent Overstreet bool insert) 31781c6fdbd8SKent Overstreet { 31791c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 31801c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 318107a1006aSKent Overstreet struct bkey_buf copy; 3182d69f41d6SKent Overstreet struct btree_trans trans; 318367e0dd8fSKent Overstreet struct btree_iter src, dst, del; 31845f786787SKent Overstreet loff_t shift, new_size; 31855f786787SKent Overstreet u64 src_start; 318650dc0f69SKent Overstreet int ret = 0; 31871c6fdbd8SKent Overstreet 31881c6fdbd8SKent Overstreet if ((offset | len) & (block_bytes(c) - 1)) 31891c6fdbd8SKent Overstreet return -EINVAL; 31901c6fdbd8SKent Overstreet 31915f786787SKent Overstreet if (insert) { 31925f786787SKent Overstreet if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len) 319374163da7SKent Overstreet return -EFBIG; 31945f786787SKent Overstreet 31955f786787SKent Overstreet if (offset >= inode->v.i_size) 319674163da7SKent Overstreet return -EINVAL; 31975f786787SKent Overstreet 31985f786787SKent Overstreet src_start = U64_MAX; 31995f786787SKent Overstreet shift = len; 32005f786787SKent Overstreet } else { 32011c6fdbd8SKent Overstreet if (offset + len >= inode->v.i_size) 320274163da7SKent Overstreet return -EINVAL; 32031c6fdbd8SKent Overstreet 32045f786787SKent Overstreet src_start = offset + len; 32055f786787SKent Overstreet shift = -len; 32065f786787SKent Overstreet } 32071c6fdbd8SKent Overstreet 32085f786787SKent Overstreet new_size = inode->v.i_size + shift; 32091c6fdbd8SKent Overstreet 32101c6fdbd8SKent Overstreet ret = write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX); 32111c6fdbd8SKent Overstreet if (ret) 321274163da7SKent Overstreet return ret; 32131c6fdbd8SKent Overstreet 32145f786787SKent Overstreet if (insert) { 32155f786787SKent Overstreet i_size_write(&inode->v, new_size); 32165f786787SKent Overstreet mutex_lock(&inode->ei_update_lock); 32175f786787SKent Overstreet ret = bch2_write_inode_size(c, inode, new_size, 32185f786787SKent Overstreet ATTR_MTIME|ATTR_CTIME); 32195f786787SKent Overstreet mutex_unlock(&inode->ei_update_lock); 32205f786787SKent Overstreet } else { 32212e87eae1SKent Overstreet s64 i_sectors_delta = 0; 32222e87eae1SKent Overstreet 32238c6d298aSKent Overstreet ret = bch2_fpunch(c, inode_inum(inode), 32242e87eae1SKent Overstreet offset >> 9, (offset + len) >> 9, 32252e87eae1SKent Overstreet &i_sectors_delta); 32262e87eae1SKent Overstreet i_sectors_acct(c, inode, NULL, i_sectors_delta); 32272e87eae1SKent Overstreet 322863095894SKent Overstreet if (ret) 322974163da7SKent Overstreet return ret; 32305f786787SKent Overstreet } 32318ef231bdSKent Overstreet 323250dc0f69SKent Overstreet bch2_bkey_buf_init(©); 3233f7beb4caSKent Overstreet bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); 323467e0dd8fSKent Overstreet bch2_trans_iter_init(&trans, &src, BTREE_ID_extents, 32355f786787SKent Overstreet POS(inode->v.i_ino, src_start >> 9), 323663095894SKent Overstreet BTREE_ITER_INTENT); 323767e0dd8fSKent Overstreet bch2_trans_copy_iter(&dst, &src); 323867e0dd8fSKent Overstreet bch2_trans_copy_iter(&del, &src); 32395f786787SKent Overstreet 3240549d173cSKent Overstreet while (ret == 0 || 3241549d173cSKent Overstreet bch2_err_matches(ret, BCH_ERR_transaction_restart)) { 324263095894SKent Overstreet struct disk_reservation disk_res = 324363095894SKent Overstreet bch2_disk_reservation_init(c, 0); 324463095894SKent Overstreet struct bkey_i delete; 324563095894SKent Overstreet struct bkey_s_c k; 324663095894SKent Overstreet struct bpos next_pos; 32475f786787SKent Overstreet struct bpos move_pos = POS(inode->v.i_ino, offset >> 9); 32485f786787SKent Overstreet struct bpos atomic_end; 32492d594dfbSKent Overstreet unsigned trigger_flags = 0; 32506fed42bbSKent Overstreet u32 snapshot; 32516fed42bbSKent Overstreet 32526fed42bbSKent Overstreet bch2_trans_begin(&trans); 32536fed42bbSKent Overstreet 32546fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, 32556fed42bbSKent Overstreet inode->ei_subvol, &snapshot); 32566fed42bbSKent Overstreet if (ret) 32576fed42bbSKent Overstreet continue; 32586fed42bbSKent Overstreet 32596fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&src, snapshot); 32606fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&dst, snapshot); 32616fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&del, snapshot); 326263095894SKent Overstreet 3263700c25b3SKent Overstreet bch2_trans_begin(&trans); 3264700c25b3SKent Overstreet 32655f786787SKent Overstreet k = insert 326667e0dd8fSKent Overstreet ? bch2_btree_iter_peek_prev(&src) 3267c72f687aSKent Overstreet : bch2_btree_iter_peek_upto(&src, POS(inode->v.i_ino, U64_MAX)); 326863095894SKent Overstreet if ((ret = bkey_err(k))) 326950dc0f69SKent Overstreet continue; 327063095894SKent Overstreet 327163095894SKent Overstreet if (!k.k || k.k->p.inode != inode->v.i_ino) 327263095894SKent Overstreet break; 327363095894SKent Overstreet 32745f786787SKent Overstreet if (insert && 3275e88a75ebSKent Overstreet bkey_le(k.k->p, POS(inode->v.i_ino, offset >> 9))) 32765f786787SKent Overstreet break; 32775f786787SKent Overstreet reassemble: 327807a1006aSKent Overstreet bch2_bkey_buf_reassemble(©, c, k); 32795f786787SKent Overstreet 32805f786787SKent Overstreet if (insert && 3281e88a75ebSKent Overstreet bkey_lt(bkey_start_pos(k.k), move_pos)) 328235189e09SKent Overstreet bch2_cut_front(move_pos, copy.k); 32835f786787SKent Overstreet 328435189e09SKent Overstreet copy.k->k.p.offset += shift >> 9; 328567e0dd8fSKent Overstreet bch2_btree_iter_set_pos(&dst, bkey_start_pos(©.k->k)); 32861c6fdbd8SKent Overstreet 328767e0dd8fSKent Overstreet ret = bch2_extent_atomic_end(&trans, &dst, copy.k, &atomic_end); 32883c7f3b7aSKent Overstreet if (ret) 328950dc0f69SKent Overstreet continue; 3290e2d9912cSKent Overstreet 3291e88a75ebSKent Overstreet if (!bkey_eq(atomic_end, copy.k->k.p)) { 32925f786787SKent Overstreet if (insert) { 32935f786787SKent Overstreet move_pos = atomic_end; 32945f786787SKent Overstreet move_pos.offset -= shift >> 9; 32955f786787SKent Overstreet goto reassemble; 32965f786787SKent Overstreet } else { 3297085ab693SKent Overstreet bch2_cut_back(atomic_end, copy.k); 32985f786787SKent Overstreet } 32995f786787SKent Overstreet } 33005f786787SKent Overstreet 330163095894SKent Overstreet bkey_init(&delete.k); 3302283eda57SKent Overstreet delete.k.p = copy.k->k.p; 3303283eda57SKent Overstreet delete.k.size = copy.k->k.size; 3304283eda57SKent Overstreet delete.k.p.offset -= shift >> 9; 330567e0dd8fSKent Overstreet bch2_btree_iter_set_pos(&del, bkey_start_pos(&delete.k)); 33061c6fdbd8SKent Overstreet 33075f786787SKent Overstreet next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p; 330863095894SKent Overstreet 33097c4ca54aSKent Overstreet if (copy.k->k.size != k.k->size) { 331063095894SKent Overstreet /* We might end up splitting compressed extents: */ 331163095894SKent Overstreet unsigned nr_ptrs = 33124de77495SKent Overstreet bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k)); 331363095894SKent Overstreet 331463095894SKent Overstreet ret = bch2_disk_reservation_get(c, &disk_res, 331535189e09SKent Overstreet copy.k->k.size, nr_ptrs, 33161c6fdbd8SKent Overstreet BCH_DISK_RESERVATION_NOFAIL); 33171c6fdbd8SKent Overstreet BUG_ON(ret); 331863095894SKent Overstreet } 33191c6fdbd8SKent Overstreet 332067e0dd8fSKent Overstreet ret = bch2_btree_iter_traverse(&del) ?: 332167e0dd8fSKent Overstreet bch2_trans_update(&trans, &del, &delete, trigger_flags) ?: 332267e0dd8fSKent Overstreet bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?: 332368a2054dSKent Overstreet bch2_trans_commit(&trans, &disk_res, NULL, 33242d594dfbSKent Overstreet BTREE_INSERT_NOFAIL); 33251c6fdbd8SKent Overstreet bch2_disk_reservation_put(c, &disk_res); 332650dc0f69SKent Overstreet 332763095894SKent Overstreet if (!ret) 332867e0dd8fSKent Overstreet bch2_btree_iter_set_pos(&src, next_pos); 332950dc0f69SKent Overstreet } 333067e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &del); 333167e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &dst); 333267e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &src); 333350dc0f69SKent Overstreet bch2_trans_exit(&trans); 333450dc0f69SKent Overstreet bch2_bkey_buf_exit(©, c); 333563095894SKent Overstreet 33368ef231bdSKent Overstreet if (ret) 333774163da7SKent Overstreet return ret; 33381c6fdbd8SKent Overstreet 333974163da7SKent Overstreet mutex_lock(&inode->ei_update_lock); 33405f786787SKent Overstreet if (!insert) { 33418ef231bdSKent Overstreet i_size_write(&inode->v, new_size); 33428ef231bdSKent Overstreet ret = bch2_write_inode_size(c, inode, new_size, 33438ef231bdSKent Overstreet ATTR_MTIME|ATTR_CTIME); 334474163da7SKent Overstreet } else { 334574163da7SKent Overstreet /* We need an inode update to update bi_journal_seq for fsync: */ 334674163da7SKent Overstreet ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, 334774163da7SKent Overstreet ATTR_MTIME|ATTR_CTIME); 33485f786787SKent Overstreet } 334974163da7SKent Overstreet mutex_unlock(&inode->ei_update_lock); 33501c6fdbd8SKent Overstreet return ret; 33511c6fdbd8SKent Overstreet } 33521c6fdbd8SKent Overstreet 3353694015c2SKent Overstreet static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, 3354694015c2SKent Overstreet u64 start_sector, u64 end_sector) 33551c6fdbd8SKent Overstreet { 33561c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 3357190fa7afSKent Overstreet struct btree_trans trans; 335867e0dd8fSKent Overstreet struct btree_iter iter; 3359694015c2SKent Overstreet struct bpos end_pos = POS(inode->v.i_ino, end_sector); 336001ad6737SKent Overstreet struct bch_io_opts opts; 3361694015c2SKent Overstreet int ret = 0; 33621c6fdbd8SKent Overstreet 336301ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 3364f7beb4caSKent Overstreet bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512); 33651c6fdbd8SKent Overstreet 336667e0dd8fSKent Overstreet bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, 3367694015c2SKent Overstreet POS(inode->v.i_ino, start_sector), 3368190fa7afSKent Overstreet BTREE_ITER_SLOTS|BTREE_ITER_INTENT); 33691c6fdbd8SKent Overstreet 3370e88a75ebSKent Overstreet while (!ret && bkey_lt(iter.pos, end_pos)) { 33712e87eae1SKent Overstreet s64 i_sectors_delta = 0; 3372190fa7afSKent Overstreet struct quota_res quota_res = { 0 }; 33731c6fdbd8SKent Overstreet struct bkey_s_c k; 3374694015c2SKent Overstreet unsigned sectors; 3375a09818c7SKent Overstreet bool is_allocation; 3376a09818c7SKent Overstreet u64 hole_start, hole_end; 33776fed42bbSKent Overstreet u32 snapshot; 33781c6fdbd8SKent Overstreet 3379163e885aSKent Overstreet bch2_trans_begin(&trans); 3380a8abd3a7SKent Overstreet 33816fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, 33826fed42bbSKent Overstreet inode->ei_subvol, &snapshot); 33836fed42bbSKent Overstreet if (ret) 33846fed42bbSKent Overstreet goto bkey_err; 33856fed42bbSKent Overstreet 33866fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&iter, snapshot); 33876fed42bbSKent Overstreet 338867e0dd8fSKent Overstreet k = bch2_btree_iter_peek_slot(&iter); 33890f238367SKent Overstreet if ((ret = bkey_err(k))) 33900f238367SKent Overstreet goto bkey_err; 33911c6fdbd8SKent Overstreet 3392a09818c7SKent Overstreet hole_start = iter.pos.offset; 3393a09818c7SKent Overstreet hole_end = bpos_min(k.k->p, end_pos).offset; 3394a09818c7SKent Overstreet is_allocation = bkey_extent_is_allocation(k.k); 3395a09818c7SKent Overstreet 33961c6fdbd8SKent Overstreet /* already reserved */ 339779203111SKent Overstreet if (bkey_extent_is_reservation(k) && 339879203111SKent Overstreet bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) { 339967e0dd8fSKent Overstreet bch2_btree_iter_advance(&iter); 34001c6fdbd8SKent Overstreet continue; 34011c6fdbd8SKent Overstreet } 34021c6fdbd8SKent Overstreet 3403190fa7afSKent Overstreet if (bkey_extent_is_data(k.k) && 3404190fa7afSKent Overstreet !(mode & FALLOC_FL_ZERO_RANGE)) { 340567e0dd8fSKent Overstreet bch2_btree_iter_advance(&iter); 34061c6fdbd8SKent Overstreet continue; 34071c6fdbd8SKent Overstreet } 34081c6fdbd8SKent Overstreet 3409a09818c7SKent Overstreet if (!(mode & FALLOC_FL_ZERO_RANGE)) { 3410*4198bf03SKent Overstreet /* 3411*4198bf03SKent Overstreet * Lock ordering - can't be holding btree locks while 3412*4198bf03SKent Overstreet * blocking on a folio lock: 3413*4198bf03SKent Overstreet */ 3414*4198bf03SKent Overstreet if (bch2_clamp_data_hole(&inode->v, 3415*4198bf03SKent Overstreet &hole_start, 3416*4198bf03SKent Overstreet &hole_end, 3417*4198bf03SKent Overstreet opts.data_replicas, true)) 3418a09818c7SKent Overstreet ret = drop_locks_do(&trans, 3419a09818c7SKent Overstreet (bch2_clamp_data_hole(&inode->v, 3420a09818c7SKent Overstreet &hole_start, 3421a09818c7SKent Overstreet &hole_end, 3422*4198bf03SKent Overstreet opts.data_replicas, false), 0)); 3423a09818c7SKent Overstreet bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, hole_start)); 3424a8b3a677SKent Overstreet 3425a09818c7SKent Overstreet if (ret) 3426a09818c7SKent Overstreet goto bkey_err; 34271c6fdbd8SKent Overstreet 3428a09818c7SKent Overstreet if (hole_start == hole_end) 3429a09818c7SKent Overstreet continue; 3430a09818c7SKent Overstreet } 3431a09818c7SKent Overstreet 3432a09818c7SKent Overstreet sectors = hole_end - hole_start; 3433a09818c7SKent Overstreet 3434a09818c7SKent Overstreet if (!is_allocation) { 34351c6fdbd8SKent Overstreet ret = bch2_quota_reservation_add(c, inode, 3436a09818c7SKent Overstreet "a_res, sectors, true); 34371c6fdbd8SKent Overstreet if (unlikely(ret)) 34380f238367SKent Overstreet goto bkey_err; 34391c6fdbd8SKent Overstreet } 34401c6fdbd8SKent Overstreet 344170de7a47SKent Overstreet ret = bch2_extent_fallocate(&trans, inode_inum(inode), &iter, 344270de7a47SKent Overstreet sectors, opts, &i_sectors_delta, 344370de7a47SKent Overstreet writepoint_hashed((unsigned long) current)); 34448810386fSKent Overstreet if (ret) 34458810386fSKent Overstreet goto bkey_err; 344670de7a47SKent Overstreet 34472e87eae1SKent Overstreet i_sectors_acct(c, inode, "a_res, i_sectors_delta); 3448a09818c7SKent Overstreet 3449a09818c7SKent Overstreet drop_locks_do(&trans, 3450a09818c7SKent Overstreet (mark_pagecache_reserved(inode, hole_start, iter.pos.offset), 0)); 34510f238367SKent Overstreet bkey_err: 3452190fa7afSKent Overstreet bch2_quota_reservation_put(c, inode, "a_res); 3453549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 34541c6fdbd8SKent Overstreet ret = 0; 345550dc0f69SKent Overstreet } 345674163da7SKent Overstreet 3457098ef98dSKent Overstreet if (bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)) { 345874163da7SKent Overstreet struct quota_res quota_res = { 0 }; 345974163da7SKent Overstreet s64 i_sectors_delta = 0; 346074163da7SKent Overstreet 346174163da7SKent Overstreet bch2_fpunch_at(&trans, &iter, inode_inum(inode), 346274163da7SKent Overstreet end_sector, &i_sectors_delta); 346374163da7SKent Overstreet i_sectors_acct(c, inode, "a_res, i_sectors_delta); 346474163da7SKent Overstreet bch2_quota_reservation_put(c, inode, "a_res); 346574163da7SKent Overstreet } 346674163da7SKent Overstreet 346767e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 3468694015c2SKent Overstreet bch2_trans_exit(&trans); 3469694015c2SKent Overstreet return ret; 3470694015c2SKent Overstreet } 347150dc0f69SKent Overstreet 3472694015c2SKent Overstreet static long bchfs_fallocate(struct bch_inode_info *inode, int mode, 3473694015c2SKent Overstreet loff_t offset, loff_t len) 3474694015c2SKent Overstreet { 3475694015c2SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 347674163da7SKent Overstreet u64 end = offset + len; 347774163da7SKent Overstreet u64 block_start = round_down(offset, block_bytes(c)); 347874163da7SKent Overstreet u64 block_end = round_up(end, block_bytes(c)); 347974163da7SKent Overstreet bool truncated_last_page = false; 348074163da7SKent Overstreet int ret, ret2 = 0; 3481694015c2SKent Overstreet 3482694015c2SKent Overstreet if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) { 3483694015c2SKent Overstreet ret = inode_newsize_ok(&inode->v, end); 3484694015c2SKent Overstreet if (ret) 348574163da7SKent Overstreet return ret; 3486694015c2SKent Overstreet } 3487694015c2SKent Overstreet 3488694015c2SKent Overstreet if (mode & FALLOC_FL_ZERO_RANGE) { 3489959f7368SKent Overstreet ret = bch2_truncate_folios(inode, offset, end); 349074163da7SKent Overstreet if (unlikely(ret < 0)) 349174163da7SKent Overstreet return ret; 3492694015c2SKent Overstreet 349374163da7SKent Overstreet truncated_last_page = ret; 3494694015c2SKent Overstreet 3495694015c2SKent Overstreet truncate_pagecache_range(&inode->v, offset, end - 1); 349674163da7SKent Overstreet 349774163da7SKent Overstreet block_start = round_up(offset, block_bytes(c)); 349874163da7SKent Overstreet block_end = round_down(end, block_bytes(c)); 3499694015c2SKent Overstreet } 3500694015c2SKent Overstreet 3501694015c2SKent Overstreet ret = __bchfs_fallocate(inode, mode, block_start >> 9, block_end >> 9); 3502e0541a93SKent Overstreet 3503e0541a93SKent Overstreet /* 350474163da7SKent Overstreet * On -ENOSPC in ZERO_RANGE mode, we still want to do the inode update, 350574163da7SKent Overstreet * so that the VFS cache i_size is consistent with the btree i_size: 3506e0541a93SKent Overstreet */ 350774163da7SKent Overstreet if (ret && 3508098ef98dSKent Overstreet !(bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE))) 350974163da7SKent Overstreet return ret; 35101c6fdbd8SKent Overstreet 351174163da7SKent Overstreet if (mode & FALLOC_FL_KEEP_SIZE && end > inode->v.i_size) 3512e0541a93SKent Overstreet end = inode->v.i_size; 351374163da7SKent Overstreet 351474163da7SKent Overstreet if (end >= inode->v.i_size && 351574163da7SKent Overstreet (((mode & FALLOC_FL_ZERO_RANGE) && !truncated_last_page) || 351674163da7SKent Overstreet !(mode & FALLOC_FL_KEEP_SIZE))) { 351774163da7SKent Overstreet spin_lock(&inode->v.i_lock); 3518e0541a93SKent Overstreet i_size_write(&inode->v, end); 351974163da7SKent Overstreet spin_unlock(&inode->v.i_lock); 3520e0541a93SKent Overstreet 35211c6fdbd8SKent Overstreet mutex_lock(&inode->ei_update_lock); 352274163da7SKent Overstreet ret2 = bch2_write_inode_size(c, inode, end, 0); 35231c6fdbd8SKent Overstreet mutex_unlock(&inode->ei_update_lock); 35241c6fdbd8SKent Overstreet } 352574163da7SKent Overstreet 352674163da7SKent Overstreet return ret ?: ret2; 35271c6fdbd8SKent Overstreet } 35281c6fdbd8SKent Overstreet 35291c6fdbd8SKent Overstreet long bch2_fallocate_dispatch(struct file *file, int mode, 35301c6fdbd8SKent Overstreet loff_t offset, loff_t len) 35311c6fdbd8SKent Overstreet { 35321c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 35332a9101a9SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 35342a9101a9SKent Overstreet long ret; 35352a9101a9SKent Overstreet 3536d94189adSKent Overstreet if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fallocate)) 35372a9101a9SKent Overstreet return -EROFS; 35381c6fdbd8SKent Overstreet 353974163da7SKent Overstreet inode_lock(&inode->v); 354074163da7SKent Overstreet inode_dio_wait(&inode->v); 3541a7ecd30cSKent Overstreet bch2_pagecache_block_get(inode); 354274163da7SKent Overstreet 354307bfcc0bSKent Overstreet ret = file_modified(file); 354407bfcc0bSKent Overstreet if (ret) 354507bfcc0bSKent Overstreet goto err; 354607bfcc0bSKent Overstreet 35471c6fdbd8SKent Overstreet if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE))) 35482a9101a9SKent Overstreet ret = bchfs_fallocate(inode, mode, offset, len); 35492a9101a9SKent Overstreet else if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE)) 35502a9101a9SKent Overstreet ret = bchfs_fpunch(inode, offset, len); 35512a9101a9SKent Overstreet else if (mode == FALLOC_FL_INSERT_RANGE) 35522a9101a9SKent Overstreet ret = bchfs_fcollapse_finsert(inode, offset, len, true); 35532a9101a9SKent Overstreet else if (mode == FALLOC_FL_COLLAPSE_RANGE) 35542a9101a9SKent Overstreet ret = bchfs_fcollapse_finsert(inode, offset, len, false); 35552a9101a9SKent Overstreet else 35562a9101a9SKent Overstreet ret = -EOPNOTSUPP; 355707bfcc0bSKent Overstreet err: 3558a7ecd30cSKent Overstreet bch2_pagecache_block_put(inode); 355974163da7SKent Overstreet inode_unlock(&inode->v); 3560d94189adSKent Overstreet bch2_write_ref_put(c, BCH_WRITE_REF_fallocate); 35611c6fdbd8SKent Overstreet 35625c1ef830SKent Overstreet return bch2_err_class(ret); 35631c6fdbd8SKent Overstreet } 35641c6fdbd8SKent Overstreet 3565c72f687aSKent Overstreet /* 3566c72f687aSKent Overstreet * Take a quota reservation for unallocated blocks in a given file range 3567c72f687aSKent Overstreet * Does not check pagecache 3568c72f687aSKent Overstreet */ 3569e8540e56SKent Overstreet static int quota_reserve_range(struct bch_inode_info *inode, 3570e8540e56SKent Overstreet struct quota_res *res, 3571e8540e56SKent Overstreet u64 start, u64 end) 3572e8540e56SKent Overstreet { 3573e8540e56SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 3574e8540e56SKent Overstreet struct btree_trans trans; 3575e8540e56SKent Overstreet struct btree_iter iter; 3576e8540e56SKent Overstreet struct bkey_s_c k; 3577e8540e56SKent Overstreet u32 snapshot; 3578e8540e56SKent Overstreet u64 sectors = end - start; 3579e8540e56SKent Overstreet u64 pos = start; 3580e8540e56SKent Overstreet int ret; 3581e8540e56SKent Overstreet 3582e8540e56SKent Overstreet bch2_trans_init(&trans, c, 0, 0); 3583e8540e56SKent Overstreet retry: 3584e8540e56SKent Overstreet bch2_trans_begin(&trans); 3585e8540e56SKent Overstreet 3586e8540e56SKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, inode->ei_subvol, &snapshot); 3587e8540e56SKent Overstreet if (ret) 3588e8540e56SKent Overstreet goto err; 3589e8540e56SKent Overstreet 3590e8540e56SKent Overstreet bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, 3591e8540e56SKent Overstreet SPOS(inode->v.i_ino, pos, snapshot), 0); 3592e8540e56SKent Overstreet 3593e8540e56SKent Overstreet while (!(ret = btree_trans_too_many_iters(&trans)) && 3594e8540e56SKent Overstreet (k = bch2_btree_iter_peek_upto(&iter, POS(inode->v.i_ino, end - 1))).k && 3595e8540e56SKent Overstreet !(ret = bkey_err(k))) { 3596e8540e56SKent Overstreet if (bkey_extent_is_allocation(k.k)) { 3597e8540e56SKent Overstreet u64 s = min(end, k.k->p.offset) - 3598e8540e56SKent Overstreet max(start, bkey_start_offset(k.k)); 3599e8540e56SKent Overstreet BUG_ON(s > sectors); 3600e8540e56SKent Overstreet sectors -= s; 3601e8540e56SKent Overstreet } 3602e8540e56SKent Overstreet bch2_btree_iter_advance(&iter); 3603e8540e56SKent Overstreet } 3604e8540e56SKent Overstreet pos = iter.pos.offset; 3605e8540e56SKent Overstreet bch2_trans_iter_exit(&trans, &iter); 3606e8540e56SKent Overstreet err: 3607e8540e56SKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 3608e8540e56SKent Overstreet goto retry; 3609e8540e56SKent Overstreet 3610e8540e56SKent Overstreet bch2_trans_exit(&trans); 3611e8540e56SKent Overstreet 3612e8540e56SKent Overstreet if (ret) 3613e8540e56SKent Overstreet return ret; 3614e8540e56SKent Overstreet 3615e8540e56SKent Overstreet return bch2_quota_reservation_add(c, inode, res, sectors, true); 3616e8540e56SKent Overstreet } 3617e8540e56SKent Overstreet 361876426098SKent Overstreet loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, 361976426098SKent Overstreet struct file *file_dst, loff_t pos_dst, 362076426098SKent Overstreet loff_t len, unsigned remap_flags) 362176426098SKent Overstreet { 362276426098SKent Overstreet struct bch_inode_info *src = file_bch_inode(file_src); 362376426098SKent Overstreet struct bch_inode_info *dst = file_bch_inode(file_dst); 362476426098SKent Overstreet struct bch_fs *c = src->v.i_sb->s_fs_info; 3625e8540e56SKent Overstreet struct quota_res quota_res = { 0 }; 36262e87eae1SKent Overstreet s64 i_sectors_delta = 0; 3627677fc056SKent Overstreet u64 aligned_len; 362876426098SKent Overstreet loff_t ret = 0; 362976426098SKent Overstreet 363076426098SKent Overstreet if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY)) 363176426098SKent Overstreet return -EINVAL; 363276426098SKent Overstreet 363376426098SKent Overstreet if (remap_flags & REMAP_FILE_DEDUP) 363476426098SKent Overstreet return -EOPNOTSUPP; 363576426098SKent Overstreet 363676426098SKent Overstreet if ((pos_src & (block_bytes(c) - 1)) || 363776426098SKent Overstreet (pos_dst & (block_bytes(c) - 1))) 363876426098SKent Overstreet return -EINVAL; 363976426098SKent Overstreet 364076426098SKent Overstreet if (src == dst && 364176426098SKent Overstreet abs(pos_src - pos_dst) < len) 364276426098SKent Overstreet return -EINVAL; 364376426098SKent Overstreet 364476426098SKent Overstreet bch2_lock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); 364576426098SKent Overstreet 364676426098SKent Overstreet inode_dio_wait(&src->v); 364776426098SKent Overstreet inode_dio_wait(&dst->v); 364876426098SKent Overstreet 364976426098SKent Overstreet ret = generic_remap_file_range_prep(file_src, pos_src, 365076426098SKent Overstreet file_dst, pos_dst, 365176426098SKent Overstreet &len, remap_flags); 365276426098SKent Overstreet if (ret < 0 || len == 0) 36532e87eae1SKent Overstreet goto err; 365476426098SKent Overstreet 3655677fc056SKent Overstreet aligned_len = round_up((u64) len, block_bytes(c)); 365676426098SKent Overstreet 365776426098SKent Overstreet ret = write_invalidate_inode_pages_range(dst->v.i_mapping, 3658677fc056SKent Overstreet pos_dst, pos_dst + len - 1); 365976426098SKent Overstreet if (ret) 36602e87eae1SKent Overstreet goto err; 366176426098SKent Overstreet 3662e8540e56SKent Overstreet ret = quota_reserve_range(dst, "a_res, pos_dst >> 9, 3663e8540e56SKent Overstreet (pos_dst + aligned_len) >> 9); 3664e8540e56SKent Overstreet if (ret) 3665e8540e56SKent Overstreet goto err; 3666e8540e56SKent Overstreet 3667e8540e56SKent Overstreet file_update_time(file_dst); 3668e8540e56SKent Overstreet 3669dcfc593fSKent Overstreet mark_pagecache_unallocated(src, pos_src >> 9, 3670dcfc593fSKent Overstreet (pos_src + aligned_len) >> 9); 367176426098SKent Overstreet 36722e87eae1SKent Overstreet ret = bch2_remap_range(c, 36736fed42bbSKent Overstreet inode_inum(dst), pos_dst >> 9, 36746fed42bbSKent Overstreet inode_inum(src), pos_src >> 9, 367576426098SKent Overstreet aligned_len >> 9, 36762e87eae1SKent Overstreet pos_dst + len, &i_sectors_delta); 36772e87eae1SKent Overstreet if (ret < 0) 36782e87eae1SKent Overstreet goto err; 367976426098SKent Overstreet 36802e87eae1SKent Overstreet /* 36812e87eae1SKent Overstreet * due to alignment, we might have remapped slightly more than requsted 36822e87eae1SKent Overstreet */ 3683677fc056SKent Overstreet ret = min((u64) ret << 9, (u64) len); 36842e87eae1SKent Overstreet 3685e8540e56SKent Overstreet i_sectors_acct(c, dst, "a_res, i_sectors_delta); 36862e87eae1SKent Overstreet 36872e87eae1SKent Overstreet spin_lock(&dst->v.i_lock); 3688677fc056SKent Overstreet if (pos_dst + ret > dst->v.i_size) 3689677fc056SKent Overstreet i_size_write(&dst->v, pos_dst + ret); 36902e87eae1SKent Overstreet spin_unlock(&dst->v.i_lock); 3691e7084c9cSKent Overstreet 369268a2054dSKent Overstreet if ((file_dst->f_flags & (__O_SYNC | O_DSYNC)) || 369368a2054dSKent Overstreet IS_SYNC(file_inode(file_dst))) 3694a8b3a677SKent Overstreet ret = bch2_flush_inode(c, dst); 36952e87eae1SKent Overstreet err: 3696e8540e56SKent Overstreet bch2_quota_reservation_put(c, dst, "a_res); 369776426098SKent Overstreet bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); 369876426098SKent Overstreet 36995c1ef830SKent Overstreet return bch2_err_class(ret); 370076426098SKent Overstreet } 370176426098SKent Overstreet 37021c6fdbd8SKent Overstreet /* fseek: */ 37031c6fdbd8SKent Overstreet 3704a09818c7SKent Overstreet static int folio_data_offset(struct folio *folio, loff_t pos, 3705a09818c7SKent Overstreet unsigned min_replicas) 37061c6fdbd8SKent Overstreet { 370730bff594SKent Overstreet struct bch_folio *s = bch2_folio(folio); 3708a86a92cbSKent Overstreet unsigned i, sectors = folio_sectors(folio); 3709f81b648dSKent Overstreet 3710543ef2ebSKent Overstreet if (s) 3711bf98ee10SBrian Foster for (i = folio_pos_to_s(folio, pos); i < sectors; i++) 3712a09818c7SKent Overstreet if (s->s[i].state >= SECTOR_dirty && 3713a09818c7SKent Overstreet s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas) 3714bf98ee10SBrian Foster return i << SECTOR_SHIFT; 3715f57a6a5dSKent Overstreet 3716543ef2ebSKent Overstreet return -1; 37171c6fdbd8SKent Overstreet } 37181c6fdbd8SKent Overstreet 3719543ef2ebSKent Overstreet static loff_t bch2_seek_pagecache_data(struct inode *vinode, 37201c6fdbd8SKent Overstreet loff_t start_offset, 3721a09818c7SKent Overstreet loff_t end_offset, 3722*4198bf03SKent Overstreet unsigned min_replicas, 3723*4198bf03SKent Overstreet bool nonblock) 37241c6fdbd8SKent Overstreet { 37251c6fdbd8SKent Overstreet struct folio_batch fbatch; 37261c6fdbd8SKent Overstreet pgoff_t start_index = start_offset >> PAGE_SHIFT; 37271c6fdbd8SKent Overstreet pgoff_t end_index = end_offset >> PAGE_SHIFT; 37281c6fdbd8SKent Overstreet pgoff_t index = start_index; 37291c6fdbd8SKent Overstreet unsigned i; 3730543ef2ebSKent Overstreet loff_t ret; 3731543ef2ebSKent Overstreet int offset; 37321c6fdbd8SKent Overstreet 37331c6fdbd8SKent Overstreet folio_batch_init(&fbatch); 37341c6fdbd8SKent Overstreet 37351c6fdbd8SKent Overstreet while (filemap_get_folios(vinode->i_mapping, 37361c6fdbd8SKent Overstreet &index, end_index, &fbatch)) { 37371c6fdbd8SKent Overstreet for (i = 0; i < folio_batch_count(&fbatch); i++) { 37381c6fdbd8SKent Overstreet struct folio *folio = fbatch.folios[i]; 37391c6fdbd8SKent Overstreet 3740*4198bf03SKent Overstreet if (!nonblock) { 37411c6fdbd8SKent Overstreet folio_lock(folio); 3742*4198bf03SKent Overstreet } else if (!folio_trylock(folio)) { 3743*4198bf03SKent Overstreet folio_batch_release(&fbatch); 3744*4198bf03SKent Overstreet return -EAGAIN; 3745*4198bf03SKent Overstreet } 3746*4198bf03SKent Overstreet 3747543ef2ebSKent Overstreet offset = folio_data_offset(folio, 3748a09818c7SKent Overstreet max(folio_pos(folio), start_offset), 3749a09818c7SKent Overstreet min_replicas); 3750543ef2ebSKent Overstreet if (offset >= 0) { 3751a86a92cbSKent Overstreet ret = clamp(folio_pos(folio) + offset, 3752543ef2ebSKent Overstreet start_offset, end_offset); 37531c6fdbd8SKent Overstreet folio_unlock(folio); 37541c6fdbd8SKent Overstreet folio_batch_release(&fbatch); 3755543ef2ebSKent Overstreet return ret; 37561c6fdbd8SKent Overstreet } 37571c6fdbd8SKent Overstreet folio_unlock(folio); 37581c6fdbd8SKent Overstreet } 37591c6fdbd8SKent Overstreet folio_batch_release(&fbatch); 37601c6fdbd8SKent Overstreet cond_resched(); 37611c6fdbd8SKent Overstreet } 37621c6fdbd8SKent Overstreet 37631c6fdbd8SKent Overstreet return end_offset; 37641c6fdbd8SKent Overstreet } 37651c6fdbd8SKent Overstreet 37661c6fdbd8SKent Overstreet static loff_t bch2_seek_data(struct file *file, u64 offset) 37671c6fdbd8SKent Overstreet { 37681c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 37691c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 3770424eb881SKent Overstreet struct btree_trans trans; 377167e0dd8fSKent Overstreet struct btree_iter iter; 37721c6fdbd8SKent Overstreet struct bkey_s_c k; 37736fed42bbSKent Overstreet subvol_inum inum = inode_inum(inode); 37741c6fdbd8SKent Overstreet u64 isize, next_data = MAX_LFS_FILESIZE; 37756fed42bbSKent Overstreet u32 snapshot; 37761c6fdbd8SKent Overstreet int ret; 37771c6fdbd8SKent Overstreet 37781c6fdbd8SKent Overstreet isize = i_size_read(&inode->v); 37791c6fdbd8SKent Overstreet if (offset >= isize) 37801c6fdbd8SKent Overstreet return -ENXIO; 37811c6fdbd8SKent Overstreet 378220bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 37836fed42bbSKent Overstreet retry: 37846fed42bbSKent Overstreet bch2_trans_begin(&trans); 37856fed42bbSKent Overstreet 37866fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 37876fed42bbSKent Overstreet if (ret) 37886fed42bbSKent Overstreet goto err; 3789424eb881SKent Overstreet 3790c72f687aSKent Overstreet for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_extents, 3791c72f687aSKent Overstreet SPOS(inode->v.i_ino, offset >> 9, snapshot), 3792c72f687aSKent Overstreet POS(inode->v.i_ino, U64_MAX), 3793c72f687aSKent Overstreet 0, k, ret) { 3794c72f687aSKent Overstreet if (bkey_extent_is_data(k.k)) { 37951c6fdbd8SKent Overstreet next_data = max(offset, bkey_start_offset(k.k) << 9); 37961c6fdbd8SKent Overstreet break; 37971c6fdbd8SKent Overstreet } else if (k.k->p.offset >> 9 > isize) 37981c6fdbd8SKent Overstreet break; 37991c6fdbd8SKent Overstreet } 380067e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 38016fed42bbSKent Overstreet err: 3802549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 38036fed42bbSKent Overstreet goto retry; 38041c6fdbd8SKent Overstreet 38059a796fdbSKent Overstreet bch2_trans_exit(&trans); 38061c6fdbd8SKent Overstreet if (ret) 38071c6fdbd8SKent Overstreet return ret; 38081c6fdbd8SKent Overstreet 38091c6fdbd8SKent Overstreet if (next_data > offset) 3810543ef2ebSKent Overstreet next_data = bch2_seek_pagecache_data(&inode->v, 3811*4198bf03SKent Overstreet offset, next_data, 0, false); 38121c6fdbd8SKent Overstreet 3813e10d3094SKent Overstreet if (next_data >= isize) 38141c6fdbd8SKent Overstreet return -ENXIO; 38151c6fdbd8SKent Overstreet 38161c6fdbd8SKent Overstreet return vfs_setpos(file, next_data, MAX_LFS_FILESIZE); 38171c6fdbd8SKent Overstreet } 38181c6fdbd8SKent Overstreet 3819*4198bf03SKent Overstreet static int folio_hole_offset(struct address_space *mapping, loff_t *offset, 3820*4198bf03SKent Overstreet unsigned min_replicas, bool nonblock) 38211c6fdbd8SKent Overstreet { 3822e8d28c3eSKent Overstreet struct folio *folio; 3823e8d28c3eSKent Overstreet struct bch_folio *s; 3824bf98ee10SBrian Foster unsigned i, sectors; 3825e8d28c3eSKent Overstreet bool ret = true; 3826543ef2ebSKent Overstreet 3827*4198bf03SKent Overstreet folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT, 3828*4198bf03SKent Overstreet FGP_LOCK|(nonblock ? FGP_NOWAIT : 0), 0); 3829*4198bf03SKent Overstreet if (folio == ERR_PTR(-EAGAIN)) 3830*4198bf03SKent Overstreet return -EAGAIN; 3831b6898917SKent Overstreet if (IS_ERR_OR_NULL(folio)) 3832e8d28c3eSKent Overstreet return true; 3833e8d28c3eSKent Overstreet 3834e8d28c3eSKent Overstreet s = bch2_folio(folio); 3835543ef2ebSKent Overstreet if (!s) 3836e8d28c3eSKent Overstreet goto unlock; 3837543ef2ebSKent Overstreet 3838e8d28c3eSKent Overstreet sectors = folio_sectors(folio); 3839bf98ee10SBrian Foster for (i = folio_pos_to_s(folio, *offset); i < sectors; i++) 3840a09818c7SKent Overstreet if (s->s[i].state < SECTOR_dirty || 3841a09818c7SKent Overstreet s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) { 3842bf98ee10SBrian Foster *offset = max(*offset, 3843bf98ee10SBrian Foster folio_pos(folio) + (i << SECTOR_SHIFT)); 3844e8d28c3eSKent Overstreet goto unlock; 3845543ef2ebSKent Overstreet } 3846543ef2ebSKent Overstreet 3847e8d28c3eSKent Overstreet *offset = folio_end_pos(folio); 3848e8d28c3eSKent Overstreet ret = false; 3849e8d28c3eSKent Overstreet unlock: 385030bff594SKent Overstreet folio_unlock(folio); 38510a6d6945SKent Overstreet folio_put(folio); 38521c6fdbd8SKent Overstreet return ret; 38531c6fdbd8SKent Overstreet } 38541c6fdbd8SKent Overstreet 3855543ef2ebSKent Overstreet static loff_t bch2_seek_pagecache_hole(struct inode *vinode, 38561c6fdbd8SKent Overstreet loff_t start_offset, 3857a09818c7SKent Overstreet loff_t end_offset, 3858*4198bf03SKent Overstreet unsigned min_replicas, 3859*4198bf03SKent Overstreet bool nonblock) 38601c6fdbd8SKent Overstreet { 38611c6fdbd8SKent Overstreet struct address_space *mapping = vinode->i_mapping; 3862e8d28c3eSKent Overstreet loff_t offset = start_offset; 38631c6fdbd8SKent Overstreet 3864e8d28c3eSKent Overstreet while (offset < end_offset && 3865*4198bf03SKent Overstreet !folio_hole_offset(mapping, &offset, min_replicas, nonblock)) 3866e8d28c3eSKent Overstreet ; 3867543ef2ebSKent Overstreet 3868e8d28c3eSKent Overstreet return min(offset, end_offset); 38691c6fdbd8SKent Overstreet } 38701c6fdbd8SKent Overstreet 3871*4198bf03SKent Overstreet static int bch2_clamp_data_hole(struct inode *inode, 3872a09818c7SKent Overstreet u64 *hole_start, 3873a09818c7SKent Overstreet u64 *hole_end, 3874*4198bf03SKent Overstreet unsigned min_replicas, 3875*4198bf03SKent Overstreet bool nonblock) 3876a09818c7SKent Overstreet { 3877*4198bf03SKent Overstreet loff_t ret; 3878*4198bf03SKent Overstreet 3879*4198bf03SKent Overstreet ret = bch2_seek_pagecache_hole(inode, 3880*4198bf03SKent Overstreet *hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9; 3881*4198bf03SKent Overstreet if (ret < 0) 3882*4198bf03SKent Overstreet return ret; 3883*4198bf03SKent Overstreet 3884*4198bf03SKent Overstreet *hole_start = ret; 3885a09818c7SKent Overstreet 3886a09818c7SKent Overstreet if (*hole_start == *hole_end) 3887*4198bf03SKent Overstreet return 0; 3888a09818c7SKent Overstreet 3889*4198bf03SKent Overstreet ret = bch2_seek_pagecache_data(inode, 3890*4198bf03SKent Overstreet *hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9; 3891*4198bf03SKent Overstreet if (ret < 0) 3892*4198bf03SKent Overstreet return ret; 3893*4198bf03SKent Overstreet 3894*4198bf03SKent Overstreet *hole_end = ret; 3895*4198bf03SKent Overstreet return 0; 3896a09818c7SKent Overstreet } 3897a09818c7SKent Overstreet 38981c6fdbd8SKent Overstreet static loff_t bch2_seek_hole(struct file *file, u64 offset) 38991c6fdbd8SKent Overstreet { 39001c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 39011c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 3902424eb881SKent Overstreet struct btree_trans trans; 390367e0dd8fSKent Overstreet struct btree_iter iter; 39041c6fdbd8SKent Overstreet struct bkey_s_c k; 39056fed42bbSKent Overstreet subvol_inum inum = inode_inum(inode); 39061c6fdbd8SKent Overstreet u64 isize, next_hole = MAX_LFS_FILESIZE; 39076fed42bbSKent Overstreet u32 snapshot; 39081c6fdbd8SKent Overstreet int ret; 39091c6fdbd8SKent Overstreet 39101c6fdbd8SKent Overstreet isize = i_size_read(&inode->v); 39111c6fdbd8SKent Overstreet if (offset >= isize) 39121c6fdbd8SKent Overstreet return -ENXIO; 39131c6fdbd8SKent Overstreet 391420bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 39156fed42bbSKent Overstreet retry: 39166fed42bbSKent Overstreet bch2_trans_begin(&trans); 39176fed42bbSKent Overstreet 39186fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 39196fed42bbSKent Overstreet if (ret) 39206fed42bbSKent Overstreet goto err; 3921424eb881SKent Overstreet 3922e5fa91d7SKent Overstreet for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, 39236fed42bbSKent Overstreet SPOS(inode->v.i_ino, offset >> 9, snapshot), 392494f651e2SKent Overstreet BTREE_ITER_SLOTS, k, ret) { 39251c6fdbd8SKent Overstreet if (k.k->p.inode != inode->v.i_ino) { 3926543ef2ebSKent Overstreet next_hole = bch2_seek_pagecache_hole(&inode->v, 3927*4198bf03SKent Overstreet offset, MAX_LFS_FILESIZE, 0, false); 39281c6fdbd8SKent Overstreet break; 39291c6fdbd8SKent Overstreet } else if (!bkey_extent_is_data(k.k)) { 3930543ef2ebSKent Overstreet next_hole = bch2_seek_pagecache_hole(&inode->v, 39311c6fdbd8SKent Overstreet max(offset, bkey_start_offset(k.k) << 9), 3932*4198bf03SKent Overstreet k.k->p.offset << 9, 0, false); 39331c6fdbd8SKent Overstreet 39341c6fdbd8SKent Overstreet if (next_hole < k.k->p.offset << 9) 39351c6fdbd8SKent Overstreet break; 39361c6fdbd8SKent Overstreet } else { 39371c6fdbd8SKent Overstreet offset = max(offset, bkey_start_offset(k.k) << 9); 39381c6fdbd8SKent Overstreet } 39391c6fdbd8SKent Overstreet } 394067e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 39416fed42bbSKent Overstreet err: 3942549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 39436fed42bbSKent Overstreet goto retry; 39441c6fdbd8SKent Overstreet 39459a796fdbSKent Overstreet bch2_trans_exit(&trans); 39461c6fdbd8SKent Overstreet if (ret) 39471c6fdbd8SKent Overstreet return ret; 39481c6fdbd8SKent Overstreet 39491c6fdbd8SKent Overstreet if (next_hole > isize) 39501c6fdbd8SKent Overstreet next_hole = isize; 39511c6fdbd8SKent Overstreet 39521c6fdbd8SKent Overstreet return vfs_setpos(file, next_hole, MAX_LFS_FILESIZE); 39531c6fdbd8SKent Overstreet } 39541c6fdbd8SKent Overstreet 39551c6fdbd8SKent Overstreet loff_t bch2_llseek(struct file *file, loff_t offset, int whence) 39561c6fdbd8SKent Overstreet { 39575c1ef830SKent Overstreet loff_t ret; 39585c1ef830SKent Overstreet 39591c6fdbd8SKent Overstreet switch (whence) { 39601c6fdbd8SKent Overstreet case SEEK_SET: 39611c6fdbd8SKent Overstreet case SEEK_CUR: 39621c6fdbd8SKent Overstreet case SEEK_END: 39635c1ef830SKent Overstreet ret = generic_file_llseek(file, offset, whence); 39645c1ef830SKent Overstreet break; 39651c6fdbd8SKent Overstreet case SEEK_DATA: 39665c1ef830SKent Overstreet ret = bch2_seek_data(file, offset); 39675c1ef830SKent Overstreet break; 39681c6fdbd8SKent Overstreet case SEEK_HOLE: 39695c1ef830SKent Overstreet ret = bch2_seek_hole(file, offset); 39705c1ef830SKent Overstreet break; 39715c1ef830SKent Overstreet default: 39725c1ef830SKent Overstreet ret = -EINVAL; 39735c1ef830SKent Overstreet break; 39741c6fdbd8SKent Overstreet } 39751c6fdbd8SKent Overstreet 39765c1ef830SKent Overstreet return bch2_err_class(ret); 39771c6fdbd8SKent Overstreet } 39781c6fdbd8SKent Overstreet 39791c6fdbd8SKent Overstreet void bch2_fs_fsio_exit(struct bch_fs *c) 39801c6fdbd8SKent Overstreet { 3981a8b3a677SKent Overstreet bioset_exit(&c->nocow_flush_bioset); 39821c6fdbd8SKent Overstreet bioset_exit(&c->dio_write_bioset); 39831c6fdbd8SKent Overstreet bioset_exit(&c->dio_read_bioset); 39841c6fdbd8SKent Overstreet bioset_exit(&c->writepage_bioset); 39851c6fdbd8SKent Overstreet } 39861c6fdbd8SKent Overstreet 39871c6fdbd8SKent Overstreet int bch2_fs_fsio_init(struct bch_fs *c) 39881c6fdbd8SKent Overstreet { 39891c6fdbd8SKent Overstreet if (bioset_init(&c->writepage_bioset, 39909a3df993SKent Overstreet 4, offsetof(struct bch_writepage_io, op.wbio.bio), 399165d48e35SKent Overstreet BIOSET_NEED_BVECS)) 399265d48e35SKent Overstreet return -BCH_ERR_ENOMEM_writepage_bioset_init; 399365d48e35SKent Overstreet 399465d48e35SKent Overstreet if (bioset_init(&c->dio_read_bioset, 39951c6fdbd8SKent Overstreet 4, offsetof(struct dio_read, rbio.bio), 399665d48e35SKent Overstreet BIOSET_NEED_BVECS)) 399765d48e35SKent Overstreet return -BCH_ERR_ENOMEM_dio_read_bioset_init; 399865d48e35SKent Overstreet 399965d48e35SKent Overstreet if (bioset_init(&c->dio_write_bioset, 40009a3df993SKent Overstreet 4, offsetof(struct dio_write, op.wbio.bio), 400165d48e35SKent Overstreet BIOSET_NEED_BVECS)) 400265d48e35SKent Overstreet return -BCH_ERR_ENOMEM_dio_write_bioset_init; 400365d48e35SKent Overstreet 400465d48e35SKent Overstreet if (bioset_init(&c->nocow_flush_bioset, 4005a8b3a677SKent Overstreet 1, offsetof(struct nocow_flush, bio), 0)) 400665d48e35SKent Overstreet return -BCH_ERR_ENOMEM_nocow_flush_bioset_init; 40071c6fdbd8SKent Overstreet 4008c8b4534dSKent Overstreet return 0; 40091c6fdbd8SKent Overstreet } 40101c6fdbd8SKent Overstreet 40111c6fdbd8SKent Overstreet #endif /* NO_BCACHEFS_FS */ 4012