11c6fdbd8SKent Overstreet // SPDX-License-Identifier: GPL-2.0 21c6fdbd8SKent Overstreet #ifndef NO_BCACHEFS_FS 31c6fdbd8SKent Overstreet 41c6fdbd8SKent Overstreet #include "bcachefs.h" 57b3f84eaSKent Overstreet #include "alloc_foreground.h" 607a1006aSKent Overstreet #include "bkey_buf.h" 71c6fdbd8SKent Overstreet #include "btree_update.h" 81c6fdbd8SKent Overstreet #include "buckets.h" 91c6fdbd8SKent Overstreet #include "clock.h" 101c6fdbd8SKent Overstreet #include "error.h" 11e2d9912cSKent Overstreet #include "extents.h" 1208c07feaSKent Overstreet #include "extent_update.h" 131c6fdbd8SKent Overstreet #include "fs.h" 141c6fdbd8SKent Overstreet #include "fs-io.h" 151c6fdbd8SKent Overstreet #include "fsck.h" 161c6fdbd8SKent Overstreet #include "inode.h" 171c6fdbd8SKent Overstreet #include "journal.h" 181c6fdbd8SKent Overstreet #include "io.h" 191c6fdbd8SKent Overstreet #include "keylist.h" 201c6fdbd8SKent Overstreet #include "quota.h" 2176426098SKent Overstreet #include "reflink.h" 221c6fdbd8SKent Overstreet #include "trace.h" 231c6fdbd8SKent Overstreet 241c6fdbd8SKent Overstreet #include <linux/aio.h> 251c6fdbd8SKent Overstreet #include <linux/backing-dev.h> 261c6fdbd8SKent Overstreet #include <linux/falloc.h> 271c6fdbd8SKent Overstreet #include <linux/migrate.h> 281c6fdbd8SKent Overstreet #include <linux/mmu_context.h> 291c6fdbd8SKent Overstreet #include <linux/pagevec.h> 309ba2eb25SKent Overstreet #include <linux/rmap.h> 311c6fdbd8SKent Overstreet #include <linux/sched/signal.h> 321c6fdbd8SKent Overstreet #include <linux/task_io_accounting_ops.h> 331c6fdbd8SKent Overstreet #include <linux/uio.h> 341c6fdbd8SKent Overstreet #include <linux/writeback.h> 351c6fdbd8SKent Overstreet 361c6fdbd8SKent Overstreet #include <trace/events/writeback.h> 371c6fdbd8SKent Overstreet 3830bff594SKent Overstreet static inline loff_t folio_end_pos(struct folio *folio) 3930bff594SKent Overstreet { 4030bff594SKent Overstreet return folio_pos(folio) + folio_size(folio); 4130bff594SKent Overstreet } 4230bff594SKent Overstreet 4330bff594SKent Overstreet static inline size_t folio_sectors(struct folio *folio) 4430bff594SKent Overstreet { 4530bff594SKent Overstreet return PAGE_SECTORS << folio_order(folio); 4630bff594SKent Overstreet } 4730bff594SKent Overstreet 4830bff594SKent Overstreet static inline loff_t folio_sector(struct folio *folio) 4930bff594SKent Overstreet { 5030bff594SKent Overstreet return folio_pos(folio) >> 9; 5130bff594SKent Overstreet } 5230bff594SKent Overstreet 5330bff594SKent Overstreet static inline loff_t folio_end_sector(struct folio *folio) 5430bff594SKent Overstreet { 5530bff594SKent Overstreet return folio_end_pos(folio) >> 9; 5630bff594SKent Overstreet } 5730bff594SKent Overstreet 58a8b3a677SKent Overstreet struct nocow_flush { 59a8b3a677SKent Overstreet struct closure *cl; 60a8b3a677SKent Overstreet struct bch_dev *ca; 61a8b3a677SKent Overstreet struct bio bio; 62a8b3a677SKent Overstreet }; 63a8b3a677SKent Overstreet 64a8b3a677SKent Overstreet static void nocow_flush_endio(struct bio *_bio) 65a8b3a677SKent Overstreet { 66a8b3a677SKent Overstreet 67a8b3a677SKent Overstreet struct nocow_flush *bio = container_of(_bio, struct nocow_flush, bio); 68a8b3a677SKent Overstreet 69a8b3a677SKent Overstreet closure_put(bio->cl); 70a8b3a677SKent Overstreet percpu_ref_put(&bio->ca->io_ref); 71a8b3a677SKent Overstreet bio_put(&bio->bio); 72a8b3a677SKent Overstreet } 73a8b3a677SKent Overstreet 74a8b3a677SKent Overstreet static void bch2_inode_flush_nocow_writes_async(struct bch_fs *c, 75a8b3a677SKent Overstreet struct bch_inode_info *inode, 76a8b3a677SKent Overstreet struct closure *cl) 77a8b3a677SKent Overstreet { 78a8b3a677SKent Overstreet struct nocow_flush *bio; 79a8b3a677SKent Overstreet struct bch_dev *ca; 80a8b3a677SKent Overstreet struct bch_devs_mask devs; 81a8b3a677SKent Overstreet unsigned dev; 82a8b3a677SKent Overstreet 83a8b3a677SKent Overstreet dev = find_first_bit(inode->ei_devs_need_flush.d, BCH_SB_MEMBERS_MAX); 84a8b3a677SKent Overstreet if (dev == BCH_SB_MEMBERS_MAX) 85a8b3a677SKent Overstreet return; 86a8b3a677SKent Overstreet 87a8b3a677SKent Overstreet devs = inode->ei_devs_need_flush; 88a8b3a677SKent Overstreet memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush)); 89a8b3a677SKent Overstreet 90a8b3a677SKent Overstreet for_each_set_bit(dev, devs.d, BCH_SB_MEMBERS_MAX) { 91a8b3a677SKent Overstreet rcu_read_lock(); 92a8b3a677SKent Overstreet ca = rcu_dereference(c->devs[dev]); 93a8b3a677SKent Overstreet if (ca && !percpu_ref_tryget(&ca->io_ref)) 94a8b3a677SKent Overstreet ca = NULL; 95a8b3a677SKent Overstreet rcu_read_unlock(); 96a8b3a677SKent Overstreet 97a8b3a677SKent Overstreet if (!ca) 98a8b3a677SKent Overstreet continue; 99a8b3a677SKent Overstreet 100a8b3a677SKent Overstreet bio = container_of(bio_alloc_bioset(ca->disk_sb.bdev, 0, 101a8b3a677SKent Overstreet REQ_OP_FLUSH, 102a8b3a677SKent Overstreet GFP_KERNEL, 103a8b3a677SKent Overstreet &c->nocow_flush_bioset), 104a8b3a677SKent Overstreet struct nocow_flush, bio); 105a8b3a677SKent Overstreet bio->cl = cl; 106a8b3a677SKent Overstreet bio->ca = ca; 107a8b3a677SKent Overstreet bio->bio.bi_end_io = nocow_flush_endio; 108a8b3a677SKent Overstreet closure_bio_submit(&bio->bio, cl); 109a8b3a677SKent Overstreet } 110a8b3a677SKent Overstreet } 111a8b3a677SKent Overstreet 112a8b3a677SKent Overstreet static int bch2_inode_flush_nocow_writes(struct bch_fs *c, 113a8b3a677SKent Overstreet struct bch_inode_info *inode) 114a8b3a677SKent Overstreet { 115a8b3a677SKent Overstreet struct closure cl; 116a8b3a677SKent Overstreet 117a8b3a677SKent Overstreet closure_init_stack(&cl); 118a8b3a677SKent Overstreet bch2_inode_flush_nocow_writes_async(c, inode, &cl); 119a8b3a677SKent Overstreet closure_sync(&cl); 120a8b3a677SKent Overstreet 121a8b3a677SKent Overstreet return 0; 122a8b3a677SKent Overstreet } 123a8b3a677SKent Overstreet 1247f5e31e1SKent Overstreet static inline bool bio_full(struct bio *bio, unsigned len) 1257f5e31e1SKent Overstreet { 1267f5e31e1SKent Overstreet if (bio->bi_vcnt >= bio->bi_max_vecs) 1277f5e31e1SKent Overstreet return true; 1287f5e31e1SKent Overstreet if (bio->bi_iter.bi_size > UINT_MAX - len) 1297f5e31e1SKent Overstreet return true; 1307f5e31e1SKent Overstreet return false; 1317f5e31e1SKent Overstreet } 1327f5e31e1SKent Overstreet 133eb8e6e9cSKent Overstreet static inline struct address_space *faults_disabled_mapping(void) 134eb8e6e9cSKent Overstreet { 135eb8e6e9cSKent Overstreet return (void *) (((unsigned long) current->faults_disabled_mapping) & ~1UL); 136eb8e6e9cSKent Overstreet } 137eb8e6e9cSKent Overstreet 138eb8e6e9cSKent Overstreet static inline void set_fdm_dropped_locks(void) 139eb8e6e9cSKent Overstreet { 140eb8e6e9cSKent Overstreet current->faults_disabled_mapping = 141eb8e6e9cSKent Overstreet (void *) (((unsigned long) current->faults_disabled_mapping)|1); 142eb8e6e9cSKent Overstreet } 143eb8e6e9cSKent Overstreet 144eb8e6e9cSKent Overstreet static inline bool fdm_dropped_locks(void) 145eb8e6e9cSKent Overstreet { 146eb8e6e9cSKent Overstreet return ((unsigned long) current->faults_disabled_mapping) & 1; 147eb8e6e9cSKent Overstreet } 148eb8e6e9cSKent Overstreet 1491c6fdbd8SKent Overstreet struct quota_res { 1501c6fdbd8SKent Overstreet u64 sectors; 1511c6fdbd8SKent Overstreet }; 1521c6fdbd8SKent Overstreet 1539a3df993SKent Overstreet struct bch_writepage_io { 1541c6fdbd8SKent Overstreet struct bch_inode_info *inode; 1551c6fdbd8SKent Overstreet 1561c6fdbd8SKent Overstreet /* must be last: */ 1571c6fdbd8SKent Overstreet struct bch_write_op op; 1581c6fdbd8SKent Overstreet }; 1591c6fdbd8SKent Overstreet 1601c6fdbd8SKent Overstreet struct dio_write { 1611c6fdbd8SKent Overstreet struct kiocb *req; 162182c7bbfSKent Overstreet struct address_space *mapping; 163182c7bbfSKent Overstreet struct bch_inode_info *inode; 164ed484030SKent Overstreet struct mm_struct *mm; 1651c6fdbd8SKent Overstreet unsigned loop:1, 1666b1b186aSKent Overstreet extending:1, 1671c6fdbd8SKent Overstreet sync:1, 168a1ee777bSKent Overstreet flush:1, 1691c6fdbd8SKent Overstreet free_iov:1; 1701c6fdbd8SKent Overstreet struct quota_res quota_res; 171042a1f26SKent Overstreet u64 written; 1721c6fdbd8SKent Overstreet 1731c6fdbd8SKent Overstreet struct iov_iter iter; 1741c6fdbd8SKent Overstreet struct iovec inline_vecs[2]; 1751c6fdbd8SKent Overstreet 1761c6fdbd8SKent Overstreet /* must be last: */ 1779a3df993SKent Overstreet struct bch_write_op op; 1781c6fdbd8SKent Overstreet }; 1791c6fdbd8SKent Overstreet 1801c6fdbd8SKent Overstreet struct dio_read { 1811c6fdbd8SKent Overstreet struct closure cl; 1821c6fdbd8SKent Overstreet struct kiocb *req; 1831c6fdbd8SKent Overstreet long ret; 184b4725cc1SKent Overstreet bool should_dirty; 1851c6fdbd8SKent Overstreet struct bch_read_bio rbio; 1861c6fdbd8SKent Overstreet }; 1871c6fdbd8SKent Overstreet 1881c6fdbd8SKent Overstreet /* pagecache_block must be held */ 189a023127aSKent Overstreet static noinline int write_invalidate_inode_pages_range(struct address_space *mapping, 1901c6fdbd8SKent Overstreet loff_t start, loff_t end) 1911c6fdbd8SKent Overstreet { 1921c6fdbd8SKent Overstreet int ret; 1931c6fdbd8SKent Overstreet 1941c6fdbd8SKent Overstreet /* 1951c6fdbd8SKent Overstreet * XXX: the way this is currently implemented, we can spin if a process 1961c6fdbd8SKent Overstreet * is continually redirtying a specific page 1971c6fdbd8SKent Overstreet */ 1981c6fdbd8SKent Overstreet do { 1991c6fdbd8SKent Overstreet if (!mapping->nrpages) 2001c6fdbd8SKent Overstreet return 0; 2011c6fdbd8SKent Overstreet 2021c6fdbd8SKent Overstreet ret = filemap_write_and_wait_range(mapping, start, end); 2031c6fdbd8SKent Overstreet if (ret) 2041c6fdbd8SKent Overstreet break; 2051c6fdbd8SKent Overstreet 2061c6fdbd8SKent Overstreet if (!mapping->nrpages) 2071c6fdbd8SKent Overstreet return 0; 2081c6fdbd8SKent Overstreet 2091c6fdbd8SKent Overstreet ret = invalidate_inode_pages2_range(mapping, 2101c6fdbd8SKent Overstreet start >> PAGE_SHIFT, 2111c6fdbd8SKent Overstreet end >> PAGE_SHIFT); 2121c6fdbd8SKent Overstreet } while (ret == -EBUSY); 2131c6fdbd8SKent Overstreet 2141c6fdbd8SKent Overstreet return ret; 2151c6fdbd8SKent Overstreet } 2161c6fdbd8SKent Overstreet 2171c6fdbd8SKent Overstreet /* quotas */ 2181c6fdbd8SKent Overstreet 2191c6fdbd8SKent Overstreet #ifdef CONFIG_BCACHEFS_QUOTA 2201c6fdbd8SKent Overstreet 2216b1b186aSKent Overstreet static void __bch2_quota_reservation_put(struct bch_fs *c, 2221c6fdbd8SKent Overstreet struct bch_inode_info *inode, 2231c6fdbd8SKent Overstreet struct quota_res *res) 2241c6fdbd8SKent Overstreet { 2251c6fdbd8SKent Overstreet BUG_ON(res->sectors > inode->ei_quota_reserved); 2261c6fdbd8SKent Overstreet 2271c6fdbd8SKent Overstreet bch2_quota_acct(c, inode->ei_qid, Q_SPC, 22826609b61SKent Overstreet -((s64) res->sectors), KEY_TYPE_QUOTA_PREALLOC); 2291c6fdbd8SKent Overstreet inode->ei_quota_reserved -= res->sectors; 2301c6fdbd8SKent Overstreet res->sectors = 0; 2311c6fdbd8SKent Overstreet } 2321c6fdbd8SKent Overstreet 2336b1b186aSKent Overstreet static void bch2_quota_reservation_put(struct bch_fs *c, 2346b1b186aSKent Overstreet struct bch_inode_info *inode, 2356b1b186aSKent Overstreet struct quota_res *res) 2366b1b186aSKent Overstreet { 2376b1b186aSKent Overstreet if (res->sectors) { 2386b1b186aSKent Overstreet mutex_lock(&inode->ei_quota_lock); 2396b1b186aSKent Overstreet __bch2_quota_reservation_put(c, inode, res); 2406b1b186aSKent Overstreet mutex_unlock(&inode->ei_quota_lock); 2416b1b186aSKent Overstreet } 2426b1b186aSKent Overstreet } 2436b1b186aSKent Overstreet 2441c6fdbd8SKent Overstreet static int bch2_quota_reservation_add(struct bch_fs *c, 2451c6fdbd8SKent Overstreet struct bch_inode_info *inode, 2461c6fdbd8SKent Overstreet struct quota_res *res, 247e8540e56SKent Overstreet u64 sectors, 2481c6fdbd8SKent Overstreet bool check_enospc) 2491c6fdbd8SKent Overstreet { 2501c6fdbd8SKent Overstreet int ret; 2511c6fdbd8SKent Overstreet 2521c6fdbd8SKent Overstreet mutex_lock(&inode->ei_quota_lock); 2531c6fdbd8SKent Overstreet ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, 25426609b61SKent Overstreet check_enospc ? KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK); 2551c6fdbd8SKent Overstreet if (likely(!ret)) { 2561c6fdbd8SKent Overstreet inode->ei_quota_reserved += sectors; 2571c6fdbd8SKent Overstreet res->sectors += sectors; 2581c6fdbd8SKent Overstreet } 2591c6fdbd8SKent Overstreet mutex_unlock(&inode->ei_quota_lock); 2601c6fdbd8SKent Overstreet 2611c6fdbd8SKent Overstreet return ret; 2621c6fdbd8SKent Overstreet } 2631c6fdbd8SKent Overstreet 2641c6fdbd8SKent Overstreet #else 2651c6fdbd8SKent Overstreet 2666b1b186aSKent Overstreet static void __bch2_quota_reservation_put(struct bch_fs *c, 2676b1b186aSKent Overstreet struct bch_inode_info *inode, 2686b1b186aSKent Overstreet struct quota_res *res) {} 2696b1b186aSKent Overstreet 2701c6fdbd8SKent Overstreet static void bch2_quota_reservation_put(struct bch_fs *c, 2711c6fdbd8SKent Overstreet struct bch_inode_info *inode, 2726b1b186aSKent Overstreet struct quota_res *res) {} 2731c6fdbd8SKent Overstreet 2741c6fdbd8SKent Overstreet static int bch2_quota_reservation_add(struct bch_fs *c, 2751c6fdbd8SKent Overstreet struct bch_inode_info *inode, 2761c6fdbd8SKent Overstreet struct quota_res *res, 2771c6fdbd8SKent Overstreet unsigned sectors, 2781c6fdbd8SKent Overstreet bool check_enospc) 2791c6fdbd8SKent Overstreet { 2801c6fdbd8SKent Overstreet return 0; 2811c6fdbd8SKent Overstreet } 2821c6fdbd8SKent Overstreet 2831c6fdbd8SKent Overstreet #endif 2841c6fdbd8SKent Overstreet 2851c6fdbd8SKent Overstreet /* i_size updates: */ 2861c6fdbd8SKent Overstreet 2872ea90048SKent Overstreet struct inode_new_size { 2882ea90048SKent Overstreet loff_t new_size; 2892ea90048SKent Overstreet u64 now; 2902ea90048SKent Overstreet unsigned fields; 2912ea90048SKent Overstreet }; 2922ea90048SKent Overstreet 2931c6fdbd8SKent Overstreet static int inode_set_size(struct bch_inode_info *inode, 2941c6fdbd8SKent Overstreet struct bch_inode_unpacked *bi, 2951c6fdbd8SKent Overstreet void *p) 2961c6fdbd8SKent Overstreet { 2972ea90048SKent Overstreet struct inode_new_size *s = p; 2981c6fdbd8SKent Overstreet 2992ea90048SKent Overstreet bi->bi_size = s->new_size; 3002ea90048SKent Overstreet if (s->fields & ATTR_ATIME) 3012ea90048SKent Overstreet bi->bi_atime = s->now; 3022ea90048SKent Overstreet if (s->fields & ATTR_MTIME) 3032ea90048SKent Overstreet bi->bi_mtime = s->now; 3042ea90048SKent Overstreet if (s->fields & ATTR_CTIME) 3052ea90048SKent Overstreet bi->bi_ctime = s->now; 3061c6fdbd8SKent Overstreet 3071c6fdbd8SKent Overstreet return 0; 3081c6fdbd8SKent Overstreet } 3091c6fdbd8SKent Overstreet 31076426098SKent Overstreet int __must_check bch2_write_inode_size(struct bch_fs *c, 3111c6fdbd8SKent Overstreet struct bch_inode_info *inode, 3122ea90048SKent Overstreet loff_t new_size, unsigned fields) 3131c6fdbd8SKent Overstreet { 3142ea90048SKent Overstreet struct inode_new_size s = { 3152ea90048SKent Overstreet .new_size = new_size, 3162ea90048SKent Overstreet .now = bch2_current_time(c), 3172ea90048SKent Overstreet .fields = fields, 3182ea90048SKent Overstreet }; 3192ea90048SKent Overstreet 3202ea90048SKent Overstreet return bch2_write_inode(c, inode, inode_set_size, &s, fields); 3211c6fdbd8SKent Overstreet } 3221c6fdbd8SKent Overstreet 3236b1b186aSKent Overstreet static void __i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, 324190fa7afSKent Overstreet struct quota_res *quota_res, s64 sectors) 3251c6fdbd8SKent Overstreet { 326b33bf1bcSKent Overstreet bch2_fs_inconsistent_on((s64) inode->v.i_blocks + sectors < 0, c, 327b33bf1bcSKent Overstreet "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", 328b33bf1bcSKent Overstreet inode->v.i_ino, (u64) inode->v.i_blocks, sectors, 329b33bf1bcSKent Overstreet inode->ei_inode.bi_sectors); 330b44a66a6SKent Overstreet inode->v.i_blocks += sectors; 331b44a66a6SKent Overstreet 3321c6fdbd8SKent Overstreet #ifdef CONFIG_BCACHEFS_QUOTA 3331c6fdbd8SKent Overstreet if (quota_res && sectors > 0) { 3341c6fdbd8SKent Overstreet BUG_ON(sectors > quota_res->sectors); 3351c6fdbd8SKent Overstreet BUG_ON(sectors > inode->ei_quota_reserved); 3361c6fdbd8SKent Overstreet 3371c6fdbd8SKent Overstreet quota_res->sectors -= sectors; 3381c6fdbd8SKent Overstreet inode->ei_quota_reserved -= sectors; 3391c6fdbd8SKent Overstreet } else { 34026609b61SKent Overstreet bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN); 3411c6fdbd8SKent Overstreet } 3421c6fdbd8SKent Overstreet #endif 3436b1b186aSKent Overstreet } 3446b1b186aSKent Overstreet 3456b1b186aSKent Overstreet static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, 3466b1b186aSKent Overstreet struct quota_res *quota_res, s64 sectors) 3476b1b186aSKent Overstreet { 3486b1b186aSKent Overstreet if (sectors) { 3496b1b186aSKent Overstreet mutex_lock(&inode->ei_quota_lock); 3506b1b186aSKent Overstreet __i_sectors_acct(c, inode, quota_res, sectors); 3511c6fdbd8SKent Overstreet mutex_unlock(&inode->ei_quota_lock); 3521c6fdbd8SKent Overstreet } 3536b1b186aSKent Overstreet } 3541c6fdbd8SKent Overstreet 3551c6fdbd8SKent Overstreet /* page state: */ 3561c6fdbd8SKent Overstreet 3571c6fdbd8SKent Overstreet /* stored in page->private: */ 3581c6fdbd8SKent Overstreet 359*a1774a05SKent Overstreet #define BCH_FOLIO_SECTOR_STATE() \ 360*a1774a05SKent Overstreet x(unallocated) \ 361*a1774a05SKent Overstreet x(reserved) \ 362*a1774a05SKent Overstreet x(dirty) \ 363*a1774a05SKent Overstreet x(dirty_reserved) \ 364*a1774a05SKent Overstreet x(allocated) 365*a1774a05SKent Overstreet 366*a1774a05SKent Overstreet enum bch_folio_sector_state { 367*a1774a05SKent Overstreet #define x(n) SECTOR_##n, 368*a1774a05SKent Overstreet BCH_FOLIO_SECTOR_STATE() 369*a1774a05SKent Overstreet #undef x 370*a1774a05SKent Overstreet }; 371*a1774a05SKent Overstreet 372*a1774a05SKent Overstreet const char * const bch2_folio_sector_states[] = { 373*a1774a05SKent Overstreet #define x(n) #n, 374*a1774a05SKent Overstreet BCH_FOLIO_SECTOR_STATE() 375*a1774a05SKent Overstreet #undef x 376*a1774a05SKent Overstreet NULL 377*a1774a05SKent Overstreet }; 378*a1774a05SKent Overstreet 379*a1774a05SKent Overstreet static inline enum bch_folio_sector_state 380*a1774a05SKent Overstreet folio_sector_dirty(enum bch_folio_sector_state state) 381*a1774a05SKent Overstreet { 382*a1774a05SKent Overstreet switch (state) { 383*a1774a05SKent Overstreet case SECTOR_unallocated: 384*a1774a05SKent Overstreet return SECTOR_dirty; 385*a1774a05SKent Overstreet case SECTOR_reserved: 386*a1774a05SKent Overstreet return SECTOR_dirty_reserved; 387*a1774a05SKent Overstreet default: 388*a1774a05SKent Overstreet return state; 389*a1774a05SKent Overstreet } 390*a1774a05SKent Overstreet } 391*a1774a05SKent Overstreet 392*a1774a05SKent Overstreet static inline enum bch_folio_sector_state 393*a1774a05SKent Overstreet folio_sector_undirty(enum bch_folio_sector_state state) 394*a1774a05SKent Overstreet { 395*a1774a05SKent Overstreet switch (state) { 396*a1774a05SKent Overstreet case SECTOR_dirty: 397*a1774a05SKent Overstreet return SECTOR_unallocated; 398*a1774a05SKent Overstreet case SECTOR_dirty_reserved: 399*a1774a05SKent Overstreet return SECTOR_reserved; 400*a1774a05SKent Overstreet default: 401*a1774a05SKent Overstreet return state; 402*a1774a05SKent Overstreet } 403*a1774a05SKent Overstreet } 404*a1774a05SKent Overstreet 405*a1774a05SKent Overstreet static inline enum bch_folio_sector_state 406*a1774a05SKent Overstreet folio_sector_reserve(enum bch_folio_sector_state state) 407*a1774a05SKent Overstreet { 408*a1774a05SKent Overstreet switch (state) { 409*a1774a05SKent Overstreet case SECTOR_unallocated: 410*a1774a05SKent Overstreet return SECTOR_reserved; 411*a1774a05SKent Overstreet case SECTOR_dirty: 412*a1774a05SKent Overstreet return SECTOR_dirty_reserved; 413*a1774a05SKent Overstreet default: 414*a1774a05SKent Overstreet return state; 415*a1774a05SKent Overstreet } 416*a1774a05SKent Overstreet } 417*a1774a05SKent Overstreet 4183342ac13SKent Overstreet struct bch_folio_sector { 419b44a66a6SKent Overstreet /* Uncompressed, fully allocated replicas (or on disk reservation): */ 420b44a66a6SKent Overstreet unsigned nr_replicas:4; 421f81b648dSKent Overstreet 422b44a66a6SKent Overstreet /* Owns PAGE_SECTORS * replicas_reserved sized in memory reservation: */ 423b44a66a6SKent Overstreet unsigned replicas_reserved:4; 4241c6fdbd8SKent Overstreet 425f57a6a5dSKent Overstreet /* i_sectors: */ 426*a1774a05SKent Overstreet enum bch_folio_sector_state state:8; 4271c6fdbd8SKent Overstreet }; 4281c6fdbd8SKent Overstreet 4293342ac13SKent Overstreet struct bch_folio { 4303826ee0bSKent Overstreet spinlock_t lock; 4317f5e31e1SKent Overstreet atomic_t write_count; 4323342ac13SKent Overstreet /* 4333342ac13SKent Overstreet * Is the sector state up to date with the btree? 4343342ac13SKent Overstreet * (Not the data itself) 4353342ac13SKent Overstreet */ 436e6ec361fSKent Overstreet bool uptodate; 43749fe78ffSKent Overstreet struct bch_folio_sector s[]; 438f57a6a5dSKent Overstreet }; 439f57a6a5dSKent Overstreet 440*a1774a05SKent Overstreet static inline void folio_sector_set(struct folio *folio, 441*a1774a05SKent Overstreet struct bch_folio *s, 442*a1774a05SKent Overstreet unsigned i, unsigned n) 443*a1774a05SKent Overstreet { 444*a1774a05SKent Overstreet s->s[i].state = n; 445*a1774a05SKent Overstreet } 446*a1774a05SKent Overstreet 44730bff594SKent Overstreet static inline struct bch_folio *__bch2_folio(struct folio *folio) 4481c6fdbd8SKent Overstreet { 44930bff594SKent Overstreet return folio_has_private(folio) 45030bff594SKent Overstreet ? (struct bch_folio *) folio_get_private(folio) 451f57a6a5dSKent Overstreet : NULL; 452f57a6a5dSKent Overstreet } 4531c6fdbd8SKent Overstreet 45430bff594SKent Overstreet static inline struct bch_folio *bch2_folio(struct folio *folio) 455f57a6a5dSKent Overstreet { 45630bff594SKent Overstreet EBUG_ON(!folio_test_locked(folio)); 4571c6fdbd8SKent Overstreet 45830bff594SKent Overstreet return __bch2_folio(folio); 459f57a6a5dSKent Overstreet } 460f57a6a5dSKent Overstreet 46130bff594SKent Overstreet /* for newly allocated folios: */ 46230bff594SKent Overstreet static void __bch2_folio_release(struct folio *folio) 463f57a6a5dSKent Overstreet { 46430bff594SKent Overstreet kfree(folio_detach_private(folio)); 465f57a6a5dSKent Overstreet } 466f57a6a5dSKent Overstreet 46730bff594SKent Overstreet static void bch2_folio_release(struct folio *folio) 468f57a6a5dSKent Overstreet { 46930bff594SKent Overstreet EBUG_ON(!folio_test_locked(folio)); 47030bff594SKent Overstreet __bch2_folio_release(folio); 471f57a6a5dSKent Overstreet } 472f57a6a5dSKent Overstreet 47330bff594SKent Overstreet /* for newly allocated folios: */ 47430bff594SKent Overstreet static struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp) 475f57a6a5dSKent Overstreet { 4763342ac13SKent Overstreet struct bch_folio *s; 477f57a6a5dSKent Overstreet 47849fe78ffSKent Overstreet s = kzalloc(sizeof(*s) + 47949fe78ffSKent Overstreet sizeof(struct bch_folio_sector) * 48049fe78ffSKent Overstreet folio_sectors(folio), GFP_NOFS|gfp); 481f57a6a5dSKent Overstreet if (!s) 482f57a6a5dSKent Overstreet return NULL; 483f57a6a5dSKent Overstreet 4843826ee0bSKent Overstreet spin_lock_init(&s->lock); 48530bff594SKent Overstreet folio_attach_private(folio, s); 4861c6fdbd8SKent Overstreet return s; 4871c6fdbd8SKent Overstreet } 4881c6fdbd8SKent Overstreet 48930bff594SKent Overstreet static struct bch_folio *bch2_folio_create(struct folio *folio, gfp_t gfp) 490f57a6a5dSKent Overstreet { 49130bff594SKent Overstreet return bch2_folio(folio) ?: __bch2_folio_create(folio, gfp); 492f57a6a5dSKent Overstreet } 493f57a6a5dSKent Overstreet 49479203111SKent Overstreet static unsigned bkey_to_sector_state(struct bkey_s_c k) 495b44a66a6SKent Overstreet { 49679203111SKent Overstreet if (bkey_extent_is_reservation(k)) 497*a1774a05SKent Overstreet return SECTOR_reserved; 49879203111SKent Overstreet if (bkey_extent_is_allocation(k.k)) 499*a1774a05SKent Overstreet return SECTOR_allocated; 500*a1774a05SKent Overstreet return SECTOR_unallocated; 501b44a66a6SKent Overstreet } 502b44a66a6SKent Overstreet 50330bff594SKent Overstreet static void __bch2_folio_set(struct folio *folio, 504e6ec361fSKent Overstreet unsigned pg_offset, unsigned pg_len, 505e6ec361fSKent Overstreet unsigned nr_ptrs, unsigned state) 506e6ec361fSKent Overstreet { 50730bff594SKent Overstreet struct bch_folio *s = bch2_folio_create(folio, __GFP_NOFAIL); 50833e2eb96SKent Overstreet unsigned i, sectors = folio_sectors(folio); 509e6ec361fSKent Overstreet 51033e2eb96SKent Overstreet BUG_ON(pg_offset >= sectors); 51133e2eb96SKent Overstreet BUG_ON(pg_offset + pg_len > sectors); 512e6ec361fSKent Overstreet 513e6ec361fSKent Overstreet spin_lock(&s->lock); 514e6ec361fSKent Overstreet 515e6ec361fSKent Overstreet for (i = pg_offset; i < pg_offset + pg_len; i++) { 516e6ec361fSKent Overstreet s->s[i].nr_replicas = nr_ptrs; 517*a1774a05SKent Overstreet folio_sector_set(folio, s, i, state); 518e6ec361fSKent Overstreet } 519e6ec361fSKent Overstreet 52033e2eb96SKent Overstreet if (i == sectors) 521e6ec361fSKent Overstreet s->uptodate = true; 522e6ec361fSKent Overstreet 523e6ec361fSKent Overstreet spin_unlock(&s->lock); 524e6ec361fSKent Overstreet } 525e6ec361fSKent Overstreet 5263342ac13SKent Overstreet /* 5273342ac13SKent Overstreet * Initialize bch_folio state (allocated/unallocated, nr_replicas) from the 5283342ac13SKent Overstreet * extents btree: 5293342ac13SKent Overstreet */ 5303342ac13SKent Overstreet static int bch2_folio_set(struct bch_fs *c, subvol_inum inum, 53130bff594SKent Overstreet struct folio **folios, unsigned nr_folios) 532e6ec361fSKent Overstreet { 533e6ec361fSKent Overstreet struct btree_trans trans; 534e6ec361fSKent Overstreet struct btree_iter iter; 535e6ec361fSKent Overstreet struct bkey_s_c k; 53630bff594SKent Overstreet u64 offset = folio_sector(folios[0]); 53730bff594SKent Overstreet unsigned folio_idx = 0; 538e6ec361fSKent Overstreet u32 snapshot; 539e6ec361fSKent Overstreet int ret; 540e6ec361fSKent Overstreet 541e6ec361fSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 542e6ec361fSKent Overstreet retry: 543e6ec361fSKent Overstreet bch2_trans_begin(&trans); 544e6ec361fSKent Overstreet 545e6ec361fSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 546e6ec361fSKent Overstreet if (ret) 547e6ec361fSKent Overstreet goto err; 548e6ec361fSKent Overstreet 549e6ec361fSKent Overstreet for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, 550e6ec361fSKent Overstreet SPOS(inum.inum, offset, snapshot), 551e6ec361fSKent Overstreet BTREE_ITER_SLOTS, k, ret) { 552e6ec361fSKent Overstreet unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k); 55379203111SKent Overstreet unsigned state = bkey_to_sector_state(k); 554e6ec361fSKent Overstreet 55530bff594SKent Overstreet while (folio_idx < nr_folios) { 55630bff594SKent Overstreet struct folio *folio = folios[folio_idx]; 55730bff594SKent Overstreet u64 folio_start = folio_sector(folio); 55830bff594SKent Overstreet u64 folio_end = folio_end_sector(folio); 55930bff594SKent Overstreet unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) - folio_start; 56030bff594SKent Overstreet unsigned folio_len = min(k.k->p.offset, folio_end) - folio_offset - folio_start; 561e6ec361fSKent Overstreet 56230bff594SKent Overstreet BUG_ON(k.k->p.offset < folio_start); 56330bff594SKent Overstreet BUG_ON(bkey_start_offset(k.k) > folio_end); 564e6ec361fSKent Overstreet 56530bff594SKent Overstreet if (!bch2_folio_create(folio, __GFP_NOFAIL)->uptodate) 56630bff594SKent Overstreet __bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state); 567e6ec361fSKent Overstreet 56830bff594SKent Overstreet if (k.k->p.offset < folio_end) 569e6ec361fSKent Overstreet break; 57030bff594SKent Overstreet folio_idx++; 571e6ec361fSKent Overstreet } 572e6ec361fSKent Overstreet 57330bff594SKent Overstreet if (folio_idx == nr_folios) 574e6ec361fSKent Overstreet break; 575e6ec361fSKent Overstreet } 576e6ec361fSKent Overstreet 577e6ec361fSKent Overstreet offset = iter.pos.offset; 578e6ec361fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 579e6ec361fSKent Overstreet err: 580549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 581e6ec361fSKent Overstreet goto retry; 582e6ec361fSKent Overstreet bch2_trans_exit(&trans); 583e6ec361fSKent Overstreet 584e6ec361fSKent Overstreet return ret; 585e6ec361fSKent Overstreet } 586e6ec361fSKent Overstreet 587b44a66a6SKent Overstreet static void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k) 588b44a66a6SKent Overstreet { 589b44a66a6SKent Overstreet struct bvec_iter iter; 590b44a66a6SKent Overstreet struct bio_vec bv; 591b44a66a6SKent Overstreet unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v 592b44a66a6SKent Overstreet ? 0 : bch2_bkey_nr_ptrs_fully_allocated(k); 59379203111SKent Overstreet unsigned state = bkey_to_sector_state(k); 594b44a66a6SKent Overstreet 595e6ec361fSKent Overstreet bio_for_each_segment(bv, bio, iter) 59630bff594SKent Overstreet __bch2_folio_set(page_folio(bv.bv_page), bv.bv_offset >> 9, 597e6ec361fSKent Overstreet bv.bv_len >> 9, nr_ptrs, state); 598b44a66a6SKent Overstreet } 599b44a66a6SKent Overstreet 600dcfc593fSKent Overstreet static void mark_pagecache_unallocated(struct bch_inode_info *inode, 601dcfc593fSKent Overstreet u64 start, u64 end) 602dcfc593fSKent Overstreet { 603dcfc593fSKent Overstreet pgoff_t index = start >> PAGE_SECTORS_SHIFT; 604dcfc593fSKent Overstreet pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT; 605dcfc593fSKent Overstreet struct folio_batch fbatch; 606dcfc593fSKent Overstreet unsigned i, j; 607dcfc593fSKent Overstreet 608dcfc593fSKent Overstreet if (end <= start) 609dcfc593fSKent Overstreet return; 610dcfc593fSKent Overstreet 611dcfc593fSKent Overstreet folio_batch_init(&fbatch); 612dcfc593fSKent Overstreet 613dcfc593fSKent Overstreet while (filemap_get_folios(inode->v.i_mapping, 614dcfc593fSKent Overstreet &index, end_index, &fbatch)) { 615dcfc593fSKent Overstreet for (i = 0; i < folio_batch_count(&fbatch); i++) { 616dcfc593fSKent Overstreet struct folio *folio = fbatch.folios[i]; 61733e2eb96SKent Overstreet u64 folio_start = folio_sector(folio); 61833e2eb96SKent Overstreet u64 folio_end = folio_end_sector(folio); 61930bff594SKent Overstreet unsigned folio_offset = max(start, folio_start) - folio_start; 62030bff594SKent Overstreet unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; 6213342ac13SKent Overstreet struct bch_folio *s; 622dcfc593fSKent Overstreet 62330bff594SKent Overstreet BUG_ON(end <= folio_start); 624dcfc593fSKent Overstreet 625dcfc593fSKent Overstreet folio_lock(folio); 62630bff594SKent Overstreet s = bch2_folio(folio); 627dcfc593fSKent Overstreet 628dcfc593fSKent Overstreet if (s) { 629dcfc593fSKent Overstreet spin_lock(&s->lock); 63030bff594SKent Overstreet for (j = folio_offset; j < folio_offset + folio_len; j++) 631dcfc593fSKent Overstreet s->s[j].nr_replicas = 0; 632dcfc593fSKent Overstreet spin_unlock(&s->lock); 633dcfc593fSKent Overstreet } 634dcfc593fSKent Overstreet 635dcfc593fSKent Overstreet folio_unlock(folio); 636dcfc593fSKent Overstreet } 637dcfc593fSKent Overstreet folio_batch_release(&fbatch); 638dcfc593fSKent Overstreet cond_resched(); 639dcfc593fSKent Overstreet } 640dcfc593fSKent Overstreet } 641dcfc593fSKent Overstreet 642dcfc593fSKent Overstreet static void mark_pagecache_reserved(struct bch_inode_info *inode, 643dcfc593fSKent Overstreet u64 start, u64 end) 644dcfc593fSKent Overstreet { 645dcfc593fSKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 646dcfc593fSKent Overstreet pgoff_t index = start >> PAGE_SECTORS_SHIFT; 647dcfc593fSKent Overstreet pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT; 648dcfc593fSKent Overstreet struct folio_batch fbatch; 649dcfc593fSKent Overstreet s64 i_sectors_delta = 0; 650dcfc593fSKent Overstreet unsigned i, j; 651dcfc593fSKent Overstreet 652dcfc593fSKent Overstreet if (end <= start) 653dcfc593fSKent Overstreet return; 654dcfc593fSKent Overstreet 655dcfc593fSKent Overstreet folio_batch_init(&fbatch); 656dcfc593fSKent Overstreet 657dcfc593fSKent Overstreet while (filemap_get_folios(inode->v.i_mapping, 658dcfc593fSKent Overstreet &index, end_index, &fbatch)) { 659dcfc593fSKent Overstreet for (i = 0; i < folio_batch_count(&fbatch); i++) { 660dcfc593fSKent Overstreet struct folio *folio = fbatch.folios[i]; 66133e2eb96SKent Overstreet u64 folio_start = folio_sector(folio); 66233e2eb96SKent Overstreet u64 folio_end = folio_end_sector(folio); 66330bff594SKent Overstreet unsigned folio_offset = max(start, folio_start) - folio_start; 66430bff594SKent Overstreet unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; 6653342ac13SKent Overstreet struct bch_folio *s; 666dcfc593fSKent Overstreet 66730bff594SKent Overstreet BUG_ON(end <= folio_start); 668dcfc593fSKent Overstreet 669dcfc593fSKent Overstreet folio_lock(folio); 67030bff594SKent Overstreet s = bch2_folio(folio); 671dcfc593fSKent Overstreet 672dcfc593fSKent Overstreet if (s) { 673dcfc593fSKent Overstreet spin_lock(&s->lock); 674*a1774a05SKent Overstreet for (j = folio_offset; j < folio_offset + folio_len; j++) { 675*a1774a05SKent Overstreet i_sectors_delta -= s->s[j].state == SECTOR_dirty; 676*a1774a05SKent Overstreet folio_sector_set(folio, s, j, folio_sector_reserve(s->s[j].state)); 677dcfc593fSKent Overstreet } 678dcfc593fSKent Overstreet spin_unlock(&s->lock); 679dcfc593fSKent Overstreet } 680dcfc593fSKent Overstreet 681dcfc593fSKent Overstreet folio_unlock(folio); 682dcfc593fSKent Overstreet } 683dcfc593fSKent Overstreet folio_batch_release(&fbatch); 684dcfc593fSKent Overstreet cond_resched(); 685dcfc593fSKent Overstreet } 686dcfc593fSKent Overstreet 687dcfc593fSKent Overstreet i_sectors_acct(c, inode, NULL, i_sectors_delta); 688dcfc593fSKent Overstreet } 689dcfc593fSKent Overstreet 690e1036a2aSKent Overstreet static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode) 691e1036a2aSKent Overstreet { 692e1036a2aSKent Overstreet /* XXX: this should not be open coded */ 693e1036a2aSKent Overstreet return inode->ei_inode.bi_data_replicas 694e1036a2aSKent Overstreet ? inode->ei_inode.bi_data_replicas - 1 695e1036a2aSKent Overstreet : c->opts.data_replicas; 696e1036a2aSKent Overstreet } 697e1036a2aSKent Overstreet 6983342ac13SKent Overstreet static inline unsigned sectors_to_reserve(struct bch_folio_sector *s, 699f57a6a5dSKent Overstreet unsigned nr_replicas) 700f57a6a5dSKent Overstreet { 701f57a6a5dSKent Overstreet return max(0, (int) nr_replicas - 702f57a6a5dSKent Overstreet s->nr_replicas - 703f57a6a5dSKent Overstreet s->replicas_reserved); 704f57a6a5dSKent Overstreet } 705f57a6a5dSKent Overstreet 70630bff594SKent Overstreet static int bch2_get_folio_disk_reservation(struct bch_fs *c, 707f57a6a5dSKent Overstreet struct bch_inode_info *inode, 70830bff594SKent Overstreet struct folio *folio, bool check_enospc) 7091c6fdbd8SKent Overstreet { 71030bff594SKent Overstreet struct bch_folio *s = bch2_folio_create(folio, 0); 711e1036a2aSKent Overstreet unsigned nr_replicas = inode_nr_replicas(c, inode); 712f57a6a5dSKent Overstreet struct disk_reservation disk_res = { 0 }; 71333e2eb96SKent Overstreet unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0; 714f81b648dSKent Overstreet int ret; 7151c6fdbd8SKent Overstreet 716f57a6a5dSKent Overstreet if (!s) 717f57a6a5dSKent Overstreet return -ENOMEM; 7181c6fdbd8SKent Overstreet 71933e2eb96SKent Overstreet for (i = 0; i < sectors; i++) 720f57a6a5dSKent Overstreet disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas); 721f57a6a5dSKent Overstreet 722f57a6a5dSKent Overstreet if (!disk_res_sectors) 723f57a6a5dSKent Overstreet return 0; 724f57a6a5dSKent Overstreet 725f57a6a5dSKent Overstreet ret = bch2_disk_reservation_get(c, &disk_res, 726f57a6a5dSKent Overstreet disk_res_sectors, 1, 727f57a6a5dSKent Overstreet !check_enospc 728f57a6a5dSKent Overstreet ? BCH_DISK_RESERVATION_NOFAIL 729f57a6a5dSKent Overstreet : 0); 7301c6fdbd8SKent Overstreet if (unlikely(ret)) 731f81b648dSKent Overstreet return ret; 732f81b648dSKent Overstreet 73333e2eb96SKent Overstreet for (i = 0; i < sectors; i++) 734f57a6a5dSKent Overstreet s->s[i].replicas_reserved += 735f57a6a5dSKent Overstreet sectors_to_reserve(&s->s[i], nr_replicas); 736f57a6a5dSKent Overstreet 737f57a6a5dSKent Overstreet return 0; 7381c6fdbd8SKent Overstreet } 7391c6fdbd8SKent Overstreet 74030bff594SKent Overstreet struct bch2_folio_reservation { 741d1542e03SKent Overstreet struct disk_reservation disk; 742d1542e03SKent Overstreet struct quota_res quota; 743d1542e03SKent Overstreet }; 744d1542e03SKent Overstreet 74530bff594SKent Overstreet static void bch2_folio_reservation_init(struct bch_fs *c, 746f57a6a5dSKent Overstreet struct bch_inode_info *inode, 74730bff594SKent Overstreet struct bch2_folio_reservation *res) 748d1542e03SKent Overstreet { 749d1542e03SKent Overstreet memset(res, 0, sizeof(*res)); 750d1542e03SKent Overstreet 751d1542e03SKent Overstreet res->disk.nr_replicas = inode_nr_replicas(c, inode); 752d1542e03SKent Overstreet } 753d1542e03SKent Overstreet 75430bff594SKent Overstreet static void bch2_folio_reservation_put(struct bch_fs *c, 755d1542e03SKent Overstreet struct bch_inode_info *inode, 75630bff594SKent Overstreet struct bch2_folio_reservation *res) 757d1542e03SKent Overstreet { 758d1542e03SKent Overstreet bch2_disk_reservation_put(c, &res->disk); 759d1542e03SKent Overstreet bch2_quota_reservation_put(c, inode, &res->quota); 760d1542e03SKent Overstreet } 761d1542e03SKent Overstreet 76230bff594SKent Overstreet static int bch2_folio_reservation_get(struct bch_fs *c, 76330bff594SKent Overstreet struct bch_inode_info *inode, 76430bff594SKent Overstreet struct folio *folio, 76530bff594SKent Overstreet struct bch2_folio_reservation *res, 766bd954215SKent Overstreet unsigned offset, unsigned len) 767f57a6a5dSKent Overstreet { 76830bff594SKent Overstreet struct bch_folio *s = bch2_folio_create(folio, 0); 769d1542e03SKent Overstreet unsigned i, disk_sectors = 0, quota_sectors = 0; 770f57a6a5dSKent Overstreet int ret; 771f57a6a5dSKent Overstreet 772f57a6a5dSKent Overstreet if (!s) 773f57a6a5dSKent Overstreet return -ENOMEM; 774f57a6a5dSKent Overstreet 775e6ec361fSKent Overstreet BUG_ON(!s->uptodate); 776e6ec361fSKent Overstreet 7774b0a66d5SKent Overstreet for (i = round_down(offset, block_bytes(c)) >> 9; 7784b0a66d5SKent Overstreet i < round_up(offset + len, block_bytes(c)) >> 9; 779d1542e03SKent Overstreet i++) { 780d1542e03SKent Overstreet disk_sectors += sectors_to_reserve(&s->s[i], 781d1542e03SKent Overstreet res->disk.nr_replicas); 782*a1774a05SKent Overstreet quota_sectors += s->s[i].state == SECTOR_unallocated; 7831c6fdbd8SKent Overstreet } 7841c6fdbd8SKent Overstreet 785d1542e03SKent Overstreet if (disk_sectors) { 786bd954215SKent Overstreet ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0); 787d1542e03SKent Overstreet if (unlikely(ret)) 788d1542e03SKent Overstreet return ret; 789d1542e03SKent Overstreet } 790d1542e03SKent Overstreet 791d1542e03SKent Overstreet if (quota_sectors) { 792d1542e03SKent Overstreet ret = bch2_quota_reservation_add(c, inode, &res->quota, 793bd954215SKent Overstreet quota_sectors, true); 794d1542e03SKent Overstreet if (unlikely(ret)) { 795d1542e03SKent Overstreet struct disk_reservation tmp = { 796d1542e03SKent Overstreet .sectors = disk_sectors 797d1542e03SKent Overstreet }; 798d1542e03SKent Overstreet 799d1542e03SKent Overstreet bch2_disk_reservation_put(c, &tmp); 800d1542e03SKent Overstreet res->disk.sectors -= disk_sectors; 801d1542e03SKent Overstreet return ret; 802d1542e03SKent Overstreet } 803d1542e03SKent Overstreet } 804d1542e03SKent Overstreet 805d1542e03SKent Overstreet return 0; 806f57a6a5dSKent Overstreet } 807f57a6a5dSKent Overstreet 80830bff594SKent Overstreet static void bch2_clear_folio_bits(struct folio *folio) 8091c6fdbd8SKent Overstreet { 81030bff594SKent Overstreet struct bch_inode_info *inode = to_bch_ei(folio->mapping->host); 8111c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 81230bff594SKent Overstreet struct bch_folio *s = bch2_folio(folio); 813d1542e03SKent Overstreet struct disk_reservation disk_res = { 0 }; 81433e2eb96SKent Overstreet int i, sectors = folio_sectors(folio), dirty_sectors = 0; 8151c6fdbd8SKent Overstreet 816f57a6a5dSKent Overstreet if (!s) 8171c6fdbd8SKent Overstreet return; 8181c6fdbd8SKent Overstreet 81930bff594SKent Overstreet EBUG_ON(!folio_test_locked(folio)); 82030bff594SKent Overstreet EBUG_ON(folio_test_writeback(folio)); 8213826ee0bSKent Overstreet 82233e2eb96SKent Overstreet for (i = 0; i < sectors; i++) { 823d1542e03SKent Overstreet disk_res.sectors += s->s[i].replicas_reserved; 824d1542e03SKent Overstreet s->s[i].replicas_reserved = 0; 825d1542e03SKent Overstreet 826*a1774a05SKent Overstreet dirty_sectors -= s->s[i].state == SECTOR_dirty; 827*a1774a05SKent Overstreet folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state)); 828f57a6a5dSKent Overstreet } 829adfcfaf0SKent Overstreet 830d1542e03SKent Overstreet bch2_disk_reservation_put(c, &disk_res); 831d1542e03SKent Overstreet 832b44a66a6SKent Overstreet i_sectors_acct(c, inode, NULL, dirty_sectors); 833adfcfaf0SKent Overstreet 83430bff594SKent Overstreet bch2_folio_release(folio); 8351c6fdbd8SKent Overstreet } 8361c6fdbd8SKent Overstreet 83730bff594SKent Overstreet static void bch2_set_folio_dirty(struct bch_fs *c, 83830bff594SKent Overstreet struct bch_inode_info *inode, 83930bff594SKent Overstreet struct folio *folio, 84030bff594SKent Overstreet struct bch2_folio_reservation *res, 841d1542e03SKent Overstreet unsigned offset, unsigned len) 8421c6fdbd8SKent Overstreet { 84330bff594SKent Overstreet struct bch_folio *s = bch2_folio(folio); 844f57a6a5dSKent Overstreet unsigned i, dirty_sectors = 0; 8451c6fdbd8SKent Overstreet 84630bff594SKent Overstreet WARN_ON((u64) folio_pos(folio) + offset + len > 847877dfb34SKent Overstreet round_up((u64) i_size_read(&inode->v), block_bytes(c))); 848fb472ac5SKent Overstreet 8493826ee0bSKent Overstreet spin_lock(&s->lock); 8503826ee0bSKent Overstreet 8514b0a66d5SKent Overstreet for (i = round_down(offset, block_bytes(c)) >> 9; 8524b0a66d5SKent Overstreet i < round_up(offset + len, block_bytes(c)) >> 9; 853d1542e03SKent Overstreet i++) { 854d1542e03SKent Overstreet unsigned sectors = sectors_to_reserve(&s->s[i], 855d1542e03SKent Overstreet res->disk.nr_replicas); 8561c6fdbd8SKent Overstreet 857406d6d5aSKent Overstreet /* 858406d6d5aSKent Overstreet * This can happen if we race with the error path in 859406d6d5aSKent Overstreet * bch2_writepage_io_done(): 860406d6d5aSKent Overstreet */ 861406d6d5aSKent Overstreet sectors = min_t(unsigned, sectors, res->disk.sectors); 862406d6d5aSKent Overstreet 863d1542e03SKent Overstreet s->s[i].replicas_reserved += sectors; 864d1542e03SKent Overstreet res->disk.sectors -= sectors; 865adfcfaf0SKent Overstreet 866*a1774a05SKent Overstreet dirty_sectors += s->s[i].state == SECTOR_unallocated; 867*a1774a05SKent Overstreet 868*a1774a05SKent Overstreet folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state)); 869f57a6a5dSKent Overstreet } 870f57a6a5dSKent Overstreet 8713826ee0bSKent Overstreet spin_unlock(&s->lock); 8723826ee0bSKent Overstreet 873d1542e03SKent Overstreet i_sectors_acct(c, inode, &res->quota, dirty_sectors); 8741c6fdbd8SKent Overstreet 87530bff594SKent Overstreet if (!folio_test_dirty(folio)) 87630bff594SKent Overstreet filemap_dirty_folio(inode->v.i_mapping, folio); 8771c6fdbd8SKent Overstreet } 8781c6fdbd8SKent Overstreet 8791c6fdbd8SKent Overstreet vm_fault_t bch2_page_fault(struct vm_fault *vmf) 8801c6fdbd8SKent Overstreet { 8811c6fdbd8SKent Overstreet struct file *file = vmf->vma->vm_file; 882eb8e6e9cSKent Overstreet struct address_space *mapping = file->f_mapping; 883eb8e6e9cSKent Overstreet struct address_space *fdm = faults_disabled_mapping(); 8841c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 8851c6fdbd8SKent Overstreet int ret; 8861c6fdbd8SKent Overstreet 887eb8e6e9cSKent Overstreet if (fdm == mapping) 888eb8e6e9cSKent Overstreet return VM_FAULT_SIGBUS; 889eb8e6e9cSKent Overstreet 890eb8e6e9cSKent Overstreet /* Lock ordering: */ 891eb8e6e9cSKent Overstreet if (fdm > mapping) { 892eb8e6e9cSKent Overstreet struct bch_inode_info *fdm_host = to_bch_ei(fdm->host); 893eb8e6e9cSKent Overstreet 894a7ecd30cSKent Overstreet if (bch2_pagecache_add_tryget(inode)) 895eb8e6e9cSKent Overstreet goto got_lock; 896eb8e6e9cSKent Overstreet 897a7ecd30cSKent Overstreet bch2_pagecache_block_put(fdm_host); 898eb8e6e9cSKent Overstreet 899a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 900a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 901eb8e6e9cSKent Overstreet 902a7ecd30cSKent Overstreet bch2_pagecache_block_get(fdm_host); 903eb8e6e9cSKent Overstreet 904eb8e6e9cSKent Overstreet /* Signal that lock has been dropped: */ 905eb8e6e9cSKent Overstreet set_fdm_dropped_locks(); 906eb8e6e9cSKent Overstreet return VM_FAULT_SIGBUS; 907eb8e6e9cSKent Overstreet } 908eb8e6e9cSKent Overstreet 909a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 910eb8e6e9cSKent Overstreet got_lock: 9111c6fdbd8SKent Overstreet ret = filemap_fault(vmf); 912a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 9131c6fdbd8SKent Overstreet 9141c6fdbd8SKent Overstreet return ret; 9151c6fdbd8SKent Overstreet } 9161c6fdbd8SKent Overstreet 9171c6fdbd8SKent Overstreet vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) 9181c6fdbd8SKent Overstreet { 91930bff594SKent Overstreet struct folio *folio = page_folio(vmf->page); 9201c6fdbd8SKent Overstreet struct file *file = vmf->vma->vm_file; 9211c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 9221c6fdbd8SKent Overstreet struct address_space *mapping = file->f_mapping; 9231c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 92430bff594SKent Overstreet struct bch2_folio_reservation res; 9256cc3535dSKent Overstreet unsigned len; 9266cc3535dSKent Overstreet loff_t isize; 927e6ec361fSKent Overstreet int ret; 9281c6fdbd8SKent Overstreet 92930bff594SKent Overstreet bch2_folio_reservation_init(c, inode, &res); 930d1542e03SKent Overstreet 9311c6fdbd8SKent Overstreet sb_start_pagefault(inode->v.i_sb); 9321c6fdbd8SKent Overstreet file_update_time(file); 9331c6fdbd8SKent Overstreet 9341c6fdbd8SKent Overstreet /* 9351c6fdbd8SKent Overstreet * Not strictly necessary, but helps avoid dio writes livelocking in 9361c6fdbd8SKent Overstreet * write_invalidate_inode_pages_range() - can drop this if/when we get 9371c6fdbd8SKent Overstreet * a write_invalidate_inode_pages_range() that works without dropping 9381c6fdbd8SKent Overstreet * page lock before invalidating page 9391c6fdbd8SKent Overstreet */ 940a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 9411c6fdbd8SKent Overstreet 94230bff594SKent Overstreet folio_lock(folio); 9436cc3535dSKent Overstreet isize = i_size_read(&inode->v); 9446cc3535dSKent Overstreet 94530bff594SKent Overstreet if (folio->mapping != mapping || folio_pos(folio) >= isize) { 94630bff594SKent Overstreet folio_unlock(folio); 9471c6fdbd8SKent Overstreet ret = VM_FAULT_NOPAGE; 9481c6fdbd8SKent Overstreet goto out; 9491c6fdbd8SKent Overstreet } 9501c6fdbd8SKent Overstreet 95133e2eb96SKent Overstreet len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio)); 9526cc3535dSKent Overstreet 95330bff594SKent Overstreet if (!bch2_folio_create(folio, __GFP_NOFAIL)->uptodate) { 95430bff594SKent Overstreet if (bch2_folio_set(c, inode_inum(inode), &folio, 1)) { 95530bff594SKent Overstreet folio_unlock(folio); 956e6ec361fSKent Overstreet ret = VM_FAULT_SIGBUS; 957e6ec361fSKent Overstreet goto out; 958e6ec361fSKent Overstreet } 959e6ec361fSKent Overstreet } 960e6ec361fSKent Overstreet 96130bff594SKent Overstreet if (bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) { 96230bff594SKent Overstreet folio_unlock(folio); 9631c6fdbd8SKent Overstreet ret = VM_FAULT_SIGBUS; 9641c6fdbd8SKent Overstreet goto out; 9651c6fdbd8SKent Overstreet } 9661c6fdbd8SKent Overstreet 96730bff594SKent Overstreet bch2_set_folio_dirty(c, inode, folio, &res, 0, len); 96830bff594SKent Overstreet bch2_folio_reservation_put(c, inode, &res); 9691b783a69SKent Overstreet 97030bff594SKent Overstreet folio_wait_stable(folio); 971e6ec361fSKent Overstreet ret = VM_FAULT_LOCKED; 9721c6fdbd8SKent Overstreet out: 973a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 9741c6fdbd8SKent Overstreet sb_end_pagefault(inode->v.i_sb); 975d1542e03SKent Overstreet 9761c6fdbd8SKent Overstreet return ret; 9771c6fdbd8SKent Overstreet } 9781c6fdbd8SKent Overstreet 9791c6fdbd8SKent Overstreet void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length) 9801c6fdbd8SKent Overstreet { 9811c6fdbd8SKent Overstreet if (offset || length < folio_size(folio)) 9821c6fdbd8SKent Overstreet return; 9831c6fdbd8SKent Overstreet 98430bff594SKent Overstreet bch2_clear_folio_bits(folio); 9851c6fdbd8SKent Overstreet } 9861c6fdbd8SKent Overstreet 9871c6fdbd8SKent Overstreet bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask) 9881c6fdbd8SKent Overstreet { 989a6d90385SKent Overstreet if (folio_test_dirty(folio) || folio_test_writeback(folio)) 9901c6fdbd8SKent Overstreet return false; 9911c6fdbd8SKent Overstreet 99230bff594SKent Overstreet bch2_clear_folio_bits(folio); 9931c6fdbd8SKent Overstreet return true; 9941c6fdbd8SKent Overstreet } 9951c6fdbd8SKent Overstreet 9961c6fdbd8SKent Overstreet /* readpage(s): */ 9971c6fdbd8SKent Overstreet 9981c6fdbd8SKent Overstreet static void bch2_readpages_end_io(struct bio *bio) 9991c6fdbd8SKent Overstreet { 100030bff594SKent Overstreet struct folio_iter fi; 10011c6fdbd8SKent Overstreet 100230bff594SKent Overstreet bio_for_each_folio_all(fi, bio) { 10031c6fdbd8SKent Overstreet if (!bio->bi_status) { 100430bff594SKent Overstreet folio_mark_uptodate(fi.folio); 10051c6fdbd8SKent Overstreet } else { 100630bff594SKent Overstreet folio_clear_uptodate(fi.folio); 100730bff594SKent Overstreet folio_set_error(fi.folio); 10081c6fdbd8SKent Overstreet } 100930bff594SKent Overstreet folio_unlock(fi.folio); 10101c6fdbd8SKent Overstreet } 10111c6fdbd8SKent Overstreet 10121c6fdbd8SKent Overstreet bio_put(bio); 10131c6fdbd8SKent Overstreet } 10141c6fdbd8SKent Overstreet 10151c6fdbd8SKent Overstreet struct readpages_iter { 10161c6fdbd8SKent Overstreet struct address_space *mapping; 10171c6fdbd8SKent Overstreet struct page **pages; 10181c6fdbd8SKent Overstreet unsigned nr_pages; 10191c6fdbd8SKent Overstreet unsigned idx; 10201c6fdbd8SKent Overstreet pgoff_t offset; 10211c6fdbd8SKent Overstreet }; 10221c6fdbd8SKent Overstreet 10231c6fdbd8SKent Overstreet static int readpages_iter_init(struct readpages_iter *iter, 10241c6fdbd8SKent Overstreet struct readahead_control *ractl) 10251c6fdbd8SKent Overstreet { 10261c6fdbd8SKent Overstreet unsigned i, nr_pages = readahead_count(ractl); 10271c6fdbd8SKent Overstreet 10281c6fdbd8SKent Overstreet memset(iter, 0, sizeof(*iter)); 10291c6fdbd8SKent Overstreet 10301c6fdbd8SKent Overstreet iter->mapping = ractl->mapping; 10311c6fdbd8SKent Overstreet iter->offset = readahead_index(ractl); 10321c6fdbd8SKent Overstreet iter->nr_pages = nr_pages; 10331c6fdbd8SKent Overstreet 10341c6fdbd8SKent Overstreet iter->pages = kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS); 10351c6fdbd8SKent Overstreet if (!iter->pages) 10361c6fdbd8SKent Overstreet return -ENOMEM; 10371c6fdbd8SKent Overstreet 103889931472SKent Overstreet nr_pages = __readahead_batch(ractl, iter->pages, nr_pages); 10391c6fdbd8SKent Overstreet for (i = 0; i < nr_pages; i++) { 104030bff594SKent Overstreet __bch2_folio_create(page_folio(iter->pages[i]), __GFP_NOFAIL); 10411c6fdbd8SKent Overstreet put_page(iter->pages[i]); 10421c6fdbd8SKent Overstreet } 10431c6fdbd8SKent Overstreet 10441c6fdbd8SKent Overstreet return 0; 10451c6fdbd8SKent Overstreet } 10461c6fdbd8SKent Overstreet 104730bff594SKent Overstreet static inline struct folio *readpage_iter_next(struct readpages_iter *iter) 10481c6fdbd8SKent Overstreet { 10491c6fdbd8SKent Overstreet if (iter->idx >= iter->nr_pages) 10501c6fdbd8SKent Overstreet return NULL; 10511c6fdbd8SKent Overstreet 10521c6fdbd8SKent Overstreet EBUG_ON(iter->pages[iter->idx]->index != iter->offset + iter->idx); 10531c6fdbd8SKent Overstreet 105430bff594SKent Overstreet return page_folio(iter->pages[iter->idx]); 10551c6fdbd8SKent Overstreet } 10561c6fdbd8SKent Overstreet 105735189e09SKent Overstreet static bool extent_partial_reads_expensive(struct bkey_s_c k) 105835189e09SKent Overstreet { 105935189e09SKent Overstreet struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 106035189e09SKent Overstreet struct bch_extent_crc_unpacked crc; 106135189e09SKent Overstreet const union bch_extent_entry *i; 106235189e09SKent Overstreet 106335189e09SKent Overstreet bkey_for_each_crc(k.k, ptrs, crc, i) 106435189e09SKent Overstreet if (crc.csum_type || crc.compression_type) 106535189e09SKent Overstreet return true; 106635189e09SKent Overstreet return false; 106735189e09SKent Overstreet } 106835189e09SKent Overstreet 10691c6fdbd8SKent Overstreet static void readpage_bio_extend(struct readpages_iter *iter, 107076426098SKent Overstreet struct bio *bio, 107176426098SKent Overstreet unsigned sectors_this_extent, 10721c6fdbd8SKent Overstreet bool get_more) 10731c6fdbd8SKent Overstreet { 107476426098SKent Overstreet while (bio_sectors(bio) < sectors_this_extent && 10751c6fdbd8SKent Overstreet bio->bi_vcnt < bio->bi_max_vecs) { 107630bff594SKent Overstreet pgoff_t folio_offset = bio_end_sector(bio) >> PAGE_SECTORS_SHIFT; 107730bff594SKent Overstreet struct folio *folio = readpage_iter_next(iter); 10781c6fdbd8SKent Overstreet int ret; 10791c6fdbd8SKent Overstreet 108030bff594SKent Overstreet if (folio) { 108130bff594SKent Overstreet if (iter->offset + iter->idx != folio_offset) 10821c6fdbd8SKent Overstreet break; 10831c6fdbd8SKent Overstreet 10841c6fdbd8SKent Overstreet iter->idx++; 10851c6fdbd8SKent Overstreet } else { 10861c6fdbd8SKent Overstreet if (!get_more) 10871c6fdbd8SKent Overstreet break; 10881c6fdbd8SKent Overstreet 108930bff594SKent Overstreet folio = xa_load(&iter->mapping->i_pages, folio_offset); 109030bff594SKent Overstreet if (folio && !xa_is_value(folio)) 10911c6fdbd8SKent Overstreet break; 10921c6fdbd8SKent Overstreet 109330bff594SKent Overstreet folio = filemap_alloc_folio(readahead_gfp_mask(iter->mapping), 0); 109430bff594SKent Overstreet if (!folio) 10951c6fdbd8SKent Overstreet break; 10961c6fdbd8SKent Overstreet 109730bff594SKent Overstreet if (!__bch2_folio_create(folio, 0)) { 109830bff594SKent Overstreet folio_put(folio); 1099f57a6a5dSKent Overstreet break; 1100f57a6a5dSKent Overstreet } 11011c6fdbd8SKent Overstreet 110230bff594SKent Overstreet ret = filemap_add_folio(iter->mapping, folio, folio_offset, GFP_NOFS); 11031c6fdbd8SKent Overstreet if (ret) { 110430bff594SKent Overstreet __bch2_folio_release(folio); 110530bff594SKent Overstreet folio_put(folio); 11061c6fdbd8SKent Overstreet break; 11071c6fdbd8SKent Overstreet } 11081c6fdbd8SKent Overstreet 110930bff594SKent Overstreet folio_put(folio); 11101c6fdbd8SKent Overstreet } 11111c6fdbd8SKent Overstreet 111230bff594SKent Overstreet BUG_ON(!bio_add_folio(bio, folio, folio_size(folio), 0)); 11131c6fdbd8SKent Overstreet } 11141c6fdbd8SKent Overstreet } 11151c6fdbd8SKent Overstreet 11168c6d298aSKent Overstreet static void bchfs_read(struct btree_trans *trans, 11178c6d298aSKent Overstreet struct bch_read_bio *rbio, 11188c6d298aSKent Overstreet subvol_inum inum, 11191c6fdbd8SKent Overstreet struct readpages_iter *readpages_iter) 11201c6fdbd8SKent Overstreet { 11210f238367SKent Overstreet struct bch_fs *c = trans->c; 11228c6d298aSKent Overstreet struct btree_iter iter; 112307a1006aSKent Overstreet struct bkey_buf sk; 11241c6fdbd8SKent Overstreet int flags = BCH_READ_RETRY_IF_STALE| 11251c6fdbd8SKent Overstreet BCH_READ_MAY_PROMOTE; 11268c6d298aSKent Overstreet u32 snapshot; 112776426098SKent Overstreet int ret = 0; 11281c6fdbd8SKent Overstreet 11291c6fdbd8SKent Overstreet rbio->c = c; 11301c6fdbd8SKent Overstreet rbio->start_time = local_clock(); 11318c6d298aSKent Overstreet rbio->subvol = inum.subvol; 113235189e09SKent Overstreet 113307a1006aSKent Overstreet bch2_bkey_buf_init(&sk); 113476426098SKent Overstreet retry: 1135700c25b3SKent Overstreet bch2_trans_begin(trans); 11368c6d298aSKent Overstreet iter = (struct btree_iter) { NULL }; 1137700c25b3SKent Overstreet 11388c6d298aSKent Overstreet ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); 11398c6d298aSKent Overstreet if (ret) 11408c6d298aSKent Overstreet goto err; 11418c6d298aSKent Overstreet 11428c6d298aSKent Overstreet bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, 11438c6d298aSKent Overstreet SPOS(inum.inum, rbio->bio.bi_iter.bi_sector, snapshot), 114457cfdd8bSKent Overstreet BTREE_ITER_SLOTS); 11451c6fdbd8SKent Overstreet while (1) { 11461c6fdbd8SKent Overstreet struct bkey_s_c k; 114776426098SKent Overstreet unsigned bytes, sectors, offset_into_extent; 11485ff75ccbSKent Overstreet enum btree_id data_btree = BTREE_ID_extents; 11491c6fdbd8SKent Overstreet 11503737e0ddSKent Overstreet /* 11513737e0ddSKent Overstreet * read_extent -> io_time_reset may cause a transaction restart 11523737e0ddSKent Overstreet * without returning an error, we need to check for that here: 11533737e0ddSKent Overstreet */ 1154549d173cSKent Overstreet ret = bch2_trans_relock(trans); 1155549d173cSKent Overstreet if (ret) 11563737e0ddSKent Overstreet break; 11573737e0ddSKent Overstreet 11588c6d298aSKent Overstreet bch2_btree_iter_set_pos(&iter, 11598c6d298aSKent Overstreet POS(inum.inum, rbio->bio.bi_iter.bi_sector)); 11601c6fdbd8SKent Overstreet 11618c6d298aSKent Overstreet k = bch2_btree_iter_peek_slot(&iter); 116276426098SKent Overstreet ret = bkey_err(k); 116376426098SKent Overstreet if (ret) 116476426098SKent Overstreet break; 11651c6fdbd8SKent Overstreet 11668c6d298aSKent Overstreet offset_into_extent = iter.pos.offset - 116706ed8558SKent Overstreet bkey_start_offset(k.k); 116876426098SKent Overstreet sectors = k.k->size - offset_into_extent; 116976426098SKent Overstreet 117007a1006aSKent Overstreet bch2_bkey_buf_reassemble(&sk, c, k); 117113dcd4abSKent Overstreet 11725ff75ccbSKent Overstreet ret = bch2_read_indirect_extent(trans, &data_btree, 117322d8a33dSYuxuan Shui &offset_into_extent, &sk); 117476426098SKent Overstreet if (ret) 117576426098SKent Overstreet break; 117676426098SKent Overstreet 117713dcd4abSKent Overstreet k = bkey_i_to_s_c(sk.k); 117813dcd4abSKent Overstreet 117976426098SKent Overstreet sectors = min(sectors, k.k->size - offset_into_extent); 118076426098SKent Overstreet 118135189e09SKent Overstreet if (readpages_iter) 118235189e09SKent Overstreet readpage_bio_extend(readpages_iter, &rbio->bio, sectors, 118335189e09SKent Overstreet extent_partial_reads_expensive(k)); 11841c6fdbd8SKent Overstreet 118576426098SKent Overstreet bytes = min(sectors, bio_sectors(&rbio->bio)) << 9; 118606ed8558SKent Overstreet swap(rbio->bio.bi_iter.bi_size, bytes); 11871c6fdbd8SKent Overstreet 118806ed8558SKent Overstreet if (rbio->bio.bi_iter.bi_size == bytes) 11891c6fdbd8SKent Overstreet flags |= BCH_READ_LAST_FRAGMENT; 11901c6fdbd8SKent Overstreet 1191b44a66a6SKent Overstreet bch2_bio_page_state_set(&rbio->bio, k); 11921c6fdbd8SKent Overstreet 11938c6d298aSKent Overstreet bch2_read_extent(trans, rbio, iter.pos, 11945ff75ccbSKent Overstreet data_btree, k, offset_into_extent, flags); 11951c6fdbd8SKent Overstreet 11961c6fdbd8SKent Overstreet if (flags & BCH_READ_LAST_FRAGMENT) 119735189e09SKent Overstreet break; 11981c6fdbd8SKent Overstreet 119906ed8558SKent Overstreet swap(rbio->bio.bi_iter.bi_size, bytes); 120006ed8558SKent Overstreet bio_advance(&rbio->bio, bytes); 1201084d42bbSKent Overstreet 1202084d42bbSKent Overstreet ret = btree_trans_too_many_iters(trans); 1203084d42bbSKent Overstreet if (ret) 1204084d42bbSKent Overstreet break; 12051c6fdbd8SKent Overstreet } 12068c6d298aSKent Overstreet err: 12078c6d298aSKent Overstreet bch2_trans_iter_exit(trans, &iter); 120876426098SKent Overstreet 1209549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 121076426098SKent Overstreet goto retry; 121176426098SKent Overstreet 121235189e09SKent Overstreet if (ret) { 12137fec8266SKent Overstreet bch_err_inum_offset_ratelimited(c, 12147fec8266SKent Overstreet iter.pos.inode, 12157fec8266SKent Overstreet iter.pos.offset << 9, 12160fefe8d8SKent Overstreet "read error %i from btree lookup", ret); 12170fefe8d8SKent Overstreet rbio->bio.bi_status = BLK_STS_IOERR; 121876426098SKent Overstreet bio_endio(&rbio->bio); 12191c6fdbd8SKent Overstreet } 12201c6fdbd8SKent Overstreet 122107a1006aSKent Overstreet bch2_bkey_buf_exit(&sk, c); 122235189e09SKent Overstreet } 122335189e09SKent Overstreet 12241c6fdbd8SKent Overstreet void bch2_readahead(struct readahead_control *ractl) 12251c6fdbd8SKent Overstreet { 12261c6fdbd8SKent Overstreet struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host); 12271c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 122801ad6737SKent Overstreet struct bch_io_opts opts; 1229424eb881SKent Overstreet struct btree_trans trans; 123030bff594SKent Overstreet struct folio *folio; 12311c6fdbd8SKent Overstreet struct readpages_iter readpages_iter; 12321c6fdbd8SKent Overstreet int ret; 12331c6fdbd8SKent Overstreet 123401ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 123501ad6737SKent Overstreet 12361c6fdbd8SKent Overstreet ret = readpages_iter_init(&readpages_iter, ractl); 12371c6fdbd8SKent Overstreet BUG_ON(ret); 12381c6fdbd8SKent Overstreet 123920bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 12401c6fdbd8SKent Overstreet 1241a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 12421c6fdbd8SKent Overstreet 124330bff594SKent Overstreet while ((folio = readpage_iter_next(&readpages_iter))) { 12441c6fdbd8SKent Overstreet pgoff_t index = readpages_iter.offset + readpages_iter.idx; 12451c6fdbd8SKent Overstreet unsigned n = min_t(unsigned, 12461c6fdbd8SKent Overstreet readpages_iter.nr_pages - 12471c6fdbd8SKent Overstreet readpages_iter.idx, 12481c6fdbd8SKent Overstreet BIO_MAX_VECS); 12491c6fdbd8SKent Overstreet struct bch_read_bio *rbio = 12501c6fdbd8SKent Overstreet rbio_init(bio_alloc_bioset(NULL, n, REQ_OP_READ, 12511c6fdbd8SKent Overstreet GFP_NOFS, &c->bio_read), 12521c6fdbd8SKent Overstreet opts); 12531c6fdbd8SKent Overstreet 12541c6fdbd8SKent Overstreet readpages_iter.idx++; 12551c6fdbd8SKent Overstreet 12567279c1a2SKent Overstreet rbio->bio.bi_iter.bi_sector = (sector_t) index << PAGE_SECTORS_SHIFT; 12571c6fdbd8SKent Overstreet rbio->bio.bi_end_io = bch2_readpages_end_io; 125830bff594SKent Overstreet BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); 12591c6fdbd8SKent Overstreet 12608c6d298aSKent Overstreet bchfs_read(&trans, rbio, inode_inum(inode), 12610f238367SKent Overstreet &readpages_iter); 12621c6fdbd8SKent Overstreet } 12631c6fdbd8SKent Overstreet 1264a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 1265424eb881SKent Overstreet 1266424eb881SKent Overstreet bch2_trans_exit(&trans); 12671c6fdbd8SKent Overstreet kfree(readpages_iter.pages); 12681c6fdbd8SKent Overstreet } 12691c6fdbd8SKent Overstreet 127030bff594SKent Overstreet static void __bchfs_readfolio(struct bch_fs *c, struct bch_read_bio *rbio, 127130bff594SKent Overstreet subvol_inum inum, struct folio *folio) 12721c6fdbd8SKent Overstreet { 1273424eb881SKent Overstreet struct btree_trans trans; 12741c6fdbd8SKent Overstreet 127530bff594SKent Overstreet bch2_folio_create(folio, __GFP_NOFAIL); 12761c6fdbd8SKent Overstreet 12771c6fdbd8SKent Overstreet rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC; 127830bff594SKent Overstreet rbio->bio.bi_iter.bi_sector = folio_sector(folio); 127930bff594SKent Overstreet BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); 12801c6fdbd8SKent Overstreet 128120bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 12828c6d298aSKent Overstreet bchfs_read(&trans, rbio, inum, NULL); 1283424eb881SKent Overstreet bch2_trans_exit(&trans); 12841c6fdbd8SKent Overstreet } 12851c6fdbd8SKent Overstreet 128630bff594SKent Overstreet static void bch2_read_single_folio_end_io(struct bio *bio) 12871c6fdbd8SKent Overstreet { 12881c6fdbd8SKent Overstreet complete(bio->bi_private); 12891c6fdbd8SKent Overstreet } 12901c6fdbd8SKent Overstreet 129130bff594SKent Overstreet static int bch2_read_single_folio(struct folio *folio, 12921c6fdbd8SKent Overstreet struct address_space *mapping) 12931c6fdbd8SKent Overstreet { 12941c6fdbd8SKent Overstreet struct bch_inode_info *inode = to_bch_ei(mapping->host); 12951c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 12961c6fdbd8SKent Overstreet struct bch_read_bio *rbio; 129701ad6737SKent Overstreet struct bch_io_opts opts; 12981c6fdbd8SKent Overstreet int ret; 12991c6fdbd8SKent Overstreet DECLARE_COMPLETION_ONSTACK(done); 13001c6fdbd8SKent Overstreet 130101ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 130201ad6737SKent Overstreet 13031c6fdbd8SKent Overstreet rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_NOFS, &c->bio_read), 130401ad6737SKent Overstreet opts); 13051c6fdbd8SKent Overstreet rbio->bio.bi_private = &done; 130630bff594SKent Overstreet rbio->bio.bi_end_io = bch2_read_single_folio_end_io; 13071c6fdbd8SKent Overstreet 130830bff594SKent Overstreet __bchfs_readfolio(c, rbio, inode_inum(inode), folio); 13091c6fdbd8SKent Overstreet wait_for_completion(&done); 13101c6fdbd8SKent Overstreet 13111c6fdbd8SKent Overstreet ret = blk_status_to_errno(rbio->bio.bi_status); 13121c6fdbd8SKent Overstreet bio_put(&rbio->bio); 13131c6fdbd8SKent Overstreet 13141c6fdbd8SKent Overstreet if (ret < 0) 13151c6fdbd8SKent Overstreet return ret; 13161c6fdbd8SKent Overstreet 131730bff594SKent Overstreet folio_mark_uptodate(folio); 13181c6fdbd8SKent Overstreet return 0; 13191c6fdbd8SKent Overstreet } 13201c6fdbd8SKent Overstreet 13211c6fdbd8SKent Overstreet int bch2_read_folio(struct file *file, struct folio *folio) 13221c6fdbd8SKent Overstreet { 13231c6fdbd8SKent Overstreet int ret; 13241c6fdbd8SKent Overstreet 132530bff594SKent Overstreet ret = bch2_read_single_folio(folio, folio->mapping); 13261c6fdbd8SKent Overstreet folio_unlock(folio); 13275c1ef830SKent Overstreet return bch2_err_class(ret); 13281c6fdbd8SKent Overstreet } 13291c6fdbd8SKent Overstreet 13301c6fdbd8SKent Overstreet /* writepages: */ 13311c6fdbd8SKent Overstreet 13321c6fdbd8SKent Overstreet struct bch_writepage_state { 13331c6fdbd8SKent Overstreet struct bch_writepage_io *io; 13341c6fdbd8SKent Overstreet struct bch_io_opts opts; 133549fe78ffSKent Overstreet struct bch_folio_sector *tmp; 133649fe78ffSKent Overstreet unsigned tmp_sectors; 13371c6fdbd8SKent Overstreet }; 13381c6fdbd8SKent Overstreet 13391c6fdbd8SKent Overstreet static inline struct bch_writepage_state bch_writepage_state_init(struct bch_fs *c, 13401c6fdbd8SKent Overstreet struct bch_inode_info *inode) 13411c6fdbd8SKent Overstreet { 134201ad6737SKent Overstreet struct bch_writepage_state ret = { 0 }; 134301ad6737SKent Overstreet 134401ad6737SKent Overstreet bch2_inode_opts_get(&ret.opts, c, &inode->ei_inode); 134501ad6737SKent Overstreet return ret; 13461c6fdbd8SKent Overstreet } 13471c6fdbd8SKent Overstreet 13489f311f21SKent Overstreet static void bch2_writepage_io_done(struct bch_write_op *op) 13491c6fdbd8SKent Overstreet { 13509f311f21SKent Overstreet struct bch_writepage_io *io = 13519f311f21SKent Overstreet container_of(op, struct bch_writepage_io, op); 13529a3df993SKent Overstreet struct bch_fs *c = io->op.c; 13539a3df993SKent Overstreet struct bio *bio = &io->op.wbio.bio; 1354ff9c301fSKent Overstreet struct folio_iter fi; 1355b3fce09cSKent Overstreet unsigned i; 13561c6fdbd8SKent Overstreet 13579a3df993SKent Overstreet if (io->op.error) { 135833c74e41SKent Overstreet set_bit(EI_INODE_ERROR, &io->inode->ei_flags); 135933c74e41SKent Overstreet 1360ff9c301fSKent Overstreet bio_for_each_folio_all(fi, bio) { 13613342ac13SKent Overstreet struct bch_folio *s; 1362b3fce09cSKent Overstreet 1363ff9c301fSKent Overstreet folio_set_error(fi.folio); 1364ff9c301fSKent Overstreet mapping_set_error(fi.folio->mapping, -EIO); 1365b3fce09cSKent Overstreet 1366ff9c301fSKent Overstreet s = __bch2_folio(fi.folio); 13673826ee0bSKent Overstreet spin_lock(&s->lock); 1368ff9c301fSKent Overstreet for (i = 0; i < folio_sectors(fi.folio); i++) 1369b3fce09cSKent Overstreet s->s[i].nr_replicas = 0; 13703826ee0bSKent Overstreet spin_unlock(&s->lock); 137175812e70SKent Overstreet } 13721c6fdbd8SKent Overstreet } 13731c6fdbd8SKent Overstreet 13744be1a412SKent Overstreet if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) { 1375ff9c301fSKent Overstreet bio_for_each_folio_all(fi, bio) { 13763342ac13SKent Overstreet struct bch_folio *s; 13774be1a412SKent Overstreet 1378ff9c301fSKent Overstreet s = __bch2_folio(fi.folio); 13794be1a412SKent Overstreet spin_lock(&s->lock); 1380ff9c301fSKent Overstreet for (i = 0; i < folio_sectors(fi.folio); i++) 13814be1a412SKent Overstreet s->s[i].nr_replicas = 0; 13824be1a412SKent Overstreet spin_unlock(&s->lock); 13834be1a412SKent Overstreet } 13844be1a412SKent Overstreet } 13854be1a412SKent Overstreet 13861c6fdbd8SKent Overstreet /* 13871c6fdbd8SKent Overstreet * racing with fallocate can cause us to add fewer sectors than 13881c6fdbd8SKent Overstreet * expected - but we shouldn't add more sectors than expected: 13891c6fdbd8SKent Overstreet */ 1390f8494d25SKent Overstreet WARN_ON_ONCE(io->op.i_sectors_delta > 0); 13911c6fdbd8SKent Overstreet 13921c6fdbd8SKent Overstreet /* 13931c6fdbd8SKent Overstreet * (error (due to going RO) halfway through a page can screw that up 13941c6fdbd8SKent Overstreet * slightly) 13951c6fdbd8SKent Overstreet * XXX wtf? 13969a3df993SKent Overstreet BUG_ON(io->op.op.i_sectors_delta >= PAGE_SECTORS); 13971c6fdbd8SKent Overstreet */ 13981c6fdbd8SKent Overstreet 13991c6fdbd8SKent Overstreet /* 14001c6fdbd8SKent Overstreet * PageWriteback is effectively our ref on the inode - fixup i_blocks 14011c6fdbd8SKent Overstreet * before calling end_page_writeback: 14021c6fdbd8SKent Overstreet */ 14039a3df993SKent Overstreet i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta); 14041c6fdbd8SKent Overstreet 1405ff9c301fSKent Overstreet bio_for_each_folio_all(fi, bio) { 1406ff9c301fSKent Overstreet struct bch_folio *s = __bch2_folio(fi.folio); 14077f5e31e1SKent Overstreet 14087f5e31e1SKent Overstreet if (atomic_dec_and_test(&s->write_count)) 1409ff9c301fSKent Overstreet folio_end_writeback(fi.folio); 14107f5e31e1SKent Overstreet } 14111c6fdbd8SKent Overstreet 14129f311f21SKent Overstreet bio_put(&io->op.wbio.bio); 14131c6fdbd8SKent Overstreet } 14141c6fdbd8SKent Overstreet 14151c6fdbd8SKent Overstreet static void bch2_writepage_do_io(struct bch_writepage_state *w) 14161c6fdbd8SKent Overstreet { 14171c6fdbd8SKent Overstreet struct bch_writepage_io *io = w->io; 14181c6fdbd8SKent Overstreet 14191c6fdbd8SKent Overstreet w->io = NULL; 14209f311f21SKent Overstreet closure_call(&io->op.cl, bch2_write, NULL, NULL); 14211c6fdbd8SKent Overstreet } 14221c6fdbd8SKent Overstreet 14231c6fdbd8SKent Overstreet /* 14241c6fdbd8SKent Overstreet * Get a bch_writepage_io and add @page to it - appending to an existing one if 14251c6fdbd8SKent Overstreet * possible, else allocating a new one: 14261c6fdbd8SKent Overstreet */ 14271c6fdbd8SKent Overstreet static void bch2_writepage_io_alloc(struct bch_fs *c, 142850fe5bd6SKent Overstreet struct writeback_control *wbc, 14291c6fdbd8SKent Overstreet struct bch_writepage_state *w, 14301c6fdbd8SKent Overstreet struct bch_inode_info *inode, 14317f5e31e1SKent Overstreet u64 sector, 14321c6fdbd8SKent Overstreet unsigned nr_replicas) 14331c6fdbd8SKent Overstreet { 14341c6fdbd8SKent Overstreet struct bch_write_op *op; 14351c6fdbd8SKent Overstreet 14361c6fdbd8SKent Overstreet w->io = container_of(bio_alloc_bioset(NULL, BIO_MAX_VECS, 14371c6fdbd8SKent Overstreet REQ_OP_WRITE, 14381c6fdbd8SKent Overstreet GFP_NOFS, 14391c6fdbd8SKent Overstreet &c->writepage_bioset), 14409a3df993SKent Overstreet struct bch_writepage_io, op.wbio.bio); 14411c6fdbd8SKent Overstreet 14429a3df993SKent Overstreet w->io->inode = inode; 14439a3df993SKent Overstreet op = &w->io->op; 14449a3df993SKent Overstreet bch2_write_op_init(op, c, w->opts); 14459a3df993SKent Overstreet op->target = w->opts.foreground_target; 14461c6fdbd8SKent Overstreet op->nr_replicas = nr_replicas; 14471c6fdbd8SKent Overstreet op->res.nr_replicas = nr_replicas; 14481c6fdbd8SKent Overstreet op->write_point = writepoint_hashed(inode->ei_last_dirtied); 14498c6d298aSKent Overstreet op->subvol = inode->ei_subvol; 14507f5e31e1SKent Overstreet op->pos = POS(inode->v.i_ino, sector); 14519f311f21SKent Overstreet op->end_io = bch2_writepage_io_done; 1452a8b3a677SKent Overstreet op->devs_need_flush = &inode->ei_devs_need_flush; 14537f5e31e1SKent Overstreet op->wbio.bio.bi_iter.bi_sector = sector; 145450fe5bd6SKent Overstreet op->wbio.bio.bi_opf = wbc_to_write_flags(wbc); 14551c6fdbd8SKent Overstreet } 14561c6fdbd8SKent Overstreet 14571c6fdbd8SKent Overstreet static int __bch2_writepage(struct folio *folio, 14581c6fdbd8SKent Overstreet struct writeback_control *wbc, 14591c6fdbd8SKent Overstreet void *data) 14601c6fdbd8SKent Overstreet { 146130bff594SKent Overstreet struct bch_inode_info *inode = to_bch_ei(folio->mapping->host); 14621c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 14631c6fdbd8SKent Overstreet struct bch_writepage_state *w = data; 146449fe78ffSKent Overstreet struct bch_folio *s; 146530bff594SKent Overstreet unsigned i, offset, f_sectors, nr_replicas_this_write = U32_MAX; 14661c6fdbd8SKent Overstreet loff_t i_size = i_size_read(&inode->v); 1467e1036a2aSKent Overstreet int ret; 14681c6fdbd8SKent Overstreet 146930bff594SKent Overstreet EBUG_ON(!folio_test_uptodate(folio)); 14701c6fdbd8SKent Overstreet 147130bff594SKent Overstreet /* Is the folio fully inside i_size? */ 147233e2eb96SKent Overstreet if (folio_end_pos(folio) <= i_size) 14731c6fdbd8SKent Overstreet goto do_io; 14741c6fdbd8SKent Overstreet 147530bff594SKent Overstreet /* Is the folio fully outside i_size? (truncate in progress) */ 147633e2eb96SKent Overstreet if (folio_pos(folio) >= i_size) { 147730bff594SKent Overstreet folio_unlock(folio); 14781c6fdbd8SKent Overstreet return 0; 14791c6fdbd8SKent Overstreet } 14801c6fdbd8SKent Overstreet 14811c6fdbd8SKent Overstreet /* 148230bff594SKent Overstreet * The folio straddles i_size. It must be zeroed out on each and every 14831c6fdbd8SKent Overstreet * writepage invocation because it may be mmapped. "A file is mapped 148430bff594SKent Overstreet * in multiples of the folio size. For a file that is not a multiple of 148530bff594SKent Overstreet * the folio size, the remaining memory is zeroed when mapped, and 14861c6fdbd8SKent Overstreet * writes to that region are not written out to the file." 14871c6fdbd8SKent Overstreet */ 148833e2eb96SKent Overstreet folio_zero_segment(folio, 148933e2eb96SKent Overstreet i_size - folio_pos(folio), 149033e2eb96SKent Overstreet folio_size(folio)); 14911c6fdbd8SKent Overstreet do_io: 149230bff594SKent Overstreet f_sectors = folio_sectors(folio); 149330bff594SKent Overstreet s = bch2_folio_create(folio, __GFP_NOFAIL); 1494f81b648dSKent Overstreet 149549fe78ffSKent Overstreet if (f_sectors > w->tmp_sectors) { 149649fe78ffSKent Overstreet kfree(w->tmp); 149749fe78ffSKent Overstreet w->tmp = kzalloc(sizeof(struct bch_folio_sector) * 149849fe78ffSKent Overstreet f_sectors, __GFP_NOFAIL); 149949fe78ffSKent Overstreet w->tmp_sectors = f_sectors; 150049fe78ffSKent Overstreet } 150149fe78ffSKent Overstreet 1502f74a5051SKent Overstreet /* 1503f74a5051SKent Overstreet * Things get really hairy with errors during writeback: 1504f74a5051SKent Overstreet */ 150530bff594SKent Overstreet ret = bch2_get_folio_disk_reservation(c, inode, folio, false); 1506f74a5051SKent Overstreet BUG_ON(ret); 1507e1036a2aSKent Overstreet 15087f5e31e1SKent Overstreet /* Before unlocking the page, get copy of reservations: */ 1509f74a5051SKent Overstreet spin_lock(&s->lock); 151049fe78ffSKent Overstreet memcpy(w->tmp, s->s, sizeof(struct bch_folio_sector) * f_sectors); 15117f5e31e1SKent Overstreet 151230bff594SKent Overstreet for (i = 0; i < f_sectors; i++) { 1513*a1774a05SKent Overstreet if (s->s[i].state < SECTOR_dirty) 15147f5e31e1SKent Overstreet continue; 15157f5e31e1SKent Overstreet 1516f81b648dSKent Overstreet nr_replicas_this_write = 1517f57a6a5dSKent Overstreet min_t(unsigned, nr_replicas_this_write, 1518f57a6a5dSKent Overstreet s->s[i].nr_replicas + 1519f57a6a5dSKent Overstreet s->s[i].replicas_reserved); 15207f5e31e1SKent Overstreet } 15211c6fdbd8SKent Overstreet 152230bff594SKent Overstreet for (i = 0; i < f_sectors; i++) { 1523*a1774a05SKent Overstreet if (s->s[i].state < SECTOR_dirty) 15247f5e31e1SKent Overstreet continue; 15257f5e31e1SKent Overstreet 1526f57a6a5dSKent Overstreet s->s[i].nr_replicas = w->opts.compression 1527f57a6a5dSKent Overstreet ? 0 : nr_replicas_this_write; 1528e1036a2aSKent Overstreet 1529f57a6a5dSKent Overstreet s->s[i].replicas_reserved = 0; 1530*a1774a05SKent Overstreet folio_sector_set(folio, s, i, SECTOR_allocated); 1531f57a6a5dSKent Overstreet } 1532*a1774a05SKent Overstreet spin_unlock(&s->lock); 15331c6fdbd8SKent Overstreet 15347f5e31e1SKent Overstreet BUG_ON(atomic_read(&s->write_count)); 15357f5e31e1SKent Overstreet atomic_set(&s->write_count, 1); 15367f5e31e1SKent Overstreet 153730bff594SKent Overstreet BUG_ON(folio_test_writeback(folio)); 153830bff594SKent Overstreet folio_start_writeback(folio); 15397f5e31e1SKent Overstreet 154030bff594SKent Overstreet folio_unlock(folio); 15411c6fdbd8SKent Overstreet 15427f5e31e1SKent Overstreet offset = 0; 15437f5e31e1SKent Overstreet while (1) { 1544f74a5051SKent Overstreet unsigned sectors = 0, dirty_sectors = 0, reserved_sectors = 0; 15457f5e31e1SKent Overstreet u64 sector; 15467f5e31e1SKent Overstreet 154730bff594SKent Overstreet while (offset < f_sectors && 1548*a1774a05SKent Overstreet w->tmp[offset].state < SECTOR_dirty) 15497f5e31e1SKent Overstreet offset++; 15507f5e31e1SKent Overstreet 155130bff594SKent Overstreet if (offset == f_sectors) 15527f5e31e1SKent Overstreet break; 15537f5e31e1SKent Overstreet 155430bff594SKent Overstreet while (offset + sectors < f_sectors && 1555*a1774a05SKent Overstreet w->tmp[offset + sectors].state >= SECTOR_dirty) { 155649fe78ffSKent Overstreet reserved_sectors += w->tmp[offset + sectors].replicas_reserved; 1557*a1774a05SKent Overstreet dirty_sectors += w->tmp[offset + sectors].state == SECTOR_dirty; 15587f5e31e1SKent Overstreet sectors++; 15597f5e31e1SKent Overstreet } 1560f74a5051SKent Overstreet BUG_ON(!sectors); 1561f74a5051SKent Overstreet 156230bff594SKent Overstreet sector = folio_sector(folio) + offset; 15637f5e31e1SKent Overstreet 15641c6fdbd8SKent Overstreet if (w->io && 15659a3df993SKent Overstreet (w->io->op.res.nr_replicas != nr_replicas_this_write || 156633e2eb96SKent Overstreet bio_full(&w->io->op.wbio.bio, sectors << 9) || 1567f59b3464SKent Overstreet w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >= 1568f59b3464SKent Overstreet (BIO_MAX_VECS * PAGE_SIZE) || 15699a3df993SKent Overstreet bio_end_sector(&w->io->op.wbio.bio) != sector)) 15701c6fdbd8SKent Overstreet bch2_writepage_do_io(w); 15711c6fdbd8SKent Overstreet 15721c6fdbd8SKent Overstreet if (!w->io) 157350fe5bd6SKent Overstreet bch2_writepage_io_alloc(c, wbc, w, inode, sector, 1574f81b648dSKent Overstreet nr_replicas_this_write); 15751c6fdbd8SKent Overstreet 15767f5e31e1SKent Overstreet atomic_inc(&s->write_count); 15777f5e31e1SKent Overstreet 15789a3df993SKent Overstreet BUG_ON(inode != w->io->inode); 157930bff594SKent Overstreet BUG_ON(!bio_add_folio(&w->io->op.wbio.bio, folio, 15807f5e31e1SKent Overstreet sectors << 9, offset << 9)); 15811c6fdbd8SKent Overstreet 15826cc3535dSKent Overstreet /* Check for writing past i_size: */ 15838eb71e9eSKent Overstreet WARN_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) > 158480fe580cSKent Overstreet round_up(i_size, block_bytes(c)) && 15858eb71e9eSKent Overstreet !test_bit(BCH_FS_EMERGENCY_RO, &c->flags), 15868eb71e9eSKent Overstreet "writing past i_size: %llu > %llu (unrounded %llu)\n", 15878eb71e9eSKent Overstreet bio_end_sector(&w->io->op.wbio.bio) << 9, 15888eb71e9eSKent Overstreet round_up(i_size, block_bytes(c)), 15898eb71e9eSKent Overstreet i_size); 15906cc3535dSKent Overstreet 15919a3df993SKent Overstreet w->io->op.res.sectors += reserved_sectors; 15929a3df993SKent Overstreet w->io->op.i_sectors_delta -= dirty_sectors; 15931c6fdbd8SKent Overstreet w->io->op.new_i_size = i_size; 15941c6fdbd8SKent Overstreet 15957f5e31e1SKent Overstreet offset += sectors; 15967f5e31e1SKent Overstreet } 15977f5e31e1SKent Overstreet 15987f5e31e1SKent Overstreet if (atomic_dec_and_test(&s->write_count)) 159930bff594SKent Overstreet folio_end_writeback(folio); 16007f5e31e1SKent Overstreet 16011c6fdbd8SKent Overstreet return 0; 16021c6fdbd8SKent Overstreet } 16031c6fdbd8SKent Overstreet 16041c6fdbd8SKent Overstreet int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc) 16051c6fdbd8SKent Overstreet { 16061c6fdbd8SKent Overstreet struct bch_fs *c = mapping->host->i_sb->s_fs_info; 16071c6fdbd8SKent Overstreet struct bch_writepage_state w = 16081c6fdbd8SKent Overstreet bch_writepage_state_init(c, to_bch_ei(mapping->host)); 16091c6fdbd8SKent Overstreet struct blk_plug plug; 16101c6fdbd8SKent Overstreet int ret; 16111c6fdbd8SKent Overstreet 16121c6fdbd8SKent Overstreet blk_start_plug(&plug); 16131c6fdbd8SKent Overstreet ret = write_cache_pages(mapping, wbc, __bch2_writepage, &w); 16141c6fdbd8SKent Overstreet if (w.io) 16151c6fdbd8SKent Overstreet bch2_writepage_do_io(&w); 16161c6fdbd8SKent Overstreet blk_finish_plug(&plug); 161749fe78ffSKent Overstreet kfree(w.tmp); 16185c1ef830SKent Overstreet return bch2_err_class(ret); 16191c6fdbd8SKent Overstreet } 16201c6fdbd8SKent Overstreet 16211c6fdbd8SKent Overstreet /* buffered writes: */ 16221c6fdbd8SKent Overstreet 16231c6fdbd8SKent Overstreet int bch2_write_begin(struct file *file, struct address_space *mapping, 16241c6fdbd8SKent Overstreet loff_t pos, unsigned len, 16251c6fdbd8SKent Overstreet struct page **pagep, void **fsdata) 16261c6fdbd8SKent Overstreet { 16271c6fdbd8SKent Overstreet struct bch_inode_info *inode = to_bch_ei(mapping->host); 16281c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 162930bff594SKent Overstreet struct bch2_folio_reservation *res; 163030bff594SKent Overstreet struct folio *folio; 163133e2eb96SKent Overstreet unsigned offset; 16321c6fdbd8SKent Overstreet int ret = -ENOMEM; 16331c6fdbd8SKent Overstreet 1634d1542e03SKent Overstreet res = kmalloc(sizeof(*res), GFP_KERNEL); 1635d1542e03SKent Overstreet if (!res) 1636d1542e03SKent Overstreet return -ENOMEM; 1637d1542e03SKent Overstreet 163830bff594SKent Overstreet bch2_folio_reservation_init(c, inode, res); 1639d1542e03SKent Overstreet *fsdata = res; 16401c6fdbd8SKent Overstreet 1641a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 16421c6fdbd8SKent Overstreet 164333e2eb96SKent Overstreet folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, 164430bff594SKent Overstreet FGP_LOCK|FGP_WRITE|FGP_CREAT|FGP_STABLE, 164530bff594SKent Overstreet mapping_gfp_mask(mapping)); 164630bff594SKent Overstreet if (!folio) 16471c6fdbd8SKent Overstreet goto err_unlock; 16481c6fdbd8SKent Overstreet 164930bff594SKent Overstreet if (folio_test_uptodate(folio)) 16501c6fdbd8SKent Overstreet goto out; 16511c6fdbd8SKent Overstreet 165233e2eb96SKent Overstreet offset = pos - folio_pos(folio); 165333e2eb96SKent Overstreet len = min_t(size_t, len, folio_end_pos(folio) - pos); 165433e2eb96SKent Overstreet 165530bff594SKent Overstreet /* If we're writing entire folio, don't need to read it in first: */ 165633e2eb96SKent Overstreet if (!offset && len == folio_size(folio)) 16571c6fdbd8SKent Overstreet goto out; 16581c6fdbd8SKent Overstreet 16591c6fdbd8SKent Overstreet if (!offset && pos + len >= inode->v.i_size) { 166030bff594SKent Overstreet folio_zero_segment(folio, len, folio_size(folio)); 166130bff594SKent Overstreet flush_dcache_folio(folio); 16621c6fdbd8SKent Overstreet goto out; 16631c6fdbd8SKent Overstreet } 16641c6fdbd8SKent Overstreet 166533e2eb96SKent Overstreet if (folio_pos(folio) >= inode->v.i_size) { 166630bff594SKent Overstreet folio_zero_segments(folio, 0, offset, offset + len, folio_size(folio)); 166730bff594SKent Overstreet flush_dcache_folio(folio); 16681c6fdbd8SKent Overstreet goto out; 16691c6fdbd8SKent Overstreet } 16701c6fdbd8SKent Overstreet readpage: 167130bff594SKent Overstreet ret = bch2_read_single_folio(folio, mapping); 16721c6fdbd8SKent Overstreet if (ret) 16731c6fdbd8SKent Overstreet goto err; 16741c6fdbd8SKent Overstreet out: 167530bff594SKent Overstreet if (!bch2_folio_create(folio, __GFP_NOFAIL)->uptodate) { 167630bff594SKent Overstreet ret = bch2_folio_set(c, inode_inum(inode), &folio, 1); 1677e6ec361fSKent Overstreet if (ret) 16783a4d3656SKent Overstreet goto err; 1679e6ec361fSKent Overstreet } 1680e6ec361fSKent Overstreet 168130bff594SKent Overstreet ret = bch2_folio_reservation_get(c, inode, folio, res, offset, len); 16821c6fdbd8SKent Overstreet if (ret) { 168330bff594SKent Overstreet if (!folio_test_uptodate(folio)) { 16841c6fdbd8SKent Overstreet /* 168530bff594SKent Overstreet * If the folio hasn't been read in, we won't know if we 16861c6fdbd8SKent Overstreet * actually need a reservation - we don't actually need 168730bff594SKent Overstreet * to read here, we just need to check if the folio is 16881c6fdbd8SKent Overstreet * fully backed by uncompressed data: 16891c6fdbd8SKent Overstreet */ 16901c6fdbd8SKent Overstreet goto readpage; 16911c6fdbd8SKent Overstreet } 16921c6fdbd8SKent Overstreet 16931c6fdbd8SKent Overstreet goto err; 16941c6fdbd8SKent Overstreet } 16951c6fdbd8SKent Overstreet 169630bff594SKent Overstreet *pagep = &folio->page; 16971c6fdbd8SKent Overstreet return 0; 16981c6fdbd8SKent Overstreet err: 169930bff594SKent Overstreet folio_unlock(folio); 170030bff594SKent Overstreet folio_put(folio); 17011c6fdbd8SKent Overstreet *pagep = NULL; 17021c6fdbd8SKent Overstreet err_unlock: 1703a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 1704d1542e03SKent Overstreet kfree(res); 1705d1542e03SKent Overstreet *fsdata = NULL; 17065c1ef830SKent Overstreet return bch2_err_class(ret); 17071c6fdbd8SKent Overstreet } 17081c6fdbd8SKent Overstreet 17091c6fdbd8SKent Overstreet int bch2_write_end(struct file *file, struct address_space *mapping, 17101c6fdbd8SKent Overstreet loff_t pos, unsigned len, unsigned copied, 17111c6fdbd8SKent Overstreet struct page *page, void *fsdata) 17121c6fdbd8SKent Overstreet { 17131c6fdbd8SKent Overstreet struct bch_inode_info *inode = to_bch_ei(mapping->host); 17141c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 171530bff594SKent Overstreet struct bch2_folio_reservation *res = fsdata; 171630bff594SKent Overstreet struct folio *folio = page_folio(page); 171733e2eb96SKent Overstreet unsigned offset = pos - folio_pos(folio); 17181c6fdbd8SKent Overstreet 17191c6fdbd8SKent Overstreet lockdep_assert_held(&inode->v.i_rwsem); 172033e2eb96SKent Overstreet BUG_ON(offset + copied > folio_size(folio)); 17211c6fdbd8SKent Overstreet 172230bff594SKent Overstreet if (unlikely(copied < len && !folio_test_uptodate(folio))) { 17231c6fdbd8SKent Overstreet /* 172430bff594SKent Overstreet * The folio needs to be read in, but that would destroy 17251c6fdbd8SKent Overstreet * our partial write - simplest thing is to just force 17261c6fdbd8SKent Overstreet * userspace to redo the write: 17271c6fdbd8SKent Overstreet */ 172830bff594SKent Overstreet folio_zero_range(folio, 0, folio_size(folio)); 172930bff594SKent Overstreet flush_dcache_folio(folio); 17301c6fdbd8SKent Overstreet copied = 0; 17311c6fdbd8SKent Overstreet } 17321c6fdbd8SKent Overstreet 17331c6fdbd8SKent Overstreet spin_lock(&inode->v.i_lock); 17341c6fdbd8SKent Overstreet if (pos + copied > inode->v.i_size) 17351c6fdbd8SKent Overstreet i_size_write(&inode->v, pos + copied); 17361c6fdbd8SKent Overstreet spin_unlock(&inode->v.i_lock); 17371c6fdbd8SKent Overstreet 17381c6fdbd8SKent Overstreet if (copied) { 173930bff594SKent Overstreet if (!folio_test_uptodate(folio)) 174030bff594SKent Overstreet folio_mark_uptodate(folio); 1741d1542e03SKent Overstreet 174230bff594SKent Overstreet bch2_set_folio_dirty(c, inode, folio, res, offset, copied); 17431c6fdbd8SKent Overstreet 17441c6fdbd8SKent Overstreet inode->ei_last_dirtied = (unsigned long) current; 17451c6fdbd8SKent Overstreet } 17461c6fdbd8SKent Overstreet 174730bff594SKent Overstreet folio_unlock(folio); 174830bff594SKent Overstreet folio_put(folio); 1749a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 17501c6fdbd8SKent Overstreet 175130bff594SKent Overstreet bch2_folio_reservation_put(c, inode, res); 1752d1542e03SKent Overstreet kfree(res); 1753d1542e03SKent Overstreet 17541c6fdbd8SKent Overstreet return copied; 17551c6fdbd8SKent Overstreet } 17561c6fdbd8SKent Overstreet 1757c42b57c4SKent Overstreet typedef DARRAY(struct folio *) folios; 1758c42b57c4SKent Overstreet 1759c42b57c4SKent Overstreet static noinline void folios_trunc(folios *folios, struct folio **fi) 1760c42b57c4SKent Overstreet { 1761c42b57c4SKent Overstreet while (folios->data + folios->nr > fi) { 1762c42b57c4SKent Overstreet struct folio *f = darray_pop(folios); 1763c42b57c4SKent Overstreet 1764c42b57c4SKent Overstreet folio_unlock(f); 1765c42b57c4SKent Overstreet folio_put(f); 1766c42b57c4SKent Overstreet } 1767c42b57c4SKent Overstreet } 17681c6fdbd8SKent Overstreet 17691c6fdbd8SKent Overstreet static int __bch2_buffered_write(struct bch_inode_info *inode, 17701c6fdbd8SKent Overstreet struct address_space *mapping, 17711c6fdbd8SKent Overstreet struct iov_iter *iter, 17721c6fdbd8SKent Overstreet loff_t pos, unsigned len) 17731c6fdbd8SKent Overstreet { 17741c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 177530bff594SKent Overstreet struct bch2_folio_reservation res; 1776c42b57c4SKent Overstreet folios folios; 1777c42b57c4SKent Overstreet struct folio **fi, *f; 1778c42b57c4SKent Overstreet unsigned copied = 0, f_offset; 1779c42b57c4SKent Overstreet loff_t end = pos + len, f_pos; 17801c6fdbd8SKent Overstreet int ret = 0; 17811c6fdbd8SKent Overstreet 17821c6fdbd8SKent Overstreet BUG_ON(!len); 17831c6fdbd8SKent Overstreet 178430bff594SKent Overstreet bch2_folio_reservation_init(c, inode, &res); 1785c42b57c4SKent Overstreet darray_init(&folios); 1786d1542e03SKent Overstreet 1787c42b57c4SKent Overstreet f_pos = pos; 1788c42b57c4SKent Overstreet while (f_pos < end) { 1789c42b57c4SKent Overstreet unsigned fgp_flags = FGP_LOCK|FGP_WRITE|FGP_STABLE; 1790c42b57c4SKent Overstreet 1791c42b57c4SKent Overstreet if ((u64) f_pos < (u64) pos + (1U << 20)) 1792c42b57c4SKent Overstreet fgp_flags |= FGP_CREAT; 1793c42b57c4SKent Overstreet 1794c42b57c4SKent Overstreet if (darray_make_room_gfp(&folios, 1, 1795c42b57c4SKent Overstreet mapping_gfp_mask(mapping) & GFP_KERNEL)) 1796c42b57c4SKent Overstreet break; 1797c42b57c4SKent Overstreet 1798c42b57c4SKent Overstreet f = __filemap_get_folio(mapping, f_pos >> PAGE_SHIFT, 1799c42b57c4SKent Overstreet fgp_flags, mapping_gfp_mask(mapping)); 1800c42b57c4SKent Overstreet if (!f) 1801c42b57c4SKent Overstreet break; 1802c42b57c4SKent Overstreet 1803c42b57c4SKent Overstreet BUG_ON(folios.nr && folio_pos(f) != f_pos); 1804c42b57c4SKent Overstreet 1805c42b57c4SKent Overstreet f_pos = folio_end_pos(f); 1806c42b57c4SKent Overstreet darray_push(&folios, f); 1807c42b57c4SKent Overstreet } 1808c42b57c4SKent Overstreet 1809c42b57c4SKent Overstreet end = min(end, f_pos); 1810c42b57c4SKent Overstreet if (end == pos) { 18111c6fdbd8SKent Overstreet ret = -ENOMEM; 18121c6fdbd8SKent Overstreet goto out; 18131c6fdbd8SKent Overstreet } 18141c6fdbd8SKent Overstreet 1815c42b57c4SKent Overstreet f = darray_first(folios); 1816c42b57c4SKent Overstreet if (pos != folio_pos(f) && !folio_test_uptodate(f)) { 1817c42b57c4SKent Overstreet ret = bch2_read_single_folio(f, mapping); 18181c6fdbd8SKent Overstreet if (ret) 18191c6fdbd8SKent Overstreet goto out; 18201c6fdbd8SKent Overstreet } 18211c6fdbd8SKent Overstreet 1822c42b57c4SKent Overstreet f = darray_last(folios); 1823c42b57c4SKent Overstreet if (end != folio_end_pos(f) && !folio_test_uptodate(f)) { 1824c42b57c4SKent Overstreet if (end >= inode->v.i_size) { 1825c42b57c4SKent Overstreet folio_zero_range(f, 0, folio_size(f)); 18261c6fdbd8SKent Overstreet } else { 1827c42b57c4SKent Overstreet ret = bch2_read_single_folio(f, mapping); 18281c6fdbd8SKent Overstreet if (ret) 18291c6fdbd8SKent Overstreet goto out; 18301c6fdbd8SKent Overstreet } 18311c6fdbd8SKent Overstreet } 18321c6fdbd8SKent Overstreet 1833c42b57c4SKent Overstreet f_pos = pos; 1834c42b57c4SKent Overstreet f_offset = pos - folio_pos(darray_first(folios)); 1835c42b57c4SKent Overstreet darray_for_each(folios, fi) { 1836c42b57c4SKent Overstreet struct folio *f = *fi; 1837c42b57c4SKent Overstreet unsigned f_len = min(end, folio_end_pos(f)) - f_pos; 18381c6fdbd8SKent Overstreet 1839c42b57c4SKent Overstreet if (!bch2_folio_create(f, __GFP_NOFAIL)->uptodate) { 1840c42b57c4SKent Overstreet ret = bch2_folio_set(c, inode_inum(inode), fi, 1841c42b57c4SKent Overstreet folios.data + folios.nr - fi); 1842e6ec361fSKent Overstreet if (ret) 1843e6ec361fSKent Overstreet goto out; 18441c6fdbd8SKent Overstreet } 18451c6fdbd8SKent Overstreet 1846353448f3SKent Overstreet /* 1847353448f3SKent Overstreet * XXX: per POSIX and fstests generic/275, on -ENOSPC we're 1848353448f3SKent Overstreet * supposed to write as much as we have disk space for. 1849353448f3SKent Overstreet * 1850353448f3SKent Overstreet * On failure here we should still write out a partial page if 1851353448f3SKent Overstreet * we aren't completely out of disk space - we don't do that 1852353448f3SKent Overstreet * yet: 1853353448f3SKent Overstreet */ 1854c42b57c4SKent Overstreet ret = bch2_folio_reservation_get(c, inode, f, &res, f_offset, f_len); 1855353448f3SKent Overstreet if (unlikely(ret)) { 1856c42b57c4SKent Overstreet folios_trunc(&folios, fi); 1857c42b57c4SKent Overstreet if (!folios.nr) 18581c6fdbd8SKent Overstreet goto out; 1859c42b57c4SKent Overstreet 1860c42b57c4SKent Overstreet end = min(end, folio_end_pos(darray_last(folios))); 1861353448f3SKent Overstreet break; 1862353448f3SKent Overstreet } 1863d1542e03SKent Overstreet 1864c42b57c4SKent Overstreet f_pos = folio_end_pos(f); 1865c42b57c4SKent Overstreet f_offset = 0; 18661c6fdbd8SKent Overstreet } 18671c6fdbd8SKent Overstreet 18681c6fdbd8SKent Overstreet if (mapping_writably_mapped(mapping)) 1869c42b57c4SKent Overstreet darray_for_each(folios, fi) 1870c42b57c4SKent Overstreet flush_dcache_folio(*fi); 18711c6fdbd8SKent Overstreet 1872c42b57c4SKent Overstreet f_pos = pos; 1873c42b57c4SKent Overstreet f_offset = pos - folio_pos(darray_first(folios)); 1874c42b57c4SKent Overstreet darray_for_each(folios, fi) { 1875c42b57c4SKent Overstreet struct folio *f = *fi; 1876c42b57c4SKent Overstreet unsigned f_len = min(end, folio_end_pos(f)) - f_pos; 1877c42b57c4SKent Overstreet unsigned f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter); 1878d1542e03SKent Overstreet 1879c42b57c4SKent Overstreet if (!f_copied) { 1880c42b57c4SKent Overstreet folios_trunc(&folios, fi); 1881912bdf17SKent Overstreet break; 1882912bdf17SKent Overstreet } 1883912bdf17SKent Overstreet 1884c42b57c4SKent Overstreet if (!folio_test_uptodate(f) && 1885c42b57c4SKent Overstreet f_copied != folio_size(f) && 1886c42b57c4SKent Overstreet pos + copied + f_copied < inode->v.i_size) { 1887c42b57c4SKent Overstreet folio_zero_range(f, 0, folio_size(f)); 1888c42b57c4SKent Overstreet folios_trunc(&folios, fi); 1889912bdf17SKent Overstreet break; 18901c6fdbd8SKent Overstreet } 18911c6fdbd8SKent Overstreet 1892c42b57c4SKent Overstreet flush_dcache_folio(f); 1893c42b57c4SKent Overstreet copied += f_copied; 1894c42b57c4SKent Overstreet 1895c42b57c4SKent Overstreet if (f_copied != f_len) { 1896c42b57c4SKent Overstreet folios_trunc(&folios, fi + 1); 1897c42b57c4SKent Overstreet break; 1898c42b57c4SKent Overstreet } 1899c42b57c4SKent Overstreet 1900c42b57c4SKent Overstreet f_pos = folio_end_pos(f); 1901c42b57c4SKent Overstreet f_offset = 0; 1902c42b57c4SKent Overstreet } 1903c42b57c4SKent Overstreet 19041c6fdbd8SKent Overstreet if (!copied) 19051c6fdbd8SKent Overstreet goto out; 19061c6fdbd8SKent Overstreet 1907c42b57c4SKent Overstreet end = pos + copied; 1908c42b57c4SKent Overstreet 1909877dfb34SKent Overstreet spin_lock(&inode->v.i_lock); 1910c42b57c4SKent Overstreet if (end > inode->v.i_size) 1911c42b57c4SKent Overstreet i_size_write(&inode->v, end); 1912877dfb34SKent Overstreet spin_unlock(&inode->v.i_lock); 1913877dfb34SKent Overstreet 1914c42b57c4SKent Overstreet f_pos = pos; 1915c42b57c4SKent Overstreet f_offset = pos - folio_pos(darray_first(folios)); 1916c42b57c4SKent Overstreet darray_for_each(folios, fi) { 1917c42b57c4SKent Overstreet struct folio *f = *fi; 1918c42b57c4SKent Overstreet unsigned f_len = min(end, folio_end_pos(f)) - f_pos; 1919d1542e03SKent Overstreet 1920c42b57c4SKent Overstreet if (!folio_test_uptodate(f)) 1921c42b57c4SKent Overstreet folio_mark_uptodate(f); 1922d1542e03SKent Overstreet 1923c42b57c4SKent Overstreet bch2_set_folio_dirty(c, inode, f, &res, f_offset, f_len); 1924d1542e03SKent Overstreet 1925c42b57c4SKent Overstreet f_pos = folio_end_pos(f); 1926c42b57c4SKent Overstreet f_offset = 0; 1927d1542e03SKent Overstreet } 1928877dfb34SKent Overstreet 1929877dfb34SKent Overstreet inode->ei_last_dirtied = (unsigned long) current; 19301c6fdbd8SKent Overstreet out: 1931c42b57c4SKent Overstreet darray_for_each(folios, fi) { 1932c42b57c4SKent Overstreet folio_unlock(*fi); 1933c42b57c4SKent Overstreet folio_put(*fi); 19341c6fdbd8SKent Overstreet } 19351c6fdbd8SKent Overstreet 1936c42b57c4SKent Overstreet darray_exit(&folios); 193730bff594SKent Overstreet bch2_folio_reservation_put(c, inode, &res); 19381c6fdbd8SKent Overstreet 19391c6fdbd8SKent Overstreet return copied ?: ret; 19401c6fdbd8SKent Overstreet } 19411c6fdbd8SKent Overstreet 19421c6fdbd8SKent Overstreet static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter) 19431c6fdbd8SKent Overstreet { 19441c6fdbd8SKent Overstreet struct file *file = iocb->ki_filp; 19451c6fdbd8SKent Overstreet struct address_space *mapping = file->f_mapping; 19461c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 19471c6fdbd8SKent Overstreet loff_t pos = iocb->ki_pos; 19481c6fdbd8SKent Overstreet ssize_t written = 0; 19491c6fdbd8SKent Overstreet int ret = 0; 19501c6fdbd8SKent Overstreet 1951a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 19521c6fdbd8SKent Overstreet 19531c6fdbd8SKent Overstreet do { 19541c6fdbd8SKent Overstreet unsigned offset = pos & (PAGE_SIZE - 1); 1955c42b57c4SKent Overstreet unsigned bytes = iov_iter_count(iter); 19561c6fdbd8SKent Overstreet again: 19571c6fdbd8SKent Overstreet /* 19581c6fdbd8SKent Overstreet * Bring in the user page that we will copy from _first_. 19591c6fdbd8SKent Overstreet * Otherwise there's a nasty deadlock on copying from the 19601c6fdbd8SKent Overstreet * same page as we're writing to, without it being marked 19611c6fdbd8SKent Overstreet * up-to-date. 19621c6fdbd8SKent Overstreet * 19631c6fdbd8SKent Overstreet * Not only is this an optimisation, but it is also required 19641c6fdbd8SKent Overstreet * to check that the address is actually valid, when atomic 19651c6fdbd8SKent Overstreet * usercopies are used, below. 19661c6fdbd8SKent Overstreet */ 19671c6fdbd8SKent Overstreet if (unlikely(fault_in_iov_iter_readable(iter, bytes))) { 19681c6fdbd8SKent Overstreet bytes = min_t(unsigned long, iov_iter_count(iter), 19691c6fdbd8SKent Overstreet PAGE_SIZE - offset); 19701c6fdbd8SKent Overstreet 19711c6fdbd8SKent Overstreet if (unlikely(fault_in_iov_iter_readable(iter, bytes))) { 19721c6fdbd8SKent Overstreet ret = -EFAULT; 19731c6fdbd8SKent Overstreet break; 19741c6fdbd8SKent Overstreet } 19751c6fdbd8SKent Overstreet } 19761c6fdbd8SKent Overstreet 19771c6fdbd8SKent Overstreet if (unlikely(fatal_signal_pending(current))) { 19781c6fdbd8SKent Overstreet ret = -EINTR; 19791c6fdbd8SKent Overstreet break; 19801c6fdbd8SKent Overstreet } 19811c6fdbd8SKent Overstreet 19821c6fdbd8SKent Overstreet ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes); 19831c6fdbd8SKent Overstreet if (unlikely(ret < 0)) 19841c6fdbd8SKent Overstreet break; 19851c6fdbd8SKent Overstreet 19861c6fdbd8SKent Overstreet cond_resched(); 19871c6fdbd8SKent Overstreet 19881c6fdbd8SKent Overstreet if (unlikely(ret == 0)) { 19891c6fdbd8SKent Overstreet /* 19901c6fdbd8SKent Overstreet * If we were unable to copy any data at all, we must 19911c6fdbd8SKent Overstreet * fall back to a single segment length write. 19921c6fdbd8SKent Overstreet * 19931c6fdbd8SKent Overstreet * If we didn't fallback here, we could livelock 19941c6fdbd8SKent Overstreet * because not all segments in the iov can be copied at 19951c6fdbd8SKent Overstreet * once without a pagefault. 19961c6fdbd8SKent Overstreet */ 19971c6fdbd8SKent Overstreet bytes = min_t(unsigned long, PAGE_SIZE - offset, 19981c6fdbd8SKent Overstreet iov_iter_single_seg_count(iter)); 19991c6fdbd8SKent Overstreet goto again; 20001c6fdbd8SKent Overstreet } 20011c6fdbd8SKent Overstreet pos += ret; 20021c6fdbd8SKent Overstreet written += ret; 2003912bdf17SKent Overstreet ret = 0; 20041c6fdbd8SKent Overstreet 20051c6fdbd8SKent Overstreet balance_dirty_pages_ratelimited(mapping); 20061c6fdbd8SKent Overstreet } while (iov_iter_count(iter)); 20071c6fdbd8SKent Overstreet 2008a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 20091c6fdbd8SKent Overstreet 20101c6fdbd8SKent Overstreet return written ? written : ret; 20111c6fdbd8SKent Overstreet } 20121c6fdbd8SKent Overstreet 20131c6fdbd8SKent Overstreet /* O_DIRECT reads */ 20141c6fdbd8SKent Overstreet 2015b4725cc1SKent Overstreet static void bio_check_or_release(struct bio *bio, bool check_dirty) 2016b4725cc1SKent Overstreet { 2017b4725cc1SKent Overstreet if (check_dirty) { 2018b4725cc1SKent Overstreet bio_check_pages_dirty(bio); 2019b4725cc1SKent Overstreet } else { 2020b4725cc1SKent Overstreet bio_release_pages(bio, false); 2021b4725cc1SKent Overstreet bio_put(bio); 2022b4725cc1SKent Overstreet } 2023b4725cc1SKent Overstreet } 2024b4725cc1SKent Overstreet 20251c6fdbd8SKent Overstreet static void bch2_dio_read_complete(struct closure *cl) 20261c6fdbd8SKent Overstreet { 20271c6fdbd8SKent Overstreet struct dio_read *dio = container_of(cl, struct dio_read, cl); 20281c6fdbd8SKent Overstreet 20291c6fdbd8SKent Overstreet dio->req->ki_complete(dio->req, dio->ret); 2030b4725cc1SKent Overstreet bio_check_or_release(&dio->rbio.bio, dio->should_dirty); 20311c6fdbd8SKent Overstreet } 20321c6fdbd8SKent Overstreet 20331c6fdbd8SKent Overstreet static void bch2_direct_IO_read_endio(struct bio *bio) 20341c6fdbd8SKent Overstreet { 20351c6fdbd8SKent Overstreet struct dio_read *dio = bio->bi_private; 20361c6fdbd8SKent Overstreet 20371c6fdbd8SKent Overstreet if (bio->bi_status) 20381c6fdbd8SKent Overstreet dio->ret = blk_status_to_errno(bio->bi_status); 20391c6fdbd8SKent Overstreet 20401c6fdbd8SKent Overstreet closure_put(&dio->cl); 20411c6fdbd8SKent Overstreet } 20421c6fdbd8SKent Overstreet 20431c6fdbd8SKent Overstreet static void bch2_direct_IO_read_split_endio(struct bio *bio) 20441c6fdbd8SKent Overstreet { 2045b4725cc1SKent Overstreet struct dio_read *dio = bio->bi_private; 2046b4725cc1SKent Overstreet bool should_dirty = dio->should_dirty; 2047b4725cc1SKent Overstreet 20481c6fdbd8SKent Overstreet bch2_direct_IO_read_endio(bio); 2049b4725cc1SKent Overstreet bio_check_or_release(bio, should_dirty); 20501c6fdbd8SKent Overstreet } 20511c6fdbd8SKent Overstreet 20521c6fdbd8SKent Overstreet static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) 20531c6fdbd8SKent Overstreet { 20541c6fdbd8SKent Overstreet struct file *file = req->ki_filp; 20551c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 20561c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 205701ad6737SKent Overstreet struct bch_io_opts opts; 20581c6fdbd8SKent Overstreet struct dio_read *dio; 20591c6fdbd8SKent Overstreet struct bio *bio; 20601c6fdbd8SKent Overstreet loff_t offset = req->ki_pos; 20611c6fdbd8SKent Overstreet bool sync = is_sync_kiocb(req); 20621c6fdbd8SKent Overstreet size_t shorten; 20631c6fdbd8SKent Overstreet ssize_t ret; 20641c6fdbd8SKent Overstreet 206501ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 206601ad6737SKent Overstreet 20671c6fdbd8SKent Overstreet if ((offset|iter->count) & (block_bytes(c) - 1)) 20681c6fdbd8SKent Overstreet return -EINVAL; 20691c6fdbd8SKent Overstreet 20701c6fdbd8SKent Overstreet ret = min_t(loff_t, iter->count, 20711c6fdbd8SKent Overstreet max_t(loff_t, 0, i_size_read(&inode->v) - offset)); 20721c6fdbd8SKent Overstreet 20731c6fdbd8SKent Overstreet if (!ret) 20741c6fdbd8SKent Overstreet return ret; 20751c6fdbd8SKent Overstreet 20761c6fdbd8SKent Overstreet shorten = iov_iter_count(iter) - round_up(ret, block_bytes(c)); 20771c6fdbd8SKent Overstreet iter->count -= shorten; 20781c6fdbd8SKent Overstreet 20791c6fdbd8SKent Overstreet bio = bio_alloc_bioset(NULL, 20804d126dc8SKent Overstreet bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), 20811c6fdbd8SKent Overstreet REQ_OP_READ, 20821c6fdbd8SKent Overstreet GFP_KERNEL, 20831c6fdbd8SKent Overstreet &c->dio_read_bioset); 20841c6fdbd8SKent Overstreet 20851c6fdbd8SKent Overstreet bio->bi_end_io = bch2_direct_IO_read_endio; 20861c6fdbd8SKent Overstreet 20871c6fdbd8SKent Overstreet dio = container_of(bio, struct dio_read, rbio.bio); 20881c6fdbd8SKent Overstreet closure_init(&dio->cl, NULL); 20891c6fdbd8SKent Overstreet 20901c6fdbd8SKent Overstreet /* 20911c6fdbd8SKent Overstreet * this is a _really_ horrible hack just to avoid an atomic sub at the 20921c6fdbd8SKent Overstreet * end: 20931c6fdbd8SKent Overstreet */ 20941c6fdbd8SKent Overstreet if (!sync) { 20951c6fdbd8SKent Overstreet set_closure_fn(&dio->cl, bch2_dio_read_complete, NULL); 20961c6fdbd8SKent Overstreet atomic_set(&dio->cl.remaining, 20971c6fdbd8SKent Overstreet CLOSURE_REMAINING_INITIALIZER - 20981c6fdbd8SKent Overstreet CLOSURE_RUNNING + 20991c6fdbd8SKent Overstreet CLOSURE_DESTRUCTOR); 21001c6fdbd8SKent Overstreet } else { 21011c6fdbd8SKent Overstreet atomic_set(&dio->cl.remaining, 21021c6fdbd8SKent Overstreet CLOSURE_REMAINING_INITIALIZER + 1); 21031c6fdbd8SKent Overstreet } 21041c6fdbd8SKent Overstreet 21051c6fdbd8SKent Overstreet dio->req = req; 21061c6fdbd8SKent Overstreet dio->ret = ret; 2107b4725cc1SKent Overstreet /* 2108b4725cc1SKent Overstreet * This is one of the sketchier things I've encountered: we have to skip 2109b4725cc1SKent Overstreet * the dirtying of requests that are internal from the kernel (i.e. from 2110b4725cc1SKent Overstreet * loopback), because we'll deadlock on page_lock. 2111b4725cc1SKent Overstreet */ 2112b4725cc1SKent Overstreet dio->should_dirty = iter_is_iovec(iter); 21131c6fdbd8SKent Overstreet 21141c6fdbd8SKent Overstreet goto start; 21151c6fdbd8SKent Overstreet while (iter->count) { 21161c6fdbd8SKent Overstreet bio = bio_alloc_bioset(NULL, 21174d126dc8SKent Overstreet bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), 21181c6fdbd8SKent Overstreet REQ_OP_READ, 21191c6fdbd8SKent Overstreet GFP_KERNEL, 21201c6fdbd8SKent Overstreet &c->bio_read); 21211c6fdbd8SKent Overstreet bio->bi_end_io = bch2_direct_IO_read_split_endio; 21221c6fdbd8SKent Overstreet start: 21231c6fdbd8SKent Overstreet bio->bi_opf = REQ_OP_READ|REQ_SYNC; 21241c6fdbd8SKent Overstreet bio->bi_iter.bi_sector = offset >> 9; 21251c6fdbd8SKent Overstreet bio->bi_private = dio; 21261c6fdbd8SKent Overstreet 21271c6fdbd8SKent Overstreet ret = bio_iov_iter_get_pages(bio, iter); 21281c6fdbd8SKent Overstreet if (ret < 0) { 21291c6fdbd8SKent Overstreet /* XXX: fault inject this path */ 21301c6fdbd8SKent Overstreet bio->bi_status = BLK_STS_RESOURCE; 21311c6fdbd8SKent Overstreet bio_endio(bio); 21321c6fdbd8SKent Overstreet break; 21331c6fdbd8SKent Overstreet } 21341c6fdbd8SKent Overstreet 21351c6fdbd8SKent Overstreet offset += bio->bi_iter.bi_size; 2136b4725cc1SKent Overstreet 2137b4725cc1SKent Overstreet if (dio->should_dirty) 21381c6fdbd8SKent Overstreet bio_set_pages_dirty(bio); 21391c6fdbd8SKent Overstreet 21401c6fdbd8SKent Overstreet if (iter->count) 21411c6fdbd8SKent Overstreet closure_get(&dio->cl); 21421c6fdbd8SKent Overstreet 21438c6d298aSKent Overstreet bch2_read(c, rbio_init(bio, opts), inode_inum(inode)); 21441c6fdbd8SKent Overstreet } 21451c6fdbd8SKent Overstreet 21461c6fdbd8SKent Overstreet iter->count += shorten; 21471c6fdbd8SKent Overstreet 21481c6fdbd8SKent Overstreet if (sync) { 21491c6fdbd8SKent Overstreet closure_sync(&dio->cl); 21501c6fdbd8SKent Overstreet closure_debug_destroy(&dio->cl); 21511c6fdbd8SKent Overstreet ret = dio->ret; 2152b4725cc1SKent Overstreet bio_check_or_release(&dio->rbio.bio, dio->should_dirty); 21531c6fdbd8SKent Overstreet return ret; 21541c6fdbd8SKent Overstreet } else { 21551c6fdbd8SKent Overstreet return -EIOCBQUEUED; 21561c6fdbd8SKent Overstreet } 21571c6fdbd8SKent Overstreet } 21581c6fdbd8SKent Overstreet 21591c6fdbd8SKent Overstreet ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter) 21601c6fdbd8SKent Overstreet { 21611c6fdbd8SKent Overstreet struct file *file = iocb->ki_filp; 21621c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 21631c6fdbd8SKent Overstreet struct address_space *mapping = file->f_mapping; 21641c6fdbd8SKent Overstreet size_t count = iov_iter_count(iter); 21651c6fdbd8SKent Overstreet ssize_t ret; 21661c6fdbd8SKent Overstreet 21671c6fdbd8SKent Overstreet if (!count) 21681c6fdbd8SKent Overstreet return 0; /* skip atime */ 21691c6fdbd8SKent Overstreet 21701c6fdbd8SKent Overstreet if (iocb->ki_flags & IOCB_DIRECT) { 21711c6fdbd8SKent Overstreet struct blk_plug plug; 21721c6fdbd8SKent Overstreet 2173a023127aSKent Overstreet if (unlikely(mapping->nrpages)) { 21741c6fdbd8SKent Overstreet ret = filemap_write_and_wait_range(mapping, 21751c6fdbd8SKent Overstreet iocb->ki_pos, 21761c6fdbd8SKent Overstreet iocb->ki_pos + count - 1); 21771c6fdbd8SKent Overstreet if (ret < 0) 21785c1ef830SKent Overstreet goto out; 2179a023127aSKent Overstreet } 21801c6fdbd8SKent Overstreet 21811c6fdbd8SKent Overstreet file_accessed(file); 21821c6fdbd8SKent Overstreet 21831c6fdbd8SKent Overstreet blk_start_plug(&plug); 21841c6fdbd8SKent Overstreet ret = bch2_direct_IO_read(iocb, iter); 21851c6fdbd8SKent Overstreet blk_finish_plug(&plug); 21861c6fdbd8SKent Overstreet 21871c6fdbd8SKent Overstreet if (ret >= 0) 21881c6fdbd8SKent Overstreet iocb->ki_pos += ret; 21891c6fdbd8SKent Overstreet } else { 2190a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 21911c6fdbd8SKent Overstreet ret = generic_file_read_iter(iocb, iter); 2192a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 21931c6fdbd8SKent Overstreet } 21945c1ef830SKent Overstreet out: 21955c1ef830SKent Overstreet return bch2_err_class(ret); 21961c6fdbd8SKent Overstreet } 21971c6fdbd8SKent Overstreet 21981c6fdbd8SKent Overstreet /* O_DIRECT writes */ 21991c6fdbd8SKent Overstreet 22006fed42bbSKent Overstreet static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum, 22016fed42bbSKent Overstreet u64 offset, u64 size, 22026fed42bbSKent Overstreet unsigned nr_replicas, bool compressed) 22036fed42bbSKent Overstreet { 22046fed42bbSKent Overstreet struct btree_trans trans; 22056fed42bbSKent Overstreet struct btree_iter iter; 22066fed42bbSKent Overstreet struct bkey_s_c k; 22076fed42bbSKent Overstreet u64 end = offset + size; 22086fed42bbSKent Overstreet u32 snapshot; 22096fed42bbSKent Overstreet bool ret = true; 22106fed42bbSKent Overstreet int err; 22116fed42bbSKent Overstreet 22126fed42bbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 22136fed42bbSKent Overstreet retry: 22146fed42bbSKent Overstreet bch2_trans_begin(&trans); 22156fed42bbSKent Overstreet 22166fed42bbSKent Overstreet err = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 22176fed42bbSKent Overstreet if (err) 22186fed42bbSKent Overstreet goto err; 22196fed42bbSKent Overstreet 2220e5fa91d7SKent Overstreet for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, 22216fed42bbSKent Overstreet SPOS(inum.inum, offset, snapshot), 22226fed42bbSKent Overstreet BTREE_ITER_SLOTS, k, err) { 2223e88a75ebSKent Overstreet if (bkey_ge(bkey_start_pos(k.k), POS(inum.inum, end))) 22246fed42bbSKent Overstreet break; 22256fed42bbSKent Overstreet 22268c6d298aSKent Overstreet if (k.k->p.snapshot != snapshot || 22278c6d298aSKent Overstreet nr_replicas > bch2_bkey_replicas(c, k) || 22286fed42bbSKent Overstreet (!compressed && bch2_bkey_sectors_compressed(k))) { 22296fed42bbSKent Overstreet ret = false; 22306fed42bbSKent Overstreet break; 22316fed42bbSKent Overstreet } 22326fed42bbSKent Overstreet } 22336fed42bbSKent Overstreet 22346fed42bbSKent Overstreet offset = iter.pos.offset; 22356fed42bbSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 22366fed42bbSKent Overstreet err: 2237549d173cSKent Overstreet if (bch2_err_matches(err, BCH_ERR_transaction_restart)) 22386fed42bbSKent Overstreet goto retry; 22396fed42bbSKent Overstreet bch2_trans_exit(&trans); 22406fed42bbSKent Overstreet 22416fed42bbSKent Overstreet return err ? false : ret; 22426fed42bbSKent Overstreet } 22436fed42bbSKent Overstreet 2244182c7bbfSKent Overstreet static noinline bool bch2_dio_write_check_allocated(struct dio_write *dio) 2245182c7bbfSKent Overstreet { 2246182c7bbfSKent Overstreet struct bch_fs *c = dio->op.c; 2247182c7bbfSKent Overstreet struct bch_inode_info *inode = dio->inode; 2248182c7bbfSKent Overstreet struct bio *bio = &dio->op.wbio.bio; 2249182c7bbfSKent Overstreet 2250182c7bbfSKent Overstreet return bch2_check_range_allocated(c, inode_inum(inode), 2251182c7bbfSKent Overstreet dio->op.pos.offset, bio_sectors(bio), 2252182c7bbfSKent Overstreet dio->op.opts.data_replicas, 2253182c7bbfSKent Overstreet dio->op.opts.compression != 0); 2254182c7bbfSKent Overstreet } 2255182c7bbfSKent Overstreet 2256a1ee777bSKent Overstreet static void bch2_dio_write_loop_async(struct bch_write_op *); 2257a1ee777bSKent Overstreet static __always_inline long bch2_dio_write_done(struct dio_write *dio); 2258a1ee777bSKent Overstreet 22591c6fdbd8SKent Overstreet /* 22601c6fdbd8SKent Overstreet * We're going to return -EIOCBQUEUED, but we haven't finished consuming the 22611c6fdbd8SKent Overstreet * iov_iter yet, so we need to stash a copy of the iovec: it might be on the 22621c6fdbd8SKent Overstreet * caller's stack, we're not guaranteed that it will live for the duration of 22631c6fdbd8SKent Overstreet * the IO: 22641c6fdbd8SKent Overstreet */ 22651c6fdbd8SKent Overstreet static noinline int bch2_dio_write_copy_iov(struct dio_write *dio) 22661c6fdbd8SKent Overstreet { 22671c6fdbd8SKent Overstreet struct iovec *iov = dio->inline_vecs; 22681c6fdbd8SKent Overstreet 22691c6fdbd8SKent Overstreet /* 22701c6fdbd8SKent Overstreet * iov_iter has a single embedded iovec - nothing to do: 22711c6fdbd8SKent Overstreet */ 22721c6fdbd8SKent Overstreet if (iter_is_ubuf(&dio->iter)) 22731c6fdbd8SKent Overstreet return 0; 22741c6fdbd8SKent Overstreet 22751c6fdbd8SKent Overstreet /* 22761c6fdbd8SKent Overstreet * We don't currently handle non-iovec iov_iters here - return an error, 22771c6fdbd8SKent Overstreet * and we'll fall back to doing the IO synchronously: 22781c6fdbd8SKent Overstreet */ 22791c6fdbd8SKent Overstreet if (!iter_is_iovec(&dio->iter)) 22801c6fdbd8SKent Overstreet return -1; 22811c6fdbd8SKent Overstreet 22821c6fdbd8SKent Overstreet if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) { 22831c6fdbd8SKent Overstreet iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov), 22841c6fdbd8SKent Overstreet GFP_KERNEL); 22851c6fdbd8SKent Overstreet if (unlikely(!iov)) 22861c6fdbd8SKent Overstreet return -ENOMEM; 22871c6fdbd8SKent Overstreet 22881c6fdbd8SKent Overstreet dio->free_iov = true; 22891c6fdbd8SKent Overstreet } 22901c6fdbd8SKent Overstreet 22911c6fdbd8SKent Overstreet memcpy(iov, dio->iter.__iov, dio->iter.nr_segs * sizeof(*iov)); 22921c6fdbd8SKent Overstreet dio->iter.__iov = iov; 22931c6fdbd8SKent Overstreet return 0; 22941c6fdbd8SKent Overstreet } 22951c6fdbd8SKent Overstreet 2296a1ee777bSKent Overstreet static void bch2_dio_write_flush_done(struct closure *cl) 2297a1ee777bSKent Overstreet { 2298a1ee777bSKent Overstreet struct dio_write *dio = container_of(cl, struct dio_write, op.cl); 2299a1ee777bSKent Overstreet struct bch_fs *c = dio->op.c; 2300a1ee777bSKent Overstreet 2301a1ee777bSKent Overstreet closure_debug_destroy(cl); 2302a1ee777bSKent Overstreet 2303a1ee777bSKent Overstreet dio->op.error = bch2_journal_error(&c->journal); 2304a1ee777bSKent Overstreet 2305a1ee777bSKent Overstreet bch2_dio_write_done(dio); 2306a1ee777bSKent Overstreet } 2307a1ee777bSKent Overstreet 2308a1ee777bSKent Overstreet static noinline void bch2_dio_write_flush(struct dio_write *dio) 2309a1ee777bSKent Overstreet { 2310a1ee777bSKent Overstreet struct bch_fs *c = dio->op.c; 2311a1ee777bSKent Overstreet struct bch_inode_unpacked inode; 2312a1ee777bSKent Overstreet int ret; 2313a1ee777bSKent Overstreet 2314a1ee777bSKent Overstreet dio->flush = 0; 2315a1ee777bSKent Overstreet 2316a1ee777bSKent Overstreet closure_init(&dio->op.cl, NULL); 2317a1ee777bSKent Overstreet 2318a1ee777bSKent Overstreet if (!dio->op.error) { 2319a1ee777bSKent Overstreet ret = bch2_inode_find_by_inum(c, inode_inum(dio->inode), &inode); 2320a8b3a677SKent Overstreet if (ret) { 2321a1ee777bSKent Overstreet dio->op.error = ret; 2322a8b3a677SKent Overstreet } else { 2323a1ee777bSKent Overstreet bch2_journal_flush_seq_async(&c->journal, inode.bi_journal_seq, &dio->op.cl); 2324a8b3a677SKent Overstreet bch2_inode_flush_nocow_writes_async(c, dio->inode, &dio->op.cl); 2325a8b3a677SKent Overstreet } 2326a1ee777bSKent Overstreet } 2327a1ee777bSKent Overstreet 2328a1ee777bSKent Overstreet if (dio->sync) { 2329a1ee777bSKent Overstreet closure_sync(&dio->op.cl); 2330a1ee777bSKent Overstreet closure_debug_destroy(&dio->op.cl); 2331a1ee777bSKent Overstreet } else { 2332a1ee777bSKent Overstreet continue_at(&dio->op.cl, bch2_dio_write_flush_done, NULL); 2333a1ee777bSKent Overstreet } 2334a1ee777bSKent Overstreet } 2335042a1f26SKent Overstreet 2336182c7bbfSKent Overstreet static __always_inline long bch2_dio_write_done(struct dio_write *dio) 2337182c7bbfSKent Overstreet { 2338182c7bbfSKent Overstreet struct kiocb *req = dio->req; 2339182c7bbfSKent Overstreet struct bch_inode_info *inode = dio->inode; 2340182c7bbfSKent Overstreet bool sync = dio->sync; 2341a1ee777bSKent Overstreet long ret; 2342a1ee777bSKent Overstreet 2343a1ee777bSKent Overstreet if (unlikely(dio->flush)) { 2344a1ee777bSKent Overstreet bch2_dio_write_flush(dio); 2345a1ee777bSKent Overstreet if (!sync) 2346a1ee777bSKent Overstreet return -EIOCBQUEUED; 2347a1ee777bSKent Overstreet } 2348182c7bbfSKent Overstreet 2349a7ecd30cSKent Overstreet bch2_pagecache_block_put(inode); 2350182c7bbfSKent Overstreet 2351182c7bbfSKent Overstreet if (dio->free_iov) 2352182c7bbfSKent Overstreet kfree(dio->iter.__iov); 2353a1ee777bSKent Overstreet 2354a1ee777bSKent Overstreet ret = dio->op.error ?: ((long) dio->written << 9); 2355182c7bbfSKent Overstreet bio_put(&dio->op.wbio.bio); 2356182c7bbfSKent Overstreet 2357182c7bbfSKent Overstreet /* inode->i_dio_count is our ref on inode and thus bch_fs */ 2358182c7bbfSKent Overstreet inode_dio_end(&inode->v); 2359182c7bbfSKent Overstreet 2360182c7bbfSKent Overstreet if (ret < 0) 2361182c7bbfSKent Overstreet ret = bch2_err_class(ret); 2362182c7bbfSKent Overstreet 2363182c7bbfSKent Overstreet if (!sync) { 2364182c7bbfSKent Overstreet req->ki_complete(req, ret); 2365182c7bbfSKent Overstreet ret = -EIOCBQUEUED; 2366182c7bbfSKent Overstreet } 2367182c7bbfSKent Overstreet return ret; 2368182c7bbfSKent Overstreet } 2369182c7bbfSKent Overstreet 2370182c7bbfSKent Overstreet static __always_inline void bch2_dio_write_end(struct dio_write *dio) 2371182c7bbfSKent Overstreet { 2372182c7bbfSKent Overstreet struct bch_fs *c = dio->op.c; 2373182c7bbfSKent Overstreet struct kiocb *req = dio->req; 2374182c7bbfSKent Overstreet struct bch_inode_info *inode = dio->inode; 2375182c7bbfSKent Overstreet struct bio *bio = &dio->op.wbio.bio; 2376182c7bbfSKent Overstreet 2377182c7bbfSKent Overstreet req->ki_pos += (u64) dio->op.written << 9; 2378182c7bbfSKent Overstreet dio->written += dio->op.written; 2379182c7bbfSKent Overstreet 23806b1b186aSKent Overstreet if (dio->extending) { 2381182c7bbfSKent Overstreet spin_lock(&inode->v.i_lock); 2382182c7bbfSKent Overstreet if (req->ki_pos > inode->v.i_size) 2383182c7bbfSKent Overstreet i_size_write(&inode->v, req->ki_pos); 2384182c7bbfSKent Overstreet spin_unlock(&inode->v.i_lock); 23856b1b186aSKent Overstreet } 23866b1b186aSKent Overstreet 23876b1b186aSKent Overstreet if (dio->op.i_sectors_delta || dio->quota_res.sectors) { 23886b1b186aSKent Overstreet mutex_lock(&inode->ei_quota_lock); 23896b1b186aSKent Overstreet __i_sectors_acct(c, inode, &dio->quota_res, dio->op.i_sectors_delta); 23906b1b186aSKent Overstreet __bch2_quota_reservation_put(c, inode, &dio->quota_res); 23916b1b186aSKent Overstreet mutex_unlock(&inode->ei_quota_lock); 23926b1b186aSKent Overstreet } 2393182c7bbfSKent Overstreet 2394182c7bbfSKent Overstreet bio_release_pages(bio, false); 2395182c7bbfSKent Overstreet 2396182c7bbfSKent Overstreet if (unlikely(dio->op.error)) 2397182c7bbfSKent Overstreet set_bit(EI_INODE_ERROR, &inode->ei_flags); 2398182c7bbfSKent Overstreet } 2399182c7bbfSKent Overstreet 24004d868d18SKent Overstreet static __always_inline long bch2_dio_write_loop(struct dio_write *dio) 24011c6fdbd8SKent Overstreet { 2402182c7bbfSKent Overstreet struct bch_fs *c = dio->op.c; 24031c6fdbd8SKent Overstreet struct kiocb *req = dio->req; 2404182c7bbfSKent Overstreet struct address_space *mapping = dio->mapping; 2405182c7bbfSKent Overstreet struct bch_inode_info *inode = dio->inode; 240601ad6737SKent Overstreet struct bch_io_opts opts; 24079a3df993SKent Overstreet struct bio *bio = &dio->op.wbio.bio; 2408eb8e6e9cSKent Overstreet unsigned unaligned, iter_count; 2409eb8e6e9cSKent Overstreet bool sync = dio->sync, dropped_locks; 24101c6fdbd8SKent Overstreet long ret; 24111c6fdbd8SKent Overstreet 241201ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 241301ad6737SKent Overstreet 24141c6fdbd8SKent Overstreet while (1) { 2415eb8e6e9cSKent Overstreet iter_count = dio->iter.count; 2416eb8e6e9cSKent Overstreet 2417182c7bbfSKent Overstreet EBUG_ON(current->faults_disabled_mapping); 24181c6fdbd8SKent Overstreet current->faults_disabled_mapping = mapping; 24191c6fdbd8SKent Overstreet 24201c6fdbd8SKent Overstreet ret = bio_iov_iter_get_pages(bio, &dio->iter); 24211c6fdbd8SKent Overstreet 2422eb8e6e9cSKent Overstreet dropped_locks = fdm_dropped_locks(); 2423eb8e6e9cSKent Overstreet 24241c6fdbd8SKent Overstreet current->faults_disabled_mapping = NULL; 24251c6fdbd8SKent Overstreet 2426eb8e6e9cSKent Overstreet /* 2427eb8e6e9cSKent Overstreet * If the fault handler returned an error but also signalled 2428eb8e6e9cSKent Overstreet * that it dropped & retook ei_pagecache_lock, we just need to 2429eb8e6e9cSKent Overstreet * re-shoot down the page cache and retry: 2430eb8e6e9cSKent Overstreet */ 2431eb8e6e9cSKent Overstreet if (dropped_locks && ret) 2432eb8e6e9cSKent Overstreet ret = 0; 2433eb8e6e9cSKent Overstreet 24341c6fdbd8SKent Overstreet if (unlikely(ret < 0)) 24351c6fdbd8SKent Overstreet goto err; 24361c6fdbd8SKent Overstreet 2437eb8e6e9cSKent Overstreet if (unlikely(dropped_locks)) { 2438eb8e6e9cSKent Overstreet ret = write_invalidate_inode_pages_range(mapping, 2439eb8e6e9cSKent Overstreet req->ki_pos, 2440eb8e6e9cSKent Overstreet req->ki_pos + iter_count - 1); 2441eb8e6e9cSKent Overstreet if (unlikely(ret)) 2442eb8e6e9cSKent Overstreet goto err; 2443eb8e6e9cSKent Overstreet 2444eb8e6e9cSKent Overstreet if (!bio->bi_iter.bi_size) 2445eb8e6e9cSKent Overstreet continue; 2446eb8e6e9cSKent Overstreet } 2447eb8e6e9cSKent Overstreet 24480a426c32SKent Overstreet unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1); 24490a426c32SKent Overstreet bio->bi_iter.bi_size -= unaligned; 24500a426c32SKent Overstreet iov_iter_revert(&dio->iter, unaligned); 24510a426c32SKent Overstreet 24520a426c32SKent Overstreet if (!bio->bi_iter.bi_size) { 24530a426c32SKent Overstreet /* 24540a426c32SKent Overstreet * bio_iov_iter_get_pages was only able to get < 24550a426c32SKent Overstreet * blocksize worth of pages: 24560a426c32SKent Overstreet */ 24570a426c32SKent Overstreet ret = -EFAULT; 24580a426c32SKent Overstreet goto err; 24590a426c32SKent Overstreet } 24600a426c32SKent Overstreet 246101ad6737SKent Overstreet bch2_write_op_init(&dio->op, c, opts); 2462182c7bbfSKent Overstreet dio->op.end_io = sync 2463182c7bbfSKent Overstreet ? NULL 2464182c7bbfSKent Overstreet : bch2_dio_write_loop_async; 2465042a1f26SKent Overstreet dio->op.target = dio->op.opts.foreground_target; 2466042a1f26SKent Overstreet dio->op.write_point = writepoint_hashed((unsigned long) current); 2467042a1f26SKent Overstreet dio->op.nr_replicas = dio->op.opts.data_replicas; 24688c6d298aSKent Overstreet dio->op.subvol = inode->ei_subvol; 2469042a1f26SKent Overstreet dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9); 2470a8b3a677SKent Overstreet dio->op.devs_need_flush = &inode->ei_devs_need_flush; 2471042a1f26SKent Overstreet 24721df3e199SKent Overstreet if (sync) 24731df3e199SKent Overstreet dio->op.flags |= BCH_WRITE_SYNC; 2474a6336910SKent Overstreet dio->op.flags |= BCH_WRITE_CHECK_ENOSPC; 2475042a1f26SKent Overstreet 24766b1b186aSKent Overstreet ret = bch2_quota_reservation_add(c, inode, &dio->quota_res, 24776b1b186aSKent Overstreet bio_sectors(bio), true); 24786b1b186aSKent Overstreet if (unlikely(ret)) 24796b1b186aSKent Overstreet goto err; 24806b1b186aSKent Overstreet 2481042a1f26SKent Overstreet ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio), 2482042a1f26SKent Overstreet dio->op.opts.data_replicas, 0); 2483042a1f26SKent Overstreet if (unlikely(ret) && 2484182c7bbfSKent Overstreet !bch2_dio_write_check_allocated(dio)) 2485042a1f26SKent Overstreet goto err; 24861c6fdbd8SKent Overstreet 24871c6fdbd8SKent Overstreet task_io_account_write(bio->bi_iter.bi_size); 24881c6fdbd8SKent Overstreet 2489182c7bbfSKent Overstreet if (unlikely(dio->iter.count) && 2490182c7bbfSKent Overstreet !dio->sync && 2491182c7bbfSKent Overstreet !dio->loop && 2492182c7bbfSKent Overstreet bch2_dio_write_copy_iov(dio)) 2493286d8ad0SKent Overstreet dio->sync = sync = true; 2494182c7bbfSKent Overstreet 24951c6fdbd8SKent Overstreet dio->loop = true; 2496f8f30863SKent Overstreet closure_call(&dio->op.cl, bch2_write, NULL, NULL); 24971c6fdbd8SKent Overstreet 2498182c7bbfSKent Overstreet if (!sync) 24991c6fdbd8SKent Overstreet return -EIOCBQUEUED; 25009a3df993SKent Overstreet 2501182c7bbfSKent Overstreet bch2_dio_write_end(dio); 25029a3df993SKent Overstreet 2503182c7bbfSKent Overstreet if (likely(!dio->iter.count) || dio->op.error) 25041c6fdbd8SKent Overstreet break; 2505f8f30863SKent Overstreet 25061c6fdbd8SKent Overstreet bio_reset(bio, NULL, REQ_OP_WRITE); 25071c6fdbd8SKent Overstreet } 2508182c7bbfSKent Overstreet out: 2509182c7bbfSKent Overstreet return bch2_dio_write_done(dio); 25101c6fdbd8SKent Overstreet err: 2511182c7bbfSKent Overstreet dio->op.error = ret; 25121c6fdbd8SKent Overstreet 25135468f119SKent Overstreet bio_release_pages(bio, false); 25146b1b186aSKent Overstreet 25156b1b186aSKent Overstreet bch2_quota_reservation_put(c, inode, &dio->quota_res); 2516182c7bbfSKent Overstreet goto out; 25171c6fdbd8SKent Overstreet } 25181c6fdbd8SKent Overstreet 25194d868d18SKent Overstreet static noinline __cold void bch2_dio_write_continue(struct dio_write *dio) 25201c6fdbd8SKent Overstreet { 2521182c7bbfSKent Overstreet struct mm_struct *mm = dio->mm; 25221c6fdbd8SKent Overstreet 2523182c7bbfSKent Overstreet bio_reset(&dio->op.wbio.bio, NULL, REQ_OP_WRITE); 2524182c7bbfSKent Overstreet 2525182c7bbfSKent Overstreet if (mm) 2526182c7bbfSKent Overstreet kthread_use_mm(mm); 25271c6fdbd8SKent Overstreet bch2_dio_write_loop(dio); 2528182c7bbfSKent Overstreet if (mm) 2529182c7bbfSKent Overstreet kthread_unuse_mm(mm); 25301c6fdbd8SKent Overstreet } 25311c6fdbd8SKent Overstreet 25324d868d18SKent Overstreet static void bch2_dio_write_loop_async(struct bch_write_op *op) 25334d868d18SKent Overstreet { 25344d868d18SKent Overstreet struct dio_write *dio = container_of(op, struct dio_write, op); 25354d868d18SKent Overstreet 25364d868d18SKent Overstreet bch2_dio_write_end(dio); 25374d868d18SKent Overstreet 25384d868d18SKent Overstreet if (likely(!dio->iter.count) || dio->op.error) 25394d868d18SKent Overstreet bch2_dio_write_done(dio); 25404d868d18SKent Overstreet else 25414d868d18SKent Overstreet bch2_dio_write_continue(dio); 25424d868d18SKent Overstreet } 25434d868d18SKent Overstreet 25441c6fdbd8SKent Overstreet static noinline 25451c6fdbd8SKent Overstreet ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) 25461c6fdbd8SKent Overstreet { 25471c6fdbd8SKent Overstreet struct file *file = req->ki_filp; 254854847d25SKent Overstreet struct address_space *mapping = file->f_mapping; 25491c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 25501c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 25511c6fdbd8SKent Overstreet struct dio_write *dio; 25521c6fdbd8SKent Overstreet struct bio *bio; 25537edcfbfeSKent Overstreet bool locked = true, extending; 25541c6fdbd8SKent Overstreet ssize_t ret; 25551c6fdbd8SKent Overstreet 25567edcfbfeSKent Overstreet prefetch(&c->opts); 25577edcfbfeSKent Overstreet prefetch((void *) &c->opts + 64); 25587edcfbfeSKent Overstreet prefetch(&inode->ei_inode); 25597edcfbfeSKent Overstreet prefetch((void *) &inode->ei_inode + 64); 25601c6fdbd8SKent Overstreet 25617edcfbfeSKent Overstreet inode_lock(&inode->v); 25627edcfbfeSKent Overstreet 25637edcfbfeSKent Overstreet ret = generic_write_checks(req, iter); 25647edcfbfeSKent Overstreet if (unlikely(ret <= 0)) 25657edcfbfeSKent Overstreet goto err; 25667edcfbfeSKent Overstreet 25677edcfbfeSKent Overstreet ret = file_remove_privs(file); 25687edcfbfeSKent Overstreet if (unlikely(ret)) 25697edcfbfeSKent Overstreet goto err; 25707edcfbfeSKent Overstreet 25717edcfbfeSKent Overstreet ret = file_update_time(file); 25727edcfbfeSKent Overstreet if (unlikely(ret)) 25737edcfbfeSKent Overstreet goto err; 25741c6fdbd8SKent Overstreet 2575919dbbd1SKent Overstreet if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) 25767edcfbfeSKent Overstreet goto err; 25777edcfbfeSKent Overstreet 25787edcfbfeSKent Overstreet inode_dio_begin(&inode->v); 2579a7ecd30cSKent Overstreet bch2_pagecache_block_get(inode); 25807edcfbfeSKent Overstreet 25817edcfbfeSKent Overstreet extending = req->ki_pos + iter->count > inode->v.i_size; 25827edcfbfeSKent Overstreet if (!extending) { 25837edcfbfeSKent Overstreet inode_unlock(&inode->v); 25847edcfbfeSKent Overstreet locked = false; 25857edcfbfeSKent Overstreet } 25861c6fdbd8SKent Overstreet 25871c6fdbd8SKent Overstreet bio = bio_alloc_bioset(NULL, 25884d126dc8SKent Overstreet bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), 25891c6fdbd8SKent Overstreet REQ_OP_WRITE, 25901c6fdbd8SKent Overstreet GFP_KERNEL, 25911c6fdbd8SKent Overstreet &c->dio_write_bioset); 25929a3df993SKent Overstreet dio = container_of(bio, struct dio_write, op.wbio.bio); 25931c6fdbd8SKent Overstreet dio->req = req; 2594182c7bbfSKent Overstreet dio->mapping = mapping; 2595182c7bbfSKent Overstreet dio->inode = inode; 2596ed484030SKent Overstreet dio->mm = current->mm; 25971c6fdbd8SKent Overstreet dio->loop = false; 25986b1b186aSKent Overstreet dio->extending = extending; 25997edcfbfeSKent Overstreet dio->sync = is_sync_kiocb(req) || extending; 2600a1ee777bSKent Overstreet dio->flush = iocb_is_dsync(req) && !c->opts.journal_flush_disabled; 26011c6fdbd8SKent Overstreet dio->free_iov = false; 26021c6fdbd8SKent Overstreet dio->quota_res.sectors = 0; 2603042a1f26SKent Overstreet dio->written = 0; 26041c6fdbd8SKent Overstreet dio->iter = *iter; 2605182c7bbfSKent Overstreet dio->op.c = c; 26069a3df993SKent Overstreet 2607a023127aSKent Overstreet if (unlikely(mapping->nrpages)) { 260854847d25SKent Overstreet ret = write_invalidate_inode_pages_range(mapping, 260954847d25SKent Overstreet req->ki_pos, 261054847d25SKent Overstreet req->ki_pos + iter->count - 1); 261154847d25SKent Overstreet if (unlikely(ret)) 261254847d25SKent Overstreet goto err_put_bio; 2613a023127aSKent Overstreet } 261454847d25SKent Overstreet 26157edcfbfeSKent Overstreet ret = bch2_dio_write_loop(dio); 26161c6fdbd8SKent Overstreet err: 26177edcfbfeSKent Overstreet if (locked) 26187edcfbfeSKent Overstreet inode_unlock(&inode->v); 26197edcfbfeSKent Overstreet return ret; 26207edcfbfeSKent Overstreet err_put_bio: 2621a7ecd30cSKent Overstreet bch2_pagecache_block_put(inode); 26221c6fdbd8SKent Overstreet bio_put(bio); 26237edcfbfeSKent Overstreet inode_dio_end(&inode->v); 26247edcfbfeSKent Overstreet goto err; 26251c6fdbd8SKent Overstreet } 26261c6fdbd8SKent Overstreet 26277edcfbfeSKent Overstreet ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from) 26281c6fdbd8SKent Overstreet { 26291c6fdbd8SKent Overstreet struct file *file = iocb->ki_filp; 26307edcfbfeSKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 26311c6fdbd8SKent Overstreet ssize_t ret; 26321c6fdbd8SKent Overstreet 26335c1ef830SKent Overstreet if (iocb->ki_flags & IOCB_DIRECT) { 26345c1ef830SKent Overstreet ret = bch2_direct_write(iocb, from); 26355c1ef830SKent Overstreet goto out; 26365c1ef830SKent Overstreet } 26371c6fdbd8SKent Overstreet 26387edcfbfeSKent Overstreet inode_lock(&inode->v); 26397edcfbfeSKent Overstreet 26407edcfbfeSKent Overstreet ret = generic_write_checks(iocb, from); 26417edcfbfeSKent Overstreet if (ret <= 0) 26427edcfbfeSKent Overstreet goto unlock; 26437edcfbfeSKent Overstreet 26441c6fdbd8SKent Overstreet ret = file_remove_privs(file); 26451c6fdbd8SKent Overstreet if (ret) 26467edcfbfeSKent Overstreet goto unlock; 26471c6fdbd8SKent Overstreet 26481c6fdbd8SKent Overstreet ret = file_update_time(file); 26491c6fdbd8SKent Overstreet if (ret) 26507edcfbfeSKent Overstreet goto unlock; 26511c6fdbd8SKent Overstreet 26527edcfbfeSKent Overstreet ret = bch2_buffered_write(iocb, from); 26531c6fdbd8SKent Overstreet if (likely(ret > 0)) 26541c6fdbd8SKent Overstreet iocb->ki_pos += ret; 26557edcfbfeSKent Overstreet unlock: 26561c6fdbd8SKent Overstreet inode_unlock(&inode->v); 26571c6fdbd8SKent Overstreet 26587edcfbfeSKent Overstreet if (ret > 0) 26591c6fdbd8SKent Overstreet ret = generic_write_sync(iocb, ret); 26605c1ef830SKent Overstreet out: 26615c1ef830SKent Overstreet return bch2_err_class(ret); 26621c6fdbd8SKent Overstreet } 26631c6fdbd8SKent Overstreet 26641c6fdbd8SKent Overstreet /* fsync: */ 26651c6fdbd8SKent Overstreet 266668a2054dSKent Overstreet /* 266768a2054dSKent Overstreet * inode->ei_inode.bi_journal_seq won't be up to date since it's set in an 266868a2054dSKent Overstreet * insert trigger: look up the btree inode instead 266968a2054dSKent Overstreet */ 2670a8b3a677SKent Overstreet static int bch2_flush_inode(struct bch_fs *c, 2671a8b3a677SKent Overstreet struct bch_inode_info *inode) 267268a2054dSKent Overstreet { 2673a8b3a677SKent Overstreet struct bch_inode_unpacked u; 267468a2054dSKent Overstreet int ret; 267568a2054dSKent Overstreet 267668a2054dSKent Overstreet if (c->opts.journal_flush_disabled) 267768a2054dSKent Overstreet return 0; 267868a2054dSKent Overstreet 2679a8b3a677SKent Overstreet ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u); 268068a2054dSKent Overstreet if (ret) 268168a2054dSKent Overstreet return ret; 268268a2054dSKent Overstreet 2683a8b3a677SKent Overstreet return bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?: 2684a8b3a677SKent Overstreet bch2_inode_flush_nocow_writes(c, inode); 268568a2054dSKent Overstreet } 268668a2054dSKent Overstreet 26871c6fdbd8SKent Overstreet int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync) 26881c6fdbd8SKent Overstreet { 26891c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 26901c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 269168a2054dSKent Overstreet int ret, ret2, ret3; 26921c6fdbd8SKent Overstreet 26931c6fdbd8SKent Overstreet ret = file_write_and_wait_range(file, start, end); 269468a2054dSKent Overstreet ret2 = sync_inode_metadata(&inode->v, 1); 2695a8b3a677SKent Overstreet ret3 = bch2_flush_inode(c, inode); 26961c6fdbd8SKent Overstreet 26975c1ef830SKent Overstreet return bch2_err_class(ret ?: ret2 ?: ret3); 26981c6fdbd8SKent Overstreet } 26991c6fdbd8SKent Overstreet 27001c6fdbd8SKent Overstreet /* truncate: */ 27011c6fdbd8SKent Overstreet 27026fed42bbSKent Overstreet static inline int range_has_data(struct bch_fs *c, u32 subvol, 27031c6fdbd8SKent Overstreet struct bpos start, 27041c6fdbd8SKent Overstreet struct bpos end) 27051c6fdbd8SKent Overstreet { 2706424eb881SKent Overstreet struct btree_trans trans; 270767e0dd8fSKent Overstreet struct btree_iter iter; 27081c6fdbd8SKent Overstreet struct bkey_s_c k; 27091c6fdbd8SKent Overstreet int ret = 0; 27101c6fdbd8SKent Overstreet 271120bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 27126fed42bbSKent Overstreet retry: 27136fed42bbSKent Overstreet bch2_trans_begin(&trans); 27146fed42bbSKent Overstreet 27156fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, subvol, &start.snapshot); 27166fed42bbSKent Overstreet if (ret) 27176fed42bbSKent Overstreet goto err; 2718424eb881SKent Overstreet 2719c72f687aSKent Overstreet for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_extents, start, end, 0, k, ret) 27201c6fdbd8SKent Overstreet if (bkey_extent_is_data(k.k)) { 27211c6fdbd8SKent Overstreet ret = 1; 27221c6fdbd8SKent Overstreet break; 27231c6fdbd8SKent Overstreet } 27246fed42bbSKent Overstreet start = iter.pos; 272567e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 27266fed42bbSKent Overstreet err: 2727549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 27286fed42bbSKent Overstreet goto retry; 27291c6fdbd8SKent Overstreet 27309a796fdbSKent Overstreet bch2_trans_exit(&trans); 27319a796fdbSKent Overstreet return ret; 27321c6fdbd8SKent Overstreet } 27331c6fdbd8SKent Overstreet 2734959f7368SKent Overstreet static int __bch2_truncate_folio(struct bch_inode_info *inode, 27351c6fdbd8SKent Overstreet pgoff_t index, loff_t start, loff_t end) 27361c6fdbd8SKent Overstreet { 27371c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 27381c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 27393342ac13SKent Overstreet struct bch_folio *s; 27401c6fdbd8SKent Overstreet unsigned start_offset = start & (PAGE_SIZE - 1); 27411c6fdbd8SKent Overstreet unsigned end_offset = ((end - 1) & (PAGE_SIZE - 1)) + 1; 2742a99b1cafSKent Overstreet unsigned i; 274330bff594SKent Overstreet struct folio *folio; 2744b19d307dSKent Overstreet s64 i_sectors_delta = 0; 27451c6fdbd8SKent Overstreet int ret = 0; 27461c6fdbd8SKent Overstreet 274730bff594SKent Overstreet folio = filemap_lock_folio(mapping, index); 274830bff594SKent Overstreet if (!folio) { 27491c6fdbd8SKent Overstreet /* 27501c6fdbd8SKent Overstreet * XXX: we're doing two index lookups when we end up reading the 275130bff594SKent Overstreet * folio 27521c6fdbd8SKent Overstreet */ 27536fed42bbSKent Overstreet ret = range_has_data(c, inode->ei_subvol, 2754c72f687aSKent Overstreet POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT)), 2755c72f687aSKent Overstreet POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT) + PAGE_SECTORS)); 27561c6fdbd8SKent Overstreet if (ret <= 0) 27571c6fdbd8SKent Overstreet return ret; 27581c6fdbd8SKent Overstreet 275930bff594SKent Overstreet folio = __filemap_get_folio(mapping, index, 276030bff594SKent Overstreet FGP_LOCK|FGP_CREAT, GFP_KERNEL); 276130bff594SKent Overstreet if (unlikely(!folio)) { 27621c6fdbd8SKent Overstreet ret = -ENOMEM; 27631c6fdbd8SKent Overstreet goto out; 27641c6fdbd8SKent Overstreet } 27651c6fdbd8SKent Overstreet } 27661c6fdbd8SKent Overstreet 2767959f7368SKent Overstreet BUG_ON(start >= folio_end_pos(folio)); 2768959f7368SKent Overstreet BUG_ON(end <= folio_pos(folio)); 2769959f7368SKent Overstreet 2770959f7368SKent Overstreet start_offset = max(start, folio_pos(folio)) - folio_pos(folio); 2771959f7368SKent Overstreet end_offset = min(end, folio_end_pos(folio)) - folio_pos(folio); 2772959f7368SKent Overstreet 2773959f7368SKent Overstreet /* Folio boundary? Nothing to do */ 2774959f7368SKent Overstreet if (start_offset == 0 && 2775959f7368SKent Overstreet end_offset == folio_size(folio)) { 2776959f7368SKent Overstreet ret = 0; 2777959f7368SKent Overstreet goto unlock; 2778959f7368SKent Overstreet } 2779959f7368SKent Overstreet 278030bff594SKent Overstreet s = bch2_folio_create(folio, 0); 2781a99b1cafSKent Overstreet if (!s) { 2782a99b1cafSKent Overstreet ret = -ENOMEM; 2783a99b1cafSKent Overstreet goto unlock; 2784a99b1cafSKent Overstreet } 2785a99b1cafSKent Overstreet 278630bff594SKent Overstreet if (!folio_test_uptodate(folio)) { 278730bff594SKent Overstreet ret = bch2_read_single_folio(folio, mapping); 27881c6fdbd8SKent Overstreet if (ret) 27891c6fdbd8SKent Overstreet goto unlock; 27901c6fdbd8SKent Overstreet } 27911c6fdbd8SKent Overstreet 2792c437e153SKent Overstreet BUG_ON(!s->uptodate); 2793c437e153SKent Overstreet 2794a99b1cafSKent Overstreet for (i = round_up(start_offset, block_bytes(c)) >> 9; 2795a99b1cafSKent Overstreet i < round_down(end_offset, block_bytes(c)) >> 9; 2796a99b1cafSKent Overstreet i++) { 2797a99b1cafSKent Overstreet s->s[i].nr_replicas = 0; 2798*a1774a05SKent Overstreet 2799*a1774a05SKent Overstreet i_sectors_delta -= s->s[i].state == SECTOR_dirty; 2800*a1774a05SKent Overstreet folio_sector_set(folio, s, i, SECTOR_unallocated); 2801a99b1cafSKent Overstreet } 2802a99b1cafSKent Overstreet 2803b19d307dSKent Overstreet i_sectors_acct(c, inode, NULL, i_sectors_delta); 2804b19d307dSKent Overstreet 280574163da7SKent Overstreet /* 280630bff594SKent Overstreet * Caller needs to know whether this folio will be written out by 280774163da7SKent Overstreet * writeback - doing an i_size update if necessary - or whether it will 280874163da7SKent Overstreet * be responsible for the i_size update: 280974163da7SKent Overstreet */ 2810959f7368SKent Overstreet ret = s->s[(min(inode->v.i_size, folio_end_pos(folio)) - 2811*a1774a05SKent Overstreet folio_pos(folio) - 1) >> 9].state >= SECTOR_dirty; 281274163da7SKent Overstreet 281330bff594SKent Overstreet folio_zero_segment(folio, start_offset, end_offset); 2814a99b1cafSKent Overstreet 28151c6fdbd8SKent Overstreet /* 28161c6fdbd8SKent Overstreet * Bit of a hack - we don't want truncate to fail due to -ENOSPC. 28171c6fdbd8SKent Overstreet * 281830bff594SKent Overstreet * XXX: because we aren't currently tracking whether the folio has actual 28191c6fdbd8SKent Overstreet * data in it (vs. just 0s, or only partially written) this wrong. ick. 28201c6fdbd8SKent Overstreet */ 282130bff594SKent Overstreet BUG_ON(bch2_get_folio_disk_reservation(c, inode, folio, false)); 28221c6fdbd8SKent Overstreet 28239ba2eb25SKent Overstreet /* 28249ba2eb25SKent Overstreet * This removes any writeable userspace mappings; we need to force 28259ba2eb25SKent Overstreet * .page_mkwrite to be called again before any mmapped writes, to 28269ba2eb25SKent Overstreet * redirty the full page: 28279ba2eb25SKent Overstreet */ 282830bff594SKent Overstreet folio_mkclean(folio); 282930bff594SKent Overstreet filemap_dirty_folio(mapping, folio); 28301c6fdbd8SKent Overstreet unlock: 283130bff594SKent Overstreet folio_unlock(folio); 283230bff594SKent Overstreet folio_put(folio); 28331c6fdbd8SKent Overstreet out: 28341c6fdbd8SKent Overstreet return ret; 28351c6fdbd8SKent Overstreet } 28361c6fdbd8SKent Overstreet 2837959f7368SKent Overstreet static int bch2_truncate_folio(struct bch_inode_info *inode, loff_t from) 28381c6fdbd8SKent Overstreet { 2839959f7368SKent Overstreet return __bch2_truncate_folio(inode, from >> PAGE_SHIFT, 2840959f7368SKent Overstreet from, ANYSINT_MAX(loff_t)); 28411c6fdbd8SKent Overstreet } 28421c6fdbd8SKent Overstreet 2843959f7368SKent Overstreet static int bch2_truncate_folios(struct bch_inode_info *inode, 284474163da7SKent Overstreet loff_t start, loff_t end) 284574163da7SKent Overstreet { 2846959f7368SKent Overstreet int ret = __bch2_truncate_folio(inode, start >> PAGE_SHIFT, 284774163da7SKent Overstreet start, end); 284874163da7SKent Overstreet 284974163da7SKent Overstreet if (ret >= 0 && 285074163da7SKent Overstreet start >> PAGE_SHIFT != end >> PAGE_SHIFT) 2851959f7368SKent Overstreet ret = __bch2_truncate_folio(inode, 2852959f7368SKent Overstreet (end - 1) >> PAGE_SHIFT, 285374163da7SKent Overstreet start, end); 285474163da7SKent Overstreet return ret; 285574163da7SKent Overstreet } 285674163da7SKent Overstreet 285768a507a2SKent Overstreet static int bch2_extend(struct mnt_idmap *idmap, 285868a507a2SKent Overstreet struct bch_inode_info *inode, 2859e0541a93SKent Overstreet struct bch_inode_unpacked *inode_u, 2860e0541a93SKent Overstreet struct iattr *iattr) 28611c6fdbd8SKent Overstreet { 28621c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 28631c6fdbd8SKent Overstreet int ret; 28641c6fdbd8SKent Overstreet 2865e0541a93SKent Overstreet /* 2866e0541a93SKent Overstreet * sync appends: 28672925fc49SKent Overstreet * 28682925fc49SKent Overstreet * this has to be done _before_ extending i_size: 2869e0541a93SKent Overstreet */ 2870e0541a93SKent Overstreet ret = filemap_write_and_wait_range(mapping, inode_u->bi_size, S64_MAX); 28711c6fdbd8SKent Overstreet if (ret) 28721c6fdbd8SKent Overstreet return ret; 28731c6fdbd8SKent Overstreet 28741c6fdbd8SKent Overstreet truncate_setsize(&inode->v, iattr->ia_size); 28751c6fdbd8SKent Overstreet 287668a507a2SKent Overstreet return bch2_setattr_nonsize(idmap, inode, iattr); 28771c6fdbd8SKent Overstreet } 28781c6fdbd8SKent Overstreet 287954e2264eSKent Overstreet static int bch2_truncate_finish_fn(struct bch_inode_info *inode, 288054e2264eSKent Overstreet struct bch_inode_unpacked *bi, 288154e2264eSKent Overstreet void *p) 288254e2264eSKent Overstreet { 288354e2264eSKent Overstreet bi->bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; 288454e2264eSKent Overstreet return 0; 288554e2264eSKent Overstreet } 288654e2264eSKent Overstreet 288754e2264eSKent Overstreet static int bch2_truncate_start_fn(struct bch_inode_info *inode, 288854e2264eSKent Overstreet struct bch_inode_unpacked *bi, void *p) 288954e2264eSKent Overstreet { 289054e2264eSKent Overstreet u64 *new_i_size = p; 289154e2264eSKent Overstreet 289254e2264eSKent Overstreet bi->bi_flags |= BCH_INODE_I_SIZE_DIRTY; 289354e2264eSKent Overstreet bi->bi_size = *new_i_size; 289454e2264eSKent Overstreet return 0; 289554e2264eSKent Overstreet } 289654e2264eSKent Overstreet 289768a507a2SKent Overstreet int bch2_truncate(struct mnt_idmap *idmap, 289868a507a2SKent Overstreet struct bch_inode_info *inode, struct iattr *iattr) 28991c6fdbd8SKent Overstreet { 29001c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 29011c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 2902e0541a93SKent Overstreet struct bch_inode_unpacked inode_u; 290354e2264eSKent Overstreet u64 new_i_size = iattr->ia_size; 29042e87eae1SKent Overstreet s64 i_sectors_delta = 0; 29051c6fdbd8SKent Overstreet int ret = 0; 29061c6fdbd8SKent Overstreet 290768a507a2SKent Overstreet /* 290878d66ab1SDan Robertson * If the truncate call with change the size of the file, the 290978d66ab1SDan Robertson * cmtimes should be updated. If the size will not change, we 291078d66ab1SDan Robertson * do not need to update the cmtimes. 291168a507a2SKent Overstreet */ 291278d66ab1SDan Robertson if (iattr->ia_size != inode->v.i_size) { 291368a507a2SKent Overstreet if (!(iattr->ia_valid & ATTR_MTIME)) 291468a507a2SKent Overstreet ktime_get_coarse_real_ts64(&iattr->ia_mtime); 291568a507a2SKent Overstreet if (!(iattr->ia_valid & ATTR_CTIME)) 291668a507a2SKent Overstreet ktime_get_coarse_real_ts64(&iattr->ia_ctime); 291768a507a2SKent Overstreet iattr->ia_valid |= ATTR_MTIME|ATTR_CTIME; 291878d66ab1SDan Robertson } 291968a507a2SKent Overstreet 29201c6fdbd8SKent Overstreet inode_dio_wait(&inode->v); 2921a7ecd30cSKent Overstreet bch2_pagecache_block_get(inode); 29221c6fdbd8SKent Overstreet 29236fed42bbSKent Overstreet ret = bch2_inode_find_by_inum(c, inode_inum(inode), &inode_u); 2924e0541a93SKent Overstreet if (ret) 2925e0541a93SKent Overstreet goto err; 29261c6fdbd8SKent Overstreet 2927c45d473dSKent Overstreet /* 2928c45d473dSKent Overstreet * check this before next assertion; on filesystem error our normal 2929c45d473dSKent Overstreet * invariants are a bit broken (truncate has to truncate the page cache 2930c45d473dSKent Overstreet * before the inode). 2931c45d473dSKent Overstreet */ 2932c45d473dSKent Overstreet ret = bch2_journal_error(&c->journal); 2933c45d473dSKent Overstreet if (ret) 2934c45d473dSKent Overstreet goto err; 2935c45d473dSKent Overstreet 29368eb71e9eSKent Overstreet WARN_ONCE(!test_bit(EI_INODE_ERROR, &inode->ei_flags) && 29378eb71e9eSKent Overstreet inode->v.i_size < inode_u.bi_size, 29388eb71e9eSKent Overstreet "truncate spotted in mem i_size < btree i_size: %llu < %llu\n", 29398eb71e9eSKent Overstreet (u64) inode->v.i_size, inode_u.bi_size); 2940e0541a93SKent Overstreet 2941e0541a93SKent Overstreet if (iattr->ia_size > inode->v.i_size) { 294268a507a2SKent Overstreet ret = bch2_extend(idmap, inode, &inode_u, iattr); 294354e2264eSKent Overstreet goto err; 29441c6fdbd8SKent Overstreet } 29451c6fdbd8SKent Overstreet 294668a507a2SKent Overstreet iattr->ia_valid &= ~ATTR_SIZE; 294768a507a2SKent Overstreet 2948959f7368SKent Overstreet ret = bch2_truncate_folio(inode, iattr->ia_size); 294974163da7SKent Overstreet if (unlikely(ret < 0)) 295054e2264eSKent Overstreet goto err; 29511c6fdbd8SKent Overstreet 29526cc3535dSKent Overstreet /* 29536cc3535dSKent Overstreet * When extending, we're going to write the new i_size to disk 29546cc3535dSKent Overstreet * immediately so we need to flush anything above the current on disk 29556cc3535dSKent Overstreet * i_size first: 29566cc3535dSKent Overstreet * 29576cc3535dSKent Overstreet * Also, when extending we need to flush the page that i_size currently 29586cc3535dSKent Overstreet * straddles - if it's mapped to userspace, we need to ensure that 29596cc3535dSKent Overstreet * userspace has to redirty it and call .mkwrite -> set_page_dirty 29606cc3535dSKent Overstreet * again to allocate the part of the page that was extended. 29616cc3535dSKent Overstreet */ 2962e0541a93SKent Overstreet if (iattr->ia_size > inode_u.bi_size) 29631c6fdbd8SKent Overstreet ret = filemap_write_and_wait_range(mapping, 2964e0541a93SKent Overstreet inode_u.bi_size, 29651c6fdbd8SKent Overstreet iattr->ia_size - 1); 29661c6fdbd8SKent Overstreet else if (iattr->ia_size & (PAGE_SIZE - 1)) 29671c6fdbd8SKent Overstreet ret = filemap_write_and_wait_range(mapping, 29681c6fdbd8SKent Overstreet round_down(iattr->ia_size, PAGE_SIZE), 29691c6fdbd8SKent Overstreet iattr->ia_size - 1); 29701c6fdbd8SKent Overstreet if (ret) 297154e2264eSKent Overstreet goto err; 29721c6fdbd8SKent Overstreet 297354e2264eSKent Overstreet mutex_lock(&inode->ei_update_lock); 297454e2264eSKent Overstreet ret = bch2_write_inode(c, inode, bch2_truncate_start_fn, 297554e2264eSKent Overstreet &new_i_size, 0); 297654e2264eSKent Overstreet mutex_unlock(&inode->ei_update_lock); 29771c6fdbd8SKent Overstreet 29781c6fdbd8SKent Overstreet if (unlikely(ret)) 297954e2264eSKent Overstreet goto err; 29801c6fdbd8SKent Overstreet 29811c6fdbd8SKent Overstreet truncate_setsize(&inode->v, iattr->ia_size); 29821c6fdbd8SKent Overstreet 29838c6d298aSKent Overstreet ret = bch2_fpunch(c, inode_inum(inode), 2984a99b1cafSKent Overstreet round_up(iattr->ia_size, block_bytes(c)) >> 9, 298568a2054dSKent Overstreet U64_MAX, &i_sectors_delta); 29862e87eae1SKent Overstreet i_sectors_acct(c, inode, NULL, i_sectors_delta); 29872e87eae1SKent Overstreet 2988b33bf1bcSKent Overstreet bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks && 2989b33bf1bcSKent Overstreet !bch2_journal_error(&c->journal), c, 2990b33bf1bcSKent Overstreet "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", 2991b33bf1bcSKent Overstreet inode->v.i_ino, (u64) inode->v.i_blocks, 2992b33bf1bcSKent Overstreet inode->ei_inode.bi_sectors); 29931c6fdbd8SKent Overstreet if (unlikely(ret)) 299454e2264eSKent Overstreet goto err; 29951c6fdbd8SKent Overstreet 299654e2264eSKent Overstreet mutex_lock(&inode->ei_update_lock); 299768a507a2SKent Overstreet ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL, 0); 299854e2264eSKent Overstreet mutex_unlock(&inode->ei_update_lock); 299968a507a2SKent Overstreet 300068a507a2SKent Overstreet ret = bch2_setattr_nonsize(idmap, inode, iattr); 300154e2264eSKent Overstreet err: 3002a7ecd30cSKent Overstreet bch2_pagecache_block_put(inode); 30035c1ef830SKent Overstreet return bch2_err_class(ret); 30041c6fdbd8SKent Overstreet } 30051c6fdbd8SKent Overstreet 30061c6fdbd8SKent Overstreet /* fallocate: */ 30071c6fdbd8SKent Overstreet 3008050197b1SKent Overstreet static int inode_update_times_fn(struct bch_inode_info *inode, 3009050197b1SKent Overstreet struct bch_inode_unpacked *bi, void *p) 3010050197b1SKent Overstreet { 3011050197b1SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 3012050197b1SKent Overstreet 3013050197b1SKent Overstreet bi->bi_mtime = bi->bi_ctime = bch2_current_time(c); 3014050197b1SKent Overstreet return 0; 3015050197b1SKent Overstreet } 3016050197b1SKent Overstreet 30172e87eae1SKent Overstreet static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len) 30181c6fdbd8SKent Overstreet { 30191c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 302074163da7SKent Overstreet u64 end = offset + len; 302174163da7SKent Overstreet u64 block_start = round_up(offset, block_bytes(c)); 302274163da7SKent Overstreet u64 block_end = round_down(end, block_bytes(c)); 302374163da7SKent Overstreet bool truncated_last_page; 30241c6fdbd8SKent Overstreet int ret = 0; 30251c6fdbd8SKent Overstreet 3026959f7368SKent Overstreet ret = bch2_truncate_folios(inode, offset, end); 302774163da7SKent Overstreet if (unlikely(ret < 0)) 30281c6fdbd8SKent Overstreet goto err; 30291c6fdbd8SKent Overstreet 303074163da7SKent Overstreet truncated_last_page = ret; 30311c6fdbd8SKent Overstreet 303274163da7SKent Overstreet truncate_pagecache_range(&inode->v, offset, end - 1); 30331c6fdbd8SKent Overstreet 303474163da7SKent Overstreet if (block_start < block_end) { 30352e87eae1SKent Overstreet s64 i_sectors_delta = 0; 30362e87eae1SKent Overstreet 30378c6d298aSKent Overstreet ret = bch2_fpunch(c, inode_inum(inode), 303874163da7SKent Overstreet block_start >> 9, block_end >> 9, 30392e87eae1SKent Overstreet &i_sectors_delta); 30402e87eae1SKent Overstreet i_sectors_acct(c, inode, NULL, i_sectors_delta); 30412e87eae1SKent Overstreet } 3042050197b1SKent Overstreet 3043050197b1SKent Overstreet mutex_lock(&inode->ei_update_lock); 304474163da7SKent Overstreet if (end >= inode->v.i_size && !truncated_last_page) { 304574163da7SKent Overstreet ret = bch2_write_inode_size(c, inode, inode->v.i_size, 304674163da7SKent Overstreet ATTR_MTIME|ATTR_CTIME); 304774163da7SKent Overstreet } else { 3048050197b1SKent Overstreet ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, 304974163da7SKent Overstreet ATTR_MTIME|ATTR_CTIME); 305074163da7SKent Overstreet } 3051050197b1SKent Overstreet mutex_unlock(&inode->ei_update_lock); 30521c6fdbd8SKent Overstreet err: 30531c6fdbd8SKent Overstreet return ret; 30541c6fdbd8SKent Overstreet } 30551c6fdbd8SKent Overstreet 30562e87eae1SKent Overstreet static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, 30575f786787SKent Overstreet loff_t offset, loff_t len, 30585f786787SKent Overstreet bool insert) 30591c6fdbd8SKent Overstreet { 30601c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 30611c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 306207a1006aSKent Overstreet struct bkey_buf copy; 3063d69f41d6SKent Overstreet struct btree_trans trans; 306467e0dd8fSKent Overstreet struct btree_iter src, dst, del; 30655f786787SKent Overstreet loff_t shift, new_size; 30665f786787SKent Overstreet u64 src_start; 306750dc0f69SKent Overstreet int ret = 0; 30681c6fdbd8SKent Overstreet 30691c6fdbd8SKent Overstreet if ((offset | len) & (block_bytes(c) - 1)) 30701c6fdbd8SKent Overstreet return -EINVAL; 30711c6fdbd8SKent Overstreet 30725f786787SKent Overstreet if (insert) { 30735f786787SKent Overstreet if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len) 307474163da7SKent Overstreet return -EFBIG; 30755f786787SKent Overstreet 30765f786787SKent Overstreet if (offset >= inode->v.i_size) 307774163da7SKent Overstreet return -EINVAL; 30785f786787SKent Overstreet 30795f786787SKent Overstreet src_start = U64_MAX; 30805f786787SKent Overstreet shift = len; 30815f786787SKent Overstreet } else { 30821c6fdbd8SKent Overstreet if (offset + len >= inode->v.i_size) 308374163da7SKent Overstreet return -EINVAL; 30841c6fdbd8SKent Overstreet 30855f786787SKent Overstreet src_start = offset + len; 30865f786787SKent Overstreet shift = -len; 30875f786787SKent Overstreet } 30881c6fdbd8SKent Overstreet 30895f786787SKent Overstreet new_size = inode->v.i_size + shift; 30901c6fdbd8SKent Overstreet 30911c6fdbd8SKent Overstreet ret = write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX); 30921c6fdbd8SKent Overstreet if (ret) 309374163da7SKent Overstreet return ret; 30941c6fdbd8SKent Overstreet 30955f786787SKent Overstreet if (insert) { 30965f786787SKent Overstreet i_size_write(&inode->v, new_size); 30975f786787SKent Overstreet mutex_lock(&inode->ei_update_lock); 30985f786787SKent Overstreet ret = bch2_write_inode_size(c, inode, new_size, 30995f786787SKent Overstreet ATTR_MTIME|ATTR_CTIME); 31005f786787SKent Overstreet mutex_unlock(&inode->ei_update_lock); 31015f786787SKent Overstreet } else { 31022e87eae1SKent Overstreet s64 i_sectors_delta = 0; 31032e87eae1SKent Overstreet 31048c6d298aSKent Overstreet ret = bch2_fpunch(c, inode_inum(inode), 31052e87eae1SKent Overstreet offset >> 9, (offset + len) >> 9, 31062e87eae1SKent Overstreet &i_sectors_delta); 31072e87eae1SKent Overstreet i_sectors_acct(c, inode, NULL, i_sectors_delta); 31082e87eae1SKent Overstreet 310963095894SKent Overstreet if (ret) 311074163da7SKent Overstreet return ret; 31115f786787SKent Overstreet } 31128ef231bdSKent Overstreet 311350dc0f69SKent Overstreet bch2_bkey_buf_init(©); 3114f7beb4caSKent Overstreet bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); 311567e0dd8fSKent Overstreet bch2_trans_iter_init(&trans, &src, BTREE_ID_extents, 31165f786787SKent Overstreet POS(inode->v.i_ino, src_start >> 9), 311763095894SKent Overstreet BTREE_ITER_INTENT); 311867e0dd8fSKent Overstreet bch2_trans_copy_iter(&dst, &src); 311967e0dd8fSKent Overstreet bch2_trans_copy_iter(&del, &src); 31205f786787SKent Overstreet 3121549d173cSKent Overstreet while (ret == 0 || 3122549d173cSKent Overstreet bch2_err_matches(ret, BCH_ERR_transaction_restart)) { 312363095894SKent Overstreet struct disk_reservation disk_res = 312463095894SKent Overstreet bch2_disk_reservation_init(c, 0); 312563095894SKent Overstreet struct bkey_i delete; 312663095894SKent Overstreet struct bkey_s_c k; 312763095894SKent Overstreet struct bpos next_pos; 31285f786787SKent Overstreet struct bpos move_pos = POS(inode->v.i_ino, offset >> 9); 31295f786787SKent Overstreet struct bpos atomic_end; 31302d594dfbSKent Overstreet unsigned trigger_flags = 0; 31316fed42bbSKent Overstreet u32 snapshot; 31326fed42bbSKent Overstreet 31336fed42bbSKent Overstreet bch2_trans_begin(&trans); 31346fed42bbSKent Overstreet 31356fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, 31366fed42bbSKent Overstreet inode->ei_subvol, &snapshot); 31376fed42bbSKent Overstreet if (ret) 31386fed42bbSKent Overstreet continue; 31396fed42bbSKent Overstreet 31406fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&src, snapshot); 31416fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&dst, snapshot); 31426fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&del, snapshot); 314363095894SKent Overstreet 3144700c25b3SKent Overstreet bch2_trans_begin(&trans); 3145700c25b3SKent Overstreet 31465f786787SKent Overstreet k = insert 314767e0dd8fSKent Overstreet ? bch2_btree_iter_peek_prev(&src) 3148c72f687aSKent Overstreet : bch2_btree_iter_peek_upto(&src, POS(inode->v.i_ino, U64_MAX)); 314963095894SKent Overstreet if ((ret = bkey_err(k))) 315050dc0f69SKent Overstreet continue; 315163095894SKent Overstreet 315263095894SKent Overstreet if (!k.k || k.k->p.inode != inode->v.i_ino) 315363095894SKent Overstreet break; 315463095894SKent Overstreet 31555f786787SKent Overstreet if (insert && 3156e88a75ebSKent Overstreet bkey_le(k.k->p, POS(inode->v.i_ino, offset >> 9))) 31575f786787SKent Overstreet break; 31585f786787SKent Overstreet reassemble: 315907a1006aSKent Overstreet bch2_bkey_buf_reassemble(©, c, k); 31605f786787SKent Overstreet 31615f786787SKent Overstreet if (insert && 3162e88a75ebSKent Overstreet bkey_lt(bkey_start_pos(k.k), move_pos)) 316335189e09SKent Overstreet bch2_cut_front(move_pos, copy.k); 31645f786787SKent Overstreet 316535189e09SKent Overstreet copy.k->k.p.offset += shift >> 9; 316667e0dd8fSKent Overstreet bch2_btree_iter_set_pos(&dst, bkey_start_pos(©.k->k)); 31671c6fdbd8SKent Overstreet 316867e0dd8fSKent Overstreet ret = bch2_extent_atomic_end(&trans, &dst, copy.k, &atomic_end); 31693c7f3b7aSKent Overstreet if (ret) 317050dc0f69SKent Overstreet continue; 3171e2d9912cSKent Overstreet 3172e88a75ebSKent Overstreet if (!bkey_eq(atomic_end, copy.k->k.p)) { 31735f786787SKent Overstreet if (insert) { 31745f786787SKent Overstreet move_pos = atomic_end; 31755f786787SKent Overstreet move_pos.offset -= shift >> 9; 31765f786787SKent Overstreet goto reassemble; 31775f786787SKent Overstreet } else { 3178085ab693SKent Overstreet bch2_cut_back(atomic_end, copy.k); 31795f786787SKent Overstreet } 31805f786787SKent Overstreet } 31815f786787SKent Overstreet 318263095894SKent Overstreet bkey_init(&delete.k); 3183283eda57SKent Overstreet delete.k.p = copy.k->k.p; 3184283eda57SKent Overstreet delete.k.size = copy.k->k.size; 3185283eda57SKent Overstreet delete.k.p.offset -= shift >> 9; 318667e0dd8fSKent Overstreet bch2_btree_iter_set_pos(&del, bkey_start_pos(&delete.k)); 31871c6fdbd8SKent Overstreet 31885f786787SKent Overstreet next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p; 318963095894SKent Overstreet 31907c4ca54aSKent Overstreet if (copy.k->k.size != k.k->size) { 319163095894SKent Overstreet /* We might end up splitting compressed extents: */ 319263095894SKent Overstreet unsigned nr_ptrs = 31934de77495SKent Overstreet bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k)); 319463095894SKent Overstreet 319563095894SKent Overstreet ret = bch2_disk_reservation_get(c, &disk_res, 319635189e09SKent Overstreet copy.k->k.size, nr_ptrs, 31971c6fdbd8SKent Overstreet BCH_DISK_RESERVATION_NOFAIL); 31981c6fdbd8SKent Overstreet BUG_ON(ret); 319963095894SKent Overstreet } 32001c6fdbd8SKent Overstreet 320167e0dd8fSKent Overstreet ret = bch2_btree_iter_traverse(&del) ?: 320267e0dd8fSKent Overstreet bch2_trans_update(&trans, &del, &delete, trigger_flags) ?: 320367e0dd8fSKent Overstreet bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?: 320468a2054dSKent Overstreet bch2_trans_commit(&trans, &disk_res, NULL, 32052d594dfbSKent Overstreet BTREE_INSERT_NOFAIL); 32061c6fdbd8SKent Overstreet bch2_disk_reservation_put(c, &disk_res); 320750dc0f69SKent Overstreet 320863095894SKent Overstreet if (!ret) 320967e0dd8fSKent Overstreet bch2_btree_iter_set_pos(&src, next_pos); 321050dc0f69SKent Overstreet } 321167e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &del); 321267e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &dst); 321367e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &src); 321450dc0f69SKent Overstreet bch2_trans_exit(&trans); 321550dc0f69SKent Overstreet bch2_bkey_buf_exit(©, c); 321663095894SKent Overstreet 32178ef231bdSKent Overstreet if (ret) 321874163da7SKent Overstreet return ret; 32191c6fdbd8SKent Overstreet 322074163da7SKent Overstreet mutex_lock(&inode->ei_update_lock); 32215f786787SKent Overstreet if (!insert) { 32228ef231bdSKent Overstreet i_size_write(&inode->v, new_size); 32238ef231bdSKent Overstreet ret = bch2_write_inode_size(c, inode, new_size, 32248ef231bdSKent Overstreet ATTR_MTIME|ATTR_CTIME); 322574163da7SKent Overstreet } else { 322674163da7SKent Overstreet /* We need an inode update to update bi_journal_seq for fsync: */ 322774163da7SKent Overstreet ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, 322874163da7SKent Overstreet ATTR_MTIME|ATTR_CTIME); 32295f786787SKent Overstreet } 323074163da7SKent Overstreet mutex_unlock(&inode->ei_update_lock); 32311c6fdbd8SKent Overstreet return ret; 32321c6fdbd8SKent Overstreet } 32331c6fdbd8SKent Overstreet 3234694015c2SKent Overstreet static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, 3235694015c2SKent Overstreet u64 start_sector, u64 end_sector) 32361c6fdbd8SKent Overstreet { 32371c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 3238190fa7afSKent Overstreet struct btree_trans trans; 323967e0dd8fSKent Overstreet struct btree_iter iter; 3240694015c2SKent Overstreet struct bpos end_pos = POS(inode->v.i_ino, end_sector); 324101ad6737SKent Overstreet struct bch_io_opts opts; 3242694015c2SKent Overstreet int ret = 0; 32431c6fdbd8SKent Overstreet 324401ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 3245f7beb4caSKent Overstreet bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512); 32461c6fdbd8SKent Overstreet 324767e0dd8fSKent Overstreet bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, 3248694015c2SKent Overstreet POS(inode->v.i_ino, start_sector), 3249190fa7afSKent Overstreet BTREE_ITER_SLOTS|BTREE_ITER_INTENT); 32501c6fdbd8SKent Overstreet 3251e88a75ebSKent Overstreet while (!ret && bkey_lt(iter.pos, end_pos)) { 32522e87eae1SKent Overstreet s64 i_sectors_delta = 0; 3253190fa7afSKent Overstreet struct quota_res quota_res = { 0 }; 32541c6fdbd8SKent Overstreet struct bkey_s_c k; 3255694015c2SKent Overstreet unsigned sectors; 32566fed42bbSKent Overstreet u32 snapshot; 32571c6fdbd8SKent Overstreet 3258163e885aSKent Overstreet bch2_trans_begin(&trans); 3259a8abd3a7SKent Overstreet 32606fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, 32616fed42bbSKent Overstreet inode->ei_subvol, &snapshot); 32626fed42bbSKent Overstreet if (ret) 32636fed42bbSKent Overstreet goto bkey_err; 32646fed42bbSKent Overstreet 32656fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&iter, snapshot); 32666fed42bbSKent Overstreet 326767e0dd8fSKent Overstreet k = bch2_btree_iter_peek_slot(&iter); 32680f238367SKent Overstreet if ((ret = bkey_err(k))) 32690f238367SKent Overstreet goto bkey_err; 32701c6fdbd8SKent Overstreet 32711c6fdbd8SKent Overstreet /* already reserved */ 327279203111SKent Overstreet if (bkey_extent_is_reservation(k) && 327379203111SKent Overstreet bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) { 327467e0dd8fSKent Overstreet bch2_btree_iter_advance(&iter); 32751c6fdbd8SKent Overstreet continue; 32761c6fdbd8SKent Overstreet } 32771c6fdbd8SKent Overstreet 3278190fa7afSKent Overstreet if (bkey_extent_is_data(k.k) && 3279190fa7afSKent Overstreet !(mode & FALLOC_FL_ZERO_RANGE)) { 328067e0dd8fSKent Overstreet bch2_btree_iter_advance(&iter); 32811c6fdbd8SKent Overstreet continue; 32821c6fdbd8SKent Overstreet } 32831c6fdbd8SKent Overstreet 3284a8b3a677SKent Overstreet /* 3285a8b3a677SKent Overstreet * XXX: for nocow mode, we should promote shared extents to 3286a8b3a677SKent Overstreet * unshared here 3287a8b3a677SKent Overstreet */ 3288a8b3a677SKent Overstreet 328970de7a47SKent Overstreet sectors = bpos_min(k.k->p, end_pos).offset - iter.pos.offset; 32901c6fdbd8SKent Overstreet 32911c6fdbd8SKent Overstreet if (!bkey_extent_is_allocation(k.k)) { 32921c6fdbd8SKent Overstreet ret = bch2_quota_reservation_add(c, inode, 3293190fa7afSKent Overstreet "a_res, 32941c6fdbd8SKent Overstreet sectors, true); 32951c6fdbd8SKent Overstreet if (unlikely(ret)) 32960f238367SKent Overstreet goto bkey_err; 32971c6fdbd8SKent Overstreet } 32981c6fdbd8SKent Overstreet 329970de7a47SKent Overstreet ret = bch2_extent_fallocate(&trans, inode_inum(inode), &iter, 330070de7a47SKent Overstreet sectors, opts, &i_sectors_delta, 330170de7a47SKent Overstreet writepoint_hashed((unsigned long) current)); 33028810386fSKent Overstreet if (ret) 33038810386fSKent Overstreet goto bkey_err; 330470de7a47SKent Overstreet 33052e87eae1SKent Overstreet i_sectors_acct(c, inode, "a_res, i_sectors_delta); 33060f238367SKent Overstreet bkey_err: 3307190fa7afSKent Overstreet bch2_quota_reservation_put(c, inode, "a_res); 3308549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 33091c6fdbd8SKent Overstreet ret = 0; 331050dc0f69SKent Overstreet } 331174163da7SKent Overstreet 3312dcfc593fSKent Overstreet bch2_trans_unlock(&trans); /* lock ordering, before taking pagecache locks: */ 3313dcfc593fSKent Overstreet mark_pagecache_reserved(inode, start_sector, iter.pos.offset); 3314dcfc593fSKent Overstreet 3315098ef98dSKent Overstreet if (bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)) { 331674163da7SKent Overstreet struct quota_res quota_res = { 0 }; 331774163da7SKent Overstreet s64 i_sectors_delta = 0; 331874163da7SKent Overstreet 331974163da7SKent Overstreet bch2_fpunch_at(&trans, &iter, inode_inum(inode), 332074163da7SKent Overstreet end_sector, &i_sectors_delta); 332174163da7SKent Overstreet i_sectors_acct(c, inode, "a_res, i_sectors_delta); 332274163da7SKent Overstreet bch2_quota_reservation_put(c, inode, "a_res); 332374163da7SKent Overstreet } 332474163da7SKent Overstreet 332567e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 3326694015c2SKent Overstreet bch2_trans_exit(&trans); 3327694015c2SKent Overstreet return ret; 3328694015c2SKent Overstreet } 332950dc0f69SKent Overstreet 3330694015c2SKent Overstreet static long bchfs_fallocate(struct bch_inode_info *inode, int mode, 3331694015c2SKent Overstreet loff_t offset, loff_t len) 3332694015c2SKent Overstreet { 3333694015c2SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 333474163da7SKent Overstreet u64 end = offset + len; 333574163da7SKent Overstreet u64 block_start = round_down(offset, block_bytes(c)); 333674163da7SKent Overstreet u64 block_end = round_up(end, block_bytes(c)); 333774163da7SKent Overstreet bool truncated_last_page = false; 333874163da7SKent Overstreet int ret, ret2 = 0; 3339694015c2SKent Overstreet 3340694015c2SKent Overstreet if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) { 3341694015c2SKent Overstreet ret = inode_newsize_ok(&inode->v, end); 3342694015c2SKent Overstreet if (ret) 334374163da7SKent Overstreet return ret; 3344694015c2SKent Overstreet } 3345694015c2SKent Overstreet 3346694015c2SKent Overstreet if (mode & FALLOC_FL_ZERO_RANGE) { 3347959f7368SKent Overstreet ret = bch2_truncate_folios(inode, offset, end); 334874163da7SKent Overstreet if (unlikely(ret < 0)) 334974163da7SKent Overstreet return ret; 3350694015c2SKent Overstreet 335174163da7SKent Overstreet truncated_last_page = ret; 3352694015c2SKent Overstreet 3353694015c2SKent Overstreet truncate_pagecache_range(&inode->v, offset, end - 1); 335474163da7SKent Overstreet 335574163da7SKent Overstreet block_start = round_up(offset, block_bytes(c)); 335674163da7SKent Overstreet block_end = round_down(end, block_bytes(c)); 3357694015c2SKent Overstreet } 3358694015c2SKent Overstreet 3359694015c2SKent Overstreet ret = __bchfs_fallocate(inode, mode, block_start >> 9, block_end >> 9); 3360e0541a93SKent Overstreet 3361e0541a93SKent Overstreet /* 336274163da7SKent Overstreet * On -ENOSPC in ZERO_RANGE mode, we still want to do the inode update, 336374163da7SKent Overstreet * so that the VFS cache i_size is consistent with the btree i_size: 3364e0541a93SKent Overstreet */ 336574163da7SKent Overstreet if (ret && 3366098ef98dSKent Overstreet !(bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE))) 336774163da7SKent Overstreet return ret; 33681c6fdbd8SKent Overstreet 336974163da7SKent Overstreet if (mode & FALLOC_FL_KEEP_SIZE && end > inode->v.i_size) 3370e0541a93SKent Overstreet end = inode->v.i_size; 337174163da7SKent Overstreet 337274163da7SKent Overstreet if (end >= inode->v.i_size && 337374163da7SKent Overstreet (((mode & FALLOC_FL_ZERO_RANGE) && !truncated_last_page) || 337474163da7SKent Overstreet !(mode & FALLOC_FL_KEEP_SIZE))) { 337574163da7SKent Overstreet spin_lock(&inode->v.i_lock); 3376e0541a93SKent Overstreet i_size_write(&inode->v, end); 337774163da7SKent Overstreet spin_unlock(&inode->v.i_lock); 3378e0541a93SKent Overstreet 33791c6fdbd8SKent Overstreet mutex_lock(&inode->ei_update_lock); 338074163da7SKent Overstreet ret2 = bch2_write_inode_size(c, inode, end, 0); 33811c6fdbd8SKent Overstreet mutex_unlock(&inode->ei_update_lock); 33821c6fdbd8SKent Overstreet } 338374163da7SKent Overstreet 338474163da7SKent Overstreet return ret ?: ret2; 33851c6fdbd8SKent Overstreet } 33861c6fdbd8SKent Overstreet 33871c6fdbd8SKent Overstreet long bch2_fallocate_dispatch(struct file *file, int mode, 33881c6fdbd8SKent Overstreet loff_t offset, loff_t len) 33891c6fdbd8SKent Overstreet { 33901c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 33912a9101a9SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 33922a9101a9SKent Overstreet long ret; 33932a9101a9SKent Overstreet 3394d94189adSKent Overstreet if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fallocate)) 33952a9101a9SKent Overstreet return -EROFS; 33961c6fdbd8SKent Overstreet 339774163da7SKent Overstreet inode_lock(&inode->v); 339874163da7SKent Overstreet inode_dio_wait(&inode->v); 3399a7ecd30cSKent Overstreet bch2_pagecache_block_get(inode); 340074163da7SKent Overstreet 340107bfcc0bSKent Overstreet ret = file_modified(file); 340207bfcc0bSKent Overstreet if (ret) 340307bfcc0bSKent Overstreet goto err; 340407bfcc0bSKent Overstreet 34051c6fdbd8SKent Overstreet if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE))) 34062a9101a9SKent Overstreet ret = bchfs_fallocate(inode, mode, offset, len); 34072a9101a9SKent Overstreet else if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE)) 34082a9101a9SKent Overstreet ret = bchfs_fpunch(inode, offset, len); 34092a9101a9SKent Overstreet else if (mode == FALLOC_FL_INSERT_RANGE) 34102a9101a9SKent Overstreet ret = bchfs_fcollapse_finsert(inode, offset, len, true); 34112a9101a9SKent Overstreet else if (mode == FALLOC_FL_COLLAPSE_RANGE) 34122a9101a9SKent Overstreet ret = bchfs_fcollapse_finsert(inode, offset, len, false); 34132a9101a9SKent Overstreet else 34142a9101a9SKent Overstreet ret = -EOPNOTSUPP; 341507bfcc0bSKent Overstreet err: 3416a7ecd30cSKent Overstreet bch2_pagecache_block_put(inode); 341774163da7SKent Overstreet inode_unlock(&inode->v); 3418d94189adSKent Overstreet bch2_write_ref_put(c, BCH_WRITE_REF_fallocate); 34191c6fdbd8SKent Overstreet 34205c1ef830SKent Overstreet return bch2_err_class(ret); 34211c6fdbd8SKent Overstreet } 34221c6fdbd8SKent Overstreet 3423c72f687aSKent Overstreet /* 3424c72f687aSKent Overstreet * Take a quota reservation for unallocated blocks in a given file range 3425c72f687aSKent Overstreet * Does not check pagecache 3426c72f687aSKent Overstreet */ 3427e8540e56SKent Overstreet static int quota_reserve_range(struct bch_inode_info *inode, 3428e8540e56SKent Overstreet struct quota_res *res, 3429e8540e56SKent Overstreet u64 start, u64 end) 3430e8540e56SKent Overstreet { 3431e8540e56SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 3432e8540e56SKent Overstreet struct btree_trans trans; 3433e8540e56SKent Overstreet struct btree_iter iter; 3434e8540e56SKent Overstreet struct bkey_s_c k; 3435e8540e56SKent Overstreet u32 snapshot; 3436e8540e56SKent Overstreet u64 sectors = end - start; 3437e8540e56SKent Overstreet u64 pos = start; 3438e8540e56SKent Overstreet int ret; 3439e8540e56SKent Overstreet 3440e8540e56SKent Overstreet bch2_trans_init(&trans, c, 0, 0); 3441e8540e56SKent Overstreet retry: 3442e8540e56SKent Overstreet bch2_trans_begin(&trans); 3443e8540e56SKent Overstreet 3444e8540e56SKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, inode->ei_subvol, &snapshot); 3445e8540e56SKent Overstreet if (ret) 3446e8540e56SKent Overstreet goto err; 3447e8540e56SKent Overstreet 3448e8540e56SKent Overstreet bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, 3449e8540e56SKent Overstreet SPOS(inode->v.i_ino, pos, snapshot), 0); 3450e8540e56SKent Overstreet 3451e8540e56SKent Overstreet while (!(ret = btree_trans_too_many_iters(&trans)) && 3452e8540e56SKent Overstreet (k = bch2_btree_iter_peek_upto(&iter, POS(inode->v.i_ino, end - 1))).k && 3453e8540e56SKent Overstreet !(ret = bkey_err(k))) { 3454e8540e56SKent Overstreet if (bkey_extent_is_allocation(k.k)) { 3455e8540e56SKent Overstreet u64 s = min(end, k.k->p.offset) - 3456e8540e56SKent Overstreet max(start, bkey_start_offset(k.k)); 3457e8540e56SKent Overstreet BUG_ON(s > sectors); 3458e8540e56SKent Overstreet sectors -= s; 3459e8540e56SKent Overstreet } 3460e8540e56SKent Overstreet bch2_btree_iter_advance(&iter); 3461e8540e56SKent Overstreet } 3462e8540e56SKent Overstreet pos = iter.pos.offset; 3463e8540e56SKent Overstreet bch2_trans_iter_exit(&trans, &iter); 3464e8540e56SKent Overstreet err: 3465e8540e56SKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 3466e8540e56SKent Overstreet goto retry; 3467e8540e56SKent Overstreet 3468e8540e56SKent Overstreet bch2_trans_exit(&trans); 3469e8540e56SKent Overstreet 3470e8540e56SKent Overstreet if (ret) 3471e8540e56SKent Overstreet return ret; 3472e8540e56SKent Overstreet 3473e8540e56SKent Overstreet return bch2_quota_reservation_add(c, inode, res, sectors, true); 3474e8540e56SKent Overstreet } 3475e8540e56SKent Overstreet 347676426098SKent Overstreet loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, 347776426098SKent Overstreet struct file *file_dst, loff_t pos_dst, 347876426098SKent Overstreet loff_t len, unsigned remap_flags) 347976426098SKent Overstreet { 348076426098SKent Overstreet struct bch_inode_info *src = file_bch_inode(file_src); 348176426098SKent Overstreet struct bch_inode_info *dst = file_bch_inode(file_dst); 348276426098SKent Overstreet struct bch_fs *c = src->v.i_sb->s_fs_info; 3483e8540e56SKent Overstreet struct quota_res quota_res = { 0 }; 34842e87eae1SKent Overstreet s64 i_sectors_delta = 0; 3485677fc056SKent Overstreet u64 aligned_len; 348676426098SKent Overstreet loff_t ret = 0; 348776426098SKent Overstreet 348876426098SKent Overstreet if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY)) 348976426098SKent Overstreet return -EINVAL; 349076426098SKent Overstreet 349176426098SKent Overstreet if (remap_flags & REMAP_FILE_DEDUP) 349276426098SKent Overstreet return -EOPNOTSUPP; 349376426098SKent Overstreet 349476426098SKent Overstreet if ((pos_src & (block_bytes(c) - 1)) || 349576426098SKent Overstreet (pos_dst & (block_bytes(c) - 1))) 349676426098SKent Overstreet return -EINVAL; 349776426098SKent Overstreet 349876426098SKent Overstreet if (src == dst && 349976426098SKent Overstreet abs(pos_src - pos_dst) < len) 350076426098SKent Overstreet return -EINVAL; 350176426098SKent Overstreet 350276426098SKent Overstreet bch2_lock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); 350376426098SKent Overstreet 350476426098SKent Overstreet inode_dio_wait(&src->v); 350576426098SKent Overstreet inode_dio_wait(&dst->v); 350676426098SKent Overstreet 350776426098SKent Overstreet ret = generic_remap_file_range_prep(file_src, pos_src, 350876426098SKent Overstreet file_dst, pos_dst, 350976426098SKent Overstreet &len, remap_flags); 351076426098SKent Overstreet if (ret < 0 || len == 0) 35112e87eae1SKent Overstreet goto err; 351276426098SKent Overstreet 3513677fc056SKent Overstreet aligned_len = round_up((u64) len, block_bytes(c)); 351476426098SKent Overstreet 351576426098SKent Overstreet ret = write_invalidate_inode_pages_range(dst->v.i_mapping, 3516677fc056SKent Overstreet pos_dst, pos_dst + len - 1); 351776426098SKent Overstreet if (ret) 35182e87eae1SKent Overstreet goto err; 351976426098SKent Overstreet 3520e8540e56SKent Overstreet ret = quota_reserve_range(dst, "a_res, pos_dst >> 9, 3521e8540e56SKent Overstreet (pos_dst + aligned_len) >> 9); 3522e8540e56SKent Overstreet if (ret) 3523e8540e56SKent Overstreet goto err; 3524e8540e56SKent Overstreet 3525e8540e56SKent Overstreet file_update_time(file_dst); 3526e8540e56SKent Overstreet 3527dcfc593fSKent Overstreet mark_pagecache_unallocated(src, pos_src >> 9, 3528dcfc593fSKent Overstreet (pos_src + aligned_len) >> 9); 352976426098SKent Overstreet 35302e87eae1SKent Overstreet ret = bch2_remap_range(c, 35316fed42bbSKent Overstreet inode_inum(dst), pos_dst >> 9, 35326fed42bbSKent Overstreet inode_inum(src), pos_src >> 9, 353376426098SKent Overstreet aligned_len >> 9, 35342e87eae1SKent Overstreet pos_dst + len, &i_sectors_delta); 35352e87eae1SKent Overstreet if (ret < 0) 35362e87eae1SKent Overstreet goto err; 353776426098SKent Overstreet 35382e87eae1SKent Overstreet /* 35392e87eae1SKent Overstreet * due to alignment, we might have remapped slightly more than requsted 35402e87eae1SKent Overstreet */ 3541677fc056SKent Overstreet ret = min((u64) ret << 9, (u64) len); 35422e87eae1SKent Overstreet 3543e8540e56SKent Overstreet i_sectors_acct(c, dst, "a_res, i_sectors_delta); 35442e87eae1SKent Overstreet 35452e87eae1SKent Overstreet spin_lock(&dst->v.i_lock); 3546677fc056SKent Overstreet if (pos_dst + ret > dst->v.i_size) 3547677fc056SKent Overstreet i_size_write(&dst->v, pos_dst + ret); 35482e87eae1SKent Overstreet spin_unlock(&dst->v.i_lock); 3549e7084c9cSKent Overstreet 355068a2054dSKent Overstreet if ((file_dst->f_flags & (__O_SYNC | O_DSYNC)) || 355168a2054dSKent Overstreet IS_SYNC(file_inode(file_dst))) 3552a8b3a677SKent Overstreet ret = bch2_flush_inode(c, dst); 35532e87eae1SKent Overstreet err: 3554e8540e56SKent Overstreet bch2_quota_reservation_put(c, dst, "a_res); 355576426098SKent Overstreet bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); 355676426098SKent Overstreet 35575c1ef830SKent Overstreet return bch2_err_class(ret); 355876426098SKent Overstreet } 355976426098SKent Overstreet 35601c6fdbd8SKent Overstreet /* fseek: */ 35611c6fdbd8SKent Overstreet 3562543ef2ebSKent Overstreet static int folio_data_offset(struct folio *folio, unsigned offset) 35631c6fdbd8SKent Overstreet { 356430bff594SKent Overstreet struct bch_folio *s = bch2_folio(folio); 3565a86a92cbSKent Overstreet unsigned i, sectors = folio_sectors(folio); 3566f81b648dSKent Overstreet 3567543ef2ebSKent Overstreet if (s) 3568a86a92cbSKent Overstreet for (i = offset >> 9; i < sectors; i++) 3569*a1774a05SKent Overstreet if (s->s[i].state >= SECTOR_dirty) 3570543ef2ebSKent Overstreet return i << 9; 3571f57a6a5dSKent Overstreet 3572543ef2ebSKent Overstreet return -1; 35731c6fdbd8SKent Overstreet } 35741c6fdbd8SKent Overstreet 3575543ef2ebSKent Overstreet static loff_t bch2_seek_pagecache_data(struct inode *vinode, 35761c6fdbd8SKent Overstreet loff_t start_offset, 35771c6fdbd8SKent Overstreet loff_t end_offset) 35781c6fdbd8SKent Overstreet { 35791c6fdbd8SKent Overstreet struct folio_batch fbatch; 35801c6fdbd8SKent Overstreet pgoff_t start_index = start_offset >> PAGE_SHIFT; 35811c6fdbd8SKent Overstreet pgoff_t end_index = end_offset >> PAGE_SHIFT; 35821c6fdbd8SKent Overstreet pgoff_t index = start_index; 35831c6fdbd8SKent Overstreet unsigned i; 3584543ef2ebSKent Overstreet loff_t ret; 3585543ef2ebSKent Overstreet int offset; 35861c6fdbd8SKent Overstreet 35871c6fdbd8SKent Overstreet folio_batch_init(&fbatch); 35881c6fdbd8SKent Overstreet 35891c6fdbd8SKent Overstreet while (filemap_get_folios(vinode->i_mapping, 35901c6fdbd8SKent Overstreet &index, end_index, &fbatch)) { 35911c6fdbd8SKent Overstreet for (i = 0; i < folio_batch_count(&fbatch); i++) { 35921c6fdbd8SKent Overstreet struct folio *folio = fbatch.folios[i]; 35931c6fdbd8SKent Overstreet 35941c6fdbd8SKent Overstreet folio_lock(folio); 3595543ef2ebSKent Overstreet offset = folio_data_offset(folio, 3596a86a92cbSKent Overstreet max(folio_pos(folio), start_offset) - 3597a86a92cbSKent Overstreet folio_pos(folio)); 3598543ef2ebSKent Overstreet if (offset >= 0) { 3599a86a92cbSKent Overstreet ret = clamp(folio_pos(folio) + offset, 3600543ef2ebSKent Overstreet start_offset, end_offset); 36011c6fdbd8SKent Overstreet folio_unlock(folio); 36021c6fdbd8SKent Overstreet folio_batch_release(&fbatch); 3603543ef2ebSKent Overstreet return ret; 36041c6fdbd8SKent Overstreet } 36051c6fdbd8SKent Overstreet folio_unlock(folio); 36061c6fdbd8SKent Overstreet } 36071c6fdbd8SKent Overstreet folio_batch_release(&fbatch); 36081c6fdbd8SKent Overstreet cond_resched(); 36091c6fdbd8SKent Overstreet } 36101c6fdbd8SKent Overstreet 36111c6fdbd8SKent Overstreet return end_offset; 36121c6fdbd8SKent Overstreet } 36131c6fdbd8SKent Overstreet 36141c6fdbd8SKent Overstreet static loff_t bch2_seek_data(struct file *file, u64 offset) 36151c6fdbd8SKent Overstreet { 36161c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 36171c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 3618424eb881SKent Overstreet struct btree_trans trans; 361967e0dd8fSKent Overstreet struct btree_iter iter; 36201c6fdbd8SKent Overstreet struct bkey_s_c k; 36216fed42bbSKent Overstreet subvol_inum inum = inode_inum(inode); 36221c6fdbd8SKent Overstreet u64 isize, next_data = MAX_LFS_FILESIZE; 36236fed42bbSKent Overstreet u32 snapshot; 36241c6fdbd8SKent Overstreet int ret; 36251c6fdbd8SKent Overstreet 36261c6fdbd8SKent Overstreet isize = i_size_read(&inode->v); 36271c6fdbd8SKent Overstreet if (offset >= isize) 36281c6fdbd8SKent Overstreet return -ENXIO; 36291c6fdbd8SKent Overstreet 363020bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 36316fed42bbSKent Overstreet retry: 36326fed42bbSKent Overstreet bch2_trans_begin(&trans); 36336fed42bbSKent Overstreet 36346fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 36356fed42bbSKent Overstreet if (ret) 36366fed42bbSKent Overstreet goto err; 3637424eb881SKent Overstreet 3638c72f687aSKent Overstreet for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_extents, 3639c72f687aSKent Overstreet SPOS(inode->v.i_ino, offset >> 9, snapshot), 3640c72f687aSKent Overstreet POS(inode->v.i_ino, U64_MAX), 3641c72f687aSKent Overstreet 0, k, ret) { 3642c72f687aSKent Overstreet if (bkey_extent_is_data(k.k)) { 36431c6fdbd8SKent Overstreet next_data = max(offset, bkey_start_offset(k.k) << 9); 36441c6fdbd8SKent Overstreet break; 36451c6fdbd8SKent Overstreet } else if (k.k->p.offset >> 9 > isize) 36461c6fdbd8SKent Overstreet break; 36471c6fdbd8SKent Overstreet } 364867e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 36496fed42bbSKent Overstreet err: 3650549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 36516fed42bbSKent Overstreet goto retry; 36521c6fdbd8SKent Overstreet 36539a796fdbSKent Overstreet bch2_trans_exit(&trans); 36541c6fdbd8SKent Overstreet if (ret) 36551c6fdbd8SKent Overstreet return ret; 36561c6fdbd8SKent Overstreet 36571c6fdbd8SKent Overstreet if (next_data > offset) 3658543ef2ebSKent Overstreet next_data = bch2_seek_pagecache_data(&inode->v, 36591c6fdbd8SKent Overstreet offset, next_data); 36601c6fdbd8SKent Overstreet 3661e10d3094SKent Overstreet if (next_data >= isize) 36621c6fdbd8SKent Overstreet return -ENXIO; 36631c6fdbd8SKent Overstreet 36641c6fdbd8SKent Overstreet return vfs_setpos(file, next_data, MAX_LFS_FILESIZE); 36651c6fdbd8SKent Overstreet } 36661c6fdbd8SKent Overstreet 3667e8d28c3eSKent Overstreet static bool folio_hole_offset(struct address_space *mapping, loff_t *offset) 36681c6fdbd8SKent Overstreet { 3669e8d28c3eSKent Overstreet struct folio *folio; 3670e8d28c3eSKent Overstreet struct bch_folio *s; 3671e8d28c3eSKent Overstreet unsigned i, sectors, f_offset; 3672e8d28c3eSKent Overstreet bool ret = true; 3673543ef2ebSKent Overstreet 3674e8d28c3eSKent Overstreet folio = filemap_lock_folio(mapping, *offset >> PAGE_SHIFT); 3675e8d28c3eSKent Overstreet if (!folio) 3676e8d28c3eSKent Overstreet return true; 3677e8d28c3eSKent Overstreet 3678e8d28c3eSKent Overstreet s = bch2_folio(folio); 3679543ef2ebSKent Overstreet if (!s) 3680e8d28c3eSKent Overstreet goto unlock; 3681543ef2ebSKent Overstreet 3682e8d28c3eSKent Overstreet sectors = folio_sectors(folio); 3683e8d28c3eSKent Overstreet f_offset = *offset - folio_pos(folio); 3684543ef2ebSKent Overstreet 3685e8d28c3eSKent Overstreet for (i = f_offset >> 9; i < sectors; i++) 3686*a1774a05SKent Overstreet if (s->s[i].state < SECTOR_dirty) { 3687e8d28c3eSKent Overstreet *offset = max(*offset, folio_pos(folio) + (i << 9)); 3688e8d28c3eSKent Overstreet goto unlock; 3689543ef2ebSKent Overstreet } 3690543ef2ebSKent Overstreet 3691e8d28c3eSKent Overstreet *offset = folio_end_pos(folio); 3692e8d28c3eSKent Overstreet ret = false; 3693e8d28c3eSKent Overstreet unlock: 369430bff594SKent Overstreet folio_unlock(folio); 36951c6fdbd8SKent Overstreet return ret; 36961c6fdbd8SKent Overstreet } 36971c6fdbd8SKent Overstreet 3698543ef2ebSKent Overstreet static loff_t bch2_seek_pagecache_hole(struct inode *vinode, 36991c6fdbd8SKent Overstreet loff_t start_offset, 37001c6fdbd8SKent Overstreet loff_t end_offset) 37011c6fdbd8SKent Overstreet { 37021c6fdbd8SKent Overstreet struct address_space *mapping = vinode->i_mapping; 3703e8d28c3eSKent Overstreet loff_t offset = start_offset; 37041c6fdbd8SKent Overstreet 3705e8d28c3eSKent Overstreet while (offset < end_offset && 3706e8d28c3eSKent Overstreet !folio_hole_offset(mapping, &offset)) 3707e8d28c3eSKent Overstreet ; 3708543ef2ebSKent Overstreet 3709e8d28c3eSKent Overstreet return min(offset, end_offset); 37101c6fdbd8SKent Overstreet } 37111c6fdbd8SKent Overstreet 37121c6fdbd8SKent Overstreet static loff_t bch2_seek_hole(struct file *file, u64 offset) 37131c6fdbd8SKent Overstreet { 37141c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 37151c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 3716424eb881SKent Overstreet struct btree_trans trans; 371767e0dd8fSKent Overstreet struct btree_iter iter; 37181c6fdbd8SKent Overstreet struct bkey_s_c k; 37196fed42bbSKent Overstreet subvol_inum inum = inode_inum(inode); 37201c6fdbd8SKent Overstreet u64 isize, next_hole = MAX_LFS_FILESIZE; 37216fed42bbSKent Overstreet u32 snapshot; 37221c6fdbd8SKent Overstreet int ret; 37231c6fdbd8SKent Overstreet 37241c6fdbd8SKent Overstreet isize = i_size_read(&inode->v); 37251c6fdbd8SKent Overstreet if (offset >= isize) 37261c6fdbd8SKent Overstreet return -ENXIO; 37271c6fdbd8SKent Overstreet 372820bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 37296fed42bbSKent Overstreet retry: 37306fed42bbSKent Overstreet bch2_trans_begin(&trans); 37316fed42bbSKent Overstreet 37326fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 37336fed42bbSKent Overstreet if (ret) 37346fed42bbSKent Overstreet goto err; 3735424eb881SKent Overstreet 3736e5fa91d7SKent Overstreet for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, 37376fed42bbSKent Overstreet SPOS(inode->v.i_ino, offset >> 9, snapshot), 373894f651e2SKent Overstreet BTREE_ITER_SLOTS, k, ret) { 37391c6fdbd8SKent Overstreet if (k.k->p.inode != inode->v.i_ino) { 3740543ef2ebSKent Overstreet next_hole = bch2_seek_pagecache_hole(&inode->v, 37411c6fdbd8SKent Overstreet offset, MAX_LFS_FILESIZE); 37421c6fdbd8SKent Overstreet break; 37431c6fdbd8SKent Overstreet } else if (!bkey_extent_is_data(k.k)) { 3744543ef2ebSKent Overstreet next_hole = bch2_seek_pagecache_hole(&inode->v, 37451c6fdbd8SKent Overstreet max(offset, bkey_start_offset(k.k) << 9), 37461c6fdbd8SKent Overstreet k.k->p.offset << 9); 37471c6fdbd8SKent Overstreet 37481c6fdbd8SKent Overstreet if (next_hole < k.k->p.offset << 9) 37491c6fdbd8SKent Overstreet break; 37501c6fdbd8SKent Overstreet } else { 37511c6fdbd8SKent Overstreet offset = max(offset, bkey_start_offset(k.k) << 9); 37521c6fdbd8SKent Overstreet } 37531c6fdbd8SKent Overstreet } 375467e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 37556fed42bbSKent Overstreet err: 3756549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 37576fed42bbSKent Overstreet goto retry; 37581c6fdbd8SKent Overstreet 37599a796fdbSKent Overstreet bch2_trans_exit(&trans); 37601c6fdbd8SKent Overstreet if (ret) 37611c6fdbd8SKent Overstreet return ret; 37621c6fdbd8SKent Overstreet 37631c6fdbd8SKent Overstreet if (next_hole > isize) 37641c6fdbd8SKent Overstreet next_hole = isize; 37651c6fdbd8SKent Overstreet 37661c6fdbd8SKent Overstreet return vfs_setpos(file, next_hole, MAX_LFS_FILESIZE); 37671c6fdbd8SKent Overstreet } 37681c6fdbd8SKent Overstreet 37691c6fdbd8SKent Overstreet loff_t bch2_llseek(struct file *file, loff_t offset, int whence) 37701c6fdbd8SKent Overstreet { 37715c1ef830SKent Overstreet loff_t ret; 37725c1ef830SKent Overstreet 37731c6fdbd8SKent Overstreet switch (whence) { 37741c6fdbd8SKent Overstreet case SEEK_SET: 37751c6fdbd8SKent Overstreet case SEEK_CUR: 37761c6fdbd8SKent Overstreet case SEEK_END: 37775c1ef830SKent Overstreet ret = generic_file_llseek(file, offset, whence); 37785c1ef830SKent Overstreet break; 37791c6fdbd8SKent Overstreet case SEEK_DATA: 37805c1ef830SKent Overstreet ret = bch2_seek_data(file, offset); 37815c1ef830SKent Overstreet break; 37821c6fdbd8SKent Overstreet case SEEK_HOLE: 37835c1ef830SKent Overstreet ret = bch2_seek_hole(file, offset); 37845c1ef830SKent Overstreet break; 37855c1ef830SKent Overstreet default: 37865c1ef830SKent Overstreet ret = -EINVAL; 37875c1ef830SKent Overstreet break; 37881c6fdbd8SKent Overstreet } 37891c6fdbd8SKent Overstreet 37905c1ef830SKent Overstreet return bch2_err_class(ret); 37911c6fdbd8SKent Overstreet } 37921c6fdbd8SKent Overstreet 37931c6fdbd8SKent Overstreet void bch2_fs_fsio_exit(struct bch_fs *c) 37941c6fdbd8SKent Overstreet { 3795a8b3a677SKent Overstreet bioset_exit(&c->nocow_flush_bioset); 37961c6fdbd8SKent Overstreet bioset_exit(&c->dio_write_bioset); 37971c6fdbd8SKent Overstreet bioset_exit(&c->dio_read_bioset); 37981c6fdbd8SKent Overstreet bioset_exit(&c->writepage_bioset); 37991c6fdbd8SKent Overstreet } 38001c6fdbd8SKent Overstreet 38011c6fdbd8SKent Overstreet int bch2_fs_fsio_init(struct bch_fs *c) 38021c6fdbd8SKent Overstreet { 38031c6fdbd8SKent Overstreet int ret = 0; 38041c6fdbd8SKent Overstreet 38051c6fdbd8SKent Overstreet pr_verbose_init(c->opts, ""); 38061c6fdbd8SKent Overstreet 38071c6fdbd8SKent Overstreet if (bioset_init(&c->writepage_bioset, 38089a3df993SKent Overstreet 4, offsetof(struct bch_writepage_io, op.wbio.bio), 380965d48e35SKent Overstreet BIOSET_NEED_BVECS)) 381065d48e35SKent Overstreet return -BCH_ERR_ENOMEM_writepage_bioset_init; 381165d48e35SKent Overstreet 381265d48e35SKent Overstreet if (bioset_init(&c->dio_read_bioset, 38131c6fdbd8SKent Overstreet 4, offsetof(struct dio_read, rbio.bio), 381465d48e35SKent Overstreet BIOSET_NEED_BVECS)) 381565d48e35SKent Overstreet return -BCH_ERR_ENOMEM_dio_read_bioset_init; 381665d48e35SKent Overstreet 381765d48e35SKent Overstreet if (bioset_init(&c->dio_write_bioset, 38189a3df993SKent Overstreet 4, offsetof(struct dio_write, op.wbio.bio), 381965d48e35SKent Overstreet BIOSET_NEED_BVECS)) 382065d48e35SKent Overstreet return -BCH_ERR_ENOMEM_dio_write_bioset_init; 382165d48e35SKent Overstreet 382265d48e35SKent Overstreet if (bioset_init(&c->nocow_flush_bioset, 3823a8b3a677SKent Overstreet 1, offsetof(struct nocow_flush, bio), 0)) 382465d48e35SKent Overstreet return -BCH_ERR_ENOMEM_nocow_flush_bioset_init; 38251c6fdbd8SKent Overstreet 38261c6fdbd8SKent Overstreet pr_verbose_init(c->opts, "ret %i", ret); 38271c6fdbd8SKent Overstreet return ret; 38281c6fdbd8SKent Overstreet } 38291c6fdbd8SKent Overstreet 38301c6fdbd8SKent Overstreet #endif /* NO_BCACHEFS_FS */ 3831