11c6fdbd8SKent Overstreet // SPDX-License-Identifier: GPL-2.0 21c6fdbd8SKent Overstreet #ifndef NO_BCACHEFS_FS 31c6fdbd8SKent Overstreet 41c6fdbd8SKent Overstreet #include "bcachefs.h" 57b3f84eaSKent Overstreet #include "alloc_foreground.h" 607a1006aSKent Overstreet #include "bkey_buf.h" 71c6fdbd8SKent Overstreet #include "btree_update.h" 81c6fdbd8SKent Overstreet #include "buckets.h" 91c6fdbd8SKent Overstreet #include "clock.h" 101c6fdbd8SKent Overstreet #include "error.h" 11e2d9912cSKent Overstreet #include "extents.h" 1208c07feaSKent Overstreet #include "extent_update.h" 131c6fdbd8SKent Overstreet #include "fs.h" 141c6fdbd8SKent Overstreet #include "fs-io.h" 151c6fdbd8SKent Overstreet #include "fsck.h" 161c6fdbd8SKent Overstreet #include "inode.h" 171c6fdbd8SKent Overstreet #include "journal.h" 181c6fdbd8SKent Overstreet #include "io.h" 191c6fdbd8SKent Overstreet #include "keylist.h" 201c6fdbd8SKent Overstreet #include "quota.h" 2176426098SKent Overstreet #include "reflink.h" 221c6fdbd8SKent Overstreet #include "trace.h" 231c6fdbd8SKent Overstreet 241c6fdbd8SKent Overstreet #include <linux/aio.h> 251c6fdbd8SKent Overstreet #include <linux/backing-dev.h> 261c6fdbd8SKent Overstreet #include <linux/falloc.h> 271c6fdbd8SKent Overstreet #include <linux/migrate.h> 281c6fdbd8SKent Overstreet #include <linux/mmu_context.h> 291c6fdbd8SKent Overstreet #include <linux/pagevec.h> 309ba2eb25SKent Overstreet #include <linux/rmap.h> 311c6fdbd8SKent Overstreet #include <linux/sched/signal.h> 321c6fdbd8SKent Overstreet #include <linux/task_io_accounting_ops.h> 331c6fdbd8SKent Overstreet #include <linux/uio.h> 341c6fdbd8SKent Overstreet #include <linux/writeback.h> 351c6fdbd8SKent Overstreet 361c6fdbd8SKent Overstreet #include <trace/events/writeback.h> 371c6fdbd8SKent Overstreet 3830bff594SKent Overstreet static inline loff_t folio_end_pos(struct folio *folio) 3930bff594SKent Overstreet { 4030bff594SKent Overstreet return folio_pos(folio) + folio_size(folio); 4130bff594SKent Overstreet } 4230bff594SKent Overstreet 4330bff594SKent Overstreet static inline size_t folio_sectors(struct folio *folio) 4430bff594SKent Overstreet { 4530bff594SKent Overstreet return PAGE_SECTORS << folio_order(folio); 4630bff594SKent Overstreet } 4730bff594SKent Overstreet 4830bff594SKent Overstreet static inline loff_t folio_sector(struct folio *folio) 4930bff594SKent Overstreet { 5030bff594SKent Overstreet return folio_pos(folio) >> 9; 5130bff594SKent Overstreet } 5230bff594SKent Overstreet 5330bff594SKent Overstreet static inline loff_t folio_end_sector(struct folio *folio) 5430bff594SKent Overstreet { 5530bff594SKent Overstreet return folio_end_pos(folio) >> 9; 5630bff594SKent Overstreet } 5730bff594SKent Overstreet 58a8b3a677SKent Overstreet struct nocow_flush { 59a8b3a677SKent Overstreet struct closure *cl; 60a8b3a677SKent Overstreet struct bch_dev *ca; 61a8b3a677SKent Overstreet struct bio bio; 62a8b3a677SKent Overstreet }; 63a8b3a677SKent Overstreet 64a8b3a677SKent Overstreet static void nocow_flush_endio(struct bio *_bio) 65a8b3a677SKent Overstreet { 66a8b3a677SKent Overstreet 67a8b3a677SKent Overstreet struct nocow_flush *bio = container_of(_bio, struct nocow_flush, bio); 68a8b3a677SKent Overstreet 69a8b3a677SKent Overstreet closure_put(bio->cl); 70a8b3a677SKent Overstreet percpu_ref_put(&bio->ca->io_ref); 71a8b3a677SKent Overstreet bio_put(&bio->bio); 72a8b3a677SKent Overstreet } 73a8b3a677SKent Overstreet 74a8b3a677SKent Overstreet static void bch2_inode_flush_nocow_writes_async(struct bch_fs *c, 75a8b3a677SKent Overstreet struct bch_inode_info *inode, 76a8b3a677SKent Overstreet struct closure *cl) 77a8b3a677SKent Overstreet { 78a8b3a677SKent Overstreet struct nocow_flush *bio; 79a8b3a677SKent Overstreet struct bch_dev *ca; 80a8b3a677SKent Overstreet struct bch_devs_mask devs; 81a8b3a677SKent Overstreet unsigned dev; 82a8b3a677SKent Overstreet 83a8b3a677SKent Overstreet dev = find_first_bit(inode->ei_devs_need_flush.d, BCH_SB_MEMBERS_MAX); 84a8b3a677SKent Overstreet if (dev == BCH_SB_MEMBERS_MAX) 85a8b3a677SKent Overstreet return; 86a8b3a677SKent Overstreet 87a8b3a677SKent Overstreet devs = inode->ei_devs_need_flush; 88a8b3a677SKent Overstreet memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush)); 89a8b3a677SKent Overstreet 90a8b3a677SKent Overstreet for_each_set_bit(dev, devs.d, BCH_SB_MEMBERS_MAX) { 91a8b3a677SKent Overstreet rcu_read_lock(); 92a8b3a677SKent Overstreet ca = rcu_dereference(c->devs[dev]); 93a8b3a677SKent Overstreet if (ca && !percpu_ref_tryget(&ca->io_ref)) 94a8b3a677SKent Overstreet ca = NULL; 95a8b3a677SKent Overstreet rcu_read_unlock(); 96a8b3a677SKent Overstreet 97a8b3a677SKent Overstreet if (!ca) 98a8b3a677SKent Overstreet continue; 99a8b3a677SKent Overstreet 100a8b3a677SKent Overstreet bio = container_of(bio_alloc_bioset(ca->disk_sb.bdev, 0, 101a8b3a677SKent Overstreet REQ_OP_FLUSH, 102a8b3a677SKent Overstreet GFP_KERNEL, 103a8b3a677SKent Overstreet &c->nocow_flush_bioset), 104a8b3a677SKent Overstreet struct nocow_flush, bio); 105a8b3a677SKent Overstreet bio->cl = cl; 106a8b3a677SKent Overstreet bio->ca = ca; 107a8b3a677SKent Overstreet bio->bio.bi_end_io = nocow_flush_endio; 108a8b3a677SKent Overstreet closure_bio_submit(&bio->bio, cl); 109a8b3a677SKent Overstreet } 110a8b3a677SKent Overstreet } 111a8b3a677SKent Overstreet 112a8b3a677SKent Overstreet static int bch2_inode_flush_nocow_writes(struct bch_fs *c, 113a8b3a677SKent Overstreet struct bch_inode_info *inode) 114a8b3a677SKent Overstreet { 115a8b3a677SKent Overstreet struct closure cl; 116a8b3a677SKent Overstreet 117a8b3a677SKent Overstreet closure_init_stack(&cl); 118a8b3a677SKent Overstreet bch2_inode_flush_nocow_writes_async(c, inode, &cl); 119a8b3a677SKent Overstreet closure_sync(&cl); 120a8b3a677SKent Overstreet 121a8b3a677SKent Overstreet return 0; 122a8b3a677SKent Overstreet } 123a8b3a677SKent Overstreet 1247f5e31e1SKent Overstreet static inline bool bio_full(struct bio *bio, unsigned len) 1257f5e31e1SKent Overstreet { 1267f5e31e1SKent Overstreet if (bio->bi_vcnt >= bio->bi_max_vecs) 1277f5e31e1SKent Overstreet return true; 1287f5e31e1SKent Overstreet if (bio->bi_iter.bi_size > UINT_MAX - len) 1297f5e31e1SKent Overstreet return true; 1307f5e31e1SKent Overstreet return false; 1317f5e31e1SKent Overstreet } 1327f5e31e1SKent Overstreet 133eb8e6e9cSKent Overstreet static inline struct address_space *faults_disabled_mapping(void) 134eb8e6e9cSKent Overstreet { 135eb8e6e9cSKent Overstreet return (void *) (((unsigned long) current->faults_disabled_mapping) & ~1UL); 136eb8e6e9cSKent Overstreet } 137eb8e6e9cSKent Overstreet 138eb8e6e9cSKent Overstreet static inline void set_fdm_dropped_locks(void) 139eb8e6e9cSKent Overstreet { 140eb8e6e9cSKent Overstreet current->faults_disabled_mapping = 141eb8e6e9cSKent Overstreet (void *) (((unsigned long) current->faults_disabled_mapping)|1); 142eb8e6e9cSKent Overstreet } 143eb8e6e9cSKent Overstreet 144eb8e6e9cSKent Overstreet static inline bool fdm_dropped_locks(void) 145eb8e6e9cSKent Overstreet { 146eb8e6e9cSKent Overstreet return ((unsigned long) current->faults_disabled_mapping) & 1; 147eb8e6e9cSKent Overstreet } 148eb8e6e9cSKent Overstreet 1491c6fdbd8SKent Overstreet struct quota_res { 1501c6fdbd8SKent Overstreet u64 sectors; 1511c6fdbd8SKent Overstreet }; 1521c6fdbd8SKent Overstreet 1539a3df993SKent Overstreet struct bch_writepage_io { 1541c6fdbd8SKent Overstreet struct bch_inode_info *inode; 1551c6fdbd8SKent Overstreet 1561c6fdbd8SKent Overstreet /* must be last: */ 1571c6fdbd8SKent Overstreet struct bch_write_op op; 1581c6fdbd8SKent Overstreet }; 1591c6fdbd8SKent Overstreet 1601c6fdbd8SKent Overstreet struct dio_write { 1611c6fdbd8SKent Overstreet struct kiocb *req; 162182c7bbfSKent Overstreet struct address_space *mapping; 163182c7bbfSKent Overstreet struct bch_inode_info *inode; 164ed484030SKent Overstreet struct mm_struct *mm; 1651c6fdbd8SKent Overstreet unsigned loop:1, 1666b1b186aSKent Overstreet extending:1, 1671c6fdbd8SKent Overstreet sync:1, 168a1ee777bSKent Overstreet flush:1, 1691c6fdbd8SKent Overstreet free_iov:1; 1701c6fdbd8SKent Overstreet struct quota_res quota_res; 171042a1f26SKent Overstreet u64 written; 1721c6fdbd8SKent Overstreet 1731c6fdbd8SKent Overstreet struct iov_iter iter; 1741c6fdbd8SKent Overstreet struct iovec inline_vecs[2]; 1751c6fdbd8SKent Overstreet 1761c6fdbd8SKent Overstreet /* must be last: */ 1779a3df993SKent Overstreet struct bch_write_op op; 1781c6fdbd8SKent Overstreet }; 1791c6fdbd8SKent Overstreet 1801c6fdbd8SKent Overstreet struct dio_read { 1811c6fdbd8SKent Overstreet struct closure cl; 1821c6fdbd8SKent Overstreet struct kiocb *req; 1831c6fdbd8SKent Overstreet long ret; 184b4725cc1SKent Overstreet bool should_dirty; 1851c6fdbd8SKent Overstreet struct bch_read_bio rbio; 1861c6fdbd8SKent Overstreet }; 1871c6fdbd8SKent Overstreet 1881c6fdbd8SKent Overstreet /* pagecache_block must be held */ 189a023127aSKent Overstreet static noinline int write_invalidate_inode_pages_range(struct address_space *mapping, 1901c6fdbd8SKent Overstreet loff_t start, loff_t end) 1911c6fdbd8SKent Overstreet { 1921c6fdbd8SKent Overstreet int ret; 1931c6fdbd8SKent Overstreet 1941c6fdbd8SKent Overstreet /* 1951c6fdbd8SKent Overstreet * XXX: the way this is currently implemented, we can spin if a process 1961c6fdbd8SKent Overstreet * is continually redirtying a specific page 1971c6fdbd8SKent Overstreet */ 1981c6fdbd8SKent Overstreet do { 1991c6fdbd8SKent Overstreet if (!mapping->nrpages) 2001c6fdbd8SKent Overstreet return 0; 2011c6fdbd8SKent Overstreet 2021c6fdbd8SKent Overstreet ret = filemap_write_and_wait_range(mapping, start, end); 2031c6fdbd8SKent Overstreet if (ret) 2041c6fdbd8SKent Overstreet break; 2051c6fdbd8SKent Overstreet 2061c6fdbd8SKent Overstreet if (!mapping->nrpages) 2071c6fdbd8SKent Overstreet return 0; 2081c6fdbd8SKent Overstreet 2091c6fdbd8SKent Overstreet ret = invalidate_inode_pages2_range(mapping, 2101c6fdbd8SKent Overstreet start >> PAGE_SHIFT, 2111c6fdbd8SKent Overstreet end >> PAGE_SHIFT); 2121c6fdbd8SKent Overstreet } while (ret == -EBUSY); 2131c6fdbd8SKent Overstreet 2141c6fdbd8SKent Overstreet return ret; 2151c6fdbd8SKent Overstreet } 2161c6fdbd8SKent Overstreet 2171c6fdbd8SKent Overstreet /* quotas */ 2181c6fdbd8SKent Overstreet 2191c6fdbd8SKent Overstreet #ifdef CONFIG_BCACHEFS_QUOTA 2201c6fdbd8SKent Overstreet 2216b1b186aSKent Overstreet static void __bch2_quota_reservation_put(struct bch_fs *c, 2221c6fdbd8SKent Overstreet struct bch_inode_info *inode, 2231c6fdbd8SKent Overstreet struct quota_res *res) 2241c6fdbd8SKent Overstreet { 2251c6fdbd8SKent Overstreet BUG_ON(res->sectors > inode->ei_quota_reserved); 2261c6fdbd8SKent Overstreet 2271c6fdbd8SKent Overstreet bch2_quota_acct(c, inode->ei_qid, Q_SPC, 22826609b61SKent Overstreet -((s64) res->sectors), KEY_TYPE_QUOTA_PREALLOC); 2291c6fdbd8SKent Overstreet inode->ei_quota_reserved -= res->sectors; 2301c6fdbd8SKent Overstreet res->sectors = 0; 2311c6fdbd8SKent Overstreet } 2321c6fdbd8SKent Overstreet 2336b1b186aSKent Overstreet static void bch2_quota_reservation_put(struct bch_fs *c, 2346b1b186aSKent Overstreet struct bch_inode_info *inode, 2356b1b186aSKent Overstreet struct quota_res *res) 2366b1b186aSKent Overstreet { 2376b1b186aSKent Overstreet if (res->sectors) { 2386b1b186aSKent Overstreet mutex_lock(&inode->ei_quota_lock); 2396b1b186aSKent Overstreet __bch2_quota_reservation_put(c, inode, res); 2406b1b186aSKent Overstreet mutex_unlock(&inode->ei_quota_lock); 2416b1b186aSKent Overstreet } 2426b1b186aSKent Overstreet } 2436b1b186aSKent Overstreet 2441c6fdbd8SKent Overstreet static int bch2_quota_reservation_add(struct bch_fs *c, 2451c6fdbd8SKent Overstreet struct bch_inode_info *inode, 2461c6fdbd8SKent Overstreet struct quota_res *res, 247e8540e56SKent Overstreet u64 sectors, 2481c6fdbd8SKent Overstreet bool check_enospc) 2491c6fdbd8SKent Overstreet { 2501c6fdbd8SKent Overstreet int ret; 2511c6fdbd8SKent Overstreet 2521c6fdbd8SKent Overstreet mutex_lock(&inode->ei_quota_lock); 2531c6fdbd8SKent Overstreet ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, 25426609b61SKent Overstreet check_enospc ? KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK); 2551c6fdbd8SKent Overstreet if (likely(!ret)) { 2561c6fdbd8SKent Overstreet inode->ei_quota_reserved += sectors; 2571c6fdbd8SKent Overstreet res->sectors += sectors; 2581c6fdbd8SKent Overstreet } 2591c6fdbd8SKent Overstreet mutex_unlock(&inode->ei_quota_lock); 2601c6fdbd8SKent Overstreet 2611c6fdbd8SKent Overstreet return ret; 2621c6fdbd8SKent Overstreet } 2631c6fdbd8SKent Overstreet 2641c6fdbd8SKent Overstreet #else 2651c6fdbd8SKent Overstreet 2666b1b186aSKent Overstreet static void __bch2_quota_reservation_put(struct bch_fs *c, 2676b1b186aSKent Overstreet struct bch_inode_info *inode, 2686b1b186aSKent Overstreet struct quota_res *res) {} 2696b1b186aSKent Overstreet 2701c6fdbd8SKent Overstreet static void bch2_quota_reservation_put(struct bch_fs *c, 2711c6fdbd8SKent Overstreet struct bch_inode_info *inode, 2726b1b186aSKent Overstreet struct quota_res *res) {} 2731c6fdbd8SKent Overstreet 2741c6fdbd8SKent Overstreet static int bch2_quota_reservation_add(struct bch_fs *c, 2751c6fdbd8SKent Overstreet struct bch_inode_info *inode, 2761c6fdbd8SKent Overstreet struct quota_res *res, 2771c6fdbd8SKent Overstreet unsigned sectors, 2781c6fdbd8SKent Overstreet bool check_enospc) 2791c6fdbd8SKent Overstreet { 2801c6fdbd8SKent Overstreet return 0; 2811c6fdbd8SKent Overstreet } 2821c6fdbd8SKent Overstreet 2831c6fdbd8SKent Overstreet #endif 2841c6fdbd8SKent Overstreet 2851c6fdbd8SKent Overstreet /* i_size updates: */ 2861c6fdbd8SKent Overstreet 2872ea90048SKent Overstreet struct inode_new_size { 2882ea90048SKent Overstreet loff_t new_size; 2892ea90048SKent Overstreet u64 now; 2902ea90048SKent Overstreet unsigned fields; 2912ea90048SKent Overstreet }; 2922ea90048SKent Overstreet 2931c6fdbd8SKent Overstreet static int inode_set_size(struct bch_inode_info *inode, 2941c6fdbd8SKent Overstreet struct bch_inode_unpacked *bi, 2951c6fdbd8SKent Overstreet void *p) 2961c6fdbd8SKent Overstreet { 2972ea90048SKent Overstreet struct inode_new_size *s = p; 2981c6fdbd8SKent Overstreet 2992ea90048SKent Overstreet bi->bi_size = s->new_size; 3002ea90048SKent Overstreet if (s->fields & ATTR_ATIME) 3012ea90048SKent Overstreet bi->bi_atime = s->now; 3022ea90048SKent Overstreet if (s->fields & ATTR_MTIME) 3032ea90048SKent Overstreet bi->bi_mtime = s->now; 3042ea90048SKent Overstreet if (s->fields & ATTR_CTIME) 3052ea90048SKent Overstreet bi->bi_ctime = s->now; 3061c6fdbd8SKent Overstreet 3071c6fdbd8SKent Overstreet return 0; 3081c6fdbd8SKent Overstreet } 3091c6fdbd8SKent Overstreet 31076426098SKent Overstreet int __must_check bch2_write_inode_size(struct bch_fs *c, 3111c6fdbd8SKent Overstreet struct bch_inode_info *inode, 3122ea90048SKent Overstreet loff_t new_size, unsigned fields) 3131c6fdbd8SKent Overstreet { 3142ea90048SKent Overstreet struct inode_new_size s = { 3152ea90048SKent Overstreet .new_size = new_size, 3162ea90048SKent Overstreet .now = bch2_current_time(c), 3172ea90048SKent Overstreet .fields = fields, 3182ea90048SKent Overstreet }; 3192ea90048SKent Overstreet 3202ea90048SKent Overstreet return bch2_write_inode(c, inode, inode_set_size, &s, fields); 3211c6fdbd8SKent Overstreet } 3221c6fdbd8SKent Overstreet 3236b1b186aSKent Overstreet static void __i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, 324190fa7afSKent Overstreet struct quota_res *quota_res, s64 sectors) 3251c6fdbd8SKent Overstreet { 326b33bf1bcSKent Overstreet bch2_fs_inconsistent_on((s64) inode->v.i_blocks + sectors < 0, c, 327b33bf1bcSKent Overstreet "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", 328b33bf1bcSKent Overstreet inode->v.i_ino, (u64) inode->v.i_blocks, sectors, 329b33bf1bcSKent Overstreet inode->ei_inode.bi_sectors); 330b44a66a6SKent Overstreet inode->v.i_blocks += sectors; 331b44a66a6SKent Overstreet 3321c6fdbd8SKent Overstreet #ifdef CONFIG_BCACHEFS_QUOTA 3331c6fdbd8SKent Overstreet if (quota_res && sectors > 0) { 3341c6fdbd8SKent Overstreet BUG_ON(sectors > quota_res->sectors); 3351c6fdbd8SKent Overstreet BUG_ON(sectors > inode->ei_quota_reserved); 3361c6fdbd8SKent Overstreet 3371c6fdbd8SKent Overstreet quota_res->sectors -= sectors; 3381c6fdbd8SKent Overstreet inode->ei_quota_reserved -= sectors; 3391c6fdbd8SKent Overstreet } else { 34026609b61SKent Overstreet bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN); 3411c6fdbd8SKent Overstreet } 3421c6fdbd8SKent Overstreet #endif 3436b1b186aSKent Overstreet } 3446b1b186aSKent Overstreet 3456b1b186aSKent Overstreet static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, 3466b1b186aSKent Overstreet struct quota_res *quota_res, s64 sectors) 3476b1b186aSKent Overstreet { 3486b1b186aSKent Overstreet if (sectors) { 3496b1b186aSKent Overstreet mutex_lock(&inode->ei_quota_lock); 3506b1b186aSKent Overstreet __i_sectors_acct(c, inode, quota_res, sectors); 3511c6fdbd8SKent Overstreet mutex_unlock(&inode->ei_quota_lock); 3521c6fdbd8SKent Overstreet } 3536b1b186aSKent Overstreet } 3541c6fdbd8SKent Overstreet 3551c6fdbd8SKent Overstreet /* page state: */ 3561c6fdbd8SKent Overstreet 3571c6fdbd8SKent Overstreet /* stored in page->private: */ 3581c6fdbd8SKent Overstreet 3593342ac13SKent Overstreet struct bch_folio_sector { 360b44a66a6SKent Overstreet /* Uncompressed, fully allocated replicas (or on disk reservation): */ 361b44a66a6SKent Overstreet unsigned nr_replicas:4; 362f81b648dSKent Overstreet 363b44a66a6SKent Overstreet /* Owns PAGE_SECTORS * replicas_reserved sized in memory reservation: */ 364b44a66a6SKent Overstreet unsigned replicas_reserved:4; 3651c6fdbd8SKent Overstreet 366f57a6a5dSKent Overstreet /* i_sectors: */ 367f57a6a5dSKent Overstreet enum { 368f57a6a5dSKent Overstreet SECTOR_UNALLOCATED, 3692ba5d38bSKent Overstreet SECTOR_RESERVED, 370f57a6a5dSKent Overstreet SECTOR_DIRTY, 371b44a66a6SKent Overstreet SECTOR_DIRTY_RESERVED, 372f57a6a5dSKent Overstreet SECTOR_ALLOCATED, 373b44a66a6SKent Overstreet } state:8; 3741c6fdbd8SKent Overstreet }; 3751c6fdbd8SKent Overstreet 3763342ac13SKent Overstreet struct bch_folio { 3773826ee0bSKent Overstreet spinlock_t lock; 3787f5e31e1SKent Overstreet atomic_t write_count; 3793342ac13SKent Overstreet /* 3803342ac13SKent Overstreet * Is the sector state up to date with the btree? 3813342ac13SKent Overstreet * (Not the data itself) 3823342ac13SKent Overstreet */ 383e6ec361fSKent Overstreet bool uptodate; 38449fe78ffSKent Overstreet struct bch_folio_sector s[]; 385f57a6a5dSKent Overstreet }; 386f57a6a5dSKent Overstreet 38730bff594SKent Overstreet static inline struct bch_folio *__bch2_folio(struct folio *folio) 3881c6fdbd8SKent Overstreet { 38930bff594SKent Overstreet return folio_has_private(folio) 39030bff594SKent Overstreet ? (struct bch_folio *) folio_get_private(folio) 391f57a6a5dSKent Overstreet : NULL; 392f57a6a5dSKent Overstreet } 3931c6fdbd8SKent Overstreet 39430bff594SKent Overstreet static inline struct bch_folio *bch2_folio(struct folio *folio) 395f57a6a5dSKent Overstreet { 39630bff594SKent Overstreet EBUG_ON(!folio_test_locked(folio)); 3971c6fdbd8SKent Overstreet 39830bff594SKent Overstreet return __bch2_folio(folio); 399f57a6a5dSKent Overstreet } 400f57a6a5dSKent Overstreet 40130bff594SKent Overstreet /* for newly allocated folios: */ 40230bff594SKent Overstreet static void __bch2_folio_release(struct folio *folio) 403f57a6a5dSKent Overstreet { 40430bff594SKent Overstreet kfree(folio_detach_private(folio)); 405f57a6a5dSKent Overstreet } 406f57a6a5dSKent Overstreet 40730bff594SKent Overstreet static void bch2_folio_release(struct folio *folio) 408f57a6a5dSKent Overstreet { 40930bff594SKent Overstreet EBUG_ON(!folio_test_locked(folio)); 41030bff594SKent Overstreet __bch2_folio_release(folio); 411f57a6a5dSKent Overstreet } 412f57a6a5dSKent Overstreet 41330bff594SKent Overstreet /* for newly allocated folios: */ 41430bff594SKent Overstreet static struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp) 415f57a6a5dSKent Overstreet { 4163342ac13SKent Overstreet struct bch_folio *s; 417f57a6a5dSKent Overstreet 41849fe78ffSKent Overstreet s = kzalloc(sizeof(*s) + 41949fe78ffSKent Overstreet sizeof(struct bch_folio_sector) * 42049fe78ffSKent Overstreet folio_sectors(folio), GFP_NOFS|gfp); 421f57a6a5dSKent Overstreet if (!s) 422f57a6a5dSKent Overstreet return NULL; 423f57a6a5dSKent Overstreet 4243826ee0bSKent Overstreet spin_lock_init(&s->lock); 42530bff594SKent Overstreet folio_attach_private(folio, s); 4261c6fdbd8SKent Overstreet return s; 4271c6fdbd8SKent Overstreet } 4281c6fdbd8SKent Overstreet 42930bff594SKent Overstreet static struct bch_folio *bch2_folio_create(struct folio *folio, gfp_t gfp) 430f57a6a5dSKent Overstreet { 43130bff594SKent Overstreet return bch2_folio(folio) ?: __bch2_folio_create(folio, gfp); 432f57a6a5dSKent Overstreet } 433f57a6a5dSKent Overstreet 43479203111SKent Overstreet static unsigned bkey_to_sector_state(struct bkey_s_c k) 435b44a66a6SKent Overstreet { 43679203111SKent Overstreet if (bkey_extent_is_reservation(k)) 437b44a66a6SKent Overstreet return SECTOR_RESERVED; 43879203111SKent Overstreet if (bkey_extent_is_allocation(k.k)) 439b44a66a6SKent Overstreet return SECTOR_ALLOCATED; 440b44a66a6SKent Overstreet return SECTOR_UNALLOCATED; 441b44a66a6SKent Overstreet } 442b44a66a6SKent Overstreet 44330bff594SKent Overstreet static void __bch2_folio_set(struct folio *folio, 444e6ec361fSKent Overstreet unsigned pg_offset, unsigned pg_len, 445e6ec361fSKent Overstreet unsigned nr_ptrs, unsigned state) 446e6ec361fSKent Overstreet { 44730bff594SKent Overstreet struct bch_folio *s = bch2_folio_create(folio, __GFP_NOFAIL); 44833e2eb96SKent Overstreet unsigned i, sectors = folio_sectors(folio); 449e6ec361fSKent Overstreet 45033e2eb96SKent Overstreet BUG_ON(pg_offset >= sectors); 45133e2eb96SKent Overstreet BUG_ON(pg_offset + pg_len > sectors); 452e6ec361fSKent Overstreet 453e6ec361fSKent Overstreet spin_lock(&s->lock); 454e6ec361fSKent Overstreet 455e6ec361fSKent Overstreet for (i = pg_offset; i < pg_offset + pg_len; i++) { 456e6ec361fSKent Overstreet s->s[i].nr_replicas = nr_ptrs; 457e6ec361fSKent Overstreet s->s[i].state = state; 458e6ec361fSKent Overstreet } 459e6ec361fSKent Overstreet 46033e2eb96SKent Overstreet if (i == sectors) 461e6ec361fSKent Overstreet s->uptodate = true; 462e6ec361fSKent Overstreet 463e6ec361fSKent Overstreet spin_unlock(&s->lock); 464e6ec361fSKent Overstreet } 465e6ec361fSKent Overstreet 4663342ac13SKent Overstreet /* 4673342ac13SKent Overstreet * Initialize bch_folio state (allocated/unallocated, nr_replicas) from the 4683342ac13SKent Overstreet * extents btree: 4693342ac13SKent Overstreet */ 4703342ac13SKent Overstreet static int bch2_folio_set(struct bch_fs *c, subvol_inum inum, 47130bff594SKent Overstreet struct folio **folios, unsigned nr_folios) 472e6ec361fSKent Overstreet { 473e6ec361fSKent Overstreet struct btree_trans trans; 474e6ec361fSKent Overstreet struct btree_iter iter; 475e6ec361fSKent Overstreet struct bkey_s_c k; 47630bff594SKent Overstreet u64 offset = folio_sector(folios[0]); 47730bff594SKent Overstreet unsigned folio_idx = 0; 478e6ec361fSKent Overstreet u32 snapshot; 479e6ec361fSKent Overstreet int ret; 480e6ec361fSKent Overstreet 481e6ec361fSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 482e6ec361fSKent Overstreet retry: 483e6ec361fSKent Overstreet bch2_trans_begin(&trans); 484e6ec361fSKent Overstreet 485e6ec361fSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 486e6ec361fSKent Overstreet if (ret) 487e6ec361fSKent Overstreet goto err; 488e6ec361fSKent Overstreet 489e6ec361fSKent Overstreet for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, 490e6ec361fSKent Overstreet SPOS(inum.inum, offset, snapshot), 491e6ec361fSKent Overstreet BTREE_ITER_SLOTS, k, ret) { 492e6ec361fSKent Overstreet unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k); 49379203111SKent Overstreet unsigned state = bkey_to_sector_state(k); 494e6ec361fSKent Overstreet 49530bff594SKent Overstreet while (folio_idx < nr_folios) { 49630bff594SKent Overstreet struct folio *folio = folios[folio_idx]; 49730bff594SKent Overstreet u64 folio_start = folio_sector(folio); 49830bff594SKent Overstreet u64 folio_end = folio_end_sector(folio); 49930bff594SKent Overstreet unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) - folio_start; 50030bff594SKent Overstreet unsigned folio_len = min(k.k->p.offset, folio_end) - folio_offset - folio_start; 501e6ec361fSKent Overstreet 50230bff594SKent Overstreet BUG_ON(k.k->p.offset < folio_start); 50330bff594SKent Overstreet BUG_ON(bkey_start_offset(k.k) > folio_end); 504e6ec361fSKent Overstreet 50530bff594SKent Overstreet if (!bch2_folio_create(folio, __GFP_NOFAIL)->uptodate) 50630bff594SKent Overstreet __bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state); 507e6ec361fSKent Overstreet 50830bff594SKent Overstreet if (k.k->p.offset < folio_end) 509e6ec361fSKent Overstreet break; 51030bff594SKent Overstreet folio_idx++; 511e6ec361fSKent Overstreet } 512e6ec361fSKent Overstreet 51330bff594SKent Overstreet if (folio_idx == nr_folios) 514e6ec361fSKent Overstreet break; 515e6ec361fSKent Overstreet } 516e6ec361fSKent Overstreet 517e6ec361fSKent Overstreet offset = iter.pos.offset; 518e6ec361fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 519e6ec361fSKent Overstreet err: 520549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 521e6ec361fSKent Overstreet goto retry; 522e6ec361fSKent Overstreet bch2_trans_exit(&trans); 523e6ec361fSKent Overstreet 524e6ec361fSKent Overstreet return ret; 525e6ec361fSKent Overstreet } 526e6ec361fSKent Overstreet 527b44a66a6SKent Overstreet static void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k) 528b44a66a6SKent Overstreet { 529b44a66a6SKent Overstreet struct bvec_iter iter; 530b44a66a6SKent Overstreet struct bio_vec bv; 531b44a66a6SKent Overstreet unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v 532b44a66a6SKent Overstreet ? 0 : bch2_bkey_nr_ptrs_fully_allocated(k); 53379203111SKent Overstreet unsigned state = bkey_to_sector_state(k); 534b44a66a6SKent Overstreet 535e6ec361fSKent Overstreet bio_for_each_segment(bv, bio, iter) 53630bff594SKent Overstreet __bch2_folio_set(page_folio(bv.bv_page), bv.bv_offset >> 9, 537e6ec361fSKent Overstreet bv.bv_len >> 9, nr_ptrs, state); 538b44a66a6SKent Overstreet } 539b44a66a6SKent Overstreet 540dcfc593fSKent Overstreet static void mark_pagecache_unallocated(struct bch_inode_info *inode, 541dcfc593fSKent Overstreet u64 start, u64 end) 542dcfc593fSKent Overstreet { 543dcfc593fSKent Overstreet pgoff_t index = start >> PAGE_SECTORS_SHIFT; 544dcfc593fSKent Overstreet pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT; 545dcfc593fSKent Overstreet struct folio_batch fbatch; 546dcfc593fSKent Overstreet unsigned i, j; 547dcfc593fSKent Overstreet 548dcfc593fSKent Overstreet if (end <= start) 549dcfc593fSKent Overstreet return; 550dcfc593fSKent Overstreet 551dcfc593fSKent Overstreet folio_batch_init(&fbatch); 552dcfc593fSKent Overstreet 553dcfc593fSKent Overstreet while (filemap_get_folios(inode->v.i_mapping, 554dcfc593fSKent Overstreet &index, end_index, &fbatch)) { 555dcfc593fSKent Overstreet for (i = 0; i < folio_batch_count(&fbatch); i++) { 556dcfc593fSKent Overstreet struct folio *folio = fbatch.folios[i]; 55733e2eb96SKent Overstreet u64 folio_start = folio_sector(folio); 55833e2eb96SKent Overstreet u64 folio_end = folio_end_sector(folio); 55930bff594SKent Overstreet unsigned folio_offset = max(start, folio_start) - folio_start; 56030bff594SKent Overstreet unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; 5613342ac13SKent Overstreet struct bch_folio *s; 562dcfc593fSKent Overstreet 56330bff594SKent Overstreet BUG_ON(end <= folio_start); 564dcfc593fSKent Overstreet 565dcfc593fSKent Overstreet folio_lock(folio); 56630bff594SKent Overstreet s = bch2_folio(folio); 567dcfc593fSKent Overstreet 568dcfc593fSKent Overstreet if (s) { 569dcfc593fSKent Overstreet spin_lock(&s->lock); 57030bff594SKent Overstreet for (j = folio_offset; j < folio_offset + folio_len; j++) 571dcfc593fSKent Overstreet s->s[j].nr_replicas = 0; 572dcfc593fSKent Overstreet spin_unlock(&s->lock); 573dcfc593fSKent Overstreet } 574dcfc593fSKent Overstreet 575dcfc593fSKent Overstreet folio_unlock(folio); 576dcfc593fSKent Overstreet } 577dcfc593fSKent Overstreet folio_batch_release(&fbatch); 578dcfc593fSKent Overstreet cond_resched(); 579dcfc593fSKent Overstreet } 580dcfc593fSKent Overstreet } 581dcfc593fSKent Overstreet 582dcfc593fSKent Overstreet static void mark_pagecache_reserved(struct bch_inode_info *inode, 583dcfc593fSKent Overstreet u64 start, u64 end) 584dcfc593fSKent Overstreet { 585dcfc593fSKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 586dcfc593fSKent Overstreet pgoff_t index = start >> PAGE_SECTORS_SHIFT; 587dcfc593fSKent Overstreet pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT; 588dcfc593fSKent Overstreet struct folio_batch fbatch; 589dcfc593fSKent Overstreet s64 i_sectors_delta = 0; 590dcfc593fSKent Overstreet unsigned i, j; 591dcfc593fSKent Overstreet 592dcfc593fSKent Overstreet if (end <= start) 593dcfc593fSKent Overstreet return; 594dcfc593fSKent Overstreet 595dcfc593fSKent Overstreet folio_batch_init(&fbatch); 596dcfc593fSKent Overstreet 597dcfc593fSKent Overstreet while (filemap_get_folios(inode->v.i_mapping, 598dcfc593fSKent Overstreet &index, end_index, &fbatch)) { 599dcfc593fSKent Overstreet for (i = 0; i < folio_batch_count(&fbatch); i++) { 600dcfc593fSKent Overstreet struct folio *folio = fbatch.folios[i]; 60133e2eb96SKent Overstreet u64 folio_start = folio_sector(folio); 60233e2eb96SKent Overstreet u64 folio_end = folio_end_sector(folio); 60330bff594SKent Overstreet unsigned folio_offset = max(start, folio_start) - folio_start; 60430bff594SKent Overstreet unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; 6053342ac13SKent Overstreet struct bch_folio *s; 606dcfc593fSKent Overstreet 60730bff594SKent Overstreet BUG_ON(end <= folio_start); 608dcfc593fSKent Overstreet 609dcfc593fSKent Overstreet folio_lock(folio); 61030bff594SKent Overstreet s = bch2_folio(folio); 611dcfc593fSKent Overstreet 612dcfc593fSKent Overstreet if (s) { 613dcfc593fSKent Overstreet spin_lock(&s->lock); 61430bff594SKent Overstreet for (j = folio_offset; j < folio_offset + folio_len; j++) 615dcfc593fSKent Overstreet switch (s->s[j].state) { 616dcfc593fSKent Overstreet case SECTOR_UNALLOCATED: 617dcfc593fSKent Overstreet s->s[j].state = SECTOR_RESERVED; 618dcfc593fSKent Overstreet break; 619dcfc593fSKent Overstreet case SECTOR_DIRTY: 620dcfc593fSKent Overstreet s->s[j].state = SECTOR_DIRTY_RESERVED; 621dcfc593fSKent Overstreet i_sectors_delta--; 622dcfc593fSKent Overstreet break; 623dcfc593fSKent Overstreet default: 624dcfc593fSKent Overstreet break; 625dcfc593fSKent Overstreet } 626dcfc593fSKent Overstreet spin_unlock(&s->lock); 627dcfc593fSKent Overstreet } 628dcfc593fSKent Overstreet 629dcfc593fSKent Overstreet folio_unlock(folio); 630dcfc593fSKent Overstreet } 631dcfc593fSKent Overstreet folio_batch_release(&fbatch); 632dcfc593fSKent Overstreet cond_resched(); 633dcfc593fSKent Overstreet } 634dcfc593fSKent Overstreet 635dcfc593fSKent Overstreet i_sectors_acct(c, inode, NULL, i_sectors_delta); 636dcfc593fSKent Overstreet } 637dcfc593fSKent Overstreet 638e1036a2aSKent Overstreet static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode) 639e1036a2aSKent Overstreet { 640e1036a2aSKent Overstreet /* XXX: this should not be open coded */ 641e1036a2aSKent Overstreet return inode->ei_inode.bi_data_replicas 642e1036a2aSKent Overstreet ? inode->ei_inode.bi_data_replicas - 1 643e1036a2aSKent Overstreet : c->opts.data_replicas; 644e1036a2aSKent Overstreet } 645e1036a2aSKent Overstreet 6463342ac13SKent Overstreet static inline unsigned sectors_to_reserve(struct bch_folio_sector *s, 647f57a6a5dSKent Overstreet unsigned nr_replicas) 648f57a6a5dSKent Overstreet { 649f57a6a5dSKent Overstreet return max(0, (int) nr_replicas - 650f57a6a5dSKent Overstreet s->nr_replicas - 651f57a6a5dSKent Overstreet s->replicas_reserved); 652f57a6a5dSKent Overstreet } 653f57a6a5dSKent Overstreet 65430bff594SKent Overstreet static int bch2_get_folio_disk_reservation(struct bch_fs *c, 655f57a6a5dSKent Overstreet struct bch_inode_info *inode, 65630bff594SKent Overstreet struct folio *folio, bool check_enospc) 6571c6fdbd8SKent Overstreet { 65830bff594SKent Overstreet struct bch_folio *s = bch2_folio_create(folio, 0); 659e1036a2aSKent Overstreet unsigned nr_replicas = inode_nr_replicas(c, inode); 660f57a6a5dSKent Overstreet struct disk_reservation disk_res = { 0 }; 66133e2eb96SKent Overstreet unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0; 662f81b648dSKent Overstreet int ret; 6631c6fdbd8SKent Overstreet 664f57a6a5dSKent Overstreet if (!s) 665f57a6a5dSKent Overstreet return -ENOMEM; 6661c6fdbd8SKent Overstreet 66733e2eb96SKent Overstreet for (i = 0; i < sectors; i++) 668f57a6a5dSKent Overstreet disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas); 669f57a6a5dSKent Overstreet 670f57a6a5dSKent Overstreet if (!disk_res_sectors) 671f57a6a5dSKent Overstreet return 0; 672f57a6a5dSKent Overstreet 673f57a6a5dSKent Overstreet ret = bch2_disk_reservation_get(c, &disk_res, 674f57a6a5dSKent Overstreet disk_res_sectors, 1, 675f57a6a5dSKent Overstreet !check_enospc 676f57a6a5dSKent Overstreet ? BCH_DISK_RESERVATION_NOFAIL 677f57a6a5dSKent Overstreet : 0); 6781c6fdbd8SKent Overstreet if (unlikely(ret)) 679f81b648dSKent Overstreet return ret; 680f81b648dSKent Overstreet 68133e2eb96SKent Overstreet for (i = 0; i < sectors; i++) 682f57a6a5dSKent Overstreet s->s[i].replicas_reserved += 683f57a6a5dSKent Overstreet sectors_to_reserve(&s->s[i], nr_replicas); 684f57a6a5dSKent Overstreet 685f57a6a5dSKent Overstreet return 0; 6861c6fdbd8SKent Overstreet } 6871c6fdbd8SKent Overstreet 68830bff594SKent Overstreet struct bch2_folio_reservation { 689d1542e03SKent Overstreet struct disk_reservation disk; 690d1542e03SKent Overstreet struct quota_res quota; 691d1542e03SKent Overstreet }; 692d1542e03SKent Overstreet 69330bff594SKent Overstreet static void bch2_folio_reservation_init(struct bch_fs *c, 694f57a6a5dSKent Overstreet struct bch_inode_info *inode, 69530bff594SKent Overstreet struct bch2_folio_reservation *res) 696d1542e03SKent Overstreet { 697d1542e03SKent Overstreet memset(res, 0, sizeof(*res)); 698d1542e03SKent Overstreet 699d1542e03SKent Overstreet res->disk.nr_replicas = inode_nr_replicas(c, inode); 700d1542e03SKent Overstreet } 701d1542e03SKent Overstreet 70230bff594SKent Overstreet static void bch2_folio_reservation_put(struct bch_fs *c, 703d1542e03SKent Overstreet struct bch_inode_info *inode, 70430bff594SKent Overstreet struct bch2_folio_reservation *res) 705d1542e03SKent Overstreet { 706d1542e03SKent Overstreet bch2_disk_reservation_put(c, &res->disk); 707d1542e03SKent Overstreet bch2_quota_reservation_put(c, inode, &res->quota); 708d1542e03SKent Overstreet } 709d1542e03SKent Overstreet 71030bff594SKent Overstreet static int bch2_folio_reservation_get(struct bch_fs *c, 71130bff594SKent Overstreet struct bch_inode_info *inode, 71230bff594SKent Overstreet struct folio *folio, 71330bff594SKent Overstreet struct bch2_folio_reservation *res, 714bd954215SKent Overstreet unsigned offset, unsigned len) 715f57a6a5dSKent Overstreet { 71630bff594SKent Overstreet struct bch_folio *s = bch2_folio_create(folio, 0); 717d1542e03SKent Overstreet unsigned i, disk_sectors = 0, quota_sectors = 0; 718f57a6a5dSKent Overstreet int ret; 719f57a6a5dSKent Overstreet 720f57a6a5dSKent Overstreet if (!s) 721f57a6a5dSKent Overstreet return -ENOMEM; 722f57a6a5dSKent Overstreet 723e6ec361fSKent Overstreet BUG_ON(!s->uptodate); 724e6ec361fSKent Overstreet 7254b0a66d5SKent Overstreet for (i = round_down(offset, block_bytes(c)) >> 9; 7264b0a66d5SKent Overstreet i < round_up(offset + len, block_bytes(c)) >> 9; 727d1542e03SKent Overstreet i++) { 728d1542e03SKent Overstreet disk_sectors += sectors_to_reserve(&s->s[i], 729d1542e03SKent Overstreet res->disk.nr_replicas); 730d1542e03SKent Overstreet quota_sectors += s->s[i].state == SECTOR_UNALLOCATED; 7311c6fdbd8SKent Overstreet } 7321c6fdbd8SKent Overstreet 733d1542e03SKent Overstreet if (disk_sectors) { 734bd954215SKent Overstreet ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0); 735d1542e03SKent Overstreet if (unlikely(ret)) 736d1542e03SKent Overstreet return ret; 737d1542e03SKent Overstreet } 738d1542e03SKent Overstreet 739d1542e03SKent Overstreet if (quota_sectors) { 740d1542e03SKent Overstreet ret = bch2_quota_reservation_add(c, inode, &res->quota, 741bd954215SKent Overstreet quota_sectors, true); 742d1542e03SKent Overstreet if (unlikely(ret)) { 743d1542e03SKent Overstreet struct disk_reservation tmp = { 744d1542e03SKent Overstreet .sectors = disk_sectors 745d1542e03SKent Overstreet }; 746d1542e03SKent Overstreet 747d1542e03SKent Overstreet bch2_disk_reservation_put(c, &tmp); 748d1542e03SKent Overstreet res->disk.sectors -= disk_sectors; 749d1542e03SKent Overstreet return ret; 750d1542e03SKent Overstreet } 751d1542e03SKent Overstreet } 752d1542e03SKent Overstreet 753d1542e03SKent Overstreet return 0; 754f57a6a5dSKent Overstreet } 755f57a6a5dSKent Overstreet 75630bff594SKent Overstreet static void bch2_clear_folio_bits(struct folio *folio) 7571c6fdbd8SKent Overstreet { 75830bff594SKent Overstreet struct bch_inode_info *inode = to_bch_ei(folio->mapping->host); 7591c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 76030bff594SKent Overstreet struct bch_folio *s = bch2_folio(folio); 761d1542e03SKent Overstreet struct disk_reservation disk_res = { 0 }; 76233e2eb96SKent Overstreet int i, sectors = folio_sectors(folio), dirty_sectors = 0; 7631c6fdbd8SKent Overstreet 764f57a6a5dSKent Overstreet if (!s) 7651c6fdbd8SKent Overstreet return; 7661c6fdbd8SKent Overstreet 76730bff594SKent Overstreet EBUG_ON(!folio_test_locked(folio)); 76830bff594SKent Overstreet EBUG_ON(folio_test_writeback(folio)); 7693826ee0bSKent Overstreet 77033e2eb96SKent Overstreet for (i = 0; i < sectors; i++) { 771d1542e03SKent Overstreet disk_res.sectors += s->s[i].replicas_reserved; 772d1542e03SKent Overstreet s->s[i].replicas_reserved = 0; 773d1542e03SKent Overstreet 774b44a66a6SKent Overstreet switch (s->s[i].state) { 775b44a66a6SKent Overstreet case SECTOR_DIRTY: 776f57a6a5dSKent Overstreet s->s[i].state = SECTOR_UNALLOCATED; 777b44a66a6SKent Overstreet --dirty_sectors; 778b44a66a6SKent Overstreet break; 779b44a66a6SKent Overstreet case SECTOR_DIRTY_RESERVED: 780b44a66a6SKent Overstreet s->s[i].state = SECTOR_RESERVED; 781b44a66a6SKent Overstreet break; 782b44a66a6SKent Overstreet default: 783b44a66a6SKent Overstreet break; 784f57a6a5dSKent Overstreet } 785f57a6a5dSKent Overstreet } 786adfcfaf0SKent Overstreet 787d1542e03SKent Overstreet bch2_disk_reservation_put(c, &disk_res); 788d1542e03SKent Overstreet 789b44a66a6SKent Overstreet i_sectors_acct(c, inode, NULL, dirty_sectors); 790adfcfaf0SKent Overstreet 79130bff594SKent Overstreet bch2_folio_release(folio); 7921c6fdbd8SKent Overstreet } 7931c6fdbd8SKent Overstreet 79430bff594SKent Overstreet static void bch2_set_folio_dirty(struct bch_fs *c, 79530bff594SKent Overstreet struct bch_inode_info *inode, 79630bff594SKent Overstreet struct folio *folio, 79730bff594SKent Overstreet struct bch2_folio_reservation *res, 798d1542e03SKent Overstreet unsigned offset, unsigned len) 7991c6fdbd8SKent Overstreet { 80030bff594SKent Overstreet struct bch_folio *s = bch2_folio(folio); 801f57a6a5dSKent Overstreet unsigned i, dirty_sectors = 0; 8021c6fdbd8SKent Overstreet 80330bff594SKent Overstreet WARN_ON((u64) folio_pos(folio) + offset + len > 804877dfb34SKent Overstreet round_up((u64) i_size_read(&inode->v), block_bytes(c))); 805fb472ac5SKent Overstreet 8063826ee0bSKent Overstreet spin_lock(&s->lock); 8073826ee0bSKent Overstreet 8084b0a66d5SKent Overstreet for (i = round_down(offset, block_bytes(c)) >> 9; 8094b0a66d5SKent Overstreet i < round_up(offset + len, block_bytes(c)) >> 9; 810d1542e03SKent Overstreet i++) { 811d1542e03SKent Overstreet unsigned sectors = sectors_to_reserve(&s->s[i], 812d1542e03SKent Overstreet res->disk.nr_replicas); 8131c6fdbd8SKent Overstreet 814406d6d5aSKent Overstreet /* 815406d6d5aSKent Overstreet * This can happen if we race with the error path in 816406d6d5aSKent Overstreet * bch2_writepage_io_done(): 817406d6d5aSKent Overstreet */ 818406d6d5aSKent Overstreet sectors = min_t(unsigned, sectors, res->disk.sectors); 819406d6d5aSKent Overstreet 820d1542e03SKent Overstreet s->s[i].replicas_reserved += sectors; 821d1542e03SKent Overstreet res->disk.sectors -= sectors; 822adfcfaf0SKent Overstreet 823b44a66a6SKent Overstreet switch (s->s[i].state) { 824b44a66a6SKent Overstreet case SECTOR_UNALLOCATED: 825b44a66a6SKent Overstreet s->s[i].state = SECTOR_DIRTY; 826f57a6a5dSKent Overstreet dirty_sectors++; 827b44a66a6SKent Overstreet break; 828b44a66a6SKent Overstreet case SECTOR_RESERVED: 829b44a66a6SKent Overstreet s->s[i].state = SECTOR_DIRTY_RESERVED; 830b44a66a6SKent Overstreet break; 831b44a66a6SKent Overstreet default: 832b44a66a6SKent Overstreet break; 833b44a66a6SKent Overstreet } 834f57a6a5dSKent Overstreet } 835f57a6a5dSKent Overstreet 8363826ee0bSKent Overstreet spin_unlock(&s->lock); 8373826ee0bSKent Overstreet 838d1542e03SKent Overstreet i_sectors_acct(c, inode, &res->quota, dirty_sectors); 8391c6fdbd8SKent Overstreet 84030bff594SKent Overstreet if (!folio_test_dirty(folio)) 84130bff594SKent Overstreet filemap_dirty_folio(inode->v.i_mapping, folio); 8421c6fdbd8SKent Overstreet } 8431c6fdbd8SKent Overstreet 8441c6fdbd8SKent Overstreet vm_fault_t bch2_page_fault(struct vm_fault *vmf) 8451c6fdbd8SKent Overstreet { 8461c6fdbd8SKent Overstreet struct file *file = vmf->vma->vm_file; 847eb8e6e9cSKent Overstreet struct address_space *mapping = file->f_mapping; 848eb8e6e9cSKent Overstreet struct address_space *fdm = faults_disabled_mapping(); 8491c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 8501c6fdbd8SKent Overstreet int ret; 8511c6fdbd8SKent Overstreet 852eb8e6e9cSKent Overstreet if (fdm == mapping) 853eb8e6e9cSKent Overstreet return VM_FAULT_SIGBUS; 854eb8e6e9cSKent Overstreet 855eb8e6e9cSKent Overstreet /* Lock ordering: */ 856eb8e6e9cSKent Overstreet if (fdm > mapping) { 857eb8e6e9cSKent Overstreet struct bch_inode_info *fdm_host = to_bch_ei(fdm->host); 858eb8e6e9cSKent Overstreet 859a7ecd30cSKent Overstreet if (bch2_pagecache_add_tryget(inode)) 860eb8e6e9cSKent Overstreet goto got_lock; 861eb8e6e9cSKent Overstreet 862a7ecd30cSKent Overstreet bch2_pagecache_block_put(fdm_host); 863eb8e6e9cSKent Overstreet 864a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 865a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 866eb8e6e9cSKent Overstreet 867a7ecd30cSKent Overstreet bch2_pagecache_block_get(fdm_host); 868eb8e6e9cSKent Overstreet 869eb8e6e9cSKent Overstreet /* Signal that lock has been dropped: */ 870eb8e6e9cSKent Overstreet set_fdm_dropped_locks(); 871eb8e6e9cSKent Overstreet return VM_FAULT_SIGBUS; 872eb8e6e9cSKent Overstreet } 873eb8e6e9cSKent Overstreet 874a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 875eb8e6e9cSKent Overstreet got_lock: 8761c6fdbd8SKent Overstreet ret = filemap_fault(vmf); 877a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 8781c6fdbd8SKent Overstreet 8791c6fdbd8SKent Overstreet return ret; 8801c6fdbd8SKent Overstreet } 8811c6fdbd8SKent Overstreet 8821c6fdbd8SKent Overstreet vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) 8831c6fdbd8SKent Overstreet { 88430bff594SKent Overstreet struct folio *folio = page_folio(vmf->page); 8851c6fdbd8SKent Overstreet struct file *file = vmf->vma->vm_file; 8861c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 8871c6fdbd8SKent Overstreet struct address_space *mapping = file->f_mapping; 8881c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 88930bff594SKent Overstreet struct bch2_folio_reservation res; 8906cc3535dSKent Overstreet unsigned len; 8916cc3535dSKent Overstreet loff_t isize; 892e6ec361fSKent Overstreet int ret; 8931c6fdbd8SKent Overstreet 89430bff594SKent Overstreet bch2_folio_reservation_init(c, inode, &res); 895d1542e03SKent Overstreet 8961c6fdbd8SKent Overstreet sb_start_pagefault(inode->v.i_sb); 8971c6fdbd8SKent Overstreet file_update_time(file); 8981c6fdbd8SKent Overstreet 8991c6fdbd8SKent Overstreet /* 9001c6fdbd8SKent Overstreet * Not strictly necessary, but helps avoid dio writes livelocking in 9011c6fdbd8SKent Overstreet * write_invalidate_inode_pages_range() - can drop this if/when we get 9021c6fdbd8SKent Overstreet * a write_invalidate_inode_pages_range() that works without dropping 9031c6fdbd8SKent Overstreet * page lock before invalidating page 9041c6fdbd8SKent Overstreet */ 905a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 9061c6fdbd8SKent Overstreet 90730bff594SKent Overstreet folio_lock(folio); 9086cc3535dSKent Overstreet isize = i_size_read(&inode->v); 9096cc3535dSKent Overstreet 91030bff594SKent Overstreet if (folio->mapping != mapping || folio_pos(folio) >= isize) { 91130bff594SKent Overstreet folio_unlock(folio); 9121c6fdbd8SKent Overstreet ret = VM_FAULT_NOPAGE; 9131c6fdbd8SKent Overstreet goto out; 9141c6fdbd8SKent Overstreet } 9151c6fdbd8SKent Overstreet 91633e2eb96SKent Overstreet len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio)); 9176cc3535dSKent Overstreet 91830bff594SKent Overstreet if (!bch2_folio_create(folio, __GFP_NOFAIL)->uptodate) { 91930bff594SKent Overstreet if (bch2_folio_set(c, inode_inum(inode), &folio, 1)) { 92030bff594SKent Overstreet folio_unlock(folio); 921e6ec361fSKent Overstreet ret = VM_FAULT_SIGBUS; 922e6ec361fSKent Overstreet goto out; 923e6ec361fSKent Overstreet } 924e6ec361fSKent Overstreet } 925e6ec361fSKent Overstreet 92630bff594SKent Overstreet if (bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) { 92730bff594SKent Overstreet folio_unlock(folio); 9281c6fdbd8SKent Overstreet ret = VM_FAULT_SIGBUS; 9291c6fdbd8SKent Overstreet goto out; 9301c6fdbd8SKent Overstreet } 9311c6fdbd8SKent Overstreet 93230bff594SKent Overstreet bch2_set_folio_dirty(c, inode, folio, &res, 0, len); 93330bff594SKent Overstreet bch2_folio_reservation_put(c, inode, &res); 9341b783a69SKent Overstreet 93530bff594SKent Overstreet folio_wait_stable(folio); 936e6ec361fSKent Overstreet ret = VM_FAULT_LOCKED; 9371c6fdbd8SKent Overstreet out: 938a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 9391c6fdbd8SKent Overstreet sb_end_pagefault(inode->v.i_sb); 940d1542e03SKent Overstreet 9411c6fdbd8SKent Overstreet return ret; 9421c6fdbd8SKent Overstreet } 9431c6fdbd8SKent Overstreet 9441c6fdbd8SKent Overstreet void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length) 9451c6fdbd8SKent Overstreet { 9461c6fdbd8SKent Overstreet if (offset || length < folio_size(folio)) 9471c6fdbd8SKent Overstreet return; 9481c6fdbd8SKent Overstreet 94930bff594SKent Overstreet bch2_clear_folio_bits(folio); 9501c6fdbd8SKent Overstreet } 9511c6fdbd8SKent Overstreet 9521c6fdbd8SKent Overstreet bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask) 9531c6fdbd8SKent Overstreet { 954a6d90385SKent Overstreet if (folio_test_dirty(folio) || folio_test_writeback(folio)) 9551c6fdbd8SKent Overstreet return false; 9561c6fdbd8SKent Overstreet 95730bff594SKent Overstreet bch2_clear_folio_bits(folio); 9581c6fdbd8SKent Overstreet return true; 9591c6fdbd8SKent Overstreet } 9601c6fdbd8SKent Overstreet 9611c6fdbd8SKent Overstreet /* readpage(s): */ 9621c6fdbd8SKent Overstreet 9631c6fdbd8SKent Overstreet static void bch2_readpages_end_io(struct bio *bio) 9641c6fdbd8SKent Overstreet { 96530bff594SKent Overstreet struct folio_iter fi; 9661c6fdbd8SKent Overstreet 96730bff594SKent Overstreet bio_for_each_folio_all(fi, bio) { 9681c6fdbd8SKent Overstreet if (!bio->bi_status) { 96930bff594SKent Overstreet folio_mark_uptodate(fi.folio); 9701c6fdbd8SKent Overstreet } else { 97130bff594SKent Overstreet folio_clear_uptodate(fi.folio); 97230bff594SKent Overstreet folio_set_error(fi.folio); 9731c6fdbd8SKent Overstreet } 97430bff594SKent Overstreet folio_unlock(fi.folio); 9751c6fdbd8SKent Overstreet } 9761c6fdbd8SKent Overstreet 9771c6fdbd8SKent Overstreet bio_put(bio); 9781c6fdbd8SKent Overstreet } 9791c6fdbd8SKent Overstreet 9801c6fdbd8SKent Overstreet struct readpages_iter { 9811c6fdbd8SKent Overstreet struct address_space *mapping; 9821c6fdbd8SKent Overstreet struct page **pages; 9831c6fdbd8SKent Overstreet unsigned nr_pages; 9841c6fdbd8SKent Overstreet unsigned idx; 9851c6fdbd8SKent Overstreet pgoff_t offset; 9861c6fdbd8SKent Overstreet }; 9871c6fdbd8SKent Overstreet 9881c6fdbd8SKent Overstreet static int readpages_iter_init(struct readpages_iter *iter, 9891c6fdbd8SKent Overstreet struct readahead_control *ractl) 9901c6fdbd8SKent Overstreet { 9911c6fdbd8SKent Overstreet unsigned i, nr_pages = readahead_count(ractl); 9921c6fdbd8SKent Overstreet 9931c6fdbd8SKent Overstreet memset(iter, 0, sizeof(*iter)); 9941c6fdbd8SKent Overstreet 9951c6fdbd8SKent Overstreet iter->mapping = ractl->mapping; 9961c6fdbd8SKent Overstreet iter->offset = readahead_index(ractl); 9971c6fdbd8SKent Overstreet iter->nr_pages = nr_pages; 9981c6fdbd8SKent Overstreet 9991c6fdbd8SKent Overstreet iter->pages = kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS); 10001c6fdbd8SKent Overstreet if (!iter->pages) 10011c6fdbd8SKent Overstreet return -ENOMEM; 10021c6fdbd8SKent Overstreet 100389931472SKent Overstreet nr_pages = __readahead_batch(ractl, iter->pages, nr_pages); 10041c6fdbd8SKent Overstreet for (i = 0; i < nr_pages; i++) { 100530bff594SKent Overstreet __bch2_folio_create(page_folio(iter->pages[i]), __GFP_NOFAIL); 10061c6fdbd8SKent Overstreet put_page(iter->pages[i]); 10071c6fdbd8SKent Overstreet } 10081c6fdbd8SKent Overstreet 10091c6fdbd8SKent Overstreet return 0; 10101c6fdbd8SKent Overstreet } 10111c6fdbd8SKent Overstreet 101230bff594SKent Overstreet static inline struct folio *readpage_iter_next(struct readpages_iter *iter) 10131c6fdbd8SKent Overstreet { 10141c6fdbd8SKent Overstreet if (iter->idx >= iter->nr_pages) 10151c6fdbd8SKent Overstreet return NULL; 10161c6fdbd8SKent Overstreet 10171c6fdbd8SKent Overstreet EBUG_ON(iter->pages[iter->idx]->index != iter->offset + iter->idx); 10181c6fdbd8SKent Overstreet 101930bff594SKent Overstreet return page_folio(iter->pages[iter->idx]); 10201c6fdbd8SKent Overstreet } 10211c6fdbd8SKent Overstreet 102235189e09SKent Overstreet static bool extent_partial_reads_expensive(struct bkey_s_c k) 102335189e09SKent Overstreet { 102435189e09SKent Overstreet struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 102535189e09SKent Overstreet struct bch_extent_crc_unpacked crc; 102635189e09SKent Overstreet const union bch_extent_entry *i; 102735189e09SKent Overstreet 102835189e09SKent Overstreet bkey_for_each_crc(k.k, ptrs, crc, i) 102935189e09SKent Overstreet if (crc.csum_type || crc.compression_type) 103035189e09SKent Overstreet return true; 103135189e09SKent Overstreet return false; 103235189e09SKent Overstreet } 103335189e09SKent Overstreet 10341c6fdbd8SKent Overstreet static void readpage_bio_extend(struct readpages_iter *iter, 103576426098SKent Overstreet struct bio *bio, 103676426098SKent Overstreet unsigned sectors_this_extent, 10371c6fdbd8SKent Overstreet bool get_more) 10381c6fdbd8SKent Overstreet { 103976426098SKent Overstreet while (bio_sectors(bio) < sectors_this_extent && 10401c6fdbd8SKent Overstreet bio->bi_vcnt < bio->bi_max_vecs) { 104130bff594SKent Overstreet pgoff_t folio_offset = bio_end_sector(bio) >> PAGE_SECTORS_SHIFT; 104230bff594SKent Overstreet struct folio *folio = readpage_iter_next(iter); 10431c6fdbd8SKent Overstreet int ret; 10441c6fdbd8SKent Overstreet 104530bff594SKent Overstreet if (folio) { 104630bff594SKent Overstreet if (iter->offset + iter->idx != folio_offset) 10471c6fdbd8SKent Overstreet break; 10481c6fdbd8SKent Overstreet 10491c6fdbd8SKent Overstreet iter->idx++; 10501c6fdbd8SKent Overstreet } else { 10511c6fdbd8SKent Overstreet if (!get_more) 10521c6fdbd8SKent Overstreet break; 10531c6fdbd8SKent Overstreet 105430bff594SKent Overstreet folio = xa_load(&iter->mapping->i_pages, folio_offset); 105530bff594SKent Overstreet if (folio && !xa_is_value(folio)) 10561c6fdbd8SKent Overstreet break; 10571c6fdbd8SKent Overstreet 105830bff594SKent Overstreet folio = filemap_alloc_folio(readahead_gfp_mask(iter->mapping), 0); 105930bff594SKent Overstreet if (!folio) 10601c6fdbd8SKent Overstreet break; 10611c6fdbd8SKent Overstreet 106230bff594SKent Overstreet if (!__bch2_folio_create(folio, 0)) { 106330bff594SKent Overstreet folio_put(folio); 1064f57a6a5dSKent Overstreet break; 1065f57a6a5dSKent Overstreet } 10661c6fdbd8SKent Overstreet 106730bff594SKent Overstreet ret = filemap_add_folio(iter->mapping, folio, folio_offset, GFP_NOFS); 10681c6fdbd8SKent Overstreet if (ret) { 106930bff594SKent Overstreet __bch2_folio_release(folio); 107030bff594SKent Overstreet folio_put(folio); 10711c6fdbd8SKent Overstreet break; 10721c6fdbd8SKent Overstreet } 10731c6fdbd8SKent Overstreet 107430bff594SKent Overstreet folio_put(folio); 10751c6fdbd8SKent Overstreet } 10761c6fdbd8SKent Overstreet 107730bff594SKent Overstreet BUG_ON(!bio_add_folio(bio, folio, folio_size(folio), 0)); 10781c6fdbd8SKent Overstreet } 10791c6fdbd8SKent Overstreet } 10801c6fdbd8SKent Overstreet 10818c6d298aSKent Overstreet static void bchfs_read(struct btree_trans *trans, 10828c6d298aSKent Overstreet struct bch_read_bio *rbio, 10838c6d298aSKent Overstreet subvol_inum inum, 10841c6fdbd8SKent Overstreet struct readpages_iter *readpages_iter) 10851c6fdbd8SKent Overstreet { 10860f238367SKent Overstreet struct bch_fs *c = trans->c; 10878c6d298aSKent Overstreet struct btree_iter iter; 108807a1006aSKent Overstreet struct bkey_buf sk; 10891c6fdbd8SKent Overstreet int flags = BCH_READ_RETRY_IF_STALE| 10901c6fdbd8SKent Overstreet BCH_READ_MAY_PROMOTE; 10918c6d298aSKent Overstreet u32 snapshot; 109276426098SKent Overstreet int ret = 0; 10931c6fdbd8SKent Overstreet 10941c6fdbd8SKent Overstreet rbio->c = c; 10951c6fdbd8SKent Overstreet rbio->start_time = local_clock(); 10968c6d298aSKent Overstreet rbio->subvol = inum.subvol; 109735189e09SKent Overstreet 109807a1006aSKent Overstreet bch2_bkey_buf_init(&sk); 109976426098SKent Overstreet retry: 1100700c25b3SKent Overstreet bch2_trans_begin(trans); 11018c6d298aSKent Overstreet iter = (struct btree_iter) { NULL }; 1102700c25b3SKent Overstreet 11038c6d298aSKent Overstreet ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); 11048c6d298aSKent Overstreet if (ret) 11058c6d298aSKent Overstreet goto err; 11068c6d298aSKent Overstreet 11078c6d298aSKent Overstreet bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, 11088c6d298aSKent Overstreet SPOS(inum.inum, rbio->bio.bi_iter.bi_sector, snapshot), 110957cfdd8bSKent Overstreet BTREE_ITER_SLOTS); 11101c6fdbd8SKent Overstreet while (1) { 11111c6fdbd8SKent Overstreet struct bkey_s_c k; 111276426098SKent Overstreet unsigned bytes, sectors, offset_into_extent; 11135ff75ccbSKent Overstreet enum btree_id data_btree = BTREE_ID_extents; 11141c6fdbd8SKent Overstreet 11153737e0ddSKent Overstreet /* 11163737e0ddSKent Overstreet * read_extent -> io_time_reset may cause a transaction restart 11173737e0ddSKent Overstreet * without returning an error, we need to check for that here: 11183737e0ddSKent Overstreet */ 1119549d173cSKent Overstreet ret = bch2_trans_relock(trans); 1120549d173cSKent Overstreet if (ret) 11213737e0ddSKent Overstreet break; 11223737e0ddSKent Overstreet 11238c6d298aSKent Overstreet bch2_btree_iter_set_pos(&iter, 11248c6d298aSKent Overstreet POS(inum.inum, rbio->bio.bi_iter.bi_sector)); 11251c6fdbd8SKent Overstreet 11268c6d298aSKent Overstreet k = bch2_btree_iter_peek_slot(&iter); 112776426098SKent Overstreet ret = bkey_err(k); 112876426098SKent Overstreet if (ret) 112976426098SKent Overstreet break; 11301c6fdbd8SKent Overstreet 11318c6d298aSKent Overstreet offset_into_extent = iter.pos.offset - 113206ed8558SKent Overstreet bkey_start_offset(k.k); 113376426098SKent Overstreet sectors = k.k->size - offset_into_extent; 113476426098SKent Overstreet 113507a1006aSKent Overstreet bch2_bkey_buf_reassemble(&sk, c, k); 113613dcd4abSKent Overstreet 11375ff75ccbSKent Overstreet ret = bch2_read_indirect_extent(trans, &data_btree, 113822d8a33dSYuxuan Shui &offset_into_extent, &sk); 113976426098SKent Overstreet if (ret) 114076426098SKent Overstreet break; 114176426098SKent Overstreet 114213dcd4abSKent Overstreet k = bkey_i_to_s_c(sk.k); 114313dcd4abSKent Overstreet 114476426098SKent Overstreet sectors = min(sectors, k.k->size - offset_into_extent); 114576426098SKent Overstreet 114635189e09SKent Overstreet if (readpages_iter) 114735189e09SKent Overstreet readpage_bio_extend(readpages_iter, &rbio->bio, sectors, 114835189e09SKent Overstreet extent_partial_reads_expensive(k)); 11491c6fdbd8SKent Overstreet 115076426098SKent Overstreet bytes = min(sectors, bio_sectors(&rbio->bio)) << 9; 115106ed8558SKent Overstreet swap(rbio->bio.bi_iter.bi_size, bytes); 11521c6fdbd8SKent Overstreet 115306ed8558SKent Overstreet if (rbio->bio.bi_iter.bi_size == bytes) 11541c6fdbd8SKent Overstreet flags |= BCH_READ_LAST_FRAGMENT; 11551c6fdbd8SKent Overstreet 1156b44a66a6SKent Overstreet bch2_bio_page_state_set(&rbio->bio, k); 11571c6fdbd8SKent Overstreet 11588c6d298aSKent Overstreet bch2_read_extent(trans, rbio, iter.pos, 11595ff75ccbSKent Overstreet data_btree, k, offset_into_extent, flags); 11601c6fdbd8SKent Overstreet 11611c6fdbd8SKent Overstreet if (flags & BCH_READ_LAST_FRAGMENT) 116235189e09SKent Overstreet break; 11631c6fdbd8SKent Overstreet 116406ed8558SKent Overstreet swap(rbio->bio.bi_iter.bi_size, bytes); 116506ed8558SKent Overstreet bio_advance(&rbio->bio, bytes); 1166084d42bbSKent Overstreet 1167084d42bbSKent Overstreet ret = btree_trans_too_many_iters(trans); 1168084d42bbSKent Overstreet if (ret) 1169084d42bbSKent Overstreet break; 11701c6fdbd8SKent Overstreet } 11718c6d298aSKent Overstreet err: 11728c6d298aSKent Overstreet bch2_trans_iter_exit(trans, &iter); 117376426098SKent Overstreet 1174549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 117576426098SKent Overstreet goto retry; 117676426098SKent Overstreet 117735189e09SKent Overstreet if (ret) { 11787fec8266SKent Overstreet bch_err_inum_offset_ratelimited(c, 11797fec8266SKent Overstreet iter.pos.inode, 11807fec8266SKent Overstreet iter.pos.offset << 9, 11810fefe8d8SKent Overstreet "read error %i from btree lookup", ret); 11820fefe8d8SKent Overstreet rbio->bio.bi_status = BLK_STS_IOERR; 118376426098SKent Overstreet bio_endio(&rbio->bio); 11841c6fdbd8SKent Overstreet } 11851c6fdbd8SKent Overstreet 118607a1006aSKent Overstreet bch2_bkey_buf_exit(&sk, c); 118735189e09SKent Overstreet } 118835189e09SKent Overstreet 11891c6fdbd8SKent Overstreet void bch2_readahead(struct readahead_control *ractl) 11901c6fdbd8SKent Overstreet { 11911c6fdbd8SKent Overstreet struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host); 11921c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 119301ad6737SKent Overstreet struct bch_io_opts opts; 1194424eb881SKent Overstreet struct btree_trans trans; 119530bff594SKent Overstreet struct folio *folio; 11961c6fdbd8SKent Overstreet struct readpages_iter readpages_iter; 11971c6fdbd8SKent Overstreet int ret; 11981c6fdbd8SKent Overstreet 119901ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 120001ad6737SKent Overstreet 12011c6fdbd8SKent Overstreet ret = readpages_iter_init(&readpages_iter, ractl); 12021c6fdbd8SKent Overstreet BUG_ON(ret); 12031c6fdbd8SKent Overstreet 120420bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 12051c6fdbd8SKent Overstreet 1206a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 12071c6fdbd8SKent Overstreet 120830bff594SKent Overstreet while ((folio = readpage_iter_next(&readpages_iter))) { 12091c6fdbd8SKent Overstreet pgoff_t index = readpages_iter.offset + readpages_iter.idx; 12101c6fdbd8SKent Overstreet unsigned n = min_t(unsigned, 12111c6fdbd8SKent Overstreet readpages_iter.nr_pages - 12121c6fdbd8SKent Overstreet readpages_iter.idx, 12131c6fdbd8SKent Overstreet BIO_MAX_VECS); 12141c6fdbd8SKent Overstreet struct bch_read_bio *rbio = 12151c6fdbd8SKent Overstreet rbio_init(bio_alloc_bioset(NULL, n, REQ_OP_READ, 12161c6fdbd8SKent Overstreet GFP_NOFS, &c->bio_read), 12171c6fdbd8SKent Overstreet opts); 12181c6fdbd8SKent Overstreet 12191c6fdbd8SKent Overstreet readpages_iter.idx++; 12201c6fdbd8SKent Overstreet 12217279c1a2SKent Overstreet rbio->bio.bi_iter.bi_sector = (sector_t) index << PAGE_SECTORS_SHIFT; 12221c6fdbd8SKent Overstreet rbio->bio.bi_end_io = bch2_readpages_end_io; 122330bff594SKent Overstreet BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); 12241c6fdbd8SKent Overstreet 12258c6d298aSKent Overstreet bchfs_read(&trans, rbio, inode_inum(inode), 12260f238367SKent Overstreet &readpages_iter); 12271c6fdbd8SKent Overstreet } 12281c6fdbd8SKent Overstreet 1229a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 1230424eb881SKent Overstreet 1231424eb881SKent Overstreet bch2_trans_exit(&trans); 12321c6fdbd8SKent Overstreet kfree(readpages_iter.pages); 12331c6fdbd8SKent Overstreet } 12341c6fdbd8SKent Overstreet 123530bff594SKent Overstreet static void __bchfs_readfolio(struct bch_fs *c, struct bch_read_bio *rbio, 123630bff594SKent Overstreet subvol_inum inum, struct folio *folio) 12371c6fdbd8SKent Overstreet { 1238424eb881SKent Overstreet struct btree_trans trans; 12391c6fdbd8SKent Overstreet 124030bff594SKent Overstreet bch2_folio_create(folio, __GFP_NOFAIL); 12411c6fdbd8SKent Overstreet 12421c6fdbd8SKent Overstreet rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC; 124330bff594SKent Overstreet rbio->bio.bi_iter.bi_sector = folio_sector(folio); 124430bff594SKent Overstreet BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); 12451c6fdbd8SKent Overstreet 124620bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 12478c6d298aSKent Overstreet bchfs_read(&trans, rbio, inum, NULL); 1248424eb881SKent Overstreet bch2_trans_exit(&trans); 12491c6fdbd8SKent Overstreet } 12501c6fdbd8SKent Overstreet 125130bff594SKent Overstreet static void bch2_read_single_folio_end_io(struct bio *bio) 12521c6fdbd8SKent Overstreet { 12531c6fdbd8SKent Overstreet complete(bio->bi_private); 12541c6fdbd8SKent Overstreet } 12551c6fdbd8SKent Overstreet 125630bff594SKent Overstreet static int bch2_read_single_folio(struct folio *folio, 12571c6fdbd8SKent Overstreet struct address_space *mapping) 12581c6fdbd8SKent Overstreet { 12591c6fdbd8SKent Overstreet struct bch_inode_info *inode = to_bch_ei(mapping->host); 12601c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 12611c6fdbd8SKent Overstreet struct bch_read_bio *rbio; 126201ad6737SKent Overstreet struct bch_io_opts opts; 12631c6fdbd8SKent Overstreet int ret; 12641c6fdbd8SKent Overstreet DECLARE_COMPLETION_ONSTACK(done); 12651c6fdbd8SKent Overstreet 126601ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 126701ad6737SKent Overstreet 12681c6fdbd8SKent Overstreet rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_NOFS, &c->bio_read), 126901ad6737SKent Overstreet opts); 12701c6fdbd8SKent Overstreet rbio->bio.bi_private = &done; 127130bff594SKent Overstreet rbio->bio.bi_end_io = bch2_read_single_folio_end_io; 12721c6fdbd8SKent Overstreet 127330bff594SKent Overstreet __bchfs_readfolio(c, rbio, inode_inum(inode), folio); 12741c6fdbd8SKent Overstreet wait_for_completion(&done); 12751c6fdbd8SKent Overstreet 12761c6fdbd8SKent Overstreet ret = blk_status_to_errno(rbio->bio.bi_status); 12771c6fdbd8SKent Overstreet bio_put(&rbio->bio); 12781c6fdbd8SKent Overstreet 12791c6fdbd8SKent Overstreet if (ret < 0) 12801c6fdbd8SKent Overstreet return ret; 12811c6fdbd8SKent Overstreet 128230bff594SKent Overstreet folio_mark_uptodate(folio); 12831c6fdbd8SKent Overstreet return 0; 12841c6fdbd8SKent Overstreet } 12851c6fdbd8SKent Overstreet 12861c6fdbd8SKent Overstreet int bch2_read_folio(struct file *file, struct folio *folio) 12871c6fdbd8SKent Overstreet { 12881c6fdbd8SKent Overstreet int ret; 12891c6fdbd8SKent Overstreet 129030bff594SKent Overstreet ret = bch2_read_single_folio(folio, folio->mapping); 12911c6fdbd8SKent Overstreet folio_unlock(folio); 12925c1ef830SKent Overstreet return bch2_err_class(ret); 12931c6fdbd8SKent Overstreet } 12941c6fdbd8SKent Overstreet 12951c6fdbd8SKent Overstreet /* writepages: */ 12961c6fdbd8SKent Overstreet 12971c6fdbd8SKent Overstreet struct bch_writepage_state { 12981c6fdbd8SKent Overstreet struct bch_writepage_io *io; 12991c6fdbd8SKent Overstreet struct bch_io_opts opts; 130049fe78ffSKent Overstreet struct bch_folio_sector *tmp; 130149fe78ffSKent Overstreet unsigned tmp_sectors; 13021c6fdbd8SKent Overstreet }; 13031c6fdbd8SKent Overstreet 13041c6fdbd8SKent Overstreet static inline struct bch_writepage_state bch_writepage_state_init(struct bch_fs *c, 13051c6fdbd8SKent Overstreet struct bch_inode_info *inode) 13061c6fdbd8SKent Overstreet { 130701ad6737SKent Overstreet struct bch_writepage_state ret = { 0 }; 130801ad6737SKent Overstreet 130901ad6737SKent Overstreet bch2_inode_opts_get(&ret.opts, c, &inode->ei_inode); 131001ad6737SKent Overstreet return ret; 13111c6fdbd8SKent Overstreet } 13121c6fdbd8SKent Overstreet 13139f311f21SKent Overstreet static void bch2_writepage_io_done(struct bch_write_op *op) 13141c6fdbd8SKent Overstreet { 13159f311f21SKent Overstreet struct bch_writepage_io *io = 13169f311f21SKent Overstreet container_of(op, struct bch_writepage_io, op); 13179a3df993SKent Overstreet struct bch_fs *c = io->op.c; 13189a3df993SKent Overstreet struct bio *bio = &io->op.wbio.bio; 1319ff9c301fSKent Overstreet struct folio_iter fi; 1320b3fce09cSKent Overstreet unsigned i; 13211c6fdbd8SKent Overstreet 13229a3df993SKent Overstreet if (io->op.error) { 132333c74e41SKent Overstreet set_bit(EI_INODE_ERROR, &io->inode->ei_flags); 132433c74e41SKent Overstreet 1325ff9c301fSKent Overstreet bio_for_each_folio_all(fi, bio) { 13263342ac13SKent Overstreet struct bch_folio *s; 1327b3fce09cSKent Overstreet 1328ff9c301fSKent Overstreet folio_set_error(fi.folio); 1329ff9c301fSKent Overstreet mapping_set_error(fi.folio->mapping, -EIO); 1330b3fce09cSKent Overstreet 1331ff9c301fSKent Overstreet s = __bch2_folio(fi.folio); 13323826ee0bSKent Overstreet spin_lock(&s->lock); 1333ff9c301fSKent Overstreet for (i = 0; i < folio_sectors(fi.folio); i++) 1334b3fce09cSKent Overstreet s->s[i].nr_replicas = 0; 13353826ee0bSKent Overstreet spin_unlock(&s->lock); 133675812e70SKent Overstreet } 13371c6fdbd8SKent Overstreet } 13381c6fdbd8SKent Overstreet 13394be1a412SKent Overstreet if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) { 1340ff9c301fSKent Overstreet bio_for_each_folio_all(fi, bio) { 13413342ac13SKent Overstreet struct bch_folio *s; 13424be1a412SKent Overstreet 1343ff9c301fSKent Overstreet s = __bch2_folio(fi.folio); 13444be1a412SKent Overstreet spin_lock(&s->lock); 1345ff9c301fSKent Overstreet for (i = 0; i < folio_sectors(fi.folio); i++) 13464be1a412SKent Overstreet s->s[i].nr_replicas = 0; 13474be1a412SKent Overstreet spin_unlock(&s->lock); 13484be1a412SKent Overstreet } 13494be1a412SKent Overstreet } 13504be1a412SKent Overstreet 13511c6fdbd8SKent Overstreet /* 13521c6fdbd8SKent Overstreet * racing with fallocate can cause us to add fewer sectors than 13531c6fdbd8SKent Overstreet * expected - but we shouldn't add more sectors than expected: 13541c6fdbd8SKent Overstreet */ 1355f8494d25SKent Overstreet WARN_ON_ONCE(io->op.i_sectors_delta > 0); 13561c6fdbd8SKent Overstreet 13571c6fdbd8SKent Overstreet /* 13581c6fdbd8SKent Overstreet * (error (due to going RO) halfway through a page can screw that up 13591c6fdbd8SKent Overstreet * slightly) 13601c6fdbd8SKent Overstreet * XXX wtf? 13619a3df993SKent Overstreet BUG_ON(io->op.op.i_sectors_delta >= PAGE_SECTORS); 13621c6fdbd8SKent Overstreet */ 13631c6fdbd8SKent Overstreet 13641c6fdbd8SKent Overstreet /* 13651c6fdbd8SKent Overstreet * PageWriteback is effectively our ref on the inode - fixup i_blocks 13661c6fdbd8SKent Overstreet * before calling end_page_writeback: 13671c6fdbd8SKent Overstreet */ 13689a3df993SKent Overstreet i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta); 13691c6fdbd8SKent Overstreet 1370ff9c301fSKent Overstreet bio_for_each_folio_all(fi, bio) { 1371ff9c301fSKent Overstreet struct bch_folio *s = __bch2_folio(fi.folio); 13727f5e31e1SKent Overstreet 13737f5e31e1SKent Overstreet if (atomic_dec_and_test(&s->write_count)) 1374ff9c301fSKent Overstreet folio_end_writeback(fi.folio); 13757f5e31e1SKent Overstreet } 13761c6fdbd8SKent Overstreet 13779f311f21SKent Overstreet bio_put(&io->op.wbio.bio); 13781c6fdbd8SKent Overstreet } 13791c6fdbd8SKent Overstreet 13801c6fdbd8SKent Overstreet static void bch2_writepage_do_io(struct bch_writepage_state *w) 13811c6fdbd8SKent Overstreet { 13821c6fdbd8SKent Overstreet struct bch_writepage_io *io = w->io; 13831c6fdbd8SKent Overstreet 13841c6fdbd8SKent Overstreet w->io = NULL; 13859f311f21SKent Overstreet closure_call(&io->op.cl, bch2_write, NULL, NULL); 13861c6fdbd8SKent Overstreet } 13871c6fdbd8SKent Overstreet 13881c6fdbd8SKent Overstreet /* 13891c6fdbd8SKent Overstreet * Get a bch_writepage_io and add @page to it - appending to an existing one if 13901c6fdbd8SKent Overstreet * possible, else allocating a new one: 13911c6fdbd8SKent Overstreet */ 13921c6fdbd8SKent Overstreet static void bch2_writepage_io_alloc(struct bch_fs *c, 139350fe5bd6SKent Overstreet struct writeback_control *wbc, 13941c6fdbd8SKent Overstreet struct bch_writepage_state *w, 13951c6fdbd8SKent Overstreet struct bch_inode_info *inode, 13967f5e31e1SKent Overstreet u64 sector, 13971c6fdbd8SKent Overstreet unsigned nr_replicas) 13981c6fdbd8SKent Overstreet { 13991c6fdbd8SKent Overstreet struct bch_write_op *op; 14001c6fdbd8SKent Overstreet 14011c6fdbd8SKent Overstreet w->io = container_of(bio_alloc_bioset(NULL, BIO_MAX_VECS, 14021c6fdbd8SKent Overstreet REQ_OP_WRITE, 14031c6fdbd8SKent Overstreet GFP_NOFS, 14041c6fdbd8SKent Overstreet &c->writepage_bioset), 14059a3df993SKent Overstreet struct bch_writepage_io, op.wbio.bio); 14061c6fdbd8SKent Overstreet 14079a3df993SKent Overstreet w->io->inode = inode; 14089a3df993SKent Overstreet op = &w->io->op; 14099a3df993SKent Overstreet bch2_write_op_init(op, c, w->opts); 14109a3df993SKent Overstreet op->target = w->opts.foreground_target; 14111c6fdbd8SKent Overstreet op->nr_replicas = nr_replicas; 14121c6fdbd8SKent Overstreet op->res.nr_replicas = nr_replicas; 14131c6fdbd8SKent Overstreet op->write_point = writepoint_hashed(inode->ei_last_dirtied); 14148c6d298aSKent Overstreet op->subvol = inode->ei_subvol; 14157f5e31e1SKent Overstreet op->pos = POS(inode->v.i_ino, sector); 14169f311f21SKent Overstreet op->end_io = bch2_writepage_io_done; 1417a8b3a677SKent Overstreet op->devs_need_flush = &inode->ei_devs_need_flush; 14187f5e31e1SKent Overstreet op->wbio.bio.bi_iter.bi_sector = sector; 141950fe5bd6SKent Overstreet op->wbio.bio.bi_opf = wbc_to_write_flags(wbc); 14201c6fdbd8SKent Overstreet } 14211c6fdbd8SKent Overstreet 14221c6fdbd8SKent Overstreet static int __bch2_writepage(struct folio *folio, 14231c6fdbd8SKent Overstreet struct writeback_control *wbc, 14241c6fdbd8SKent Overstreet void *data) 14251c6fdbd8SKent Overstreet { 142630bff594SKent Overstreet struct bch_inode_info *inode = to_bch_ei(folio->mapping->host); 14271c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 14281c6fdbd8SKent Overstreet struct bch_writepage_state *w = data; 142949fe78ffSKent Overstreet struct bch_folio *s; 143030bff594SKent Overstreet unsigned i, offset, f_sectors, nr_replicas_this_write = U32_MAX; 14311c6fdbd8SKent Overstreet loff_t i_size = i_size_read(&inode->v); 1432e1036a2aSKent Overstreet int ret; 14331c6fdbd8SKent Overstreet 143430bff594SKent Overstreet EBUG_ON(!folio_test_uptodate(folio)); 14351c6fdbd8SKent Overstreet 143630bff594SKent Overstreet /* Is the folio fully inside i_size? */ 143733e2eb96SKent Overstreet if (folio_end_pos(folio) <= i_size) 14381c6fdbd8SKent Overstreet goto do_io; 14391c6fdbd8SKent Overstreet 144030bff594SKent Overstreet /* Is the folio fully outside i_size? (truncate in progress) */ 144133e2eb96SKent Overstreet if (folio_pos(folio) >= i_size) { 144230bff594SKent Overstreet folio_unlock(folio); 14431c6fdbd8SKent Overstreet return 0; 14441c6fdbd8SKent Overstreet } 14451c6fdbd8SKent Overstreet 14461c6fdbd8SKent Overstreet /* 144730bff594SKent Overstreet * The folio straddles i_size. It must be zeroed out on each and every 14481c6fdbd8SKent Overstreet * writepage invocation because it may be mmapped. "A file is mapped 144930bff594SKent Overstreet * in multiples of the folio size. For a file that is not a multiple of 145030bff594SKent Overstreet * the folio size, the remaining memory is zeroed when mapped, and 14511c6fdbd8SKent Overstreet * writes to that region are not written out to the file." 14521c6fdbd8SKent Overstreet */ 145333e2eb96SKent Overstreet folio_zero_segment(folio, 145433e2eb96SKent Overstreet i_size - folio_pos(folio), 145533e2eb96SKent Overstreet folio_size(folio)); 14561c6fdbd8SKent Overstreet do_io: 145730bff594SKent Overstreet f_sectors = folio_sectors(folio); 145830bff594SKent Overstreet s = bch2_folio_create(folio, __GFP_NOFAIL); 1459f81b648dSKent Overstreet 146049fe78ffSKent Overstreet if (f_sectors > w->tmp_sectors) { 146149fe78ffSKent Overstreet kfree(w->tmp); 146249fe78ffSKent Overstreet w->tmp = kzalloc(sizeof(struct bch_folio_sector) * 146349fe78ffSKent Overstreet f_sectors, __GFP_NOFAIL); 146449fe78ffSKent Overstreet w->tmp_sectors = f_sectors; 146549fe78ffSKent Overstreet } 146649fe78ffSKent Overstreet 1467f74a5051SKent Overstreet /* 1468f74a5051SKent Overstreet * Things get really hairy with errors during writeback: 1469f74a5051SKent Overstreet */ 147030bff594SKent Overstreet ret = bch2_get_folio_disk_reservation(c, inode, folio, false); 1471f74a5051SKent Overstreet BUG_ON(ret); 1472e1036a2aSKent Overstreet 14737f5e31e1SKent Overstreet /* Before unlocking the page, get copy of reservations: */ 1474f74a5051SKent Overstreet spin_lock(&s->lock); 147549fe78ffSKent Overstreet memcpy(w->tmp, s->s, sizeof(struct bch_folio_sector) * f_sectors); 1476f74a5051SKent Overstreet spin_unlock(&s->lock); 14777f5e31e1SKent Overstreet 147830bff594SKent Overstreet for (i = 0; i < f_sectors; i++) { 14792ba5d38bSKent Overstreet if (s->s[i].state < SECTOR_DIRTY) 14807f5e31e1SKent Overstreet continue; 14817f5e31e1SKent Overstreet 1482f81b648dSKent Overstreet nr_replicas_this_write = 1483f57a6a5dSKent Overstreet min_t(unsigned, nr_replicas_this_write, 1484f57a6a5dSKent Overstreet s->s[i].nr_replicas + 1485f57a6a5dSKent Overstreet s->s[i].replicas_reserved); 14867f5e31e1SKent Overstreet } 14871c6fdbd8SKent Overstreet 148830bff594SKent Overstreet for (i = 0; i < f_sectors; i++) { 14892ba5d38bSKent Overstreet if (s->s[i].state < SECTOR_DIRTY) 14907f5e31e1SKent Overstreet continue; 14917f5e31e1SKent Overstreet 1492f57a6a5dSKent Overstreet s->s[i].nr_replicas = w->opts.compression 1493f57a6a5dSKent Overstreet ? 0 : nr_replicas_this_write; 1494e1036a2aSKent Overstreet 1495f57a6a5dSKent Overstreet s->s[i].replicas_reserved = 0; 1496f57a6a5dSKent Overstreet s->s[i].state = SECTOR_ALLOCATED; 1497f57a6a5dSKent Overstreet } 14981c6fdbd8SKent Overstreet 14997f5e31e1SKent Overstreet BUG_ON(atomic_read(&s->write_count)); 15007f5e31e1SKent Overstreet atomic_set(&s->write_count, 1); 15017f5e31e1SKent Overstreet 150230bff594SKent Overstreet BUG_ON(folio_test_writeback(folio)); 150330bff594SKent Overstreet folio_start_writeback(folio); 15047f5e31e1SKent Overstreet 150530bff594SKent Overstreet folio_unlock(folio); 15061c6fdbd8SKent Overstreet 15077f5e31e1SKent Overstreet offset = 0; 15087f5e31e1SKent Overstreet while (1) { 1509f74a5051SKent Overstreet unsigned sectors = 0, dirty_sectors = 0, reserved_sectors = 0; 15107f5e31e1SKent Overstreet u64 sector; 15117f5e31e1SKent Overstreet 151230bff594SKent Overstreet while (offset < f_sectors && 151349fe78ffSKent Overstreet w->tmp[offset].state < SECTOR_DIRTY) 15147f5e31e1SKent Overstreet offset++; 15157f5e31e1SKent Overstreet 151630bff594SKent Overstreet if (offset == f_sectors) 15177f5e31e1SKent Overstreet break; 15187f5e31e1SKent Overstreet 151930bff594SKent Overstreet while (offset + sectors < f_sectors && 152049fe78ffSKent Overstreet w->tmp[offset + sectors].state >= SECTOR_DIRTY) { 152149fe78ffSKent Overstreet reserved_sectors += w->tmp[offset + sectors].replicas_reserved; 152249fe78ffSKent Overstreet dirty_sectors += w->tmp[offset + sectors].state == SECTOR_DIRTY; 15237f5e31e1SKent Overstreet sectors++; 15247f5e31e1SKent Overstreet } 1525f74a5051SKent Overstreet BUG_ON(!sectors); 1526f74a5051SKent Overstreet 152730bff594SKent Overstreet sector = folio_sector(folio) + offset; 15287f5e31e1SKent Overstreet 15291c6fdbd8SKent Overstreet if (w->io && 15309a3df993SKent Overstreet (w->io->op.res.nr_replicas != nr_replicas_this_write || 153133e2eb96SKent Overstreet bio_full(&w->io->op.wbio.bio, sectors << 9) || 1532f59b3464SKent Overstreet w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >= 1533f59b3464SKent Overstreet (BIO_MAX_VECS * PAGE_SIZE) || 15349a3df993SKent Overstreet bio_end_sector(&w->io->op.wbio.bio) != sector)) 15351c6fdbd8SKent Overstreet bch2_writepage_do_io(w); 15361c6fdbd8SKent Overstreet 15371c6fdbd8SKent Overstreet if (!w->io) 153850fe5bd6SKent Overstreet bch2_writepage_io_alloc(c, wbc, w, inode, sector, 1539f81b648dSKent Overstreet nr_replicas_this_write); 15401c6fdbd8SKent Overstreet 15417f5e31e1SKent Overstreet atomic_inc(&s->write_count); 15427f5e31e1SKent Overstreet 15439a3df993SKent Overstreet BUG_ON(inode != w->io->inode); 154430bff594SKent Overstreet BUG_ON(!bio_add_folio(&w->io->op.wbio.bio, folio, 15457f5e31e1SKent Overstreet sectors << 9, offset << 9)); 15461c6fdbd8SKent Overstreet 15476cc3535dSKent Overstreet /* Check for writing past i_size: */ 15488eb71e9eSKent Overstreet WARN_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) > 154980fe580cSKent Overstreet round_up(i_size, block_bytes(c)) && 15508eb71e9eSKent Overstreet !test_bit(BCH_FS_EMERGENCY_RO, &c->flags), 15518eb71e9eSKent Overstreet "writing past i_size: %llu > %llu (unrounded %llu)\n", 15528eb71e9eSKent Overstreet bio_end_sector(&w->io->op.wbio.bio) << 9, 15538eb71e9eSKent Overstreet round_up(i_size, block_bytes(c)), 15548eb71e9eSKent Overstreet i_size); 15556cc3535dSKent Overstreet 15569a3df993SKent Overstreet w->io->op.res.sectors += reserved_sectors; 15579a3df993SKent Overstreet w->io->op.i_sectors_delta -= dirty_sectors; 15581c6fdbd8SKent Overstreet w->io->op.new_i_size = i_size; 15591c6fdbd8SKent Overstreet 15607f5e31e1SKent Overstreet offset += sectors; 15617f5e31e1SKent Overstreet } 15627f5e31e1SKent Overstreet 15637f5e31e1SKent Overstreet if (atomic_dec_and_test(&s->write_count)) 156430bff594SKent Overstreet folio_end_writeback(folio); 15657f5e31e1SKent Overstreet 15661c6fdbd8SKent Overstreet return 0; 15671c6fdbd8SKent Overstreet } 15681c6fdbd8SKent Overstreet 15691c6fdbd8SKent Overstreet int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc) 15701c6fdbd8SKent Overstreet { 15711c6fdbd8SKent Overstreet struct bch_fs *c = mapping->host->i_sb->s_fs_info; 15721c6fdbd8SKent Overstreet struct bch_writepage_state w = 15731c6fdbd8SKent Overstreet bch_writepage_state_init(c, to_bch_ei(mapping->host)); 15741c6fdbd8SKent Overstreet struct blk_plug plug; 15751c6fdbd8SKent Overstreet int ret; 15761c6fdbd8SKent Overstreet 15771c6fdbd8SKent Overstreet blk_start_plug(&plug); 15781c6fdbd8SKent Overstreet ret = write_cache_pages(mapping, wbc, __bch2_writepage, &w); 15791c6fdbd8SKent Overstreet if (w.io) 15801c6fdbd8SKent Overstreet bch2_writepage_do_io(&w); 15811c6fdbd8SKent Overstreet blk_finish_plug(&plug); 158249fe78ffSKent Overstreet kfree(w.tmp); 15835c1ef830SKent Overstreet return bch2_err_class(ret); 15841c6fdbd8SKent Overstreet } 15851c6fdbd8SKent Overstreet 15861c6fdbd8SKent Overstreet /* buffered writes: */ 15871c6fdbd8SKent Overstreet 15881c6fdbd8SKent Overstreet int bch2_write_begin(struct file *file, struct address_space *mapping, 15891c6fdbd8SKent Overstreet loff_t pos, unsigned len, 15901c6fdbd8SKent Overstreet struct page **pagep, void **fsdata) 15911c6fdbd8SKent Overstreet { 15921c6fdbd8SKent Overstreet struct bch_inode_info *inode = to_bch_ei(mapping->host); 15931c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 159430bff594SKent Overstreet struct bch2_folio_reservation *res; 159530bff594SKent Overstreet struct folio *folio; 159633e2eb96SKent Overstreet unsigned offset; 15971c6fdbd8SKent Overstreet int ret = -ENOMEM; 15981c6fdbd8SKent Overstreet 1599d1542e03SKent Overstreet res = kmalloc(sizeof(*res), GFP_KERNEL); 1600d1542e03SKent Overstreet if (!res) 1601d1542e03SKent Overstreet return -ENOMEM; 1602d1542e03SKent Overstreet 160330bff594SKent Overstreet bch2_folio_reservation_init(c, inode, res); 1604d1542e03SKent Overstreet *fsdata = res; 16051c6fdbd8SKent Overstreet 1606a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 16071c6fdbd8SKent Overstreet 160833e2eb96SKent Overstreet folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, 160930bff594SKent Overstreet FGP_LOCK|FGP_WRITE|FGP_CREAT|FGP_STABLE, 161030bff594SKent Overstreet mapping_gfp_mask(mapping)); 161130bff594SKent Overstreet if (!folio) 16121c6fdbd8SKent Overstreet goto err_unlock; 16131c6fdbd8SKent Overstreet 161430bff594SKent Overstreet if (folio_test_uptodate(folio)) 16151c6fdbd8SKent Overstreet goto out; 16161c6fdbd8SKent Overstreet 161733e2eb96SKent Overstreet offset = pos - folio_pos(folio); 161833e2eb96SKent Overstreet len = min_t(size_t, len, folio_end_pos(folio) - pos); 161933e2eb96SKent Overstreet 162030bff594SKent Overstreet /* If we're writing entire folio, don't need to read it in first: */ 162133e2eb96SKent Overstreet if (!offset && len == folio_size(folio)) 16221c6fdbd8SKent Overstreet goto out; 16231c6fdbd8SKent Overstreet 16241c6fdbd8SKent Overstreet if (!offset && pos + len >= inode->v.i_size) { 162530bff594SKent Overstreet folio_zero_segment(folio, len, folio_size(folio)); 162630bff594SKent Overstreet flush_dcache_folio(folio); 16271c6fdbd8SKent Overstreet goto out; 16281c6fdbd8SKent Overstreet } 16291c6fdbd8SKent Overstreet 163033e2eb96SKent Overstreet if (folio_pos(folio) >= inode->v.i_size) { 163130bff594SKent Overstreet folio_zero_segments(folio, 0, offset, offset + len, folio_size(folio)); 163230bff594SKent Overstreet flush_dcache_folio(folio); 16331c6fdbd8SKent Overstreet goto out; 16341c6fdbd8SKent Overstreet } 16351c6fdbd8SKent Overstreet readpage: 163630bff594SKent Overstreet ret = bch2_read_single_folio(folio, mapping); 16371c6fdbd8SKent Overstreet if (ret) 16381c6fdbd8SKent Overstreet goto err; 16391c6fdbd8SKent Overstreet out: 164030bff594SKent Overstreet if (!bch2_folio_create(folio, __GFP_NOFAIL)->uptodate) { 164130bff594SKent Overstreet ret = bch2_folio_set(c, inode_inum(inode), &folio, 1); 1642e6ec361fSKent Overstreet if (ret) 16433a4d3656SKent Overstreet goto err; 1644e6ec361fSKent Overstreet } 1645e6ec361fSKent Overstreet 164630bff594SKent Overstreet ret = bch2_folio_reservation_get(c, inode, folio, res, offset, len); 16471c6fdbd8SKent Overstreet if (ret) { 164830bff594SKent Overstreet if (!folio_test_uptodate(folio)) { 16491c6fdbd8SKent Overstreet /* 165030bff594SKent Overstreet * If the folio hasn't been read in, we won't know if we 16511c6fdbd8SKent Overstreet * actually need a reservation - we don't actually need 165230bff594SKent Overstreet * to read here, we just need to check if the folio is 16531c6fdbd8SKent Overstreet * fully backed by uncompressed data: 16541c6fdbd8SKent Overstreet */ 16551c6fdbd8SKent Overstreet goto readpage; 16561c6fdbd8SKent Overstreet } 16571c6fdbd8SKent Overstreet 16581c6fdbd8SKent Overstreet goto err; 16591c6fdbd8SKent Overstreet } 16601c6fdbd8SKent Overstreet 166130bff594SKent Overstreet *pagep = &folio->page; 16621c6fdbd8SKent Overstreet return 0; 16631c6fdbd8SKent Overstreet err: 166430bff594SKent Overstreet folio_unlock(folio); 166530bff594SKent Overstreet folio_put(folio); 16661c6fdbd8SKent Overstreet *pagep = NULL; 16671c6fdbd8SKent Overstreet err_unlock: 1668a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 1669d1542e03SKent Overstreet kfree(res); 1670d1542e03SKent Overstreet *fsdata = NULL; 16715c1ef830SKent Overstreet return bch2_err_class(ret); 16721c6fdbd8SKent Overstreet } 16731c6fdbd8SKent Overstreet 16741c6fdbd8SKent Overstreet int bch2_write_end(struct file *file, struct address_space *mapping, 16751c6fdbd8SKent Overstreet loff_t pos, unsigned len, unsigned copied, 16761c6fdbd8SKent Overstreet struct page *page, void *fsdata) 16771c6fdbd8SKent Overstreet { 16781c6fdbd8SKent Overstreet struct bch_inode_info *inode = to_bch_ei(mapping->host); 16791c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 168030bff594SKent Overstreet struct bch2_folio_reservation *res = fsdata; 168130bff594SKent Overstreet struct folio *folio = page_folio(page); 168233e2eb96SKent Overstreet unsigned offset = pos - folio_pos(folio); 16831c6fdbd8SKent Overstreet 16841c6fdbd8SKent Overstreet lockdep_assert_held(&inode->v.i_rwsem); 168533e2eb96SKent Overstreet BUG_ON(offset + copied > folio_size(folio)); 16861c6fdbd8SKent Overstreet 168730bff594SKent Overstreet if (unlikely(copied < len && !folio_test_uptodate(folio))) { 16881c6fdbd8SKent Overstreet /* 168930bff594SKent Overstreet * The folio needs to be read in, but that would destroy 16901c6fdbd8SKent Overstreet * our partial write - simplest thing is to just force 16911c6fdbd8SKent Overstreet * userspace to redo the write: 16921c6fdbd8SKent Overstreet */ 169330bff594SKent Overstreet folio_zero_range(folio, 0, folio_size(folio)); 169430bff594SKent Overstreet flush_dcache_folio(folio); 16951c6fdbd8SKent Overstreet copied = 0; 16961c6fdbd8SKent Overstreet } 16971c6fdbd8SKent Overstreet 16981c6fdbd8SKent Overstreet spin_lock(&inode->v.i_lock); 16991c6fdbd8SKent Overstreet if (pos + copied > inode->v.i_size) 17001c6fdbd8SKent Overstreet i_size_write(&inode->v, pos + copied); 17011c6fdbd8SKent Overstreet spin_unlock(&inode->v.i_lock); 17021c6fdbd8SKent Overstreet 17031c6fdbd8SKent Overstreet if (copied) { 170430bff594SKent Overstreet if (!folio_test_uptodate(folio)) 170530bff594SKent Overstreet folio_mark_uptodate(folio); 1706d1542e03SKent Overstreet 170730bff594SKent Overstreet bch2_set_folio_dirty(c, inode, folio, res, offset, copied); 17081c6fdbd8SKent Overstreet 17091c6fdbd8SKent Overstreet inode->ei_last_dirtied = (unsigned long) current; 17101c6fdbd8SKent Overstreet } 17111c6fdbd8SKent Overstreet 171230bff594SKent Overstreet folio_unlock(folio); 171330bff594SKent Overstreet folio_put(folio); 1714a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 17151c6fdbd8SKent Overstreet 171630bff594SKent Overstreet bch2_folio_reservation_put(c, inode, res); 1717d1542e03SKent Overstreet kfree(res); 1718d1542e03SKent Overstreet 17191c6fdbd8SKent Overstreet return copied; 17201c6fdbd8SKent Overstreet } 17211c6fdbd8SKent Overstreet 1722c42b57c4SKent Overstreet typedef DARRAY(struct folio *) folios; 1723c42b57c4SKent Overstreet 1724c42b57c4SKent Overstreet static noinline void folios_trunc(folios *folios, struct folio **fi) 1725c42b57c4SKent Overstreet { 1726c42b57c4SKent Overstreet while (folios->data + folios->nr > fi) { 1727c42b57c4SKent Overstreet struct folio *f = darray_pop(folios); 1728c42b57c4SKent Overstreet 1729c42b57c4SKent Overstreet folio_unlock(f); 1730c42b57c4SKent Overstreet folio_put(f); 1731c42b57c4SKent Overstreet } 1732c42b57c4SKent Overstreet } 17331c6fdbd8SKent Overstreet 17341c6fdbd8SKent Overstreet static int __bch2_buffered_write(struct bch_inode_info *inode, 17351c6fdbd8SKent Overstreet struct address_space *mapping, 17361c6fdbd8SKent Overstreet struct iov_iter *iter, 17371c6fdbd8SKent Overstreet loff_t pos, unsigned len) 17381c6fdbd8SKent Overstreet { 17391c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 174030bff594SKent Overstreet struct bch2_folio_reservation res; 1741c42b57c4SKent Overstreet folios folios; 1742c42b57c4SKent Overstreet struct folio **fi, *f; 1743c42b57c4SKent Overstreet unsigned copied = 0, f_offset; 1744c42b57c4SKent Overstreet loff_t end = pos + len, f_pos; 17451c6fdbd8SKent Overstreet int ret = 0; 17461c6fdbd8SKent Overstreet 17471c6fdbd8SKent Overstreet BUG_ON(!len); 17481c6fdbd8SKent Overstreet 174930bff594SKent Overstreet bch2_folio_reservation_init(c, inode, &res); 1750c42b57c4SKent Overstreet darray_init(&folios); 1751d1542e03SKent Overstreet 1752c42b57c4SKent Overstreet f_pos = pos; 1753c42b57c4SKent Overstreet while (f_pos < end) { 1754c42b57c4SKent Overstreet unsigned fgp_flags = FGP_LOCK|FGP_WRITE|FGP_STABLE; 1755c42b57c4SKent Overstreet 1756c42b57c4SKent Overstreet if ((u64) f_pos < (u64) pos + (1U << 20)) 1757c42b57c4SKent Overstreet fgp_flags |= FGP_CREAT; 1758c42b57c4SKent Overstreet 1759c42b57c4SKent Overstreet if (darray_make_room_gfp(&folios, 1, 1760c42b57c4SKent Overstreet mapping_gfp_mask(mapping) & GFP_KERNEL)) 1761c42b57c4SKent Overstreet break; 1762c42b57c4SKent Overstreet 1763c42b57c4SKent Overstreet f = __filemap_get_folio(mapping, f_pos >> PAGE_SHIFT, 1764c42b57c4SKent Overstreet fgp_flags, mapping_gfp_mask(mapping)); 1765c42b57c4SKent Overstreet if (!f) 1766c42b57c4SKent Overstreet break; 1767c42b57c4SKent Overstreet 1768c42b57c4SKent Overstreet BUG_ON(folios.nr && folio_pos(f) != f_pos); 1769c42b57c4SKent Overstreet 1770c42b57c4SKent Overstreet f_pos = folio_end_pos(f); 1771c42b57c4SKent Overstreet darray_push(&folios, f); 1772c42b57c4SKent Overstreet } 1773c42b57c4SKent Overstreet 1774c42b57c4SKent Overstreet end = min(end, f_pos); 1775c42b57c4SKent Overstreet if (end == pos) { 17761c6fdbd8SKent Overstreet ret = -ENOMEM; 17771c6fdbd8SKent Overstreet goto out; 17781c6fdbd8SKent Overstreet } 17791c6fdbd8SKent Overstreet 1780c42b57c4SKent Overstreet f = darray_first(folios); 1781c42b57c4SKent Overstreet if (pos != folio_pos(f) && !folio_test_uptodate(f)) { 1782c42b57c4SKent Overstreet ret = bch2_read_single_folio(f, mapping); 17831c6fdbd8SKent Overstreet if (ret) 17841c6fdbd8SKent Overstreet goto out; 17851c6fdbd8SKent Overstreet } 17861c6fdbd8SKent Overstreet 1787c42b57c4SKent Overstreet f = darray_last(folios); 1788c42b57c4SKent Overstreet if (end != folio_end_pos(f) && !folio_test_uptodate(f)) { 1789c42b57c4SKent Overstreet if (end >= inode->v.i_size) { 1790c42b57c4SKent Overstreet folio_zero_range(f, 0, folio_size(f)); 17911c6fdbd8SKent Overstreet } else { 1792c42b57c4SKent Overstreet ret = bch2_read_single_folio(f, mapping); 17931c6fdbd8SKent Overstreet if (ret) 17941c6fdbd8SKent Overstreet goto out; 17951c6fdbd8SKent Overstreet } 17961c6fdbd8SKent Overstreet } 17971c6fdbd8SKent Overstreet 1798c42b57c4SKent Overstreet f_pos = pos; 1799c42b57c4SKent Overstreet f_offset = pos - folio_pos(darray_first(folios)); 1800c42b57c4SKent Overstreet darray_for_each(folios, fi) { 1801c42b57c4SKent Overstreet struct folio *f = *fi; 1802c42b57c4SKent Overstreet unsigned f_len = min(end, folio_end_pos(f)) - f_pos; 18031c6fdbd8SKent Overstreet 1804c42b57c4SKent Overstreet if (!bch2_folio_create(f, __GFP_NOFAIL)->uptodate) { 1805c42b57c4SKent Overstreet ret = bch2_folio_set(c, inode_inum(inode), fi, 1806c42b57c4SKent Overstreet folios.data + folios.nr - fi); 1807e6ec361fSKent Overstreet if (ret) 1808e6ec361fSKent Overstreet goto out; 18091c6fdbd8SKent Overstreet } 18101c6fdbd8SKent Overstreet 1811353448f3SKent Overstreet /* 1812353448f3SKent Overstreet * XXX: per POSIX and fstests generic/275, on -ENOSPC we're 1813353448f3SKent Overstreet * supposed to write as much as we have disk space for. 1814353448f3SKent Overstreet * 1815353448f3SKent Overstreet * On failure here we should still write out a partial page if 1816353448f3SKent Overstreet * we aren't completely out of disk space - we don't do that 1817353448f3SKent Overstreet * yet: 1818353448f3SKent Overstreet */ 1819c42b57c4SKent Overstreet ret = bch2_folio_reservation_get(c, inode, f, &res, f_offset, f_len); 1820353448f3SKent Overstreet if (unlikely(ret)) { 1821c42b57c4SKent Overstreet folios_trunc(&folios, fi); 1822c42b57c4SKent Overstreet if (!folios.nr) 18231c6fdbd8SKent Overstreet goto out; 1824c42b57c4SKent Overstreet 1825c42b57c4SKent Overstreet end = min(end, folio_end_pos(darray_last(folios))); 1826353448f3SKent Overstreet break; 1827353448f3SKent Overstreet } 1828d1542e03SKent Overstreet 1829c42b57c4SKent Overstreet f_pos = folio_end_pos(f); 1830c42b57c4SKent Overstreet f_offset = 0; 18311c6fdbd8SKent Overstreet } 18321c6fdbd8SKent Overstreet 18331c6fdbd8SKent Overstreet if (mapping_writably_mapped(mapping)) 1834c42b57c4SKent Overstreet darray_for_each(folios, fi) 1835c42b57c4SKent Overstreet flush_dcache_folio(*fi); 18361c6fdbd8SKent Overstreet 1837c42b57c4SKent Overstreet f_pos = pos; 1838c42b57c4SKent Overstreet f_offset = pos - folio_pos(darray_first(folios)); 1839c42b57c4SKent Overstreet darray_for_each(folios, fi) { 1840c42b57c4SKent Overstreet struct folio *f = *fi; 1841c42b57c4SKent Overstreet unsigned f_len = min(end, folio_end_pos(f)) - f_pos; 1842c42b57c4SKent Overstreet unsigned f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter); 1843d1542e03SKent Overstreet 1844c42b57c4SKent Overstreet if (!f_copied) { 1845c42b57c4SKent Overstreet folios_trunc(&folios, fi); 1846912bdf17SKent Overstreet break; 1847912bdf17SKent Overstreet } 1848912bdf17SKent Overstreet 1849c42b57c4SKent Overstreet if (!folio_test_uptodate(f) && 1850c42b57c4SKent Overstreet f_copied != folio_size(f) && 1851c42b57c4SKent Overstreet pos + copied + f_copied < inode->v.i_size) { 1852c42b57c4SKent Overstreet folio_zero_range(f, 0, folio_size(f)); 1853c42b57c4SKent Overstreet folios_trunc(&folios, fi); 1854912bdf17SKent Overstreet break; 18551c6fdbd8SKent Overstreet } 18561c6fdbd8SKent Overstreet 1857c42b57c4SKent Overstreet flush_dcache_folio(f); 1858c42b57c4SKent Overstreet copied += f_copied; 1859c42b57c4SKent Overstreet 1860c42b57c4SKent Overstreet if (f_copied != f_len) { 1861c42b57c4SKent Overstreet folios_trunc(&folios, fi + 1); 1862c42b57c4SKent Overstreet break; 1863c42b57c4SKent Overstreet } 1864c42b57c4SKent Overstreet 1865c42b57c4SKent Overstreet f_pos = folio_end_pos(f); 1866c42b57c4SKent Overstreet f_offset = 0; 1867c42b57c4SKent Overstreet } 1868c42b57c4SKent Overstreet 18691c6fdbd8SKent Overstreet if (!copied) 18701c6fdbd8SKent Overstreet goto out; 18711c6fdbd8SKent Overstreet 1872c42b57c4SKent Overstreet end = pos + copied; 1873c42b57c4SKent Overstreet 1874877dfb34SKent Overstreet spin_lock(&inode->v.i_lock); 1875c42b57c4SKent Overstreet if (end > inode->v.i_size) 1876c42b57c4SKent Overstreet i_size_write(&inode->v, end); 1877877dfb34SKent Overstreet spin_unlock(&inode->v.i_lock); 1878877dfb34SKent Overstreet 1879c42b57c4SKent Overstreet f_pos = pos; 1880c42b57c4SKent Overstreet f_offset = pos - folio_pos(darray_first(folios)); 1881c42b57c4SKent Overstreet darray_for_each(folios, fi) { 1882c42b57c4SKent Overstreet struct folio *f = *fi; 1883c42b57c4SKent Overstreet unsigned f_len = min(end, folio_end_pos(f)) - f_pos; 1884d1542e03SKent Overstreet 1885c42b57c4SKent Overstreet if (!folio_test_uptodate(f)) 1886c42b57c4SKent Overstreet folio_mark_uptodate(f); 1887d1542e03SKent Overstreet 1888c42b57c4SKent Overstreet bch2_set_folio_dirty(c, inode, f, &res, f_offset, f_len); 1889d1542e03SKent Overstreet 1890c42b57c4SKent Overstreet f_pos = folio_end_pos(f); 1891c42b57c4SKent Overstreet f_offset = 0; 1892d1542e03SKent Overstreet } 1893877dfb34SKent Overstreet 1894877dfb34SKent Overstreet inode->ei_last_dirtied = (unsigned long) current; 18951c6fdbd8SKent Overstreet out: 1896c42b57c4SKent Overstreet darray_for_each(folios, fi) { 1897c42b57c4SKent Overstreet folio_unlock(*fi); 1898c42b57c4SKent Overstreet folio_put(*fi); 18991c6fdbd8SKent Overstreet } 19001c6fdbd8SKent Overstreet 1901c42b57c4SKent Overstreet darray_exit(&folios); 190230bff594SKent Overstreet bch2_folio_reservation_put(c, inode, &res); 19031c6fdbd8SKent Overstreet 19041c6fdbd8SKent Overstreet return copied ?: ret; 19051c6fdbd8SKent Overstreet } 19061c6fdbd8SKent Overstreet 19071c6fdbd8SKent Overstreet static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter) 19081c6fdbd8SKent Overstreet { 19091c6fdbd8SKent Overstreet struct file *file = iocb->ki_filp; 19101c6fdbd8SKent Overstreet struct address_space *mapping = file->f_mapping; 19111c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 19121c6fdbd8SKent Overstreet loff_t pos = iocb->ki_pos; 19131c6fdbd8SKent Overstreet ssize_t written = 0; 19141c6fdbd8SKent Overstreet int ret = 0; 19151c6fdbd8SKent Overstreet 1916a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 19171c6fdbd8SKent Overstreet 19181c6fdbd8SKent Overstreet do { 19191c6fdbd8SKent Overstreet unsigned offset = pos & (PAGE_SIZE - 1); 1920c42b57c4SKent Overstreet unsigned bytes = iov_iter_count(iter); 19211c6fdbd8SKent Overstreet again: 19221c6fdbd8SKent Overstreet /* 19231c6fdbd8SKent Overstreet * Bring in the user page that we will copy from _first_. 19241c6fdbd8SKent Overstreet * Otherwise there's a nasty deadlock on copying from the 19251c6fdbd8SKent Overstreet * same page as we're writing to, without it being marked 19261c6fdbd8SKent Overstreet * up-to-date. 19271c6fdbd8SKent Overstreet * 19281c6fdbd8SKent Overstreet * Not only is this an optimisation, but it is also required 19291c6fdbd8SKent Overstreet * to check that the address is actually valid, when atomic 19301c6fdbd8SKent Overstreet * usercopies are used, below. 19311c6fdbd8SKent Overstreet */ 19321c6fdbd8SKent Overstreet if (unlikely(fault_in_iov_iter_readable(iter, bytes))) { 19331c6fdbd8SKent Overstreet bytes = min_t(unsigned long, iov_iter_count(iter), 19341c6fdbd8SKent Overstreet PAGE_SIZE - offset); 19351c6fdbd8SKent Overstreet 19361c6fdbd8SKent Overstreet if (unlikely(fault_in_iov_iter_readable(iter, bytes))) { 19371c6fdbd8SKent Overstreet ret = -EFAULT; 19381c6fdbd8SKent Overstreet break; 19391c6fdbd8SKent Overstreet } 19401c6fdbd8SKent Overstreet } 19411c6fdbd8SKent Overstreet 19421c6fdbd8SKent Overstreet if (unlikely(fatal_signal_pending(current))) { 19431c6fdbd8SKent Overstreet ret = -EINTR; 19441c6fdbd8SKent Overstreet break; 19451c6fdbd8SKent Overstreet } 19461c6fdbd8SKent Overstreet 19471c6fdbd8SKent Overstreet ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes); 19481c6fdbd8SKent Overstreet if (unlikely(ret < 0)) 19491c6fdbd8SKent Overstreet break; 19501c6fdbd8SKent Overstreet 19511c6fdbd8SKent Overstreet cond_resched(); 19521c6fdbd8SKent Overstreet 19531c6fdbd8SKent Overstreet if (unlikely(ret == 0)) { 19541c6fdbd8SKent Overstreet /* 19551c6fdbd8SKent Overstreet * If we were unable to copy any data at all, we must 19561c6fdbd8SKent Overstreet * fall back to a single segment length write. 19571c6fdbd8SKent Overstreet * 19581c6fdbd8SKent Overstreet * If we didn't fallback here, we could livelock 19591c6fdbd8SKent Overstreet * because not all segments in the iov can be copied at 19601c6fdbd8SKent Overstreet * once without a pagefault. 19611c6fdbd8SKent Overstreet */ 19621c6fdbd8SKent Overstreet bytes = min_t(unsigned long, PAGE_SIZE - offset, 19631c6fdbd8SKent Overstreet iov_iter_single_seg_count(iter)); 19641c6fdbd8SKent Overstreet goto again; 19651c6fdbd8SKent Overstreet } 19661c6fdbd8SKent Overstreet pos += ret; 19671c6fdbd8SKent Overstreet written += ret; 1968912bdf17SKent Overstreet ret = 0; 19691c6fdbd8SKent Overstreet 19701c6fdbd8SKent Overstreet balance_dirty_pages_ratelimited(mapping); 19711c6fdbd8SKent Overstreet } while (iov_iter_count(iter)); 19721c6fdbd8SKent Overstreet 1973a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 19741c6fdbd8SKent Overstreet 19751c6fdbd8SKent Overstreet return written ? written : ret; 19761c6fdbd8SKent Overstreet } 19771c6fdbd8SKent Overstreet 19781c6fdbd8SKent Overstreet /* O_DIRECT reads */ 19791c6fdbd8SKent Overstreet 1980b4725cc1SKent Overstreet static void bio_check_or_release(struct bio *bio, bool check_dirty) 1981b4725cc1SKent Overstreet { 1982b4725cc1SKent Overstreet if (check_dirty) { 1983b4725cc1SKent Overstreet bio_check_pages_dirty(bio); 1984b4725cc1SKent Overstreet } else { 1985b4725cc1SKent Overstreet bio_release_pages(bio, false); 1986b4725cc1SKent Overstreet bio_put(bio); 1987b4725cc1SKent Overstreet } 1988b4725cc1SKent Overstreet } 1989b4725cc1SKent Overstreet 19901c6fdbd8SKent Overstreet static void bch2_dio_read_complete(struct closure *cl) 19911c6fdbd8SKent Overstreet { 19921c6fdbd8SKent Overstreet struct dio_read *dio = container_of(cl, struct dio_read, cl); 19931c6fdbd8SKent Overstreet 19941c6fdbd8SKent Overstreet dio->req->ki_complete(dio->req, dio->ret); 1995b4725cc1SKent Overstreet bio_check_or_release(&dio->rbio.bio, dio->should_dirty); 19961c6fdbd8SKent Overstreet } 19971c6fdbd8SKent Overstreet 19981c6fdbd8SKent Overstreet static void bch2_direct_IO_read_endio(struct bio *bio) 19991c6fdbd8SKent Overstreet { 20001c6fdbd8SKent Overstreet struct dio_read *dio = bio->bi_private; 20011c6fdbd8SKent Overstreet 20021c6fdbd8SKent Overstreet if (bio->bi_status) 20031c6fdbd8SKent Overstreet dio->ret = blk_status_to_errno(bio->bi_status); 20041c6fdbd8SKent Overstreet 20051c6fdbd8SKent Overstreet closure_put(&dio->cl); 20061c6fdbd8SKent Overstreet } 20071c6fdbd8SKent Overstreet 20081c6fdbd8SKent Overstreet static void bch2_direct_IO_read_split_endio(struct bio *bio) 20091c6fdbd8SKent Overstreet { 2010b4725cc1SKent Overstreet struct dio_read *dio = bio->bi_private; 2011b4725cc1SKent Overstreet bool should_dirty = dio->should_dirty; 2012b4725cc1SKent Overstreet 20131c6fdbd8SKent Overstreet bch2_direct_IO_read_endio(bio); 2014b4725cc1SKent Overstreet bio_check_or_release(bio, should_dirty); 20151c6fdbd8SKent Overstreet } 20161c6fdbd8SKent Overstreet 20171c6fdbd8SKent Overstreet static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) 20181c6fdbd8SKent Overstreet { 20191c6fdbd8SKent Overstreet struct file *file = req->ki_filp; 20201c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 20211c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 202201ad6737SKent Overstreet struct bch_io_opts opts; 20231c6fdbd8SKent Overstreet struct dio_read *dio; 20241c6fdbd8SKent Overstreet struct bio *bio; 20251c6fdbd8SKent Overstreet loff_t offset = req->ki_pos; 20261c6fdbd8SKent Overstreet bool sync = is_sync_kiocb(req); 20271c6fdbd8SKent Overstreet size_t shorten; 20281c6fdbd8SKent Overstreet ssize_t ret; 20291c6fdbd8SKent Overstreet 203001ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 203101ad6737SKent Overstreet 20321c6fdbd8SKent Overstreet if ((offset|iter->count) & (block_bytes(c) - 1)) 20331c6fdbd8SKent Overstreet return -EINVAL; 20341c6fdbd8SKent Overstreet 20351c6fdbd8SKent Overstreet ret = min_t(loff_t, iter->count, 20361c6fdbd8SKent Overstreet max_t(loff_t, 0, i_size_read(&inode->v) - offset)); 20371c6fdbd8SKent Overstreet 20381c6fdbd8SKent Overstreet if (!ret) 20391c6fdbd8SKent Overstreet return ret; 20401c6fdbd8SKent Overstreet 20411c6fdbd8SKent Overstreet shorten = iov_iter_count(iter) - round_up(ret, block_bytes(c)); 20421c6fdbd8SKent Overstreet iter->count -= shorten; 20431c6fdbd8SKent Overstreet 20441c6fdbd8SKent Overstreet bio = bio_alloc_bioset(NULL, 20454d126dc8SKent Overstreet bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), 20461c6fdbd8SKent Overstreet REQ_OP_READ, 20471c6fdbd8SKent Overstreet GFP_KERNEL, 20481c6fdbd8SKent Overstreet &c->dio_read_bioset); 20491c6fdbd8SKent Overstreet 20501c6fdbd8SKent Overstreet bio->bi_end_io = bch2_direct_IO_read_endio; 20511c6fdbd8SKent Overstreet 20521c6fdbd8SKent Overstreet dio = container_of(bio, struct dio_read, rbio.bio); 20531c6fdbd8SKent Overstreet closure_init(&dio->cl, NULL); 20541c6fdbd8SKent Overstreet 20551c6fdbd8SKent Overstreet /* 20561c6fdbd8SKent Overstreet * this is a _really_ horrible hack just to avoid an atomic sub at the 20571c6fdbd8SKent Overstreet * end: 20581c6fdbd8SKent Overstreet */ 20591c6fdbd8SKent Overstreet if (!sync) { 20601c6fdbd8SKent Overstreet set_closure_fn(&dio->cl, bch2_dio_read_complete, NULL); 20611c6fdbd8SKent Overstreet atomic_set(&dio->cl.remaining, 20621c6fdbd8SKent Overstreet CLOSURE_REMAINING_INITIALIZER - 20631c6fdbd8SKent Overstreet CLOSURE_RUNNING + 20641c6fdbd8SKent Overstreet CLOSURE_DESTRUCTOR); 20651c6fdbd8SKent Overstreet } else { 20661c6fdbd8SKent Overstreet atomic_set(&dio->cl.remaining, 20671c6fdbd8SKent Overstreet CLOSURE_REMAINING_INITIALIZER + 1); 20681c6fdbd8SKent Overstreet } 20691c6fdbd8SKent Overstreet 20701c6fdbd8SKent Overstreet dio->req = req; 20711c6fdbd8SKent Overstreet dio->ret = ret; 2072b4725cc1SKent Overstreet /* 2073b4725cc1SKent Overstreet * This is one of the sketchier things I've encountered: we have to skip 2074b4725cc1SKent Overstreet * the dirtying of requests that are internal from the kernel (i.e. from 2075b4725cc1SKent Overstreet * loopback), because we'll deadlock on page_lock. 2076b4725cc1SKent Overstreet */ 2077b4725cc1SKent Overstreet dio->should_dirty = iter_is_iovec(iter); 20781c6fdbd8SKent Overstreet 20791c6fdbd8SKent Overstreet goto start; 20801c6fdbd8SKent Overstreet while (iter->count) { 20811c6fdbd8SKent Overstreet bio = bio_alloc_bioset(NULL, 20824d126dc8SKent Overstreet bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), 20831c6fdbd8SKent Overstreet REQ_OP_READ, 20841c6fdbd8SKent Overstreet GFP_KERNEL, 20851c6fdbd8SKent Overstreet &c->bio_read); 20861c6fdbd8SKent Overstreet bio->bi_end_io = bch2_direct_IO_read_split_endio; 20871c6fdbd8SKent Overstreet start: 20881c6fdbd8SKent Overstreet bio->bi_opf = REQ_OP_READ|REQ_SYNC; 20891c6fdbd8SKent Overstreet bio->bi_iter.bi_sector = offset >> 9; 20901c6fdbd8SKent Overstreet bio->bi_private = dio; 20911c6fdbd8SKent Overstreet 20921c6fdbd8SKent Overstreet ret = bio_iov_iter_get_pages(bio, iter); 20931c6fdbd8SKent Overstreet if (ret < 0) { 20941c6fdbd8SKent Overstreet /* XXX: fault inject this path */ 20951c6fdbd8SKent Overstreet bio->bi_status = BLK_STS_RESOURCE; 20961c6fdbd8SKent Overstreet bio_endio(bio); 20971c6fdbd8SKent Overstreet break; 20981c6fdbd8SKent Overstreet } 20991c6fdbd8SKent Overstreet 21001c6fdbd8SKent Overstreet offset += bio->bi_iter.bi_size; 2101b4725cc1SKent Overstreet 2102b4725cc1SKent Overstreet if (dio->should_dirty) 21031c6fdbd8SKent Overstreet bio_set_pages_dirty(bio); 21041c6fdbd8SKent Overstreet 21051c6fdbd8SKent Overstreet if (iter->count) 21061c6fdbd8SKent Overstreet closure_get(&dio->cl); 21071c6fdbd8SKent Overstreet 21088c6d298aSKent Overstreet bch2_read(c, rbio_init(bio, opts), inode_inum(inode)); 21091c6fdbd8SKent Overstreet } 21101c6fdbd8SKent Overstreet 21111c6fdbd8SKent Overstreet iter->count += shorten; 21121c6fdbd8SKent Overstreet 21131c6fdbd8SKent Overstreet if (sync) { 21141c6fdbd8SKent Overstreet closure_sync(&dio->cl); 21151c6fdbd8SKent Overstreet closure_debug_destroy(&dio->cl); 21161c6fdbd8SKent Overstreet ret = dio->ret; 2117b4725cc1SKent Overstreet bio_check_or_release(&dio->rbio.bio, dio->should_dirty); 21181c6fdbd8SKent Overstreet return ret; 21191c6fdbd8SKent Overstreet } else { 21201c6fdbd8SKent Overstreet return -EIOCBQUEUED; 21211c6fdbd8SKent Overstreet } 21221c6fdbd8SKent Overstreet } 21231c6fdbd8SKent Overstreet 21241c6fdbd8SKent Overstreet ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter) 21251c6fdbd8SKent Overstreet { 21261c6fdbd8SKent Overstreet struct file *file = iocb->ki_filp; 21271c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 21281c6fdbd8SKent Overstreet struct address_space *mapping = file->f_mapping; 21291c6fdbd8SKent Overstreet size_t count = iov_iter_count(iter); 21301c6fdbd8SKent Overstreet ssize_t ret; 21311c6fdbd8SKent Overstreet 21321c6fdbd8SKent Overstreet if (!count) 21331c6fdbd8SKent Overstreet return 0; /* skip atime */ 21341c6fdbd8SKent Overstreet 21351c6fdbd8SKent Overstreet if (iocb->ki_flags & IOCB_DIRECT) { 21361c6fdbd8SKent Overstreet struct blk_plug plug; 21371c6fdbd8SKent Overstreet 2138a023127aSKent Overstreet if (unlikely(mapping->nrpages)) { 21391c6fdbd8SKent Overstreet ret = filemap_write_and_wait_range(mapping, 21401c6fdbd8SKent Overstreet iocb->ki_pos, 21411c6fdbd8SKent Overstreet iocb->ki_pos + count - 1); 21421c6fdbd8SKent Overstreet if (ret < 0) 21435c1ef830SKent Overstreet goto out; 2144a023127aSKent Overstreet } 21451c6fdbd8SKent Overstreet 21461c6fdbd8SKent Overstreet file_accessed(file); 21471c6fdbd8SKent Overstreet 21481c6fdbd8SKent Overstreet blk_start_plug(&plug); 21491c6fdbd8SKent Overstreet ret = bch2_direct_IO_read(iocb, iter); 21501c6fdbd8SKent Overstreet blk_finish_plug(&plug); 21511c6fdbd8SKent Overstreet 21521c6fdbd8SKent Overstreet if (ret >= 0) 21531c6fdbd8SKent Overstreet iocb->ki_pos += ret; 21541c6fdbd8SKent Overstreet } else { 2155a7ecd30cSKent Overstreet bch2_pagecache_add_get(inode); 21561c6fdbd8SKent Overstreet ret = generic_file_read_iter(iocb, iter); 2157a7ecd30cSKent Overstreet bch2_pagecache_add_put(inode); 21581c6fdbd8SKent Overstreet } 21595c1ef830SKent Overstreet out: 21605c1ef830SKent Overstreet return bch2_err_class(ret); 21611c6fdbd8SKent Overstreet } 21621c6fdbd8SKent Overstreet 21631c6fdbd8SKent Overstreet /* O_DIRECT writes */ 21641c6fdbd8SKent Overstreet 21656fed42bbSKent Overstreet static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum, 21666fed42bbSKent Overstreet u64 offset, u64 size, 21676fed42bbSKent Overstreet unsigned nr_replicas, bool compressed) 21686fed42bbSKent Overstreet { 21696fed42bbSKent Overstreet struct btree_trans trans; 21706fed42bbSKent Overstreet struct btree_iter iter; 21716fed42bbSKent Overstreet struct bkey_s_c k; 21726fed42bbSKent Overstreet u64 end = offset + size; 21736fed42bbSKent Overstreet u32 snapshot; 21746fed42bbSKent Overstreet bool ret = true; 21756fed42bbSKent Overstreet int err; 21766fed42bbSKent Overstreet 21776fed42bbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 21786fed42bbSKent Overstreet retry: 21796fed42bbSKent Overstreet bch2_trans_begin(&trans); 21806fed42bbSKent Overstreet 21816fed42bbSKent Overstreet err = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 21826fed42bbSKent Overstreet if (err) 21836fed42bbSKent Overstreet goto err; 21846fed42bbSKent Overstreet 2185e5fa91d7SKent Overstreet for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, 21866fed42bbSKent Overstreet SPOS(inum.inum, offset, snapshot), 21876fed42bbSKent Overstreet BTREE_ITER_SLOTS, k, err) { 2188e88a75ebSKent Overstreet if (bkey_ge(bkey_start_pos(k.k), POS(inum.inum, end))) 21896fed42bbSKent Overstreet break; 21906fed42bbSKent Overstreet 21918c6d298aSKent Overstreet if (k.k->p.snapshot != snapshot || 21928c6d298aSKent Overstreet nr_replicas > bch2_bkey_replicas(c, k) || 21936fed42bbSKent Overstreet (!compressed && bch2_bkey_sectors_compressed(k))) { 21946fed42bbSKent Overstreet ret = false; 21956fed42bbSKent Overstreet break; 21966fed42bbSKent Overstreet } 21976fed42bbSKent Overstreet } 21986fed42bbSKent Overstreet 21996fed42bbSKent Overstreet offset = iter.pos.offset; 22006fed42bbSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 22016fed42bbSKent Overstreet err: 2202549d173cSKent Overstreet if (bch2_err_matches(err, BCH_ERR_transaction_restart)) 22036fed42bbSKent Overstreet goto retry; 22046fed42bbSKent Overstreet bch2_trans_exit(&trans); 22056fed42bbSKent Overstreet 22066fed42bbSKent Overstreet return err ? false : ret; 22076fed42bbSKent Overstreet } 22086fed42bbSKent Overstreet 2209182c7bbfSKent Overstreet static noinline bool bch2_dio_write_check_allocated(struct dio_write *dio) 2210182c7bbfSKent Overstreet { 2211182c7bbfSKent Overstreet struct bch_fs *c = dio->op.c; 2212182c7bbfSKent Overstreet struct bch_inode_info *inode = dio->inode; 2213182c7bbfSKent Overstreet struct bio *bio = &dio->op.wbio.bio; 2214182c7bbfSKent Overstreet 2215182c7bbfSKent Overstreet return bch2_check_range_allocated(c, inode_inum(inode), 2216182c7bbfSKent Overstreet dio->op.pos.offset, bio_sectors(bio), 2217182c7bbfSKent Overstreet dio->op.opts.data_replicas, 2218182c7bbfSKent Overstreet dio->op.opts.compression != 0); 2219182c7bbfSKent Overstreet } 2220182c7bbfSKent Overstreet 2221a1ee777bSKent Overstreet static void bch2_dio_write_loop_async(struct bch_write_op *); 2222a1ee777bSKent Overstreet static __always_inline long bch2_dio_write_done(struct dio_write *dio); 2223a1ee777bSKent Overstreet 22241c6fdbd8SKent Overstreet /* 22251c6fdbd8SKent Overstreet * We're going to return -EIOCBQUEUED, but we haven't finished consuming the 22261c6fdbd8SKent Overstreet * iov_iter yet, so we need to stash a copy of the iovec: it might be on the 22271c6fdbd8SKent Overstreet * caller's stack, we're not guaranteed that it will live for the duration of 22281c6fdbd8SKent Overstreet * the IO: 22291c6fdbd8SKent Overstreet */ 22301c6fdbd8SKent Overstreet static noinline int bch2_dio_write_copy_iov(struct dio_write *dio) 22311c6fdbd8SKent Overstreet { 22321c6fdbd8SKent Overstreet struct iovec *iov = dio->inline_vecs; 22331c6fdbd8SKent Overstreet 22341c6fdbd8SKent Overstreet /* 22351c6fdbd8SKent Overstreet * iov_iter has a single embedded iovec - nothing to do: 22361c6fdbd8SKent Overstreet */ 22371c6fdbd8SKent Overstreet if (iter_is_ubuf(&dio->iter)) 22381c6fdbd8SKent Overstreet return 0; 22391c6fdbd8SKent Overstreet 22401c6fdbd8SKent Overstreet /* 22411c6fdbd8SKent Overstreet * We don't currently handle non-iovec iov_iters here - return an error, 22421c6fdbd8SKent Overstreet * and we'll fall back to doing the IO synchronously: 22431c6fdbd8SKent Overstreet */ 22441c6fdbd8SKent Overstreet if (!iter_is_iovec(&dio->iter)) 22451c6fdbd8SKent Overstreet return -1; 22461c6fdbd8SKent Overstreet 22471c6fdbd8SKent Overstreet if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) { 22481c6fdbd8SKent Overstreet iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov), 22491c6fdbd8SKent Overstreet GFP_KERNEL); 22501c6fdbd8SKent Overstreet if (unlikely(!iov)) 22511c6fdbd8SKent Overstreet return -ENOMEM; 22521c6fdbd8SKent Overstreet 22531c6fdbd8SKent Overstreet dio->free_iov = true; 22541c6fdbd8SKent Overstreet } 22551c6fdbd8SKent Overstreet 22561c6fdbd8SKent Overstreet memcpy(iov, dio->iter.__iov, dio->iter.nr_segs * sizeof(*iov)); 22571c6fdbd8SKent Overstreet dio->iter.__iov = iov; 22581c6fdbd8SKent Overstreet return 0; 22591c6fdbd8SKent Overstreet } 22601c6fdbd8SKent Overstreet 2261a1ee777bSKent Overstreet static void bch2_dio_write_flush_done(struct closure *cl) 2262a1ee777bSKent Overstreet { 2263a1ee777bSKent Overstreet struct dio_write *dio = container_of(cl, struct dio_write, op.cl); 2264a1ee777bSKent Overstreet struct bch_fs *c = dio->op.c; 2265a1ee777bSKent Overstreet 2266a1ee777bSKent Overstreet closure_debug_destroy(cl); 2267a1ee777bSKent Overstreet 2268a1ee777bSKent Overstreet dio->op.error = bch2_journal_error(&c->journal); 2269a1ee777bSKent Overstreet 2270a1ee777bSKent Overstreet bch2_dio_write_done(dio); 2271a1ee777bSKent Overstreet } 2272a1ee777bSKent Overstreet 2273a1ee777bSKent Overstreet static noinline void bch2_dio_write_flush(struct dio_write *dio) 2274a1ee777bSKent Overstreet { 2275a1ee777bSKent Overstreet struct bch_fs *c = dio->op.c; 2276a1ee777bSKent Overstreet struct bch_inode_unpacked inode; 2277a1ee777bSKent Overstreet int ret; 2278a1ee777bSKent Overstreet 2279a1ee777bSKent Overstreet dio->flush = 0; 2280a1ee777bSKent Overstreet 2281a1ee777bSKent Overstreet closure_init(&dio->op.cl, NULL); 2282a1ee777bSKent Overstreet 2283a1ee777bSKent Overstreet if (!dio->op.error) { 2284a1ee777bSKent Overstreet ret = bch2_inode_find_by_inum(c, inode_inum(dio->inode), &inode); 2285a8b3a677SKent Overstreet if (ret) { 2286a1ee777bSKent Overstreet dio->op.error = ret; 2287a8b3a677SKent Overstreet } else { 2288a1ee777bSKent Overstreet bch2_journal_flush_seq_async(&c->journal, inode.bi_journal_seq, &dio->op.cl); 2289a8b3a677SKent Overstreet bch2_inode_flush_nocow_writes_async(c, dio->inode, &dio->op.cl); 2290a8b3a677SKent Overstreet } 2291a1ee777bSKent Overstreet } 2292a1ee777bSKent Overstreet 2293a1ee777bSKent Overstreet if (dio->sync) { 2294a1ee777bSKent Overstreet closure_sync(&dio->op.cl); 2295a1ee777bSKent Overstreet closure_debug_destroy(&dio->op.cl); 2296a1ee777bSKent Overstreet } else { 2297a1ee777bSKent Overstreet continue_at(&dio->op.cl, bch2_dio_write_flush_done, NULL); 2298a1ee777bSKent Overstreet } 2299a1ee777bSKent Overstreet } 2300042a1f26SKent Overstreet 2301182c7bbfSKent Overstreet static __always_inline long bch2_dio_write_done(struct dio_write *dio) 2302182c7bbfSKent Overstreet { 2303182c7bbfSKent Overstreet struct kiocb *req = dio->req; 2304182c7bbfSKent Overstreet struct bch_inode_info *inode = dio->inode; 2305182c7bbfSKent Overstreet bool sync = dio->sync; 2306a1ee777bSKent Overstreet long ret; 2307a1ee777bSKent Overstreet 2308a1ee777bSKent Overstreet if (unlikely(dio->flush)) { 2309a1ee777bSKent Overstreet bch2_dio_write_flush(dio); 2310a1ee777bSKent Overstreet if (!sync) 2311a1ee777bSKent Overstreet return -EIOCBQUEUED; 2312a1ee777bSKent Overstreet } 2313182c7bbfSKent Overstreet 2314a7ecd30cSKent Overstreet bch2_pagecache_block_put(inode); 2315182c7bbfSKent Overstreet 2316182c7bbfSKent Overstreet if (dio->free_iov) 2317182c7bbfSKent Overstreet kfree(dio->iter.__iov); 2318a1ee777bSKent Overstreet 2319a1ee777bSKent Overstreet ret = dio->op.error ?: ((long) dio->written << 9); 2320182c7bbfSKent Overstreet bio_put(&dio->op.wbio.bio); 2321182c7bbfSKent Overstreet 2322182c7bbfSKent Overstreet /* inode->i_dio_count is our ref on inode and thus bch_fs */ 2323182c7bbfSKent Overstreet inode_dio_end(&inode->v); 2324182c7bbfSKent Overstreet 2325182c7bbfSKent Overstreet if (ret < 0) 2326182c7bbfSKent Overstreet ret = bch2_err_class(ret); 2327182c7bbfSKent Overstreet 2328182c7bbfSKent Overstreet if (!sync) { 2329182c7bbfSKent Overstreet req->ki_complete(req, ret); 2330182c7bbfSKent Overstreet ret = -EIOCBQUEUED; 2331182c7bbfSKent Overstreet } 2332182c7bbfSKent Overstreet return ret; 2333182c7bbfSKent Overstreet } 2334182c7bbfSKent Overstreet 2335182c7bbfSKent Overstreet static __always_inline void bch2_dio_write_end(struct dio_write *dio) 2336182c7bbfSKent Overstreet { 2337182c7bbfSKent Overstreet struct bch_fs *c = dio->op.c; 2338182c7bbfSKent Overstreet struct kiocb *req = dio->req; 2339182c7bbfSKent Overstreet struct bch_inode_info *inode = dio->inode; 2340182c7bbfSKent Overstreet struct bio *bio = &dio->op.wbio.bio; 2341182c7bbfSKent Overstreet 2342182c7bbfSKent Overstreet req->ki_pos += (u64) dio->op.written << 9; 2343182c7bbfSKent Overstreet dio->written += dio->op.written; 2344182c7bbfSKent Overstreet 23456b1b186aSKent Overstreet if (dio->extending) { 2346182c7bbfSKent Overstreet spin_lock(&inode->v.i_lock); 2347182c7bbfSKent Overstreet if (req->ki_pos > inode->v.i_size) 2348182c7bbfSKent Overstreet i_size_write(&inode->v, req->ki_pos); 2349182c7bbfSKent Overstreet spin_unlock(&inode->v.i_lock); 23506b1b186aSKent Overstreet } 23516b1b186aSKent Overstreet 23526b1b186aSKent Overstreet if (dio->op.i_sectors_delta || dio->quota_res.sectors) { 23536b1b186aSKent Overstreet mutex_lock(&inode->ei_quota_lock); 23546b1b186aSKent Overstreet __i_sectors_acct(c, inode, &dio->quota_res, dio->op.i_sectors_delta); 23556b1b186aSKent Overstreet __bch2_quota_reservation_put(c, inode, &dio->quota_res); 23566b1b186aSKent Overstreet mutex_unlock(&inode->ei_quota_lock); 23576b1b186aSKent Overstreet } 2358182c7bbfSKent Overstreet 2359182c7bbfSKent Overstreet bio_release_pages(bio, false); 2360182c7bbfSKent Overstreet 2361182c7bbfSKent Overstreet if (unlikely(dio->op.error)) 2362182c7bbfSKent Overstreet set_bit(EI_INODE_ERROR, &inode->ei_flags); 2363182c7bbfSKent Overstreet } 2364182c7bbfSKent Overstreet 23654d868d18SKent Overstreet static __always_inline long bch2_dio_write_loop(struct dio_write *dio) 23661c6fdbd8SKent Overstreet { 2367182c7bbfSKent Overstreet struct bch_fs *c = dio->op.c; 23681c6fdbd8SKent Overstreet struct kiocb *req = dio->req; 2369182c7bbfSKent Overstreet struct address_space *mapping = dio->mapping; 2370182c7bbfSKent Overstreet struct bch_inode_info *inode = dio->inode; 237101ad6737SKent Overstreet struct bch_io_opts opts; 23729a3df993SKent Overstreet struct bio *bio = &dio->op.wbio.bio; 2373eb8e6e9cSKent Overstreet unsigned unaligned, iter_count; 2374eb8e6e9cSKent Overstreet bool sync = dio->sync, dropped_locks; 23751c6fdbd8SKent Overstreet long ret; 23761c6fdbd8SKent Overstreet 237701ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 237801ad6737SKent Overstreet 23791c6fdbd8SKent Overstreet while (1) { 2380eb8e6e9cSKent Overstreet iter_count = dio->iter.count; 2381eb8e6e9cSKent Overstreet 2382182c7bbfSKent Overstreet EBUG_ON(current->faults_disabled_mapping); 23831c6fdbd8SKent Overstreet current->faults_disabled_mapping = mapping; 23841c6fdbd8SKent Overstreet 23851c6fdbd8SKent Overstreet ret = bio_iov_iter_get_pages(bio, &dio->iter); 23861c6fdbd8SKent Overstreet 2387eb8e6e9cSKent Overstreet dropped_locks = fdm_dropped_locks(); 2388eb8e6e9cSKent Overstreet 23891c6fdbd8SKent Overstreet current->faults_disabled_mapping = NULL; 23901c6fdbd8SKent Overstreet 2391eb8e6e9cSKent Overstreet /* 2392eb8e6e9cSKent Overstreet * If the fault handler returned an error but also signalled 2393eb8e6e9cSKent Overstreet * that it dropped & retook ei_pagecache_lock, we just need to 2394eb8e6e9cSKent Overstreet * re-shoot down the page cache and retry: 2395eb8e6e9cSKent Overstreet */ 2396eb8e6e9cSKent Overstreet if (dropped_locks && ret) 2397eb8e6e9cSKent Overstreet ret = 0; 2398eb8e6e9cSKent Overstreet 23991c6fdbd8SKent Overstreet if (unlikely(ret < 0)) 24001c6fdbd8SKent Overstreet goto err; 24011c6fdbd8SKent Overstreet 2402eb8e6e9cSKent Overstreet if (unlikely(dropped_locks)) { 2403eb8e6e9cSKent Overstreet ret = write_invalidate_inode_pages_range(mapping, 2404eb8e6e9cSKent Overstreet req->ki_pos, 2405eb8e6e9cSKent Overstreet req->ki_pos + iter_count - 1); 2406eb8e6e9cSKent Overstreet if (unlikely(ret)) 2407eb8e6e9cSKent Overstreet goto err; 2408eb8e6e9cSKent Overstreet 2409eb8e6e9cSKent Overstreet if (!bio->bi_iter.bi_size) 2410eb8e6e9cSKent Overstreet continue; 2411eb8e6e9cSKent Overstreet } 2412eb8e6e9cSKent Overstreet 24130a426c32SKent Overstreet unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1); 24140a426c32SKent Overstreet bio->bi_iter.bi_size -= unaligned; 24150a426c32SKent Overstreet iov_iter_revert(&dio->iter, unaligned); 24160a426c32SKent Overstreet 24170a426c32SKent Overstreet if (!bio->bi_iter.bi_size) { 24180a426c32SKent Overstreet /* 24190a426c32SKent Overstreet * bio_iov_iter_get_pages was only able to get < 24200a426c32SKent Overstreet * blocksize worth of pages: 24210a426c32SKent Overstreet */ 24220a426c32SKent Overstreet ret = -EFAULT; 24230a426c32SKent Overstreet goto err; 24240a426c32SKent Overstreet } 24250a426c32SKent Overstreet 242601ad6737SKent Overstreet bch2_write_op_init(&dio->op, c, opts); 2427182c7bbfSKent Overstreet dio->op.end_io = sync 2428182c7bbfSKent Overstreet ? NULL 2429182c7bbfSKent Overstreet : bch2_dio_write_loop_async; 2430042a1f26SKent Overstreet dio->op.target = dio->op.opts.foreground_target; 2431042a1f26SKent Overstreet dio->op.write_point = writepoint_hashed((unsigned long) current); 2432042a1f26SKent Overstreet dio->op.nr_replicas = dio->op.opts.data_replicas; 24338c6d298aSKent Overstreet dio->op.subvol = inode->ei_subvol; 2434042a1f26SKent Overstreet dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9); 2435a8b3a677SKent Overstreet dio->op.devs_need_flush = &inode->ei_devs_need_flush; 2436042a1f26SKent Overstreet 24371df3e199SKent Overstreet if (sync) 24381df3e199SKent Overstreet dio->op.flags |= BCH_WRITE_SYNC; 2439a6336910SKent Overstreet dio->op.flags |= BCH_WRITE_CHECK_ENOSPC; 2440042a1f26SKent Overstreet 24416b1b186aSKent Overstreet ret = bch2_quota_reservation_add(c, inode, &dio->quota_res, 24426b1b186aSKent Overstreet bio_sectors(bio), true); 24436b1b186aSKent Overstreet if (unlikely(ret)) 24446b1b186aSKent Overstreet goto err; 24456b1b186aSKent Overstreet 2446042a1f26SKent Overstreet ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio), 2447042a1f26SKent Overstreet dio->op.opts.data_replicas, 0); 2448042a1f26SKent Overstreet if (unlikely(ret) && 2449182c7bbfSKent Overstreet !bch2_dio_write_check_allocated(dio)) 2450042a1f26SKent Overstreet goto err; 24511c6fdbd8SKent Overstreet 24521c6fdbd8SKent Overstreet task_io_account_write(bio->bi_iter.bi_size); 24531c6fdbd8SKent Overstreet 2454182c7bbfSKent Overstreet if (unlikely(dio->iter.count) && 2455182c7bbfSKent Overstreet !dio->sync && 2456182c7bbfSKent Overstreet !dio->loop && 2457182c7bbfSKent Overstreet bch2_dio_write_copy_iov(dio)) 2458286d8ad0SKent Overstreet dio->sync = sync = true; 2459182c7bbfSKent Overstreet 24601c6fdbd8SKent Overstreet dio->loop = true; 2461f8f30863SKent Overstreet closure_call(&dio->op.cl, bch2_write, NULL, NULL); 24621c6fdbd8SKent Overstreet 2463182c7bbfSKent Overstreet if (!sync) 24641c6fdbd8SKent Overstreet return -EIOCBQUEUED; 24659a3df993SKent Overstreet 2466182c7bbfSKent Overstreet bch2_dio_write_end(dio); 24679a3df993SKent Overstreet 2468182c7bbfSKent Overstreet if (likely(!dio->iter.count) || dio->op.error) 24691c6fdbd8SKent Overstreet break; 2470f8f30863SKent Overstreet 24711c6fdbd8SKent Overstreet bio_reset(bio, NULL, REQ_OP_WRITE); 24721c6fdbd8SKent Overstreet } 2473182c7bbfSKent Overstreet out: 2474182c7bbfSKent Overstreet return bch2_dio_write_done(dio); 24751c6fdbd8SKent Overstreet err: 2476182c7bbfSKent Overstreet dio->op.error = ret; 24771c6fdbd8SKent Overstreet 24785468f119SKent Overstreet bio_release_pages(bio, false); 24796b1b186aSKent Overstreet 24806b1b186aSKent Overstreet bch2_quota_reservation_put(c, inode, &dio->quota_res); 2481182c7bbfSKent Overstreet goto out; 24821c6fdbd8SKent Overstreet } 24831c6fdbd8SKent Overstreet 24844d868d18SKent Overstreet static noinline __cold void bch2_dio_write_continue(struct dio_write *dio) 24851c6fdbd8SKent Overstreet { 2486182c7bbfSKent Overstreet struct mm_struct *mm = dio->mm; 24871c6fdbd8SKent Overstreet 2488182c7bbfSKent Overstreet bio_reset(&dio->op.wbio.bio, NULL, REQ_OP_WRITE); 2489182c7bbfSKent Overstreet 2490182c7bbfSKent Overstreet if (mm) 2491182c7bbfSKent Overstreet kthread_use_mm(mm); 24921c6fdbd8SKent Overstreet bch2_dio_write_loop(dio); 2493182c7bbfSKent Overstreet if (mm) 2494182c7bbfSKent Overstreet kthread_unuse_mm(mm); 24951c6fdbd8SKent Overstreet } 24961c6fdbd8SKent Overstreet 24974d868d18SKent Overstreet static void bch2_dio_write_loop_async(struct bch_write_op *op) 24984d868d18SKent Overstreet { 24994d868d18SKent Overstreet struct dio_write *dio = container_of(op, struct dio_write, op); 25004d868d18SKent Overstreet 25014d868d18SKent Overstreet bch2_dio_write_end(dio); 25024d868d18SKent Overstreet 25034d868d18SKent Overstreet if (likely(!dio->iter.count) || dio->op.error) 25044d868d18SKent Overstreet bch2_dio_write_done(dio); 25054d868d18SKent Overstreet else 25064d868d18SKent Overstreet bch2_dio_write_continue(dio); 25074d868d18SKent Overstreet } 25084d868d18SKent Overstreet 25091c6fdbd8SKent Overstreet static noinline 25101c6fdbd8SKent Overstreet ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) 25111c6fdbd8SKent Overstreet { 25121c6fdbd8SKent Overstreet struct file *file = req->ki_filp; 251354847d25SKent Overstreet struct address_space *mapping = file->f_mapping; 25141c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 25151c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 25161c6fdbd8SKent Overstreet struct dio_write *dio; 25171c6fdbd8SKent Overstreet struct bio *bio; 25187edcfbfeSKent Overstreet bool locked = true, extending; 25191c6fdbd8SKent Overstreet ssize_t ret; 25201c6fdbd8SKent Overstreet 25217edcfbfeSKent Overstreet prefetch(&c->opts); 25227edcfbfeSKent Overstreet prefetch((void *) &c->opts + 64); 25237edcfbfeSKent Overstreet prefetch(&inode->ei_inode); 25247edcfbfeSKent Overstreet prefetch((void *) &inode->ei_inode + 64); 25251c6fdbd8SKent Overstreet 25267edcfbfeSKent Overstreet inode_lock(&inode->v); 25277edcfbfeSKent Overstreet 25287edcfbfeSKent Overstreet ret = generic_write_checks(req, iter); 25297edcfbfeSKent Overstreet if (unlikely(ret <= 0)) 25307edcfbfeSKent Overstreet goto err; 25317edcfbfeSKent Overstreet 25327edcfbfeSKent Overstreet ret = file_remove_privs(file); 25337edcfbfeSKent Overstreet if (unlikely(ret)) 25347edcfbfeSKent Overstreet goto err; 25357edcfbfeSKent Overstreet 25367edcfbfeSKent Overstreet ret = file_update_time(file); 25377edcfbfeSKent Overstreet if (unlikely(ret)) 25387edcfbfeSKent Overstreet goto err; 25391c6fdbd8SKent Overstreet 2540919dbbd1SKent Overstreet if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) 25417edcfbfeSKent Overstreet goto err; 25427edcfbfeSKent Overstreet 25437edcfbfeSKent Overstreet inode_dio_begin(&inode->v); 2544a7ecd30cSKent Overstreet bch2_pagecache_block_get(inode); 25457edcfbfeSKent Overstreet 25467edcfbfeSKent Overstreet extending = req->ki_pos + iter->count > inode->v.i_size; 25477edcfbfeSKent Overstreet if (!extending) { 25487edcfbfeSKent Overstreet inode_unlock(&inode->v); 25497edcfbfeSKent Overstreet locked = false; 25507edcfbfeSKent Overstreet } 25511c6fdbd8SKent Overstreet 25521c6fdbd8SKent Overstreet bio = bio_alloc_bioset(NULL, 25534d126dc8SKent Overstreet bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), 25541c6fdbd8SKent Overstreet REQ_OP_WRITE, 25551c6fdbd8SKent Overstreet GFP_KERNEL, 25561c6fdbd8SKent Overstreet &c->dio_write_bioset); 25579a3df993SKent Overstreet dio = container_of(bio, struct dio_write, op.wbio.bio); 25581c6fdbd8SKent Overstreet dio->req = req; 2559182c7bbfSKent Overstreet dio->mapping = mapping; 2560182c7bbfSKent Overstreet dio->inode = inode; 2561ed484030SKent Overstreet dio->mm = current->mm; 25621c6fdbd8SKent Overstreet dio->loop = false; 25636b1b186aSKent Overstreet dio->extending = extending; 25647edcfbfeSKent Overstreet dio->sync = is_sync_kiocb(req) || extending; 2565a1ee777bSKent Overstreet dio->flush = iocb_is_dsync(req) && !c->opts.journal_flush_disabled; 25661c6fdbd8SKent Overstreet dio->free_iov = false; 25671c6fdbd8SKent Overstreet dio->quota_res.sectors = 0; 2568042a1f26SKent Overstreet dio->written = 0; 25691c6fdbd8SKent Overstreet dio->iter = *iter; 2570182c7bbfSKent Overstreet dio->op.c = c; 25719a3df993SKent Overstreet 2572a023127aSKent Overstreet if (unlikely(mapping->nrpages)) { 257354847d25SKent Overstreet ret = write_invalidate_inode_pages_range(mapping, 257454847d25SKent Overstreet req->ki_pos, 257554847d25SKent Overstreet req->ki_pos + iter->count - 1); 257654847d25SKent Overstreet if (unlikely(ret)) 257754847d25SKent Overstreet goto err_put_bio; 2578a023127aSKent Overstreet } 257954847d25SKent Overstreet 25807edcfbfeSKent Overstreet ret = bch2_dio_write_loop(dio); 25811c6fdbd8SKent Overstreet err: 25827edcfbfeSKent Overstreet if (locked) 25837edcfbfeSKent Overstreet inode_unlock(&inode->v); 25847edcfbfeSKent Overstreet return ret; 25857edcfbfeSKent Overstreet err_put_bio: 2586a7ecd30cSKent Overstreet bch2_pagecache_block_put(inode); 25871c6fdbd8SKent Overstreet bio_put(bio); 25887edcfbfeSKent Overstreet inode_dio_end(&inode->v); 25897edcfbfeSKent Overstreet goto err; 25901c6fdbd8SKent Overstreet } 25911c6fdbd8SKent Overstreet 25927edcfbfeSKent Overstreet ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from) 25931c6fdbd8SKent Overstreet { 25941c6fdbd8SKent Overstreet struct file *file = iocb->ki_filp; 25957edcfbfeSKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 25961c6fdbd8SKent Overstreet ssize_t ret; 25971c6fdbd8SKent Overstreet 25985c1ef830SKent Overstreet if (iocb->ki_flags & IOCB_DIRECT) { 25995c1ef830SKent Overstreet ret = bch2_direct_write(iocb, from); 26005c1ef830SKent Overstreet goto out; 26015c1ef830SKent Overstreet } 26021c6fdbd8SKent Overstreet 26037edcfbfeSKent Overstreet inode_lock(&inode->v); 26047edcfbfeSKent Overstreet 26057edcfbfeSKent Overstreet ret = generic_write_checks(iocb, from); 26067edcfbfeSKent Overstreet if (ret <= 0) 26077edcfbfeSKent Overstreet goto unlock; 26087edcfbfeSKent Overstreet 26091c6fdbd8SKent Overstreet ret = file_remove_privs(file); 26101c6fdbd8SKent Overstreet if (ret) 26117edcfbfeSKent Overstreet goto unlock; 26121c6fdbd8SKent Overstreet 26131c6fdbd8SKent Overstreet ret = file_update_time(file); 26141c6fdbd8SKent Overstreet if (ret) 26157edcfbfeSKent Overstreet goto unlock; 26161c6fdbd8SKent Overstreet 26177edcfbfeSKent Overstreet ret = bch2_buffered_write(iocb, from); 26181c6fdbd8SKent Overstreet if (likely(ret > 0)) 26191c6fdbd8SKent Overstreet iocb->ki_pos += ret; 26207edcfbfeSKent Overstreet unlock: 26211c6fdbd8SKent Overstreet inode_unlock(&inode->v); 26221c6fdbd8SKent Overstreet 26237edcfbfeSKent Overstreet if (ret > 0) 26241c6fdbd8SKent Overstreet ret = generic_write_sync(iocb, ret); 26255c1ef830SKent Overstreet out: 26265c1ef830SKent Overstreet return bch2_err_class(ret); 26271c6fdbd8SKent Overstreet } 26281c6fdbd8SKent Overstreet 26291c6fdbd8SKent Overstreet /* fsync: */ 26301c6fdbd8SKent Overstreet 263168a2054dSKent Overstreet /* 263268a2054dSKent Overstreet * inode->ei_inode.bi_journal_seq won't be up to date since it's set in an 263368a2054dSKent Overstreet * insert trigger: look up the btree inode instead 263468a2054dSKent Overstreet */ 2635a8b3a677SKent Overstreet static int bch2_flush_inode(struct bch_fs *c, 2636a8b3a677SKent Overstreet struct bch_inode_info *inode) 263768a2054dSKent Overstreet { 2638a8b3a677SKent Overstreet struct bch_inode_unpacked u; 263968a2054dSKent Overstreet int ret; 264068a2054dSKent Overstreet 264168a2054dSKent Overstreet if (c->opts.journal_flush_disabled) 264268a2054dSKent Overstreet return 0; 264368a2054dSKent Overstreet 2644a8b3a677SKent Overstreet ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u); 264568a2054dSKent Overstreet if (ret) 264668a2054dSKent Overstreet return ret; 264768a2054dSKent Overstreet 2648a8b3a677SKent Overstreet return bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?: 2649a8b3a677SKent Overstreet bch2_inode_flush_nocow_writes(c, inode); 265068a2054dSKent Overstreet } 265168a2054dSKent Overstreet 26521c6fdbd8SKent Overstreet int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync) 26531c6fdbd8SKent Overstreet { 26541c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 26551c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 265668a2054dSKent Overstreet int ret, ret2, ret3; 26571c6fdbd8SKent Overstreet 26581c6fdbd8SKent Overstreet ret = file_write_and_wait_range(file, start, end); 265968a2054dSKent Overstreet ret2 = sync_inode_metadata(&inode->v, 1); 2660a8b3a677SKent Overstreet ret3 = bch2_flush_inode(c, inode); 26611c6fdbd8SKent Overstreet 26625c1ef830SKent Overstreet return bch2_err_class(ret ?: ret2 ?: ret3); 26631c6fdbd8SKent Overstreet } 26641c6fdbd8SKent Overstreet 26651c6fdbd8SKent Overstreet /* truncate: */ 26661c6fdbd8SKent Overstreet 26676fed42bbSKent Overstreet static inline int range_has_data(struct bch_fs *c, u32 subvol, 26681c6fdbd8SKent Overstreet struct bpos start, 26691c6fdbd8SKent Overstreet struct bpos end) 26701c6fdbd8SKent Overstreet { 2671424eb881SKent Overstreet struct btree_trans trans; 267267e0dd8fSKent Overstreet struct btree_iter iter; 26731c6fdbd8SKent Overstreet struct bkey_s_c k; 26741c6fdbd8SKent Overstreet int ret = 0; 26751c6fdbd8SKent Overstreet 267620bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 26776fed42bbSKent Overstreet retry: 26786fed42bbSKent Overstreet bch2_trans_begin(&trans); 26796fed42bbSKent Overstreet 26806fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, subvol, &start.snapshot); 26816fed42bbSKent Overstreet if (ret) 26826fed42bbSKent Overstreet goto err; 2683424eb881SKent Overstreet 2684c72f687aSKent Overstreet for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_extents, start, end, 0, k, ret) 26851c6fdbd8SKent Overstreet if (bkey_extent_is_data(k.k)) { 26861c6fdbd8SKent Overstreet ret = 1; 26871c6fdbd8SKent Overstreet break; 26881c6fdbd8SKent Overstreet } 26896fed42bbSKent Overstreet start = iter.pos; 269067e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 26916fed42bbSKent Overstreet err: 2692549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 26936fed42bbSKent Overstreet goto retry; 26941c6fdbd8SKent Overstreet 26959a796fdbSKent Overstreet bch2_trans_exit(&trans); 26969a796fdbSKent Overstreet return ret; 26971c6fdbd8SKent Overstreet } 26981c6fdbd8SKent Overstreet 2699*959f7368SKent Overstreet static int __bch2_truncate_folio(struct bch_inode_info *inode, 27001c6fdbd8SKent Overstreet pgoff_t index, loff_t start, loff_t end) 27011c6fdbd8SKent Overstreet { 27021c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 27031c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 27043342ac13SKent Overstreet struct bch_folio *s; 27051c6fdbd8SKent Overstreet unsigned start_offset = start & (PAGE_SIZE - 1); 27061c6fdbd8SKent Overstreet unsigned end_offset = ((end - 1) & (PAGE_SIZE - 1)) + 1; 2707a99b1cafSKent Overstreet unsigned i; 270830bff594SKent Overstreet struct folio *folio; 2709b19d307dSKent Overstreet s64 i_sectors_delta = 0; 27101c6fdbd8SKent Overstreet int ret = 0; 27111c6fdbd8SKent Overstreet 271230bff594SKent Overstreet folio = filemap_lock_folio(mapping, index); 271330bff594SKent Overstreet if (!folio) { 27141c6fdbd8SKent Overstreet /* 27151c6fdbd8SKent Overstreet * XXX: we're doing two index lookups when we end up reading the 271630bff594SKent Overstreet * folio 27171c6fdbd8SKent Overstreet */ 27186fed42bbSKent Overstreet ret = range_has_data(c, inode->ei_subvol, 2719c72f687aSKent Overstreet POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT)), 2720c72f687aSKent Overstreet POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT) + PAGE_SECTORS)); 27211c6fdbd8SKent Overstreet if (ret <= 0) 27221c6fdbd8SKent Overstreet return ret; 27231c6fdbd8SKent Overstreet 272430bff594SKent Overstreet folio = __filemap_get_folio(mapping, index, 272530bff594SKent Overstreet FGP_LOCK|FGP_CREAT, GFP_KERNEL); 272630bff594SKent Overstreet if (unlikely(!folio)) { 27271c6fdbd8SKent Overstreet ret = -ENOMEM; 27281c6fdbd8SKent Overstreet goto out; 27291c6fdbd8SKent Overstreet } 27301c6fdbd8SKent Overstreet } 27311c6fdbd8SKent Overstreet 2732*959f7368SKent Overstreet BUG_ON(start >= folio_end_pos(folio)); 2733*959f7368SKent Overstreet BUG_ON(end <= folio_pos(folio)); 2734*959f7368SKent Overstreet 2735*959f7368SKent Overstreet start_offset = max(start, folio_pos(folio)) - folio_pos(folio); 2736*959f7368SKent Overstreet end_offset = min(end, folio_end_pos(folio)) - folio_pos(folio); 2737*959f7368SKent Overstreet 2738*959f7368SKent Overstreet /* Folio boundary? Nothing to do */ 2739*959f7368SKent Overstreet if (start_offset == 0 && 2740*959f7368SKent Overstreet end_offset == folio_size(folio)) { 2741*959f7368SKent Overstreet ret = 0; 2742*959f7368SKent Overstreet goto unlock; 2743*959f7368SKent Overstreet } 2744*959f7368SKent Overstreet 274530bff594SKent Overstreet s = bch2_folio_create(folio, 0); 2746a99b1cafSKent Overstreet if (!s) { 2747a99b1cafSKent Overstreet ret = -ENOMEM; 2748a99b1cafSKent Overstreet goto unlock; 2749a99b1cafSKent Overstreet } 2750a99b1cafSKent Overstreet 275130bff594SKent Overstreet if (!folio_test_uptodate(folio)) { 275230bff594SKent Overstreet ret = bch2_read_single_folio(folio, mapping); 27531c6fdbd8SKent Overstreet if (ret) 27541c6fdbd8SKent Overstreet goto unlock; 27551c6fdbd8SKent Overstreet } 27561c6fdbd8SKent Overstreet 2757c437e153SKent Overstreet BUG_ON(!s->uptodate); 2758c437e153SKent Overstreet 2759a99b1cafSKent Overstreet for (i = round_up(start_offset, block_bytes(c)) >> 9; 2760a99b1cafSKent Overstreet i < round_down(end_offset, block_bytes(c)) >> 9; 2761a99b1cafSKent Overstreet i++) { 2762a99b1cafSKent Overstreet s->s[i].nr_replicas = 0; 2763b19d307dSKent Overstreet if (s->s[i].state == SECTOR_DIRTY) 2764b19d307dSKent Overstreet i_sectors_delta--; 2765a99b1cafSKent Overstreet s->s[i].state = SECTOR_UNALLOCATED; 2766a99b1cafSKent Overstreet } 2767a99b1cafSKent Overstreet 2768b19d307dSKent Overstreet i_sectors_acct(c, inode, NULL, i_sectors_delta); 2769b19d307dSKent Overstreet 277074163da7SKent Overstreet /* 277130bff594SKent Overstreet * Caller needs to know whether this folio will be written out by 277274163da7SKent Overstreet * writeback - doing an i_size update if necessary - or whether it will 277374163da7SKent Overstreet * be responsible for the i_size update: 277474163da7SKent Overstreet */ 2775*959f7368SKent Overstreet ret = s->s[(min(inode->v.i_size, folio_end_pos(folio)) - 2776*959f7368SKent Overstreet folio_pos(folio) - 1) >> 9].state >= SECTOR_DIRTY; 277774163da7SKent Overstreet 277830bff594SKent Overstreet folio_zero_segment(folio, start_offset, end_offset); 2779a99b1cafSKent Overstreet 27801c6fdbd8SKent Overstreet /* 27811c6fdbd8SKent Overstreet * Bit of a hack - we don't want truncate to fail due to -ENOSPC. 27821c6fdbd8SKent Overstreet * 278330bff594SKent Overstreet * XXX: because we aren't currently tracking whether the folio has actual 27841c6fdbd8SKent Overstreet * data in it (vs. just 0s, or only partially written) this wrong. ick. 27851c6fdbd8SKent Overstreet */ 278630bff594SKent Overstreet BUG_ON(bch2_get_folio_disk_reservation(c, inode, folio, false)); 27871c6fdbd8SKent Overstreet 27889ba2eb25SKent Overstreet /* 27899ba2eb25SKent Overstreet * This removes any writeable userspace mappings; we need to force 27909ba2eb25SKent Overstreet * .page_mkwrite to be called again before any mmapped writes, to 27919ba2eb25SKent Overstreet * redirty the full page: 27929ba2eb25SKent Overstreet */ 279330bff594SKent Overstreet folio_mkclean(folio); 279430bff594SKent Overstreet filemap_dirty_folio(mapping, folio); 27951c6fdbd8SKent Overstreet unlock: 279630bff594SKent Overstreet folio_unlock(folio); 279730bff594SKent Overstreet folio_put(folio); 27981c6fdbd8SKent Overstreet out: 27991c6fdbd8SKent Overstreet return ret; 28001c6fdbd8SKent Overstreet } 28011c6fdbd8SKent Overstreet 2802*959f7368SKent Overstreet static int bch2_truncate_folio(struct bch_inode_info *inode, loff_t from) 28031c6fdbd8SKent Overstreet { 2804*959f7368SKent Overstreet return __bch2_truncate_folio(inode, from >> PAGE_SHIFT, 2805*959f7368SKent Overstreet from, ANYSINT_MAX(loff_t)); 28061c6fdbd8SKent Overstreet } 28071c6fdbd8SKent Overstreet 2808*959f7368SKent Overstreet static int bch2_truncate_folios(struct bch_inode_info *inode, 280974163da7SKent Overstreet loff_t start, loff_t end) 281074163da7SKent Overstreet { 2811*959f7368SKent Overstreet int ret = __bch2_truncate_folio(inode, start >> PAGE_SHIFT, 281274163da7SKent Overstreet start, end); 281374163da7SKent Overstreet 281474163da7SKent Overstreet if (ret >= 0 && 281574163da7SKent Overstreet start >> PAGE_SHIFT != end >> PAGE_SHIFT) 2816*959f7368SKent Overstreet ret = __bch2_truncate_folio(inode, 2817*959f7368SKent Overstreet (end - 1) >> PAGE_SHIFT, 281874163da7SKent Overstreet start, end); 281974163da7SKent Overstreet return ret; 282074163da7SKent Overstreet } 282174163da7SKent Overstreet 282268a507a2SKent Overstreet static int bch2_extend(struct mnt_idmap *idmap, 282368a507a2SKent Overstreet struct bch_inode_info *inode, 2824e0541a93SKent Overstreet struct bch_inode_unpacked *inode_u, 2825e0541a93SKent Overstreet struct iattr *iattr) 28261c6fdbd8SKent Overstreet { 28271c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 28281c6fdbd8SKent Overstreet int ret; 28291c6fdbd8SKent Overstreet 2830e0541a93SKent Overstreet /* 2831e0541a93SKent Overstreet * sync appends: 28322925fc49SKent Overstreet * 28332925fc49SKent Overstreet * this has to be done _before_ extending i_size: 2834e0541a93SKent Overstreet */ 2835e0541a93SKent Overstreet ret = filemap_write_and_wait_range(mapping, inode_u->bi_size, S64_MAX); 28361c6fdbd8SKent Overstreet if (ret) 28371c6fdbd8SKent Overstreet return ret; 28381c6fdbd8SKent Overstreet 28391c6fdbd8SKent Overstreet truncate_setsize(&inode->v, iattr->ia_size); 28401c6fdbd8SKent Overstreet 284168a507a2SKent Overstreet return bch2_setattr_nonsize(idmap, inode, iattr); 28421c6fdbd8SKent Overstreet } 28431c6fdbd8SKent Overstreet 284454e2264eSKent Overstreet static int bch2_truncate_finish_fn(struct bch_inode_info *inode, 284554e2264eSKent Overstreet struct bch_inode_unpacked *bi, 284654e2264eSKent Overstreet void *p) 284754e2264eSKent Overstreet { 284854e2264eSKent Overstreet bi->bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; 284954e2264eSKent Overstreet return 0; 285054e2264eSKent Overstreet } 285154e2264eSKent Overstreet 285254e2264eSKent Overstreet static int bch2_truncate_start_fn(struct bch_inode_info *inode, 285354e2264eSKent Overstreet struct bch_inode_unpacked *bi, void *p) 285454e2264eSKent Overstreet { 285554e2264eSKent Overstreet u64 *new_i_size = p; 285654e2264eSKent Overstreet 285754e2264eSKent Overstreet bi->bi_flags |= BCH_INODE_I_SIZE_DIRTY; 285854e2264eSKent Overstreet bi->bi_size = *new_i_size; 285954e2264eSKent Overstreet return 0; 286054e2264eSKent Overstreet } 286154e2264eSKent Overstreet 286268a507a2SKent Overstreet int bch2_truncate(struct mnt_idmap *idmap, 286368a507a2SKent Overstreet struct bch_inode_info *inode, struct iattr *iattr) 28641c6fdbd8SKent Overstreet { 28651c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 28661c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 2867e0541a93SKent Overstreet struct bch_inode_unpacked inode_u; 286854e2264eSKent Overstreet u64 new_i_size = iattr->ia_size; 28692e87eae1SKent Overstreet s64 i_sectors_delta = 0; 28701c6fdbd8SKent Overstreet int ret = 0; 28711c6fdbd8SKent Overstreet 287268a507a2SKent Overstreet /* 287378d66ab1SDan Robertson * If the truncate call with change the size of the file, the 287478d66ab1SDan Robertson * cmtimes should be updated. If the size will not change, we 287578d66ab1SDan Robertson * do not need to update the cmtimes. 287668a507a2SKent Overstreet */ 287778d66ab1SDan Robertson if (iattr->ia_size != inode->v.i_size) { 287868a507a2SKent Overstreet if (!(iattr->ia_valid & ATTR_MTIME)) 287968a507a2SKent Overstreet ktime_get_coarse_real_ts64(&iattr->ia_mtime); 288068a507a2SKent Overstreet if (!(iattr->ia_valid & ATTR_CTIME)) 288168a507a2SKent Overstreet ktime_get_coarse_real_ts64(&iattr->ia_ctime); 288268a507a2SKent Overstreet iattr->ia_valid |= ATTR_MTIME|ATTR_CTIME; 288378d66ab1SDan Robertson } 288468a507a2SKent Overstreet 28851c6fdbd8SKent Overstreet inode_dio_wait(&inode->v); 2886a7ecd30cSKent Overstreet bch2_pagecache_block_get(inode); 28871c6fdbd8SKent Overstreet 28886fed42bbSKent Overstreet ret = bch2_inode_find_by_inum(c, inode_inum(inode), &inode_u); 2889e0541a93SKent Overstreet if (ret) 2890e0541a93SKent Overstreet goto err; 28911c6fdbd8SKent Overstreet 2892c45d473dSKent Overstreet /* 2893c45d473dSKent Overstreet * check this before next assertion; on filesystem error our normal 2894c45d473dSKent Overstreet * invariants are a bit broken (truncate has to truncate the page cache 2895c45d473dSKent Overstreet * before the inode). 2896c45d473dSKent Overstreet */ 2897c45d473dSKent Overstreet ret = bch2_journal_error(&c->journal); 2898c45d473dSKent Overstreet if (ret) 2899c45d473dSKent Overstreet goto err; 2900c45d473dSKent Overstreet 29018eb71e9eSKent Overstreet WARN_ONCE(!test_bit(EI_INODE_ERROR, &inode->ei_flags) && 29028eb71e9eSKent Overstreet inode->v.i_size < inode_u.bi_size, 29038eb71e9eSKent Overstreet "truncate spotted in mem i_size < btree i_size: %llu < %llu\n", 29048eb71e9eSKent Overstreet (u64) inode->v.i_size, inode_u.bi_size); 2905e0541a93SKent Overstreet 2906e0541a93SKent Overstreet if (iattr->ia_size > inode->v.i_size) { 290768a507a2SKent Overstreet ret = bch2_extend(idmap, inode, &inode_u, iattr); 290854e2264eSKent Overstreet goto err; 29091c6fdbd8SKent Overstreet } 29101c6fdbd8SKent Overstreet 291168a507a2SKent Overstreet iattr->ia_valid &= ~ATTR_SIZE; 291268a507a2SKent Overstreet 2913*959f7368SKent Overstreet ret = bch2_truncate_folio(inode, iattr->ia_size); 291474163da7SKent Overstreet if (unlikely(ret < 0)) 291554e2264eSKent Overstreet goto err; 29161c6fdbd8SKent Overstreet 29176cc3535dSKent Overstreet /* 29186cc3535dSKent Overstreet * When extending, we're going to write the new i_size to disk 29196cc3535dSKent Overstreet * immediately so we need to flush anything above the current on disk 29206cc3535dSKent Overstreet * i_size first: 29216cc3535dSKent Overstreet * 29226cc3535dSKent Overstreet * Also, when extending we need to flush the page that i_size currently 29236cc3535dSKent Overstreet * straddles - if it's mapped to userspace, we need to ensure that 29246cc3535dSKent Overstreet * userspace has to redirty it and call .mkwrite -> set_page_dirty 29256cc3535dSKent Overstreet * again to allocate the part of the page that was extended. 29266cc3535dSKent Overstreet */ 2927e0541a93SKent Overstreet if (iattr->ia_size > inode_u.bi_size) 29281c6fdbd8SKent Overstreet ret = filemap_write_and_wait_range(mapping, 2929e0541a93SKent Overstreet inode_u.bi_size, 29301c6fdbd8SKent Overstreet iattr->ia_size - 1); 29311c6fdbd8SKent Overstreet else if (iattr->ia_size & (PAGE_SIZE - 1)) 29321c6fdbd8SKent Overstreet ret = filemap_write_and_wait_range(mapping, 29331c6fdbd8SKent Overstreet round_down(iattr->ia_size, PAGE_SIZE), 29341c6fdbd8SKent Overstreet iattr->ia_size - 1); 29351c6fdbd8SKent Overstreet if (ret) 293654e2264eSKent Overstreet goto err; 29371c6fdbd8SKent Overstreet 293854e2264eSKent Overstreet mutex_lock(&inode->ei_update_lock); 293954e2264eSKent Overstreet ret = bch2_write_inode(c, inode, bch2_truncate_start_fn, 294054e2264eSKent Overstreet &new_i_size, 0); 294154e2264eSKent Overstreet mutex_unlock(&inode->ei_update_lock); 29421c6fdbd8SKent Overstreet 29431c6fdbd8SKent Overstreet if (unlikely(ret)) 294454e2264eSKent Overstreet goto err; 29451c6fdbd8SKent Overstreet 29461c6fdbd8SKent Overstreet truncate_setsize(&inode->v, iattr->ia_size); 29471c6fdbd8SKent Overstreet 29488c6d298aSKent Overstreet ret = bch2_fpunch(c, inode_inum(inode), 2949a99b1cafSKent Overstreet round_up(iattr->ia_size, block_bytes(c)) >> 9, 295068a2054dSKent Overstreet U64_MAX, &i_sectors_delta); 29512e87eae1SKent Overstreet i_sectors_acct(c, inode, NULL, i_sectors_delta); 29522e87eae1SKent Overstreet 2953b33bf1bcSKent Overstreet bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks && 2954b33bf1bcSKent Overstreet !bch2_journal_error(&c->journal), c, 2955b33bf1bcSKent Overstreet "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", 2956b33bf1bcSKent Overstreet inode->v.i_ino, (u64) inode->v.i_blocks, 2957b33bf1bcSKent Overstreet inode->ei_inode.bi_sectors); 29581c6fdbd8SKent Overstreet if (unlikely(ret)) 295954e2264eSKent Overstreet goto err; 29601c6fdbd8SKent Overstreet 296154e2264eSKent Overstreet mutex_lock(&inode->ei_update_lock); 296268a507a2SKent Overstreet ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL, 0); 296354e2264eSKent Overstreet mutex_unlock(&inode->ei_update_lock); 296468a507a2SKent Overstreet 296568a507a2SKent Overstreet ret = bch2_setattr_nonsize(idmap, inode, iattr); 296654e2264eSKent Overstreet err: 2967a7ecd30cSKent Overstreet bch2_pagecache_block_put(inode); 29685c1ef830SKent Overstreet return bch2_err_class(ret); 29691c6fdbd8SKent Overstreet } 29701c6fdbd8SKent Overstreet 29711c6fdbd8SKent Overstreet /* fallocate: */ 29721c6fdbd8SKent Overstreet 2973050197b1SKent Overstreet static int inode_update_times_fn(struct bch_inode_info *inode, 2974050197b1SKent Overstreet struct bch_inode_unpacked *bi, void *p) 2975050197b1SKent Overstreet { 2976050197b1SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 2977050197b1SKent Overstreet 2978050197b1SKent Overstreet bi->bi_mtime = bi->bi_ctime = bch2_current_time(c); 2979050197b1SKent Overstreet return 0; 2980050197b1SKent Overstreet } 2981050197b1SKent Overstreet 29822e87eae1SKent Overstreet static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len) 29831c6fdbd8SKent Overstreet { 29841c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 298574163da7SKent Overstreet u64 end = offset + len; 298674163da7SKent Overstreet u64 block_start = round_up(offset, block_bytes(c)); 298774163da7SKent Overstreet u64 block_end = round_down(end, block_bytes(c)); 298874163da7SKent Overstreet bool truncated_last_page; 29891c6fdbd8SKent Overstreet int ret = 0; 29901c6fdbd8SKent Overstreet 2991*959f7368SKent Overstreet ret = bch2_truncate_folios(inode, offset, end); 299274163da7SKent Overstreet if (unlikely(ret < 0)) 29931c6fdbd8SKent Overstreet goto err; 29941c6fdbd8SKent Overstreet 299574163da7SKent Overstreet truncated_last_page = ret; 29961c6fdbd8SKent Overstreet 299774163da7SKent Overstreet truncate_pagecache_range(&inode->v, offset, end - 1); 29981c6fdbd8SKent Overstreet 299974163da7SKent Overstreet if (block_start < block_end) { 30002e87eae1SKent Overstreet s64 i_sectors_delta = 0; 30012e87eae1SKent Overstreet 30028c6d298aSKent Overstreet ret = bch2_fpunch(c, inode_inum(inode), 300374163da7SKent Overstreet block_start >> 9, block_end >> 9, 30042e87eae1SKent Overstreet &i_sectors_delta); 30052e87eae1SKent Overstreet i_sectors_acct(c, inode, NULL, i_sectors_delta); 30062e87eae1SKent Overstreet } 3007050197b1SKent Overstreet 3008050197b1SKent Overstreet mutex_lock(&inode->ei_update_lock); 300974163da7SKent Overstreet if (end >= inode->v.i_size && !truncated_last_page) { 301074163da7SKent Overstreet ret = bch2_write_inode_size(c, inode, inode->v.i_size, 301174163da7SKent Overstreet ATTR_MTIME|ATTR_CTIME); 301274163da7SKent Overstreet } else { 3013050197b1SKent Overstreet ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, 301474163da7SKent Overstreet ATTR_MTIME|ATTR_CTIME); 301574163da7SKent Overstreet } 3016050197b1SKent Overstreet mutex_unlock(&inode->ei_update_lock); 30171c6fdbd8SKent Overstreet err: 30181c6fdbd8SKent Overstreet return ret; 30191c6fdbd8SKent Overstreet } 30201c6fdbd8SKent Overstreet 30212e87eae1SKent Overstreet static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, 30225f786787SKent Overstreet loff_t offset, loff_t len, 30235f786787SKent Overstreet bool insert) 30241c6fdbd8SKent Overstreet { 30251c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 30261c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 302707a1006aSKent Overstreet struct bkey_buf copy; 3028d69f41d6SKent Overstreet struct btree_trans trans; 302967e0dd8fSKent Overstreet struct btree_iter src, dst, del; 30305f786787SKent Overstreet loff_t shift, new_size; 30315f786787SKent Overstreet u64 src_start; 303250dc0f69SKent Overstreet int ret = 0; 30331c6fdbd8SKent Overstreet 30341c6fdbd8SKent Overstreet if ((offset | len) & (block_bytes(c) - 1)) 30351c6fdbd8SKent Overstreet return -EINVAL; 30361c6fdbd8SKent Overstreet 30375f786787SKent Overstreet if (insert) { 30385f786787SKent Overstreet if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len) 303974163da7SKent Overstreet return -EFBIG; 30405f786787SKent Overstreet 30415f786787SKent Overstreet if (offset >= inode->v.i_size) 304274163da7SKent Overstreet return -EINVAL; 30435f786787SKent Overstreet 30445f786787SKent Overstreet src_start = U64_MAX; 30455f786787SKent Overstreet shift = len; 30465f786787SKent Overstreet } else { 30471c6fdbd8SKent Overstreet if (offset + len >= inode->v.i_size) 304874163da7SKent Overstreet return -EINVAL; 30491c6fdbd8SKent Overstreet 30505f786787SKent Overstreet src_start = offset + len; 30515f786787SKent Overstreet shift = -len; 30525f786787SKent Overstreet } 30531c6fdbd8SKent Overstreet 30545f786787SKent Overstreet new_size = inode->v.i_size + shift; 30551c6fdbd8SKent Overstreet 30561c6fdbd8SKent Overstreet ret = write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX); 30571c6fdbd8SKent Overstreet if (ret) 305874163da7SKent Overstreet return ret; 30591c6fdbd8SKent Overstreet 30605f786787SKent Overstreet if (insert) { 30615f786787SKent Overstreet i_size_write(&inode->v, new_size); 30625f786787SKent Overstreet mutex_lock(&inode->ei_update_lock); 30635f786787SKent Overstreet ret = bch2_write_inode_size(c, inode, new_size, 30645f786787SKent Overstreet ATTR_MTIME|ATTR_CTIME); 30655f786787SKent Overstreet mutex_unlock(&inode->ei_update_lock); 30665f786787SKent Overstreet } else { 30672e87eae1SKent Overstreet s64 i_sectors_delta = 0; 30682e87eae1SKent Overstreet 30698c6d298aSKent Overstreet ret = bch2_fpunch(c, inode_inum(inode), 30702e87eae1SKent Overstreet offset >> 9, (offset + len) >> 9, 30712e87eae1SKent Overstreet &i_sectors_delta); 30722e87eae1SKent Overstreet i_sectors_acct(c, inode, NULL, i_sectors_delta); 30732e87eae1SKent Overstreet 307463095894SKent Overstreet if (ret) 307574163da7SKent Overstreet return ret; 30765f786787SKent Overstreet } 30778ef231bdSKent Overstreet 307850dc0f69SKent Overstreet bch2_bkey_buf_init(©); 3079f7beb4caSKent Overstreet bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); 308067e0dd8fSKent Overstreet bch2_trans_iter_init(&trans, &src, BTREE_ID_extents, 30815f786787SKent Overstreet POS(inode->v.i_ino, src_start >> 9), 308263095894SKent Overstreet BTREE_ITER_INTENT); 308367e0dd8fSKent Overstreet bch2_trans_copy_iter(&dst, &src); 308467e0dd8fSKent Overstreet bch2_trans_copy_iter(&del, &src); 30855f786787SKent Overstreet 3086549d173cSKent Overstreet while (ret == 0 || 3087549d173cSKent Overstreet bch2_err_matches(ret, BCH_ERR_transaction_restart)) { 308863095894SKent Overstreet struct disk_reservation disk_res = 308963095894SKent Overstreet bch2_disk_reservation_init(c, 0); 309063095894SKent Overstreet struct bkey_i delete; 309163095894SKent Overstreet struct bkey_s_c k; 309263095894SKent Overstreet struct bpos next_pos; 30935f786787SKent Overstreet struct bpos move_pos = POS(inode->v.i_ino, offset >> 9); 30945f786787SKent Overstreet struct bpos atomic_end; 30952d594dfbSKent Overstreet unsigned trigger_flags = 0; 30966fed42bbSKent Overstreet u32 snapshot; 30976fed42bbSKent Overstreet 30986fed42bbSKent Overstreet bch2_trans_begin(&trans); 30996fed42bbSKent Overstreet 31006fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, 31016fed42bbSKent Overstreet inode->ei_subvol, &snapshot); 31026fed42bbSKent Overstreet if (ret) 31036fed42bbSKent Overstreet continue; 31046fed42bbSKent Overstreet 31056fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&src, snapshot); 31066fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&dst, snapshot); 31076fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&del, snapshot); 310863095894SKent Overstreet 3109700c25b3SKent Overstreet bch2_trans_begin(&trans); 3110700c25b3SKent Overstreet 31115f786787SKent Overstreet k = insert 311267e0dd8fSKent Overstreet ? bch2_btree_iter_peek_prev(&src) 3113c72f687aSKent Overstreet : bch2_btree_iter_peek_upto(&src, POS(inode->v.i_ino, U64_MAX)); 311463095894SKent Overstreet if ((ret = bkey_err(k))) 311550dc0f69SKent Overstreet continue; 311663095894SKent Overstreet 311763095894SKent Overstreet if (!k.k || k.k->p.inode != inode->v.i_ino) 311863095894SKent Overstreet break; 311963095894SKent Overstreet 31205f786787SKent Overstreet if (insert && 3121e88a75ebSKent Overstreet bkey_le(k.k->p, POS(inode->v.i_ino, offset >> 9))) 31225f786787SKent Overstreet break; 31235f786787SKent Overstreet reassemble: 312407a1006aSKent Overstreet bch2_bkey_buf_reassemble(©, c, k); 31255f786787SKent Overstreet 31265f786787SKent Overstreet if (insert && 3127e88a75ebSKent Overstreet bkey_lt(bkey_start_pos(k.k), move_pos)) 312835189e09SKent Overstreet bch2_cut_front(move_pos, copy.k); 31295f786787SKent Overstreet 313035189e09SKent Overstreet copy.k->k.p.offset += shift >> 9; 313167e0dd8fSKent Overstreet bch2_btree_iter_set_pos(&dst, bkey_start_pos(©.k->k)); 31321c6fdbd8SKent Overstreet 313367e0dd8fSKent Overstreet ret = bch2_extent_atomic_end(&trans, &dst, copy.k, &atomic_end); 31343c7f3b7aSKent Overstreet if (ret) 313550dc0f69SKent Overstreet continue; 3136e2d9912cSKent Overstreet 3137e88a75ebSKent Overstreet if (!bkey_eq(atomic_end, copy.k->k.p)) { 31385f786787SKent Overstreet if (insert) { 31395f786787SKent Overstreet move_pos = atomic_end; 31405f786787SKent Overstreet move_pos.offset -= shift >> 9; 31415f786787SKent Overstreet goto reassemble; 31425f786787SKent Overstreet } else { 3143085ab693SKent Overstreet bch2_cut_back(atomic_end, copy.k); 31445f786787SKent Overstreet } 31455f786787SKent Overstreet } 31465f786787SKent Overstreet 314763095894SKent Overstreet bkey_init(&delete.k); 3148283eda57SKent Overstreet delete.k.p = copy.k->k.p; 3149283eda57SKent Overstreet delete.k.size = copy.k->k.size; 3150283eda57SKent Overstreet delete.k.p.offset -= shift >> 9; 315167e0dd8fSKent Overstreet bch2_btree_iter_set_pos(&del, bkey_start_pos(&delete.k)); 31521c6fdbd8SKent Overstreet 31535f786787SKent Overstreet next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p; 315463095894SKent Overstreet 31557c4ca54aSKent Overstreet if (copy.k->k.size != k.k->size) { 315663095894SKent Overstreet /* We might end up splitting compressed extents: */ 315763095894SKent Overstreet unsigned nr_ptrs = 31584de77495SKent Overstreet bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k)); 315963095894SKent Overstreet 316063095894SKent Overstreet ret = bch2_disk_reservation_get(c, &disk_res, 316135189e09SKent Overstreet copy.k->k.size, nr_ptrs, 31621c6fdbd8SKent Overstreet BCH_DISK_RESERVATION_NOFAIL); 31631c6fdbd8SKent Overstreet BUG_ON(ret); 316463095894SKent Overstreet } 31651c6fdbd8SKent Overstreet 316667e0dd8fSKent Overstreet ret = bch2_btree_iter_traverse(&del) ?: 316767e0dd8fSKent Overstreet bch2_trans_update(&trans, &del, &delete, trigger_flags) ?: 316867e0dd8fSKent Overstreet bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?: 316968a2054dSKent Overstreet bch2_trans_commit(&trans, &disk_res, NULL, 31702d594dfbSKent Overstreet BTREE_INSERT_NOFAIL); 31711c6fdbd8SKent Overstreet bch2_disk_reservation_put(c, &disk_res); 317250dc0f69SKent Overstreet 317363095894SKent Overstreet if (!ret) 317467e0dd8fSKent Overstreet bch2_btree_iter_set_pos(&src, next_pos); 317550dc0f69SKent Overstreet } 317667e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &del); 317767e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &dst); 317867e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &src); 317950dc0f69SKent Overstreet bch2_trans_exit(&trans); 318050dc0f69SKent Overstreet bch2_bkey_buf_exit(©, c); 318163095894SKent Overstreet 31828ef231bdSKent Overstreet if (ret) 318374163da7SKent Overstreet return ret; 31841c6fdbd8SKent Overstreet 318574163da7SKent Overstreet mutex_lock(&inode->ei_update_lock); 31865f786787SKent Overstreet if (!insert) { 31878ef231bdSKent Overstreet i_size_write(&inode->v, new_size); 31888ef231bdSKent Overstreet ret = bch2_write_inode_size(c, inode, new_size, 31898ef231bdSKent Overstreet ATTR_MTIME|ATTR_CTIME); 319074163da7SKent Overstreet } else { 319174163da7SKent Overstreet /* We need an inode update to update bi_journal_seq for fsync: */ 319274163da7SKent Overstreet ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, 319374163da7SKent Overstreet ATTR_MTIME|ATTR_CTIME); 31945f786787SKent Overstreet } 319574163da7SKent Overstreet mutex_unlock(&inode->ei_update_lock); 31961c6fdbd8SKent Overstreet return ret; 31971c6fdbd8SKent Overstreet } 31981c6fdbd8SKent Overstreet 3199694015c2SKent Overstreet static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, 3200694015c2SKent Overstreet u64 start_sector, u64 end_sector) 32011c6fdbd8SKent Overstreet { 32021c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 3203190fa7afSKent Overstreet struct btree_trans trans; 320467e0dd8fSKent Overstreet struct btree_iter iter; 3205694015c2SKent Overstreet struct bpos end_pos = POS(inode->v.i_ino, end_sector); 320601ad6737SKent Overstreet struct bch_io_opts opts; 3207694015c2SKent Overstreet int ret = 0; 32081c6fdbd8SKent Overstreet 320901ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 3210f7beb4caSKent Overstreet bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512); 32111c6fdbd8SKent Overstreet 321267e0dd8fSKent Overstreet bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, 3213694015c2SKent Overstreet POS(inode->v.i_ino, start_sector), 3214190fa7afSKent Overstreet BTREE_ITER_SLOTS|BTREE_ITER_INTENT); 32151c6fdbd8SKent Overstreet 3216e88a75ebSKent Overstreet while (!ret && bkey_lt(iter.pos, end_pos)) { 32172e87eae1SKent Overstreet s64 i_sectors_delta = 0; 3218190fa7afSKent Overstreet struct quota_res quota_res = { 0 }; 32191c6fdbd8SKent Overstreet struct bkey_s_c k; 3220694015c2SKent Overstreet unsigned sectors; 32216fed42bbSKent Overstreet u32 snapshot; 32221c6fdbd8SKent Overstreet 3223163e885aSKent Overstreet bch2_trans_begin(&trans); 3224a8abd3a7SKent Overstreet 32256fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, 32266fed42bbSKent Overstreet inode->ei_subvol, &snapshot); 32276fed42bbSKent Overstreet if (ret) 32286fed42bbSKent Overstreet goto bkey_err; 32296fed42bbSKent Overstreet 32306fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&iter, snapshot); 32316fed42bbSKent Overstreet 323267e0dd8fSKent Overstreet k = bch2_btree_iter_peek_slot(&iter); 32330f238367SKent Overstreet if ((ret = bkey_err(k))) 32340f238367SKent Overstreet goto bkey_err; 32351c6fdbd8SKent Overstreet 32361c6fdbd8SKent Overstreet /* already reserved */ 323779203111SKent Overstreet if (bkey_extent_is_reservation(k) && 323879203111SKent Overstreet bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) { 323967e0dd8fSKent Overstreet bch2_btree_iter_advance(&iter); 32401c6fdbd8SKent Overstreet continue; 32411c6fdbd8SKent Overstreet } 32421c6fdbd8SKent Overstreet 3243190fa7afSKent Overstreet if (bkey_extent_is_data(k.k) && 3244190fa7afSKent Overstreet !(mode & FALLOC_FL_ZERO_RANGE)) { 324567e0dd8fSKent Overstreet bch2_btree_iter_advance(&iter); 32461c6fdbd8SKent Overstreet continue; 32471c6fdbd8SKent Overstreet } 32481c6fdbd8SKent Overstreet 3249a8b3a677SKent Overstreet /* 3250a8b3a677SKent Overstreet * XXX: for nocow mode, we should promote shared extents to 3251a8b3a677SKent Overstreet * unshared here 3252a8b3a677SKent Overstreet */ 3253a8b3a677SKent Overstreet 325470de7a47SKent Overstreet sectors = bpos_min(k.k->p, end_pos).offset - iter.pos.offset; 32551c6fdbd8SKent Overstreet 32561c6fdbd8SKent Overstreet if (!bkey_extent_is_allocation(k.k)) { 32571c6fdbd8SKent Overstreet ret = bch2_quota_reservation_add(c, inode, 3258190fa7afSKent Overstreet "a_res, 32591c6fdbd8SKent Overstreet sectors, true); 32601c6fdbd8SKent Overstreet if (unlikely(ret)) 32610f238367SKent Overstreet goto bkey_err; 32621c6fdbd8SKent Overstreet } 32631c6fdbd8SKent Overstreet 326470de7a47SKent Overstreet ret = bch2_extent_fallocate(&trans, inode_inum(inode), &iter, 326570de7a47SKent Overstreet sectors, opts, &i_sectors_delta, 326670de7a47SKent Overstreet writepoint_hashed((unsigned long) current)); 32678810386fSKent Overstreet if (ret) 32688810386fSKent Overstreet goto bkey_err; 326970de7a47SKent Overstreet 32702e87eae1SKent Overstreet i_sectors_acct(c, inode, "a_res, i_sectors_delta); 32710f238367SKent Overstreet bkey_err: 3272190fa7afSKent Overstreet bch2_quota_reservation_put(c, inode, "a_res); 3273549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 32741c6fdbd8SKent Overstreet ret = 0; 327550dc0f69SKent Overstreet } 327674163da7SKent Overstreet 3277dcfc593fSKent Overstreet bch2_trans_unlock(&trans); /* lock ordering, before taking pagecache locks: */ 3278dcfc593fSKent Overstreet mark_pagecache_reserved(inode, start_sector, iter.pos.offset); 3279dcfc593fSKent Overstreet 3280098ef98dSKent Overstreet if (bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)) { 328174163da7SKent Overstreet struct quota_res quota_res = { 0 }; 328274163da7SKent Overstreet s64 i_sectors_delta = 0; 328374163da7SKent Overstreet 328474163da7SKent Overstreet bch2_fpunch_at(&trans, &iter, inode_inum(inode), 328574163da7SKent Overstreet end_sector, &i_sectors_delta); 328674163da7SKent Overstreet i_sectors_acct(c, inode, "a_res, i_sectors_delta); 328774163da7SKent Overstreet bch2_quota_reservation_put(c, inode, "a_res); 328874163da7SKent Overstreet } 328974163da7SKent Overstreet 329067e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 3291694015c2SKent Overstreet bch2_trans_exit(&trans); 3292694015c2SKent Overstreet return ret; 3293694015c2SKent Overstreet } 329450dc0f69SKent Overstreet 3295694015c2SKent Overstreet static long bchfs_fallocate(struct bch_inode_info *inode, int mode, 3296694015c2SKent Overstreet loff_t offset, loff_t len) 3297694015c2SKent Overstreet { 3298694015c2SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 329974163da7SKent Overstreet u64 end = offset + len; 330074163da7SKent Overstreet u64 block_start = round_down(offset, block_bytes(c)); 330174163da7SKent Overstreet u64 block_end = round_up(end, block_bytes(c)); 330274163da7SKent Overstreet bool truncated_last_page = false; 330374163da7SKent Overstreet int ret, ret2 = 0; 3304694015c2SKent Overstreet 3305694015c2SKent Overstreet if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) { 3306694015c2SKent Overstreet ret = inode_newsize_ok(&inode->v, end); 3307694015c2SKent Overstreet if (ret) 330874163da7SKent Overstreet return ret; 3309694015c2SKent Overstreet } 3310694015c2SKent Overstreet 3311694015c2SKent Overstreet if (mode & FALLOC_FL_ZERO_RANGE) { 3312*959f7368SKent Overstreet ret = bch2_truncate_folios(inode, offset, end); 331374163da7SKent Overstreet if (unlikely(ret < 0)) 331474163da7SKent Overstreet return ret; 3315694015c2SKent Overstreet 331674163da7SKent Overstreet truncated_last_page = ret; 3317694015c2SKent Overstreet 3318694015c2SKent Overstreet truncate_pagecache_range(&inode->v, offset, end - 1); 331974163da7SKent Overstreet 332074163da7SKent Overstreet block_start = round_up(offset, block_bytes(c)); 332174163da7SKent Overstreet block_end = round_down(end, block_bytes(c)); 3322694015c2SKent Overstreet } 3323694015c2SKent Overstreet 3324694015c2SKent Overstreet ret = __bchfs_fallocate(inode, mode, block_start >> 9, block_end >> 9); 3325e0541a93SKent Overstreet 3326e0541a93SKent Overstreet /* 332774163da7SKent Overstreet * On -ENOSPC in ZERO_RANGE mode, we still want to do the inode update, 332874163da7SKent Overstreet * so that the VFS cache i_size is consistent with the btree i_size: 3329e0541a93SKent Overstreet */ 333074163da7SKent Overstreet if (ret && 3331098ef98dSKent Overstreet !(bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE))) 333274163da7SKent Overstreet return ret; 33331c6fdbd8SKent Overstreet 333474163da7SKent Overstreet if (mode & FALLOC_FL_KEEP_SIZE && end > inode->v.i_size) 3335e0541a93SKent Overstreet end = inode->v.i_size; 333674163da7SKent Overstreet 333774163da7SKent Overstreet if (end >= inode->v.i_size && 333874163da7SKent Overstreet (((mode & FALLOC_FL_ZERO_RANGE) && !truncated_last_page) || 333974163da7SKent Overstreet !(mode & FALLOC_FL_KEEP_SIZE))) { 334074163da7SKent Overstreet spin_lock(&inode->v.i_lock); 3341e0541a93SKent Overstreet i_size_write(&inode->v, end); 334274163da7SKent Overstreet spin_unlock(&inode->v.i_lock); 3343e0541a93SKent Overstreet 33441c6fdbd8SKent Overstreet mutex_lock(&inode->ei_update_lock); 334574163da7SKent Overstreet ret2 = bch2_write_inode_size(c, inode, end, 0); 33461c6fdbd8SKent Overstreet mutex_unlock(&inode->ei_update_lock); 33471c6fdbd8SKent Overstreet } 334874163da7SKent Overstreet 334974163da7SKent Overstreet return ret ?: ret2; 33501c6fdbd8SKent Overstreet } 33511c6fdbd8SKent Overstreet 33521c6fdbd8SKent Overstreet long bch2_fallocate_dispatch(struct file *file, int mode, 33531c6fdbd8SKent Overstreet loff_t offset, loff_t len) 33541c6fdbd8SKent Overstreet { 33551c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 33562a9101a9SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 33572a9101a9SKent Overstreet long ret; 33582a9101a9SKent Overstreet 3359d94189adSKent Overstreet if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fallocate)) 33602a9101a9SKent Overstreet return -EROFS; 33611c6fdbd8SKent Overstreet 336274163da7SKent Overstreet inode_lock(&inode->v); 336374163da7SKent Overstreet inode_dio_wait(&inode->v); 3364a7ecd30cSKent Overstreet bch2_pagecache_block_get(inode); 336574163da7SKent Overstreet 336607bfcc0bSKent Overstreet ret = file_modified(file); 336707bfcc0bSKent Overstreet if (ret) 336807bfcc0bSKent Overstreet goto err; 336907bfcc0bSKent Overstreet 33701c6fdbd8SKent Overstreet if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE))) 33712a9101a9SKent Overstreet ret = bchfs_fallocate(inode, mode, offset, len); 33722a9101a9SKent Overstreet else if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE)) 33732a9101a9SKent Overstreet ret = bchfs_fpunch(inode, offset, len); 33742a9101a9SKent Overstreet else if (mode == FALLOC_FL_INSERT_RANGE) 33752a9101a9SKent Overstreet ret = bchfs_fcollapse_finsert(inode, offset, len, true); 33762a9101a9SKent Overstreet else if (mode == FALLOC_FL_COLLAPSE_RANGE) 33772a9101a9SKent Overstreet ret = bchfs_fcollapse_finsert(inode, offset, len, false); 33782a9101a9SKent Overstreet else 33792a9101a9SKent Overstreet ret = -EOPNOTSUPP; 338007bfcc0bSKent Overstreet err: 3381a7ecd30cSKent Overstreet bch2_pagecache_block_put(inode); 338274163da7SKent Overstreet inode_unlock(&inode->v); 3383d94189adSKent Overstreet bch2_write_ref_put(c, BCH_WRITE_REF_fallocate); 33841c6fdbd8SKent Overstreet 33855c1ef830SKent Overstreet return bch2_err_class(ret); 33861c6fdbd8SKent Overstreet } 33871c6fdbd8SKent Overstreet 3388c72f687aSKent Overstreet /* 3389c72f687aSKent Overstreet * Take a quota reservation for unallocated blocks in a given file range 3390c72f687aSKent Overstreet * Does not check pagecache 3391c72f687aSKent Overstreet */ 3392e8540e56SKent Overstreet static int quota_reserve_range(struct bch_inode_info *inode, 3393e8540e56SKent Overstreet struct quota_res *res, 3394e8540e56SKent Overstreet u64 start, u64 end) 3395e8540e56SKent Overstreet { 3396e8540e56SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 3397e8540e56SKent Overstreet struct btree_trans trans; 3398e8540e56SKent Overstreet struct btree_iter iter; 3399e8540e56SKent Overstreet struct bkey_s_c k; 3400e8540e56SKent Overstreet u32 snapshot; 3401e8540e56SKent Overstreet u64 sectors = end - start; 3402e8540e56SKent Overstreet u64 pos = start; 3403e8540e56SKent Overstreet int ret; 3404e8540e56SKent Overstreet 3405e8540e56SKent Overstreet bch2_trans_init(&trans, c, 0, 0); 3406e8540e56SKent Overstreet retry: 3407e8540e56SKent Overstreet bch2_trans_begin(&trans); 3408e8540e56SKent Overstreet 3409e8540e56SKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, inode->ei_subvol, &snapshot); 3410e8540e56SKent Overstreet if (ret) 3411e8540e56SKent Overstreet goto err; 3412e8540e56SKent Overstreet 3413e8540e56SKent Overstreet bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, 3414e8540e56SKent Overstreet SPOS(inode->v.i_ino, pos, snapshot), 0); 3415e8540e56SKent Overstreet 3416e8540e56SKent Overstreet while (!(ret = btree_trans_too_many_iters(&trans)) && 3417e8540e56SKent Overstreet (k = bch2_btree_iter_peek_upto(&iter, POS(inode->v.i_ino, end - 1))).k && 3418e8540e56SKent Overstreet !(ret = bkey_err(k))) { 3419e8540e56SKent Overstreet if (bkey_extent_is_allocation(k.k)) { 3420e8540e56SKent Overstreet u64 s = min(end, k.k->p.offset) - 3421e8540e56SKent Overstreet max(start, bkey_start_offset(k.k)); 3422e8540e56SKent Overstreet BUG_ON(s > sectors); 3423e8540e56SKent Overstreet sectors -= s; 3424e8540e56SKent Overstreet } 3425e8540e56SKent Overstreet bch2_btree_iter_advance(&iter); 3426e8540e56SKent Overstreet } 3427e8540e56SKent Overstreet pos = iter.pos.offset; 3428e8540e56SKent Overstreet bch2_trans_iter_exit(&trans, &iter); 3429e8540e56SKent Overstreet err: 3430e8540e56SKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 3431e8540e56SKent Overstreet goto retry; 3432e8540e56SKent Overstreet 3433e8540e56SKent Overstreet bch2_trans_exit(&trans); 3434e8540e56SKent Overstreet 3435e8540e56SKent Overstreet if (ret) 3436e8540e56SKent Overstreet return ret; 3437e8540e56SKent Overstreet 3438e8540e56SKent Overstreet return bch2_quota_reservation_add(c, inode, res, sectors, true); 3439e8540e56SKent Overstreet } 3440e8540e56SKent Overstreet 344176426098SKent Overstreet loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, 344276426098SKent Overstreet struct file *file_dst, loff_t pos_dst, 344376426098SKent Overstreet loff_t len, unsigned remap_flags) 344476426098SKent Overstreet { 344576426098SKent Overstreet struct bch_inode_info *src = file_bch_inode(file_src); 344676426098SKent Overstreet struct bch_inode_info *dst = file_bch_inode(file_dst); 344776426098SKent Overstreet struct bch_fs *c = src->v.i_sb->s_fs_info; 3448e8540e56SKent Overstreet struct quota_res quota_res = { 0 }; 34492e87eae1SKent Overstreet s64 i_sectors_delta = 0; 3450677fc056SKent Overstreet u64 aligned_len; 345176426098SKent Overstreet loff_t ret = 0; 345276426098SKent Overstreet 345376426098SKent Overstreet if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY)) 345476426098SKent Overstreet return -EINVAL; 345576426098SKent Overstreet 345676426098SKent Overstreet if (remap_flags & REMAP_FILE_DEDUP) 345776426098SKent Overstreet return -EOPNOTSUPP; 345876426098SKent Overstreet 345976426098SKent Overstreet if ((pos_src & (block_bytes(c) - 1)) || 346076426098SKent Overstreet (pos_dst & (block_bytes(c) - 1))) 346176426098SKent Overstreet return -EINVAL; 346276426098SKent Overstreet 346376426098SKent Overstreet if (src == dst && 346476426098SKent Overstreet abs(pos_src - pos_dst) < len) 346576426098SKent Overstreet return -EINVAL; 346676426098SKent Overstreet 346776426098SKent Overstreet bch2_lock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); 346876426098SKent Overstreet 346976426098SKent Overstreet inode_dio_wait(&src->v); 347076426098SKent Overstreet inode_dio_wait(&dst->v); 347176426098SKent Overstreet 347276426098SKent Overstreet ret = generic_remap_file_range_prep(file_src, pos_src, 347376426098SKent Overstreet file_dst, pos_dst, 347476426098SKent Overstreet &len, remap_flags); 347576426098SKent Overstreet if (ret < 0 || len == 0) 34762e87eae1SKent Overstreet goto err; 347776426098SKent Overstreet 3478677fc056SKent Overstreet aligned_len = round_up((u64) len, block_bytes(c)); 347976426098SKent Overstreet 348076426098SKent Overstreet ret = write_invalidate_inode_pages_range(dst->v.i_mapping, 3481677fc056SKent Overstreet pos_dst, pos_dst + len - 1); 348276426098SKent Overstreet if (ret) 34832e87eae1SKent Overstreet goto err; 348476426098SKent Overstreet 3485e8540e56SKent Overstreet ret = quota_reserve_range(dst, "a_res, pos_dst >> 9, 3486e8540e56SKent Overstreet (pos_dst + aligned_len) >> 9); 3487e8540e56SKent Overstreet if (ret) 3488e8540e56SKent Overstreet goto err; 3489e8540e56SKent Overstreet 3490e8540e56SKent Overstreet file_update_time(file_dst); 3491e8540e56SKent Overstreet 3492dcfc593fSKent Overstreet mark_pagecache_unallocated(src, pos_src >> 9, 3493dcfc593fSKent Overstreet (pos_src + aligned_len) >> 9); 349476426098SKent Overstreet 34952e87eae1SKent Overstreet ret = bch2_remap_range(c, 34966fed42bbSKent Overstreet inode_inum(dst), pos_dst >> 9, 34976fed42bbSKent Overstreet inode_inum(src), pos_src >> 9, 349876426098SKent Overstreet aligned_len >> 9, 34992e87eae1SKent Overstreet pos_dst + len, &i_sectors_delta); 35002e87eae1SKent Overstreet if (ret < 0) 35012e87eae1SKent Overstreet goto err; 350276426098SKent Overstreet 35032e87eae1SKent Overstreet /* 35042e87eae1SKent Overstreet * due to alignment, we might have remapped slightly more than requsted 35052e87eae1SKent Overstreet */ 3506677fc056SKent Overstreet ret = min((u64) ret << 9, (u64) len); 35072e87eae1SKent Overstreet 3508e8540e56SKent Overstreet i_sectors_acct(c, dst, "a_res, i_sectors_delta); 35092e87eae1SKent Overstreet 35102e87eae1SKent Overstreet spin_lock(&dst->v.i_lock); 3511677fc056SKent Overstreet if (pos_dst + ret > dst->v.i_size) 3512677fc056SKent Overstreet i_size_write(&dst->v, pos_dst + ret); 35132e87eae1SKent Overstreet spin_unlock(&dst->v.i_lock); 3514e7084c9cSKent Overstreet 351568a2054dSKent Overstreet if ((file_dst->f_flags & (__O_SYNC | O_DSYNC)) || 351668a2054dSKent Overstreet IS_SYNC(file_inode(file_dst))) 3517a8b3a677SKent Overstreet ret = bch2_flush_inode(c, dst); 35182e87eae1SKent Overstreet err: 3519e8540e56SKent Overstreet bch2_quota_reservation_put(c, dst, "a_res); 352076426098SKent Overstreet bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); 352176426098SKent Overstreet 35225c1ef830SKent Overstreet return bch2_err_class(ret); 352376426098SKent Overstreet } 352476426098SKent Overstreet 35251c6fdbd8SKent Overstreet /* fseek: */ 35261c6fdbd8SKent Overstreet 3527543ef2ebSKent Overstreet static int folio_data_offset(struct folio *folio, unsigned offset) 35281c6fdbd8SKent Overstreet { 352930bff594SKent Overstreet struct bch_folio *s = bch2_folio(folio); 3530a86a92cbSKent Overstreet unsigned i, sectors = folio_sectors(folio); 3531f81b648dSKent Overstreet 3532543ef2ebSKent Overstreet if (s) 3533a86a92cbSKent Overstreet for (i = offset >> 9; i < sectors; i++) 3534f57a6a5dSKent Overstreet if (s->s[i].state >= SECTOR_DIRTY) 3535543ef2ebSKent Overstreet return i << 9; 3536f57a6a5dSKent Overstreet 3537543ef2ebSKent Overstreet return -1; 35381c6fdbd8SKent Overstreet } 35391c6fdbd8SKent Overstreet 3540543ef2ebSKent Overstreet static loff_t bch2_seek_pagecache_data(struct inode *vinode, 35411c6fdbd8SKent Overstreet loff_t start_offset, 35421c6fdbd8SKent Overstreet loff_t end_offset) 35431c6fdbd8SKent Overstreet { 35441c6fdbd8SKent Overstreet struct folio_batch fbatch; 35451c6fdbd8SKent Overstreet pgoff_t start_index = start_offset >> PAGE_SHIFT; 35461c6fdbd8SKent Overstreet pgoff_t end_index = end_offset >> PAGE_SHIFT; 35471c6fdbd8SKent Overstreet pgoff_t index = start_index; 35481c6fdbd8SKent Overstreet unsigned i; 3549543ef2ebSKent Overstreet loff_t ret; 3550543ef2ebSKent Overstreet int offset; 35511c6fdbd8SKent Overstreet 35521c6fdbd8SKent Overstreet folio_batch_init(&fbatch); 35531c6fdbd8SKent Overstreet 35541c6fdbd8SKent Overstreet while (filemap_get_folios(vinode->i_mapping, 35551c6fdbd8SKent Overstreet &index, end_index, &fbatch)) { 35561c6fdbd8SKent Overstreet for (i = 0; i < folio_batch_count(&fbatch); i++) { 35571c6fdbd8SKent Overstreet struct folio *folio = fbatch.folios[i]; 35581c6fdbd8SKent Overstreet 35591c6fdbd8SKent Overstreet folio_lock(folio); 3560543ef2ebSKent Overstreet offset = folio_data_offset(folio, 3561a86a92cbSKent Overstreet max(folio_pos(folio), start_offset) - 3562a86a92cbSKent Overstreet folio_pos(folio)); 3563543ef2ebSKent Overstreet if (offset >= 0) { 3564a86a92cbSKent Overstreet ret = clamp(folio_pos(folio) + offset, 3565543ef2ebSKent Overstreet start_offset, end_offset); 35661c6fdbd8SKent Overstreet folio_unlock(folio); 35671c6fdbd8SKent Overstreet folio_batch_release(&fbatch); 3568543ef2ebSKent Overstreet return ret; 35691c6fdbd8SKent Overstreet } 35701c6fdbd8SKent Overstreet folio_unlock(folio); 35711c6fdbd8SKent Overstreet } 35721c6fdbd8SKent Overstreet folio_batch_release(&fbatch); 35731c6fdbd8SKent Overstreet cond_resched(); 35741c6fdbd8SKent Overstreet } 35751c6fdbd8SKent Overstreet 35761c6fdbd8SKent Overstreet return end_offset; 35771c6fdbd8SKent Overstreet } 35781c6fdbd8SKent Overstreet 35791c6fdbd8SKent Overstreet static loff_t bch2_seek_data(struct file *file, u64 offset) 35801c6fdbd8SKent Overstreet { 35811c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 35821c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 3583424eb881SKent Overstreet struct btree_trans trans; 358467e0dd8fSKent Overstreet struct btree_iter iter; 35851c6fdbd8SKent Overstreet struct bkey_s_c k; 35866fed42bbSKent Overstreet subvol_inum inum = inode_inum(inode); 35871c6fdbd8SKent Overstreet u64 isize, next_data = MAX_LFS_FILESIZE; 35886fed42bbSKent Overstreet u32 snapshot; 35891c6fdbd8SKent Overstreet int ret; 35901c6fdbd8SKent Overstreet 35911c6fdbd8SKent Overstreet isize = i_size_read(&inode->v); 35921c6fdbd8SKent Overstreet if (offset >= isize) 35931c6fdbd8SKent Overstreet return -ENXIO; 35941c6fdbd8SKent Overstreet 359520bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 35966fed42bbSKent Overstreet retry: 35976fed42bbSKent Overstreet bch2_trans_begin(&trans); 35986fed42bbSKent Overstreet 35996fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 36006fed42bbSKent Overstreet if (ret) 36016fed42bbSKent Overstreet goto err; 3602424eb881SKent Overstreet 3603c72f687aSKent Overstreet for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_extents, 3604c72f687aSKent Overstreet SPOS(inode->v.i_ino, offset >> 9, snapshot), 3605c72f687aSKent Overstreet POS(inode->v.i_ino, U64_MAX), 3606c72f687aSKent Overstreet 0, k, ret) { 3607c72f687aSKent Overstreet if (bkey_extent_is_data(k.k)) { 36081c6fdbd8SKent Overstreet next_data = max(offset, bkey_start_offset(k.k) << 9); 36091c6fdbd8SKent Overstreet break; 36101c6fdbd8SKent Overstreet } else if (k.k->p.offset >> 9 > isize) 36111c6fdbd8SKent Overstreet break; 36121c6fdbd8SKent Overstreet } 361367e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 36146fed42bbSKent Overstreet err: 3615549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 36166fed42bbSKent Overstreet goto retry; 36171c6fdbd8SKent Overstreet 36189a796fdbSKent Overstreet bch2_trans_exit(&trans); 36191c6fdbd8SKent Overstreet if (ret) 36201c6fdbd8SKent Overstreet return ret; 36211c6fdbd8SKent Overstreet 36221c6fdbd8SKent Overstreet if (next_data > offset) 3623543ef2ebSKent Overstreet next_data = bch2_seek_pagecache_data(&inode->v, 36241c6fdbd8SKent Overstreet offset, next_data); 36251c6fdbd8SKent Overstreet 3626e10d3094SKent Overstreet if (next_data >= isize) 36271c6fdbd8SKent Overstreet return -ENXIO; 36281c6fdbd8SKent Overstreet 36291c6fdbd8SKent Overstreet return vfs_setpos(file, next_data, MAX_LFS_FILESIZE); 36301c6fdbd8SKent Overstreet } 36311c6fdbd8SKent Overstreet 3632e8d28c3eSKent Overstreet static bool folio_hole_offset(struct address_space *mapping, loff_t *offset) 36331c6fdbd8SKent Overstreet { 3634e8d28c3eSKent Overstreet struct folio *folio; 3635e8d28c3eSKent Overstreet struct bch_folio *s; 3636e8d28c3eSKent Overstreet unsigned i, sectors, f_offset; 3637e8d28c3eSKent Overstreet bool ret = true; 3638543ef2ebSKent Overstreet 3639e8d28c3eSKent Overstreet folio = filemap_lock_folio(mapping, *offset >> PAGE_SHIFT); 3640e8d28c3eSKent Overstreet if (!folio) 3641e8d28c3eSKent Overstreet return true; 3642e8d28c3eSKent Overstreet 3643e8d28c3eSKent Overstreet s = bch2_folio(folio); 3644543ef2ebSKent Overstreet if (!s) 3645e8d28c3eSKent Overstreet goto unlock; 3646543ef2ebSKent Overstreet 3647e8d28c3eSKent Overstreet sectors = folio_sectors(folio); 3648e8d28c3eSKent Overstreet f_offset = *offset - folio_pos(folio); 3649543ef2ebSKent Overstreet 3650e8d28c3eSKent Overstreet for (i = f_offset >> 9; i < sectors; i++) 3651e8d28c3eSKent Overstreet if (s->s[i].state < SECTOR_DIRTY) { 3652e8d28c3eSKent Overstreet *offset = max(*offset, folio_pos(folio) + (i << 9)); 3653e8d28c3eSKent Overstreet goto unlock; 3654543ef2ebSKent Overstreet } 3655543ef2ebSKent Overstreet 3656e8d28c3eSKent Overstreet *offset = folio_end_pos(folio); 3657e8d28c3eSKent Overstreet ret = false; 3658e8d28c3eSKent Overstreet unlock: 365930bff594SKent Overstreet folio_unlock(folio); 36601c6fdbd8SKent Overstreet return ret; 36611c6fdbd8SKent Overstreet } 36621c6fdbd8SKent Overstreet 3663543ef2ebSKent Overstreet static loff_t bch2_seek_pagecache_hole(struct inode *vinode, 36641c6fdbd8SKent Overstreet loff_t start_offset, 36651c6fdbd8SKent Overstreet loff_t end_offset) 36661c6fdbd8SKent Overstreet { 36671c6fdbd8SKent Overstreet struct address_space *mapping = vinode->i_mapping; 3668e8d28c3eSKent Overstreet loff_t offset = start_offset; 36691c6fdbd8SKent Overstreet 3670e8d28c3eSKent Overstreet while (offset < end_offset && 3671e8d28c3eSKent Overstreet !folio_hole_offset(mapping, &offset)) 3672e8d28c3eSKent Overstreet ; 3673543ef2ebSKent Overstreet 3674e8d28c3eSKent Overstreet return min(offset, end_offset); 36751c6fdbd8SKent Overstreet } 36761c6fdbd8SKent Overstreet 36771c6fdbd8SKent Overstreet static loff_t bch2_seek_hole(struct file *file, u64 offset) 36781c6fdbd8SKent Overstreet { 36791c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 36801c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 3681424eb881SKent Overstreet struct btree_trans trans; 368267e0dd8fSKent Overstreet struct btree_iter iter; 36831c6fdbd8SKent Overstreet struct bkey_s_c k; 36846fed42bbSKent Overstreet subvol_inum inum = inode_inum(inode); 36851c6fdbd8SKent Overstreet u64 isize, next_hole = MAX_LFS_FILESIZE; 36866fed42bbSKent Overstreet u32 snapshot; 36871c6fdbd8SKent Overstreet int ret; 36881c6fdbd8SKent Overstreet 36891c6fdbd8SKent Overstreet isize = i_size_read(&inode->v); 36901c6fdbd8SKent Overstreet if (offset >= isize) 36911c6fdbd8SKent Overstreet return -ENXIO; 36921c6fdbd8SKent Overstreet 369320bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 36946fed42bbSKent Overstreet retry: 36956fed42bbSKent Overstreet bch2_trans_begin(&trans); 36966fed42bbSKent Overstreet 36976fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 36986fed42bbSKent Overstreet if (ret) 36996fed42bbSKent Overstreet goto err; 3700424eb881SKent Overstreet 3701e5fa91d7SKent Overstreet for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, 37026fed42bbSKent Overstreet SPOS(inode->v.i_ino, offset >> 9, snapshot), 370394f651e2SKent Overstreet BTREE_ITER_SLOTS, k, ret) { 37041c6fdbd8SKent Overstreet if (k.k->p.inode != inode->v.i_ino) { 3705543ef2ebSKent Overstreet next_hole = bch2_seek_pagecache_hole(&inode->v, 37061c6fdbd8SKent Overstreet offset, MAX_LFS_FILESIZE); 37071c6fdbd8SKent Overstreet break; 37081c6fdbd8SKent Overstreet } else if (!bkey_extent_is_data(k.k)) { 3709543ef2ebSKent Overstreet next_hole = bch2_seek_pagecache_hole(&inode->v, 37101c6fdbd8SKent Overstreet max(offset, bkey_start_offset(k.k) << 9), 37111c6fdbd8SKent Overstreet k.k->p.offset << 9); 37121c6fdbd8SKent Overstreet 37131c6fdbd8SKent Overstreet if (next_hole < k.k->p.offset << 9) 37141c6fdbd8SKent Overstreet break; 37151c6fdbd8SKent Overstreet } else { 37161c6fdbd8SKent Overstreet offset = max(offset, bkey_start_offset(k.k) << 9); 37171c6fdbd8SKent Overstreet } 37181c6fdbd8SKent Overstreet } 371967e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 37206fed42bbSKent Overstreet err: 3721549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 37226fed42bbSKent Overstreet goto retry; 37231c6fdbd8SKent Overstreet 37249a796fdbSKent Overstreet bch2_trans_exit(&trans); 37251c6fdbd8SKent Overstreet if (ret) 37261c6fdbd8SKent Overstreet return ret; 37271c6fdbd8SKent Overstreet 37281c6fdbd8SKent Overstreet if (next_hole > isize) 37291c6fdbd8SKent Overstreet next_hole = isize; 37301c6fdbd8SKent Overstreet 37311c6fdbd8SKent Overstreet return vfs_setpos(file, next_hole, MAX_LFS_FILESIZE); 37321c6fdbd8SKent Overstreet } 37331c6fdbd8SKent Overstreet 37341c6fdbd8SKent Overstreet loff_t bch2_llseek(struct file *file, loff_t offset, int whence) 37351c6fdbd8SKent Overstreet { 37365c1ef830SKent Overstreet loff_t ret; 37375c1ef830SKent Overstreet 37381c6fdbd8SKent Overstreet switch (whence) { 37391c6fdbd8SKent Overstreet case SEEK_SET: 37401c6fdbd8SKent Overstreet case SEEK_CUR: 37411c6fdbd8SKent Overstreet case SEEK_END: 37425c1ef830SKent Overstreet ret = generic_file_llseek(file, offset, whence); 37435c1ef830SKent Overstreet break; 37441c6fdbd8SKent Overstreet case SEEK_DATA: 37455c1ef830SKent Overstreet ret = bch2_seek_data(file, offset); 37465c1ef830SKent Overstreet break; 37471c6fdbd8SKent Overstreet case SEEK_HOLE: 37485c1ef830SKent Overstreet ret = bch2_seek_hole(file, offset); 37495c1ef830SKent Overstreet break; 37505c1ef830SKent Overstreet default: 37515c1ef830SKent Overstreet ret = -EINVAL; 37525c1ef830SKent Overstreet break; 37531c6fdbd8SKent Overstreet } 37541c6fdbd8SKent Overstreet 37555c1ef830SKent Overstreet return bch2_err_class(ret); 37561c6fdbd8SKent Overstreet } 37571c6fdbd8SKent Overstreet 37581c6fdbd8SKent Overstreet void bch2_fs_fsio_exit(struct bch_fs *c) 37591c6fdbd8SKent Overstreet { 3760a8b3a677SKent Overstreet bioset_exit(&c->nocow_flush_bioset); 37611c6fdbd8SKent Overstreet bioset_exit(&c->dio_write_bioset); 37621c6fdbd8SKent Overstreet bioset_exit(&c->dio_read_bioset); 37631c6fdbd8SKent Overstreet bioset_exit(&c->writepage_bioset); 37641c6fdbd8SKent Overstreet } 37651c6fdbd8SKent Overstreet 37661c6fdbd8SKent Overstreet int bch2_fs_fsio_init(struct bch_fs *c) 37671c6fdbd8SKent Overstreet { 37681c6fdbd8SKent Overstreet int ret = 0; 37691c6fdbd8SKent Overstreet 37701c6fdbd8SKent Overstreet pr_verbose_init(c->opts, ""); 37711c6fdbd8SKent Overstreet 37721c6fdbd8SKent Overstreet if (bioset_init(&c->writepage_bioset, 37739a3df993SKent Overstreet 4, offsetof(struct bch_writepage_io, op.wbio.bio), 377465d48e35SKent Overstreet BIOSET_NEED_BVECS)) 377565d48e35SKent Overstreet return -BCH_ERR_ENOMEM_writepage_bioset_init; 377665d48e35SKent Overstreet 377765d48e35SKent Overstreet if (bioset_init(&c->dio_read_bioset, 37781c6fdbd8SKent Overstreet 4, offsetof(struct dio_read, rbio.bio), 377965d48e35SKent Overstreet BIOSET_NEED_BVECS)) 378065d48e35SKent Overstreet return -BCH_ERR_ENOMEM_dio_read_bioset_init; 378165d48e35SKent Overstreet 378265d48e35SKent Overstreet if (bioset_init(&c->dio_write_bioset, 37839a3df993SKent Overstreet 4, offsetof(struct dio_write, op.wbio.bio), 378465d48e35SKent Overstreet BIOSET_NEED_BVECS)) 378565d48e35SKent Overstreet return -BCH_ERR_ENOMEM_dio_write_bioset_init; 378665d48e35SKent Overstreet 378765d48e35SKent Overstreet if (bioset_init(&c->nocow_flush_bioset, 3788a8b3a677SKent Overstreet 1, offsetof(struct nocow_flush, bio), 0)) 378965d48e35SKent Overstreet return -BCH_ERR_ENOMEM_nocow_flush_bioset_init; 37901c6fdbd8SKent Overstreet 37911c6fdbd8SKent Overstreet pr_verbose_init(c->opts, "ret %i", ret); 37921c6fdbd8SKent Overstreet return ret; 37931c6fdbd8SKent Overstreet } 37941c6fdbd8SKent Overstreet 37951c6fdbd8SKent Overstreet #endif /* NO_BCACHEFS_FS */ 3796