11c6fdbd8SKent Overstreet // SPDX-License-Identifier: GPL-2.0 21c6fdbd8SKent Overstreet #ifndef NO_BCACHEFS_FS 31c6fdbd8SKent Overstreet 41c6fdbd8SKent Overstreet #include "bcachefs.h" 57b3f84eaSKent Overstreet #include "alloc_foreground.h" 61c6fdbd8SKent Overstreet #include "btree_update.h" 71c6fdbd8SKent Overstreet #include "buckets.h" 81c6fdbd8SKent Overstreet #include "clock.h" 91c6fdbd8SKent Overstreet #include "error.h" 10e2d9912cSKent Overstreet #include "extents.h" 1108c07feaSKent Overstreet #include "extent_update.h" 121c6fdbd8SKent Overstreet #include "fs.h" 131c6fdbd8SKent Overstreet #include "fs-io.h" 14dbbfca9fSKent Overstreet #include "fs-io-buffered.h" 15dbbfca9fSKent Overstreet #include "fs-io-pagecache.h" 161c6fdbd8SKent Overstreet #include "fsck.h" 171c6fdbd8SKent Overstreet #include "inode.h" 181c6fdbd8SKent Overstreet #include "journal.h" 191c6fdbd8SKent Overstreet #include "io.h" 201c6fdbd8SKent Overstreet #include "keylist.h" 211c6fdbd8SKent Overstreet #include "quota.h" 2276426098SKent Overstreet #include "reflink.h" 231c6fdbd8SKent Overstreet #include "trace.h" 241c6fdbd8SKent Overstreet 251c6fdbd8SKent Overstreet #include <linux/aio.h> 261c6fdbd8SKent Overstreet #include <linux/backing-dev.h> 271c6fdbd8SKent Overstreet #include <linux/falloc.h> 281c6fdbd8SKent Overstreet #include <linux/migrate.h> 291c6fdbd8SKent Overstreet #include <linux/mmu_context.h> 301c6fdbd8SKent Overstreet #include <linux/pagevec.h> 319ba2eb25SKent Overstreet #include <linux/rmap.h> 321c6fdbd8SKent Overstreet #include <linux/sched/signal.h> 331c6fdbd8SKent Overstreet #include <linux/task_io_accounting_ops.h> 341c6fdbd8SKent Overstreet #include <linux/uio.h> 351c6fdbd8SKent Overstreet 361c6fdbd8SKent Overstreet #include <trace/events/writeback.h> 371c6fdbd8SKent Overstreet 38a8b3a677SKent Overstreet struct nocow_flush { 39a8b3a677SKent Overstreet struct closure *cl; 40a8b3a677SKent Overstreet struct bch_dev *ca; 41a8b3a677SKent Overstreet struct bio bio; 42a8b3a677SKent Overstreet }; 43a8b3a677SKent Overstreet 44a8b3a677SKent Overstreet static void nocow_flush_endio(struct bio *_bio) 45a8b3a677SKent Overstreet { 46a8b3a677SKent Overstreet 47a8b3a677SKent Overstreet struct nocow_flush *bio = container_of(_bio, struct nocow_flush, bio); 48a8b3a677SKent Overstreet 49a8b3a677SKent Overstreet closure_put(bio->cl); 50a8b3a677SKent Overstreet percpu_ref_put(&bio->ca->io_ref); 51a8b3a677SKent Overstreet bio_put(&bio->bio); 52a8b3a677SKent Overstreet } 53a8b3a677SKent Overstreet 54dbbfca9fSKent Overstreet void bch2_inode_flush_nocow_writes_async(struct bch_fs *c, 55a8b3a677SKent Overstreet struct bch_inode_info *inode, 56a8b3a677SKent Overstreet struct closure *cl) 57a8b3a677SKent Overstreet { 58a8b3a677SKent Overstreet struct nocow_flush *bio; 59a8b3a677SKent Overstreet struct bch_dev *ca; 60a8b3a677SKent Overstreet struct bch_devs_mask devs; 61a8b3a677SKent Overstreet unsigned dev; 62a8b3a677SKent Overstreet 63a8b3a677SKent Overstreet dev = find_first_bit(inode->ei_devs_need_flush.d, BCH_SB_MEMBERS_MAX); 64a8b3a677SKent Overstreet if (dev == BCH_SB_MEMBERS_MAX) 65a8b3a677SKent Overstreet return; 66a8b3a677SKent Overstreet 67a8b3a677SKent Overstreet devs = inode->ei_devs_need_flush; 68a8b3a677SKent Overstreet memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush)); 69a8b3a677SKent Overstreet 70a8b3a677SKent Overstreet for_each_set_bit(dev, devs.d, BCH_SB_MEMBERS_MAX) { 71a8b3a677SKent Overstreet rcu_read_lock(); 72a8b3a677SKent Overstreet ca = rcu_dereference(c->devs[dev]); 73a8b3a677SKent Overstreet if (ca && !percpu_ref_tryget(&ca->io_ref)) 74a8b3a677SKent Overstreet ca = NULL; 75a8b3a677SKent Overstreet rcu_read_unlock(); 76a8b3a677SKent Overstreet 77a8b3a677SKent Overstreet if (!ca) 78a8b3a677SKent Overstreet continue; 79a8b3a677SKent Overstreet 80a8b3a677SKent Overstreet bio = container_of(bio_alloc_bioset(ca->disk_sb.bdev, 0, 81a8b3a677SKent Overstreet REQ_OP_FLUSH, 82a8b3a677SKent Overstreet GFP_KERNEL, 83a8b3a677SKent Overstreet &c->nocow_flush_bioset), 84a8b3a677SKent Overstreet struct nocow_flush, bio); 85a8b3a677SKent Overstreet bio->cl = cl; 86a8b3a677SKent Overstreet bio->ca = ca; 87a8b3a677SKent Overstreet bio->bio.bi_end_io = nocow_flush_endio; 88a8b3a677SKent Overstreet closure_bio_submit(&bio->bio, cl); 89a8b3a677SKent Overstreet } 90a8b3a677SKent Overstreet } 91a8b3a677SKent Overstreet 92a8b3a677SKent Overstreet static int bch2_inode_flush_nocow_writes(struct bch_fs *c, 93a8b3a677SKent Overstreet struct bch_inode_info *inode) 94a8b3a677SKent Overstreet { 95a8b3a677SKent Overstreet struct closure cl; 96a8b3a677SKent Overstreet 97a8b3a677SKent Overstreet closure_init_stack(&cl); 98a8b3a677SKent Overstreet bch2_inode_flush_nocow_writes_async(c, inode, &cl); 99a8b3a677SKent Overstreet closure_sync(&cl); 100a8b3a677SKent Overstreet 101a8b3a677SKent Overstreet return 0; 102a8b3a677SKent Overstreet } 103a8b3a677SKent Overstreet 1041c6fdbd8SKent Overstreet /* i_size updates: */ 1051c6fdbd8SKent Overstreet 1062ea90048SKent Overstreet struct inode_new_size { 1072ea90048SKent Overstreet loff_t new_size; 1082ea90048SKent Overstreet u64 now; 1092ea90048SKent Overstreet unsigned fields; 1102ea90048SKent Overstreet }; 1112ea90048SKent Overstreet 112*791236b8SJoshua Ashton static int inode_set_size(struct btree_trans *trans, 113*791236b8SJoshua Ashton struct bch_inode_info *inode, 1141c6fdbd8SKent Overstreet struct bch_inode_unpacked *bi, 1151c6fdbd8SKent Overstreet void *p) 1161c6fdbd8SKent Overstreet { 1172ea90048SKent Overstreet struct inode_new_size *s = p; 1181c6fdbd8SKent Overstreet 1192ea90048SKent Overstreet bi->bi_size = s->new_size; 1202ea90048SKent Overstreet if (s->fields & ATTR_ATIME) 1212ea90048SKent Overstreet bi->bi_atime = s->now; 1222ea90048SKent Overstreet if (s->fields & ATTR_MTIME) 1232ea90048SKent Overstreet bi->bi_mtime = s->now; 1242ea90048SKent Overstreet if (s->fields & ATTR_CTIME) 1252ea90048SKent Overstreet bi->bi_ctime = s->now; 1261c6fdbd8SKent Overstreet 1271c6fdbd8SKent Overstreet return 0; 1281c6fdbd8SKent Overstreet } 1291c6fdbd8SKent Overstreet 13076426098SKent Overstreet int __must_check bch2_write_inode_size(struct bch_fs *c, 1311c6fdbd8SKent Overstreet struct bch_inode_info *inode, 1322ea90048SKent Overstreet loff_t new_size, unsigned fields) 1331c6fdbd8SKent Overstreet { 1342ea90048SKent Overstreet struct inode_new_size s = { 1352ea90048SKent Overstreet .new_size = new_size, 1362ea90048SKent Overstreet .now = bch2_current_time(c), 1372ea90048SKent Overstreet .fields = fields, 1382ea90048SKent Overstreet }; 1392ea90048SKent Overstreet 1402ea90048SKent Overstreet return bch2_write_inode(c, inode, inode_set_size, &s, fields); 1411c6fdbd8SKent Overstreet } 1421c6fdbd8SKent Overstreet 143dbbfca9fSKent Overstreet void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, 144190fa7afSKent Overstreet struct quota_res *quota_res, s64 sectors) 1451c6fdbd8SKent Overstreet { 146b33bf1bcSKent Overstreet bch2_fs_inconsistent_on((s64) inode->v.i_blocks + sectors < 0, c, 147b33bf1bcSKent Overstreet "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", 148b33bf1bcSKent Overstreet inode->v.i_ino, (u64) inode->v.i_blocks, sectors, 149b33bf1bcSKent Overstreet inode->ei_inode.bi_sectors); 150b44a66a6SKent Overstreet inode->v.i_blocks += sectors; 151b44a66a6SKent Overstreet 1521c6fdbd8SKent Overstreet #ifdef CONFIG_BCACHEFS_QUOTA 153cb1b479dSKent Overstreet if (quota_res && 154cb1b479dSKent Overstreet !test_bit(EI_INODE_SNAPSHOT, &inode->ei_flags) && 155cb1b479dSKent Overstreet sectors > 0) { 1561c6fdbd8SKent Overstreet BUG_ON(sectors > quota_res->sectors); 1571c6fdbd8SKent Overstreet BUG_ON(sectors > inode->ei_quota_reserved); 1581c6fdbd8SKent Overstreet 1591c6fdbd8SKent Overstreet quota_res->sectors -= sectors; 1601c6fdbd8SKent Overstreet inode->ei_quota_reserved -= sectors; 1611c6fdbd8SKent Overstreet } else { 16226609b61SKent Overstreet bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN); 1631c6fdbd8SKent Overstreet } 1641c6fdbd8SKent Overstreet #endif 1656b1b186aSKent Overstreet } 1666b1b186aSKent Overstreet 1671c6fdbd8SKent Overstreet /* fsync: */ 1681c6fdbd8SKent Overstreet 16968a2054dSKent Overstreet /* 17068a2054dSKent Overstreet * inode->ei_inode.bi_journal_seq won't be up to date since it's set in an 17168a2054dSKent Overstreet * insert trigger: look up the btree inode instead 17268a2054dSKent Overstreet */ 173a8b3a677SKent Overstreet static int bch2_flush_inode(struct bch_fs *c, 174a8b3a677SKent Overstreet struct bch_inode_info *inode) 17568a2054dSKent Overstreet { 176a8b3a677SKent Overstreet struct bch_inode_unpacked u; 17768a2054dSKent Overstreet int ret; 17868a2054dSKent Overstreet 17968a2054dSKent Overstreet if (c->opts.journal_flush_disabled) 18068a2054dSKent Overstreet return 0; 18168a2054dSKent Overstreet 182a8b3a677SKent Overstreet ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u); 18368a2054dSKent Overstreet if (ret) 18468a2054dSKent Overstreet return ret; 18568a2054dSKent Overstreet 186a8b3a677SKent Overstreet return bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?: 187a8b3a677SKent Overstreet bch2_inode_flush_nocow_writes(c, inode); 18868a2054dSKent Overstreet } 18968a2054dSKent Overstreet 1901c6fdbd8SKent Overstreet int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync) 1911c6fdbd8SKent Overstreet { 1921c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 1931c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 19468a2054dSKent Overstreet int ret, ret2, ret3; 1951c6fdbd8SKent Overstreet 1961c6fdbd8SKent Overstreet ret = file_write_and_wait_range(file, start, end); 19768a2054dSKent Overstreet ret2 = sync_inode_metadata(&inode->v, 1); 198a8b3a677SKent Overstreet ret3 = bch2_flush_inode(c, inode); 1991c6fdbd8SKent Overstreet 2005c1ef830SKent Overstreet return bch2_err_class(ret ?: ret2 ?: ret3); 2011c6fdbd8SKent Overstreet } 2021c6fdbd8SKent Overstreet 2031c6fdbd8SKent Overstreet /* truncate: */ 2041c6fdbd8SKent Overstreet 2056fed42bbSKent Overstreet static inline int range_has_data(struct bch_fs *c, u32 subvol, 2061c6fdbd8SKent Overstreet struct bpos start, 2071c6fdbd8SKent Overstreet struct bpos end) 2081c6fdbd8SKent Overstreet { 209424eb881SKent Overstreet struct btree_trans trans; 21067e0dd8fSKent Overstreet struct btree_iter iter; 2111c6fdbd8SKent Overstreet struct bkey_s_c k; 2121c6fdbd8SKent Overstreet int ret = 0; 2131c6fdbd8SKent Overstreet 21420bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 2156fed42bbSKent Overstreet retry: 2166fed42bbSKent Overstreet bch2_trans_begin(&trans); 2176fed42bbSKent Overstreet 2186fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, subvol, &start.snapshot); 2196fed42bbSKent Overstreet if (ret) 2206fed42bbSKent Overstreet goto err; 221424eb881SKent Overstreet 222c72f687aSKent Overstreet for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_extents, start, end, 0, k, ret) 2234ad6aa46SBrian Foster if (bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k)) { 2241c6fdbd8SKent Overstreet ret = 1; 2251c6fdbd8SKent Overstreet break; 2261c6fdbd8SKent Overstreet } 2276fed42bbSKent Overstreet start = iter.pos; 22867e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 2296fed42bbSKent Overstreet err: 230549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 2316fed42bbSKent Overstreet goto retry; 2321c6fdbd8SKent Overstreet 2339a796fdbSKent Overstreet bch2_trans_exit(&trans); 2349a796fdbSKent Overstreet return ret; 2351c6fdbd8SKent Overstreet } 2361c6fdbd8SKent Overstreet 237959f7368SKent Overstreet static int __bch2_truncate_folio(struct bch_inode_info *inode, 2381c6fdbd8SKent Overstreet pgoff_t index, loff_t start, loff_t end) 2391c6fdbd8SKent Overstreet { 2401c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 2411c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 2423342ac13SKent Overstreet struct bch_folio *s; 2431c6fdbd8SKent Overstreet unsigned start_offset = start & (PAGE_SIZE - 1); 2441c6fdbd8SKent Overstreet unsigned end_offset = ((end - 1) & (PAGE_SIZE - 1)) + 1; 245a99b1cafSKent Overstreet unsigned i; 24630bff594SKent Overstreet struct folio *folio; 247b19d307dSKent Overstreet s64 i_sectors_delta = 0; 2481c6fdbd8SKent Overstreet int ret = 0; 2496b9857b2SBrian Foster u64 end_pos; 2501c6fdbd8SKent Overstreet 25130bff594SKent Overstreet folio = filemap_lock_folio(mapping, index); 252b6898917SKent Overstreet if (IS_ERR_OR_NULL(folio)) { 2531c6fdbd8SKent Overstreet /* 2541c6fdbd8SKent Overstreet * XXX: we're doing two index lookups when we end up reading the 25530bff594SKent Overstreet * folio 2561c6fdbd8SKent Overstreet */ 2576fed42bbSKent Overstreet ret = range_has_data(c, inode->ei_subvol, 258c72f687aSKent Overstreet POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT)), 259c72f687aSKent Overstreet POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT) + PAGE_SECTORS)); 2601c6fdbd8SKent Overstreet if (ret <= 0) 2611c6fdbd8SKent Overstreet return ret; 2621c6fdbd8SKent Overstreet 26330bff594SKent Overstreet folio = __filemap_get_folio(mapping, index, 26430bff594SKent Overstreet FGP_LOCK|FGP_CREAT, GFP_KERNEL); 2651e81f89bSKent Overstreet if (IS_ERR_OR_NULL(folio)) { 2661c6fdbd8SKent Overstreet ret = -ENOMEM; 2671c6fdbd8SKent Overstreet goto out; 2681c6fdbd8SKent Overstreet } 2691c6fdbd8SKent Overstreet } 2701c6fdbd8SKent Overstreet 271959f7368SKent Overstreet BUG_ON(start >= folio_end_pos(folio)); 272959f7368SKent Overstreet BUG_ON(end <= folio_pos(folio)); 273959f7368SKent Overstreet 274959f7368SKent Overstreet start_offset = max(start, folio_pos(folio)) - folio_pos(folio); 2756b9857b2SBrian Foster end_offset = min_t(u64, end, folio_end_pos(folio)) - folio_pos(folio); 276959f7368SKent Overstreet 277959f7368SKent Overstreet /* Folio boundary? Nothing to do */ 278959f7368SKent Overstreet if (start_offset == 0 && 279959f7368SKent Overstreet end_offset == folio_size(folio)) { 280959f7368SKent Overstreet ret = 0; 281959f7368SKent Overstreet goto unlock; 282959f7368SKent Overstreet } 283959f7368SKent Overstreet 28430bff594SKent Overstreet s = bch2_folio_create(folio, 0); 285a99b1cafSKent Overstreet if (!s) { 286a99b1cafSKent Overstreet ret = -ENOMEM; 287a99b1cafSKent Overstreet goto unlock; 288a99b1cafSKent Overstreet } 289a99b1cafSKent Overstreet 29030bff594SKent Overstreet if (!folio_test_uptodate(folio)) { 29130bff594SKent Overstreet ret = bch2_read_single_folio(folio, mapping); 2921c6fdbd8SKent Overstreet if (ret) 2931c6fdbd8SKent Overstreet goto unlock; 2941c6fdbd8SKent Overstreet } 2951c6fdbd8SKent Overstreet 29634fdcf06SKent Overstreet ret = bch2_folio_set(c, inode_inum(inode), &folio, 1); 29734fdcf06SKent Overstreet if (ret) 29834fdcf06SKent Overstreet goto unlock; 299c437e153SKent Overstreet 300a99b1cafSKent Overstreet for (i = round_up(start_offset, block_bytes(c)) >> 9; 301a99b1cafSKent Overstreet i < round_down(end_offset, block_bytes(c)) >> 9; 302a99b1cafSKent Overstreet i++) { 303a99b1cafSKent Overstreet s->s[i].nr_replicas = 0; 304a1774a05SKent Overstreet 305a1774a05SKent Overstreet i_sectors_delta -= s->s[i].state == SECTOR_dirty; 306dbbfca9fSKent Overstreet bch2_folio_sector_set(folio, s, i, SECTOR_unallocated); 307a99b1cafSKent Overstreet } 308a99b1cafSKent Overstreet 309dbbfca9fSKent Overstreet bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); 310b19d307dSKent Overstreet 31174163da7SKent Overstreet /* 31230bff594SKent Overstreet * Caller needs to know whether this folio will be written out by 31374163da7SKent Overstreet * writeback - doing an i_size update if necessary - or whether it will 3144ad6aa46SBrian Foster * be responsible for the i_size update. 3154ad6aa46SBrian Foster * 3164ad6aa46SBrian Foster * Note that we shouldn't ever see a folio beyond EOF, but check and 3174ad6aa46SBrian Foster * warn if so. This has been observed by failure to clean up folios 3184ad6aa46SBrian Foster * after a short write and there's still a chance reclaim will fix 3194ad6aa46SBrian Foster * things up. 32074163da7SKent Overstreet */ 3214ad6aa46SBrian Foster WARN_ON_ONCE(folio_pos(folio) >= inode->v.i_size); 3224ad6aa46SBrian Foster end_pos = folio_end_pos(folio); 3234ad6aa46SBrian Foster if (inode->v.i_size > folio_pos(folio)) 3246b9857b2SBrian Foster end_pos = min_t(u64, inode->v.i_size, end_pos); 325bf98ee10SBrian Foster ret = s->s[folio_pos_to_s(folio, end_pos - 1)].state >= SECTOR_dirty; 32674163da7SKent Overstreet 32730bff594SKent Overstreet folio_zero_segment(folio, start_offset, end_offset); 328a99b1cafSKent Overstreet 3291c6fdbd8SKent Overstreet /* 3301c6fdbd8SKent Overstreet * Bit of a hack - we don't want truncate to fail due to -ENOSPC. 3311c6fdbd8SKent Overstreet * 33230bff594SKent Overstreet * XXX: because we aren't currently tracking whether the folio has actual 3331c6fdbd8SKent Overstreet * data in it (vs. just 0s, or only partially written) this wrong. ick. 3341c6fdbd8SKent Overstreet */ 33530bff594SKent Overstreet BUG_ON(bch2_get_folio_disk_reservation(c, inode, folio, false)); 3361c6fdbd8SKent Overstreet 3379ba2eb25SKent Overstreet /* 3389ba2eb25SKent Overstreet * This removes any writeable userspace mappings; we need to force 3399ba2eb25SKent Overstreet * .page_mkwrite to be called again before any mmapped writes, to 3409ba2eb25SKent Overstreet * redirty the full page: 3419ba2eb25SKent Overstreet */ 34230bff594SKent Overstreet folio_mkclean(folio); 34330bff594SKent Overstreet filemap_dirty_folio(mapping, folio); 3441c6fdbd8SKent Overstreet unlock: 34530bff594SKent Overstreet folio_unlock(folio); 34630bff594SKent Overstreet folio_put(folio); 3471c6fdbd8SKent Overstreet out: 3481c6fdbd8SKent Overstreet return ret; 3491c6fdbd8SKent Overstreet } 3501c6fdbd8SKent Overstreet 351959f7368SKent Overstreet static int bch2_truncate_folio(struct bch_inode_info *inode, loff_t from) 3521c6fdbd8SKent Overstreet { 353959f7368SKent Overstreet return __bch2_truncate_folio(inode, from >> PAGE_SHIFT, 354959f7368SKent Overstreet from, ANYSINT_MAX(loff_t)); 3551c6fdbd8SKent Overstreet } 3561c6fdbd8SKent Overstreet 357959f7368SKent Overstreet static int bch2_truncate_folios(struct bch_inode_info *inode, 35874163da7SKent Overstreet loff_t start, loff_t end) 35974163da7SKent Overstreet { 360959f7368SKent Overstreet int ret = __bch2_truncate_folio(inode, start >> PAGE_SHIFT, 36174163da7SKent Overstreet start, end); 36274163da7SKent Overstreet 36374163da7SKent Overstreet if (ret >= 0 && 36474163da7SKent Overstreet start >> PAGE_SHIFT != end >> PAGE_SHIFT) 365959f7368SKent Overstreet ret = __bch2_truncate_folio(inode, 366959f7368SKent Overstreet (end - 1) >> PAGE_SHIFT, 36774163da7SKent Overstreet start, end); 36874163da7SKent Overstreet return ret; 36974163da7SKent Overstreet } 37074163da7SKent Overstreet 37168a507a2SKent Overstreet static int bch2_extend(struct mnt_idmap *idmap, 37268a507a2SKent Overstreet struct bch_inode_info *inode, 373e0541a93SKent Overstreet struct bch_inode_unpacked *inode_u, 374e0541a93SKent Overstreet struct iattr *iattr) 3751c6fdbd8SKent Overstreet { 3761c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 3771c6fdbd8SKent Overstreet int ret; 3781c6fdbd8SKent Overstreet 379e0541a93SKent Overstreet /* 380e0541a93SKent Overstreet * sync appends: 3812925fc49SKent Overstreet * 3822925fc49SKent Overstreet * this has to be done _before_ extending i_size: 383e0541a93SKent Overstreet */ 384e0541a93SKent Overstreet ret = filemap_write_and_wait_range(mapping, inode_u->bi_size, S64_MAX); 3851c6fdbd8SKent Overstreet if (ret) 3861c6fdbd8SKent Overstreet return ret; 3871c6fdbd8SKent Overstreet 3881c6fdbd8SKent Overstreet truncate_setsize(&inode->v, iattr->ia_size); 3891c6fdbd8SKent Overstreet 39068a507a2SKent Overstreet return bch2_setattr_nonsize(idmap, inode, iattr); 3911c6fdbd8SKent Overstreet } 3921c6fdbd8SKent Overstreet 393*791236b8SJoshua Ashton static int bch2_truncate_finish_fn(struct btree_trans *trans, 394*791236b8SJoshua Ashton struct bch_inode_info *inode, 39554e2264eSKent Overstreet struct bch_inode_unpacked *bi, 39654e2264eSKent Overstreet void *p) 39754e2264eSKent Overstreet { 39854e2264eSKent Overstreet bi->bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; 39954e2264eSKent Overstreet return 0; 40054e2264eSKent Overstreet } 40154e2264eSKent Overstreet 402*791236b8SJoshua Ashton static int bch2_truncate_start_fn(struct btree_trans *trans, 403*791236b8SJoshua Ashton struct bch_inode_info *inode, 40454e2264eSKent Overstreet struct bch_inode_unpacked *bi, void *p) 40554e2264eSKent Overstreet { 40654e2264eSKent Overstreet u64 *new_i_size = p; 40754e2264eSKent Overstreet 40854e2264eSKent Overstreet bi->bi_flags |= BCH_INODE_I_SIZE_DIRTY; 40954e2264eSKent Overstreet bi->bi_size = *new_i_size; 41054e2264eSKent Overstreet return 0; 41154e2264eSKent Overstreet } 41254e2264eSKent Overstreet 41368a507a2SKent Overstreet int bch2_truncate(struct mnt_idmap *idmap, 41468a507a2SKent Overstreet struct bch_inode_info *inode, struct iattr *iattr) 4151c6fdbd8SKent Overstreet { 4161c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 4171c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 418e0541a93SKent Overstreet struct bch_inode_unpacked inode_u; 41954e2264eSKent Overstreet u64 new_i_size = iattr->ia_size; 4202e87eae1SKent Overstreet s64 i_sectors_delta = 0; 4211c6fdbd8SKent Overstreet int ret = 0; 4221c6fdbd8SKent Overstreet 42368a507a2SKent Overstreet /* 42478d66ab1SDan Robertson * If the truncate call with change the size of the file, the 42578d66ab1SDan Robertson * cmtimes should be updated. If the size will not change, we 42678d66ab1SDan Robertson * do not need to update the cmtimes. 42768a507a2SKent Overstreet */ 42878d66ab1SDan Robertson if (iattr->ia_size != inode->v.i_size) { 42968a507a2SKent Overstreet if (!(iattr->ia_valid & ATTR_MTIME)) 43068a507a2SKent Overstreet ktime_get_coarse_real_ts64(&iattr->ia_mtime); 43168a507a2SKent Overstreet if (!(iattr->ia_valid & ATTR_CTIME)) 43268a507a2SKent Overstreet ktime_get_coarse_real_ts64(&iattr->ia_ctime); 43368a507a2SKent Overstreet iattr->ia_valid |= ATTR_MTIME|ATTR_CTIME; 43478d66ab1SDan Robertson } 43568a507a2SKent Overstreet 4361c6fdbd8SKent Overstreet inode_dio_wait(&inode->v); 437a7ecd30cSKent Overstreet bch2_pagecache_block_get(inode); 4381c6fdbd8SKent Overstreet 4396fed42bbSKent Overstreet ret = bch2_inode_find_by_inum(c, inode_inum(inode), &inode_u); 440e0541a93SKent Overstreet if (ret) 441e0541a93SKent Overstreet goto err; 4421c6fdbd8SKent Overstreet 443c45d473dSKent Overstreet /* 444c45d473dSKent Overstreet * check this before next assertion; on filesystem error our normal 445c45d473dSKent Overstreet * invariants are a bit broken (truncate has to truncate the page cache 446c45d473dSKent Overstreet * before the inode). 447c45d473dSKent Overstreet */ 448c45d473dSKent Overstreet ret = bch2_journal_error(&c->journal); 449c45d473dSKent Overstreet if (ret) 450c45d473dSKent Overstreet goto err; 451c45d473dSKent Overstreet 4528eb71e9eSKent Overstreet WARN_ONCE(!test_bit(EI_INODE_ERROR, &inode->ei_flags) && 4538eb71e9eSKent Overstreet inode->v.i_size < inode_u.bi_size, 4548eb71e9eSKent Overstreet "truncate spotted in mem i_size < btree i_size: %llu < %llu\n", 4558eb71e9eSKent Overstreet (u64) inode->v.i_size, inode_u.bi_size); 456e0541a93SKent Overstreet 457e0541a93SKent Overstreet if (iattr->ia_size > inode->v.i_size) { 45868a507a2SKent Overstreet ret = bch2_extend(idmap, inode, &inode_u, iattr); 45954e2264eSKent Overstreet goto err; 4601c6fdbd8SKent Overstreet } 4611c6fdbd8SKent Overstreet 46268a507a2SKent Overstreet iattr->ia_valid &= ~ATTR_SIZE; 46368a507a2SKent Overstreet 464959f7368SKent Overstreet ret = bch2_truncate_folio(inode, iattr->ia_size); 46574163da7SKent Overstreet if (unlikely(ret < 0)) 46654e2264eSKent Overstreet goto err; 4671c6fdbd8SKent Overstreet 4686cc3535dSKent Overstreet /* 4696cc3535dSKent Overstreet * When extending, we're going to write the new i_size to disk 4706cc3535dSKent Overstreet * immediately so we need to flush anything above the current on disk 4716cc3535dSKent Overstreet * i_size first: 4726cc3535dSKent Overstreet * 4736cc3535dSKent Overstreet * Also, when extending we need to flush the page that i_size currently 4746cc3535dSKent Overstreet * straddles - if it's mapped to userspace, we need to ensure that 4756cc3535dSKent Overstreet * userspace has to redirty it and call .mkwrite -> set_page_dirty 4766cc3535dSKent Overstreet * again to allocate the part of the page that was extended. 4776cc3535dSKent Overstreet */ 478e0541a93SKent Overstreet if (iattr->ia_size > inode_u.bi_size) 4791c6fdbd8SKent Overstreet ret = filemap_write_and_wait_range(mapping, 480e0541a93SKent Overstreet inode_u.bi_size, 4811c6fdbd8SKent Overstreet iattr->ia_size - 1); 4821c6fdbd8SKent Overstreet else if (iattr->ia_size & (PAGE_SIZE - 1)) 4831c6fdbd8SKent Overstreet ret = filemap_write_and_wait_range(mapping, 4841c6fdbd8SKent Overstreet round_down(iattr->ia_size, PAGE_SIZE), 4851c6fdbd8SKent Overstreet iattr->ia_size - 1); 4861c6fdbd8SKent Overstreet if (ret) 48754e2264eSKent Overstreet goto err; 4881c6fdbd8SKent Overstreet 48954e2264eSKent Overstreet mutex_lock(&inode->ei_update_lock); 49054e2264eSKent Overstreet ret = bch2_write_inode(c, inode, bch2_truncate_start_fn, 49154e2264eSKent Overstreet &new_i_size, 0); 49254e2264eSKent Overstreet mutex_unlock(&inode->ei_update_lock); 4931c6fdbd8SKent Overstreet 4941c6fdbd8SKent Overstreet if (unlikely(ret)) 49554e2264eSKent Overstreet goto err; 4961c6fdbd8SKent Overstreet 4971c6fdbd8SKent Overstreet truncate_setsize(&inode->v, iattr->ia_size); 4981c6fdbd8SKent Overstreet 4998c6d298aSKent Overstreet ret = bch2_fpunch(c, inode_inum(inode), 500a99b1cafSKent Overstreet round_up(iattr->ia_size, block_bytes(c)) >> 9, 50168a2054dSKent Overstreet U64_MAX, &i_sectors_delta); 502dbbfca9fSKent Overstreet bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); 5032e87eae1SKent Overstreet 504b33bf1bcSKent Overstreet bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks && 505b33bf1bcSKent Overstreet !bch2_journal_error(&c->journal), c, 506b33bf1bcSKent Overstreet "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", 507b33bf1bcSKent Overstreet inode->v.i_ino, (u64) inode->v.i_blocks, 508b33bf1bcSKent Overstreet inode->ei_inode.bi_sectors); 5091c6fdbd8SKent Overstreet if (unlikely(ret)) 51054e2264eSKent Overstreet goto err; 5111c6fdbd8SKent Overstreet 51254e2264eSKent Overstreet mutex_lock(&inode->ei_update_lock); 51368a507a2SKent Overstreet ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL, 0); 51454e2264eSKent Overstreet mutex_unlock(&inode->ei_update_lock); 51568a507a2SKent Overstreet 51668a507a2SKent Overstreet ret = bch2_setattr_nonsize(idmap, inode, iattr); 51754e2264eSKent Overstreet err: 518a7ecd30cSKent Overstreet bch2_pagecache_block_put(inode); 5195c1ef830SKent Overstreet return bch2_err_class(ret); 5201c6fdbd8SKent Overstreet } 5211c6fdbd8SKent Overstreet 5221c6fdbd8SKent Overstreet /* fallocate: */ 5231c6fdbd8SKent Overstreet 524*791236b8SJoshua Ashton static int inode_update_times_fn(struct btree_trans *trans, 525*791236b8SJoshua Ashton struct bch_inode_info *inode, 526050197b1SKent Overstreet struct bch_inode_unpacked *bi, void *p) 527050197b1SKent Overstreet { 528050197b1SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 529050197b1SKent Overstreet 530050197b1SKent Overstreet bi->bi_mtime = bi->bi_ctime = bch2_current_time(c); 531050197b1SKent Overstreet return 0; 532050197b1SKent Overstreet } 533050197b1SKent Overstreet 5342e87eae1SKent Overstreet static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len) 5351c6fdbd8SKent Overstreet { 5361c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 53774163da7SKent Overstreet u64 end = offset + len; 53874163da7SKent Overstreet u64 block_start = round_up(offset, block_bytes(c)); 53974163da7SKent Overstreet u64 block_end = round_down(end, block_bytes(c)); 54074163da7SKent Overstreet bool truncated_last_page; 5411c6fdbd8SKent Overstreet int ret = 0; 5421c6fdbd8SKent Overstreet 543959f7368SKent Overstreet ret = bch2_truncate_folios(inode, offset, end); 54474163da7SKent Overstreet if (unlikely(ret < 0)) 5451c6fdbd8SKent Overstreet goto err; 5461c6fdbd8SKent Overstreet 54774163da7SKent Overstreet truncated_last_page = ret; 5481c6fdbd8SKent Overstreet 54974163da7SKent Overstreet truncate_pagecache_range(&inode->v, offset, end - 1); 5501c6fdbd8SKent Overstreet 55174163da7SKent Overstreet if (block_start < block_end) { 5522e87eae1SKent Overstreet s64 i_sectors_delta = 0; 5532e87eae1SKent Overstreet 5548c6d298aSKent Overstreet ret = bch2_fpunch(c, inode_inum(inode), 55574163da7SKent Overstreet block_start >> 9, block_end >> 9, 5562e87eae1SKent Overstreet &i_sectors_delta); 557dbbfca9fSKent Overstreet bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); 5582e87eae1SKent Overstreet } 559050197b1SKent Overstreet 560050197b1SKent Overstreet mutex_lock(&inode->ei_update_lock); 56174163da7SKent Overstreet if (end >= inode->v.i_size && !truncated_last_page) { 56274163da7SKent Overstreet ret = bch2_write_inode_size(c, inode, inode->v.i_size, 56374163da7SKent Overstreet ATTR_MTIME|ATTR_CTIME); 56474163da7SKent Overstreet } else { 565050197b1SKent Overstreet ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, 56674163da7SKent Overstreet ATTR_MTIME|ATTR_CTIME); 56774163da7SKent Overstreet } 568050197b1SKent Overstreet mutex_unlock(&inode->ei_update_lock); 5691c6fdbd8SKent Overstreet err: 5701c6fdbd8SKent Overstreet return ret; 5711c6fdbd8SKent Overstreet } 5721c6fdbd8SKent Overstreet 5732e87eae1SKent Overstreet static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, 5745f786787SKent Overstreet loff_t offset, loff_t len, 5755f786787SKent Overstreet bool insert) 5761c6fdbd8SKent Overstreet { 5771c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 5781c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 57907a1006aSKent Overstreet struct bkey_buf copy; 580d69f41d6SKent Overstreet struct btree_trans trans; 58167e0dd8fSKent Overstreet struct btree_iter src, dst, del; 5825f786787SKent Overstreet loff_t shift, new_size; 5835f786787SKent Overstreet u64 src_start; 58450dc0f69SKent Overstreet int ret = 0; 5851c6fdbd8SKent Overstreet 5861c6fdbd8SKent Overstreet if ((offset | len) & (block_bytes(c) - 1)) 5871c6fdbd8SKent Overstreet return -EINVAL; 5881c6fdbd8SKent Overstreet 5895f786787SKent Overstreet if (insert) { 5905f786787SKent Overstreet if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len) 59174163da7SKent Overstreet return -EFBIG; 5925f786787SKent Overstreet 5935f786787SKent Overstreet if (offset >= inode->v.i_size) 59474163da7SKent Overstreet return -EINVAL; 5955f786787SKent Overstreet 5965f786787SKent Overstreet src_start = U64_MAX; 5975f786787SKent Overstreet shift = len; 5985f786787SKent Overstreet } else { 5991c6fdbd8SKent Overstreet if (offset + len >= inode->v.i_size) 60074163da7SKent Overstreet return -EINVAL; 6011c6fdbd8SKent Overstreet 6025f786787SKent Overstreet src_start = offset + len; 6035f786787SKent Overstreet shift = -len; 6045f786787SKent Overstreet } 6051c6fdbd8SKent Overstreet 6065f786787SKent Overstreet new_size = inode->v.i_size + shift; 6071c6fdbd8SKent Overstreet 608dbbfca9fSKent Overstreet ret = bch2_write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX); 6091c6fdbd8SKent Overstreet if (ret) 61074163da7SKent Overstreet return ret; 6111c6fdbd8SKent Overstreet 6125f786787SKent Overstreet if (insert) { 6135f786787SKent Overstreet i_size_write(&inode->v, new_size); 6145f786787SKent Overstreet mutex_lock(&inode->ei_update_lock); 6155f786787SKent Overstreet ret = bch2_write_inode_size(c, inode, new_size, 6165f786787SKent Overstreet ATTR_MTIME|ATTR_CTIME); 6175f786787SKent Overstreet mutex_unlock(&inode->ei_update_lock); 6185f786787SKent Overstreet } else { 6192e87eae1SKent Overstreet s64 i_sectors_delta = 0; 6202e87eae1SKent Overstreet 6218c6d298aSKent Overstreet ret = bch2_fpunch(c, inode_inum(inode), 6222e87eae1SKent Overstreet offset >> 9, (offset + len) >> 9, 6232e87eae1SKent Overstreet &i_sectors_delta); 624dbbfca9fSKent Overstreet bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); 6252e87eae1SKent Overstreet 62663095894SKent Overstreet if (ret) 62774163da7SKent Overstreet return ret; 6285f786787SKent Overstreet } 6298ef231bdSKent Overstreet 63050dc0f69SKent Overstreet bch2_bkey_buf_init(©); 631f7beb4caSKent Overstreet bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); 63267e0dd8fSKent Overstreet bch2_trans_iter_init(&trans, &src, BTREE_ID_extents, 6335f786787SKent Overstreet POS(inode->v.i_ino, src_start >> 9), 63463095894SKent Overstreet BTREE_ITER_INTENT); 63567e0dd8fSKent Overstreet bch2_trans_copy_iter(&dst, &src); 63667e0dd8fSKent Overstreet bch2_trans_copy_iter(&del, &src); 6375f786787SKent Overstreet 638549d173cSKent Overstreet while (ret == 0 || 639549d173cSKent Overstreet bch2_err_matches(ret, BCH_ERR_transaction_restart)) { 64063095894SKent Overstreet struct disk_reservation disk_res = 64163095894SKent Overstreet bch2_disk_reservation_init(c, 0); 64263095894SKent Overstreet struct bkey_i delete; 64363095894SKent Overstreet struct bkey_s_c k; 64463095894SKent Overstreet struct bpos next_pos; 6455f786787SKent Overstreet struct bpos move_pos = POS(inode->v.i_ino, offset >> 9); 6465f786787SKent Overstreet struct bpos atomic_end; 6472d594dfbSKent Overstreet unsigned trigger_flags = 0; 6486fed42bbSKent Overstreet u32 snapshot; 6496fed42bbSKent Overstreet 6506fed42bbSKent Overstreet bch2_trans_begin(&trans); 6516fed42bbSKent Overstreet 6526fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, 6536fed42bbSKent Overstreet inode->ei_subvol, &snapshot); 6546fed42bbSKent Overstreet if (ret) 6556fed42bbSKent Overstreet continue; 6566fed42bbSKent Overstreet 6576fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&src, snapshot); 6586fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&dst, snapshot); 6596fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&del, snapshot); 66063095894SKent Overstreet 661700c25b3SKent Overstreet bch2_trans_begin(&trans); 662700c25b3SKent Overstreet 6635f786787SKent Overstreet k = insert 66467e0dd8fSKent Overstreet ? bch2_btree_iter_peek_prev(&src) 665c72f687aSKent Overstreet : bch2_btree_iter_peek_upto(&src, POS(inode->v.i_ino, U64_MAX)); 66663095894SKent Overstreet if ((ret = bkey_err(k))) 66750dc0f69SKent Overstreet continue; 66863095894SKent Overstreet 66963095894SKent Overstreet if (!k.k || k.k->p.inode != inode->v.i_ino) 67063095894SKent Overstreet break; 67163095894SKent Overstreet 6725f786787SKent Overstreet if (insert && 673e88a75ebSKent Overstreet bkey_le(k.k->p, POS(inode->v.i_ino, offset >> 9))) 6745f786787SKent Overstreet break; 6755f786787SKent Overstreet reassemble: 67607a1006aSKent Overstreet bch2_bkey_buf_reassemble(©, c, k); 6775f786787SKent Overstreet 6785f786787SKent Overstreet if (insert && 679e88a75ebSKent Overstreet bkey_lt(bkey_start_pos(k.k), move_pos)) 68035189e09SKent Overstreet bch2_cut_front(move_pos, copy.k); 6815f786787SKent Overstreet 68235189e09SKent Overstreet copy.k->k.p.offset += shift >> 9; 68367e0dd8fSKent Overstreet bch2_btree_iter_set_pos(&dst, bkey_start_pos(©.k->k)); 6841c6fdbd8SKent Overstreet 68567e0dd8fSKent Overstreet ret = bch2_extent_atomic_end(&trans, &dst, copy.k, &atomic_end); 6863c7f3b7aSKent Overstreet if (ret) 68750dc0f69SKent Overstreet continue; 688e2d9912cSKent Overstreet 689e88a75ebSKent Overstreet if (!bkey_eq(atomic_end, copy.k->k.p)) { 6905f786787SKent Overstreet if (insert) { 6915f786787SKent Overstreet move_pos = atomic_end; 6925f786787SKent Overstreet move_pos.offset -= shift >> 9; 6935f786787SKent Overstreet goto reassemble; 6945f786787SKent Overstreet } else { 695085ab693SKent Overstreet bch2_cut_back(atomic_end, copy.k); 6965f786787SKent Overstreet } 6975f786787SKent Overstreet } 6985f786787SKent Overstreet 69963095894SKent Overstreet bkey_init(&delete.k); 700283eda57SKent Overstreet delete.k.p = copy.k->k.p; 701283eda57SKent Overstreet delete.k.size = copy.k->k.size; 702283eda57SKent Overstreet delete.k.p.offset -= shift >> 9; 70367e0dd8fSKent Overstreet bch2_btree_iter_set_pos(&del, bkey_start_pos(&delete.k)); 7041c6fdbd8SKent Overstreet 7055f786787SKent Overstreet next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p; 70663095894SKent Overstreet 7077c4ca54aSKent Overstreet if (copy.k->k.size != k.k->size) { 70863095894SKent Overstreet /* We might end up splitting compressed extents: */ 70963095894SKent Overstreet unsigned nr_ptrs = 7104de77495SKent Overstreet bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k)); 71163095894SKent Overstreet 71263095894SKent Overstreet ret = bch2_disk_reservation_get(c, &disk_res, 71335189e09SKent Overstreet copy.k->k.size, nr_ptrs, 7141c6fdbd8SKent Overstreet BCH_DISK_RESERVATION_NOFAIL); 7151c6fdbd8SKent Overstreet BUG_ON(ret); 71663095894SKent Overstreet } 7171c6fdbd8SKent Overstreet 71867e0dd8fSKent Overstreet ret = bch2_btree_iter_traverse(&del) ?: 71967e0dd8fSKent Overstreet bch2_trans_update(&trans, &del, &delete, trigger_flags) ?: 72067e0dd8fSKent Overstreet bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?: 72168a2054dSKent Overstreet bch2_trans_commit(&trans, &disk_res, NULL, 7222d594dfbSKent Overstreet BTREE_INSERT_NOFAIL); 7231c6fdbd8SKent Overstreet bch2_disk_reservation_put(c, &disk_res); 72450dc0f69SKent Overstreet 72563095894SKent Overstreet if (!ret) 72667e0dd8fSKent Overstreet bch2_btree_iter_set_pos(&src, next_pos); 72750dc0f69SKent Overstreet } 72867e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &del); 72967e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &dst); 73067e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &src); 73150dc0f69SKent Overstreet bch2_trans_exit(&trans); 73250dc0f69SKent Overstreet bch2_bkey_buf_exit(©, c); 73363095894SKent Overstreet 7348ef231bdSKent Overstreet if (ret) 73574163da7SKent Overstreet return ret; 7361c6fdbd8SKent Overstreet 73774163da7SKent Overstreet mutex_lock(&inode->ei_update_lock); 7385f786787SKent Overstreet if (!insert) { 7398ef231bdSKent Overstreet i_size_write(&inode->v, new_size); 7408ef231bdSKent Overstreet ret = bch2_write_inode_size(c, inode, new_size, 7418ef231bdSKent Overstreet ATTR_MTIME|ATTR_CTIME); 74274163da7SKent Overstreet } else { 74374163da7SKent Overstreet /* We need an inode update to update bi_journal_seq for fsync: */ 74474163da7SKent Overstreet ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, 74574163da7SKent Overstreet ATTR_MTIME|ATTR_CTIME); 7465f786787SKent Overstreet } 74774163da7SKent Overstreet mutex_unlock(&inode->ei_update_lock); 7481c6fdbd8SKent Overstreet return ret; 7491c6fdbd8SKent Overstreet } 7501c6fdbd8SKent Overstreet 751694015c2SKent Overstreet static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, 752694015c2SKent Overstreet u64 start_sector, u64 end_sector) 7531c6fdbd8SKent Overstreet { 7541c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 755190fa7afSKent Overstreet struct btree_trans trans; 75667e0dd8fSKent Overstreet struct btree_iter iter; 757694015c2SKent Overstreet struct bpos end_pos = POS(inode->v.i_ino, end_sector); 75801ad6737SKent Overstreet struct bch_io_opts opts; 759694015c2SKent Overstreet int ret = 0; 7601c6fdbd8SKent Overstreet 76101ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 762f7beb4caSKent Overstreet bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512); 7631c6fdbd8SKent Overstreet 76467e0dd8fSKent Overstreet bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, 765694015c2SKent Overstreet POS(inode->v.i_ino, start_sector), 766190fa7afSKent Overstreet BTREE_ITER_SLOTS|BTREE_ITER_INTENT); 7671c6fdbd8SKent Overstreet 768e88a75ebSKent Overstreet while (!ret && bkey_lt(iter.pos, end_pos)) { 7692e87eae1SKent Overstreet s64 i_sectors_delta = 0; 770190fa7afSKent Overstreet struct quota_res quota_res = { 0 }; 7711c6fdbd8SKent Overstreet struct bkey_s_c k; 772694015c2SKent Overstreet unsigned sectors; 773a09818c7SKent Overstreet bool is_allocation; 774a09818c7SKent Overstreet u64 hole_start, hole_end; 7756fed42bbSKent Overstreet u32 snapshot; 7761c6fdbd8SKent Overstreet 777163e885aSKent Overstreet bch2_trans_begin(&trans); 778a8abd3a7SKent Overstreet 7796fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, 7806fed42bbSKent Overstreet inode->ei_subvol, &snapshot); 7816fed42bbSKent Overstreet if (ret) 7826fed42bbSKent Overstreet goto bkey_err; 7836fed42bbSKent Overstreet 7846fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&iter, snapshot); 7856fed42bbSKent Overstreet 78667e0dd8fSKent Overstreet k = bch2_btree_iter_peek_slot(&iter); 7870f238367SKent Overstreet if ((ret = bkey_err(k))) 7880f238367SKent Overstreet goto bkey_err; 7891c6fdbd8SKent Overstreet 790a09818c7SKent Overstreet hole_start = iter.pos.offset; 791a09818c7SKent Overstreet hole_end = bpos_min(k.k->p, end_pos).offset; 792a09818c7SKent Overstreet is_allocation = bkey_extent_is_allocation(k.k); 793a09818c7SKent Overstreet 7941c6fdbd8SKent Overstreet /* already reserved */ 79579203111SKent Overstreet if (bkey_extent_is_reservation(k) && 79679203111SKent Overstreet bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) { 79767e0dd8fSKent Overstreet bch2_btree_iter_advance(&iter); 7981c6fdbd8SKent Overstreet continue; 7991c6fdbd8SKent Overstreet } 8001c6fdbd8SKent Overstreet 801190fa7afSKent Overstreet if (bkey_extent_is_data(k.k) && 802190fa7afSKent Overstreet !(mode & FALLOC_FL_ZERO_RANGE)) { 80367e0dd8fSKent Overstreet bch2_btree_iter_advance(&iter); 8041c6fdbd8SKent Overstreet continue; 8051c6fdbd8SKent Overstreet } 8061c6fdbd8SKent Overstreet 807a09818c7SKent Overstreet if (!(mode & FALLOC_FL_ZERO_RANGE)) { 8084198bf03SKent Overstreet /* 8094198bf03SKent Overstreet * Lock ordering - can't be holding btree locks while 8104198bf03SKent Overstreet * blocking on a folio lock: 8114198bf03SKent Overstreet */ 8124198bf03SKent Overstreet if (bch2_clamp_data_hole(&inode->v, 8134198bf03SKent Overstreet &hole_start, 8144198bf03SKent Overstreet &hole_end, 8154198bf03SKent Overstreet opts.data_replicas, true)) 816a09818c7SKent Overstreet ret = drop_locks_do(&trans, 817a09818c7SKent Overstreet (bch2_clamp_data_hole(&inode->v, 818a09818c7SKent Overstreet &hole_start, 819a09818c7SKent Overstreet &hole_end, 8204198bf03SKent Overstreet opts.data_replicas, false), 0)); 821a09818c7SKent Overstreet bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, hole_start)); 822a8b3a677SKent Overstreet 823a09818c7SKent Overstreet if (ret) 824a09818c7SKent Overstreet goto bkey_err; 8251c6fdbd8SKent Overstreet 826a09818c7SKent Overstreet if (hole_start == hole_end) 827a09818c7SKent Overstreet continue; 828a09818c7SKent Overstreet } 829a09818c7SKent Overstreet 830a09818c7SKent Overstreet sectors = hole_end - hole_start; 831a09818c7SKent Overstreet 832a09818c7SKent Overstreet if (!is_allocation) { 8331c6fdbd8SKent Overstreet ret = bch2_quota_reservation_add(c, inode, 834a09818c7SKent Overstreet "a_res, sectors, true); 8351c6fdbd8SKent Overstreet if (unlikely(ret)) 8360f238367SKent Overstreet goto bkey_err; 8371c6fdbd8SKent Overstreet } 8381c6fdbd8SKent Overstreet 83970de7a47SKent Overstreet ret = bch2_extent_fallocate(&trans, inode_inum(inode), &iter, 84070de7a47SKent Overstreet sectors, opts, &i_sectors_delta, 84170de7a47SKent Overstreet writepoint_hashed((unsigned long) current)); 8428810386fSKent Overstreet if (ret) 8438810386fSKent Overstreet goto bkey_err; 84470de7a47SKent Overstreet 845dbbfca9fSKent Overstreet bch2_i_sectors_acct(c, inode, "a_res, i_sectors_delta); 846a09818c7SKent Overstreet 847a09818c7SKent Overstreet drop_locks_do(&trans, 848dbbfca9fSKent Overstreet (bch2_mark_pagecache_reserved(inode, hole_start, iter.pos.offset), 0)); 8490f238367SKent Overstreet bkey_err: 850190fa7afSKent Overstreet bch2_quota_reservation_put(c, inode, "a_res); 851549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 8521c6fdbd8SKent Overstreet ret = 0; 85350dc0f69SKent Overstreet } 85474163da7SKent Overstreet 855098ef98dSKent Overstreet if (bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)) { 85674163da7SKent Overstreet struct quota_res quota_res = { 0 }; 85774163da7SKent Overstreet s64 i_sectors_delta = 0; 85874163da7SKent Overstreet 85974163da7SKent Overstreet bch2_fpunch_at(&trans, &iter, inode_inum(inode), 86074163da7SKent Overstreet end_sector, &i_sectors_delta); 861dbbfca9fSKent Overstreet bch2_i_sectors_acct(c, inode, "a_res, i_sectors_delta); 86274163da7SKent Overstreet bch2_quota_reservation_put(c, inode, "a_res); 86374163da7SKent Overstreet } 86474163da7SKent Overstreet 86567e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 866694015c2SKent Overstreet bch2_trans_exit(&trans); 867694015c2SKent Overstreet return ret; 868694015c2SKent Overstreet } 86950dc0f69SKent Overstreet 870694015c2SKent Overstreet static long bchfs_fallocate(struct bch_inode_info *inode, int mode, 871694015c2SKent Overstreet loff_t offset, loff_t len) 872694015c2SKent Overstreet { 873694015c2SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 87474163da7SKent Overstreet u64 end = offset + len; 87574163da7SKent Overstreet u64 block_start = round_down(offset, block_bytes(c)); 87674163da7SKent Overstreet u64 block_end = round_up(end, block_bytes(c)); 87774163da7SKent Overstreet bool truncated_last_page = false; 87874163da7SKent Overstreet int ret, ret2 = 0; 879694015c2SKent Overstreet 880694015c2SKent Overstreet if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) { 881694015c2SKent Overstreet ret = inode_newsize_ok(&inode->v, end); 882694015c2SKent Overstreet if (ret) 88374163da7SKent Overstreet return ret; 884694015c2SKent Overstreet } 885694015c2SKent Overstreet 886694015c2SKent Overstreet if (mode & FALLOC_FL_ZERO_RANGE) { 887959f7368SKent Overstreet ret = bch2_truncate_folios(inode, offset, end); 88874163da7SKent Overstreet if (unlikely(ret < 0)) 88974163da7SKent Overstreet return ret; 890694015c2SKent Overstreet 89174163da7SKent Overstreet truncated_last_page = ret; 892694015c2SKent Overstreet 893694015c2SKent Overstreet truncate_pagecache_range(&inode->v, offset, end - 1); 89474163da7SKent Overstreet 89574163da7SKent Overstreet block_start = round_up(offset, block_bytes(c)); 89674163da7SKent Overstreet block_end = round_down(end, block_bytes(c)); 897694015c2SKent Overstreet } 898694015c2SKent Overstreet 899694015c2SKent Overstreet ret = __bchfs_fallocate(inode, mode, block_start >> 9, block_end >> 9); 900e0541a93SKent Overstreet 901e0541a93SKent Overstreet /* 90274163da7SKent Overstreet * On -ENOSPC in ZERO_RANGE mode, we still want to do the inode update, 90374163da7SKent Overstreet * so that the VFS cache i_size is consistent with the btree i_size: 904e0541a93SKent Overstreet */ 90574163da7SKent Overstreet if (ret && 906098ef98dSKent Overstreet !(bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE))) 90774163da7SKent Overstreet return ret; 9081c6fdbd8SKent Overstreet 90974163da7SKent Overstreet if (mode & FALLOC_FL_KEEP_SIZE && end > inode->v.i_size) 910e0541a93SKent Overstreet end = inode->v.i_size; 91174163da7SKent Overstreet 91274163da7SKent Overstreet if (end >= inode->v.i_size && 91374163da7SKent Overstreet (((mode & FALLOC_FL_ZERO_RANGE) && !truncated_last_page) || 91474163da7SKent Overstreet !(mode & FALLOC_FL_KEEP_SIZE))) { 91574163da7SKent Overstreet spin_lock(&inode->v.i_lock); 916e0541a93SKent Overstreet i_size_write(&inode->v, end); 91774163da7SKent Overstreet spin_unlock(&inode->v.i_lock); 918e0541a93SKent Overstreet 9191c6fdbd8SKent Overstreet mutex_lock(&inode->ei_update_lock); 92074163da7SKent Overstreet ret2 = bch2_write_inode_size(c, inode, end, 0); 9211c6fdbd8SKent Overstreet mutex_unlock(&inode->ei_update_lock); 9221c6fdbd8SKent Overstreet } 92374163da7SKent Overstreet 92474163da7SKent Overstreet return ret ?: ret2; 9251c6fdbd8SKent Overstreet } 9261c6fdbd8SKent Overstreet 9271c6fdbd8SKent Overstreet long bch2_fallocate_dispatch(struct file *file, int mode, 9281c6fdbd8SKent Overstreet loff_t offset, loff_t len) 9291c6fdbd8SKent Overstreet { 9301c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 9312a9101a9SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 9322a9101a9SKent Overstreet long ret; 9332a9101a9SKent Overstreet 934d94189adSKent Overstreet if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fallocate)) 9352a9101a9SKent Overstreet return -EROFS; 9361c6fdbd8SKent Overstreet 93774163da7SKent Overstreet inode_lock(&inode->v); 93874163da7SKent Overstreet inode_dio_wait(&inode->v); 939a7ecd30cSKent Overstreet bch2_pagecache_block_get(inode); 94074163da7SKent Overstreet 94107bfcc0bSKent Overstreet ret = file_modified(file); 94207bfcc0bSKent Overstreet if (ret) 94307bfcc0bSKent Overstreet goto err; 94407bfcc0bSKent Overstreet 9451c6fdbd8SKent Overstreet if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE))) 9462a9101a9SKent Overstreet ret = bchfs_fallocate(inode, mode, offset, len); 9472a9101a9SKent Overstreet else if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE)) 9482a9101a9SKent Overstreet ret = bchfs_fpunch(inode, offset, len); 9492a9101a9SKent Overstreet else if (mode == FALLOC_FL_INSERT_RANGE) 9502a9101a9SKent Overstreet ret = bchfs_fcollapse_finsert(inode, offset, len, true); 9512a9101a9SKent Overstreet else if (mode == FALLOC_FL_COLLAPSE_RANGE) 9522a9101a9SKent Overstreet ret = bchfs_fcollapse_finsert(inode, offset, len, false); 9532a9101a9SKent Overstreet else 9542a9101a9SKent Overstreet ret = -EOPNOTSUPP; 95507bfcc0bSKent Overstreet err: 956a7ecd30cSKent Overstreet bch2_pagecache_block_put(inode); 95774163da7SKent Overstreet inode_unlock(&inode->v); 958d94189adSKent Overstreet bch2_write_ref_put(c, BCH_WRITE_REF_fallocate); 9591c6fdbd8SKent Overstreet 9605c1ef830SKent Overstreet return bch2_err_class(ret); 9611c6fdbd8SKent Overstreet } 9621c6fdbd8SKent Overstreet 963c72f687aSKent Overstreet /* 964c72f687aSKent Overstreet * Take a quota reservation for unallocated blocks in a given file range 965c72f687aSKent Overstreet * Does not check pagecache 966c72f687aSKent Overstreet */ 967e8540e56SKent Overstreet static int quota_reserve_range(struct bch_inode_info *inode, 968e8540e56SKent Overstreet struct quota_res *res, 969e8540e56SKent Overstreet u64 start, u64 end) 970e8540e56SKent Overstreet { 971e8540e56SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 972e8540e56SKent Overstreet struct btree_trans trans; 973e8540e56SKent Overstreet struct btree_iter iter; 974e8540e56SKent Overstreet struct bkey_s_c k; 975e8540e56SKent Overstreet u32 snapshot; 976e8540e56SKent Overstreet u64 sectors = end - start; 977e8540e56SKent Overstreet u64 pos = start; 978e8540e56SKent Overstreet int ret; 979e8540e56SKent Overstreet 980e8540e56SKent Overstreet bch2_trans_init(&trans, c, 0, 0); 981e8540e56SKent Overstreet retry: 982e8540e56SKent Overstreet bch2_trans_begin(&trans); 983e8540e56SKent Overstreet 984e8540e56SKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, inode->ei_subvol, &snapshot); 985e8540e56SKent Overstreet if (ret) 986e8540e56SKent Overstreet goto err; 987e8540e56SKent Overstreet 988e8540e56SKent Overstreet bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, 989e8540e56SKent Overstreet SPOS(inode->v.i_ino, pos, snapshot), 0); 990e8540e56SKent Overstreet 991e8540e56SKent Overstreet while (!(ret = btree_trans_too_many_iters(&trans)) && 992e8540e56SKent Overstreet (k = bch2_btree_iter_peek_upto(&iter, POS(inode->v.i_ino, end - 1))).k && 993e8540e56SKent Overstreet !(ret = bkey_err(k))) { 994e8540e56SKent Overstreet if (bkey_extent_is_allocation(k.k)) { 995e8540e56SKent Overstreet u64 s = min(end, k.k->p.offset) - 996e8540e56SKent Overstreet max(start, bkey_start_offset(k.k)); 997e8540e56SKent Overstreet BUG_ON(s > sectors); 998e8540e56SKent Overstreet sectors -= s; 999e8540e56SKent Overstreet } 1000e8540e56SKent Overstreet bch2_btree_iter_advance(&iter); 1001e8540e56SKent Overstreet } 1002e8540e56SKent Overstreet pos = iter.pos.offset; 1003e8540e56SKent Overstreet bch2_trans_iter_exit(&trans, &iter); 1004e8540e56SKent Overstreet err: 1005e8540e56SKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 1006e8540e56SKent Overstreet goto retry; 1007e8540e56SKent Overstreet 1008e8540e56SKent Overstreet bch2_trans_exit(&trans); 1009e8540e56SKent Overstreet 1010e8540e56SKent Overstreet if (ret) 1011e8540e56SKent Overstreet return ret; 1012e8540e56SKent Overstreet 1013e8540e56SKent Overstreet return bch2_quota_reservation_add(c, inode, res, sectors, true); 1014e8540e56SKent Overstreet } 1015e8540e56SKent Overstreet 101676426098SKent Overstreet loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, 101776426098SKent Overstreet struct file *file_dst, loff_t pos_dst, 101876426098SKent Overstreet loff_t len, unsigned remap_flags) 101976426098SKent Overstreet { 102076426098SKent Overstreet struct bch_inode_info *src = file_bch_inode(file_src); 102176426098SKent Overstreet struct bch_inode_info *dst = file_bch_inode(file_dst); 102276426098SKent Overstreet struct bch_fs *c = src->v.i_sb->s_fs_info; 1023e8540e56SKent Overstreet struct quota_res quota_res = { 0 }; 10242e87eae1SKent Overstreet s64 i_sectors_delta = 0; 1025677fc056SKent Overstreet u64 aligned_len; 102676426098SKent Overstreet loff_t ret = 0; 102776426098SKent Overstreet 102876426098SKent Overstreet if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY)) 102976426098SKent Overstreet return -EINVAL; 103076426098SKent Overstreet 103176426098SKent Overstreet if (remap_flags & REMAP_FILE_DEDUP) 103276426098SKent Overstreet return -EOPNOTSUPP; 103376426098SKent Overstreet 103476426098SKent Overstreet if ((pos_src & (block_bytes(c) - 1)) || 103576426098SKent Overstreet (pos_dst & (block_bytes(c) - 1))) 103676426098SKent Overstreet return -EINVAL; 103776426098SKent Overstreet 103876426098SKent Overstreet if (src == dst && 103976426098SKent Overstreet abs(pos_src - pos_dst) < len) 104076426098SKent Overstreet return -EINVAL; 104176426098SKent Overstreet 104276426098SKent Overstreet bch2_lock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); 104376426098SKent Overstreet 104476426098SKent Overstreet inode_dio_wait(&src->v); 104576426098SKent Overstreet inode_dio_wait(&dst->v); 104676426098SKent Overstreet 104776426098SKent Overstreet ret = generic_remap_file_range_prep(file_src, pos_src, 104876426098SKent Overstreet file_dst, pos_dst, 104976426098SKent Overstreet &len, remap_flags); 105076426098SKent Overstreet if (ret < 0 || len == 0) 10512e87eae1SKent Overstreet goto err; 105276426098SKent Overstreet 1053677fc056SKent Overstreet aligned_len = round_up((u64) len, block_bytes(c)); 105476426098SKent Overstreet 1055dbbfca9fSKent Overstreet ret = bch2_write_invalidate_inode_pages_range(dst->v.i_mapping, 1056677fc056SKent Overstreet pos_dst, pos_dst + len - 1); 105776426098SKent Overstreet if (ret) 10582e87eae1SKent Overstreet goto err; 105976426098SKent Overstreet 1060e8540e56SKent Overstreet ret = quota_reserve_range(dst, "a_res, pos_dst >> 9, 1061e8540e56SKent Overstreet (pos_dst + aligned_len) >> 9); 1062e8540e56SKent Overstreet if (ret) 1063e8540e56SKent Overstreet goto err; 1064e8540e56SKent Overstreet 1065e8540e56SKent Overstreet file_update_time(file_dst); 1066e8540e56SKent Overstreet 1067dbbfca9fSKent Overstreet bch2_mark_pagecache_unallocated(src, pos_src >> 9, 1068dcfc593fSKent Overstreet (pos_src + aligned_len) >> 9); 106976426098SKent Overstreet 10702e87eae1SKent Overstreet ret = bch2_remap_range(c, 10716fed42bbSKent Overstreet inode_inum(dst), pos_dst >> 9, 10726fed42bbSKent Overstreet inode_inum(src), pos_src >> 9, 107376426098SKent Overstreet aligned_len >> 9, 10742e87eae1SKent Overstreet pos_dst + len, &i_sectors_delta); 10752e87eae1SKent Overstreet if (ret < 0) 10762e87eae1SKent Overstreet goto err; 107776426098SKent Overstreet 10782e87eae1SKent Overstreet /* 10792e87eae1SKent Overstreet * due to alignment, we might have remapped slightly more than requsted 10802e87eae1SKent Overstreet */ 1081677fc056SKent Overstreet ret = min((u64) ret << 9, (u64) len); 10822e87eae1SKent Overstreet 1083dbbfca9fSKent Overstreet bch2_i_sectors_acct(c, dst, "a_res, i_sectors_delta); 10842e87eae1SKent Overstreet 10852e87eae1SKent Overstreet spin_lock(&dst->v.i_lock); 1086677fc056SKent Overstreet if (pos_dst + ret > dst->v.i_size) 1087677fc056SKent Overstreet i_size_write(&dst->v, pos_dst + ret); 10882e87eae1SKent Overstreet spin_unlock(&dst->v.i_lock); 1089e7084c9cSKent Overstreet 109068a2054dSKent Overstreet if ((file_dst->f_flags & (__O_SYNC | O_DSYNC)) || 109168a2054dSKent Overstreet IS_SYNC(file_inode(file_dst))) 1092a8b3a677SKent Overstreet ret = bch2_flush_inode(c, dst); 10932e87eae1SKent Overstreet err: 1094e8540e56SKent Overstreet bch2_quota_reservation_put(c, dst, "a_res); 109576426098SKent Overstreet bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); 109676426098SKent Overstreet 10975c1ef830SKent Overstreet return bch2_err_class(ret); 109876426098SKent Overstreet } 109976426098SKent Overstreet 11001c6fdbd8SKent Overstreet /* fseek: */ 11011c6fdbd8SKent Overstreet 11021c6fdbd8SKent Overstreet static loff_t bch2_seek_data(struct file *file, u64 offset) 11031c6fdbd8SKent Overstreet { 11041c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 11051c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 1106424eb881SKent Overstreet struct btree_trans trans; 110767e0dd8fSKent Overstreet struct btree_iter iter; 11081c6fdbd8SKent Overstreet struct bkey_s_c k; 11096fed42bbSKent Overstreet subvol_inum inum = inode_inum(inode); 11101c6fdbd8SKent Overstreet u64 isize, next_data = MAX_LFS_FILESIZE; 11116fed42bbSKent Overstreet u32 snapshot; 11121c6fdbd8SKent Overstreet int ret; 11131c6fdbd8SKent Overstreet 11141c6fdbd8SKent Overstreet isize = i_size_read(&inode->v); 11151c6fdbd8SKent Overstreet if (offset >= isize) 11161c6fdbd8SKent Overstreet return -ENXIO; 11171c6fdbd8SKent Overstreet 111820bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 11196fed42bbSKent Overstreet retry: 11206fed42bbSKent Overstreet bch2_trans_begin(&trans); 11216fed42bbSKent Overstreet 11226fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 11236fed42bbSKent Overstreet if (ret) 11246fed42bbSKent Overstreet goto err; 1125424eb881SKent Overstreet 1126c72f687aSKent Overstreet for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_extents, 1127c72f687aSKent Overstreet SPOS(inode->v.i_ino, offset >> 9, snapshot), 1128c72f687aSKent Overstreet POS(inode->v.i_ino, U64_MAX), 1129c72f687aSKent Overstreet 0, k, ret) { 1130c72f687aSKent Overstreet if (bkey_extent_is_data(k.k)) { 11311c6fdbd8SKent Overstreet next_data = max(offset, bkey_start_offset(k.k) << 9); 11321c6fdbd8SKent Overstreet break; 11331c6fdbd8SKent Overstreet } else if (k.k->p.offset >> 9 > isize) 11341c6fdbd8SKent Overstreet break; 11351c6fdbd8SKent Overstreet } 113667e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 11376fed42bbSKent Overstreet err: 1138549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 11396fed42bbSKent Overstreet goto retry; 11401c6fdbd8SKent Overstreet 11419a796fdbSKent Overstreet bch2_trans_exit(&trans); 11421c6fdbd8SKent Overstreet if (ret) 11431c6fdbd8SKent Overstreet return ret; 11441c6fdbd8SKent Overstreet 11451c6fdbd8SKent Overstreet if (next_data > offset) 1146543ef2ebSKent Overstreet next_data = bch2_seek_pagecache_data(&inode->v, 11474198bf03SKent Overstreet offset, next_data, 0, false); 11481c6fdbd8SKent Overstreet 1149e10d3094SKent Overstreet if (next_data >= isize) 11501c6fdbd8SKent Overstreet return -ENXIO; 11511c6fdbd8SKent Overstreet 11521c6fdbd8SKent Overstreet return vfs_setpos(file, next_data, MAX_LFS_FILESIZE); 11531c6fdbd8SKent Overstreet } 11541c6fdbd8SKent Overstreet 11551c6fdbd8SKent Overstreet static loff_t bch2_seek_hole(struct file *file, u64 offset) 11561c6fdbd8SKent Overstreet { 11571c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 11581c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 1159424eb881SKent Overstreet struct btree_trans trans; 116067e0dd8fSKent Overstreet struct btree_iter iter; 11611c6fdbd8SKent Overstreet struct bkey_s_c k; 11626fed42bbSKent Overstreet subvol_inum inum = inode_inum(inode); 11631c6fdbd8SKent Overstreet u64 isize, next_hole = MAX_LFS_FILESIZE; 11646fed42bbSKent Overstreet u32 snapshot; 11651c6fdbd8SKent Overstreet int ret; 11661c6fdbd8SKent Overstreet 11671c6fdbd8SKent Overstreet isize = i_size_read(&inode->v); 11681c6fdbd8SKent Overstreet if (offset >= isize) 11691c6fdbd8SKent Overstreet return -ENXIO; 11701c6fdbd8SKent Overstreet 117120bceecbSKent Overstreet bch2_trans_init(&trans, c, 0, 0); 11726fed42bbSKent Overstreet retry: 11736fed42bbSKent Overstreet bch2_trans_begin(&trans); 11746fed42bbSKent Overstreet 11756fed42bbSKent Overstreet ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 11766fed42bbSKent Overstreet if (ret) 11776fed42bbSKent Overstreet goto err; 1178424eb881SKent Overstreet 1179e5fa91d7SKent Overstreet for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, 11806fed42bbSKent Overstreet SPOS(inode->v.i_ino, offset >> 9, snapshot), 118194f651e2SKent Overstreet BTREE_ITER_SLOTS, k, ret) { 11821c6fdbd8SKent Overstreet if (k.k->p.inode != inode->v.i_ino) { 1183543ef2ebSKent Overstreet next_hole = bch2_seek_pagecache_hole(&inode->v, 11844198bf03SKent Overstreet offset, MAX_LFS_FILESIZE, 0, false); 11851c6fdbd8SKent Overstreet break; 11861c6fdbd8SKent Overstreet } else if (!bkey_extent_is_data(k.k)) { 1187543ef2ebSKent Overstreet next_hole = bch2_seek_pagecache_hole(&inode->v, 11881c6fdbd8SKent Overstreet max(offset, bkey_start_offset(k.k) << 9), 11894198bf03SKent Overstreet k.k->p.offset << 9, 0, false); 11901c6fdbd8SKent Overstreet 11911c6fdbd8SKent Overstreet if (next_hole < k.k->p.offset << 9) 11921c6fdbd8SKent Overstreet break; 11931c6fdbd8SKent Overstreet } else { 11941c6fdbd8SKent Overstreet offset = max(offset, bkey_start_offset(k.k) << 9); 11951c6fdbd8SKent Overstreet } 11961c6fdbd8SKent Overstreet } 119767e0dd8fSKent Overstreet bch2_trans_iter_exit(&trans, &iter); 11986fed42bbSKent Overstreet err: 1199549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 12006fed42bbSKent Overstreet goto retry; 12011c6fdbd8SKent Overstreet 12029a796fdbSKent Overstreet bch2_trans_exit(&trans); 12031c6fdbd8SKent Overstreet if (ret) 12041c6fdbd8SKent Overstreet return ret; 12051c6fdbd8SKent Overstreet 12061c6fdbd8SKent Overstreet if (next_hole > isize) 12071c6fdbd8SKent Overstreet next_hole = isize; 12081c6fdbd8SKent Overstreet 12091c6fdbd8SKent Overstreet return vfs_setpos(file, next_hole, MAX_LFS_FILESIZE); 12101c6fdbd8SKent Overstreet } 12111c6fdbd8SKent Overstreet 12121c6fdbd8SKent Overstreet loff_t bch2_llseek(struct file *file, loff_t offset, int whence) 12131c6fdbd8SKent Overstreet { 12145c1ef830SKent Overstreet loff_t ret; 12155c1ef830SKent Overstreet 12161c6fdbd8SKent Overstreet switch (whence) { 12171c6fdbd8SKent Overstreet case SEEK_SET: 12181c6fdbd8SKent Overstreet case SEEK_CUR: 12191c6fdbd8SKent Overstreet case SEEK_END: 12205c1ef830SKent Overstreet ret = generic_file_llseek(file, offset, whence); 12215c1ef830SKent Overstreet break; 12221c6fdbd8SKent Overstreet case SEEK_DATA: 12235c1ef830SKent Overstreet ret = bch2_seek_data(file, offset); 12245c1ef830SKent Overstreet break; 12251c6fdbd8SKent Overstreet case SEEK_HOLE: 12265c1ef830SKent Overstreet ret = bch2_seek_hole(file, offset); 12275c1ef830SKent Overstreet break; 12285c1ef830SKent Overstreet default: 12295c1ef830SKent Overstreet ret = -EINVAL; 12305c1ef830SKent Overstreet break; 12311c6fdbd8SKent Overstreet } 12321c6fdbd8SKent Overstreet 12335c1ef830SKent Overstreet return bch2_err_class(ret); 12341c6fdbd8SKent Overstreet } 12351c6fdbd8SKent Overstreet 12361c6fdbd8SKent Overstreet void bch2_fs_fsio_exit(struct bch_fs *c) 12371c6fdbd8SKent Overstreet { 1238a8b3a677SKent Overstreet bioset_exit(&c->nocow_flush_bioset); 12391c6fdbd8SKent Overstreet } 12401c6fdbd8SKent Overstreet 12411c6fdbd8SKent Overstreet int bch2_fs_fsio_init(struct bch_fs *c) 12421c6fdbd8SKent Overstreet { 124365d48e35SKent Overstreet if (bioset_init(&c->nocow_flush_bioset, 1244a8b3a677SKent Overstreet 1, offsetof(struct nocow_flush, bio), 0)) 124565d48e35SKent Overstreet return -BCH_ERR_ENOMEM_nocow_flush_bioset_init; 12461c6fdbd8SKent Overstreet 1247c8b4534dSKent Overstreet return 0; 12481c6fdbd8SKent Overstreet } 12491c6fdbd8SKent Overstreet 12501c6fdbd8SKent Overstreet #endif /* NO_BCACHEFS_FS */ 1251