11c6fdbd8SKent Overstreet // SPDX-License-Identifier: GPL-2.0 21c6fdbd8SKent Overstreet #ifndef NO_BCACHEFS_FS 31c6fdbd8SKent Overstreet 41c6fdbd8SKent Overstreet #include "bcachefs.h" 57b3f84eaSKent Overstreet #include "alloc_foreground.h" 61809b8cbSKent Overstreet #include "bkey_buf.h" 71c6fdbd8SKent Overstreet #include "btree_update.h" 81c6fdbd8SKent Overstreet #include "buckets.h" 91c6fdbd8SKent Overstreet #include "clock.h" 101c6fdbd8SKent Overstreet #include "error.h" 11e2d9912cSKent Overstreet #include "extents.h" 1208c07feaSKent Overstreet #include "extent_update.h" 131c6fdbd8SKent Overstreet #include "fs.h" 141c6fdbd8SKent Overstreet #include "fs-io.h" 15dbbfca9fSKent Overstreet #include "fs-io-buffered.h" 16dbbfca9fSKent Overstreet #include "fs-io-pagecache.h" 171c6fdbd8SKent Overstreet #include "fsck.h" 181c6fdbd8SKent Overstreet #include "inode.h" 191c6fdbd8SKent Overstreet #include "journal.h" 201809b8cbSKent Overstreet #include "io_misc.h" 211c6fdbd8SKent Overstreet #include "keylist.h" 221c6fdbd8SKent Overstreet #include "quota.h" 2376426098SKent Overstreet #include "reflink.h" 241c6fdbd8SKent Overstreet #include "trace.h" 251c6fdbd8SKent Overstreet 261c6fdbd8SKent Overstreet #include <linux/aio.h> 271c6fdbd8SKent Overstreet #include <linux/backing-dev.h> 281c6fdbd8SKent Overstreet #include <linux/falloc.h> 291c6fdbd8SKent Overstreet #include <linux/migrate.h> 301c6fdbd8SKent Overstreet #include <linux/mmu_context.h> 311c6fdbd8SKent Overstreet #include <linux/pagevec.h> 329ba2eb25SKent Overstreet #include <linux/rmap.h> 331c6fdbd8SKent Overstreet #include <linux/sched/signal.h> 341c6fdbd8SKent Overstreet #include <linux/task_io_accounting_ops.h> 351c6fdbd8SKent Overstreet #include <linux/uio.h> 361c6fdbd8SKent Overstreet 371c6fdbd8SKent Overstreet #include <trace/events/writeback.h> 381c6fdbd8SKent Overstreet 39a8b3a677SKent Overstreet struct nocow_flush { 40a8b3a677SKent Overstreet struct closure *cl; 41a8b3a677SKent Overstreet struct bch_dev *ca; 42a8b3a677SKent Overstreet struct bio bio; 43a8b3a677SKent Overstreet }; 44a8b3a677SKent Overstreet 45a8b3a677SKent Overstreet static void nocow_flush_endio(struct bio *_bio) 46a8b3a677SKent Overstreet { 47a8b3a677SKent Overstreet 48a8b3a677SKent Overstreet struct nocow_flush *bio = container_of(_bio, struct nocow_flush, bio); 49a8b3a677SKent Overstreet 50a8b3a677SKent Overstreet closure_put(bio->cl); 51a8b3a677SKent Overstreet percpu_ref_put(&bio->ca->io_ref); 52a8b3a677SKent Overstreet bio_put(&bio->bio); 53a8b3a677SKent Overstreet } 54a8b3a677SKent Overstreet 55dbbfca9fSKent Overstreet void bch2_inode_flush_nocow_writes_async(struct bch_fs *c, 56a8b3a677SKent Overstreet struct bch_inode_info *inode, 57a8b3a677SKent Overstreet struct closure *cl) 58a8b3a677SKent Overstreet { 59a8b3a677SKent Overstreet struct nocow_flush *bio; 60a8b3a677SKent Overstreet struct bch_dev *ca; 61a8b3a677SKent Overstreet struct bch_devs_mask devs; 62a8b3a677SKent Overstreet unsigned dev; 63a8b3a677SKent Overstreet 64a8b3a677SKent Overstreet dev = find_first_bit(inode->ei_devs_need_flush.d, BCH_SB_MEMBERS_MAX); 65a8b3a677SKent Overstreet if (dev == BCH_SB_MEMBERS_MAX) 66a8b3a677SKent Overstreet return; 67a8b3a677SKent Overstreet 68a8b3a677SKent Overstreet devs = inode->ei_devs_need_flush; 69a8b3a677SKent Overstreet memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush)); 70a8b3a677SKent Overstreet 71a8b3a677SKent Overstreet for_each_set_bit(dev, devs.d, BCH_SB_MEMBERS_MAX) { 72a8b3a677SKent Overstreet rcu_read_lock(); 73a8b3a677SKent Overstreet ca = rcu_dereference(c->devs[dev]); 74a8b3a677SKent Overstreet if (ca && !percpu_ref_tryget(&ca->io_ref)) 75a8b3a677SKent Overstreet ca = NULL; 76a8b3a677SKent Overstreet rcu_read_unlock(); 77a8b3a677SKent Overstreet 78a8b3a677SKent Overstreet if (!ca) 79a8b3a677SKent Overstreet continue; 80a8b3a677SKent Overstreet 81a8b3a677SKent Overstreet bio = container_of(bio_alloc_bioset(ca->disk_sb.bdev, 0, 823e44f325SChristoph Hellwig REQ_OP_WRITE|REQ_PREFLUSH, 83a8b3a677SKent Overstreet GFP_KERNEL, 84a8b3a677SKent Overstreet &c->nocow_flush_bioset), 85a8b3a677SKent Overstreet struct nocow_flush, bio); 86a8b3a677SKent Overstreet bio->cl = cl; 87a8b3a677SKent Overstreet bio->ca = ca; 88a8b3a677SKent Overstreet bio->bio.bi_end_io = nocow_flush_endio; 89a8b3a677SKent Overstreet closure_bio_submit(&bio->bio, cl); 90a8b3a677SKent Overstreet } 91a8b3a677SKent Overstreet } 92a8b3a677SKent Overstreet 93a8b3a677SKent Overstreet static int bch2_inode_flush_nocow_writes(struct bch_fs *c, 94a8b3a677SKent Overstreet struct bch_inode_info *inode) 95a8b3a677SKent Overstreet { 96a8b3a677SKent Overstreet struct closure cl; 97a8b3a677SKent Overstreet 98a8b3a677SKent Overstreet closure_init_stack(&cl); 99a8b3a677SKent Overstreet bch2_inode_flush_nocow_writes_async(c, inode, &cl); 100a8b3a677SKent Overstreet closure_sync(&cl); 101a8b3a677SKent Overstreet 102a8b3a677SKent Overstreet return 0; 103a8b3a677SKent Overstreet } 104a8b3a677SKent Overstreet 1051c6fdbd8SKent Overstreet /* i_size updates: */ 1061c6fdbd8SKent Overstreet 1072ea90048SKent Overstreet struct inode_new_size { 1082ea90048SKent Overstreet loff_t new_size; 1092ea90048SKent Overstreet u64 now; 1102ea90048SKent Overstreet unsigned fields; 1112ea90048SKent Overstreet }; 1122ea90048SKent Overstreet 113791236b8SJoshua Ashton static int inode_set_size(struct btree_trans *trans, 114791236b8SJoshua Ashton struct bch_inode_info *inode, 1151c6fdbd8SKent Overstreet struct bch_inode_unpacked *bi, 1161c6fdbd8SKent Overstreet void *p) 1171c6fdbd8SKent Overstreet { 1182ea90048SKent Overstreet struct inode_new_size *s = p; 1191c6fdbd8SKent Overstreet 1202ea90048SKent Overstreet bi->bi_size = s->new_size; 1212ea90048SKent Overstreet if (s->fields & ATTR_ATIME) 1222ea90048SKent Overstreet bi->bi_atime = s->now; 1232ea90048SKent Overstreet if (s->fields & ATTR_MTIME) 1242ea90048SKent Overstreet bi->bi_mtime = s->now; 1252ea90048SKent Overstreet if (s->fields & ATTR_CTIME) 1262ea90048SKent Overstreet bi->bi_ctime = s->now; 1271c6fdbd8SKent Overstreet 1281c6fdbd8SKent Overstreet return 0; 1291c6fdbd8SKent Overstreet } 1301c6fdbd8SKent Overstreet 13176426098SKent Overstreet int __must_check bch2_write_inode_size(struct bch_fs *c, 1321c6fdbd8SKent Overstreet struct bch_inode_info *inode, 1332ea90048SKent Overstreet loff_t new_size, unsigned fields) 1341c6fdbd8SKent Overstreet { 1352ea90048SKent Overstreet struct inode_new_size s = { 1362ea90048SKent Overstreet .new_size = new_size, 1372ea90048SKent Overstreet .now = bch2_current_time(c), 1382ea90048SKent Overstreet .fields = fields, 1392ea90048SKent Overstreet }; 1402ea90048SKent Overstreet 1412ea90048SKent Overstreet return bch2_write_inode(c, inode, inode_set_size, &s, fields); 1421c6fdbd8SKent Overstreet } 1431c6fdbd8SKent Overstreet 144dbbfca9fSKent Overstreet void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, 145190fa7afSKent Overstreet struct quota_res *quota_res, s64 sectors) 1461c6fdbd8SKent Overstreet { 147b33bf1bcSKent Overstreet bch2_fs_inconsistent_on((s64) inode->v.i_blocks + sectors < 0, c, 148b33bf1bcSKent Overstreet "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", 149b33bf1bcSKent Overstreet inode->v.i_ino, (u64) inode->v.i_blocks, sectors, 150b33bf1bcSKent Overstreet inode->ei_inode.bi_sectors); 151b44a66a6SKent Overstreet inode->v.i_blocks += sectors; 152b44a66a6SKent Overstreet 1531c6fdbd8SKent Overstreet #ifdef CONFIG_BCACHEFS_QUOTA 154cb1b479dSKent Overstreet if (quota_res && 155cb1b479dSKent Overstreet !test_bit(EI_INODE_SNAPSHOT, &inode->ei_flags) && 156cb1b479dSKent Overstreet sectors > 0) { 1571c6fdbd8SKent Overstreet BUG_ON(sectors > quota_res->sectors); 1581c6fdbd8SKent Overstreet BUG_ON(sectors > inode->ei_quota_reserved); 1591c6fdbd8SKent Overstreet 1601c6fdbd8SKent Overstreet quota_res->sectors -= sectors; 1611c6fdbd8SKent Overstreet inode->ei_quota_reserved -= sectors; 1621c6fdbd8SKent Overstreet } else { 16326609b61SKent Overstreet bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN); 1641c6fdbd8SKent Overstreet } 1651c6fdbd8SKent Overstreet #endif 1666b1b186aSKent Overstreet } 1676b1b186aSKent Overstreet 1681c6fdbd8SKent Overstreet /* fsync: */ 1691c6fdbd8SKent Overstreet 17068a2054dSKent Overstreet /* 17168a2054dSKent Overstreet * inode->ei_inode.bi_journal_seq won't be up to date since it's set in an 17268a2054dSKent Overstreet * insert trigger: look up the btree inode instead 17368a2054dSKent Overstreet */ 174a8b3a677SKent Overstreet static int bch2_flush_inode(struct bch_fs *c, 175a8b3a677SKent Overstreet struct bch_inode_info *inode) 17668a2054dSKent Overstreet { 17768a2054dSKent Overstreet if (c->opts.journal_flush_disabled) 17868a2054dSKent Overstreet return 0; 17968a2054dSKent Overstreet 1809e203c43SKent Overstreet if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync)) 1819e203c43SKent Overstreet return -EROFS; 18268a2054dSKent Overstreet 1839e203c43SKent Overstreet struct bch_inode_unpacked u; 1849e203c43SKent Overstreet int ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u) ?: 1859e203c43SKent Overstreet bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?: 186a8b3a677SKent Overstreet bch2_inode_flush_nocow_writes(c, inode); 1879e203c43SKent Overstreet bch2_write_ref_put(c, BCH_WRITE_REF_fsync); 1889e203c43SKent Overstreet return ret; 18968a2054dSKent Overstreet } 19068a2054dSKent Overstreet 1911c6fdbd8SKent Overstreet int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync) 1921c6fdbd8SKent Overstreet { 1931c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 1941c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 1958b088250SYouling Tang int ret, err; 1961c6fdbd8SKent Overstreet 197747d1d6cSYouling Tang trace_bch2_fsync(file, datasync); 198747d1d6cSYouling Tang 1991c6fdbd8SKent Overstreet ret = file_write_and_wait_range(file, start, end); 2005a11b5feSBrian Foster if (ret) 2015a11b5feSBrian Foster goto out; 2025a11b5feSBrian Foster ret = sync_inode_metadata(&inode->v, 1); 2035a11b5feSBrian Foster if (ret) 2045a11b5feSBrian Foster goto out; 2055a11b5feSBrian Foster ret = bch2_flush_inode(c, inode); 2065a11b5feSBrian Foster out: 207d09a8468SKent Overstreet ret = bch2_err_class(ret); 208d09a8468SKent Overstreet if (ret == -EROFS) 209d09a8468SKent Overstreet ret = -EIO; 2108b088250SYouling Tang 2118b088250SYouling Tang err = file_check_and_advance_wb_err(file); 2128b088250SYouling Tang if (!ret) 2138b088250SYouling Tang ret = err; 2148b088250SYouling Tang 215d09a8468SKent Overstreet return ret; 2161c6fdbd8SKent Overstreet } 2171c6fdbd8SKent Overstreet 2181c6fdbd8SKent Overstreet /* truncate: */ 2191c6fdbd8SKent Overstreet 2206fed42bbSKent Overstreet static inline int range_has_data(struct bch_fs *c, u32 subvol, 2211c6fdbd8SKent Overstreet struct bpos start, 2221c6fdbd8SKent Overstreet struct bpos end) 2231c6fdbd8SKent Overstreet { 2246bd68ec2SKent Overstreet struct btree_trans *trans = bch2_trans_get(c); 22567e0dd8fSKent Overstreet struct btree_iter iter; 2261c6fdbd8SKent Overstreet struct bkey_s_c k; 2271c6fdbd8SKent Overstreet int ret = 0; 2286fed42bbSKent Overstreet retry: 2296bd68ec2SKent Overstreet bch2_trans_begin(trans); 2306fed42bbSKent Overstreet 2316bd68ec2SKent Overstreet ret = bch2_subvolume_get_snapshot(trans, subvol, &start.snapshot); 2326fed42bbSKent Overstreet if (ret) 2336fed42bbSKent Overstreet goto err; 234424eb881SKent Overstreet 2356bd68ec2SKent Overstreet for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_extents, start, end, 0, k, ret) 2364ad6aa46SBrian Foster if (bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k)) { 2371c6fdbd8SKent Overstreet ret = 1; 2381c6fdbd8SKent Overstreet break; 2391c6fdbd8SKent Overstreet } 2406fed42bbSKent Overstreet start = iter.pos; 2416bd68ec2SKent Overstreet bch2_trans_iter_exit(trans, &iter); 2426fed42bbSKent Overstreet err: 243549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 2446fed42bbSKent Overstreet goto retry; 2451c6fdbd8SKent Overstreet 2466bd68ec2SKent Overstreet bch2_trans_put(trans); 2479a796fdbSKent Overstreet return ret; 2481c6fdbd8SKent Overstreet } 2491c6fdbd8SKent Overstreet 250959f7368SKent Overstreet static int __bch2_truncate_folio(struct bch_inode_info *inode, 2511c6fdbd8SKent Overstreet pgoff_t index, loff_t start, loff_t end) 2521c6fdbd8SKent Overstreet { 2531c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 2541c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 2553342ac13SKent Overstreet struct bch_folio *s; 256c04cbc0dSColin Ian King unsigned start_offset; 257c04cbc0dSColin Ian King unsigned end_offset; 258a99b1cafSKent Overstreet unsigned i; 25930bff594SKent Overstreet struct folio *folio; 260b19d307dSKent Overstreet s64 i_sectors_delta = 0; 2611c6fdbd8SKent Overstreet int ret = 0; 2626b9857b2SBrian Foster u64 end_pos; 2631c6fdbd8SKent Overstreet 26430bff594SKent Overstreet folio = filemap_lock_folio(mapping, index); 265b6898917SKent Overstreet if (IS_ERR_OR_NULL(folio)) { 2661c6fdbd8SKent Overstreet /* 2671c6fdbd8SKent Overstreet * XXX: we're doing two index lookups when we end up reading the 26830bff594SKent Overstreet * folio 2691c6fdbd8SKent Overstreet */ 2706fed42bbSKent Overstreet ret = range_has_data(c, inode->ei_subvol, 271c72f687aSKent Overstreet POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT)), 272c72f687aSKent Overstreet POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT) + PAGE_SECTORS)); 2731c6fdbd8SKent Overstreet if (ret <= 0) 2741c6fdbd8SKent Overstreet return ret; 2751c6fdbd8SKent Overstreet 27630bff594SKent Overstreet folio = __filemap_get_folio(mapping, index, 27730bff594SKent Overstreet FGP_LOCK|FGP_CREAT, GFP_KERNEL); 2781e81f89bSKent Overstreet if (IS_ERR_OR_NULL(folio)) { 2791c6fdbd8SKent Overstreet ret = -ENOMEM; 2801c6fdbd8SKent Overstreet goto out; 2811c6fdbd8SKent Overstreet } 2821c6fdbd8SKent Overstreet } 2831c6fdbd8SKent Overstreet 284959f7368SKent Overstreet BUG_ON(start >= folio_end_pos(folio)); 285959f7368SKent Overstreet BUG_ON(end <= folio_pos(folio)); 286959f7368SKent Overstreet 287959f7368SKent Overstreet start_offset = max(start, folio_pos(folio)) - folio_pos(folio); 2886b9857b2SBrian Foster end_offset = min_t(u64, end, folio_end_pos(folio)) - folio_pos(folio); 289959f7368SKent Overstreet 290959f7368SKent Overstreet /* Folio boundary? Nothing to do */ 291959f7368SKent Overstreet if (start_offset == 0 && 292959f7368SKent Overstreet end_offset == folio_size(folio)) { 293959f7368SKent Overstreet ret = 0; 294959f7368SKent Overstreet goto unlock; 295959f7368SKent Overstreet } 296959f7368SKent Overstreet 29730bff594SKent Overstreet s = bch2_folio_create(folio, 0); 298a99b1cafSKent Overstreet if (!s) { 299a99b1cafSKent Overstreet ret = -ENOMEM; 300a99b1cafSKent Overstreet goto unlock; 301a99b1cafSKent Overstreet } 302a99b1cafSKent Overstreet 30330bff594SKent Overstreet if (!folio_test_uptodate(folio)) { 30430bff594SKent Overstreet ret = bch2_read_single_folio(folio, mapping); 3051c6fdbd8SKent Overstreet if (ret) 3061c6fdbd8SKent Overstreet goto unlock; 3071c6fdbd8SKent Overstreet } 3081c6fdbd8SKent Overstreet 30934fdcf06SKent Overstreet ret = bch2_folio_set(c, inode_inum(inode), &folio, 1); 31034fdcf06SKent Overstreet if (ret) 31134fdcf06SKent Overstreet goto unlock; 312c437e153SKent Overstreet 313a99b1cafSKent Overstreet for (i = round_up(start_offset, block_bytes(c)) >> 9; 314a99b1cafSKent Overstreet i < round_down(end_offset, block_bytes(c)) >> 9; 315a99b1cafSKent Overstreet i++) { 316a99b1cafSKent Overstreet s->s[i].nr_replicas = 0; 317a1774a05SKent Overstreet 318a1774a05SKent Overstreet i_sectors_delta -= s->s[i].state == SECTOR_dirty; 319dbbfca9fSKent Overstreet bch2_folio_sector_set(folio, s, i, SECTOR_unallocated); 320a99b1cafSKent Overstreet } 321a99b1cafSKent Overstreet 322dbbfca9fSKent Overstreet bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); 323b19d307dSKent Overstreet 32474163da7SKent Overstreet /* 32530bff594SKent Overstreet * Caller needs to know whether this folio will be written out by 32674163da7SKent Overstreet * writeback - doing an i_size update if necessary - or whether it will 3274ad6aa46SBrian Foster * be responsible for the i_size update. 3284ad6aa46SBrian Foster * 3294ad6aa46SBrian Foster * Note that we shouldn't ever see a folio beyond EOF, but check and 3304ad6aa46SBrian Foster * warn if so. This has been observed by failure to clean up folios 3314ad6aa46SBrian Foster * after a short write and there's still a chance reclaim will fix 3324ad6aa46SBrian Foster * things up. 33374163da7SKent Overstreet */ 3344ad6aa46SBrian Foster WARN_ON_ONCE(folio_pos(folio) >= inode->v.i_size); 3354ad6aa46SBrian Foster end_pos = folio_end_pos(folio); 3364ad6aa46SBrian Foster if (inode->v.i_size > folio_pos(folio)) 3376b9857b2SBrian Foster end_pos = min_t(u64, inode->v.i_size, end_pos); 338bf98ee10SBrian Foster ret = s->s[folio_pos_to_s(folio, end_pos - 1)].state >= SECTOR_dirty; 33974163da7SKent Overstreet 34030bff594SKent Overstreet folio_zero_segment(folio, start_offset, end_offset); 341a99b1cafSKent Overstreet 3421c6fdbd8SKent Overstreet /* 3431c6fdbd8SKent Overstreet * Bit of a hack - we don't want truncate to fail due to -ENOSPC. 3441c6fdbd8SKent Overstreet * 34530bff594SKent Overstreet * XXX: because we aren't currently tracking whether the folio has actual 3461c6fdbd8SKent Overstreet * data in it (vs. just 0s, or only partially written) this wrong. ick. 3471c6fdbd8SKent Overstreet */ 34830bff594SKent Overstreet BUG_ON(bch2_get_folio_disk_reservation(c, inode, folio, false)); 3491c6fdbd8SKent Overstreet 3509ba2eb25SKent Overstreet /* 3519ba2eb25SKent Overstreet * This removes any writeable userspace mappings; we need to force 3529ba2eb25SKent Overstreet * .page_mkwrite to be called again before any mmapped writes, to 3539ba2eb25SKent Overstreet * redirty the full page: 3549ba2eb25SKent Overstreet */ 35530bff594SKent Overstreet folio_mkclean(folio); 35630bff594SKent Overstreet filemap_dirty_folio(mapping, folio); 3571c6fdbd8SKent Overstreet unlock: 35830bff594SKent Overstreet folio_unlock(folio); 35930bff594SKent Overstreet folio_put(folio); 3601c6fdbd8SKent Overstreet out: 3611c6fdbd8SKent Overstreet return ret; 3621c6fdbd8SKent Overstreet } 3631c6fdbd8SKent Overstreet 364959f7368SKent Overstreet static int bch2_truncate_folio(struct bch_inode_info *inode, loff_t from) 3651c6fdbd8SKent Overstreet { 366959f7368SKent Overstreet return __bch2_truncate_folio(inode, from >> PAGE_SHIFT, 367959f7368SKent Overstreet from, ANYSINT_MAX(loff_t)); 3681c6fdbd8SKent Overstreet } 3691c6fdbd8SKent Overstreet 370959f7368SKent Overstreet static int bch2_truncate_folios(struct bch_inode_info *inode, 37174163da7SKent Overstreet loff_t start, loff_t end) 37274163da7SKent Overstreet { 373959f7368SKent Overstreet int ret = __bch2_truncate_folio(inode, start >> PAGE_SHIFT, 37474163da7SKent Overstreet start, end); 37574163da7SKent Overstreet 37674163da7SKent Overstreet if (ret >= 0 && 37774163da7SKent Overstreet start >> PAGE_SHIFT != end >> PAGE_SHIFT) 378959f7368SKent Overstreet ret = __bch2_truncate_folio(inode, 379959f7368SKent Overstreet (end - 1) >> PAGE_SHIFT, 38074163da7SKent Overstreet start, end); 38174163da7SKent Overstreet return ret; 38274163da7SKent Overstreet } 38374163da7SKent Overstreet 38468a507a2SKent Overstreet static int bch2_extend(struct mnt_idmap *idmap, 38568a507a2SKent Overstreet struct bch_inode_info *inode, 386e0541a93SKent Overstreet struct bch_inode_unpacked *inode_u, 387e0541a93SKent Overstreet struct iattr *iattr) 3881c6fdbd8SKent Overstreet { 3891c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 3901c6fdbd8SKent Overstreet int ret; 3911c6fdbd8SKent Overstreet 392e0541a93SKent Overstreet /* 393e0541a93SKent Overstreet * sync appends: 3942925fc49SKent Overstreet * 3952925fc49SKent Overstreet * this has to be done _before_ extending i_size: 396e0541a93SKent Overstreet */ 397e0541a93SKent Overstreet ret = filemap_write_and_wait_range(mapping, inode_u->bi_size, S64_MAX); 3981c6fdbd8SKent Overstreet if (ret) 3991c6fdbd8SKent Overstreet return ret; 4001c6fdbd8SKent Overstreet 4011c6fdbd8SKent Overstreet truncate_setsize(&inode->v, iattr->ia_size); 4021c6fdbd8SKent Overstreet 40368a507a2SKent Overstreet return bch2_setattr_nonsize(idmap, inode, iattr); 4041c6fdbd8SKent Overstreet } 4051c6fdbd8SKent Overstreet 4065902cc28SKent Overstreet int bchfs_truncate(struct mnt_idmap *idmap, 40768a507a2SKent Overstreet struct bch_inode_info *inode, struct iattr *iattr) 4081c6fdbd8SKent Overstreet { 4091c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 4101c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 411e0541a93SKent Overstreet struct bch_inode_unpacked inode_u; 4122e87eae1SKent Overstreet s64 i_sectors_delta = 0; 4131c6fdbd8SKent Overstreet int ret = 0; 4141c6fdbd8SKent Overstreet 41568a507a2SKent Overstreet /* 41678d66ab1SDan Robertson * If the truncate call with change the size of the file, the 41778d66ab1SDan Robertson * cmtimes should be updated. If the size will not change, we 41878d66ab1SDan Robertson * do not need to update the cmtimes. 41968a507a2SKent Overstreet */ 42078d66ab1SDan Robertson if (iattr->ia_size != inode->v.i_size) { 42168a507a2SKent Overstreet if (!(iattr->ia_valid & ATTR_MTIME)) 42268a507a2SKent Overstreet ktime_get_coarse_real_ts64(&iattr->ia_mtime); 42368a507a2SKent Overstreet if (!(iattr->ia_valid & ATTR_CTIME)) 42468a507a2SKent Overstreet ktime_get_coarse_real_ts64(&iattr->ia_ctime); 42568a507a2SKent Overstreet iattr->ia_valid |= ATTR_MTIME|ATTR_CTIME; 42678d66ab1SDan Robertson } 42768a507a2SKent Overstreet 4281c6fdbd8SKent Overstreet inode_dio_wait(&inode->v); 429a7ecd30cSKent Overstreet bch2_pagecache_block_get(inode); 4301c6fdbd8SKent Overstreet 4316fed42bbSKent Overstreet ret = bch2_inode_find_by_inum(c, inode_inum(inode), &inode_u); 432e0541a93SKent Overstreet if (ret) 433e0541a93SKent Overstreet goto err; 4341c6fdbd8SKent Overstreet 435c45d473dSKent Overstreet /* 436c45d473dSKent Overstreet * check this before next assertion; on filesystem error our normal 437c45d473dSKent Overstreet * invariants are a bit broken (truncate has to truncate the page cache 438c45d473dSKent Overstreet * before the inode). 439c45d473dSKent Overstreet */ 440c45d473dSKent Overstreet ret = bch2_journal_error(&c->journal); 441c45d473dSKent Overstreet if (ret) 442c45d473dSKent Overstreet goto err; 443c45d473dSKent Overstreet 4448eb71e9eSKent Overstreet WARN_ONCE(!test_bit(EI_INODE_ERROR, &inode->ei_flags) && 4458eb71e9eSKent Overstreet inode->v.i_size < inode_u.bi_size, 4468eb71e9eSKent Overstreet "truncate spotted in mem i_size < btree i_size: %llu < %llu\n", 4478eb71e9eSKent Overstreet (u64) inode->v.i_size, inode_u.bi_size); 448e0541a93SKent Overstreet 449e0541a93SKent Overstreet if (iattr->ia_size > inode->v.i_size) { 45068a507a2SKent Overstreet ret = bch2_extend(idmap, inode, &inode_u, iattr); 45154e2264eSKent Overstreet goto err; 4521c6fdbd8SKent Overstreet } 4531c6fdbd8SKent Overstreet 45468a507a2SKent Overstreet iattr->ia_valid &= ~ATTR_SIZE; 45568a507a2SKent Overstreet 456959f7368SKent Overstreet ret = bch2_truncate_folio(inode, iattr->ia_size); 45774163da7SKent Overstreet if (unlikely(ret < 0)) 45854e2264eSKent Overstreet goto err; 4591c6fdbd8SKent Overstreet 4605902cc28SKent Overstreet truncate_setsize(&inode->v, iattr->ia_size); 4615902cc28SKent Overstreet 4626cc3535dSKent Overstreet /* 4636cc3535dSKent Overstreet * When extending, we're going to write the new i_size to disk 4646cc3535dSKent Overstreet * immediately so we need to flush anything above the current on disk 4656cc3535dSKent Overstreet * i_size first: 4666cc3535dSKent Overstreet * 4676cc3535dSKent Overstreet * Also, when extending we need to flush the page that i_size currently 4686cc3535dSKent Overstreet * straddles - if it's mapped to userspace, we need to ensure that 4696cc3535dSKent Overstreet * userspace has to redirty it and call .mkwrite -> set_page_dirty 4706cc3535dSKent Overstreet * again to allocate the part of the page that was extended. 4716cc3535dSKent Overstreet */ 472e0541a93SKent Overstreet if (iattr->ia_size > inode_u.bi_size) 4731c6fdbd8SKent Overstreet ret = filemap_write_and_wait_range(mapping, 474e0541a93SKent Overstreet inode_u.bi_size, 4751c6fdbd8SKent Overstreet iattr->ia_size - 1); 4761c6fdbd8SKent Overstreet else if (iattr->ia_size & (PAGE_SIZE - 1)) 4771c6fdbd8SKent Overstreet ret = filemap_write_and_wait_range(mapping, 4781c6fdbd8SKent Overstreet round_down(iattr->ia_size, PAGE_SIZE), 4791c6fdbd8SKent Overstreet iattr->ia_size - 1); 4801c6fdbd8SKent Overstreet if (ret) 48154e2264eSKent Overstreet goto err; 4821c6fdbd8SKent Overstreet 4835902cc28SKent Overstreet ret = bch2_truncate(c, inode_inum(inode), iattr->ia_size, &i_sectors_delta); 484dbbfca9fSKent Overstreet bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); 4852e87eae1SKent Overstreet 4865902cc28SKent Overstreet if (unlikely(ret)) { 4875902cc28SKent Overstreet /* 4885902cc28SKent Overstreet * If we error here, VFS caches are now inconsistent with btree 4895902cc28SKent Overstreet */ 4905902cc28SKent Overstreet set_bit(EI_INODE_ERROR, &inode->ei_flags); 4915902cc28SKent Overstreet goto err; 4925902cc28SKent Overstreet } 4935902cc28SKent Overstreet 494b33bf1bcSKent Overstreet bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks && 495b33bf1bcSKent Overstreet !bch2_journal_error(&c->journal), c, 496b33bf1bcSKent Overstreet "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", 497b33bf1bcSKent Overstreet inode->v.i_ino, (u64) inode->v.i_blocks, 498b33bf1bcSKent Overstreet inode->ei_inode.bi_sectors); 49968a507a2SKent Overstreet 50068a507a2SKent Overstreet ret = bch2_setattr_nonsize(idmap, inode, iattr); 50154e2264eSKent Overstreet err: 502a7ecd30cSKent Overstreet bch2_pagecache_block_put(inode); 5035c1ef830SKent Overstreet return bch2_err_class(ret); 5041c6fdbd8SKent Overstreet } 5051c6fdbd8SKent Overstreet 5061c6fdbd8SKent Overstreet /* fallocate: */ 5071c6fdbd8SKent Overstreet 508791236b8SJoshua Ashton static int inode_update_times_fn(struct btree_trans *trans, 509791236b8SJoshua Ashton struct bch_inode_info *inode, 510050197b1SKent Overstreet struct bch_inode_unpacked *bi, void *p) 511050197b1SKent Overstreet { 512050197b1SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 513050197b1SKent Overstreet 514050197b1SKent Overstreet bi->bi_mtime = bi->bi_ctime = bch2_current_time(c); 515050197b1SKent Overstreet return 0; 516050197b1SKent Overstreet } 517050197b1SKent Overstreet 5188a3c8303SKent Overstreet static noinline long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len) 5191c6fdbd8SKent Overstreet { 5201c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 52174163da7SKent Overstreet u64 end = offset + len; 52274163da7SKent Overstreet u64 block_start = round_up(offset, block_bytes(c)); 52374163da7SKent Overstreet u64 block_end = round_down(end, block_bytes(c)); 52474163da7SKent Overstreet bool truncated_last_page; 5251c6fdbd8SKent Overstreet int ret = 0; 5261c6fdbd8SKent Overstreet 527959f7368SKent Overstreet ret = bch2_truncate_folios(inode, offset, end); 52874163da7SKent Overstreet if (unlikely(ret < 0)) 5291c6fdbd8SKent Overstreet goto err; 5301c6fdbd8SKent Overstreet 53174163da7SKent Overstreet truncated_last_page = ret; 5321c6fdbd8SKent Overstreet 53374163da7SKent Overstreet truncate_pagecache_range(&inode->v, offset, end - 1); 5341c6fdbd8SKent Overstreet 53574163da7SKent Overstreet if (block_start < block_end) { 5362e87eae1SKent Overstreet s64 i_sectors_delta = 0; 5372e87eae1SKent Overstreet 5388c6d298aSKent Overstreet ret = bch2_fpunch(c, inode_inum(inode), 53974163da7SKent Overstreet block_start >> 9, block_end >> 9, 5402e87eae1SKent Overstreet &i_sectors_delta); 541dbbfca9fSKent Overstreet bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); 5422e87eae1SKent Overstreet } 543050197b1SKent Overstreet 544050197b1SKent Overstreet mutex_lock(&inode->ei_update_lock); 54574163da7SKent Overstreet if (end >= inode->v.i_size && !truncated_last_page) { 54674163da7SKent Overstreet ret = bch2_write_inode_size(c, inode, inode->v.i_size, 54774163da7SKent Overstreet ATTR_MTIME|ATTR_CTIME); 54874163da7SKent Overstreet } else { 549050197b1SKent Overstreet ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, 55074163da7SKent Overstreet ATTR_MTIME|ATTR_CTIME); 55174163da7SKent Overstreet } 552050197b1SKent Overstreet mutex_unlock(&inode->ei_update_lock); 5531c6fdbd8SKent Overstreet err: 5541c6fdbd8SKent Overstreet return ret; 5551c6fdbd8SKent Overstreet } 5561c6fdbd8SKent Overstreet 5578a3c8303SKent Overstreet static noinline long bchfs_fcollapse_finsert(struct bch_inode_info *inode, 5585f786787SKent Overstreet loff_t offset, loff_t len, 5595f786787SKent Overstreet bool insert) 5601c6fdbd8SKent Overstreet { 5611c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 5621c6fdbd8SKent Overstreet struct address_space *mapping = inode->v.i_mapping; 5635902cc28SKent Overstreet s64 i_sectors_delta = 0; 56450dc0f69SKent Overstreet int ret = 0; 5651c6fdbd8SKent Overstreet 5661c6fdbd8SKent Overstreet if ((offset | len) & (block_bytes(c) - 1)) 5671c6fdbd8SKent Overstreet return -EINVAL; 5681c6fdbd8SKent Overstreet 5695f786787SKent Overstreet if (insert) { 5705f786787SKent Overstreet if (offset >= inode->v.i_size) 57174163da7SKent Overstreet return -EINVAL; 5725f786787SKent Overstreet } else { 5731c6fdbd8SKent Overstreet if (offset + len >= inode->v.i_size) 57474163da7SKent Overstreet return -EINVAL; 5755f786787SKent Overstreet } 5761c6fdbd8SKent Overstreet 577dbbfca9fSKent Overstreet ret = bch2_write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX); 5781c6fdbd8SKent Overstreet if (ret) 57974163da7SKent Overstreet return ret; 5801c6fdbd8SKent Overstreet 5815902cc28SKent Overstreet if (insert) 5825902cc28SKent Overstreet i_size_write(&inode->v, inode->v.i_size + len); 5832e87eae1SKent Overstreet 5845902cc28SKent Overstreet ret = bch2_fcollapse_finsert(c, inode_inum(inode), offset >> 9, len >> 9, 5855902cc28SKent Overstreet insert, &i_sectors_delta); 5865902cc28SKent Overstreet if (!ret && !insert) 5875902cc28SKent Overstreet i_size_write(&inode->v, inode->v.i_size - len); 588dbbfca9fSKent Overstreet bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); 5892e87eae1SKent Overstreet 5901c6fdbd8SKent Overstreet return ret; 5911c6fdbd8SKent Overstreet } 5921c6fdbd8SKent Overstreet 5938a3c8303SKent Overstreet static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode, 594694015c2SKent Overstreet u64 start_sector, u64 end_sector) 5951c6fdbd8SKent Overstreet { 5961c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 5976bd68ec2SKent Overstreet struct btree_trans *trans = bch2_trans_get(c); 59867e0dd8fSKent Overstreet struct btree_iter iter; 599694015c2SKent Overstreet struct bpos end_pos = POS(inode->v.i_ino, end_sector); 60001ad6737SKent Overstreet struct bch_io_opts opts; 601694015c2SKent Overstreet int ret = 0; 6021c6fdbd8SKent Overstreet 60301ad6737SKent Overstreet bch2_inode_opts_get(&opts, c, &inode->ei_inode); 6041c6fdbd8SKent Overstreet 6056bd68ec2SKent Overstreet bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, 606694015c2SKent Overstreet POS(inode->v.i_ino, start_sector), 6075dd8c60eSKent Overstreet BTREE_ITER_slots|BTREE_ITER_intent); 6081c6fdbd8SKent Overstreet 609e88a75ebSKent Overstreet while (!ret && bkey_lt(iter.pos, end_pos)) { 6102e87eae1SKent Overstreet s64 i_sectors_delta = 0; 611190fa7afSKent Overstreet struct quota_res quota_res = { 0 }; 6121c6fdbd8SKent Overstreet struct bkey_s_c k; 613694015c2SKent Overstreet unsigned sectors; 614a09818c7SKent Overstreet bool is_allocation; 615a09818c7SKent Overstreet u64 hole_start, hole_end; 6166fed42bbSKent Overstreet u32 snapshot; 6171c6fdbd8SKent Overstreet 6186bd68ec2SKent Overstreet bch2_trans_begin(trans); 619a8abd3a7SKent Overstreet 6206bd68ec2SKent Overstreet ret = bch2_subvolume_get_snapshot(trans, 6216fed42bbSKent Overstreet inode->ei_subvol, &snapshot); 6226fed42bbSKent Overstreet if (ret) 6236fed42bbSKent Overstreet goto bkey_err; 6246fed42bbSKent Overstreet 6256fed42bbSKent Overstreet bch2_btree_iter_set_snapshot(&iter, snapshot); 6266fed42bbSKent Overstreet 62767e0dd8fSKent Overstreet k = bch2_btree_iter_peek_slot(&iter); 6280f238367SKent Overstreet if ((ret = bkey_err(k))) 6290f238367SKent Overstreet goto bkey_err; 6301c6fdbd8SKent Overstreet 631a09818c7SKent Overstreet hole_start = iter.pos.offset; 632a09818c7SKent Overstreet hole_end = bpos_min(k.k->p, end_pos).offset; 633a09818c7SKent Overstreet is_allocation = bkey_extent_is_allocation(k.k); 634a09818c7SKent Overstreet 6351c6fdbd8SKent Overstreet /* already reserved */ 63679203111SKent Overstreet if (bkey_extent_is_reservation(k) && 63779203111SKent Overstreet bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) { 63867e0dd8fSKent Overstreet bch2_btree_iter_advance(&iter); 6391c6fdbd8SKent Overstreet continue; 6401c6fdbd8SKent Overstreet } 6411c6fdbd8SKent Overstreet 642190fa7afSKent Overstreet if (bkey_extent_is_data(k.k) && 643190fa7afSKent Overstreet !(mode & FALLOC_FL_ZERO_RANGE)) { 64467e0dd8fSKent Overstreet bch2_btree_iter_advance(&iter); 6451c6fdbd8SKent Overstreet continue; 6461c6fdbd8SKent Overstreet } 6471c6fdbd8SKent Overstreet 648a09818c7SKent Overstreet if (!(mode & FALLOC_FL_ZERO_RANGE)) { 6494198bf03SKent Overstreet /* 6504198bf03SKent Overstreet * Lock ordering - can't be holding btree locks while 6514198bf03SKent Overstreet * blocking on a folio lock: 6524198bf03SKent Overstreet */ 6534198bf03SKent Overstreet if (bch2_clamp_data_hole(&inode->v, 6544198bf03SKent Overstreet &hole_start, 6554198bf03SKent Overstreet &hole_end, 6564198bf03SKent Overstreet opts.data_replicas, true)) 6576bd68ec2SKent Overstreet ret = drop_locks_do(trans, 658a09818c7SKent Overstreet (bch2_clamp_data_hole(&inode->v, 659a09818c7SKent Overstreet &hole_start, 660a09818c7SKent Overstreet &hole_end, 6614198bf03SKent Overstreet opts.data_replicas, false), 0)); 662a09818c7SKent Overstreet bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, hole_start)); 663a8b3a677SKent Overstreet 664a09818c7SKent Overstreet if (ret) 665a09818c7SKent Overstreet goto bkey_err; 6661c6fdbd8SKent Overstreet 667a09818c7SKent Overstreet if (hole_start == hole_end) 668a09818c7SKent Overstreet continue; 669a09818c7SKent Overstreet } 670a09818c7SKent Overstreet 671a09818c7SKent Overstreet sectors = hole_end - hole_start; 672a09818c7SKent Overstreet 673a09818c7SKent Overstreet if (!is_allocation) { 6741c6fdbd8SKent Overstreet ret = bch2_quota_reservation_add(c, inode, 675a09818c7SKent Overstreet "a_res, sectors, true); 6761c6fdbd8SKent Overstreet if (unlikely(ret)) 6770f238367SKent Overstreet goto bkey_err; 6781c6fdbd8SKent Overstreet } 6791c6fdbd8SKent Overstreet 6806bd68ec2SKent Overstreet ret = bch2_extent_fallocate(trans, inode_inum(inode), &iter, 68170de7a47SKent Overstreet sectors, opts, &i_sectors_delta, 68270de7a47SKent Overstreet writepoint_hashed((unsigned long) current)); 6838810386fSKent Overstreet if (ret) 6848810386fSKent Overstreet goto bkey_err; 68570de7a47SKent Overstreet 686dbbfca9fSKent Overstreet bch2_i_sectors_acct(c, inode, "a_res, i_sectors_delta); 687a09818c7SKent Overstreet 68846bf2e9cSKent Overstreet if (bch2_mark_pagecache_reserved(inode, &hole_start, 68946bf2e9cSKent Overstreet iter.pos.offset, true)) 6906bd68ec2SKent Overstreet drop_locks_do(trans, 69146bf2e9cSKent Overstreet bch2_mark_pagecache_reserved(inode, &hole_start, 69246bf2e9cSKent Overstreet iter.pos.offset, false)); 6930f238367SKent Overstreet bkey_err: 694190fa7afSKent Overstreet bch2_quota_reservation_put(c, inode, "a_res); 695549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 6961c6fdbd8SKent Overstreet ret = 0; 69750dc0f69SKent Overstreet } 69874163da7SKent Overstreet 699098ef98dSKent Overstreet if (bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)) { 70074163da7SKent Overstreet struct quota_res quota_res = { 0 }; 70174163da7SKent Overstreet s64 i_sectors_delta = 0; 70274163da7SKent Overstreet 7036bd68ec2SKent Overstreet bch2_fpunch_at(trans, &iter, inode_inum(inode), 70474163da7SKent Overstreet end_sector, &i_sectors_delta); 705dbbfca9fSKent Overstreet bch2_i_sectors_acct(c, inode, "a_res, i_sectors_delta); 70674163da7SKent Overstreet bch2_quota_reservation_put(c, inode, "a_res); 70774163da7SKent Overstreet } 70874163da7SKent Overstreet 7096bd68ec2SKent Overstreet bch2_trans_iter_exit(trans, &iter); 7106bd68ec2SKent Overstreet bch2_trans_put(trans); 711694015c2SKent Overstreet return ret; 712694015c2SKent Overstreet } 71350dc0f69SKent Overstreet 7148a3c8303SKent Overstreet static noinline long bchfs_fallocate(struct bch_inode_info *inode, int mode, 715694015c2SKent Overstreet loff_t offset, loff_t len) 716694015c2SKent Overstreet { 717694015c2SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 71874163da7SKent Overstreet u64 end = offset + len; 71974163da7SKent Overstreet u64 block_start = round_down(offset, block_bytes(c)); 72074163da7SKent Overstreet u64 block_end = round_up(end, block_bytes(c)); 72174163da7SKent Overstreet bool truncated_last_page = false; 72274163da7SKent Overstreet int ret, ret2 = 0; 723694015c2SKent Overstreet 724694015c2SKent Overstreet if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) { 725694015c2SKent Overstreet ret = inode_newsize_ok(&inode->v, end); 726694015c2SKent Overstreet if (ret) 72774163da7SKent Overstreet return ret; 728694015c2SKent Overstreet } 729694015c2SKent Overstreet 730694015c2SKent Overstreet if (mode & FALLOC_FL_ZERO_RANGE) { 731959f7368SKent Overstreet ret = bch2_truncate_folios(inode, offset, end); 73274163da7SKent Overstreet if (unlikely(ret < 0)) 73374163da7SKent Overstreet return ret; 734694015c2SKent Overstreet 73574163da7SKent Overstreet truncated_last_page = ret; 736694015c2SKent Overstreet 737694015c2SKent Overstreet truncate_pagecache_range(&inode->v, offset, end - 1); 73874163da7SKent Overstreet 73974163da7SKent Overstreet block_start = round_up(offset, block_bytes(c)); 74074163da7SKent Overstreet block_end = round_down(end, block_bytes(c)); 741694015c2SKent Overstreet } 742694015c2SKent Overstreet 743694015c2SKent Overstreet ret = __bchfs_fallocate(inode, mode, block_start >> 9, block_end >> 9); 744e0541a93SKent Overstreet 745e0541a93SKent Overstreet /* 74674163da7SKent Overstreet * On -ENOSPC in ZERO_RANGE mode, we still want to do the inode update, 74774163da7SKent Overstreet * so that the VFS cache i_size is consistent with the btree i_size: 748e0541a93SKent Overstreet */ 74974163da7SKent Overstreet if (ret && 750098ef98dSKent Overstreet !(bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE))) 75174163da7SKent Overstreet return ret; 7521c6fdbd8SKent Overstreet 75374163da7SKent Overstreet if (mode & FALLOC_FL_KEEP_SIZE && end > inode->v.i_size) 754e0541a93SKent Overstreet end = inode->v.i_size; 75574163da7SKent Overstreet 75674163da7SKent Overstreet if (end >= inode->v.i_size && 75774163da7SKent Overstreet (((mode & FALLOC_FL_ZERO_RANGE) && !truncated_last_page) || 75874163da7SKent Overstreet !(mode & FALLOC_FL_KEEP_SIZE))) { 75974163da7SKent Overstreet spin_lock(&inode->v.i_lock); 760e0541a93SKent Overstreet i_size_write(&inode->v, end); 76174163da7SKent Overstreet spin_unlock(&inode->v.i_lock); 762e0541a93SKent Overstreet 7631c6fdbd8SKent Overstreet mutex_lock(&inode->ei_update_lock); 76474163da7SKent Overstreet ret2 = bch2_write_inode_size(c, inode, end, 0); 7651c6fdbd8SKent Overstreet mutex_unlock(&inode->ei_update_lock); 7661c6fdbd8SKent Overstreet } 76774163da7SKent Overstreet 76874163da7SKent Overstreet return ret ?: ret2; 7691c6fdbd8SKent Overstreet } 7701c6fdbd8SKent Overstreet 7711c6fdbd8SKent Overstreet long bch2_fallocate_dispatch(struct file *file, int mode, 7721c6fdbd8SKent Overstreet loff_t offset, loff_t len) 7731c6fdbd8SKent Overstreet { 7741c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 7752a9101a9SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 7762a9101a9SKent Overstreet long ret; 7772a9101a9SKent Overstreet 778d94189adSKent Overstreet if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fallocate)) 7792a9101a9SKent Overstreet return -EROFS; 7801c6fdbd8SKent Overstreet 78174163da7SKent Overstreet inode_lock(&inode->v); 78274163da7SKent Overstreet inode_dio_wait(&inode->v); 783a7ecd30cSKent Overstreet bch2_pagecache_block_get(inode); 78474163da7SKent Overstreet 78507bfcc0bSKent Overstreet ret = file_modified(file); 78607bfcc0bSKent Overstreet if (ret) 78707bfcc0bSKent Overstreet goto err; 78807bfcc0bSKent Overstreet 7891c6fdbd8SKent Overstreet if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE))) 7902a9101a9SKent Overstreet ret = bchfs_fallocate(inode, mode, offset, len); 7912a9101a9SKent Overstreet else if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE)) 7922a9101a9SKent Overstreet ret = bchfs_fpunch(inode, offset, len); 7932a9101a9SKent Overstreet else if (mode == FALLOC_FL_INSERT_RANGE) 7942a9101a9SKent Overstreet ret = bchfs_fcollapse_finsert(inode, offset, len, true); 7952a9101a9SKent Overstreet else if (mode == FALLOC_FL_COLLAPSE_RANGE) 7962a9101a9SKent Overstreet ret = bchfs_fcollapse_finsert(inode, offset, len, false); 7972a9101a9SKent Overstreet else 7982a9101a9SKent Overstreet ret = -EOPNOTSUPP; 79907bfcc0bSKent Overstreet err: 800a7ecd30cSKent Overstreet bch2_pagecache_block_put(inode); 80174163da7SKent Overstreet inode_unlock(&inode->v); 802d94189adSKent Overstreet bch2_write_ref_put(c, BCH_WRITE_REF_fallocate); 8031c6fdbd8SKent Overstreet 8045c1ef830SKent Overstreet return bch2_err_class(ret); 8051c6fdbd8SKent Overstreet } 8061c6fdbd8SKent Overstreet 807c72f687aSKent Overstreet /* 808c72f687aSKent Overstreet * Take a quota reservation for unallocated blocks in a given file range 809c72f687aSKent Overstreet * Does not check pagecache 810c72f687aSKent Overstreet */ 811e8540e56SKent Overstreet static int quota_reserve_range(struct bch_inode_info *inode, 812e8540e56SKent Overstreet struct quota_res *res, 813e8540e56SKent Overstreet u64 start, u64 end) 814e8540e56SKent Overstreet { 815e8540e56SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 8166bd68ec2SKent Overstreet struct btree_trans *trans = bch2_trans_get(c); 817e8540e56SKent Overstreet struct btree_iter iter; 818e8540e56SKent Overstreet struct bkey_s_c k; 819e8540e56SKent Overstreet u32 snapshot; 820e8540e56SKent Overstreet u64 sectors = end - start; 821e8540e56SKent Overstreet u64 pos = start; 822e8540e56SKent Overstreet int ret; 823e8540e56SKent Overstreet retry: 8246bd68ec2SKent Overstreet bch2_trans_begin(trans); 825e8540e56SKent Overstreet 8266bd68ec2SKent Overstreet ret = bch2_subvolume_get_snapshot(trans, inode->ei_subvol, &snapshot); 827e8540e56SKent Overstreet if (ret) 828e8540e56SKent Overstreet goto err; 829e8540e56SKent Overstreet 8306bd68ec2SKent Overstreet bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, 831e8540e56SKent Overstreet SPOS(inode->v.i_ino, pos, snapshot), 0); 832e8540e56SKent Overstreet 8336bd68ec2SKent Overstreet while (!(ret = btree_trans_too_many_iters(trans)) && 834e8540e56SKent Overstreet (k = bch2_btree_iter_peek_upto(&iter, POS(inode->v.i_ino, end - 1))).k && 835e8540e56SKent Overstreet !(ret = bkey_err(k))) { 836e8540e56SKent Overstreet if (bkey_extent_is_allocation(k.k)) { 837e8540e56SKent Overstreet u64 s = min(end, k.k->p.offset) - 838e8540e56SKent Overstreet max(start, bkey_start_offset(k.k)); 839e8540e56SKent Overstreet BUG_ON(s > sectors); 840e8540e56SKent Overstreet sectors -= s; 841e8540e56SKent Overstreet } 842e8540e56SKent Overstreet bch2_btree_iter_advance(&iter); 843e8540e56SKent Overstreet } 844e8540e56SKent Overstreet pos = iter.pos.offset; 8456bd68ec2SKent Overstreet bch2_trans_iter_exit(trans, &iter); 846e8540e56SKent Overstreet err: 847e8540e56SKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 848e8540e56SKent Overstreet goto retry; 849e8540e56SKent Overstreet 8506bd68ec2SKent Overstreet bch2_trans_put(trans); 851e8540e56SKent Overstreet 8526bd68ec2SKent Overstreet return ret ?: bch2_quota_reservation_add(c, inode, res, sectors, true); 853e8540e56SKent Overstreet } 854e8540e56SKent Overstreet 85576426098SKent Overstreet loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, 85676426098SKent Overstreet struct file *file_dst, loff_t pos_dst, 85776426098SKent Overstreet loff_t len, unsigned remap_flags) 85876426098SKent Overstreet { 85976426098SKent Overstreet struct bch_inode_info *src = file_bch_inode(file_src); 86076426098SKent Overstreet struct bch_inode_info *dst = file_bch_inode(file_dst); 86176426098SKent Overstreet struct bch_fs *c = src->v.i_sb->s_fs_info; 862e8540e56SKent Overstreet struct quota_res quota_res = { 0 }; 8632e87eae1SKent Overstreet s64 i_sectors_delta = 0; 864677fc056SKent Overstreet u64 aligned_len; 86576426098SKent Overstreet loff_t ret = 0; 86676426098SKent Overstreet 86776426098SKent Overstreet if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY)) 86876426098SKent Overstreet return -EINVAL; 86976426098SKent Overstreet 87076426098SKent Overstreet if ((pos_src & (block_bytes(c) - 1)) || 87176426098SKent Overstreet (pos_dst & (block_bytes(c) - 1))) 87276426098SKent Overstreet return -EINVAL; 87376426098SKent Overstreet 87476426098SKent Overstreet if (src == dst && 87576426098SKent Overstreet abs(pos_src - pos_dst) < len) 87676426098SKent Overstreet return -EINVAL; 87776426098SKent Overstreet 878ecf8a74dSKent Overstreet lock_two_nondirectories(&src->v, &dst->v); 879ecf8a74dSKent Overstreet bch2_lock_inodes(INODE_PAGECACHE_BLOCK, src, dst); 88076426098SKent Overstreet 88176426098SKent Overstreet inode_dio_wait(&src->v); 88276426098SKent Overstreet inode_dio_wait(&dst->v); 88376426098SKent Overstreet 88476426098SKent Overstreet ret = generic_remap_file_range_prep(file_src, pos_src, 88576426098SKent Overstreet file_dst, pos_dst, 88676426098SKent Overstreet &len, remap_flags); 88776426098SKent Overstreet if (ret < 0 || len == 0) 8882e87eae1SKent Overstreet goto err; 88976426098SKent Overstreet 890677fc056SKent Overstreet aligned_len = round_up((u64) len, block_bytes(c)); 89176426098SKent Overstreet 892dbbfca9fSKent Overstreet ret = bch2_write_invalidate_inode_pages_range(dst->v.i_mapping, 893677fc056SKent Overstreet pos_dst, pos_dst + len - 1); 89476426098SKent Overstreet if (ret) 8952e87eae1SKent Overstreet goto err; 89676426098SKent Overstreet 897e8540e56SKent Overstreet ret = quota_reserve_range(dst, "a_res, pos_dst >> 9, 898e8540e56SKent Overstreet (pos_dst + aligned_len) >> 9); 899e8540e56SKent Overstreet if (ret) 900e8540e56SKent Overstreet goto err; 901e8540e56SKent Overstreet 902*7f3dc6c9SReed Riley if (!(remap_flags & REMAP_FILE_DEDUP)) 903e8540e56SKent Overstreet file_update_time(file_dst); 904e8540e56SKent Overstreet 905dbbfca9fSKent Overstreet bch2_mark_pagecache_unallocated(src, pos_src >> 9, 906dcfc593fSKent Overstreet (pos_src + aligned_len) >> 9); 90776426098SKent Overstreet 9082e87eae1SKent Overstreet ret = bch2_remap_range(c, 9096fed42bbSKent Overstreet inode_inum(dst), pos_dst >> 9, 9106fed42bbSKent Overstreet inode_inum(src), pos_src >> 9, 91176426098SKent Overstreet aligned_len >> 9, 9122e87eae1SKent Overstreet pos_dst + len, &i_sectors_delta); 9132e87eae1SKent Overstreet if (ret < 0) 9142e87eae1SKent Overstreet goto err; 91576426098SKent Overstreet 9162e87eae1SKent Overstreet /* 9172e87eae1SKent Overstreet * due to alignment, we might have remapped slightly more than requsted 9182e87eae1SKent Overstreet */ 919677fc056SKent Overstreet ret = min((u64) ret << 9, (u64) len); 9202e87eae1SKent Overstreet 921dbbfca9fSKent Overstreet bch2_i_sectors_acct(c, dst, "a_res, i_sectors_delta); 9222e87eae1SKent Overstreet 9232e87eae1SKent Overstreet spin_lock(&dst->v.i_lock); 924677fc056SKent Overstreet if (pos_dst + ret > dst->v.i_size) 925677fc056SKent Overstreet i_size_write(&dst->v, pos_dst + ret); 9262e87eae1SKent Overstreet spin_unlock(&dst->v.i_lock); 927e7084c9cSKent Overstreet 92868a2054dSKent Overstreet if ((file_dst->f_flags & (__O_SYNC | O_DSYNC)) || 92968a2054dSKent Overstreet IS_SYNC(file_inode(file_dst))) 930a8b3a677SKent Overstreet ret = bch2_flush_inode(c, dst); 9312e87eae1SKent Overstreet err: 932e8540e56SKent Overstreet bch2_quota_reservation_put(c, dst, "a_res); 933ecf8a74dSKent Overstreet bch2_unlock_inodes(INODE_PAGECACHE_BLOCK, src, dst); 934ecf8a74dSKent Overstreet unlock_two_nondirectories(&src->v, &dst->v); 93576426098SKent Overstreet 9365c1ef830SKent Overstreet return bch2_err_class(ret); 93776426098SKent Overstreet } 93876426098SKent Overstreet 9391c6fdbd8SKent Overstreet /* fseek: */ 9401c6fdbd8SKent Overstreet 9411c6fdbd8SKent Overstreet static loff_t bch2_seek_data(struct file *file, u64 offset) 9421c6fdbd8SKent Overstreet { 9431c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 9441c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 9456bd68ec2SKent Overstreet struct btree_trans *trans; 94667e0dd8fSKent Overstreet struct btree_iter iter; 9471c6fdbd8SKent Overstreet struct bkey_s_c k; 9486fed42bbSKent Overstreet subvol_inum inum = inode_inum(inode); 9491c6fdbd8SKent Overstreet u64 isize, next_data = MAX_LFS_FILESIZE; 9506fed42bbSKent Overstreet u32 snapshot; 9511c6fdbd8SKent Overstreet int ret; 9521c6fdbd8SKent Overstreet 9531c6fdbd8SKent Overstreet isize = i_size_read(&inode->v); 9541c6fdbd8SKent Overstreet if (offset >= isize) 9551c6fdbd8SKent Overstreet return -ENXIO; 9561c6fdbd8SKent Overstreet 9576bd68ec2SKent Overstreet trans = bch2_trans_get(c); 9586fed42bbSKent Overstreet retry: 9596bd68ec2SKent Overstreet bch2_trans_begin(trans); 9606fed42bbSKent Overstreet 9616bd68ec2SKent Overstreet ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); 9626fed42bbSKent Overstreet if (ret) 9636fed42bbSKent Overstreet goto err; 964424eb881SKent Overstreet 9656bd68ec2SKent Overstreet for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_extents, 966c72f687aSKent Overstreet SPOS(inode->v.i_ino, offset >> 9, snapshot), 967c72f687aSKent Overstreet POS(inode->v.i_ino, U64_MAX), 968c72f687aSKent Overstreet 0, k, ret) { 969c72f687aSKent Overstreet if (bkey_extent_is_data(k.k)) { 9701c6fdbd8SKent Overstreet next_data = max(offset, bkey_start_offset(k.k) << 9); 9711c6fdbd8SKent Overstreet break; 9721c6fdbd8SKent Overstreet } else if (k.k->p.offset >> 9 > isize) 9731c6fdbd8SKent Overstreet break; 9741c6fdbd8SKent Overstreet } 9756bd68ec2SKent Overstreet bch2_trans_iter_exit(trans, &iter); 9766fed42bbSKent Overstreet err: 977549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 9786fed42bbSKent Overstreet goto retry; 9791c6fdbd8SKent Overstreet 9806bd68ec2SKent Overstreet bch2_trans_put(trans); 9811c6fdbd8SKent Overstreet if (ret) 9821c6fdbd8SKent Overstreet return ret; 9831c6fdbd8SKent Overstreet 9841c6fdbd8SKent Overstreet if (next_data > offset) 985543ef2ebSKent Overstreet next_data = bch2_seek_pagecache_data(&inode->v, 9864198bf03SKent Overstreet offset, next_data, 0, false); 9871c6fdbd8SKent Overstreet 988e10d3094SKent Overstreet if (next_data >= isize) 9891c6fdbd8SKent Overstreet return -ENXIO; 9901c6fdbd8SKent Overstreet 9911c6fdbd8SKent Overstreet return vfs_setpos(file, next_data, MAX_LFS_FILESIZE); 9921c6fdbd8SKent Overstreet } 9931c6fdbd8SKent Overstreet 9941c6fdbd8SKent Overstreet static loff_t bch2_seek_hole(struct file *file, u64 offset) 9951c6fdbd8SKent Overstreet { 9961c6fdbd8SKent Overstreet struct bch_inode_info *inode = file_bch_inode(file); 9971c6fdbd8SKent Overstreet struct bch_fs *c = inode->v.i_sb->s_fs_info; 9986bd68ec2SKent Overstreet struct btree_trans *trans; 99967e0dd8fSKent Overstreet struct btree_iter iter; 10001c6fdbd8SKent Overstreet struct bkey_s_c k; 10016fed42bbSKent Overstreet subvol_inum inum = inode_inum(inode); 10021c6fdbd8SKent Overstreet u64 isize, next_hole = MAX_LFS_FILESIZE; 10036fed42bbSKent Overstreet u32 snapshot; 10041c6fdbd8SKent Overstreet int ret; 10051c6fdbd8SKent Overstreet 10061c6fdbd8SKent Overstreet isize = i_size_read(&inode->v); 10071c6fdbd8SKent Overstreet if (offset >= isize) 10081c6fdbd8SKent Overstreet return -ENXIO; 10091c6fdbd8SKent Overstreet 10106bd68ec2SKent Overstreet trans = bch2_trans_get(c); 10116fed42bbSKent Overstreet retry: 10126bd68ec2SKent Overstreet bch2_trans_begin(trans); 10136fed42bbSKent Overstreet 10146bd68ec2SKent Overstreet ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); 10156fed42bbSKent Overstreet if (ret) 10166fed42bbSKent Overstreet goto err; 1017424eb881SKent Overstreet 10186bd68ec2SKent Overstreet for_each_btree_key_norestart(trans, iter, BTREE_ID_extents, 10196fed42bbSKent Overstreet SPOS(inode->v.i_ino, offset >> 9, snapshot), 10205dd8c60eSKent Overstreet BTREE_ITER_slots, k, ret) { 10211c6fdbd8SKent Overstreet if (k.k->p.inode != inode->v.i_ino) { 1022543ef2ebSKent Overstreet next_hole = bch2_seek_pagecache_hole(&inode->v, 10234198bf03SKent Overstreet offset, MAX_LFS_FILESIZE, 0, false); 10241c6fdbd8SKent Overstreet break; 10251c6fdbd8SKent Overstreet } else if (!bkey_extent_is_data(k.k)) { 1026543ef2ebSKent Overstreet next_hole = bch2_seek_pagecache_hole(&inode->v, 10271c6fdbd8SKent Overstreet max(offset, bkey_start_offset(k.k) << 9), 10284198bf03SKent Overstreet k.k->p.offset << 9, 0, false); 10291c6fdbd8SKent Overstreet 10301c6fdbd8SKent Overstreet if (next_hole < k.k->p.offset << 9) 10311c6fdbd8SKent Overstreet break; 10321c6fdbd8SKent Overstreet } else { 10331c6fdbd8SKent Overstreet offset = max(offset, bkey_start_offset(k.k) << 9); 10341c6fdbd8SKent Overstreet } 10351c6fdbd8SKent Overstreet } 10366bd68ec2SKent Overstreet bch2_trans_iter_exit(trans, &iter); 10376fed42bbSKent Overstreet err: 1038549d173cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 10396fed42bbSKent Overstreet goto retry; 10401c6fdbd8SKent Overstreet 10416bd68ec2SKent Overstreet bch2_trans_put(trans); 10421c6fdbd8SKent Overstreet if (ret) 10431c6fdbd8SKent Overstreet return ret; 10441c6fdbd8SKent Overstreet 10451c6fdbd8SKent Overstreet if (next_hole > isize) 10461c6fdbd8SKent Overstreet next_hole = isize; 10471c6fdbd8SKent Overstreet 10481c6fdbd8SKent Overstreet return vfs_setpos(file, next_hole, MAX_LFS_FILESIZE); 10491c6fdbd8SKent Overstreet } 10501c6fdbd8SKent Overstreet 10511c6fdbd8SKent Overstreet loff_t bch2_llseek(struct file *file, loff_t offset, int whence) 10521c6fdbd8SKent Overstreet { 10535c1ef830SKent Overstreet loff_t ret; 10545c1ef830SKent Overstreet 10551c6fdbd8SKent Overstreet switch (whence) { 10561c6fdbd8SKent Overstreet case SEEK_SET: 10571c6fdbd8SKent Overstreet case SEEK_CUR: 10581c6fdbd8SKent Overstreet case SEEK_END: 10595c1ef830SKent Overstreet ret = generic_file_llseek(file, offset, whence); 10605c1ef830SKent Overstreet break; 10611c6fdbd8SKent Overstreet case SEEK_DATA: 10625c1ef830SKent Overstreet ret = bch2_seek_data(file, offset); 10635c1ef830SKent Overstreet break; 10641c6fdbd8SKent Overstreet case SEEK_HOLE: 10655c1ef830SKent Overstreet ret = bch2_seek_hole(file, offset); 10665c1ef830SKent Overstreet break; 10675c1ef830SKent Overstreet default: 10685c1ef830SKent Overstreet ret = -EINVAL; 10695c1ef830SKent Overstreet break; 10701c6fdbd8SKent Overstreet } 10711c6fdbd8SKent Overstreet 10725c1ef830SKent Overstreet return bch2_err_class(ret); 10731c6fdbd8SKent Overstreet } 10741c6fdbd8SKent Overstreet 10751c6fdbd8SKent Overstreet void bch2_fs_fsio_exit(struct bch_fs *c) 10761c6fdbd8SKent Overstreet { 1077a8b3a677SKent Overstreet bioset_exit(&c->nocow_flush_bioset); 10781c6fdbd8SKent Overstreet } 10791c6fdbd8SKent Overstreet 10801c6fdbd8SKent Overstreet int bch2_fs_fsio_init(struct bch_fs *c) 10811c6fdbd8SKent Overstreet { 108265d48e35SKent Overstreet if (bioset_init(&c->nocow_flush_bioset, 1083a8b3a677SKent Overstreet 1, offsetof(struct nocow_flush, bio), 0)) 108465d48e35SKent Overstreet return -BCH_ERR_ENOMEM_nocow_flush_bioset_init; 10851c6fdbd8SKent Overstreet 1086c8b4534dSKent Overstreet return 0; 10871c6fdbd8SKent Overstreet } 10881c6fdbd8SKent Overstreet 10891c6fdbd8SKent Overstreet #endif /* NO_BCACHEFS_FS */ 1090