xref: /linux/fs/bcachefs/fs.c (revision 4232da23d75d173195c6766729e51947b64f83cd)
1  // SPDX-License-Identifier: GPL-2.0
2  #ifndef NO_BCACHEFS_FS
3  
4  #include "bcachefs.h"
5  #include "acl.h"
6  #include "bkey_buf.h"
7  #include "btree_update.h"
8  #include "buckets.h"
9  #include "chardev.h"
10  #include "dirent.h"
11  #include "errcode.h"
12  #include "extents.h"
13  #include "fs.h"
14  #include "fs-common.h"
15  #include "fs-io.h"
16  #include "fs-ioctl.h"
17  #include "fs-io-buffered.h"
18  #include "fs-io-direct.h"
19  #include "fs-io-pagecache.h"
20  #include "fsck.h"
21  #include "inode.h"
22  #include "io_read.h"
23  #include "journal.h"
24  #include "keylist.h"
25  #include "quota.h"
26  #include "snapshot.h"
27  #include "super.h"
28  #include "xattr.h"
29  
30  #include <linux/aio.h>
31  #include <linux/backing-dev.h>
32  #include <linux/exportfs.h>
33  #include <linux/fiemap.h>
34  #include <linux/module.h>
35  #include <linux/pagemap.h>
36  #include <linux/posix_acl.h>
37  #include <linux/random.h>
38  #include <linux/seq_file.h>
39  #include <linux/statfs.h>
40  #include <linux/string.h>
41  #include <linux/xattr.h>
42  
/*
 * Slab cache that struct bch_inode_info objects are handed back to; see the
 * kmem_cache_free() calls in bch2_inode_insert() and bch2_new_inode().
 */
static struct kmem_cache *bch2_inode_cache;

/*
 * Forward declaration: the lookup and create paths below call this before its
 * definition appears in the file.
 */
static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum,
				struct bch_inode_info *,
				struct bch_inode_unpacked *,
				struct bch_subvolume *);
49  
50  void bch2_inode_update_after_write(struct btree_trans *trans,
51  				   struct bch_inode_info *inode,
52  				   struct bch_inode_unpacked *bi,
53  				   unsigned fields)
54  {
55  	struct bch_fs *c = trans->c;
56  
57  	BUG_ON(bi->bi_inum != inode->v.i_ino);
58  
59  	bch2_assert_pos_locked(trans, BTREE_ID_inodes,
60  			       POS(0, bi->bi_inum),
61  			       c->opts.inodes_use_key_cache);
62  
63  	set_nlink(&inode->v, bch2_inode_nlink_get(bi));
64  	i_uid_write(&inode->v, bi->bi_uid);
65  	i_gid_write(&inode->v, bi->bi_gid);
66  	inode->v.i_mode	= bi->bi_mode;
67  
68  	if (fields & ATTR_ATIME)
69  		inode_set_atime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_atime));
70  	if (fields & ATTR_MTIME)
71  		inode_set_mtime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_mtime));
72  	if (fields & ATTR_CTIME)
73  		inode_set_ctime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_ctime));
74  
75  	inode->ei_inode		= *bi;
76  
77  	bch2_inode_flags_to_vfs(inode);
78  }
79  
80  int __must_check bch2_write_inode(struct bch_fs *c,
81  				  struct bch_inode_info *inode,
82  				  inode_set_fn set,
83  				  void *p, unsigned fields)
84  {
85  	struct btree_trans *trans = bch2_trans_get(c);
86  	struct btree_iter iter = { NULL };
87  	struct bch_inode_unpacked inode_u;
88  	int ret;
89  retry:
90  	bch2_trans_begin(trans);
91  
92  	ret   = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode),
93  				BTREE_ITER_INTENT) ?:
94  		(set ? set(trans, inode, &inode_u, p) : 0) ?:
95  		bch2_inode_write(trans, &iter, &inode_u) ?:
96  		bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
97  
98  	/*
99  	 * the btree node lock protects inode->ei_inode, not ei_update_lock;
100  	 * this is important for inode updates via bchfs_write_index_update
101  	 */
102  	if (!ret)
103  		bch2_inode_update_after_write(trans, inode, &inode_u, fields);
104  
105  	bch2_trans_iter_exit(trans, &iter);
106  
107  	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
108  		goto retry;
109  
110  	bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
111  			     "%s: inode %u:%llu not found when updating",
112  			     bch2_err_str(ret),
113  			     inode_inum(inode).subvol,
114  			     inode_inum(inode).inum);
115  
116  	bch2_trans_put(trans);
117  	return ret < 0 ? ret : 0;
118  }
119  
120  int bch2_fs_quota_transfer(struct bch_fs *c,
121  			   struct bch_inode_info *inode,
122  			   struct bch_qid new_qid,
123  			   unsigned qtypes,
124  			   enum quota_acct_mode mode)
125  {
126  	unsigned i;
127  	int ret;
128  
129  	qtypes &= enabled_qtypes(c);
130  
131  	for (i = 0; i < QTYP_NR; i++)
132  		if (new_qid.q[i] == inode->ei_qid.q[i])
133  			qtypes &= ~(1U << i);
134  
135  	if (!qtypes)
136  		return 0;
137  
138  	mutex_lock(&inode->ei_quota_lock);
139  
140  	ret = bch2_quota_transfer(c, qtypes, new_qid,
141  				  inode->ei_qid,
142  				  inode->v.i_blocks +
143  				  inode->ei_quota_reserved,
144  				  mode);
145  	if (!ret)
146  		for (i = 0; i < QTYP_NR; i++)
147  			if (qtypes & (1 << i))
148  				inode->ei_qid.q[i] = new_qid.q[i];
149  
150  	mutex_unlock(&inode->ei_quota_lock);
151  
152  	return ret;
153  }
154  
155  static int bch2_iget5_test(struct inode *vinode, void *p)
156  {
157  	struct bch_inode_info *inode = to_bch_ei(vinode);
158  	subvol_inum *inum = p;
159  
160  	return inode->ei_subvol == inum->subvol &&
161  		inode->ei_inode.bi_inum == inum->inum;
162  }
163  
164  static int bch2_iget5_set(struct inode *vinode, void *p)
165  {
166  	struct bch_inode_info *inode = to_bch_ei(vinode);
167  	subvol_inum *inum = p;
168  
169  	inode->v.i_ino		= inum->inum;
170  	inode->ei_subvol	= inum->subvol;
171  	inode->ei_inode.bi_inum	= inum->inum;
172  	return 0;
173  }
174  
175  static unsigned bch2_inode_hash(subvol_inum inum)
176  {
177  	return jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL);
178  }
179  
180  static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_inode_info *inode)
181  {
182  	subvol_inum inum = inode_inum(inode);
183  	struct bch_inode_info *old = to_bch_ei(inode_insert5(&inode->v,
184  				      bch2_inode_hash(inum),
185  				      bch2_iget5_test,
186  				      bch2_iget5_set,
187  				      &inum));
188  	BUG_ON(!old);
189  
190  	if (unlikely(old != inode)) {
191  		__destroy_inode(&inode->v);
192  		kmem_cache_free(bch2_inode_cache, inode);
193  		inode = old;
194  	} else {
195  		mutex_lock(&c->vfs_inodes_lock);
196  		list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
197  		mutex_unlock(&c->vfs_inodes_lock);
198  		/*
199  		 * we really don't want insert_inode_locked2() to be setting
200  		 * I_NEW...
201  		 */
202  		unlock_new_inode(&inode->v);
203  	}
204  
205  	return inode;
206  }
207  
/*
 * Evaluate @_do with the memalloc flags @_flags applied to the current task,
 * restore the previous state afterwards, and yield the value of @_do.
 */
#define memalloc_flags_do(_flags, _do)						\
({										\
	unsigned _memalloc_saved = memalloc_flags_save(_flags);		\
	typeof(_do) _memalloc_result = _do;					\
										\
	memalloc_noreclaim_restore(_memalloc_saved);				\
	_memalloc_result;							\
})
215  
216  /*
217   * Allocate a new inode, dropping/retaking btree locks if necessary:
218   */
219  static struct bch_inode_info *bch2_new_inode(struct btree_trans *trans)
220  {
221  	struct bch_fs *c = trans->c;
222  
223  	struct bch_inode_info *inode =
224  		memalloc_flags_do(PF_MEMALLOC_NORECLAIM|PF_MEMALLOC_NOWARN,
225  				  to_bch_ei(new_inode(c->vfs_sb)));
226  
227  	if (unlikely(!inode)) {
228  		int ret = drop_locks_do(trans, (inode = to_bch_ei(new_inode(c->vfs_sb))) ? 0 : -ENOMEM);
229  		if (ret && inode) {
230  			__destroy_inode(&inode->v);
231  			kmem_cache_free(bch2_inode_cache, inode);
232  		}
233  		if (ret)
234  			return ERR_PTR(ret);
235  	}
236  
237  	return inode;
238  }
239  
240  struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
241  {
242  	struct bch_inode_info *inode =
243  		to_bch_ei(ilookup5_nowait(c->vfs_sb,
244  					  bch2_inode_hash(inum),
245  					  bch2_iget5_test,
246  					  &inum));
247  	if (inode)
248  		return &inode->v;
249  
250  	struct btree_trans *trans = bch2_trans_get(c);
251  
252  	struct bch_inode_unpacked inode_u;
253  	struct bch_subvolume subvol;
254  	int ret = lockrestart_do(trans,
255  		bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
256  		bch2_inode_find_by_inum_trans(trans, inum, &inode_u)) ?:
257  		PTR_ERR_OR_ZERO(inode = bch2_new_inode(trans));
258  	if (!ret) {
259  		bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
260  		inode = bch2_inode_insert(c, inode);
261  	}
262  	bch2_trans_put(trans);
263  
264  	return ret ? ERR_PTR(ret) : &inode->v;
265  }
266  
267  struct bch_inode_info *
268  __bch2_create(struct mnt_idmap *idmap,
269  	      struct bch_inode_info *dir, struct dentry *dentry,
270  	      umode_t mode, dev_t rdev, subvol_inum snapshot_src,
271  	      unsigned flags)
272  {
273  	struct bch_fs *c = dir->v.i_sb->s_fs_info;
274  	struct btree_trans *trans;
275  	struct bch_inode_unpacked dir_u;
276  	struct bch_inode_info *inode;
277  	struct bch_inode_unpacked inode_u;
278  	struct posix_acl *default_acl = NULL, *acl = NULL;
279  	subvol_inum inum;
280  	struct bch_subvolume subvol;
281  	u64 journal_seq = 0;
282  	int ret;
283  
284  	/*
285  	 * preallocate acls + vfs inode before btree transaction, so that
286  	 * nothing can fail after the transaction succeeds:
287  	 */
288  #ifdef CONFIG_BCACHEFS_POSIX_ACL
289  	ret = posix_acl_create(&dir->v, &mode, &default_acl, &acl);
290  	if (ret)
291  		return ERR_PTR(ret);
292  #endif
293  	inode = to_bch_ei(new_inode(c->vfs_sb));
294  	if (unlikely(!inode)) {
295  		inode = ERR_PTR(-ENOMEM);
296  		goto err;
297  	}
298  
299  	bch2_inode_init_early(c, &inode_u);
300  
301  	if (!(flags & BCH_CREATE_TMPFILE))
302  		mutex_lock(&dir->ei_update_lock);
303  
304  	trans = bch2_trans_get(c);
305  retry:
306  	bch2_trans_begin(trans);
307  
308  	ret   = bch2_subvol_is_ro_trans(trans, dir->ei_subvol) ?:
309  		bch2_create_trans(trans,
310  				  inode_inum(dir), &dir_u, &inode_u,
311  				  !(flags & BCH_CREATE_TMPFILE)
312  				  ? &dentry->d_name : NULL,
313  				  from_kuid(i_user_ns(&dir->v), current_fsuid()),
314  				  from_kgid(i_user_ns(&dir->v), current_fsgid()),
315  				  mode, rdev,
316  				  default_acl, acl, snapshot_src, flags) ?:
317  		bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1,
318  				KEY_TYPE_QUOTA_PREALLOC);
319  	if (unlikely(ret))
320  		goto err_before_quota;
321  
322  	inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol;
323  	inum.inum = inode_u.bi_inum;
324  
325  	ret   = bch2_subvolume_get(trans, inum.subvol, true,
326  				   BTREE_ITER_WITH_UPDATES, &subvol) ?:
327  		bch2_trans_commit(trans, NULL, &journal_seq, 0);
328  	if (unlikely(ret)) {
329  		bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
330  				KEY_TYPE_QUOTA_WARN);
331  err_before_quota:
332  		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
333  			goto retry;
334  		goto err_trans;
335  	}
336  
337  	if (!(flags & BCH_CREATE_TMPFILE)) {
338  		bch2_inode_update_after_write(trans, dir, &dir_u,
339  					      ATTR_MTIME|ATTR_CTIME);
340  		mutex_unlock(&dir->ei_update_lock);
341  	}
342  
343  	bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
344  
345  	set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
346  	set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl);
347  
348  	/*
349  	 * we must insert the new inode into the inode cache before calling
350  	 * bch2_trans_exit() and dropping locks, else we could race with another
351  	 * thread pulling the inode in and modifying it:
352  	 */
353  	inode = bch2_inode_insert(c, inode);
354  	bch2_trans_put(trans);
355  err:
356  	posix_acl_release(default_acl);
357  	posix_acl_release(acl);
358  	return inode;
359  err_trans:
360  	if (!(flags & BCH_CREATE_TMPFILE))
361  		mutex_unlock(&dir->ei_update_lock);
362  
363  	bch2_trans_put(trans);
364  	make_bad_inode(&inode->v);
365  	iput(&inode->v);
366  	inode = ERR_PTR(ret);
367  	goto err;
368  }
369  
370  /* methods */
371  
372  static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
373  			subvol_inum dir, struct bch_hash_info *dir_hash_info,
374  			const struct qstr *name)
375  {
376  	struct bch_fs *c = trans->c;
377  	struct btree_iter dirent_iter = {};
378  	subvol_inum inum = {};
379  
380  	int ret = bch2_hash_lookup(trans, &dirent_iter, bch2_dirent_hash_desc,
381  				   dir_hash_info, dir, name, 0);
382  	if (ret)
383  		return ERR_PTR(ret);
384  
385  	struct bkey_s_c k = bch2_btree_iter_peek_slot(&dirent_iter);
386  	ret = bkey_err(k);
387  	if (ret)
388  		goto err;
389  
390  	ret = bch2_dirent_read_target(trans, dir, bkey_s_c_to_dirent(k), &inum);
391  	if (ret > 0)
392  		ret = -ENOENT;
393  	if (ret)
394  		goto err;
395  
396  	struct bch_inode_info *inode =
397  		to_bch_ei(ilookup5_nowait(c->vfs_sb,
398  					  bch2_inode_hash(inum),
399  					  bch2_iget5_test,
400  					  &inum));
401  	if (inode)
402  		goto out;
403  
404  	struct bch_subvolume subvol;
405  	struct bch_inode_unpacked inode_u;
406  	ret =   bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
407  		bch2_inode_find_by_inum_nowarn_trans(trans, inum, &inode_u) ?:
408  		PTR_ERR_OR_ZERO(inode = bch2_new_inode(trans));
409  	if (bch2_err_matches(ret, ENOENT)) {
410  		struct printbuf buf = PRINTBUF;
411  
412  		bch2_bkey_val_to_text(&buf, c, k);
413  		bch_err(c, "%s points to missing inode", buf.buf);
414  		printbuf_exit(&buf);
415  	}
416  	if (ret)
417  		goto err;
418  
419  	bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
420  	inode = bch2_inode_insert(c, inode);
421  out:
422  	bch2_trans_iter_exit(trans, &dirent_iter);
423  	return inode;
424  err:
425  	inode = ERR_PTR(ret);
426  	goto out;
427  }
428  
429  static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
430  				  unsigned int flags)
431  {
432  	struct bch_fs *c = vdir->i_sb->s_fs_info;
433  	struct bch_inode_info *dir = to_bch_ei(vdir);
434  	struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode);
435  
436  	struct bch_inode_info *inode;
437  	bch2_trans_do(c, NULL, NULL, 0,
438  		PTR_ERR_OR_ZERO(inode = bch2_lookup_trans(trans, inode_inum(dir),
439  							  &hash, &dentry->d_name)));
440  	if (IS_ERR(inode))
441  		inode = NULL;
442  
443  	return d_splice_alias(&inode->v, dentry);
444  }
445  
446  static int bch2_mknod(struct mnt_idmap *idmap,
447  		      struct inode *vdir, struct dentry *dentry,
448  		      umode_t mode, dev_t rdev)
449  {
450  	struct bch_inode_info *inode =
451  		__bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev,
452  			      (subvol_inum) { 0 }, 0);
453  
454  	if (IS_ERR(inode))
455  		return bch2_err_class(PTR_ERR(inode));
456  
457  	d_instantiate(dentry, &inode->v);
458  	return 0;
459  }
460  
461  static int bch2_create(struct mnt_idmap *idmap,
462  		       struct inode *vdir, struct dentry *dentry,
463  		       umode_t mode, bool excl)
464  {
465  	return bch2_mknod(idmap, vdir, dentry, mode|S_IFREG, 0);
466  }
467  
468  static int __bch2_link(struct bch_fs *c,
469  		       struct bch_inode_info *inode,
470  		       struct bch_inode_info *dir,
471  		       struct dentry *dentry)
472  {
473  	struct btree_trans *trans = bch2_trans_get(c);
474  	struct bch_inode_unpacked dir_u, inode_u;
475  	int ret;
476  
477  	mutex_lock(&inode->ei_update_lock);
478  
479  	ret = commit_do(trans, NULL, NULL, 0,
480  			bch2_link_trans(trans,
481  					inode_inum(dir),   &dir_u,
482  					inode_inum(inode), &inode_u,
483  					&dentry->d_name));
484  
485  	if (likely(!ret)) {
486  		bch2_inode_update_after_write(trans, dir, &dir_u,
487  					      ATTR_MTIME|ATTR_CTIME);
488  		bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME);
489  	}
490  
491  	bch2_trans_put(trans);
492  	mutex_unlock(&inode->ei_update_lock);
493  	return ret;
494  }
495  
496  static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
497  		     struct dentry *dentry)
498  {
499  	struct bch_fs *c = vdir->i_sb->s_fs_info;
500  	struct bch_inode_info *dir = to_bch_ei(vdir);
501  	struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode);
502  	int ret;
503  
504  	lockdep_assert_held(&inode->v.i_rwsem);
505  
506  	ret   = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
507  		bch2_subvol_is_ro(c, inode->ei_subvol) ?:
508  		__bch2_link(c, inode, dir, dentry);
509  	if (unlikely(ret))
510  		return bch2_err_class(ret);
511  
512  	ihold(&inode->v);
513  	d_instantiate(dentry, &inode->v);
514  	return 0;
515  }
516  
517  int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
518  		  bool deleting_snapshot)
519  {
520  	struct bch_fs *c = vdir->i_sb->s_fs_info;
521  	struct bch_inode_info *dir = to_bch_ei(vdir);
522  	struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
523  	struct bch_inode_unpacked dir_u, inode_u;
524  	struct btree_trans *trans = bch2_trans_get(c);
525  	int ret;
526  
527  	bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
528  
529  	ret = commit_do(trans, NULL, NULL,
530  			BCH_TRANS_COMMIT_no_enospc,
531  		bch2_unlink_trans(trans,
532  				  inode_inum(dir), &dir_u,
533  				  &inode_u, &dentry->d_name,
534  				  deleting_snapshot));
535  	if (unlikely(ret))
536  		goto err;
537  
538  	bch2_inode_update_after_write(trans, dir, &dir_u,
539  				      ATTR_MTIME|ATTR_CTIME);
540  	bch2_inode_update_after_write(trans, inode, &inode_u,
541  				      ATTR_MTIME);
542  
543  	if (inode_u.bi_subvol) {
544  		/*
545  		 * Subvolume deletion is asynchronous, but we still want to tell
546  		 * the VFS that it's been deleted here:
547  		 */
548  		set_nlink(&inode->v, 0);
549  	}
550  err:
551  	bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
552  	bch2_trans_put(trans);
553  
554  	return ret;
555  }
556  
557  static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
558  {
559  	struct bch_inode_info *dir= to_bch_ei(vdir);
560  	struct bch_fs *c = dir->v.i_sb->s_fs_info;
561  
562  	int ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
563  		__bch2_unlink(vdir, dentry, false);
564  	return bch2_err_class(ret);
565  }
566  
567  static int bch2_symlink(struct mnt_idmap *idmap,
568  			struct inode *vdir, struct dentry *dentry,
569  			const char *symname)
570  {
571  	struct bch_fs *c = vdir->i_sb->s_fs_info;
572  	struct bch_inode_info *dir = to_bch_ei(vdir), *inode;
573  	int ret;
574  
575  	inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
576  			      (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
577  	if (IS_ERR(inode))
578  		return bch2_err_class(PTR_ERR(inode));
579  
580  	inode_lock(&inode->v);
581  	ret = page_symlink(&inode->v, symname, strlen(symname) + 1);
582  	inode_unlock(&inode->v);
583  
584  	if (unlikely(ret))
585  		goto err;
586  
587  	ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX);
588  	if (unlikely(ret))
589  		goto err;
590  
591  	ret = __bch2_link(c, inode, dir, dentry);
592  	if (unlikely(ret))
593  		goto err;
594  
595  	d_instantiate(dentry, &inode->v);
596  	return 0;
597  err:
598  	iput(&inode->v);
599  	return bch2_err_class(ret);
600  }
601  
602  static int bch2_mkdir(struct mnt_idmap *idmap,
603  		      struct inode *vdir, struct dentry *dentry, umode_t mode)
604  {
605  	return bch2_mknod(idmap, vdir, dentry, mode|S_IFDIR, 0);
606  }
607  
608  static int bch2_rename2(struct mnt_idmap *idmap,
609  			struct inode *src_vdir, struct dentry *src_dentry,
610  			struct inode *dst_vdir, struct dentry *dst_dentry,
611  			unsigned flags)
612  {
613  	struct bch_fs *c = src_vdir->i_sb->s_fs_info;
614  	struct bch_inode_info *src_dir = to_bch_ei(src_vdir);
615  	struct bch_inode_info *dst_dir = to_bch_ei(dst_vdir);
616  	struct bch_inode_info *src_inode = to_bch_ei(src_dentry->d_inode);
617  	struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode);
618  	struct bch_inode_unpacked dst_dir_u, src_dir_u;
619  	struct bch_inode_unpacked src_inode_u, dst_inode_u;
620  	struct btree_trans *trans;
621  	enum bch_rename_mode mode = flags & RENAME_EXCHANGE
622  		? BCH_RENAME_EXCHANGE
623  		: dst_dentry->d_inode
624  		? BCH_RENAME_OVERWRITE : BCH_RENAME;
625  	int ret;
626  
627  	if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE))
628  		return -EINVAL;
629  
630  	if (mode == BCH_RENAME_OVERWRITE) {
631  		ret = filemap_write_and_wait_range(src_inode->v.i_mapping,
632  						   0, LLONG_MAX);
633  		if (ret)
634  			return ret;
635  	}
636  
637  	trans = bch2_trans_get(c);
638  
639  	bch2_lock_inodes(INODE_UPDATE_LOCK,
640  			 src_dir,
641  			 dst_dir,
642  			 src_inode,
643  			 dst_inode);
644  
645  	ret   = bch2_subvol_is_ro_trans(trans, src_dir->ei_subvol) ?:
646  		bch2_subvol_is_ro_trans(trans, dst_dir->ei_subvol);
647  	if (ret)
648  		goto err;
649  
650  	if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) {
651  		ret = bch2_fs_quota_transfer(c, src_inode,
652  					     dst_dir->ei_qid,
653  					     1 << QTYP_PRJ,
654  					     KEY_TYPE_QUOTA_PREALLOC);
655  		if (ret)
656  			goto err;
657  	}
658  
659  	if (mode == BCH_RENAME_EXCHANGE &&
660  	    inode_attr_changing(src_dir, dst_inode, Inode_opt_project)) {
661  		ret = bch2_fs_quota_transfer(c, dst_inode,
662  					     src_dir->ei_qid,
663  					     1 << QTYP_PRJ,
664  					     KEY_TYPE_QUOTA_PREALLOC);
665  		if (ret)
666  			goto err;
667  	}
668  
669  	ret = commit_do(trans, NULL, NULL, 0,
670  			bch2_rename_trans(trans,
671  					  inode_inum(src_dir), &src_dir_u,
672  					  inode_inum(dst_dir), &dst_dir_u,
673  					  &src_inode_u,
674  					  &dst_inode_u,
675  					  &src_dentry->d_name,
676  					  &dst_dentry->d_name,
677  					  mode));
678  	if (unlikely(ret))
679  		goto err;
680  
681  	BUG_ON(src_inode->v.i_ino != src_inode_u.bi_inum);
682  	BUG_ON(dst_inode &&
683  	       dst_inode->v.i_ino != dst_inode_u.bi_inum);
684  
685  	bch2_inode_update_after_write(trans, src_dir, &src_dir_u,
686  				      ATTR_MTIME|ATTR_CTIME);
687  
688  	if (src_dir != dst_dir)
689  		bch2_inode_update_after_write(trans, dst_dir, &dst_dir_u,
690  					      ATTR_MTIME|ATTR_CTIME);
691  
692  	bch2_inode_update_after_write(trans, src_inode, &src_inode_u,
693  				      ATTR_CTIME);
694  
695  	if (dst_inode)
696  		bch2_inode_update_after_write(trans, dst_inode, &dst_inode_u,
697  					      ATTR_CTIME);
698  err:
699  	bch2_trans_put(trans);
700  
701  	bch2_fs_quota_transfer(c, src_inode,
702  			       bch_qid(&src_inode->ei_inode),
703  			       1 << QTYP_PRJ,
704  			       KEY_TYPE_QUOTA_NOCHECK);
705  	if (dst_inode)
706  		bch2_fs_quota_transfer(c, dst_inode,
707  				       bch_qid(&dst_inode->ei_inode),
708  				       1 << QTYP_PRJ,
709  				       KEY_TYPE_QUOTA_NOCHECK);
710  
711  	bch2_unlock_inodes(INODE_UPDATE_LOCK,
712  			   src_dir,
713  			   dst_dir,
714  			   src_inode,
715  			   dst_inode);
716  
717  	return bch2_err_class(ret);
718  }
719  
720  static void bch2_setattr_copy(struct mnt_idmap *idmap,
721  			      struct bch_inode_info *inode,
722  			      struct bch_inode_unpacked *bi,
723  			      struct iattr *attr)
724  {
725  	struct bch_fs *c = inode->v.i_sb->s_fs_info;
726  	unsigned int ia_valid = attr->ia_valid;
727  
728  	if (ia_valid & ATTR_UID)
729  		bi->bi_uid = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
730  	if (ia_valid & ATTR_GID)
731  		bi->bi_gid = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
732  
733  	if (ia_valid & ATTR_SIZE)
734  		bi->bi_size = attr->ia_size;
735  
736  	if (ia_valid & ATTR_ATIME)
737  		bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime);
738  	if (ia_valid & ATTR_MTIME)
739  		bi->bi_mtime = timespec_to_bch2_time(c, attr->ia_mtime);
740  	if (ia_valid & ATTR_CTIME)
741  		bi->bi_ctime = timespec_to_bch2_time(c, attr->ia_ctime);
742  
743  	if (ia_valid & ATTR_MODE) {
744  		umode_t mode = attr->ia_mode;
745  		kgid_t gid = ia_valid & ATTR_GID
746  			? attr->ia_gid
747  			: inode->v.i_gid;
748  
749  		if (!in_group_p(gid) &&
750  		    !capable_wrt_inode_uidgid(idmap, &inode->v, CAP_FSETID))
751  			mode &= ~S_ISGID;
752  		bi->bi_mode = mode;
753  	}
754  }
755  
756  int bch2_setattr_nonsize(struct mnt_idmap *idmap,
757  			 struct bch_inode_info *inode,
758  			 struct iattr *attr)
759  {
760  	struct bch_fs *c = inode->v.i_sb->s_fs_info;
761  	struct bch_qid qid;
762  	struct btree_trans *trans;
763  	struct btree_iter inode_iter = { NULL };
764  	struct bch_inode_unpacked inode_u;
765  	struct posix_acl *acl = NULL;
766  	int ret;
767  
768  	mutex_lock(&inode->ei_update_lock);
769  
770  	qid = inode->ei_qid;
771  
772  	if (attr->ia_valid & ATTR_UID)
773  		qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
774  
775  	if (attr->ia_valid & ATTR_GID)
776  		qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
777  
778  	ret = bch2_fs_quota_transfer(c, inode, qid, ~0,
779  				     KEY_TYPE_QUOTA_PREALLOC);
780  	if (ret)
781  		goto err;
782  
783  	trans = bch2_trans_get(c);
784  retry:
785  	bch2_trans_begin(trans);
786  	kfree(acl);
787  	acl = NULL;
788  
789  	ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
790  			      BTREE_ITER_INTENT);
791  	if (ret)
792  		goto btree_err;
793  
794  	bch2_setattr_copy(idmap, inode, &inode_u, attr);
795  
796  	if (attr->ia_valid & ATTR_MODE) {
797  		ret = bch2_acl_chmod(trans, inode_inum(inode), &inode_u,
798  				     inode_u.bi_mode, &acl);
799  		if (ret)
800  			goto btree_err;
801  	}
802  
803  	ret =   bch2_inode_write(trans, &inode_iter, &inode_u) ?:
804  		bch2_trans_commit(trans, NULL, NULL,
805  				  BCH_TRANS_COMMIT_no_enospc);
806  btree_err:
807  	bch2_trans_iter_exit(trans, &inode_iter);
808  
809  	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
810  		goto retry;
811  	if (unlikely(ret))
812  		goto err_trans;
813  
814  	bch2_inode_update_after_write(trans, inode, &inode_u, attr->ia_valid);
815  
816  	if (acl)
817  		set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
818  err_trans:
819  	bch2_trans_put(trans);
820  err:
821  	mutex_unlock(&inode->ei_update_lock);
822  
823  	return bch2_err_class(ret);
824  }
825  
826  static int bch2_getattr(struct mnt_idmap *idmap,
827  			const struct path *path, struct kstat *stat,
828  			u32 request_mask, unsigned query_flags)
829  {
830  	struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry));
831  	struct bch_fs *c = inode->v.i_sb->s_fs_info;
832  
833  	stat->dev	= inode->v.i_sb->s_dev;
834  	stat->ino	= inode->v.i_ino;
835  	stat->mode	= inode->v.i_mode;
836  	stat->nlink	= inode->v.i_nlink;
837  	stat->uid	= inode->v.i_uid;
838  	stat->gid	= inode->v.i_gid;
839  	stat->rdev	= inode->v.i_rdev;
840  	stat->size	= i_size_read(&inode->v);
841  	stat->atime	= inode_get_atime(&inode->v);
842  	stat->mtime	= inode_get_mtime(&inode->v);
843  	stat->ctime	= inode_get_ctime(&inode->v);
844  	stat->blksize	= block_bytes(c);
845  	stat->blocks	= inode->v.i_blocks;
846  
847  	if (request_mask & STATX_BTIME) {
848  		stat->result_mask |= STATX_BTIME;
849  		stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
850  	}
851  
852  	if (inode->ei_inode.bi_flags & BCH_INODE_immutable)
853  		stat->attributes |= STATX_ATTR_IMMUTABLE;
854  	stat->attributes_mask	 |= STATX_ATTR_IMMUTABLE;
855  
856  	if (inode->ei_inode.bi_flags & BCH_INODE_append)
857  		stat->attributes |= STATX_ATTR_APPEND;
858  	stat->attributes_mask	 |= STATX_ATTR_APPEND;
859  
860  	if (inode->ei_inode.bi_flags & BCH_INODE_nodump)
861  		stat->attributes |= STATX_ATTR_NODUMP;
862  	stat->attributes_mask	 |= STATX_ATTR_NODUMP;
863  
864  	return 0;
865  }
866  
867  static int bch2_setattr(struct mnt_idmap *idmap,
868  			struct dentry *dentry, struct iattr *iattr)
869  {
870  	struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
871  	struct bch_fs *c = inode->v.i_sb->s_fs_info;
872  	int ret;
873  
874  	lockdep_assert_held(&inode->v.i_rwsem);
875  
876  	ret   = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
877  		setattr_prepare(idmap, dentry, iattr);
878  	if (ret)
879  		return ret;
880  
881  	return iattr->ia_valid & ATTR_SIZE
882  		? bchfs_truncate(idmap, inode, iattr)
883  		: bch2_setattr_nonsize(idmap, inode, iattr);
884  }
885  
886  static int bch2_tmpfile(struct mnt_idmap *idmap,
887  			struct inode *vdir, struct file *file, umode_t mode)
888  {
889  	struct bch_inode_info *inode =
890  		__bch2_create(idmap, to_bch_ei(vdir),
891  			      file->f_path.dentry, mode, 0,
892  			      (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
893  
894  	if (IS_ERR(inode))
895  		return bch2_err_class(PTR_ERR(inode));
896  
897  	d_mark_tmpfile(file, &inode->v);
898  	d_instantiate(file->f_path.dentry, &inode->v);
899  	return finish_open_simple(file, 0);
900  }
901  
902  static int bch2_fill_extent(struct bch_fs *c,
903  			    struct fiemap_extent_info *info,
904  			    struct bkey_s_c k, unsigned flags)
905  {
906  	if (bkey_extent_is_direct_data(k.k)) {
907  		struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
908  		const union bch_extent_entry *entry;
909  		struct extent_ptr_decoded p;
910  		int ret;
911  
912  		if (k.k->type == KEY_TYPE_reflink_v)
913  			flags |= FIEMAP_EXTENT_SHARED;
914  
915  		bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
916  			int flags2 = 0;
917  			u64 offset = p.ptr.offset;
918  
919  			if (p.ptr.unwritten)
920  				flags2 |= FIEMAP_EXTENT_UNWRITTEN;
921  
922  			if (p.crc.compression_type)
923  				flags2 |= FIEMAP_EXTENT_ENCODED;
924  			else
925  				offset += p.crc.offset;
926  
927  			if ((offset & (block_sectors(c) - 1)) ||
928  			    (k.k->size & (block_sectors(c) - 1)))
929  				flags2 |= FIEMAP_EXTENT_NOT_ALIGNED;
930  
931  			ret = fiemap_fill_next_extent(info,
932  						bkey_start_offset(k.k) << 9,
933  						offset << 9,
934  						k.k->size << 9, flags|flags2);
935  			if (ret)
936  				return ret;
937  		}
938  
939  		return 0;
940  	} else if (bkey_extent_is_inline_data(k.k)) {
941  		return fiemap_fill_next_extent(info,
942  					       bkey_start_offset(k.k) << 9,
943  					       0, k.k->size << 9,
944  					       flags|
945  					       FIEMAP_EXTENT_DATA_INLINE);
946  	} else if (k.k->type == KEY_TYPE_reservation) {
947  		return fiemap_fill_next_extent(info,
948  					       bkey_start_offset(k.k) << 9,
949  					       0, k.k->size << 9,
950  					       flags|
951  					       FIEMAP_EXTENT_DELALLOC|
952  					       FIEMAP_EXTENT_UNWRITTEN);
953  	} else {
954  		BUG();
955  	}
956  }
957  
958  static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
959  		       u64 start, u64 len)
960  {
961  	struct bch_fs *c = vinode->i_sb->s_fs_info;
962  	struct bch_inode_info *ei = to_bch_ei(vinode);
963  	struct btree_trans *trans;
964  	struct btree_iter iter;
965  	struct bkey_s_c k;
966  	struct bkey_buf cur, prev;
967  	struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
968  	unsigned offset_into_extent, sectors;
969  	bool have_extent = false;
970  	u32 snapshot;
971  	int ret = 0;
972  
973  	ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC);
974  	if (ret)
975  		return ret;
976  
977  	if (start + len < start)
978  		return -EINVAL;
979  
980  	start >>= 9;
981  
982  	bch2_bkey_buf_init(&cur);
983  	bch2_bkey_buf_init(&prev);
984  	trans = bch2_trans_get(c);
985  retry:
986  	bch2_trans_begin(trans);
987  
988  	ret = bch2_subvolume_get_snapshot(trans, ei->ei_subvol, &snapshot);
989  	if (ret)
990  		goto err;
991  
992  	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
993  			     SPOS(ei->v.i_ino, start, snapshot), 0);
994  
995  	while (!(ret = btree_trans_too_many_iters(trans)) &&
996  	       (k = bch2_btree_iter_peek_upto(&iter, end)).k &&
997  	       !(ret = bkey_err(k))) {
998  		enum btree_id data_btree = BTREE_ID_extents;
999  
1000  		if (!bkey_extent_is_data(k.k) &&
1001  		    k.k->type != KEY_TYPE_reservation) {
1002  			bch2_btree_iter_advance(&iter);
1003  			continue;
1004  		}
1005  
1006  		offset_into_extent	= iter.pos.offset -
1007  			bkey_start_offset(k.k);
1008  		sectors			= k.k->size - offset_into_extent;
1009  
1010  		bch2_bkey_buf_reassemble(&cur, c, k);
1011  
1012  		ret = bch2_read_indirect_extent(trans, &data_btree,
1013  					&offset_into_extent, &cur);
1014  		if (ret)
1015  			break;
1016  
1017  		k = bkey_i_to_s_c(cur.k);
1018  		bch2_bkey_buf_realloc(&prev, c, k.k->u64s);
1019  
1020  		sectors = min(sectors, k.k->size - offset_into_extent);
1021  
1022  		bch2_cut_front(POS(k.k->p.inode,
1023  				   bkey_start_offset(k.k) +
1024  				   offset_into_extent),
1025  			       cur.k);
1026  		bch2_key_resize(&cur.k->k, sectors);
1027  		cur.k->k.p = iter.pos;
1028  		cur.k->k.p.offset += cur.k->k.size;
1029  
1030  		if (have_extent) {
1031  			bch2_trans_unlock(trans);
1032  			ret = bch2_fill_extent(c, info,
1033  					bkey_i_to_s_c(prev.k), 0);
1034  			if (ret)
1035  				break;
1036  		}
1037  
1038  		bkey_copy(prev.k, cur.k);
1039  		have_extent = true;
1040  
1041  		bch2_btree_iter_set_pos(&iter,
1042  			POS(iter.pos.inode, iter.pos.offset + sectors));
1043  	}
1044  	start = iter.pos.offset;
1045  	bch2_trans_iter_exit(trans, &iter);
1046  err:
1047  	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
1048  		goto retry;
1049  
1050  	if (!ret && have_extent) {
1051  		bch2_trans_unlock(trans);
1052  		ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
1053  				       FIEMAP_EXTENT_LAST);
1054  	}
1055  
1056  	bch2_trans_put(trans);
1057  	bch2_bkey_buf_exit(&cur, c);
1058  	bch2_bkey_buf_exit(&prev, c);
1059  	return ret < 0 ? ret : 0;
1060  }
1061  
1062  static const struct vm_operations_struct bch_vm_ops = {
1063  	.fault		= bch2_page_fault,
1064  	.map_pages	= filemap_map_pages,
1065  	.page_mkwrite   = bch2_page_mkwrite,
1066  };
1067  
1068  static int bch2_mmap(struct file *file, struct vm_area_struct *vma)
1069  {
1070  	file_accessed(file);
1071  
1072  	vma->vm_ops = &bch_vm_ops;
1073  	return 0;
1074  }
1075  
1076  /* Directories: */
1077  
1078  static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence)
1079  {
1080  	return generic_file_llseek_size(file, offset, whence,
1081  					S64_MAX, S64_MAX);
1082  }
1083  
1084  static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
1085  {
1086  	struct bch_inode_info *inode = file_bch_inode(file);
1087  	struct bch_fs *c = inode->v.i_sb->s_fs_info;
1088  
1089  	if (!dir_emit_dots(file, ctx))
1090  		return 0;
1091  
1092  	int ret = bch2_readdir(c, inode_inum(inode), ctx);
1093  
1094  	bch_err_fn(c, ret);
1095  	return bch2_err_class(ret);
1096  }
1097  
1098  static int bch2_open(struct inode *vinode, struct file *file)
1099  {
1100  	if (file->f_flags & (O_WRONLY|O_RDWR)) {
1101  		struct bch_inode_info *inode = to_bch_ei(vinode);
1102  		struct bch_fs *c = inode->v.i_sb->s_fs_info;
1103  
1104  		int ret = bch2_subvol_is_ro(c, inode->ei_subvol);
1105  		if (ret)
1106  			return ret;
1107  	}
1108  
1109  	return generic_file_open(vinode, file);
1110  }
1111  
1112  static const struct file_operations bch_file_operations = {
1113  	.open		= bch2_open,
1114  	.llseek		= bch2_llseek,
1115  	.read_iter	= bch2_read_iter,
1116  	.write_iter	= bch2_write_iter,
1117  	.mmap		= bch2_mmap,
1118  	.fsync		= bch2_fsync,
1119  	.splice_read	= filemap_splice_read,
1120  	.splice_write	= iter_file_splice_write,
1121  	.fallocate	= bch2_fallocate_dispatch,
1122  	.unlocked_ioctl = bch2_fs_file_ioctl,
1123  #ifdef CONFIG_COMPAT
1124  	.compat_ioctl	= bch2_compat_fs_ioctl,
1125  #endif
1126  	.remap_file_range = bch2_remap_file_range,
1127  };
1128  
1129  static const struct inode_operations bch_file_inode_operations = {
1130  	.getattr	= bch2_getattr,
1131  	.setattr	= bch2_setattr,
1132  	.fiemap		= bch2_fiemap,
1133  	.listxattr	= bch2_xattr_list,
1134  #ifdef CONFIG_BCACHEFS_POSIX_ACL
1135  	.get_acl	= bch2_get_acl,
1136  	.set_acl	= bch2_set_acl,
1137  #endif
1138  };
1139  
1140  static const struct inode_operations bch_dir_inode_operations = {
1141  	.lookup		= bch2_lookup,
1142  	.create		= bch2_create,
1143  	.link		= bch2_link,
1144  	.unlink		= bch2_unlink,
1145  	.symlink	= bch2_symlink,
1146  	.mkdir		= bch2_mkdir,
1147  	.rmdir		= bch2_unlink,
1148  	.mknod		= bch2_mknod,
1149  	.rename		= bch2_rename2,
1150  	.getattr	= bch2_getattr,
1151  	.setattr	= bch2_setattr,
1152  	.tmpfile	= bch2_tmpfile,
1153  	.listxattr	= bch2_xattr_list,
1154  #ifdef CONFIG_BCACHEFS_POSIX_ACL
1155  	.get_acl	= bch2_get_acl,
1156  	.set_acl	= bch2_set_acl,
1157  #endif
1158  };
1159  
1160  static const struct file_operations bch_dir_file_operations = {
1161  	.llseek		= bch2_dir_llseek,
1162  	.read		= generic_read_dir,
1163  	.iterate_shared	= bch2_vfs_readdir,
1164  	.fsync		= bch2_fsync,
1165  	.unlocked_ioctl = bch2_fs_file_ioctl,
1166  #ifdef CONFIG_COMPAT
1167  	.compat_ioctl	= bch2_compat_fs_ioctl,
1168  #endif
1169  };
1170  
1171  static const struct inode_operations bch_symlink_inode_operations = {
1172  	.get_link	= page_get_link,
1173  	.getattr	= bch2_getattr,
1174  	.setattr	= bch2_setattr,
1175  	.listxattr	= bch2_xattr_list,
1176  #ifdef CONFIG_BCACHEFS_POSIX_ACL
1177  	.get_acl	= bch2_get_acl,
1178  	.set_acl	= bch2_set_acl,
1179  #endif
1180  };
1181  
1182  static const struct inode_operations bch_special_inode_operations = {
1183  	.getattr	= bch2_getattr,
1184  	.setattr	= bch2_setattr,
1185  	.listxattr	= bch2_xattr_list,
1186  #ifdef CONFIG_BCACHEFS_POSIX_ACL
1187  	.get_acl	= bch2_get_acl,
1188  	.set_acl	= bch2_set_acl,
1189  #endif
1190  };
1191  
1192  static const struct address_space_operations bch_address_space_operations = {
1193  	.read_folio	= bch2_read_folio,
1194  	.writepages	= bch2_writepages,
1195  	.readahead	= bch2_readahead,
1196  	.dirty_folio	= filemap_dirty_folio,
1197  	.write_begin	= bch2_write_begin,
1198  	.write_end	= bch2_write_end,
1199  	.invalidate_folio = bch2_invalidate_folio,
1200  	.release_folio	= bch2_release_folio,
1201  	.direct_IO	= noop_direct_IO,
1202  #ifdef CONFIG_MIGRATION
1203  	.migrate_folio	= filemap_migrate_folio,
1204  #endif
1205  	.error_remove_folio = generic_error_remove_folio,
1206  };
1207  
1208  struct bcachefs_fid {
1209  	u64		inum;
1210  	u32		subvol;
1211  	u32		gen;
1212  } __packed;
1213  
1214  struct bcachefs_fid_with_parent {
1215  	struct bcachefs_fid	fid;
1216  	struct bcachefs_fid	dir;
1217  } __packed;
1218  
1219  static int bcachefs_fid_valid(int fh_len, int fh_type)
1220  {
1221  	switch (fh_type) {
1222  	case FILEID_BCACHEFS_WITHOUT_PARENT:
1223  		return fh_len == sizeof(struct bcachefs_fid) / sizeof(u32);
1224  	case FILEID_BCACHEFS_WITH_PARENT:
1225  		return fh_len == sizeof(struct bcachefs_fid_with_parent) / sizeof(u32);
1226  	default:
1227  		return false;
1228  	}
1229  }
1230  
1231  static struct bcachefs_fid bch2_inode_to_fid(struct bch_inode_info *inode)
1232  {
1233  	return (struct bcachefs_fid) {
1234  		.inum	= inode->ei_inode.bi_inum,
1235  		.subvol	= inode->ei_subvol,
1236  		.gen	= inode->ei_inode.bi_generation,
1237  	};
1238  }
1239  
1240  static int bch2_encode_fh(struct inode *vinode, u32 *fh, int *len,
1241  			  struct inode *vdir)
1242  {
1243  	struct bch_inode_info *inode	= to_bch_ei(vinode);
1244  	struct bch_inode_info *dir	= to_bch_ei(vdir);
1245  	int min_len;
1246  
1247  	if (!S_ISDIR(inode->v.i_mode) && dir) {
1248  		struct bcachefs_fid_with_parent *fid = (void *) fh;
1249  
1250  		min_len = sizeof(*fid) / sizeof(u32);
1251  		if (*len < min_len) {
1252  			*len = min_len;
1253  			return FILEID_INVALID;
1254  		}
1255  
1256  		fid->fid = bch2_inode_to_fid(inode);
1257  		fid->dir = bch2_inode_to_fid(dir);
1258  
1259  		*len = min_len;
1260  		return FILEID_BCACHEFS_WITH_PARENT;
1261  	} else {
1262  		struct bcachefs_fid *fid = (void *) fh;
1263  
1264  		min_len = sizeof(*fid) / sizeof(u32);
1265  		if (*len < min_len) {
1266  			*len = min_len;
1267  			return FILEID_INVALID;
1268  		}
1269  		*fid = bch2_inode_to_fid(inode);
1270  
1271  		*len = min_len;
1272  		return FILEID_BCACHEFS_WITHOUT_PARENT;
1273  	}
1274  }
1275  
1276  static struct inode *bch2_nfs_get_inode(struct super_block *sb,
1277  					struct bcachefs_fid fid)
1278  {
1279  	struct bch_fs *c = sb->s_fs_info;
1280  	struct inode *vinode = bch2_vfs_inode_get(c, (subvol_inum) {
1281  				    .subvol = fid.subvol,
1282  				    .inum = fid.inum,
1283  	});
1284  	if (!IS_ERR(vinode) && vinode->i_generation != fid.gen) {
1285  		iput(vinode);
1286  		vinode = ERR_PTR(-ESTALE);
1287  	}
1288  	return vinode;
1289  }
1290  
1291  static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *_fid,
1292  		int fh_len, int fh_type)
1293  {
1294  	struct bcachefs_fid *fid = (void *) _fid;
1295  
1296  	if (!bcachefs_fid_valid(fh_len, fh_type))
1297  		return NULL;
1298  
1299  	return d_obtain_alias(bch2_nfs_get_inode(sb, *fid));
1300  }
1301  
1302  static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *_fid,
1303  		int fh_len, int fh_type)
1304  {
1305  	struct bcachefs_fid_with_parent *fid = (void *) _fid;
1306  
1307  	if (!bcachefs_fid_valid(fh_len, fh_type) ||
1308  	    fh_type != FILEID_BCACHEFS_WITH_PARENT)
1309  		return NULL;
1310  
1311  	return d_obtain_alias(bch2_nfs_get_inode(sb, fid->dir));
1312  }
1313  
1314  static struct dentry *bch2_get_parent(struct dentry *child)
1315  {
1316  	struct bch_inode_info *inode = to_bch_ei(child->d_inode);
1317  	struct bch_fs *c = inode->v.i_sb->s_fs_info;
1318  	subvol_inum parent_inum = {
1319  		.subvol = inode->ei_inode.bi_parent_subvol ?:
1320  			inode->ei_subvol,
1321  		.inum = inode->ei_inode.bi_dir,
1322  	};
1323  
1324  	return d_obtain_alias(bch2_vfs_inode_get(c, parent_inum));
1325  }
1326  
1327  static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child)
1328  {
1329  	struct bch_inode_info *inode	= to_bch_ei(child->d_inode);
1330  	struct bch_inode_info *dir	= to_bch_ei(parent->d_inode);
1331  	struct bch_fs *c = inode->v.i_sb->s_fs_info;
1332  	struct btree_trans *trans;
1333  	struct btree_iter iter1;
1334  	struct btree_iter iter2;
1335  	struct bkey_s_c k;
1336  	struct bkey_s_c_dirent d;
1337  	struct bch_inode_unpacked inode_u;
1338  	subvol_inum target;
1339  	u32 snapshot;
1340  	struct qstr dirent_name;
1341  	unsigned name_len = 0;
1342  	int ret;
1343  
1344  	if (!S_ISDIR(dir->v.i_mode))
1345  		return -EINVAL;
1346  
1347  	trans = bch2_trans_get(c);
1348  
1349  	bch2_trans_iter_init(trans, &iter1, BTREE_ID_dirents,
1350  			     POS(dir->ei_inode.bi_inum, 0), 0);
1351  	bch2_trans_iter_init(trans, &iter2, BTREE_ID_dirents,
1352  			     POS(dir->ei_inode.bi_inum, 0), 0);
1353  retry:
1354  	bch2_trans_begin(trans);
1355  
1356  	ret = bch2_subvolume_get_snapshot(trans, dir->ei_subvol, &snapshot);
1357  	if (ret)
1358  		goto err;
1359  
1360  	bch2_btree_iter_set_snapshot(&iter1, snapshot);
1361  	bch2_btree_iter_set_snapshot(&iter2, snapshot);
1362  
1363  	ret = bch2_inode_find_by_inum_trans(trans, inode_inum(inode), &inode_u);
1364  	if (ret)
1365  		goto err;
1366  
1367  	if (inode_u.bi_dir == dir->ei_inode.bi_inum) {
1368  		bch2_btree_iter_set_pos(&iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset));
1369  
1370  		k = bch2_btree_iter_peek_slot(&iter1);
1371  		ret = bkey_err(k);
1372  		if (ret)
1373  			goto err;
1374  
1375  		if (k.k->type != KEY_TYPE_dirent) {
1376  			ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
1377  			goto err;
1378  		}
1379  
1380  		d = bkey_s_c_to_dirent(k);
1381  		ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target);
1382  		if (ret > 0)
1383  			ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
1384  		if (ret)
1385  			goto err;
1386  
1387  		if (target.subvol	== inode->ei_subvol &&
1388  		    target.inum		== inode->ei_inode.bi_inum)
1389  			goto found;
1390  	} else {
1391  		/*
1392  		 * File with multiple hardlinks and our backref is to the wrong
1393  		 * directory - linear search:
1394  		 */
1395  		for_each_btree_key_continue_norestart(iter2, 0, k, ret) {
1396  			if (k.k->p.inode > dir->ei_inode.bi_inum)
1397  				break;
1398  
1399  			if (k.k->type != KEY_TYPE_dirent)
1400  				continue;
1401  
1402  			d = bkey_s_c_to_dirent(k);
1403  			ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target);
1404  			if (ret < 0)
1405  				break;
1406  			if (ret)
1407  				continue;
1408  
1409  			if (target.subvol	== inode->ei_subvol &&
1410  			    target.inum		== inode->ei_inode.bi_inum)
1411  				goto found;
1412  		}
1413  	}
1414  
1415  	ret = -ENOENT;
1416  	goto err;
1417  found:
1418  	dirent_name = bch2_dirent_get_name(d);
1419  
1420  	name_len = min_t(unsigned, dirent_name.len, NAME_MAX);
1421  	memcpy(name, dirent_name.name, name_len);
1422  	name[name_len] = '\0';
1423  err:
1424  	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
1425  		goto retry;
1426  
1427  	bch2_trans_iter_exit(trans, &iter1);
1428  	bch2_trans_iter_exit(trans, &iter2);
1429  	bch2_trans_put(trans);
1430  
1431  	return ret;
1432  }
1433  
1434  static const struct export_operations bch_export_ops = {
1435  	.encode_fh	= bch2_encode_fh,
1436  	.fh_to_dentry	= bch2_fh_to_dentry,
1437  	.fh_to_parent	= bch2_fh_to_parent,
1438  	.get_parent	= bch2_get_parent,
1439  	.get_name	= bch2_get_name,
1440  };
1441  
1442  static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
1443  				struct bch_inode_info *inode,
1444  				struct bch_inode_unpacked *bi,
1445  				struct bch_subvolume *subvol)
1446  {
1447  	bch2_iget5_set(&inode->v, &inum);
1448  	bch2_inode_update_after_write(trans, inode, bi, ~0);
1449  
1450  	if (BCH_SUBVOLUME_SNAP(subvol))
1451  		set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
1452  	else
1453  		clear_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
1454  
1455  	inode->v.i_blocks	= bi->bi_sectors;
1456  	inode->v.i_ino		= bi->bi_inum;
1457  	inode->v.i_rdev		= bi->bi_dev;
1458  	inode->v.i_generation	= bi->bi_generation;
1459  	inode->v.i_size		= bi->bi_size;
1460  
1461  	inode->ei_flags		= 0;
1462  	inode->ei_quota_reserved = 0;
1463  	inode->ei_qid		= bch_qid(bi);
1464  	inode->ei_subvol	= inum.subvol;
1465  
1466  	inode->v.i_mapping->a_ops = &bch_address_space_operations;
1467  
1468  	switch (inode->v.i_mode & S_IFMT) {
1469  	case S_IFREG:
1470  		inode->v.i_op	= &bch_file_inode_operations;
1471  		inode->v.i_fop	= &bch_file_operations;
1472  		break;
1473  	case S_IFDIR:
1474  		inode->v.i_op	= &bch_dir_inode_operations;
1475  		inode->v.i_fop	= &bch_dir_file_operations;
1476  		break;
1477  	case S_IFLNK:
1478  		inode_nohighmem(&inode->v);
1479  		inode->v.i_op	= &bch_symlink_inode_operations;
1480  		break;
1481  	default:
1482  		init_special_inode(&inode->v, inode->v.i_mode, inode->v.i_rdev);
1483  		inode->v.i_op	= &bch_special_inode_operations;
1484  		break;
1485  	}
1486  
1487  	mapping_set_large_folios(inode->v.i_mapping);
1488  }
1489  
1490  static struct inode *bch2_alloc_inode(struct super_block *sb)
1491  {
1492  	struct bch_inode_info *inode;
1493  
1494  	inode = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS);
1495  	if (!inode)
1496  		return NULL;
1497  
1498  	inode_init_once(&inode->v);
1499  	mutex_init(&inode->ei_update_lock);
1500  	two_state_lock_init(&inode->ei_pagecache_lock);
1501  	INIT_LIST_HEAD(&inode->ei_vfs_inode_list);
1502  	mutex_init(&inode->ei_quota_lock);
1503  
1504  	return &inode->v;
1505  }
1506  
1507  static void bch2_i_callback(struct rcu_head *head)
1508  {
1509  	struct inode *vinode = container_of(head, struct inode, i_rcu);
1510  	struct bch_inode_info *inode = to_bch_ei(vinode);
1511  
1512  	kmem_cache_free(bch2_inode_cache, inode);
1513  }
1514  
1515  static void bch2_destroy_inode(struct inode *vinode)
1516  {
1517  	call_rcu(&vinode->i_rcu, bch2_i_callback);
1518  }
1519  
1520  static int inode_update_times_fn(struct btree_trans *trans,
1521  				 struct bch_inode_info *inode,
1522  				 struct bch_inode_unpacked *bi,
1523  				 void *p)
1524  {
1525  	struct bch_fs *c = inode->v.i_sb->s_fs_info;
1526  
1527  	bi->bi_atime	= timespec_to_bch2_time(c, inode_get_atime(&inode->v));
1528  	bi->bi_mtime	= timespec_to_bch2_time(c, inode_get_mtime(&inode->v));
1529  	bi->bi_ctime	= timespec_to_bch2_time(c, inode_get_ctime(&inode->v));
1530  
1531  	return 0;
1532  }
1533  
1534  static int bch2_vfs_write_inode(struct inode *vinode,
1535  				struct writeback_control *wbc)
1536  {
1537  	struct bch_fs *c = vinode->i_sb->s_fs_info;
1538  	struct bch_inode_info *inode = to_bch_ei(vinode);
1539  	int ret;
1540  
1541  	mutex_lock(&inode->ei_update_lock);
1542  	ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
1543  			       ATTR_ATIME|ATTR_MTIME|ATTR_CTIME);
1544  	mutex_unlock(&inode->ei_update_lock);
1545  
1546  	return bch2_err_class(ret);
1547  }
1548  
1549  static void bch2_evict_inode(struct inode *vinode)
1550  {
1551  	struct bch_fs *c = vinode->i_sb->s_fs_info;
1552  	struct bch_inode_info *inode = to_bch_ei(vinode);
1553  
1554  	truncate_inode_pages_final(&inode->v.i_data);
1555  
1556  	clear_inode(&inode->v);
1557  
1558  	BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved);
1559  
1560  	if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) {
1561  		bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks),
1562  				KEY_TYPE_QUOTA_WARN);
1563  		bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
1564  				KEY_TYPE_QUOTA_WARN);
1565  		bch2_inode_rm(c, inode_inum(inode));
1566  	}
1567  
1568  	mutex_lock(&c->vfs_inodes_lock);
1569  	list_del_init(&inode->ei_vfs_inode_list);
1570  	mutex_unlock(&c->vfs_inodes_lock);
1571  }
1572  
1573  void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
1574  {
1575  	struct bch_inode_info *inode;
1576  	DARRAY(struct bch_inode_info *) grabbed;
1577  	bool clean_pass = false, this_pass_clean;
1578  
1579  	/*
1580  	 * Initially, we scan for inodes without I_DONTCACHE, then mark them to
1581  	 * be pruned with d_mark_dontcache().
1582  	 *
1583  	 * Once we've had a clean pass where we didn't find any inodes without
1584  	 * I_DONTCACHE, we wait for them to be freed:
1585  	 */
1586  
1587  	darray_init(&grabbed);
1588  	darray_make_room(&grabbed, 1024);
1589  again:
1590  	cond_resched();
1591  	this_pass_clean = true;
1592  
1593  	mutex_lock(&c->vfs_inodes_lock);
1594  	list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) {
1595  		if (!snapshot_list_has_id(s, inode->ei_subvol))
1596  			continue;
1597  
1598  		if (!(inode->v.i_state & I_DONTCACHE) &&
1599  		    !(inode->v.i_state & I_FREEING) &&
1600  		    igrab(&inode->v)) {
1601  			this_pass_clean = false;
1602  
1603  			if (darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN)) {
1604  				iput(&inode->v);
1605  				break;
1606  			}
1607  		} else if (clean_pass && this_pass_clean) {
1608  			wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
1609  			DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);
1610  
1611  			prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
1612  			mutex_unlock(&c->vfs_inodes_lock);
1613  
1614  			schedule();
1615  			finish_wait(wq, &wait.wq_entry);
1616  			goto again;
1617  		}
1618  	}
1619  	mutex_unlock(&c->vfs_inodes_lock);
1620  
1621  	darray_for_each(grabbed, i) {
1622  		inode = *i;
1623  		d_mark_dontcache(&inode->v);
1624  		d_prune_aliases(&inode->v);
1625  		iput(&inode->v);
1626  	}
1627  	grabbed.nr = 0;
1628  
1629  	if (!clean_pass || !this_pass_clean) {
1630  		clean_pass = this_pass_clean;
1631  		goto again;
1632  	}
1633  
1634  	darray_exit(&grabbed);
1635  }
1636  
1637  static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
1638  {
1639  	struct super_block *sb = dentry->d_sb;
1640  	struct bch_fs *c = sb->s_fs_info;
1641  	struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
1642  	unsigned shift = sb->s_blocksize_bits - 9;
1643  	/*
1644  	 * this assumes inodes take up 64 bytes, which is a decent average
1645  	 * number:
1646  	 */
1647  	u64 avail_inodes = ((usage.capacity - usage.used) << 3);
1648  
1649  	buf->f_type	= BCACHEFS_STATFS_MAGIC;
1650  	buf->f_bsize	= sb->s_blocksize;
1651  	buf->f_blocks	= usage.capacity >> shift;
1652  	buf->f_bfree	= usage.free >> shift;
1653  	buf->f_bavail	= avail_factor(usage.free) >> shift;
1654  
1655  	buf->f_files	= usage.nr_inodes + avail_inodes;
1656  	buf->f_ffree	= avail_inodes;
1657  
1658  	buf->f_fsid	= uuid_to_fsid(c->sb.user_uuid.b);
1659  	buf->f_namelen	= BCH_NAME_MAX;
1660  
1661  	return 0;
1662  }
1663  
1664  static int bch2_sync_fs(struct super_block *sb, int wait)
1665  {
1666  	struct bch_fs *c = sb->s_fs_info;
1667  	int ret;
1668  
1669  	if (c->opts.journal_flush_disabled)
1670  		return 0;
1671  
1672  	if (!wait) {
1673  		bch2_journal_flush_async(&c->journal, NULL);
1674  		return 0;
1675  	}
1676  
1677  	ret = bch2_journal_flush(&c->journal);
1678  	return bch2_err_class(ret);
1679  }
1680  
1681  static struct bch_fs *bch2_path_to_fs(const char *path)
1682  {
1683  	struct bch_fs *c;
1684  	dev_t dev;
1685  	int ret;
1686  
1687  	ret = lookup_bdev(path, &dev);
1688  	if (ret)
1689  		return ERR_PTR(ret);
1690  
1691  	c = bch2_dev_to_fs(dev);
1692  	if (c)
1693  		closure_put(&c->cl);
1694  	return c ?: ERR_PTR(-ENOENT);
1695  }
1696  
1697  static int bch2_remount(struct super_block *sb, int *flags, char *data)
1698  {
1699  	struct bch_fs *c = sb->s_fs_info;
1700  	struct bch_opts opts = bch2_opts_empty();
1701  	int ret;
1702  
1703  	ret = bch2_parse_mount_opts(c, &opts, data);
1704  	if (ret)
1705  		goto err;
1706  
1707  	opt_set(opts, read_only, (*flags & SB_RDONLY) != 0);
1708  
1709  	if (opts.read_only != c->opts.read_only) {
1710  		down_write(&c->state_lock);
1711  
1712  		if (opts.read_only) {
1713  			bch2_fs_read_only(c);
1714  
1715  			sb->s_flags |= SB_RDONLY;
1716  		} else {
1717  			ret = bch2_fs_read_write(c);
1718  			if (ret) {
1719  				bch_err(c, "error going rw: %i", ret);
1720  				up_write(&c->state_lock);
1721  				ret = -EINVAL;
1722  				goto err;
1723  			}
1724  
1725  			sb->s_flags &= ~SB_RDONLY;
1726  		}
1727  
1728  		c->opts.read_only = opts.read_only;
1729  
1730  		up_write(&c->state_lock);
1731  	}
1732  
1733  	if (opt_defined(opts, errors))
1734  		c->opts.errors = opts.errors;
1735  err:
1736  	return bch2_err_class(ret);
1737  }
1738  
1739  static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
1740  {
1741  	struct bch_fs *c = root->d_sb->s_fs_info;
1742  	bool first = true;
1743  
1744  	for_each_online_member(c, ca) {
1745  		if (!first)
1746  			seq_putc(seq, ':');
1747  		first = false;
1748  		seq_puts(seq, ca->disk_sb.sb_name);
1749  	}
1750  
1751  	return 0;
1752  }
1753  
1754  static int bch2_show_options(struct seq_file *seq, struct dentry *root)
1755  {
1756  	struct bch_fs *c = root->d_sb->s_fs_info;
1757  	enum bch_opt_id i;
1758  	struct printbuf buf = PRINTBUF;
1759  	int ret = 0;
1760  
1761  	for (i = 0; i < bch2_opts_nr; i++) {
1762  		const struct bch_option *opt = &bch2_opt_table[i];
1763  		u64 v = bch2_opt_get_by_id(&c->opts, i);
1764  
1765  		if (!(opt->flags & OPT_MOUNT))
1766  			continue;
1767  
1768  		if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
1769  			continue;
1770  
1771  		printbuf_reset(&buf);
1772  		bch2_opt_to_text(&buf, c, c->disk_sb.sb, opt, v,
1773  				 OPT_SHOW_MOUNT_STYLE);
1774  		seq_putc(seq, ',');
1775  		seq_puts(seq, buf.buf);
1776  	}
1777  
1778  	if (buf.allocation_failure)
1779  		ret = -ENOMEM;
1780  	printbuf_exit(&buf);
1781  	return ret;
1782  }
1783  
1784  static void bch2_put_super(struct super_block *sb)
1785  {
1786  	struct bch_fs *c = sb->s_fs_info;
1787  
1788  	__bch2_fs_stop(c);
1789  }
1790  
1791  /*
1792   * bcachefs doesn't currently integrate intwrite freeze protection but the
1793   * internal write references serve the same purpose. Therefore reuse the
1794   * read-only transition code to perform the quiesce. The caveat is that we don't
1795   * currently have the ability to block tasks that want a write reference while
1796   * the superblock is frozen. This is fine for now, but we should either add
1797   * blocking support or find a way to integrate sb_start_intwrite() and friends.
1798   */
1799  static int bch2_freeze(struct super_block *sb)
1800  {
1801  	struct bch_fs *c = sb->s_fs_info;
1802  
1803  	down_write(&c->state_lock);
1804  	bch2_fs_read_only(c);
1805  	up_write(&c->state_lock);
1806  	return 0;
1807  }
1808  
1809  static int bch2_unfreeze(struct super_block *sb)
1810  {
1811  	struct bch_fs *c = sb->s_fs_info;
1812  	int ret;
1813  
1814  	if (test_bit(BCH_FS_emergency_ro, &c->flags))
1815  		return 0;
1816  
1817  	down_write(&c->state_lock);
1818  	ret = bch2_fs_read_write(c);
1819  	up_write(&c->state_lock);
1820  	return ret;
1821  }
1822  
1823  static const struct super_operations bch_super_operations = {
1824  	.alloc_inode	= bch2_alloc_inode,
1825  	.destroy_inode	= bch2_destroy_inode,
1826  	.write_inode	= bch2_vfs_write_inode,
1827  	.evict_inode	= bch2_evict_inode,
1828  	.sync_fs	= bch2_sync_fs,
1829  	.statfs		= bch2_statfs,
1830  	.show_devname	= bch2_show_devname,
1831  	.show_options	= bch2_show_options,
1832  	.remount_fs	= bch2_remount,
1833  	.put_super	= bch2_put_super,
1834  	.freeze_fs	= bch2_freeze,
1835  	.unfreeze_fs	= bch2_unfreeze,
1836  };
1837  
1838  static int bch2_set_super(struct super_block *s, void *data)
1839  {
1840  	s->s_fs_info = data;
1841  	return 0;
1842  }
1843  
1844  static int bch2_noset_super(struct super_block *s, void *data)
1845  {
1846  	return -EBUSY;
1847  }
1848  
1849  typedef DARRAY(struct bch_fs *) darray_fs;
1850  
1851  static int bch2_test_super(struct super_block *s, void *data)
1852  {
1853  	struct bch_fs *c = s->s_fs_info;
1854  	darray_fs *d = data;
1855  
1856  	if (!c)
1857  		return false;
1858  
1859  	darray_for_each(*d, i)
1860  		if (c != *i)
1861  			return false;
1862  	return true;
1863  }
1864  
1865  static struct dentry *bch2_mount(struct file_system_type *fs_type,
1866  				 int flags, const char *dev_name, void *data)
1867  {
1868  	struct bch_fs *c;
1869  	struct super_block *sb;
1870  	struct inode *vinode;
1871  	struct bch_opts opts = bch2_opts_empty();
1872  	int ret;
1873  
1874  	opt_set(opts, read_only, (flags & SB_RDONLY) != 0);
1875  
1876  	ret = bch2_parse_mount_opts(NULL, &opts, data);
1877  	if (ret) {
1878  		ret = bch2_err_class(ret);
1879  		return ERR_PTR(ret);
1880  	}
1881  
1882  	if (!dev_name || strlen(dev_name) == 0)
1883  		return ERR_PTR(-EINVAL);
1884  
1885  	darray_str devs;
1886  	ret = bch2_split_devs(dev_name, &devs);
1887  	if (ret)
1888  		return ERR_PTR(ret);
1889  
1890  	darray_fs devs_to_fs = {};
1891  	darray_for_each(devs, i) {
1892  		ret = darray_push(&devs_to_fs, bch2_path_to_fs(*i));
1893  		if (ret) {
1894  			sb = ERR_PTR(ret);
1895  			goto got_sb;
1896  		}
1897  	}
1898  
1899  	sb = sget(fs_type, bch2_test_super, bch2_noset_super, flags|SB_NOSEC, &devs_to_fs);
1900  	if (!IS_ERR(sb))
1901  		goto got_sb;
1902  
1903  	c = bch2_fs_open(devs.data, devs.nr, opts);
1904  	if (IS_ERR(c)) {
1905  		sb = ERR_CAST(c);
1906  		goto got_sb;
1907  	}
1908  
1909  	/* Some options can't be parsed until after the fs is started: */
1910  	ret = bch2_parse_mount_opts(c, &opts, data);
1911  	if (ret) {
1912  		bch2_fs_stop(c);
1913  		sb = ERR_PTR(ret);
1914  		goto got_sb;
1915  	}
1916  
1917  	bch2_opts_apply(&c->opts, opts);
1918  
1919  	sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c);
1920  	if (IS_ERR(sb))
1921  		bch2_fs_stop(c);
1922  got_sb:
1923  	darray_exit(&devs_to_fs);
1924  	bch2_darray_str_exit(&devs);
1925  
1926  	if (IS_ERR(sb)) {
1927  		ret = PTR_ERR(sb);
1928  		ret = bch2_err_class(ret);
1929  		return ERR_PTR(ret);
1930  	}
1931  
1932  	c = sb->s_fs_info;
1933  
1934  	if (sb->s_root) {
1935  		if ((flags ^ sb->s_flags) & SB_RDONLY) {
1936  			ret = -EBUSY;
1937  			goto err_put_super;
1938  		}
1939  		goto out;
1940  	}
1941  
1942  	sb->s_blocksize		= block_bytes(c);
1943  	sb->s_blocksize_bits	= ilog2(block_bytes(c));
1944  	sb->s_maxbytes		= MAX_LFS_FILESIZE;
1945  	sb->s_op		= &bch_super_operations;
1946  	sb->s_export_op		= &bch_export_ops;
1947  #ifdef CONFIG_BCACHEFS_QUOTA
1948  	sb->s_qcop		= &bch2_quotactl_operations;
1949  	sb->s_quota_types	= QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ;
1950  #endif
1951  	sb->s_xattr		= bch2_xattr_handlers;
1952  	sb->s_magic		= BCACHEFS_STATFS_MAGIC;
1953  	sb->s_time_gran		= c->sb.nsec_per_time_unit;
1954  	sb->s_time_min		= div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
1955  	sb->s_time_max		= div_s64(S64_MAX, c->sb.time_units_per_sec);
1956  	sb->s_uuid		= c->sb.user_uuid;
1957  	c->vfs_sb		= sb;
1958  	strscpy(sb->s_id, c->name, sizeof(sb->s_id));
1959  
1960  	ret = super_setup_bdi(sb);
1961  	if (ret)
1962  		goto err_put_super;
1963  
1964  	sb->s_bdi->ra_pages		= VM_READAHEAD_PAGES;
1965  
1966  	for_each_online_member(c, ca) {
1967  		struct block_device *bdev = ca->disk_sb.bdev;
1968  
1969  		/* XXX: create an anonymous device for multi device filesystems */
1970  		sb->s_bdev	= bdev;
1971  		sb->s_dev	= bdev->bd_dev;
1972  		percpu_ref_put(&ca->io_ref);
1973  		break;
1974  	}
1975  
1976  	c->dev = sb->s_dev;
1977  
1978  #ifdef CONFIG_BCACHEFS_POSIX_ACL
1979  	if (c->opts.acl)
1980  		sb->s_flags	|= SB_POSIXACL;
1981  #endif
1982  
1983  	sb->s_shrink->seeks = 0;
1984  
1985  	vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
1986  	ret = PTR_ERR_OR_ZERO(vinode);
1987  	bch_err_msg(c, ret, "mounting: error getting root inode");
1988  	if (ret)
1989  		goto err_put_super;
1990  
1991  	sb->s_root = d_make_root(vinode);
1992  	if (!sb->s_root) {
1993  		bch_err(c, "error mounting: error allocating root dentry");
1994  		ret = -ENOMEM;
1995  		goto err_put_super;
1996  	}
1997  
1998  	sb->s_flags |= SB_ACTIVE;
1999  out:
2000  	return dget(sb->s_root);
2001  
2002  err_put_super:
2003  	__bch2_fs_stop(c);
2004  	deactivate_locked_super(sb);
2005  	return ERR_PTR(bch2_err_class(ret));
2006  }
2007  
2008  static void bch2_kill_sb(struct super_block *sb)
2009  {
2010  	struct bch_fs *c = sb->s_fs_info;
2011  
2012  	generic_shutdown_super(sb);
2013  	bch2_fs_free(c);
2014  }
2015  
2016  static struct file_system_type bcache_fs_type = {
2017  	.owner		= THIS_MODULE,
2018  	.name		= "bcachefs",
2019  	.mount		= bch2_mount,
2020  	.kill_sb	= bch2_kill_sb,
2021  	.fs_flags	= FS_REQUIRES_DEV,
2022  };
2023  
2024  MODULE_ALIAS_FS("bcachefs");
2025  
2026  void bch2_vfs_exit(void)
2027  {
2028  	unregister_filesystem(&bcache_fs_type);
2029  	kmem_cache_destroy(bch2_inode_cache);
2030  }
2031  
2032  int __init bch2_vfs_init(void)
2033  {
2034  	int ret = -ENOMEM;
2035  
2036  	bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT);
2037  	if (!bch2_inode_cache)
2038  		goto err;
2039  
2040  	ret = register_filesystem(&bcache_fs_type);
2041  	if (ret)
2042  		goto err;
2043  
2044  	return 0;
2045  err:
2046  	bch2_vfs_exit();
2047  	return ret;
2048  }
2049  
2050  #endif /* NO_BCACHEFS_FS */
2051