1 // SPDX-License-Identifier: GPL-2.0
2 #ifndef NO_BCACHEFS_FS
3
4 #include "bcachefs.h"
5 #include "acl.h"
6 #include "bkey_buf.h"
7 #include "btree_update.h"
8 #include "buckets.h"
9 #include "chardev.h"
10 #include "dirent.h"
11 #include "errcode.h"
12 #include "extents.h"
13 #include "fs.h"
14 #include "fs-io.h"
15 #include "fs-ioctl.h"
16 #include "fs-io-buffered.h"
17 #include "fs-io-direct.h"
18 #include "fs-io-pagecache.h"
19 #include "fsck.h"
20 #include "inode.h"
21 #include "io_read.h"
22 #include "journal.h"
23 #include "keylist.h"
24 #include "namei.h"
25 #include "quota.h"
26 #include "rebalance.h"
27 #include "snapshot.h"
28 #include "super.h"
29 #include "xattr.h"
30 #include "trace.h"
31
32 #include <linux/aio.h>
33 #include <linux/backing-dev.h>
34 #include <linux/exportfs.h>
35 #include <linux/fiemap.h>
36 #include <linux/fs_context.h>
37 #include <linux/module.h>
38 #include <linux/pagemap.h>
39 #include <linux/posix_acl.h>
40 #include <linux/random.h>
41 #include <linux/seq_file.h>
42 #include <linux/siphash.h>
43 #include <linux/statfs.h>
44 #include <linux/string.h>
45 #include <linux/xattr.h>
46
/*
 * Slab cache backing struct bch_inode_info: inodes are allocated from it in
 * __bch2_new_inode() and returned to it with kmem_cache_free() on the error
 * paths in this file.
 */
static struct kmem_cache *bch2_inode_cache;

/*
 * Forward declaration: called by bch2_inode_hash_init_insert() and
 * __bch2_create() below to fill in a freshly allocated VFS inode.
 */
static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum,
				struct bch_inode_info *,
				struct bch_inode_unpacked *,
				struct bch_subvolume *);
53
bch2_inode_update_after_write(struct btree_trans * trans,struct bch_inode_info * inode,struct bch_inode_unpacked * bi,unsigned fields)54 void bch2_inode_update_after_write(struct btree_trans *trans,
55 struct bch_inode_info *inode,
56 struct bch_inode_unpacked *bi,
57 unsigned fields)
58 {
59 struct bch_fs *c = trans->c;
60
61 BUG_ON(bi->bi_inum != inode->v.i_ino);
62
63 bch2_assert_pos_locked(trans, BTREE_ID_inodes, POS(0, bi->bi_inum));
64
65 set_nlink(&inode->v, bch2_inode_nlink_get(bi));
66 i_uid_write(&inode->v, bi->bi_uid);
67 i_gid_write(&inode->v, bi->bi_gid);
68 inode->v.i_mode = bi->bi_mode;
69
70 if (fields & ATTR_SIZE)
71 i_size_write(&inode->v, bi->bi_size);
72
73 if (fields & ATTR_ATIME)
74 inode_set_atime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_atime));
75 if (fields & ATTR_MTIME)
76 inode_set_mtime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_mtime));
77 if (fields & ATTR_CTIME)
78 inode_set_ctime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_ctime));
79
80 inode->ei_inode = *bi;
81
82 bch2_inode_flags_to_vfs(inode);
83 }
84
bch2_write_inode(struct bch_fs * c,struct bch_inode_info * inode,inode_set_fn set,void * p,unsigned fields)85 int __must_check bch2_write_inode(struct bch_fs *c,
86 struct bch_inode_info *inode,
87 inode_set_fn set,
88 void *p, unsigned fields)
89 {
90 struct btree_trans *trans = bch2_trans_get(c);
91 struct btree_iter iter = { NULL };
92 struct bch_inode_unpacked inode_u;
93 int ret;
94 retry:
95 bch2_trans_begin(trans);
96
97 ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode), BTREE_ITER_intent);
98 if (ret)
99 goto err;
100
101 struct bch_extent_rebalance old_r = bch2_inode_rebalance_opts_get(c, &inode_u);
102
103 ret = (set ? set(trans, inode, &inode_u, p) : 0);
104 if (ret)
105 goto err;
106
107 struct bch_extent_rebalance new_r = bch2_inode_rebalance_opts_get(c, &inode_u);
108
109 if (memcmp(&old_r, &new_r, sizeof(new_r))) {
110 ret = bch2_set_rebalance_needs_scan_trans(trans, inode_u.bi_inum);
111 if (ret)
112 goto err;
113 }
114
115 ret = bch2_inode_write(trans, &iter, &inode_u) ?:
116 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
117
118 /*
119 * the btree node lock protects inode->ei_inode, not ei_update_lock;
120 * this is important for inode updates via bchfs_write_index_update
121 */
122 if (!ret)
123 bch2_inode_update_after_write(trans, inode, &inode_u, fields);
124 err:
125 bch2_trans_iter_exit(trans, &iter);
126
127 if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
128 goto retry;
129
130 bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
131 "%s: inode %llu:%llu not found when updating",
132 bch2_err_str(ret),
133 inode_inum(inode).subvol,
134 inode_inum(inode).inum);
135
136 bch2_trans_put(trans);
137 return ret < 0 ? ret : 0;
138 }
139
bch2_fs_quota_transfer(struct bch_fs * c,struct bch_inode_info * inode,struct bch_qid new_qid,unsigned qtypes,enum quota_acct_mode mode)140 int bch2_fs_quota_transfer(struct bch_fs *c,
141 struct bch_inode_info *inode,
142 struct bch_qid new_qid,
143 unsigned qtypes,
144 enum quota_acct_mode mode)
145 {
146 unsigned i;
147 int ret;
148
149 qtypes &= enabled_qtypes(c);
150
151 for (i = 0; i < QTYP_NR; i++)
152 if (new_qid.q[i] == inode->ei_qid.q[i])
153 qtypes &= ~(1U << i);
154
155 if (!qtypes)
156 return 0;
157
158 mutex_lock(&inode->ei_quota_lock);
159
160 ret = bch2_quota_transfer(c, qtypes, new_qid,
161 inode->ei_qid,
162 inode->v.i_blocks +
163 inode->ei_quota_reserved,
164 mode);
165 if (!ret)
166 for (i = 0; i < QTYP_NR; i++)
167 if (qtypes & (1 << i))
168 inode->ei_qid.q[i] = new_qid.q[i];
169
170 mutex_unlock(&inode->ei_quota_lock);
171
172 return ret;
173 }
174
subvol_inum_eq(subvol_inum a,subvol_inum b)175 static bool subvol_inum_eq(subvol_inum a, subvol_inum b)
176 {
177 return a.subvol == b.subvol && a.inum == b.inum;
178 }
179
/*
 * rhashtable key hash for the VFS inode table: siphash over the subvolume id
 * and inode number of the subvol_inum at @data, keyed by @seed.  @len is
 * unused.
 */
static u32 bch2_vfs_inode_hash_fn(const void *data, u32 len, u32 seed)
{
	const subvol_inum *key = data;
	siphash_key_t sip_key = { .key = { seed } };

	return siphash_2u64(key->subvol, key->inum, &sip_key);
}
187
/*
 * rhashtable object hash: hashes the ei_inum key embedded in the
 * bch_inode_info at @data with the same function used for bare keys.
 */
static u32 bch2_vfs_inode_obj_hash_fn(const void *data, u32 len, u32 seed)
{
	const struct bch_inode_info *ei = data;
	const subvol_inum *key = &ei->ei_inum;

	return bch2_vfs_inode_hash_fn(key, sizeof(*key), seed);
}
194
bch2_vfs_inode_cmp_fn(struct rhashtable_compare_arg * arg,const void * obj)195 static int bch2_vfs_inode_cmp_fn(struct rhashtable_compare_arg *arg,
196 const void *obj)
197 {
198 const struct bch_inode_info *inode = obj;
199 const subvol_inum *v = arg->key;
200
201 return !subvol_inum_eq(inode->ei_inum, *v);
202 }
203
/*
 * Parameters for c->vfs_inodes_table: objects are struct bch_inode_info,
 * linked through ->hash and keyed by the full subvol_inum in ->ei_inum, using
 * the custom hash and compare callbacks defined above.
 */
static const struct rhashtable_params bch2_vfs_inodes_params = {
	.head_offset		= offsetof(struct bch_inode_info, hash),
	.key_offset		= offsetof(struct bch_inode_info, ei_inum),
	.key_len		= sizeof(subvol_inum),
	.hashfn			= bch2_vfs_inode_hash_fn,
	.obj_hashfn		= bch2_vfs_inode_obj_hash_fn,
	.obj_cmpfn		= bch2_vfs_inode_cmp_fn,
	.automatic_shrinking	= true,
};
213
/*
 * Parameters for c->vfs_inodes_by_inum_table: objects are linked through
 * ->by_inum_hash and keyed by the 64-bit inode number only (->ei_inum.inum),
 * so the subvolume id is not part of the key.  No custom hash or compare
 * callbacks are supplied.
 */
static const struct rhashtable_params bch2_vfs_inodes_by_inum_params = {
	.head_offset		= offsetof(struct bch_inode_info, by_inum_hash),
	.key_offset		= offsetof(struct bch_inode_info, ei_inum.inum),
	.key_len		= sizeof(u64),
	.automatic_shrinking	= true,
};
220
bch2_inode_or_descendents_is_open(struct btree_trans * trans,struct bpos p)221 int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p)
222 {
223 struct bch_fs *c = trans->c;
224 struct rhltable *ht = &c->vfs_inodes_by_inum_table;
225 u64 inum = p.offset;
226 DARRAY(u32) subvols;
227 int ret = 0;
228
229 if (!test_bit(BCH_FS_started, &c->flags))
230 return false;
231
232 darray_init(&subvols);
233 restart_from_top:
234
235 /*
236 * Tweaked version of __rhashtable_lookup(); we need to get a list of
237 * subvolumes in which the given inode number is open.
238 *
239 * For this to work, we don't include the subvolume ID in the key that
240 * we hash - all inodes with the same inode number regardless of
241 * subvolume will hash to the same slot.
242 *
243 * This will be less than ideal if the same file is ever open
244 * simultaneously in many different snapshots:
245 */
246 rcu_read_lock();
247 struct rhash_lock_head __rcu *const *bkt;
248 struct rhash_head *he;
249 unsigned int hash;
250 struct bucket_table *tbl = rht_dereference_rcu(ht->ht.tbl, &ht->ht);
251 restart:
252 hash = rht_key_hashfn(&ht->ht, tbl, &inum, bch2_vfs_inodes_by_inum_params);
253 bkt = rht_bucket(tbl, hash);
254 do {
255 struct bch_inode_info *inode;
256
257 rht_for_each_entry_rcu_from(inode, he, rht_ptr_rcu(bkt), tbl, hash, hash) {
258 if (inode->ei_inum.inum == inum) {
259 ret = darray_push_gfp(&subvols, inode->ei_inum.subvol,
260 GFP_NOWAIT|__GFP_NOWARN);
261 if (ret) {
262 rcu_read_unlock();
263 ret = darray_make_room(&subvols, 1);
264 if (ret)
265 goto err;
266 subvols.nr = 0;
267 goto restart_from_top;
268 }
269 }
270 }
271 /* An object might have been moved to a different hash chain,
272 * while we walk along it - better check and retry.
273 */
274 } while (he != RHT_NULLS_MARKER(bkt));
275
276 /* Ensure we see any new tables. */
277 smp_rmb();
278
279 tbl = rht_dereference_rcu(tbl->future_tbl, &ht->ht);
280 if (unlikely(tbl))
281 goto restart;
282 rcu_read_unlock();
283
284 darray_for_each(subvols, i) {
285 u32 snap;
286 ret = bch2_subvolume_get_snapshot(trans, *i, &snap);
287 if (ret)
288 goto err;
289
290 ret = bch2_snapshot_is_ancestor(c, snap, p.snapshot);
291 if (ret)
292 break;
293 }
294 err:
295 darray_exit(&subvols);
296 return ret;
297 }
298
/*
 * Raw lookup of @inum in the VFS inode hash table: takes no reference and
 * does no inode-state checks.  Returns NULL if not present.
 */
static struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
{
	struct bch_inode_info *found =
		rhashtable_lookup_fast(&c->vfs_inodes_table, &inum,
				       bch2_vfs_inodes_params);

	return found;
}
303
/*
 * Wait for an inode that is being freed to leave the inode hash table.
 *
 * Called with inode->v.i_lock held; the lock is dropped here and NOT retaken.
 * The wakeup this waits for is the inode_wake_up_bit(..., __I_NEW) issued by
 * bch2_inode_hash_remove().
 */
static void __wait_on_freeing_inode(struct bch_fs *c,
				    struct bch_inode_info *inode,
				    subvol_inum inum)
{
	wait_queue_head_t *wq;
	struct wait_bit_queue_entry wait;

	/* Queue ourselves before dropping i_lock */
	wq = inode_bit_waitqueue(&wait, &inode->v, __I_NEW);
	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
	spin_unlock(&inode->v.i_lock);

	/*
	 * Only sleep if this inode is still the one in the hash table; the
	 * sleep is bounded at 10 seconds, after which the caller re-checks.
	 */
	if (__bch2_inode_hash_find(c, inum) == inode)
		schedule_timeout(HZ * 10);
	finish_wait(wq, &wait.wq_entry);
}
319
/*
 * Look up @inum in the VFS inode cache and take a reference on it.
 *
 * Returns NULL if there is no usable cached inode (absent, or no longer
 * marked EI_INODE_HASHED).  If the inode is being freed, waits for it to go
 * away and retries the lookup.  When @trans is non-NULL its btree locks are
 * dropped around the wait; if relocking fails the error is returned as an
 * ERR_PTR().
 */
static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, struct btree_trans *trans,
						   subvol_inum inum)
{
	struct bch_inode_info *inode;
	int ret;
repeat:
	inode = __bch2_inode_hash_find(c, inum);
	if (!inode)
		return NULL;

	spin_lock(&inode->v.i_lock);

	if (!test_bit(EI_INODE_HASHED, &inode->ei_flags)) {
		spin_unlock(&inode->v.i_lock);
		return NULL;
	}

	if (inode->v.i_state & (I_FREEING|I_WILL_FREE)) {
		/* __wait_on_freeing_inode() drops i_lock for us */
		if (trans) {
			bch2_trans_unlock(trans);
			__wait_on_freeing_inode(c, inode, inum);
			ret = bch2_trans_relock(trans);
			if (ret)
				return ERR_PTR(ret);
		} else {
			__wait_on_freeing_inode(c, inode, inum);
		}
		goto repeat;
	}

	__iget(&inode->v);
	spin_unlock(&inode->v.i_lock);

	return inode;
}
350
bch2_inode_hash_remove(struct bch_fs * c,struct bch_inode_info * inode)351 static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inode)
352 {
353 spin_lock(&inode->v.i_lock);
354 bool remove = test_and_clear_bit(EI_INODE_HASHED, &inode->ei_flags);
355 spin_unlock(&inode->v.i_lock);
356
357 if (remove) {
358 int ret = rhltable_remove(&c->vfs_inodes_by_inum_table,
359 &inode->by_inum_hash, bch2_vfs_inodes_by_inum_params);
360 BUG_ON(ret);
361
362 ret = rhashtable_remove_fast(&c->vfs_inodes_table,
363 &inode->hash, bch2_vfs_inodes_params);
364 BUG_ON(ret);
365 inode->v.i_hash.pprev = NULL;
366 /*
367 * This pairs with the bch2_inode_hash_find() ->
368 * __wait_on_freeing_inode() path
369 */
370 inode_wake_up_bit(&inode->v, __I_NEW);
371 }
372 }
373
bch2_inode_hash_insert(struct bch_fs * c,struct btree_trans * trans,struct bch_inode_info * inode)374 static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c,
375 struct btree_trans *trans,
376 struct bch_inode_info *inode)
377 {
378 struct bch_inode_info *old = inode;
379
380 set_bit(EI_INODE_HASHED, &inode->ei_flags);
381 retry:
382 if (unlikely(rhashtable_lookup_insert_key(&c->vfs_inodes_table,
383 &inode->ei_inum,
384 &inode->hash,
385 bch2_vfs_inodes_params))) {
386 old = bch2_inode_hash_find(c, trans, inode->ei_inum);
387 if (!old)
388 goto retry;
389
390 clear_bit(EI_INODE_HASHED, &inode->ei_flags);
391
392 /*
393 * bcachefs doesn't use I_NEW; we have no use for it since we
394 * only insert fully created inodes in the inode hash table. But
395 * discard_new_inode() expects it to be set...
396 */
397 inode->v.i_state |= I_NEW;
398 /*
399 * We don't want bch2_evict_inode() to delete the inode on disk,
400 * we just raced and had another inode in cache. Normally new
401 * inodes don't have nlink == 0 - except tmpfiles do...
402 */
403 set_nlink(&inode->v, 1);
404 discard_new_inode(&inode->v);
405 return old;
406 } else {
407 int ret = rhltable_insert(&c->vfs_inodes_by_inum_table,
408 &inode->by_inum_hash,
409 bch2_vfs_inodes_by_inum_params);
410 BUG_ON(ret);
411
412 inode_fake_hash(&inode->v);
413
414 inode_sb_list_add(&inode->v);
415
416 mutex_lock(&c->vfs_inodes_lock);
417 list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
418 mutex_unlock(&c->vfs_inodes_lock);
419 return inode;
420 }
421 }
422
/*
 * Evaluate the expression @_do with the memalloc flags @_flags applied to the
 * current task, then put the saved flags back; the macro's value is the value
 * of @_do.
 *
 * NOTE(review): flags are saved with memalloc_flags_save() but restored with
 * memalloc_noreclaim_restore(); presumably that is equivalent to
 * memalloc_flags_restore() here - confirm.
 */
#define memalloc_flags_do(_flags, _do)						\
({										\
	unsigned _saved_flags = memalloc_flags_save(_flags);			\
	typeof(_do) _ret = _do;							\
	memalloc_noreclaim_restore(_saved_flags);				\
	_ret;									\
})
430
/*
 * Must never be called: unconditionally BUG()s.
 *
 * Inodes in this file are allocated through __bch2_new_inode() instead.
 * NOTE(review): presumably this is wired up as the superblock's alloc_inode
 * hook purely to catch the VFS allocating an inode behind our back - confirm
 * against the super_operations table.
 */
static struct inode *bch2_alloc_inode(struct super_block *sb)
{
	BUG();
}
435
/*
 * Allocate and minimally initialize a bch_inode_info using allocation flags
 * @gfp.  Returns NULL if either the allocation or the generic VFS inode
 * initialization fails; in the latter case the object is freed again.
 */
static struct bch_inode_info *__bch2_new_inode(struct bch_fs *c, gfp_t gfp)
{
	struct bch_inode_info *inode;

	inode = alloc_inode_sb(c->vfs_sb, bch2_inode_cache, gfp);
	if (!inode)
		return NULL;

	/* One-time and bcachefs-private state: */
	inode_init_once(&inode->v);
	mutex_init(&inode->ei_update_lock);
	two_state_lock_init(&inode->ei_pagecache_lock);
	INIT_LIST_HEAD(&inode->ei_vfs_inode_list);
	inode->ei_flags = 0;
	mutex_init(&inode->ei_quota_lock);
	memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush));

	/* Generic VFS per-use initialization: */
	if (likely(!inode_init_always_gfp(c->vfs_sb, &inode->v, gfp)))
		return inode;

	kmem_cache_free(bch2_inode_cache, inode);
	return NULL;
}
458
459 /*
460 * Allocate a new inode, dropping/retaking btree locks if necessary:
461 */
bch2_new_inode(struct btree_trans * trans)462 static struct bch_inode_info *bch2_new_inode(struct btree_trans *trans)
463 {
464 struct bch_inode_info *inode = __bch2_new_inode(trans->c, GFP_NOWAIT);
465
466 if (unlikely(!inode)) {
467 int ret = drop_locks_do(trans, (inode = __bch2_new_inode(trans->c, GFP_NOFS)) ? 0 : -ENOMEM);
468 if (ret && inode) {
469 __destroy_inode(&inode->v);
470 kmem_cache_free(bch2_inode_cache, inode);
471 }
472 if (ret)
473 return ERR_PTR(ret);
474 }
475
476 return inode;
477 }
478
/*
 * Allocate a VFS inode, initialize it from the unpacked btree inode @bi and
 * subvolume @subvol, and insert it into the inode cache.
 *
 * Returns whatever bch2_inode_hash_insert() returns (the new inode, or an
 * already-cached one if we raced), or an ERR_PTR() if allocation failed.
 */
static struct bch_inode_info *bch2_inode_hash_init_insert(struct btree_trans *trans,
							  subvol_inum inum,
							  struct bch_inode_unpacked *bi,
							  struct bch_subvolume *subvol)
{
	struct bch_inode_info *new_inode;

	new_inode = bch2_new_inode(trans);
	if (IS_ERR(new_inode))
		return new_inode;

	bch2_vfs_inode_init(trans, inum, new_inode, bi, subvol);

	return bch2_inode_hash_insert(trans->c, trans, new_inode);
}
493
/*
 * Get a referenced VFS inode for @inum.
 *
 * The inode cache is consulted first; on a miss the subvolume and inode are
 * read from the btree (retrying on transaction restart) and a new VFS inode
 * is created and inserted into the cache.  Returns the inode or an ERR_PTR().
 */
struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
{
	struct bch_inode_info *inode = bch2_inode_hash_find(c, NULL, inum);

	if (inode)
		return &inode->v;

	struct btree_trans *trans = bch2_trans_get(c);
	struct bch_inode_unpacked inode_u;
	struct bch_subvolume subvol;

	int ret = lockrestart_do(trans,
		bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?:
		bch2_inode_find_by_inum_trans(trans, inum, &inode_u));
	if (!ret) {
		inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol);
		ret = PTR_ERR_OR_ZERO(inode);
	}

	bch2_trans_put(trans);

	if (ret)
		return ERR_PTR(ret);

	return &inode->v;
}
512
/*
 * Create a new inode in directory @dir and return its VFS inode.
 *
 * Unless BCH_CREATE_TMPFILE is set in @flags, a dirent named after @dentry is
 * created in @dir, dir->ei_update_lock is held across the transaction, and
 * the directory's VFS inode is refreshed afterwards.  With
 * BCH_CREATE_TMPFILE no name is passed to bch2_create_trans() and the
 * directory lock is not taken.
 *
 * Returns the new inode, or an ERR_PTR() on failure.  Note the returned inode
 * comes from bch2_inode_hash_insert(), which may hand back an already-cached
 * inode instead of the one allocated here.
 */
struct bch_inode_info *
__bch2_create(struct mnt_idmap *idmap,
	      struct bch_inode_info *dir, struct dentry *dentry,
	      umode_t mode, dev_t rdev, subvol_inum snapshot_src,
	      unsigned flags)
{
	struct bch_fs *c = dir->v.i_sb->s_fs_info;
	struct btree_trans *trans;
	struct bch_inode_unpacked dir_u;
	struct bch_inode_info *inode;
	struct bch_inode_unpacked inode_u;
	struct posix_acl *default_acl = NULL, *acl = NULL;
	subvol_inum inum;
	struct bch_subvolume subvol;
	u64 journal_seq = 0;
	kuid_t kuid;
	kgid_t kgid;
	int ret;

	/*
	 * preallocate acls + vfs inode before btree transaction, so that
	 * nothing can fail after the transaction succeeds:
	 */
#ifdef CONFIG_BCACHEFS_POSIX_ACL
	ret = posix_acl_create(&dir->v, &mode, &default_acl, &acl);
	if (ret)
		return ERR_PTR(ret);
#endif
	inode = __bch2_new_inode(c, GFP_NOFS);
	if (unlikely(!inode)) {
		inode = ERR_PTR(-ENOMEM);
		goto err;
	}

	bch2_inode_init_early(c, &inode_u);

	if (!(flags & BCH_CREATE_TMPFILE))
		mutex_lock(&dir->ei_update_lock);

	trans = bch2_trans_get(c);
retry:
	bch2_trans_begin(trans);

	/* Owner ids are derived from the caller's fsuid/fsgid through @idmap */
	kuid = mapped_fsuid(idmap, i_user_ns(&dir->v));
	kgid = mapped_fsgid(idmap, i_user_ns(&dir->v));
	ret   = bch2_subvol_is_ro_trans(trans, dir->ei_inum.subvol) ?:
		bch2_create_trans(trans,
				  inode_inum(dir), &dir_u, &inode_u,
				  !(flags & BCH_CREATE_TMPFILE)
				  ? &dentry->d_name : NULL,
				  from_kuid(i_user_ns(&dir->v), kuid),
				  from_kgid(i_user_ns(&dir->v), kgid),
				  mode, rdev,
				  default_acl, acl, snapshot_src, flags) ?:
		bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1,
				KEY_TYPE_QUOTA_PREALLOC);
	if (unlikely(ret))
		goto err_before_quota;

	/* bi_subvol is nonzero when the new inode has its own subvolume */
	inum.subvol = inode_u.bi_subvol ?: dir->ei_inum.subvol;
	inum.inum = inode_u.bi_inum;

	ret   = bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?:
		bch2_trans_commit(trans, NULL, &journal_seq, 0);
	if (unlikely(ret)) {
		/* Undo the inode quota charge taken above */
		bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
				KEY_TYPE_QUOTA_WARN);
err_before_quota:
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			goto retry;
		goto err_trans;
	}

	if (!(flags & BCH_CREATE_TMPFILE)) {
		bch2_inode_update_after_write(trans, dir, &dir_u,
					      ATTR_MTIME|ATTR_CTIME|ATTR_SIZE);
		mutex_unlock(&dir->ei_update_lock);
	}

	bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);

	set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
	set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl);

	/*
	 * we must insert the new inode into the inode cache before calling
	 * bch2_trans_exit() and dropping locks, else we could race with another
	 * thread pulling the inode in and modifying it:
	 *
	 * also, calling bch2_inode_hash_insert() without passing in the
	 * transaction object is sketchy - if we could ever end up in
	 * __wait_on_freeing_inode(), we'd risk deadlock.
	 *
	 * But that shouldn't be possible, since we still have the inode locked
	 * that we just created, and we _really_ can't take a transaction
	 * restart here.
	 */
	inode = bch2_inode_hash_insert(c, NULL, inode);
	bch2_trans_put(trans);
err:
	/* Common exit for success and failure: drop our ACL references */
	posix_acl_release(default_acl);
	posix_acl_release(acl);
	return inode;
err_trans:
	if (!(flags & BCH_CREATE_TMPFILE))
		mutex_unlock(&dir->ei_update_lock);

	bch2_trans_put(trans);
	/* Dispose of the preallocated, never-hashed VFS inode */
	make_bad_inode(&inode->v);
	iput(&inode->v);
	inode = ERR_PTR(ret);
	goto err;
}
626
627 /* methods */
628
/*
 * Look up @name in directory @dir and return a referenced VFS inode for the
 * dirent's target, within an existing btree transaction.
 *
 * The inode cache is tried first.  On a miss the subvolume and inode are read
 * from the btree, the dirent target is checked, the transaction is committed,
 * and a new VFS inode is created and inserted into the cache.
 *
 * Returns the inode or an ERR_PTR(); a positive return from
 * bch2_dirent_read_target() is mapped to -ENOENT.  A dirent whose inode is
 * missing is flagged as a filesystem inconsistency.
 */
static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
			subvol_inum dir, struct bch_hash_info *dir_hash_info,
			const struct qstr *name)
{
	struct bch_fs *c = trans->c;
	struct btree_iter dirent_iter = {};
	subvol_inum inum = {};
	struct printbuf buf = PRINTBUF;

	struct bkey_s_c k = bch2_hash_lookup(trans, &dirent_iter, bch2_dirent_hash_desc,
					     dir_hash_info, dir, name, 0);
	int ret = bkey_err(k);
	if (ret)
		return ERR_PTR(ret);

	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);

	ret = bch2_dirent_read_target(trans, dir, d, &inum);
	if (ret > 0)
		ret = -ENOENT;
	if (ret)
		goto err;

	/* Fast path: the inode is already in the VFS inode cache */
	struct bch_inode_info *inode = bch2_inode_hash_find(c, trans, inum);
	if (inode)
		goto out;

	/*
	 * Note: if check/repair needs it, we commit before
	 * bch2_inode_hash_init_insert(), as after that point we can't take a
	 * restart - not in the top level loop with a commit_do(), like we
	 * usually do:
	 */

	struct bch_subvolume subvol;
	struct bch_inode_unpacked inode_u;
	ret =   bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?:
		bch2_inode_find_by_inum_nowarn_trans(trans, inum, &inode_u) ?:
		bch2_check_dirent_target(trans, &dirent_iter, d, &inode_u, false) ?:
		bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
		PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol));

	/*
	 * don't remove it: check_inodes might find another inode that points
	 * back to this dirent
	 */
	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
				c, "dirent to missing inode:\n  %s",
				(bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf));
	if (ret)
		goto err;
out:
	/* Shared cleanup for both the success and error paths */
	bch2_trans_iter_exit(trans, &dirent_iter);
	printbuf_exit(&buf);
	return inode;
err:
	inode = ERR_PTR(ret);
	goto out;
}
688
bch2_lookup(struct inode * vdir,struct dentry * dentry,unsigned int flags)689 static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
690 unsigned int flags)
691 {
692 struct bch_fs *c = vdir->i_sb->s_fs_info;
693 struct bch_inode_info *dir = to_bch_ei(vdir);
694 struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode);
695
696 struct bch_inode_info *inode;
697 bch2_trans_do(c,
698 PTR_ERR_OR_ZERO(inode = bch2_lookup_trans(trans, inode_inum(dir),
699 &hash, &dentry->d_name)));
700 if (IS_ERR(inode))
701 inode = NULL;
702
703 #ifdef CONFIG_UNICODE
704 if (!inode && IS_CASEFOLDED(vdir)) {
705 /*
706 * Do not cache a negative dentry in casefolded directories
707 * as it would need to be invalidated in the following situation:
708 * - Lookup file "blAH" in a casefolded directory
709 * - Creation of file "BLAH" in a casefolded directory
710 * - Lookup file "blAH" in a casefolded directory
711 * which would fail if we had a negative dentry.
712 *
713 * We should come back to this when VFS has a method to handle
714 * this edgecase.
715 */
716 return NULL;
717 }
718 #endif
719
720 return d_splice_alias(&inode->v, dentry);
721 }
722
/*
 * VFS ->mknod: create a new inode of type/permissions @mode (device number
 * @rdev) named by @dentry in @vdir and attach it to the dentry.
 * Returns 0 or an error code translated by bch2_err_class().
 */
static int bch2_mknod(struct mnt_idmap *idmap,
		      struct inode *vdir, struct dentry *dentry,
		      umode_t mode, dev_t rdev)
{
	struct bch_inode_info *dir = to_bch_ei(vdir);
	struct bch_inode_info *created;

	created = __bch2_create(idmap, dir, dentry, mode, rdev,
				(subvol_inum) { 0 }, 0);
	if (IS_ERR(created))
		return bch2_err_class(PTR_ERR(created));

	d_instantiate(dentry, &created->v);
	return 0;
}
737
/*
 * VFS ->create: create a regular file; implemented as bch2_mknod() with
 * S_IFREG added to @mode and no device number.  @excl is unused.
 */
static int bch2_create(struct mnt_idmap *idmap,
		       struct inode *vdir, struct dentry *dentry,
		       umode_t mode, bool excl)
{
	umode_t file_mode = mode|S_IFREG;

	return bch2_mknod(idmap, vdir, dentry, file_mode, 0);
}
744
__bch2_link(struct bch_fs * c,struct bch_inode_info * inode,struct bch_inode_info * dir,struct dentry * dentry)745 static int __bch2_link(struct bch_fs *c,
746 struct bch_inode_info *inode,
747 struct bch_inode_info *dir,
748 struct dentry *dentry)
749 {
750 struct bch_inode_unpacked dir_u, inode_u;
751 int ret;
752
753 mutex_lock(&inode->ei_update_lock);
754 struct btree_trans *trans = bch2_trans_get(c);
755
756 ret = commit_do(trans, NULL, NULL, 0,
757 bch2_link_trans(trans,
758 inode_inum(dir), &dir_u,
759 inode_inum(inode), &inode_u,
760 &dentry->d_name));
761
762 if (likely(!ret)) {
763 bch2_inode_update_after_write(trans, dir, &dir_u,
764 ATTR_MTIME|ATTR_CTIME|ATTR_SIZE);
765 bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME);
766 }
767
768 bch2_trans_put(trans);
769 mutex_unlock(&inode->ei_update_lock);
770 return ret;
771 }
772
bch2_link(struct dentry * old_dentry,struct inode * vdir,struct dentry * dentry)773 static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
774 struct dentry *dentry)
775 {
776 struct bch_fs *c = vdir->i_sb->s_fs_info;
777 struct bch_inode_info *dir = to_bch_ei(vdir);
778 struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode);
779 int ret;
780
781 lockdep_assert_held(&inode->v.i_rwsem);
782
783 ret = bch2_subvol_is_ro(c, dir->ei_inum.subvol) ?:
784 bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
785 __bch2_link(c, inode, dir, dentry);
786 if (unlikely(ret))
787 return bch2_err_class(ret);
788
789 ihold(&inode->v);
790 d_instantiate(dentry, &inode->v);
791 return 0;
792 }
793
/*
 * Remove the dirent @dentry from directory @vdir.
 *
 * Both inodes' update locks are held across a committing transaction running
 * bch2_unlink_trans(); on success both VFS inodes are refreshed.  If the
 * unlinked inode is a subvolume root its VFS link count is forced to zero.
 * Returns 0 or an (untranslated) error code.
 */
int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
		  bool deleting_snapshot)
{
	struct bch_fs *c = vdir->i_sb->s_fs_info;
	struct bch_inode_info *dir = to_bch_ei(vdir);
	struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
	struct bch_inode_unpacked dir_u, inode_u;
	struct btree_trans *trans;
	int ret;

	bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);

	trans = bch2_trans_get(c);

	ret = commit_do(trans, NULL, NULL,
			BCH_TRANS_COMMIT_no_enospc,
			bch2_unlink_trans(trans,
					  inode_inum(dir), &dir_u,
					  &inode_u, &dentry->d_name,
					  deleting_snapshot));
	if (likely(!ret)) {
		bch2_inode_update_after_write(trans, dir, &dir_u,
					      ATTR_MTIME|ATTR_CTIME|ATTR_SIZE);
		/*
		 * NOTE(review): __bch2_link() refreshes the linked inode with
		 * ATTR_CTIME, whereas this passes ATTR_MTIME - confirm which
		 * timestamp bch2_unlink_trans() actually changes.
		 */
		bch2_inode_update_after_write(trans, inode, &inode_u,
					      ATTR_MTIME);

		/*
		 * Subvolume deletion is asynchronous, but we still want to tell
		 * the VFS that it's been deleted here:
		 */
		if (inode_u.bi_subvol)
			set_nlink(&inode->v, 0);
	}

	bch2_trans_put(trans);
	bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);

	return ret;
}
834
bch2_unlink(struct inode * vdir,struct dentry * dentry)835 static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
836 {
837 struct bch_inode_info *dir= to_bch_ei(vdir);
838 struct bch_fs *c = dir->v.i_sb->s_fs_info;
839
840 int ret = bch2_subvol_is_ro(c, dir->ei_inum.subvol) ?:
841 __bch2_unlink(vdir, dentry, false);
842 return bch2_err_class(ret);
843 }
844
bch2_symlink(struct mnt_idmap * idmap,struct inode * vdir,struct dentry * dentry,const char * symname)845 static int bch2_symlink(struct mnt_idmap *idmap,
846 struct inode *vdir, struct dentry *dentry,
847 const char *symname)
848 {
849 struct bch_fs *c = vdir->i_sb->s_fs_info;
850 struct bch_inode_info *dir = to_bch_ei(vdir), *inode;
851 int ret;
852
853 inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
854 (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
855 if (IS_ERR(inode))
856 return bch2_err_class(PTR_ERR(inode));
857
858 inode_lock(&inode->v);
859 ret = page_symlink(&inode->v, symname, strlen(symname) + 1);
860 inode_unlock(&inode->v);
861
862 if (unlikely(ret))
863 goto err;
864
865 ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX);
866 if (unlikely(ret))
867 goto err;
868
869 ret = __bch2_link(c, inode, dir, dentry);
870 if (unlikely(ret))
871 goto err;
872
873 d_instantiate(dentry, &inode->v);
874 return 0;
875 err:
876 iput(&inode->v);
877 return bch2_err_class(ret);
878 }
879
/*
 * VFS ->mkdir: create a directory via bch2_mknod() with S_IFDIR added to
 * @mode.  The integer result is returned wrapped in ERR_PTR(), so success (0)
 * comes back as NULL.
 */
static struct dentry *bch2_mkdir(struct mnt_idmap *idmap,
				 struct inode *vdir, struct dentry *dentry, umode_t mode)
{
	int ret = bch2_mknod(idmap, vdir, dentry, mode|S_IFDIR, 0);

	return ERR_PTR(ret);
}
885
bch2_rename2(struct mnt_idmap * idmap,struct inode * src_vdir,struct dentry * src_dentry,struct inode * dst_vdir,struct dentry * dst_dentry,unsigned flags)886 static int bch2_rename2(struct mnt_idmap *idmap,
887 struct inode *src_vdir, struct dentry *src_dentry,
888 struct inode *dst_vdir, struct dentry *dst_dentry,
889 unsigned flags)
890 {
891 struct bch_fs *c = src_vdir->i_sb->s_fs_info;
892 struct bch_inode_info *src_dir = to_bch_ei(src_vdir);
893 struct bch_inode_info *dst_dir = to_bch_ei(dst_vdir);
894 struct bch_inode_info *src_inode = to_bch_ei(src_dentry->d_inode);
895 struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode);
896 struct bch_inode_unpacked dst_dir_u, src_dir_u;
897 struct bch_inode_unpacked src_inode_u, dst_inode_u, *whiteout_inode_u;
898 struct btree_trans *trans;
899 enum bch_rename_mode mode = flags & RENAME_EXCHANGE
900 ? BCH_RENAME_EXCHANGE
901 : dst_dentry->d_inode
902 ? BCH_RENAME_OVERWRITE : BCH_RENAME;
903 bool whiteout = !!(flags & RENAME_WHITEOUT);
904 int ret;
905
906 if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE|RENAME_WHITEOUT))
907 return -EINVAL;
908
909 if (mode == BCH_RENAME_OVERWRITE) {
910 ret = filemap_write_and_wait_range(src_inode->v.i_mapping,
911 0, LLONG_MAX);
912 if (ret)
913 return ret;
914 }
915
916 bch2_lock_inodes(INODE_UPDATE_LOCK,
917 src_dir,
918 dst_dir,
919 src_inode,
920 dst_inode);
921
922 trans = bch2_trans_get(c);
923
924 ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_inum.subvol) ?:
925 bch2_subvol_is_ro_trans(trans, dst_dir->ei_inum.subvol);
926 if (ret)
927 goto err_tx_restart;
928
929 if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) {
930 ret = bch2_fs_quota_transfer(c, src_inode,
931 dst_dir->ei_qid,
932 1 << QTYP_PRJ,
933 KEY_TYPE_QUOTA_PREALLOC);
934 if (ret)
935 goto err;
936 }
937
938 if (mode == BCH_RENAME_EXCHANGE &&
939 inode_attr_changing(src_dir, dst_inode, Inode_opt_project)) {
940 ret = bch2_fs_quota_transfer(c, dst_inode,
941 src_dir->ei_qid,
942 1 << QTYP_PRJ,
943 KEY_TYPE_QUOTA_PREALLOC);
944 if (ret)
945 goto err;
946 }
947 retry:
948 bch2_trans_begin(trans);
949
950 ret = bch2_rename_trans(trans,
951 inode_inum(src_dir), &src_dir_u,
952 inode_inum(dst_dir), &dst_dir_u,
953 &src_inode_u,
954 &dst_inode_u,
955 &src_dentry->d_name,
956 &dst_dentry->d_name,
957 mode);
958 if (unlikely(ret))
959 goto err_tx_restart;
960
961 if (whiteout) {
962 whiteout_inode_u = bch2_trans_kmalloc_nomemzero(trans, sizeof(*whiteout_inode_u));
963 ret = PTR_ERR_OR_ZERO(whiteout_inode_u);
964 if (unlikely(ret))
965 goto err_tx_restart;
966 bch2_inode_init_early(c, whiteout_inode_u);
967
968 ret = bch2_create_trans(trans,
969 inode_inum(src_dir), &src_dir_u,
970 whiteout_inode_u,
971 &src_dentry->d_name,
972 from_kuid(i_user_ns(&src_dir->v), current_fsuid()),
973 from_kgid(i_user_ns(&src_dir->v), current_fsgid()),
974 S_IFCHR|WHITEOUT_MODE, 0,
975 NULL, NULL, (subvol_inum) { 0 }, 0) ?:
976 bch2_quota_acct(c, bch_qid(whiteout_inode_u), Q_INO, 1,
977 KEY_TYPE_QUOTA_PREALLOC);
978 if (unlikely(ret))
979 goto err_tx_restart;
980 }
981
982 ret = bch2_trans_commit(trans, NULL, NULL, 0);
983 if (unlikely(ret)) {
984 err_tx_restart:
985 if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
986 goto retry;
987 goto err;
988 }
989
990 BUG_ON(src_inode->v.i_ino != src_inode_u.bi_inum);
991 BUG_ON(dst_inode &&
992 dst_inode->v.i_ino != dst_inode_u.bi_inum);
993
994 bch2_inode_update_after_write(trans, src_dir, &src_dir_u,
995 ATTR_MTIME|ATTR_CTIME|ATTR_SIZE);
996
997 if (src_dir != dst_dir)
998 bch2_inode_update_after_write(trans, dst_dir, &dst_dir_u,
999 ATTR_MTIME|ATTR_CTIME|ATTR_SIZE);
1000
1001 bch2_inode_update_after_write(trans, src_inode, &src_inode_u,
1002 ATTR_CTIME);
1003
1004 if (dst_inode)
1005 bch2_inode_update_after_write(trans, dst_inode, &dst_inode_u,
1006 ATTR_CTIME);
1007 err:
1008 bch2_trans_put(trans);
1009
1010 bch2_fs_quota_transfer(c, src_inode,
1011 bch_qid(&src_inode->ei_inode),
1012 1 << QTYP_PRJ,
1013 KEY_TYPE_QUOTA_NOCHECK);
1014 if (dst_inode)
1015 bch2_fs_quota_transfer(c, dst_inode,
1016 bch_qid(&dst_inode->ei_inode),
1017 1 << QTYP_PRJ,
1018 KEY_TYPE_QUOTA_NOCHECK);
1019
1020 bch2_unlock_inodes(INODE_UPDATE_LOCK,
1021 src_dir,
1022 dst_dir,
1023 src_inode,
1024 dst_inode);
1025
1026 return bch2_err_class(ret);
1027 }
1028
bch2_setattr_copy(struct mnt_idmap * idmap,struct bch_inode_info * inode,struct bch_inode_unpacked * bi,struct iattr * attr)1029 static void bch2_setattr_copy(struct mnt_idmap *idmap,
1030 struct bch_inode_info *inode,
1031 struct bch_inode_unpacked *bi,
1032 struct iattr *attr)
1033 {
1034 struct bch_fs *c = inode->v.i_sb->s_fs_info;
1035 unsigned int ia_valid = attr->ia_valid;
1036 kuid_t kuid;
1037 kgid_t kgid;
1038
1039 if (ia_valid & ATTR_UID) {
1040 kuid = from_vfsuid(idmap, i_user_ns(&inode->v), attr->ia_vfsuid);
1041 bi->bi_uid = from_kuid(i_user_ns(&inode->v), kuid);
1042 }
1043 if (ia_valid & ATTR_GID) {
1044 kgid = from_vfsgid(idmap, i_user_ns(&inode->v), attr->ia_vfsgid);
1045 bi->bi_gid = from_kgid(i_user_ns(&inode->v), kgid);
1046 }
1047
1048 if (ia_valid & ATTR_SIZE)
1049 bi->bi_size = attr->ia_size;
1050
1051 if (ia_valid & ATTR_ATIME)
1052 bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime);
1053 if (ia_valid & ATTR_MTIME)
1054 bi->bi_mtime = timespec_to_bch2_time(c, attr->ia_mtime);
1055 if (ia_valid & ATTR_CTIME)
1056 bi->bi_ctime = timespec_to_bch2_time(c, attr->ia_ctime);
1057
1058 if (ia_valid & ATTR_MODE) {
1059 umode_t mode = attr->ia_mode;
1060 kgid_t gid = ia_valid & ATTR_GID
1061 ? kgid
1062 : inode->v.i_gid;
1063
1064 if (!in_group_or_capable(idmap, &inode->v,
1065 make_vfsgid(idmap, i_user_ns(&inode->v), gid)))
1066 mode &= ~S_ISGID;
1067 bi->bi_mode = mode;
1068 }
1069 }
1070
bch2_setattr_nonsize(struct mnt_idmap * idmap,struct bch_inode_info * inode,struct iattr * attr)1071 int bch2_setattr_nonsize(struct mnt_idmap *idmap,
1072 struct bch_inode_info *inode,
1073 struct iattr *attr)
1074 {
1075 struct bch_fs *c = inode->v.i_sb->s_fs_info;
1076 struct bch_qid qid;
1077 struct btree_trans *trans;
1078 struct btree_iter inode_iter = { NULL };
1079 struct bch_inode_unpacked inode_u;
1080 struct posix_acl *acl = NULL;
1081 kuid_t kuid;
1082 kgid_t kgid;
1083 int ret;
1084
1085 mutex_lock(&inode->ei_update_lock);
1086
1087 qid = inode->ei_qid;
1088
1089 if (attr->ia_valid & ATTR_UID) {
1090 kuid = from_vfsuid(idmap, i_user_ns(&inode->v), attr->ia_vfsuid);
1091 qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), kuid);
1092 }
1093
1094 if (attr->ia_valid & ATTR_GID) {
1095 kgid = from_vfsgid(idmap, i_user_ns(&inode->v), attr->ia_vfsgid);
1096 qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), kgid);
1097 }
1098
1099 ret = bch2_fs_quota_transfer(c, inode, qid, ~0,
1100 KEY_TYPE_QUOTA_PREALLOC);
1101 if (ret)
1102 goto err;
1103
1104 trans = bch2_trans_get(c);
1105 retry:
1106 bch2_trans_begin(trans);
1107 kfree(acl);
1108 acl = NULL;
1109
1110 ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
1111 BTREE_ITER_intent);
1112 if (ret)
1113 goto btree_err;
1114
1115 bch2_setattr_copy(idmap, inode, &inode_u, attr);
1116
1117 if (attr->ia_valid & ATTR_MODE) {
1118 ret = bch2_acl_chmod(trans, inode_inum(inode), &inode_u,
1119 inode_u.bi_mode, &acl);
1120 if (ret)
1121 goto btree_err;
1122 }
1123
1124 ret = bch2_inode_write(trans, &inode_iter, &inode_u) ?:
1125 bch2_trans_commit(trans, NULL, NULL,
1126 BCH_TRANS_COMMIT_no_enospc);
1127 btree_err:
1128 bch2_trans_iter_exit(trans, &inode_iter);
1129
1130 if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
1131 goto retry;
1132 if (unlikely(ret))
1133 goto err_trans;
1134
1135 bch2_inode_update_after_write(trans, inode, &inode_u, attr->ia_valid);
1136
1137 if (acl)
1138 set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
1139 err_trans:
1140 bch2_trans_put(trans);
1141 err:
1142 mutex_unlock(&inode->ei_update_lock);
1143
1144 return bch2_err_class(ret);
1145 }
1146
/*
 * ->getattr: fill @stat from the in-memory VFS inode and the cached unpacked
 * bcachefs inode.  Always reports the subvolume (STATX_SUBVOL); direct-IO
 * alignment and birth time are reported only when asked for in @request_mask.
 * Always returns 0.
 */
static int bch2_getattr(struct mnt_idmap *idmap,
			const struct path *path, struct kstat *stat,
			u32 request_mask, unsigned query_flags)
{
	struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry));
	struct inode *vinode = &inode->v;
	struct bch_fs *c = vinode->i_sb->s_fs_info;
	const struct bch_inode_unpacked *bi = &inode->ei_inode;

	/* Identity, type and ownership (ids mapped through the mount's idmap) */
	stat->dev	= vinode->i_sb->s_dev;
	stat->ino	= vinode->i_ino;
	stat->mode	= vinode->i_mode;
	stat->nlink	= vinode->i_nlink;
	stat->uid	= vfsuid_into_kuid(i_uid_into_vfsuid(idmap, vinode));
	stat->gid	= vfsgid_into_kgid(i_gid_into_vfsgid(idmap, vinode));
	stat->rdev	= vinode->i_rdev;

	/* Size, timestamps and block accounting */
	stat->size	= i_size_read(vinode);
	stat->atime	= inode_get_atime(vinode);
	stat->mtime	= inode_get_mtime(vinode);
	stat->ctime	= inode_get_ctime(vinode);
	stat->blksize	= block_bytes(c);
	stat->blocks	= vinode->i_blocks;

	stat->subvol	= inode->ei_inum.subvol;
	stat->result_mask |= STATX_SUBVOL;

	if ((request_mask & STATX_DIOALIGN) && S_ISREG(vinode->i_mode)) {
		/*
		 * this is incorrect; we should be tracking this in superblock,
		 * and checking the alignment of open devices
		 */
		stat->dio_mem_align	= SECTOR_SIZE;
		stat->dio_offset_align	= block_bytes(c);
		stat->result_mask	|= STATX_DIOALIGN;
	}

	if (request_mask & STATX_BTIME) {
		stat->btime = bch2_time_to_timespec(c, bi->bi_otime);
		stat->result_mask |= STATX_BTIME;
	}

	/* These three attributes are always supported, set or not */
	stat->attributes_mask |= STATX_ATTR_IMMUTABLE |
				 STATX_ATTR_APPEND |
				 STATX_ATTR_NODUMP;

	if (bi->bi_flags & BCH_INODE_immutable)
		stat->attributes |= STATX_ATTR_IMMUTABLE;
	if (bi->bi_flags & BCH_INODE_append)
		stat->attributes |= STATX_ATTR_APPEND;
	if (bi->bi_flags & BCH_INODE_nodump)
		stat->attributes |= STATX_ATTR_NODUMP;

	return 0;
}
1202
bch2_setattr(struct mnt_idmap * idmap,struct dentry * dentry,struct iattr * iattr)1203 static int bch2_setattr(struct mnt_idmap *idmap,
1204 struct dentry *dentry, struct iattr *iattr)
1205 {
1206 struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
1207 struct bch_fs *c = inode->v.i_sb->s_fs_info;
1208 int ret;
1209
1210 lockdep_assert_held(&inode->v.i_rwsem);
1211
1212 ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
1213 setattr_prepare(idmap, dentry, iattr);
1214 if (ret)
1215 return ret;
1216
1217 return iattr->ia_valid & ATTR_SIZE
1218 ? bchfs_truncate(idmap, inode, iattr)
1219 : bch2_setattr_nonsize(idmap, inode, iattr);
1220 }
1221
/*
 * ->tmpfile: create an inode in @vdir with BCH_CREATE_TMPFILE, attach it to
 * the file's dentry and finish the open.  Creation errors are returned through
 * bch2_err_class().
 */
static int bch2_tmpfile(struct mnt_idmap *idmap,
			struct inode *vdir, struct file *file, umode_t mode)
{
	struct dentry *dentry = file->f_path.dentry;
	struct bch_inode_info *inode;

	inode = __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, 0,
			      (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
	if (IS_ERR(inode))
		return bch2_err_class(PTR_ERR(inode));

	d_mark_tmpfile(file, &inode->v);
	d_instantiate(dentry, &inode->v);

	return finish_open_simple(file, 0);
}
1237
bch2_fill_extent(struct bch_fs * c,struct fiemap_extent_info * info,struct bkey_s_c k,unsigned flags)1238 static int bch2_fill_extent(struct bch_fs *c,
1239 struct fiemap_extent_info *info,
1240 struct bkey_s_c k, unsigned flags)
1241 {
1242 if (bkey_extent_is_direct_data(k.k)) {
1243 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
1244 const union bch_extent_entry *entry;
1245 struct extent_ptr_decoded p;
1246 int ret;
1247
1248 if (k.k->type == KEY_TYPE_reflink_v)
1249 flags |= FIEMAP_EXTENT_SHARED;
1250
1251 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
1252 int flags2 = 0;
1253 u64 offset = p.ptr.offset;
1254
1255 if (p.ptr.unwritten)
1256 flags2 |= FIEMAP_EXTENT_UNWRITTEN;
1257
1258 if (p.crc.compression_type)
1259 flags2 |= FIEMAP_EXTENT_ENCODED;
1260 else
1261 offset += p.crc.offset;
1262
1263 if ((offset & (block_sectors(c) - 1)) ||
1264 (k.k->size & (block_sectors(c) - 1)))
1265 flags2 |= FIEMAP_EXTENT_NOT_ALIGNED;
1266
1267 ret = fiemap_fill_next_extent(info,
1268 bkey_start_offset(k.k) << 9,
1269 offset << 9,
1270 k.k->size << 9, flags|flags2);
1271 if (ret)
1272 return ret;
1273 }
1274
1275 return 0;
1276 } else if (bkey_extent_is_inline_data(k.k)) {
1277 return fiemap_fill_next_extent(info,
1278 bkey_start_offset(k.k) << 9,
1279 0, k.k->size << 9,
1280 flags|
1281 FIEMAP_EXTENT_DATA_INLINE);
1282 } else if (k.k->type == KEY_TYPE_reservation) {
1283 return fiemap_fill_next_extent(info,
1284 bkey_start_offset(k.k) << 9,
1285 0, k.k->size << 9,
1286 flags|
1287 FIEMAP_EXTENT_DELALLOC|
1288 FIEMAP_EXTENT_UNWRITTEN);
1289 } else {
1290 BUG();
1291 }
1292 }
1293
/*
 * ->fiemap: walk the extents btree for this inode over [start, start + len)
 * and report each data extent or reservation through bch2_fill_extent().
 *
 * Reporting runs one extent behind the walk: an extent is held in @prev and
 * only emitted once the next one has been found, so that the final extent can
 * be emitted with FIEMAP_EXTENT_LAST after the loop.
 *
 * Returns 0 on success or a negative error; positive values from
 * bch2_fill_extent() are folded to 0.
 */
static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
		       u64 start, u64 len)
{
	struct bch_fs *c = vinode->i_sb->s_fs_info;
	struct bch_inode_info *ei = to_bch_ei(vinode);
	struct btree_trans *trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bkey_buf cur, prev;
	bool have_extent = false;
	int ret = 0;

	ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC);
	if (ret)
		return ret;

	/* Byte offsets are converted to 512-byte sectors with >> 9 */
	struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
	/* Reject ranges whose end wraps around u64 */
	if (start + len < start)
		return -EINVAL;

	start >>= 9;

	bch2_bkey_buf_init(&cur);
	bch2_bkey_buf_init(&prev);
	trans = bch2_trans_get(c);

	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
			     POS(ei->v.i_ino, start), 0);

	/*
	 * Every "continue" with ret set re-evaluates this condition: a
	 * transaction restart loops again, any other error ends the walk.
	 */
	while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
		enum btree_id data_btree = BTREE_ID_extents;

		bch2_trans_begin(trans);

		u32 snapshot;
		ret = bch2_subvolume_get_snapshot(trans, ei->ei_inum.subvol, &snapshot);
		if (ret)
			continue;

		bch2_btree_iter_set_snapshot(&iter, snapshot);

		k = bch2_btree_iter_peek_max(&iter, end);
		ret = bkey_err(k);
		if (ret)
			continue;

		/* No more keys up to @end */
		if (!k.k)
			break;

		/* Skip keys that are neither data nor reservations */
		if (!bkey_extent_is_data(k.k) &&
		    k.k->type != KEY_TYPE_reservation) {
			bch2_btree_iter_advance(&iter);
			continue;
		}

		/* The iterator may point into the middle of the extent */
		s64 offset_into_extent	= iter.pos.offset - bkey_start_offset(k.k);
		unsigned sectors	= k.k->size - offset_into_extent;

		bch2_bkey_buf_reassemble(&cur, c, k);

		/*
		 * NOTE(review): presumably replaces @cur with the indirect
		 * extent it points to (updating data_btree and
		 * offset_into_extent) when @cur is a reflink pointer; confirm
		 * against bch2_read_indirect_extent().
		 */
		ret = bch2_read_indirect_extent(trans, &data_btree,
					&offset_into_extent, &cur);
		if (ret)
			continue;

		k = bkey_i_to_s_c(cur.k);
		bch2_bkey_buf_realloc(&prev, c, k.k->u64s);

		sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent);

		/*
		 * Trim @cur to the part starting at offset_into_extent, limit
		 * it to @sectors, and reposition it so that it ends @sectors
		 * past the iterator position.
		 */
		bch2_cut_front(POS(k.k->p.inode,
				   bkey_start_offset(k.k) +
				   offset_into_extent),
			       cur.k);
		bch2_key_resize(&cur.k->k, sectors);
		cur.k->k.p = iter.pos;
		cur.k->k.p.offset += cur.k->k.size;

		if (have_extent) {
			/* Transaction is unlocked before reporting the extent */
			bch2_trans_unlock(trans);
			ret = bch2_fill_extent(c, info,
					bkey_i_to_s_c(prev.k), 0);
			if (ret)
				break;
		}

		bkey_copy(prev.k, cur.k);
		have_extent = true;

		bch2_btree_iter_set_pos(&iter,
			POS(iter.pos.inode, iter.pos.offset + sectors));
	}
	bch2_trans_iter_exit(trans, &iter);

	/* Emit the held-back extent, flagged as the last one */
	if (!ret && have_extent) {
		bch2_trans_unlock(trans);
		ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
				       FIEMAP_EXTENT_LAST);
	}

	bch2_trans_put(trans);
	bch2_bkey_buf_exit(&cur, c);
	bch2_bkey_buf_exit(&prev, c);
	return ret < 0 ? ret : 0;
}
1399
1400 static const struct vm_operations_struct bch_vm_ops = {
1401 .fault = bch2_page_fault,
1402 .map_pages = filemap_map_pages,
1403 .page_mkwrite = bch2_page_mkwrite,
1404 };
1405
bch2_mmap(struct file * file,struct vm_area_struct * vma)1406 static int bch2_mmap(struct file *file, struct vm_area_struct *vma)
1407 {
1408 file_accessed(file);
1409
1410 vma->vm_ops = &bch_vm_ops;
1411 return 0;
1412 }
1413
1414 /* Directories: */
1415
/*
 * ->llseek for directories: generic seek with both of
 * generic_file_llseek_size()'s trailing limit arguments set to S64_MAX.
 */
static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence)
{
	const loff_t limit = S64_MAX;

	return generic_file_llseek_size(file, offset, whence, limit, limit);
}
1421
bch2_vfs_readdir(struct file * file,struct dir_context * ctx)1422 static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
1423 {
1424 struct bch_inode_info *inode = file_bch_inode(file);
1425 struct bch_fs *c = inode->v.i_sb->s_fs_info;
1426
1427 if (!dir_emit_dots(file, ctx))
1428 return 0;
1429
1430 int ret = bch2_readdir(c, inode_inum(inode), ctx);
1431
1432 bch_err_fn(c, ret);
1433 return bch2_err_class(ret);
1434 }
1435
bch2_open(struct inode * vinode,struct file * file)1436 static int bch2_open(struct inode *vinode, struct file *file)
1437 {
1438 if (file->f_flags & (O_WRONLY|O_RDWR)) {
1439 struct bch_inode_info *inode = to_bch_ei(vinode);
1440 struct bch_fs *c = inode->v.i_sb->s_fs_info;
1441
1442 int ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol);
1443 if (ret)
1444 return ret;
1445 }
1446
1447 file->f_mode |= FMODE_CAN_ODIRECT;
1448
1449 return generic_file_open(vinode, file);
1450 }
1451
1452 static const struct file_operations bch_file_operations = {
1453 .open = bch2_open,
1454 .llseek = bch2_llseek,
1455 .read_iter = bch2_read_iter,
1456 .write_iter = bch2_write_iter,
1457 .mmap = bch2_mmap,
1458 .get_unmapped_area = thp_get_unmapped_area,
1459 .fsync = bch2_fsync,
1460 .splice_read = filemap_splice_read,
1461 .splice_write = iter_file_splice_write,
1462 .fallocate = bch2_fallocate_dispatch,
1463 .unlocked_ioctl = bch2_fs_file_ioctl,
1464 #ifdef CONFIG_COMPAT
1465 .compat_ioctl = bch2_compat_fs_ioctl,
1466 #endif
1467 .remap_file_range = bch2_remap_file_range,
1468 };
1469
1470 static const struct inode_operations bch_file_inode_operations = {
1471 .getattr = bch2_getattr,
1472 .setattr = bch2_setattr,
1473 .fiemap = bch2_fiemap,
1474 .listxattr = bch2_xattr_list,
1475 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1476 .get_inode_acl = bch2_get_acl,
1477 .set_acl = bch2_set_acl,
1478 #endif
1479 };
1480
1481 static const struct inode_operations bch_dir_inode_operations = {
1482 .lookup = bch2_lookup,
1483 .create = bch2_create,
1484 .link = bch2_link,
1485 .unlink = bch2_unlink,
1486 .symlink = bch2_symlink,
1487 .mkdir = bch2_mkdir,
1488 .rmdir = bch2_unlink,
1489 .mknod = bch2_mknod,
1490 .rename = bch2_rename2,
1491 .getattr = bch2_getattr,
1492 .setattr = bch2_setattr,
1493 .tmpfile = bch2_tmpfile,
1494 .listxattr = bch2_xattr_list,
1495 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1496 .get_inode_acl = bch2_get_acl,
1497 .set_acl = bch2_set_acl,
1498 #endif
1499 };
1500
1501 static const struct file_operations bch_dir_file_operations = {
1502 .llseek = bch2_dir_llseek,
1503 .read = generic_read_dir,
1504 .iterate_shared = bch2_vfs_readdir,
1505 .fsync = bch2_fsync,
1506 .unlocked_ioctl = bch2_fs_file_ioctl,
1507 #ifdef CONFIG_COMPAT
1508 .compat_ioctl = bch2_compat_fs_ioctl,
1509 #endif
1510 };
1511
1512 static const struct inode_operations bch_symlink_inode_operations = {
1513 .get_link = page_get_link,
1514 .getattr = bch2_getattr,
1515 .setattr = bch2_setattr,
1516 .listxattr = bch2_xattr_list,
1517 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1518 .get_inode_acl = bch2_get_acl,
1519 .set_acl = bch2_set_acl,
1520 #endif
1521 };
1522
1523 static const struct inode_operations bch_special_inode_operations = {
1524 .getattr = bch2_getattr,
1525 .setattr = bch2_setattr,
1526 .listxattr = bch2_xattr_list,
1527 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1528 .get_inode_acl = bch2_get_acl,
1529 .set_acl = bch2_set_acl,
1530 #endif
1531 };
1532
1533 static const struct address_space_operations bch_address_space_operations = {
1534 .read_folio = bch2_read_folio,
1535 .writepages = bch2_writepages,
1536 .readahead = bch2_readahead,
1537 .dirty_folio = filemap_dirty_folio,
1538 .write_begin = bch2_write_begin,
1539 .write_end = bch2_write_end,
1540 .invalidate_folio = bch2_invalidate_folio,
1541 .release_folio = bch2_release_folio,
1542 #ifdef CONFIG_MIGRATION
1543 .migrate_folio = filemap_migrate_folio,
1544 #endif
1545 .error_remove_folio = generic_error_remove_folio,
1546 };
1547
/*
 * File handle payload identifying one inode; filled in by bch2_inode_to_fid()
 * and written into the caller's handle buffer by bch2_encode_fh().  Packed, and
 * its size is measured in u32 words by the encode/validate helpers, so the
 * field order and widths must not change.
 */
struct bcachefs_fid {
	u64		inum;	/* inode number within the subvolume */
	u32		subvol;	/* subvolume the inode was accessed through */
	u32		gen;	/* inode generation, checked on decode */
} __packed;
1553
/*
 * File handle payload carrying both the inode and its parent directory; used
 * for FILEID_BCACHEFS_WITH_PARENT handles.  Packed: layout must not change.
 */
struct bcachefs_fid_with_parent {
	struct bcachefs_fid	fid;	/* the inode itself */
	struct bcachefs_fid	dir;	/* its parent directory */
} __packed;
1558
bcachefs_fid_valid(int fh_len,int fh_type)1559 static int bcachefs_fid_valid(int fh_len, int fh_type)
1560 {
1561 switch (fh_type) {
1562 case FILEID_BCACHEFS_WITHOUT_PARENT:
1563 return fh_len == sizeof(struct bcachefs_fid) / sizeof(u32);
1564 case FILEID_BCACHEFS_WITH_PARENT:
1565 return fh_len == sizeof(struct bcachefs_fid_with_parent) / sizeof(u32);
1566 default:
1567 return false;
1568 }
1569 }
1570
bch2_inode_to_fid(struct bch_inode_info * inode)1571 static struct bcachefs_fid bch2_inode_to_fid(struct bch_inode_info *inode)
1572 {
1573 return (struct bcachefs_fid) {
1574 .inum = inode->ei_inum.inum,
1575 .subvol = inode->ei_inum.subvol,
1576 .gen = inode->ei_inode.bi_generation,
1577 };
1578 }
1579
/*
 * ->encode_fh: write a file handle for @vinode into @fh.
 *
 * A handle that also identifies the parent is produced when @vdir is given and
 * @vinode is not a directory; otherwise only the inode itself is encoded.
 *
 * @len is in u32 words: on entry the space available, on return the space
 * required/used.  Returns the handle type, or FILEID_INVALID if the buffer was
 * too small (with *len set to the size needed).
 */
static int bch2_encode_fh(struct inode *vinode, u32 *fh, int *len,
			  struct inode *vdir)
{
	struct bch_inode_info *inode = to_bch_ei(vinode);
	struct bch_inode_info *dir = to_bch_ei(vdir);
	const bool with_parent = !S_ISDIR(inode->v.i_mode) && dir;
	const int needed = (with_parent
			    ? sizeof(struct bcachefs_fid_with_parent)
			    : sizeof(struct bcachefs_fid)) / sizeof(u32);
	const int available = *len;

	/* The required length is reported back on success and failure alike */
	*len = needed;

	if (available < needed)
		return FILEID_INVALID;

	if (with_parent) {
		struct bcachefs_fid_with_parent *out = (void *) fh;

		out->fid = bch2_inode_to_fid(inode);
		out->dir = bch2_inode_to_fid(dir);
		return FILEID_BCACHEFS_WITH_PARENT;
	} else {
		struct bcachefs_fid *out = (void *) fh;

		*out = bch2_inode_to_fid(inode);
		return FILEID_BCACHEFS_WITHOUT_PARENT;
	}
}
1615
bch2_nfs_get_inode(struct super_block * sb,struct bcachefs_fid fid)1616 static struct inode *bch2_nfs_get_inode(struct super_block *sb,
1617 struct bcachefs_fid fid)
1618 {
1619 struct bch_fs *c = sb->s_fs_info;
1620 struct inode *vinode = bch2_vfs_inode_get(c, (subvol_inum) {
1621 .subvol = fid.subvol,
1622 .inum = fid.inum,
1623 });
1624 if (!IS_ERR(vinode) && vinode->i_generation != fid.gen) {
1625 iput(vinode);
1626 vinode = ERR_PTR(-ESTALE);
1627 }
1628 return vinode;
1629 }
1630
bch2_fh_to_dentry(struct super_block * sb,struct fid * _fid,int fh_len,int fh_type)1631 static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *_fid,
1632 int fh_len, int fh_type)
1633 {
1634 struct bcachefs_fid *fid = (void *) _fid;
1635
1636 if (!bcachefs_fid_valid(fh_len, fh_type))
1637 return NULL;
1638
1639 return d_obtain_alias(bch2_nfs_get_inode(sb, *fid));
1640 }
1641
bch2_fh_to_parent(struct super_block * sb,struct fid * _fid,int fh_len,int fh_type)1642 static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *_fid,
1643 int fh_len, int fh_type)
1644 {
1645 struct bcachefs_fid_with_parent *fid = (void *) _fid;
1646
1647 if (!bcachefs_fid_valid(fh_len, fh_type) ||
1648 fh_type != FILEID_BCACHEFS_WITH_PARENT)
1649 return NULL;
1650
1651 return d_obtain_alias(bch2_nfs_get_inode(sb, fid->dir));
1652 }
1653
bch2_get_parent(struct dentry * child)1654 static struct dentry *bch2_get_parent(struct dentry *child)
1655 {
1656 struct bch_inode_info *inode = to_bch_ei(child->d_inode);
1657 struct bch_fs *c = inode->v.i_sb->s_fs_info;
1658 subvol_inum parent_inum = {
1659 .subvol = inode->ei_inode.bi_parent_subvol ?:
1660 inode->ei_inum.subvol,
1661 .inum = inode->ei_inode.bi_dir,
1662 };
1663
1664 return d_obtain_alias(bch2_vfs_inode_get(c, parent_inum));
1665 }
1666
bch2_get_name(struct dentry * parent,char * name,struct dentry * child)1667 static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child)
1668 {
1669 struct bch_inode_info *inode = to_bch_ei(child->d_inode);
1670 struct bch_inode_info *dir = to_bch_ei(parent->d_inode);
1671 struct bch_fs *c = inode->v.i_sb->s_fs_info;
1672 struct btree_trans *trans;
1673 struct btree_iter iter1;
1674 struct btree_iter iter2;
1675 struct bkey_s_c k;
1676 struct bkey_s_c_dirent d;
1677 struct bch_inode_unpacked inode_u;
1678 subvol_inum target;
1679 u32 snapshot;
1680 struct qstr dirent_name;
1681 unsigned name_len = 0;
1682 int ret;
1683
1684 if (!S_ISDIR(dir->v.i_mode))
1685 return -EINVAL;
1686
1687 trans = bch2_trans_get(c);
1688
1689 bch2_trans_iter_init(trans, &iter1, BTREE_ID_dirents,
1690 POS(dir->ei_inode.bi_inum, 0), 0);
1691 bch2_trans_iter_init(trans, &iter2, BTREE_ID_dirents,
1692 POS(dir->ei_inode.bi_inum, 0), 0);
1693 retry:
1694 bch2_trans_begin(trans);
1695
1696 ret = bch2_subvolume_get_snapshot(trans, dir->ei_inum.subvol, &snapshot);
1697 if (ret)
1698 goto err;
1699
1700 bch2_btree_iter_set_snapshot(&iter1, snapshot);
1701 bch2_btree_iter_set_snapshot(&iter2, snapshot);
1702
1703 ret = bch2_inode_find_by_inum_trans(trans, inode_inum(inode), &inode_u);
1704 if (ret)
1705 goto err;
1706
1707 if (inode_u.bi_dir == dir->ei_inode.bi_inum) {
1708 bch2_btree_iter_set_pos(&iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset));
1709
1710 k = bch2_btree_iter_peek_slot(&iter1);
1711 ret = bkey_err(k);
1712 if (ret)
1713 goto err;
1714
1715 if (k.k->type != KEY_TYPE_dirent) {
1716 ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
1717 goto err;
1718 }
1719
1720 d = bkey_s_c_to_dirent(k);
1721 ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target);
1722 if (ret > 0)
1723 ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
1724 if (ret)
1725 goto err;
1726
1727 if (subvol_inum_eq(target, inode->ei_inum))
1728 goto found;
1729 } else {
1730 /*
1731 * File with multiple hardlinks and our backref is to the wrong
1732 * directory - linear search:
1733 */
1734 for_each_btree_key_continue_norestart(iter2, 0, k, ret) {
1735 if (k.k->p.inode > dir->ei_inode.bi_inum)
1736 break;
1737
1738 if (k.k->type != KEY_TYPE_dirent)
1739 continue;
1740
1741 d = bkey_s_c_to_dirent(k);
1742 ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target);
1743 if (ret < 0)
1744 break;
1745 if (ret)
1746 continue;
1747
1748 if (subvol_inum_eq(target, inode->ei_inum))
1749 goto found;
1750 }
1751 }
1752
1753 ret = -ENOENT;
1754 goto err;
1755 found:
1756 dirent_name = bch2_dirent_get_name(d);
1757
1758 name_len = min_t(unsigned, dirent_name.len, NAME_MAX);
1759 memcpy(name, dirent_name.name, name_len);
1760 name[name_len] = '\0';
1761 err:
1762 if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
1763 goto retry;
1764
1765 bch2_trans_iter_exit(trans, &iter1);
1766 bch2_trans_iter_exit(trans, &iter2);
1767 bch2_trans_put(trans);
1768
1769 return ret;
1770 }
1771
1772 static const struct export_operations bch_export_ops = {
1773 .encode_fh = bch2_encode_fh,
1774 .fh_to_dentry = bch2_fh_to_dentry,
1775 .fh_to_parent = bch2_fh_to_parent,
1776 .get_parent = bch2_get_parent,
1777 .get_name = bch2_get_name,
1778 };
1779
/*
 * Initialise the in-memory inode @inode from the unpacked on-disk inode @bi.
 *
 * Sets the inode number and subvolume identity, copies every attribute via
 * bch2_inode_update_after_write(), resets per-inode state, flags inodes that
 * live in a snapshot subvolume, and selects the operation tables that match
 * the file type.
 */
static void bch2_vfs_inode_init(struct btree_trans *trans,
				subvol_inum inum,
				struct bch_inode_info *inode,
				struct bch_inode_unpacked *bi,
				struct bch_subvolume *subvol)
{
	struct inode *vinode = &inode->v;

	/*
	 * The inode number must be in place first:
	 * bch2_inode_update_after_write() asserts it matches bi->bi_inum.
	 */
	vinode->i_ino		= inum.inum;
	inode->ei_inum		= inum;
	inode->ei_inode.bi_inum	= inum.inum;
	bch2_inode_update_after_write(trans, inode, bi, ~0);

	vinode->i_blocks	= bi->bi_sectors;
	vinode->i_rdev		= bi->bi_dev;
	vinode->i_generation	= bi->bi_generation;
	vinode->i_size		= bi->bi_size;

	inode->ei_flags		= 0;
	inode->ei_quota_reserved = 0;
	inode->ei_qid		= bch_qid(bi);

	if (BCH_SUBVOLUME_SNAP(subvol))
		set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);

	vinode->i_mapping->a_ops = &bch_address_space_operations;

	if (S_ISREG(vinode->i_mode)) {
		vinode->i_op	= &bch_file_inode_operations;
		vinode->i_fop	= &bch_file_operations;
	} else if (S_ISDIR(vinode->i_mode)) {
		vinode->i_op	= &bch_dir_inode_operations;
		vinode->i_fop	= &bch_dir_file_operations;
	} else if (S_ISLNK(vinode->i_mode)) {
		inode_nohighmem(vinode);
		vinode->i_op	= &bch_symlink_inode_operations;
	} else {
		/* Everything else is set up by the VFS, then given our i_op */
		init_special_inode(vinode, vinode->i_mode, vinode->i_rdev);
		vinode->i_op	= &bch_special_inode_operations;
	}

	/* Page cache folios must be at least one filesystem block in size */
	mapping_set_folio_min_order(vinode->i_mapping,
				    get_order(trans->c->opts.block_size));
}
1827
bch2_free_inode(struct inode * vinode)1828 static void bch2_free_inode(struct inode *vinode)
1829 {
1830 kmem_cache_free(bch2_inode_cache, to_bch_ei(vinode));
1831 }
1832
inode_update_times_fn(struct btree_trans * trans,struct bch_inode_info * inode,struct bch_inode_unpacked * bi,void * p)1833 static int inode_update_times_fn(struct btree_trans *trans,
1834 struct bch_inode_info *inode,
1835 struct bch_inode_unpacked *bi,
1836 void *p)
1837 {
1838 struct bch_fs *c = inode->v.i_sb->s_fs_info;
1839
1840 bi->bi_atime = timespec_to_bch2_time(c, inode_get_atime(&inode->v));
1841 bi->bi_mtime = timespec_to_bch2_time(c, inode_get_mtime(&inode->v));
1842 bi->bi_ctime = timespec_to_bch2_time(c, inode_get_ctime(&inode->v));
1843
1844 return 0;
1845 }
1846
bch2_vfs_write_inode(struct inode * vinode,struct writeback_control * wbc)1847 static int bch2_vfs_write_inode(struct inode *vinode,
1848 struct writeback_control *wbc)
1849 {
1850 struct bch_fs *c = vinode->i_sb->s_fs_info;
1851 struct bch_inode_info *inode = to_bch_ei(vinode);
1852 int ret;
1853
1854 mutex_lock(&inode->ei_update_lock);
1855 ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
1856 ATTR_ATIME|ATTR_MTIME|ATTR_CTIME);
1857 mutex_unlock(&inode->ei_update_lock);
1858
1859 return bch2_err_class(ret);
1860 }
1861
bch2_evict_inode(struct inode * vinode)1862 static void bch2_evict_inode(struct inode *vinode)
1863 {
1864 struct bch_fs *c = vinode->i_sb->s_fs_info;
1865 struct bch_inode_info *inode = to_bch_ei(vinode);
1866 bool delete = !inode->v.i_nlink && !is_bad_inode(&inode->v);
1867
1868 /*
1869 * evict() has waited for outstanding writeback, we'll do no more IO
1870 * through this inode: it's safe to remove from VFS inode hashtable here
1871 *
1872 * Do that now so that other threads aren't blocked from pulling it back
1873 * in, there's no reason for them to be:
1874 */
1875 if (!delete)
1876 bch2_inode_hash_remove(c, inode);
1877
1878 truncate_inode_pages_final(&inode->v.i_data);
1879
1880 clear_inode(&inode->v);
1881
1882 BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved);
1883
1884 if (delete) {
1885 bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks),
1886 KEY_TYPE_QUOTA_WARN);
1887 bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
1888 KEY_TYPE_QUOTA_WARN);
1889 bch2_inode_rm(c, inode_inum(inode));
1890
1891 /*
1892 * If we are deleting, we need it present in the vfs hash table
1893 * so that fsck can check if unlinked inodes are still open:
1894 */
1895 bch2_inode_hash_remove(c, inode);
1896 }
1897
1898 mutex_lock(&c->vfs_inodes_lock);
1899 list_del_init(&inode->ei_vfs_inode_list);
1900 mutex_unlock(&c->vfs_inodes_lock);
1901 }
1902
/*
 * Push every in-memory inode belonging to the subvolumes listed in @s out of
 * the VFS caches, and return only after a pass over c->vfs_inodes_list finds
 * nothing left to mark or wait for.
 */
void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
{
	struct bch_inode_info *inode;
	DARRAY(struct bch_inode_info *) grabbed;
	bool clean_pass = false, this_pass_clean;

	/*
	 * Initially, we scan for inodes without I_DONTCACHE, then mark them to
	 * be pruned with d_mark_dontcache().
	 *
	 * Once we've had a clean pass where we didn't find any inodes without
	 * I_DONTCACHE, we wait for them to be freed:
	 */

	darray_init(&grabbed);
	/* Preallocate; the return value is not checked */
	darray_make_room(&grabbed, 1024);
again:
	cond_resched();
	this_pass_clean = true;

	mutex_lock(&c->vfs_inodes_lock);
	list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) {
		if (!snapshot_list_has_id(s, inode->ei_inum.subvol))
			continue;

		if (!(inode->v.i_state & I_DONTCACHE) &&
		    !(inode->v.i_state & I_FREEING) &&
		    igrab(&inode->v)) {
			/* Found an inode still to be marked: pass is not clean */
			this_pass_clean = false;

			/*
			 * Non-sleeping allocation while holding the list lock;
			 * if the array cannot grow, drop the reference and
			 * process what has been collected so far.
			 */
			if (darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN)) {
				iput(&inode->v);
				break;
			}
		} else if (clean_pass && this_pass_clean) {
			/*
			 * Previous pass was clean and so is this one so far,
			 * yet a matching inode is still on the list: sleep on
			 * the inode's bit waitqueue, then rescan from the top.
			 */
			struct wait_bit_queue_entry wqe;
			struct wait_queue_head *wq_head;

			wq_head = inode_bit_waitqueue(&wqe, &inode->v, __I_NEW);
			prepare_to_wait_event(wq_head, &wqe.wq_entry,
					      TASK_UNINTERRUPTIBLE);
			mutex_unlock(&c->vfs_inodes_lock);

			schedule();
			finish_wait(wq_head, &wqe.wq_entry);
			goto again;
		}
	}
	mutex_unlock(&c->vfs_inodes_lock);

	/* With the list lock dropped, mark and prune what was grabbed */
	darray_for_each(grabbed, i) {
		inode = *i;
		d_mark_dontcache(&inode->v);
		d_prune_aliases(&inode->v);
		iput(&inode->v);
	}
	grabbed.nr = 0;

	/* Finish only after two consecutive clean passes */
	if (!clean_pass || !this_pass_clean) {
		clean_pass = this_pass_clean;
		goto again;
	}

	darray_exit(&grabbed);
}
1968
bch2_statfs(struct dentry * dentry,struct kstatfs * buf)1969 static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
1970 {
1971 struct super_block *sb = dentry->d_sb;
1972 struct bch_fs *c = sb->s_fs_info;
1973 struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
1974 unsigned shift = sb->s_blocksize_bits - 9;
1975 /*
1976 * this assumes inodes take up 64 bytes, which is a decent average
1977 * number:
1978 */
1979 u64 avail_inodes = ((usage.capacity - usage.used) << 3);
1980
1981 buf->f_type = BCACHEFS_STATFS_MAGIC;
1982 buf->f_bsize = sb->s_blocksize;
1983 buf->f_blocks = usage.capacity >> shift;
1984 buf->f_bfree = usage.free >> shift;
1985 buf->f_bavail = avail_factor(usage.free) >> shift;
1986
1987 buf->f_files = usage.nr_inodes + avail_inodes;
1988 buf->f_ffree = avail_inodes;
1989
1990 buf->f_fsid = uuid_to_fsid(c->sb.user_uuid.b);
1991 buf->f_namelen = BCH_NAME_MAX;
1992
1993 return 0;
1994 }
1995
bch2_sync_fs(struct super_block * sb,int wait)1996 static int bch2_sync_fs(struct super_block *sb, int wait)
1997 {
1998 struct bch_fs *c = sb->s_fs_info;
1999 int ret;
2000
2001 trace_bch2_sync_fs(sb, wait);
2002
2003 if (c->opts.journal_flush_disabled)
2004 return 0;
2005
2006 if (!wait) {
2007 bch2_journal_flush_async(&c->journal, NULL);
2008 return 0;
2009 }
2010
2011 ret = bch2_journal_flush(&c->journal);
2012 return bch2_err_class(ret);
2013 }
2014
bch2_path_to_fs(const char * path)2015 static struct bch_fs *bch2_path_to_fs(const char *path)
2016 {
2017 struct bch_fs *c;
2018 dev_t dev;
2019 int ret;
2020
2021 ret = lookup_bdev(path, &dev);
2022 if (ret)
2023 return ERR_PTR(ret);
2024
2025 c = bch2_dev_to_fs(dev);
2026 if (c)
2027 closure_put(&c->cl);
2028 return c ?: ERR_PTR(-ENOENT);
2029 }
2030
bch2_show_devname(struct seq_file * seq,struct dentry * root)2031 static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
2032 {
2033 struct bch_fs *c = root->d_sb->s_fs_info;
2034 bool first = true;
2035
2036 for_each_online_member(c, ca) {
2037 if (!first)
2038 seq_putc(seq, ':');
2039 first = false;
2040 seq_puts(seq, ca->disk_sb.sb_name);
2041 }
2042
2043 return 0;
2044 }
2045
bch2_show_options(struct seq_file * seq,struct dentry * root)2046 static int bch2_show_options(struct seq_file *seq, struct dentry *root)
2047 {
2048 struct bch_fs *c = root->d_sb->s_fs_info;
2049 struct printbuf buf = PRINTBUF;
2050
2051 bch2_opts_to_text(&buf, c->opts, c, c->disk_sb.sb,
2052 OPT_MOUNT, OPT_HIDDEN, OPT_SHOW_MOUNT_STYLE);
2053 printbuf_nul_terminate(&buf);
2054 seq_printf(seq, ",%s", buf.buf);
2055
2056 int ret = buf.allocation_failure ? -ENOMEM : 0;
2057 printbuf_exit(&buf);
2058 return ret;
2059 }
2060
bch2_put_super(struct super_block * sb)2061 static void bch2_put_super(struct super_block *sb)
2062 {
2063 struct bch_fs *c = sb->s_fs_info;
2064
2065 __bch2_fs_stop(c);
2066 }
2067
2068 /*
2069 * bcachefs doesn't currently integrate intwrite freeze protection but the
2070 * internal write references serve the same purpose. Therefore reuse the
2071 * read-only transition code to perform the quiesce. The caveat is that we don't
2072 * currently have the ability to block tasks that want a write reference while
2073 * the superblock is frozen. This is fine for now, but we should either add
2074 * blocking support or find a way to integrate sb_start_intwrite() and friends.
2075 */
bch2_freeze(struct super_block * sb)2076 static int bch2_freeze(struct super_block *sb)
2077 {
2078 struct bch_fs *c = sb->s_fs_info;
2079
2080 down_write(&c->state_lock);
2081 bch2_fs_read_only(c);
2082 up_write(&c->state_lock);
2083 return 0;
2084 }
2085
bch2_unfreeze(struct super_block * sb)2086 static int bch2_unfreeze(struct super_block *sb)
2087 {
2088 struct bch_fs *c = sb->s_fs_info;
2089 int ret;
2090
2091 if (test_bit(BCH_FS_emergency_ro, &c->flags))
2092 return 0;
2093
2094 down_write(&c->state_lock);
2095 ret = bch2_fs_read_write(c);
2096 up_write(&c->state_lock);
2097 return ret;
2098 }
2099
2100 static const struct super_operations bch_super_operations = {
2101 .alloc_inode = bch2_alloc_inode,
2102 .free_inode = bch2_free_inode,
2103 .write_inode = bch2_vfs_write_inode,
2104 .evict_inode = bch2_evict_inode,
2105 .sync_fs = bch2_sync_fs,
2106 .statfs = bch2_statfs,
2107 .show_devname = bch2_show_devname,
2108 .show_options = bch2_show_options,
2109 .put_super = bch2_put_super,
2110 .freeze_fs = bch2_freeze,
2111 .unfreeze_fs = bch2_unfreeze,
2112 };
2113
bch2_set_super(struct super_block * s,void * data)2114 static int bch2_set_super(struct super_block *s, void *data)
2115 {
2116 s->s_fs_info = data;
2117 return 0;
2118 }
2119
bch2_noset_super(struct super_block * s,void * data)2120 static int bch2_noset_super(struct super_block *s, void *data)
2121 {
2122 return -EBUSY;
2123 }
2124
2125 typedef DARRAY(struct bch_fs *) darray_fs;
2126
bch2_test_super(struct super_block * s,void * data)2127 static int bch2_test_super(struct super_block *s, void *data)
2128 {
2129 struct bch_fs *c = s->s_fs_info;
2130 darray_fs *d = data;
2131
2132 if (!c)
2133 return false;
2134
2135 darray_for_each(*d, i)
2136 if (c != *i)
2137 return false;
2138 return true;
2139 }
2140
bch2_fs_get_tree(struct fs_context * fc)2141 static int bch2_fs_get_tree(struct fs_context *fc)
2142 {
2143 struct bch_fs *c;
2144 struct super_block *sb;
2145 struct inode *vinode;
2146 struct bch2_opts_parse *opts_parse = fc->fs_private;
2147 struct bch_opts opts = opts_parse->opts;
2148 darray_str devs;
2149 darray_fs devs_to_fs = {};
2150 int ret;
2151
2152 opt_set(opts, read_only, (fc->sb_flags & SB_RDONLY) != 0);
2153 opt_set(opts, nostart, true);
2154
2155 if (!fc->source || strlen(fc->source) == 0)
2156 return -EINVAL;
2157
2158 ret = bch2_split_devs(fc->source, &devs);
2159 if (ret)
2160 return ret;
2161
2162 darray_for_each(devs, i) {
2163 ret = darray_push(&devs_to_fs, bch2_path_to_fs(*i));
2164 if (ret)
2165 goto err;
2166 }
2167
2168 sb = sget(fc->fs_type, bch2_test_super, bch2_noset_super, fc->sb_flags|SB_NOSEC, &devs_to_fs);
2169 if (!IS_ERR(sb))
2170 goto got_sb;
2171
2172 c = bch2_fs_open(devs.data, devs.nr, opts);
2173 ret = PTR_ERR_OR_ZERO(c);
2174 if (ret)
2175 goto err;
2176
2177 if (opt_defined(opts, discard))
2178 set_bit(BCH_FS_discard_mount_opt_set, &c->flags);
2179
2180 /* Some options can't be parsed until after the fs is started: */
2181 opts = bch2_opts_empty();
2182 ret = bch2_parse_mount_opts(c, &opts, NULL, opts_parse->parse_later.buf);
2183 if (ret)
2184 goto err_stop_fs;
2185
2186 bch2_opts_apply(&c->opts, opts);
2187
2188 /*
2189 * need to initialise sb and set c->vfs_sb _before_ starting fs,
2190 * for blk_holder_ops
2191 */
2192
2193 sb = sget(fc->fs_type, NULL, bch2_set_super, fc->sb_flags|SB_NOSEC, c);
2194 ret = PTR_ERR_OR_ZERO(sb);
2195 if (ret)
2196 goto err_stop_fs;
2197 got_sb:
2198 c = sb->s_fs_info;
2199
2200 if (sb->s_root) {
2201 if ((fc->sb_flags ^ sb->s_flags) & SB_RDONLY) {
2202 ret = -EBUSY;
2203 goto err_put_super;
2204 }
2205 goto out;
2206 }
2207
2208 sb->s_blocksize = block_bytes(c);
2209 sb->s_blocksize_bits = ilog2(block_bytes(c));
2210 sb->s_maxbytes = MAX_LFS_FILESIZE;
2211 sb->s_op = &bch_super_operations;
2212 sb->s_export_op = &bch_export_ops;
2213 #ifdef CONFIG_BCACHEFS_QUOTA
2214 sb->s_qcop = &bch2_quotactl_operations;
2215 sb->s_quota_types = QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ;
2216 #endif
2217 sb->s_xattr = bch2_xattr_handlers;
2218 sb->s_magic = BCACHEFS_STATFS_MAGIC;
2219 sb->s_time_gran = c->sb.nsec_per_time_unit;
2220 sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
2221 sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec);
2222 super_set_uuid(sb, c->sb.user_uuid.b, sizeof(c->sb.user_uuid));
2223 super_set_sysfs_name_uuid(sb);
2224 sb->s_shrink->seeks = 0;
2225 c->vfs_sb = sb;
2226 strscpy(sb->s_id, c->name, sizeof(sb->s_id));
2227
2228 ret = super_setup_bdi(sb);
2229 if (ret)
2230 goto err_put_super;
2231
2232 sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
2233
2234 for_each_online_member(c, ca) {
2235 struct block_device *bdev = ca->disk_sb.bdev;
2236
2237 /* XXX: create an anonymous device for multi device filesystems */
2238 sb->s_bdev = bdev;
2239 sb->s_dev = bdev->bd_dev;
2240 percpu_ref_put(&ca->io_ref);
2241 break;
2242 }
2243
2244 c->dev = sb->s_dev;
2245
2246 #ifdef CONFIG_BCACHEFS_POSIX_ACL
2247 if (c->opts.acl)
2248 sb->s_flags |= SB_POSIXACL;
2249 #endif
2250
2251 sb->s_shrink->seeks = 0;
2252
2253 ret = bch2_fs_start(c);
2254 if (ret)
2255 goto err_put_super;
2256
2257 vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
2258 ret = PTR_ERR_OR_ZERO(vinode);
2259 bch_err_msg(c, ret, "mounting: error getting root inode");
2260 if (ret)
2261 goto err_put_super;
2262
2263 sb->s_root = d_make_root(vinode);
2264 if (!sb->s_root) {
2265 bch_err(c, "error mounting: error allocating root dentry");
2266 ret = -ENOMEM;
2267 goto err_put_super;
2268 }
2269
2270 sb->s_flags |= SB_ACTIVE;
2271 out:
2272 fc->root = dget(sb->s_root);
2273 err:
2274 darray_exit(&devs_to_fs);
2275 bch2_darray_str_exit(&devs);
2276 if (ret)
2277 pr_err("error: %s", bch2_err_str(ret));
2278 /*
2279 * On an inconsistency error in recovery we might see an -EROFS derived
2280 * errorcode (from the journal), but we don't want to return that to
2281 * userspace as that causes util-linux to retry the mount RO - which is
2282 * confusing:
2283 */
2284 if (bch2_err_matches(ret, EROFS) && ret != -EROFS)
2285 ret = -EIO;
2286 return bch2_err_class(ret);
2287
2288 err_stop_fs:
2289 bch2_fs_stop(c);
2290 goto err;
2291
2292 err_put_super:
2293 __bch2_fs_stop(c);
2294 deactivate_locked_super(sb);
2295 goto err;
2296 }
2297
bch2_kill_sb(struct super_block * sb)2298 static void bch2_kill_sb(struct super_block *sb)
2299 {
2300 struct bch_fs *c = sb->s_fs_info;
2301
2302 generic_shutdown_super(sb);
2303 bch2_fs_free(c);
2304 }
2305
bch2_fs_context_free(struct fs_context * fc)2306 static void bch2_fs_context_free(struct fs_context *fc)
2307 {
2308 struct bch2_opts_parse *opts = fc->fs_private;
2309
2310 if (opts) {
2311 printbuf_exit(&opts->parse_later);
2312 kfree(opts);
2313 }
2314 }
2315
bch2_fs_parse_param(struct fs_context * fc,struct fs_parameter * param)2316 static int bch2_fs_parse_param(struct fs_context *fc,
2317 struct fs_parameter *param)
2318 {
2319 /*
2320 * the "source" param, i.e., the name of the device(s) to mount,
2321 * is handled by the VFS layer.
2322 */
2323 if (!strcmp(param->key, "source"))
2324 return -ENOPARAM;
2325
2326 struct bch2_opts_parse *opts = fc->fs_private;
2327 struct bch_fs *c = NULL;
2328
2329 /* for reconfigure, we already have a struct bch_fs */
2330 if (fc->root)
2331 c = fc->root->d_sb->s_fs_info;
2332
2333 int ret = bch2_parse_one_mount_opt(c, &opts->opts,
2334 &opts->parse_later, param->key,
2335 param->string);
2336
2337 return bch2_err_class(ret);
2338 }
2339
bch2_fs_reconfigure(struct fs_context * fc)2340 static int bch2_fs_reconfigure(struct fs_context *fc)
2341 {
2342 struct super_block *sb = fc->root->d_sb;
2343 struct bch2_opts_parse *opts = fc->fs_private;
2344 struct bch_fs *c = sb->s_fs_info;
2345 int ret = 0;
2346
2347 opt_set(opts->opts, read_only, (fc->sb_flags & SB_RDONLY) != 0);
2348
2349 if (opts->opts.read_only != c->opts.read_only) {
2350 down_write(&c->state_lock);
2351
2352 if (opts->opts.read_only) {
2353 bch2_fs_read_only(c);
2354
2355 sb->s_flags |= SB_RDONLY;
2356 } else {
2357 ret = bch2_fs_read_write(c);
2358 if (ret) {
2359 bch_err(c, "error going rw: %i", ret);
2360 up_write(&c->state_lock);
2361 ret = -EINVAL;
2362 goto err;
2363 }
2364
2365 sb->s_flags &= ~SB_RDONLY;
2366 }
2367
2368 c->opts.read_only = opts->opts.read_only;
2369
2370 up_write(&c->state_lock);
2371 }
2372
2373 if (opt_defined(opts->opts, errors))
2374 c->opts.errors = opts->opts.errors;
2375 err:
2376 return bch2_err_class(ret);
2377 }
2378
2379 static const struct fs_context_operations bch2_context_ops = {
2380 .free = bch2_fs_context_free,
2381 .parse_param = bch2_fs_parse_param,
2382 .get_tree = bch2_fs_get_tree,
2383 .reconfigure = bch2_fs_reconfigure,
2384 };
2385
bch2_init_fs_context(struct fs_context * fc)2386 static int bch2_init_fs_context(struct fs_context *fc)
2387 {
2388 struct bch2_opts_parse *opts = kzalloc(sizeof(*opts), GFP_KERNEL);
2389
2390 if (!opts)
2391 return -ENOMEM;
2392
2393 opts->parse_later = PRINTBUF;
2394
2395 fc->ops = &bch2_context_ops;
2396 fc->fs_private = opts;
2397
2398 return 0;
2399 }
2400
bch2_fs_vfs_exit(struct bch_fs * c)2401 void bch2_fs_vfs_exit(struct bch_fs *c)
2402 {
2403 if (c->vfs_inodes_by_inum_table.ht.tbl)
2404 rhltable_destroy(&c->vfs_inodes_by_inum_table);
2405 if (c->vfs_inodes_table.tbl)
2406 rhashtable_destroy(&c->vfs_inodes_table);
2407 }
2408
bch2_fs_vfs_init(struct bch_fs * c)2409 int bch2_fs_vfs_init(struct bch_fs *c)
2410 {
2411 return rhashtable_init(&c->vfs_inodes_table, &bch2_vfs_inodes_params) ?:
2412 rhltable_init(&c->vfs_inodes_by_inum_table, &bch2_vfs_inodes_by_inum_params);
2413 }
2414
2415 static struct file_system_type bcache_fs_type = {
2416 .owner = THIS_MODULE,
2417 .name = "bcachefs",
2418 .init_fs_context = bch2_init_fs_context,
2419 .kill_sb = bch2_kill_sb,
2420 .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_LBS,
2421 };
2422
2423 MODULE_ALIAS_FS("bcachefs");
2424
bch2_vfs_exit(void)2425 void bch2_vfs_exit(void)
2426 {
2427 unregister_filesystem(&bcache_fs_type);
2428 kmem_cache_destroy(bch2_inode_cache);
2429 }
2430
bch2_vfs_init(void)2431 int __init bch2_vfs_init(void)
2432 {
2433 int ret = -ENOMEM;
2434
2435 bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT |
2436 SLAB_ACCOUNT);
2437 if (!bch2_inode_cache)
2438 goto err;
2439
2440 ret = register_filesystem(&bcache_fs_type);
2441 if (ret)
2442 goto err;
2443
2444 return 0;
2445 err:
2446 bch2_vfs_exit();
2447 return ret;
2448 }
2449
2450 #endif /* NO_BCACHEFS_FS */
2451