xref: /linux/fs/bcachefs/subvolume.c (revision f12a798a898dec36de9705d40a1b03e2418aabe0)
114b393eeSKent Overstreet // SPDX-License-Identifier: GPL-2.0
214b393eeSKent Overstreet 
314b393eeSKent Overstreet #include "bcachefs.h"
414b393eeSKent Overstreet #include "btree_key_cache.h"
514b393eeSKent Overstreet #include "btree_update.h"
6d4bf5eecSKent Overstreet #include "errcode.h"
714b393eeSKent Overstreet #include "error.h"
82027875bSKent Overstreet #include "fs.h"
914b393eeSKent Overstreet #include "subvolume.h"
1014b393eeSKent Overstreet 
1114b393eeSKent Overstreet /* Snapshot tree: */
1214b393eeSKent Overstreet 
1314b393eeSKent Overstreet void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
1414b393eeSKent Overstreet 			   struct bkey_s_c k)
1514b393eeSKent Overstreet {
1614b393eeSKent Overstreet 	struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
1714b393eeSKent Overstreet 
18416cc426SKent Overstreet 	prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u",
1914b393eeSKent Overstreet 	       BCH_SNAPSHOT_SUBVOL(s.v),
2014b393eeSKent Overstreet 	       BCH_SNAPSHOT_DELETED(s.v),
2114b393eeSKent Overstreet 	       le32_to_cpu(s.v->parent),
2214b393eeSKent Overstreet 	       le32_to_cpu(s.v->children[0]),
2314b393eeSKent Overstreet 	       le32_to_cpu(s.v->children[1]),
2414b393eeSKent Overstreet 	       le32_to_cpu(s.v->subvol));
2514b393eeSKent Overstreet }
2614b393eeSKent Overstreet 
27f0ac7df2SKent Overstreet int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k,
28facafdcbSKent Overstreet 			  unsigned flags, struct printbuf *err)
2914b393eeSKent Overstreet {
3014b393eeSKent Overstreet 	struct bkey_s_c_snapshot s;
3114b393eeSKent Overstreet 	u32 i, id;
3214b393eeSKent Overstreet 
33e88a75ebSKent Overstreet 	if (bkey_gt(k.k->p, POS(0, U32_MAX)) ||
34e88a75ebSKent Overstreet 	    bkey_lt(k.k->p, POS(0, 1))) {
35401ec4dbSKent Overstreet 		prt_printf(err, "bad pos");
3678c0b75cSKent Overstreet 		return -BCH_ERR_invalid_bkey;
37f0ac7df2SKent Overstreet 	}
3814b393eeSKent Overstreet 
3914b393eeSKent Overstreet 	s = bkey_s_c_to_snapshot(k);
4014b393eeSKent Overstreet 
4114b393eeSKent Overstreet 	id = le32_to_cpu(s.v->parent);
42f0ac7df2SKent Overstreet 	if (id && id <= k.k->p.offset) {
43401ec4dbSKent Overstreet 		prt_printf(err, "bad parent node (%u <= %llu)",
44f0ac7df2SKent Overstreet 		       id, k.k->p.offset);
4578c0b75cSKent Overstreet 		return -BCH_ERR_invalid_bkey;
46f0ac7df2SKent Overstreet 	}
4714b393eeSKent Overstreet 
48f0ac7df2SKent Overstreet 	if (le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1])) {
49401ec4dbSKent Overstreet 		prt_printf(err, "children not normalized");
5078c0b75cSKent Overstreet 		return -BCH_ERR_invalid_bkey;
51f0ac7df2SKent Overstreet 	}
5214b393eeSKent Overstreet 
5314b393eeSKent Overstreet 	if (s.v->children[0] &&
54f0ac7df2SKent Overstreet 	    s.v->children[0] == s.v->children[1]) {
55401ec4dbSKent Overstreet 		prt_printf(err, "duplicate child nodes");
5678c0b75cSKent Overstreet 		return -BCH_ERR_invalid_bkey;
57f0ac7df2SKent Overstreet 	}
5814b393eeSKent Overstreet 
5914b393eeSKent Overstreet 	for (i = 0; i < 2; i++) {
6014b393eeSKent Overstreet 		id = le32_to_cpu(s.v->children[i]);
6114b393eeSKent Overstreet 
62f0ac7df2SKent Overstreet 		if (id >= k.k->p.offset) {
63401ec4dbSKent Overstreet 			prt_printf(err, "bad child node (%u >= %llu)",
64f0ac7df2SKent Overstreet 			       id, k.k->p.offset);
6578c0b75cSKent Overstreet 			return -BCH_ERR_invalid_bkey;
66f0ac7df2SKent Overstreet 		}
6714b393eeSKent Overstreet 	}
6814b393eeSKent Overstreet 
69f0ac7df2SKent Overstreet 	return 0;
7014b393eeSKent Overstreet }
7114b393eeSKent Overstreet 
72904823deSKent Overstreet int bch2_mark_snapshot(struct btree_trans *trans,
732611a041SKent Overstreet 		       enum btree_id btree, unsigned level,
7414b393eeSKent Overstreet 		       struct bkey_s_c old, struct bkey_s_c new,
75904823deSKent Overstreet 		       unsigned flags)
7614b393eeSKent Overstreet {
77904823deSKent Overstreet 	struct bch_fs *c = trans->c;
7814b393eeSKent Overstreet 	struct snapshot_t *t;
7914b393eeSKent Overstreet 
8014b393eeSKent Overstreet 	t = genradix_ptr_alloc(&c->snapshots,
8114b393eeSKent Overstreet 			       U32_MAX - new.k->p.offset,
8214b393eeSKent Overstreet 			       GFP_KERNEL);
8314b393eeSKent Overstreet 	if (!t)
8465d48e35SKent Overstreet 		return -BCH_ERR_ENOMEM_mark_snapshot;
8514b393eeSKent Overstreet 
8614b393eeSKent Overstreet 	if (new.k->type == KEY_TYPE_snapshot) {
8714b393eeSKent Overstreet 		struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
8814b393eeSKent Overstreet 
8914b393eeSKent Overstreet 		t->parent	= le32_to_cpu(s.v->parent);
9014b393eeSKent Overstreet 		t->children[0]	= le32_to_cpu(s.v->children[0]);
9114b393eeSKent Overstreet 		t->children[1]	= le32_to_cpu(s.v->children[1]);
9214b393eeSKent Overstreet 		t->subvol	= BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0;
9314b393eeSKent Overstreet 	} else {
9414b393eeSKent Overstreet 		t->parent	= 0;
9514b393eeSKent Overstreet 		t->children[0]	= 0;
9614b393eeSKent Overstreet 		t->children[1]	= 0;
9714b393eeSKent Overstreet 		t->subvol	= 0;
9814b393eeSKent Overstreet 	}
9914b393eeSKent Overstreet 
10014b393eeSKent Overstreet 	return 0;
10114b393eeSKent Overstreet }
10214b393eeSKent Overstreet 
10314b393eeSKent Overstreet static int snapshot_lookup(struct btree_trans *trans, u32 id,
10414b393eeSKent Overstreet 			   struct bch_snapshot *s)
10514b393eeSKent Overstreet {
106bcb79a51SKent Overstreet 	return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, POS(0, id),
107bcb79a51SKent Overstreet 				       BTREE_ITER_WITH_UPDATES, snapshot, s);
10814b393eeSKent Overstreet }
10914b393eeSKent Overstreet 
11014b393eeSKent Overstreet static int snapshot_live(struct btree_trans *trans, u32 id)
11114b393eeSKent Overstreet {
11214b393eeSKent Overstreet 	struct bch_snapshot v;
11314b393eeSKent Overstreet 	int ret;
11414b393eeSKent Overstreet 
11514b393eeSKent Overstreet 	if (!id)
11614b393eeSKent Overstreet 		return 0;
11714b393eeSKent Overstreet 
118a1783320SKent Overstreet 	ret = snapshot_lookup(trans, id, &v);
11914b393eeSKent Overstreet 	if (ret == -ENOENT)
12014b393eeSKent Overstreet 		bch_err(trans->c, "snapshot node %u not found", id);
12114b393eeSKent Overstreet 	if (ret)
12214b393eeSKent Overstreet 		return ret;
12314b393eeSKent Overstreet 
12414b393eeSKent Overstreet 	return !BCH_SNAPSHOT_DELETED(&v);
12514b393eeSKent Overstreet }
12614b393eeSKent Overstreet 
127a1783320SKent Overstreet static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
12814b393eeSKent Overstreet {
12914b393eeSKent Overstreet 	struct bch_fs *c = trans->c;
13035f1a503SKent Overstreet 	unsigned i, nr_live = 0, live_idx = 0;
131a1783320SKent Overstreet 	struct bkey_s_c_snapshot snap;
132a1783320SKent Overstreet 	u32 id = k.k->p.offset, child[2];
133a1783320SKent Overstreet 
134a1783320SKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot)
135a1783320SKent Overstreet 		return 0;
136a1783320SKent Overstreet 
137a1783320SKent Overstreet 	snap = bkey_s_c_to_snapshot(k);
138a1783320SKent Overstreet 
139a1783320SKent Overstreet 	child[0] = le32_to_cpu(snap.v->children[0]);
140a1783320SKent Overstreet 	child[1] = le32_to_cpu(snap.v->children[1]);
14114b393eeSKent Overstreet 
14214b393eeSKent Overstreet 	for (i = 0; i < 2; i++) {
14335f1a503SKent Overstreet 		int ret = snapshot_live(trans, child[i]);
144a1019576SKent Overstreet 
14514b393eeSKent Overstreet 		if (ret < 0)
14635f1a503SKent Overstreet 			return ret;
14714b393eeSKent Overstreet 
14814b393eeSKent Overstreet 		if (ret)
14914b393eeSKent Overstreet 			live_idx = i;
15014b393eeSKent Overstreet 		nr_live += ret;
15114b393eeSKent Overstreet 	}
15214b393eeSKent Overstreet 
15314b393eeSKent Overstreet 	snapshot_t(c, id)->equiv = nr_live == 1
15414b393eeSKent Overstreet 		? snapshot_t(c, child[live_idx])->equiv
15514b393eeSKent Overstreet 		: id;
15635f1a503SKent Overstreet 	return 0;
15714b393eeSKent Overstreet }
15835f1a503SKent Overstreet 
15914b393eeSKent Overstreet /* fsck: */
16035f1a503SKent Overstreet static int check_snapshot(struct btree_trans *trans,
161a1783320SKent Overstreet 			  struct btree_iter *iter,
162a1783320SKent Overstreet 			  struct bkey_s_c k)
16314b393eeSKent Overstreet {
16435f1a503SKent Overstreet 	struct bch_fs *c = trans->c;
16535f1a503SKent Overstreet 	struct bkey_s_c_snapshot s;
16614b393eeSKent Overstreet 	struct bch_subvolume subvol;
16714b393eeSKent Overstreet 	struct bch_snapshot v;
16835f1a503SKent Overstreet 	struct printbuf buf = PRINTBUF;
16935f1a503SKent Overstreet 	bool should_have_subvol;
17014b393eeSKent Overstreet 	u32 i, id;
17135f1a503SKent Overstreet 	int ret = 0;
17214b393eeSKent Overstreet 
17335f1a503SKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot)
17435f1a503SKent Overstreet 		return 0;
17514b393eeSKent Overstreet 
17635f1a503SKent Overstreet 	s = bkey_s_c_to_snapshot(k);
17714b393eeSKent Overstreet 	id = le32_to_cpu(s.v->parent);
17814b393eeSKent Overstreet 	if (id) {
179a1783320SKent Overstreet 		ret = snapshot_lookup(trans, id, &v);
18014b393eeSKent Overstreet 		if (ret == -ENOENT)
18135f1a503SKent Overstreet 			bch_err(c, "snapshot with nonexistent parent:\n  %s",
18235f1a503SKent Overstreet 				(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf));
18314b393eeSKent Overstreet 		if (ret)
18435f1a503SKent Overstreet 			goto err;
18514b393eeSKent Overstreet 
18614b393eeSKent Overstreet 		if (le32_to_cpu(v.children[0]) != s.k->p.offset &&
18714b393eeSKent Overstreet 		    le32_to_cpu(v.children[1]) != s.k->p.offset) {
18835f1a503SKent Overstreet 			bch_err(c, "snapshot parent %u missing pointer to child %llu",
18914b393eeSKent Overstreet 				id, s.k->p.offset);
19035f1a503SKent Overstreet 			ret = -EINVAL;
19135f1a503SKent Overstreet 			goto err;
19214b393eeSKent Overstreet 		}
19314b393eeSKent Overstreet 	}
19414b393eeSKent Overstreet 
19514b393eeSKent Overstreet 	for (i = 0; i < 2 && s.v->children[i]; i++) {
19614b393eeSKent Overstreet 		id = le32_to_cpu(s.v->children[i]);
19714b393eeSKent Overstreet 
198a1783320SKent Overstreet 		ret = snapshot_lookup(trans, id, &v);
19914b393eeSKent Overstreet 		if (ret == -ENOENT)
20035f1a503SKent Overstreet 			bch_err(c, "snapshot node %llu has nonexistent child %u",
20114b393eeSKent Overstreet 				s.k->p.offset, id);
20214b393eeSKent Overstreet 		if (ret)
20335f1a503SKent Overstreet 			goto err;
20414b393eeSKent Overstreet 
20514b393eeSKent Overstreet 		if (le32_to_cpu(v.parent) != s.k->p.offset) {
20635f1a503SKent Overstreet 			bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)",
20714b393eeSKent Overstreet 				id, le32_to_cpu(v.parent), s.k->p.offset);
20835f1a503SKent Overstreet 			ret = -EINVAL;
20935f1a503SKent Overstreet 			goto err;
21014b393eeSKent Overstreet 		}
21114b393eeSKent Overstreet 	}
21214b393eeSKent Overstreet 
21335f1a503SKent Overstreet 	should_have_subvol = BCH_SNAPSHOT_SUBVOL(s.v) &&
21435f1a503SKent Overstreet 		!BCH_SNAPSHOT_DELETED(s.v);
21535f1a503SKent Overstreet 
21635f1a503SKent Overstreet 	if (should_have_subvol) {
21735f1a503SKent Overstreet 		id = le32_to_cpu(s.v->subvol);
218a1783320SKent Overstreet 		ret = bch2_subvolume_get(trans, id, 0, false, &subvol);
21935f1a503SKent Overstreet 		if (ret == -ENOENT)
22035f1a503SKent Overstreet 			bch_err(c, "snapshot points to nonexistent subvolume:\n  %s",
22135f1a503SKent Overstreet 				(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf));
22235f1a503SKent Overstreet 		if (ret)
22335f1a503SKent Overstreet 			goto err;
22435f1a503SKent Overstreet 
22535f1a503SKent Overstreet 		if (BCH_SNAPSHOT_SUBVOL(s.v) != (le32_to_cpu(subvol.snapshot) == s.k->p.offset)) {
22635f1a503SKent Overstreet 			bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
22735f1a503SKent Overstreet 				s.k->p.offset);
22835f1a503SKent Overstreet 			ret = -EINVAL;
22935f1a503SKent Overstreet 			goto err;
23035f1a503SKent Overstreet 		}
23135f1a503SKent Overstreet 	} else {
23235f1a503SKent Overstreet 		if (fsck_err_on(s.v->subvol, c, "snapshot should not point to subvol:\n  %s",
23335f1a503SKent Overstreet 				(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
23435f1a503SKent Overstreet 			struct bkey_i_snapshot *u = bch2_trans_kmalloc(trans, sizeof(*u));
23535f1a503SKent Overstreet 
23635f1a503SKent Overstreet 			ret = PTR_ERR_OR_ZERO(u);
23735f1a503SKent Overstreet 			if (ret)
23835f1a503SKent Overstreet 				goto err;
23935f1a503SKent Overstreet 
24035f1a503SKent Overstreet 			bkey_reassemble(&u->k_i, s.s_c);
24135f1a503SKent Overstreet 			u->v.subvol = 0;
24235f1a503SKent Overstreet 			ret = bch2_trans_update(trans, iter, &u->k_i, 0);
24335f1a503SKent Overstreet 			if (ret)
24435f1a503SKent Overstreet 				goto err;
24535f1a503SKent Overstreet 		}
24635f1a503SKent Overstreet 	}
24735f1a503SKent Overstreet 
24835f1a503SKent Overstreet 	if (BCH_SNAPSHOT_DELETED(s.v))
24935f1a503SKent Overstreet 		set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
25035f1a503SKent Overstreet err:
25135f1a503SKent Overstreet fsck_err:
25235f1a503SKent Overstreet 	printbuf_exit(&buf);
25335f1a503SKent Overstreet 	return ret;
25414b393eeSKent Overstreet }
25514b393eeSKent Overstreet 
2564ab35c34SKent Overstreet int bch2_fs_check_snapshots(struct bch_fs *c)
25714b393eeSKent Overstreet {
25814b393eeSKent Overstreet 	struct btree_trans trans;
25914b393eeSKent Overstreet 	struct btree_iter iter;
260a1783320SKent Overstreet 	struct bkey_s_c k;
26114b393eeSKent Overstreet 	int ret;
26214b393eeSKent Overstreet 
26314b393eeSKent Overstreet 	bch2_trans_init(&trans, c, 0, 0);
26414b393eeSKent Overstreet 
265c59d66b5SKent Overstreet 	ret = for_each_btree_key_commit(&trans, iter,
266c59d66b5SKent Overstreet 			BTREE_ID_snapshots, POS_MIN,
267a1783320SKent Overstreet 			BTREE_ITER_PREFETCH, k,
268a1783320SKent Overstreet 			NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
269a1783320SKent Overstreet 		check_snapshot(&trans, &iter, k));
27014b393eeSKent Overstreet 
27135f1a503SKent Overstreet 	if (ret)
27214b393eeSKent Overstreet 		bch_err(c, "error %i checking snapshots", ret);
27314b393eeSKent Overstreet 
27414b393eeSKent Overstreet 	bch2_trans_exit(&trans);
27514b393eeSKent Overstreet 	return ret;
27614b393eeSKent Overstreet }
27714b393eeSKent Overstreet 
2784ab35c34SKent Overstreet static int check_subvol(struct btree_trans *trans,
2796738dd19SKent Overstreet 			struct btree_iter *iter,
2806738dd19SKent Overstreet 			struct bkey_s_c k)
2814ab35c34SKent Overstreet {
2824ab35c34SKent Overstreet 	struct bkey_s_c_subvolume subvol;
28335f1a503SKent Overstreet 	struct bch_snapshot snapshot;
28435f1a503SKent Overstreet 	unsigned snapid;
2854ab35c34SKent Overstreet 	int ret;
2864ab35c34SKent Overstreet 
2874ab35c34SKent Overstreet 	if (k.k->type != KEY_TYPE_subvolume)
2884ab35c34SKent Overstreet 		return 0;
2894ab35c34SKent Overstreet 
2904ab35c34SKent Overstreet 	subvol = bkey_s_c_to_subvolume(k);
29135f1a503SKent Overstreet 	snapid = le32_to_cpu(subvol.v->snapshot);
29235f1a503SKent Overstreet 	ret = snapshot_lookup(trans, snapid, &snapshot);
29335f1a503SKent Overstreet 
29435f1a503SKent Overstreet 	if (ret == -ENOENT)
29535f1a503SKent Overstreet 		bch_err(trans->c, "subvolume %llu points to nonexistent snapshot %u",
29635f1a503SKent Overstreet 			k.k->p.offset, snapid);
29735f1a503SKent Overstreet 	if (ret)
29835f1a503SKent Overstreet 		return ret;
2994ab35c34SKent Overstreet 
3004ab35c34SKent Overstreet 	if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
3014ab35c34SKent Overstreet 		ret = bch2_subvolume_delete(trans, iter->pos.offset);
302549d173cSKent Overstreet 		if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
303d4bf5eecSKent Overstreet 			bch_err(trans->c, "error deleting subvolume %llu: %s",
304d4bf5eecSKent Overstreet 				iter->pos.offset, bch2_err_str(ret));
3054ab35c34SKent Overstreet 		if (ret)
3064ab35c34SKent Overstreet 			return ret;
3074ab35c34SKent Overstreet 	}
3084ab35c34SKent Overstreet 
3094ab35c34SKent Overstreet 	return 0;
3104ab35c34SKent Overstreet }
3114ab35c34SKent Overstreet 
3124ab35c34SKent Overstreet int bch2_fs_check_subvols(struct bch_fs *c)
3134ab35c34SKent Overstreet {
3144ab35c34SKent Overstreet 	struct btree_trans trans;
3154ab35c34SKent Overstreet 	struct btree_iter iter;
3166738dd19SKent Overstreet 	struct bkey_s_c k;
3174ab35c34SKent Overstreet 	int ret;
3184ab35c34SKent Overstreet 
31935f1a503SKent Overstreet 	bch2_trans_init(&trans, c, 0, 0);
3204ab35c34SKent Overstreet 
3216738dd19SKent Overstreet 	ret = for_each_btree_key_commit(&trans, iter,
3226738dd19SKent Overstreet 			BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
3236738dd19SKent Overstreet 			NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
3246738dd19SKent Overstreet 		check_subvol(&trans, &iter, k));
3254ab35c34SKent Overstreet 
3264ab35c34SKent Overstreet 	bch2_trans_exit(&trans);
3274ab35c34SKent Overstreet 
3284ab35c34SKent Overstreet 	return ret;
3294ab35c34SKent Overstreet }
3304ab35c34SKent Overstreet 
33114b393eeSKent Overstreet void bch2_fs_snapshots_exit(struct bch_fs *c)
33214b393eeSKent Overstreet {
33314b393eeSKent Overstreet 	genradix_free(&c->snapshots);
33414b393eeSKent Overstreet }
33514b393eeSKent Overstreet 
33614b393eeSKent Overstreet int bch2_fs_snapshots_start(struct bch_fs *c)
33714b393eeSKent Overstreet {
33814b393eeSKent Overstreet 	struct btree_trans trans;
33914b393eeSKent Overstreet 	struct btree_iter iter;
34014b393eeSKent Overstreet 	struct bkey_s_c k;
34114b393eeSKent Overstreet 	int ret = 0;
34214b393eeSKent Overstreet 
34314b393eeSKent Overstreet 	bch2_trans_init(&trans, c, 0, 0);
34414b393eeSKent Overstreet 
345a1783320SKent Overstreet 	for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
346a1783320SKent Overstreet 			   POS_MIN, 0, k,
3472611a041SKent Overstreet 		bch2_mark_snapshot(&trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
348a1783320SKent Overstreet 		bch2_snapshot_set_equiv(&trans, k));
34914b393eeSKent Overstreet 
35014b393eeSKent Overstreet 	bch2_trans_exit(&trans);
35135f1a503SKent Overstreet 
35235f1a503SKent Overstreet 	if (ret)
353d4bf5eecSKent Overstreet 		bch_err(c, "error starting snapshots: %s", bch2_err_str(ret));
35414b393eeSKent Overstreet 	return ret;
35514b393eeSKent Overstreet }
35614b393eeSKent Overstreet 
35714b393eeSKent Overstreet /*
35814b393eeSKent Overstreet  * Mark a snapshot as deleted, for future cleanup:
35914b393eeSKent Overstreet  */
36014b393eeSKent Overstreet static int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
36114b393eeSKent Overstreet {
36214b393eeSKent Overstreet 	struct btree_iter iter;
36314b393eeSKent Overstreet 	struct bkey_i_snapshot *s;
36414b393eeSKent Overstreet 	int ret = 0;
36514b393eeSKent Overstreet 
36634dfa5dbSKent Overstreet 	s = bch2_bkey_get_mut_typed(trans, &iter,
36734dfa5dbSKent Overstreet 				    BTREE_ID_snapshots, POS(0, id),
36834dfa5dbSKent Overstreet 				    0, snapshot);
369994ba475SKent Overstreet 	ret = PTR_ERR_OR_ZERO(s);
370994ba475SKent Overstreet 	if (unlikely(ret)) {
371994ba475SKent Overstreet 		bch2_fs_inconsistent_on(ret == -ENOENT, trans->c, "missing snapshot %u", id);
372*f12a798aSKent Overstreet 		return ret;
37314b393eeSKent Overstreet 	}
37414b393eeSKent Overstreet 
37514b393eeSKent Overstreet 	/* already deleted? */
376994ba475SKent Overstreet 	if (BCH_SNAPSHOT_DELETED(&s->v))
37714b393eeSKent Overstreet 		goto err;
37814b393eeSKent Overstreet 
37914b393eeSKent Overstreet 	SET_BCH_SNAPSHOT_DELETED(&s->v, true);
380416cc426SKent Overstreet 	SET_BCH_SNAPSHOT_SUBVOL(&s->v, false);
381416cc426SKent Overstreet 	s->v.subvol = 0;
38214b393eeSKent Overstreet err:
38314b393eeSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
38414b393eeSKent Overstreet 	return ret;
38514b393eeSKent Overstreet }
38614b393eeSKent Overstreet 
38714b393eeSKent Overstreet static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
38814b393eeSKent Overstreet {
389bcb79a51SKent Overstreet 	struct bch_fs *c = trans->c;
39014b393eeSKent Overstreet 	struct btree_iter iter, p_iter = (struct btree_iter) { NULL };
39114b393eeSKent Overstreet 	struct bkey_s_c_snapshot s;
39214b393eeSKent Overstreet 	u32 parent_id;
39314b393eeSKent Overstreet 	unsigned i;
39414b393eeSKent Overstreet 	int ret = 0;
39514b393eeSKent Overstreet 
396bcb79a51SKent Overstreet 	s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
397bcb79a51SKent Overstreet 				     BTREE_ITER_INTENT, snapshot);
398bcb79a51SKent Overstreet 	ret = bkey_err(s);
399bcb79a51SKent Overstreet 	bch2_fs_inconsistent_on(ret == -ENOENT, c, "missing snapshot %u", id);
400bcb79a51SKent Overstreet 
40114b393eeSKent Overstreet 	if (ret)
40214b393eeSKent Overstreet 		goto err;
40314b393eeSKent Overstreet 
40414b393eeSKent Overstreet 	BUG_ON(!BCH_SNAPSHOT_DELETED(s.v));
40514b393eeSKent Overstreet 	parent_id = le32_to_cpu(s.v->parent);
40614b393eeSKent Overstreet 
40714b393eeSKent Overstreet 	if (parent_id) {
408994ba475SKent Overstreet 		struct bkey_i_snapshot *parent;
409994ba475SKent Overstreet 
41034dfa5dbSKent Overstreet 		parent = bch2_bkey_get_mut_typed(trans, &p_iter,
41134dfa5dbSKent Overstreet 				     BTREE_ID_snapshots, POS(0, parent_id),
41234dfa5dbSKent Overstreet 				     0, snapshot);
413994ba475SKent Overstreet 		ret = PTR_ERR_OR_ZERO(parent);
414994ba475SKent Overstreet 		if (unlikely(ret)) {
415bcb79a51SKent Overstreet 			bch2_fs_inconsistent_on(ret == -ENOENT, c, "missing snapshot %u", parent_id);
41614b393eeSKent Overstreet 			goto err;
41714b393eeSKent Overstreet 		}
41814b393eeSKent Overstreet 
41914b393eeSKent Overstreet 		for (i = 0; i < 2; i++)
42014b393eeSKent Overstreet 			if (le32_to_cpu(parent->v.children[i]) == id)
42114b393eeSKent Overstreet 				break;
42214b393eeSKent Overstreet 
42314b393eeSKent Overstreet 		if (i == 2)
424bcb79a51SKent Overstreet 			bch_err(c, "snapshot %u missing child pointer to %u",
42514b393eeSKent Overstreet 				parent_id, id);
42614b393eeSKent Overstreet 		else
42714b393eeSKent Overstreet 			parent->v.children[i] = 0;
42814b393eeSKent Overstreet 
42914b393eeSKent Overstreet 		if (le32_to_cpu(parent->v.children[0]) <
43014b393eeSKent Overstreet 		    le32_to_cpu(parent->v.children[1]))
43114b393eeSKent Overstreet 			swap(parent->v.children[0],
43214b393eeSKent Overstreet 			     parent->v.children[1]);
43314b393eeSKent Overstreet 	}
43414b393eeSKent Overstreet 
43514b393eeSKent Overstreet 	ret = bch2_btree_delete_at(trans, &iter, 0);
43614b393eeSKent Overstreet err:
43714b393eeSKent Overstreet 	bch2_trans_iter_exit(trans, &p_iter);
43814b393eeSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
43914b393eeSKent Overstreet 	return ret;
44014b393eeSKent Overstreet }
44114b393eeSKent Overstreet 
4427f6ff935SKent Overstreet int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
44314b393eeSKent Overstreet 			      u32 *new_snapids,
44414b393eeSKent Overstreet 			      u32 *snapshot_subvols,
44514b393eeSKent Overstreet 			      unsigned nr_snapids)
44614b393eeSKent Overstreet {
44734dfa5dbSKent Overstreet 	struct btree_iter iter, parent_iter = { NULL };
44814b393eeSKent Overstreet 	struct bkey_i_snapshot *n;
44914b393eeSKent Overstreet 	struct bkey_s_c k;
45014b393eeSKent Overstreet 	unsigned i;
45114b393eeSKent Overstreet 	int ret = 0;
45214b393eeSKent Overstreet 
45314b393eeSKent Overstreet 	bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
45414b393eeSKent Overstreet 			     POS_MIN, BTREE_ITER_INTENT);
45514b393eeSKent Overstreet 	k = bch2_btree_iter_peek(&iter);
45614b393eeSKent Overstreet 	ret = bkey_err(k);
45714b393eeSKent Overstreet 	if (ret)
45814b393eeSKent Overstreet 		goto err;
45914b393eeSKent Overstreet 
46014b393eeSKent Overstreet 	for (i = 0; i < nr_snapids; i++) {
46114b393eeSKent Overstreet 		k = bch2_btree_iter_prev_slot(&iter);
46214b393eeSKent Overstreet 		ret = bkey_err(k);
46314b393eeSKent Overstreet 		if (ret)
46414b393eeSKent Overstreet 			goto err;
46514b393eeSKent Overstreet 
46614b393eeSKent Overstreet 		if (!k.k || !k.k->p.offset) {
467098ef98dSKent Overstreet 			ret = -BCH_ERR_ENOSPC_snapshot_create;
46814b393eeSKent Overstreet 			goto err;
46914b393eeSKent Overstreet 		}
47014b393eeSKent Overstreet 
471f8cb35fdSKent Overstreet 		n = bch2_bkey_alloc(trans, &iter, 0, snapshot);
47214b393eeSKent Overstreet 		ret = PTR_ERR_OR_ZERO(n);
47314b393eeSKent Overstreet 		if (ret)
47494a3e1a6SKent Overstreet 			goto err;
47514b393eeSKent Overstreet 
47614b393eeSKent Overstreet 		n->v.flags	= 0;
47714b393eeSKent Overstreet 		n->v.parent	= cpu_to_le32(parent);
47814b393eeSKent Overstreet 		n->v.subvol	= cpu_to_le32(snapshot_subvols[i]);
47914b393eeSKent Overstreet 		n->v.pad	= 0;
48014b393eeSKent Overstreet 		SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);
48114b393eeSKent Overstreet 
482f8cb35fdSKent Overstreet 		ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
483ae1f5623SKent Overstreet 					 bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0);
48414b393eeSKent Overstreet 		if (ret)
48594a3e1a6SKent Overstreet 			goto err;
48614b393eeSKent Overstreet 
48714b393eeSKent Overstreet 		new_snapids[i]	= iter.pos.offset;
48814b393eeSKent Overstreet 	}
48914b393eeSKent Overstreet 
49014b393eeSKent Overstreet 	if (parent) {
49134dfa5dbSKent Overstreet 		n = bch2_bkey_get_mut_typed(trans, &parent_iter,
49234dfa5dbSKent Overstreet 				BTREE_ID_snapshots, POS(0, parent),
49334dfa5dbSKent Overstreet 				0, snapshot);
494994ba475SKent Overstreet 		ret = PTR_ERR_OR_ZERO(n);
495994ba475SKent Overstreet 		if (unlikely(ret)) {
496994ba475SKent Overstreet 			if (ret == -ENOENT)
49714b393eeSKent Overstreet 				bch_err(trans->c, "snapshot %u not found", parent);
49814b393eeSKent Overstreet 			goto err;
49914b393eeSKent Overstreet 		}
50014b393eeSKent Overstreet 
50114b393eeSKent Overstreet 		if (n->v.children[0] || n->v.children[1]) {
50214b393eeSKent Overstreet 			bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children");
50314b393eeSKent Overstreet 			ret = -EINVAL;
50414b393eeSKent Overstreet 			goto err;
50514b393eeSKent Overstreet 		}
50614b393eeSKent Overstreet 
50714b393eeSKent Overstreet 		n->v.children[0] = cpu_to_le32(new_snapids[0]);
50814b393eeSKent Overstreet 		n->v.children[1] = cpu_to_le32(new_snapids[1]);
50935f1a503SKent Overstreet 		n->v.subvol = 0;
51014b393eeSKent Overstreet 		SET_BCH_SNAPSHOT_SUBVOL(&n->v, false);
51134dfa5dbSKent Overstreet 		ret   = bch2_trans_update(trans, &parent_iter, &n->k_i, 0);
51294a3e1a6SKent Overstreet 		if (ret)
51394a3e1a6SKent Overstreet 			goto err;
51414b393eeSKent Overstreet 	}
51514b393eeSKent Overstreet err:
51634dfa5dbSKent Overstreet 	bch2_trans_iter_exit(trans, &parent_iter);
51714b393eeSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
51814b393eeSKent Overstreet 	return ret;
51914b393eeSKent Overstreet }
52014b393eeSKent Overstreet 
5216738dd19SKent Overstreet static int snapshot_delete_key(struct btree_trans *trans,
5226738dd19SKent Overstreet 			       struct btree_iter *iter,
5236738dd19SKent Overstreet 			       struct bkey_s_c k,
52491d961baSKent Overstreet 			       snapshot_id_list *deleted,
5256738dd19SKent Overstreet 			       snapshot_id_list *equiv_seen,
5266738dd19SKent Overstreet 			       struct bpos *last_pos)
52714b393eeSKent Overstreet {
52814b393eeSKent Overstreet 	struct bch_fs *c = trans->c;
52914b393eeSKent Overstreet 	u32 equiv = snapshot_t(c, k.k->p.snapshot)->equiv;
53014b393eeSKent Overstreet 
531e88a75ebSKent Overstreet 	if (!bkey_eq(k.k->p, *last_pos))
5326738dd19SKent Overstreet 		equiv_seen->nr = 0;
5336738dd19SKent Overstreet 	*last_pos = k.k->p;
53414b393eeSKent Overstreet 
53514b393eeSKent Overstreet 	if (snapshot_list_has_id(deleted, k.k->p.snapshot) ||
5366738dd19SKent Overstreet 	    snapshot_list_has_id(equiv_seen, equiv)) {
5376738dd19SKent Overstreet 		return bch2_btree_delete_at(trans, iter,
5386738dd19SKent Overstreet 					    BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
53914b393eeSKent Overstreet 	} else {
5406738dd19SKent Overstreet 		return snapshot_list_add(c, equiv_seen, equiv);
54114b393eeSKent Overstreet 	}
54214b393eeSKent Overstreet }
54314b393eeSKent Overstreet 
544a1783320SKent Overstreet static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct btree_iter *iter,
545a1783320SKent Overstreet 					  struct bkey_s_c k)
546a1783320SKent Overstreet {
547a1783320SKent Overstreet 	struct bkey_s_c_snapshot snap;
548a1783320SKent Overstreet 	u32 children[2];
549a1783320SKent Overstreet 	int ret;
550a1783320SKent Overstreet 
551a1783320SKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot)
552a1783320SKent Overstreet 		return 0;
553a1783320SKent Overstreet 
554a1783320SKent Overstreet 	snap = bkey_s_c_to_snapshot(k);
555a1783320SKent Overstreet 	if (BCH_SNAPSHOT_DELETED(snap.v) ||
556a1783320SKent Overstreet 	    BCH_SNAPSHOT_SUBVOL(snap.v))
557a1783320SKent Overstreet 		return 0;
558a1783320SKent Overstreet 
559a1783320SKent Overstreet 	children[0] = le32_to_cpu(snap.v->children[0]);
560a1783320SKent Overstreet 	children[1] = le32_to_cpu(snap.v->children[1]);
561a1783320SKent Overstreet 
562a1783320SKent Overstreet 	ret   = snapshot_live(trans, children[0]) ?:
563a1783320SKent Overstreet 		snapshot_live(trans, children[1]);
564a1783320SKent Overstreet 	if (ret < 0)
565a1783320SKent Overstreet 		return ret;
566a1783320SKent Overstreet 
567a1783320SKent Overstreet 	if (!ret)
568a1783320SKent Overstreet 		return bch2_snapshot_node_set_deleted(trans, k.k->p.offset);
569a1783320SKent Overstreet 	return 0;
570a1783320SKent Overstreet }
571a1783320SKent Overstreet 
5724ab35c34SKent Overstreet int bch2_delete_dead_snapshots(struct bch_fs *c)
57314b393eeSKent Overstreet {
57414b393eeSKent Overstreet 	struct btree_trans trans;
57514b393eeSKent Overstreet 	struct btree_iter iter;
57614b393eeSKent Overstreet 	struct bkey_s_c k;
57714b393eeSKent Overstreet 	struct bkey_s_c_snapshot snap;
57891d961baSKent Overstreet 	snapshot_id_list deleted = { 0 };
579a1783320SKent Overstreet 	u32 i, id;
58014b393eeSKent Overstreet 	int ret = 0;
58114b393eeSKent Overstreet 
5824ab35c34SKent Overstreet 	if (!test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags))
5834ab35c34SKent Overstreet 		return 0;
5844ab35c34SKent Overstreet 
5854ab35c34SKent Overstreet 	if (!test_bit(BCH_FS_STARTED, &c->flags)) {
5864ab35c34SKent Overstreet 		ret = bch2_fs_read_write_early(c);
5874ab35c34SKent Overstreet 		if (ret) {
588d4bf5eecSKent Overstreet 			bch_err(c, "error deleleting dead snapshots: error going rw: %s", bch2_err_str(ret));
5894ab35c34SKent Overstreet 			return ret;
5904ab35c34SKent Overstreet 		}
5914ab35c34SKent Overstreet 	}
5924ab35c34SKent Overstreet 
59314b393eeSKent Overstreet 	bch2_trans_init(&trans, c, 0, 0);
59414b393eeSKent Overstreet 
59514b393eeSKent Overstreet 	/*
59614b393eeSKent Overstreet 	 * For every snapshot node: If we have no live children and it's not
59714b393eeSKent Overstreet 	 * pointed to by a subvolume, delete it:
59814b393eeSKent Overstreet 	 */
599a1783320SKent Overstreet 	ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots,
600a1783320SKent Overstreet 			POS_MIN, 0, k,
601a1783320SKent Overstreet 			NULL, NULL, 0,
602a1783320SKent Overstreet 		bch2_delete_redundant_snapshot(&trans, &iter, k));
60314b393eeSKent Overstreet 	if (ret) {
604d4bf5eecSKent Overstreet 		bch_err(c, "error deleting redundant snapshots: %s", bch2_err_str(ret));
60514b393eeSKent Overstreet 		goto err;
60614b393eeSKent Overstreet 	}
60714b393eeSKent Overstreet 
608a1783320SKent Overstreet 	for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
609a1783320SKent Overstreet 			   POS_MIN, 0, k,
610a1783320SKent Overstreet 		bch2_snapshot_set_equiv(&trans, k));
611a1783320SKent Overstreet 	if (ret) {
612d4bf5eecSKent Overstreet 		bch_err(c, "error in bch2_snapshots_set_equiv: %s", bch2_err_str(ret));
61314b393eeSKent Overstreet 		goto err;
614a1783320SKent Overstreet 	}
61514b393eeSKent Overstreet 
61614b393eeSKent Overstreet 	for_each_btree_key(&trans, iter, BTREE_ID_snapshots,
61714b393eeSKent Overstreet 			   POS_MIN, 0, k, ret) {
61814b393eeSKent Overstreet 		if (k.k->type != KEY_TYPE_snapshot)
61914b393eeSKent Overstreet 			continue;
62014b393eeSKent Overstreet 
62114b393eeSKent Overstreet 		snap = bkey_s_c_to_snapshot(k);
62214b393eeSKent Overstreet 		if (BCH_SNAPSHOT_DELETED(snap.v)) {
623597dee1cSKent Overstreet 			ret = snapshot_list_add(c, &deleted, k.k->p.offset);
62414b393eeSKent Overstreet 			if (ret)
62514b393eeSKent Overstreet 				break;
62614b393eeSKent Overstreet 		}
62714b393eeSKent Overstreet 	}
62814b393eeSKent Overstreet 	bch2_trans_iter_exit(&trans, &iter);
62914b393eeSKent Overstreet 
63014b393eeSKent Overstreet 	if (ret) {
631d4bf5eecSKent Overstreet 		bch_err(c, "error walking snapshots: %s", bch2_err_str(ret));
63214b393eeSKent Overstreet 		goto err;
63314b393eeSKent Overstreet 	}
63414b393eeSKent Overstreet 
63514b393eeSKent Overstreet 	for (id = 0; id < BTREE_ID_NR; id++) {
6366738dd19SKent Overstreet 		struct bpos last_pos = POS_MIN;
6376738dd19SKent Overstreet 		snapshot_id_list equiv_seen = { 0 };
6386738dd19SKent Overstreet 
63914b393eeSKent Overstreet 		if (!btree_type_has_snapshots(id))
64014b393eeSKent Overstreet 			continue;
64114b393eeSKent Overstreet 
6426738dd19SKent Overstreet 		ret = for_each_btree_key_commit(&trans, iter,
6436738dd19SKent Overstreet 				id, POS_MIN,
6446738dd19SKent Overstreet 				BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
6456738dd19SKent Overstreet 				NULL, NULL, BTREE_INSERT_NOFAIL,
6466738dd19SKent Overstreet 			snapshot_delete_key(&trans, &iter, k, &deleted, &equiv_seen, &last_pos));
6476738dd19SKent Overstreet 
6486738dd19SKent Overstreet 		darray_exit(&equiv_seen);
6496738dd19SKent Overstreet 
65014b393eeSKent Overstreet 		if (ret) {
651d4bf5eecSKent Overstreet 			bch_err(c, "error deleting snapshot keys: %s", bch2_err_str(ret));
65214b393eeSKent Overstreet 			goto err;
65314b393eeSKent Overstreet 		}
65414b393eeSKent Overstreet 	}
65514b393eeSKent Overstreet 
65614b393eeSKent Overstreet 	for (i = 0; i < deleted.nr; i++) {
657e68914caSKent Overstreet 		ret = commit_do(&trans, NULL, NULL, 0,
65891d961baSKent Overstreet 			bch2_snapshot_node_delete(&trans, deleted.data[i]));
65914b393eeSKent Overstreet 		if (ret) {
660d4bf5eecSKent Overstreet 			bch_err(c, "error deleting snapshot %u: %s",
661d4bf5eecSKent Overstreet 				deleted.data[i], bch2_err_str(ret));
66214b393eeSKent Overstreet 			goto err;
66314b393eeSKent Overstreet 		}
66414b393eeSKent Overstreet 	}
6654ab35c34SKent Overstreet 
6664ab35c34SKent Overstreet 	clear_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
66714b393eeSKent Overstreet err:
66891d961baSKent Overstreet 	darray_exit(&deleted);
66914b393eeSKent Overstreet 	bch2_trans_exit(&trans);
6704ab35c34SKent Overstreet 	return ret;
6714ab35c34SKent Overstreet }
6724ab35c34SKent Overstreet 
6734ab35c34SKent Overstreet static void bch2_delete_dead_snapshots_work(struct work_struct *work)
6744ab35c34SKent Overstreet {
6754ab35c34SKent Overstreet 	struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
6764ab35c34SKent Overstreet 
6774ab35c34SKent Overstreet 	bch2_delete_dead_snapshots(c);
678d94189adSKent Overstreet 	bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
67914b393eeSKent Overstreet }
68014b393eeSKent Overstreet 
6814ab35c34SKent Overstreet void bch2_delete_dead_snapshots_async(struct bch_fs *c)
68214b393eeSKent Overstreet {
683d94189adSKent Overstreet 	if (bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) &&
6848bff9875SBrian Foster 	    !queue_work(c->write_ref_wq, &c->snapshot_delete_work))
685d94189adSKent Overstreet 		bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
68614b393eeSKent Overstreet }
68714b393eeSKent Overstreet 
68814b393eeSKent Overstreet static int bch2_delete_dead_snapshots_hook(struct btree_trans *trans,
68914b393eeSKent Overstreet 					   struct btree_trans_commit_hook *h)
69014b393eeSKent Overstreet {
6914ab35c34SKent Overstreet 	struct bch_fs *c = trans->c;
6924ab35c34SKent Overstreet 
6934ab35c34SKent Overstreet 	set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
6944ab35c34SKent Overstreet 
6954ab35c34SKent Overstreet 	if (!test_bit(BCH_FS_FSCK_DONE, &c->flags))
6964ab35c34SKent Overstreet 		return 0;
6974ab35c34SKent Overstreet 
6984ab35c34SKent Overstreet 	bch2_delete_dead_snapshots_async(c);
69914b393eeSKent Overstreet 	return 0;
70014b393eeSKent Overstreet }
70114b393eeSKent Overstreet 
70214b393eeSKent Overstreet /* Subvolumes: */
70314b393eeSKent Overstreet 
704f0ac7df2SKent Overstreet int bch2_subvolume_invalid(const struct bch_fs *c, struct bkey_s_c k,
705facafdcbSKent Overstreet 			   unsigned flags, struct printbuf *err)
70614b393eeSKent Overstreet {
707e88a75ebSKent Overstreet 	if (bkey_lt(k.k->p, SUBVOL_POS_MIN) ||
708e88a75ebSKent Overstreet 	    bkey_gt(k.k->p, SUBVOL_POS_MAX)) {
709401ec4dbSKent Overstreet 		prt_printf(err, "invalid pos");
71078c0b75cSKent Overstreet 		return -BCH_ERR_invalid_bkey;
711f0ac7df2SKent Overstreet 	}
71214b393eeSKent Overstreet 
713f0ac7df2SKent Overstreet 	return 0;
71414b393eeSKent Overstreet }
71514b393eeSKent Overstreet 
71614b393eeSKent Overstreet void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c,
71714b393eeSKent Overstreet 			    struct bkey_s_c k)
71814b393eeSKent Overstreet {
71914b393eeSKent Overstreet 	struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
72014b393eeSKent Overstreet 
721401ec4dbSKent Overstreet 	prt_printf(out, "root %llu snapshot id %u",
72214b393eeSKent Overstreet 	       le64_to_cpu(s.v->inode),
72314b393eeSKent Overstreet 	       le32_to_cpu(s.v->snapshot));
72414b393eeSKent Overstreet }
72514b393eeSKent Overstreet 
72698638ffaSKent Overstreet static __always_inline int
72798638ffaSKent Overstreet bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol,
72897996ddfSKent Overstreet 			   bool inconsistent_if_not_found,
72997996ddfSKent Overstreet 			   int iter_flags,
73097996ddfSKent Overstreet 			   struct bch_subvolume *s)
73114b393eeSKent Overstreet {
732bcb79a51SKent Overstreet 	int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol),
733bcb79a51SKent Overstreet 					  iter_flags, subvolume, s);
734bcb79a51SKent Overstreet 	bch2_fs_inconsistent_on(ret == -ENOENT && inconsistent_if_not_found,
735bcb79a51SKent Overstreet 				trans->c, "missing subvolume %u", subvol);
73697996ddfSKent Overstreet 	return ret;
73714b393eeSKent Overstreet }
73814b393eeSKent Overstreet 
73998638ffaSKent Overstreet int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol,
74098638ffaSKent Overstreet 		       bool inconsistent_if_not_found,
74198638ffaSKent Overstreet 		       int iter_flags,
74298638ffaSKent Overstreet 		       struct bch_subvolume *s)
74398638ffaSKent Overstreet {
74498638ffaSKent Overstreet 	return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s);
74598638ffaSKent Overstreet }
74698638ffaSKent Overstreet 
7479ca4853bSKent Overstreet int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot,
7489ca4853bSKent Overstreet 			     struct bch_subvolume *subvol)
7499ca4853bSKent Overstreet {
7509ca4853bSKent Overstreet 	struct bch_snapshot snap;
7519ca4853bSKent Overstreet 
7529ca4853bSKent Overstreet 	return  snapshot_lookup(trans, snapshot, &snap) ?:
7539ca4853bSKent Overstreet 		bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol);
7549ca4853bSKent Overstreet }
7559ca4853bSKent Overstreet 
75697996ddfSKent Overstreet int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvol,
75797996ddfSKent Overstreet 				u32 *snapid)
75897996ddfSKent Overstreet {
75997996ddfSKent Overstreet 	struct bch_subvolume s;
76097996ddfSKent Overstreet 	int ret;
76197996ddfSKent Overstreet 
76298638ffaSKent Overstreet 	ret = bch2_subvolume_get_inlined(trans, subvol, true,
76397996ddfSKent Overstreet 					 BTREE_ITER_CACHED|
76497996ddfSKent Overstreet 					 BTREE_ITER_WITH_UPDATES,
76597996ddfSKent Overstreet 					 &s);
76698638ffaSKent Overstreet 	if (!ret)
76797996ddfSKent Overstreet 		*snapid = le32_to_cpu(s.snapshot);
76814b393eeSKent Overstreet 	return ret;
76914b393eeSKent Overstreet }
77014b393eeSKent Overstreet 
7712027875bSKent Overstreet /*
7722027875bSKent Overstreet  * Delete subvolume, mark snapshot ID as deleted, queue up snapshot
7732027875bSKent Overstreet  * deletion/cleanup:
7742027875bSKent Overstreet  */
7752027875bSKent Overstreet int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
77614b393eeSKent Overstreet {
77714b393eeSKent Overstreet 	struct btree_iter iter;
77814b393eeSKent Overstreet 	struct bkey_s_c_subvolume subvol;
77914b393eeSKent Overstreet 	struct btree_trans_commit_hook *h;
78014b393eeSKent Overstreet 	u32 snapid;
78114b393eeSKent Overstreet 	int ret = 0;
78214b393eeSKent Overstreet 
783bcb79a51SKent Overstreet 	subvol = bch2_bkey_get_iter_typed(trans, &iter,
784bcb79a51SKent Overstreet 				BTREE_ID_subvolumes, POS(0, subvolid),
785bcb79a51SKent Overstreet 				BTREE_ITER_CACHED|BTREE_ITER_INTENT,
786bcb79a51SKent Overstreet 				subvolume);
787bcb79a51SKent Overstreet 	ret = bkey_err(subvol);
788bcb79a51SKent Overstreet 	bch2_fs_inconsistent_on(ret == -ENOENT, trans->c, "missing subvolume %u", subvolid);
78914b393eeSKent Overstreet 	if (ret)
790bcb79a51SKent Overstreet 		return ret;
79114b393eeSKent Overstreet 
79214b393eeSKent Overstreet 	snapid = le32_to_cpu(subvol.v->snapshot);
79314b393eeSKent Overstreet 
794416cc426SKent Overstreet 	ret = bch2_btree_delete_at(trans, &iter, 0);
79514b393eeSKent Overstreet 	if (ret)
79614b393eeSKent Overstreet 		goto err;
79714b393eeSKent Overstreet 
79814b393eeSKent Overstreet 	ret = bch2_snapshot_node_set_deleted(trans, snapid);
79931301dd4SKent Overstreet 	if (ret)
80031301dd4SKent Overstreet 		goto err;
80114b393eeSKent Overstreet 
80214b393eeSKent Overstreet 	h = bch2_trans_kmalloc(trans, sizeof(*h));
80314b393eeSKent Overstreet 	ret = PTR_ERR_OR_ZERO(h);
80414b393eeSKent Overstreet 	if (ret)
80514b393eeSKent Overstreet 		goto err;
80614b393eeSKent Overstreet 
80714b393eeSKent Overstreet 	h->fn = bch2_delete_dead_snapshots_hook;
80814b393eeSKent Overstreet 	bch2_trans_commit_hook(trans, h);
80914b393eeSKent Overstreet err:
81014b393eeSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
81114b393eeSKent Overstreet 	return ret;
81214b393eeSKent Overstreet }
81314b393eeSKent Overstreet 
8142027875bSKent Overstreet void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work)
8152027875bSKent Overstreet {
8162027875bSKent Overstreet 	struct bch_fs *c = container_of(work, struct bch_fs,
8172027875bSKent Overstreet 				snapshot_wait_for_pagecache_and_delete_work);
81891d961baSKent Overstreet 	snapshot_id_list s;
8192027875bSKent Overstreet 	u32 *id;
8202027875bSKent Overstreet 	int ret = 0;
8212027875bSKent Overstreet 
8222027875bSKent Overstreet 	while (!ret) {
8232027875bSKent Overstreet 		mutex_lock(&c->snapshots_unlinked_lock);
8242027875bSKent Overstreet 		s = c->snapshots_unlinked;
82591d961baSKent Overstreet 		darray_init(&c->snapshots_unlinked);
8262027875bSKent Overstreet 		mutex_unlock(&c->snapshots_unlinked_lock);
8272027875bSKent Overstreet 
8282027875bSKent Overstreet 		if (!s.nr)
8292027875bSKent Overstreet 			break;
8302027875bSKent Overstreet 
8312027875bSKent Overstreet 		bch2_evict_subvolume_inodes(c, &s);
8322027875bSKent Overstreet 
83391d961baSKent Overstreet 		for (id = s.data; id < s.data + s.nr; id++) {
8342027875bSKent Overstreet 			ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL,
8352027875bSKent Overstreet 				      bch2_subvolume_delete(&trans, *id));
8362027875bSKent Overstreet 			if (ret) {
837d4bf5eecSKent Overstreet 				bch_err(c, "error deleting subvolume %u: %s", *id, bch2_err_str(ret));
8382027875bSKent Overstreet 				break;
8392027875bSKent Overstreet 			}
8402027875bSKent Overstreet 		}
8412027875bSKent Overstreet 
84291d961baSKent Overstreet 		darray_exit(&s);
8432027875bSKent Overstreet 	}
8442027875bSKent Overstreet 
845d94189adSKent Overstreet 	bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
8462027875bSKent Overstreet }
8472027875bSKent Overstreet 
8482027875bSKent Overstreet struct subvolume_unlink_hook {
8492027875bSKent Overstreet 	struct btree_trans_commit_hook	h;
8502027875bSKent Overstreet 	u32				subvol;
8512027875bSKent Overstreet };
8522027875bSKent Overstreet 
8532027875bSKent Overstreet int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans,
8542027875bSKent Overstreet 						      struct btree_trans_commit_hook *_h)
8552027875bSKent Overstreet {
8562027875bSKent Overstreet 	struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h);
8572027875bSKent Overstreet 	struct bch_fs *c = trans->c;
8582027875bSKent Overstreet 	int ret = 0;
8592027875bSKent Overstreet 
8602027875bSKent Overstreet 	mutex_lock(&c->snapshots_unlinked_lock);
8612027875bSKent Overstreet 	if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol))
862597dee1cSKent Overstreet 		ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol);
8632027875bSKent Overstreet 	mutex_unlock(&c->snapshots_unlinked_lock);
8642027875bSKent Overstreet 
8652027875bSKent Overstreet 	if (ret)
8662027875bSKent Overstreet 		return ret;
8672027875bSKent Overstreet 
868d94189adSKent Overstreet 	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache))
8692027875bSKent Overstreet 		return -EROFS;
8702027875bSKent Overstreet 
8718bff9875SBrian Foster 	if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
872d94189adSKent Overstreet 		bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
8732027875bSKent Overstreet 	return 0;
8742027875bSKent Overstreet }
8752027875bSKent Overstreet 
8762027875bSKent Overstreet int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid)
8772027875bSKent Overstreet {
8782027875bSKent Overstreet 	struct btree_iter iter;
8792027875bSKent Overstreet 	struct bkey_i_subvolume *n;
8802027875bSKent Overstreet 	struct subvolume_unlink_hook *h;
8812027875bSKent Overstreet 	int ret = 0;
8822027875bSKent Overstreet 
883*f12a798aSKent Overstreet 	h = bch2_trans_kmalloc(trans, sizeof(*h));
884*f12a798aSKent Overstreet 	ret = PTR_ERR_OR_ZERO(h);
885*f12a798aSKent Overstreet 	if (ret)
886*f12a798aSKent Overstreet 		return ret;
887*f12a798aSKent Overstreet 
888*f12a798aSKent Overstreet 	h->h.fn		= bch2_subvolume_wait_for_pagecache_and_delete_hook;
889*f12a798aSKent Overstreet 	h->subvol	= subvolid;
890*f12a798aSKent Overstreet 	bch2_trans_commit_hook(trans, &h->h);
891*f12a798aSKent Overstreet 
89234dfa5dbSKent Overstreet 	n = bch2_bkey_get_mut_typed(trans, &iter,
89334dfa5dbSKent Overstreet 			BTREE_ID_subvolumes, POS(0, subvolid),
89434dfa5dbSKent Overstreet 			BTREE_ITER_CACHED, subvolume);
895994ba475SKent Overstreet 	ret = PTR_ERR_OR_ZERO(n);
896994ba475SKent Overstreet 	if (unlikely(ret)) {
897994ba475SKent Overstreet 		bch2_fs_inconsistent_on(ret == -ENOENT, trans->c, "missing subvolume %u", subvolid);
898*f12a798aSKent Overstreet 		return ret;
8992027875bSKent Overstreet 	}
9002027875bSKent Overstreet 
9012027875bSKent Overstreet 	SET_BCH_SUBVOLUME_UNLINKED(&n->v, true);
9022027875bSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
9032027875bSKent Overstreet 	return ret;
9042027875bSKent Overstreet }
9052027875bSKent Overstreet 
90614b393eeSKent Overstreet int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
90714b393eeSKent Overstreet 			  u32 src_subvolid,
90814b393eeSKent Overstreet 			  u32 *new_subvolid,
90914b393eeSKent Overstreet 			  u32 *new_snapshotid,
91014b393eeSKent Overstreet 			  bool ro)
91114b393eeSKent Overstreet {
912ca130b9cSKent Overstreet 	struct bch_fs *c = trans->c;
91314b393eeSKent Overstreet 	struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL };
91414b393eeSKent Overstreet 	struct bkey_i_subvolume *new_subvol = NULL;
91514b393eeSKent Overstreet 	struct bkey_i_subvolume *src_subvol = NULL;
91614b393eeSKent Overstreet 	struct bkey_s_c k;
91714b393eeSKent Overstreet 	u32 parent = 0, new_nodes[2], snapshot_subvols[2];
91814b393eeSKent Overstreet 	int ret = 0;
91914b393eeSKent Overstreet 
92014b393eeSKent Overstreet 	for_each_btree_key(trans, dst_iter, BTREE_ID_subvolumes, SUBVOL_POS_MIN,
92114b393eeSKent Overstreet 			   BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
922e88a75ebSKent Overstreet 		if (bkey_gt(k.k->p, SUBVOL_POS_MAX))
92314b393eeSKent Overstreet 			break;
924ca130b9cSKent Overstreet 
925ca130b9cSKent Overstreet 		/*
926ca130b9cSKent Overstreet 		 * bch2_subvolume_delete() doesn't flush the btree key cache -
927ca130b9cSKent Overstreet 		 * ideally it would but that's tricky
928ca130b9cSKent Overstreet 		 */
929ca130b9cSKent Overstreet 		if (bkey_deleted(k.k) &&
930ca130b9cSKent Overstreet 		    !bch2_btree_key_cache_find(c, BTREE_ID_subvolumes, dst_iter.pos))
93114b393eeSKent Overstreet 			goto found_slot;
93214b393eeSKent Overstreet 	}
93314b393eeSKent Overstreet 
93414b393eeSKent Overstreet 	if (!ret)
935098ef98dSKent Overstreet 		ret = -BCH_ERR_ENOSPC_subvolume_create;
93614b393eeSKent Overstreet 	goto err;
93714b393eeSKent Overstreet found_slot:
93814b393eeSKent Overstreet 	snapshot_subvols[0] = dst_iter.pos.offset;
93914b393eeSKent Overstreet 	snapshot_subvols[1] = src_subvolid;
94014b393eeSKent Overstreet 
94114b393eeSKent Overstreet 	if (src_subvolid) {
94214b393eeSKent Overstreet 		/* Creating a snapshot: */
94314b393eeSKent Overstreet 
94434dfa5dbSKent Overstreet 		src_subvol = bch2_bkey_get_mut_typed(trans, &src_iter,
94534dfa5dbSKent Overstreet 				BTREE_ID_subvolumes, POS(0, src_subvolid),
94634dfa5dbSKent Overstreet 				BTREE_ITER_CACHED, subvolume);
947994ba475SKent Overstreet 		ret = PTR_ERR_OR_ZERO(src_subvol);
948994ba475SKent Overstreet 		if (unlikely(ret)) {
949994ba475SKent Overstreet 			bch2_fs_inconsistent_on(ret == -ENOENT, trans->c,
950994ba475SKent Overstreet 						"subvolume %u not found", src_subvolid);
95114b393eeSKent Overstreet 			goto err;
95214b393eeSKent Overstreet 		}
95314b393eeSKent Overstreet 
95414b393eeSKent Overstreet 		parent = le32_to_cpu(src_subvol->v.snapshot);
95514b393eeSKent Overstreet 	}
95614b393eeSKent Overstreet 
95714b393eeSKent Overstreet 	ret = bch2_snapshot_node_create(trans, parent, new_nodes,
95814b393eeSKent Overstreet 					snapshot_subvols,
95914b393eeSKent Overstreet 					src_subvolid ? 2 : 1);
96014b393eeSKent Overstreet 	if (ret)
96114b393eeSKent Overstreet 		goto err;
96214b393eeSKent Overstreet 
96314b393eeSKent Overstreet 	if (src_subvolid) {
96414b393eeSKent Overstreet 		src_subvol->v.snapshot = cpu_to_le32(new_nodes[1]);
96594a3e1a6SKent Overstreet 		ret = bch2_trans_update(trans, &src_iter, &src_subvol->k_i, 0);
96694a3e1a6SKent Overstreet 		if (ret)
96794a3e1a6SKent Overstreet 			goto err;
96814b393eeSKent Overstreet 	}
96914b393eeSKent Overstreet 
970f8cb35fdSKent Overstreet 	new_subvol = bch2_bkey_alloc(trans, &dst_iter, 0, subvolume);
97114b393eeSKent Overstreet 	ret = PTR_ERR_OR_ZERO(new_subvol);
97214b393eeSKent Overstreet 	if (ret)
97314b393eeSKent Overstreet 		goto err;
97414b393eeSKent Overstreet 
97514b393eeSKent Overstreet 	new_subvol->v.flags	= 0;
97614b393eeSKent Overstreet 	new_subvol->v.snapshot	= cpu_to_le32(new_nodes[0]);
97714b393eeSKent Overstreet 	new_subvol->v.inode	= cpu_to_le64(inode);
97814b393eeSKent Overstreet 	SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro);
97914b393eeSKent Overstreet 	SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0);
98014b393eeSKent Overstreet 
98114b393eeSKent Overstreet 	*new_subvolid	= new_subvol->k.p.offset;
98214b393eeSKent Overstreet 	*new_snapshotid	= new_nodes[0];
98314b393eeSKent Overstreet err:
98414b393eeSKent Overstreet 	bch2_trans_iter_exit(trans, &src_iter);
98514b393eeSKent Overstreet 	bch2_trans_iter_exit(trans, &dst_iter);
98614b393eeSKent Overstreet 	return ret;
98714b393eeSKent Overstreet }
98814b393eeSKent Overstreet 
98914b393eeSKent Overstreet int bch2_fs_subvolumes_init(struct bch_fs *c)
99014b393eeSKent Overstreet {
99114b393eeSKent Overstreet 	INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
9922027875bSKent Overstreet 	INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work,
9932027875bSKent Overstreet 		  bch2_subvolume_wait_for_pagecache_and_delete);
9942027875bSKent Overstreet 	mutex_init(&c->snapshots_unlinked_lock);
99514b393eeSKent Overstreet 	return 0;
99614b393eeSKent Overstreet }
997