xref: /linux/fs/bcachefs/snapshot.c (revision b71817585383d96ddc51ebd126f6253fdb9a8568)
18e877caaSKent Overstreet // SPDX-License-Identifier: GPL-2.0
28e877caaSKent Overstreet 
38e877caaSKent Overstreet #include "bcachefs.h"
4a111901fSKent Overstreet #include "bkey_buf.h"
58e877caaSKent Overstreet #include "btree_key_cache.h"
68e877caaSKent Overstreet #include "btree_update.h"
7f55d6e07SKent Overstreet #include "buckets.h"
88e877caaSKent Overstreet #include "errcode.h"
98e877caaSKent Overstreet #include "error.h"
108e877caaSKent Overstreet #include "fs.h"
11a292be3bSKent Overstreet #include "recovery_passes.h"
128e877caaSKent Overstreet #include "snapshot.h"
138e877caaSKent Overstreet 
148e877caaSKent Overstreet #include <linux/random.h>
158e877caaSKent Overstreet 
168e877caaSKent Overstreet /*
178e877caaSKent Overstreet  * Snapshot trees:
188e877caaSKent Overstreet  *
198e877caaSKent Overstreet  * Keys in BTREE_ID_snapshot_trees identify a whole tree of snapshot nodes; they
208e877caaSKent Overstreet  * exist to provide a stable identifier for the whole lifetime of a snapshot
218e877caaSKent Overstreet  * tree.
228e877caaSKent Overstreet  */
238e877caaSKent Overstreet 
bch2_snapshot_tree_to_text(struct printbuf * out,struct bch_fs * c,struct bkey_s_c k)248e877caaSKent Overstreet void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
258e877caaSKent Overstreet 				struct bkey_s_c k)
268e877caaSKent Overstreet {
278e877caaSKent Overstreet 	struct bkey_s_c_snapshot_tree t = bkey_s_c_to_snapshot_tree(k);
288e877caaSKent Overstreet 
298e877caaSKent Overstreet 	prt_printf(out, "subvol %u root snapshot %u",
308e877caaSKent Overstreet 		   le32_to_cpu(t.v->master_subvol),
318e877caaSKent Overstreet 		   le32_to_cpu(t.v->root_snapshot));
328e877caaSKent Overstreet }
338e877caaSKent Overstreet 
bch2_snapshot_tree_validate(struct bch_fs * c,struct bkey_s_c k,enum bch_validate_flags flags)34*d97de0d0SKent Overstreet int bch2_snapshot_tree_validate(struct bch_fs *c, struct bkey_s_c k,
35*d97de0d0SKent Overstreet 			       enum bch_validate_flags flags)
368e877caaSKent Overstreet {
37b65db750SKent Overstreet 	int ret = 0;
388e877caaSKent Overstreet 
39b65db750SKent Overstreet 	bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) ||
40*d97de0d0SKent Overstreet 			 bkey_lt(k.k->p, POS(0, 1)),
41*d97de0d0SKent Overstreet 			 c, snapshot_tree_pos_bad,
42b65db750SKent Overstreet 			 "bad pos");
43b65db750SKent Overstreet fsck_err:
44b65db750SKent Overstreet 	return ret;
458e877caaSKent Overstreet }
468e877caaSKent Overstreet 
bch2_snapshot_tree_lookup(struct btree_trans * trans,u32 id,struct bch_snapshot_tree * s)478e877caaSKent Overstreet int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id,
488e877caaSKent Overstreet 			      struct bch_snapshot_tree *s)
498e877caaSKent Overstreet {
508e877caaSKent Overstreet 	int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id),
515dd8c60eSKent Overstreet 					  BTREE_ITER_with_updates, snapshot_tree, s);
528e877caaSKent Overstreet 
538e877caaSKent Overstreet 	if (bch2_err_matches(ret, ENOENT))
548e877caaSKent Overstreet 		ret = -BCH_ERR_ENOENT_snapshot_tree;
558e877caaSKent Overstreet 	return ret;
568e877caaSKent Overstreet }
578e877caaSKent Overstreet 
588e877caaSKent Overstreet struct bkey_i_snapshot_tree *
__bch2_snapshot_tree_create(struct btree_trans * trans)598e877caaSKent Overstreet __bch2_snapshot_tree_create(struct btree_trans *trans)
608e877caaSKent Overstreet {
618e877caaSKent Overstreet 	struct btree_iter iter;
628e877caaSKent Overstreet 	int ret = bch2_bkey_get_empty_slot(trans, &iter,
638e877caaSKent Overstreet 			BTREE_ID_snapshot_trees, POS(0, U32_MAX));
648e877caaSKent Overstreet 	struct bkey_i_snapshot_tree *s_t;
658e877caaSKent Overstreet 
668e877caaSKent Overstreet 	if (ret == -BCH_ERR_ENOSPC_btree_slot)
678e877caaSKent Overstreet 		ret = -BCH_ERR_ENOSPC_snapshot_tree;
688e877caaSKent Overstreet 	if (ret)
698e877caaSKent Overstreet 		return ERR_PTR(ret);
708e877caaSKent Overstreet 
718e877caaSKent Overstreet 	s_t = bch2_bkey_alloc(trans, &iter, 0, snapshot_tree);
728e877caaSKent Overstreet 	ret = PTR_ERR_OR_ZERO(s_t);
738e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
748e877caaSKent Overstreet 	return ret ? ERR_PTR(ret) : s_t;
758e877caaSKent Overstreet }
768e877caaSKent Overstreet 
bch2_snapshot_tree_create(struct btree_trans * trans,u32 root_id,u32 subvol_id,u32 * tree_id)778e877caaSKent Overstreet static int bch2_snapshot_tree_create(struct btree_trans *trans,
788e877caaSKent Overstreet 				u32 root_id, u32 subvol_id, u32 *tree_id)
798e877caaSKent Overstreet {
808e877caaSKent Overstreet 	struct bkey_i_snapshot_tree *n_tree =
818e877caaSKent Overstreet 		__bch2_snapshot_tree_create(trans);
828e877caaSKent Overstreet 
838e877caaSKent Overstreet 	if (IS_ERR(n_tree))
848e877caaSKent Overstreet 		return PTR_ERR(n_tree);
858e877caaSKent Overstreet 
868e877caaSKent Overstreet 	n_tree->v.master_subvol	= cpu_to_le32(subvol_id);
878e877caaSKent Overstreet 	n_tree->v.root_snapshot	= cpu_to_le32(root_id);
888e877caaSKent Overstreet 	*tree_id = n_tree->k.p.offset;
898e877caaSKent Overstreet 	return 0;
908e877caaSKent Overstreet }
918e877caaSKent Overstreet 
928e877caaSKent Overstreet /* Snapshot nodes: */
938e877caaSKent Overstreet 
__bch2_snapshot_is_ancestor_early(struct snapshot_table * t,u32 id,u32 ancestor)941c31b83aSKent Overstreet static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, u32 ancestor)
9566487c54SKent Overstreet {
96ec9cc18fSKent Overstreet 	while (id && id < ancestor) {
97ec9cc18fSKent Overstreet 		const struct snapshot_t *s = __snapshot_t(t, id);
98ec9cc18fSKent Overstreet 		id = s ? s->parent : 0;
99ec9cc18fSKent Overstreet 	}
1001c31b83aSKent Overstreet 	return id == ancestor;
1011c31b83aSKent Overstreet }
1021c31b83aSKent Overstreet 
bch2_snapshot_is_ancestor_early(struct bch_fs * c,u32 id,u32 ancestor)1031c31b83aSKent Overstreet static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
1041c31b83aSKent Overstreet {
1051c31b83aSKent Overstreet 	rcu_read_lock();
1061c31b83aSKent Overstreet 	bool ret = __bch2_snapshot_is_ancestor_early(rcu_dereference(c->snapshots), id, ancestor);
10766487c54SKent Overstreet 	rcu_read_unlock();
10866487c54SKent Overstreet 
1091c31b83aSKent Overstreet 	return ret;
11066487c54SKent Overstreet }
11166487c54SKent Overstreet 
get_ancestor_below(struct snapshot_table * t,u32 id,u32 ancestor)1128e877caaSKent Overstreet static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
1138e877caaSKent Overstreet {
1148e877caaSKent Overstreet 	const struct snapshot_t *s = __snapshot_t(t, id);
115ec9cc18fSKent Overstreet 	if (!s)
116ec9cc18fSKent Overstreet 		return 0;
1178e877caaSKent Overstreet 
1188e877caaSKent Overstreet 	if (s->skip[2] <= ancestor)
1198e877caaSKent Overstreet 		return s->skip[2];
1208e877caaSKent Overstreet 	if (s->skip[1] <= ancestor)
1218e877caaSKent Overstreet 		return s->skip[1];
1228e877caaSKent Overstreet 	if (s->skip[0] <= ancestor)
1238e877caaSKent Overstreet 		return s->skip[0];
1248e877caaSKent Overstreet 	return s->parent;
1258e877caaSKent Overstreet }
1268e877caaSKent Overstreet 
test_ancestor_bitmap(struct snapshot_table * t,u32 id,u32 ancestor)12701e5f4fcSKent Overstreet static bool test_ancestor_bitmap(struct snapshot_table *t, u32 id, u32 ancestor)
12801e5f4fcSKent Overstreet {
12901e5f4fcSKent Overstreet 	const struct snapshot_t *s = __snapshot_t(t, id);
13001e5f4fcSKent Overstreet 	if (!s)
13101e5f4fcSKent Overstreet 		return false;
13201e5f4fcSKent Overstreet 
13301e5f4fcSKent Overstreet 	return test_bit(ancestor - id - 1, s->is_ancestor);
13401e5f4fcSKent Overstreet }
13501e5f4fcSKent Overstreet 
__bch2_snapshot_is_ancestor(struct bch_fs * c,u32 id,u32 ancestor)1368e877caaSKent Overstreet bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
1378e877caaSKent Overstreet {
1388e877caaSKent Overstreet 	bool ret;
1398e877caaSKent Overstreet 
1408e877caaSKent Overstreet 	rcu_read_lock();
1411c31b83aSKent Overstreet 	struct snapshot_table *t = rcu_dereference(c->snapshots);
1421c31b83aSKent Overstreet 
14313c1e583SKent Overstreet 	if (unlikely(c->recovery_pass_done < BCH_RECOVERY_PASS_check_snapshots)) {
1441c31b83aSKent Overstreet 		ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor);
1451c31b83aSKent Overstreet 		goto out;
1461c31b83aSKent Overstreet 	}
1478e877caaSKent Overstreet 
1488e877caaSKent Overstreet 	while (id && id < ancestor - IS_ANCESTOR_BITMAP)
1498e877caaSKent Overstreet 		id = get_ancestor_below(t, id, ancestor);
1508e877caaSKent Overstreet 
15101e5f4fcSKent Overstreet 	ret = id && id < ancestor
15201e5f4fcSKent Overstreet 		? test_ancestor_bitmap(t, id, ancestor)
15301e5f4fcSKent Overstreet 		: id == ancestor;
15466487c54SKent Overstreet 
1551c31b83aSKent Overstreet 	EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
1561c31b83aSKent Overstreet out:
1578e877caaSKent Overstreet 	rcu_read_unlock();
1588e877caaSKent Overstreet 
1598e877caaSKent Overstreet 	return ret;
1608e877caaSKent Overstreet }
1618e877caaSKent Overstreet 
__snapshot_t_mut(struct bch_fs * c,u32 id)1628e877caaSKent Overstreet static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
1638e877caaSKent Overstreet {
1648e877caaSKent Overstreet 	size_t idx = U32_MAX - id;
1658e877caaSKent Overstreet 	struct snapshot_table *new, *old;
1668e877caaSKent Overstreet 
16763332394SKent Overstreet 	size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1));
16863332394SKent Overstreet 	size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]);
1698e877caaSKent Overstreet 
17064cd7de9SPei Li 	if (unlikely(new_bytes > INT_MAX))
17164cd7de9SPei Li 		return NULL;
17264cd7de9SPei Li 
17363332394SKent Overstreet 	new = kvzalloc(new_bytes, GFP_KERNEL);
1748e877caaSKent Overstreet 	if (!new)
1758e877caaSKent Overstreet 		return NULL;
1768e877caaSKent Overstreet 
17763332394SKent Overstreet 	new->nr = new_size;
17863332394SKent Overstreet 
1798e877caaSKent Overstreet 	old = rcu_dereference_protected(c->snapshots, true);
1808e877caaSKent Overstreet 	if (old)
18163332394SKent Overstreet 		memcpy(new->s, old->s, sizeof(old->s[0]) * old->nr);
1828e877caaSKent Overstreet 
1838e877caaSKent Overstreet 	rcu_assign_pointer(c->snapshots, new);
18463332394SKent Overstreet 	kvfree_rcu(old, rcu);
1858e877caaSKent Overstreet 
18663332394SKent Overstreet 	return &rcu_dereference_protected(c->snapshots,
18763332394SKent Overstreet 				lockdep_is_held(&c->snapshot_table_lock))->s[idx];
1888e877caaSKent Overstreet }
1898e877caaSKent Overstreet 
snapshot_t_mut(struct bch_fs * c,u32 id)1908e877caaSKent Overstreet static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id)
1918e877caaSKent Overstreet {
1928e877caaSKent Overstreet 	size_t idx = U32_MAX - id;
19363332394SKent Overstreet 	struct snapshot_table *table =
19463332394SKent Overstreet 		rcu_dereference_protected(c->snapshots,
19563332394SKent Overstreet 				lockdep_is_held(&c->snapshot_table_lock));
1968e877caaSKent Overstreet 
1978e877caaSKent Overstreet 	lockdep_assert_held(&c->snapshot_table_lock);
1988e877caaSKent Overstreet 
19963332394SKent Overstreet 	if (likely(table && idx < table->nr))
20063332394SKent Overstreet 		return &table->s[idx];
2018e877caaSKent Overstreet 
2028e877caaSKent Overstreet 	return __snapshot_t_mut(c, id);
2038e877caaSKent Overstreet }
2048e877caaSKent Overstreet 
bch2_snapshot_to_text(struct printbuf * out,struct bch_fs * c,struct bkey_s_c k)2058e877caaSKent Overstreet void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
2068e877caaSKent Overstreet 			   struct bkey_s_c k)
2078e877caaSKent Overstreet {
2088e877caaSKent Overstreet 	struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
2098e877caaSKent Overstreet 
2108e877caaSKent Overstreet 	prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u tree %u",
2118e877caaSKent Overstreet 	       BCH_SNAPSHOT_SUBVOL(s.v),
2128e877caaSKent Overstreet 	       BCH_SNAPSHOT_DELETED(s.v),
2138e877caaSKent Overstreet 	       le32_to_cpu(s.v->parent),
2148e877caaSKent Overstreet 	       le32_to_cpu(s.v->children[0]),
2158e877caaSKent Overstreet 	       le32_to_cpu(s.v->children[1]),
2168e877caaSKent Overstreet 	       le32_to_cpu(s.v->subvol),
2178e877caaSKent Overstreet 	       le32_to_cpu(s.v->tree));
2188e877caaSKent Overstreet 
2198e877caaSKent Overstreet 	if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, depth))
2208e877caaSKent Overstreet 		prt_printf(out, " depth %u skiplist %u %u %u",
2218e877caaSKent Overstreet 			   le32_to_cpu(s.v->depth),
2228e877caaSKent Overstreet 			   le32_to_cpu(s.v->skip[0]),
2238e877caaSKent Overstreet 			   le32_to_cpu(s.v->skip[1]),
2248e877caaSKent Overstreet 			   le32_to_cpu(s.v->skip[2]));
2258e877caaSKent Overstreet }
2268e877caaSKent Overstreet 
bch2_snapshot_validate(struct bch_fs * c,struct bkey_s_c k,enum bch_validate_flags flags)227*d97de0d0SKent Overstreet int bch2_snapshot_validate(struct bch_fs *c, struct bkey_s_c k,
228*d97de0d0SKent Overstreet 			  enum bch_validate_flags flags)
2298e877caaSKent Overstreet {
2308e877caaSKent Overstreet 	struct bkey_s_c_snapshot s;
2318e877caaSKent Overstreet 	u32 i, id;
232b65db750SKent Overstreet 	int ret = 0;
2338e877caaSKent Overstreet 
234b65db750SKent Overstreet 	bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) ||
235*d97de0d0SKent Overstreet 			 bkey_lt(k.k->p, POS(0, 1)),
236*d97de0d0SKent Overstreet 			 c, snapshot_pos_bad,
237b65db750SKent Overstreet 			 "bad pos");
2388e877caaSKent Overstreet 
2398e877caaSKent Overstreet 	s = bkey_s_c_to_snapshot(k);
2408e877caaSKent Overstreet 
2418e877caaSKent Overstreet 	id = le32_to_cpu(s.v->parent);
242*d97de0d0SKent Overstreet 	bkey_fsck_err_on(id && id <= k.k->p.offset,
243*d97de0d0SKent Overstreet 			 c, snapshot_parent_bad,
244b65db750SKent Overstreet 			 "bad parent node (%u <= %llu)",
2458e877caaSKent Overstreet 			 id, k.k->p.offset);
2468e877caaSKent Overstreet 
247*d97de0d0SKent Overstreet 	bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]),
248*d97de0d0SKent Overstreet 			 c, snapshot_children_not_normalized,
249b65db750SKent Overstreet 			 "children not normalized");
2508e877caaSKent Overstreet 
251*d97de0d0SKent Overstreet 	bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1],
252*d97de0d0SKent Overstreet 			 c, snapshot_child_duplicate,
253b65db750SKent Overstreet 			 "duplicate child nodes");
2548e877caaSKent Overstreet 
2558e877caaSKent Overstreet 	for (i = 0; i < 2; i++) {
2568e877caaSKent Overstreet 		id = le32_to_cpu(s.v->children[i]);
2578e877caaSKent Overstreet 
258*d97de0d0SKent Overstreet 		bkey_fsck_err_on(id >= k.k->p.offset,
259*d97de0d0SKent Overstreet 				 c, snapshot_child_bad,
260b65db750SKent Overstreet 				 "bad child node (%u >= %llu)",
2618e877caaSKent Overstreet 				 id, k.k->p.offset);
2628e877caaSKent Overstreet 	}
2638e877caaSKent Overstreet 
2648e877caaSKent Overstreet 	if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) {
265b65db750SKent Overstreet 		bkey_fsck_err_on(le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) ||
266*d97de0d0SKent Overstreet 				 le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]),
267*d97de0d0SKent Overstreet 				 c, snapshot_skiplist_not_normalized,
268b65db750SKent Overstreet 				 "skiplist not normalized");
2698e877caaSKent Overstreet 
2708e877caaSKent Overstreet 		for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) {
2718e877caaSKent Overstreet 			id = le32_to_cpu(s.v->skip[i]);
2728e877caaSKent Overstreet 
273*d97de0d0SKent Overstreet 			bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent),
274*d97de0d0SKent Overstreet 					 c, snapshot_skiplist_bad,
275b65db750SKent Overstreet 					 "bad skiplist node %u", id);
2768e877caaSKent Overstreet 		}
2778e877caaSKent Overstreet 	}
278b65db750SKent Overstreet fsck_err:
279b65db750SKent Overstreet 	return ret;
2808e877caaSKent Overstreet }
2818e877caaSKent Overstreet 
__set_is_ancestor_bitmap(struct bch_fs * c,u32 id)28266487c54SKent Overstreet static void __set_is_ancestor_bitmap(struct bch_fs *c, u32 id)
28366487c54SKent Overstreet {
28466487c54SKent Overstreet 	struct snapshot_t *t = snapshot_t_mut(c, id);
28566487c54SKent Overstreet 	u32 parent = id;
28666487c54SKent Overstreet 
28766487c54SKent Overstreet 	while ((parent = bch2_snapshot_parent_early(c, parent)) &&
28866487c54SKent Overstreet 	       parent - id - 1 < IS_ANCESTOR_BITMAP)
28966487c54SKent Overstreet 		__set_bit(parent - id - 1, t->is_ancestor);
29066487c54SKent Overstreet }
29166487c54SKent Overstreet 
set_is_ancestor_bitmap(struct bch_fs * c,u32 id)29266487c54SKent Overstreet static void set_is_ancestor_bitmap(struct bch_fs *c, u32 id)
29366487c54SKent Overstreet {
29466487c54SKent Overstreet 	mutex_lock(&c->snapshot_table_lock);
29566487c54SKent Overstreet 	__set_is_ancestor_bitmap(c, id);
29666487c54SKent Overstreet 	mutex_unlock(&c->snapshot_table_lock);
29766487c54SKent Overstreet }
29866487c54SKent Overstreet 
__bch2_mark_snapshot(struct btree_trans * trans,enum btree_id btree,unsigned level,struct bkey_s_c old,struct bkey_s_c new,enum btree_iter_update_trigger_flags flags)299ad00bce0SKent Overstreet static int __bch2_mark_snapshot(struct btree_trans *trans,
3008e877caaSKent Overstreet 		       enum btree_id btree, unsigned level,
3018e877caaSKent Overstreet 		       struct bkey_s_c old, struct bkey_s_c new,
3022d288745SNathan Chancellor 		       enum btree_iter_update_trigger_flags flags)
3038e877caaSKent Overstreet {
3048e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
3058e877caaSKent Overstreet 	struct snapshot_t *t;
3068e877caaSKent Overstreet 	u32 id = new.k->p.offset;
3078e877caaSKent Overstreet 	int ret = 0;
3088e877caaSKent Overstreet 
3098e877caaSKent Overstreet 	mutex_lock(&c->snapshot_table_lock);
3108e877caaSKent Overstreet 
3118e877caaSKent Overstreet 	t = snapshot_t_mut(c, id);
3128e877caaSKent Overstreet 	if (!t) {
3138e877caaSKent Overstreet 		ret = -BCH_ERR_ENOMEM_mark_snapshot;
3148e877caaSKent Overstreet 		goto err;
3158e877caaSKent Overstreet 	}
3168e877caaSKent Overstreet 
3178e877caaSKent Overstreet 	if (new.k->type == KEY_TYPE_snapshot) {
3188e877caaSKent Overstreet 		struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
3198e877caaSKent Overstreet 
3208e877caaSKent Overstreet 		t->parent	= le32_to_cpu(s.v->parent);
3218e877caaSKent Overstreet 		t->children[0]	= le32_to_cpu(s.v->children[0]);
3228e877caaSKent Overstreet 		t->children[1]	= le32_to_cpu(s.v->children[1]);
3238e877caaSKent Overstreet 		t->subvol	= BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0;
3248e877caaSKent Overstreet 		t->tree		= le32_to_cpu(s.v->tree);
3258e877caaSKent Overstreet 
3268e877caaSKent Overstreet 		if (bkey_val_bytes(s.k) > offsetof(struct bch_snapshot, depth)) {
3278e877caaSKent Overstreet 			t->depth	= le32_to_cpu(s.v->depth);
3288e877caaSKent Overstreet 			t->skip[0]	= le32_to_cpu(s.v->skip[0]);
3298e877caaSKent Overstreet 			t->skip[1]	= le32_to_cpu(s.v->skip[1]);
3308e877caaSKent Overstreet 			t->skip[2]	= le32_to_cpu(s.v->skip[2]);
3318e877caaSKent Overstreet 		} else {
3328e877caaSKent Overstreet 			t->depth	= 0;
3338e877caaSKent Overstreet 			t->skip[0]	= 0;
3348e877caaSKent Overstreet 			t->skip[1]	= 0;
3358e877caaSKent Overstreet 			t->skip[2]	= 0;
3368e877caaSKent Overstreet 		}
3378e877caaSKent Overstreet 
33866487c54SKent Overstreet 		__set_is_ancestor_bitmap(c, id);
3398e877caaSKent Overstreet 
3408e877caaSKent Overstreet 		if (BCH_SNAPSHOT_DELETED(s.v)) {
3413c471b65SKent Overstreet 			set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
342b0b5bbf9SKent Overstreet 			if (c->curr_recovery_pass > BCH_RECOVERY_PASS_delete_dead_snapshots)
343b0b5bbf9SKent Overstreet 				bch2_delete_dead_snapshots_async(c);
3448e877caaSKent Overstreet 		}
3458e877caaSKent Overstreet 	} else {
3468e877caaSKent Overstreet 		memset(t, 0, sizeof(*t));
3478e877caaSKent Overstreet 	}
3488e877caaSKent Overstreet err:
3498e877caaSKent Overstreet 	mutex_unlock(&c->snapshot_table_lock);
3508e877caaSKent Overstreet 	return ret;
3518e877caaSKent Overstreet }
3528e877caaSKent Overstreet 
bch2_mark_snapshot(struct btree_trans * trans,enum btree_id btree,unsigned level,struct bkey_s_c old,struct bkey_s new,enum btree_iter_update_trigger_flags flags)353ad00bce0SKent Overstreet int bch2_mark_snapshot(struct btree_trans *trans,
354ad00bce0SKent Overstreet 		       enum btree_id btree, unsigned level,
355ad00bce0SKent Overstreet 		       struct bkey_s_c old, struct bkey_s new,
3562d288745SNathan Chancellor 		       enum btree_iter_update_trigger_flags flags)
357ad00bce0SKent Overstreet {
358ad00bce0SKent Overstreet 	return __bch2_mark_snapshot(trans, btree, level, old, new.s_c, flags);
359ad00bce0SKent Overstreet }
360ad00bce0SKent Overstreet 
bch2_snapshot_lookup(struct btree_trans * trans,u32 id,struct bch_snapshot * s)3618e877caaSKent Overstreet int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
3628e877caaSKent Overstreet 			 struct bch_snapshot *s)
3638e877caaSKent Overstreet {
3648e877caaSKent Overstreet 	return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, POS(0, id),
3655dd8c60eSKent Overstreet 				       BTREE_ITER_with_updates, snapshot, s);
3668e877caaSKent Overstreet }
3678e877caaSKent Overstreet 
bch2_snapshot_live(struct btree_trans * trans,u32 id)368eebe8a84SKent Overstreet static int bch2_snapshot_live(struct btree_trans *trans, u32 id)
3698e877caaSKent Overstreet {
3708e877caaSKent Overstreet 	struct bch_snapshot v;
3718e877caaSKent Overstreet 	int ret;
3728e877caaSKent Overstreet 
3738e877caaSKent Overstreet 	if (!id)
3748e877caaSKent Overstreet 		return 0;
3758e877caaSKent Overstreet 
3768e877caaSKent Overstreet 	ret = bch2_snapshot_lookup(trans, id, &v);
3778e877caaSKent Overstreet 	if (bch2_err_matches(ret, ENOENT))
3788e877caaSKent Overstreet 		bch_err(trans->c, "snapshot node %u not found", id);
3798e877caaSKent Overstreet 	if (ret)
3808e877caaSKent Overstreet 		return ret;
3818e877caaSKent Overstreet 
3828e877caaSKent Overstreet 	return !BCH_SNAPSHOT_DELETED(&v);
3838e877caaSKent Overstreet }
3848e877caaSKent Overstreet 
3858e877caaSKent Overstreet /*
3868e877caaSKent Overstreet  * If @k is a snapshot with just one live child, it's part of a linear chain,
3878e877caaSKent Overstreet  * which we consider to be an equivalence class: and then after snapshot
3888e877caaSKent Overstreet  * deletion cleanup, there should only be a single key at a given position in
3898e877caaSKent Overstreet  * this equivalence class.
3908e877caaSKent Overstreet  *
3918e877caaSKent Overstreet  * This sets the equivalence class of @k to be the child's equivalence class, if
3928e877caaSKent Overstreet  * it's part of such a linear chain: this correctly sets equivalence classes on
3938e877caaSKent Overstreet  * startup if we run leaf to root (i.e. in natural key order).
3948e877caaSKent Overstreet  */
bch2_snapshot_set_equiv(struct btree_trans * trans,struct bkey_s_c k)395eebe8a84SKent Overstreet static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
3968e877caaSKent Overstreet {
3978e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
3988e877caaSKent Overstreet 	unsigned i, nr_live = 0, live_idx = 0;
3998e877caaSKent Overstreet 	struct bkey_s_c_snapshot snap;
4008e877caaSKent Overstreet 	u32 id = k.k->p.offset, child[2];
4018e877caaSKent Overstreet 
4028e877caaSKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot)
4038e877caaSKent Overstreet 		return 0;
4048e877caaSKent Overstreet 
4058e877caaSKent Overstreet 	snap = bkey_s_c_to_snapshot(k);
4068e877caaSKent Overstreet 
4078e877caaSKent Overstreet 	child[0] = le32_to_cpu(snap.v->children[0]);
4088e877caaSKent Overstreet 	child[1] = le32_to_cpu(snap.v->children[1]);
4098e877caaSKent Overstreet 
4108e877caaSKent Overstreet 	for (i = 0; i < 2; i++) {
4118e877caaSKent Overstreet 		int ret = bch2_snapshot_live(trans, child[i]);
4128e877caaSKent Overstreet 
4138e877caaSKent Overstreet 		if (ret < 0)
4148e877caaSKent Overstreet 			return ret;
4158e877caaSKent Overstreet 
4168e877caaSKent Overstreet 		if (ret)
4178e877caaSKent Overstreet 			live_idx = i;
4188e877caaSKent Overstreet 		nr_live += ret;
4198e877caaSKent Overstreet 	}
4208e877caaSKent Overstreet 
4218e877caaSKent Overstreet 	mutex_lock(&c->snapshot_table_lock);
4228e877caaSKent Overstreet 
4238e877caaSKent Overstreet 	snapshot_t_mut(c, id)->equiv = nr_live == 1
4248e877caaSKent Overstreet 		? snapshot_t_mut(c, child[live_idx])->equiv
4258e877caaSKent Overstreet 		: id;
4268e877caaSKent Overstreet 
4278e877caaSKent Overstreet 	mutex_unlock(&c->snapshot_table_lock);
4288e877caaSKent Overstreet 
4298e877caaSKent Overstreet 	return 0;
4308e877caaSKent Overstreet }
4318e877caaSKent Overstreet 
4328e877caaSKent Overstreet /* fsck: */
4338e877caaSKent Overstreet 
bch2_snapshot_child(struct bch_fs * c,u32 id,unsigned child)4348e877caaSKent Overstreet static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child)
4358e877caaSKent Overstreet {
4368e877caaSKent Overstreet 	return snapshot_t(c, id)->children[child];
4378e877caaSKent Overstreet }
4388e877caaSKent Overstreet 
bch2_snapshot_left_child(struct bch_fs * c,u32 id)4398e877caaSKent Overstreet static u32 bch2_snapshot_left_child(struct bch_fs *c, u32 id)
4408e877caaSKent Overstreet {
4418e877caaSKent Overstreet 	return bch2_snapshot_child(c, id, 0);
4428e877caaSKent Overstreet }
4438e877caaSKent Overstreet 
bch2_snapshot_right_child(struct bch_fs * c,u32 id)4448e877caaSKent Overstreet static u32 bch2_snapshot_right_child(struct bch_fs *c, u32 id)
4458e877caaSKent Overstreet {
4468e877caaSKent Overstreet 	return bch2_snapshot_child(c, id, 1);
4478e877caaSKent Overstreet }
4488e877caaSKent Overstreet 
bch2_snapshot_tree_next(struct bch_fs * c,u32 id)4498e877caaSKent Overstreet static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id)
4508e877caaSKent Overstreet {
4518e877caaSKent Overstreet 	u32 n, parent;
4528e877caaSKent Overstreet 
4538e877caaSKent Overstreet 	n = bch2_snapshot_left_child(c, id);
4548e877caaSKent Overstreet 	if (n)
4558e877caaSKent Overstreet 		return n;
4568e877caaSKent Overstreet 
4578e877caaSKent Overstreet 	while ((parent = bch2_snapshot_parent(c, id))) {
4588e877caaSKent Overstreet 		n = bch2_snapshot_right_child(c, parent);
4598e877caaSKent Overstreet 		if (n && n != id)
4608e877caaSKent Overstreet 			return n;
4618e877caaSKent Overstreet 		id = parent;
4628e877caaSKent Overstreet 	}
4638e877caaSKent Overstreet 
4648e877caaSKent Overstreet 	return 0;
4658e877caaSKent Overstreet }
4668e877caaSKent Overstreet 
bch2_snapshot_tree_oldest_subvol(struct bch_fs * c,u32 snapshot_root)4678e877caaSKent Overstreet static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
4688e877caaSKent Overstreet {
4698e877caaSKent Overstreet 	u32 id = snapshot_root;
4708e877caaSKent Overstreet 	u32 subvol = 0, s;
4718e877caaSKent Overstreet 
4728e877caaSKent Overstreet 	while (id) {
4738e877caaSKent Overstreet 		s = snapshot_t(c, id)->subvol;
4748e877caaSKent Overstreet 
4758e877caaSKent Overstreet 		if (s && (!subvol || s < subvol))
4768e877caaSKent Overstreet 			subvol = s;
4778e877caaSKent Overstreet 
4788e877caaSKent Overstreet 		id = bch2_snapshot_tree_next(c, id);
4798e877caaSKent Overstreet 	}
4808e877caaSKent Overstreet 
4818e877caaSKent Overstreet 	return subvol;
4828e877caaSKent Overstreet }
4838e877caaSKent Overstreet 
bch2_snapshot_tree_master_subvol(struct btree_trans * trans,u32 snapshot_root,u32 * subvol_id)4848e877caaSKent Overstreet static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans,
4858e877caaSKent Overstreet 					    u32 snapshot_root, u32 *subvol_id)
4868e877caaSKent Overstreet {
4878e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
4888e877caaSKent Overstreet 	struct btree_iter iter;
4898e877caaSKent Overstreet 	struct bkey_s_c k;
4908e877caaSKent Overstreet 	bool found = false;
4918e877caaSKent Overstreet 	int ret;
4928e877caaSKent Overstreet 
4938e877caaSKent Overstreet 	for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN,
4948e877caaSKent Overstreet 				     0, k, ret) {
4958e877caaSKent Overstreet 		if (k.k->type != KEY_TYPE_subvolume)
4968e877caaSKent Overstreet 			continue;
4978e877caaSKent Overstreet 
498cf904c8dSKent Overstreet 		struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
4998e877caaSKent Overstreet 		if (!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.v->snapshot), snapshot_root))
5008e877caaSKent Overstreet 			continue;
5018e877caaSKent Overstreet 		if (!BCH_SUBVOLUME_SNAP(s.v)) {
5028e877caaSKent Overstreet 			*subvol_id = s.k->p.offset;
5038e877caaSKent Overstreet 			found = true;
5048e877caaSKent Overstreet 			break;
5058e877caaSKent Overstreet 		}
5068e877caaSKent Overstreet 	}
5078e877caaSKent Overstreet 
5088e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
5098e877caaSKent Overstreet 
5108e877caaSKent Overstreet 	if (!ret && !found) {
51196dea3d5SKent Overstreet 		struct bkey_i_subvolume *u;
5128e877caaSKent Overstreet 
5138e877caaSKent Overstreet 		*subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root);
5148e877caaSKent Overstreet 
51596dea3d5SKent Overstreet 		u = bch2_bkey_get_mut_typed(trans, &iter,
5168e877caaSKent Overstreet 					    BTREE_ID_subvolumes, POS(0, *subvol_id),
5178e877caaSKent Overstreet 					    0, subvolume);
51896dea3d5SKent Overstreet 		ret = PTR_ERR_OR_ZERO(u);
5198e877caaSKent Overstreet 		if (ret)
5208e877caaSKent Overstreet 			return ret;
5218e877caaSKent Overstreet 
52296dea3d5SKent Overstreet 		SET_BCH_SUBVOLUME_SNAP(&u->v, false);
5238e877caaSKent Overstreet 	}
5248e877caaSKent Overstreet 
5258e877caaSKent Overstreet 	return ret;
5268e877caaSKent Overstreet }
5278e877caaSKent Overstreet 
check_snapshot_tree(struct btree_trans * trans,struct btree_iter * iter,struct bkey_s_c k)5288e877caaSKent Overstreet static int check_snapshot_tree(struct btree_trans *trans,
5298e877caaSKent Overstreet 			       struct btree_iter *iter,
5308e877caaSKent Overstreet 			       struct bkey_s_c k)
5318e877caaSKent Overstreet {
5328e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
5338e877caaSKent Overstreet 	struct bkey_s_c_snapshot_tree st;
5348e877caaSKent Overstreet 	struct bch_snapshot s;
5358e877caaSKent Overstreet 	struct bch_subvolume subvol;
5368e877caaSKent Overstreet 	struct printbuf buf = PRINTBUF;
5378e877caaSKent Overstreet 	u32 root_id;
5388e877caaSKent Overstreet 	int ret;
5398e877caaSKent Overstreet 
5408e877caaSKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot_tree)
5418e877caaSKent Overstreet 		return 0;
5428e877caaSKent Overstreet 
5438e877caaSKent Overstreet 	st = bkey_s_c_to_snapshot_tree(k);
5448e877caaSKent Overstreet 	root_id = le32_to_cpu(st.v->root_snapshot);
5458e877caaSKent Overstreet 
5468e877caaSKent Overstreet 	ret = bch2_snapshot_lookup(trans, root_id, &s);
5478e877caaSKent Overstreet 	if (ret && !bch2_err_matches(ret, ENOENT))
5488e877caaSKent Overstreet 		goto err;
5498e877caaSKent Overstreet 
5508e877caaSKent Overstreet 	if (fsck_err_on(ret ||
5518e877caaSKent Overstreet 			root_id != bch2_snapshot_root(c, root_id) ||
5528e877caaSKent Overstreet 			st.k->p.offset != le32_to_cpu(s.tree),
553a850bde6SKent Overstreet 			trans, snapshot_tree_to_missing_snapshot,
5548e877caaSKent Overstreet 			"snapshot tree points to missing/incorrect snapshot:\n  %s",
5558e877caaSKent Overstreet 			(bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
5568e877caaSKent Overstreet 		ret = bch2_btree_delete_at(trans, iter, 0);
5578e877caaSKent Overstreet 		goto err;
5588e877caaSKent Overstreet 	}
5598e877caaSKent Overstreet 
5608e877caaSKent Overstreet 	ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol),
5618e877caaSKent Overstreet 				 false, 0, &subvol);
5628e877caaSKent Overstreet 	if (ret && !bch2_err_matches(ret, ENOENT))
5638e877caaSKent Overstreet 		goto err;
5648e877caaSKent Overstreet 
565b65db750SKent Overstreet 	if (fsck_err_on(ret,
566a850bde6SKent Overstreet 			trans, snapshot_tree_to_missing_subvol,
5678e877caaSKent Overstreet 			"snapshot tree points to missing subvolume:\n  %s",
5688e877caaSKent Overstreet 			(printbuf_reset(&buf),
5698e877caaSKent Overstreet 			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
5701c31b83aSKent Overstreet 	    fsck_err_on(!bch2_snapshot_is_ancestor(c,
5718e877caaSKent Overstreet 						le32_to_cpu(subvol.snapshot),
572b65db750SKent Overstreet 						root_id),
573a850bde6SKent Overstreet 			trans, snapshot_tree_to_wrong_subvol,
5748e877caaSKent Overstreet 			"snapshot tree points to subvolume that does not point to snapshot in this tree:\n  %s",
5758e877caaSKent Overstreet 			(printbuf_reset(&buf),
5768e877caaSKent Overstreet 			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
577b65db750SKent Overstreet 	    fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol),
578a850bde6SKent Overstreet 			trans, snapshot_tree_to_snapshot_subvol,
5798e877caaSKent Overstreet 			"snapshot tree points to snapshot subvolume:\n  %s",
5808e877caaSKent Overstreet 			(printbuf_reset(&buf),
5818e877caaSKent Overstreet 			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
5828e877caaSKent Overstreet 		struct bkey_i_snapshot_tree *u;
5838e877caaSKent Overstreet 		u32 subvol_id;
5848e877caaSKent Overstreet 
5858e877caaSKent Overstreet 		ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
586a292be3bSKent Overstreet 		bch_err_fn(c, ret);
587a292be3bSKent Overstreet 
588a292be3bSKent Overstreet 		if (bch2_err_matches(ret, ENOENT)) { /* nothing to be done here */
589a292be3bSKent Overstreet 			ret = 0;
590a292be3bSKent Overstreet 			goto err;
591a292be3bSKent Overstreet 		}
592a292be3bSKent Overstreet 
5938e877caaSKent Overstreet 		if (ret)
5948e877caaSKent Overstreet 			goto err;
5958e877caaSKent Overstreet 
5968e877caaSKent Overstreet 		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot_tree);
5978e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(u);
5988e877caaSKent Overstreet 		if (ret)
5998e877caaSKent Overstreet 			goto err;
6008e877caaSKent Overstreet 
6018e877caaSKent Overstreet 		u->v.master_subvol = cpu_to_le32(subvol_id);
6028e877caaSKent Overstreet 		st = snapshot_tree_i_to_s_c(u);
6038e877caaSKent Overstreet 	}
6048e877caaSKent Overstreet err:
6058e877caaSKent Overstreet fsck_err:
6068e877caaSKent Overstreet 	printbuf_exit(&buf);
6078e877caaSKent Overstreet 	return ret;
6088e877caaSKent Overstreet }
6098e877caaSKent Overstreet 
6108e877caaSKent Overstreet /*
6118e877caaSKent Overstreet  * For each snapshot_tree, make sure it points to the root of a snapshot tree
6128e877caaSKent Overstreet  * and that snapshot entry points back to it, or delete it.
6138e877caaSKent Overstreet  *
6148e877caaSKent Overstreet  * And, make sure it points to a subvolume within that snapshot tree, or correct
6158e877caaSKent Overstreet  * it to point to the oldest subvolume within that snapshot tree.
6168e877caaSKent Overstreet  */
bch2_check_snapshot_trees(struct bch_fs * c)6178e877caaSKent Overstreet int bch2_check_snapshot_trees(struct bch_fs *c)
6188e877caaSKent Overstreet {
61980eab7a7SKent Overstreet 	int ret = bch2_trans_run(c,
6206bd68ec2SKent Overstreet 		for_each_btree_key_commit(trans, iter,
6218e877caaSKent Overstreet 			BTREE_ID_snapshot_trees, POS_MIN,
6225dd8c60eSKent Overstreet 			BTREE_ITER_prefetch, k,
6233f0e297dSKent Overstreet 			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
6246bd68ec2SKent Overstreet 		check_snapshot_tree(trans, &iter, k)));
625cf904c8dSKent Overstreet 	bch_err_fn(c, ret);
6268e877caaSKent Overstreet 	return ret;
6278e877caaSKent Overstreet }
6288e877caaSKent Overstreet 
6298e877caaSKent Overstreet /*
6308e877caaSKent Overstreet  * Look up snapshot tree for @tree_id and find root,
6318e877caaSKent Overstreet  * make sure @snap_id is a descendent:
6328e877caaSKent Overstreet  */
snapshot_tree_ptr_good(struct btree_trans * trans,u32 snap_id,u32 tree_id)6338e877caaSKent Overstreet static int snapshot_tree_ptr_good(struct btree_trans *trans,
6348e877caaSKent Overstreet 				  u32 snap_id, u32 tree_id)
6358e877caaSKent Overstreet {
6368e877caaSKent Overstreet 	struct bch_snapshot_tree s_t;
6378e877caaSKent Overstreet 	int ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t);
6388e877caaSKent Overstreet 
6398e877caaSKent Overstreet 	if (bch2_err_matches(ret, ENOENT))
6408e877caaSKent Overstreet 		return 0;
6418e877caaSKent Overstreet 	if (ret)
6428e877caaSKent Overstreet 		return ret;
6438e877caaSKent Overstreet 
6448e877caaSKent Overstreet 	return bch2_snapshot_is_ancestor_early(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot));
6458e877caaSKent Overstreet }
6468e877caaSKent Overstreet 
bch2_snapshot_skiplist_get(struct bch_fs * c,u32 id)6478e877caaSKent Overstreet u32 bch2_snapshot_skiplist_get(struct bch_fs *c, u32 id)
6488e877caaSKent Overstreet {
6498e877caaSKent Overstreet 	const struct snapshot_t *s;
6508e877caaSKent Overstreet 
6518e877caaSKent Overstreet 	if (!id)
6528e877caaSKent Overstreet 		return 0;
6538e877caaSKent Overstreet 
6548e877caaSKent Overstreet 	rcu_read_lock();
6558e877caaSKent Overstreet 	s = snapshot_t(c, id);
6568e877caaSKent Overstreet 	if (s->parent)
6578e877caaSKent Overstreet 		id = bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth));
6588e877caaSKent Overstreet 	rcu_read_unlock();
6598e877caaSKent Overstreet 
6608e877caaSKent Overstreet 	return id;
6618e877caaSKent Overstreet }
6628e877caaSKent Overstreet 
snapshot_skiplist_good(struct btree_trans * trans,u32 id,struct bch_snapshot s)663097d4cc8SKent Overstreet static int snapshot_skiplist_good(struct btree_trans *trans, u32 id, struct bch_snapshot s)
6648e877caaSKent Overstreet {
6658e877caaSKent Overstreet 	unsigned i;
6668e877caaSKent Overstreet 
667097d4cc8SKent Overstreet 	for (i = 0; i < 3; i++)
668097d4cc8SKent Overstreet 		if (!s.parent) {
669097d4cc8SKent Overstreet 			if (s.skip[i])
6708e877caaSKent Overstreet 				return false;
671097d4cc8SKent Overstreet 		} else {
672097d4cc8SKent Overstreet 			if (!bch2_snapshot_is_ancestor_early(trans->c, id, le32_to_cpu(s.skip[i])))
6738e877caaSKent Overstreet 				return false;
6748e877caaSKent Overstreet 		}
6758e877caaSKent Overstreet 
6768e877caaSKent Overstreet 	return true;
6778e877caaSKent Overstreet }
6788e877caaSKent Overstreet 
6798e877caaSKent Overstreet /*
6808e877caaSKent Overstreet  * snapshot_tree pointer was incorrect: look up root snapshot node, make sure
6818e877caaSKent Overstreet  * its snapshot_tree pointer is correct (allocate new one if necessary), then
6828e877caaSKent Overstreet  * update this node's pointer to root node's pointer:
6838e877caaSKent Overstreet  */
snapshot_tree_ptr_repair(struct btree_trans * trans,struct btree_iter * iter,struct bkey_s_c k,struct bch_snapshot * s)6848e877caaSKent Overstreet static int snapshot_tree_ptr_repair(struct btree_trans *trans,
6858e877caaSKent Overstreet 				    struct btree_iter *iter,
6868e877caaSKent Overstreet 				    struct bkey_s_c k,
6878e877caaSKent Overstreet 				    struct bch_snapshot *s)
6888e877caaSKent Overstreet {
6898e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
6908e877caaSKent Overstreet 	struct btree_iter root_iter;
6918e877caaSKent Overstreet 	struct bch_snapshot_tree s_t;
6928e877caaSKent Overstreet 	struct bkey_s_c_snapshot root;
6938e877caaSKent Overstreet 	struct bkey_i_snapshot *u;
6948e877caaSKent Overstreet 	u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id;
6958e877caaSKent Overstreet 	int ret;
6968e877caaSKent Overstreet 
6978e877caaSKent Overstreet 	root = bch2_bkey_get_iter_typed(trans, &root_iter,
6988e877caaSKent Overstreet 			       BTREE_ID_snapshots, POS(0, root_id),
6995dd8c60eSKent Overstreet 			       BTREE_ITER_with_updates, snapshot);
7008e877caaSKent Overstreet 	ret = bkey_err(root);
7018e877caaSKent Overstreet 	if (ret)
7028e877caaSKent Overstreet 		goto err;
7038e877caaSKent Overstreet 
7048e877caaSKent Overstreet 	tree_id = le32_to_cpu(root.v->tree);
7058e877caaSKent Overstreet 
7068e877caaSKent Overstreet 	ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t);
7078e877caaSKent Overstreet 	if (ret && !bch2_err_matches(ret, ENOENT))
7088e877caaSKent Overstreet 		return ret;
7098e877caaSKent Overstreet 
7108e877caaSKent Overstreet 	if (ret || le32_to_cpu(s_t.root_snapshot) != root_id) {
7118e877caaSKent Overstreet 		u = bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot);
7128e877caaSKent Overstreet 		ret =   PTR_ERR_OR_ZERO(u) ?:
7138e877caaSKent Overstreet 			bch2_snapshot_tree_create(trans, root_id,
7148e877caaSKent Overstreet 				bch2_snapshot_tree_oldest_subvol(c, root_id),
7158e877caaSKent Overstreet 				&tree_id);
7168e877caaSKent Overstreet 		if (ret)
7178e877caaSKent Overstreet 			goto err;
7188e877caaSKent Overstreet 
7198e877caaSKent Overstreet 		u->v.tree = cpu_to_le32(tree_id);
7208e877caaSKent Overstreet 		if (k.k->p.offset == root_id)
7218e877caaSKent Overstreet 			*s = u->v;
7228e877caaSKent Overstreet 	}
7238e877caaSKent Overstreet 
7248e877caaSKent Overstreet 	if (k.k->p.offset != root_id) {
7258e877caaSKent Overstreet 		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
7268e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(u);
7278e877caaSKent Overstreet 		if (ret)
7288e877caaSKent Overstreet 			goto err;
7298e877caaSKent Overstreet 
7308e877caaSKent Overstreet 		u->v.tree = cpu_to_le32(tree_id);
7318e877caaSKent Overstreet 		*s = u->v;
7328e877caaSKent Overstreet 	}
7338e877caaSKent Overstreet err:
7348e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &root_iter);
7358e877caaSKent Overstreet 	return ret;
7368e877caaSKent Overstreet }
7378e877caaSKent Overstreet 
check_snapshot(struct btree_trans * trans,struct btree_iter * iter,struct bkey_s_c k)7388e877caaSKent Overstreet static int check_snapshot(struct btree_trans *trans,
7398e877caaSKent Overstreet 			  struct btree_iter *iter,
7408e877caaSKent Overstreet 			  struct bkey_s_c k)
7418e877caaSKent Overstreet {
7428e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
7438e877caaSKent Overstreet 	struct bch_snapshot s;
7448e877caaSKent Overstreet 	struct bch_subvolume subvol;
7458e877caaSKent Overstreet 	struct bch_snapshot v;
7468e877caaSKent Overstreet 	struct bkey_i_snapshot *u;
7478e877caaSKent Overstreet 	u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
7488e877caaSKent Overstreet 	u32 real_depth;
7498e877caaSKent Overstreet 	struct printbuf buf = PRINTBUF;
7508e877caaSKent Overstreet 	u32 i, id;
7518e877caaSKent Overstreet 	int ret = 0;
7528e877caaSKent Overstreet 
7538e877caaSKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot)
7548e877caaSKent Overstreet 		return 0;
7558e877caaSKent Overstreet 
7568e877caaSKent Overstreet 	memset(&s, 0, sizeof(s));
757c4333eb5SKent Overstreet 	memcpy(&s, k.v, min(sizeof(s), bkey_val_bytes(k.k)));
7588e877caaSKent Overstreet 
7598e877caaSKent Overstreet 	id = le32_to_cpu(s.parent);
7608e877caaSKent Overstreet 	if (id) {
7618e877caaSKent Overstreet 		ret = bch2_snapshot_lookup(trans, id, &v);
7628e877caaSKent Overstreet 		if (bch2_err_matches(ret, ENOENT))
7638e877caaSKent Overstreet 			bch_err(c, "snapshot with nonexistent parent:\n  %s",
7648e877caaSKent Overstreet 				(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
7658e877caaSKent Overstreet 		if (ret)
7668e877caaSKent Overstreet 			goto err;
7678e877caaSKent Overstreet 
7688e877caaSKent Overstreet 		if (le32_to_cpu(v.children[0]) != k.k->p.offset &&
7698e877caaSKent Overstreet 		    le32_to_cpu(v.children[1]) != k.k->p.offset) {
7708e877caaSKent Overstreet 			bch_err(c, "snapshot parent %u missing pointer to child %llu",
7718e877caaSKent Overstreet 				id, k.k->p.offset);
7728e877caaSKent Overstreet 			ret = -EINVAL;
7738e877caaSKent Overstreet 			goto err;
7748e877caaSKent Overstreet 		}
7758e877caaSKent Overstreet 	}
7768e877caaSKent Overstreet 
7778e877caaSKent Overstreet 	for (i = 0; i < 2 && s.children[i]; i++) {
7788e877caaSKent Overstreet 		id = le32_to_cpu(s.children[i]);
7798e877caaSKent Overstreet 
7808e877caaSKent Overstreet 		ret = bch2_snapshot_lookup(trans, id, &v);
7818e877caaSKent Overstreet 		if (bch2_err_matches(ret, ENOENT))
7828e877caaSKent Overstreet 			bch_err(c, "snapshot node %llu has nonexistent child %u",
7838e877caaSKent Overstreet 				k.k->p.offset, id);
7848e877caaSKent Overstreet 		if (ret)
7858e877caaSKent Overstreet 			goto err;
7868e877caaSKent Overstreet 
7878e877caaSKent Overstreet 		if (le32_to_cpu(v.parent) != k.k->p.offset) {
7888e877caaSKent Overstreet 			bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)",
7898e877caaSKent Overstreet 				id, le32_to_cpu(v.parent), k.k->p.offset);
7908e877caaSKent Overstreet 			ret = -EINVAL;
7918e877caaSKent Overstreet 			goto err;
7928e877caaSKent Overstreet 		}
7938e877caaSKent Overstreet 	}
7948e877caaSKent Overstreet 
795a292be3bSKent Overstreet 	bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
7968e877caaSKent Overstreet 		!BCH_SNAPSHOT_DELETED(&s);
7978e877caaSKent Overstreet 
7988e877caaSKent Overstreet 	if (should_have_subvol) {
7998e877caaSKent Overstreet 		id = le32_to_cpu(s.subvol);
8008e877caaSKent Overstreet 		ret = bch2_subvolume_get(trans, id, 0, false, &subvol);
8018e877caaSKent Overstreet 		if (bch2_err_matches(ret, ENOENT))
8028e877caaSKent Overstreet 			bch_err(c, "snapshot points to nonexistent subvolume:\n  %s",
8038e877caaSKent Overstreet 				(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
8048e877caaSKent Overstreet 		if (ret)
8058e877caaSKent Overstreet 			goto err;
8068e877caaSKent Overstreet 
8078e877caaSKent Overstreet 		if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) {
8088e877caaSKent Overstreet 			bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
8098e877caaSKent Overstreet 				k.k->p.offset);
8108e877caaSKent Overstreet 			ret = -EINVAL;
8118e877caaSKent Overstreet 			goto err;
8128e877caaSKent Overstreet 		}
8138e877caaSKent Overstreet 	} else {
814b65db750SKent Overstreet 		if (fsck_err_on(s.subvol,
815a850bde6SKent Overstreet 				trans, snapshot_should_not_have_subvol,
816b65db750SKent Overstreet 				"snapshot should not point to subvol:\n  %s",
8178e877caaSKent Overstreet 				(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
8188e877caaSKent Overstreet 			u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
8198e877caaSKent Overstreet 			ret = PTR_ERR_OR_ZERO(u);
8208e877caaSKent Overstreet 			if (ret)
8218e877caaSKent Overstreet 				goto err;
8228e877caaSKent Overstreet 
8238e877caaSKent Overstreet 			u->v.subvol = 0;
8248e877caaSKent Overstreet 			s = u->v;
8258e877caaSKent Overstreet 		}
8268e877caaSKent Overstreet 	}
8278e877caaSKent Overstreet 
8288e877caaSKent Overstreet 	ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree));
8298e877caaSKent Overstreet 	if (ret < 0)
8308e877caaSKent Overstreet 		goto err;
8318e877caaSKent Overstreet 
832a850bde6SKent Overstreet 	if (fsck_err_on(!ret,
833a850bde6SKent Overstreet 			trans, snapshot_to_bad_snapshot_tree,
834b65db750SKent Overstreet 			"snapshot points to missing/incorrect tree:\n  %s",
8358e877caaSKent Overstreet 			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
8368e877caaSKent Overstreet 		ret = snapshot_tree_ptr_repair(trans, iter, k, &s);
8378e877caaSKent Overstreet 		if (ret)
8388e877caaSKent Overstreet 			goto err;
8398e877caaSKent Overstreet 	}
8408e877caaSKent Overstreet 	ret = 0;
8418e877caaSKent Overstreet 
8428e877caaSKent Overstreet 	real_depth = bch2_snapshot_depth(c, parent_id);
8438e877caaSKent Overstreet 
844074cbcdaSKent Overstreet 	if (fsck_err_on(le32_to_cpu(s.depth) != real_depth,
845a850bde6SKent Overstreet 			trans, snapshot_bad_depth,
846b65db750SKent Overstreet 			"snapshot with incorrect depth field, should be %u:\n  %s",
847074cbcdaSKent Overstreet 			real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
8488e877caaSKent Overstreet 		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
8498e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(u);
8508e877caaSKent Overstreet 		if (ret)
8518e877caaSKent Overstreet 			goto err;
8528e877caaSKent Overstreet 
8538e877caaSKent Overstreet 		u->v.depth = cpu_to_le32(real_depth);
8548e877caaSKent Overstreet 		s = u->v;
8558e877caaSKent Overstreet 	}
8568e877caaSKent Overstreet 
857097d4cc8SKent Overstreet 	ret = snapshot_skiplist_good(trans, k.k->p.offset, s);
8588e877caaSKent Overstreet 	if (ret < 0)
8598e877caaSKent Overstreet 		goto err;
8608e877caaSKent Overstreet 
861a850bde6SKent Overstreet 	if (fsck_err_on(!ret,
862a850bde6SKent Overstreet 			trans, snapshot_bad_skiplist,
863b65db750SKent Overstreet 			"snapshot with bad skiplist field:\n  %s",
864074cbcdaSKent Overstreet 			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
8658e877caaSKent Overstreet 		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
8668e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(u);
8678e877caaSKent Overstreet 		if (ret)
8688e877caaSKent Overstreet 			goto err;
8698e877caaSKent Overstreet 
8708e877caaSKent Overstreet 		for (i = 0; i < ARRAY_SIZE(u->v.skip); i++)
8718e877caaSKent Overstreet 			u->v.skip[i] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent_id));
8728e877caaSKent Overstreet 
8738e877caaSKent Overstreet 		bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_le32);
8748e877caaSKent Overstreet 		s = u->v;
8758e877caaSKent Overstreet 	}
8768e877caaSKent Overstreet 	ret = 0;
8778e877caaSKent Overstreet err:
8788e877caaSKent Overstreet fsck_err:
8798e877caaSKent Overstreet 	printbuf_exit(&buf);
8808e877caaSKent Overstreet 	return ret;
8818e877caaSKent Overstreet }
8828e877caaSKent Overstreet 
bch2_check_snapshots(struct bch_fs * c)8838e877caaSKent Overstreet int bch2_check_snapshots(struct bch_fs *c)
8848e877caaSKent Overstreet {
8858e877caaSKent Overstreet 	/*
8868e877caaSKent Overstreet 	 * We iterate backwards as checking/fixing the depth field requires that
8878e877caaSKent Overstreet 	 * the parent's depth already be correct:
8888e877caaSKent Overstreet 	 */
88980eab7a7SKent Overstreet 	int ret = bch2_trans_run(c,
8906bd68ec2SKent Overstreet 		for_each_btree_key_reverse_commit(trans, iter,
8918e877caaSKent Overstreet 				BTREE_ID_snapshots, POS_MAX,
8925dd8c60eSKent Overstreet 				BTREE_ITER_prefetch, k,
8933f0e297dSKent Overstreet 				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
8946bd68ec2SKent Overstreet 			check_snapshot(trans, &iter, k)));
8958e877caaSKent Overstreet 	bch_err_fn(c, ret);
8968e877caaSKent Overstreet 	return ret;
8978e877caaSKent Overstreet }
8988e877caaSKent Overstreet 
check_snapshot_exists(struct btree_trans * trans,u32 id)899a292be3bSKent Overstreet static int check_snapshot_exists(struct btree_trans *trans, u32 id)
900a292be3bSKent Overstreet {
901a292be3bSKent Overstreet 	struct bch_fs *c = trans->c;
902a292be3bSKent Overstreet 
903a292be3bSKent Overstreet 	if (bch2_snapshot_equiv(c, id))
904a292be3bSKent Overstreet 		return 0;
905a292be3bSKent Overstreet 
9060af0b963SHongbo Li 	/* 0 is an invalid tree ID */
9070af0b963SHongbo Li 	u32 tree_id = 0;
908a292be3bSKent Overstreet 	int ret = bch2_snapshot_tree_create(trans, id, 0, &tree_id);
909a292be3bSKent Overstreet 	if (ret)
910a292be3bSKent Overstreet 		return ret;
911a292be3bSKent Overstreet 
912a292be3bSKent Overstreet 	struct bkey_i_snapshot *snapshot = bch2_trans_kmalloc(trans, sizeof(*snapshot));
913a292be3bSKent Overstreet 	ret = PTR_ERR_OR_ZERO(snapshot);
914a292be3bSKent Overstreet 	if (ret)
915a292be3bSKent Overstreet 		return ret;
916a292be3bSKent Overstreet 
917a292be3bSKent Overstreet 	bkey_snapshot_init(&snapshot->k_i);
918a292be3bSKent Overstreet 	snapshot->k.p		= POS(0, id);
919a292be3bSKent Overstreet 	snapshot->v.tree	= cpu_to_le32(tree_id);
920a292be3bSKent Overstreet 	snapshot->v.btime.lo	= cpu_to_le64(bch2_current_time(c));
921a292be3bSKent Overstreet 
922a292be3bSKent Overstreet 	return  bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?:
923a292be3bSKent Overstreet 		bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
924a292be3bSKent Overstreet 				   bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0) ?:
925a292be3bSKent Overstreet 		bch2_snapshot_set_equiv(trans, bkey_i_to_s_c(&snapshot->k_i));
926a292be3bSKent Overstreet }
927a292be3bSKent Overstreet 
928a292be3bSKent Overstreet /* Figure out which snapshot nodes belong in the same tree: */
929a292be3bSKent Overstreet struct snapshot_tree_reconstruct {
930a292be3bSKent Overstreet 	enum btree_id			btree;
931a292be3bSKent Overstreet 	struct bpos			cur_pos;
932a292be3bSKent Overstreet 	snapshot_id_list		cur_ids;
933a292be3bSKent Overstreet 	DARRAY(snapshot_id_list)	trees;
934a292be3bSKent Overstreet };
935a292be3bSKent Overstreet 
snapshot_tree_reconstruct_exit(struct snapshot_tree_reconstruct * r)936a292be3bSKent Overstreet static void snapshot_tree_reconstruct_exit(struct snapshot_tree_reconstruct *r)
937a292be3bSKent Overstreet {
938a292be3bSKent Overstreet 	darray_for_each(r->trees, i)
939a292be3bSKent Overstreet 		darray_exit(i);
940a292be3bSKent Overstreet 	darray_exit(&r->trees);
941a292be3bSKent Overstreet 	darray_exit(&r->cur_ids);
942a292be3bSKent Overstreet }
943a292be3bSKent Overstreet 
same_snapshot(struct snapshot_tree_reconstruct * r,struct bpos pos)944a292be3bSKent Overstreet static inline bool same_snapshot(struct snapshot_tree_reconstruct *r, struct bpos pos)
945a292be3bSKent Overstreet {
946a292be3bSKent Overstreet 	return r->btree == BTREE_ID_inodes
947a292be3bSKent Overstreet 		? r->cur_pos.offset == pos.offset
948a292be3bSKent Overstreet 		: r->cur_pos.inode == pos.inode;
949a292be3bSKent Overstreet }
950a292be3bSKent Overstreet 
snapshot_id_lists_have_common(snapshot_id_list * l,snapshot_id_list * r)951a292be3bSKent Overstreet static inline bool snapshot_id_lists_have_common(snapshot_id_list *l, snapshot_id_list *r)
952a292be3bSKent Overstreet {
953a292be3bSKent Overstreet 	darray_for_each(*l, i)
954a292be3bSKent Overstreet 		if (snapshot_list_has_id(r, *i))
955a292be3bSKent Overstreet 			return true;
956a292be3bSKent Overstreet 	return false;
957a292be3bSKent Overstreet }
958a292be3bSKent Overstreet 
snapshot_id_list_to_text(struct printbuf * out,snapshot_id_list * s)959a292be3bSKent Overstreet static void snapshot_id_list_to_text(struct printbuf *out, snapshot_id_list *s)
960a292be3bSKent Overstreet {
961a292be3bSKent Overstreet 	bool first = true;
962a292be3bSKent Overstreet 	darray_for_each(*s, i) {
963a292be3bSKent Overstreet 		if (!first)
964a292be3bSKent Overstreet 			prt_char(out, ' ');
965a292be3bSKent Overstreet 		first = false;
966a292be3bSKent Overstreet 		prt_printf(out, "%u", *i);
967a292be3bSKent Overstreet 	}
968a292be3bSKent Overstreet }
969a292be3bSKent Overstreet 
snapshot_tree_reconstruct_next(struct bch_fs * c,struct snapshot_tree_reconstruct * r)970a292be3bSKent Overstreet static int snapshot_tree_reconstruct_next(struct bch_fs *c, struct snapshot_tree_reconstruct *r)
971a292be3bSKent Overstreet {
972a292be3bSKent Overstreet 	if (r->cur_ids.nr) {
973a292be3bSKent Overstreet 		darray_for_each(r->trees, i)
974a292be3bSKent Overstreet 			if (snapshot_id_lists_have_common(i, &r->cur_ids)) {
975a292be3bSKent Overstreet 				int ret = snapshot_list_merge(c, i, &r->cur_ids);
976a292be3bSKent Overstreet 				if (ret)
977a292be3bSKent Overstreet 					return ret;
978a292be3bSKent Overstreet 				goto out;
979a292be3bSKent Overstreet 			}
980a292be3bSKent Overstreet 		darray_push(&r->trees, r->cur_ids);
981a292be3bSKent Overstreet 		darray_init(&r->cur_ids);
982a292be3bSKent Overstreet 	}
983a292be3bSKent Overstreet out:
984a292be3bSKent Overstreet 	r->cur_ids.nr = 0;
985a292be3bSKent Overstreet 	return 0;
986a292be3bSKent Overstreet }
987a292be3bSKent Overstreet 
get_snapshot_trees(struct bch_fs * c,struct snapshot_tree_reconstruct * r,struct bpos pos)988a292be3bSKent Overstreet static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct *r, struct bpos pos)
989a292be3bSKent Overstreet {
990a292be3bSKent Overstreet 	if (!same_snapshot(r, pos))
991a292be3bSKent Overstreet 		snapshot_tree_reconstruct_next(c, r);
992a292be3bSKent Overstreet 	r->cur_pos = pos;
993a292be3bSKent Overstreet 	return snapshot_list_add_nodup(c, &r->cur_ids, pos.snapshot);
994a292be3bSKent Overstreet }
995a292be3bSKent Overstreet 
bch2_reconstruct_snapshots(struct bch_fs * c)996a292be3bSKent Overstreet int bch2_reconstruct_snapshots(struct bch_fs *c)
997a292be3bSKent Overstreet {
998a292be3bSKent Overstreet 	struct btree_trans *trans = bch2_trans_get(c);
999a292be3bSKent Overstreet 	struct printbuf buf = PRINTBUF;
1000a292be3bSKent Overstreet 	struct snapshot_tree_reconstruct r = {};
1001a292be3bSKent Overstreet 	int ret = 0;
1002a292be3bSKent Overstreet 
1003a292be3bSKent Overstreet 	for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
1004a292be3bSKent Overstreet 		if (btree_type_has_snapshots(btree)) {
1005a292be3bSKent Overstreet 			r.btree = btree;
1006a292be3bSKent Overstreet 
1007a292be3bSKent Overstreet 			ret = for_each_btree_key(trans, iter, btree, POS_MIN,
10085dd8c60eSKent Overstreet 					BTREE_ITER_all_snapshots|BTREE_ITER_prefetch, k, ({
1009a292be3bSKent Overstreet 				get_snapshot_trees(c, &r, k.k->p);
1010a292be3bSKent Overstreet 			}));
1011a292be3bSKent Overstreet 			if (ret)
1012a292be3bSKent Overstreet 				goto err;
1013a292be3bSKent Overstreet 
1014a292be3bSKent Overstreet 			snapshot_tree_reconstruct_next(c, &r);
1015a292be3bSKent Overstreet 		}
1016a292be3bSKent Overstreet 	}
1017a292be3bSKent Overstreet 
1018a292be3bSKent Overstreet 	darray_for_each(r.trees, t) {
1019a292be3bSKent Overstreet 		printbuf_reset(&buf);
1020a292be3bSKent Overstreet 		snapshot_id_list_to_text(&buf, t);
1021a292be3bSKent Overstreet 
1022a292be3bSKent Overstreet 		darray_for_each(*t, id) {
1023a292be3bSKent Overstreet 			if (fsck_err_on(!bch2_snapshot_equiv(c, *id),
1024a850bde6SKent Overstreet 					trans, snapshot_node_missing,
102519391b92SKent Overstreet 					"snapshot node %u from tree %s missing, recreate?", *id, buf.buf)) {
1026a292be3bSKent Overstreet 				if (t->nr > 1) {
1027a292be3bSKent Overstreet 					bch_err(c, "cannot reconstruct snapshot trees with multiple nodes");
1028a292be3bSKent Overstreet 					ret = -BCH_ERR_fsck_repair_unimplemented;
1029a292be3bSKent Overstreet 					goto err;
1030a292be3bSKent Overstreet 				}
1031a292be3bSKent Overstreet 
1032a292be3bSKent Overstreet 				ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
1033a292be3bSKent Overstreet 						check_snapshot_exists(trans, *id));
1034a292be3bSKent Overstreet 				if (ret)
1035a292be3bSKent Overstreet 					goto err;
1036a292be3bSKent Overstreet 			}
1037a292be3bSKent Overstreet 		}
1038a292be3bSKent Overstreet 	}
1039a292be3bSKent Overstreet fsck_err:
1040a292be3bSKent Overstreet err:
1041a292be3bSKent Overstreet 	bch2_trans_put(trans);
1042a292be3bSKent Overstreet 	snapshot_tree_reconstruct_exit(&r);
1043a292be3bSKent Overstreet 	printbuf_exit(&buf);
1044a292be3bSKent Overstreet 	bch_err_fn(c, ret);
1045a292be3bSKent Overstreet 	return ret;
1046a292be3bSKent Overstreet }
1047a292be3bSKent Overstreet 
bch2_check_key_has_snapshot(struct btree_trans * trans,struct btree_iter * iter,struct bkey_s_c k)104808f50005SKent Overstreet int bch2_check_key_has_snapshot(struct btree_trans *trans,
104908f50005SKent Overstreet 				struct btree_iter *iter,
105008f50005SKent Overstreet 				struct bkey_s_c k)
105108f50005SKent Overstreet {
105208f50005SKent Overstreet 	struct bch_fs *c = trans->c;
105308f50005SKent Overstreet 	struct printbuf buf = PRINTBUF;
105408f50005SKent Overstreet 	int ret = 0;
105508f50005SKent Overstreet 
1056a850bde6SKent Overstreet 	if (fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot),
1057a850bde6SKent Overstreet 			trans, bkey_in_missing_snapshot,
105808f50005SKent Overstreet 			"key in missing snapshot %s, delete?",
105908f50005SKent Overstreet 			(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
106008f50005SKent Overstreet 		ret = bch2_btree_delete_at(trans, iter,
106108f50005SKent Overstreet 					    BTREE_UPDATE_internal_snapshot_node) ?: 1;
106208f50005SKent Overstreet fsck_err:
106308f50005SKent Overstreet 	printbuf_exit(&buf);
106408f50005SKent Overstreet 	return ret;
106508f50005SKent Overstreet }
106608f50005SKent Overstreet 
10678e877caaSKent Overstreet /*
10688e877caaSKent Overstreet  * Mark a snapshot as deleted, for future cleanup:
10698e877caaSKent Overstreet  */
bch2_snapshot_node_set_deleted(struct btree_trans * trans,u32 id)10708e877caaSKent Overstreet int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
10718e877caaSKent Overstreet {
10728e877caaSKent Overstreet 	struct btree_iter iter;
10738e877caaSKent Overstreet 	struct bkey_i_snapshot *s;
10748e877caaSKent Overstreet 	int ret = 0;
10758e877caaSKent Overstreet 
10768e877caaSKent Overstreet 	s = bch2_bkey_get_mut_typed(trans, &iter,
10778e877caaSKent Overstreet 				    BTREE_ID_snapshots, POS(0, id),
10788e877caaSKent Overstreet 				    0, snapshot);
10798e877caaSKent Overstreet 	ret = PTR_ERR_OR_ZERO(s);
10808e877caaSKent Overstreet 	if (unlikely(ret)) {
10818e877caaSKent Overstreet 		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
10828e877caaSKent Overstreet 					trans->c, "missing snapshot %u", id);
10838e877caaSKent Overstreet 		return ret;
10848e877caaSKent Overstreet 	}
10858e877caaSKent Overstreet 
10868e877caaSKent Overstreet 	/* already deleted? */
10878e877caaSKent Overstreet 	if (BCH_SNAPSHOT_DELETED(&s->v))
10888e877caaSKent Overstreet 		goto err;
10898e877caaSKent Overstreet 
10908e877caaSKent Overstreet 	SET_BCH_SNAPSHOT_DELETED(&s->v, true);
10918e877caaSKent Overstreet 	SET_BCH_SNAPSHOT_SUBVOL(&s->v, false);
10928e877caaSKent Overstreet 	s->v.subvol = 0;
10938e877caaSKent Overstreet err:
10948e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
10958e877caaSKent Overstreet 	return ret;
10968e877caaSKent Overstreet }
10978e877caaSKent Overstreet 
normalize_snapshot_child_pointers(struct bch_snapshot * s)1098f55d6e07SKent Overstreet static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s)
1099f55d6e07SKent Overstreet {
1100f55d6e07SKent Overstreet 	if (le32_to_cpu(s->children[0]) < le32_to_cpu(s->children[1]))
1101f55d6e07SKent Overstreet 		swap(s->children[0], s->children[1]);
1102f55d6e07SKent Overstreet }
1103f55d6e07SKent Overstreet 
bch2_snapshot_node_delete(struct btree_trans * trans,u32 id)110496dea3d5SKent Overstreet static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
11058e877caaSKent Overstreet {
11068e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
11078e877caaSKent Overstreet 	struct btree_iter iter, p_iter = (struct btree_iter) { NULL };
1108f55d6e07SKent Overstreet 	struct btree_iter c_iter = (struct btree_iter) { NULL };
11098e877caaSKent Overstreet 	struct btree_iter tree_iter = (struct btree_iter) { NULL };
11108e877caaSKent Overstreet 	struct bkey_s_c_snapshot s;
1111f55d6e07SKent Overstreet 	u32 parent_id, child_id;
11128e877caaSKent Overstreet 	unsigned i;
11138e877caaSKent Overstreet 	int ret = 0;
11148e877caaSKent Overstreet 
11158e877caaSKent Overstreet 	s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
11165dd8c60eSKent Overstreet 				     BTREE_ITER_intent, snapshot);
11178e877caaSKent Overstreet 	ret = bkey_err(s);
11188e877caaSKent Overstreet 	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
11198e877caaSKent Overstreet 				"missing snapshot %u", id);
11208e877caaSKent Overstreet 
11218e877caaSKent Overstreet 	if (ret)
11228e877caaSKent Overstreet 		goto err;
11238e877caaSKent Overstreet 
1124f55d6e07SKent Overstreet 	BUG_ON(s.v->children[1]);
1125f55d6e07SKent Overstreet 
11268e877caaSKent Overstreet 	parent_id = le32_to_cpu(s.v->parent);
1127f55d6e07SKent Overstreet 	child_id = le32_to_cpu(s.v->children[0]);
11288e877caaSKent Overstreet 
11298e877caaSKent Overstreet 	if (parent_id) {
11308e877caaSKent Overstreet 		struct bkey_i_snapshot *parent;
11318e877caaSKent Overstreet 
11328e877caaSKent Overstreet 		parent = bch2_bkey_get_mut_typed(trans, &p_iter,
11338e877caaSKent Overstreet 				     BTREE_ID_snapshots, POS(0, parent_id),
11348e877caaSKent Overstreet 				     0, snapshot);
11358e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(parent);
11368e877caaSKent Overstreet 		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
11378e877caaSKent Overstreet 					"missing snapshot %u", parent_id);
1138f55d6e07SKent Overstreet 		if (unlikely(ret))
11398e877caaSKent Overstreet 			goto err;
11408e877caaSKent Overstreet 
1141f55d6e07SKent Overstreet 		/* find entry in parent->children for node being deleted */
11428e877caaSKent Overstreet 		for (i = 0; i < 2; i++)
11438e877caaSKent Overstreet 			if (le32_to_cpu(parent->v.children[i]) == id)
11448e877caaSKent Overstreet 				break;
11458e877caaSKent Overstreet 
1146f55d6e07SKent Overstreet 		if (bch2_fs_inconsistent_on(i == 2, c,
1147f55d6e07SKent Overstreet 					"snapshot %u missing child pointer to %u",
1148f55d6e07SKent Overstreet 					parent_id, id))
1149f55d6e07SKent Overstreet 			goto err;
11508e877caaSKent Overstreet 
11510a11adfbSKent Overstreet 		parent->v.children[i] = cpu_to_le32(child_id);
1152f55d6e07SKent Overstreet 
1153f55d6e07SKent Overstreet 		normalize_snapshot_child_pointers(&parent->v);
1154f55d6e07SKent Overstreet 	}
1155f55d6e07SKent Overstreet 
1156f55d6e07SKent Overstreet 	if (child_id) {
1157f55d6e07SKent Overstreet 		struct bkey_i_snapshot *child;
1158f55d6e07SKent Overstreet 
1159f55d6e07SKent Overstreet 		child = bch2_bkey_get_mut_typed(trans, &c_iter,
1160f55d6e07SKent Overstreet 				     BTREE_ID_snapshots, POS(0, child_id),
1161f55d6e07SKent Overstreet 				     0, snapshot);
1162f55d6e07SKent Overstreet 		ret = PTR_ERR_OR_ZERO(child);
1163f55d6e07SKent Overstreet 		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
1164f55d6e07SKent Overstreet 					"missing snapshot %u", child_id);
1165f55d6e07SKent Overstreet 		if (unlikely(ret))
1166f55d6e07SKent Overstreet 			goto err;
1167f55d6e07SKent Overstreet 
1168f55d6e07SKent Overstreet 		child->v.parent = cpu_to_le32(parent_id);
1169f55d6e07SKent Overstreet 
1170f55d6e07SKent Overstreet 		if (!child->v.parent) {
1171f55d6e07SKent Overstreet 			child->v.skip[0] = 0;
1172f55d6e07SKent Overstreet 			child->v.skip[1] = 0;
1173f55d6e07SKent Overstreet 			child->v.skip[2] = 0;
1174f55d6e07SKent Overstreet 		}
1175f55d6e07SKent Overstreet 	}
1176f55d6e07SKent Overstreet 
1177f55d6e07SKent Overstreet 	if (!parent_id) {
11788e877caaSKent Overstreet 		/*
11798e877caaSKent Overstreet 		 * We're deleting the root of a snapshot tree: update the
11808e877caaSKent Overstreet 		 * snapshot_tree entry to point to the new root, or delete it if
11818e877caaSKent Overstreet 		 * this is the last snapshot ID in this tree:
11828e877caaSKent Overstreet 		 */
11838e877caaSKent Overstreet 		struct bkey_i_snapshot_tree *s_t;
11848e877caaSKent Overstreet 
11858e877caaSKent Overstreet 		BUG_ON(s.v->children[1]);
11868e877caaSKent Overstreet 
11878e877caaSKent Overstreet 		s_t = bch2_bkey_get_mut_typed(trans, &tree_iter,
11888e877caaSKent Overstreet 				BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)),
11898e877caaSKent Overstreet 				0, snapshot_tree);
11908e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(s_t);
11918e877caaSKent Overstreet 		if (ret)
11928e877caaSKent Overstreet 			goto err;
11938e877caaSKent Overstreet 
11948e877caaSKent Overstreet 		if (s.v->children[0]) {
11958e877caaSKent Overstreet 			s_t->v.root_snapshot = s.v->children[0];
11968e877caaSKent Overstreet 		} else {
11978e877caaSKent Overstreet 			s_t->k.type = KEY_TYPE_deleted;
11988e877caaSKent Overstreet 			set_bkey_val_u64s(&s_t->k, 0);
11998e877caaSKent Overstreet 		}
12008e877caaSKent Overstreet 	}
12018e877caaSKent Overstreet 
12028e877caaSKent Overstreet 	ret = bch2_btree_delete_at(trans, &iter, 0);
12038e877caaSKent Overstreet err:
12048e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &tree_iter);
12058e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &p_iter);
1206f55d6e07SKent Overstreet 	bch2_trans_iter_exit(trans, &c_iter);
12078e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
12088e877caaSKent Overstreet 	return ret;
12098e877caaSKent Overstreet }
12108e877caaSKent Overstreet 
create_snapids(struct btree_trans * trans,u32 parent,u32 tree,u32 * new_snapids,u32 * snapshot_subvols,unsigned nr_snapids)12118e877caaSKent Overstreet static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
12128e877caaSKent Overstreet 			  u32 *new_snapids,
12138e877caaSKent Overstreet 			  u32 *snapshot_subvols,
12148e877caaSKent Overstreet 			  unsigned nr_snapids)
12158e877caaSKent Overstreet {
12168e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
12178e877caaSKent Overstreet 	struct btree_iter iter;
12188e877caaSKent Overstreet 	struct bkey_i_snapshot *n;
12198e877caaSKent Overstreet 	struct bkey_s_c k;
12208e877caaSKent Overstreet 	unsigned i, j;
12218e877caaSKent Overstreet 	u32 depth = bch2_snapshot_depth(c, parent);
12228e877caaSKent Overstreet 	int ret;
12238e877caaSKent Overstreet 
12248e877caaSKent Overstreet 	bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
12255dd8c60eSKent Overstreet 			     POS_MIN, BTREE_ITER_intent);
12268e877caaSKent Overstreet 	k = bch2_btree_iter_peek(&iter);
12278e877caaSKent Overstreet 	ret = bkey_err(k);
12288e877caaSKent Overstreet 	if (ret)
12298e877caaSKent Overstreet 		goto err;
12308e877caaSKent Overstreet 
12318e877caaSKent Overstreet 	for (i = 0; i < nr_snapids; i++) {
12328e877caaSKent Overstreet 		k = bch2_btree_iter_prev_slot(&iter);
12338e877caaSKent Overstreet 		ret = bkey_err(k);
12348e877caaSKent Overstreet 		if (ret)
12358e877caaSKent Overstreet 			goto err;
12368e877caaSKent Overstreet 
12378e877caaSKent Overstreet 		if (!k.k || !k.k->p.offset) {
12388e877caaSKent Overstreet 			ret = -BCH_ERR_ENOSPC_snapshot_create;
12398e877caaSKent Overstreet 			goto err;
12408e877caaSKent Overstreet 		}
12418e877caaSKent Overstreet 
12428e877caaSKent Overstreet 		n = bch2_bkey_alloc(trans, &iter, 0, snapshot);
12438e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(n);
12448e877caaSKent Overstreet 		if (ret)
12458e877caaSKent Overstreet 			goto err;
12468e877caaSKent Overstreet 
12478e877caaSKent Overstreet 		n->v.flags	= 0;
12488e877caaSKent Overstreet 		n->v.parent	= cpu_to_le32(parent);
12498e877caaSKent Overstreet 		n->v.subvol	= cpu_to_le32(snapshot_subvols[i]);
12508e877caaSKent Overstreet 		n->v.tree	= cpu_to_le32(tree);
12518e877caaSKent Overstreet 		n->v.depth	= cpu_to_le32(depth);
1252d32088f2SKent Overstreet 		n->v.btime.lo	= cpu_to_le64(bch2_current_time(c));
1253d32088f2SKent Overstreet 		n->v.btime.hi	= 0;
12548e877caaSKent Overstreet 
12558e877caaSKent Overstreet 		for (j = 0; j < ARRAY_SIZE(n->v.skip); j++)
12568e877caaSKent Overstreet 			n->v.skip[j] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent));
12578e877caaSKent Overstreet 
12588e877caaSKent Overstreet 		bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_le32);
12598e877caaSKent Overstreet 		SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);
12608e877caaSKent Overstreet 
1261ad00bce0SKent Overstreet 		ret = __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
12628e877caaSKent Overstreet 					 bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0);
12638e877caaSKent Overstreet 		if (ret)
12648e877caaSKent Overstreet 			goto err;
12658e877caaSKent Overstreet 
12668e877caaSKent Overstreet 		new_snapids[i]	= iter.pos.offset;
1267eebe8a84SKent Overstreet 
1268eebe8a84SKent Overstreet 		mutex_lock(&c->snapshot_table_lock);
1269eebe8a84SKent Overstreet 		snapshot_t_mut(c, new_snapids[i])->equiv = new_snapids[i];
1270eebe8a84SKent Overstreet 		mutex_unlock(&c->snapshot_table_lock);
12718e877caaSKent Overstreet 	}
12728e877caaSKent Overstreet err:
12738e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
12748e877caaSKent Overstreet 	return ret;
12758e877caaSKent Overstreet }
12768e877caaSKent Overstreet 
12778e877caaSKent Overstreet /*
12788e877caaSKent Overstreet  * Create new snapshot IDs as children of an existing snapshot ID:
12798e877caaSKent Overstreet  */
bch2_snapshot_node_create_children(struct btree_trans * trans,u32 parent,u32 * new_snapids,u32 * snapshot_subvols,unsigned nr_snapids)12808e877caaSKent Overstreet static int bch2_snapshot_node_create_children(struct btree_trans *trans, u32 parent,
12818e877caaSKent Overstreet 			      u32 *new_snapids,
12828e877caaSKent Overstreet 			      u32 *snapshot_subvols,
12838e877caaSKent Overstreet 			      unsigned nr_snapids)
12848e877caaSKent Overstreet {
12858e877caaSKent Overstreet 	struct btree_iter iter;
12868e877caaSKent Overstreet 	struct bkey_i_snapshot *n_parent;
12878e877caaSKent Overstreet 	int ret = 0;
12888e877caaSKent Overstreet 
12898e877caaSKent Overstreet 	n_parent = bch2_bkey_get_mut_typed(trans, &iter,
12908e877caaSKent Overstreet 			BTREE_ID_snapshots, POS(0, parent),
12918e877caaSKent Overstreet 			0, snapshot);
12928e877caaSKent Overstreet 	ret = PTR_ERR_OR_ZERO(n_parent);
12938e877caaSKent Overstreet 	if (unlikely(ret)) {
12948e877caaSKent Overstreet 		if (bch2_err_matches(ret, ENOENT))
12958e877caaSKent Overstreet 			bch_err(trans->c, "snapshot %u not found", parent);
12968e877caaSKent Overstreet 		return ret;
12978e877caaSKent Overstreet 	}
12988e877caaSKent Overstreet 
12998e877caaSKent Overstreet 	if (n_parent->v.children[0] || n_parent->v.children[1]) {
13008e877caaSKent Overstreet 		bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children");
13018e877caaSKent Overstreet 		ret = -EINVAL;
13028e877caaSKent Overstreet 		goto err;
13038e877caaSKent Overstreet 	}
13048e877caaSKent Overstreet 
13058e877caaSKent Overstreet 	ret = create_snapids(trans, parent, le32_to_cpu(n_parent->v.tree),
13068e877caaSKent Overstreet 			     new_snapids, snapshot_subvols, nr_snapids);
13078e877caaSKent Overstreet 	if (ret)
13088e877caaSKent Overstreet 		goto err;
13098e877caaSKent Overstreet 
13108e877caaSKent Overstreet 	n_parent->v.children[0] = cpu_to_le32(new_snapids[0]);
13118e877caaSKent Overstreet 	n_parent->v.children[1] = cpu_to_le32(new_snapids[1]);
13128e877caaSKent Overstreet 	n_parent->v.subvol = 0;
13138e877caaSKent Overstreet 	SET_BCH_SNAPSHOT_SUBVOL(&n_parent->v, false);
13148e877caaSKent Overstreet err:
13158e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
13168e877caaSKent Overstreet 	return ret;
13178e877caaSKent Overstreet }
13188e877caaSKent Overstreet 
13198e877caaSKent Overstreet /*
13208e877caaSKent Overstreet  * Create a snapshot node that is the root of a new tree:
13218e877caaSKent Overstreet  */
bch2_snapshot_node_create_tree(struct btree_trans * trans,u32 * new_snapids,u32 * snapshot_subvols,unsigned nr_snapids)13228e877caaSKent Overstreet static int bch2_snapshot_node_create_tree(struct btree_trans *trans,
13238e877caaSKent Overstreet 			      u32 *new_snapids,
13248e877caaSKent Overstreet 			      u32 *snapshot_subvols,
13258e877caaSKent Overstreet 			      unsigned nr_snapids)
13268e877caaSKent Overstreet {
13278e877caaSKent Overstreet 	struct bkey_i_snapshot_tree *n_tree;
13288e877caaSKent Overstreet 	int ret;
13298e877caaSKent Overstreet 
13308e877caaSKent Overstreet 	n_tree = __bch2_snapshot_tree_create(trans);
13318e877caaSKent Overstreet 	ret =   PTR_ERR_OR_ZERO(n_tree) ?:
13328e877caaSKent Overstreet 		create_snapids(trans, 0, n_tree->k.p.offset,
13338e877caaSKent Overstreet 			     new_snapids, snapshot_subvols, nr_snapids);
13348e877caaSKent Overstreet 	if (ret)
13358e877caaSKent Overstreet 		return ret;
13368e877caaSKent Overstreet 
13378e877caaSKent Overstreet 	n_tree->v.master_subvol	= cpu_to_le32(snapshot_subvols[0]);
13388e877caaSKent Overstreet 	n_tree->v.root_snapshot	= cpu_to_le32(new_snapids[0]);
13398e877caaSKent Overstreet 	return 0;
13408e877caaSKent Overstreet }
13418e877caaSKent Overstreet 
bch2_snapshot_node_create(struct btree_trans * trans,u32 parent,u32 * new_snapids,u32 * snapshot_subvols,unsigned nr_snapids)13428e877caaSKent Overstreet int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
13438e877caaSKent Overstreet 			      u32 *new_snapids,
13448e877caaSKent Overstreet 			      u32 *snapshot_subvols,
13458e877caaSKent Overstreet 			      unsigned nr_snapids)
13468e877caaSKent Overstreet {
13478e877caaSKent Overstreet 	BUG_ON((parent == 0) != (nr_snapids == 1));
13488e877caaSKent Overstreet 	BUG_ON((parent != 0) != (nr_snapids == 2));
13498e877caaSKent Overstreet 
13508e877caaSKent Overstreet 	return parent
13518e877caaSKent Overstreet 		? bch2_snapshot_node_create_children(trans, parent,
13528e877caaSKent Overstreet 				new_snapids, snapshot_subvols, nr_snapids)
13538e877caaSKent Overstreet 		: bch2_snapshot_node_create_tree(trans,
13548e877caaSKent Overstreet 				new_snapids, snapshot_subvols, nr_snapids);
13558e877caaSKent Overstreet 
13568e877caaSKent Overstreet }
13578e877caaSKent Overstreet 
13588e877caaSKent Overstreet /*
13598e877caaSKent Overstreet  * If we have an unlinked inode in an internal snapshot node, and the inode
13608e877caaSKent Overstreet  * really has been deleted in all child snapshots, how does this get cleaned up?
13618e877caaSKent Overstreet  *
13628e877caaSKent Overstreet  * first there is the problem of how keys that have been overwritten in all
13638e877caaSKent Overstreet  * child snapshots get deleted (unimplemented?), but inodes may perhaps be
13648e877caaSKent Overstreet  * special?
13658e877caaSKent Overstreet  *
13668e877caaSKent Overstreet  * also: unlinked inode in internal snapshot appears to not be getting deleted
13678e877caaSKent Overstreet  * correctly if inode doesn't exist in leaf snapshots
1368f55d6e07SKent Overstreet  *
1369f55d6e07SKent Overstreet  * solution:
1370f55d6e07SKent Overstreet  *
1371f55d6e07SKent Overstreet  * for a key in an interior snapshot node that needs work to be done that
1372f55d6e07SKent Overstreet  * requires it to be mutated: iterate over all descendent leaf nodes and copy
1373f55d6e07SKent Overstreet  * that key to snapshot leaf nodes, where we can mutate it
13748e877caaSKent Overstreet  */
13758e877caaSKent Overstreet 
delete_dead_snapshots_process_key(struct btree_trans * trans,struct btree_iter * iter,struct bkey_s_c k,snapshot_id_list * deleted,snapshot_id_list * equiv_seen,struct bpos * last_pos)137682af5cebSKent Overstreet static int delete_dead_snapshots_process_key(struct btree_trans *trans,
13778e877caaSKent Overstreet 			       struct btree_iter *iter,
13788e877caaSKent Overstreet 			       struct bkey_s_c k,
13798e877caaSKent Overstreet 			       snapshot_id_list *deleted,
13808e877caaSKent Overstreet 			       snapshot_id_list *equiv_seen,
13818e877caaSKent Overstreet 			       struct bpos *last_pos)
13828e877caaSKent Overstreet {
138308f50005SKent Overstreet 	int ret = bch2_check_key_has_snapshot(trans, iter, k);
138408f50005SKent Overstreet 	if (ret)
138508f50005SKent Overstreet 		return ret < 0 ? ret : 0;
138608f50005SKent Overstreet 
13878e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
13888e877caaSKent Overstreet 	u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
138982af5cebSKent Overstreet 	if (!equiv) /* key for invalid snapshot node, but we chose not to delete */
139082af5cebSKent Overstreet 		return 0;
13918e877caaSKent Overstreet 
13928e877caaSKent Overstreet 	if (!bkey_eq(k.k->p, *last_pos))
13938e877caaSKent Overstreet 		equiv_seen->nr = 0;
13948e877caaSKent Overstreet 
139582af5cebSKent Overstreet 	if (snapshot_list_has_id(deleted, k.k->p.snapshot))
13968e877caaSKent Overstreet 		return bch2_btree_delete_at(trans, iter,
13975dd8c60eSKent Overstreet 					    BTREE_UPDATE_internal_snapshot_node);
13988e877caaSKent Overstreet 
139982af5cebSKent Overstreet 	if (!bpos_eq(*last_pos, k.k->p) &&
140082af5cebSKent Overstreet 	    snapshot_list_has_id(equiv_seen, equiv))
140182af5cebSKent Overstreet 		return bch2_btree_delete_at(trans, iter,
140282af5cebSKent Overstreet 					    BTREE_UPDATE_internal_snapshot_node);
140382af5cebSKent Overstreet 
140482af5cebSKent Overstreet 	*last_pos = k.k->p;
140582af5cebSKent Overstreet 
140608f50005SKent Overstreet 	ret = snapshot_list_add_nodup(c, equiv_seen, equiv);
140782af5cebSKent Overstreet 	if (ret)
140882af5cebSKent Overstreet 		return ret;
1409f55d6e07SKent Overstreet 
1410f55d6e07SKent Overstreet 	/*
1411f55d6e07SKent Overstreet 	 * When we have a linear chain of snapshot nodes, we consider
1412f55d6e07SKent Overstreet 	 * those to form an equivalence class: we're going to collapse
1413f55d6e07SKent Overstreet 	 * them all down to a single node, and keep the leaf-most node -
1414f55d6e07SKent Overstreet 	 * which has the same id as the equivalence class id.
1415f55d6e07SKent Overstreet 	 *
1416f55d6e07SKent Overstreet 	 * If there are multiple keys in different snapshots at the same
1417f55d6e07SKent Overstreet 	 * position, we're only going to keep the one in the newest
141882af5cebSKent Overstreet 	 * snapshot (we delete the others above) - the rest have been
141982af5cebSKent Overstreet 	 * overwritten and are redundant, and for the key we're going to keep we
142082af5cebSKent Overstreet 	 * need to move it to the equivalance class ID if it's not there
142182af5cebSKent Overstreet 	 * already.
1422f55d6e07SKent Overstreet 	 */
1423f55d6e07SKent Overstreet 	if (equiv != k.k->p.snapshot) {
1424f55d6e07SKent Overstreet 		struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
142582af5cebSKent Overstreet 		int ret = PTR_ERR_OR_ZERO(new);
1426f55d6e07SKent Overstreet 		if (ret)
1427f55d6e07SKent Overstreet 			return ret;
1428f55d6e07SKent Overstreet 
1429f55d6e07SKent Overstreet 		new->k.p.snapshot = equiv;
1430f55d6e07SKent Overstreet 
143182af5cebSKent Overstreet 		struct btree_iter new_iter;
1432f55d6e07SKent Overstreet 		bch2_trans_iter_init(trans, &new_iter, iter->btree_id, new->k.p,
14335dd8c60eSKent Overstreet 				     BTREE_ITER_all_snapshots|
14345dd8c60eSKent Overstreet 				     BTREE_ITER_cached|
14355dd8c60eSKent Overstreet 				     BTREE_ITER_intent);
1436f55d6e07SKent Overstreet 
1437f55d6e07SKent Overstreet 		ret =   bch2_btree_iter_traverse(&new_iter) ?:
1438f55d6e07SKent Overstreet 			bch2_trans_update(trans, &new_iter, new,
14395dd8c60eSKent Overstreet 					BTREE_UPDATE_internal_snapshot_node) ?:
1440f55d6e07SKent Overstreet 			bch2_btree_delete_at(trans, iter,
14415dd8c60eSKent Overstreet 					BTREE_UPDATE_internal_snapshot_node);
1442f55d6e07SKent Overstreet 		bch2_trans_iter_exit(trans, &new_iter);
1443f55d6e07SKent Overstreet 		if (ret)
1444f55d6e07SKent Overstreet 			return ret;
1445f55d6e07SKent Overstreet 	}
1446f55d6e07SKent Overstreet 
1447f55d6e07SKent Overstreet 	return 0;
1448f55d6e07SKent Overstreet }
1449f55d6e07SKent Overstreet 
bch2_snapshot_needs_delete(struct btree_trans * trans,struct bkey_s_c k)1450b0b5bbf9SKent Overstreet static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c k)
14518e877caaSKent Overstreet {
14528e877caaSKent Overstreet 	struct bkey_s_c_snapshot snap;
14538e877caaSKent Overstreet 	u32 children[2];
14548e877caaSKent Overstreet 	int ret;
14558e877caaSKent Overstreet 
14568e877caaSKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot)
14578e877caaSKent Overstreet 		return 0;
14588e877caaSKent Overstreet 
14598e877caaSKent Overstreet 	snap = bkey_s_c_to_snapshot(k);
14608e877caaSKent Overstreet 	if (BCH_SNAPSHOT_DELETED(snap.v) ||
14618e877caaSKent Overstreet 	    BCH_SNAPSHOT_SUBVOL(snap.v))
14628e877caaSKent Overstreet 		return 0;
14638e877caaSKent Overstreet 
14648e877caaSKent Overstreet 	children[0] = le32_to_cpu(snap.v->children[0]);
14658e877caaSKent Overstreet 	children[1] = le32_to_cpu(snap.v->children[1]);
14668e877caaSKent Overstreet 
14678e877caaSKent Overstreet 	ret   = bch2_snapshot_live(trans, children[0]) ?:
14688e877caaSKent Overstreet 		bch2_snapshot_live(trans, children[1]);
14698e877caaSKent Overstreet 	if (ret < 0)
14708e877caaSKent Overstreet 		return ret;
1471b0b5bbf9SKent Overstreet 	return !ret;
1472b0b5bbf9SKent Overstreet }
14738e877caaSKent Overstreet 
1474b0b5bbf9SKent Overstreet /*
1475b0b5bbf9SKent Overstreet  * For a given snapshot, if it doesn't have a subvolume that points to it, and
1476b0b5bbf9SKent Overstreet  * it doesn't have child snapshot nodes - it's now redundant and we can mark it
1477b0b5bbf9SKent Overstreet  * as deleted.
1478b0b5bbf9SKent Overstreet  */
bch2_delete_redundant_snapshot(struct btree_trans * trans,struct bkey_s_c k)1479b0b5bbf9SKent Overstreet static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct bkey_s_c k)
1480b0b5bbf9SKent Overstreet {
1481b0b5bbf9SKent Overstreet 	int ret = bch2_snapshot_needs_delete(trans, k);
1482b0b5bbf9SKent Overstreet 
1483b0b5bbf9SKent Overstreet 	return ret <= 0
1484b0b5bbf9SKent Overstreet 		? ret
1485b0b5bbf9SKent Overstreet 		: bch2_snapshot_node_set_deleted(trans, k.k->p.offset);
14868e877caaSKent Overstreet }
14878e877caaSKent Overstreet 
bch2_snapshot_nth_parent_skip(struct bch_fs * c,u32 id,u32 n,snapshot_id_list * skip)1488f55d6e07SKent Overstreet static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
1489f55d6e07SKent Overstreet 						snapshot_id_list *skip)
1490f55d6e07SKent Overstreet {
1491f55d6e07SKent Overstreet 	rcu_read_lock();
14921e2d3999SKent Overstreet 	while (snapshot_list_has_id(skip, id))
14931e2d3999SKent Overstreet 		id = __bch2_snapshot_parent(c, id);
14941e2d3999SKent Overstreet 
1495f55d6e07SKent Overstreet 	while (n--) {
1496f55d6e07SKent Overstreet 		do {
1497f55d6e07SKent Overstreet 			id = __bch2_snapshot_parent(c, id);
1498f55d6e07SKent Overstreet 		} while (snapshot_list_has_id(skip, id));
1499f55d6e07SKent Overstreet 	}
1500f55d6e07SKent Overstreet 	rcu_read_unlock();
1501f55d6e07SKent Overstreet 
1502f55d6e07SKent Overstreet 	return id;
1503f55d6e07SKent Overstreet }
1504f55d6e07SKent Overstreet 
bch2_fix_child_of_deleted_snapshot(struct btree_trans * trans,struct btree_iter * iter,struct bkey_s_c k,snapshot_id_list * deleted)1505f55d6e07SKent Overstreet static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
1506f55d6e07SKent Overstreet 					      struct btree_iter *iter, struct bkey_s_c k,
1507f55d6e07SKent Overstreet 					      snapshot_id_list *deleted)
1508f55d6e07SKent Overstreet {
1509f55d6e07SKent Overstreet 	struct bch_fs *c = trans->c;
1510f55d6e07SKent Overstreet 	u32 nr_deleted_ancestors = 0;
1511f55d6e07SKent Overstreet 	struct bkey_i_snapshot *s;
1512f55d6e07SKent Overstreet 	int ret;
1513f55d6e07SKent Overstreet 
1514f55d6e07SKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot)
1515f55d6e07SKent Overstreet 		return 0;
1516f55d6e07SKent Overstreet 
1517f55d6e07SKent Overstreet 	if (snapshot_list_has_id(deleted, k.k->p.offset))
1518f55d6e07SKent Overstreet 		return 0;
1519f55d6e07SKent Overstreet 
1520f55d6e07SKent Overstreet 	s = bch2_bkey_make_mut_noupdate_typed(trans, k, snapshot);
1521f55d6e07SKent Overstreet 	ret = PTR_ERR_OR_ZERO(s);
1522f55d6e07SKent Overstreet 	if (ret)
1523f55d6e07SKent Overstreet 		return ret;
1524f55d6e07SKent Overstreet 
1525f55d6e07SKent Overstreet 	darray_for_each(*deleted, i)
1526f55d6e07SKent Overstreet 		nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, *i);
1527f55d6e07SKent Overstreet 
1528f55d6e07SKent Overstreet 	if (!nr_deleted_ancestors)
1529f55d6e07SKent Overstreet 		return 0;
1530f55d6e07SKent Overstreet 
1531f55d6e07SKent Overstreet 	le32_add_cpu(&s->v.depth, -nr_deleted_ancestors);
1532f55d6e07SKent Overstreet 
1533f55d6e07SKent Overstreet 	if (!s->v.depth) {
1534f55d6e07SKent Overstreet 		s->v.skip[0] = 0;
1535f55d6e07SKent Overstreet 		s->v.skip[1] = 0;
1536f55d6e07SKent Overstreet 		s->v.skip[2] = 0;
1537f55d6e07SKent Overstreet 	} else {
1538f55d6e07SKent Overstreet 		u32 depth = le32_to_cpu(s->v.depth);
1539f55d6e07SKent Overstreet 		u32 parent = bch2_snapshot_parent(c, s->k.p.offset);
1540f55d6e07SKent Overstreet 
1541f55d6e07SKent Overstreet 		for (unsigned j = 0; j < ARRAY_SIZE(s->v.skip); j++) {
1542f55d6e07SKent Overstreet 			u32 id = le32_to_cpu(s->v.skip[j]);
1543f55d6e07SKent Overstreet 
1544f55d6e07SKent Overstreet 			if (snapshot_list_has_id(deleted, id)) {
15455394fe94SKent Overstreet 				id = bch2_snapshot_nth_parent_skip(c,
1546f55d6e07SKent Overstreet 							parent,
15475394fe94SKent Overstreet 							depth > 1
15485394fe94SKent Overstreet 							? get_random_u32_below(depth - 1)
15495394fe94SKent Overstreet 							: 0,
15505394fe94SKent Overstreet 							deleted);
1551f55d6e07SKent Overstreet 				s->v.skip[j] = cpu_to_le32(id);
1552f55d6e07SKent Overstreet 			}
1553f55d6e07SKent Overstreet 		}
1554f55d6e07SKent Overstreet 
1555f55d6e07SKent Overstreet 		bubble_sort(s->v.skip, ARRAY_SIZE(s->v.skip), cmp_le32);
1556f55d6e07SKent Overstreet 	}
1557f55d6e07SKent Overstreet 
1558f55d6e07SKent Overstreet 	return bch2_trans_update(trans, iter, &s->k_i, 0);
1559f55d6e07SKent Overstreet }
1560f55d6e07SKent Overstreet 
bch2_delete_dead_snapshots(struct bch_fs * c)15618e877caaSKent Overstreet int bch2_delete_dead_snapshots(struct bch_fs *c)
15628e877caaSKent Overstreet {
15636bd68ec2SKent Overstreet 	struct btree_trans *trans;
15648e877caaSKent Overstreet 	snapshot_id_list deleted = { 0 };
1565f55d6e07SKent Overstreet 	snapshot_id_list deleted_interior = { 0 };
15668e877caaSKent Overstreet 	int ret = 0;
15678e877caaSKent Overstreet 
15683c471b65SKent Overstreet 	if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
1569b0b5bbf9SKent Overstreet 		return 0;
1570b0b5bbf9SKent Overstreet 
15716bd68ec2SKent Overstreet 	trans = bch2_trans_get(c);
15728e877caaSKent Overstreet 
15738e877caaSKent Overstreet 	/*
15748e877caaSKent Overstreet 	 * For every snapshot node: If we have no live children and it's not
15758e877caaSKent Overstreet 	 * pointed to by a subvolume, delete it:
15768e877caaSKent Overstreet 	 */
15776bd68ec2SKent Overstreet 	ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots,
15788e877caaSKent Overstreet 			POS_MIN, 0, k,
15798e877caaSKent Overstreet 			NULL, NULL, 0,
1580b0b5bbf9SKent Overstreet 		bch2_delete_redundant_snapshot(trans, k));
1581e46c181aSKent Overstreet 	bch_err_msg(c, ret, "deleting redundant snapshots");
1582cf904c8dSKent Overstreet 	if (ret)
15838e877caaSKent Overstreet 		goto err;
15848e877caaSKent Overstreet 
15855028b907SKent Overstreet 	ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
15868e877caaSKent Overstreet 				 POS_MIN, 0, k,
15876bd68ec2SKent Overstreet 		bch2_snapshot_set_equiv(trans, k));
1588e46c181aSKent Overstreet 	bch_err_msg(c, ret, "in bch2_snapshots_set_equiv");
1589cf904c8dSKent Overstreet 	if (ret)
15908e877caaSKent Overstreet 		goto err;
15918e877caaSKent Overstreet 
15925028b907SKent Overstreet 	ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
159327b2df98SKent Overstreet 				 POS_MIN, 0, k, ({
15948e877caaSKent Overstreet 		if (k.k->type != KEY_TYPE_snapshot)
15958e877caaSKent Overstreet 			continue;
15968e877caaSKent Overstreet 
159780eab7a7SKent Overstreet 		BCH_SNAPSHOT_DELETED(bkey_s_c_to_snapshot(k).v)
159827b2df98SKent Overstreet 			? snapshot_list_add(c, &deleted, k.k->p.offset)
159927b2df98SKent Overstreet 			: 0;
160027b2df98SKent Overstreet 	}));
16018e877caaSKent Overstreet 	bch_err_msg(c, ret, "walking snapshots");
1602cf904c8dSKent Overstreet 	if (ret)
16038e877caaSKent Overstreet 		goto err;
16048e877caaSKent Overstreet 
160582af5cebSKent Overstreet 	for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
16068e877caaSKent Overstreet 		struct bpos last_pos = POS_MIN;
16078e877caaSKent Overstreet 		snapshot_id_list equiv_seen = { 0 };
1608f55d6e07SKent Overstreet 		struct disk_reservation res = { 0 };
16098e877caaSKent Overstreet 
161082af5cebSKent Overstreet 		if (!btree_type_has_snapshots(btree))
16112e7acdfbSKent Overstreet 			continue;
16122e7acdfbSKent Overstreet 
16136bd68ec2SKent Overstreet 		ret = for_each_btree_key_commit(trans, iter,
161482af5cebSKent Overstreet 				btree, POS_MIN,
16155dd8c60eSKent Overstreet 				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
1616cb52d23eSKent Overstreet 				&res, NULL, BCH_TRANS_COMMIT_no_enospc,
161782af5cebSKent Overstreet 			delete_dead_snapshots_process_key(trans, &iter, k, &deleted,
161882af5cebSKent Overstreet 							  &equiv_seen, &last_pos));
16198e877caaSKent Overstreet 
1620f55d6e07SKent Overstreet 		bch2_disk_reservation_put(c, &res);
16218e877caaSKent Overstreet 		darray_exit(&equiv_seen);
16228e877caaSKent Overstreet 
16238e877caaSKent Overstreet 		bch_err_msg(c, ret, "deleting keys from dying snapshots");
1624cf904c8dSKent Overstreet 		if (ret)
16258e877caaSKent Overstreet 			goto err;
16268e877caaSKent Overstreet 	}
16278e877caaSKent Overstreet 
16280dd092bfSKent Overstreet 	bch2_trans_unlock(trans);
162937fad949SKent Overstreet 	down_write(&c->snapshot_create_lock);
163037fad949SKent Overstreet 
16315028b907SKent Overstreet 	ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
163227b2df98SKent Overstreet 				 POS_MIN, 0, k, ({
1633f55d6e07SKent Overstreet 		u32 snapshot = k.k->p.offset;
1634f55d6e07SKent Overstreet 		u32 equiv = bch2_snapshot_equiv(c, snapshot);
16358e877caaSKent Overstreet 
163627b2df98SKent Overstreet 		equiv != snapshot
163727b2df98SKent Overstreet 			? snapshot_list_add(c, &deleted_interior, snapshot)
163827b2df98SKent Overstreet 			: 0;
163927b2df98SKent Overstreet 	}));
1640f55d6e07SKent Overstreet 
164127b2df98SKent Overstreet 	bch_err_msg(c, ret, "walking snapshots");
1642cf904c8dSKent Overstreet 	if (ret)
164337fad949SKent Overstreet 		goto err_create_lock;
164437fad949SKent Overstreet 
1645f55d6e07SKent Overstreet 	/*
1646f55d6e07SKent Overstreet 	 * Fixing children of deleted snapshots can't be done completely
1647f55d6e07SKent Overstreet 	 * atomically, if we crash between here and when we delete the interior
1648f55d6e07SKent Overstreet 	 * nodes some depth fields will be off:
1649f55d6e07SKent Overstreet 	 */
16506bd68ec2SKent Overstreet 	ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
16515dd8c60eSKent Overstreet 				  BTREE_ITER_intent, k,
1652cb52d23eSKent Overstreet 				  NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
16536bd68ec2SKent Overstreet 		bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior));
1654f55d6e07SKent Overstreet 	if (ret)
165537fad949SKent Overstreet 		goto err_create_lock;
1656f55d6e07SKent Overstreet 
1657f55d6e07SKent Overstreet 	darray_for_each(deleted, i) {
16586bd68ec2SKent Overstreet 		ret = commit_do(trans, NULL, NULL, 0,
16596bd68ec2SKent Overstreet 			bch2_snapshot_node_delete(trans, *i));
1660f55d6e07SKent Overstreet 		bch_err_msg(c, ret, "deleting snapshot %u", *i);
1661cf904c8dSKent Overstreet 		if (ret)
166237fad949SKent Overstreet 			goto err_create_lock;
1663f55d6e07SKent Overstreet 	}
1664f55d6e07SKent Overstreet 
1665f55d6e07SKent Overstreet 	darray_for_each(deleted_interior, i) {
16666bd68ec2SKent Overstreet 		ret = commit_do(trans, NULL, NULL, 0,
16676bd68ec2SKent Overstreet 			bch2_snapshot_node_delete(trans, *i));
1668f55d6e07SKent Overstreet 		bch_err_msg(c, ret, "deleting snapshot %u", *i);
1669cf904c8dSKent Overstreet 		if (ret)
167037fad949SKent Overstreet 			goto err_create_lock;
16718e877caaSKent Overstreet 	}
167237fad949SKent Overstreet err_create_lock:
167337fad949SKent Overstreet 	up_write(&c->snapshot_create_lock);
16748e877caaSKent Overstreet err:
1675f55d6e07SKent Overstreet 	darray_exit(&deleted_interior);
16768e877caaSKent Overstreet 	darray_exit(&deleted);
16776bd68ec2SKent Overstreet 	bch2_trans_put(trans);
16788e877caaSKent Overstreet 	bch_err_fn(c, ret);
16798e877caaSKent Overstreet 	return ret;
16808e877caaSKent Overstreet }
16818e877caaSKent Overstreet 
bch2_delete_dead_snapshots_work(struct work_struct * work)16828e877caaSKent Overstreet void bch2_delete_dead_snapshots_work(struct work_struct *work)
16838e877caaSKent Overstreet {
16848e877caaSKent Overstreet 	struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
16858e877caaSKent Overstreet 
16860a2a507dSKent Overstreet 	set_worker_desc("bcachefs-delete-dead-snapshots/%s", c->name);
16870a2a507dSKent Overstreet 
16888e877caaSKent Overstreet 	bch2_delete_dead_snapshots(c);
16898e877caaSKent Overstreet 	bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
16908e877caaSKent Overstreet }
16918e877caaSKent Overstreet 
bch2_delete_dead_snapshots_async(struct bch_fs * c)16928e877caaSKent Overstreet void bch2_delete_dead_snapshots_async(struct bch_fs *c)
16938e877caaSKent Overstreet {
16948e877caaSKent Overstreet 	if (bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) &&
16958e877caaSKent Overstreet 	    !queue_work(c->write_ref_wq, &c->snapshot_delete_work))
16968e877caaSKent Overstreet 		bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
16978e877caaSKent Overstreet }
16988e877caaSKent Overstreet 
__bch2_key_has_snapshot_overwrites(struct btree_trans * trans,enum btree_id id,struct bpos pos)1699fa5bed37SKent Overstreet int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
1700fa5bed37SKent Overstreet 				       enum btree_id id,
1701fa5bed37SKent Overstreet 				       struct bpos pos)
1702fa5bed37SKent Overstreet {
1703fa5bed37SKent Overstreet 	struct bch_fs *c = trans->c;
1704fa5bed37SKent Overstreet 	struct btree_iter iter;
1705fa5bed37SKent Overstreet 	struct bkey_s_c k;
1706fa5bed37SKent Overstreet 	int ret;
1707fa5bed37SKent Overstreet 
1708fa5bed37SKent Overstreet 	bch2_trans_iter_init(trans, &iter, id, pos,
17095dd8c60eSKent Overstreet 			     BTREE_ITER_not_extents|
17105dd8c60eSKent Overstreet 			     BTREE_ITER_all_snapshots);
1711fa5bed37SKent Overstreet 	while (1) {
1712fa5bed37SKent Overstreet 		k = bch2_btree_iter_prev(&iter);
1713fa5bed37SKent Overstreet 		ret = bkey_err(k);
1714fa5bed37SKent Overstreet 		if (ret)
1715fa5bed37SKent Overstreet 			break;
1716fa5bed37SKent Overstreet 
1717fa5bed37SKent Overstreet 		if (!k.k)
1718fa5bed37SKent Overstreet 			break;
1719fa5bed37SKent Overstreet 
1720fa5bed37SKent Overstreet 		if (!bkey_eq(pos, k.k->p))
1721fa5bed37SKent Overstreet 			break;
1722fa5bed37SKent Overstreet 
1723fa5bed37SKent Overstreet 		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot)) {
1724fa5bed37SKent Overstreet 			ret = 1;
1725fa5bed37SKent Overstreet 			break;
1726fa5bed37SKent Overstreet 		}
1727fa5bed37SKent Overstreet 	}
1728fa5bed37SKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
1729fa5bed37SKent Overstreet 
1730fa5bed37SKent Overstreet 	return ret;
1731fa5bed37SKent Overstreet }
1732fa5bed37SKent Overstreet 
bch2_snapshot_smallest_child(struct bch_fs * c,u32 id)1733a111901fSKent Overstreet static u32 bch2_snapshot_smallest_child(struct bch_fs *c, u32 id)
1734a111901fSKent Overstreet {
1735a111901fSKent Overstreet 	const struct snapshot_t *s = snapshot_t(c, id);
1736a111901fSKent Overstreet 
1737a111901fSKent Overstreet 	return s->children[1] ?: s->children[0];
1738a111901fSKent Overstreet }
1739a111901fSKent Overstreet 
bch2_snapshot_smallest_descendent(struct bch_fs * c,u32 id)1740a111901fSKent Overstreet static u32 bch2_snapshot_smallest_descendent(struct bch_fs *c, u32 id)
1741a111901fSKent Overstreet {
1742a111901fSKent Overstreet 	u32 child;
1743a111901fSKent Overstreet 
1744a111901fSKent Overstreet 	while ((child = bch2_snapshot_smallest_child(c, id)))
1745a111901fSKent Overstreet 		id = child;
1746a111901fSKent Overstreet 	return id;
1747a111901fSKent Overstreet }
1748a111901fSKent Overstreet 
bch2_propagate_key_to_snapshot_leaf(struct btree_trans * trans,enum btree_id btree,struct bkey_s_c interior_k,u32 leaf_id,struct bpos * new_min_pos)1749a111901fSKent Overstreet static int bch2_propagate_key_to_snapshot_leaf(struct btree_trans *trans,
1750a111901fSKent Overstreet 					       enum btree_id btree,
1751a111901fSKent Overstreet 					       struct bkey_s_c interior_k,
1752a111901fSKent Overstreet 					       u32 leaf_id, struct bpos *new_min_pos)
1753a111901fSKent Overstreet {
1754a111901fSKent Overstreet 	struct btree_iter iter;
1755a111901fSKent Overstreet 	struct bpos pos = interior_k.k->p;
1756a111901fSKent Overstreet 	struct bkey_s_c k;
1757a111901fSKent Overstreet 	struct bkey_i *new;
1758a111901fSKent Overstreet 	int ret;
1759a111901fSKent Overstreet 
1760a111901fSKent Overstreet 	pos.snapshot = leaf_id;
1761a111901fSKent Overstreet 
17625dd8c60eSKent Overstreet 	bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_intent);
1763a111901fSKent Overstreet 	k = bch2_btree_iter_peek_slot(&iter);
1764a111901fSKent Overstreet 	ret = bkey_err(k);
1765a111901fSKent Overstreet 	if (ret)
1766a111901fSKent Overstreet 		goto out;
1767a111901fSKent Overstreet 
1768a111901fSKent Overstreet 	/* key already overwritten in this snapshot? */
1769a111901fSKent Overstreet 	if (k.k->p.snapshot != interior_k.k->p.snapshot)
1770a111901fSKent Overstreet 		goto out;
1771a111901fSKent Overstreet 
1772a111901fSKent Overstreet 	if (bpos_eq(*new_min_pos, POS_MIN)) {
1773a111901fSKent Overstreet 		*new_min_pos = k.k->p;
1774a111901fSKent Overstreet 		new_min_pos->snapshot = leaf_id;
1775a111901fSKent Overstreet 	}
1776a111901fSKent Overstreet 
1777a111901fSKent Overstreet 	new = bch2_bkey_make_mut_noupdate(trans, interior_k);
1778a111901fSKent Overstreet 	ret = PTR_ERR_OR_ZERO(new);
1779a111901fSKent Overstreet 	if (ret)
1780a111901fSKent Overstreet 		goto out;
1781a111901fSKent Overstreet 
1782a111901fSKent Overstreet 	new->k.p.snapshot = leaf_id;
1783a111901fSKent Overstreet 	ret = bch2_trans_update(trans, &iter, new, 0);
1784a111901fSKent Overstreet out:
1785a111901fSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
1786a111901fSKent Overstreet 	return ret;
1787a111901fSKent Overstreet }
1788a111901fSKent Overstreet 
bch2_propagate_key_to_snapshot_leaves(struct btree_trans * trans,enum btree_id btree,struct bkey_s_c k,struct bpos * new_min_pos)1789a111901fSKent Overstreet int bch2_propagate_key_to_snapshot_leaves(struct btree_trans *trans,
1790a111901fSKent Overstreet 					  enum btree_id btree,
1791a111901fSKent Overstreet 					  struct bkey_s_c k,
1792a111901fSKent Overstreet 					  struct bpos *new_min_pos)
1793a111901fSKent Overstreet {
1794a111901fSKent Overstreet 	struct bch_fs *c = trans->c;
1795a111901fSKent Overstreet 	struct bkey_buf sk;
1796c872afa2SKent Overstreet 	u32 restart_count = trans->restart_count;
1797d04fdf5cSKent Overstreet 	int ret = 0;
1798a111901fSKent Overstreet 
1799a111901fSKent Overstreet 	bch2_bkey_buf_init(&sk);
1800a111901fSKent Overstreet 	bch2_bkey_buf_reassemble(&sk, c, k);
1801a111901fSKent Overstreet 	k = bkey_i_to_s_c(sk.k);
1802a111901fSKent Overstreet 
1803a111901fSKent Overstreet 	*new_min_pos = POS_MIN;
1804a111901fSKent Overstreet 
1805a111901fSKent Overstreet 	for (u32 id = bch2_snapshot_smallest_descendent(c, k.k->p.snapshot);
1806a111901fSKent Overstreet 	     id < k.k->p.snapshot;
1807a111901fSKent Overstreet 	     id++) {
1808a111901fSKent Overstreet 		if (!bch2_snapshot_is_ancestor(c, id, k.k->p.snapshot) ||
1809a111901fSKent Overstreet 		    !bch2_snapshot_is_leaf(c, id))
1810a111901fSKent Overstreet 			continue;
1811d281701bSKent Overstreet again:
1812d281701bSKent Overstreet 		ret =   btree_trans_too_many_iters(trans) ?:
1813d281701bSKent Overstreet 			bch2_propagate_key_to_snapshot_leaf(trans, btree, k, id, new_min_pos) ?:
1814d281701bSKent Overstreet 			bch2_trans_commit(trans, NULL, NULL, 0);
1815d281701bSKent Overstreet 		if (ret && bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
1816d281701bSKent Overstreet 			bch2_trans_begin(trans);
1817d281701bSKent Overstreet 			goto again;
1818d281701bSKent Overstreet 		}
1819a111901fSKent Overstreet 
1820a111901fSKent Overstreet 		if (ret)
1821a111901fSKent Overstreet 			break;
1822a111901fSKent Overstreet 	}
1823a111901fSKent Overstreet 
1824a111901fSKent Overstreet 	bch2_bkey_buf_exit(&sk, c);
1825c872afa2SKent Overstreet 
1826c872afa2SKent Overstreet 	return ret ?: trans_was_restarted(trans, restart_count);
1827a111901fSKent Overstreet }
1828a111901fSKent Overstreet 
bch2_check_snapshot_needs_deletion(struct btree_trans * trans,struct bkey_s_c k)1829b0b5bbf9SKent Overstreet static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k)
1830b0b5bbf9SKent Overstreet {
1831b0b5bbf9SKent Overstreet 	struct bch_fs *c = trans->c;
1832b0b5bbf9SKent Overstreet 	struct bkey_s_c_snapshot snap;
1833b0b5bbf9SKent Overstreet 	int ret = 0;
1834b0b5bbf9SKent Overstreet 
1835b0b5bbf9SKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot)
1836b0b5bbf9SKent Overstreet 		return 0;
1837b0b5bbf9SKent Overstreet 
1838b0b5bbf9SKent Overstreet 	snap = bkey_s_c_to_snapshot(k);
1839b0b5bbf9SKent Overstreet 	if (BCH_SNAPSHOT_DELETED(snap.v) ||
1840b0b5bbf9SKent Overstreet 	    bch2_snapshot_equiv(c, k.k->p.offset) != k.k->p.offset ||
1841b0b5bbf9SKent Overstreet 	    (ret = bch2_snapshot_needs_delete(trans, k)) > 0) {
18423c471b65SKent Overstreet 		set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
1843b0b5bbf9SKent Overstreet 		return 0;
1844b0b5bbf9SKent Overstreet 	}
1845b0b5bbf9SKent Overstreet 
1846b0b5bbf9SKent Overstreet 	return ret;
1847b0b5bbf9SKent Overstreet }
1848b0b5bbf9SKent Overstreet 
bch2_snapshots_read(struct bch_fs * c)18498e877caaSKent Overstreet int bch2_snapshots_read(struct bch_fs *c)
18508e877caaSKent Overstreet {
185180eab7a7SKent Overstreet 	int ret = bch2_trans_run(c,
18525028b907SKent Overstreet 		for_each_btree_key(trans, iter, BTREE_ID_snapshots,
18538e877caaSKent Overstreet 				   POS_MIN, 0, k,
1854ad00bce0SKent Overstreet 			__bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
1855b0b5bbf9SKent Overstreet 			bch2_snapshot_set_equiv(trans, k) ?:
1856b0b5bbf9SKent Overstreet 			bch2_check_snapshot_needs_deletion(trans, k)) ?:
18575028b907SKent Overstreet 		for_each_btree_key(trans, iter, BTREE_ID_snapshots,
185866487c54SKent Overstreet 				   POS_MIN, 0, k,
185966487c54SKent Overstreet 			   (set_is_ancestor_bitmap(c, k.k->p.offset), 0)));
18608e877caaSKent Overstreet 	bch_err_fn(c, ret);
1861a292be3bSKent Overstreet 
1862a292be3bSKent Overstreet 	/*
1863a292be3bSKent Overstreet 	 * It's important that we check if we need to reconstruct snapshots
1864a292be3bSKent Overstreet 	 * before going RW, so we mark that pass as required in the superblock -
1865a292be3bSKent Overstreet 	 * otherwise, we could end up deleting keys with missing snapshot nodes
1866a292be3bSKent Overstreet 	 * instead
1867a292be3bSKent Overstreet 	 */
1868a292be3bSKent Overstreet 	BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
1869a292be3bSKent Overstreet 	       test_bit(BCH_FS_may_go_rw, &c->flags));
1870a292be3bSKent Overstreet 
1871a292be3bSKent Overstreet 	if (bch2_err_matches(ret, EIO) ||
1872a292be3bSKent Overstreet 	    (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots)))
1873a292be3bSKent Overstreet 		ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots);
1874a292be3bSKent Overstreet 
18758e877caaSKent Overstreet 	return ret;
18768e877caaSKent Overstreet }
18778e877caaSKent Overstreet 
bch2_fs_snapshots_exit(struct bch_fs * c)18788e877caaSKent Overstreet void bch2_fs_snapshots_exit(struct bch_fs *c)
18798e877caaSKent Overstreet {
1880369acf97SSu Yue 	kvfree(rcu_dereference_protected(c->snapshots, true));
18818e877caaSKent Overstreet }
1882