xref: /linux/fs/bcachefs/snapshot.c (revision 13c1e583f9179ad7953dc71ebb2f12e613b9d052)
18e877caaSKent Overstreet // SPDX-License-Identifier: GPL-2.0
28e877caaSKent Overstreet 
38e877caaSKent Overstreet #include "bcachefs.h"
4a111901fSKent Overstreet #include "bkey_buf.h"
58e877caaSKent Overstreet #include "btree_key_cache.h"
68e877caaSKent Overstreet #include "btree_update.h"
7f55d6e07SKent Overstreet #include "buckets.h"
88e877caaSKent Overstreet #include "errcode.h"
98e877caaSKent Overstreet #include "error.h"
108e877caaSKent Overstreet #include "fs.h"
118e877caaSKent Overstreet #include "snapshot.h"
128e877caaSKent Overstreet 
138e877caaSKent Overstreet #include <linux/random.h>
148e877caaSKent Overstreet 
158e877caaSKent Overstreet /*
168e877caaSKent Overstreet  * Snapshot trees:
178e877caaSKent Overstreet  *
188e877caaSKent Overstreet  * Keys in BTREE_ID_snapshot_trees identify a whole tree of snapshot nodes; they
198e877caaSKent Overstreet  * exist to provide a stable identifier for the whole lifetime of a snapshot
208e877caaSKent Overstreet  * tree.
218e877caaSKent Overstreet  */
228e877caaSKent Overstreet 
238e877caaSKent Overstreet void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
248e877caaSKent Overstreet 				struct bkey_s_c k)
258e877caaSKent Overstreet {
268e877caaSKent Overstreet 	struct bkey_s_c_snapshot_tree t = bkey_s_c_to_snapshot_tree(k);
278e877caaSKent Overstreet 
288e877caaSKent Overstreet 	prt_printf(out, "subvol %u root snapshot %u",
298e877caaSKent Overstreet 		   le32_to_cpu(t.v->master_subvol),
308e877caaSKent Overstreet 		   le32_to_cpu(t.v->root_snapshot));
318e877caaSKent Overstreet }
328e877caaSKent Overstreet 
33b65db750SKent Overstreet int bch2_snapshot_tree_invalid(struct bch_fs *c, struct bkey_s_c k,
348e877caaSKent Overstreet 			       enum bkey_invalid_flags flags,
358e877caaSKent Overstreet 			       struct printbuf *err)
368e877caaSKent Overstreet {
37b65db750SKent Overstreet 	int ret = 0;
388e877caaSKent Overstreet 
39b65db750SKent Overstreet 	bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) ||
40b65db750SKent Overstreet 			 bkey_lt(k.k->p, POS(0, 1)), c, err,
41b65db750SKent Overstreet 			 snapshot_tree_pos_bad,
42b65db750SKent Overstreet 			 "bad pos");
43b65db750SKent Overstreet fsck_err:
44b65db750SKent Overstreet 	return ret;
458e877caaSKent Overstreet }
468e877caaSKent Overstreet 
478e877caaSKent Overstreet int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id,
488e877caaSKent Overstreet 			      struct bch_snapshot_tree *s)
498e877caaSKent Overstreet {
508e877caaSKent Overstreet 	int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id),
518e877caaSKent Overstreet 					  BTREE_ITER_WITH_UPDATES, snapshot_tree, s);
528e877caaSKent Overstreet 
538e877caaSKent Overstreet 	if (bch2_err_matches(ret, ENOENT))
548e877caaSKent Overstreet 		ret = -BCH_ERR_ENOENT_snapshot_tree;
558e877caaSKent Overstreet 	return ret;
568e877caaSKent Overstreet }
578e877caaSKent Overstreet 
588e877caaSKent Overstreet struct bkey_i_snapshot_tree *
598e877caaSKent Overstreet __bch2_snapshot_tree_create(struct btree_trans *trans)
608e877caaSKent Overstreet {
618e877caaSKent Overstreet 	struct btree_iter iter;
628e877caaSKent Overstreet 	int ret = bch2_bkey_get_empty_slot(trans, &iter,
638e877caaSKent Overstreet 			BTREE_ID_snapshot_trees, POS(0, U32_MAX));
648e877caaSKent Overstreet 	struct bkey_i_snapshot_tree *s_t;
658e877caaSKent Overstreet 
668e877caaSKent Overstreet 	if (ret == -BCH_ERR_ENOSPC_btree_slot)
678e877caaSKent Overstreet 		ret = -BCH_ERR_ENOSPC_snapshot_tree;
688e877caaSKent Overstreet 	if (ret)
698e877caaSKent Overstreet 		return ERR_PTR(ret);
708e877caaSKent Overstreet 
718e877caaSKent Overstreet 	s_t = bch2_bkey_alloc(trans, &iter, 0, snapshot_tree);
728e877caaSKent Overstreet 	ret = PTR_ERR_OR_ZERO(s_t);
738e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
748e877caaSKent Overstreet 	return ret ? ERR_PTR(ret) : s_t;
758e877caaSKent Overstreet }
768e877caaSKent Overstreet 
778e877caaSKent Overstreet static int bch2_snapshot_tree_create(struct btree_trans *trans,
788e877caaSKent Overstreet 				u32 root_id, u32 subvol_id, u32 *tree_id)
798e877caaSKent Overstreet {
808e877caaSKent Overstreet 	struct bkey_i_snapshot_tree *n_tree =
818e877caaSKent Overstreet 		__bch2_snapshot_tree_create(trans);
828e877caaSKent Overstreet 
838e877caaSKent Overstreet 	if (IS_ERR(n_tree))
848e877caaSKent Overstreet 		return PTR_ERR(n_tree);
858e877caaSKent Overstreet 
868e877caaSKent Overstreet 	n_tree->v.master_subvol	= cpu_to_le32(subvol_id);
878e877caaSKent Overstreet 	n_tree->v.root_snapshot	= cpu_to_le32(root_id);
888e877caaSKent Overstreet 	*tree_id = n_tree->k.p.offset;
898e877caaSKent Overstreet 	return 0;
908e877caaSKent Overstreet }
918e877caaSKent Overstreet 
928e877caaSKent Overstreet /* Snapshot nodes: */
938e877caaSKent Overstreet 
941c31b83aSKent Overstreet static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, u32 ancestor)
9566487c54SKent Overstreet {
96ec9cc18fSKent Overstreet 	while (id && id < ancestor) {
97ec9cc18fSKent Overstreet 		const struct snapshot_t *s = __snapshot_t(t, id);
98ec9cc18fSKent Overstreet 		id = s ? s->parent : 0;
99ec9cc18fSKent Overstreet 	}
1001c31b83aSKent Overstreet 	return id == ancestor;
1011c31b83aSKent Overstreet }
1021c31b83aSKent Overstreet 
1031c31b83aSKent Overstreet static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
1041c31b83aSKent Overstreet {
1051c31b83aSKent Overstreet 	rcu_read_lock();
1061c31b83aSKent Overstreet 	bool ret = __bch2_snapshot_is_ancestor_early(rcu_dereference(c->snapshots), id, ancestor);
10766487c54SKent Overstreet 	rcu_read_unlock();
10866487c54SKent Overstreet 
1091c31b83aSKent Overstreet 	return ret;
11066487c54SKent Overstreet }
11166487c54SKent Overstreet 
1128e877caaSKent Overstreet static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
1138e877caaSKent Overstreet {
1148e877caaSKent Overstreet 	const struct snapshot_t *s = __snapshot_t(t, id);
115ec9cc18fSKent Overstreet 	if (!s)
116ec9cc18fSKent Overstreet 		return 0;
1178e877caaSKent Overstreet 
1188e877caaSKent Overstreet 	if (s->skip[2] <= ancestor)
1198e877caaSKent Overstreet 		return s->skip[2];
1208e877caaSKent Overstreet 	if (s->skip[1] <= ancestor)
1218e877caaSKent Overstreet 		return s->skip[1];
1228e877caaSKent Overstreet 	if (s->skip[0] <= ancestor)
1238e877caaSKent Overstreet 		return s->skip[0];
1248e877caaSKent Overstreet 	return s->parent;
1258e877caaSKent Overstreet }
1268e877caaSKent Overstreet 
1278e877caaSKent Overstreet bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
1288e877caaSKent Overstreet {
1298e877caaSKent Overstreet 	bool ret;
1308e877caaSKent Overstreet 
1318e877caaSKent Overstreet 	rcu_read_lock();
1321c31b83aSKent Overstreet 	struct snapshot_table *t = rcu_dereference(c->snapshots);
1331c31b83aSKent Overstreet 
134*13c1e583SKent Overstreet 	if (unlikely(c->recovery_pass_done < BCH_RECOVERY_PASS_check_snapshots)) {
1351c31b83aSKent Overstreet 		ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor);
1361c31b83aSKent Overstreet 		goto out;
1371c31b83aSKent Overstreet 	}
1388e877caaSKent Overstreet 
1398e877caaSKent Overstreet 	while (id && id < ancestor - IS_ANCESTOR_BITMAP)
1408e877caaSKent Overstreet 		id = get_ancestor_below(t, id, ancestor);
1418e877caaSKent Overstreet 
14266487c54SKent Overstreet 	if (id && id < ancestor) {
14366487c54SKent Overstreet 		ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor);
14466487c54SKent Overstreet 
1451c31b83aSKent Overstreet 		EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
14666487c54SKent Overstreet 	} else {
14766487c54SKent Overstreet 		ret = id == ancestor;
14866487c54SKent Overstreet 	}
1491c31b83aSKent Overstreet out:
1508e877caaSKent Overstreet 	rcu_read_unlock();
1518e877caaSKent Overstreet 
1528e877caaSKent Overstreet 	return ret;
1538e877caaSKent Overstreet }
1548e877caaSKent Overstreet 
1558e877caaSKent Overstreet static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
1568e877caaSKent Overstreet {
1578e877caaSKent Overstreet 	size_t idx = U32_MAX - id;
1588e877caaSKent Overstreet 	struct snapshot_table *new, *old;
1598e877caaSKent Overstreet 
16063332394SKent Overstreet 	size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1));
16163332394SKent Overstreet 	size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]);
1628e877caaSKent Overstreet 
16363332394SKent Overstreet 	new = kvzalloc(new_bytes, GFP_KERNEL);
1648e877caaSKent Overstreet 	if (!new)
1658e877caaSKent Overstreet 		return NULL;
1668e877caaSKent Overstreet 
16763332394SKent Overstreet 	new->nr = new_size;
16863332394SKent Overstreet 
1698e877caaSKent Overstreet 	old = rcu_dereference_protected(c->snapshots, true);
1708e877caaSKent Overstreet 	if (old)
17163332394SKent Overstreet 		memcpy(new->s, old->s, sizeof(old->s[0]) * old->nr);
1728e877caaSKent Overstreet 
1738e877caaSKent Overstreet 	rcu_assign_pointer(c->snapshots, new);
17463332394SKent Overstreet 	kvfree_rcu(old, rcu);
1758e877caaSKent Overstreet 
17663332394SKent Overstreet 	return &rcu_dereference_protected(c->snapshots,
17763332394SKent Overstreet 				lockdep_is_held(&c->snapshot_table_lock))->s[idx];
1788e877caaSKent Overstreet }
1798e877caaSKent Overstreet 
1808e877caaSKent Overstreet static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id)
1818e877caaSKent Overstreet {
1828e877caaSKent Overstreet 	size_t idx = U32_MAX - id;
18363332394SKent Overstreet 	struct snapshot_table *table =
18463332394SKent Overstreet 		rcu_dereference_protected(c->snapshots,
18563332394SKent Overstreet 				lockdep_is_held(&c->snapshot_table_lock));
1868e877caaSKent Overstreet 
1878e877caaSKent Overstreet 	lockdep_assert_held(&c->snapshot_table_lock);
1888e877caaSKent Overstreet 
18963332394SKent Overstreet 	if (likely(table && idx < table->nr))
19063332394SKent Overstreet 		return &table->s[idx];
1918e877caaSKent Overstreet 
1928e877caaSKent Overstreet 	return __snapshot_t_mut(c, id);
1938e877caaSKent Overstreet }
1948e877caaSKent Overstreet 
1958e877caaSKent Overstreet void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
1968e877caaSKent Overstreet 			   struct bkey_s_c k)
1978e877caaSKent Overstreet {
1988e877caaSKent Overstreet 	struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
1998e877caaSKent Overstreet 
2008e877caaSKent Overstreet 	prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u tree %u",
2018e877caaSKent Overstreet 	       BCH_SNAPSHOT_SUBVOL(s.v),
2028e877caaSKent Overstreet 	       BCH_SNAPSHOT_DELETED(s.v),
2038e877caaSKent Overstreet 	       le32_to_cpu(s.v->parent),
2048e877caaSKent Overstreet 	       le32_to_cpu(s.v->children[0]),
2058e877caaSKent Overstreet 	       le32_to_cpu(s.v->children[1]),
2068e877caaSKent Overstreet 	       le32_to_cpu(s.v->subvol),
2078e877caaSKent Overstreet 	       le32_to_cpu(s.v->tree));
2088e877caaSKent Overstreet 
2098e877caaSKent Overstreet 	if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, depth))
2108e877caaSKent Overstreet 		prt_printf(out, " depth %u skiplist %u %u %u",
2118e877caaSKent Overstreet 			   le32_to_cpu(s.v->depth),
2128e877caaSKent Overstreet 			   le32_to_cpu(s.v->skip[0]),
2138e877caaSKent Overstreet 			   le32_to_cpu(s.v->skip[1]),
2148e877caaSKent Overstreet 			   le32_to_cpu(s.v->skip[2]));
2158e877caaSKent Overstreet }
2168e877caaSKent Overstreet 
217b65db750SKent Overstreet int bch2_snapshot_invalid(struct bch_fs *c, struct bkey_s_c k,
2188e877caaSKent Overstreet 			  enum bkey_invalid_flags flags,
2198e877caaSKent Overstreet 			  struct printbuf *err)
2208e877caaSKent Overstreet {
2218e877caaSKent Overstreet 	struct bkey_s_c_snapshot s;
2228e877caaSKent Overstreet 	u32 i, id;
223b65db750SKent Overstreet 	int ret = 0;
2248e877caaSKent Overstreet 
225b65db750SKent Overstreet 	bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) ||
226b65db750SKent Overstreet 			 bkey_lt(k.k->p, POS(0, 1)), c, err,
227b65db750SKent Overstreet 			 snapshot_pos_bad,
228b65db750SKent Overstreet 			 "bad pos");
2298e877caaSKent Overstreet 
2308e877caaSKent Overstreet 	s = bkey_s_c_to_snapshot(k);
2318e877caaSKent Overstreet 
2328e877caaSKent Overstreet 	id = le32_to_cpu(s.v->parent);
233b65db750SKent Overstreet 	bkey_fsck_err_on(id && id <= k.k->p.offset, c, err,
234b65db750SKent Overstreet 			 snapshot_parent_bad,
235b65db750SKent Overstreet 			 "bad parent node (%u <= %llu)",
2368e877caaSKent Overstreet 			 id, k.k->p.offset);
2378e877caaSKent Overstreet 
238b65db750SKent Overstreet 	bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), c, err,
239b65db750SKent Overstreet 			 snapshot_children_not_normalized,
240b65db750SKent Overstreet 			 "children not normalized");
2418e877caaSKent Overstreet 
242b65db750SKent Overstreet 	bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], c, err,
243b65db750SKent Overstreet 			 snapshot_child_duplicate,
244b65db750SKent Overstreet 			 "duplicate child nodes");
2458e877caaSKent Overstreet 
2468e877caaSKent Overstreet 	for (i = 0; i < 2; i++) {
2478e877caaSKent Overstreet 		id = le32_to_cpu(s.v->children[i]);
2488e877caaSKent Overstreet 
249b65db750SKent Overstreet 		bkey_fsck_err_on(id >= k.k->p.offset, c, err,
250b65db750SKent Overstreet 				 snapshot_child_bad,
251b65db750SKent Overstreet 				 "bad child node (%u >= %llu)",
2528e877caaSKent Overstreet 				 id, k.k->p.offset);
2538e877caaSKent Overstreet 	}
2548e877caaSKent Overstreet 
2558e877caaSKent Overstreet 	if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) {
256b65db750SKent Overstreet 		bkey_fsck_err_on(le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) ||
257b65db750SKent Overstreet 				 le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), c, err,
258b65db750SKent Overstreet 				 snapshot_skiplist_not_normalized,
259b65db750SKent Overstreet 				 "skiplist not normalized");
2608e877caaSKent Overstreet 
2618e877caaSKent Overstreet 		for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) {
2628e877caaSKent Overstreet 			id = le32_to_cpu(s.v->skip[i]);
2638e877caaSKent Overstreet 
264b65db750SKent Overstreet 			bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), c, err,
265b65db750SKent Overstreet 					 snapshot_skiplist_bad,
266b65db750SKent Overstreet 					 "bad skiplist node %u", id);
2678e877caaSKent Overstreet 		}
2688e877caaSKent Overstreet 	}
269b65db750SKent Overstreet fsck_err:
270b65db750SKent Overstreet 	return ret;
2718e877caaSKent Overstreet }
2728e877caaSKent Overstreet 
27366487c54SKent Overstreet static void __set_is_ancestor_bitmap(struct bch_fs *c, u32 id)
27466487c54SKent Overstreet {
27566487c54SKent Overstreet 	struct snapshot_t *t = snapshot_t_mut(c, id);
27666487c54SKent Overstreet 	u32 parent = id;
27766487c54SKent Overstreet 
27866487c54SKent Overstreet 	while ((parent = bch2_snapshot_parent_early(c, parent)) &&
27966487c54SKent Overstreet 	       parent - id - 1 < IS_ANCESTOR_BITMAP)
28066487c54SKent Overstreet 		__set_bit(parent - id - 1, t->is_ancestor);
28166487c54SKent Overstreet }
28266487c54SKent Overstreet 
28366487c54SKent Overstreet static void set_is_ancestor_bitmap(struct bch_fs *c, u32 id)
28466487c54SKent Overstreet {
28566487c54SKent Overstreet 	mutex_lock(&c->snapshot_table_lock);
28666487c54SKent Overstreet 	__set_is_ancestor_bitmap(c, id);
28766487c54SKent Overstreet 	mutex_unlock(&c->snapshot_table_lock);
28866487c54SKent Overstreet }
28966487c54SKent Overstreet 
290ad00bce0SKent Overstreet static int __bch2_mark_snapshot(struct btree_trans *trans,
2918e877caaSKent Overstreet 		       enum btree_id btree, unsigned level,
2928e877caaSKent Overstreet 		       struct bkey_s_c old, struct bkey_s_c new,
2938e877caaSKent Overstreet 		       unsigned flags)
2948e877caaSKent Overstreet {
2958e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
2968e877caaSKent Overstreet 	struct snapshot_t *t;
2978e877caaSKent Overstreet 	u32 id = new.k->p.offset;
2988e877caaSKent Overstreet 	int ret = 0;
2998e877caaSKent Overstreet 
3008e877caaSKent Overstreet 	mutex_lock(&c->snapshot_table_lock);
3018e877caaSKent Overstreet 
3028e877caaSKent Overstreet 	t = snapshot_t_mut(c, id);
3038e877caaSKent Overstreet 	if (!t) {
3048e877caaSKent Overstreet 		ret = -BCH_ERR_ENOMEM_mark_snapshot;
3058e877caaSKent Overstreet 		goto err;
3068e877caaSKent Overstreet 	}
3078e877caaSKent Overstreet 
3088e877caaSKent Overstreet 	if (new.k->type == KEY_TYPE_snapshot) {
3098e877caaSKent Overstreet 		struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
3108e877caaSKent Overstreet 
3118e877caaSKent Overstreet 		t->parent	= le32_to_cpu(s.v->parent);
3128e877caaSKent Overstreet 		t->children[0]	= le32_to_cpu(s.v->children[0]);
3138e877caaSKent Overstreet 		t->children[1]	= le32_to_cpu(s.v->children[1]);
3148e877caaSKent Overstreet 		t->subvol	= BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0;
3158e877caaSKent Overstreet 		t->tree		= le32_to_cpu(s.v->tree);
3168e877caaSKent Overstreet 
3178e877caaSKent Overstreet 		if (bkey_val_bytes(s.k) > offsetof(struct bch_snapshot, depth)) {
3188e877caaSKent Overstreet 			t->depth	= le32_to_cpu(s.v->depth);
3198e877caaSKent Overstreet 			t->skip[0]	= le32_to_cpu(s.v->skip[0]);
3208e877caaSKent Overstreet 			t->skip[1]	= le32_to_cpu(s.v->skip[1]);
3218e877caaSKent Overstreet 			t->skip[2]	= le32_to_cpu(s.v->skip[2]);
3228e877caaSKent Overstreet 		} else {
3238e877caaSKent Overstreet 			t->depth	= 0;
3248e877caaSKent Overstreet 			t->skip[0]	= 0;
3258e877caaSKent Overstreet 			t->skip[1]	= 0;
3268e877caaSKent Overstreet 			t->skip[2]	= 0;
3278e877caaSKent Overstreet 		}
3288e877caaSKent Overstreet 
32966487c54SKent Overstreet 		__set_is_ancestor_bitmap(c, id);
3308e877caaSKent Overstreet 
3318e877caaSKent Overstreet 		if (BCH_SNAPSHOT_DELETED(s.v)) {
3323c471b65SKent Overstreet 			set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
333b0b5bbf9SKent Overstreet 			if (c->curr_recovery_pass > BCH_RECOVERY_PASS_delete_dead_snapshots)
334b0b5bbf9SKent Overstreet 				bch2_delete_dead_snapshots_async(c);
3358e877caaSKent Overstreet 		}
3368e877caaSKent Overstreet 	} else {
3378e877caaSKent Overstreet 		memset(t, 0, sizeof(*t));
3388e877caaSKent Overstreet 	}
3398e877caaSKent Overstreet err:
3408e877caaSKent Overstreet 	mutex_unlock(&c->snapshot_table_lock);
3418e877caaSKent Overstreet 	return ret;
3428e877caaSKent Overstreet }
3438e877caaSKent Overstreet 
344ad00bce0SKent Overstreet int bch2_mark_snapshot(struct btree_trans *trans,
345ad00bce0SKent Overstreet 		       enum btree_id btree, unsigned level,
346ad00bce0SKent Overstreet 		       struct bkey_s_c old, struct bkey_s new,
347ad00bce0SKent Overstreet 		       unsigned flags)
348ad00bce0SKent Overstreet {
349ad00bce0SKent Overstreet 	return __bch2_mark_snapshot(trans, btree, level, old, new.s_c, flags);
350ad00bce0SKent Overstreet }
351ad00bce0SKent Overstreet 
3528e877caaSKent Overstreet int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
3538e877caaSKent Overstreet 			 struct bch_snapshot *s)
3548e877caaSKent Overstreet {
3558e877caaSKent Overstreet 	return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, POS(0, id),
3568e877caaSKent Overstreet 				       BTREE_ITER_WITH_UPDATES, snapshot, s);
3578e877caaSKent Overstreet }
3588e877caaSKent Overstreet 
359eebe8a84SKent Overstreet static int bch2_snapshot_live(struct btree_trans *trans, u32 id)
3608e877caaSKent Overstreet {
3618e877caaSKent Overstreet 	struct bch_snapshot v;
3628e877caaSKent Overstreet 	int ret;
3638e877caaSKent Overstreet 
3648e877caaSKent Overstreet 	if (!id)
3658e877caaSKent Overstreet 		return 0;
3668e877caaSKent Overstreet 
3678e877caaSKent Overstreet 	ret = bch2_snapshot_lookup(trans, id, &v);
3688e877caaSKent Overstreet 	if (bch2_err_matches(ret, ENOENT))
3698e877caaSKent Overstreet 		bch_err(trans->c, "snapshot node %u not found", id);
3708e877caaSKent Overstreet 	if (ret)
3718e877caaSKent Overstreet 		return ret;
3728e877caaSKent Overstreet 
3738e877caaSKent Overstreet 	return !BCH_SNAPSHOT_DELETED(&v);
3748e877caaSKent Overstreet }
3758e877caaSKent Overstreet 
3768e877caaSKent Overstreet /*
3778e877caaSKent Overstreet  * If @k is a snapshot with just one live child, it's part of a linear chain,
3788e877caaSKent Overstreet  * which we consider to be an equivalence class: and then after snapshot
3798e877caaSKent Overstreet  * deletion cleanup, there should only be a single key at a given position in
3808e877caaSKent Overstreet  * this equivalence class.
3818e877caaSKent Overstreet  *
3828e877caaSKent Overstreet  * This sets the equivalence class of @k to be the child's equivalence class, if
3838e877caaSKent Overstreet  * it's part of such a linear chain: this correctly sets equivalence classes on
3848e877caaSKent Overstreet  * startup if we run leaf to root (i.e. in natural key order).
3858e877caaSKent Overstreet  */
386eebe8a84SKent Overstreet static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
3878e877caaSKent Overstreet {
3888e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
3898e877caaSKent Overstreet 	unsigned i, nr_live = 0, live_idx = 0;
3908e877caaSKent Overstreet 	struct bkey_s_c_snapshot snap;
3918e877caaSKent Overstreet 	u32 id = k.k->p.offset, child[2];
3928e877caaSKent Overstreet 
3938e877caaSKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot)
3948e877caaSKent Overstreet 		return 0;
3958e877caaSKent Overstreet 
3968e877caaSKent Overstreet 	snap = bkey_s_c_to_snapshot(k);
3978e877caaSKent Overstreet 
3988e877caaSKent Overstreet 	child[0] = le32_to_cpu(snap.v->children[0]);
3998e877caaSKent Overstreet 	child[1] = le32_to_cpu(snap.v->children[1]);
4008e877caaSKent Overstreet 
4018e877caaSKent Overstreet 	for (i = 0; i < 2; i++) {
4028e877caaSKent Overstreet 		int ret = bch2_snapshot_live(trans, child[i]);
4038e877caaSKent Overstreet 
4048e877caaSKent Overstreet 		if (ret < 0)
4058e877caaSKent Overstreet 			return ret;
4068e877caaSKent Overstreet 
4078e877caaSKent Overstreet 		if (ret)
4088e877caaSKent Overstreet 			live_idx = i;
4098e877caaSKent Overstreet 		nr_live += ret;
4108e877caaSKent Overstreet 	}
4118e877caaSKent Overstreet 
4128e877caaSKent Overstreet 	mutex_lock(&c->snapshot_table_lock);
4138e877caaSKent Overstreet 
4148e877caaSKent Overstreet 	snapshot_t_mut(c, id)->equiv = nr_live == 1
4158e877caaSKent Overstreet 		? snapshot_t_mut(c, child[live_idx])->equiv
4168e877caaSKent Overstreet 		: id;
4178e877caaSKent Overstreet 
4188e877caaSKent Overstreet 	mutex_unlock(&c->snapshot_table_lock);
4198e877caaSKent Overstreet 
4208e877caaSKent Overstreet 	return 0;
4218e877caaSKent Overstreet }
4228e877caaSKent Overstreet 
4238e877caaSKent Overstreet /* fsck: */
4248e877caaSKent Overstreet 
4258e877caaSKent Overstreet static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child)
4268e877caaSKent Overstreet {
4278e877caaSKent Overstreet 	return snapshot_t(c, id)->children[child];
4288e877caaSKent Overstreet }
4298e877caaSKent Overstreet 
4308e877caaSKent Overstreet static u32 bch2_snapshot_left_child(struct bch_fs *c, u32 id)
4318e877caaSKent Overstreet {
4328e877caaSKent Overstreet 	return bch2_snapshot_child(c, id, 0);
4338e877caaSKent Overstreet }
4348e877caaSKent Overstreet 
4358e877caaSKent Overstreet static u32 bch2_snapshot_right_child(struct bch_fs *c, u32 id)
4368e877caaSKent Overstreet {
4378e877caaSKent Overstreet 	return bch2_snapshot_child(c, id, 1);
4388e877caaSKent Overstreet }
4398e877caaSKent Overstreet 
4408e877caaSKent Overstreet static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id)
4418e877caaSKent Overstreet {
4428e877caaSKent Overstreet 	u32 n, parent;
4438e877caaSKent Overstreet 
4448e877caaSKent Overstreet 	n = bch2_snapshot_left_child(c, id);
4458e877caaSKent Overstreet 	if (n)
4468e877caaSKent Overstreet 		return n;
4478e877caaSKent Overstreet 
4488e877caaSKent Overstreet 	while ((parent = bch2_snapshot_parent(c, id))) {
4498e877caaSKent Overstreet 		n = bch2_snapshot_right_child(c, parent);
4508e877caaSKent Overstreet 		if (n && n != id)
4518e877caaSKent Overstreet 			return n;
4528e877caaSKent Overstreet 		id = parent;
4538e877caaSKent Overstreet 	}
4548e877caaSKent Overstreet 
4558e877caaSKent Overstreet 	return 0;
4568e877caaSKent Overstreet }
4578e877caaSKent Overstreet 
4588e877caaSKent Overstreet static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
4598e877caaSKent Overstreet {
4608e877caaSKent Overstreet 	u32 id = snapshot_root;
4618e877caaSKent Overstreet 	u32 subvol = 0, s;
4628e877caaSKent Overstreet 
4638e877caaSKent Overstreet 	while (id) {
4648e877caaSKent Overstreet 		s = snapshot_t(c, id)->subvol;
4658e877caaSKent Overstreet 
4668e877caaSKent Overstreet 		if (s && (!subvol || s < subvol))
4678e877caaSKent Overstreet 			subvol = s;
4688e877caaSKent Overstreet 
4698e877caaSKent Overstreet 		id = bch2_snapshot_tree_next(c, id);
4708e877caaSKent Overstreet 	}
4718e877caaSKent Overstreet 
4728e877caaSKent Overstreet 	return subvol;
4738e877caaSKent Overstreet }
4748e877caaSKent Overstreet 
4758e877caaSKent Overstreet static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans,
4768e877caaSKent Overstreet 					    u32 snapshot_root, u32 *subvol_id)
4778e877caaSKent Overstreet {
4788e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
4798e877caaSKent Overstreet 	struct btree_iter iter;
4808e877caaSKent Overstreet 	struct bkey_s_c k;
4818e877caaSKent Overstreet 	bool found = false;
4828e877caaSKent Overstreet 	int ret;
4838e877caaSKent Overstreet 
4848e877caaSKent Overstreet 	for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN,
4858e877caaSKent Overstreet 				     0, k, ret) {
4868e877caaSKent Overstreet 		if (k.k->type != KEY_TYPE_subvolume)
4878e877caaSKent Overstreet 			continue;
4888e877caaSKent Overstreet 
489cf904c8dSKent Overstreet 		struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
4908e877caaSKent Overstreet 		if (!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.v->snapshot), snapshot_root))
4918e877caaSKent Overstreet 			continue;
4928e877caaSKent Overstreet 		if (!BCH_SUBVOLUME_SNAP(s.v)) {
4938e877caaSKent Overstreet 			*subvol_id = s.k->p.offset;
4948e877caaSKent Overstreet 			found = true;
4958e877caaSKent Overstreet 			break;
4968e877caaSKent Overstreet 		}
4978e877caaSKent Overstreet 	}
4988e877caaSKent Overstreet 
4998e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
5008e877caaSKent Overstreet 
5018e877caaSKent Overstreet 	if (!ret && !found) {
50296dea3d5SKent Overstreet 		struct bkey_i_subvolume *u;
5038e877caaSKent Overstreet 
5048e877caaSKent Overstreet 		*subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root);
5058e877caaSKent Overstreet 
50696dea3d5SKent Overstreet 		u = bch2_bkey_get_mut_typed(trans, &iter,
5078e877caaSKent Overstreet 					    BTREE_ID_subvolumes, POS(0, *subvol_id),
5088e877caaSKent Overstreet 					    0, subvolume);
50996dea3d5SKent Overstreet 		ret = PTR_ERR_OR_ZERO(u);
5108e877caaSKent Overstreet 		if (ret)
5118e877caaSKent Overstreet 			return ret;
5128e877caaSKent Overstreet 
51396dea3d5SKent Overstreet 		SET_BCH_SUBVOLUME_SNAP(&u->v, false);
5148e877caaSKent Overstreet 	}
5158e877caaSKent Overstreet 
5168e877caaSKent Overstreet 	return ret;
5178e877caaSKent Overstreet }
5188e877caaSKent Overstreet 
5198e877caaSKent Overstreet static int check_snapshot_tree(struct btree_trans *trans,
5208e877caaSKent Overstreet 			       struct btree_iter *iter,
5218e877caaSKent Overstreet 			       struct bkey_s_c k)
5228e877caaSKent Overstreet {
5238e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
5248e877caaSKent Overstreet 	struct bkey_s_c_snapshot_tree st;
5258e877caaSKent Overstreet 	struct bch_snapshot s;
5268e877caaSKent Overstreet 	struct bch_subvolume subvol;
5278e877caaSKent Overstreet 	struct printbuf buf = PRINTBUF;
5288e877caaSKent Overstreet 	u32 root_id;
5298e877caaSKent Overstreet 	int ret;
5308e877caaSKent Overstreet 
5318e877caaSKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot_tree)
5328e877caaSKent Overstreet 		return 0;
5338e877caaSKent Overstreet 
5348e877caaSKent Overstreet 	st = bkey_s_c_to_snapshot_tree(k);
5358e877caaSKent Overstreet 	root_id = le32_to_cpu(st.v->root_snapshot);
5368e877caaSKent Overstreet 
5378e877caaSKent Overstreet 	ret = bch2_snapshot_lookup(trans, root_id, &s);
5388e877caaSKent Overstreet 	if (ret && !bch2_err_matches(ret, ENOENT))
5398e877caaSKent Overstreet 		goto err;
5408e877caaSKent Overstreet 
5418e877caaSKent Overstreet 	if (fsck_err_on(ret ||
5428e877caaSKent Overstreet 			root_id != bch2_snapshot_root(c, root_id) ||
5438e877caaSKent Overstreet 			st.k->p.offset != le32_to_cpu(s.tree),
544b65db750SKent Overstreet 			c, snapshot_tree_to_missing_snapshot,
5458e877caaSKent Overstreet 			"snapshot tree points to missing/incorrect snapshot:\n  %s",
5468e877caaSKent Overstreet 			(bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
5478e877caaSKent Overstreet 		ret = bch2_btree_delete_at(trans, iter, 0);
5488e877caaSKent Overstreet 		goto err;
5498e877caaSKent Overstreet 	}
5508e877caaSKent Overstreet 
5518e877caaSKent Overstreet 	ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol),
5528e877caaSKent Overstreet 				 false, 0, &subvol);
5538e877caaSKent Overstreet 	if (ret && !bch2_err_matches(ret, ENOENT))
5548e877caaSKent Overstreet 		goto err;
5558e877caaSKent Overstreet 
556b65db750SKent Overstreet 	if (fsck_err_on(ret,
557b65db750SKent Overstreet 			c, snapshot_tree_to_missing_subvol,
5588e877caaSKent Overstreet 			"snapshot tree points to missing subvolume:\n  %s",
5598e877caaSKent Overstreet 			(printbuf_reset(&buf),
5608e877caaSKent Overstreet 			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
5611c31b83aSKent Overstreet 	    fsck_err_on(!bch2_snapshot_is_ancestor(c,
5628e877caaSKent Overstreet 						le32_to_cpu(subvol.snapshot),
563b65db750SKent Overstreet 						root_id),
564b65db750SKent Overstreet 			c, snapshot_tree_to_wrong_subvol,
5658e877caaSKent Overstreet 			"snapshot tree points to subvolume that does not point to snapshot in this tree:\n  %s",
5668e877caaSKent Overstreet 			(printbuf_reset(&buf),
5678e877caaSKent Overstreet 			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
568b65db750SKent Overstreet 	    fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol),
569b65db750SKent Overstreet 			c, snapshot_tree_to_snapshot_subvol,
5708e877caaSKent Overstreet 			"snapshot tree points to snapshot subvolume:\n  %s",
5718e877caaSKent Overstreet 			(printbuf_reset(&buf),
5728e877caaSKent Overstreet 			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
5738e877caaSKent Overstreet 		struct bkey_i_snapshot_tree *u;
5748e877caaSKent Overstreet 		u32 subvol_id;
5758e877caaSKent Overstreet 
5768e877caaSKent Overstreet 		ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
5778e877caaSKent Overstreet 		if (ret)
5788e877caaSKent Overstreet 			goto err;
5798e877caaSKent Overstreet 
5808e877caaSKent Overstreet 		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot_tree);
5818e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(u);
5828e877caaSKent Overstreet 		if (ret)
5838e877caaSKent Overstreet 			goto err;
5848e877caaSKent Overstreet 
5858e877caaSKent Overstreet 		u->v.master_subvol = cpu_to_le32(subvol_id);
5868e877caaSKent Overstreet 		st = snapshot_tree_i_to_s_c(u);
5878e877caaSKent Overstreet 	}
5888e877caaSKent Overstreet err:
5898e877caaSKent Overstreet fsck_err:
5908e877caaSKent Overstreet 	printbuf_exit(&buf);
5918e877caaSKent Overstreet 	return ret;
5928e877caaSKent Overstreet }
5938e877caaSKent Overstreet 
5948e877caaSKent Overstreet /*
5958e877caaSKent Overstreet  * For each snapshot_tree, make sure it points to the root of a snapshot tree
5968e877caaSKent Overstreet  * and that snapshot entry points back to it, or delete it.
5978e877caaSKent Overstreet  *
5988e877caaSKent Overstreet  * And, make sure it points to a subvolume within that snapshot tree, or correct
5998e877caaSKent Overstreet  * it to point to the oldest subvolume within that snapshot tree.
6008e877caaSKent Overstreet  */
6018e877caaSKent Overstreet int bch2_check_snapshot_trees(struct bch_fs *c)
6028e877caaSKent Overstreet {
60380eab7a7SKent Overstreet 	int ret = bch2_trans_run(c,
6046bd68ec2SKent Overstreet 		for_each_btree_key_commit(trans, iter,
6058e877caaSKent Overstreet 			BTREE_ID_snapshot_trees, POS_MIN,
6068e877caaSKent Overstreet 			BTREE_ITER_PREFETCH, k,
6073f0e297dSKent Overstreet 			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
6086bd68ec2SKent Overstreet 		check_snapshot_tree(trans, &iter, k)));
609cf904c8dSKent Overstreet 	bch_err_fn(c, ret);
6108e877caaSKent Overstreet 	return ret;
6118e877caaSKent Overstreet }
6128e877caaSKent Overstreet 
6138e877caaSKent Overstreet /*
6148e877caaSKent Overstreet  * Look up snapshot tree for @tree_id and find root,
6158e877caaSKent Overstreet  * make sure @snap_id is a descendent:
6168e877caaSKent Overstreet  */
6178e877caaSKent Overstreet static int snapshot_tree_ptr_good(struct btree_trans *trans,
6188e877caaSKent Overstreet 				  u32 snap_id, u32 tree_id)
6198e877caaSKent Overstreet {
6208e877caaSKent Overstreet 	struct bch_snapshot_tree s_t;
6218e877caaSKent Overstreet 	int ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t);
6228e877caaSKent Overstreet 
6238e877caaSKent Overstreet 	if (bch2_err_matches(ret, ENOENT))
6248e877caaSKent Overstreet 		return 0;
6258e877caaSKent Overstreet 	if (ret)
6268e877caaSKent Overstreet 		return ret;
6278e877caaSKent Overstreet 
6288e877caaSKent Overstreet 	return bch2_snapshot_is_ancestor_early(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot));
6298e877caaSKent Overstreet }
6308e877caaSKent Overstreet 
6318e877caaSKent Overstreet u32 bch2_snapshot_skiplist_get(struct bch_fs *c, u32 id)
6328e877caaSKent Overstreet {
6338e877caaSKent Overstreet 	const struct snapshot_t *s;
6348e877caaSKent Overstreet 
6358e877caaSKent Overstreet 	if (!id)
6368e877caaSKent Overstreet 		return 0;
6378e877caaSKent Overstreet 
6388e877caaSKent Overstreet 	rcu_read_lock();
6398e877caaSKent Overstreet 	s = snapshot_t(c, id);
6408e877caaSKent Overstreet 	if (s->parent)
6418e877caaSKent Overstreet 		id = bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth));
6428e877caaSKent Overstreet 	rcu_read_unlock();
6438e877caaSKent Overstreet 
6448e877caaSKent Overstreet 	return id;
6458e877caaSKent Overstreet }
6468e877caaSKent Overstreet 
647097d4cc8SKent Overstreet static int snapshot_skiplist_good(struct btree_trans *trans, u32 id, struct bch_snapshot s)
6488e877caaSKent Overstreet {
6498e877caaSKent Overstreet 	unsigned i;
6508e877caaSKent Overstreet 
651097d4cc8SKent Overstreet 	for (i = 0; i < 3; i++)
652097d4cc8SKent Overstreet 		if (!s.parent) {
653097d4cc8SKent Overstreet 			if (s.skip[i])
6548e877caaSKent Overstreet 				return false;
655097d4cc8SKent Overstreet 		} else {
656097d4cc8SKent Overstreet 			if (!bch2_snapshot_is_ancestor_early(trans->c, id, le32_to_cpu(s.skip[i])))
6578e877caaSKent Overstreet 				return false;
6588e877caaSKent Overstreet 		}
6598e877caaSKent Overstreet 
6608e877caaSKent Overstreet 	return true;
6618e877caaSKent Overstreet }
6628e877caaSKent Overstreet 
6638e877caaSKent Overstreet /*
6648e877caaSKent Overstreet  * snapshot_tree pointer was incorrect: look up root snapshot node, make sure
6658e877caaSKent Overstreet  * its snapshot_tree pointer is correct (allocate new one if necessary), then
6668e877caaSKent Overstreet  * update this node's pointer to root node's pointer:
6678e877caaSKent Overstreet  */
6688e877caaSKent Overstreet static int snapshot_tree_ptr_repair(struct btree_trans *trans,
6698e877caaSKent Overstreet 				    struct btree_iter *iter,
6708e877caaSKent Overstreet 				    struct bkey_s_c k,
6718e877caaSKent Overstreet 				    struct bch_snapshot *s)
6728e877caaSKent Overstreet {
6738e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
6748e877caaSKent Overstreet 	struct btree_iter root_iter;
6758e877caaSKent Overstreet 	struct bch_snapshot_tree s_t;
6768e877caaSKent Overstreet 	struct bkey_s_c_snapshot root;
6778e877caaSKent Overstreet 	struct bkey_i_snapshot *u;
6788e877caaSKent Overstreet 	u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id;
6798e877caaSKent Overstreet 	int ret;
6808e877caaSKent Overstreet 
6818e877caaSKent Overstreet 	root = bch2_bkey_get_iter_typed(trans, &root_iter,
6828e877caaSKent Overstreet 			       BTREE_ID_snapshots, POS(0, root_id),
6838e877caaSKent Overstreet 			       BTREE_ITER_WITH_UPDATES, snapshot);
6848e877caaSKent Overstreet 	ret = bkey_err(root);
6858e877caaSKent Overstreet 	if (ret)
6868e877caaSKent Overstreet 		goto err;
6878e877caaSKent Overstreet 
6888e877caaSKent Overstreet 	tree_id = le32_to_cpu(root.v->tree);
6898e877caaSKent Overstreet 
6908e877caaSKent Overstreet 	ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t);
6918e877caaSKent Overstreet 	if (ret && !bch2_err_matches(ret, ENOENT))
6928e877caaSKent Overstreet 		return ret;
6938e877caaSKent Overstreet 
6948e877caaSKent Overstreet 	if (ret || le32_to_cpu(s_t.root_snapshot) != root_id) {
6958e877caaSKent Overstreet 		u = bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot);
6968e877caaSKent Overstreet 		ret =   PTR_ERR_OR_ZERO(u) ?:
6978e877caaSKent Overstreet 			bch2_snapshot_tree_create(trans, root_id,
6988e877caaSKent Overstreet 				bch2_snapshot_tree_oldest_subvol(c, root_id),
6998e877caaSKent Overstreet 				&tree_id);
7008e877caaSKent Overstreet 		if (ret)
7018e877caaSKent Overstreet 			goto err;
7028e877caaSKent Overstreet 
7038e877caaSKent Overstreet 		u->v.tree = cpu_to_le32(tree_id);
7048e877caaSKent Overstreet 		if (k.k->p.offset == root_id)
7058e877caaSKent Overstreet 			*s = u->v;
7068e877caaSKent Overstreet 	}
7078e877caaSKent Overstreet 
7088e877caaSKent Overstreet 	if (k.k->p.offset != root_id) {
7098e877caaSKent Overstreet 		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
7108e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(u);
7118e877caaSKent Overstreet 		if (ret)
7128e877caaSKent Overstreet 			goto err;
7138e877caaSKent Overstreet 
7148e877caaSKent Overstreet 		u->v.tree = cpu_to_le32(tree_id);
7158e877caaSKent Overstreet 		*s = u->v;
7168e877caaSKent Overstreet 	}
7178e877caaSKent Overstreet err:
7188e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &root_iter);
7198e877caaSKent Overstreet 	return ret;
7208e877caaSKent Overstreet }
7218e877caaSKent Overstreet 
7228e877caaSKent Overstreet static int check_snapshot(struct btree_trans *trans,
7238e877caaSKent Overstreet 			  struct btree_iter *iter,
7248e877caaSKent Overstreet 			  struct bkey_s_c k)
7258e877caaSKent Overstreet {
7268e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
7278e877caaSKent Overstreet 	struct bch_snapshot s;
7288e877caaSKent Overstreet 	struct bch_subvolume subvol;
7298e877caaSKent Overstreet 	struct bch_snapshot v;
7308e877caaSKent Overstreet 	struct bkey_i_snapshot *u;
7318e877caaSKent Overstreet 	u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
7328e877caaSKent Overstreet 	u32 real_depth;
7338e877caaSKent Overstreet 	struct printbuf buf = PRINTBUF;
7348e877caaSKent Overstreet 	bool should_have_subvol;
7358e877caaSKent Overstreet 	u32 i, id;
7368e877caaSKent Overstreet 	int ret = 0;
7378e877caaSKent Overstreet 
7388e877caaSKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot)
7398e877caaSKent Overstreet 		return 0;
7408e877caaSKent Overstreet 
7418e877caaSKent Overstreet 	memset(&s, 0, sizeof(s));
742c4333eb5SKent Overstreet 	memcpy(&s, k.v, min(sizeof(s), bkey_val_bytes(k.k)));
7438e877caaSKent Overstreet 
7448e877caaSKent Overstreet 	id = le32_to_cpu(s.parent);
7458e877caaSKent Overstreet 	if (id) {
7468e877caaSKent Overstreet 		ret = bch2_snapshot_lookup(trans, id, &v);
7478e877caaSKent Overstreet 		if (bch2_err_matches(ret, ENOENT))
7488e877caaSKent Overstreet 			bch_err(c, "snapshot with nonexistent parent:\n  %s",
7498e877caaSKent Overstreet 				(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
7508e877caaSKent Overstreet 		if (ret)
7518e877caaSKent Overstreet 			goto err;
7528e877caaSKent Overstreet 
7538e877caaSKent Overstreet 		if (le32_to_cpu(v.children[0]) != k.k->p.offset &&
7548e877caaSKent Overstreet 		    le32_to_cpu(v.children[1]) != k.k->p.offset) {
7558e877caaSKent Overstreet 			bch_err(c, "snapshot parent %u missing pointer to child %llu",
7568e877caaSKent Overstreet 				id, k.k->p.offset);
7578e877caaSKent Overstreet 			ret = -EINVAL;
7588e877caaSKent Overstreet 			goto err;
7598e877caaSKent Overstreet 		}
7608e877caaSKent Overstreet 	}
7618e877caaSKent Overstreet 
7628e877caaSKent Overstreet 	for (i = 0; i < 2 && s.children[i]; i++) {
7638e877caaSKent Overstreet 		id = le32_to_cpu(s.children[i]);
7648e877caaSKent Overstreet 
7658e877caaSKent Overstreet 		ret = bch2_snapshot_lookup(trans, id, &v);
7668e877caaSKent Overstreet 		if (bch2_err_matches(ret, ENOENT))
7678e877caaSKent Overstreet 			bch_err(c, "snapshot node %llu has nonexistent child %u",
7688e877caaSKent Overstreet 				k.k->p.offset, id);
7698e877caaSKent Overstreet 		if (ret)
7708e877caaSKent Overstreet 			goto err;
7718e877caaSKent Overstreet 
7728e877caaSKent Overstreet 		if (le32_to_cpu(v.parent) != k.k->p.offset) {
7738e877caaSKent Overstreet 			bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)",
7748e877caaSKent Overstreet 				id, le32_to_cpu(v.parent), k.k->p.offset);
7758e877caaSKent Overstreet 			ret = -EINVAL;
7768e877caaSKent Overstreet 			goto err;
7778e877caaSKent Overstreet 		}
7788e877caaSKent Overstreet 	}
7798e877caaSKent Overstreet 
7808e877caaSKent Overstreet 	should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
7818e877caaSKent Overstreet 		!BCH_SNAPSHOT_DELETED(&s);
7828e877caaSKent Overstreet 
7838e877caaSKent Overstreet 	if (should_have_subvol) {
7848e877caaSKent Overstreet 		id = le32_to_cpu(s.subvol);
7858e877caaSKent Overstreet 		ret = bch2_subvolume_get(trans, id, 0, false, &subvol);
7868e877caaSKent Overstreet 		if (bch2_err_matches(ret, ENOENT))
7878e877caaSKent Overstreet 			bch_err(c, "snapshot points to nonexistent subvolume:\n  %s",
7888e877caaSKent Overstreet 				(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
7898e877caaSKent Overstreet 		if (ret)
7908e877caaSKent Overstreet 			goto err;
7918e877caaSKent Overstreet 
7928e877caaSKent Overstreet 		if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) {
7938e877caaSKent Overstreet 			bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
7948e877caaSKent Overstreet 				k.k->p.offset);
7958e877caaSKent Overstreet 			ret = -EINVAL;
7968e877caaSKent Overstreet 			goto err;
7978e877caaSKent Overstreet 		}
7988e877caaSKent Overstreet 	} else {
799b65db750SKent Overstreet 		if (fsck_err_on(s.subvol,
800b65db750SKent Overstreet 				c, snapshot_should_not_have_subvol,
801b65db750SKent Overstreet 				"snapshot should not point to subvol:\n  %s",
8028e877caaSKent Overstreet 				(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
8038e877caaSKent Overstreet 			u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
8048e877caaSKent Overstreet 			ret = PTR_ERR_OR_ZERO(u);
8058e877caaSKent Overstreet 			if (ret)
8068e877caaSKent Overstreet 				goto err;
8078e877caaSKent Overstreet 
8088e877caaSKent Overstreet 			u->v.subvol = 0;
8098e877caaSKent Overstreet 			s = u->v;
8108e877caaSKent Overstreet 		}
8118e877caaSKent Overstreet 	}
8128e877caaSKent Overstreet 
8138e877caaSKent Overstreet 	ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree));
8148e877caaSKent Overstreet 	if (ret < 0)
8158e877caaSKent Overstreet 		goto err;
8168e877caaSKent Overstreet 
817b65db750SKent Overstreet 	if (fsck_err_on(!ret, c, snapshot_to_bad_snapshot_tree,
818b65db750SKent Overstreet 			"snapshot points to missing/incorrect tree:\n  %s",
8198e877caaSKent Overstreet 			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
8208e877caaSKent Overstreet 		ret = snapshot_tree_ptr_repair(trans, iter, k, &s);
8218e877caaSKent Overstreet 		if (ret)
8228e877caaSKent Overstreet 			goto err;
8238e877caaSKent Overstreet 	}
8248e877caaSKent Overstreet 	ret = 0;
8258e877caaSKent Overstreet 
8268e877caaSKent Overstreet 	real_depth = bch2_snapshot_depth(c, parent_id);
8278e877caaSKent Overstreet 
828074cbcdaSKent Overstreet 	if (fsck_err_on(le32_to_cpu(s.depth) != real_depth,
829074cbcdaSKent Overstreet 			c, snapshot_bad_depth,
830b65db750SKent Overstreet 			"snapshot with incorrect depth field, should be %u:\n  %s",
831074cbcdaSKent Overstreet 			real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
8328e877caaSKent Overstreet 		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
8338e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(u);
8348e877caaSKent Overstreet 		if (ret)
8358e877caaSKent Overstreet 			goto err;
8368e877caaSKent Overstreet 
8378e877caaSKent Overstreet 		u->v.depth = cpu_to_le32(real_depth);
8388e877caaSKent Overstreet 		s = u->v;
8398e877caaSKent Overstreet 	}
8408e877caaSKent Overstreet 
841097d4cc8SKent Overstreet 	ret = snapshot_skiplist_good(trans, k.k->p.offset, s);
8428e877caaSKent Overstreet 	if (ret < 0)
8438e877caaSKent Overstreet 		goto err;
8448e877caaSKent Overstreet 
845074cbcdaSKent Overstreet 	if (fsck_err_on(!ret, c, snapshot_bad_skiplist,
846b65db750SKent Overstreet 			"snapshot with bad skiplist field:\n  %s",
847074cbcdaSKent Overstreet 			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
8488e877caaSKent Overstreet 		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
8498e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(u);
8508e877caaSKent Overstreet 		if (ret)
8518e877caaSKent Overstreet 			goto err;
8528e877caaSKent Overstreet 
8538e877caaSKent Overstreet 		for (i = 0; i < ARRAY_SIZE(u->v.skip); i++)
8548e877caaSKent Overstreet 			u->v.skip[i] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent_id));
8558e877caaSKent Overstreet 
8568e877caaSKent Overstreet 		bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_le32);
8578e877caaSKent Overstreet 		s = u->v;
8588e877caaSKent Overstreet 	}
8598e877caaSKent Overstreet 	ret = 0;
8608e877caaSKent Overstreet err:
8618e877caaSKent Overstreet fsck_err:
8628e877caaSKent Overstreet 	printbuf_exit(&buf);
8638e877caaSKent Overstreet 	return ret;
8648e877caaSKent Overstreet }
8658e877caaSKent Overstreet 
8668e877caaSKent Overstreet int bch2_check_snapshots(struct bch_fs *c)
8678e877caaSKent Overstreet {
8688e877caaSKent Overstreet 	/*
8698e877caaSKent Overstreet 	 * We iterate backwards as checking/fixing the depth field requires that
8708e877caaSKent Overstreet 	 * the parent's depth already be correct:
8718e877caaSKent Overstreet 	 */
87280eab7a7SKent Overstreet 	int ret = bch2_trans_run(c,
8736bd68ec2SKent Overstreet 		for_each_btree_key_reverse_commit(trans, iter,
8748e877caaSKent Overstreet 				BTREE_ID_snapshots, POS_MAX,
8758e877caaSKent Overstreet 				BTREE_ITER_PREFETCH, k,
8763f0e297dSKent Overstreet 				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
8776bd68ec2SKent Overstreet 			check_snapshot(trans, &iter, k)));
8788e877caaSKent Overstreet 	bch_err_fn(c, ret);
8798e877caaSKent Overstreet 	return ret;
8808e877caaSKent Overstreet }
8818e877caaSKent Overstreet 
8828e877caaSKent Overstreet /*
8838e877caaSKent Overstreet  * Mark a snapshot as deleted, for future cleanup:
8848e877caaSKent Overstreet  */
8858e877caaSKent Overstreet int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
8868e877caaSKent Overstreet {
8878e877caaSKent Overstreet 	struct btree_iter iter;
8888e877caaSKent Overstreet 	struct bkey_i_snapshot *s;
8898e877caaSKent Overstreet 	int ret = 0;
8908e877caaSKent Overstreet 
8918e877caaSKent Overstreet 	s = bch2_bkey_get_mut_typed(trans, &iter,
8928e877caaSKent Overstreet 				    BTREE_ID_snapshots, POS(0, id),
8938e877caaSKent Overstreet 				    0, snapshot);
8948e877caaSKent Overstreet 	ret = PTR_ERR_OR_ZERO(s);
8958e877caaSKent Overstreet 	if (unlikely(ret)) {
8968e877caaSKent Overstreet 		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
8978e877caaSKent Overstreet 					trans->c, "missing snapshot %u", id);
8988e877caaSKent Overstreet 		return ret;
8998e877caaSKent Overstreet 	}
9008e877caaSKent Overstreet 
9018e877caaSKent Overstreet 	/* already deleted? */
9028e877caaSKent Overstreet 	if (BCH_SNAPSHOT_DELETED(&s->v))
9038e877caaSKent Overstreet 		goto err;
9048e877caaSKent Overstreet 
9058e877caaSKent Overstreet 	SET_BCH_SNAPSHOT_DELETED(&s->v, true);
9068e877caaSKent Overstreet 	SET_BCH_SNAPSHOT_SUBVOL(&s->v, false);
9078e877caaSKent Overstreet 	s->v.subvol = 0;
9088e877caaSKent Overstreet err:
9098e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
9108e877caaSKent Overstreet 	return ret;
9118e877caaSKent Overstreet }
9128e877caaSKent Overstreet 
913f55d6e07SKent Overstreet static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s)
914f55d6e07SKent Overstreet {
915f55d6e07SKent Overstreet 	if (le32_to_cpu(s->children[0]) < le32_to_cpu(s->children[1]))
916f55d6e07SKent Overstreet 		swap(s->children[0], s->children[1]);
917f55d6e07SKent Overstreet }
918f55d6e07SKent Overstreet 
91996dea3d5SKent Overstreet static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
9208e877caaSKent Overstreet {
9218e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
9228e877caaSKent Overstreet 	struct btree_iter iter, p_iter = (struct btree_iter) { NULL };
923f55d6e07SKent Overstreet 	struct btree_iter c_iter = (struct btree_iter) { NULL };
9248e877caaSKent Overstreet 	struct btree_iter tree_iter = (struct btree_iter) { NULL };
9258e877caaSKent Overstreet 	struct bkey_s_c_snapshot s;
926f55d6e07SKent Overstreet 	u32 parent_id, child_id;
9278e877caaSKent Overstreet 	unsigned i;
9288e877caaSKent Overstreet 	int ret = 0;
9298e877caaSKent Overstreet 
9308e877caaSKent Overstreet 	s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
9318e877caaSKent Overstreet 				     BTREE_ITER_INTENT, snapshot);
9328e877caaSKent Overstreet 	ret = bkey_err(s);
9338e877caaSKent Overstreet 	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
9348e877caaSKent Overstreet 				"missing snapshot %u", id);
9358e877caaSKent Overstreet 
9368e877caaSKent Overstreet 	if (ret)
9378e877caaSKent Overstreet 		goto err;
9388e877caaSKent Overstreet 
939f55d6e07SKent Overstreet 	BUG_ON(s.v->children[1]);
940f55d6e07SKent Overstreet 
9418e877caaSKent Overstreet 	parent_id = le32_to_cpu(s.v->parent);
942f55d6e07SKent Overstreet 	child_id = le32_to_cpu(s.v->children[0]);
9438e877caaSKent Overstreet 
9448e877caaSKent Overstreet 	if (parent_id) {
9458e877caaSKent Overstreet 		struct bkey_i_snapshot *parent;
9468e877caaSKent Overstreet 
9478e877caaSKent Overstreet 		parent = bch2_bkey_get_mut_typed(trans, &p_iter,
9488e877caaSKent Overstreet 				     BTREE_ID_snapshots, POS(0, parent_id),
9498e877caaSKent Overstreet 				     0, snapshot);
9508e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(parent);
9518e877caaSKent Overstreet 		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
9528e877caaSKent Overstreet 					"missing snapshot %u", parent_id);
953f55d6e07SKent Overstreet 		if (unlikely(ret))
9548e877caaSKent Overstreet 			goto err;
9558e877caaSKent Overstreet 
956f55d6e07SKent Overstreet 		/* find entry in parent->children for node being deleted */
9578e877caaSKent Overstreet 		for (i = 0; i < 2; i++)
9588e877caaSKent Overstreet 			if (le32_to_cpu(parent->v.children[i]) == id)
9598e877caaSKent Overstreet 				break;
9608e877caaSKent Overstreet 
961f55d6e07SKent Overstreet 		if (bch2_fs_inconsistent_on(i == 2, c,
962f55d6e07SKent Overstreet 					"snapshot %u missing child pointer to %u",
963f55d6e07SKent Overstreet 					parent_id, id))
964f55d6e07SKent Overstreet 			goto err;
9658e877caaSKent Overstreet 
9660a11adfbSKent Overstreet 		parent->v.children[i] = cpu_to_le32(child_id);
967f55d6e07SKent Overstreet 
968f55d6e07SKent Overstreet 		normalize_snapshot_child_pointers(&parent->v);
969f55d6e07SKent Overstreet 	}
970f55d6e07SKent Overstreet 
971f55d6e07SKent Overstreet 	if (child_id) {
972f55d6e07SKent Overstreet 		struct bkey_i_snapshot *child;
973f55d6e07SKent Overstreet 
974f55d6e07SKent Overstreet 		child = bch2_bkey_get_mut_typed(trans, &c_iter,
975f55d6e07SKent Overstreet 				     BTREE_ID_snapshots, POS(0, child_id),
976f55d6e07SKent Overstreet 				     0, snapshot);
977f55d6e07SKent Overstreet 		ret = PTR_ERR_OR_ZERO(child);
978f55d6e07SKent Overstreet 		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
979f55d6e07SKent Overstreet 					"missing snapshot %u", child_id);
980f55d6e07SKent Overstreet 		if (unlikely(ret))
981f55d6e07SKent Overstreet 			goto err;
982f55d6e07SKent Overstreet 
983f55d6e07SKent Overstreet 		child->v.parent = cpu_to_le32(parent_id);
984f55d6e07SKent Overstreet 
985f55d6e07SKent Overstreet 		if (!child->v.parent) {
986f55d6e07SKent Overstreet 			child->v.skip[0] = 0;
987f55d6e07SKent Overstreet 			child->v.skip[1] = 0;
988f55d6e07SKent Overstreet 			child->v.skip[2] = 0;
989f55d6e07SKent Overstreet 		}
990f55d6e07SKent Overstreet 	}
991f55d6e07SKent Overstreet 
992f55d6e07SKent Overstreet 	if (!parent_id) {
9938e877caaSKent Overstreet 		/*
9948e877caaSKent Overstreet 		 * We're deleting the root of a snapshot tree: update the
9958e877caaSKent Overstreet 		 * snapshot_tree entry to point to the new root, or delete it if
9968e877caaSKent Overstreet 		 * this is the last snapshot ID in this tree:
9978e877caaSKent Overstreet 		 */
9988e877caaSKent Overstreet 		struct bkey_i_snapshot_tree *s_t;
9998e877caaSKent Overstreet 
10008e877caaSKent Overstreet 		BUG_ON(s.v->children[1]);
10018e877caaSKent Overstreet 
10028e877caaSKent Overstreet 		s_t = bch2_bkey_get_mut_typed(trans, &tree_iter,
10038e877caaSKent Overstreet 				BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)),
10048e877caaSKent Overstreet 				0, snapshot_tree);
10058e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(s_t);
10068e877caaSKent Overstreet 		if (ret)
10078e877caaSKent Overstreet 			goto err;
10088e877caaSKent Overstreet 
10098e877caaSKent Overstreet 		if (s.v->children[0]) {
10108e877caaSKent Overstreet 			s_t->v.root_snapshot = s.v->children[0];
10118e877caaSKent Overstreet 		} else {
10128e877caaSKent Overstreet 			s_t->k.type = KEY_TYPE_deleted;
10138e877caaSKent Overstreet 			set_bkey_val_u64s(&s_t->k, 0);
10148e877caaSKent Overstreet 		}
10158e877caaSKent Overstreet 	}
10168e877caaSKent Overstreet 
10178e877caaSKent Overstreet 	ret = bch2_btree_delete_at(trans, &iter, 0);
10188e877caaSKent Overstreet err:
10198e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &tree_iter);
10208e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &p_iter);
1021f55d6e07SKent Overstreet 	bch2_trans_iter_exit(trans, &c_iter);
10228e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
10238e877caaSKent Overstreet 	return ret;
10248e877caaSKent Overstreet }
10258e877caaSKent Overstreet 
10268e877caaSKent Overstreet static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
10278e877caaSKent Overstreet 			  u32 *new_snapids,
10288e877caaSKent Overstreet 			  u32 *snapshot_subvols,
10298e877caaSKent Overstreet 			  unsigned nr_snapids)
10308e877caaSKent Overstreet {
10318e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
10328e877caaSKent Overstreet 	struct btree_iter iter;
10338e877caaSKent Overstreet 	struct bkey_i_snapshot *n;
10348e877caaSKent Overstreet 	struct bkey_s_c k;
10358e877caaSKent Overstreet 	unsigned i, j;
10368e877caaSKent Overstreet 	u32 depth = bch2_snapshot_depth(c, parent);
10378e877caaSKent Overstreet 	int ret;
10388e877caaSKent Overstreet 
10398e877caaSKent Overstreet 	bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
10408e877caaSKent Overstreet 			     POS_MIN, BTREE_ITER_INTENT);
10418e877caaSKent Overstreet 	k = bch2_btree_iter_peek(&iter);
10428e877caaSKent Overstreet 	ret = bkey_err(k);
10438e877caaSKent Overstreet 	if (ret)
10448e877caaSKent Overstreet 		goto err;
10458e877caaSKent Overstreet 
10468e877caaSKent Overstreet 	for (i = 0; i < nr_snapids; i++) {
10478e877caaSKent Overstreet 		k = bch2_btree_iter_prev_slot(&iter);
10488e877caaSKent Overstreet 		ret = bkey_err(k);
10498e877caaSKent Overstreet 		if (ret)
10508e877caaSKent Overstreet 			goto err;
10518e877caaSKent Overstreet 
10528e877caaSKent Overstreet 		if (!k.k || !k.k->p.offset) {
10538e877caaSKent Overstreet 			ret = -BCH_ERR_ENOSPC_snapshot_create;
10548e877caaSKent Overstreet 			goto err;
10558e877caaSKent Overstreet 		}
10568e877caaSKent Overstreet 
10578e877caaSKent Overstreet 		n = bch2_bkey_alloc(trans, &iter, 0, snapshot);
10588e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(n);
10598e877caaSKent Overstreet 		if (ret)
10608e877caaSKent Overstreet 			goto err;
10618e877caaSKent Overstreet 
10628e877caaSKent Overstreet 		n->v.flags	= 0;
10638e877caaSKent Overstreet 		n->v.parent	= cpu_to_le32(parent);
10648e877caaSKent Overstreet 		n->v.subvol	= cpu_to_le32(snapshot_subvols[i]);
10658e877caaSKent Overstreet 		n->v.tree	= cpu_to_le32(tree);
10668e877caaSKent Overstreet 		n->v.depth	= cpu_to_le32(depth);
1067d32088f2SKent Overstreet 		n->v.btime.lo	= cpu_to_le64(bch2_current_time(c));
1068d32088f2SKent Overstreet 		n->v.btime.hi	= 0;
10698e877caaSKent Overstreet 
10708e877caaSKent Overstreet 		for (j = 0; j < ARRAY_SIZE(n->v.skip); j++)
10718e877caaSKent Overstreet 			n->v.skip[j] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent));
10728e877caaSKent Overstreet 
10738e877caaSKent Overstreet 		bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_le32);
10748e877caaSKent Overstreet 		SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);
10758e877caaSKent Overstreet 
1076ad00bce0SKent Overstreet 		ret = __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
10778e877caaSKent Overstreet 					 bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0);
10788e877caaSKent Overstreet 		if (ret)
10798e877caaSKent Overstreet 			goto err;
10808e877caaSKent Overstreet 
10818e877caaSKent Overstreet 		new_snapids[i]	= iter.pos.offset;
1082eebe8a84SKent Overstreet 
1083eebe8a84SKent Overstreet 		mutex_lock(&c->snapshot_table_lock);
1084eebe8a84SKent Overstreet 		snapshot_t_mut(c, new_snapids[i])->equiv = new_snapids[i];
1085eebe8a84SKent Overstreet 		mutex_unlock(&c->snapshot_table_lock);
10868e877caaSKent Overstreet 	}
10878e877caaSKent Overstreet err:
10888e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
10898e877caaSKent Overstreet 	return ret;
10908e877caaSKent Overstreet }
10918e877caaSKent Overstreet 
10928e877caaSKent Overstreet /*
10938e877caaSKent Overstreet  * Create new snapshot IDs as children of an existing snapshot ID:
10948e877caaSKent Overstreet  */
10958e877caaSKent Overstreet static int bch2_snapshot_node_create_children(struct btree_trans *trans, u32 parent,
10968e877caaSKent Overstreet 			      u32 *new_snapids,
10978e877caaSKent Overstreet 			      u32 *snapshot_subvols,
10988e877caaSKent Overstreet 			      unsigned nr_snapids)
10998e877caaSKent Overstreet {
11008e877caaSKent Overstreet 	struct btree_iter iter;
11018e877caaSKent Overstreet 	struct bkey_i_snapshot *n_parent;
11028e877caaSKent Overstreet 	int ret = 0;
11038e877caaSKent Overstreet 
11048e877caaSKent Overstreet 	n_parent = bch2_bkey_get_mut_typed(trans, &iter,
11058e877caaSKent Overstreet 			BTREE_ID_snapshots, POS(0, parent),
11068e877caaSKent Overstreet 			0, snapshot);
11078e877caaSKent Overstreet 	ret = PTR_ERR_OR_ZERO(n_parent);
11088e877caaSKent Overstreet 	if (unlikely(ret)) {
11098e877caaSKent Overstreet 		if (bch2_err_matches(ret, ENOENT))
11108e877caaSKent Overstreet 			bch_err(trans->c, "snapshot %u not found", parent);
11118e877caaSKent Overstreet 		return ret;
11128e877caaSKent Overstreet 	}
11138e877caaSKent Overstreet 
11148e877caaSKent Overstreet 	if (n_parent->v.children[0] || n_parent->v.children[1]) {
11158e877caaSKent Overstreet 		bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children");
11168e877caaSKent Overstreet 		ret = -EINVAL;
11178e877caaSKent Overstreet 		goto err;
11188e877caaSKent Overstreet 	}
11198e877caaSKent Overstreet 
11208e877caaSKent Overstreet 	ret = create_snapids(trans, parent, le32_to_cpu(n_parent->v.tree),
11218e877caaSKent Overstreet 			     new_snapids, snapshot_subvols, nr_snapids);
11228e877caaSKent Overstreet 	if (ret)
11238e877caaSKent Overstreet 		goto err;
11248e877caaSKent Overstreet 
11258e877caaSKent Overstreet 	n_parent->v.children[0] = cpu_to_le32(new_snapids[0]);
11268e877caaSKent Overstreet 	n_parent->v.children[1] = cpu_to_le32(new_snapids[1]);
11278e877caaSKent Overstreet 	n_parent->v.subvol = 0;
11288e877caaSKent Overstreet 	SET_BCH_SNAPSHOT_SUBVOL(&n_parent->v, false);
11298e877caaSKent Overstreet err:
11308e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
11318e877caaSKent Overstreet 	return ret;
11328e877caaSKent Overstreet }
11338e877caaSKent Overstreet 
11348e877caaSKent Overstreet /*
11358e877caaSKent Overstreet  * Create a snapshot node that is the root of a new tree:
11368e877caaSKent Overstreet  */
11378e877caaSKent Overstreet static int bch2_snapshot_node_create_tree(struct btree_trans *trans,
11388e877caaSKent Overstreet 			      u32 *new_snapids,
11398e877caaSKent Overstreet 			      u32 *snapshot_subvols,
11408e877caaSKent Overstreet 			      unsigned nr_snapids)
11418e877caaSKent Overstreet {
11428e877caaSKent Overstreet 	struct bkey_i_snapshot_tree *n_tree;
11438e877caaSKent Overstreet 	int ret;
11448e877caaSKent Overstreet 
11458e877caaSKent Overstreet 	n_tree = __bch2_snapshot_tree_create(trans);
11468e877caaSKent Overstreet 	ret =   PTR_ERR_OR_ZERO(n_tree) ?:
11478e877caaSKent Overstreet 		create_snapids(trans, 0, n_tree->k.p.offset,
11488e877caaSKent Overstreet 			     new_snapids, snapshot_subvols, nr_snapids);
11498e877caaSKent Overstreet 	if (ret)
11508e877caaSKent Overstreet 		return ret;
11518e877caaSKent Overstreet 
11528e877caaSKent Overstreet 	n_tree->v.master_subvol	= cpu_to_le32(snapshot_subvols[0]);
11538e877caaSKent Overstreet 	n_tree->v.root_snapshot	= cpu_to_le32(new_snapids[0]);
11548e877caaSKent Overstreet 	return 0;
11558e877caaSKent Overstreet }
11568e877caaSKent Overstreet 
11578e877caaSKent Overstreet int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
11588e877caaSKent Overstreet 			      u32 *new_snapids,
11598e877caaSKent Overstreet 			      u32 *snapshot_subvols,
11608e877caaSKent Overstreet 			      unsigned nr_snapids)
11618e877caaSKent Overstreet {
11628e877caaSKent Overstreet 	BUG_ON((parent == 0) != (nr_snapids == 1));
11638e877caaSKent Overstreet 	BUG_ON((parent != 0) != (nr_snapids == 2));
11648e877caaSKent Overstreet 
11658e877caaSKent Overstreet 	return parent
11668e877caaSKent Overstreet 		? bch2_snapshot_node_create_children(trans, parent,
11678e877caaSKent Overstreet 				new_snapids, snapshot_subvols, nr_snapids)
11688e877caaSKent Overstreet 		: bch2_snapshot_node_create_tree(trans,
11698e877caaSKent Overstreet 				new_snapids, snapshot_subvols, nr_snapids);
11708e877caaSKent Overstreet 
11718e877caaSKent Overstreet }
11728e877caaSKent Overstreet 
11738e877caaSKent Overstreet /*
11748e877caaSKent Overstreet  * If we have an unlinked inode in an internal snapshot node, and the inode
11758e877caaSKent Overstreet  * really has been deleted in all child snapshots, how does this get cleaned up?
11768e877caaSKent Overstreet  *
11778e877caaSKent Overstreet  * first there is the problem of how keys that have been overwritten in all
11788e877caaSKent Overstreet  * child snapshots get deleted (unimplemented?), but inodes may perhaps be
11798e877caaSKent Overstreet  * special?
11808e877caaSKent Overstreet  *
11818e877caaSKent Overstreet  * also: unlinked inode in internal snapshot appears to not be getting deleted
11828e877caaSKent Overstreet  * correctly if inode doesn't exist in leaf snapshots
1183f55d6e07SKent Overstreet  *
1184f55d6e07SKent Overstreet  * solution:
1185f55d6e07SKent Overstreet  *
1186f55d6e07SKent Overstreet  * for a key in an interior snapshot node that needs work to be done that
1187f55d6e07SKent Overstreet  * requires it to be mutated: iterate over all descendent leaf nodes and copy
1188f55d6e07SKent Overstreet  * that key to snapshot leaf nodes, where we can mutate it
11898e877caaSKent Overstreet  */
11908e877caaSKent Overstreet 
11918e877caaSKent Overstreet static int snapshot_delete_key(struct btree_trans *trans,
11928e877caaSKent Overstreet 			       struct btree_iter *iter,
11938e877caaSKent Overstreet 			       struct bkey_s_c k,
11948e877caaSKent Overstreet 			       snapshot_id_list *deleted,
11958e877caaSKent Overstreet 			       snapshot_id_list *equiv_seen,
11968e877caaSKent Overstreet 			       struct bpos *last_pos)
11978e877caaSKent Overstreet {
11988e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
11998e877caaSKent Overstreet 	u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
12008e877caaSKent Overstreet 
12018e877caaSKent Overstreet 	if (!bkey_eq(k.k->p, *last_pos))
12028e877caaSKent Overstreet 		equiv_seen->nr = 0;
12038e877caaSKent Overstreet 	*last_pos = k.k->p;
12048e877caaSKent Overstreet 
12058e877caaSKent Overstreet 	if (snapshot_list_has_id(deleted, k.k->p.snapshot) ||
12068e877caaSKent Overstreet 	    snapshot_list_has_id(equiv_seen, equiv)) {
12078e877caaSKent Overstreet 		return bch2_btree_delete_at(trans, iter,
12088e877caaSKent Overstreet 					    BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
12098e877caaSKent Overstreet 	} else {
12108e877caaSKent Overstreet 		return snapshot_list_add(c, equiv_seen, equiv);
12118e877caaSKent Overstreet 	}
12128e877caaSKent Overstreet }
12138e877caaSKent Overstreet 
1214f55d6e07SKent Overstreet static int move_key_to_correct_snapshot(struct btree_trans *trans,
1215f55d6e07SKent Overstreet 			       struct btree_iter *iter,
1216f55d6e07SKent Overstreet 			       struct bkey_s_c k)
1217f55d6e07SKent Overstreet {
1218f55d6e07SKent Overstreet 	struct bch_fs *c = trans->c;
1219f55d6e07SKent Overstreet 	u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
1220f55d6e07SKent Overstreet 
1221f55d6e07SKent Overstreet 	/*
1222f55d6e07SKent Overstreet 	 * When we have a linear chain of snapshot nodes, we consider
1223f55d6e07SKent Overstreet 	 * those to form an equivalence class: we're going to collapse
1224f55d6e07SKent Overstreet 	 * them all down to a single node, and keep the leaf-most node -
1225f55d6e07SKent Overstreet 	 * which has the same id as the equivalence class id.
1226f55d6e07SKent Overstreet 	 *
1227f55d6e07SKent Overstreet 	 * If there are multiple keys in different snapshots at the same
1228f55d6e07SKent Overstreet 	 * position, we're only going to keep the one in the newest
1229f55d6e07SKent Overstreet 	 * snapshot - the rest have been overwritten and are redundant,
1230f55d6e07SKent Overstreet 	 * and for the key we're going to keep we need to move it to the
1231f55d6e07SKent Overstreet 	 * equivalance class ID if it's not there already.
1232f55d6e07SKent Overstreet 	 */
1233f55d6e07SKent Overstreet 	if (equiv != k.k->p.snapshot) {
1234f55d6e07SKent Overstreet 		struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
1235f55d6e07SKent Overstreet 		struct btree_iter new_iter;
1236f55d6e07SKent Overstreet 		int ret;
1237f55d6e07SKent Overstreet 
1238f55d6e07SKent Overstreet 		ret = PTR_ERR_OR_ZERO(new);
1239f55d6e07SKent Overstreet 		if (ret)
1240f55d6e07SKent Overstreet 			return ret;
1241f55d6e07SKent Overstreet 
1242f55d6e07SKent Overstreet 		new->k.p.snapshot = equiv;
1243f55d6e07SKent Overstreet 
1244f55d6e07SKent Overstreet 		bch2_trans_iter_init(trans, &new_iter, iter->btree_id, new->k.p,
1245f55d6e07SKent Overstreet 				     BTREE_ITER_ALL_SNAPSHOTS|
1246f55d6e07SKent Overstreet 				     BTREE_ITER_CACHED|
1247f55d6e07SKent Overstreet 				     BTREE_ITER_INTENT);
1248f55d6e07SKent Overstreet 
1249f55d6e07SKent Overstreet 		ret =   bch2_btree_iter_traverse(&new_iter) ?:
1250f55d6e07SKent Overstreet 			bch2_trans_update(trans, &new_iter, new,
1251f55d6e07SKent Overstreet 					BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
1252f55d6e07SKent Overstreet 			bch2_btree_delete_at(trans, iter,
1253f55d6e07SKent Overstreet 					BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
1254f55d6e07SKent Overstreet 		bch2_trans_iter_exit(trans, &new_iter);
1255f55d6e07SKent Overstreet 		if (ret)
1256f55d6e07SKent Overstreet 			return ret;
1257f55d6e07SKent Overstreet 	}
1258f55d6e07SKent Overstreet 
1259f55d6e07SKent Overstreet 	return 0;
1260f55d6e07SKent Overstreet }
1261f55d6e07SKent Overstreet 
1262b0b5bbf9SKent Overstreet static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c k)
12638e877caaSKent Overstreet {
12648e877caaSKent Overstreet 	struct bkey_s_c_snapshot snap;
12658e877caaSKent Overstreet 	u32 children[2];
12668e877caaSKent Overstreet 	int ret;
12678e877caaSKent Overstreet 
12688e877caaSKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot)
12698e877caaSKent Overstreet 		return 0;
12708e877caaSKent Overstreet 
12718e877caaSKent Overstreet 	snap = bkey_s_c_to_snapshot(k);
12728e877caaSKent Overstreet 	if (BCH_SNAPSHOT_DELETED(snap.v) ||
12738e877caaSKent Overstreet 	    BCH_SNAPSHOT_SUBVOL(snap.v))
12748e877caaSKent Overstreet 		return 0;
12758e877caaSKent Overstreet 
12768e877caaSKent Overstreet 	children[0] = le32_to_cpu(snap.v->children[0]);
12778e877caaSKent Overstreet 	children[1] = le32_to_cpu(snap.v->children[1]);
12788e877caaSKent Overstreet 
12798e877caaSKent Overstreet 	ret   = bch2_snapshot_live(trans, children[0]) ?:
12808e877caaSKent Overstreet 		bch2_snapshot_live(trans, children[1]);
12818e877caaSKent Overstreet 	if (ret < 0)
12828e877caaSKent Overstreet 		return ret;
1283b0b5bbf9SKent Overstreet 	return !ret;
1284b0b5bbf9SKent Overstreet }
12858e877caaSKent Overstreet 
1286b0b5bbf9SKent Overstreet /*
1287b0b5bbf9SKent Overstreet  * For a given snapshot, if it doesn't have a subvolume that points to it, and
1288b0b5bbf9SKent Overstreet  * it doesn't have child snapshot nodes - it's now redundant and we can mark it
1289b0b5bbf9SKent Overstreet  * as deleted.
1290b0b5bbf9SKent Overstreet  */
1291b0b5bbf9SKent Overstreet static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct bkey_s_c k)
1292b0b5bbf9SKent Overstreet {
1293b0b5bbf9SKent Overstreet 	int ret = bch2_snapshot_needs_delete(trans, k);
1294b0b5bbf9SKent Overstreet 
1295b0b5bbf9SKent Overstreet 	return ret <= 0
1296b0b5bbf9SKent Overstreet 		? ret
1297b0b5bbf9SKent Overstreet 		: bch2_snapshot_node_set_deleted(trans, k.k->p.offset);
12988e877caaSKent Overstreet }
12998e877caaSKent Overstreet 
1300f55d6e07SKent Overstreet static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
1301f55d6e07SKent Overstreet 						snapshot_id_list *skip)
1302f55d6e07SKent Overstreet {
1303f55d6e07SKent Overstreet 	rcu_read_lock();
13041e2d3999SKent Overstreet 	while (snapshot_list_has_id(skip, id))
13051e2d3999SKent Overstreet 		id = __bch2_snapshot_parent(c, id);
13061e2d3999SKent Overstreet 
1307f55d6e07SKent Overstreet 	while (n--) {
1308f55d6e07SKent Overstreet 		do {
1309f55d6e07SKent Overstreet 			id = __bch2_snapshot_parent(c, id);
1310f55d6e07SKent Overstreet 		} while (snapshot_list_has_id(skip, id));
1311f55d6e07SKent Overstreet 	}
1312f55d6e07SKent Overstreet 	rcu_read_unlock();
1313f55d6e07SKent Overstreet 
1314f55d6e07SKent Overstreet 	return id;
1315f55d6e07SKent Overstreet }
1316f55d6e07SKent Overstreet 
1317f55d6e07SKent Overstreet static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
1318f55d6e07SKent Overstreet 					      struct btree_iter *iter, struct bkey_s_c k,
1319f55d6e07SKent Overstreet 					      snapshot_id_list *deleted)
1320f55d6e07SKent Overstreet {
1321f55d6e07SKent Overstreet 	struct bch_fs *c = trans->c;
1322f55d6e07SKent Overstreet 	u32 nr_deleted_ancestors = 0;
1323f55d6e07SKent Overstreet 	struct bkey_i_snapshot *s;
1324f55d6e07SKent Overstreet 	int ret;
1325f55d6e07SKent Overstreet 
1326f55d6e07SKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot)
1327f55d6e07SKent Overstreet 		return 0;
1328f55d6e07SKent Overstreet 
1329f55d6e07SKent Overstreet 	if (snapshot_list_has_id(deleted, k.k->p.offset))
1330f55d6e07SKent Overstreet 		return 0;
1331f55d6e07SKent Overstreet 
1332f55d6e07SKent Overstreet 	s = bch2_bkey_make_mut_noupdate_typed(trans, k, snapshot);
1333f55d6e07SKent Overstreet 	ret = PTR_ERR_OR_ZERO(s);
1334f55d6e07SKent Overstreet 	if (ret)
1335f55d6e07SKent Overstreet 		return ret;
1336f55d6e07SKent Overstreet 
1337f55d6e07SKent Overstreet 	darray_for_each(*deleted, i)
1338f55d6e07SKent Overstreet 		nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, *i);
1339f55d6e07SKent Overstreet 
1340f55d6e07SKent Overstreet 	if (!nr_deleted_ancestors)
1341f55d6e07SKent Overstreet 		return 0;
1342f55d6e07SKent Overstreet 
1343f55d6e07SKent Overstreet 	le32_add_cpu(&s->v.depth, -nr_deleted_ancestors);
1344f55d6e07SKent Overstreet 
1345f55d6e07SKent Overstreet 	if (!s->v.depth) {
1346f55d6e07SKent Overstreet 		s->v.skip[0] = 0;
1347f55d6e07SKent Overstreet 		s->v.skip[1] = 0;
1348f55d6e07SKent Overstreet 		s->v.skip[2] = 0;
1349f55d6e07SKent Overstreet 	} else {
1350f55d6e07SKent Overstreet 		u32 depth = le32_to_cpu(s->v.depth);
1351f55d6e07SKent Overstreet 		u32 parent = bch2_snapshot_parent(c, s->k.p.offset);
1352f55d6e07SKent Overstreet 
1353f55d6e07SKent Overstreet 		for (unsigned j = 0; j < ARRAY_SIZE(s->v.skip); j++) {
1354f55d6e07SKent Overstreet 			u32 id = le32_to_cpu(s->v.skip[j]);
1355f55d6e07SKent Overstreet 
1356f55d6e07SKent Overstreet 			if (snapshot_list_has_id(deleted, id)) {
13575394fe94SKent Overstreet 				id = bch2_snapshot_nth_parent_skip(c,
1358f55d6e07SKent Overstreet 							parent,
13595394fe94SKent Overstreet 							depth > 1
13605394fe94SKent Overstreet 							? get_random_u32_below(depth - 1)
13615394fe94SKent Overstreet 							: 0,
13625394fe94SKent Overstreet 							deleted);
1363f55d6e07SKent Overstreet 				s->v.skip[j] = cpu_to_le32(id);
1364f55d6e07SKent Overstreet 			}
1365f55d6e07SKent Overstreet 		}
1366f55d6e07SKent Overstreet 
1367f55d6e07SKent Overstreet 		bubble_sort(s->v.skip, ARRAY_SIZE(s->v.skip), cmp_le32);
1368f55d6e07SKent Overstreet 	}
1369f55d6e07SKent Overstreet 
1370f55d6e07SKent Overstreet 	return bch2_trans_update(trans, iter, &s->k_i, 0);
1371f55d6e07SKent Overstreet }
1372f55d6e07SKent Overstreet 
13738e877caaSKent Overstreet int bch2_delete_dead_snapshots(struct bch_fs *c)
13748e877caaSKent Overstreet {
13756bd68ec2SKent Overstreet 	struct btree_trans *trans;
13768e877caaSKent Overstreet 	snapshot_id_list deleted = { 0 };
1377f55d6e07SKent Overstreet 	snapshot_id_list deleted_interior = { 0 };
1378defd9e39SKent Overstreet 	u32 id;
13798e877caaSKent Overstreet 	int ret = 0;
13808e877caaSKent Overstreet 
13813c471b65SKent Overstreet 	if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
1382b0b5bbf9SKent Overstreet 		return 0;
1383b0b5bbf9SKent Overstreet 
13843c471b65SKent Overstreet 	if (!test_bit(BCH_FS_started, &c->flags)) {
13858e877caaSKent Overstreet 		ret = bch2_fs_read_write_early(c);
13866bf3766bSColin Ian King 		bch_err_msg(c, ret, "deleting dead snapshots: error going rw");
1387cf904c8dSKent Overstreet 		if (ret)
13888e877caaSKent Overstreet 			return ret;
13898e877caaSKent Overstreet 	}
13908e877caaSKent Overstreet 
13916bd68ec2SKent Overstreet 	trans = bch2_trans_get(c);
13928e877caaSKent Overstreet 
13938e877caaSKent Overstreet 	/*
13948e877caaSKent Overstreet 	 * For every snapshot node: If we have no live children and it's not
13958e877caaSKent Overstreet 	 * pointed to by a subvolume, delete it:
13968e877caaSKent Overstreet 	 */
13976bd68ec2SKent Overstreet 	ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots,
13988e877caaSKent Overstreet 			POS_MIN, 0, k,
13998e877caaSKent Overstreet 			NULL, NULL, 0,
1400b0b5bbf9SKent Overstreet 		bch2_delete_redundant_snapshot(trans, k));
1401e46c181aSKent Overstreet 	bch_err_msg(c, ret, "deleting redundant snapshots");
1402cf904c8dSKent Overstreet 	if (ret)
14038e877caaSKent Overstreet 		goto err;
14048e877caaSKent Overstreet 
14055028b907SKent Overstreet 	ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
14068e877caaSKent Overstreet 				 POS_MIN, 0, k,
14076bd68ec2SKent Overstreet 		bch2_snapshot_set_equiv(trans, k));
1408e46c181aSKent Overstreet 	bch_err_msg(c, ret, "in bch2_snapshots_set_equiv");
1409cf904c8dSKent Overstreet 	if (ret)
14108e877caaSKent Overstreet 		goto err;
14118e877caaSKent Overstreet 
14125028b907SKent Overstreet 	ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
141327b2df98SKent Overstreet 				 POS_MIN, 0, k, ({
14148e877caaSKent Overstreet 		if (k.k->type != KEY_TYPE_snapshot)
14158e877caaSKent Overstreet 			continue;
14168e877caaSKent Overstreet 
141780eab7a7SKent Overstreet 		BCH_SNAPSHOT_DELETED(bkey_s_c_to_snapshot(k).v)
141827b2df98SKent Overstreet 			? snapshot_list_add(c, &deleted, k.k->p.offset)
141927b2df98SKent Overstreet 			: 0;
142027b2df98SKent Overstreet 	}));
14218e877caaSKent Overstreet 	bch_err_msg(c, ret, "walking snapshots");
1422cf904c8dSKent Overstreet 	if (ret)
14238e877caaSKent Overstreet 		goto err;
14248e877caaSKent Overstreet 
14258e877caaSKent Overstreet 	for (id = 0; id < BTREE_ID_NR; id++) {
14268e877caaSKent Overstreet 		struct bpos last_pos = POS_MIN;
14278e877caaSKent Overstreet 		snapshot_id_list equiv_seen = { 0 };
1428f55d6e07SKent Overstreet 		struct disk_reservation res = { 0 };
14298e877caaSKent Overstreet 
14308e877caaSKent Overstreet 		if (!btree_type_has_snapshots(id))
14318e877caaSKent Overstreet 			continue;
14328e877caaSKent Overstreet 
14332e7acdfbSKent Overstreet 		/*
14342e7acdfbSKent Overstreet 		 * deleted inodes btree is maintained by a trigger on the inodes
14352e7acdfbSKent Overstreet 		 * btree - no work for us to do here, and it's not safe to scan
14362e7acdfbSKent Overstreet 		 * it because we'll see out of date keys due to the btree write
14372e7acdfbSKent Overstreet 		 * buffer:
14382e7acdfbSKent Overstreet 		 */
14392e7acdfbSKent Overstreet 		if (id == BTREE_ID_deleted_inodes)
14402e7acdfbSKent Overstreet 			continue;
14412e7acdfbSKent Overstreet 
14426bd68ec2SKent Overstreet 		ret = for_each_btree_key_commit(trans, iter,
14438e877caaSKent Overstreet 				id, POS_MIN,
14448e877caaSKent Overstreet 				BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
1445cb52d23eSKent Overstreet 				&res, NULL, BCH_TRANS_COMMIT_no_enospc,
14466bd68ec2SKent Overstreet 			snapshot_delete_key(trans, &iter, k, &deleted, &equiv_seen, &last_pos)) ?:
14476bd68ec2SKent Overstreet 		      for_each_btree_key_commit(trans, iter,
1448f55d6e07SKent Overstreet 				id, POS_MIN,
1449f55d6e07SKent Overstreet 				BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
1450cb52d23eSKent Overstreet 				&res, NULL, BCH_TRANS_COMMIT_no_enospc,
14516bd68ec2SKent Overstreet 			move_key_to_correct_snapshot(trans, &iter, k));
14528e877caaSKent Overstreet 
1453f55d6e07SKent Overstreet 		bch2_disk_reservation_put(c, &res);
14548e877caaSKent Overstreet 		darray_exit(&equiv_seen);
14558e877caaSKent Overstreet 
14568e877caaSKent Overstreet 		bch_err_msg(c, ret, "deleting keys from dying snapshots");
1457cf904c8dSKent Overstreet 		if (ret)
14588e877caaSKent Overstreet 			goto err;
14598e877caaSKent Overstreet 	}
14608e877caaSKent Overstreet 
14610dd092bfSKent Overstreet 	bch2_trans_unlock(trans);
146237fad949SKent Overstreet 	down_write(&c->snapshot_create_lock);
146337fad949SKent Overstreet 
14645028b907SKent Overstreet 	ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
146527b2df98SKent Overstreet 				 POS_MIN, 0, k, ({
1466f55d6e07SKent Overstreet 		u32 snapshot = k.k->p.offset;
1467f55d6e07SKent Overstreet 		u32 equiv = bch2_snapshot_equiv(c, snapshot);
14688e877caaSKent Overstreet 
146927b2df98SKent Overstreet 		equiv != snapshot
147027b2df98SKent Overstreet 			? snapshot_list_add(c, &deleted_interior, snapshot)
147127b2df98SKent Overstreet 			: 0;
147227b2df98SKent Overstreet 	}));
1473f55d6e07SKent Overstreet 
147427b2df98SKent Overstreet 	bch_err_msg(c, ret, "walking snapshots");
1475cf904c8dSKent Overstreet 	if (ret)
147637fad949SKent Overstreet 		goto err_create_lock;
147737fad949SKent Overstreet 
1478f55d6e07SKent Overstreet 	/*
1479f55d6e07SKent Overstreet 	 * Fixing children of deleted snapshots can't be done completely
1480f55d6e07SKent Overstreet 	 * atomically, if we crash between here and when we delete the interior
1481f55d6e07SKent Overstreet 	 * nodes some depth fields will be off:
1482f55d6e07SKent Overstreet 	 */
14836bd68ec2SKent Overstreet 	ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
1484f55d6e07SKent Overstreet 				  BTREE_ITER_INTENT, k,
1485cb52d23eSKent Overstreet 				  NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
14866bd68ec2SKent Overstreet 		bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior));
1487f55d6e07SKent Overstreet 	if (ret)
148837fad949SKent Overstreet 		goto err_create_lock;
1489f55d6e07SKent Overstreet 
1490f55d6e07SKent Overstreet 	darray_for_each(deleted, i) {
14916bd68ec2SKent Overstreet 		ret = commit_do(trans, NULL, NULL, 0,
14926bd68ec2SKent Overstreet 			bch2_snapshot_node_delete(trans, *i));
1493f55d6e07SKent Overstreet 		bch_err_msg(c, ret, "deleting snapshot %u", *i);
1494cf904c8dSKent Overstreet 		if (ret)
149537fad949SKent Overstreet 			goto err_create_lock;
1496f55d6e07SKent Overstreet 	}
1497f55d6e07SKent Overstreet 
1498f55d6e07SKent Overstreet 	darray_for_each(deleted_interior, i) {
14996bd68ec2SKent Overstreet 		ret = commit_do(trans, NULL, NULL, 0,
15006bd68ec2SKent Overstreet 			bch2_snapshot_node_delete(trans, *i));
1501f55d6e07SKent Overstreet 		bch_err_msg(c, ret, "deleting snapshot %u", *i);
1502cf904c8dSKent Overstreet 		if (ret)
150337fad949SKent Overstreet 			goto err_create_lock;
15048e877caaSKent Overstreet 	}
150537fad949SKent Overstreet err_create_lock:
150637fad949SKent Overstreet 	up_write(&c->snapshot_create_lock);
15078e877caaSKent Overstreet err:
1508f55d6e07SKent Overstreet 	darray_exit(&deleted_interior);
15098e877caaSKent Overstreet 	darray_exit(&deleted);
15106bd68ec2SKent Overstreet 	bch2_trans_put(trans);
15118e877caaSKent Overstreet 	bch_err_fn(c, ret);
15128e877caaSKent Overstreet 	return ret;
15138e877caaSKent Overstreet }
15148e877caaSKent Overstreet 
15158e877caaSKent Overstreet void bch2_delete_dead_snapshots_work(struct work_struct *work)
15168e877caaSKent Overstreet {
15178e877caaSKent Overstreet 	struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
15188e877caaSKent Overstreet 
15198e877caaSKent Overstreet 	bch2_delete_dead_snapshots(c);
15208e877caaSKent Overstreet 	bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
15218e877caaSKent Overstreet }
15228e877caaSKent Overstreet 
15238e877caaSKent Overstreet void bch2_delete_dead_snapshots_async(struct bch_fs *c)
15248e877caaSKent Overstreet {
15258e877caaSKent Overstreet 	if (bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) &&
15268e877caaSKent Overstreet 	    !queue_work(c->write_ref_wq, &c->snapshot_delete_work))
15278e877caaSKent Overstreet 		bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
15288e877caaSKent Overstreet }
15298e877caaSKent Overstreet 
1530fa5bed37SKent Overstreet int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
1531fa5bed37SKent Overstreet 				       enum btree_id id,
1532fa5bed37SKent Overstreet 				       struct bpos pos)
1533fa5bed37SKent Overstreet {
1534fa5bed37SKent Overstreet 	struct bch_fs *c = trans->c;
1535fa5bed37SKent Overstreet 	struct btree_iter iter;
1536fa5bed37SKent Overstreet 	struct bkey_s_c k;
1537fa5bed37SKent Overstreet 	int ret;
1538fa5bed37SKent Overstreet 
1539fa5bed37SKent Overstreet 	bch2_trans_iter_init(trans, &iter, id, pos,
1540fa5bed37SKent Overstreet 			     BTREE_ITER_NOT_EXTENTS|
1541fa5bed37SKent Overstreet 			     BTREE_ITER_ALL_SNAPSHOTS);
1542fa5bed37SKent Overstreet 	while (1) {
1543fa5bed37SKent Overstreet 		k = bch2_btree_iter_prev(&iter);
1544fa5bed37SKent Overstreet 		ret = bkey_err(k);
1545fa5bed37SKent Overstreet 		if (ret)
1546fa5bed37SKent Overstreet 			break;
1547fa5bed37SKent Overstreet 
1548fa5bed37SKent Overstreet 		if (!k.k)
1549fa5bed37SKent Overstreet 			break;
1550fa5bed37SKent Overstreet 
1551fa5bed37SKent Overstreet 		if (!bkey_eq(pos, k.k->p))
1552fa5bed37SKent Overstreet 			break;
1553fa5bed37SKent Overstreet 
1554fa5bed37SKent Overstreet 		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot)) {
1555fa5bed37SKent Overstreet 			ret = 1;
1556fa5bed37SKent Overstreet 			break;
1557fa5bed37SKent Overstreet 		}
1558fa5bed37SKent Overstreet 	}
1559fa5bed37SKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
1560fa5bed37SKent Overstreet 
1561fa5bed37SKent Overstreet 	return ret;
1562fa5bed37SKent Overstreet }
1563fa5bed37SKent Overstreet 
1564a111901fSKent Overstreet static u32 bch2_snapshot_smallest_child(struct bch_fs *c, u32 id)
1565a111901fSKent Overstreet {
1566a111901fSKent Overstreet 	const struct snapshot_t *s = snapshot_t(c, id);
1567a111901fSKent Overstreet 
1568a111901fSKent Overstreet 	return s->children[1] ?: s->children[0];
1569a111901fSKent Overstreet }
1570a111901fSKent Overstreet 
1571a111901fSKent Overstreet static u32 bch2_snapshot_smallest_descendent(struct bch_fs *c, u32 id)
1572a111901fSKent Overstreet {
1573a111901fSKent Overstreet 	u32 child;
1574a111901fSKent Overstreet 
1575a111901fSKent Overstreet 	while ((child = bch2_snapshot_smallest_child(c, id)))
1576a111901fSKent Overstreet 		id = child;
1577a111901fSKent Overstreet 	return id;
1578a111901fSKent Overstreet }
1579a111901fSKent Overstreet 
1580a111901fSKent Overstreet static int bch2_propagate_key_to_snapshot_leaf(struct btree_trans *trans,
1581a111901fSKent Overstreet 					       enum btree_id btree,
1582a111901fSKent Overstreet 					       struct bkey_s_c interior_k,
1583a111901fSKent Overstreet 					       u32 leaf_id, struct bpos *new_min_pos)
1584a111901fSKent Overstreet {
1585a111901fSKent Overstreet 	struct btree_iter iter;
1586a111901fSKent Overstreet 	struct bpos pos = interior_k.k->p;
1587a111901fSKent Overstreet 	struct bkey_s_c k;
1588a111901fSKent Overstreet 	struct bkey_i *new;
1589a111901fSKent Overstreet 	int ret;
1590a111901fSKent Overstreet 
1591a111901fSKent Overstreet 	pos.snapshot = leaf_id;
1592a111901fSKent Overstreet 
1593a111901fSKent Overstreet 	bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_INTENT);
1594a111901fSKent Overstreet 	k = bch2_btree_iter_peek_slot(&iter);
1595a111901fSKent Overstreet 	ret = bkey_err(k);
1596a111901fSKent Overstreet 	if (ret)
1597a111901fSKent Overstreet 		goto out;
1598a111901fSKent Overstreet 
1599a111901fSKent Overstreet 	/* key already overwritten in this snapshot? */
1600a111901fSKent Overstreet 	if (k.k->p.snapshot != interior_k.k->p.snapshot)
1601a111901fSKent Overstreet 		goto out;
1602a111901fSKent Overstreet 
1603a111901fSKent Overstreet 	if (bpos_eq(*new_min_pos, POS_MIN)) {
1604a111901fSKent Overstreet 		*new_min_pos = k.k->p;
1605a111901fSKent Overstreet 		new_min_pos->snapshot = leaf_id;
1606a111901fSKent Overstreet 	}
1607a111901fSKent Overstreet 
1608a111901fSKent Overstreet 	new = bch2_bkey_make_mut_noupdate(trans, interior_k);
1609a111901fSKent Overstreet 	ret = PTR_ERR_OR_ZERO(new);
1610a111901fSKent Overstreet 	if (ret)
1611a111901fSKent Overstreet 		goto out;
1612a111901fSKent Overstreet 
1613a111901fSKent Overstreet 	new->k.p.snapshot = leaf_id;
1614a111901fSKent Overstreet 	ret = bch2_trans_update(trans, &iter, new, 0);
1615a111901fSKent Overstreet out:
1616a111901fSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
1617a111901fSKent Overstreet 	return ret;
1618a111901fSKent Overstreet }
1619a111901fSKent Overstreet 
1620a111901fSKent Overstreet int bch2_propagate_key_to_snapshot_leaves(struct btree_trans *trans,
1621a111901fSKent Overstreet 					  enum btree_id btree,
1622a111901fSKent Overstreet 					  struct bkey_s_c k,
1623a111901fSKent Overstreet 					  struct bpos *new_min_pos)
1624a111901fSKent Overstreet {
1625a111901fSKent Overstreet 	struct bch_fs *c = trans->c;
1626a111901fSKent Overstreet 	struct bkey_buf sk;
1627c872afa2SKent Overstreet 	u32 restart_count = trans->restart_count;
1628d04fdf5cSKent Overstreet 	int ret = 0;
1629a111901fSKent Overstreet 
1630a111901fSKent Overstreet 	bch2_bkey_buf_init(&sk);
1631a111901fSKent Overstreet 	bch2_bkey_buf_reassemble(&sk, c, k);
1632a111901fSKent Overstreet 	k = bkey_i_to_s_c(sk.k);
1633a111901fSKent Overstreet 
1634a111901fSKent Overstreet 	*new_min_pos = POS_MIN;
1635a111901fSKent Overstreet 
1636a111901fSKent Overstreet 	for (u32 id = bch2_snapshot_smallest_descendent(c, k.k->p.snapshot);
1637a111901fSKent Overstreet 	     id < k.k->p.snapshot;
1638a111901fSKent Overstreet 	     id++) {
1639a111901fSKent Overstreet 		if (!bch2_snapshot_is_ancestor(c, id, k.k->p.snapshot) ||
1640a111901fSKent Overstreet 		    !bch2_snapshot_is_leaf(c, id))
1641a111901fSKent Overstreet 			continue;
1642d281701bSKent Overstreet again:
1643d281701bSKent Overstreet 		ret =   btree_trans_too_many_iters(trans) ?:
1644d281701bSKent Overstreet 			bch2_propagate_key_to_snapshot_leaf(trans, btree, k, id, new_min_pos) ?:
1645d281701bSKent Overstreet 			bch2_trans_commit(trans, NULL, NULL, 0);
1646d281701bSKent Overstreet 		if (ret && bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
1647d281701bSKent Overstreet 			bch2_trans_begin(trans);
1648d281701bSKent Overstreet 			goto again;
1649d281701bSKent Overstreet 		}
1650a111901fSKent Overstreet 
1651a111901fSKent Overstreet 		if (ret)
1652a111901fSKent Overstreet 			break;
1653a111901fSKent Overstreet 	}
1654a111901fSKent Overstreet 
1655a111901fSKent Overstreet 	bch2_bkey_buf_exit(&sk, c);
1656c872afa2SKent Overstreet 
1657c872afa2SKent Overstreet 	return ret ?: trans_was_restarted(trans, restart_count);
1658a111901fSKent Overstreet }
1659a111901fSKent Overstreet 
1660b0b5bbf9SKent Overstreet static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k)
1661b0b5bbf9SKent Overstreet {
1662b0b5bbf9SKent Overstreet 	struct bch_fs *c = trans->c;
1663b0b5bbf9SKent Overstreet 	struct bkey_s_c_snapshot snap;
1664b0b5bbf9SKent Overstreet 	int ret = 0;
1665b0b5bbf9SKent Overstreet 
1666b0b5bbf9SKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot)
1667b0b5bbf9SKent Overstreet 		return 0;
1668b0b5bbf9SKent Overstreet 
1669b0b5bbf9SKent Overstreet 	snap = bkey_s_c_to_snapshot(k);
1670b0b5bbf9SKent Overstreet 	if (BCH_SNAPSHOT_DELETED(snap.v) ||
1671b0b5bbf9SKent Overstreet 	    bch2_snapshot_equiv(c, k.k->p.offset) != k.k->p.offset ||
1672b0b5bbf9SKent Overstreet 	    (ret = bch2_snapshot_needs_delete(trans, k)) > 0) {
16733c471b65SKent Overstreet 		set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
1674b0b5bbf9SKent Overstreet 		return 0;
1675b0b5bbf9SKent Overstreet 	}
1676b0b5bbf9SKent Overstreet 
1677b0b5bbf9SKent Overstreet 	return ret;
1678b0b5bbf9SKent Overstreet }
1679b0b5bbf9SKent Overstreet 
16808e877caaSKent Overstreet int bch2_snapshots_read(struct bch_fs *c)
16818e877caaSKent Overstreet {
168280eab7a7SKent Overstreet 	int ret = bch2_trans_run(c,
16835028b907SKent Overstreet 		for_each_btree_key(trans, iter, BTREE_ID_snapshots,
16848e877caaSKent Overstreet 				   POS_MIN, 0, k,
1685ad00bce0SKent Overstreet 			__bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
1686b0b5bbf9SKent Overstreet 			bch2_snapshot_set_equiv(trans, k) ?:
1687b0b5bbf9SKent Overstreet 			bch2_check_snapshot_needs_deletion(trans, k)) ?:
16885028b907SKent Overstreet 		for_each_btree_key(trans, iter, BTREE_ID_snapshots,
168966487c54SKent Overstreet 				   POS_MIN, 0, k,
169066487c54SKent Overstreet 			   (set_is_ancestor_bitmap(c, k.k->p.offset), 0)));
16918e877caaSKent Overstreet 	bch_err_fn(c, ret);
16928e877caaSKent Overstreet 	return ret;
16938e877caaSKent Overstreet }
16948e877caaSKent Overstreet 
16958e877caaSKent Overstreet void bch2_fs_snapshots_exit(struct bch_fs *c)
16968e877caaSKent Overstreet {
1697369acf97SSu Yue 	kvfree(rcu_dereference_protected(c->snapshots, true));
16988e877caaSKent Overstreet }
1699