xref: /linux/fs/bcachefs/snapshot.c (revision b0b5bbf99fc269e10d01c2a9873de5a042bdc7f5)
18e877caaSKent Overstreet // SPDX-License-Identifier: GPL-2.0
28e877caaSKent Overstreet 
38e877caaSKent Overstreet #include "bcachefs.h"
4a111901fSKent Overstreet #include "bkey_buf.h"
58e877caaSKent Overstreet #include "btree_key_cache.h"
68e877caaSKent Overstreet #include "btree_update.h"
7f55d6e07SKent Overstreet #include "buckets.h"
88e877caaSKent Overstreet #include "errcode.h"
98e877caaSKent Overstreet #include "error.h"
108e877caaSKent Overstreet #include "fs.h"
118e877caaSKent Overstreet #include "snapshot.h"
128e877caaSKent Overstreet 
138e877caaSKent Overstreet #include <linux/random.h>
148e877caaSKent Overstreet 
158e877caaSKent Overstreet /*
168e877caaSKent Overstreet  * Snapshot trees:
178e877caaSKent Overstreet  *
188e877caaSKent Overstreet  * Keys in BTREE_ID_snapshot_trees identify a whole tree of snapshot nodes; they
198e877caaSKent Overstreet  * exist to provide a stable identifier for the whole lifetime of a snapshot
208e877caaSKent Overstreet  * tree.
218e877caaSKent Overstreet  */
228e877caaSKent Overstreet 
238e877caaSKent Overstreet void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
248e877caaSKent Overstreet 				struct bkey_s_c k)
258e877caaSKent Overstreet {
268e877caaSKent Overstreet 	struct bkey_s_c_snapshot_tree t = bkey_s_c_to_snapshot_tree(k);
278e877caaSKent Overstreet 
288e877caaSKent Overstreet 	prt_printf(out, "subvol %u root snapshot %u",
298e877caaSKent Overstreet 		   le32_to_cpu(t.v->master_subvol),
308e877caaSKent Overstreet 		   le32_to_cpu(t.v->root_snapshot));
318e877caaSKent Overstreet }
328e877caaSKent Overstreet 
338e877caaSKent Overstreet int bch2_snapshot_tree_invalid(const struct bch_fs *c, struct bkey_s_c k,
348e877caaSKent Overstreet 			       enum bkey_invalid_flags flags,
358e877caaSKent Overstreet 			       struct printbuf *err)
368e877caaSKent Overstreet {
378e877caaSKent Overstreet 	if (bkey_gt(k.k->p, POS(0, U32_MAX)) ||
388e877caaSKent Overstreet 	    bkey_lt(k.k->p, POS(0, 1))) {
398e877caaSKent Overstreet 		prt_printf(err, "bad pos");
408e877caaSKent Overstreet 		return -BCH_ERR_invalid_bkey;
418e877caaSKent Overstreet 	}
428e877caaSKent Overstreet 
438e877caaSKent Overstreet 	return 0;
448e877caaSKent Overstreet }
458e877caaSKent Overstreet 
468e877caaSKent Overstreet int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id,
478e877caaSKent Overstreet 			      struct bch_snapshot_tree *s)
488e877caaSKent Overstreet {
498e877caaSKent Overstreet 	int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id),
508e877caaSKent Overstreet 					  BTREE_ITER_WITH_UPDATES, snapshot_tree, s);
518e877caaSKent Overstreet 
528e877caaSKent Overstreet 	if (bch2_err_matches(ret, ENOENT))
538e877caaSKent Overstreet 		ret = -BCH_ERR_ENOENT_snapshot_tree;
548e877caaSKent Overstreet 	return ret;
558e877caaSKent Overstreet }
568e877caaSKent Overstreet 
578e877caaSKent Overstreet struct bkey_i_snapshot_tree *
588e877caaSKent Overstreet __bch2_snapshot_tree_create(struct btree_trans *trans)
598e877caaSKent Overstreet {
608e877caaSKent Overstreet 	struct btree_iter iter;
618e877caaSKent Overstreet 	int ret = bch2_bkey_get_empty_slot(trans, &iter,
628e877caaSKent Overstreet 			BTREE_ID_snapshot_trees, POS(0, U32_MAX));
638e877caaSKent Overstreet 	struct bkey_i_snapshot_tree *s_t;
648e877caaSKent Overstreet 
658e877caaSKent Overstreet 	if (ret == -BCH_ERR_ENOSPC_btree_slot)
668e877caaSKent Overstreet 		ret = -BCH_ERR_ENOSPC_snapshot_tree;
678e877caaSKent Overstreet 	if (ret)
688e877caaSKent Overstreet 		return ERR_PTR(ret);
698e877caaSKent Overstreet 
708e877caaSKent Overstreet 	s_t = bch2_bkey_alloc(trans, &iter, 0, snapshot_tree);
718e877caaSKent Overstreet 	ret = PTR_ERR_OR_ZERO(s_t);
728e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
738e877caaSKent Overstreet 	return ret ? ERR_PTR(ret) : s_t;
748e877caaSKent Overstreet }
758e877caaSKent Overstreet 
768e877caaSKent Overstreet static int bch2_snapshot_tree_create(struct btree_trans *trans,
778e877caaSKent Overstreet 				u32 root_id, u32 subvol_id, u32 *tree_id)
788e877caaSKent Overstreet {
798e877caaSKent Overstreet 	struct bkey_i_snapshot_tree *n_tree =
808e877caaSKent Overstreet 		__bch2_snapshot_tree_create(trans);
818e877caaSKent Overstreet 
828e877caaSKent Overstreet 	if (IS_ERR(n_tree))
838e877caaSKent Overstreet 		return PTR_ERR(n_tree);
848e877caaSKent Overstreet 
858e877caaSKent Overstreet 	n_tree->v.master_subvol	= cpu_to_le32(subvol_id);
868e877caaSKent Overstreet 	n_tree->v.root_snapshot	= cpu_to_le32(root_id);
878e877caaSKent Overstreet 	*tree_id = n_tree->k.p.offset;
888e877caaSKent Overstreet 	return 0;
898e877caaSKent Overstreet }
908e877caaSKent Overstreet 
918e877caaSKent Overstreet /* Snapshot nodes: */
928e877caaSKent Overstreet 
9366487c54SKent Overstreet static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
9466487c54SKent Overstreet {
9566487c54SKent Overstreet 	struct snapshot_table *t;
9666487c54SKent Overstreet 
9766487c54SKent Overstreet 	rcu_read_lock();
9866487c54SKent Overstreet 	t = rcu_dereference(c->snapshots);
9966487c54SKent Overstreet 
10066487c54SKent Overstreet 	while (id && id < ancestor)
10166487c54SKent Overstreet 		id = __snapshot_t(t, id)->parent;
10266487c54SKent Overstreet 	rcu_read_unlock();
10366487c54SKent Overstreet 
10466487c54SKent Overstreet 	return id == ancestor;
10566487c54SKent Overstreet }
10666487c54SKent Overstreet 
1078e877caaSKent Overstreet static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
1088e877caaSKent Overstreet {
1098e877caaSKent Overstreet 	const struct snapshot_t *s = __snapshot_t(t, id);
1108e877caaSKent Overstreet 
1118e877caaSKent Overstreet 	if (s->skip[2] <= ancestor)
1128e877caaSKent Overstreet 		return s->skip[2];
1138e877caaSKent Overstreet 	if (s->skip[1] <= ancestor)
1148e877caaSKent Overstreet 		return s->skip[1];
1158e877caaSKent Overstreet 	if (s->skip[0] <= ancestor)
1168e877caaSKent Overstreet 		return s->skip[0];
1178e877caaSKent Overstreet 	return s->parent;
1188e877caaSKent Overstreet }
1198e877caaSKent Overstreet 
1208e877caaSKent Overstreet bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
1218e877caaSKent Overstreet {
1228e877caaSKent Overstreet 	struct snapshot_table *t;
1238e877caaSKent Overstreet 	bool ret;
1248e877caaSKent Overstreet 
1258e877caaSKent Overstreet 	EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots);
1268e877caaSKent Overstreet 
1278e877caaSKent Overstreet 	rcu_read_lock();
1288e877caaSKent Overstreet 	t = rcu_dereference(c->snapshots);
1298e877caaSKent Overstreet 
1308e877caaSKent Overstreet 	while (id && id < ancestor - IS_ANCESTOR_BITMAP)
1318e877caaSKent Overstreet 		id = get_ancestor_below(t, id, ancestor);
1328e877caaSKent Overstreet 
13366487c54SKent Overstreet 	if (id && id < ancestor) {
13466487c54SKent Overstreet 		ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor);
13566487c54SKent Overstreet 
13666487c54SKent Overstreet 		EBUG_ON(ret != bch2_snapshot_is_ancestor_early(c, id, ancestor));
13766487c54SKent Overstreet 	} else {
13866487c54SKent Overstreet 		ret = id == ancestor;
13966487c54SKent Overstreet 	}
14066487c54SKent Overstreet 
1418e877caaSKent Overstreet 	rcu_read_unlock();
1428e877caaSKent Overstreet 
1438e877caaSKent Overstreet 	return ret;
1448e877caaSKent Overstreet }
1458e877caaSKent Overstreet 
1468e877caaSKent Overstreet static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
1478e877caaSKent Overstreet {
1488e877caaSKent Overstreet 	size_t idx = U32_MAX - id;
1498e877caaSKent Overstreet 	size_t new_size;
1508e877caaSKent Overstreet 	struct snapshot_table *new, *old;
1518e877caaSKent Overstreet 
1528e877caaSKent Overstreet 	new_size = max(16UL, roundup_pow_of_two(idx + 1));
1538e877caaSKent Overstreet 
1548e877caaSKent Overstreet 	new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL);
1558e877caaSKent Overstreet 	if (!new)
1568e877caaSKent Overstreet 		return NULL;
1578e877caaSKent Overstreet 
1588e877caaSKent Overstreet 	old = rcu_dereference_protected(c->snapshots, true);
1598e877caaSKent Overstreet 	if (old)
1608e877caaSKent Overstreet 		memcpy(new->s,
1618e877caaSKent Overstreet 		       rcu_dereference_protected(c->snapshots, true)->s,
1628e877caaSKent Overstreet 		       sizeof(new->s[0]) * c->snapshot_table_size);
1638e877caaSKent Overstreet 
1648e877caaSKent Overstreet 	rcu_assign_pointer(c->snapshots, new);
1658e877caaSKent Overstreet 	c->snapshot_table_size = new_size;
166d04fdf5cSKent Overstreet 	kvfree_rcu_mightsleep(old);
1678e877caaSKent Overstreet 
1688e877caaSKent Overstreet 	return &rcu_dereference_protected(c->snapshots, true)->s[idx];
1698e877caaSKent Overstreet }
1708e877caaSKent Overstreet 
1718e877caaSKent Overstreet static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id)
1728e877caaSKent Overstreet {
1738e877caaSKent Overstreet 	size_t idx = U32_MAX - id;
1748e877caaSKent Overstreet 
1758e877caaSKent Overstreet 	lockdep_assert_held(&c->snapshot_table_lock);
1768e877caaSKent Overstreet 
1778e877caaSKent Overstreet 	if (likely(idx < c->snapshot_table_size))
1788e877caaSKent Overstreet 		return &rcu_dereference_protected(c->snapshots, true)->s[idx];
1798e877caaSKent Overstreet 
1808e877caaSKent Overstreet 	return __snapshot_t_mut(c, id);
1818e877caaSKent Overstreet }
1828e877caaSKent Overstreet 
1838e877caaSKent Overstreet void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
1848e877caaSKent Overstreet 			   struct bkey_s_c k)
1858e877caaSKent Overstreet {
1868e877caaSKent Overstreet 	struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
1878e877caaSKent Overstreet 
1888e877caaSKent Overstreet 	prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u tree %u",
1898e877caaSKent Overstreet 	       BCH_SNAPSHOT_SUBVOL(s.v),
1908e877caaSKent Overstreet 	       BCH_SNAPSHOT_DELETED(s.v),
1918e877caaSKent Overstreet 	       le32_to_cpu(s.v->parent),
1928e877caaSKent Overstreet 	       le32_to_cpu(s.v->children[0]),
1938e877caaSKent Overstreet 	       le32_to_cpu(s.v->children[1]),
1948e877caaSKent Overstreet 	       le32_to_cpu(s.v->subvol),
1958e877caaSKent Overstreet 	       le32_to_cpu(s.v->tree));
1968e877caaSKent Overstreet 
1978e877caaSKent Overstreet 	if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, depth))
1988e877caaSKent Overstreet 		prt_printf(out, " depth %u skiplist %u %u %u",
1998e877caaSKent Overstreet 			   le32_to_cpu(s.v->depth),
2008e877caaSKent Overstreet 			   le32_to_cpu(s.v->skip[0]),
2018e877caaSKent Overstreet 			   le32_to_cpu(s.v->skip[1]),
2028e877caaSKent Overstreet 			   le32_to_cpu(s.v->skip[2]));
2038e877caaSKent Overstreet }
2048e877caaSKent Overstreet 
2058e877caaSKent Overstreet int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k,
2068e877caaSKent Overstreet 			  enum bkey_invalid_flags flags,
2078e877caaSKent Overstreet 			  struct printbuf *err)
2088e877caaSKent Overstreet {
2098e877caaSKent Overstreet 	struct bkey_s_c_snapshot s;
2108e877caaSKent Overstreet 	u32 i, id;
2118e877caaSKent Overstreet 
2128e877caaSKent Overstreet 	if (bkey_gt(k.k->p, POS(0, U32_MAX)) ||
2138e877caaSKent Overstreet 	    bkey_lt(k.k->p, POS(0, 1))) {
2148e877caaSKent Overstreet 		prt_printf(err, "bad pos");
2158e877caaSKent Overstreet 		return -BCH_ERR_invalid_bkey;
2168e877caaSKent Overstreet 	}
2178e877caaSKent Overstreet 
2188e877caaSKent Overstreet 	s = bkey_s_c_to_snapshot(k);
2198e877caaSKent Overstreet 
2208e877caaSKent Overstreet 	id = le32_to_cpu(s.v->parent);
2218e877caaSKent Overstreet 	if (id && id <= k.k->p.offset) {
2228e877caaSKent Overstreet 		prt_printf(err, "bad parent node (%u <= %llu)",
2238e877caaSKent Overstreet 		       id, k.k->p.offset);
2248e877caaSKent Overstreet 		return -BCH_ERR_invalid_bkey;
2258e877caaSKent Overstreet 	}
2268e877caaSKent Overstreet 
2278e877caaSKent Overstreet 	if (le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1])) {
2288e877caaSKent Overstreet 		prt_printf(err, "children not normalized");
2298e877caaSKent Overstreet 		return -BCH_ERR_invalid_bkey;
2308e877caaSKent Overstreet 	}
2318e877caaSKent Overstreet 
2328e877caaSKent Overstreet 	if (s.v->children[0] &&
2338e877caaSKent Overstreet 	    s.v->children[0] == s.v->children[1]) {
2348e877caaSKent Overstreet 		prt_printf(err, "duplicate child nodes");
2358e877caaSKent Overstreet 		return -BCH_ERR_invalid_bkey;
2368e877caaSKent Overstreet 	}
2378e877caaSKent Overstreet 
2388e877caaSKent Overstreet 	for (i = 0; i < 2; i++) {
2398e877caaSKent Overstreet 		id = le32_to_cpu(s.v->children[i]);
2408e877caaSKent Overstreet 
2418e877caaSKent Overstreet 		if (id >= k.k->p.offset) {
2428e877caaSKent Overstreet 			prt_printf(err, "bad child node (%u >= %llu)",
2438e877caaSKent Overstreet 			       id, k.k->p.offset);
2448e877caaSKent Overstreet 			return -BCH_ERR_invalid_bkey;
2458e877caaSKent Overstreet 		}
2468e877caaSKent Overstreet 	}
2478e877caaSKent Overstreet 
2488e877caaSKent Overstreet 	if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) {
2498e877caaSKent Overstreet 		if (le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) ||
2508e877caaSKent Overstreet 		    le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2])) {
2518e877caaSKent Overstreet 			prt_printf(err, "skiplist not normalized");
2528e877caaSKent Overstreet 			return -BCH_ERR_invalid_bkey;
2538e877caaSKent Overstreet 		}
2548e877caaSKent Overstreet 
2558e877caaSKent Overstreet 		for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) {
2568e877caaSKent Overstreet 			id = le32_to_cpu(s.v->skip[i]);
2578e877caaSKent Overstreet 
258f55d6e07SKent Overstreet 			if ((id && !s.v->parent) ||
259f55d6e07SKent Overstreet 			    (id && id <= k.k->p.offset)) {
260f55d6e07SKent Overstreet 				prt_printf(err, "bad skiplist node %u", id);
2618e877caaSKent Overstreet 				return -BCH_ERR_invalid_bkey;
2628e877caaSKent Overstreet 			}
2638e877caaSKent Overstreet 		}
2648e877caaSKent Overstreet 	}
2658e877caaSKent Overstreet 
2668e877caaSKent Overstreet 	return 0;
2678e877caaSKent Overstreet }
2688e877caaSKent Overstreet 
26966487c54SKent Overstreet static void __set_is_ancestor_bitmap(struct bch_fs *c, u32 id)
27066487c54SKent Overstreet {
27166487c54SKent Overstreet 	struct snapshot_t *t = snapshot_t_mut(c, id);
27266487c54SKent Overstreet 	u32 parent = id;
27366487c54SKent Overstreet 
27466487c54SKent Overstreet 	while ((parent = bch2_snapshot_parent_early(c, parent)) &&
27566487c54SKent Overstreet 	       parent - id - 1 < IS_ANCESTOR_BITMAP)
27666487c54SKent Overstreet 		__set_bit(parent - id - 1, t->is_ancestor);
27766487c54SKent Overstreet }
27866487c54SKent Overstreet 
27966487c54SKent Overstreet static void set_is_ancestor_bitmap(struct bch_fs *c, u32 id)
28066487c54SKent Overstreet {
28166487c54SKent Overstreet 	mutex_lock(&c->snapshot_table_lock);
28266487c54SKent Overstreet 	__set_is_ancestor_bitmap(c, id);
28366487c54SKent Overstreet 	mutex_unlock(&c->snapshot_table_lock);
28466487c54SKent Overstreet }
28566487c54SKent Overstreet 
2868e877caaSKent Overstreet int bch2_mark_snapshot(struct btree_trans *trans,
2878e877caaSKent Overstreet 		       enum btree_id btree, unsigned level,
2888e877caaSKent Overstreet 		       struct bkey_s_c old, struct bkey_s_c new,
2898e877caaSKent Overstreet 		       unsigned flags)
2908e877caaSKent Overstreet {
2918e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
2928e877caaSKent Overstreet 	struct snapshot_t *t;
2938e877caaSKent Overstreet 	u32 id = new.k->p.offset;
2948e877caaSKent Overstreet 	int ret = 0;
2958e877caaSKent Overstreet 
2968e877caaSKent Overstreet 	mutex_lock(&c->snapshot_table_lock);
2978e877caaSKent Overstreet 
2988e877caaSKent Overstreet 	t = snapshot_t_mut(c, id);
2998e877caaSKent Overstreet 	if (!t) {
3008e877caaSKent Overstreet 		ret = -BCH_ERR_ENOMEM_mark_snapshot;
3018e877caaSKent Overstreet 		goto err;
3028e877caaSKent Overstreet 	}
3038e877caaSKent Overstreet 
3048e877caaSKent Overstreet 	if (new.k->type == KEY_TYPE_snapshot) {
3058e877caaSKent Overstreet 		struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
3068e877caaSKent Overstreet 
3078e877caaSKent Overstreet 		t->parent	= le32_to_cpu(s.v->parent);
3088e877caaSKent Overstreet 		t->children[0]	= le32_to_cpu(s.v->children[0]);
3098e877caaSKent Overstreet 		t->children[1]	= le32_to_cpu(s.v->children[1]);
3108e877caaSKent Overstreet 		t->subvol	= BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0;
3118e877caaSKent Overstreet 		t->tree		= le32_to_cpu(s.v->tree);
3128e877caaSKent Overstreet 
3138e877caaSKent Overstreet 		if (bkey_val_bytes(s.k) > offsetof(struct bch_snapshot, depth)) {
3148e877caaSKent Overstreet 			t->depth	= le32_to_cpu(s.v->depth);
3158e877caaSKent Overstreet 			t->skip[0]	= le32_to_cpu(s.v->skip[0]);
3168e877caaSKent Overstreet 			t->skip[1]	= le32_to_cpu(s.v->skip[1]);
3178e877caaSKent Overstreet 			t->skip[2]	= le32_to_cpu(s.v->skip[2]);
3188e877caaSKent Overstreet 		} else {
3198e877caaSKent Overstreet 			t->depth	= 0;
3208e877caaSKent Overstreet 			t->skip[0]	= 0;
3218e877caaSKent Overstreet 			t->skip[1]	= 0;
3228e877caaSKent Overstreet 			t->skip[2]	= 0;
3238e877caaSKent Overstreet 		}
3248e877caaSKent Overstreet 
32566487c54SKent Overstreet 		__set_is_ancestor_bitmap(c, id);
3268e877caaSKent Overstreet 
3278e877caaSKent Overstreet 		if (BCH_SNAPSHOT_DELETED(s.v)) {
328*b0b5bbf9SKent Overstreet 			set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags);
329*b0b5bbf9SKent Overstreet 			if (c->curr_recovery_pass > BCH_RECOVERY_PASS_delete_dead_snapshots)
330*b0b5bbf9SKent Overstreet 				bch2_delete_dead_snapshots_async(c);
3318e877caaSKent Overstreet 		}
3328e877caaSKent Overstreet 	} else {
3338e877caaSKent Overstreet 		memset(t, 0, sizeof(*t));
3348e877caaSKent Overstreet 	}
3358e877caaSKent Overstreet err:
3368e877caaSKent Overstreet 	mutex_unlock(&c->snapshot_table_lock);
3378e877caaSKent Overstreet 	return ret;
3388e877caaSKent Overstreet }
3398e877caaSKent Overstreet 
3408e877caaSKent Overstreet int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
3418e877caaSKent Overstreet 			 struct bch_snapshot *s)
3428e877caaSKent Overstreet {
3438e877caaSKent Overstreet 	return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, POS(0, id),
3448e877caaSKent Overstreet 				       BTREE_ITER_WITH_UPDATES, snapshot, s);
3458e877caaSKent Overstreet }
3468e877caaSKent Overstreet 
347eebe8a84SKent Overstreet static int bch2_snapshot_live(struct btree_trans *trans, u32 id)
3488e877caaSKent Overstreet {
3498e877caaSKent Overstreet 	struct bch_snapshot v;
3508e877caaSKent Overstreet 	int ret;
3518e877caaSKent Overstreet 
3528e877caaSKent Overstreet 	if (!id)
3538e877caaSKent Overstreet 		return 0;
3548e877caaSKent Overstreet 
3558e877caaSKent Overstreet 	ret = bch2_snapshot_lookup(trans, id, &v);
3568e877caaSKent Overstreet 	if (bch2_err_matches(ret, ENOENT))
3578e877caaSKent Overstreet 		bch_err(trans->c, "snapshot node %u not found", id);
3588e877caaSKent Overstreet 	if (ret)
3598e877caaSKent Overstreet 		return ret;
3608e877caaSKent Overstreet 
3618e877caaSKent Overstreet 	return !BCH_SNAPSHOT_DELETED(&v);
3628e877caaSKent Overstreet }
3638e877caaSKent Overstreet 
3648e877caaSKent Overstreet /*
3658e877caaSKent Overstreet  * If @k is a snapshot with just one live child, it's part of a linear chain,
3668e877caaSKent Overstreet  * which we consider to be an equivalence class: and then after snapshot
3678e877caaSKent Overstreet  * deletion cleanup, there should only be a single key at a given position in
3688e877caaSKent Overstreet  * this equivalence class.
3698e877caaSKent Overstreet  *
3708e877caaSKent Overstreet  * This sets the equivalence class of @k to be the child's equivalence class, if
3718e877caaSKent Overstreet  * it's part of such a linear chain: this correctly sets equivalence classes on
3728e877caaSKent Overstreet  * startup if we run leaf to root (i.e. in natural key order).
3738e877caaSKent Overstreet  */
374eebe8a84SKent Overstreet static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
3758e877caaSKent Overstreet {
3768e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
3778e877caaSKent Overstreet 	unsigned i, nr_live = 0, live_idx = 0;
3788e877caaSKent Overstreet 	struct bkey_s_c_snapshot snap;
3798e877caaSKent Overstreet 	u32 id = k.k->p.offset, child[2];
3808e877caaSKent Overstreet 
3818e877caaSKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot)
3828e877caaSKent Overstreet 		return 0;
3838e877caaSKent Overstreet 
3848e877caaSKent Overstreet 	snap = bkey_s_c_to_snapshot(k);
3858e877caaSKent Overstreet 
3868e877caaSKent Overstreet 	child[0] = le32_to_cpu(snap.v->children[0]);
3878e877caaSKent Overstreet 	child[1] = le32_to_cpu(snap.v->children[1]);
3888e877caaSKent Overstreet 
3898e877caaSKent Overstreet 	for (i = 0; i < 2; i++) {
3908e877caaSKent Overstreet 		int ret = bch2_snapshot_live(trans, child[i]);
3918e877caaSKent Overstreet 
3928e877caaSKent Overstreet 		if (ret < 0)
3938e877caaSKent Overstreet 			return ret;
3948e877caaSKent Overstreet 
3958e877caaSKent Overstreet 		if (ret)
3968e877caaSKent Overstreet 			live_idx = i;
3978e877caaSKent Overstreet 		nr_live += ret;
3988e877caaSKent Overstreet 	}
3998e877caaSKent Overstreet 
4008e877caaSKent Overstreet 	mutex_lock(&c->snapshot_table_lock);
4018e877caaSKent Overstreet 
4028e877caaSKent Overstreet 	snapshot_t_mut(c, id)->equiv = nr_live == 1
4038e877caaSKent Overstreet 		? snapshot_t_mut(c, child[live_idx])->equiv
4048e877caaSKent Overstreet 		: id;
4058e877caaSKent Overstreet 
4068e877caaSKent Overstreet 	mutex_unlock(&c->snapshot_table_lock);
4078e877caaSKent Overstreet 
4088e877caaSKent Overstreet 	return 0;
4098e877caaSKent Overstreet }
4108e877caaSKent Overstreet 
4118e877caaSKent Overstreet /* fsck: */
4128e877caaSKent Overstreet 
4138e877caaSKent Overstreet static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child)
4148e877caaSKent Overstreet {
4158e877caaSKent Overstreet 	return snapshot_t(c, id)->children[child];
4168e877caaSKent Overstreet }
4178e877caaSKent Overstreet 
4188e877caaSKent Overstreet static u32 bch2_snapshot_left_child(struct bch_fs *c, u32 id)
4198e877caaSKent Overstreet {
4208e877caaSKent Overstreet 	return bch2_snapshot_child(c, id, 0);
4218e877caaSKent Overstreet }
4228e877caaSKent Overstreet 
4238e877caaSKent Overstreet static u32 bch2_snapshot_right_child(struct bch_fs *c, u32 id)
4248e877caaSKent Overstreet {
4258e877caaSKent Overstreet 	return bch2_snapshot_child(c, id, 1);
4268e877caaSKent Overstreet }
4278e877caaSKent Overstreet 
4288e877caaSKent Overstreet static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id)
4298e877caaSKent Overstreet {
4308e877caaSKent Overstreet 	u32 n, parent;
4318e877caaSKent Overstreet 
4328e877caaSKent Overstreet 	n = bch2_snapshot_left_child(c, id);
4338e877caaSKent Overstreet 	if (n)
4348e877caaSKent Overstreet 		return n;
4358e877caaSKent Overstreet 
4368e877caaSKent Overstreet 	while ((parent = bch2_snapshot_parent(c, id))) {
4378e877caaSKent Overstreet 		n = bch2_snapshot_right_child(c, parent);
4388e877caaSKent Overstreet 		if (n && n != id)
4398e877caaSKent Overstreet 			return n;
4408e877caaSKent Overstreet 		id = parent;
4418e877caaSKent Overstreet 	}
4428e877caaSKent Overstreet 
4438e877caaSKent Overstreet 	return 0;
4448e877caaSKent Overstreet }
4458e877caaSKent Overstreet 
4468e877caaSKent Overstreet static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
4478e877caaSKent Overstreet {
4488e877caaSKent Overstreet 	u32 id = snapshot_root;
4498e877caaSKent Overstreet 	u32 subvol = 0, s;
4508e877caaSKent Overstreet 
4518e877caaSKent Overstreet 	while (id) {
4528e877caaSKent Overstreet 		s = snapshot_t(c, id)->subvol;
4538e877caaSKent Overstreet 
4548e877caaSKent Overstreet 		if (s && (!subvol || s < subvol))
4558e877caaSKent Overstreet 			subvol = s;
4568e877caaSKent Overstreet 
4578e877caaSKent Overstreet 		id = bch2_snapshot_tree_next(c, id);
4588e877caaSKent Overstreet 	}
4598e877caaSKent Overstreet 
4608e877caaSKent Overstreet 	return subvol;
4618e877caaSKent Overstreet }
4628e877caaSKent Overstreet 
4638e877caaSKent Overstreet static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans,
4648e877caaSKent Overstreet 					    u32 snapshot_root, u32 *subvol_id)
4658e877caaSKent Overstreet {
4668e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
4678e877caaSKent Overstreet 	struct btree_iter iter;
4688e877caaSKent Overstreet 	struct bkey_s_c k;
4698e877caaSKent Overstreet 	struct bkey_s_c_subvolume s;
4708e877caaSKent Overstreet 	bool found = false;
4718e877caaSKent Overstreet 	int ret;
4728e877caaSKent Overstreet 
4738e877caaSKent Overstreet 	for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN,
4748e877caaSKent Overstreet 				     0, k, ret) {
4758e877caaSKent Overstreet 		if (k.k->type != KEY_TYPE_subvolume)
4768e877caaSKent Overstreet 			continue;
4778e877caaSKent Overstreet 
4788e877caaSKent Overstreet 		s = bkey_s_c_to_subvolume(k);
4798e877caaSKent Overstreet 		if (!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.v->snapshot), snapshot_root))
4808e877caaSKent Overstreet 			continue;
4818e877caaSKent Overstreet 		if (!BCH_SUBVOLUME_SNAP(s.v)) {
4828e877caaSKent Overstreet 			*subvol_id = s.k->p.offset;
4838e877caaSKent Overstreet 			found = true;
4848e877caaSKent Overstreet 			break;
4858e877caaSKent Overstreet 		}
4868e877caaSKent Overstreet 	}
4878e877caaSKent Overstreet 
4888e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
4898e877caaSKent Overstreet 
4908e877caaSKent Overstreet 	if (!ret && !found) {
49196dea3d5SKent Overstreet 		struct bkey_i_subvolume *u;
4928e877caaSKent Overstreet 
4938e877caaSKent Overstreet 		*subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root);
4948e877caaSKent Overstreet 
49596dea3d5SKent Overstreet 		u = bch2_bkey_get_mut_typed(trans, &iter,
4968e877caaSKent Overstreet 					    BTREE_ID_subvolumes, POS(0, *subvol_id),
4978e877caaSKent Overstreet 					    0, subvolume);
49896dea3d5SKent Overstreet 		ret = PTR_ERR_OR_ZERO(u);
4998e877caaSKent Overstreet 		if (ret)
5008e877caaSKent Overstreet 			return ret;
5018e877caaSKent Overstreet 
50296dea3d5SKent Overstreet 		SET_BCH_SUBVOLUME_SNAP(&u->v, false);
5038e877caaSKent Overstreet 	}
5048e877caaSKent Overstreet 
5058e877caaSKent Overstreet 	return ret;
5068e877caaSKent Overstreet }
5078e877caaSKent Overstreet 
5088e877caaSKent Overstreet static int check_snapshot_tree(struct btree_trans *trans,
5098e877caaSKent Overstreet 			       struct btree_iter *iter,
5108e877caaSKent Overstreet 			       struct bkey_s_c k)
5118e877caaSKent Overstreet {
5128e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
5138e877caaSKent Overstreet 	struct bkey_s_c_snapshot_tree st;
5148e877caaSKent Overstreet 	struct bch_snapshot s;
5158e877caaSKent Overstreet 	struct bch_subvolume subvol;
5168e877caaSKent Overstreet 	struct printbuf buf = PRINTBUF;
5178e877caaSKent Overstreet 	u32 root_id;
5188e877caaSKent Overstreet 	int ret;
5198e877caaSKent Overstreet 
5208e877caaSKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot_tree)
5218e877caaSKent Overstreet 		return 0;
5228e877caaSKent Overstreet 
5238e877caaSKent Overstreet 	st = bkey_s_c_to_snapshot_tree(k);
5248e877caaSKent Overstreet 	root_id = le32_to_cpu(st.v->root_snapshot);
5258e877caaSKent Overstreet 
5268e877caaSKent Overstreet 	ret = bch2_snapshot_lookup(trans, root_id, &s);
5278e877caaSKent Overstreet 	if (ret && !bch2_err_matches(ret, ENOENT))
5288e877caaSKent Overstreet 		goto err;
5298e877caaSKent Overstreet 
5308e877caaSKent Overstreet 	if (fsck_err_on(ret ||
5318e877caaSKent Overstreet 			root_id != bch2_snapshot_root(c, root_id) ||
5328e877caaSKent Overstreet 			st.k->p.offset != le32_to_cpu(s.tree),
5338e877caaSKent Overstreet 			c,
5348e877caaSKent Overstreet 			"snapshot tree points to missing/incorrect snapshot:\n  %s",
5358e877caaSKent Overstreet 			(bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
5368e877caaSKent Overstreet 		ret = bch2_btree_delete_at(trans, iter, 0);
5378e877caaSKent Overstreet 		goto err;
5388e877caaSKent Overstreet 	}
5398e877caaSKent Overstreet 
5408e877caaSKent Overstreet 	ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol),
5418e877caaSKent Overstreet 				 false, 0, &subvol);
5428e877caaSKent Overstreet 	if (ret && !bch2_err_matches(ret, ENOENT))
5438e877caaSKent Overstreet 		goto err;
5448e877caaSKent Overstreet 
5458e877caaSKent Overstreet 	if (fsck_err_on(ret, c,
5468e877caaSKent Overstreet 			"snapshot tree points to missing subvolume:\n  %s",
5478e877caaSKent Overstreet 			(printbuf_reset(&buf),
5488e877caaSKent Overstreet 			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
5498e877caaSKent Overstreet 	    fsck_err_on(!bch2_snapshot_is_ancestor_early(c,
5508e877caaSKent Overstreet 						le32_to_cpu(subvol.snapshot),
5518e877caaSKent Overstreet 						root_id), c,
5528e877caaSKent Overstreet 			"snapshot tree points to subvolume that does not point to snapshot in this tree:\n  %s",
5538e877caaSKent Overstreet 			(printbuf_reset(&buf),
5548e877caaSKent Overstreet 			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
5558e877caaSKent Overstreet 	    fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol), c,
5568e877caaSKent Overstreet 			"snapshot tree points to snapshot subvolume:\n  %s",
5578e877caaSKent Overstreet 			(printbuf_reset(&buf),
5588e877caaSKent Overstreet 			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
5598e877caaSKent Overstreet 		struct bkey_i_snapshot_tree *u;
5608e877caaSKent Overstreet 		u32 subvol_id;
5618e877caaSKent Overstreet 
5628e877caaSKent Overstreet 		ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
5638e877caaSKent Overstreet 		if (ret)
5648e877caaSKent Overstreet 			goto err;
5658e877caaSKent Overstreet 
5668e877caaSKent Overstreet 		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot_tree);
5678e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(u);
5688e877caaSKent Overstreet 		if (ret)
5698e877caaSKent Overstreet 			goto err;
5708e877caaSKent Overstreet 
5718e877caaSKent Overstreet 		u->v.master_subvol = cpu_to_le32(subvol_id);
5728e877caaSKent Overstreet 		st = snapshot_tree_i_to_s_c(u);
5738e877caaSKent Overstreet 	}
5748e877caaSKent Overstreet err:
5758e877caaSKent Overstreet fsck_err:
5768e877caaSKent Overstreet 	printbuf_exit(&buf);
5778e877caaSKent Overstreet 	return ret;
5788e877caaSKent Overstreet }
5798e877caaSKent Overstreet 
5808e877caaSKent Overstreet /*
5818e877caaSKent Overstreet  * For each snapshot_tree, make sure it points to the root of a snapshot tree
5828e877caaSKent Overstreet  * and that snapshot entry points back to it, or delete it.
5838e877caaSKent Overstreet  *
5848e877caaSKent Overstreet  * And, make sure it points to a subvolume within that snapshot tree, or correct
5858e877caaSKent Overstreet  * it to point to the oldest subvolume within that snapshot tree.
5868e877caaSKent Overstreet  */
5878e877caaSKent Overstreet int bch2_check_snapshot_trees(struct bch_fs *c)
5888e877caaSKent Overstreet {
5898e877caaSKent Overstreet 	struct btree_iter iter;
5908e877caaSKent Overstreet 	struct bkey_s_c k;
5918e877caaSKent Overstreet 	int ret;
5928e877caaSKent Overstreet 
5938e877caaSKent Overstreet 	ret = bch2_trans_run(c,
5946bd68ec2SKent Overstreet 		for_each_btree_key_commit(trans, iter,
5958e877caaSKent Overstreet 			BTREE_ID_snapshot_trees, POS_MIN,
5968e877caaSKent Overstreet 			BTREE_ITER_PREFETCH, k,
5978e877caaSKent Overstreet 			NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
5986bd68ec2SKent Overstreet 		check_snapshot_tree(trans, &iter, k)));
5998e877caaSKent Overstreet 
6008e877caaSKent Overstreet 	if (ret)
6018e877caaSKent Overstreet 		bch_err(c, "error %i checking snapshot trees", ret);
6028e877caaSKent Overstreet 	return ret;
6038e877caaSKent Overstreet }
6048e877caaSKent Overstreet 
6058e877caaSKent Overstreet /*
6068e877caaSKent Overstreet  * Look up snapshot tree for @tree_id and find root,
6078e877caaSKent Overstreet  * make sure @snap_id is a descendent:
6088e877caaSKent Overstreet  */
6098e877caaSKent Overstreet static int snapshot_tree_ptr_good(struct btree_trans *trans,
6108e877caaSKent Overstreet 				  u32 snap_id, u32 tree_id)
6118e877caaSKent Overstreet {
6128e877caaSKent Overstreet 	struct bch_snapshot_tree s_t;
6138e877caaSKent Overstreet 	int ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t);
6148e877caaSKent Overstreet 
6158e877caaSKent Overstreet 	if (bch2_err_matches(ret, ENOENT))
6168e877caaSKent Overstreet 		return 0;
6178e877caaSKent Overstreet 	if (ret)
6188e877caaSKent Overstreet 		return ret;
6198e877caaSKent Overstreet 
6208e877caaSKent Overstreet 	return bch2_snapshot_is_ancestor_early(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot));
6218e877caaSKent Overstreet }
6228e877caaSKent Overstreet 
6238e877caaSKent Overstreet u32 bch2_snapshot_skiplist_get(struct bch_fs *c, u32 id)
6248e877caaSKent Overstreet {
6258e877caaSKent Overstreet 	const struct snapshot_t *s;
6268e877caaSKent Overstreet 
6278e877caaSKent Overstreet 	if (!id)
6288e877caaSKent Overstreet 		return 0;
6298e877caaSKent Overstreet 
6308e877caaSKent Overstreet 	rcu_read_lock();
6318e877caaSKent Overstreet 	s = snapshot_t(c, id);
6328e877caaSKent Overstreet 	if (s->parent)
6338e877caaSKent Overstreet 		id = bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth));
6348e877caaSKent Overstreet 	rcu_read_unlock();
6358e877caaSKent Overstreet 
6368e877caaSKent Overstreet 	return id;
6378e877caaSKent Overstreet }
6388e877caaSKent Overstreet 
639097d4cc8SKent Overstreet static int snapshot_skiplist_good(struct btree_trans *trans, u32 id, struct bch_snapshot s)
6408e877caaSKent Overstreet {
6418e877caaSKent Overstreet 	unsigned i;
6428e877caaSKent Overstreet 
643097d4cc8SKent Overstreet 	for (i = 0; i < 3; i++)
644097d4cc8SKent Overstreet 		if (!s.parent) {
645097d4cc8SKent Overstreet 			if (s.skip[i])
6468e877caaSKent Overstreet 				return false;
647097d4cc8SKent Overstreet 		} else {
648097d4cc8SKent Overstreet 			if (!bch2_snapshot_is_ancestor_early(trans->c, id, le32_to_cpu(s.skip[i])))
6498e877caaSKent Overstreet 				return false;
6508e877caaSKent Overstreet 		}
6518e877caaSKent Overstreet 
6528e877caaSKent Overstreet 	return true;
6538e877caaSKent Overstreet }
6548e877caaSKent Overstreet 
6558e877caaSKent Overstreet /*
6568e877caaSKent Overstreet  * snapshot_tree pointer was incorrect: look up root snapshot node, make sure
6578e877caaSKent Overstreet  * its snapshot_tree pointer is correct (allocate new one if necessary), then
6588e877caaSKent Overstreet  * update this node's pointer to root node's pointer:
6598e877caaSKent Overstreet  */
6608e877caaSKent Overstreet static int snapshot_tree_ptr_repair(struct btree_trans *trans,
6618e877caaSKent Overstreet 				    struct btree_iter *iter,
6628e877caaSKent Overstreet 				    struct bkey_s_c k,
6638e877caaSKent Overstreet 				    struct bch_snapshot *s)
6648e877caaSKent Overstreet {
6658e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
6668e877caaSKent Overstreet 	struct btree_iter root_iter;
6678e877caaSKent Overstreet 	struct bch_snapshot_tree s_t;
6688e877caaSKent Overstreet 	struct bkey_s_c_snapshot root;
6698e877caaSKent Overstreet 	struct bkey_i_snapshot *u;
6708e877caaSKent Overstreet 	u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id;
6718e877caaSKent Overstreet 	int ret;
6728e877caaSKent Overstreet 
6738e877caaSKent Overstreet 	root = bch2_bkey_get_iter_typed(trans, &root_iter,
6748e877caaSKent Overstreet 			       BTREE_ID_snapshots, POS(0, root_id),
6758e877caaSKent Overstreet 			       BTREE_ITER_WITH_UPDATES, snapshot);
6768e877caaSKent Overstreet 	ret = bkey_err(root);
6778e877caaSKent Overstreet 	if (ret)
6788e877caaSKent Overstreet 		goto err;
6798e877caaSKent Overstreet 
6808e877caaSKent Overstreet 	tree_id = le32_to_cpu(root.v->tree);
6818e877caaSKent Overstreet 
6828e877caaSKent Overstreet 	ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t);
6838e877caaSKent Overstreet 	if (ret && !bch2_err_matches(ret, ENOENT))
6848e877caaSKent Overstreet 		return ret;
6858e877caaSKent Overstreet 
6868e877caaSKent Overstreet 	if (ret || le32_to_cpu(s_t.root_snapshot) != root_id) {
6878e877caaSKent Overstreet 		u = bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot);
6888e877caaSKent Overstreet 		ret =   PTR_ERR_OR_ZERO(u) ?:
6898e877caaSKent Overstreet 			bch2_snapshot_tree_create(trans, root_id,
6908e877caaSKent Overstreet 				bch2_snapshot_tree_oldest_subvol(c, root_id),
6918e877caaSKent Overstreet 				&tree_id);
6928e877caaSKent Overstreet 		if (ret)
6938e877caaSKent Overstreet 			goto err;
6948e877caaSKent Overstreet 
6958e877caaSKent Overstreet 		u->v.tree = cpu_to_le32(tree_id);
6968e877caaSKent Overstreet 		if (k.k->p.offset == root_id)
6978e877caaSKent Overstreet 			*s = u->v;
6988e877caaSKent Overstreet 	}
6998e877caaSKent Overstreet 
7008e877caaSKent Overstreet 	if (k.k->p.offset != root_id) {
7018e877caaSKent Overstreet 		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
7028e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(u);
7038e877caaSKent Overstreet 		if (ret)
7048e877caaSKent Overstreet 			goto err;
7058e877caaSKent Overstreet 
7068e877caaSKent Overstreet 		u->v.tree = cpu_to_le32(tree_id);
7078e877caaSKent Overstreet 		*s = u->v;
7088e877caaSKent Overstreet 	}
7098e877caaSKent Overstreet err:
7108e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &root_iter);
7118e877caaSKent Overstreet 	return ret;
7128e877caaSKent Overstreet }
7138e877caaSKent Overstreet 
7148e877caaSKent Overstreet static int check_snapshot(struct btree_trans *trans,
7158e877caaSKent Overstreet 			  struct btree_iter *iter,
7168e877caaSKent Overstreet 			  struct bkey_s_c k)
7178e877caaSKent Overstreet {
7188e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
7198e877caaSKent Overstreet 	struct bch_snapshot s;
7208e877caaSKent Overstreet 	struct bch_subvolume subvol;
7218e877caaSKent Overstreet 	struct bch_snapshot v;
7228e877caaSKent Overstreet 	struct bkey_i_snapshot *u;
7238e877caaSKent Overstreet 	u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
7248e877caaSKent Overstreet 	u32 real_depth;
7258e877caaSKent Overstreet 	struct printbuf buf = PRINTBUF;
7268e877caaSKent Overstreet 	bool should_have_subvol;
7278e877caaSKent Overstreet 	u32 i, id;
7288e877caaSKent Overstreet 	int ret = 0;
7298e877caaSKent Overstreet 
7308e877caaSKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot)
7318e877caaSKent Overstreet 		return 0;
7328e877caaSKent Overstreet 
7338e877caaSKent Overstreet 	memset(&s, 0, sizeof(s));
7348e877caaSKent Overstreet 	memcpy(&s, k.v, bkey_val_bytes(k.k));
7358e877caaSKent Overstreet 
7368e877caaSKent Overstreet 	id = le32_to_cpu(s.parent);
7378e877caaSKent Overstreet 	if (id) {
7388e877caaSKent Overstreet 		ret = bch2_snapshot_lookup(trans, id, &v);
7398e877caaSKent Overstreet 		if (bch2_err_matches(ret, ENOENT))
7408e877caaSKent Overstreet 			bch_err(c, "snapshot with nonexistent parent:\n  %s",
7418e877caaSKent Overstreet 				(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
7428e877caaSKent Overstreet 		if (ret)
7438e877caaSKent Overstreet 			goto err;
7448e877caaSKent Overstreet 
7458e877caaSKent Overstreet 		if (le32_to_cpu(v.children[0]) != k.k->p.offset &&
7468e877caaSKent Overstreet 		    le32_to_cpu(v.children[1]) != k.k->p.offset) {
7478e877caaSKent Overstreet 			bch_err(c, "snapshot parent %u missing pointer to child %llu",
7488e877caaSKent Overstreet 				id, k.k->p.offset);
7498e877caaSKent Overstreet 			ret = -EINVAL;
7508e877caaSKent Overstreet 			goto err;
7518e877caaSKent Overstreet 		}
7528e877caaSKent Overstreet 	}
7538e877caaSKent Overstreet 
7548e877caaSKent Overstreet 	for (i = 0; i < 2 && s.children[i]; i++) {
7558e877caaSKent Overstreet 		id = le32_to_cpu(s.children[i]);
7568e877caaSKent Overstreet 
7578e877caaSKent Overstreet 		ret = bch2_snapshot_lookup(trans, id, &v);
7588e877caaSKent Overstreet 		if (bch2_err_matches(ret, ENOENT))
7598e877caaSKent Overstreet 			bch_err(c, "snapshot node %llu has nonexistent child %u",
7608e877caaSKent Overstreet 				k.k->p.offset, id);
7618e877caaSKent Overstreet 		if (ret)
7628e877caaSKent Overstreet 			goto err;
7638e877caaSKent Overstreet 
7648e877caaSKent Overstreet 		if (le32_to_cpu(v.parent) != k.k->p.offset) {
7658e877caaSKent Overstreet 			bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)",
7668e877caaSKent Overstreet 				id, le32_to_cpu(v.parent), k.k->p.offset);
7678e877caaSKent Overstreet 			ret = -EINVAL;
7688e877caaSKent Overstreet 			goto err;
7698e877caaSKent Overstreet 		}
7708e877caaSKent Overstreet 	}
7718e877caaSKent Overstreet 
7728e877caaSKent Overstreet 	should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
7738e877caaSKent Overstreet 		!BCH_SNAPSHOT_DELETED(&s);
7748e877caaSKent Overstreet 
7758e877caaSKent Overstreet 	if (should_have_subvol) {
7768e877caaSKent Overstreet 		id = le32_to_cpu(s.subvol);
7778e877caaSKent Overstreet 		ret = bch2_subvolume_get(trans, id, 0, false, &subvol);
7788e877caaSKent Overstreet 		if (bch2_err_matches(ret, ENOENT))
7798e877caaSKent Overstreet 			bch_err(c, "snapshot points to nonexistent subvolume:\n  %s",
7808e877caaSKent Overstreet 				(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
7818e877caaSKent Overstreet 		if (ret)
7828e877caaSKent Overstreet 			goto err;
7838e877caaSKent Overstreet 
7848e877caaSKent Overstreet 		if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) {
7858e877caaSKent Overstreet 			bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
7868e877caaSKent Overstreet 				k.k->p.offset);
7878e877caaSKent Overstreet 			ret = -EINVAL;
7888e877caaSKent Overstreet 			goto err;
7898e877caaSKent Overstreet 		}
7908e877caaSKent Overstreet 	} else {
7918e877caaSKent Overstreet 		if (fsck_err_on(s.subvol, c, "snapshot should not point to subvol:\n  %s",
7928e877caaSKent Overstreet 				(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
7938e877caaSKent Overstreet 			u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
7948e877caaSKent Overstreet 			ret = PTR_ERR_OR_ZERO(u);
7958e877caaSKent Overstreet 			if (ret)
7968e877caaSKent Overstreet 				goto err;
7978e877caaSKent Overstreet 
7988e877caaSKent Overstreet 			u->v.subvol = 0;
7998e877caaSKent Overstreet 			s = u->v;
8008e877caaSKent Overstreet 		}
8018e877caaSKent Overstreet 	}
8028e877caaSKent Overstreet 
8038e877caaSKent Overstreet 	ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree));
8048e877caaSKent Overstreet 	if (ret < 0)
8058e877caaSKent Overstreet 		goto err;
8068e877caaSKent Overstreet 
8078e877caaSKent Overstreet 	if (fsck_err_on(!ret, c, "snapshot points to missing/incorrect tree:\n  %s",
8088e877caaSKent Overstreet 			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
8098e877caaSKent Overstreet 		ret = snapshot_tree_ptr_repair(trans, iter, k, &s);
8108e877caaSKent Overstreet 		if (ret)
8118e877caaSKent Overstreet 			goto err;
8128e877caaSKent Overstreet 	}
8138e877caaSKent Overstreet 	ret = 0;
8148e877caaSKent Overstreet 
8158e877caaSKent Overstreet 	real_depth = bch2_snapshot_depth(c, parent_id);
8168e877caaSKent Overstreet 
8178e877caaSKent Overstreet 	if (le32_to_cpu(s.depth) != real_depth &&
8188e877caaSKent Overstreet 	    (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists ||
8198e877caaSKent Overstreet 	     fsck_err(c, "snapshot with incorrect depth field, should be %u:\n  %s",
8208e877caaSKent Overstreet 		      real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) {
8218e877caaSKent Overstreet 		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
8228e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(u);
8238e877caaSKent Overstreet 		if (ret)
8248e877caaSKent Overstreet 			goto err;
8258e877caaSKent Overstreet 
8268e877caaSKent Overstreet 		u->v.depth = cpu_to_le32(real_depth);
8278e877caaSKent Overstreet 		s = u->v;
8288e877caaSKent Overstreet 	}
8298e877caaSKent Overstreet 
830097d4cc8SKent Overstreet 	ret = snapshot_skiplist_good(trans, k.k->p.offset, s);
8318e877caaSKent Overstreet 	if (ret < 0)
8328e877caaSKent Overstreet 		goto err;
8338e877caaSKent Overstreet 
8348e877caaSKent Overstreet 	if (!ret &&
8358e877caaSKent Overstreet 	    (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists ||
8368e877caaSKent Overstreet 	     fsck_err(c, "snapshot with bad skiplist field:\n  %s",
8378e877caaSKent Overstreet 		      (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) {
8388e877caaSKent Overstreet 		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
8398e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(u);
8408e877caaSKent Overstreet 		if (ret)
8418e877caaSKent Overstreet 			goto err;
8428e877caaSKent Overstreet 
8438e877caaSKent Overstreet 		for (i = 0; i < ARRAY_SIZE(u->v.skip); i++)
8448e877caaSKent Overstreet 			u->v.skip[i] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent_id));
8458e877caaSKent Overstreet 
8468e877caaSKent Overstreet 		bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_le32);
8478e877caaSKent Overstreet 		s = u->v;
8488e877caaSKent Overstreet 	}
8498e877caaSKent Overstreet 	ret = 0;
8508e877caaSKent Overstreet err:
8518e877caaSKent Overstreet fsck_err:
8528e877caaSKent Overstreet 	printbuf_exit(&buf);
8538e877caaSKent Overstreet 	return ret;
8548e877caaSKent Overstreet }
8558e877caaSKent Overstreet 
8568e877caaSKent Overstreet int bch2_check_snapshots(struct bch_fs *c)
8578e877caaSKent Overstreet {
8588e877caaSKent Overstreet 	struct btree_iter iter;
8598e877caaSKent Overstreet 	struct bkey_s_c k;
8608e877caaSKent Overstreet 	int ret;
8618e877caaSKent Overstreet 
8628e877caaSKent Overstreet 	/*
8638e877caaSKent Overstreet 	 * We iterate backwards as checking/fixing the depth field requires that
8648e877caaSKent Overstreet 	 * the parent's depth already be correct:
8658e877caaSKent Overstreet 	 */
8668e877caaSKent Overstreet 	ret = bch2_trans_run(c,
8676bd68ec2SKent Overstreet 		for_each_btree_key_reverse_commit(trans, iter,
8688e877caaSKent Overstreet 			BTREE_ID_snapshots, POS_MAX,
8698e877caaSKent Overstreet 			BTREE_ITER_PREFETCH, k,
8708e877caaSKent Overstreet 			NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
8716bd68ec2SKent Overstreet 		check_snapshot(trans, &iter, k)));
8728e877caaSKent Overstreet 	if (ret)
8738e877caaSKent Overstreet 		bch_err_fn(c, ret);
8748e877caaSKent Overstreet 	return ret;
8758e877caaSKent Overstreet }
8768e877caaSKent Overstreet 
8778e877caaSKent Overstreet /*
8788e877caaSKent Overstreet  * Mark a snapshot as deleted, for future cleanup:
8798e877caaSKent Overstreet  */
8808e877caaSKent Overstreet int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
8818e877caaSKent Overstreet {
8828e877caaSKent Overstreet 	struct btree_iter iter;
8838e877caaSKent Overstreet 	struct bkey_i_snapshot *s;
8848e877caaSKent Overstreet 	int ret = 0;
8858e877caaSKent Overstreet 
8868e877caaSKent Overstreet 	s = bch2_bkey_get_mut_typed(trans, &iter,
8878e877caaSKent Overstreet 				    BTREE_ID_snapshots, POS(0, id),
8888e877caaSKent Overstreet 				    0, snapshot);
8898e877caaSKent Overstreet 	ret = PTR_ERR_OR_ZERO(s);
8908e877caaSKent Overstreet 	if (unlikely(ret)) {
8918e877caaSKent Overstreet 		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
8928e877caaSKent Overstreet 					trans->c, "missing snapshot %u", id);
8938e877caaSKent Overstreet 		return ret;
8948e877caaSKent Overstreet 	}
8958e877caaSKent Overstreet 
8968e877caaSKent Overstreet 	/* already deleted? */
8978e877caaSKent Overstreet 	if (BCH_SNAPSHOT_DELETED(&s->v))
8988e877caaSKent Overstreet 		goto err;
8998e877caaSKent Overstreet 
9008e877caaSKent Overstreet 	SET_BCH_SNAPSHOT_DELETED(&s->v, true);
9018e877caaSKent Overstreet 	SET_BCH_SNAPSHOT_SUBVOL(&s->v, false);
9028e877caaSKent Overstreet 	s->v.subvol = 0;
9038e877caaSKent Overstreet err:
9048e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
9058e877caaSKent Overstreet 	return ret;
9068e877caaSKent Overstreet }
9078e877caaSKent Overstreet 
908f55d6e07SKent Overstreet static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s)
909f55d6e07SKent Overstreet {
910f55d6e07SKent Overstreet 	if (le32_to_cpu(s->children[0]) < le32_to_cpu(s->children[1]))
911f55d6e07SKent Overstreet 		swap(s->children[0], s->children[1]);
912f55d6e07SKent Overstreet }
913f55d6e07SKent Overstreet 
91496dea3d5SKent Overstreet static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
9158e877caaSKent Overstreet {
9168e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
9178e877caaSKent Overstreet 	struct btree_iter iter, p_iter = (struct btree_iter) { NULL };
918f55d6e07SKent Overstreet 	struct btree_iter c_iter = (struct btree_iter) { NULL };
9198e877caaSKent Overstreet 	struct btree_iter tree_iter = (struct btree_iter) { NULL };
9208e877caaSKent Overstreet 	struct bkey_s_c_snapshot s;
921f55d6e07SKent Overstreet 	u32 parent_id, child_id;
9228e877caaSKent Overstreet 	unsigned i;
9238e877caaSKent Overstreet 	int ret = 0;
9248e877caaSKent Overstreet 
9258e877caaSKent Overstreet 	s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
9268e877caaSKent Overstreet 				     BTREE_ITER_INTENT, snapshot);
9278e877caaSKent Overstreet 	ret = bkey_err(s);
9288e877caaSKent Overstreet 	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
9298e877caaSKent Overstreet 				"missing snapshot %u", id);
9308e877caaSKent Overstreet 
9318e877caaSKent Overstreet 	if (ret)
9328e877caaSKent Overstreet 		goto err;
9338e877caaSKent Overstreet 
934f55d6e07SKent Overstreet 	BUG_ON(s.v->children[1]);
935f55d6e07SKent Overstreet 
9368e877caaSKent Overstreet 	parent_id = le32_to_cpu(s.v->parent);
937f55d6e07SKent Overstreet 	child_id = le32_to_cpu(s.v->children[0]);
9388e877caaSKent Overstreet 
9398e877caaSKent Overstreet 	if (parent_id) {
9408e877caaSKent Overstreet 		struct bkey_i_snapshot *parent;
9418e877caaSKent Overstreet 
9428e877caaSKent Overstreet 		parent = bch2_bkey_get_mut_typed(trans, &p_iter,
9438e877caaSKent Overstreet 				     BTREE_ID_snapshots, POS(0, parent_id),
9448e877caaSKent Overstreet 				     0, snapshot);
9458e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(parent);
9468e877caaSKent Overstreet 		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
9478e877caaSKent Overstreet 					"missing snapshot %u", parent_id);
948f55d6e07SKent Overstreet 		if (unlikely(ret))
9498e877caaSKent Overstreet 			goto err;
9508e877caaSKent Overstreet 
951f55d6e07SKent Overstreet 		/* find entry in parent->children for node being deleted */
9528e877caaSKent Overstreet 		for (i = 0; i < 2; i++)
9538e877caaSKent Overstreet 			if (le32_to_cpu(parent->v.children[i]) == id)
9548e877caaSKent Overstreet 				break;
9558e877caaSKent Overstreet 
956f55d6e07SKent Overstreet 		if (bch2_fs_inconsistent_on(i == 2, c,
957f55d6e07SKent Overstreet 					"snapshot %u missing child pointer to %u",
958f55d6e07SKent Overstreet 					parent_id, id))
959f55d6e07SKent Overstreet 			goto err;
9608e877caaSKent Overstreet 
961f55d6e07SKent Overstreet 		parent->v.children[i] = le32_to_cpu(child_id);
962f55d6e07SKent Overstreet 
963f55d6e07SKent Overstreet 		normalize_snapshot_child_pointers(&parent->v);
964f55d6e07SKent Overstreet 	}
965f55d6e07SKent Overstreet 
966f55d6e07SKent Overstreet 	if (child_id) {
967f55d6e07SKent Overstreet 		struct bkey_i_snapshot *child;
968f55d6e07SKent Overstreet 
969f55d6e07SKent Overstreet 		child = bch2_bkey_get_mut_typed(trans, &c_iter,
970f55d6e07SKent Overstreet 				     BTREE_ID_snapshots, POS(0, child_id),
971f55d6e07SKent Overstreet 				     0, snapshot);
972f55d6e07SKent Overstreet 		ret = PTR_ERR_OR_ZERO(child);
973f55d6e07SKent Overstreet 		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
974f55d6e07SKent Overstreet 					"missing snapshot %u", child_id);
975f55d6e07SKent Overstreet 		if (unlikely(ret))
976f55d6e07SKent Overstreet 			goto err;
977f55d6e07SKent Overstreet 
978f55d6e07SKent Overstreet 		child->v.parent = cpu_to_le32(parent_id);
979f55d6e07SKent Overstreet 
980f55d6e07SKent Overstreet 		if (!child->v.parent) {
981f55d6e07SKent Overstreet 			child->v.skip[0] = 0;
982f55d6e07SKent Overstreet 			child->v.skip[1] = 0;
983f55d6e07SKent Overstreet 			child->v.skip[2] = 0;
984f55d6e07SKent Overstreet 		}
985f55d6e07SKent Overstreet 	}
986f55d6e07SKent Overstreet 
987f55d6e07SKent Overstreet 	if (!parent_id) {
9888e877caaSKent Overstreet 		/*
9898e877caaSKent Overstreet 		 * We're deleting the root of a snapshot tree: update the
9908e877caaSKent Overstreet 		 * snapshot_tree entry to point to the new root, or delete it if
9918e877caaSKent Overstreet 		 * this is the last snapshot ID in this tree:
9928e877caaSKent Overstreet 		 */
9938e877caaSKent Overstreet 		struct bkey_i_snapshot_tree *s_t;
9948e877caaSKent Overstreet 
9958e877caaSKent Overstreet 		BUG_ON(s.v->children[1]);
9968e877caaSKent Overstreet 
9978e877caaSKent Overstreet 		s_t = bch2_bkey_get_mut_typed(trans, &tree_iter,
9988e877caaSKent Overstreet 				BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)),
9998e877caaSKent Overstreet 				0, snapshot_tree);
10008e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(s_t);
10018e877caaSKent Overstreet 		if (ret)
10028e877caaSKent Overstreet 			goto err;
10038e877caaSKent Overstreet 
10048e877caaSKent Overstreet 		if (s.v->children[0]) {
10058e877caaSKent Overstreet 			s_t->v.root_snapshot = s.v->children[0];
10068e877caaSKent Overstreet 		} else {
10078e877caaSKent Overstreet 			s_t->k.type = KEY_TYPE_deleted;
10088e877caaSKent Overstreet 			set_bkey_val_u64s(&s_t->k, 0);
10098e877caaSKent Overstreet 		}
10108e877caaSKent Overstreet 	}
10118e877caaSKent Overstreet 
10128e877caaSKent Overstreet 	ret = bch2_btree_delete_at(trans, &iter, 0);
10138e877caaSKent Overstreet err:
10148e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &tree_iter);
10158e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &p_iter);
1016f55d6e07SKent Overstreet 	bch2_trans_iter_exit(trans, &c_iter);
10178e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
10188e877caaSKent Overstreet 	return ret;
10198e877caaSKent Overstreet }
10208e877caaSKent Overstreet 
10218e877caaSKent Overstreet static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
10228e877caaSKent Overstreet 			  u32 *new_snapids,
10238e877caaSKent Overstreet 			  u32 *snapshot_subvols,
10248e877caaSKent Overstreet 			  unsigned nr_snapids)
10258e877caaSKent Overstreet {
10268e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
10278e877caaSKent Overstreet 	struct btree_iter iter;
10288e877caaSKent Overstreet 	struct bkey_i_snapshot *n;
10298e877caaSKent Overstreet 	struct bkey_s_c k;
10308e877caaSKent Overstreet 	unsigned i, j;
10318e877caaSKent Overstreet 	u32 depth = bch2_snapshot_depth(c, parent);
10328e877caaSKent Overstreet 	int ret;
10338e877caaSKent Overstreet 
10348e877caaSKent Overstreet 	bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
10358e877caaSKent Overstreet 			     POS_MIN, BTREE_ITER_INTENT);
10368e877caaSKent Overstreet 	k = bch2_btree_iter_peek(&iter);
10378e877caaSKent Overstreet 	ret = bkey_err(k);
10388e877caaSKent Overstreet 	if (ret)
10398e877caaSKent Overstreet 		goto err;
10408e877caaSKent Overstreet 
10418e877caaSKent Overstreet 	for (i = 0; i < nr_snapids; i++) {
10428e877caaSKent Overstreet 		k = bch2_btree_iter_prev_slot(&iter);
10438e877caaSKent Overstreet 		ret = bkey_err(k);
10448e877caaSKent Overstreet 		if (ret)
10458e877caaSKent Overstreet 			goto err;
10468e877caaSKent Overstreet 
10478e877caaSKent Overstreet 		if (!k.k || !k.k->p.offset) {
10488e877caaSKent Overstreet 			ret = -BCH_ERR_ENOSPC_snapshot_create;
10498e877caaSKent Overstreet 			goto err;
10508e877caaSKent Overstreet 		}
10518e877caaSKent Overstreet 
10528e877caaSKent Overstreet 		n = bch2_bkey_alloc(trans, &iter, 0, snapshot);
10538e877caaSKent Overstreet 		ret = PTR_ERR_OR_ZERO(n);
10548e877caaSKent Overstreet 		if (ret)
10558e877caaSKent Overstreet 			goto err;
10568e877caaSKent Overstreet 
10578e877caaSKent Overstreet 		n->v.flags	= 0;
10588e877caaSKent Overstreet 		n->v.parent	= cpu_to_le32(parent);
10598e877caaSKent Overstreet 		n->v.subvol	= cpu_to_le32(snapshot_subvols[i]);
10608e877caaSKent Overstreet 		n->v.tree	= cpu_to_le32(tree);
10618e877caaSKent Overstreet 		n->v.depth	= cpu_to_le32(depth);
10628e877caaSKent Overstreet 
10638e877caaSKent Overstreet 		for (j = 0; j < ARRAY_SIZE(n->v.skip); j++)
10648e877caaSKent Overstreet 			n->v.skip[j] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent));
10658e877caaSKent Overstreet 
10668e877caaSKent Overstreet 		bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_le32);
10678e877caaSKent Overstreet 		SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);
10688e877caaSKent Overstreet 
10698e877caaSKent Overstreet 		ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
10708e877caaSKent Overstreet 					 bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0);
10718e877caaSKent Overstreet 		if (ret)
10728e877caaSKent Overstreet 			goto err;
10738e877caaSKent Overstreet 
10748e877caaSKent Overstreet 		new_snapids[i]	= iter.pos.offset;
1075eebe8a84SKent Overstreet 
1076eebe8a84SKent Overstreet 		mutex_lock(&c->snapshot_table_lock);
1077eebe8a84SKent Overstreet 		snapshot_t_mut(c, new_snapids[i])->equiv = new_snapids[i];
1078eebe8a84SKent Overstreet 		mutex_unlock(&c->snapshot_table_lock);
10798e877caaSKent Overstreet 	}
10808e877caaSKent Overstreet err:
10818e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
10828e877caaSKent Overstreet 	return ret;
10838e877caaSKent Overstreet }
10848e877caaSKent Overstreet 
10858e877caaSKent Overstreet /*
10868e877caaSKent Overstreet  * Create new snapshot IDs as children of an existing snapshot ID:
10878e877caaSKent Overstreet  */
10888e877caaSKent Overstreet static int bch2_snapshot_node_create_children(struct btree_trans *trans, u32 parent,
10898e877caaSKent Overstreet 			      u32 *new_snapids,
10908e877caaSKent Overstreet 			      u32 *snapshot_subvols,
10918e877caaSKent Overstreet 			      unsigned nr_snapids)
10928e877caaSKent Overstreet {
10938e877caaSKent Overstreet 	struct btree_iter iter;
10948e877caaSKent Overstreet 	struct bkey_i_snapshot *n_parent;
10958e877caaSKent Overstreet 	int ret = 0;
10968e877caaSKent Overstreet 
10978e877caaSKent Overstreet 	n_parent = bch2_bkey_get_mut_typed(trans, &iter,
10988e877caaSKent Overstreet 			BTREE_ID_snapshots, POS(0, parent),
10998e877caaSKent Overstreet 			0, snapshot);
11008e877caaSKent Overstreet 	ret = PTR_ERR_OR_ZERO(n_parent);
11018e877caaSKent Overstreet 	if (unlikely(ret)) {
11028e877caaSKent Overstreet 		if (bch2_err_matches(ret, ENOENT))
11038e877caaSKent Overstreet 			bch_err(trans->c, "snapshot %u not found", parent);
11048e877caaSKent Overstreet 		return ret;
11058e877caaSKent Overstreet 	}
11068e877caaSKent Overstreet 
11078e877caaSKent Overstreet 	if (n_parent->v.children[0] || n_parent->v.children[1]) {
11088e877caaSKent Overstreet 		bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children");
11098e877caaSKent Overstreet 		ret = -EINVAL;
11108e877caaSKent Overstreet 		goto err;
11118e877caaSKent Overstreet 	}
11128e877caaSKent Overstreet 
11138e877caaSKent Overstreet 	ret = create_snapids(trans, parent, le32_to_cpu(n_parent->v.tree),
11148e877caaSKent Overstreet 			     new_snapids, snapshot_subvols, nr_snapids);
11158e877caaSKent Overstreet 	if (ret)
11168e877caaSKent Overstreet 		goto err;
11178e877caaSKent Overstreet 
11188e877caaSKent Overstreet 	n_parent->v.children[0] = cpu_to_le32(new_snapids[0]);
11198e877caaSKent Overstreet 	n_parent->v.children[1] = cpu_to_le32(new_snapids[1]);
11208e877caaSKent Overstreet 	n_parent->v.subvol = 0;
11218e877caaSKent Overstreet 	SET_BCH_SNAPSHOT_SUBVOL(&n_parent->v, false);
11228e877caaSKent Overstreet err:
11238e877caaSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
11248e877caaSKent Overstreet 	return ret;
11258e877caaSKent Overstreet }
11268e877caaSKent Overstreet 
11278e877caaSKent Overstreet /*
11288e877caaSKent Overstreet  * Create a snapshot node that is the root of a new tree:
11298e877caaSKent Overstreet  */
11308e877caaSKent Overstreet static int bch2_snapshot_node_create_tree(struct btree_trans *trans,
11318e877caaSKent Overstreet 			      u32 *new_snapids,
11328e877caaSKent Overstreet 			      u32 *snapshot_subvols,
11338e877caaSKent Overstreet 			      unsigned nr_snapids)
11348e877caaSKent Overstreet {
11358e877caaSKent Overstreet 	struct bkey_i_snapshot_tree *n_tree;
11368e877caaSKent Overstreet 	int ret;
11378e877caaSKent Overstreet 
11388e877caaSKent Overstreet 	n_tree = __bch2_snapshot_tree_create(trans);
11398e877caaSKent Overstreet 	ret =   PTR_ERR_OR_ZERO(n_tree) ?:
11408e877caaSKent Overstreet 		create_snapids(trans, 0, n_tree->k.p.offset,
11418e877caaSKent Overstreet 			     new_snapids, snapshot_subvols, nr_snapids);
11428e877caaSKent Overstreet 	if (ret)
11438e877caaSKent Overstreet 		return ret;
11448e877caaSKent Overstreet 
11458e877caaSKent Overstreet 	n_tree->v.master_subvol	= cpu_to_le32(snapshot_subvols[0]);
11468e877caaSKent Overstreet 	n_tree->v.root_snapshot	= cpu_to_le32(new_snapids[0]);
11478e877caaSKent Overstreet 	return 0;
11488e877caaSKent Overstreet }
11498e877caaSKent Overstreet 
11508e877caaSKent Overstreet int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
11518e877caaSKent Overstreet 			      u32 *new_snapids,
11528e877caaSKent Overstreet 			      u32 *snapshot_subvols,
11538e877caaSKent Overstreet 			      unsigned nr_snapids)
11548e877caaSKent Overstreet {
11558e877caaSKent Overstreet 	BUG_ON((parent == 0) != (nr_snapids == 1));
11568e877caaSKent Overstreet 	BUG_ON((parent != 0) != (nr_snapids == 2));
11578e877caaSKent Overstreet 
11588e877caaSKent Overstreet 	return parent
11598e877caaSKent Overstreet 		? bch2_snapshot_node_create_children(trans, parent,
11608e877caaSKent Overstreet 				new_snapids, snapshot_subvols, nr_snapids)
11618e877caaSKent Overstreet 		: bch2_snapshot_node_create_tree(trans,
11628e877caaSKent Overstreet 				new_snapids, snapshot_subvols, nr_snapids);
11638e877caaSKent Overstreet 
11648e877caaSKent Overstreet }
11658e877caaSKent Overstreet 
11668e877caaSKent Overstreet /*
11678e877caaSKent Overstreet  * If we have an unlinked inode in an internal snapshot node, and the inode
11688e877caaSKent Overstreet  * really has been deleted in all child snapshots, how does this get cleaned up?
11698e877caaSKent Overstreet  *
11708e877caaSKent Overstreet  * first there is the problem of how keys that have been overwritten in all
11718e877caaSKent Overstreet  * child snapshots get deleted (unimplemented?), but inodes may perhaps be
11728e877caaSKent Overstreet  * special?
11738e877caaSKent Overstreet  *
11748e877caaSKent Overstreet  * also: unlinked inode in internal snapshot appears to not be getting deleted
11758e877caaSKent Overstreet  * correctly if inode doesn't exist in leaf snapshots
1176f55d6e07SKent Overstreet  *
1177f55d6e07SKent Overstreet  * solution:
1178f55d6e07SKent Overstreet  *
1179f55d6e07SKent Overstreet  * for a key in an interior snapshot node that needs work to be done that
1180f55d6e07SKent Overstreet  * requires it to be mutated: iterate over all descendent leaf nodes and copy
1181f55d6e07SKent Overstreet  * that key to snapshot leaf nodes, where we can mutate it
11828e877caaSKent Overstreet  */
11838e877caaSKent Overstreet 
11848e877caaSKent Overstreet static int snapshot_delete_key(struct btree_trans *trans,
11858e877caaSKent Overstreet 			       struct btree_iter *iter,
11868e877caaSKent Overstreet 			       struct bkey_s_c k,
11878e877caaSKent Overstreet 			       snapshot_id_list *deleted,
11888e877caaSKent Overstreet 			       snapshot_id_list *equiv_seen,
11898e877caaSKent Overstreet 			       struct bpos *last_pos)
11908e877caaSKent Overstreet {
11918e877caaSKent Overstreet 	struct bch_fs *c = trans->c;
11928e877caaSKent Overstreet 	u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
11938e877caaSKent Overstreet 
11948e877caaSKent Overstreet 	if (!bkey_eq(k.k->p, *last_pos))
11958e877caaSKent Overstreet 		equiv_seen->nr = 0;
11968e877caaSKent Overstreet 	*last_pos = k.k->p;
11978e877caaSKent Overstreet 
11988e877caaSKent Overstreet 	if (snapshot_list_has_id(deleted, k.k->p.snapshot) ||
11998e877caaSKent Overstreet 	    snapshot_list_has_id(equiv_seen, equiv)) {
12008e877caaSKent Overstreet 		return bch2_btree_delete_at(trans, iter,
12018e877caaSKent Overstreet 					    BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
12028e877caaSKent Overstreet 	} else {
12038e877caaSKent Overstreet 		return snapshot_list_add(c, equiv_seen, equiv);
12048e877caaSKent Overstreet 	}
12058e877caaSKent Overstreet }
12068e877caaSKent Overstreet 
1207f55d6e07SKent Overstreet static int move_key_to_correct_snapshot(struct btree_trans *trans,
1208f55d6e07SKent Overstreet 			       struct btree_iter *iter,
1209f55d6e07SKent Overstreet 			       struct bkey_s_c k)
1210f55d6e07SKent Overstreet {
1211f55d6e07SKent Overstreet 	struct bch_fs *c = trans->c;
1212f55d6e07SKent Overstreet 	u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
1213f55d6e07SKent Overstreet 
1214f55d6e07SKent Overstreet 	/*
1215f55d6e07SKent Overstreet 	 * When we have a linear chain of snapshot nodes, we consider
1216f55d6e07SKent Overstreet 	 * those to form an equivalence class: we're going to collapse
1217f55d6e07SKent Overstreet 	 * them all down to a single node, and keep the leaf-most node -
1218f55d6e07SKent Overstreet 	 * which has the same id as the equivalence class id.
1219f55d6e07SKent Overstreet 	 *
1220f55d6e07SKent Overstreet 	 * If there are multiple keys in different snapshots at the same
1221f55d6e07SKent Overstreet 	 * position, we're only going to keep the one in the newest
1222f55d6e07SKent Overstreet 	 * snapshot - the rest have been overwritten and are redundant,
1223f55d6e07SKent Overstreet 	 * and for the key we're going to keep we need to move it to the
1224f55d6e07SKent Overstreet 	 * equivalance class ID if it's not there already.
1225f55d6e07SKent Overstreet 	 */
1226f55d6e07SKent Overstreet 	if (equiv != k.k->p.snapshot) {
1227f55d6e07SKent Overstreet 		struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
1228f55d6e07SKent Overstreet 		struct btree_iter new_iter;
1229f55d6e07SKent Overstreet 		int ret;
1230f55d6e07SKent Overstreet 
1231f55d6e07SKent Overstreet 		ret = PTR_ERR_OR_ZERO(new);
1232f55d6e07SKent Overstreet 		if (ret)
1233f55d6e07SKent Overstreet 			return ret;
1234f55d6e07SKent Overstreet 
1235f55d6e07SKent Overstreet 		new->k.p.snapshot = equiv;
1236f55d6e07SKent Overstreet 
1237f55d6e07SKent Overstreet 		bch2_trans_iter_init(trans, &new_iter, iter->btree_id, new->k.p,
1238f55d6e07SKent Overstreet 				     BTREE_ITER_ALL_SNAPSHOTS|
1239f55d6e07SKent Overstreet 				     BTREE_ITER_CACHED|
1240f55d6e07SKent Overstreet 				     BTREE_ITER_INTENT);
1241f55d6e07SKent Overstreet 
1242f55d6e07SKent Overstreet 		ret =   bch2_btree_iter_traverse(&new_iter) ?:
1243f55d6e07SKent Overstreet 			bch2_trans_update(trans, &new_iter, new,
1244f55d6e07SKent Overstreet 					BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
1245f55d6e07SKent Overstreet 			bch2_btree_delete_at(trans, iter,
1246f55d6e07SKent Overstreet 					BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
1247f55d6e07SKent Overstreet 		bch2_trans_iter_exit(trans, &new_iter);
1248f55d6e07SKent Overstreet 		if (ret)
1249f55d6e07SKent Overstreet 			return ret;
1250f55d6e07SKent Overstreet 	}
1251f55d6e07SKent Overstreet 
1252f55d6e07SKent Overstreet 	return 0;
1253f55d6e07SKent Overstreet }
1254f55d6e07SKent Overstreet 
1255*b0b5bbf9SKent Overstreet static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c k)
12568e877caaSKent Overstreet {
12578e877caaSKent Overstreet 	struct bkey_s_c_snapshot snap;
12588e877caaSKent Overstreet 	u32 children[2];
12598e877caaSKent Overstreet 	int ret;
12608e877caaSKent Overstreet 
12618e877caaSKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot)
12628e877caaSKent Overstreet 		return 0;
12638e877caaSKent Overstreet 
12648e877caaSKent Overstreet 	snap = bkey_s_c_to_snapshot(k);
12658e877caaSKent Overstreet 	if (BCH_SNAPSHOT_DELETED(snap.v) ||
12668e877caaSKent Overstreet 	    BCH_SNAPSHOT_SUBVOL(snap.v))
12678e877caaSKent Overstreet 		return 0;
12688e877caaSKent Overstreet 
12698e877caaSKent Overstreet 	children[0] = le32_to_cpu(snap.v->children[0]);
12708e877caaSKent Overstreet 	children[1] = le32_to_cpu(snap.v->children[1]);
12718e877caaSKent Overstreet 
12728e877caaSKent Overstreet 	ret   = bch2_snapshot_live(trans, children[0]) ?:
12738e877caaSKent Overstreet 		bch2_snapshot_live(trans, children[1]);
12748e877caaSKent Overstreet 	if (ret < 0)
12758e877caaSKent Overstreet 		return ret;
1276*b0b5bbf9SKent Overstreet 	return !ret;
1277*b0b5bbf9SKent Overstreet }
12788e877caaSKent Overstreet 
1279*b0b5bbf9SKent Overstreet /*
1280*b0b5bbf9SKent Overstreet  * For a given snapshot, if it doesn't have a subvolume that points to it, and
1281*b0b5bbf9SKent Overstreet  * it doesn't have child snapshot nodes - it's now redundant and we can mark it
1282*b0b5bbf9SKent Overstreet  * as deleted.
1283*b0b5bbf9SKent Overstreet  */
1284*b0b5bbf9SKent Overstreet static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct bkey_s_c k)
1285*b0b5bbf9SKent Overstreet {
1286*b0b5bbf9SKent Overstreet 	int ret = bch2_snapshot_needs_delete(trans, k);
1287*b0b5bbf9SKent Overstreet 
1288*b0b5bbf9SKent Overstreet 	return ret <= 0
1289*b0b5bbf9SKent Overstreet 		? ret
1290*b0b5bbf9SKent Overstreet 		: bch2_snapshot_node_set_deleted(trans, k.k->p.offset);
12918e877caaSKent Overstreet }
12928e877caaSKent Overstreet 
1293f55d6e07SKent Overstreet static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
1294f55d6e07SKent Overstreet 						snapshot_id_list *skip)
1295f55d6e07SKent Overstreet {
1296f55d6e07SKent Overstreet 	rcu_read_lock();
12971e2d3999SKent Overstreet 	while (snapshot_list_has_id(skip, id))
12981e2d3999SKent Overstreet 		id = __bch2_snapshot_parent(c, id);
12991e2d3999SKent Overstreet 
1300f55d6e07SKent Overstreet 	while (n--) {
1301f55d6e07SKent Overstreet 		do {
1302f55d6e07SKent Overstreet 			id = __bch2_snapshot_parent(c, id);
1303f55d6e07SKent Overstreet 		} while (snapshot_list_has_id(skip, id));
1304f55d6e07SKent Overstreet 	}
1305f55d6e07SKent Overstreet 	rcu_read_unlock();
1306f55d6e07SKent Overstreet 
1307f55d6e07SKent Overstreet 	return id;
1308f55d6e07SKent Overstreet }
1309f55d6e07SKent Overstreet 
1310f55d6e07SKent Overstreet static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
1311f55d6e07SKent Overstreet 					      struct btree_iter *iter, struct bkey_s_c k,
1312f55d6e07SKent Overstreet 					      snapshot_id_list *deleted)
1313f55d6e07SKent Overstreet {
1314f55d6e07SKent Overstreet 	struct bch_fs *c = trans->c;
1315f55d6e07SKent Overstreet 	u32 nr_deleted_ancestors = 0;
1316f55d6e07SKent Overstreet 	struct bkey_i_snapshot *s;
1317f55d6e07SKent Overstreet 	u32 *i;
1318f55d6e07SKent Overstreet 	int ret;
1319f55d6e07SKent Overstreet 
1320f55d6e07SKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot)
1321f55d6e07SKent Overstreet 		return 0;
1322f55d6e07SKent Overstreet 
1323f55d6e07SKent Overstreet 	if (snapshot_list_has_id(deleted, k.k->p.offset))
1324f55d6e07SKent Overstreet 		return 0;
1325f55d6e07SKent Overstreet 
1326f55d6e07SKent Overstreet 	s = bch2_bkey_make_mut_noupdate_typed(trans, k, snapshot);
1327f55d6e07SKent Overstreet 	ret = PTR_ERR_OR_ZERO(s);
1328f55d6e07SKent Overstreet 	if (ret)
1329f55d6e07SKent Overstreet 		return ret;
1330f55d6e07SKent Overstreet 
1331f55d6e07SKent Overstreet 	darray_for_each(*deleted, i)
1332f55d6e07SKent Overstreet 		nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, *i);
1333f55d6e07SKent Overstreet 
1334f55d6e07SKent Overstreet 	if (!nr_deleted_ancestors)
1335f55d6e07SKent Overstreet 		return 0;
1336f55d6e07SKent Overstreet 
1337f55d6e07SKent Overstreet 	le32_add_cpu(&s->v.depth, -nr_deleted_ancestors);
1338f55d6e07SKent Overstreet 
1339f55d6e07SKent Overstreet 	if (!s->v.depth) {
1340f55d6e07SKent Overstreet 		s->v.skip[0] = 0;
1341f55d6e07SKent Overstreet 		s->v.skip[1] = 0;
1342f55d6e07SKent Overstreet 		s->v.skip[2] = 0;
1343f55d6e07SKent Overstreet 	} else {
1344f55d6e07SKent Overstreet 		u32 depth = le32_to_cpu(s->v.depth);
1345f55d6e07SKent Overstreet 		u32 parent = bch2_snapshot_parent(c, s->k.p.offset);
1346f55d6e07SKent Overstreet 
1347f55d6e07SKent Overstreet 		for (unsigned j = 0; j < ARRAY_SIZE(s->v.skip); j++) {
1348f55d6e07SKent Overstreet 			u32 id = le32_to_cpu(s->v.skip[j]);
1349f55d6e07SKent Overstreet 
1350f55d6e07SKent Overstreet 			if (snapshot_list_has_id(deleted, id)) {
1351f55d6e07SKent Overstreet 				id = depth > 1
1352f55d6e07SKent Overstreet 					? bch2_snapshot_nth_parent_skip(c,
1353f55d6e07SKent Overstreet 							parent,
1354f55d6e07SKent Overstreet 							get_random_u32_below(depth - 1),
1355f55d6e07SKent Overstreet 							deleted)
1356f55d6e07SKent Overstreet 					: parent;
1357f55d6e07SKent Overstreet 				s->v.skip[j] = cpu_to_le32(id);
1358f55d6e07SKent Overstreet 			}
1359f55d6e07SKent Overstreet 		}
1360f55d6e07SKent Overstreet 
1361f55d6e07SKent Overstreet 		bubble_sort(s->v.skip, ARRAY_SIZE(s->v.skip), cmp_le32);
1362f55d6e07SKent Overstreet 	}
1363f55d6e07SKent Overstreet 
1364f55d6e07SKent Overstreet 	return bch2_trans_update(trans, iter, &s->k_i, 0);
1365f55d6e07SKent Overstreet }
1366f55d6e07SKent Overstreet 
13678e877caaSKent Overstreet int bch2_delete_dead_snapshots(struct bch_fs *c)
13688e877caaSKent Overstreet {
13696bd68ec2SKent Overstreet 	struct btree_trans *trans;
13708e877caaSKent Overstreet 	struct btree_iter iter;
13718e877caaSKent Overstreet 	struct bkey_s_c k;
13728e877caaSKent Overstreet 	struct bkey_s_c_snapshot snap;
13738e877caaSKent Overstreet 	snapshot_id_list deleted = { 0 };
1374f55d6e07SKent Overstreet 	snapshot_id_list deleted_interior = { 0 };
1375f55d6e07SKent Overstreet 	u32 *i, id;
13768e877caaSKent Overstreet 	int ret = 0;
13778e877caaSKent Overstreet 
1378*b0b5bbf9SKent Overstreet 	if (!test_and_clear_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags))
1379*b0b5bbf9SKent Overstreet 		return 0;
1380*b0b5bbf9SKent Overstreet 
13818e877caaSKent Overstreet 	if (!test_bit(BCH_FS_STARTED, &c->flags)) {
13828e877caaSKent Overstreet 		ret = bch2_fs_read_write_early(c);
13838e877caaSKent Overstreet 		if (ret) {
13846bf3766bSColin Ian King 			bch_err_msg(c, ret, "deleting dead snapshots: error going rw");
13858e877caaSKent Overstreet 			return ret;
13868e877caaSKent Overstreet 		}
13878e877caaSKent Overstreet 	}
13888e877caaSKent Overstreet 
13896bd68ec2SKent Overstreet 	trans = bch2_trans_get(c);
13908e877caaSKent Overstreet 
13918e877caaSKent Overstreet 	/*
13928e877caaSKent Overstreet 	 * For every snapshot node: If we have no live children and it's not
13938e877caaSKent Overstreet 	 * pointed to by a subvolume, delete it:
13948e877caaSKent Overstreet 	 */
13956bd68ec2SKent Overstreet 	ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots,
13968e877caaSKent Overstreet 			POS_MIN, 0, k,
13978e877caaSKent Overstreet 			NULL, NULL, 0,
1398*b0b5bbf9SKent Overstreet 		bch2_delete_redundant_snapshot(trans, k));
13998e877caaSKent Overstreet 	if (ret) {
1400e46c181aSKent Overstreet 		bch_err_msg(c, ret, "deleting redundant snapshots");
14018e877caaSKent Overstreet 		goto err;
14028e877caaSKent Overstreet 	}
14038e877caaSKent Overstreet 
1404d67a72bfSDan Carpenter 	ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots,
14058e877caaSKent Overstreet 				  POS_MIN, 0, k,
14066bd68ec2SKent Overstreet 		bch2_snapshot_set_equiv(trans, k));
14078e877caaSKent Overstreet 	if (ret) {
1408e46c181aSKent Overstreet 		bch_err_msg(c, ret, "in bch2_snapshots_set_equiv");
14098e877caaSKent Overstreet 		goto err;
14108e877caaSKent Overstreet 	}
14118e877caaSKent Overstreet 
14126bd68ec2SKent Overstreet 	for_each_btree_key(trans, iter, BTREE_ID_snapshots,
14138e877caaSKent Overstreet 			   POS_MIN, 0, k, ret) {
14148e877caaSKent Overstreet 		if (k.k->type != KEY_TYPE_snapshot)
14158e877caaSKent Overstreet 			continue;
14168e877caaSKent Overstreet 
14178e877caaSKent Overstreet 		snap = bkey_s_c_to_snapshot(k);
14188e877caaSKent Overstreet 		if (BCH_SNAPSHOT_DELETED(snap.v)) {
14198e877caaSKent Overstreet 			ret = snapshot_list_add(c, &deleted, k.k->p.offset);
14208e877caaSKent Overstreet 			if (ret)
14218e877caaSKent Overstreet 				break;
14228e877caaSKent Overstreet 		}
14238e877caaSKent Overstreet 	}
14246bd68ec2SKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
14258e877caaSKent Overstreet 
14268e877caaSKent Overstreet 	if (ret) {
14278e877caaSKent Overstreet 		bch_err_msg(c, ret, "walking snapshots");
14288e877caaSKent Overstreet 		goto err;
14298e877caaSKent Overstreet 	}
14308e877caaSKent Overstreet 
14318e877caaSKent Overstreet 	for (id = 0; id < BTREE_ID_NR; id++) {
14328e877caaSKent Overstreet 		struct bpos last_pos = POS_MIN;
14338e877caaSKent Overstreet 		snapshot_id_list equiv_seen = { 0 };
1434f55d6e07SKent Overstreet 		struct disk_reservation res = { 0 };
14358e877caaSKent Overstreet 
14368e877caaSKent Overstreet 		if (!btree_type_has_snapshots(id))
14378e877caaSKent Overstreet 			continue;
14388e877caaSKent Overstreet 
14396bd68ec2SKent Overstreet 		ret = for_each_btree_key_commit(trans, iter,
14408e877caaSKent Overstreet 				id, POS_MIN,
14418e877caaSKent Overstreet 				BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
1442f55d6e07SKent Overstreet 				&res, NULL, BTREE_INSERT_NOFAIL,
14436bd68ec2SKent Overstreet 			snapshot_delete_key(trans, &iter, k, &deleted, &equiv_seen, &last_pos)) ?:
14446bd68ec2SKent Overstreet 		      for_each_btree_key_commit(trans, iter,
1445f55d6e07SKent Overstreet 				id, POS_MIN,
1446f55d6e07SKent Overstreet 				BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
1447f55d6e07SKent Overstreet 				&res, NULL, BTREE_INSERT_NOFAIL,
14486bd68ec2SKent Overstreet 			move_key_to_correct_snapshot(trans, &iter, k));
14498e877caaSKent Overstreet 
1450f55d6e07SKent Overstreet 		bch2_disk_reservation_put(c, &res);
14518e877caaSKent Overstreet 		darray_exit(&equiv_seen);
14528e877caaSKent Overstreet 
14538e877caaSKent Overstreet 		if (ret) {
14548e877caaSKent Overstreet 			bch_err_msg(c, ret, "deleting keys from dying snapshots");
14558e877caaSKent Overstreet 			goto err;
14568e877caaSKent Overstreet 		}
14578e877caaSKent Overstreet 	}
14588e877caaSKent Overstreet 
14590dd092bfSKent Overstreet 	bch2_trans_unlock(trans);
146037fad949SKent Overstreet 	down_write(&c->snapshot_create_lock);
146137fad949SKent Overstreet 
14626bd68ec2SKent Overstreet 	for_each_btree_key(trans, iter, BTREE_ID_snapshots,
1463f55d6e07SKent Overstreet 			   POS_MIN, 0, k, ret) {
1464f55d6e07SKent Overstreet 		u32 snapshot = k.k->p.offset;
1465f55d6e07SKent Overstreet 		u32 equiv = bch2_snapshot_equiv(c, snapshot);
14668e877caaSKent Overstreet 
1467f55d6e07SKent Overstreet 		if (equiv != snapshot)
1468f55d6e07SKent Overstreet 			snapshot_list_add(c, &deleted_interior, snapshot);
1469f55d6e07SKent Overstreet 	}
14706bd68ec2SKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
1471f55d6e07SKent Overstreet 
147237fad949SKent Overstreet 	if (ret)
147337fad949SKent Overstreet 		goto err_create_lock;
147437fad949SKent Overstreet 
1475f55d6e07SKent Overstreet 	/*
1476f55d6e07SKent Overstreet 	 * Fixing children of deleted snapshots can't be done completely
1477f55d6e07SKent Overstreet 	 * atomically, if we crash between here and when we delete the interior
1478f55d6e07SKent Overstreet 	 * nodes some depth fields will be off:
1479f55d6e07SKent Overstreet 	 */
14806bd68ec2SKent Overstreet 	ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
1481f55d6e07SKent Overstreet 				  BTREE_ITER_INTENT, k,
1482f55d6e07SKent Overstreet 				  NULL, NULL, BTREE_INSERT_NOFAIL,
14836bd68ec2SKent Overstreet 		bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior));
1484f55d6e07SKent Overstreet 	if (ret)
148537fad949SKent Overstreet 		goto err_create_lock;
1486f55d6e07SKent Overstreet 
1487f55d6e07SKent Overstreet 	darray_for_each(deleted, i) {
14886bd68ec2SKent Overstreet 		ret = commit_do(trans, NULL, NULL, 0,
14896bd68ec2SKent Overstreet 			bch2_snapshot_node_delete(trans, *i));
14908e877caaSKent Overstreet 		if (ret) {
1491f55d6e07SKent Overstreet 			bch_err_msg(c, ret, "deleting snapshot %u", *i);
149237fad949SKent Overstreet 			goto err_create_lock;
1493f55d6e07SKent Overstreet 		}
1494f55d6e07SKent Overstreet 	}
1495f55d6e07SKent Overstreet 
1496f55d6e07SKent Overstreet 	darray_for_each(deleted_interior, i) {
14976bd68ec2SKent Overstreet 		ret = commit_do(trans, NULL, NULL, 0,
14986bd68ec2SKent Overstreet 			bch2_snapshot_node_delete(trans, *i));
1499f55d6e07SKent Overstreet 		if (ret) {
1500f55d6e07SKent Overstreet 			bch_err_msg(c, ret, "deleting snapshot %u", *i);
150137fad949SKent Overstreet 			goto err_create_lock;
15028e877caaSKent Overstreet 		}
15038e877caaSKent Overstreet 	}
150437fad949SKent Overstreet err_create_lock:
150537fad949SKent Overstreet 	up_write(&c->snapshot_create_lock);
15068e877caaSKent Overstreet err:
1507f55d6e07SKent Overstreet 	darray_exit(&deleted_interior);
15088e877caaSKent Overstreet 	darray_exit(&deleted);
15096bd68ec2SKent Overstreet 	bch2_trans_put(trans);
15108e877caaSKent Overstreet 	if (ret)
15118e877caaSKent Overstreet 		bch_err_fn(c, ret);
15128e877caaSKent Overstreet 	return ret;
15138e877caaSKent Overstreet }
15148e877caaSKent Overstreet 
15158e877caaSKent Overstreet void bch2_delete_dead_snapshots_work(struct work_struct *work)
15168e877caaSKent Overstreet {
15178e877caaSKent Overstreet 	struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
15188e877caaSKent Overstreet 
15198e877caaSKent Overstreet 	bch2_delete_dead_snapshots(c);
15208e877caaSKent Overstreet 	bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
15218e877caaSKent Overstreet }
15228e877caaSKent Overstreet 
15238e877caaSKent Overstreet void bch2_delete_dead_snapshots_async(struct bch_fs *c)
15248e877caaSKent Overstreet {
15258e877caaSKent Overstreet 	if (bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) &&
15268e877caaSKent Overstreet 	    !queue_work(c->write_ref_wq, &c->snapshot_delete_work))
15278e877caaSKent Overstreet 		bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
15288e877caaSKent Overstreet }
15298e877caaSKent Overstreet 
1530fa5bed37SKent Overstreet int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
1531fa5bed37SKent Overstreet 				       enum btree_id id,
1532fa5bed37SKent Overstreet 				       struct bpos pos)
1533fa5bed37SKent Overstreet {
1534fa5bed37SKent Overstreet 	struct bch_fs *c = trans->c;
1535fa5bed37SKent Overstreet 	struct btree_iter iter;
1536fa5bed37SKent Overstreet 	struct bkey_s_c k;
1537fa5bed37SKent Overstreet 	int ret;
1538fa5bed37SKent Overstreet 
1539fa5bed37SKent Overstreet 	bch2_trans_iter_init(trans, &iter, id, pos,
1540fa5bed37SKent Overstreet 			     BTREE_ITER_NOT_EXTENTS|
1541fa5bed37SKent Overstreet 			     BTREE_ITER_ALL_SNAPSHOTS);
1542fa5bed37SKent Overstreet 	while (1) {
1543fa5bed37SKent Overstreet 		k = bch2_btree_iter_prev(&iter);
1544fa5bed37SKent Overstreet 		ret = bkey_err(k);
1545fa5bed37SKent Overstreet 		if (ret)
1546fa5bed37SKent Overstreet 			break;
1547fa5bed37SKent Overstreet 
1548fa5bed37SKent Overstreet 		if (!k.k)
1549fa5bed37SKent Overstreet 			break;
1550fa5bed37SKent Overstreet 
1551fa5bed37SKent Overstreet 		if (!bkey_eq(pos, k.k->p))
1552fa5bed37SKent Overstreet 			break;
1553fa5bed37SKent Overstreet 
1554fa5bed37SKent Overstreet 		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot)) {
1555fa5bed37SKent Overstreet 			ret = 1;
1556fa5bed37SKent Overstreet 			break;
1557fa5bed37SKent Overstreet 		}
1558fa5bed37SKent Overstreet 	}
1559fa5bed37SKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
1560fa5bed37SKent Overstreet 
1561fa5bed37SKent Overstreet 	return ret;
1562fa5bed37SKent Overstreet }
1563fa5bed37SKent Overstreet 
1564a111901fSKent Overstreet static u32 bch2_snapshot_smallest_child(struct bch_fs *c, u32 id)
1565a111901fSKent Overstreet {
1566a111901fSKent Overstreet 	const struct snapshot_t *s = snapshot_t(c, id);
1567a111901fSKent Overstreet 
1568a111901fSKent Overstreet 	return s->children[1] ?: s->children[0];
1569a111901fSKent Overstreet }
1570a111901fSKent Overstreet 
1571a111901fSKent Overstreet static u32 bch2_snapshot_smallest_descendent(struct bch_fs *c, u32 id)
1572a111901fSKent Overstreet {
1573a111901fSKent Overstreet 	u32 child;
1574a111901fSKent Overstreet 
1575a111901fSKent Overstreet 	while ((child = bch2_snapshot_smallest_child(c, id)))
1576a111901fSKent Overstreet 		id = child;
1577a111901fSKent Overstreet 	return id;
1578a111901fSKent Overstreet }
1579a111901fSKent Overstreet 
1580a111901fSKent Overstreet static int bch2_propagate_key_to_snapshot_leaf(struct btree_trans *trans,
1581a111901fSKent Overstreet 					       enum btree_id btree,
1582a111901fSKent Overstreet 					       struct bkey_s_c interior_k,
1583a111901fSKent Overstreet 					       u32 leaf_id, struct bpos *new_min_pos)
1584a111901fSKent Overstreet {
1585a111901fSKent Overstreet 	struct btree_iter iter;
1586a111901fSKent Overstreet 	struct bpos pos = interior_k.k->p;
1587a111901fSKent Overstreet 	struct bkey_s_c k;
1588a111901fSKent Overstreet 	struct bkey_i *new;
1589a111901fSKent Overstreet 	int ret;
1590a111901fSKent Overstreet 
1591a111901fSKent Overstreet 	pos.snapshot = leaf_id;
1592a111901fSKent Overstreet 
1593a111901fSKent Overstreet 	bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_INTENT);
1594a111901fSKent Overstreet 	k = bch2_btree_iter_peek_slot(&iter);
1595a111901fSKent Overstreet 	ret = bkey_err(k);
1596a111901fSKent Overstreet 	if (ret)
1597a111901fSKent Overstreet 		goto out;
1598a111901fSKent Overstreet 
1599a111901fSKent Overstreet 	/* key already overwritten in this snapshot? */
1600a111901fSKent Overstreet 	if (k.k->p.snapshot != interior_k.k->p.snapshot)
1601a111901fSKent Overstreet 		goto out;
1602a111901fSKent Overstreet 
1603a111901fSKent Overstreet 	if (bpos_eq(*new_min_pos, POS_MIN)) {
1604a111901fSKent Overstreet 		*new_min_pos = k.k->p;
1605a111901fSKent Overstreet 		new_min_pos->snapshot = leaf_id;
1606a111901fSKent Overstreet 	}
1607a111901fSKent Overstreet 
1608a111901fSKent Overstreet 	new = bch2_bkey_make_mut_noupdate(trans, interior_k);
1609a111901fSKent Overstreet 	ret = PTR_ERR_OR_ZERO(new);
1610a111901fSKent Overstreet 	if (ret)
1611a111901fSKent Overstreet 		goto out;
1612a111901fSKent Overstreet 
1613a111901fSKent Overstreet 	new->k.p.snapshot = leaf_id;
1614a111901fSKent Overstreet 	ret = bch2_trans_update(trans, &iter, new, 0);
1615a111901fSKent Overstreet out:
1616a111901fSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
1617a111901fSKent Overstreet 	return ret;
1618a111901fSKent Overstreet }
1619a111901fSKent Overstreet 
1620a111901fSKent Overstreet int bch2_propagate_key_to_snapshot_leaves(struct btree_trans *trans,
1621a111901fSKent Overstreet 					  enum btree_id btree,
1622a111901fSKent Overstreet 					  struct bkey_s_c k,
1623a111901fSKent Overstreet 					  struct bpos *new_min_pos)
1624a111901fSKent Overstreet {
1625a111901fSKent Overstreet 	struct bch_fs *c = trans->c;
1626a111901fSKent Overstreet 	struct bkey_buf sk;
1627c872afa2SKent Overstreet 	u32 restart_count = trans->restart_count;
1628d04fdf5cSKent Overstreet 	int ret = 0;
1629a111901fSKent Overstreet 
1630a111901fSKent Overstreet 	bch2_bkey_buf_init(&sk);
1631a111901fSKent Overstreet 	bch2_bkey_buf_reassemble(&sk, c, k);
1632a111901fSKent Overstreet 	k = bkey_i_to_s_c(sk.k);
1633a111901fSKent Overstreet 
1634a111901fSKent Overstreet 	*new_min_pos = POS_MIN;
1635a111901fSKent Overstreet 
1636a111901fSKent Overstreet 	for (u32 id = bch2_snapshot_smallest_descendent(c, k.k->p.snapshot);
1637a111901fSKent Overstreet 	     id < k.k->p.snapshot;
1638a111901fSKent Overstreet 	     id++) {
1639a111901fSKent Overstreet 		if (!bch2_snapshot_is_ancestor(c, id, k.k->p.snapshot) ||
1640a111901fSKent Overstreet 		    !bch2_snapshot_is_leaf(c, id))
1641a111901fSKent Overstreet 			continue;
1642d281701bSKent Overstreet again:
1643d281701bSKent Overstreet 		ret =   btree_trans_too_many_iters(trans) ?:
1644d281701bSKent Overstreet 			bch2_propagate_key_to_snapshot_leaf(trans, btree, k, id, new_min_pos) ?:
1645d281701bSKent Overstreet 			bch2_trans_commit(trans, NULL, NULL, 0);
1646d281701bSKent Overstreet 		if (ret && bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
1647d281701bSKent Overstreet 			bch2_trans_begin(trans);
1648d281701bSKent Overstreet 			goto again;
1649d281701bSKent Overstreet 		}
1650a111901fSKent Overstreet 
1651a111901fSKent Overstreet 		if (ret)
1652a111901fSKent Overstreet 			break;
1653a111901fSKent Overstreet 	}
1654a111901fSKent Overstreet 
1655a111901fSKent Overstreet 	bch2_bkey_buf_exit(&sk, c);
1656c872afa2SKent Overstreet 
1657c872afa2SKent Overstreet 	return ret ?: trans_was_restarted(trans, restart_count);
1658a111901fSKent Overstreet }
1659a111901fSKent Overstreet 
1660*b0b5bbf9SKent Overstreet static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k)
1661*b0b5bbf9SKent Overstreet {
1662*b0b5bbf9SKent Overstreet 	struct bch_fs *c = trans->c;
1663*b0b5bbf9SKent Overstreet 	struct bkey_s_c_snapshot snap;
1664*b0b5bbf9SKent Overstreet 	int ret = 0;
1665*b0b5bbf9SKent Overstreet 
1666*b0b5bbf9SKent Overstreet 	if (k.k->type != KEY_TYPE_snapshot)
1667*b0b5bbf9SKent Overstreet 		return 0;
1668*b0b5bbf9SKent Overstreet 
1669*b0b5bbf9SKent Overstreet 	snap = bkey_s_c_to_snapshot(k);
1670*b0b5bbf9SKent Overstreet 	if (BCH_SNAPSHOT_DELETED(snap.v) ||
1671*b0b5bbf9SKent Overstreet 	    bch2_snapshot_equiv(c, k.k->p.offset) != k.k->p.offset ||
1672*b0b5bbf9SKent Overstreet 	    (ret = bch2_snapshot_needs_delete(trans, k)) > 0) {
1673*b0b5bbf9SKent Overstreet 		set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags);
1674*b0b5bbf9SKent Overstreet 		return 0;
1675*b0b5bbf9SKent Overstreet 	}
1676*b0b5bbf9SKent Overstreet 
1677*b0b5bbf9SKent Overstreet 	return ret;
1678*b0b5bbf9SKent Overstreet }
1679*b0b5bbf9SKent Overstreet 
16808e877caaSKent Overstreet int bch2_snapshots_read(struct bch_fs *c)
16818e877caaSKent Overstreet {
16828e877caaSKent Overstreet 	struct btree_iter iter;
16838e877caaSKent Overstreet 	struct bkey_s_c k;
16848e877caaSKent Overstreet 	int ret = 0;
16858e877caaSKent Overstreet 
16868e877caaSKent Overstreet 	ret = bch2_trans_run(c,
16876bd68ec2SKent Overstreet 		for_each_btree_key2(trans, iter, BTREE_ID_snapshots,
16888e877caaSKent Overstreet 			   POS_MIN, 0, k,
16896bd68ec2SKent Overstreet 			bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
1690*b0b5bbf9SKent Overstreet 			bch2_snapshot_set_equiv(trans, k) ?:
1691*b0b5bbf9SKent Overstreet 			bch2_check_snapshot_needs_deletion(trans, k)) ?:
16926bd68ec2SKent Overstreet 		for_each_btree_key2(trans, iter, BTREE_ID_snapshots,
169366487c54SKent Overstreet 			   POS_MIN, 0, k,
169466487c54SKent Overstreet 			   (set_is_ancestor_bitmap(c, k.k->p.offset), 0)));
16958e877caaSKent Overstreet 	if (ret)
16968e877caaSKent Overstreet 		bch_err_fn(c, ret);
16978e877caaSKent Overstreet 	return ret;
16988e877caaSKent Overstreet }
16998e877caaSKent Overstreet 
17008e877caaSKent Overstreet void bch2_fs_snapshots_exit(struct bch_fs *c)
17018e877caaSKent Overstreet {
17028e877caaSKent Overstreet 	kfree(rcu_dereference_protected(c->snapshots, true));
17038e877caaSKent Overstreet }
1704