18e877caaSKent Overstreet // SPDX-License-Identifier: GPL-2.0 28e877caaSKent Overstreet 38e877caaSKent Overstreet #include "bcachefs.h" 4a111901fSKent Overstreet #include "bkey_buf.h" 58e877caaSKent Overstreet #include "btree_key_cache.h" 68e877caaSKent Overstreet #include "btree_update.h" 7f55d6e07SKent Overstreet #include "buckets.h" 88e877caaSKent Overstreet #include "errcode.h" 98e877caaSKent Overstreet #include "error.h" 108e877caaSKent Overstreet #include "fs.h" 118e877caaSKent Overstreet #include "snapshot.h" 128e877caaSKent Overstreet 138e877caaSKent Overstreet #include <linux/random.h> 148e877caaSKent Overstreet 158e877caaSKent Overstreet /* 168e877caaSKent Overstreet * Snapshot trees: 178e877caaSKent Overstreet * 188e877caaSKent Overstreet * Keys in BTREE_ID_snapshot_trees identify a whole tree of snapshot nodes; they 198e877caaSKent Overstreet * exist to provide a stable identifier for the whole lifetime of a snapshot 208e877caaSKent Overstreet * tree. 218e877caaSKent Overstreet */ 228e877caaSKent Overstreet 238e877caaSKent Overstreet void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, 248e877caaSKent Overstreet struct bkey_s_c k) 258e877caaSKent Overstreet { 268e877caaSKent Overstreet struct bkey_s_c_snapshot_tree t = bkey_s_c_to_snapshot_tree(k); 278e877caaSKent Overstreet 288e877caaSKent Overstreet prt_printf(out, "subvol %u root snapshot %u", 298e877caaSKent Overstreet le32_to_cpu(t.v->master_subvol), 308e877caaSKent Overstreet le32_to_cpu(t.v->root_snapshot)); 318e877caaSKent Overstreet } 328e877caaSKent Overstreet 33b65db750SKent Overstreet int bch2_snapshot_tree_invalid(struct bch_fs *c, struct bkey_s_c k, 348e877caaSKent Overstreet enum bkey_invalid_flags flags, 358e877caaSKent Overstreet struct printbuf *err) 368e877caaSKent Overstreet { 37b65db750SKent Overstreet int ret = 0; 388e877caaSKent Overstreet 39b65db750SKent Overstreet bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || 40b65db750SKent Overstreet bkey_lt(k.k->p, POS(0, 1)), c, err, 41b65db750SKent Overstreet snapshot_tree_pos_bad, 42b65db750SKent Overstreet "bad pos"); 43b65db750SKent Overstreet fsck_err: 44b65db750SKent Overstreet return ret; 458e877caaSKent Overstreet } 468e877caaSKent Overstreet 478e877caaSKent Overstreet int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id, 488e877caaSKent Overstreet struct bch_snapshot_tree *s) 498e877caaSKent Overstreet { 508e877caaSKent Overstreet int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id), 518e877caaSKent Overstreet BTREE_ITER_WITH_UPDATES, snapshot_tree, s); 528e877caaSKent Overstreet 538e877caaSKent Overstreet if (bch2_err_matches(ret, ENOENT)) 548e877caaSKent Overstreet ret = -BCH_ERR_ENOENT_snapshot_tree; 558e877caaSKent Overstreet return ret; 568e877caaSKent Overstreet } 578e877caaSKent Overstreet 588e877caaSKent Overstreet struct bkey_i_snapshot_tree * 598e877caaSKent Overstreet __bch2_snapshot_tree_create(struct btree_trans *trans) 608e877caaSKent Overstreet { 618e877caaSKent Overstreet struct btree_iter iter; 628e877caaSKent Overstreet int ret = bch2_bkey_get_empty_slot(trans, &iter, 638e877caaSKent Overstreet BTREE_ID_snapshot_trees, POS(0, U32_MAX)); 648e877caaSKent Overstreet struct bkey_i_snapshot_tree *s_t; 658e877caaSKent Overstreet 668e877caaSKent Overstreet if (ret == -BCH_ERR_ENOSPC_btree_slot) 678e877caaSKent Overstreet ret = -BCH_ERR_ENOSPC_snapshot_tree; 688e877caaSKent Overstreet if (ret) 698e877caaSKent Overstreet return ERR_PTR(ret); 708e877caaSKent Overstreet 718e877caaSKent Overstreet s_t = bch2_bkey_alloc(trans, &iter, 0, snapshot_tree); 728e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(s_t); 738e877caaSKent Overstreet bch2_trans_iter_exit(trans, &iter); 748e877caaSKent Overstreet return ret ? ERR_PTR(ret) : s_t; 758e877caaSKent Overstreet } 768e877caaSKent Overstreet 778e877caaSKent Overstreet static int bch2_snapshot_tree_create(struct btree_trans *trans, 788e877caaSKent Overstreet u32 root_id, u32 subvol_id, u32 *tree_id) 798e877caaSKent Overstreet { 808e877caaSKent Overstreet struct bkey_i_snapshot_tree *n_tree = 818e877caaSKent Overstreet __bch2_snapshot_tree_create(trans); 828e877caaSKent Overstreet 838e877caaSKent Overstreet if (IS_ERR(n_tree)) 848e877caaSKent Overstreet return PTR_ERR(n_tree); 858e877caaSKent Overstreet 868e877caaSKent Overstreet n_tree->v.master_subvol = cpu_to_le32(subvol_id); 878e877caaSKent Overstreet n_tree->v.root_snapshot = cpu_to_le32(root_id); 888e877caaSKent Overstreet *tree_id = n_tree->k.p.offset; 898e877caaSKent Overstreet return 0; 908e877caaSKent Overstreet } 918e877caaSKent Overstreet 928e877caaSKent Overstreet /* Snapshot nodes: */ 938e877caaSKent Overstreet 941c31b83aSKent Overstreet static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, u32 ancestor) 9566487c54SKent Overstreet { 96ec9cc18fSKent Overstreet while (id && id < ancestor) { 97ec9cc18fSKent Overstreet const struct snapshot_t *s = __snapshot_t(t, id); 98ec9cc18fSKent Overstreet id = s ? s->parent : 0; 99ec9cc18fSKent Overstreet } 1001c31b83aSKent Overstreet return id == ancestor; 1011c31b83aSKent Overstreet } 1021c31b83aSKent Overstreet 1031c31b83aSKent Overstreet static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor) 1041c31b83aSKent Overstreet { 1051c31b83aSKent Overstreet rcu_read_lock(); 1061c31b83aSKent Overstreet bool ret = __bch2_snapshot_is_ancestor_early(rcu_dereference(c->snapshots), id, ancestor); 10766487c54SKent Overstreet rcu_read_unlock(); 10866487c54SKent Overstreet 1091c31b83aSKent Overstreet return ret; 11066487c54SKent Overstreet } 11166487c54SKent Overstreet 1128e877caaSKent Overstreet static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor) 1138e877caaSKent Overstreet { 1148e877caaSKent Overstreet const struct snapshot_t *s = __snapshot_t(t, id); 115ec9cc18fSKent Overstreet if (!s) 116ec9cc18fSKent Overstreet return 0; 1178e877caaSKent Overstreet 1188e877caaSKent Overstreet if (s->skip[2] <= ancestor) 1198e877caaSKent Overstreet return s->skip[2]; 1208e877caaSKent Overstreet if (s->skip[1] <= ancestor) 1218e877caaSKent Overstreet return s->skip[1]; 1228e877caaSKent Overstreet if (s->skip[0] <= ancestor) 1238e877caaSKent Overstreet return s->skip[0]; 1248e877caaSKent Overstreet return s->parent; 1258e877caaSKent Overstreet } 1268e877caaSKent Overstreet 1278e877caaSKent Overstreet bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) 1288e877caaSKent Overstreet { 1298e877caaSKent Overstreet bool ret; 1308e877caaSKent Overstreet 1318e877caaSKent Overstreet rcu_read_lock(); 1321c31b83aSKent Overstreet struct snapshot_table *t = rcu_dereference(c->snapshots); 1331c31b83aSKent Overstreet 134*13c1e583SKent Overstreet if (unlikely(c->recovery_pass_done < BCH_RECOVERY_PASS_check_snapshots)) { 1351c31b83aSKent Overstreet ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor); 1361c31b83aSKent Overstreet goto out; 1371c31b83aSKent Overstreet } 1388e877caaSKent Overstreet 1398e877caaSKent Overstreet while (id && id < ancestor - IS_ANCESTOR_BITMAP) 1408e877caaSKent Overstreet id = get_ancestor_below(t, id, ancestor); 1418e877caaSKent Overstreet 14266487c54SKent Overstreet if (id && id < ancestor) { 14366487c54SKent Overstreet ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor); 14466487c54SKent Overstreet 1451c31b83aSKent Overstreet EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor)); 14666487c54SKent Overstreet } else { 14766487c54SKent Overstreet ret = id == ancestor; 14866487c54SKent Overstreet } 1491c31b83aSKent Overstreet out: 1508e877caaSKent Overstreet rcu_read_unlock(); 1518e877caaSKent Overstreet 1528e877caaSKent Overstreet return ret; 1538e877caaSKent Overstreet } 1548e877caaSKent Overstreet 1558e877caaSKent Overstreet static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id) 1568e877caaSKent Overstreet { 1578e877caaSKent Overstreet size_t idx = U32_MAX - id; 1588e877caaSKent Overstreet struct snapshot_table *new, *old; 1598e877caaSKent Overstreet 16063332394SKent Overstreet size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1)); 16163332394SKent Overstreet size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]); 1628e877caaSKent Overstreet 16363332394SKent Overstreet new = kvzalloc(new_bytes, GFP_KERNEL); 1648e877caaSKent Overstreet if (!new) 1658e877caaSKent Overstreet return NULL; 1668e877caaSKent Overstreet 16763332394SKent Overstreet new->nr = new_size; 16863332394SKent Overstreet 1698e877caaSKent Overstreet old = rcu_dereference_protected(c->snapshots, true); 1708e877caaSKent Overstreet if (old) 17163332394SKent Overstreet memcpy(new->s, old->s, sizeof(old->s[0]) * old->nr); 1728e877caaSKent Overstreet 1738e877caaSKent Overstreet rcu_assign_pointer(c->snapshots, new); 17463332394SKent Overstreet kvfree_rcu(old, rcu); 1758e877caaSKent Overstreet 17663332394SKent Overstreet return &rcu_dereference_protected(c->snapshots, 17763332394SKent Overstreet lockdep_is_held(&c->snapshot_table_lock))->s[idx]; 1788e877caaSKent Overstreet } 1798e877caaSKent Overstreet 1808e877caaSKent Overstreet static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id) 1818e877caaSKent Overstreet { 1828e877caaSKent Overstreet size_t idx = U32_MAX - id; 18363332394SKent Overstreet struct snapshot_table *table = 18463332394SKent Overstreet rcu_dereference_protected(c->snapshots, 18563332394SKent Overstreet lockdep_is_held(&c->snapshot_table_lock)); 1868e877caaSKent Overstreet 1878e877caaSKent Overstreet lockdep_assert_held(&c->snapshot_table_lock); 1888e877caaSKent Overstreet 18963332394SKent Overstreet if (likely(table && idx < table->nr)) 19063332394SKent Overstreet return &table->s[idx]; 1918e877caaSKent Overstreet 1928e877caaSKent Overstreet return __snapshot_t_mut(c, id); 1938e877caaSKent Overstreet } 1948e877caaSKent Overstreet 1958e877caaSKent Overstreet void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, 1968e877caaSKent Overstreet struct bkey_s_c k) 1978e877caaSKent Overstreet { 1988e877caaSKent Overstreet struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k); 1998e877caaSKent Overstreet 2008e877caaSKent Overstreet prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u tree %u", 2018e877caaSKent Overstreet BCH_SNAPSHOT_SUBVOL(s.v), 2028e877caaSKent Overstreet BCH_SNAPSHOT_DELETED(s.v), 2038e877caaSKent Overstreet le32_to_cpu(s.v->parent), 2048e877caaSKent Overstreet le32_to_cpu(s.v->children[0]), 2058e877caaSKent Overstreet le32_to_cpu(s.v->children[1]), 2068e877caaSKent Overstreet le32_to_cpu(s.v->subvol), 2078e877caaSKent Overstreet le32_to_cpu(s.v->tree)); 2088e877caaSKent Overstreet 2098e877caaSKent Overstreet if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, depth)) 2108e877caaSKent Overstreet prt_printf(out, " depth %u skiplist %u %u %u", 2118e877caaSKent Overstreet le32_to_cpu(s.v->depth), 2128e877caaSKent Overstreet le32_to_cpu(s.v->skip[0]), 2138e877caaSKent Overstreet le32_to_cpu(s.v->skip[1]), 2148e877caaSKent Overstreet le32_to_cpu(s.v->skip[2])); 2158e877caaSKent Overstreet } 2168e877caaSKent Overstreet 217b65db750SKent Overstreet int bch2_snapshot_invalid(struct bch_fs *c, struct bkey_s_c k, 2188e877caaSKent Overstreet enum bkey_invalid_flags flags, 2198e877caaSKent Overstreet struct printbuf *err) 2208e877caaSKent Overstreet { 2218e877caaSKent Overstreet struct bkey_s_c_snapshot s; 2228e877caaSKent Overstreet u32 i, id; 223b65db750SKent Overstreet int ret = 0; 2248e877caaSKent Overstreet 225b65db750SKent Overstreet bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || 226b65db750SKent Overstreet bkey_lt(k.k->p, POS(0, 1)), c, err, 227b65db750SKent Overstreet snapshot_pos_bad, 228b65db750SKent Overstreet "bad pos"); 2298e877caaSKent Overstreet 2308e877caaSKent Overstreet s = bkey_s_c_to_snapshot(k); 2318e877caaSKent Overstreet 2328e877caaSKent Overstreet id = le32_to_cpu(s.v->parent); 233b65db750SKent Overstreet bkey_fsck_err_on(id && id <= k.k->p.offset, c, err, 234b65db750SKent Overstreet snapshot_parent_bad, 235b65db750SKent Overstreet "bad parent node (%u <= %llu)", 2368e877caaSKent Overstreet id, k.k->p.offset); 2378e877caaSKent Overstreet 238b65db750SKent Overstreet bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), c, err, 239b65db750SKent Overstreet snapshot_children_not_normalized, 240b65db750SKent Overstreet "children not normalized"); 2418e877caaSKent Overstreet 242b65db750SKent Overstreet bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], c, err, 243b65db750SKent Overstreet snapshot_child_duplicate, 244b65db750SKent Overstreet "duplicate child nodes"); 2458e877caaSKent Overstreet 2468e877caaSKent Overstreet for (i = 0; i < 2; i++) { 2478e877caaSKent Overstreet id = le32_to_cpu(s.v->children[i]); 2488e877caaSKent Overstreet 249b65db750SKent Overstreet bkey_fsck_err_on(id >= k.k->p.offset, c, err, 250b65db750SKent Overstreet snapshot_child_bad, 251b65db750SKent Overstreet "bad child node (%u >= %llu)", 2528e877caaSKent Overstreet id, k.k->p.offset); 2538e877caaSKent Overstreet } 2548e877caaSKent Overstreet 2558e877caaSKent Overstreet if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) { 256b65db750SKent Overstreet bkey_fsck_err_on(le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) || 257b65db750SKent Overstreet le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), c, err, 258b65db750SKent Overstreet snapshot_skiplist_not_normalized, 259b65db750SKent Overstreet "skiplist not normalized"); 2608e877caaSKent Overstreet 2618e877caaSKent Overstreet for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) { 2628e877caaSKent Overstreet id = le32_to_cpu(s.v->skip[i]); 2638e877caaSKent Overstreet 264b65db750SKent Overstreet bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), c, err, 265b65db750SKent Overstreet snapshot_skiplist_bad, 266b65db750SKent Overstreet "bad skiplist node %u", id); 2678e877caaSKent Overstreet } 2688e877caaSKent Overstreet } 269b65db750SKent Overstreet fsck_err: 270b65db750SKent Overstreet return ret; 2718e877caaSKent Overstreet } 2728e877caaSKent Overstreet 27366487c54SKent Overstreet static void __set_is_ancestor_bitmap(struct bch_fs *c, u32 id) 27466487c54SKent Overstreet { 27566487c54SKent Overstreet struct snapshot_t *t = snapshot_t_mut(c, id); 27666487c54SKent Overstreet u32 parent = id; 27766487c54SKent Overstreet 27866487c54SKent Overstreet while ((parent = bch2_snapshot_parent_early(c, parent)) && 27966487c54SKent Overstreet parent - id - 1 < IS_ANCESTOR_BITMAP) 28066487c54SKent Overstreet __set_bit(parent - id - 1, t->is_ancestor); 28166487c54SKent Overstreet } 28266487c54SKent Overstreet 28366487c54SKent Overstreet static void set_is_ancestor_bitmap(struct bch_fs *c, u32 id) 28466487c54SKent Overstreet { 28566487c54SKent Overstreet mutex_lock(&c->snapshot_table_lock); 28666487c54SKent Overstreet __set_is_ancestor_bitmap(c, id); 28766487c54SKent Overstreet mutex_unlock(&c->snapshot_table_lock); 28866487c54SKent Overstreet } 28966487c54SKent Overstreet 290ad00bce0SKent Overstreet static int __bch2_mark_snapshot(struct btree_trans *trans, 2918e877caaSKent Overstreet enum btree_id btree, unsigned level, 2928e877caaSKent Overstreet struct bkey_s_c old, struct bkey_s_c new, 2938e877caaSKent Overstreet unsigned flags) 2948e877caaSKent Overstreet { 2958e877caaSKent Overstreet struct bch_fs *c = trans->c; 2968e877caaSKent Overstreet struct snapshot_t *t; 2978e877caaSKent Overstreet u32 id = new.k->p.offset; 2988e877caaSKent Overstreet int ret = 0; 2998e877caaSKent Overstreet 3008e877caaSKent Overstreet mutex_lock(&c->snapshot_table_lock); 3018e877caaSKent Overstreet 3028e877caaSKent Overstreet t = snapshot_t_mut(c, id); 3038e877caaSKent Overstreet if (!t) { 3048e877caaSKent Overstreet ret = -BCH_ERR_ENOMEM_mark_snapshot; 3058e877caaSKent Overstreet goto err; 3068e877caaSKent Overstreet } 3078e877caaSKent Overstreet 3088e877caaSKent Overstreet if (new.k->type == KEY_TYPE_snapshot) { 3098e877caaSKent Overstreet struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new); 3108e877caaSKent Overstreet 3118e877caaSKent Overstreet t->parent = le32_to_cpu(s.v->parent); 3128e877caaSKent Overstreet t->children[0] = le32_to_cpu(s.v->children[0]); 3138e877caaSKent Overstreet t->children[1] = le32_to_cpu(s.v->children[1]); 3148e877caaSKent Overstreet t->subvol = BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0; 3158e877caaSKent Overstreet t->tree = le32_to_cpu(s.v->tree); 3168e877caaSKent Overstreet 3178e877caaSKent Overstreet if (bkey_val_bytes(s.k) > offsetof(struct bch_snapshot, depth)) { 3188e877caaSKent Overstreet t->depth = le32_to_cpu(s.v->depth); 3198e877caaSKent Overstreet t->skip[0] = le32_to_cpu(s.v->skip[0]); 3208e877caaSKent Overstreet t->skip[1] = le32_to_cpu(s.v->skip[1]); 3218e877caaSKent Overstreet t->skip[2] = le32_to_cpu(s.v->skip[2]); 3228e877caaSKent Overstreet } else { 3238e877caaSKent Overstreet t->depth = 0; 3248e877caaSKent Overstreet t->skip[0] = 0; 3258e877caaSKent Overstreet t->skip[1] = 0; 3268e877caaSKent Overstreet t->skip[2] = 0; 3278e877caaSKent Overstreet } 3288e877caaSKent Overstreet 32966487c54SKent Overstreet __set_is_ancestor_bitmap(c, id); 3308e877caaSKent Overstreet 3318e877caaSKent Overstreet if (BCH_SNAPSHOT_DELETED(s.v)) { 3323c471b65SKent Overstreet set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags); 333b0b5bbf9SKent Overstreet if (c->curr_recovery_pass > BCH_RECOVERY_PASS_delete_dead_snapshots) 334b0b5bbf9SKent Overstreet bch2_delete_dead_snapshots_async(c); 3358e877caaSKent Overstreet } 3368e877caaSKent Overstreet } else { 3378e877caaSKent Overstreet memset(t, 0, sizeof(*t)); 3388e877caaSKent Overstreet } 3398e877caaSKent Overstreet err: 3408e877caaSKent Overstreet mutex_unlock(&c->snapshot_table_lock); 3418e877caaSKent Overstreet return ret; 3428e877caaSKent Overstreet } 3438e877caaSKent Overstreet 344ad00bce0SKent Overstreet int bch2_mark_snapshot(struct btree_trans *trans, 345ad00bce0SKent Overstreet enum btree_id btree, unsigned level, 346ad00bce0SKent Overstreet struct bkey_s_c old, struct bkey_s new, 347ad00bce0SKent Overstreet unsigned flags) 348ad00bce0SKent Overstreet { 349ad00bce0SKent Overstreet return __bch2_mark_snapshot(trans, btree, level, old, new.s_c, flags); 350ad00bce0SKent Overstreet } 351ad00bce0SKent Overstreet 3528e877caaSKent Overstreet int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, 3538e877caaSKent Overstreet struct bch_snapshot *s) 3548e877caaSKent Overstreet { 3558e877caaSKent Overstreet return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, POS(0, id), 3568e877caaSKent Overstreet BTREE_ITER_WITH_UPDATES, snapshot, s); 3578e877caaSKent Overstreet } 3588e877caaSKent Overstreet 359eebe8a84SKent Overstreet static int bch2_snapshot_live(struct btree_trans *trans, u32 id) 3608e877caaSKent Overstreet { 3618e877caaSKent Overstreet struct bch_snapshot v; 3628e877caaSKent Overstreet int ret; 3638e877caaSKent Overstreet 3648e877caaSKent Overstreet if (!id) 3658e877caaSKent Overstreet return 0; 3668e877caaSKent Overstreet 3678e877caaSKent Overstreet ret = bch2_snapshot_lookup(trans, id, &v); 3688e877caaSKent Overstreet if (bch2_err_matches(ret, ENOENT)) 3698e877caaSKent Overstreet bch_err(trans->c, "snapshot node %u not found", id); 3708e877caaSKent Overstreet if (ret) 3718e877caaSKent Overstreet return ret; 3728e877caaSKent Overstreet 3738e877caaSKent Overstreet return !BCH_SNAPSHOT_DELETED(&v); 3748e877caaSKent Overstreet } 3758e877caaSKent Overstreet 3768e877caaSKent Overstreet /* 3778e877caaSKent Overstreet * If @k is a snapshot with just one live child, it's part of a linear chain, 3788e877caaSKent Overstreet * which we consider to be an equivalence class: and then after snapshot 3798e877caaSKent Overstreet * deletion cleanup, there should only be a single key at a given position in 3808e877caaSKent Overstreet * this equivalence class. 3818e877caaSKent Overstreet * 3828e877caaSKent Overstreet * This sets the equivalence class of @k to be the child's equivalence class, if 3838e877caaSKent Overstreet * it's part of such a linear chain: this correctly sets equivalence classes on 3848e877caaSKent Overstreet * startup if we run leaf to root (i.e. in natural key order). 3858e877caaSKent Overstreet */ 386eebe8a84SKent Overstreet static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k) 3878e877caaSKent Overstreet { 3888e877caaSKent Overstreet struct bch_fs *c = trans->c; 3898e877caaSKent Overstreet unsigned i, nr_live = 0, live_idx = 0; 3908e877caaSKent Overstreet struct bkey_s_c_snapshot snap; 3918e877caaSKent Overstreet u32 id = k.k->p.offset, child[2]; 3928e877caaSKent Overstreet 3938e877caaSKent Overstreet if (k.k->type != KEY_TYPE_snapshot) 3948e877caaSKent Overstreet return 0; 3958e877caaSKent Overstreet 3968e877caaSKent Overstreet snap = bkey_s_c_to_snapshot(k); 3978e877caaSKent Overstreet 3988e877caaSKent Overstreet child[0] = le32_to_cpu(snap.v->children[0]); 3998e877caaSKent Overstreet child[1] = le32_to_cpu(snap.v->children[1]); 4008e877caaSKent Overstreet 4018e877caaSKent Overstreet for (i = 0; i < 2; i++) { 4028e877caaSKent Overstreet int ret = bch2_snapshot_live(trans, child[i]); 4038e877caaSKent Overstreet 4048e877caaSKent Overstreet if (ret < 0) 4058e877caaSKent Overstreet return ret; 4068e877caaSKent Overstreet 4078e877caaSKent Overstreet if (ret) 4088e877caaSKent Overstreet live_idx = i; 4098e877caaSKent Overstreet nr_live += ret; 4108e877caaSKent Overstreet } 4118e877caaSKent Overstreet 4128e877caaSKent Overstreet mutex_lock(&c->snapshot_table_lock); 4138e877caaSKent Overstreet 4148e877caaSKent Overstreet snapshot_t_mut(c, id)->equiv = nr_live == 1 4158e877caaSKent Overstreet ? snapshot_t_mut(c, child[live_idx])->equiv 4168e877caaSKent Overstreet : id; 4178e877caaSKent Overstreet 4188e877caaSKent Overstreet mutex_unlock(&c->snapshot_table_lock); 4198e877caaSKent Overstreet 4208e877caaSKent Overstreet return 0; 4218e877caaSKent Overstreet } 4228e877caaSKent Overstreet 4238e877caaSKent Overstreet /* fsck: */ 4248e877caaSKent Overstreet 4258e877caaSKent Overstreet static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child) 4268e877caaSKent Overstreet { 4278e877caaSKent Overstreet return snapshot_t(c, id)->children[child]; 4288e877caaSKent Overstreet } 4298e877caaSKent Overstreet 4308e877caaSKent Overstreet static u32 bch2_snapshot_left_child(struct bch_fs *c, u32 id) 4318e877caaSKent Overstreet { 4328e877caaSKent Overstreet return bch2_snapshot_child(c, id, 0); 4338e877caaSKent Overstreet } 4348e877caaSKent Overstreet 4358e877caaSKent Overstreet static u32 bch2_snapshot_right_child(struct bch_fs *c, u32 id) 4368e877caaSKent Overstreet { 4378e877caaSKent Overstreet return bch2_snapshot_child(c, id, 1); 4388e877caaSKent Overstreet } 4398e877caaSKent Overstreet 4408e877caaSKent Overstreet static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id) 4418e877caaSKent Overstreet { 4428e877caaSKent Overstreet u32 n, parent; 4438e877caaSKent Overstreet 4448e877caaSKent Overstreet n = bch2_snapshot_left_child(c, id); 4458e877caaSKent Overstreet if (n) 4468e877caaSKent Overstreet return n; 4478e877caaSKent Overstreet 4488e877caaSKent Overstreet while ((parent = bch2_snapshot_parent(c, id))) { 4498e877caaSKent Overstreet n = bch2_snapshot_right_child(c, parent); 4508e877caaSKent Overstreet if (n && n != id) 4518e877caaSKent Overstreet return n; 4528e877caaSKent Overstreet id = parent; 4538e877caaSKent Overstreet } 4548e877caaSKent Overstreet 4558e877caaSKent Overstreet return 0; 4568e877caaSKent Overstreet } 4578e877caaSKent Overstreet 4588e877caaSKent Overstreet static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root) 4598e877caaSKent Overstreet { 4608e877caaSKent Overstreet u32 id = snapshot_root; 4618e877caaSKent Overstreet u32 subvol = 0, s; 4628e877caaSKent Overstreet 4638e877caaSKent Overstreet while (id) { 4648e877caaSKent Overstreet s = snapshot_t(c, id)->subvol; 4658e877caaSKent Overstreet 4668e877caaSKent Overstreet if (s && (!subvol || s < subvol)) 4678e877caaSKent Overstreet subvol = s; 4688e877caaSKent Overstreet 4698e877caaSKent Overstreet id = bch2_snapshot_tree_next(c, id); 4708e877caaSKent Overstreet } 4718e877caaSKent Overstreet 4728e877caaSKent Overstreet return subvol; 4738e877caaSKent Overstreet } 4748e877caaSKent Overstreet 4758e877caaSKent Overstreet static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, 4768e877caaSKent Overstreet u32 snapshot_root, u32 *subvol_id) 4778e877caaSKent Overstreet { 4788e877caaSKent Overstreet struct bch_fs *c = trans->c; 4798e877caaSKent Overstreet struct btree_iter iter; 4808e877caaSKent Overstreet struct bkey_s_c k; 4818e877caaSKent Overstreet bool found = false; 4828e877caaSKent Overstreet int ret; 4838e877caaSKent Overstreet 4848e877caaSKent Overstreet for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 4858e877caaSKent Overstreet 0, k, ret) { 4868e877caaSKent Overstreet if (k.k->type != KEY_TYPE_subvolume) 4878e877caaSKent Overstreet continue; 4888e877caaSKent Overstreet 489cf904c8dSKent Overstreet struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); 4908e877caaSKent Overstreet if (!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.v->snapshot), snapshot_root)) 4918e877caaSKent Overstreet continue; 4928e877caaSKent Overstreet if (!BCH_SUBVOLUME_SNAP(s.v)) { 4938e877caaSKent Overstreet *subvol_id = s.k->p.offset; 4948e877caaSKent Overstreet found = true; 4958e877caaSKent Overstreet break; 4968e877caaSKent Overstreet } 4978e877caaSKent Overstreet } 4988e877caaSKent Overstreet 4998e877caaSKent Overstreet bch2_trans_iter_exit(trans, &iter); 5008e877caaSKent Overstreet 5018e877caaSKent Overstreet if (!ret && !found) { 50296dea3d5SKent Overstreet struct bkey_i_subvolume *u; 5038e877caaSKent Overstreet 5048e877caaSKent Overstreet *subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root); 5058e877caaSKent Overstreet 50696dea3d5SKent Overstreet u = bch2_bkey_get_mut_typed(trans, &iter, 5078e877caaSKent Overstreet BTREE_ID_subvolumes, POS(0, *subvol_id), 5088e877caaSKent Overstreet 0, subvolume); 50996dea3d5SKent Overstreet ret = PTR_ERR_OR_ZERO(u); 5108e877caaSKent Overstreet if (ret) 5118e877caaSKent Overstreet return ret; 5128e877caaSKent Overstreet 51396dea3d5SKent Overstreet SET_BCH_SUBVOLUME_SNAP(&u->v, false); 5148e877caaSKent Overstreet } 5158e877caaSKent Overstreet 5168e877caaSKent Overstreet return ret; 5178e877caaSKent Overstreet } 5188e877caaSKent Overstreet 5198e877caaSKent Overstreet static int check_snapshot_tree(struct btree_trans *trans, 5208e877caaSKent Overstreet struct btree_iter *iter, 5218e877caaSKent Overstreet struct bkey_s_c k) 5228e877caaSKent Overstreet { 5238e877caaSKent Overstreet struct bch_fs *c = trans->c; 5248e877caaSKent Overstreet struct bkey_s_c_snapshot_tree st; 5258e877caaSKent Overstreet struct bch_snapshot s; 5268e877caaSKent Overstreet struct bch_subvolume subvol; 5278e877caaSKent Overstreet struct printbuf buf = PRINTBUF; 5288e877caaSKent Overstreet u32 root_id; 5298e877caaSKent Overstreet int ret; 5308e877caaSKent Overstreet 5318e877caaSKent Overstreet if (k.k->type != KEY_TYPE_snapshot_tree) 5328e877caaSKent Overstreet return 0; 5338e877caaSKent Overstreet 5348e877caaSKent Overstreet st = bkey_s_c_to_snapshot_tree(k); 5358e877caaSKent Overstreet root_id = le32_to_cpu(st.v->root_snapshot); 5368e877caaSKent Overstreet 5378e877caaSKent Overstreet ret = bch2_snapshot_lookup(trans, root_id, &s); 5388e877caaSKent Overstreet if (ret && !bch2_err_matches(ret, ENOENT)) 5398e877caaSKent Overstreet goto err; 5408e877caaSKent Overstreet 5418e877caaSKent Overstreet if (fsck_err_on(ret || 5428e877caaSKent Overstreet root_id != bch2_snapshot_root(c, root_id) || 5438e877caaSKent Overstreet st.k->p.offset != le32_to_cpu(s.tree), 544b65db750SKent Overstreet c, snapshot_tree_to_missing_snapshot, 5458e877caaSKent Overstreet "snapshot tree points to missing/incorrect snapshot:\n %s", 5468e877caaSKent Overstreet (bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { 5478e877caaSKent Overstreet ret = bch2_btree_delete_at(trans, iter, 0); 5488e877caaSKent Overstreet goto err; 5498e877caaSKent Overstreet } 5508e877caaSKent Overstreet 5518e877caaSKent Overstreet ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), 5528e877caaSKent Overstreet false, 0, &subvol); 5538e877caaSKent Overstreet if (ret && !bch2_err_matches(ret, ENOENT)) 5548e877caaSKent Overstreet goto err; 5558e877caaSKent Overstreet 556b65db750SKent Overstreet if (fsck_err_on(ret, 557b65db750SKent Overstreet c, snapshot_tree_to_missing_subvol, 5588e877caaSKent Overstreet "snapshot tree points to missing subvolume:\n %s", 5598e877caaSKent Overstreet (printbuf_reset(&buf), 5608e877caaSKent Overstreet bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || 5611c31b83aSKent Overstreet fsck_err_on(!bch2_snapshot_is_ancestor(c, 5628e877caaSKent Overstreet le32_to_cpu(subvol.snapshot), 563b65db750SKent Overstreet root_id), 564b65db750SKent Overstreet c, snapshot_tree_to_wrong_subvol, 5658e877caaSKent Overstreet "snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s", 5668e877caaSKent Overstreet (printbuf_reset(&buf), 5678e877caaSKent Overstreet bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || 568b65db750SKent Overstreet fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol), 569b65db750SKent Overstreet c, snapshot_tree_to_snapshot_subvol, 5708e877caaSKent Overstreet "snapshot tree points to snapshot subvolume:\n %s", 5718e877caaSKent Overstreet (printbuf_reset(&buf), 5728e877caaSKent Overstreet bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { 5738e877caaSKent Overstreet struct bkey_i_snapshot_tree *u; 5748e877caaSKent Overstreet u32 subvol_id; 5758e877caaSKent Overstreet 5768e877caaSKent Overstreet ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id); 5778e877caaSKent Overstreet if (ret) 5788e877caaSKent Overstreet goto err; 5798e877caaSKent Overstreet 5808e877caaSKent Overstreet u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot_tree); 5818e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(u); 5828e877caaSKent Overstreet if (ret) 5838e877caaSKent Overstreet goto err; 5848e877caaSKent Overstreet 5858e877caaSKent Overstreet u->v.master_subvol = cpu_to_le32(subvol_id); 5868e877caaSKent Overstreet st = snapshot_tree_i_to_s_c(u); 5878e877caaSKent Overstreet } 5888e877caaSKent Overstreet err: 5898e877caaSKent Overstreet fsck_err: 5908e877caaSKent Overstreet printbuf_exit(&buf); 5918e877caaSKent Overstreet return ret; 5928e877caaSKent Overstreet } 5938e877caaSKent Overstreet 5948e877caaSKent Overstreet /* 5958e877caaSKent Overstreet * For each snapshot_tree, make sure it points to the root of a snapshot tree 5968e877caaSKent Overstreet * and that snapshot entry points back to it, or delete it. 5978e877caaSKent Overstreet * 5988e877caaSKent Overstreet * And, make sure it points to a subvolume within that snapshot tree, or correct 5998e877caaSKent Overstreet * it to point to the oldest subvolume within that snapshot tree. 6008e877caaSKent Overstreet */ 6018e877caaSKent Overstreet int bch2_check_snapshot_trees(struct bch_fs *c) 6028e877caaSKent Overstreet { 60380eab7a7SKent Overstreet int ret = bch2_trans_run(c, 6046bd68ec2SKent Overstreet for_each_btree_key_commit(trans, iter, 6058e877caaSKent Overstreet BTREE_ID_snapshot_trees, POS_MIN, 6068e877caaSKent Overstreet BTREE_ITER_PREFETCH, k, 6073f0e297dSKent Overstreet NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 6086bd68ec2SKent Overstreet check_snapshot_tree(trans, &iter, k))); 609cf904c8dSKent Overstreet bch_err_fn(c, ret); 6108e877caaSKent Overstreet return ret; 6118e877caaSKent Overstreet } 6128e877caaSKent Overstreet 6138e877caaSKent Overstreet /* 6148e877caaSKent Overstreet * Look up snapshot tree for @tree_id and find root, 6158e877caaSKent Overstreet * make sure @snap_id is a descendent: 6168e877caaSKent Overstreet */ 6178e877caaSKent Overstreet static int snapshot_tree_ptr_good(struct btree_trans *trans, 6188e877caaSKent Overstreet u32 snap_id, u32 tree_id) 6198e877caaSKent Overstreet { 6208e877caaSKent Overstreet struct bch_snapshot_tree s_t; 6218e877caaSKent Overstreet int ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t); 6228e877caaSKent Overstreet 6238e877caaSKent Overstreet if (bch2_err_matches(ret, ENOENT)) 6248e877caaSKent Overstreet return 0; 6258e877caaSKent Overstreet if (ret) 6268e877caaSKent Overstreet return ret; 6278e877caaSKent Overstreet 6288e877caaSKent Overstreet return bch2_snapshot_is_ancestor_early(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot)); 6298e877caaSKent Overstreet } 6308e877caaSKent Overstreet 6318e877caaSKent Overstreet u32 bch2_snapshot_skiplist_get(struct bch_fs *c, u32 id) 6328e877caaSKent Overstreet { 6338e877caaSKent Overstreet const struct snapshot_t *s; 6348e877caaSKent Overstreet 6358e877caaSKent Overstreet if (!id) 6368e877caaSKent Overstreet return 0; 6378e877caaSKent Overstreet 6388e877caaSKent Overstreet rcu_read_lock(); 6398e877caaSKent Overstreet s = snapshot_t(c, id); 6408e877caaSKent Overstreet if (s->parent) 6418e877caaSKent Overstreet id = bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth)); 6428e877caaSKent Overstreet rcu_read_unlock(); 6438e877caaSKent Overstreet 6448e877caaSKent Overstreet return id; 6458e877caaSKent Overstreet } 6468e877caaSKent Overstreet 647097d4cc8SKent Overstreet static int snapshot_skiplist_good(struct btree_trans *trans, u32 id, struct bch_snapshot s) 6488e877caaSKent Overstreet { 6498e877caaSKent Overstreet unsigned i; 6508e877caaSKent Overstreet 651097d4cc8SKent Overstreet for (i = 0; i < 3; i++) 652097d4cc8SKent Overstreet if (!s.parent) { 653097d4cc8SKent Overstreet if (s.skip[i]) 6548e877caaSKent Overstreet return false; 655097d4cc8SKent Overstreet } else { 656097d4cc8SKent Overstreet if (!bch2_snapshot_is_ancestor_early(trans->c, id, le32_to_cpu(s.skip[i]))) 6578e877caaSKent Overstreet return false; 6588e877caaSKent Overstreet } 6598e877caaSKent Overstreet 6608e877caaSKent Overstreet return true; 6618e877caaSKent Overstreet } 6628e877caaSKent Overstreet 6638e877caaSKent Overstreet /* 6648e877caaSKent Overstreet * snapshot_tree pointer was incorrect: look up root snapshot node, make sure 6658e877caaSKent Overstreet * its snapshot_tree pointer is correct (allocate new one if necessary), then 6668e877caaSKent Overstreet * update this node's pointer to root node's pointer: 6678e877caaSKent Overstreet */ 6688e877caaSKent Overstreet static int snapshot_tree_ptr_repair(struct btree_trans *trans, 6698e877caaSKent Overstreet struct btree_iter *iter, 6708e877caaSKent Overstreet struct bkey_s_c k, 6718e877caaSKent Overstreet struct bch_snapshot *s) 6728e877caaSKent Overstreet { 6738e877caaSKent Overstreet struct bch_fs *c = trans->c; 6748e877caaSKent Overstreet struct btree_iter root_iter; 6758e877caaSKent Overstreet struct bch_snapshot_tree s_t; 6768e877caaSKent Overstreet struct bkey_s_c_snapshot root; 6778e877caaSKent Overstreet struct bkey_i_snapshot *u; 6788e877caaSKent Overstreet u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id; 6798e877caaSKent Overstreet int ret; 6808e877caaSKent Overstreet 6818e877caaSKent Overstreet root = bch2_bkey_get_iter_typed(trans, &root_iter, 6828e877caaSKent Overstreet BTREE_ID_snapshots, POS(0, root_id), 6838e877caaSKent Overstreet BTREE_ITER_WITH_UPDATES, snapshot); 6848e877caaSKent Overstreet ret = bkey_err(root); 6858e877caaSKent Overstreet if (ret) 6868e877caaSKent Overstreet goto err; 6878e877caaSKent Overstreet 6888e877caaSKent Overstreet tree_id = le32_to_cpu(root.v->tree); 6898e877caaSKent Overstreet 6908e877caaSKent Overstreet ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t); 6918e877caaSKent Overstreet if (ret && !bch2_err_matches(ret, ENOENT)) 6928e877caaSKent Overstreet return ret; 6938e877caaSKent Overstreet 6948e877caaSKent Overstreet if (ret || le32_to_cpu(s_t.root_snapshot) != root_id) { 6958e877caaSKent Overstreet u = bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot); 6968e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(u) ?: 6978e877caaSKent Overstreet bch2_snapshot_tree_create(trans, root_id, 6988e877caaSKent Overstreet bch2_snapshot_tree_oldest_subvol(c, root_id), 6998e877caaSKent Overstreet &tree_id); 7008e877caaSKent Overstreet if (ret) 7018e877caaSKent Overstreet goto err; 7028e877caaSKent Overstreet 7038e877caaSKent Overstreet u->v.tree = cpu_to_le32(tree_id); 7048e877caaSKent Overstreet if (k.k->p.offset == root_id) 7058e877caaSKent Overstreet *s = u->v; 7068e877caaSKent Overstreet } 7078e877caaSKent Overstreet 7088e877caaSKent Overstreet if (k.k->p.offset != root_id) { 7098e877caaSKent Overstreet u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); 7108e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(u); 7118e877caaSKent Overstreet if (ret) 7128e877caaSKent Overstreet goto err; 7138e877caaSKent Overstreet 7148e877caaSKent Overstreet u->v.tree = cpu_to_le32(tree_id); 7158e877caaSKent Overstreet *s = u->v; 7168e877caaSKent Overstreet } 7178e877caaSKent Overstreet err: 7188e877caaSKent Overstreet bch2_trans_iter_exit(trans, &root_iter); 7198e877caaSKent Overstreet return ret; 7208e877caaSKent Overstreet } 7218e877caaSKent Overstreet 7228e877caaSKent Overstreet static int check_snapshot(struct btree_trans *trans, 7238e877caaSKent Overstreet struct btree_iter *iter, 7248e877caaSKent Overstreet struct bkey_s_c k) 7258e877caaSKent Overstreet { 7268e877caaSKent Overstreet struct bch_fs *c = trans->c; 7278e877caaSKent Overstreet struct bch_snapshot s; 7288e877caaSKent Overstreet struct bch_subvolume subvol; 7298e877caaSKent Overstreet struct bch_snapshot v; 7308e877caaSKent Overstreet struct bkey_i_snapshot *u; 7318e877caaSKent Overstreet u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset); 7328e877caaSKent Overstreet u32 real_depth; 7338e877caaSKent Overstreet struct printbuf buf = PRINTBUF; 7348e877caaSKent Overstreet bool should_have_subvol; 7358e877caaSKent Overstreet u32 i, id; 7368e877caaSKent Overstreet int ret = 0; 7378e877caaSKent Overstreet 7388e877caaSKent Overstreet if (k.k->type != KEY_TYPE_snapshot) 7398e877caaSKent Overstreet return 0; 7408e877caaSKent Overstreet 7418e877caaSKent Overstreet memset(&s, 0, sizeof(s)); 742c4333eb5SKent Overstreet memcpy(&s, k.v, min(sizeof(s), bkey_val_bytes(k.k))); 7438e877caaSKent Overstreet 7448e877caaSKent Overstreet id = le32_to_cpu(s.parent); 7458e877caaSKent Overstreet if (id) { 7468e877caaSKent Overstreet ret = bch2_snapshot_lookup(trans, id, &v); 7478e877caaSKent Overstreet if (bch2_err_matches(ret, ENOENT)) 7488e877caaSKent Overstreet bch_err(c, "snapshot with nonexistent parent:\n %s", 7498e877caaSKent Overstreet (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); 7508e877caaSKent Overstreet if (ret) 7518e877caaSKent Overstreet goto err; 7528e877caaSKent Overstreet 7538e877caaSKent Overstreet if (le32_to_cpu(v.children[0]) != k.k->p.offset && 7548e877caaSKent Overstreet le32_to_cpu(v.children[1]) != k.k->p.offset) { 7558e877caaSKent Overstreet bch_err(c, "snapshot parent %u missing pointer to child %llu", 7568e877caaSKent Overstreet id, k.k->p.offset); 7578e877caaSKent Overstreet ret = -EINVAL; 7588e877caaSKent Overstreet goto err; 7598e877caaSKent Overstreet } 7608e877caaSKent Overstreet } 7618e877caaSKent Overstreet 7628e877caaSKent Overstreet for (i = 0; i < 2 && s.children[i]; i++) { 7638e877caaSKent Overstreet id = le32_to_cpu(s.children[i]); 7648e877caaSKent Overstreet 7658e877caaSKent Overstreet ret = bch2_snapshot_lookup(trans, id, &v); 7668e877caaSKent Overstreet if (bch2_err_matches(ret, ENOENT)) 7678e877caaSKent Overstreet bch_err(c, "snapshot node %llu has nonexistent child %u", 7688e877caaSKent Overstreet k.k->p.offset, id); 7698e877caaSKent Overstreet if (ret) 7708e877caaSKent Overstreet goto err; 7718e877caaSKent Overstreet 7728e877caaSKent Overstreet if (le32_to_cpu(v.parent) != k.k->p.offset) { 7738e877caaSKent Overstreet bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)", 7748e877caaSKent Overstreet id, le32_to_cpu(v.parent), k.k->p.offset); 7758e877caaSKent Overstreet ret = -EINVAL; 7768e877caaSKent Overstreet goto err; 7778e877caaSKent Overstreet } 7788e877caaSKent Overstreet } 7798e877caaSKent Overstreet 7808e877caaSKent Overstreet should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) && 7818e877caaSKent Overstreet !BCH_SNAPSHOT_DELETED(&s); 7828e877caaSKent Overstreet 7838e877caaSKent Overstreet if (should_have_subvol) { 7848e877caaSKent Overstreet id = le32_to_cpu(s.subvol); 7858e877caaSKent Overstreet ret = bch2_subvolume_get(trans, id, 0, false, &subvol); 7868e877caaSKent Overstreet if (bch2_err_matches(ret, ENOENT)) 7878e877caaSKent Overstreet bch_err(c, "snapshot points to nonexistent subvolume:\n %s", 7888e877caaSKent Overstreet (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); 7898e877caaSKent Overstreet if (ret) 7908e877caaSKent Overstreet goto err; 7918e877caaSKent Overstreet 7928e877caaSKent Overstreet if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) { 7938e877caaSKent Overstreet bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL", 7948e877caaSKent Overstreet k.k->p.offset); 7958e877caaSKent Overstreet ret = -EINVAL; 7968e877caaSKent Overstreet goto err; 7978e877caaSKent Overstreet } 7988e877caaSKent Overstreet } else { 799b65db750SKent Overstreet if (fsck_err_on(s.subvol, 800b65db750SKent Overstreet c, snapshot_should_not_have_subvol, 801b65db750SKent Overstreet "snapshot should not point to subvol:\n %s", 8028e877caaSKent Overstreet (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 8038e877caaSKent Overstreet u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); 8048e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(u); 8058e877caaSKent Overstreet if (ret) 8068e877caaSKent Overstreet goto err; 8078e877caaSKent Overstreet 8088e877caaSKent Overstreet u->v.subvol = 0; 8098e877caaSKent Overstreet s = u->v; 8108e877caaSKent Overstreet } 8118e877caaSKent Overstreet } 8128e877caaSKent Overstreet 8138e877caaSKent Overstreet ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree)); 8148e877caaSKent Overstreet if (ret < 0) 8158e877caaSKent Overstreet goto err; 8168e877caaSKent Overstreet 817b65db750SKent Overstreet if (fsck_err_on(!ret, c, snapshot_to_bad_snapshot_tree, 818b65db750SKent Overstreet "snapshot points to missing/incorrect tree:\n %s", 8198e877caaSKent Overstreet (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 8208e877caaSKent Overstreet ret = snapshot_tree_ptr_repair(trans, iter, k, &s); 8218e877caaSKent Overstreet if (ret) 8228e877caaSKent Overstreet goto err; 8238e877caaSKent Overstreet } 8248e877caaSKent Overstreet ret = 0; 8258e877caaSKent Overstreet 8268e877caaSKent Overstreet real_depth = bch2_snapshot_depth(c, parent_id); 8278e877caaSKent Overstreet 828074cbcdaSKent Overstreet if (fsck_err_on(le32_to_cpu(s.depth) != real_depth, 829074cbcdaSKent Overstreet c, snapshot_bad_depth, 830b65db750SKent Overstreet "snapshot with incorrect depth field, should be %u:\n %s", 831074cbcdaSKent Overstreet real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 8328e877caaSKent Overstreet u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); 8338e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(u); 8348e877caaSKent Overstreet if (ret) 8358e877caaSKent Overstreet goto err; 8368e877caaSKent Overstreet 8378e877caaSKent Overstreet u->v.depth = cpu_to_le32(real_depth); 8388e877caaSKent Overstreet s = u->v; 8398e877caaSKent Overstreet } 8408e877caaSKent Overstreet 841097d4cc8SKent Overstreet ret = snapshot_skiplist_good(trans, k.k->p.offset, s); 8428e877caaSKent Overstreet if (ret < 0) 8438e877caaSKent Overstreet goto err; 8448e877caaSKent Overstreet 845074cbcdaSKent Overstreet if (fsck_err_on(!ret, c, snapshot_bad_skiplist, 846b65db750SKent Overstreet "snapshot with bad skiplist field:\n %s", 847074cbcdaSKent Overstreet (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 8488e877caaSKent Overstreet u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); 8498e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(u); 8508e877caaSKent Overstreet if (ret) 8518e877caaSKent Overstreet goto err; 8528e877caaSKent Overstreet 8538e877caaSKent Overstreet for (i = 0; i < ARRAY_SIZE(u->v.skip); i++) 8548e877caaSKent Overstreet u->v.skip[i] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent_id)); 8558e877caaSKent Overstreet 8568e877caaSKent Overstreet bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_le32); 8578e877caaSKent Overstreet s = u->v; 8588e877caaSKent Overstreet } 8598e877caaSKent Overstreet ret = 0; 8608e877caaSKent Overstreet err: 8618e877caaSKent Overstreet fsck_err: 8628e877caaSKent Overstreet printbuf_exit(&buf); 8638e877caaSKent Overstreet return ret; 8648e877caaSKent Overstreet } 8658e877caaSKent Overstreet 8668e877caaSKent Overstreet int bch2_check_snapshots(struct bch_fs *c) 8678e877caaSKent Overstreet { 8688e877caaSKent Overstreet /* 8698e877caaSKent Overstreet * We iterate backwards as checking/fixing the depth field requires that 8708e877caaSKent Overstreet * the parent's depth already be correct: 8718e877caaSKent Overstreet */ 87280eab7a7SKent Overstreet int ret = bch2_trans_run(c, 8736bd68ec2SKent Overstreet for_each_btree_key_reverse_commit(trans, iter, 8748e877caaSKent Overstreet BTREE_ID_snapshots, POS_MAX, 8758e877caaSKent Overstreet BTREE_ITER_PREFETCH, k, 8763f0e297dSKent Overstreet NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 8776bd68ec2SKent Overstreet check_snapshot(trans, &iter, k))); 8788e877caaSKent Overstreet bch_err_fn(c, ret); 8798e877caaSKent Overstreet return ret; 8808e877caaSKent Overstreet } 8818e877caaSKent Overstreet 8828e877caaSKent Overstreet /* 8838e877caaSKent Overstreet * Mark a snapshot as deleted, for future cleanup: 8848e877caaSKent Overstreet */ 8858e877caaSKent Overstreet int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id) 8868e877caaSKent Overstreet { 8878e877caaSKent Overstreet struct btree_iter iter; 8888e877caaSKent Overstreet struct bkey_i_snapshot *s; 8898e877caaSKent Overstreet int ret = 0; 8908e877caaSKent Overstreet 8918e877caaSKent Overstreet s = bch2_bkey_get_mut_typed(trans, &iter, 8928e877caaSKent Overstreet BTREE_ID_snapshots, POS(0, id), 8938e877caaSKent Overstreet 0, snapshot); 8948e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(s); 8958e877caaSKent Overstreet if (unlikely(ret)) { 8968e877caaSKent Overstreet bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), 8978e877caaSKent Overstreet trans->c, "missing snapshot %u", id); 8988e877caaSKent Overstreet return ret; 8998e877caaSKent Overstreet } 9008e877caaSKent Overstreet 9018e877caaSKent Overstreet /* already deleted? */ 9028e877caaSKent Overstreet if (BCH_SNAPSHOT_DELETED(&s->v)) 9038e877caaSKent Overstreet goto err; 9048e877caaSKent Overstreet 9058e877caaSKent Overstreet SET_BCH_SNAPSHOT_DELETED(&s->v, true); 9068e877caaSKent Overstreet SET_BCH_SNAPSHOT_SUBVOL(&s->v, false); 9078e877caaSKent Overstreet s->v.subvol = 0; 9088e877caaSKent Overstreet err: 9098e877caaSKent Overstreet bch2_trans_iter_exit(trans, &iter); 9108e877caaSKent Overstreet return ret; 9118e877caaSKent Overstreet } 9128e877caaSKent Overstreet 913f55d6e07SKent Overstreet static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s) 914f55d6e07SKent Overstreet { 915f55d6e07SKent Overstreet if (le32_to_cpu(s->children[0]) < le32_to_cpu(s->children[1])) 916f55d6e07SKent Overstreet swap(s->children[0], s->children[1]); 917f55d6e07SKent Overstreet } 918f55d6e07SKent Overstreet 91996dea3d5SKent Overstreet static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id) 9208e877caaSKent Overstreet { 9218e877caaSKent Overstreet struct bch_fs *c = trans->c; 9228e877caaSKent Overstreet struct btree_iter iter, p_iter = (struct btree_iter) { NULL }; 923f55d6e07SKent Overstreet struct btree_iter c_iter = (struct btree_iter) { NULL }; 9248e877caaSKent Overstreet struct btree_iter tree_iter = (struct btree_iter) { NULL }; 9258e877caaSKent Overstreet struct bkey_s_c_snapshot s; 926f55d6e07SKent Overstreet u32 parent_id, child_id; 9278e877caaSKent Overstreet unsigned i; 9288e877caaSKent Overstreet int ret = 0; 9298e877caaSKent Overstreet 9308e877caaSKent Overstreet s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id), 9318e877caaSKent Overstreet BTREE_ITER_INTENT, snapshot); 9328e877caaSKent Overstreet ret = bkey_err(s); 9338e877caaSKent Overstreet bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, 9348e877caaSKent Overstreet "missing snapshot %u", id); 9358e877caaSKent Overstreet 9368e877caaSKent Overstreet if (ret) 9378e877caaSKent Overstreet goto err; 9388e877caaSKent Overstreet 939f55d6e07SKent Overstreet BUG_ON(s.v->children[1]); 940f55d6e07SKent Overstreet 9418e877caaSKent Overstreet parent_id = le32_to_cpu(s.v->parent); 942f55d6e07SKent Overstreet child_id = le32_to_cpu(s.v->children[0]); 9438e877caaSKent Overstreet 9448e877caaSKent Overstreet if (parent_id) { 9458e877caaSKent Overstreet struct bkey_i_snapshot *parent; 9468e877caaSKent Overstreet 9478e877caaSKent Overstreet parent = bch2_bkey_get_mut_typed(trans, &p_iter, 9488e877caaSKent Overstreet BTREE_ID_snapshots, POS(0, parent_id), 9498e877caaSKent Overstreet 0, snapshot); 9508e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(parent); 9518e877caaSKent Overstreet bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, 9528e877caaSKent Overstreet "missing snapshot %u", parent_id); 953f55d6e07SKent Overstreet if (unlikely(ret)) 9548e877caaSKent Overstreet goto err; 9558e877caaSKent Overstreet 956f55d6e07SKent Overstreet /* find entry in parent->children for node being deleted */ 9578e877caaSKent Overstreet for (i = 0; i < 2; i++) 9588e877caaSKent Overstreet if (le32_to_cpu(parent->v.children[i]) == id) 9598e877caaSKent Overstreet break; 9608e877caaSKent Overstreet 961f55d6e07SKent Overstreet if (bch2_fs_inconsistent_on(i == 2, c, 962f55d6e07SKent Overstreet "snapshot %u missing child pointer to %u", 963f55d6e07SKent Overstreet parent_id, id)) 964f55d6e07SKent Overstreet goto err; 9658e877caaSKent Overstreet 9660a11adfbSKent Overstreet parent->v.children[i] = cpu_to_le32(child_id); 967f55d6e07SKent Overstreet 968f55d6e07SKent Overstreet normalize_snapshot_child_pointers(&parent->v); 969f55d6e07SKent Overstreet } 970f55d6e07SKent Overstreet 971f55d6e07SKent Overstreet if (child_id) { 972f55d6e07SKent Overstreet struct bkey_i_snapshot *child; 973f55d6e07SKent Overstreet 974f55d6e07SKent Overstreet child = bch2_bkey_get_mut_typed(trans, &c_iter, 975f55d6e07SKent Overstreet BTREE_ID_snapshots, POS(0, child_id), 976f55d6e07SKent Overstreet 0, snapshot); 977f55d6e07SKent Overstreet ret = PTR_ERR_OR_ZERO(child); 978f55d6e07SKent Overstreet bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, 979f55d6e07SKent Overstreet "missing snapshot %u", child_id); 980f55d6e07SKent Overstreet if (unlikely(ret)) 981f55d6e07SKent Overstreet goto err; 982f55d6e07SKent Overstreet 983f55d6e07SKent Overstreet child->v.parent = cpu_to_le32(parent_id); 984f55d6e07SKent Overstreet 985f55d6e07SKent Overstreet if (!child->v.parent) { 986f55d6e07SKent Overstreet child->v.skip[0] = 0; 987f55d6e07SKent Overstreet child->v.skip[1] = 0; 988f55d6e07SKent Overstreet child->v.skip[2] = 0; 989f55d6e07SKent Overstreet } 990f55d6e07SKent Overstreet } 991f55d6e07SKent Overstreet 992f55d6e07SKent Overstreet if (!parent_id) { 9938e877caaSKent Overstreet /* 9948e877caaSKent Overstreet * We're deleting the root of a snapshot tree: update the 9958e877caaSKent Overstreet * snapshot_tree entry to point to the new root, or delete it if 9968e877caaSKent Overstreet * this is the last snapshot ID in this tree: 9978e877caaSKent Overstreet */ 9988e877caaSKent Overstreet struct bkey_i_snapshot_tree *s_t; 9998e877caaSKent Overstreet 10008e877caaSKent Overstreet BUG_ON(s.v->children[1]); 10018e877caaSKent Overstreet 10028e877caaSKent Overstreet s_t = bch2_bkey_get_mut_typed(trans, &tree_iter, 10038e877caaSKent Overstreet BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)), 10048e877caaSKent Overstreet 0, snapshot_tree); 10058e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(s_t); 10068e877caaSKent Overstreet if (ret) 10078e877caaSKent Overstreet goto err; 10088e877caaSKent Overstreet 10098e877caaSKent Overstreet if (s.v->children[0]) { 10108e877caaSKent Overstreet s_t->v.root_snapshot = s.v->children[0]; 10118e877caaSKent Overstreet } else { 10128e877caaSKent Overstreet s_t->k.type = KEY_TYPE_deleted; 10138e877caaSKent Overstreet set_bkey_val_u64s(&s_t->k, 0); 10148e877caaSKent Overstreet } 10158e877caaSKent Overstreet } 10168e877caaSKent Overstreet 10178e877caaSKent Overstreet ret = bch2_btree_delete_at(trans, &iter, 0); 10188e877caaSKent Overstreet err: 10198e877caaSKent Overstreet bch2_trans_iter_exit(trans, &tree_iter); 10208e877caaSKent Overstreet bch2_trans_iter_exit(trans, &p_iter); 1021f55d6e07SKent Overstreet bch2_trans_iter_exit(trans, &c_iter); 10228e877caaSKent Overstreet bch2_trans_iter_exit(trans, &iter); 10238e877caaSKent Overstreet return ret; 10248e877caaSKent Overstreet } 10258e877caaSKent Overstreet 10268e877caaSKent Overstreet static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, 10278e877caaSKent Overstreet u32 *new_snapids, 10288e877caaSKent Overstreet u32 *snapshot_subvols, 10298e877caaSKent Overstreet unsigned nr_snapids) 10308e877caaSKent Overstreet { 10318e877caaSKent Overstreet struct bch_fs *c = trans->c; 10328e877caaSKent Overstreet struct btree_iter iter; 10338e877caaSKent Overstreet struct bkey_i_snapshot *n; 10348e877caaSKent Overstreet struct bkey_s_c k; 10358e877caaSKent Overstreet unsigned i, j; 10368e877caaSKent Overstreet u32 depth = bch2_snapshot_depth(c, parent); 10378e877caaSKent Overstreet int ret; 10388e877caaSKent Overstreet 10398e877caaSKent Overstreet bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, 10408e877caaSKent Overstreet POS_MIN, BTREE_ITER_INTENT); 10418e877caaSKent Overstreet k = bch2_btree_iter_peek(&iter); 10428e877caaSKent Overstreet ret = bkey_err(k); 10438e877caaSKent Overstreet if (ret) 10448e877caaSKent Overstreet goto err; 10458e877caaSKent Overstreet 10468e877caaSKent Overstreet for (i = 0; i < nr_snapids; i++) { 10478e877caaSKent Overstreet k = bch2_btree_iter_prev_slot(&iter); 10488e877caaSKent Overstreet ret = bkey_err(k); 10498e877caaSKent Overstreet if (ret) 10508e877caaSKent Overstreet goto err; 10518e877caaSKent Overstreet 10528e877caaSKent Overstreet if (!k.k || !k.k->p.offset) { 10538e877caaSKent Overstreet ret = -BCH_ERR_ENOSPC_snapshot_create; 10548e877caaSKent Overstreet goto err; 10558e877caaSKent Overstreet } 10568e877caaSKent Overstreet 10578e877caaSKent Overstreet n = bch2_bkey_alloc(trans, &iter, 0, snapshot); 10588e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(n); 10598e877caaSKent Overstreet if (ret) 10608e877caaSKent Overstreet goto err; 10618e877caaSKent Overstreet 10628e877caaSKent Overstreet n->v.flags = 0; 10638e877caaSKent Overstreet n->v.parent = cpu_to_le32(parent); 10648e877caaSKent Overstreet n->v.subvol = cpu_to_le32(snapshot_subvols[i]); 10658e877caaSKent Overstreet n->v.tree = cpu_to_le32(tree); 10668e877caaSKent Overstreet n->v.depth = cpu_to_le32(depth); 1067d32088f2SKent Overstreet n->v.btime.lo = cpu_to_le64(bch2_current_time(c)); 1068d32088f2SKent Overstreet n->v.btime.hi = 0; 10698e877caaSKent Overstreet 10708e877caaSKent Overstreet for (j = 0; j < ARRAY_SIZE(n->v.skip); j++) 10718e877caaSKent Overstreet n->v.skip[j] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent)); 10728e877caaSKent Overstreet 10738e877caaSKent Overstreet bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_le32); 10748e877caaSKent Overstreet SET_BCH_SNAPSHOT_SUBVOL(&n->v, true); 10758e877caaSKent Overstreet 1076ad00bce0SKent Overstreet ret = __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, 10778e877caaSKent Overstreet bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0); 10788e877caaSKent Overstreet if (ret) 10798e877caaSKent Overstreet goto err; 10808e877caaSKent Overstreet 10818e877caaSKent Overstreet new_snapids[i] = iter.pos.offset; 1082eebe8a84SKent Overstreet 1083eebe8a84SKent Overstreet mutex_lock(&c->snapshot_table_lock); 1084eebe8a84SKent Overstreet snapshot_t_mut(c, new_snapids[i])->equiv = new_snapids[i]; 1085eebe8a84SKent Overstreet mutex_unlock(&c->snapshot_table_lock); 10868e877caaSKent Overstreet } 10878e877caaSKent Overstreet err: 10888e877caaSKent Overstreet bch2_trans_iter_exit(trans, &iter); 10898e877caaSKent Overstreet return ret; 10908e877caaSKent Overstreet } 10918e877caaSKent Overstreet 10928e877caaSKent Overstreet /* 10938e877caaSKent Overstreet * Create new snapshot IDs as children of an existing snapshot ID: 10948e877caaSKent Overstreet */ 10958e877caaSKent Overstreet static int bch2_snapshot_node_create_children(struct btree_trans *trans, u32 parent, 10968e877caaSKent Overstreet u32 *new_snapids, 10978e877caaSKent Overstreet u32 *snapshot_subvols, 10988e877caaSKent Overstreet unsigned nr_snapids) 10998e877caaSKent Overstreet { 11008e877caaSKent Overstreet struct btree_iter iter; 11018e877caaSKent Overstreet struct bkey_i_snapshot *n_parent; 11028e877caaSKent Overstreet int ret = 0; 11038e877caaSKent Overstreet 11048e877caaSKent Overstreet n_parent = bch2_bkey_get_mut_typed(trans, &iter, 11058e877caaSKent Overstreet BTREE_ID_snapshots, POS(0, parent), 11068e877caaSKent Overstreet 0, snapshot); 11078e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(n_parent); 11088e877caaSKent Overstreet if (unlikely(ret)) { 11098e877caaSKent Overstreet if (bch2_err_matches(ret, ENOENT)) 11108e877caaSKent Overstreet bch_err(trans->c, "snapshot %u not found", parent); 11118e877caaSKent Overstreet return ret; 11128e877caaSKent Overstreet } 11138e877caaSKent Overstreet 11148e877caaSKent Overstreet if (n_parent->v.children[0] || n_parent->v.children[1]) { 11158e877caaSKent Overstreet bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children"); 11168e877caaSKent Overstreet ret = -EINVAL; 11178e877caaSKent Overstreet goto err; 11188e877caaSKent Overstreet } 11198e877caaSKent Overstreet 11208e877caaSKent Overstreet ret = create_snapids(trans, parent, le32_to_cpu(n_parent->v.tree), 11218e877caaSKent Overstreet new_snapids, snapshot_subvols, nr_snapids); 11228e877caaSKent Overstreet if (ret) 11238e877caaSKent Overstreet goto err; 11248e877caaSKent Overstreet 11258e877caaSKent Overstreet n_parent->v.children[0] = cpu_to_le32(new_snapids[0]); 11268e877caaSKent Overstreet n_parent->v.children[1] = cpu_to_le32(new_snapids[1]); 11278e877caaSKent Overstreet n_parent->v.subvol = 0; 11288e877caaSKent Overstreet SET_BCH_SNAPSHOT_SUBVOL(&n_parent->v, false); 11298e877caaSKent Overstreet err: 11308e877caaSKent Overstreet bch2_trans_iter_exit(trans, &iter); 11318e877caaSKent Overstreet return ret; 11328e877caaSKent Overstreet } 11338e877caaSKent Overstreet 11348e877caaSKent Overstreet /* 11358e877caaSKent Overstreet * Create a snapshot node that is the root of a new tree: 11368e877caaSKent Overstreet */ 11378e877caaSKent Overstreet static int bch2_snapshot_node_create_tree(struct btree_trans *trans, 11388e877caaSKent Overstreet u32 *new_snapids, 11398e877caaSKent Overstreet u32 *snapshot_subvols, 11408e877caaSKent Overstreet unsigned nr_snapids) 11418e877caaSKent Overstreet { 11428e877caaSKent Overstreet struct bkey_i_snapshot_tree *n_tree; 11438e877caaSKent Overstreet int ret; 11448e877caaSKent Overstreet 11458e877caaSKent Overstreet n_tree = __bch2_snapshot_tree_create(trans); 11468e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(n_tree) ?: 11478e877caaSKent Overstreet create_snapids(trans, 0, n_tree->k.p.offset, 11488e877caaSKent Overstreet new_snapids, snapshot_subvols, nr_snapids); 11498e877caaSKent Overstreet if (ret) 11508e877caaSKent Overstreet return ret; 11518e877caaSKent Overstreet 11528e877caaSKent Overstreet n_tree->v.master_subvol = cpu_to_le32(snapshot_subvols[0]); 11538e877caaSKent Overstreet n_tree->v.root_snapshot = cpu_to_le32(new_snapids[0]); 11548e877caaSKent Overstreet return 0; 11558e877caaSKent Overstreet } 11568e877caaSKent Overstreet 11578e877caaSKent Overstreet int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent, 11588e877caaSKent Overstreet u32 *new_snapids, 11598e877caaSKent Overstreet u32 *snapshot_subvols, 11608e877caaSKent Overstreet unsigned nr_snapids) 11618e877caaSKent Overstreet { 11628e877caaSKent Overstreet BUG_ON((parent == 0) != (nr_snapids == 1)); 11638e877caaSKent Overstreet BUG_ON((parent != 0) != (nr_snapids == 2)); 11648e877caaSKent Overstreet 11658e877caaSKent Overstreet return parent 11668e877caaSKent Overstreet ? bch2_snapshot_node_create_children(trans, parent, 11678e877caaSKent Overstreet new_snapids, snapshot_subvols, nr_snapids) 11688e877caaSKent Overstreet : bch2_snapshot_node_create_tree(trans, 11698e877caaSKent Overstreet new_snapids, snapshot_subvols, nr_snapids); 11708e877caaSKent Overstreet 11718e877caaSKent Overstreet } 11728e877caaSKent Overstreet 11738e877caaSKent Overstreet /* 11748e877caaSKent Overstreet * If we have an unlinked inode in an internal snapshot node, and the inode 11758e877caaSKent Overstreet * really has been deleted in all child snapshots, how does this get cleaned up? 11768e877caaSKent Overstreet * 11778e877caaSKent Overstreet * first there is the problem of how keys that have been overwritten in all 11788e877caaSKent Overstreet * child snapshots get deleted (unimplemented?), but inodes may perhaps be 11798e877caaSKent Overstreet * special? 11808e877caaSKent Overstreet * 11818e877caaSKent Overstreet * also: unlinked inode in internal snapshot appears to not be getting deleted 11828e877caaSKent Overstreet * correctly if inode doesn't exist in leaf snapshots 1183f55d6e07SKent Overstreet * 1184f55d6e07SKent Overstreet * solution: 1185f55d6e07SKent Overstreet * 1186f55d6e07SKent Overstreet * for a key in an interior snapshot node that needs work to be done that 1187f55d6e07SKent Overstreet * requires it to be mutated: iterate over all descendent leaf nodes and copy 1188f55d6e07SKent Overstreet * that key to snapshot leaf nodes, where we can mutate it 11898e877caaSKent Overstreet */ 11908e877caaSKent Overstreet 11918e877caaSKent Overstreet static int snapshot_delete_key(struct btree_trans *trans, 11928e877caaSKent Overstreet struct btree_iter *iter, 11938e877caaSKent Overstreet struct bkey_s_c k, 11948e877caaSKent Overstreet snapshot_id_list *deleted, 11958e877caaSKent Overstreet snapshot_id_list *equiv_seen, 11968e877caaSKent Overstreet struct bpos *last_pos) 11978e877caaSKent Overstreet { 11988e877caaSKent Overstreet struct bch_fs *c = trans->c; 11998e877caaSKent Overstreet u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot); 12008e877caaSKent Overstreet 12018e877caaSKent Overstreet if (!bkey_eq(k.k->p, *last_pos)) 12028e877caaSKent Overstreet equiv_seen->nr = 0; 12038e877caaSKent Overstreet *last_pos = k.k->p; 12048e877caaSKent Overstreet 12058e877caaSKent Overstreet if (snapshot_list_has_id(deleted, k.k->p.snapshot) || 12068e877caaSKent Overstreet snapshot_list_has_id(equiv_seen, equiv)) { 12078e877caaSKent Overstreet return bch2_btree_delete_at(trans, iter, 12088e877caaSKent Overstreet BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); 12098e877caaSKent Overstreet } else { 12108e877caaSKent Overstreet return snapshot_list_add(c, equiv_seen, equiv); 12118e877caaSKent Overstreet } 12128e877caaSKent Overstreet } 12138e877caaSKent Overstreet 1214f55d6e07SKent Overstreet static int move_key_to_correct_snapshot(struct btree_trans *trans, 1215f55d6e07SKent Overstreet struct btree_iter *iter, 1216f55d6e07SKent Overstreet struct bkey_s_c k) 1217f55d6e07SKent Overstreet { 1218f55d6e07SKent Overstreet struct bch_fs *c = trans->c; 1219f55d6e07SKent Overstreet u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot); 1220f55d6e07SKent Overstreet 1221f55d6e07SKent Overstreet /* 1222f55d6e07SKent Overstreet * When we have a linear chain of snapshot nodes, we consider 1223f55d6e07SKent Overstreet * those to form an equivalence class: we're going to collapse 1224f55d6e07SKent Overstreet * them all down to a single node, and keep the leaf-most node - 1225f55d6e07SKent Overstreet * which has the same id as the equivalence class id. 1226f55d6e07SKent Overstreet * 1227f55d6e07SKent Overstreet * If there are multiple keys in different snapshots at the same 1228f55d6e07SKent Overstreet * position, we're only going to keep the one in the newest 1229f55d6e07SKent Overstreet * snapshot - the rest have been overwritten and are redundant, 1230f55d6e07SKent Overstreet * and for the key we're going to keep we need to move it to the 1231f55d6e07SKent Overstreet * equivalance class ID if it's not there already. 1232f55d6e07SKent Overstreet */ 1233f55d6e07SKent Overstreet if (equiv != k.k->p.snapshot) { 1234f55d6e07SKent Overstreet struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); 1235f55d6e07SKent Overstreet struct btree_iter new_iter; 1236f55d6e07SKent Overstreet int ret; 1237f55d6e07SKent Overstreet 1238f55d6e07SKent Overstreet ret = PTR_ERR_OR_ZERO(new); 1239f55d6e07SKent Overstreet if (ret) 1240f55d6e07SKent Overstreet return ret; 1241f55d6e07SKent Overstreet 1242f55d6e07SKent Overstreet new->k.p.snapshot = equiv; 1243f55d6e07SKent Overstreet 1244f55d6e07SKent Overstreet bch2_trans_iter_init(trans, &new_iter, iter->btree_id, new->k.p, 1245f55d6e07SKent Overstreet BTREE_ITER_ALL_SNAPSHOTS| 1246f55d6e07SKent Overstreet BTREE_ITER_CACHED| 1247f55d6e07SKent Overstreet BTREE_ITER_INTENT); 1248f55d6e07SKent Overstreet 1249f55d6e07SKent Overstreet ret = bch2_btree_iter_traverse(&new_iter) ?: 1250f55d6e07SKent Overstreet bch2_trans_update(trans, &new_iter, new, 1251f55d6e07SKent Overstreet BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: 1252f55d6e07SKent Overstreet bch2_btree_delete_at(trans, iter, 1253f55d6e07SKent Overstreet BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); 1254f55d6e07SKent Overstreet bch2_trans_iter_exit(trans, &new_iter); 1255f55d6e07SKent Overstreet if (ret) 1256f55d6e07SKent Overstreet return ret; 1257f55d6e07SKent Overstreet } 1258f55d6e07SKent Overstreet 1259f55d6e07SKent Overstreet return 0; 1260f55d6e07SKent Overstreet } 1261f55d6e07SKent Overstreet 1262b0b5bbf9SKent Overstreet static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c k) 12638e877caaSKent Overstreet { 12648e877caaSKent Overstreet struct bkey_s_c_snapshot snap; 12658e877caaSKent Overstreet u32 children[2]; 12668e877caaSKent Overstreet int ret; 12678e877caaSKent Overstreet 12688e877caaSKent Overstreet if (k.k->type != KEY_TYPE_snapshot) 12698e877caaSKent Overstreet return 0; 12708e877caaSKent Overstreet 12718e877caaSKent Overstreet snap = bkey_s_c_to_snapshot(k); 12728e877caaSKent Overstreet if (BCH_SNAPSHOT_DELETED(snap.v) || 12738e877caaSKent Overstreet BCH_SNAPSHOT_SUBVOL(snap.v)) 12748e877caaSKent Overstreet return 0; 12758e877caaSKent Overstreet 12768e877caaSKent Overstreet children[0] = le32_to_cpu(snap.v->children[0]); 12778e877caaSKent Overstreet children[1] = le32_to_cpu(snap.v->children[1]); 12788e877caaSKent Overstreet 12798e877caaSKent Overstreet ret = bch2_snapshot_live(trans, children[0]) ?: 12808e877caaSKent Overstreet bch2_snapshot_live(trans, children[1]); 12818e877caaSKent Overstreet if (ret < 0) 12828e877caaSKent Overstreet return ret; 1283b0b5bbf9SKent Overstreet return !ret; 1284b0b5bbf9SKent Overstreet } 12858e877caaSKent Overstreet 1286b0b5bbf9SKent Overstreet /* 1287b0b5bbf9SKent Overstreet * For a given snapshot, if it doesn't have a subvolume that points to it, and 1288b0b5bbf9SKent Overstreet * it doesn't have child snapshot nodes - it's now redundant and we can mark it 1289b0b5bbf9SKent Overstreet * as deleted. 1290b0b5bbf9SKent Overstreet */ 1291b0b5bbf9SKent Overstreet static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct bkey_s_c k) 1292b0b5bbf9SKent Overstreet { 1293b0b5bbf9SKent Overstreet int ret = bch2_snapshot_needs_delete(trans, k); 1294b0b5bbf9SKent Overstreet 1295b0b5bbf9SKent Overstreet return ret <= 0 1296b0b5bbf9SKent Overstreet ? ret 1297b0b5bbf9SKent Overstreet : bch2_snapshot_node_set_deleted(trans, k.k->p.offset); 12988e877caaSKent Overstreet } 12998e877caaSKent Overstreet 1300f55d6e07SKent Overstreet static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n, 1301f55d6e07SKent Overstreet snapshot_id_list *skip) 1302f55d6e07SKent Overstreet { 1303f55d6e07SKent Overstreet rcu_read_lock(); 13041e2d3999SKent Overstreet while (snapshot_list_has_id(skip, id)) 13051e2d3999SKent Overstreet id = __bch2_snapshot_parent(c, id); 13061e2d3999SKent Overstreet 1307f55d6e07SKent Overstreet while (n--) { 1308f55d6e07SKent Overstreet do { 1309f55d6e07SKent Overstreet id = __bch2_snapshot_parent(c, id); 1310f55d6e07SKent Overstreet } while (snapshot_list_has_id(skip, id)); 1311f55d6e07SKent Overstreet } 1312f55d6e07SKent Overstreet rcu_read_unlock(); 1313f55d6e07SKent Overstreet 1314f55d6e07SKent Overstreet return id; 1315f55d6e07SKent Overstreet } 1316f55d6e07SKent Overstreet 1317f55d6e07SKent Overstreet static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, 1318f55d6e07SKent Overstreet struct btree_iter *iter, struct bkey_s_c k, 1319f55d6e07SKent Overstreet snapshot_id_list *deleted) 1320f55d6e07SKent Overstreet { 1321f55d6e07SKent Overstreet struct bch_fs *c = trans->c; 1322f55d6e07SKent Overstreet u32 nr_deleted_ancestors = 0; 1323f55d6e07SKent Overstreet struct bkey_i_snapshot *s; 1324f55d6e07SKent Overstreet int ret; 1325f55d6e07SKent Overstreet 1326f55d6e07SKent Overstreet if (k.k->type != KEY_TYPE_snapshot) 1327f55d6e07SKent Overstreet return 0; 1328f55d6e07SKent Overstreet 1329f55d6e07SKent Overstreet if (snapshot_list_has_id(deleted, k.k->p.offset)) 1330f55d6e07SKent Overstreet return 0; 1331f55d6e07SKent Overstreet 1332f55d6e07SKent Overstreet s = bch2_bkey_make_mut_noupdate_typed(trans, k, snapshot); 1333f55d6e07SKent Overstreet ret = PTR_ERR_OR_ZERO(s); 1334f55d6e07SKent Overstreet if (ret) 1335f55d6e07SKent Overstreet return ret; 1336f55d6e07SKent Overstreet 1337f55d6e07SKent Overstreet darray_for_each(*deleted, i) 1338f55d6e07SKent Overstreet nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, *i); 1339f55d6e07SKent Overstreet 1340f55d6e07SKent Overstreet if (!nr_deleted_ancestors) 1341f55d6e07SKent Overstreet return 0; 1342f55d6e07SKent Overstreet 1343f55d6e07SKent Overstreet le32_add_cpu(&s->v.depth, -nr_deleted_ancestors); 1344f55d6e07SKent Overstreet 1345f55d6e07SKent Overstreet if (!s->v.depth) { 1346f55d6e07SKent Overstreet s->v.skip[0] = 0; 1347f55d6e07SKent Overstreet s->v.skip[1] = 0; 1348f55d6e07SKent Overstreet s->v.skip[2] = 0; 1349f55d6e07SKent Overstreet } else { 1350f55d6e07SKent Overstreet u32 depth = le32_to_cpu(s->v.depth); 1351f55d6e07SKent Overstreet u32 parent = bch2_snapshot_parent(c, s->k.p.offset); 1352f55d6e07SKent Overstreet 1353f55d6e07SKent Overstreet for (unsigned j = 0; j < ARRAY_SIZE(s->v.skip); j++) { 1354f55d6e07SKent Overstreet u32 id = le32_to_cpu(s->v.skip[j]); 1355f55d6e07SKent Overstreet 1356f55d6e07SKent Overstreet if (snapshot_list_has_id(deleted, id)) { 13575394fe94SKent Overstreet id = bch2_snapshot_nth_parent_skip(c, 1358f55d6e07SKent Overstreet parent, 13595394fe94SKent Overstreet depth > 1 13605394fe94SKent Overstreet ? get_random_u32_below(depth - 1) 13615394fe94SKent Overstreet : 0, 13625394fe94SKent Overstreet deleted); 1363f55d6e07SKent Overstreet s->v.skip[j] = cpu_to_le32(id); 1364f55d6e07SKent Overstreet } 1365f55d6e07SKent Overstreet } 1366f55d6e07SKent Overstreet 1367f55d6e07SKent Overstreet bubble_sort(s->v.skip, ARRAY_SIZE(s->v.skip), cmp_le32); 1368f55d6e07SKent Overstreet } 1369f55d6e07SKent Overstreet 1370f55d6e07SKent Overstreet return bch2_trans_update(trans, iter, &s->k_i, 0); 1371f55d6e07SKent Overstreet } 1372f55d6e07SKent Overstreet 13738e877caaSKent Overstreet int bch2_delete_dead_snapshots(struct bch_fs *c) 13748e877caaSKent Overstreet { 13756bd68ec2SKent Overstreet struct btree_trans *trans; 13768e877caaSKent Overstreet snapshot_id_list deleted = { 0 }; 1377f55d6e07SKent Overstreet snapshot_id_list deleted_interior = { 0 }; 1378defd9e39SKent Overstreet u32 id; 13798e877caaSKent Overstreet int ret = 0; 13808e877caaSKent Overstreet 13813c471b65SKent Overstreet if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) 1382b0b5bbf9SKent Overstreet return 0; 1383b0b5bbf9SKent Overstreet 13843c471b65SKent Overstreet if (!test_bit(BCH_FS_started, &c->flags)) { 13858e877caaSKent Overstreet ret = bch2_fs_read_write_early(c); 13866bf3766bSColin Ian King bch_err_msg(c, ret, "deleting dead snapshots: error going rw"); 1387cf904c8dSKent Overstreet if (ret) 13888e877caaSKent Overstreet return ret; 13898e877caaSKent Overstreet } 13908e877caaSKent Overstreet 13916bd68ec2SKent Overstreet trans = bch2_trans_get(c); 13928e877caaSKent Overstreet 13938e877caaSKent Overstreet /* 13948e877caaSKent Overstreet * For every snapshot node: If we have no live children and it's not 13958e877caaSKent Overstreet * pointed to by a subvolume, delete it: 13968e877caaSKent Overstreet */ 13976bd68ec2SKent Overstreet ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, 13988e877caaSKent Overstreet POS_MIN, 0, k, 13998e877caaSKent Overstreet NULL, NULL, 0, 1400b0b5bbf9SKent Overstreet bch2_delete_redundant_snapshot(trans, k)); 1401e46c181aSKent Overstreet bch_err_msg(c, ret, "deleting redundant snapshots"); 1402cf904c8dSKent Overstreet if (ret) 14038e877caaSKent Overstreet goto err; 14048e877caaSKent Overstreet 14055028b907SKent Overstreet ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, 14068e877caaSKent Overstreet POS_MIN, 0, k, 14076bd68ec2SKent Overstreet bch2_snapshot_set_equiv(trans, k)); 1408e46c181aSKent Overstreet bch_err_msg(c, ret, "in bch2_snapshots_set_equiv"); 1409cf904c8dSKent Overstreet if (ret) 14108e877caaSKent Overstreet goto err; 14118e877caaSKent Overstreet 14125028b907SKent Overstreet ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, 141327b2df98SKent Overstreet POS_MIN, 0, k, ({ 14148e877caaSKent Overstreet if (k.k->type != KEY_TYPE_snapshot) 14158e877caaSKent Overstreet continue; 14168e877caaSKent Overstreet 141780eab7a7SKent Overstreet BCH_SNAPSHOT_DELETED(bkey_s_c_to_snapshot(k).v) 141827b2df98SKent Overstreet ? snapshot_list_add(c, &deleted, k.k->p.offset) 141927b2df98SKent Overstreet : 0; 142027b2df98SKent Overstreet })); 14218e877caaSKent Overstreet bch_err_msg(c, ret, "walking snapshots"); 1422cf904c8dSKent Overstreet if (ret) 14238e877caaSKent Overstreet goto err; 14248e877caaSKent Overstreet 14258e877caaSKent Overstreet for (id = 0; id < BTREE_ID_NR; id++) { 14268e877caaSKent Overstreet struct bpos last_pos = POS_MIN; 14278e877caaSKent Overstreet snapshot_id_list equiv_seen = { 0 }; 1428f55d6e07SKent Overstreet struct disk_reservation res = { 0 }; 14298e877caaSKent Overstreet 14308e877caaSKent Overstreet if (!btree_type_has_snapshots(id)) 14318e877caaSKent Overstreet continue; 14328e877caaSKent Overstreet 14332e7acdfbSKent Overstreet /* 14342e7acdfbSKent Overstreet * deleted inodes btree is maintained by a trigger on the inodes 14352e7acdfbSKent Overstreet * btree - no work for us to do here, and it's not safe to scan 14362e7acdfbSKent Overstreet * it because we'll see out of date keys due to the btree write 14372e7acdfbSKent Overstreet * buffer: 14382e7acdfbSKent Overstreet */ 14392e7acdfbSKent Overstreet if (id == BTREE_ID_deleted_inodes) 14402e7acdfbSKent Overstreet continue; 14412e7acdfbSKent Overstreet 14426bd68ec2SKent Overstreet ret = for_each_btree_key_commit(trans, iter, 14438e877caaSKent Overstreet id, POS_MIN, 14448e877caaSKent Overstreet BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, 1445cb52d23eSKent Overstreet &res, NULL, BCH_TRANS_COMMIT_no_enospc, 14466bd68ec2SKent Overstreet snapshot_delete_key(trans, &iter, k, &deleted, &equiv_seen, &last_pos)) ?: 14476bd68ec2SKent Overstreet for_each_btree_key_commit(trans, iter, 1448f55d6e07SKent Overstreet id, POS_MIN, 1449f55d6e07SKent Overstreet BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, 1450cb52d23eSKent Overstreet &res, NULL, BCH_TRANS_COMMIT_no_enospc, 14516bd68ec2SKent Overstreet move_key_to_correct_snapshot(trans, &iter, k)); 14528e877caaSKent Overstreet 1453f55d6e07SKent Overstreet bch2_disk_reservation_put(c, &res); 14548e877caaSKent Overstreet darray_exit(&equiv_seen); 14558e877caaSKent Overstreet 14568e877caaSKent Overstreet bch_err_msg(c, ret, "deleting keys from dying snapshots"); 1457cf904c8dSKent Overstreet if (ret) 14588e877caaSKent Overstreet goto err; 14598e877caaSKent Overstreet } 14608e877caaSKent Overstreet 14610dd092bfSKent Overstreet bch2_trans_unlock(trans); 146237fad949SKent Overstreet down_write(&c->snapshot_create_lock); 146337fad949SKent Overstreet 14645028b907SKent Overstreet ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, 146527b2df98SKent Overstreet POS_MIN, 0, k, ({ 1466f55d6e07SKent Overstreet u32 snapshot = k.k->p.offset; 1467f55d6e07SKent Overstreet u32 equiv = bch2_snapshot_equiv(c, snapshot); 14688e877caaSKent Overstreet 146927b2df98SKent Overstreet equiv != snapshot 147027b2df98SKent Overstreet ? snapshot_list_add(c, &deleted_interior, snapshot) 147127b2df98SKent Overstreet : 0; 147227b2df98SKent Overstreet })); 1473f55d6e07SKent Overstreet 147427b2df98SKent Overstreet bch_err_msg(c, ret, "walking snapshots"); 1475cf904c8dSKent Overstreet if (ret) 147637fad949SKent Overstreet goto err_create_lock; 147737fad949SKent Overstreet 1478f55d6e07SKent Overstreet /* 1479f55d6e07SKent Overstreet * Fixing children of deleted snapshots can't be done completely 1480f55d6e07SKent Overstreet * atomically, if we crash between here and when we delete the interior 1481f55d6e07SKent Overstreet * nodes some depth fields will be off: 1482f55d6e07SKent Overstreet */ 14836bd68ec2SKent Overstreet ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN, 1484f55d6e07SKent Overstreet BTREE_ITER_INTENT, k, 1485cb52d23eSKent Overstreet NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 14866bd68ec2SKent Overstreet bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior)); 1487f55d6e07SKent Overstreet if (ret) 148837fad949SKent Overstreet goto err_create_lock; 1489f55d6e07SKent Overstreet 1490f55d6e07SKent Overstreet darray_for_each(deleted, i) { 14916bd68ec2SKent Overstreet ret = commit_do(trans, NULL, NULL, 0, 14926bd68ec2SKent Overstreet bch2_snapshot_node_delete(trans, *i)); 1493f55d6e07SKent Overstreet bch_err_msg(c, ret, "deleting snapshot %u", *i); 1494cf904c8dSKent Overstreet if (ret) 149537fad949SKent Overstreet goto err_create_lock; 1496f55d6e07SKent Overstreet } 1497f55d6e07SKent Overstreet 1498f55d6e07SKent Overstreet darray_for_each(deleted_interior, i) { 14996bd68ec2SKent Overstreet ret = commit_do(trans, NULL, NULL, 0, 15006bd68ec2SKent Overstreet bch2_snapshot_node_delete(trans, *i)); 1501f55d6e07SKent Overstreet bch_err_msg(c, ret, "deleting snapshot %u", *i); 1502cf904c8dSKent Overstreet if (ret) 150337fad949SKent Overstreet goto err_create_lock; 15048e877caaSKent Overstreet } 150537fad949SKent Overstreet err_create_lock: 150637fad949SKent Overstreet up_write(&c->snapshot_create_lock); 15078e877caaSKent Overstreet err: 1508f55d6e07SKent Overstreet darray_exit(&deleted_interior); 15098e877caaSKent Overstreet darray_exit(&deleted); 15106bd68ec2SKent Overstreet bch2_trans_put(trans); 15118e877caaSKent Overstreet bch_err_fn(c, ret); 15128e877caaSKent Overstreet return ret; 15138e877caaSKent Overstreet } 15148e877caaSKent Overstreet 15158e877caaSKent Overstreet void bch2_delete_dead_snapshots_work(struct work_struct *work) 15168e877caaSKent Overstreet { 15178e877caaSKent Overstreet struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work); 15188e877caaSKent Overstreet 15198e877caaSKent Overstreet bch2_delete_dead_snapshots(c); 15208e877caaSKent Overstreet bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); 15218e877caaSKent Overstreet } 15228e877caaSKent Overstreet 15238e877caaSKent Overstreet void bch2_delete_dead_snapshots_async(struct bch_fs *c) 15248e877caaSKent Overstreet { 15258e877caaSKent Overstreet if (bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) && 15268e877caaSKent Overstreet !queue_work(c->write_ref_wq, &c->snapshot_delete_work)) 15278e877caaSKent Overstreet bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); 15288e877caaSKent Overstreet } 15298e877caaSKent Overstreet 1530fa5bed37SKent Overstreet int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, 1531fa5bed37SKent Overstreet enum btree_id id, 1532fa5bed37SKent Overstreet struct bpos pos) 1533fa5bed37SKent Overstreet { 1534fa5bed37SKent Overstreet struct bch_fs *c = trans->c; 1535fa5bed37SKent Overstreet struct btree_iter iter; 1536fa5bed37SKent Overstreet struct bkey_s_c k; 1537fa5bed37SKent Overstreet int ret; 1538fa5bed37SKent Overstreet 1539fa5bed37SKent Overstreet bch2_trans_iter_init(trans, &iter, id, pos, 1540fa5bed37SKent Overstreet BTREE_ITER_NOT_EXTENTS| 1541fa5bed37SKent Overstreet BTREE_ITER_ALL_SNAPSHOTS); 1542fa5bed37SKent Overstreet while (1) { 1543fa5bed37SKent Overstreet k = bch2_btree_iter_prev(&iter); 1544fa5bed37SKent Overstreet ret = bkey_err(k); 1545fa5bed37SKent Overstreet if (ret) 1546fa5bed37SKent Overstreet break; 1547fa5bed37SKent Overstreet 1548fa5bed37SKent Overstreet if (!k.k) 1549fa5bed37SKent Overstreet break; 1550fa5bed37SKent Overstreet 1551fa5bed37SKent Overstreet if (!bkey_eq(pos, k.k->p)) 1552fa5bed37SKent Overstreet break; 1553fa5bed37SKent Overstreet 1554fa5bed37SKent Overstreet if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot)) { 1555fa5bed37SKent Overstreet ret = 1; 1556fa5bed37SKent Overstreet break; 1557fa5bed37SKent Overstreet } 1558fa5bed37SKent Overstreet } 1559fa5bed37SKent Overstreet bch2_trans_iter_exit(trans, &iter); 1560fa5bed37SKent Overstreet 1561fa5bed37SKent Overstreet return ret; 1562fa5bed37SKent Overstreet } 1563fa5bed37SKent Overstreet 1564a111901fSKent Overstreet static u32 bch2_snapshot_smallest_child(struct bch_fs *c, u32 id) 1565a111901fSKent Overstreet { 1566a111901fSKent Overstreet const struct snapshot_t *s = snapshot_t(c, id); 1567a111901fSKent Overstreet 1568a111901fSKent Overstreet return s->children[1] ?: s->children[0]; 1569a111901fSKent Overstreet } 1570a111901fSKent Overstreet 1571a111901fSKent Overstreet static u32 bch2_snapshot_smallest_descendent(struct bch_fs *c, u32 id) 1572a111901fSKent Overstreet { 1573a111901fSKent Overstreet u32 child; 1574a111901fSKent Overstreet 1575a111901fSKent Overstreet while ((child = bch2_snapshot_smallest_child(c, id))) 1576a111901fSKent Overstreet id = child; 1577a111901fSKent Overstreet return id; 1578a111901fSKent Overstreet } 1579a111901fSKent Overstreet 1580a111901fSKent Overstreet static int bch2_propagate_key_to_snapshot_leaf(struct btree_trans *trans, 1581a111901fSKent Overstreet enum btree_id btree, 1582a111901fSKent Overstreet struct bkey_s_c interior_k, 1583a111901fSKent Overstreet u32 leaf_id, struct bpos *new_min_pos) 1584a111901fSKent Overstreet { 1585a111901fSKent Overstreet struct btree_iter iter; 1586a111901fSKent Overstreet struct bpos pos = interior_k.k->p; 1587a111901fSKent Overstreet struct bkey_s_c k; 1588a111901fSKent Overstreet struct bkey_i *new; 1589a111901fSKent Overstreet int ret; 1590a111901fSKent Overstreet 1591a111901fSKent Overstreet pos.snapshot = leaf_id; 1592a111901fSKent Overstreet 1593a111901fSKent Overstreet bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_INTENT); 1594a111901fSKent Overstreet k = bch2_btree_iter_peek_slot(&iter); 1595a111901fSKent Overstreet ret = bkey_err(k); 1596a111901fSKent Overstreet if (ret) 1597a111901fSKent Overstreet goto out; 1598a111901fSKent Overstreet 1599a111901fSKent Overstreet /* key already overwritten in this snapshot? */ 1600a111901fSKent Overstreet if (k.k->p.snapshot != interior_k.k->p.snapshot) 1601a111901fSKent Overstreet goto out; 1602a111901fSKent Overstreet 1603a111901fSKent Overstreet if (bpos_eq(*new_min_pos, POS_MIN)) { 1604a111901fSKent Overstreet *new_min_pos = k.k->p; 1605a111901fSKent Overstreet new_min_pos->snapshot = leaf_id; 1606a111901fSKent Overstreet } 1607a111901fSKent Overstreet 1608a111901fSKent Overstreet new = bch2_bkey_make_mut_noupdate(trans, interior_k); 1609a111901fSKent Overstreet ret = PTR_ERR_OR_ZERO(new); 1610a111901fSKent Overstreet if (ret) 1611a111901fSKent Overstreet goto out; 1612a111901fSKent Overstreet 1613a111901fSKent Overstreet new->k.p.snapshot = leaf_id; 1614a111901fSKent Overstreet ret = bch2_trans_update(trans, &iter, new, 0); 1615a111901fSKent Overstreet out: 1616a111901fSKent Overstreet bch2_trans_iter_exit(trans, &iter); 1617a111901fSKent Overstreet return ret; 1618a111901fSKent Overstreet } 1619a111901fSKent Overstreet 1620a111901fSKent Overstreet int bch2_propagate_key_to_snapshot_leaves(struct btree_trans *trans, 1621a111901fSKent Overstreet enum btree_id btree, 1622a111901fSKent Overstreet struct bkey_s_c k, 1623a111901fSKent Overstreet struct bpos *new_min_pos) 1624a111901fSKent Overstreet { 1625a111901fSKent Overstreet struct bch_fs *c = trans->c; 1626a111901fSKent Overstreet struct bkey_buf sk; 1627c872afa2SKent Overstreet u32 restart_count = trans->restart_count; 1628d04fdf5cSKent Overstreet int ret = 0; 1629a111901fSKent Overstreet 1630a111901fSKent Overstreet bch2_bkey_buf_init(&sk); 1631a111901fSKent Overstreet bch2_bkey_buf_reassemble(&sk, c, k); 1632a111901fSKent Overstreet k = bkey_i_to_s_c(sk.k); 1633a111901fSKent Overstreet 1634a111901fSKent Overstreet *new_min_pos = POS_MIN; 1635a111901fSKent Overstreet 1636a111901fSKent Overstreet for (u32 id = bch2_snapshot_smallest_descendent(c, k.k->p.snapshot); 1637a111901fSKent Overstreet id < k.k->p.snapshot; 1638a111901fSKent Overstreet id++) { 1639a111901fSKent Overstreet if (!bch2_snapshot_is_ancestor(c, id, k.k->p.snapshot) || 1640a111901fSKent Overstreet !bch2_snapshot_is_leaf(c, id)) 1641a111901fSKent Overstreet continue; 1642d281701bSKent Overstreet again: 1643d281701bSKent Overstreet ret = btree_trans_too_many_iters(trans) ?: 1644d281701bSKent Overstreet bch2_propagate_key_to_snapshot_leaf(trans, btree, k, id, new_min_pos) ?: 1645d281701bSKent Overstreet bch2_trans_commit(trans, NULL, NULL, 0); 1646d281701bSKent Overstreet if (ret && bch2_err_matches(ret, BCH_ERR_transaction_restart)) { 1647d281701bSKent Overstreet bch2_trans_begin(trans); 1648d281701bSKent Overstreet goto again; 1649d281701bSKent Overstreet } 1650a111901fSKent Overstreet 1651a111901fSKent Overstreet if (ret) 1652a111901fSKent Overstreet break; 1653a111901fSKent Overstreet } 1654a111901fSKent Overstreet 1655a111901fSKent Overstreet bch2_bkey_buf_exit(&sk, c); 1656c872afa2SKent Overstreet 1657c872afa2SKent Overstreet return ret ?: trans_was_restarted(trans, restart_count); 1658a111901fSKent Overstreet } 1659a111901fSKent Overstreet 1660b0b5bbf9SKent Overstreet static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k) 1661b0b5bbf9SKent Overstreet { 1662b0b5bbf9SKent Overstreet struct bch_fs *c = trans->c; 1663b0b5bbf9SKent Overstreet struct bkey_s_c_snapshot snap; 1664b0b5bbf9SKent Overstreet int ret = 0; 1665b0b5bbf9SKent Overstreet 1666b0b5bbf9SKent Overstreet if (k.k->type != KEY_TYPE_snapshot) 1667b0b5bbf9SKent Overstreet return 0; 1668b0b5bbf9SKent Overstreet 1669b0b5bbf9SKent Overstreet snap = bkey_s_c_to_snapshot(k); 1670b0b5bbf9SKent Overstreet if (BCH_SNAPSHOT_DELETED(snap.v) || 1671b0b5bbf9SKent Overstreet bch2_snapshot_equiv(c, k.k->p.offset) != k.k->p.offset || 1672b0b5bbf9SKent Overstreet (ret = bch2_snapshot_needs_delete(trans, k)) > 0) { 16733c471b65SKent Overstreet set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags); 1674b0b5bbf9SKent Overstreet return 0; 1675b0b5bbf9SKent Overstreet } 1676b0b5bbf9SKent Overstreet 1677b0b5bbf9SKent Overstreet return ret; 1678b0b5bbf9SKent Overstreet } 1679b0b5bbf9SKent Overstreet 16808e877caaSKent Overstreet int bch2_snapshots_read(struct bch_fs *c) 16818e877caaSKent Overstreet { 168280eab7a7SKent Overstreet int ret = bch2_trans_run(c, 16835028b907SKent Overstreet for_each_btree_key(trans, iter, BTREE_ID_snapshots, 16848e877caaSKent Overstreet POS_MIN, 0, k, 1685ad00bce0SKent Overstreet __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: 1686b0b5bbf9SKent Overstreet bch2_snapshot_set_equiv(trans, k) ?: 1687b0b5bbf9SKent Overstreet bch2_check_snapshot_needs_deletion(trans, k)) ?: 16885028b907SKent Overstreet for_each_btree_key(trans, iter, BTREE_ID_snapshots, 168966487c54SKent Overstreet POS_MIN, 0, k, 169066487c54SKent Overstreet (set_is_ancestor_bitmap(c, k.k->p.offset), 0))); 16918e877caaSKent Overstreet bch_err_fn(c, ret); 16928e877caaSKent Overstreet return ret; 16938e877caaSKent Overstreet } 16948e877caaSKent Overstreet 16958e877caaSKent Overstreet void bch2_fs_snapshots_exit(struct bch_fs *c) 16968e877caaSKent Overstreet { 1697369acf97SSu Yue kvfree(rcu_dereference_protected(c->snapshots, true)); 16988e877caaSKent Overstreet } 1699