18e877caaSKent Overstreet // SPDX-License-Identifier: GPL-2.0 28e877caaSKent Overstreet 38e877caaSKent Overstreet #include "bcachefs.h" 4a111901fSKent Overstreet #include "bkey_buf.h" 58e877caaSKent Overstreet #include "btree_key_cache.h" 68e877caaSKent Overstreet #include "btree_update.h" 7f55d6e07SKent Overstreet #include "buckets.h" 88e877caaSKent Overstreet #include "errcode.h" 98e877caaSKent Overstreet #include "error.h" 108e877caaSKent Overstreet #include "fs.h" 118e877caaSKent Overstreet #include "snapshot.h" 128e877caaSKent Overstreet 138e877caaSKent Overstreet #include <linux/random.h> 148e877caaSKent Overstreet 158e877caaSKent Overstreet /* 168e877caaSKent Overstreet * Snapshot trees: 178e877caaSKent Overstreet * 188e877caaSKent Overstreet * Keys in BTREE_ID_snapshot_trees identify a whole tree of snapshot nodes; they 198e877caaSKent Overstreet * exist to provide a stable identifier for the whole lifetime of a snapshot 208e877caaSKent Overstreet * tree. 218e877caaSKent Overstreet */ 228e877caaSKent Overstreet 238e877caaSKent Overstreet void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, 248e877caaSKent Overstreet struct bkey_s_c k) 258e877caaSKent Overstreet { 268e877caaSKent Overstreet struct bkey_s_c_snapshot_tree t = bkey_s_c_to_snapshot_tree(k); 278e877caaSKent Overstreet 288e877caaSKent Overstreet prt_printf(out, "subvol %u root snapshot %u", 298e877caaSKent Overstreet le32_to_cpu(t.v->master_subvol), 308e877caaSKent Overstreet le32_to_cpu(t.v->root_snapshot)); 318e877caaSKent Overstreet } 328e877caaSKent Overstreet 338e877caaSKent Overstreet int bch2_snapshot_tree_invalid(const struct bch_fs *c, struct bkey_s_c k, 348e877caaSKent Overstreet enum bkey_invalid_flags flags, 358e877caaSKent Overstreet struct printbuf *err) 368e877caaSKent Overstreet { 378e877caaSKent Overstreet if (bkey_gt(k.k->p, POS(0, U32_MAX)) || 388e877caaSKent Overstreet bkey_lt(k.k->p, POS(0, 1))) { 398e877caaSKent Overstreet prt_printf(err, "bad pos"); 408e877caaSKent Overstreet return -BCH_ERR_invalid_bkey; 418e877caaSKent Overstreet } 428e877caaSKent Overstreet 438e877caaSKent Overstreet return 0; 448e877caaSKent Overstreet } 458e877caaSKent Overstreet 468e877caaSKent Overstreet int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id, 478e877caaSKent Overstreet struct bch_snapshot_tree *s) 488e877caaSKent Overstreet { 498e877caaSKent Overstreet int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id), 508e877caaSKent Overstreet BTREE_ITER_WITH_UPDATES, snapshot_tree, s); 518e877caaSKent Overstreet 528e877caaSKent Overstreet if (bch2_err_matches(ret, ENOENT)) 538e877caaSKent Overstreet ret = -BCH_ERR_ENOENT_snapshot_tree; 548e877caaSKent Overstreet return ret; 558e877caaSKent Overstreet } 568e877caaSKent Overstreet 578e877caaSKent Overstreet struct bkey_i_snapshot_tree * 588e877caaSKent Overstreet __bch2_snapshot_tree_create(struct btree_trans *trans) 598e877caaSKent Overstreet { 608e877caaSKent Overstreet struct btree_iter iter; 618e877caaSKent Overstreet int ret = bch2_bkey_get_empty_slot(trans, &iter, 628e877caaSKent Overstreet BTREE_ID_snapshot_trees, POS(0, U32_MAX)); 638e877caaSKent Overstreet struct bkey_i_snapshot_tree *s_t; 648e877caaSKent Overstreet 658e877caaSKent Overstreet if (ret == -BCH_ERR_ENOSPC_btree_slot) 668e877caaSKent Overstreet ret = -BCH_ERR_ENOSPC_snapshot_tree; 678e877caaSKent Overstreet if (ret) 688e877caaSKent Overstreet return ERR_PTR(ret); 698e877caaSKent Overstreet 708e877caaSKent Overstreet s_t = bch2_bkey_alloc(trans, &iter, 0, snapshot_tree); 718e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(s_t); 728e877caaSKent Overstreet bch2_trans_iter_exit(trans, &iter); 738e877caaSKent Overstreet return ret ? ERR_PTR(ret) : s_t; 748e877caaSKent Overstreet } 758e877caaSKent Overstreet 768e877caaSKent Overstreet static int bch2_snapshot_tree_create(struct btree_trans *trans, 778e877caaSKent Overstreet u32 root_id, u32 subvol_id, u32 *tree_id) 788e877caaSKent Overstreet { 798e877caaSKent Overstreet struct bkey_i_snapshot_tree *n_tree = 808e877caaSKent Overstreet __bch2_snapshot_tree_create(trans); 818e877caaSKent Overstreet 828e877caaSKent Overstreet if (IS_ERR(n_tree)) 838e877caaSKent Overstreet return PTR_ERR(n_tree); 848e877caaSKent Overstreet 858e877caaSKent Overstreet n_tree->v.master_subvol = cpu_to_le32(subvol_id); 868e877caaSKent Overstreet n_tree->v.root_snapshot = cpu_to_le32(root_id); 878e877caaSKent Overstreet *tree_id = n_tree->k.p.offset; 888e877caaSKent Overstreet return 0; 898e877caaSKent Overstreet } 908e877caaSKent Overstreet 918e877caaSKent Overstreet /* Snapshot nodes: */ 928e877caaSKent Overstreet 9366487c54SKent Overstreet static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor) 9466487c54SKent Overstreet { 9566487c54SKent Overstreet struct snapshot_table *t; 9666487c54SKent Overstreet 9766487c54SKent Overstreet rcu_read_lock(); 9866487c54SKent Overstreet t = rcu_dereference(c->snapshots); 9966487c54SKent Overstreet 10066487c54SKent Overstreet while (id && id < ancestor) 10166487c54SKent Overstreet id = __snapshot_t(t, id)->parent; 10266487c54SKent Overstreet rcu_read_unlock(); 10366487c54SKent Overstreet 10466487c54SKent Overstreet return id == ancestor; 10566487c54SKent Overstreet } 10666487c54SKent Overstreet 1078e877caaSKent Overstreet static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor) 1088e877caaSKent Overstreet { 1098e877caaSKent Overstreet const struct snapshot_t *s = __snapshot_t(t, id); 1108e877caaSKent Overstreet 1118e877caaSKent Overstreet if (s->skip[2] <= ancestor) 1128e877caaSKent Overstreet return s->skip[2]; 1138e877caaSKent Overstreet if (s->skip[1] <= ancestor) 1148e877caaSKent Overstreet return s->skip[1]; 1158e877caaSKent Overstreet if (s->skip[0] <= ancestor) 1168e877caaSKent Overstreet return s->skip[0]; 1178e877caaSKent Overstreet return s->parent; 1188e877caaSKent Overstreet } 1198e877caaSKent Overstreet 1208e877caaSKent Overstreet bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) 1218e877caaSKent Overstreet { 1228e877caaSKent Overstreet struct snapshot_table *t; 1238e877caaSKent Overstreet bool ret; 1248e877caaSKent Overstreet 1258e877caaSKent Overstreet EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots); 1268e877caaSKent Overstreet 1278e877caaSKent Overstreet rcu_read_lock(); 1288e877caaSKent Overstreet t = rcu_dereference(c->snapshots); 1298e877caaSKent Overstreet 1308e877caaSKent Overstreet while (id && id < ancestor - IS_ANCESTOR_BITMAP) 1318e877caaSKent Overstreet id = get_ancestor_below(t, id, ancestor); 1328e877caaSKent Overstreet 13366487c54SKent Overstreet if (id && id < ancestor) { 13466487c54SKent Overstreet ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor); 13566487c54SKent Overstreet 13666487c54SKent Overstreet EBUG_ON(ret != bch2_snapshot_is_ancestor_early(c, id, ancestor)); 13766487c54SKent Overstreet } else { 13866487c54SKent Overstreet ret = id == ancestor; 13966487c54SKent Overstreet } 14066487c54SKent Overstreet 1418e877caaSKent Overstreet rcu_read_unlock(); 1428e877caaSKent Overstreet 1438e877caaSKent Overstreet return ret; 1448e877caaSKent Overstreet } 1458e877caaSKent Overstreet 1468e877caaSKent Overstreet static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id) 1478e877caaSKent Overstreet { 1488e877caaSKent Overstreet size_t idx = U32_MAX - id; 1498e877caaSKent Overstreet size_t new_size; 1508e877caaSKent Overstreet struct snapshot_table *new, *old; 1518e877caaSKent Overstreet 1528e877caaSKent Overstreet new_size = max(16UL, roundup_pow_of_two(idx + 1)); 1538e877caaSKent Overstreet 1548e877caaSKent Overstreet new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL); 1558e877caaSKent Overstreet if (!new) 1568e877caaSKent Overstreet return NULL; 1578e877caaSKent Overstreet 1588e877caaSKent Overstreet old = rcu_dereference_protected(c->snapshots, true); 1598e877caaSKent Overstreet if (old) 1608e877caaSKent Overstreet memcpy(new->s, 1618e877caaSKent Overstreet rcu_dereference_protected(c->snapshots, true)->s, 1628e877caaSKent Overstreet sizeof(new->s[0]) * c->snapshot_table_size); 1638e877caaSKent Overstreet 1648e877caaSKent Overstreet rcu_assign_pointer(c->snapshots, new); 1658e877caaSKent Overstreet c->snapshot_table_size = new_size; 166d04fdf5cSKent Overstreet kvfree_rcu_mightsleep(old); 1678e877caaSKent Overstreet 1688e877caaSKent Overstreet return &rcu_dereference_protected(c->snapshots, true)->s[idx]; 1698e877caaSKent Overstreet } 1708e877caaSKent Overstreet 1718e877caaSKent Overstreet static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id) 1728e877caaSKent Overstreet { 1738e877caaSKent Overstreet size_t idx = U32_MAX - id; 1748e877caaSKent Overstreet 1758e877caaSKent Overstreet lockdep_assert_held(&c->snapshot_table_lock); 1768e877caaSKent Overstreet 1778e877caaSKent Overstreet if (likely(idx < c->snapshot_table_size)) 1788e877caaSKent Overstreet return &rcu_dereference_protected(c->snapshots, true)->s[idx]; 1798e877caaSKent Overstreet 1808e877caaSKent Overstreet return __snapshot_t_mut(c, id); 1818e877caaSKent Overstreet } 1828e877caaSKent Overstreet 1838e877caaSKent Overstreet void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, 1848e877caaSKent Overstreet struct bkey_s_c k) 1858e877caaSKent Overstreet { 1868e877caaSKent Overstreet struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k); 1878e877caaSKent Overstreet 1888e877caaSKent Overstreet prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u tree %u", 1898e877caaSKent Overstreet BCH_SNAPSHOT_SUBVOL(s.v), 1908e877caaSKent Overstreet BCH_SNAPSHOT_DELETED(s.v), 1918e877caaSKent Overstreet le32_to_cpu(s.v->parent), 1928e877caaSKent Overstreet le32_to_cpu(s.v->children[0]), 1938e877caaSKent Overstreet le32_to_cpu(s.v->children[1]), 1948e877caaSKent Overstreet le32_to_cpu(s.v->subvol), 1958e877caaSKent Overstreet le32_to_cpu(s.v->tree)); 1968e877caaSKent Overstreet 1978e877caaSKent Overstreet if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, depth)) 1988e877caaSKent Overstreet prt_printf(out, " depth %u skiplist %u %u %u", 1998e877caaSKent Overstreet le32_to_cpu(s.v->depth), 2008e877caaSKent Overstreet le32_to_cpu(s.v->skip[0]), 2018e877caaSKent Overstreet le32_to_cpu(s.v->skip[1]), 2028e877caaSKent Overstreet le32_to_cpu(s.v->skip[2])); 2038e877caaSKent Overstreet } 2048e877caaSKent Overstreet 2058e877caaSKent Overstreet int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k, 2068e877caaSKent Overstreet enum bkey_invalid_flags flags, 2078e877caaSKent Overstreet struct printbuf *err) 2088e877caaSKent Overstreet { 2098e877caaSKent Overstreet struct bkey_s_c_snapshot s; 2108e877caaSKent Overstreet u32 i, id; 2118e877caaSKent Overstreet 2128e877caaSKent Overstreet if (bkey_gt(k.k->p, POS(0, U32_MAX)) || 2138e877caaSKent Overstreet bkey_lt(k.k->p, POS(0, 1))) { 2148e877caaSKent Overstreet prt_printf(err, "bad pos"); 2158e877caaSKent Overstreet return -BCH_ERR_invalid_bkey; 2168e877caaSKent Overstreet } 2178e877caaSKent Overstreet 2188e877caaSKent Overstreet s = bkey_s_c_to_snapshot(k); 2198e877caaSKent Overstreet 2208e877caaSKent Overstreet id = le32_to_cpu(s.v->parent); 2218e877caaSKent Overstreet if (id && id <= k.k->p.offset) { 2228e877caaSKent Overstreet prt_printf(err, "bad parent node (%u <= %llu)", 2238e877caaSKent Overstreet id, k.k->p.offset); 2248e877caaSKent Overstreet return -BCH_ERR_invalid_bkey; 2258e877caaSKent Overstreet } 2268e877caaSKent Overstreet 2278e877caaSKent Overstreet if (le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1])) { 2288e877caaSKent Overstreet prt_printf(err, "children not normalized"); 2298e877caaSKent Overstreet return -BCH_ERR_invalid_bkey; 2308e877caaSKent Overstreet } 2318e877caaSKent Overstreet 2328e877caaSKent Overstreet if (s.v->children[0] && 2338e877caaSKent Overstreet s.v->children[0] == s.v->children[1]) { 2348e877caaSKent Overstreet prt_printf(err, "duplicate child nodes"); 2358e877caaSKent Overstreet return -BCH_ERR_invalid_bkey; 2368e877caaSKent Overstreet } 2378e877caaSKent Overstreet 2388e877caaSKent Overstreet for (i = 0; i < 2; i++) { 2398e877caaSKent Overstreet id = le32_to_cpu(s.v->children[i]); 2408e877caaSKent Overstreet 2418e877caaSKent Overstreet if (id >= k.k->p.offset) { 2428e877caaSKent Overstreet prt_printf(err, "bad child node (%u >= %llu)", 2438e877caaSKent Overstreet id, k.k->p.offset); 2448e877caaSKent Overstreet return -BCH_ERR_invalid_bkey; 2458e877caaSKent Overstreet } 2468e877caaSKent Overstreet } 2478e877caaSKent Overstreet 2488e877caaSKent Overstreet if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) { 2498e877caaSKent Overstreet if (le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) || 2508e877caaSKent Overstreet le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2])) { 2518e877caaSKent Overstreet prt_printf(err, "skiplist not normalized"); 2528e877caaSKent Overstreet return -BCH_ERR_invalid_bkey; 2538e877caaSKent Overstreet } 2548e877caaSKent Overstreet 2558e877caaSKent Overstreet for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) { 2568e877caaSKent Overstreet id = le32_to_cpu(s.v->skip[i]); 2578e877caaSKent Overstreet 258f55d6e07SKent Overstreet if ((id && !s.v->parent) || 259f55d6e07SKent Overstreet (id && id <= k.k->p.offset)) { 260f55d6e07SKent Overstreet prt_printf(err, "bad skiplist node %u", id); 2618e877caaSKent Overstreet return -BCH_ERR_invalid_bkey; 2628e877caaSKent Overstreet } 2638e877caaSKent Overstreet } 2648e877caaSKent Overstreet } 2658e877caaSKent Overstreet 2668e877caaSKent Overstreet return 0; 2678e877caaSKent Overstreet } 2688e877caaSKent Overstreet 26966487c54SKent Overstreet static void __set_is_ancestor_bitmap(struct bch_fs *c, u32 id) 27066487c54SKent Overstreet { 27166487c54SKent Overstreet struct snapshot_t *t = snapshot_t_mut(c, id); 27266487c54SKent Overstreet u32 parent = id; 27366487c54SKent Overstreet 27466487c54SKent Overstreet while ((parent = bch2_snapshot_parent_early(c, parent)) && 27566487c54SKent Overstreet parent - id - 1 < IS_ANCESTOR_BITMAP) 27666487c54SKent Overstreet __set_bit(parent - id - 1, t->is_ancestor); 27766487c54SKent Overstreet } 27866487c54SKent Overstreet 27966487c54SKent Overstreet static void set_is_ancestor_bitmap(struct bch_fs *c, u32 id) 28066487c54SKent Overstreet { 28166487c54SKent Overstreet mutex_lock(&c->snapshot_table_lock); 28266487c54SKent Overstreet __set_is_ancestor_bitmap(c, id); 28366487c54SKent Overstreet mutex_unlock(&c->snapshot_table_lock); 28466487c54SKent Overstreet } 28566487c54SKent Overstreet 2868e877caaSKent Overstreet int bch2_mark_snapshot(struct btree_trans *trans, 2878e877caaSKent Overstreet enum btree_id btree, unsigned level, 2888e877caaSKent Overstreet struct bkey_s_c old, struct bkey_s_c new, 2898e877caaSKent Overstreet unsigned flags) 2908e877caaSKent Overstreet { 2918e877caaSKent Overstreet struct bch_fs *c = trans->c; 2928e877caaSKent Overstreet struct snapshot_t *t; 2938e877caaSKent Overstreet u32 id = new.k->p.offset; 2948e877caaSKent Overstreet int ret = 0; 2958e877caaSKent Overstreet 2968e877caaSKent Overstreet mutex_lock(&c->snapshot_table_lock); 2978e877caaSKent Overstreet 2988e877caaSKent Overstreet t = snapshot_t_mut(c, id); 2998e877caaSKent Overstreet if (!t) { 3008e877caaSKent Overstreet ret = -BCH_ERR_ENOMEM_mark_snapshot; 3018e877caaSKent Overstreet goto err; 3028e877caaSKent Overstreet } 3038e877caaSKent Overstreet 3048e877caaSKent Overstreet if (new.k->type == KEY_TYPE_snapshot) { 3058e877caaSKent Overstreet struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new); 3068e877caaSKent Overstreet 3078e877caaSKent Overstreet t->parent = le32_to_cpu(s.v->parent); 3088e877caaSKent Overstreet t->children[0] = le32_to_cpu(s.v->children[0]); 3098e877caaSKent Overstreet t->children[1] = le32_to_cpu(s.v->children[1]); 3108e877caaSKent Overstreet t->subvol = BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0; 3118e877caaSKent Overstreet t->tree = le32_to_cpu(s.v->tree); 3128e877caaSKent Overstreet 3138e877caaSKent Overstreet if (bkey_val_bytes(s.k) > offsetof(struct bch_snapshot, depth)) { 3148e877caaSKent Overstreet t->depth = le32_to_cpu(s.v->depth); 3158e877caaSKent Overstreet t->skip[0] = le32_to_cpu(s.v->skip[0]); 3168e877caaSKent Overstreet t->skip[1] = le32_to_cpu(s.v->skip[1]); 3178e877caaSKent Overstreet t->skip[2] = le32_to_cpu(s.v->skip[2]); 3188e877caaSKent Overstreet } else { 3198e877caaSKent Overstreet t->depth = 0; 3208e877caaSKent Overstreet t->skip[0] = 0; 3218e877caaSKent Overstreet t->skip[1] = 0; 3228e877caaSKent Overstreet t->skip[2] = 0; 3238e877caaSKent Overstreet } 3248e877caaSKent Overstreet 32566487c54SKent Overstreet __set_is_ancestor_bitmap(c, id); 3268e877caaSKent Overstreet 3278e877caaSKent Overstreet if (BCH_SNAPSHOT_DELETED(s.v)) { 328*b0b5bbf9SKent Overstreet set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags); 329*b0b5bbf9SKent Overstreet if (c->curr_recovery_pass > BCH_RECOVERY_PASS_delete_dead_snapshots) 330*b0b5bbf9SKent Overstreet bch2_delete_dead_snapshots_async(c); 3318e877caaSKent Overstreet } 3328e877caaSKent Overstreet } else { 3338e877caaSKent Overstreet memset(t, 0, sizeof(*t)); 3348e877caaSKent Overstreet } 3358e877caaSKent Overstreet err: 3368e877caaSKent Overstreet mutex_unlock(&c->snapshot_table_lock); 3378e877caaSKent Overstreet return ret; 3388e877caaSKent Overstreet } 3398e877caaSKent Overstreet 3408e877caaSKent Overstreet int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, 3418e877caaSKent Overstreet struct bch_snapshot *s) 3428e877caaSKent Overstreet { 3438e877caaSKent Overstreet return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, POS(0, id), 3448e877caaSKent Overstreet BTREE_ITER_WITH_UPDATES, snapshot, s); 3458e877caaSKent Overstreet } 3468e877caaSKent Overstreet 347eebe8a84SKent Overstreet static int bch2_snapshot_live(struct btree_trans *trans, u32 id) 3488e877caaSKent Overstreet { 3498e877caaSKent Overstreet struct bch_snapshot v; 3508e877caaSKent Overstreet int ret; 3518e877caaSKent Overstreet 3528e877caaSKent Overstreet if (!id) 3538e877caaSKent Overstreet return 0; 3548e877caaSKent Overstreet 3558e877caaSKent Overstreet ret = bch2_snapshot_lookup(trans, id, &v); 3568e877caaSKent Overstreet if (bch2_err_matches(ret, ENOENT)) 3578e877caaSKent Overstreet bch_err(trans->c, "snapshot node %u not found", id); 3588e877caaSKent Overstreet if (ret) 3598e877caaSKent Overstreet return ret; 3608e877caaSKent Overstreet 3618e877caaSKent Overstreet return !BCH_SNAPSHOT_DELETED(&v); 3628e877caaSKent Overstreet } 3638e877caaSKent Overstreet 3648e877caaSKent Overstreet /* 3658e877caaSKent Overstreet * If @k is a snapshot with just one live child, it's part of a linear chain, 3668e877caaSKent Overstreet * which we consider to be an equivalence class: and then after snapshot 3678e877caaSKent Overstreet * deletion cleanup, there should only be a single key at a given position in 3688e877caaSKent Overstreet * this equivalence class. 3698e877caaSKent Overstreet * 3708e877caaSKent Overstreet * This sets the equivalence class of @k to be the child's equivalence class, if 3718e877caaSKent Overstreet * it's part of such a linear chain: this correctly sets equivalence classes on 3728e877caaSKent Overstreet * startup if we run leaf to root (i.e. in natural key order). 3738e877caaSKent Overstreet */ 374eebe8a84SKent Overstreet static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k) 3758e877caaSKent Overstreet { 3768e877caaSKent Overstreet struct bch_fs *c = trans->c; 3778e877caaSKent Overstreet unsigned i, nr_live = 0, live_idx = 0; 3788e877caaSKent Overstreet struct bkey_s_c_snapshot snap; 3798e877caaSKent Overstreet u32 id = k.k->p.offset, child[2]; 3808e877caaSKent Overstreet 3818e877caaSKent Overstreet if (k.k->type != KEY_TYPE_snapshot) 3828e877caaSKent Overstreet return 0; 3838e877caaSKent Overstreet 3848e877caaSKent Overstreet snap = bkey_s_c_to_snapshot(k); 3858e877caaSKent Overstreet 3868e877caaSKent Overstreet child[0] = le32_to_cpu(snap.v->children[0]); 3878e877caaSKent Overstreet child[1] = le32_to_cpu(snap.v->children[1]); 3888e877caaSKent Overstreet 3898e877caaSKent Overstreet for (i = 0; i < 2; i++) { 3908e877caaSKent Overstreet int ret = bch2_snapshot_live(trans, child[i]); 3918e877caaSKent Overstreet 3928e877caaSKent Overstreet if (ret < 0) 3938e877caaSKent Overstreet return ret; 3948e877caaSKent Overstreet 3958e877caaSKent Overstreet if (ret) 3968e877caaSKent Overstreet live_idx = i; 3978e877caaSKent Overstreet nr_live += ret; 3988e877caaSKent Overstreet } 3998e877caaSKent Overstreet 4008e877caaSKent Overstreet mutex_lock(&c->snapshot_table_lock); 4018e877caaSKent Overstreet 4028e877caaSKent Overstreet snapshot_t_mut(c, id)->equiv = nr_live == 1 4038e877caaSKent Overstreet ? snapshot_t_mut(c, child[live_idx])->equiv 4048e877caaSKent Overstreet : id; 4058e877caaSKent Overstreet 4068e877caaSKent Overstreet mutex_unlock(&c->snapshot_table_lock); 4078e877caaSKent Overstreet 4088e877caaSKent Overstreet return 0; 4098e877caaSKent Overstreet } 4108e877caaSKent Overstreet 4118e877caaSKent Overstreet /* fsck: */ 4128e877caaSKent Overstreet 4138e877caaSKent Overstreet static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child) 4148e877caaSKent Overstreet { 4158e877caaSKent Overstreet return snapshot_t(c, id)->children[child]; 4168e877caaSKent Overstreet } 4178e877caaSKent Overstreet 4188e877caaSKent Overstreet static u32 bch2_snapshot_left_child(struct bch_fs *c, u32 id) 4198e877caaSKent Overstreet { 4208e877caaSKent Overstreet return bch2_snapshot_child(c, id, 0); 4218e877caaSKent Overstreet } 4228e877caaSKent Overstreet 4238e877caaSKent Overstreet static u32 bch2_snapshot_right_child(struct bch_fs *c, u32 id) 4248e877caaSKent Overstreet { 4258e877caaSKent Overstreet return bch2_snapshot_child(c, id, 1); 4268e877caaSKent Overstreet } 4278e877caaSKent Overstreet 4288e877caaSKent Overstreet static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id) 4298e877caaSKent Overstreet { 4308e877caaSKent Overstreet u32 n, parent; 4318e877caaSKent Overstreet 4328e877caaSKent Overstreet n = bch2_snapshot_left_child(c, id); 4338e877caaSKent Overstreet if (n) 4348e877caaSKent Overstreet return n; 4358e877caaSKent Overstreet 4368e877caaSKent Overstreet while ((parent = bch2_snapshot_parent(c, id))) { 4378e877caaSKent Overstreet n = bch2_snapshot_right_child(c, parent); 4388e877caaSKent Overstreet if (n && n != id) 4398e877caaSKent Overstreet return n; 4408e877caaSKent Overstreet id = parent; 4418e877caaSKent Overstreet } 4428e877caaSKent Overstreet 4438e877caaSKent Overstreet return 0; 4448e877caaSKent Overstreet } 4458e877caaSKent Overstreet 4468e877caaSKent Overstreet static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root) 4478e877caaSKent Overstreet { 4488e877caaSKent Overstreet u32 id = snapshot_root; 4498e877caaSKent Overstreet u32 subvol = 0, s; 4508e877caaSKent Overstreet 4518e877caaSKent Overstreet while (id) { 4528e877caaSKent Overstreet s = snapshot_t(c, id)->subvol; 4538e877caaSKent Overstreet 4548e877caaSKent Overstreet if (s && (!subvol || s < subvol)) 4558e877caaSKent Overstreet subvol = s; 4568e877caaSKent Overstreet 4578e877caaSKent Overstreet id = bch2_snapshot_tree_next(c, id); 4588e877caaSKent Overstreet } 4598e877caaSKent Overstreet 4608e877caaSKent Overstreet return subvol; 4618e877caaSKent Overstreet } 4628e877caaSKent Overstreet 4638e877caaSKent Overstreet static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, 4648e877caaSKent Overstreet u32 snapshot_root, u32 *subvol_id) 4658e877caaSKent Overstreet { 4668e877caaSKent Overstreet struct bch_fs *c = trans->c; 4678e877caaSKent Overstreet struct btree_iter iter; 4688e877caaSKent Overstreet struct bkey_s_c k; 4698e877caaSKent Overstreet struct bkey_s_c_subvolume s; 4708e877caaSKent Overstreet bool found = false; 4718e877caaSKent Overstreet int ret; 4728e877caaSKent Overstreet 4738e877caaSKent Overstreet for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 4748e877caaSKent Overstreet 0, k, ret) { 4758e877caaSKent Overstreet if (k.k->type != KEY_TYPE_subvolume) 4768e877caaSKent Overstreet continue; 4778e877caaSKent Overstreet 4788e877caaSKent Overstreet s = bkey_s_c_to_subvolume(k); 4798e877caaSKent Overstreet if (!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.v->snapshot), snapshot_root)) 4808e877caaSKent Overstreet continue; 4818e877caaSKent Overstreet if (!BCH_SUBVOLUME_SNAP(s.v)) { 4828e877caaSKent Overstreet *subvol_id = s.k->p.offset; 4838e877caaSKent Overstreet found = true; 4848e877caaSKent Overstreet break; 4858e877caaSKent Overstreet } 4868e877caaSKent Overstreet } 4878e877caaSKent Overstreet 4888e877caaSKent Overstreet bch2_trans_iter_exit(trans, &iter); 4898e877caaSKent Overstreet 4908e877caaSKent Overstreet if (!ret && !found) { 49196dea3d5SKent Overstreet struct bkey_i_subvolume *u; 4928e877caaSKent Overstreet 4938e877caaSKent Overstreet *subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root); 4948e877caaSKent Overstreet 49596dea3d5SKent Overstreet u = bch2_bkey_get_mut_typed(trans, &iter, 4968e877caaSKent Overstreet BTREE_ID_subvolumes, POS(0, *subvol_id), 4978e877caaSKent Overstreet 0, subvolume); 49896dea3d5SKent Overstreet ret = PTR_ERR_OR_ZERO(u); 4998e877caaSKent Overstreet if (ret) 5008e877caaSKent Overstreet return ret; 5018e877caaSKent Overstreet 50296dea3d5SKent Overstreet SET_BCH_SUBVOLUME_SNAP(&u->v, false); 5038e877caaSKent Overstreet } 5048e877caaSKent Overstreet 5058e877caaSKent Overstreet return ret; 5068e877caaSKent Overstreet } 5078e877caaSKent Overstreet 5088e877caaSKent Overstreet static int check_snapshot_tree(struct btree_trans *trans, 5098e877caaSKent Overstreet struct btree_iter *iter, 5108e877caaSKent Overstreet struct bkey_s_c k) 5118e877caaSKent Overstreet { 5128e877caaSKent Overstreet struct bch_fs *c = trans->c; 5138e877caaSKent Overstreet struct bkey_s_c_snapshot_tree st; 5148e877caaSKent Overstreet struct bch_snapshot s; 5158e877caaSKent Overstreet struct bch_subvolume subvol; 5168e877caaSKent Overstreet struct printbuf buf = PRINTBUF; 5178e877caaSKent Overstreet u32 root_id; 5188e877caaSKent Overstreet int ret; 5198e877caaSKent Overstreet 5208e877caaSKent Overstreet if (k.k->type != KEY_TYPE_snapshot_tree) 5218e877caaSKent Overstreet return 0; 5228e877caaSKent Overstreet 5238e877caaSKent Overstreet st = bkey_s_c_to_snapshot_tree(k); 5248e877caaSKent Overstreet root_id = le32_to_cpu(st.v->root_snapshot); 5258e877caaSKent Overstreet 5268e877caaSKent Overstreet ret = bch2_snapshot_lookup(trans, root_id, &s); 5278e877caaSKent Overstreet if (ret && !bch2_err_matches(ret, ENOENT)) 5288e877caaSKent Overstreet goto err; 5298e877caaSKent Overstreet 5308e877caaSKent Overstreet if (fsck_err_on(ret || 5318e877caaSKent Overstreet root_id != bch2_snapshot_root(c, root_id) || 5328e877caaSKent Overstreet st.k->p.offset != le32_to_cpu(s.tree), 5338e877caaSKent Overstreet c, 5348e877caaSKent Overstreet "snapshot tree points to missing/incorrect snapshot:\n %s", 5358e877caaSKent Overstreet (bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { 5368e877caaSKent Overstreet ret = bch2_btree_delete_at(trans, iter, 0); 5378e877caaSKent Overstreet goto err; 5388e877caaSKent Overstreet } 5398e877caaSKent Overstreet 5408e877caaSKent Overstreet ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), 5418e877caaSKent Overstreet false, 0, &subvol); 5428e877caaSKent Overstreet if (ret && !bch2_err_matches(ret, ENOENT)) 5438e877caaSKent Overstreet goto err; 5448e877caaSKent Overstreet 5458e877caaSKent Overstreet if (fsck_err_on(ret, c, 5468e877caaSKent Overstreet "snapshot tree points to missing subvolume:\n %s", 5478e877caaSKent Overstreet (printbuf_reset(&buf), 5488e877caaSKent Overstreet bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || 5498e877caaSKent Overstreet fsck_err_on(!bch2_snapshot_is_ancestor_early(c, 5508e877caaSKent Overstreet le32_to_cpu(subvol.snapshot), 5518e877caaSKent Overstreet root_id), c, 5528e877caaSKent Overstreet "snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s", 5538e877caaSKent Overstreet (printbuf_reset(&buf), 5548e877caaSKent Overstreet bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || 5558e877caaSKent Overstreet fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol), c, 5568e877caaSKent Overstreet "snapshot tree points to snapshot subvolume:\n %s", 5578e877caaSKent Overstreet (printbuf_reset(&buf), 5588e877caaSKent Overstreet bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { 5598e877caaSKent Overstreet struct bkey_i_snapshot_tree *u; 5608e877caaSKent Overstreet u32 subvol_id; 5618e877caaSKent Overstreet 5628e877caaSKent Overstreet ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id); 5638e877caaSKent Overstreet if (ret) 5648e877caaSKent Overstreet goto err; 5658e877caaSKent Overstreet 5668e877caaSKent Overstreet u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot_tree); 5678e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(u); 5688e877caaSKent Overstreet if (ret) 5698e877caaSKent Overstreet goto err; 5708e877caaSKent Overstreet 5718e877caaSKent Overstreet u->v.master_subvol = cpu_to_le32(subvol_id); 5728e877caaSKent Overstreet st = snapshot_tree_i_to_s_c(u); 5738e877caaSKent Overstreet } 5748e877caaSKent Overstreet err: 5758e877caaSKent Overstreet fsck_err: 5768e877caaSKent Overstreet printbuf_exit(&buf); 5778e877caaSKent Overstreet return ret; 5788e877caaSKent Overstreet } 5798e877caaSKent Overstreet 5808e877caaSKent Overstreet /* 5818e877caaSKent Overstreet * For each snapshot_tree, make sure it points to the root of a snapshot tree 5828e877caaSKent Overstreet * and that snapshot entry points back to it, or delete it. 5838e877caaSKent Overstreet * 5848e877caaSKent Overstreet * And, make sure it points to a subvolume within that snapshot tree, or correct 5858e877caaSKent Overstreet * it to point to the oldest subvolume within that snapshot tree. 5868e877caaSKent Overstreet */ 5878e877caaSKent Overstreet int bch2_check_snapshot_trees(struct bch_fs *c) 5888e877caaSKent Overstreet { 5898e877caaSKent Overstreet struct btree_iter iter; 5908e877caaSKent Overstreet struct bkey_s_c k; 5918e877caaSKent Overstreet int ret; 5928e877caaSKent Overstreet 5938e877caaSKent Overstreet ret = bch2_trans_run(c, 5946bd68ec2SKent Overstreet for_each_btree_key_commit(trans, iter, 5958e877caaSKent Overstreet BTREE_ID_snapshot_trees, POS_MIN, 5968e877caaSKent Overstreet BTREE_ITER_PREFETCH, k, 5978e877caaSKent Overstreet NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, 5986bd68ec2SKent Overstreet check_snapshot_tree(trans, &iter, k))); 5998e877caaSKent Overstreet 6008e877caaSKent Overstreet if (ret) 6018e877caaSKent Overstreet bch_err(c, "error %i checking snapshot trees", ret); 6028e877caaSKent Overstreet return ret; 6038e877caaSKent Overstreet } 6048e877caaSKent Overstreet 6058e877caaSKent Overstreet /* 6068e877caaSKent Overstreet * Look up snapshot tree for @tree_id and find root, 6078e877caaSKent Overstreet * make sure @snap_id is a descendent: 6088e877caaSKent Overstreet */ 6098e877caaSKent Overstreet static int snapshot_tree_ptr_good(struct btree_trans *trans, 6108e877caaSKent Overstreet u32 snap_id, u32 tree_id) 6118e877caaSKent Overstreet { 6128e877caaSKent Overstreet struct bch_snapshot_tree s_t; 6138e877caaSKent Overstreet int ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t); 6148e877caaSKent Overstreet 6158e877caaSKent Overstreet if (bch2_err_matches(ret, ENOENT)) 6168e877caaSKent Overstreet return 0; 6178e877caaSKent Overstreet if (ret) 6188e877caaSKent Overstreet return ret; 6198e877caaSKent Overstreet 6208e877caaSKent Overstreet return bch2_snapshot_is_ancestor_early(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot)); 6218e877caaSKent Overstreet } 6228e877caaSKent Overstreet 6238e877caaSKent Overstreet u32 bch2_snapshot_skiplist_get(struct bch_fs *c, u32 id) 6248e877caaSKent Overstreet { 6258e877caaSKent Overstreet const struct snapshot_t *s; 6268e877caaSKent Overstreet 6278e877caaSKent Overstreet if (!id) 6288e877caaSKent Overstreet return 0; 6298e877caaSKent Overstreet 6308e877caaSKent Overstreet rcu_read_lock(); 6318e877caaSKent Overstreet s = snapshot_t(c, id); 6328e877caaSKent Overstreet if (s->parent) 6338e877caaSKent Overstreet id = bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth)); 6348e877caaSKent Overstreet rcu_read_unlock(); 6358e877caaSKent Overstreet 6368e877caaSKent Overstreet return id; 6378e877caaSKent Overstreet } 6388e877caaSKent Overstreet 639097d4cc8SKent Overstreet static int snapshot_skiplist_good(struct btree_trans *trans, u32 id, struct bch_snapshot s) 6408e877caaSKent Overstreet { 6418e877caaSKent Overstreet unsigned i; 6428e877caaSKent Overstreet 643097d4cc8SKent Overstreet for (i = 0; i < 3; i++) 644097d4cc8SKent Overstreet if (!s.parent) { 645097d4cc8SKent Overstreet if (s.skip[i]) 6468e877caaSKent Overstreet return false; 647097d4cc8SKent Overstreet } else { 648097d4cc8SKent Overstreet if (!bch2_snapshot_is_ancestor_early(trans->c, id, le32_to_cpu(s.skip[i]))) 6498e877caaSKent Overstreet return false; 6508e877caaSKent Overstreet } 6518e877caaSKent Overstreet 6528e877caaSKent Overstreet return true; 6538e877caaSKent Overstreet } 6548e877caaSKent Overstreet 6558e877caaSKent Overstreet /* 6568e877caaSKent Overstreet * snapshot_tree pointer was incorrect: look up root snapshot node, make sure 6578e877caaSKent Overstreet * its snapshot_tree pointer is correct (allocate new one if necessary), then 6588e877caaSKent Overstreet * update this node's pointer to root node's pointer: 6598e877caaSKent Overstreet */ 6608e877caaSKent Overstreet static int snapshot_tree_ptr_repair(struct btree_trans *trans, 6618e877caaSKent Overstreet struct btree_iter *iter, 6628e877caaSKent Overstreet struct bkey_s_c k, 6638e877caaSKent Overstreet struct bch_snapshot *s) 6648e877caaSKent Overstreet { 6658e877caaSKent Overstreet struct bch_fs *c = trans->c; 6668e877caaSKent Overstreet struct btree_iter root_iter; 6678e877caaSKent Overstreet struct bch_snapshot_tree s_t; 6688e877caaSKent Overstreet struct bkey_s_c_snapshot root; 6698e877caaSKent Overstreet struct bkey_i_snapshot *u; 6708e877caaSKent Overstreet u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id; 6718e877caaSKent Overstreet int ret; 6728e877caaSKent Overstreet 6738e877caaSKent Overstreet root = bch2_bkey_get_iter_typed(trans, &root_iter, 6748e877caaSKent Overstreet BTREE_ID_snapshots, POS(0, root_id), 6758e877caaSKent Overstreet BTREE_ITER_WITH_UPDATES, snapshot); 6768e877caaSKent Overstreet ret = bkey_err(root); 6778e877caaSKent Overstreet if (ret) 6788e877caaSKent Overstreet goto err; 6798e877caaSKent Overstreet 6808e877caaSKent Overstreet tree_id = le32_to_cpu(root.v->tree); 6818e877caaSKent Overstreet 6828e877caaSKent Overstreet ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t); 6838e877caaSKent Overstreet if (ret && !bch2_err_matches(ret, ENOENT)) 6848e877caaSKent Overstreet return ret; 6858e877caaSKent Overstreet 6868e877caaSKent Overstreet if (ret || le32_to_cpu(s_t.root_snapshot) != root_id) { 6878e877caaSKent Overstreet u = bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot); 6888e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(u) ?: 6898e877caaSKent Overstreet bch2_snapshot_tree_create(trans, root_id, 6908e877caaSKent Overstreet bch2_snapshot_tree_oldest_subvol(c, root_id), 6918e877caaSKent Overstreet &tree_id); 6928e877caaSKent Overstreet if (ret) 6938e877caaSKent Overstreet goto err; 6948e877caaSKent Overstreet 6958e877caaSKent Overstreet u->v.tree = cpu_to_le32(tree_id); 6968e877caaSKent Overstreet if (k.k->p.offset == root_id) 6978e877caaSKent Overstreet *s = u->v; 6988e877caaSKent Overstreet } 6998e877caaSKent Overstreet 7008e877caaSKent Overstreet if (k.k->p.offset != root_id) { 7018e877caaSKent Overstreet u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); 7028e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(u); 7038e877caaSKent Overstreet if (ret) 7048e877caaSKent Overstreet goto err; 7058e877caaSKent Overstreet 7068e877caaSKent Overstreet u->v.tree = cpu_to_le32(tree_id); 7078e877caaSKent Overstreet *s = u->v; 7088e877caaSKent Overstreet } 7098e877caaSKent Overstreet err: 7108e877caaSKent Overstreet bch2_trans_iter_exit(trans, &root_iter); 7118e877caaSKent Overstreet return ret; 7128e877caaSKent Overstreet } 7138e877caaSKent Overstreet 7148e877caaSKent Overstreet static int check_snapshot(struct btree_trans *trans, 7158e877caaSKent Overstreet struct btree_iter *iter, 7168e877caaSKent Overstreet struct bkey_s_c k) 7178e877caaSKent Overstreet { 7188e877caaSKent Overstreet struct bch_fs *c = trans->c; 7198e877caaSKent Overstreet struct bch_snapshot s; 7208e877caaSKent Overstreet struct bch_subvolume subvol; 7218e877caaSKent Overstreet struct bch_snapshot v; 7228e877caaSKent Overstreet struct bkey_i_snapshot *u; 7238e877caaSKent Overstreet u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset); 7248e877caaSKent Overstreet u32 real_depth; 7258e877caaSKent Overstreet struct printbuf buf = PRINTBUF; 7268e877caaSKent Overstreet bool should_have_subvol; 7278e877caaSKent Overstreet u32 i, id; 7288e877caaSKent Overstreet int ret = 0; 7298e877caaSKent Overstreet 7308e877caaSKent Overstreet if (k.k->type != KEY_TYPE_snapshot) 7318e877caaSKent Overstreet return 0; 7328e877caaSKent Overstreet 7338e877caaSKent Overstreet memset(&s, 0, sizeof(s)); 7348e877caaSKent Overstreet memcpy(&s, k.v, bkey_val_bytes(k.k)); 7358e877caaSKent Overstreet 7368e877caaSKent Overstreet id = le32_to_cpu(s.parent); 7378e877caaSKent Overstreet if (id) { 7388e877caaSKent Overstreet ret = bch2_snapshot_lookup(trans, id, &v); 7398e877caaSKent Overstreet if (bch2_err_matches(ret, ENOENT)) 7408e877caaSKent Overstreet bch_err(c, "snapshot with nonexistent parent:\n %s", 7418e877caaSKent Overstreet (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); 7428e877caaSKent Overstreet if (ret) 7438e877caaSKent Overstreet goto err; 7448e877caaSKent Overstreet 7458e877caaSKent Overstreet if (le32_to_cpu(v.children[0]) != k.k->p.offset && 7468e877caaSKent Overstreet le32_to_cpu(v.children[1]) != k.k->p.offset) { 7478e877caaSKent Overstreet bch_err(c, "snapshot parent %u missing pointer to child %llu", 7488e877caaSKent Overstreet id, k.k->p.offset); 7498e877caaSKent Overstreet ret = -EINVAL; 7508e877caaSKent Overstreet goto err; 7518e877caaSKent Overstreet } 7528e877caaSKent Overstreet } 7538e877caaSKent Overstreet 7548e877caaSKent Overstreet for (i = 0; i < 2 && s.children[i]; i++) { 7558e877caaSKent Overstreet id = le32_to_cpu(s.children[i]); 7568e877caaSKent Overstreet 7578e877caaSKent Overstreet ret = bch2_snapshot_lookup(trans, id, &v); 7588e877caaSKent Overstreet if (bch2_err_matches(ret, ENOENT)) 7598e877caaSKent Overstreet bch_err(c, "snapshot node %llu has nonexistent child %u", 7608e877caaSKent Overstreet k.k->p.offset, id); 7618e877caaSKent Overstreet if (ret) 7628e877caaSKent Overstreet goto err; 7638e877caaSKent Overstreet 7648e877caaSKent Overstreet if (le32_to_cpu(v.parent) != k.k->p.offset) { 7658e877caaSKent Overstreet bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)", 7668e877caaSKent Overstreet id, le32_to_cpu(v.parent), k.k->p.offset); 7678e877caaSKent Overstreet ret = -EINVAL; 7688e877caaSKent Overstreet goto err; 7698e877caaSKent Overstreet } 7708e877caaSKent Overstreet } 7718e877caaSKent Overstreet 7728e877caaSKent Overstreet should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) && 7738e877caaSKent Overstreet !BCH_SNAPSHOT_DELETED(&s); 7748e877caaSKent Overstreet 7758e877caaSKent Overstreet if (should_have_subvol) { 7768e877caaSKent Overstreet id = le32_to_cpu(s.subvol); 7778e877caaSKent Overstreet ret = bch2_subvolume_get(trans, id, 0, false, &subvol); 7788e877caaSKent Overstreet if (bch2_err_matches(ret, ENOENT)) 7798e877caaSKent Overstreet bch_err(c, "snapshot points to nonexistent subvolume:\n %s", 7808e877caaSKent Overstreet (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); 7818e877caaSKent Overstreet if (ret) 7828e877caaSKent Overstreet goto err; 7838e877caaSKent Overstreet 7848e877caaSKent Overstreet if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) { 7858e877caaSKent Overstreet bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL", 7868e877caaSKent Overstreet k.k->p.offset); 7878e877caaSKent Overstreet ret = -EINVAL; 7888e877caaSKent Overstreet goto err; 7898e877caaSKent Overstreet } 7908e877caaSKent Overstreet } else { 7918e877caaSKent Overstreet if (fsck_err_on(s.subvol, c, "snapshot should not point to subvol:\n %s", 7928e877caaSKent Overstreet (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 7938e877caaSKent Overstreet u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); 7948e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(u); 7958e877caaSKent Overstreet if (ret) 7968e877caaSKent Overstreet goto err; 7978e877caaSKent Overstreet 7988e877caaSKent Overstreet u->v.subvol = 0; 7998e877caaSKent Overstreet s = u->v; 8008e877caaSKent Overstreet } 8018e877caaSKent Overstreet } 8028e877caaSKent Overstreet 8038e877caaSKent Overstreet ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree)); 8048e877caaSKent Overstreet if (ret < 0) 8058e877caaSKent Overstreet goto err; 8068e877caaSKent Overstreet 8078e877caaSKent Overstreet if (fsck_err_on(!ret, c, "snapshot points to missing/incorrect tree:\n %s", 8088e877caaSKent Overstreet (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 8098e877caaSKent Overstreet ret = snapshot_tree_ptr_repair(trans, iter, k, &s); 8108e877caaSKent Overstreet if (ret) 8118e877caaSKent Overstreet goto err; 8128e877caaSKent Overstreet } 8138e877caaSKent Overstreet ret = 0; 8148e877caaSKent Overstreet 8158e877caaSKent Overstreet real_depth = bch2_snapshot_depth(c, parent_id); 8168e877caaSKent Overstreet 8178e877caaSKent Overstreet if (le32_to_cpu(s.depth) != real_depth && 8188e877caaSKent Overstreet (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists || 8198e877caaSKent Overstreet fsck_err(c, "snapshot with incorrect depth field, should be %u:\n %s", 8208e877caaSKent Overstreet real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { 8218e877caaSKent Overstreet u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); 8228e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(u); 8238e877caaSKent Overstreet if (ret) 8248e877caaSKent Overstreet goto err; 8258e877caaSKent Overstreet 8268e877caaSKent Overstreet u->v.depth = cpu_to_le32(real_depth); 8278e877caaSKent Overstreet s = u->v; 8288e877caaSKent Overstreet } 8298e877caaSKent Overstreet 830097d4cc8SKent Overstreet ret = snapshot_skiplist_good(trans, k.k->p.offset, s); 8318e877caaSKent Overstreet if (ret < 0) 8328e877caaSKent Overstreet goto err; 8338e877caaSKent Overstreet 8348e877caaSKent Overstreet if (!ret && 8358e877caaSKent Overstreet (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists || 8368e877caaSKent Overstreet fsck_err(c, "snapshot with bad skiplist field:\n %s", 8378e877caaSKent Overstreet (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { 8388e877caaSKent Overstreet u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); 8398e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(u); 8408e877caaSKent Overstreet if (ret) 8418e877caaSKent Overstreet goto err; 8428e877caaSKent Overstreet 8438e877caaSKent Overstreet for (i = 0; i < ARRAY_SIZE(u->v.skip); i++) 8448e877caaSKent Overstreet u->v.skip[i] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent_id)); 8458e877caaSKent Overstreet 8468e877caaSKent Overstreet bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_le32); 8478e877caaSKent Overstreet s = u->v; 8488e877caaSKent Overstreet } 8498e877caaSKent Overstreet ret = 0; 8508e877caaSKent Overstreet err: 8518e877caaSKent Overstreet fsck_err: 8528e877caaSKent Overstreet printbuf_exit(&buf); 8538e877caaSKent Overstreet return ret; 8548e877caaSKent Overstreet } 8558e877caaSKent Overstreet 8568e877caaSKent Overstreet int bch2_check_snapshots(struct bch_fs *c) 8578e877caaSKent Overstreet { 8588e877caaSKent Overstreet struct btree_iter iter; 8598e877caaSKent Overstreet struct bkey_s_c k; 8608e877caaSKent Overstreet int ret; 8618e877caaSKent Overstreet 8628e877caaSKent Overstreet /* 8638e877caaSKent Overstreet * We iterate backwards as checking/fixing the depth field requires that 8648e877caaSKent Overstreet * the parent's depth already be correct: 8658e877caaSKent Overstreet */ 8668e877caaSKent Overstreet ret = bch2_trans_run(c, 8676bd68ec2SKent Overstreet for_each_btree_key_reverse_commit(trans, iter, 8688e877caaSKent Overstreet BTREE_ID_snapshots, POS_MAX, 8698e877caaSKent Overstreet BTREE_ITER_PREFETCH, k, 8708e877caaSKent Overstreet NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, 8716bd68ec2SKent Overstreet check_snapshot(trans, &iter, k))); 8728e877caaSKent Overstreet if (ret) 8738e877caaSKent Overstreet bch_err_fn(c, ret); 8748e877caaSKent Overstreet return ret; 8758e877caaSKent Overstreet } 8768e877caaSKent Overstreet 8778e877caaSKent Overstreet /* 8788e877caaSKent Overstreet * Mark a snapshot as deleted, for future cleanup: 8798e877caaSKent Overstreet */ 8808e877caaSKent Overstreet int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id) 8818e877caaSKent Overstreet { 8828e877caaSKent Overstreet struct btree_iter iter; 8838e877caaSKent Overstreet struct bkey_i_snapshot *s; 8848e877caaSKent Overstreet int ret = 0; 8858e877caaSKent Overstreet 8868e877caaSKent Overstreet s = bch2_bkey_get_mut_typed(trans, &iter, 8878e877caaSKent Overstreet BTREE_ID_snapshots, POS(0, id), 8888e877caaSKent Overstreet 0, snapshot); 8898e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(s); 8908e877caaSKent Overstreet if (unlikely(ret)) { 8918e877caaSKent Overstreet bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), 8928e877caaSKent Overstreet trans->c, "missing snapshot %u", id); 8938e877caaSKent Overstreet return ret; 8948e877caaSKent Overstreet } 8958e877caaSKent Overstreet 8968e877caaSKent Overstreet /* already deleted? */ 8978e877caaSKent Overstreet if (BCH_SNAPSHOT_DELETED(&s->v)) 8988e877caaSKent Overstreet goto err; 8998e877caaSKent Overstreet 9008e877caaSKent Overstreet SET_BCH_SNAPSHOT_DELETED(&s->v, true); 9018e877caaSKent Overstreet SET_BCH_SNAPSHOT_SUBVOL(&s->v, false); 9028e877caaSKent Overstreet s->v.subvol = 0; 9038e877caaSKent Overstreet err: 9048e877caaSKent Overstreet bch2_trans_iter_exit(trans, &iter); 9058e877caaSKent Overstreet return ret; 9068e877caaSKent Overstreet } 9078e877caaSKent Overstreet 908f55d6e07SKent Overstreet static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s) 909f55d6e07SKent Overstreet { 910f55d6e07SKent Overstreet if (le32_to_cpu(s->children[0]) < le32_to_cpu(s->children[1])) 911f55d6e07SKent Overstreet swap(s->children[0], s->children[1]); 912f55d6e07SKent Overstreet } 913f55d6e07SKent Overstreet 91496dea3d5SKent Overstreet static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id) 9158e877caaSKent Overstreet { 9168e877caaSKent Overstreet struct bch_fs *c = trans->c; 9178e877caaSKent Overstreet struct btree_iter iter, p_iter = (struct btree_iter) { NULL }; 918f55d6e07SKent Overstreet struct btree_iter c_iter = (struct btree_iter) { NULL }; 9198e877caaSKent Overstreet struct btree_iter tree_iter = (struct btree_iter) { NULL }; 9208e877caaSKent Overstreet struct bkey_s_c_snapshot s; 921f55d6e07SKent Overstreet u32 parent_id, child_id; 9228e877caaSKent Overstreet unsigned i; 9238e877caaSKent Overstreet int ret = 0; 9248e877caaSKent Overstreet 9258e877caaSKent Overstreet s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id), 9268e877caaSKent Overstreet BTREE_ITER_INTENT, snapshot); 9278e877caaSKent Overstreet ret = bkey_err(s); 9288e877caaSKent Overstreet bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, 9298e877caaSKent Overstreet "missing snapshot %u", id); 9308e877caaSKent Overstreet 9318e877caaSKent Overstreet if (ret) 9328e877caaSKent Overstreet goto err; 9338e877caaSKent Overstreet 934f55d6e07SKent Overstreet BUG_ON(s.v->children[1]); 935f55d6e07SKent Overstreet 9368e877caaSKent Overstreet parent_id = le32_to_cpu(s.v->parent); 937f55d6e07SKent Overstreet child_id = le32_to_cpu(s.v->children[0]); 9388e877caaSKent Overstreet 9398e877caaSKent Overstreet if (parent_id) { 9408e877caaSKent Overstreet struct bkey_i_snapshot *parent; 9418e877caaSKent Overstreet 9428e877caaSKent Overstreet parent = bch2_bkey_get_mut_typed(trans, &p_iter, 9438e877caaSKent Overstreet BTREE_ID_snapshots, POS(0, parent_id), 9448e877caaSKent Overstreet 0, snapshot); 9458e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(parent); 9468e877caaSKent Overstreet bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, 9478e877caaSKent Overstreet "missing snapshot %u", parent_id); 948f55d6e07SKent Overstreet if (unlikely(ret)) 9498e877caaSKent Overstreet goto err; 9508e877caaSKent Overstreet 951f55d6e07SKent Overstreet /* find entry in parent->children for node being deleted */ 9528e877caaSKent Overstreet for (i = 0; i < 2; i++) 9538e877caaSKent Overstreet if (le32_to_cpu(parent->v.children[i]) == id) 9548e877caaSKent Overstreet break; 9558e877caaSKent Overstreet 956f55d6e07SKent Overstreet if (bch2_fs_inconsistent_on(i == 2, c, 957f55d6e07SKent Overstreet "snapshot %u missing child pointer to %u", 958f55d6e07SKent Overstreet parent_id, id)) 959f55d6e07SKent Overstreet goto err; 9608e877caaSKent Overstreet 961f55d6e07SKent Overstreet parent->v.children[i] = le32_to_cpu(child_id); 962f55d6e07SKent Overstreet 963f55d6e07SKent Overstreet normalize_snapshot_child_pointers(&parent->v); 964f55d6e07SKent Overstreet } 965f55d6e07SKent Overstreet 966f55d6e07SKent Overstreet if (child_id) { 967f55d6e07SKent Overstreet struct bkey_i_snapshot *child; 968f55d6e07SKent Overstreet 969f55d6e07SKent Overstreet child = bch2_bkey_get_mut_typed(trans, &c_iter, 970f55d6e07SKent Overstreet BTREE_ID_snapshots, POS(0, child_id), 971f55d6e07SKent Overstreet 0, snapshot); 972f55d6e07SKent Overstreet ret = PTR_ERR_OR_ZERO(child); 973f55d6e07SKent Overstreet bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, 974f55d6e07SKent Overstreet "missing snapshot %u", child_id); 975f55d6e07SKent Overstreet if (unlikely(ret)) 976f55d6e07SKent Overstreet goto err; 977f55d6e07SKent Overstreet 978f55d6e07SKent Overstreet child->v.parent = cpu_to_le32(parent_id); 979f55d6e07SKent Overstreet 980f55d6e07SKent Overstreet if (!child->v.parent) { 981f55d6e07SKent Overstreet child->v.skip[0] = 0; 982f55d6e07SKent Overstreet child->v.skip[1] = 0; 983f55d6e07SKent Overstreet child->v.skip[2] = 0; 984f55d6e07SKent Overstreet } 985f55d6e07SKent Overstreet } 986f55d6e07SKent Overstreet 987f55d6e07SKent Overstreet if (!parent_id) { 9888e877caaSKent Overstreet /* 9898e877caaSKent Overstreet * We're deleting the root of a snapshot tree: update the 9908e877caaSKent Overstreet * snapshot_tree entry to point to the new root, or delete it if 9918e877caaSKent Overstreet * this is the last snapshot ID in this tree: 9928e877caaSKent Overstreet */ 9938e877caaSKent Overstreet struct bkey_i_snapshot_tree *s_t; 9948e877caaSKent Overstreet 9958e877caaSKent Overstreet BUG_ON(s.v->children[1]); 9968e877caaSKent Overstreet 9978e877caaSKent Overstreet s_t = bch2_bkey_get_mut_typed(trans, &tree_iter, 9988e877caaSKent Overstreet BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)), 9998e877caaSKent Overstreet 0, snapshot_tree); 10008e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(s_t); 10018e877caaSKent Overstreet if (ret) 10028e877caaSKent Overstreet goto err; 10038e877caaSKent Overstreet 10048e877caaSKent Overstreet if (s.v->children[0]) { 10058e877caaSKent Overstreet s_t->v.root_snapshot = s.v->children[0]; 10068e877caaSKent Overstreet } else { 10078e877caaSKent Overstreet s_t->k.type = KEY_TYPE_deleted; 10088e877caaSKent Overstreet set_bkey_val_u64s(&s_t->k, 0); 10098e877caaSKent Overstreet } 10108e877caaSKent Overstreet } 10118e877caaSKent Overstreet 10128e877caaSKent Overstreet ret = bch2_btree_delete_at(trans, &iter, 0); 10138e877caaSKent Overstreet err: 10148e877caaSKent Overstreet bch2_trans_iter_exit(trans, &tree_iter); 10158e877caaSKent Overstreet bch2_trans_iter_exit(trans, &p_iter); 1016f55d6e07SKent Overstreet bch2_trans_iter_exit(trans, &c_iter); 10178e877caaSKent Overstreet bch2_trans_iter_exit(trans, &iter); 10188e877caaSKent Overstreet return ret; 10198e877caaSKent Overstreet } 10208e877caaSKent Overstreet 10218e877caaSKent Overstreet static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, 10228e877caaSKent Overstreet u32 *new_snapids, 10238e877caaSKent Overstreet u32 *snapshot_subvols, 10248e877caaSKent Overstreet unsigned nr_snapids) 10258e877caaSKent Overstreet { 10268e877caaSKent Overstreet struct bch_fs *c = trans->c; 10278e877caaSKent Overstreet struct btree_iter iter; 10288e877caaSKent Overstreet struct bkey_i_snapshot *n; 10298e877caaSKent Overstreet struct bkey_s_c k; 10308e877caaSKent Overstreet unsigned i, j; 10318e877caaSKent Overstreet u32 depth = bch2_snapshot_depth(c, parent); 10328e877caaSKent Overstreet int ret; 10338e877caaSKent Overstreet 10348e877caaSKent Overstreet bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, 10358e877caaSKent Overstreet POS_MIN, BTREE_ITER_INTENT); 10368e877caaSKent Overstreet k = bch2_btree_iter_peek(&iter); 10378e877caaSKent Overstreet ret = bkey_err(k); 10388e877caaSKent Overstreet if (ret) 10398e877caaSKent Overstreet goto err; 10408e877caaSKent Overstreet 10418e877caaSKent Overstreet for (i = 0; i < nr_snapids; i++) { 10428e877caaSKent Overstreet k = bch2_btree_iter_prev_slot(&iter); 10438e877caaSKent Overstreet ret = bkey_err(k); 10448e877caaSKent Overstreet if (ret) 10458e877caaSKent Overstreet goto err; 10468e877caaSKent Overstreet 10478e877caaSKent Overstreet if (!k.k || !k.k->p.offset) { 10488e877caaSKent Overstreet ret = -BCH_ERR_ENOSPC_snapshot_create; 10498e877caaSKent Overstreet goto err; 10508e877caaSKent Overstreet } 10518e877caaSKent Overstreet 10528e877caaSKent Overstreet n = bch2_bkey_alloc(trans, &iter, 0, snapshot); 10538e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(n); 10548e877caaSKent Overstreet if (ret) 10558e877caaSKent Overstreet goto err; 10568e877caaSKent Overstreet 10578e877caaSKent Overstreet n->v.flags = 0; 10588e877caaSKent Overstreet n->v.parent = cpu_to_le32(parent); 10598e877caaSKent Overstreet n->v.subvol = cpu_to_le32(snapshot_subvols[i]); 10608e877caaSKent Overstreet n->v.tree = cpu_to_le32(tree); 10618e877caaSKent Overstreet n->v.depth = cpu_to_le32(depth); 10628e877caaSKent Overstreet 10638e877caaSKent Overstreet for (j = 0; j < ARRAY_SIZE(n->v.skip); j++) 10648e877caaSKent Overstreet n->v.skip[j] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent)); 10658e877caaSKent Overstreet 10668e877caaSKent Overstreet bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_le32); 10678e877caaSKent Overstreet SET_BCH_SNAPSHOT_SUBVOL(&n->v, true); 10688e877caaSKent Overstreet 10698e877caaSKent Overstreet ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, 10708e877caaSKent Overstreet bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0); 10718e877caaSKent Overstreet if (ret) 10728e877caaSKent Overstreet goto err; 10738e877caaSKent Overstreet 10748e877caaSKent Overstreet new_snapids[i] = iter.pos.offset; 1075eebe8a84SKent Overstreet 1076eebe8a84SKent Overstreet mutex_lock(&c->snapshot_table_lock); 1077eebe8a84SKent Overstreet snapshot_t_mut(c, new_snapids[i])->equiv = new_snapids[i]; 1078eebe8a84SKent Overstreet mutex_unlock(&c->snapshot_table_lock); 10798e877caaSKent Overstreet } 10808e877caaSKent Overstreet err: 10818e877caaSKent Overstreet bch2_trans_iter_exit(trans, &iter); 10828e877caaSKent Overstreet return ret; 10838e877caaSKent Overstreet } 10848e877caaSKent Overstreet 10858e877caaSKent Overstreet /* 10868e877caaSKent Overstreet * Create new snapshot IDs as children of an existing snapshot ID: 10878e877caaSKent Overstreet */ 10888e877caaSKent Overstreet static int bch2_snapshot_node_create_children(struct btree_trans *trans, u32 parent, 10898e877caaSKent Overstreet u32 *new_snapids, 10908e877caaSKent Overstreet u32 *snapshot_subvols, 10918e877caaSKent Overstreet unsigned nr_snapids) 10928e877caaSKent Overstreet { 10938e877caaSKent Overstreet struct btree_iter iter; 10948e877caaSKent Overstreet struct bkey_i_snapshot *n_parent; 10958e877caaSKent Overstreet int ret = 0; 10968e877caaSKent Overstreet 10978e877caaSKent Overstreet n_parent = bch2_bkey_get_mut_typed(trans, &iter, 10988e877caaSKent Overstreet BTREE_ID_snapshots, POS(0, parent), 10998e877caaSKent Overstreet 0, snapshot); 11008e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(n_parent); 11018e877caaSKent Overstreet if (unlikely(ret)) { 11028e877caaSKent Overstreet if (bch2_err_matches(ret, ENOENT)) 11038e877caaSKent Overstreet bch_err(trans->c, "snapshot %u not found", parent); 11048e877caaSKent Overstreet return ret; 11058e877caaSKent Overstreet } 11068e877caaSKent Overstreet 11078e877caaSKent Overstreet if (n_parent->v.children[0] || n_parent->v.children[1]) { 11088e877caaSKent Overstreet bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children"); 11098e877caaSKent Overstreet ret = -EINVAL; 11108e877caaSKent Overstreet goto err; 11118e877caaSKent Overstreet } 11128e877caaSKent Overstreet 11138e877caaSKent Overstreet ret = create_snapids(trans, parent, le32_to_cpu(n_parent->v.tree), 11148e877caaSKent Overstreet new_snapids, snapshot_subvols, nr_snapids); 11158e877caaSKent Overstreet if (ret) 11168e877caaSKent Overstreet goto err; 11178e877caaSKent Overstreet 11188e877caaSKent Overstreet n_parent->v.children[0] = cpu_to_le32(new_snapids[0]); 11198e877caaSKent Overstreet n_parent->v.children[1] = cpu_to_le32(new_snapids[1]); 11208e877caaSKent Overstreet n_parent->v.subvol = 0; 11218e877caaSKent Overstreet SET_BCH_SNAPSHOT_SUBVOL(&n_parent->v, false); 11228e877caaSKent Overstreet err: 11238e877caaSKent Overstreet bch2_trans_iter_exit(trans, &iter); 11248e877caaSKent Overstreet return ret; 11258e877caaSKent Overstreet } 11268e877caaSKent Overstreet 11278e877caaSKent Overstreet /* 11288e877caaSKent Overstreet * Create a snapshot node that is the root of a new tree: 11298e877caaSKent Overstreet */ 11308e877caaSKent Overstreet static int bch2_snapshot_node_create_tree(struct btree_trans *trans, 11318e877caaSKent Overstreet u32 *new_snapids, 11328e877caaSKent Overstreet u32 *snapshot_subvols, 11338e877caaSKent Overstreet unsigned nr_snapids) 11348e877caaSKent Overstreet { 11358e877caaSKent Overstreet struct bkey_i_snapshot_tree *n_tree; 11368e877caaSKent Overstreet int ret; 11378e877caaSKent Overstreet 11388e877caaSKent Overstreet n_tree = __bch2_snapshot_tree_create(trans); 11398e877caaSKent Overstreet ret = PTR_ERR_OR_ZERO(n_tree) ?: 11408e877caaSKent Overstreet create_snapids(trans, 0, n_tree->k.p.offset, 11418e877caaSKent Overstreet new_snapids, snapshot_subvols, nr_snapids); 11428e877caaSKent Overstreet if (ret) 11438e877caaSKent Overstreet return ret; 11448e877caaSKent Overstreet 11458e877caaSKent Overstreet n_tree->v.master_subvol = cpu_to_le32(snapshot_subvols[0]); 11468e877caaSKent Overstreet n_tree->v.root_snapshot = cpu_to_le32(new_snapids[0]); 11478e877caaSKent Overstreet return 0; 11488e877caaSKent Overstreet } 11498e877caaSKent Overstreet 11508e877caaSKent Overstreet int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent, 11518e877caaSKent Overstreet u32 *new_snapids, 11528e877caaSKent Overstreet u32 *snapshot_subvols, 11538e877caaSKent Overstreet unsigned nr_snapids) 11548e877caaSKent Overstreet { 11558e877caaSKent Overstreet BUG_ON((parent == 0) != (nr_snapids == 1)); 11568e877caaSKent Overstreet BUG_ON((parent != 0) != (nr_snapids == 2)); 11578e877caaSKent Overstreet 11588e877caaSKent Overstreet return parent 11598e877caaSKent Overstreet ? bch2_snapshot_node_create_children(trans, parent, 11608e877caaSKent Overstreet new_snapids, snapshot_subvols, nr_snapids) 11618e877caaSKent Overstreet : bch2_snapshot_node_create_tree(trans, 11628e877caaSKent Overstreet new_snapids, snapshot_subvols, nr_snapids); 11638e877caaSKent Overstreet 11648e877caaSKent Overstreet } 11658e877caaSKent Overstreet 11668e877caaSKent Overstreet /* 11678e877caaSKent Overstreet * If we have an unlinked inode in an internal snapshot node, and the inode 11688e877caaSKent Overstreet * really has been deleted in all child snapshots, how does this get cleaned up? 11698e877caaSKent Overstreet * 11708e877caaSKent Overstreet * first there is the problem of how keys that have been overwritten in all 11718e877caaSKent Overstreet * child snapshots get deleted (unimplemented?), but inodes may perhaps be 11728e877caaSKent Overstreet * special? 11738e877caaSKent Overstreet * 11748e877caaSKent Overstreet * also: unlinked inode in internal snapshot appears to not be getting deleted 11758e877caaSKent Overstreet * correctly if inode doesn't exist in leaf snapshots 1176f55d6e07SKent Overstreet * 1177f55d6e07SKent Overstreet * solution: 1178f55d6e07SKent Overstreet * 1179f55d6e07SKent Overstreet * for a key in an interior snapshot node that needs work to be done that 1180f55d6e07SKent Overstreet * requires it to be mutated: iterate over all descendent leaf nodes and copy 1181f55d6e07SKent Overstreet * that key to snapshot leaf nodes, where we can mutate it 11828e877caaSKent Overstreet */ 11838e877caaSKent Overstreet 11848e877caaSKent Overstreet static int snapshot_delete_key(struct btree_trans *trans, 11858e877caaSKent Overstreet struct btree_iter *iter, 11868e877caaSKent Overstreet struct bkey_s_c k, 11878e877caaSKent Overstreet snapshot_id_list *deleted, 11888e877caaSKent Overstreet snapshot_id_list *equiv_seen, 11898e877caaSKent Overstreet struct bpos *last_pos) 11908e877caaSKent Overstreet { 11918e877caaSKent Overstreet struct bch_fs *c = trans->c; 11928e877caaSKent Overstreet u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot); 11938e877caaSKent Overstreet 11948e877caaSKent Overstreet if (!bkey_eq(k.k->p, *last_pos)) 11958e877caaSKent Overstreet equiv_seen->nr = 0; 11968e877caaSKent Overstreet *last_pos = k.k->p; 11978e877caaSKent Overstreet 11988e877caaSKent Overstreet if (snapshot_list_has_id(deleted, k.k->p.snapshot) || 11998e877caaSKent Overstreet snapshot_list_has_id(equiv_seen, equiv)) { 12008e877caaSKent Overstreet return bch2_btree_delete_at(trans, iter, 12018e877caaSKent Overstreet BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); 12028e877caaSKent Overstreet } else { 12038e877caaSKent Overstreet return snapshot_list_add(c, equiv_seen, equiv); 12048e877caaSKent Overstreet } 12058e877caaSKent Overstreet } 12068e877caaSKent Overstreet 1207f55d6e07SKent Overstreet static int move_key_to_correct_snapshot(struct btree_trans *trans, 1208f55d6e07SKent Overstreet struct btree_iter *iter, 1209f55d6e07SKent Overstreet struct bkey_s_c k) 1210f55d6e07SKent Overstreet { 1211f55d6e07SKent Overstreet struct bch_fs *c = trans->c; 1212f55d6e07SKent Overstreet u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot); 1213f55d6e07SKent Overstreet 1214f55d6e07SKent Overstreet /* 1215f55d6e07SKent Overstreet * When we have a linear chain of snapshot nodes, we consider 1216f55d6e07SKent Overstreet * those to form an equivalence class: we're going to collapse 1217f55d6e07SKent Overstreet * them all down to a single node, and keep the leaf-most node - 1218f55d6e07SKent Overstreet * which has the same id as the equivalence class id. 1219f55d6e07SKent Overstreet * 1220f55d6e07SKent Overstreet * If there are multiple keys in different snapshots at the same 1221f55d6e07SKent Overstreet * position, we're only going to keep the one in the newest 1222f55d6e07SKent Overstreet * snapshot - the rest have been overwritten and are redundant, 1223f55d6e07SKent Overstreet * and for the key we're going to keep we need to move it to the 1224f55d6e07SKent Overstreet * equivalance class ID if it's not there already. 1225f55d6e07SKent Overstreet */ 1226f55d6e07SKent Overstreet if (equiv != k.k->p.snapshot) { 1227f55d6e07SKent Overstreet struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); 1228f55d6e07SKent Overstreet struct btree_iter new_iter; 1229f55d6e07SKent Overstreet int ret; 1230f55d6e07SKent Overstreet 1231f55d6e07SKent Overstreet ret = PTR_ERR_OR_ZERO(new); 1232f55d6e07SKent Overstreet if (ret) 1233f55d6e07SKent Overstreet return ret; 1234f55d6e07SKent Overstreet 1235f55d6e07SKent Overstreet new->k.p.snapshot = equiv; 1236f55d6e07SKent Overstreet 1237f55d6e07SKent Overstreet bch2_trans_iter_init(trans, &new_iter, iter->btree_id, new->k.p, 1238f55d6e07SKent Overstreet BTREE_ITER_ALL_SNAPSHOTS| 1239f55d6e07SKent Overstreet BTREE_ITER_CACHED| 1240f55d6e07SKent Overstreet BTREE_ITER_INTENT); 1241f55d6e07SKent Overstreet 1242f55d6e07SKent Overstreet ret = bch2_btree_iter_traverse(&new_iter) ?: 1243f55d6e07SKent Overstreet bch2_trans_update(trans, &new_iter, new, 1244f55d6e07SKent Overstreet BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: 1245f55d6e07SKent Overstreet bch2_btree_delete_at(trans, iter, 1246f55d6e07SKent Overstreet BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); 1247f55d6e07SKent Overstreet bch2_trans_iter_exit(trans, &new_iter); 1248f55d6e07SKent Overstreet if (ret) 1249f55d6e07SKent Overstreet return ret; 1250f55d6e07SKent Overstreet } 1251f55d6e07SKent Overstreet 1252f55d6e07SKent Overstreet return 0; 1253f55d6e07SKent Overstreet } 1254f55d6e07SKent Overstreet 1255*b0b5bbf9SKent Overstreet static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c k) 12568e877caaSKent Overstreet { 12578e877caaSKent Overstreet struct bkey_s_c_snapshot snap; 12588e877caaSKent Overstreet u32 children[2]; 12598e877caaSKent Overstreet int ret; 12608e877caaSKent Overstreet 12618e877caaSKent Overstreet if (k.k->type != KEY_TYPE_snapshot) 12628e877caaSKent Overstreet return 0; 12638e877caaSKent Overstreet 12648e877caaSKent Overstreet snap = bkey_s_c_to_snapshot(k); 12658e877caaSKent Overstreet if (BCH_SNAPSHOT_DELETED(snap.v) || 12668e877caaSKent Overstreet BCH_SNAPSHOT_SUBVOL(snap.v)) 12678e877caaSKent Overstreet return 0; 12688e877caaSKent Overstreet 12698e877caaSKent Overstreet children[0] = le32_to_cpu(snap.v->children[0]); 12708e877caaSKent Overstreet children[1] = le32_to_cpu(snap.v->children[1]); 12718e877caaSKent Overstreet 12728e877caaSKent Overstreet ret = bch2_snapshot_live(trans, children[0]) ?: 12738e877caaSKent Overstreet bch2_snapshot_live(trans, children[1]); 12748e877caaSKent Overstreet if (ret < 0) 12758e877caaSKent Overstreet return ret; 1276*b0b5bbf9SKent Overstreet return !ret; 1277*b0b5bbf9SKent Overstreet } 12788e877caaSKent Overstreet 1279*b0b5bbf9SKent Overstreet /* 1280*b0b5bbf9SKent Overstreet * For a given snapshot, if it doesn't have a subvolume that points to it, and 1281*b0b5bbf9SKent Overstreet * it doesn't have child snapshot nodes - it's now redundant and we can mark it 1282*b0b5bbf9SKent Overstreet * as deleted. 1283*b0b5bbf9SKent Overstreet */ 1284*b0b5bbf9SKent Overstreet static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct bkey_s_c k) 1285*b0b5bbf9SKent Overstreet { 1286*b0b5bbf9SKent Overstreet int ret = bch2_snapshot_needs_delete(trans, k); 1287*b0b5bbf9SKent Overstreet 1288*b0b5bbf9SKent Overstreet return ret <= 0 1289*b0b5bbf9SKent Overstreet ? ret 1290*b0b5bbf9SKent Overstreet : bch2_snapshot_node_set_deleted(trans, k.k->p.offset); 12918e877caaSKent Overstreet } 12928e877caaSKent Overstreet 1293f55d6e07SKent Overstreet static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n, 1294f55d6e07SKent Overstreet snapshot_id_list *skip) 1295f55d6e07SKent Overstreet { 1296f55d6e07SKent Overstreet rcu_read_lock(); 12971e2d3999SKent Overstreet while (snapshot_list_has_id(skip, id)) 12981e2d3999SKent Overstreet id = __bch2_snapshot_parent(c, id); 12991e2d3999SKent Overstreet 1300f55d6e07SKent Overstreet while (n--) { 1301f55d6e07SKent Overstreet do { 1302f55d6e07SKent Overstreet id = __bch2_snapshot_parent(c, id); 1303f55d6e07SKent Overstreet } while (snapshot_list_has_id(skip, id)); 1304f55d6e07SKent Overstreet } 1305f55d6e07SKent Overstreet rcu_read_unlock(); 1306f55d6e07SKent Overstreet 1307f55d6e07SKent Overstreet return id; 1308f55d6e07SKent Overstreet } 1309f55d6e07SKent Overstreet 1310f55d6e07SKent Overstreet static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, 1311f55d6e07SKent Overstreet struct btree_iter *iter, struct bkey_s_c k, 1312f55d6e07SKent Overstreet snapshot_id_list *deleted) 1313f55d6e07SKent Overstreet { 1314f55d6e07SKent Overstreet struct bch_fs *c = trans->c; 1315f55d6e07SKent Overstreet u32 nr_deleted_ancestors = 0; 1316f55d6e07SKent Overstreet struct bkey_i_snapshot *s; 1317f55d6e07SKent Overstreet u32 *i; 1318f55d6e07SKent Overstreet int ret; 1319f55d6e07SKent Overstreet 1320f55d6e07SKent Overstreet if (k.k->type != KEY_TYPE_snapshot) 1321f55d6e07SKent Overstreet return 0; 1322f55d6e07SKent Overstreet 1323f55d6e07SKent Overstreet if (snapshot_list_has_id(deleted, k.k->p.offset)) 1324f55d6e07SKent Overstreet return 0; 1325f55d6e07SKent Overstreet 1326f55d6e07SKent Overstreet s = bch2_bkey_make_mut_noupdate_typed(trans, k, snapshot); 1327f55d6e07SKent Overstreet ret = PTR_ERR_OR_ZERO(s); 1328f55d6e07SKent Overstreet if (ret) 1329f55d6e07SKent Overstreet return ret; 1330f55d6e07SKent Overstreet 1331f55d6e07SKent Overstreet darray_for_each(*deleted, i) 1332f55d6e07SKent Overstreet nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, *i); 1333f55d6e07SKent Overstreet 1334f55d6e07SKent Overstreet if (!nr_deleted_ancestors) 1335f55d6e07SKent Overstreet return 0; 1336f55d6e07SKent Overstreet 1337f55d6e07SKent Overstreet le32_add_cpu(&s->v.depth, -nr_deleted_ancestors); 1338f55d6e07SKent Overstreet 1339f55d6e07SKent Overstreet if (!s->v.depth) { 1340f55d6e07SKent Overstreet s->v.skip[0] = 0; 1341f55d6e07SKent Overstreet s->v.skip[1] = 0; 1342f55d6e07SKent Overstreet s->v.skip[2] = 0; 1343f55d6e07SKent Overstreet } else { 1344f55d6e07SKent Overstreet u32 depth = le32_to_cpu(s->v.depth); 1345f55d6e07SKent Overstreet u32 parent = bch2_snapshot_parent(c, s->k.p.offset); 1346f55d6e07SKent Overstreet 1347f55d6e07SKent Overstreet for (unsigned j = 0; j < ARRAY_SIZE(s->v.skip); j++) { 1348f55d6e07SKent Overstreet u32 id = le32_to_cpu(s->v.skip[j]); 1349f55d6e07SKent Overstreet 1350f55d6e07SKent Overstreet if (snapshot_list_has_id(deleted, id)) { 1351f55d6e07SKent Overstreet id = depth > 1 1352f55d6e07SKent Overstreet ? bch2_snapshot_nth_parent_skip(c, 1353f55d6e07SKent Overstreet parent, 1354f55d6e07SKent Overstreet get_random_u32_below(depth - 1), 1355f55d6e07SKent Overstreet deleted) 1356f55d6e07SKent Overstreet : parent; 1357f55d6e07SKent Overstreet s->v.skip[j] = cpu_to_le32(id); 1358f55d6e07SKent Overstreet } 1359f55d6e07SKent Overstreet } 1360f55d6e07SKent Overstreet 1361f55d6e07SKent Overstreet bubble_sort(s->v.skip, ARRAY_SIZE(s->v.skip), cmp_le32); 1362f55d6e07SKent Overstreet } 1363f55d6e07SKent Overstreet 1364f55d6e07SKent Overstreet return bch2_trans_update(trans, iter, &s->k_i, 0); 1365f55d6e07SKent Overstreet } 1366f55d6e07SKent Overstreet 13678e877caaSKent Overstreet int bch2_delete_dead_snapshots(struct bch_fs *c) 13688e877caaSKent Overstreet { 13696bd68ec2SKent Overstreet struct btree_trans *trans; 13708e877caaSKent Overstreet struct btree_iter iter; 13718e877caaSKent Overstreet struct bkey_s_c k; 13728e877caaSKent Overstreet struct bkey_s_c_snapshot snap; 13738e877caaSKent Overstreet snapshot_id_list deleted = { 0 }; 1374f55d6e07SKent Overstreet snapshot_id_list deleted_interior = { 0 }; 1375f55d6e07SKent Overstreet u32 *i, id; 13768e877caaSKent Overstreet int ret = 0; 13778e877caaSKent Overstreet 1378*b0b5bbf9SKent Overstreet if (!test_and_clear_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) 1379*b0b5bbf9SKent Overstreet return 0; 1380*b0b5bbf9SKent Overstreet 13818e877caaSKent Overstreet if (!test_bit(BCH_FS_STARTED, &c->flags)) { 13828e877caaSKent Overstreet ret = bch2_fs_read_write_early(c); 13838e877caaSKent Overstreet if (ret) { 13846bf3766bSColin Ian King bch_err_msg(c, ret, "deleting dead snapshots: error going rw"); 13858e877caaSKent Overstreet return ret; 13868e877caaSKent Overstreet } 13878e877caaSKent Overstreet } 13888e877caaSKent Overstreet 13896bd68ec2SKent Overstreet trans = bch2_trans_get(c); 13908e877caaSKent Overstreet 13918e877caaSKent Overstreet /* 13928e877caaSKent Overstreet * For every snapshot node: If we have no live children and it's not 13938e877caaSKent Overstreet * pointed to by a subvolume, delete it: 13948e877caaSKent Overstreet */ 13956bd68ec2SKent Overstreet ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, 13968e877caaSKent Overstreet POS_MIN, 0, k, 13978e877caaSKent Overstreet NULL, NULL, 0, 1398*b0b5bbf9SKent Overstreet bch2_delete_redundant_snapshot(trans, k)); 13998e877caaSKent Overstreet if (ret) { 1400e46c181aSKent Overstreet bch_err_msg(c, ret, "deleting redundant snapshots"); 14018e877caaSKent Overstreet goto err; 14028e877caaSKent Overstreet } 14038e877caaSKent Overstreet 1404d67a72bfSDan Carpenter ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots, 14058e877caaSKent Overstreet POS_MIN, 0, k, 14066bd68ec2SKent Overstreet bch2_snapshot_set_equiv(trans, k)); 14078e877caaSKent Overstreet if (ret) { 1408e46c181aSKent Overstreet bch_err_msg(c, ret, "in bch2_snapshots_set_equiv"); 14098e877caaSKent Overstreet goto err; 14108e877caaSKent Overstreet } 14118e877caaSKent Overstreet 14126bd68ec2SKent Overstreet for_each_btree_key(trans, iter, BTREE_ID_snapshots, 14138e877caaSKent Overstreet POS_MIN, 0, k, ret) { 14148e877caaSKent Overstreet if (k.k->type != KEY_TYPE_snapshot) 14158e877caaSKent Overstreet continue; 14168e877caaSKent Overstreet 14178e877caaSKent Overstreet snap = bkey_s_c_to_snapshot(k); 14188e877caaSKent Overstreet if (BCH_SNAPSHOT_DELETED(snap.v)) { 14198e877caaSKent Overstreet ret = snapshot_list_add(c, &deleted, k.k->p.offset); 14208e877caaSKent Overstreet if (ret) 14218e877caaSKent Overstreet break; 14228e877caaSKent Overstreet } 14238e877caaSKent Overstreet } 14246bd68ec2SKent Overstreet bch2_trans_iter_exit(trans, &iter); 14258e877caaSKent Overstreet 14268e877caaSKent Overstreet if (ret) { 14278e877caaSKent Overstreet bch_err_msg(c, ret, "walking snapshots"); 14288e877caaSKent Overstreet goto err; 14298e877caaSKent Overstreet } 14308e877caaSKent Overstreet 14318e877caaSKent Overstreet for (id = 0; id < BTREE_ID_NR; id++) { 14328e877caaSKent Overstreet struct bpos last_pos = POS_MIN; 14338e877caaSKent Overstreet snapshot_id_list equiv_seen = { 0 }; 1434f55d6e07SKent Overstreet struct disk_reservation res = { 0 }; 14358e877caaSKent Overstreet 14368e877caaSKent Overstreet if (!btree_type_has_snapshots(id)) 14378e877caaSKent Overstreet continue; 14388e877caaSKent Overstreet 14396bd68ec2SKent Overstreet ret = for_each_btree_key_commit(trans, iter, 14408e877caaSKent Overstreet id, POS_MIN, 14418e877caaSKent Overstreet BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, 1442f55d6e07SKent Overstreet &res, NULL, BTREE_INSERT_NOFAIL, 14436bd68ec2SKent Overstreet snapshot_delete_key(trans, &iter, k, &deleted, &equiv_seen, &last_pos)) ?: 14446bd68ec2SKent Overstreet for_each_btree_key_commit(trans, iter, 1445f55d6e07SKent Overstreet id, POS_MIN, 1446f55d6e07SKent Overstreet BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, 1447f55d6e07SKent Overstreet &res, NULL, BTREE_INSERT_NOFAIL, 14486bd68ec2SKent Overstreet move_key_to_correct_snapshot(trans, &iter, k)); 14498e877caaSKent Overstreet 1450f55d6e07SKent Overstreet bch2_disk_reservation_put(c, &res); 14518e877caaSKent Overstreet darray_exit(&equiv_seen); 14528e877caaSKent Overstreet 14538e877caaSKent Overstreet if (ret) { 14548e877caaSKent Overstreet bch_err_msg(c, ret, "deleting keys from dying snapshots"); 14558e877caaSKent Overstreet goto err; 14568e877caaSKent Overstreet } 14578e877caaSKent Overstreet } 14588e877caaSKent Overstreet 14590dd092bfSKent Overstreet bch2_trans_unlock(trans); 146037fad949SKent Overstreet down_write(&c->snapshot_create_lock); 146137fad949SKent Overstreet 14626bd68ec2SKent Overstreet for_each_btree_key(trans, iter, BTREE_ID_snapshots, 1463f55d6e07SKent Overstreet POS_MIN, 0, k, ret) { 1464f55d6e07SKent Overstreet u32 snapshot = k.k->p.offset; 1465f55d6e07SKent Overstreet u32 equiv = bch2_snapshot_equiv(c, snapshot); 14668e877caaSKent Overstreet 1467f55d6e07SKent Overstreet if (equiv != snapshot) 1468f55d6e07SKent Overstreet snapshot_list_add(c, &deleted_interior, snapshot); 1469f55d6e07SKent Overstreet } 14706bd68ec2SKent Overstreet bch2_trans_iter_exit(trans, &iter); 1471f55d6e07SKent Overstreet 147237fad949SKent Overstreet if (ret) 147337fad949SKent Overstreet goto err_create_lock; 147437fad949SKent Overstreet 1475f55d6e07SKent Overstreet /* 1476f55d6e07SKent Overstreet * Fixing children of deleted snapshots can't be done completely 1477f55d6e07SKent Overstreet * atomically, if we crash between here and when we delete the interior 1478f55d6e07SKent Overstreet * nodes some depth fields will be off: 1479f55d6e07SKent Overstreet */ 14806bd68ec2SKent Overstreet ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN, 1481f55d6e07SKent Overstreet BTREE_ITER_INTENT, k, 1482f55d6e07SKent Overstreet NULL, NULL, BTREE_INSERT_NOFAIL, 14836bd68ec2SKent Overstreet bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior)); 1484f55d6e07SKent Overstreet if (ret) 148537fad949SKent Overstreet goto err_create_lock; 1486f55d6e07SKent Overstreet 1487f55d6e07SKent Overstreet darray_for_each(deleted, i) { 14886bd68ec2SKent Overstreet ret = commit_do(trans, NULL, NULL, 0, 14896bd68ec2SKent Overstreet bch2_snapshot_node_delete(trans, *i)); 14908e877caaSKent Overstreet if (ret) { 1491f55d6e07SKent Overstreet bch_err_msg(c, ret, "deleting snapshot %u", *i); 149237fad949SKent Overstreet goto err_create_lock; 1493f55d6e07SKent Overstreet } 1494f55d6e07SKent Overstreet } 1495f55d6e07SKent Overstreet 1496f55d6e07SKent Overstreet darray_for_each(deleted_interior, i) { 14976bd68ec2SKent Overstreet ret = commit_do(trans, NULL, NULL, 0, 14986bd68ec2SKent Overstreet bch2_snapshot_node_delete(trans, *i)); 1499f55d6e07SKent Overstreet if (ret) { 1500f55d6e07SKent Overstreet bch_err_msg(c, ret, "deleting snapshot %u", *i); 150137fad949SKent Overstreet goto err_create_lock; 15028e877caaSKent Overstreet } 15038e877caaSKent Overstreet } 150437fad949SKent Overstreet err_create_lock: 150537fad949SKent Overstreet up_write(&c->snapshot_create_lock); 15068e877caaSKent Overstreet err: 1507f55d6e07SKent Overstreet darray_exit(&deleted_interior); 15088e877caaSKent Overstreet darray_exit(&deleted); 15096bd68ec2SKent Overstreet bch2_trans_put(trans); 15108e877caaSKent Overstreet if (ret) 15118e877caaSKent Overstreet bch_err_fn(c, ret); 15128e877caaSKent Overstreet return ret; 15138e877caaSKent Overstreet } 15148e877caaSKent Overstreet 15158e877caaSKent Overstreet void bch2_delete_dead_snapshots_work(struct work_struct *work) 15168e877caaSKent Overstreet { 15178e877caaSKent Overstreet struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work); 15188e877caaSKent Overstreet 15198e877caaSKent Overstreet bch2_delete_dead_snapshots(c); 15208e877caaSKent Overstreet bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); 15218e877caaSKent Overstreet } 15228e877caaSKent Overstreet 15238e877caaSKent Overstreet void bch2_delete_dead_snapshots_async(struct bch_fs *c) 15248e877caaSKent Overstreet { 15258e877caaSKent Overstreet if (bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) && 15268e877caaSKent Overstreet !queue_work(c->write_ref_wq, &c->snapshot_delete_work)) 15278e877caaSKent Overstreet bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); 15288e877caaSKent Overstreet } 15298e877caaSKent Overstreet 1530fa5bed37SKent Overstreet int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, 1531fa5bed37SKent Overstreet enum btree_id id, 1532fa5bed37SKent Overstreet struct bpos pos) 1533fa5bed37SKent Overstreet { 1534fa5bed37SKent Overstreet struct bch_fs *c = trans->c; 1535fa5bed37SKent Overstreet struct btree_iter iter; 1536fa5bed37SKent Overstreet struct bkey_s_c k; 1537fa5bed37SKent Overstreet int ret; 1538fa5bed37SKent Overstreet 1539fa5bed37SKent Overstreet bch2_trans_iter_init(trans, &iter, id, pos, 1540fa5bed37SKent Overstreet BTREE_ITER_NOT_EXTENTS| 1541fa5bed37SKent Overstreet BTREE_ITER_ALL_SNAPSHOTS); 1542fa5bed37SKent Overstreet while (1) { 1543fa5bed37SKent Overstreet k = bch2_btree_iter_prev(&iter); 1544fa5bed37SKent Overstreet ret = bkey_err(k); 1545fa5bed37SKent Overstreet if (ret) 1546fa5bed37SKent Overstreet break; 1547fa5bed37SKent Overstreet 1548fa5bed37SKent Overstreet if (!k.k) 1549fa5bed37SKent Overstreet break; 1550fa5bed37SKent Overstreet 1551fa5bed37SKent Overstreet if (!bkey_eq(pos, k.k->p)) 1552fa5bed37SKent Overstreet break; 1553fa5bed37SKent Overstreet 1554fa5bed37SKent Overstreet if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot)) { 1555fa5bed37SKent Overstreet ret = 1; 1556fa5bed37SKent Overstreet break; 1557fa5bed37SKent Overstreet } 1558fa5bed37SKent Overstreet } 1559fa5bed37SKent Overstreet bch2_trans_iter_exit(trans, &iter); 1560fa5bed37SKent Overstreet 1561fa5bed37SKent Overstreet return ret; 1562fa5bed37SKent Overstreet } 1563fa5bed37SKent Overstreet 1564a111901fSKent Overstreet static u32 bch2_snapshot_smallest_child(struct bch_fs *c, u32 id) 1565a111901fSKent Overstreet { 1566a111901fSKent Overstreet const struct snapshot_t *s = snapshot_t(c, id); 1567a111901fSKent Overstreet 1568a111901fSKent Overstreet return s->children[1] ?: s->children[0]; 1569a111901fSKent Overstreet } 1570a111901fSKent Overstreet 1571a111901fSKent Overstreet static u32 bch2_snapshot_smallest_descendent(struct bch_fs *c, u32 id) 1572a111901fSKent Overstreet { 1573a111901fSKent Overstreet u32 child; 1574a111901fSKent Overstreet 1575a111901fSKent Overstreet while ((child = bch2_snapshot_smallest_child(c, id))) 1576a111901fSKent Overstreet id = child; 1577a111901fSKent Overstreet return id; 1578a111901fSKent Overstreet } 1579a111901fSKent Overstreet 1580a111901fSKent Overstreet static int bch2_propagate_key_to_snapshot_leaf(struct btree_trans *trans, 1581a111901fSKent Overstreet enum btree_id btree, 1582a111901fSKent Overstreet struct bkey_s_c interior_k, 1583a111901fSKent Overstreet u32 leaf_id, struct bpos *new_min_pos) 1584a111901fSKent Overstreet { 1585a111901fSKent Overstreet struct btree_iter iter; 1586a111901fSKent Overstreet struct bpos pos = interior_k.k->p; 1587a111901fSKent Overstreet struct bkey_s_c k; 1588a111901fSKent Overstreet struct bkey_i *new; 1589a111901fSKent Overstreet int ret; 1590a111901fSKent Overstreet 1591a111901fSKent Overstreet pos.snapshot = leaf_id; 1592a111901fSKent Overstreet 1593a111901fSKent Overstreet bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_INTENT); 1594a111901fSKent Overstreet k = bch2_btree_iter_peek_slot(&iter); 1595a111901fSKent Overstreet ret = bkey_err(k); 1596a111901fSKent Overstreet if (ret) 1597a111901fSKent Overstreet goto out; 1598a111901fSKent Overstreet 1599a111901fSKent Overstreet /* key already overwritten in this snapshot? */ 1600a111901fSKent Overstreet if (k.k->p.snapshot != interior_k.k->p.snapshot) 1601a111901fSKent Overstreet goto out; 1602a111901fSKent Overstreet 1603a111901fSKent Overstreet if (bpos_eq(*new_min_pos, POS_MIN)) { 1604a111901fSKent Overstreet *new_min_pos = k.k->p; 1605a111901fSKent Overstreet new_min_pos->snapshot = leaf_id; 1606a111901fSKent Overstreet } 1607a111901fSKent Overstreet 1608a111901fSKent Overstreet new = bch2_bkey_make_mut_noupdate(trans, interior_k); 1609a111901fSKent Overstreet ret = PTR_ERR_OR_ZERO(new); 1610a111901fSKent Overstreet if (ret) 1611a111901fSKent Overstreet goto out; 1612a111901fSKent Overstreet 1613a111901fSKent Overstreet new->k.p.snapshot = leaf_id; 1614a111901fSKent Overstreet ret = bch2_trans_update(trans, &iter, new, 0); 1615a111901fSKent Overstreet out: 1616a111901fSKent Overstreet bch2_trans_iter_exit(trans, &iter); 1617a111901fSKent Overstreet return ret; 1618a111901fSKent Overstreet } 1619a111901fSKent Overstreet 1620a111901fSKent Overstreet int bch2_propagate_key_to_snapshot_leaves(struct btree_trans *trans, 1621a111901fSKent Overstreet enum btree_id btree, 1622a111901fSKent Overstreet struct bkey_s_c k, 1623a111901fSKent Overstreet struct bpos *new_min_pos) 1624a111901fSKent Overstreet { 1625a111901fSKent Overstreet struct bch_fs *c = trans->c; 1626a111901fSKent Overstreet struct bkey_buf sk; 1627c872afa2SKent Overstreet u32 restart_count = trans->restart_count; 1628d04fdf5cSKent Overstreet int ret = 0; 1629a111901fSKent Overstreet 1630a111901fSKent Overstreet bch2_bkey_buf_init(&sk); 1631a111901fSKent Overstreet bch2_bkey_buf_reassemble(&sk, c, k); 1632a111901fSKent Overstreet k = bkey_i_to_s_c(sk.k); 1633a111901fSKent Overstreet 1634a111901fSKent Overstreet *new_min_pos = POS_MIN; 1635a111901fSKent Overstreet 1636a111901fSKent Overstreet for (u32 id = bch2_snapshot_smallest_descendent(c, k.k->p.snapshot); 1637a111901fSKent Overstreet id < k.k->p.snapshot; 1638a111901fSKent Overstreet id++) { 1639a111901fSKent Overstreet if (!bch2_snapshot_is_ancestor(c, id, k.k->p.snapshot) || 1640a111901fSKent Overstreet !bch2_snapshot_is_leaf(c, id)) 1641a111901fSKent Overstreet continue; 1642d281701bSKent Overstreet again: 1643d281701bSKent Overstreet ret = btree_trans_too_many_iters(trans) ?: 1644d281701bSKent Overstreet bch2_propagate_key_to_snapshot_leaf(trans, btree, k, id, new_min_pos) ?: 1645d281701bSKent Overstreet bch2_trans_commit(trans, NULL, NULL, 0); 1646d281701bSKent Overstreet if (ret && bch2_err_matches(ret, BCH_ERR_transaction_restart)) { 1647d281701bSKent Overstreet bch2_trans_begin(trans); 1648d281701bSKent Overstreet goto again; 1649d281701bSKent Overstreet } 1650a111901fSKent Overstreet 1651a111901fSKent Overstreet if (ret) 1652a111901fSKent Overstreet break; 1653a111901fSKent Overstreet } 1654a111901fSKent Overstreet 1655a111901fSKent Overstreet bch2_bkey_buf_exit(&sk, c); 1656c872afa2SKent Overstreet 1657c872afa2SKent Overstreet return ret ?: trans_was_restarted(trans, restart_count); 1658a111901fSKent Overstreet } 1659a111901fSKent Overstreet 1660*b0b5bbf9SKent Overstreet static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k) 1661*b0b5bbf9SKent Overstreet { 1662*b0b5bbf9SKent Overstreet struct bch_fs *c = trans->c; 1663*b0b5bbf9SKent Overstreet struct bkey_s_c_snapshot snap; 1664*b0b5bbf9SKent Overstreet int ret = 0; 1665*b0b5bbf9SKent Overstreet 1666*b0b5bbf9SKent Overstreet if (k.k->type != KEY_TYPE_snapshot) 1667*b0b5bbf9SKent Overstreet return 0; 1668*b0b5bbf9SKent Overstreet 1669*b0b5bbf9SKent Overstreet snap = bkey_s_c_to_snapshot(k); 1670*b0b5bbf9SKent Overstreet if (BCH_SNAPSHOT_DELETED(snap.v) || 1671*b0b5bbf9SKent Overstreet bch2_snapshot_equiv(c, k.k->p.offset) != k.k->p.offset || 1672*b0b5bbf9SKent Overstreet (ret = bch2_snapshot_needs_delete(trans, k)) > 0) { 1673*b0b5bbf9SKent Overstreet set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags); 1674*b0b5bbf9SKent Overstreet return 0; 1675*b0b5bbf9SKent Overstreet } 1676*b0b5bbf9SKent Overstreet 1677*b0b5bbf9SKent Overstreet return ret; 1678*b0b5bbf9SKent Overstreet } 1679*b0b5bbf9SKent Overstreet 16808e877caaSKent Overstreet int bch2_snapshots_read(struct bch_fs *c) 16818e877caaSKent Overstreet { 16828e877caaSKent Overstreet struct btree_iter iter; 16838e877caaSKent Overstreet struct bkey_s_c k; 16848e877caaSKent Overstreet int ret = 0; 16858e877caaSKent Overstreet 16868e877caaSKent Overstreet ret = bch2_trans_run(c, 16876bd68ec2SKent Overstreet for_each_btree_key2(trans, iter, BTREE_ID_snapshots, 16888e877caaSKent Overstreet POS_MIN, 0, k, 16896bd68ec2SKent Overstreet bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: 1690*b0b5bbf9SKent Overstreet bch2_snapshot_set_equiv(trans, k) ?: 1691*b0b5bbf9SKent Overstreet bch2_check_snapshot_needs_deletion(trans, k)) ?: 16926bd68ec2SKent Overstreet for_each_btree_key2(trans, iter, BTREE_ID_snapshots, 169366487c54SKent Overstreet POS_MIN, 0, k, 169466487c54SKent Overstreet (set_is_ancestor_bitmap(c, k.k->p.offset), 0))); 16958e877caaSKent Overstreet if (ret) 16968e877caaSKent Overstreet bch_err_fn(c, ret); 16978e877caaSKent Overstreet return ret; 16988e877caaSKent Overstreet } 16998e877caaSKent Overstreet 17008e877caaSKent Overstreet void bch2_fs_snapshots_exit(struct bch_fs *c) 17018e877caaSKent Overstreet { 17028e877caaSKent Overstreet kfree(rcu_dereference_protected(c->snapshots, true)); 17038e877caaSKent Overstreet } 1704