// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "bbpos.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_key_cache.h"
#include "btree_update.h"
#include "buckets.h"
#include "enumerated_ref.h"
#include "errcode.h"
#include "error.h"
#include "fs.h"
#include "recovery_passes.h"
#include "snapshot.h"

#include <linux/random.h>

/*
 * Snapshot trees:
 *
 * Keys in BTREE_ID_snapshot_trees identify a whole tree of snapshot nodes; they
 * exist to provide a stable identifier for the whole lifetime of a snapshot
 * tree.
 */

void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
				struct bkey_s_c k)
{
	struct bkey_s_c_snapshot_tree t = bkey_s_c_to_snapshot_tree(k);

	prt_printf(out, "subvol %u root snapshot %u",
		   le32_to_cpu(t.v->master_subvol),
		   le32_to_cpu(t.v->root_snapshot));
}

int bch2_snapshot_tree_validate(struct bch_fs *c, struct bkey_s_c k,
				struct bkey_validate_context from)
{
	int ret = 0;

	bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) ||
			 bkey_lt(k.k->p, POS(0, 1)),
			 c, snapshot_tree_pos_bad,
			 "bad pos");
fsck_err:
	return ret;
}

int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id,
			      struct bch_snapshot_tree *s)
{
	int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id),
					  BTREE_ITER_with_updates, snapshot_tree, s);

	if (bch2_err_matches(ret, ENOENT))
		ret = bch_err_throw(trans->c, ENOENT_snapshot_tree);
	return ret;
}

struct bkey_i_snapshot_tree *
__bch2_snapshot_tree_create(struct btree_trans *trans)
{
	struct btree_iter iter;
	int ret = bch2_bkey_get_empty_slot(trans, &iter,
			BTREE_ID_snapshot_trees, POS(0, U32_MAX));
	struct bkey_i_snapshot_tree *s_t;

	if (ret == -BCH_ERR_ENOSPC_btree_slot)
		ret = bch_err_throw(trans->c, ENOSPC_snapshot_tree);
	if (ret)
		return ERR_PTR(ret);

	s_t = bch2_bkey_alloc(trans, &iter, 0, snapshot_tree);
	ret = PTR_ERR_OR_ZERO(s_t);
	bch2_trans_iter_exit(trans, &iter);
	return ret ? ERR_PTR(ret) : s_t;
}

static int bch2_snapshot_tree_create(struct btree_trans *trans,
				u32 root_id, u32 subvol_id, u32 *tree_id)
{
	struct bkey_i_snapshot_tree *n_tree =
		__bch2_snapshot_tree_create(trans);

	if (IS_ERR(n_tree))
		return PTR_ERR(n_tree);

	n_tree->v.master_subvol	= cpu_to_le32(subvol_id);
	n_tree->v.root_snapshot	= cpu_to_le32(root_id);
	*tree_id = n_tree->k.p.offset;
	return 0;
}

/* Snapshot nodes: */

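/*
 * Before check_snapshots has run we can't trust the depth and skiplist fields,
 * so we can't use the skiplist scheme in __bch2_snapshot_is_ancestor(); fall
 * back to walking parent pointers one node at a time (parent IDs are always
 * greater than child IDs):
 */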
static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, u32 ancestor)
{
	while (id && id < ancestor) {
		const struct snapshot_t *s = __snapshot_t(t, id);
		id = s ? s->parent : 0;
	}
	return id == ancestor;
}

static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
{
	guard(rcu)();
	return __bch2_snapshot_is_ancestor_early(rcu_dereference(c->snapshots), id, ancestor);
}

static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
{
	const struct snapshot_t *s = __snapshot_t(t, id);
	if (!s)
		return 0;

	if (s->skip[2] <= ancestor)
		return s->skip[2];
	if (s->skip[1] <= ancestor)
		return s->skip[1];
	if (s->skip[0] <= ancestor)
		return s->skip[0];
	return s->parent;
}

static bool test_ancestor_bitmap(struct snapshot_table *t, u32 id, u32 ancestor)
{
	const struct snapshot_t *s = __snapshot_t(t, id);
	if (!s)
		return false;

	return test_bit(ancestor - id - 1, s->is_ancestor);
}

bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{
	bool ret;

	guard(rcu)();
	struct snapshot_table *t = rcu_dereference(c->snapshots);

	if (unlikely(c->recovery.pass_done < BCH_RECOVERY_PASS_check_snapshots))
		return __bch2_snapshot_is_ancestor_early(t, id, ancestor);

	if (likely(ancestor >= IS_ANCESTOR_BITMAP))
		while (id && id < ancestor - IS_ANCESTOR_BITMAP)
			id = get_ancestor_below(t, id, ancestor);

	ret = id && id < ancestor
		? test_ancestor_bitmap(t, id, ancestor)
		: id == ancestor;

	EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
	return ret;
}

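/*
 * Grow the in-memory snapshot table to cover @id: the table is indexed by
 * U32_MAX - id (snapshot IDs are allocated counting down from U32_MAX), and
 * it's resized RCU-style, by allocating a larger copy and freeing the old
 * table after a grace period:
 */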
static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
{
	size_t idx = U32_MAX - id;
	struct snapshot_table *new, *old;

	size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1));
	size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]);

	if (unlikely(new_bytes > INT_MAX))
		return NULL;

	new = kvzalloc(new_bytes, GFP_KERNEL);
	if (!new)
		return NULL;

	new->nr = new_size;

	old = rcu_dereference_protected(c->snapshots, true);
	if (old)
		memcpy(new->s, old->s, sizeof(old->s[0]) * old->nr);

	rcu_assign_pointer(c->snapshots, new);
	kvfree_rcu(old, rcu);

	return &rcu_dereference_protected(c->snapshots,
				lockdep_is_held(&c->snapshot_table_lock))->s[idx];
}

static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id)
{
	size_t idx = U32_MAX - id;
	struct snapshot_table *table =
		rcu_dereference_protected(c->snapshots,
				lockdep_is_held(&c->snapshot_table_lock));

	lockdep_assert_held(&c->snapshot_table_lock);

	if (likely(table && idx < table->nr))
		return &table->s[idx];

	return __snapshot_t_mut(c, id);
}

void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
			   struct bkey_s_c k)
{
	struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);

	if (BCH_SNAPSHOT_SUBVOL(s.v))
		prt_str(out, "subvol ");
	if (BCH_SNAPSHOT_WILL_DELETE(s.v))
		prt_str(out, "will_delete ");
	if (BCH_SNAPSHOT_DELETED(s.v))
		prt_str(out, "deleted ");

	prt_printf(out, "parent %10u children %10u %10u subvol %u tree %u",
		   le32_to_cpu(s.v->parent),
		   le32_to_cpu(s.v->children[0]),
		   le32_to_cpu(s.v->children[1]),
		   le32_to_cpu(s.v->subvol),
		   le32_to_cpu(s.v->tree));

	if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, depth))
		prt_printf(out, " depth %u skiplist %u %u %u",
			   le32_to_cpu(s.v->depth),
			   le32_to_cpu(s.v->skip[0]),
			   le32_to_cpu(s.v->skip[1]),
			   le32_to_cpu(s.v->skip[2]));
}

int bch2_snapshot_validate(struct bch_fs *c, struct bkey_s_c k,
			   struct bkey_validate_context from)
{
	struct bkey_s_c_snapshot s;
	u32 i, id;
	int ret = 0;

	bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) ||
			 bkey_lt(k.k->p, POS(0, 1)),
			 c, snapshot_pos_bad,
			 "bad pos");

	s = bkey_s_c_to_snapshot(k);

	id = le32_to_cpu(s.v->parent);
	bkey_fsck_err_on(id && id <= k.k->p.offset,
			 c, snapshot_parent_bad,
			 "bad parent node (%u <= %llu)",
			 id, k.k->p.offset);

	bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]),
			 c, snapshot_children_not_normalized,
			 "children not normalized");

	bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1],
			 c, snapshot_child_duplicate,
			 "duplicate child nodes");

	for (i = 0; i < 2; i++) {
		id = le32_to_cpu(s.v->children[i]);

		bkey_fsck_err_on(id >= k.k->p.offset,
				 c, snapshot_child_bad,
				 "bad child node (%u >= %llu)",
				 id, k.k->p.offset);
	}

	if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) {
		bkey_fsck_err_on(le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) ||
				 le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]),
				 c, snapshot_skiplist_not_normalized,
				 "skiplist not normalized");

		for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) {
			id = le32_to_cpu(s.v->skip[i]);

			bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent),
					 c, snapshot_skiplist_bad,
					 "bad skiplist node %u", id);
		}
	}
fsck_err:
	return ret;
}

static int bch2_snapshot_table_make_room(struct bch_fs *c, u32 id)
{
	mutex_lock(&c->snapshot_table_lock);
	int ret = snapshot_t_mut(c, id)
		? 0
		: bch_err_throw(c, ENOMEM_mark_snapshot);
	mutex_unlock(&c->snapshot_table_lock);
	return ret;
}

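/*
 * Trigger for updates to the snapshots btree: mirror the new key into the
 * in-memory snapshot table, including the is_ancestor bitmap (bit n is set if
 * id + n + 1 is an ancestor of id, covering the IS_ANCESTOR_BITMAP closest
 * ancestors), and kick off background deletion if the node is flagged
 * will_delete:
 */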
static int __bch2_mark_snapshot(struct btree_trans *trans,
		       enum btree_id btree, unsigned level,
		       struct bkey_s_c old, struct bkey_s_c new,
		       enum btree_iter_update_trigger_flags flags)
{
	struct bch_fs *c = trans->c;
	struct snapshot_t *t;
	u32 id = new.k->p.offset;
	int ret = 0;

	mutex_lock(&c->snapshot_table_lock);

	t = snapshot_t_mut(c, id);
	if (!t) {
		ret = bch_err_throw(c, ENOMEM_mark_snapshot);
		goto err;
	}

	if (new.k->type == KEY_TYPE_snapshot) {
		struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);

		t->state = !BCH_SNAPSHOT_DELETED(s.v)
			? SNAPSHOT_ID_live
			: SNAPSHOT_ID_deleted;
		t->parent	= le32_to_cpu(s.v->parent);
		t->children[0]	= le32_to_cpu(s.v->children[0]);
		t->children[1]	= le32_to_cpu(s.v->children[1]);
		t->subvol	= BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0;
		t->tree		= le32_to_cpu(s.v->tree);

		if (bkey_val_bytes(s.k) > offsetof(struct bch_snapshot, depth)) {
			t->depth	= le32_to_cpu(s.v->depth);
			t->skip[0]	= le32_to_cpu(s.v->skip[0]);
			t->skip[1]	= le32_to_cpu(s.v->skip[1]);
			t->skip[2]	= le32_to_cpu(s.v->skip[2]);
		} else {
			t->depth	= 0;
			t->skip[0]	= 0;
			t->skip[1]	= 0;
			t->skip[2]	= 0;
		}

		u32 parent = id;

		while ((parent = bch2_snapshot_parent_early(c, parent)) &&
		       parent - id - 1 < IS_ANCESTOR_BITMAP)
			__set_bit(parent - id - 1, t->is_ancestor);

		if (BCH_SNAPSHOT_WILL_DELETE(s.v)) {
			set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
			if (c->recovery.pass_done > BCH_RECOVERY_PASS_delete_dead_snapshots)
				bch2_delete_dead_snapshots_async(c);
		}
	} else {
		memset(t, 0, sizeof(*t));
	}
err:
	mutex_unlock(&c->snapshot_table_lock);
	return ret;
}

int bch2_mark_snapshot(struct btree_trans *trans,
		       enum btree_id btree, unsigned level,
		       struct bkey_s_c old, struct bkey_s new,
		       enum btree_iter_update_trigger_flags flags)
{
	return __bch2_mark_snapshot(trans, btree, level, old, new.s_c, flags);
}

int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
			 struct bch_snapshot *s)
{
	return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, POS(0, id),
				       BTREE_ITER_with_updates, snapshot, s);
}

/* fsck: */

static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child)
{
	return snapshot_t(c, id)->children[child];
}

static u32 bch2_snapshot_left_child(struct bch_fs *c, u32 id)
{
	return bch2_snapshot_child(c, id, 0);
}

static u32 bch2_snapshot_right_child(struct bch_fs *c, u32 id)
{
	return bch2_snapshot_child(c, id, 1);
}

static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id)
{
	u32 n, parent;

	n = bch2_snapshot_left_child(c, id);
	if (n)
		return n;

	while ((parent = bch2_snapshot_parent(c, id))) {
		n = bch2_snapshot_right_child(c, parent);
		if (n && n != id)
			return n;
		id = parent;
	}

	return 0;
}

u32 bch2_snapshot_oldest_subvol(struct bch_fs *c, u32 snapshot_root,
				snapshot_id_list *skip)
{
	guard(rcu)();
	u32 id, subvol = 0, s;
retry:
	id = snapshot_root;
	while (id && bch2_snapshot_exists(c, id)) {
		if (!(skip && snapshot_list_has_id(skip, id))) {
			s = snapshot_t(c, id)->subvol;

			if (s && (!subvol || s < subvol))
				subvol = s;
		}
		id = bch2_snapshot_tree_next(c, id);
		if (id == snapshot_root)
			break;
	}

	if (!subvol && skip) {
		skip = NULL;
		goto retry;
	}

	return subvol;
}

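/*
 * Find the master (non-snapshot) subvolume for a snapshot tree, for repairing
 * a snapshot_tree key whose master_subvol pointer is missing or wrong; if no
 * such subvolume exists, fall back to the oldest subvolume in the tree and
 * clear its snapshot flag:
 */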
static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans,
					    u32 snapshot_root, u32 *subvol_id)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	bool found = false;
	int ret;

	for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN,
				     0, k, ret) {
		if (k.k->type != KEY_TYPE_subvolume)
			continue;

		struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
		if (!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.v->snapshot), snapshot_root))
			continue;
		if (!BCH_SUBVOLUME_SNAP(s.v)) {
			*subvol_id = s.k->p.offset;
			found = true;
			break;
		}
	}
	bch2_trans_iter_exit(trans, &iter);

	if (!ret && !found) {
		struct bkey_i_subvolume *u;

		*subvol_id = bch2_snapshot_oldest_subvol(c, snapshot_root, NULL);

		u = bch2_bkey_get_mut_typed(trans, &iter,
					    BTREE_ID_subvolumes, POS(0, *subvol_id),
					    0, subvolume);
		ret = PTR_ERR_OR_ZERO(u);
		if (ret)
			return ret;

		SET_BCH_SUBVOLUME_SNAP(&u->v, false);
	}

	return ret;
}

static int check_snapshot_tree(struct btree_trans *trans,
			       struct btree_iter *iter,
			       struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct bkey_s_c_snapshot_tree st;
	struct bch_snapshot s;
	struct bch_subvolume subvol;
	struct printbuf buf = PRINTBUF;
	struct btree_iter snapshot_iter = {};
	u32 root_id;
	int ret;

	if (k.k->type != KEY_TYPE_snapshot_tree)
		return 0;

	st = bkey_s_c_to_snapshot_tree(k);
	root_id = le32_to_cpu(st.v->root_snapshot);

	struct bkey_s_c_snapshot snapshot_k =
		bch2_bkey_get_iter_typed(trans, &snapshot_iter, BTREE_ID_snapshots,
					 POS(0, root_id), 0, snapshot);
	ret = bkey_err(snapshot_k);
	if (ret && !bch2_err_matches(ret, ENOENT))
		goto err;

	if (!ret)
		bkey_val_copy(&s, snapshot_k);

	if (fsck_err_on(ret ||
			root_id != bch2_snapshot_root(c, root_id) ||
			st.k->p.offset != le32_to_cpu(s.tree),
			trans, snapshot_tree_to_missing_snapshot,
			"snapshot tree points to missing/incorrect snapshot:\n%s",
			(bch2_bkey_val_to_text(&buf, c, st.s_c),
			 prt_newline(&buf),
			 ret
			 ? prt_printf(&buf, "(%s)", bch2_err_str(ret))
			 : bch2_bkey_val_to_text(&buf, c, snapshot_k.s_c),
			 buf.buf))) {
		ret = bch2_btree_delete_at(trans, iter, 0);
		goto err;
	}

	if (!st.v->master_subvol)
		goto out;

	ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), false, &subvol);
	if (ret && !bch2_err_matches(ret, ENOENT))
		goto err;

	if (fsck_err_on(ret,
			trans, snapshot_tree_to_missing_subvol,
			"snapshot tree points to missing subvolume:\n%s",
			(printbuf_reset(&buf),
			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
	    fsck_err_on(!bch2_snapshot_is_ancestor(c,
						   le32_to_cpu(subvol.snapshot),
						   root_id),
			trans, snapshot_tree_to_wrong_subvol,
			"snapshot tree points to subvolume that does not point to snapshot in this tree:\n%s",
			(printbuf_reset(&buf),
			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
	    fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol),
			trans, snapshot_tree_to_snapshot_subvol,
			"snapshot tree points to snapshot subvolume:\n%s",
			(printbuf_reset(&buf),
			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
		struct bkey_i_snapshot_tree *u;
		u32 subvol_id;

		ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
		bch_err_fn(c, ret);

		if (bch2_err_matches(ret, ENOENT)) { /* nothing to be done here */
			ret = 0;
			goto err;
		}

		if (ret)
			goto err;

		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot_tree);
		ret = PTR_ERR_OR_ZERO(u);
		if (ret)
			goto err;

		u->v.master_subvol = cpu_to_le32(subvol_id);
		st = snapshot_tree_i_to_s_c(u);
	}
out:
err:
fsck_err:
	bch2_trans_iter_exit(trans, &snapshot_iter);
	printbuf_exit(&buf);
	return ret;
}

/*
 * For each snapshot_tree, make sure it points to the root of a snapshot tree
 * and that snapshot entry points back to it, or delete it.
 *
 * And, make sure it points to a subvolume within that snapshot tree, or
 * correct it to point to the oldest subvolume within that snapshot tree.
 */
int bch2_check_snapshot_trees(struct bch_fs *c)
{
	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
			BTREE_ID_snapshot_trees, POS_MIN,
			BTREE_ITER_prefetch, k,
			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		check_snapshot_tree(trans, &iter, k)));
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Look up snapshot tree for @tree_id and find root,
 * make sure @snap_id is a descendant:
 */
static int snapshot_tree_ptr_good(struct btree_trans *trans,
				  u32 snap_id, u32 tree_id)
{
	struct bch_snapshot_tree s_t;
	int ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t);

	if (bch2_err_matches(ret, ENOENT))
		return 0;
	if (ret)
		return ret;

	return bch2_snapshot_is_ancestor_early(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot));
}

u32 bch2_snapshot_skiplist_get(struct bch_fs *c, u32 id)
{
	if (!id)
		return 0;

	guard(rcu)();
	const struct snapshot_t *s = snapshot_t(c, id);
	return s->parent
		? bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth))
		: id;
}

static int snapshot_skiplist_good(struct btree_trans *trans, u32 id, struct bch_snapshot s)
{
	unsigned i;

	for (i = 0; i < 3; i++)
		if (!s.parent) {
			if (s.skip[i])
				return false;
		} else {
			if (!bch2_snapshot_is_ancestor_early(trans->c, id, le32_to_cpu(s.skip[i])))
				return false;
		}

	return true;
}

/*
 * snapshot_tree pointer was incorrect: look up root snapshot node, make sure
 * its snapshot_tree pointer is correct (allocate new one if necessary), then
 * update this node's pointer to root node's pointer:
 */
static int snapshot_tree_ptr_repair(struct btree_trans *trans,
				    struct btree_iter *iter,
				    struct bkey_s_c k,
				    struct bch_snapshot *s)
{
	struct bch_fs *c = trans->c;
	struct btree_iter root_iter;
	struct bch_snapshot_tree s_t;
	struct bkey_s_c_snapshot root;
	struct bkey_i_snapshot *u;
	u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id;
	int ret;

	root = bch2_bkey_get_iter_typed(trans, &root_iter,
			       BTREE_ID_snapshots, POS(0, root_id),
			       BTREE_ITER_with_updates, snapshot);
	ret = bkey_err(root);
	if (ret)
		goto err;

	tree_id = le32_to_cpu(root.v->tree);

	ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t);
	if (ret && !bch2_err_matches(ret, ENOENT))
		return ret;

	if (ret || le32_to_cpu(s_t.root_snapshot) != root_id) {
		u = bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot);
		ret =   PTR_ERR_OR_ZERO(u) ?:
			bch2_snapshot_tree_create(trans, root_id,
				bch2_snapshot_oldest_subvol(c, root_id, NULL),
				&tree_id);
		if (ret)
			goto err;

		u->v.tree = cpu_to_le32(tree_id);
		if (k.k->p.offset == root_id)
			*s = u->v;
	}

	if (k.k->p.offset != root_id) {
		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
		ret = PTR_ERR_OR_ZERO(u);
		if (ret)
			goto err;

		u->v.tree = cpu_to_le32(tree_id);
		*s = u->v;
	}
err:
	bch2_trans_iter_exit(trans, &root_iter);
	return ret;
}

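/*
 * Check consistency of an individual snapshot node: parent and child pointers
 * must agree with the nodes they reference, the subvol pointer must match the
 * subvolume that points back at us, and the tree, depth and skiplist fields
 * must be correct, repairing them where they're not:
 */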
static int check_snapshot(struct btree_trans *trans,
			  struct btree_iter *iter,
			  struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct bch_snapshot s;
	struct bch_subvolume subvol;
	struct bch_snapshot v;
	struct bkey_i_snapshot *u;
	u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
	u32 real_depth;
	struct printbuf buf = PRINTBUF;
	u32 i, id;
	int ret = 0;

	if (k.k->type != KEY_TYPE_snapshot)
		return 0;

	memset(&s, 0, sizeof(s));
	memcpy(&s, k.v, min(sizeof(s), bkey_val_bytes(k.k)));

	if (BCH_SNAPSHOT_DELETED(&s))
		return 0;

	id = le32_to_cpu(s.parent);
	if (id) {
		ret = bch2_snapshot_lookup(trans, id, &v);
		if (bch2_err_matches(ret, ENOENT))
			bch_err(c, "snapshot with nonexistent parent:\n %s",
				(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
		if (ret)
			goto err;

		if (le32_to_cpu(v.children[0]) != k.k->p.offset &&
		    le32_to_cpu(v.children[1]) != k.k->p.offset) {
			bch_err(c, "snapshot parent %u missing pointer to child %llu",
				id, k.k->p.offset);
			ret = -EINVAL;
			goto err;
		}
	}

	for (i = 0; i < 2 && s.children[i]; i++) {
		id = le32_to_cpu(s.children[i]);

		ret = bch2_snapshot_lookup(trans, id, &v);
		if (bch2_err_matches(ret, ENOENT))
			bch_err(c, "snapshot node %llu has nonexistent child %u",
				k.k->p.offset, id);
		if (ret)
			goto err;

		if (le32_to_cpu(v.parent) != k.k->p.offset) {
			bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)",
				id, le32_to_cpu(v.parent), k.k->p.offset);
			ret = -EINVAL;
			goto err;
		}
	}

	bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
		!BCH_SNAPSHOT_WILL_DELETE(&s);

	if (should_have_subvol) {
		id = le32_to_cpu(s.subvol);
		ret = bch2_subvolume_get(trans, id, false, &subvol);
		if (bch2_err_matches(ret, ENOENT))
			bch_err(c, "snapshot points to nonexistent subvolume:\n %s",
				(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
		if (ret)
			goto err;

		if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) {
			bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
				k.k->p.offset);
			ret = -EINVAL;
			goto err;
		}
	} else {
		if (fsck_err_on(s.subvol,
				trans, snapshot_should_not_have_subvol,
				"snapshot should not point to subvol:\n%s",
				(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
			u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
			ret = PTR_ERR_OR_ZERO(u);
			if (ret)
				goto err;

			u->v.subvol = 0;
			s = u->v;
		}
	}

	ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree));
	if (ret < 0)
		goto err;

	if (fsck_err_on(!ret,
			trans, snapshot_to_bad_snapshot_tree,
			"snapshot points to missing/incorrect tree:\n%s",
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		ret = snapshot_tree_ptr_repair(trans, iter, k, &s);
		if (ret)
			goto err;
	}
	ret = 0;

	real_depth = bch2_snapshot_depth(c, parent_id);

	if (fsck_err_on(le32_to_cpu(s.depth) != real_depth,
			trans, snapshot_bad_depth,
			"snapshot with incorrect depth field, should be %u:\n%s",
			real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
		ret = PTR_ERR_OR_ZERO(u);
		if (ret)
			goto err;

		u->v.depth = cpu_to_le32(real_depth);
		s = u->v;
	}

	ret = snapshot_skiplist_good(trans, k.k->p.offset, s);
	if (ret < 0)
		goto err;

	if (fsck_err_on(!ret,
			trans, snapshot_bad_skiplist,
			"snapshot with bad skiplist field:\n%s",
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
		ret = PTR_ERR_OR_ZERO(u);
		if (ret)
			goto err;

		for (i = 0; i < ARRAY_SIZE(u->v.skip); i++)
			u->v.skip[i] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent_id));

		bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_le32);
		s = u->v;
	}
	ret = 0;
err:
fsck_err:
	printbuf_exit(&buf);
	return ret;
}

int bch2_check_snapshots(struct bch_fs *c)
{
	/*
	 * We iterate backwards as checking/fixing the depth field requires that
	 * the parent's depth already be correct:
	 */
	int ret = bch2_trans_run(c,
		for_each_btree_key_reverse_commit(trans, iter,
				BTREE_ID_snapshots, POS_MAX,
				BTREE_ITER_prefetch, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			check_snapshot(trans, &iter, k)));
	bch_err_fn(c, ret);
	return ret;
}

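/*
 * Reconstruct a missing snapshot node from whatever references to it survive:
 * find (or create) the snapshot_tree whose root it was and the subvolume that
 * points to it, then recreate the key:
 */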
static int check_snapshot_exists(struct btree_trans *trans, u32 id)
{
	struct bch_fs *c = trans->c;

	/* Do we need to reconstruct the snapshot_tree entry as well? */
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;
	u32 tree_id = 0;

	for_each_btree_key_norestart(trans, iter, BTREE_ID_snapshot_trees, POS_MIN,
				     0, k, ret) {
		if (le32_to_cpu(bkey_s_c_to_snapshot_tree(k).v->root_snapshot) == id) {
			tree_id = k.k->p.offset;
			break;
		}
	}
	bch2_trans_iter_exit(trans, &iter);

	if (ret)
		return ret;

	if (!tree_id) {
		ret = bch2_snapshot_tree_create(trans, id, 0, &tree_id);
		if (ret)
			return ret;
	}

	struct bkey_i_snapshot *snapshot = bch2_trans_kmalloc(trans, sizeof(*snapshot));
	ret = PTR_ERR_OR_ZERO(snapshot);
	if (ret)
		return ret;

	bkey_snapshot_init(&snapshot->k_i);
	snapshot->k.p		= POS(0, id);
	snapshot->v.tree	= cpu_to_le32(tree_id);
	snapshot->v.btime.lo	= cpu_to_le64(bch2_current_time(c));

	for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN,
				     0, k, ret) {
		if (le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot) == id) {
			snapshot->v.subvol = cpu_to_le32(k.k->p.offset);
			SET_BCH_SNAPSHOT_SUBVOL(&snapshot->v, true);
			break;
		}
	}
	bch2_trans_iter_exit(trans, &iter);

	return  bch2_snapshot_table_make_room(c, id) ?:
		bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0);
}

/* Figure out which snapshot nodes belong in the same tree: */
struct snapshot_tree_reconstruct {
	enum btree_id			btree;
	struct bpos			cur_pos;
	snapshot_id_list		cur_ids;
	DARRAY(snapshot_id_list)	trees;
};

static void snapshot_tree_reconstruct_exit(struct snapshot_tree_reconstruct *r)
{
	darray_for_each(r->trees, i)
		darray_exit(i);
	darray_exit(&r->trees);
	darray_exit(&r->cur_ids);
}

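/*
 * In the inodes btree the inode number lives in pos.offset; everywhere else
 * (extents, dirents, xattrs) it's pos.inode. Two keys belong to the same
 * object - and thus all their snapshot IDs belong to the same tree - if those
 * fields match:
 */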
static inline bool same_snapshot(struct snapshot_tree_reconstruct *r, struct bpos pos)
{
	return r->btree == BTREE_ID_inodes
		? r->cur_pos.offset == pos.offset
		: r->cur_pos.inode == pos.inode;
}

static inline bool snapshot_id_lists_have_common(snapshot_id_list *l, snapshot_id_list *r)
{
	return darray_find_p(*l, i, snapshot_list_has_id(r, *i)) != NULL;
}

static void snapshot_id_list_to_text(struct printbuf *out, snapshot_id_list *s)
{
	bool first = true;
	darray_for_each(*s, i) {
		if (!first)
			prt_char(out, ' ');
		first = false;
		prt_printf(out, "%u", *i);
	}
}

static int snapshot_tree_reconstruct_next(struct bch_fs *c, struct snapshot_tree_reconstruct *r)
{
	if (r->cur_ids.nr) {
		darray_for_each(r->trees, i)
			if (snapshot_id_lists_have_common(i, &r->cur_ids)) {
				int ret = snapshot_list_merge(c, i, &r->cur_ids);
				if (ret)
					return ret;
				goto out;
			}
		darray_push(&r->trees, r->cur_ids);
		darray_init(&r->cur_ids);
	}
out:
	r->cur_ids.nr = 0;
	return 0;
}

static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct *r, struct bpos pos)
{
	if (!same_snapshot(r, pos))
		snapshot_tree_reconstruct_next(c, r);
	r->cur_pos = pos;
	return snapshot_list_add_nodup(c, &r->cur_ids, pos.snapshot);
}

int bch2_reconstruct_snapshots(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct printbuf buf = PRINTBUF;
	struct snapshot_tree_reconstruct r = {};
	int ret = 0;

	for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
		if (btree_type_has_snapshots(btree)) {
			r.btree = btree;

			ret = for_each_btree_key(trans, iter, btree, POS_MIN,
					BTREE_ITER_all_snapshots|BTREE_ITER_prefetch, k, ({
				get_snapshot_trees(c, &r, k.k->p);
			}));
			if (ret)
				goto err;

			snapshot_tree_reconstruct_next(c, &r);
		}
	}

	darray_for_each(r.trees, t) {
		printbuf_reset(&buf);
		snapshot_id_list_to_text(&buf, t);

		darray_for_each(*t, id) {
			if (fsck_err_on(bch2_snapshot_id_state(c, *id) == SNAPSHOT_ID_empty,
					trans, snapshot_node_missing,
					"snapshot node %u from tree %s missing, recreate?", *id, buf.buf)) {
				if (t->nr > 1) {
					bch_err(c, "cannot reconstruct snapshot trees with multiple nodes");
					ret = bch_err_throw(c, fsck_repair_unimplemented);
					goto err;
				}

				ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
						check_snapshot_exists(trans, *id));
				if (ret)
					goto err;
			}
		}
	}
fsck_err:
err:
	bch2_trans_put(trans);
	snapshot_tree_reconstruct_exit(&r);
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}

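/*
 * Check that a key points to a valid snapshot ID, deleting it if the snapshot
 * was definitively deleted, or (after requiring the relevant recovery passes)
 * if the ID is simply missing; returns 1 if the key was deleted:
 */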
int __bch2_check_key_has_snapshot(struct btree_trans *trans,
				  struct btree_iter *iter,
				  struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	int ret = 0;
	enum snapshot_id_state state = bch2_snapshot_id_state(c, k.k->p.snapshot);

	/* Snapshot was definitively deleted, this error is marked autofix */
	if (fsck_err_on(state == SNAPSHOT_ID_deleted,
			trans, bkey_in_deleted_snapshot,
			"key in deleted snapshot %s, delete?",
			(bch2_btree_id_to_text(&buf, iter->btree_id),
			 prt_char(&buf, ' '),
			 bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
		ret = bch2_btree_delete_at(trans, iter,
					   BTREE_UPDATE_internal_snapshot_node) ?: 1;

	if (state == SNAPSHOT_ID_empty) {
		/*
		 * Snapshot missing: we should have caught this with btree_lost_data and
		 * kicked off reconstruct_snapshots, so if we end up here we have no
		 * idea what happened.
		 *
		 * Do not delete unless we know that subvolumes and snapshots
		 * are consistent:
		 *
		 * XXX:
		 *
		 * We could be smarter here, and instead of using the generic
		 * recovery pass ratelimiting, track if there have been any
		 * changes to the snapshots or inodes btrees since those passes
		 * last ran.
		 */
		ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_check_snapshots) ?: ret;
		ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_check_subvols) ?: ret;

		if (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots))
			ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret;

		unsigned repair_flags = FSCK_CAN_IGNORE | (!ret ? FSCK_CAN_FIX : 0);

		if (__fsck_err(trans, repair_flags, bkey_in_missing_snapshot,
			       "key in missing snapshot %s, delete?",
			       (bch2_btree_id_to_text(&buf, iter->btree_id),
				prt_char(&buf, ' '),
				bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
			ret = bch2_btree_delete_at(trans, iter,
						   BTREE_UPDATE_internal_snapshot_node) ?: 1;
		}
	}
fsck_err:
	printbuf_exit(&buf);
	return ret;
}

int __bch2_get_snapshot_overwrites(struct btree_trans *trans,
				   enum btree_id btree, struct bpos pos,
				   snapshot_id_list *s)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	for_each_btree_key_reverse_norestart(trans, iter, btree, bpos_predecessor(pos),
					     BTREE_ITER_all_snapshots, k, ret) {
		if (!bkey_eq(k.k->p, pos))
			break;

		if (!bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot) ||
		    snapshot_list_has_ancestor(c, s, k.k->p.snapshot))
			continue;

		ret = snapshot_list_add(c, s, k.k->p.snapshot);
		if (ret)
			break;
	}
	bch2_trans_iter_exit(trans, &iter);
	if (ret)
		darray_exit(s);

	return ret;
}

/*
 * Mark a snapshot as deleted, for future cleanup:
 */
int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
{
	struct btree_iter iter;
	struct bkey_i_snapshot *s =
		bch2_bkey_get_mut_typed(trans, &iter,
				    BTREE_ID_snapshots, POS(0, id),
				    0, snapshot);
	int ret = PTR_ERR_OR_ZERO(s);
	if (unlikely(ret)) {
		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
					trans->c, "missing snapshot %u", id);
		return ret;
	}

	/* already deleted? */
	if (BCH_SNAPSHOT_WILL_DELETE(&s->v))
		goto err;

	SET_BCH_SNAPSHOT_WILL_DELETE(&s->v, true);
	SET_BCH_SNAPSHOT_SUBVOL(&s->v, false);
	s->v.subvol = 0;
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s)
{
	if (le32_to_cpu(s->children[0]) < le32_to_cpu(s->children[1]))
		swap(s->children[0], s->children[1]);
}

static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter, p_iter = {};
	struct btree_iter c_iter = {};
	struct btree_iter tree_iter = {};
	u32 parent_id, child_id;
	unsigned i;
	int ret = 0;

	struct bkey_i_snapshot *s =
		bch2_bkey_get_mut_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
					BTREE_ITER_intent, snapshot);
	ret = PTR_ERR_OR_ZERO(s);
	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
				"missing snapshot %u", id);

	if (ret)
		goto err;

	BUG_ON(BCH_SNAPSHOT_DELETED(&s->v));
	BUG_ON(s->v.children[1]);

	parent_id = le32_to_cpu(s->v.parent);
	child_id = le32_to_cpu(s->v.children[0]);

	if (parent_id) {
		struct bkey_i_snapshot *parent;

		parent = bch2_bkey_get_mut_typed(trans, &p_iter,
				     BTREE_ID_snapshots, POS(0, parent_id),
				     0, snapshot);
		ret = PTR_ERR_OR_ZERO(parent);
		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
					"missing snapshot %u", parent_id);
		if (unlikely(ret))
			goto err;

		/* find entry in parent->children for node being deleted */
		for (i = 0; i < 2; i++)
			if (le32_to_cpu(parent->v.children[i]) == id)
				break;

		if (bch2_fs_inconsistent_on(i == 2, c,
					"snapshot %u missing child pointer to %u",
					parent_id, id))
			goto err;

		parent->v.children[i] = cpu_to_le32(child_id);

		normalize_snapshot_child_pointers(&parent->v);
	}

	if (child_id) {
		struct bkey_i_snapshot *child;

		child = bch2_bkey_get_mut_typed(trans, &c_iter,
				     BTREE_ID_snapshots, POS(0, child_id),
				     0, snapshot);
		ret = PTR_ERR_OR_ZERO(child);
		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
					"missing snapshot %u", child_id);
		if (unlikely(ret))
			goto err;

		child->v.parent = cpu_to_le32(parent_id);

		if (!child->v.parent) {
			child->v.skip[0] = 0;
			child->v.skip[1] = 0;
			child->v.skip[2] = 0;
		}
	}

	if (!parent_id) {
		/*
		 * We're deleting the root of a snapshot tree: update the
		 * snapshot_tree entry to point to the new root, or delete it if
		 * this is the last snapshot ID in this tree:
		 */
		struct bkey_i_snapshot_tree *s_t;

		BUG_ON(s->v.children[1]);

		s_t = bch2_bkey_get_mut_typed(trans, &tree_iter,
				BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s->v.tree)),
				0, snapshot_tree);
		ret = PTR_ERR_OR_ZERO(s_t);
		if (ret)
			goto err;

		if (s->v.children[0]) {
			s_t->v.root_snapshot = s->v.children[0];
		} else {
			s_t->k.type = KEY_TYPE_deleted;
			set_bkey_val_u64s(&s_t->k, 0);
		}
	}

	if (!bch2_request_incompat_feature(c, bcachefs_metadata_version_snapshot_deletion_v2)) {
		SET_BCH_SNAPSHOT_DELETED(&s->v, true);
		s->v.parent		= 0;
		s->v.children[0]	= 0;
		s->v.children[1]	= 0;
		s->v.subvol		= 0;
		s->v.tree		= 0;
		s->v.depth		= 0;
		s->v.skip[0]		= 0;
		s->v.skip[1]		= 0;
		s->v.skip[2]		= 0;
	} else {
		s->k.type = KEY_TYPE_deleted;
		set_bkey_val_u64s(&s->k, 0);
	}
err:
	bch2_trans_iter_exit(trans, &tree_iter);
	bch2_trans_iter_exit(trans, &p_iter);
	bch2_trans_iter_exit(trans, &c_iter);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

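/*
 * Allocate @nr_snapids new snapshot IDs: a node's ID must always be less than
 * its parent's, so new IDs are taken from the empty slots just below the
 * lowest allocated ID:
 */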
static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
			  u32 *new_snapids,
			  u32 *snapshot_subvols,
			  unsigned nr_snapids)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_i_snapshot *n;
	struct bkey_s_c k;
	unsigned i, j;
	u32 depth = bch2_snapshot_depth(c, parent);
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
			     POS_MIN, BTREE_ITER_intent);
	k = bch2_btree_iter_peek(trans, &iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	for (i = 0; i < nr_snapids; i++) {
		k = bch2_btree_iter_prev_slot(trans, &iter);
		ret = bkey_err(k);
		if (ret)
			goto err;

		if (!k.k || !k.k->p.offset) {
			ret = bch_err_throw(c, ENOSPC_snapshot_create);
			goto err;
		}

		n = bch2_bkey_alloc(trans, &iter, 0, snapshot);
		ret = PTR_ERR_OR_ZERO(n);
		if (ret)
			goto err;

		n->v.flags	= 0;
		n->v.parent	= cpu_to_le32(parent);
		n->v.subvol	= cpu_to_le32(snapshot_subvols[i]);
		n->v.tree	= cpu_to_le32(tree);
		n->v.depth	= cpu_to_le32(depth);
		n->v.btime.lo	= cpu_to_le64(bch2_current_time(c));
		n->v.btime.hi	= 0;

		for (j = 0; j < ARRAY_SIZE(n->v.skip); j++)
			n->v.skip[j] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent));

		bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_le32);
		SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);

		ret = __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
					   bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0);
		if (ret)
			goto err;

		new_snapids[i] = iter.pos.offset;
	}
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Create new snapshot IDs as children of an existing snapshot ID:
 */
static int bch2_snapshot_node_create_children(struct btree_trans *trans, u32 parent,
					      u32 *new_snapids,
					      u32 *snapshot_subvols,
					      unsigned nr_snapids)
{
	struct btree_iter iter;
	struct bkey_i_snapshot *n_parent;
	int ret = 0;

	n_parent = bch2_bkey_get_mut_typed(trans, &iter,
			BTREE_ID_snapshots, POS(0, parent),
			0, snapshot);
	ret = PTR_ERR_OR_ZERO(n_parent);
	if (unlikely(ret)) {
		if (bch2_err_matches(ret, ENOENT))
			bch_err(trans->c, "snapshot %u not found", parent);
		return ret;
	}

	if (n_parent->v.children[0] || n_parent->v.children[1]) {
		bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children");
		ret = -EINVAL;
		goto err;
	}

	ret = create_snapids(trans, parent, le32_to_cpu(n_parent->v.tree),
			     new_snapids, snapshot_subvols, nr_snapids);
	if (ret)
		goto err;

	n_parent->v.children[0] = cpu_to_le32(new_snapids[0]);
	n_parent->v.children[1] = cpu_to_le32(new_snapids[1]);
	n_parent->v.subvol = 0;
	SET_BCH_SNAPSHOT_SUBVOL(&n_parent->v, false);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Create a snapshot node that is the root of a new tree:
 */
static int bch2_snapshot_node_create_tree(struct btree_trans *trans,
					  u32 *new_snapids,
					  u32 *snapshot_subvols,
					  unsigned nr_snapids)
{
	struct bkey_i_snapshot_tree *n_tree;
	int ret;

	n_tree = __bch2_snapshot_tree_create(trans);
	ret =   PTR_ERR_OR_ZERO(n_tree) ?:
		create_snapids(trans, 0, n_tree->k.p.offset,
			       new_snapids, snapshot_subvols, nr_snapids);
	if (ret)
		return ret;

	n_tree->v.master_subvol	= cpu_to_le32(snapshot_subvols[0]);
	n_tree->v.root_snapshot	= cpu_to_le32(new_snapids[0]);
	return 0;
}

int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
			      u32 *new_snapids,
			      u32 *snapshot_subvols,
			      unsigned nr_snapids)
{
	BUG_ON((parent == 0) != (nr_snapids == 1));
	BUG_ON((parent != 0) != (nr_snapids == 2));

	return parent
		? bch2_snapshot_node_create_children(trans, parent,
				new_snapids, snapshot_subvols, nr_snapids)
		: bch2_snapshot_node_create_tree(trans,
				new_snapids, snapshot_subvols, nr_snapids);
}

/*
 * If we have an unlinked inode in an internal snapshot node, and the inode
 * really has been deleted in all child snapshots, how does this get cleaned up?
 *
 * first there is the problem of how keys that have been overwritten in all
 * child snapshots get deleted (unimplemented?), but inodes may perhaps be
 * special?
 *
 * also: unlinked inode in internal snapshot appears to not be getting deleted
 * correctly if inode doesn't exist in leaf snapshots
 *
 * solution:
 *
 * for a key in an interior snapshot node that needs work to be done that
 * requires it to be mutated: iterate over all descendant leaf nodes and copy
 * that key to snapshot leaf nodes, where we can mutate it
 */

static inline u32 interior_delete_has_id(interior_delete_list *l, u32 id)
{
	struct snapshot_interior_delete *i = darray_find_p(*l, i, i->id == id);
	return i ? i->live_child : 0;
}

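/*
 * Find a live descendant to inherit the keys of an interior node being
 * deleted: prefer a direct child that's staying alive, otherwise recurse:
 */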
static unsigned __live_child(struct snapshot_table *t, u32 id,
			     snapshot_id_list *delete_leaves,
			     interior_delete_list *delete_interior)
{
	struct snapshot_t *s = __snapshot_t(t, id);
	if (!s)
		return 0;

	for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++)
		if (s->children[i] &&
		    !snapshot_list_has_id(delete_leaves, s->children[i]) &&
		    !interior_delete_has_id(delete_interior, s->children[i]))
			return s->children[i];

	for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++) {
		u32 live_child = s->children[i]
			? __live_child(t, s->children[i], delete_leaves, delete_interior)
			: 0;
		if (live_child)
			return live_child;
	}

	return 0;
}

static unsigned live_child(struct bch_fs *c, u32 id)
{
	struct snapshot_delete *d = &c->snapshot_delete;

	guard(rcu)();
	return __live_child(rcu_dereference(c->snapshots), id,
			    &d->delete_leaves, &d->delete_interior);
}

static bool snapshot_id_dying(struct snapshot_delete *d, unsigned id)
{
	return snapshot_list_has_id(&d->delete_leaves, id) ||
		interior_delete_has_id(&d->delete_interior, id) != 0;
}

static int delete_dead_snapshots_process_key(struct btree_trans *trans,
					     struct btree_iter *iter,
					     struct bkey_s_c k)
{
	struct snapshot_delete *d = &trans->c->snapshot_delete;

	if (snapshot_list_has_id(&d->delete_leaves, k.k->p.snapshot))
		return bch2_btree_delete_at(trans, iter,
					    BTREE_UPDATE_internal_snapshot_node);

	u32 live_child = interior_delete_has_id(&d->delete_interior, k.k->p.snapshot);
	if (live_child) {
		struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
		int ret = PTR_ERR_OR_ZERO(new);
		if (ret)
			return ret;

		new->k.p.snapshot = live_child;

		struct btree_iter dst_iter;
		struct bkey_s_c dst_k = bch2_bkey_get_iter(trans, &dst_iter,
							   iter->btree_id, new->k.p,
							   BTREE_ITER_all_snapshots|
							   BTREE_ITER_intent);
		ret = bkey_err(dst_k);
		if (ret)
			return ret;

		ret =   (bkey_deleted(dst_k.k)
			 ? bch2_trans_update(trans, &dst_iter, new,
					     BTREE_UPDATE_internal_snapshot_node)
			 : 0) ?:
			bch2_btree_delete_at(trans, iter,
					     BTREE_UPDATE_internal_snapshot_node);
		bch2_trans_iter_exit(trans, &dst_iter);
		return ret;
	}

	return 0;
}

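/*
 * If the current key is in a snapshot tree we're not deleting from, skip
 * ahead to the next inode number instead of visiting each of the object's
 * keys:
 */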
static bool skip_unrelated_snapshot_tree(struct btree_trans *trans, struct btree_iter *iter, u64 *prev_inum)
{
	struct bch_fs *c = trans->c;
	struct snapshot_delete *d = &c->snapshot_delete;

	u64 inum = iter->btree_id != BTREE_ID_inodes
		? iter->pos.inode
		: iter->pos.offset;

	if (*prev_inum == inum)
		return false;

	*prev_inum = inum;

	bool ret = !snapshot_list_has_id(&d->deleting_from_trees,
					 bch2_snapshot_tree(c, iter->pos.snapshot));
	if (unlikely(ret)) {
		struct bpos pos = iter->pos;
		pos.snapshot = 0;
		if (iter->btree_id != BTREE_ID_inodes)
			pos.offset = U64_MAX;
		bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(pos));
	}

	return ret;
}

static int delete_dead_snapshot_keys_v1(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;
	struct snapshot_delete *d = &c->snapshot_delete;

	for (d->pos.btree = 0; d->pos.btree < BTREE_ID_NR; d->pos.btree++) {
		struct disk_reservation res = { 0 };
		u64 prev_inum = 0;

		d->pos.pos = POS_MIN;

		if (!btree_type_has_snapshots(d->pos.btree))
			continue;

		int ret = for_each_btree_key_commit(trans, iter,
				d->pos.btree, POS_MIN,
				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
				&res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
			d->pos.pos = iter.pos;

			if (skip_unrelated_snapshot_tree(trans, &iter, &prev_inum))
				continue;

			delete_dead_snapshots_process_key(trans, &iter, k);
		}));

		bch2_disk_reservation_put(c, &res);

		if (ret)
			return ret;
	}

	return 0;
}

static int delete_dead_snapshot_keys_range(struct btree_trans *trans, enum btree_id btree,
					   struct bpos start, struct bpos end)
{
	struct bch_fs *c = trans->c;
	struct snapshot_delete *d = &c->snapshot_delete;
	struct disk_reservation res = { 0 };

	d->pos.btree	= btree;
	d->pos.pos	= POS_MIN;

	int ret = for_each_btree_key_max_commit(trans, iter,
			btree, start, end,
			BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
			&res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
		d->pos.pos = iter.pos;
		delete_dead_snapshots_process_key(trans, &iter, k);
	}));

	bch2_disk_reservation_put(c, &res);
	return ret;
}

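/*
 * v2 walks the inodes btree and, when it finds an inode in a dying snapshot,
 * processes that inode's extents, dirents and xattrs as contiguous ranges -
 * much better locality than v1's full scan of every snapshotted btree - then
 * makes a final pass over the inodes btree itself:
 */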
static int delete_dead_snapshot_keys_v2(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;
	struct snapshot_delete *d = &c->snapshot_delete;
	struct disk_reservation res = { 0 };
	u64 prev_inum = 0;
	int ret = 0;

	struct btree_iter iter;
	bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, POS_MIN,
			     BTREE_ITER_prefetch|BTREE_ITER_all_snapshots);

	while (1) {
		struct bkey_s_c k;
		ret = lockrestart_do(trans,
				bkey_err(k = bch2_btree_iter_peek(trans, &iter)));
		if (ret)
			break;

		if (!k.k)
			break;

		d->pos.btree	= iter.btree_id;
		d->pos.pos	= iter.pos;

		if (skip_unrelated_snapshot_tree(trans, &iter, &prev_inum))
			continue;

		if (snapshot_id_dying(d, k.k->p.snapshot)) {
			struct bpos start	= POS(k.k->p.offset, 0);
			struct bpos end		= POS(k.k->p.offset, U64_MAX);

			ret =   delete_dead_snapshot_keys_range(trans, BTREE_ID_extents, start, end) ?:
				delete_dead_snapshot_keys_range(trans, BTREE_ID_dirents, start, end) ?:
				delete_dead_snapshot_keys_range(trans, BTREE_ID_xattrs, start, end);
			if (ret)
				break;

			bch2_btree_iter_set_pos(trans, &iter, POS(0, k.k->p.offset + 1));
		} else {
			bch2_btree_iter_advance(trans, &iter);
		}
	}
	bch2_trans_iter_exit(trans, &iter);

	if (ret)
		goto err;

	prev_inum = 0;
	ret = for_each_btree_key_commit(trans, iter,
			BTREE_ID_inodes, POS_MIN,
			BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
			&res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
		d->pos.btree	= iter.btree_id;
		d->pos.pos	= iter.pos;

		if (skip_unrelated_snapshot_tree(trans, &iter, &prev_inum))
			continue;

		delete_dead_snapshots_process_key(trans, &iter, k);
	}));
err:
	bch2_disk_reservation_put(c, &res);
	return ret;
}

/*
 * For a given snapshot, if it doesn't have a subvolume that points to it, and
 * it doesn't have child snapshot nodes - it's now redundant and we can mark it
 * as deleted.
 */
static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s_c k)
{
	if (k.k->type != KEY_TYPE_snapshot)
		return 0;

	struct bch_fs *c = trans->c;
	struct snapshot_delete *d = &c->snapshot_delete;
	struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
	unsigned live_children = 0;
	int ret = 0;

	if (BCH_SNAPSHOT_SUBVOL(s.v))
		return 0;

	if (BCH_SNAPSHOT_DELETED(s.v))
		return 0;

	mutex_lock(&d->progress_lock);
	for (unsigned i = 0; i < 2; i++) {
		u32 child = le32_to_cpu(s.v->children[i]);

		live_children += child &&
			!snapshot_list_has_id(&d->delete_leaves, child);
	}

	u32 tree = bch2_snapshot_tree(c, s.k->p.offset);

	if (live_children == 0) {
		ret =   snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?:
			snapshot_list_add(c, &d->delete_leaves, s.k->p.offset);
	} else if (live_children == 1) {
		struct snapshot_interior_delete n = {
			.id		= s.k->p.offset,
			.live_child	= live_child(c, s.k->p.offset),
		};

		if (!n.live_child) {
			bch_err(c, "error finding live child of snapshot %u", n.id);
			ret = -EINVAL;
		} else {
			ret =   snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?:
				darray_push(&d->delete_interior, n);
		}
	}
	mutex_unlock(&d->progress_lock);

	return ret;
}

static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
						interior_delete_list *skip)
{
	guard(rcu)();
	while (interior_delete_has_id(skip, id))
		id = __bch2_snapshot_parent(c, id);

	while (n--) {
		do {
			id = __bch2_snapshot_parent(c, id);
		} while (interior_delete_has_id(skip, id));
	}

	return id;
}

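/*
 * A node's depth and skiplist entries may reference interior nodes about to
 * be deleted: subtract the number of deleted ancestors from the depth, and
 * regenerate any skiplist entry that points to a deleted node:
 */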
static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
					      struct btree_iter *iter, struct bkey_s_c k,
					      interior_delete_list *deleted)
{
	struct bch_fs *c = trans->c;
	u32 nr_deleted_ancestors = 0;
	struct bkey_i_snapshot *s;
	int ret;

	if (!bch2_snapshot_exists(c, k.k->p.offset))
		return 0;

	if (k.k->type != KEY_TYPE_snapshot)
		return 0;

	if (interior_delete_has_id(deleted, k.k->p.offset))
		return 0;

	s = bch2_bkey_make_mut_noupdate_typed(trans, k, snapshot);
	ret = PTR_ERR_OR_ZERO(s);
	if (ret)
		return ret;

	darray_for_each(*deleted, i)
		nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, i->id);

	if (!nr_deleted_ancestors)
		return 0;

	le32_add_cpu(&s->v.depth, -nr_deleted_ancestors);

	if (!s->v.depth) {
		s->v.skip[0] = 0;
		s->v.skip[1] = 0;
		s->v.skip[2] = 0;
	} else {
		u32 depth = le32_to_cpu(s->v.depth);
		u32 parent = bch2_snapshot_parent(c, s->k.p.offset);

		for (unsigned j = 0; j < ARRAY_SIZE(s->v.skip); j++) {
			u32 id = le32_to_cpu(s->v.skip[j]);

			if (interior_delete_has_id(deleted, id)) {
				id = bch2_snapshot_nth_parent_skip(c,
							parent,
							depth > 1
							? get_random_u32_below(depth - 1)
							: 0,
							deleted);
				s->v.skip[j] = cpu_to_le32(id);
			}
		}

		bubble_sort(s->v.skip, ARRAY_SIZE(s->v.skip), cmp_le32);
	}

	return bch2_trans_update(trans, iter, &s->k_i, 0);
}

static void bch2_snapshot_delete_nodes_to_text(struct printbuf *out, struct snapshot_delete *d)
{
	prt_printf(out, "deleting from trees");
	darray_for_each(d->deleting_from_trees, i)
		prt_printf(out, " %u", *i);

	prt_printf(out, "deleting leaves");
	darray_for_each(d->delete_leaves, i)
		prt_printf(out, " %u", *i);
	prt_newline(out);

	prt_printf(out, "interior");
	darray_for_each(d->delete_interior, i)
		prt_printf(out, " %u->%u", i->id, i->live_child);
	prt_newline(out);
}

int __bch2_delete_dead_snapshots(struct bch_fs *c)
{
	struct snapshot_delete *d = &c->snapshot_delete;
	int ret = 0;

	if (!mutex_trylock(&d->lock))
		return 0;

	if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
		goto out_unlock;

	struct btree_trans *trans = bch2_trans_get(c);

	/*
	 * For every snapshot node: If we have no live children and it's not
	 * pointed to by a subvolume, delete it:
	 */
	d->running = true;
	d->pos = BBPOS_MIN;

	ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k,
		check_should_delete_snapshot(trans, k));
	if (!bch2_err_matches(ret, EROFS))
		bch_err_msg(c, ret, "walking snapshots");
	if (ret)
		goto err;

	if (!d->delete_leaves.nr && !d->delete_interior.nr)
		goto err;

	{
		struct printbuf buf = PRINTBUF;
		bch2_snapshot_delete_nodes_to_text(&buf, d);

		ret = commit_do(trans, NULL, NULL, 0, bch2_trans_log_msg(trans, &buf));
		printbuf_exit(&buf);
		if (ret)
			goto err;
	}

	ret = !bch2_request_incompat_feature(c, bcachefs_metadata_version_snapshot_deletion_v2)
		? delete_dead_snapshot_keys_v2(trans)
		: delete_dead_snapshot_keys_v1(trans);
	if (!bch2_err_matches(ret, EROFS))
		bch_err_msg(c, ret, "deleting keys from dying snapshots");
	if (ret)
		goto err;

	darray_for_each(d->delete_leaves, i) {
		ret = commit_do(trans, NULL, NULL, 0,
			bch2_snapshot_node_delete(trans, *i));
		if (!bch2_err_matches(ret, EROFS))
			bch_err_msg(c, ret, "deleting snapshot %u", *i);
		if (ret)
			goto err;
	}

	/*
	 * Fixing children of deleted snapshots can't be done completely
	 * atomically, if we crash between here and when we delete the interior
	 * nodes some depth fields will be off:
	 */
	ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
				  BTREE_ITER_intent, k,
				  NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &d->delete_interior));
	if (ret)
		goto err;

	darray_for_each(d->delete_interior, i) {
		ret = commit_do(trans, NULL, NULL, 0,
			bch2_snapshot_node_delete(trans, i->id));
		if (!bch2_err_matches(ret, EROFS))
			bch_err_msg(c, ret, "deleting snapshot %u", i->id);
		if (ret)
			goto err;
	}
err:
	mutex_lock(&d->progress_lock);
	darray_exit(&d->deleting_from_trees);
	darray_exit(&d->delete_interior);
	darray_exit(&d->delete_leaves);
	d->running = false;
	mutex_unlock(&d->progress_lock);
	bch2_trans_put(trans);

	bch2_recovery_pass_set_no_ratelimit(c, BCH_RECOVERY_PASS_check_snapshots);
out_unlock:
	mutex_unlock(&d->lock);
	if (!bch2_err_matches(ret, EROFS))
		bch_err_fn(c, ret);
	return ret;
}

int bch2_delete_dead_snapshots(struct bch_fs *c)
{
	if (!c->opts.auto_snapshot_deletion)
		return 0;

	return __bch2_delete_dead_snapshots(c);
}

void bch2_delete_dead_snapshots_work(struct work_struct *work)
{
	struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete.work);

	set_worker_desc("bcachefs-delete-dead-snapshots/%s", c->name);

	bch2_delete_dead_snapshots(c);
	enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots);
}

void bch2_delete_dead_snapshots_async(struct bch_fs *c)
{
	if (!c->opts.auto_snapshot_deletion)
		return;

	if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_delete_dead_snapshots))
		return;

	BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags));

	if (!queue_work(system_long_wq, &c->snapshot_delete.work))
		enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots);
}

void bch2_snapshot_delete_status_to_text(struct printbuf *out, struct bch_fs *c)
{
	struct snapshot_delete *d = &c->snapshot_delete;

	if (!d->running) {
		prt_str(out, "(not running)");
		return;
	}

	mutex_lock(&d->progress_lock);
	bch2_snapshot_delete_nodes_to_text(out, d);

	bch2_bbpos_to_text(out, d->pos);
	mutex_unlock(&d->progress_lock);
}

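/*
 * Returns 1 if another key exists at @pos in a snapshot descended from
 * @pos.snapshot, i.e. if the key at @pos has been overwritten in a child
 * snapshot:
 */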
int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
				       enum btree_id id,
				       struct bpos pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	for_each_btree_key_reverse_norestart(trans, iter, id, bpos_predecessor(pos),
					     BTREE_ITER_not_extents|
					     BTREE_ITER_all_snapshots,
					     k, ret) {
		if (!bkey_eq(pos, k.k->p))
			break;

		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot)) {
			ret = 1;
			break;
		}
	}
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}

static bool interior_snapshot_needs_delete(struct bkey_s_c_snapshot snap)
{
	/* If there's one child, it's redundant and keys will be moved to the child */
	return !!snap.v->children[0] + !!snap.v->children[1] == 1;
}

static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k)
{
	if (k.k->type != KEY_TYPE_snapshot)
		return 0;

	struct bkey_s_c_snapshot snap = bkey_s_c_to_snapshot(k);
	if (BCH_SNAPSHOT_WILL_DELETE(snap.v) ||
	    interior_snapshot_needs_delete(snap))
		set_bit(BCH_FS_need_delete_dead_snapshots, &trans->c->flags);

	return 0;
}

int bch2_snapshots_read(struct bch_fs *c)
{
	/*
	 * Initializing the is_ancestor bitmaps requires ancestors to already be
	 * initialized - so mark in reverse:
	 */
	int ret = bch2_trans_run(c,
		for_each_btree_key_reverse(trans, iter, BTREE_ID_snapshots,
				   POS_MAX, 0, k,
			__bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
			bch2_check_snapshot_needs_deletion(trans, k)));
	bch_err_fn(c, ret);

	/*
	 * It's important that we check if we need to reconstruct snapshots
	 * before going RW, so we mark that pass as required in the superblock -
	 * otherwise, we could end up deleting keys with missing snapshot nodes
	 * instead
	 */
	BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
	       test_bit(BCH_FS_may_go_rw, &c->flags));

	return ret;
}

void bch2_fs_snapshots_exit(struct bch_fs *c)
{
	kvfree(rcu_dereference_protected(c->snapshots, true));
}

void bch2_fs_snapshots_init_early(struct bch_fs *c)
{
	INIT_WORK(&c->snapshot_delete.work, bch2_delete_dead_snapshots_work);
	mutex_init(&c->snapshot_delete.lock);
	mutex_init(&c->snapshot_delete.progress_lock);
	mutex_init(&c->snapshots_unlinked_lock);
}