// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "bbpos.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_key_cache.h"
#include "btree_update.h"
#include "buckets.h"
#include "enumerated_ref.h"
#include "errcode.h"
#include "error.h"
#include "fs.h"
#include "recovery_passes.h"
#include "snapshot.h"

#include <linux/random.h>

/*
 * Snapshot trees:
 *
 * Keys in BTREE_ID_snapshot_trees identify a whole tree of snapshot nodes; they
 * exist to provide a stable identifier for the whole lifetime of a snapshot
 * tree.
 */

void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
				struct bkey_s_c k)
{
	struct bkey_s_c_snapshot_tree t = bkey_s_c_to_snapshot_tree(k);

	prt_printf(out, "subvol %u root snapshot %u",
		   le32_to_cpu(t.v->master_subvol),
		   le32_to_cpu(t.v->root_snapshot));
}

int bch2_snapshot_tree_validate(struct bch_fs *c, struct bkey_s_c k,
				struct bkey_validate_context from)
{
	int ret = 0;

	bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) ||
			 bkey_lt(k.k->p, POS(0, 1)),
			 c, snapshot_tree_pos_bad,
			 "bad pos");
fsck_err:
	return ret;
}

int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id,
			      struct bch_snapshot_tree *s)
{
	int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id),
					  BTREE_ITER_with_updates, snapshot_tree, s);

	if (bch2_err_matches(ret, ENOENT))
		ret = bch_err_throw(trans->c, ENOENT_snapshot_tree);
	return ret;
}

struct bkey_i_snapshot_tree *
__bch2_snapshot_tree_create(struct btree_trans *trans)
{
	struct btree_iter iter;
	int ret = bch2_bkey_get_empty_slot(trans, &iter,
			BTREE_ID_snapshot_trees, POS(0, U32_MAX));
	struct bkey_i_snapshot_tree *s_t;

	if (ret == -BCH_ERR_ENOSPC_btree_slot)
		ret = bch_err_throw(trans->c, ENOSPC_snapshot_tree);
	if (ret)
		return ERR_PTR(ret);

	s_t = bch2_bkey_alloc(trans, &iter, 0, snapshot_tree);
	ret = PTR_ERR_OR_ZERO(s_t);
	bch2_trans_iter_exit(trans, &iter);
	return ret ? ERR_PTR(ret) : s_t;
}

static int bch2_snapshot_tree_create(struct btree_trans *trans,
				     u32 root_id, u32 subvol_id, u32 *tree_id)
{
	struct bkey_i_snapshot_tree *n_tree =
		__bch2_snapshot_tree_create(trans);

	if (IS_ERR(n_tree))
		return PTR_ERR(n_tree);

	n_tree->v.master_subvol	= cpu_to_le32(subvol_id);
	n_tree->v.root_snapshot	= cpu_to_le32(root_id);
	*tree_id = n_tree->k.p.offset;
	return 0;
}

/* Snapshot nodes: */
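/*
 * Before check_snapshots has run, the skiplists and is_ancestor bitmaps
 * can't be trusted, so we can only walk parent pointers one at a time.
 * Snapshot IDs are allocated descending from U32_MAX, so a node's
 * ancestors always have higher IDs - hence the id < ancestor bound:
 */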
static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, u32 ancestor)
{
	while (id && id < ancestor) {
		const struct snapshot_t *s = __snapshot_t(t, id);
		id = s ? s->parent : 0;
	}
	return id == ancestor;
}

static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
{
	guard(rcu)();
	return __bch2_snapshot_is_ancestor_early(rcu_dereference(c->snapshots), id, ancestor);
}

static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
{
	const struct snapshot_t *s = __snapshot_t(t, id);
	if (!s)
		return 0;

	if (s->skip[2] <= ancestor)
		return s->skip[2];
	if (s->skip[1] <= ancestor)
		return s->skip[1];
	if (s->skip[0] <= ancestor)
		return s->skip[0];
	return s->parent;
}

static bool test_ancestor_bitmap(struct snapshot_table *t, u32 id, u32 ancestor)
{
	const struct snapshot_t *s = __snapshot_t(t, id);
	if (!s)
		return false;

	return test_bit(ancestor - id - 1, s->is_ancestor);
}
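/*
 * The fast ancestor check: each node's skip[] entries point at ancestors
 * at randomly chosen depths, letting us climb the tree in large strides -
 * get_ancestor_below() takes the furthest jump that doesn't overshoot.
 * Once we're within IS_ANCESTOR_BITMAP of @ancestor, a single bitmap test
 * finishes the query:
 */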
bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	u32 orig_id = id;
#endif

	guard(rcu)();
	struct snapshot_table *t = rcu_dereference(c->snapshots);

	if (unlikely(c->recovery.pass_done < BCH_RECOVERY_PASS_check_snapshots))
		return __bch2_snapshot_is_ancestor_early(t, id, ancestor);

	if (likely(ancestor >= IS_ANCESTOR_BITMAP))
		while (id && id < ancestor - IS_ANCESTOR_BITMAP)
			id = get_ancestor_below(t, id, ancestor);

	bool ret = id && id < ancestor
		? test_ancestor_bitmap(t, id, ancestor)
		: id == ancestor;

	EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, orig_id, ancestor));
	return ret;
}
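/*
 * The in-memory snapshot table is a single flat array indexed by
 * U32_MAX - id (IDs are allocated from the top down); it's grown by
 * copying into a larger allocation and swapping the new table in under
 * RCU:
 */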
static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
{
	size_t idx = U32_MAX - id;
	struct snapshot_table *new, *old;

	size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1));
	size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]);

	if (unlikely(new_bytes > INT_MAX))
		return NULL;

	new = kvzalloc(new_bytes, GFP_KERNEL);
	if (!new)
		return NULL;

	new->nr = new_size;

	old = rcu_dereference_protected(c->snapshots, true);
	if (old)
		memcpy(new->s, old->s, sizeof(old->s[0]) * old->nr);

	rcu_assign_pointer(c->snapshots, new);
	kvfree_rcu(old, rcu);

	return &rcu_dereference_protected(c->snapshots,
				lockdep_is_held(&c->snapshot_table_lock))->s[idx];
}

static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id)
{
	size_t idx = U32_MAX - id;
	struct snapshot_table *table =
		rcu_dereference_protected(c->snapshots,
				lockdep_is_held(&c->snapshot_table_lock));

	lockdep_assert_held(&c->snapshot_table_lock);

	if (likely(table && idx < table->nr))
		return &table->s[idx];

	return __snapshot_t_mut(c, id);
}

void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
			   struct bkey_s_c k)
{
	struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);

	if (BCH_SNAPSHOT_SUBVOL(s.v))
		prt_str(out, "subvol ");
	if (BCH_SNAPSHOT_WILL_DELETE(s.v))
		prt_str(out, "will_delete ");
	if (BCH_SNAPSHOT_DELETED(s.v))
		prt_str(out, "deleted ");

	prt_printf(out, "parent %10u children %10u %10u subvol %u tree %u",
		   le32_to_cpu(s.v->parent),
		   le32_to_cpu(s.v->children[0]),
		   le32_to_cpu(s.v->children[1]),
		   le32_to_cpu(s.v->subvol),
		   le32_to_cpu(s.v->tree));

	if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, depth))
		prt_printf(out, " depth %u skiplist %u %u %u",
			   le32_to_cpu(s.v->depth),
			   le32_to_cpu(s.v->skip[0]),
			   le32_to_cpu(s.v->skip[1]),
			   le32_to_cpu(s.v->skip[2]));
}

int bch2_snapshot_validate(struct bch_fs *c, struct bkey_s_c k,
			   struct bkey_validate_context from)
{
	struct bkey_s_c_snapshot s;
	u32 i, id;
	int ret = 0;

	bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) ||
			 bkey_lt(k.k->p, POS(0, 1)),
			 c, snapshot_pos_bad,
			 "bad pos");

	s = bkey_s_c_to_snapshot(k);

	id = le32_to_cpu(s.v->parent);
	bkey_fsck_err_on(id && id <= k.k->p.offset,
			 c, snapshot_parent_bad,
			 "bad parent node (%u <= %llu)",
			 id, k.k->p.offset);

	bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]),
			 c, snapshot_children_not_normalized,
			 "children not normalized");

	bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1],
			 c, snapshot_child_duplicate,
			 "duplicate child nodes");

	for (i = 0; i < 2; i++) {
		id = le32_to_cpu(s.v->children[i]);

		bkey_fsck_err_on(id >= k.k->p.offset,
				 c, snapshot_child_bad,
				 "bad child node (%u >= %llu)",
				 id, k.k->p.offset);
	}

	if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) {
		bkey_fsck_err_on(le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) ||
				 le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]),
				 c, snapshot_skiplist_not_normalized,
				 "skiplist not normalized");

		for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) {
			id = le32_to_cpu(s.v->skip[i]);

			bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent),
					 c, snapshot_skiplist_bad,
					 "bad skiplist node %u", id);
		}
	}
fsck_err:
	return ret;
}

static int bch2_snapshot_table_make_room(struct bch_fs *c, u32 id)
{
	mutex_lock(&c->snapshot_table_lock);
	int ret = snapshot_t_mut(c, id)
		? 0
		: bch_err_throw(c, ENOMEM_mark_snapshot);
	mutex_unlock(&c->snapshot_table_lock);
	return ret;
}
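/*
 * Runs as the btree trigger for KEY_TYPE_snapshot: keeps the in-memory
 * snapshot table in sync with the snapshots btree, and kicks off
 * background deletion when a node is flagged for deletion:
 */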
static int __bch2_mark_snapshot(struct btree_trans *trans,
		       enum btree_id btree, unsigned level,
		       struct bkey_s_c old, struct bkey_s_c new,
		       enum btree_iter_update_trigger_flags flags)
{
	struct bch_fs *c = trans->c;
	struct snapshot_t *t;
	u32 id = new.k->p.offset;
	int ret = 0;

	mutex_lock(&c->snapshot_table_lock);

	t = snapshot_t_mut(c, id);
	if (!t) {
		ret = bch_err_throw(c, ENOMEM_mark_snapshot);
		goto err;
	}

	if (new.k->type == KEY_TYPE_snapshot) {
		struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);

		t->state	= !BCH_SNAPSHOT_DELETED(s.v)
			? SNAPSHOT_ID_live
			: SNAPSHOT_ID_deleted;
		t->parent	= le32_to_cpu(s.v->parent);
		t->children[0]	= le32_to_cpu(s.v->children[0]);
		t->children[1]	= le32_to_cpu(s.v->children[1]);
		t->subvol	= BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0;
		t->tree		= le32_to_cpu(s.v->tree);

		if (bkey_val_bytes(s.k) > offsetof(struct bch_snapshot, depth)) {
			t->depth	= le32_to_cpu(s.v->depth);
			t->skip[0]	= le32_to_cpu(s.v->skip[0]);
			t->skip[1]	= le32_to_cpu(s.v->skip[1]);
			t->skip[2]	= le32_to_cpu(s.v->skip[2]);
		} else {
			t->depth	= 0;
			t->skip[0]	= 0;
			t->skip[1]	= 0;
			t->skip[2]	= 0;
		}

		u32 parent = id;

		while ((parent = bch2_snapshot_parent_early(c, parent)) &&
		       parent - id - 1 < IS_ANCESTOR_BITMAP)
			__set_bit(parent - id - 1, t->is_ancestor);

		if (BCH_SNAPSHOT_WILL_DELETE(s.v)) {
			set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
			if (c->recovery.pass_done > BCH_RECOVERY_PASS_delete_dead_snapshots)
				bch2_delete_dead_snapshots_async(c);
		}
	} else {
		memset(t, 0, sizeof(*t));
	}
err:
	mutex_unlock(&c->snapshot_table_lock);
	return ret;
}

int bch2_mark_snapshot(struct btree_trans *trans,
		       enum btree_id btree, unsigned level,
		       struct bkey_s_c old, struct bkey_s new,
		       enum btree_iter_update_trigger_flags flags)
{
	return __bch2_mark_snapshot(trans, btree, level, old, new.s_c, flags);
}

int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
			 struct bch_snapshot *s)
{
	return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, POS(0, id),
				       BTREE_ITER_with_updates, snapshot, s);
}

/* fsck: */

static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child)
{
	return snapshot_t(c, id)->children[child];
}

static u32 bch2_snapshot_left_child(struct bch_fs *c, u32 id)
{
	return bch2_snapshot_child(c, id, 0);
}

static u32 bch2_snapshot_right_child(struct bch_fs *c, u32 id)
{
	return bch2_snapshot_child(c, id, 1);
}

static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id)
{
	u32 n, parent;

	n = bch2_snapshot_left_child(c, id);
	if (n)
		return n;

	while ((parent = bch2_snapshot_parent(c, id))) {
		n = bch2_snapshot_right_child(c, parent);
		if (n && n != id)
			return n;
		id = parent;
	}

	return 0;
}

u32 bch2_snapshot_oldest_subvol(struct bch_fs *c, u32 snapshot_root,
				snapshot_id_list *skip)
{
	guard(rcu)();
	u32 id, subvol = 0, s;
retry:
	id = snapshot_root;
	while (id && bch2_snapshot_exists(c, id)) {
		if (!(skip && snapshot_list_has_id(skip, id))) {
			s = snapshot_t(c, id)->subvol;

			if (s && (!subvol || s < subvol))
				subvol = s;
		}
		id = bch2_snapshot_tree_next(c, id);
		if (id == snapshot_root)
			break;
	}

	if (!subvol && skip) {
		skip = NULL;
		goto retry;
	}

	return subvol;
}
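/*
 * Find a subvolume to be the "master" subvol for a snapshot tree: the
 * first non-snapshot subvolume that points into the tree. If there is
 * none, fall back to the oldest subvolume in the tree and clear its
 * snapshot flag:
 */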
static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans,
					    u32 snapshot_root, u32 *subvol_id)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	bool found = false;
	int ret;

	for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN,
				     0, k, ret) {
		if (k.k->type != KEY_TYPE_subvolume)
			continue;

		struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
		if (!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.v->snapshot), snapshot_root))
			continue;
		if (!BCH_SUBVOLUME_SNAP(s.v)) {
			*subvol_id = s.k->p.offset;
			found = true;
			break;
		}
	}
	bch2_trans_iter_exit(trans, &iter);

	if (!ret && !found) {
		struct bkey_i_subvolume *u;

		*subvol_id = bch2_snapshot_oldest_subvol(c, snapshot_root, NULL);

		u = bch2_bkey_get_mut_typed(trans, &iter,
					    BTREE_ID_subvolumes, POS(0, *subvol_id),
					    0, subvolume);
		ret = PTR_ERR_OR_ZERO(u);
		if (ret)
			return ret;

		SET_BCH_SUBVOLUME_SNAP(&u->v, false);
	}

	return ret;
}

static int check_snapshot_tree(struct btree_trans *trans,
			       struct btree_iter *iter,
			       struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct bkey_s_c_snapshot_tree st;
	struct bch_snapshot s;
	struct bch_subvolume subvol;
	struct printbuf buf = PRINTBUF;
	struct btree_iter snapshot_iter = {};
	u32 root_id;
	int ret;

	if (k.k->type != KEY_TYPE_snapshot_tree)
		return 0;

	st = bkey_s_c_to_snapshot_tree(k);
	root_id = le32_to_cpu(st.v->root_snapshot);

	struct bkey_s_c_snapshot snapshot_k =
		bch2_bkey_get_iter_typed(trans, &snapshot_iter, BTREE_ID_snapshots,
					 POS(0, root_id), 0, snapshot);
	ret = bkey_err(snapshot_k);
	if (ret && !bch2_err_matches(ret, ENOENT))
		goto err;

	if (!ret)
		bkey_val_copy(&s, snapshot_k);

	if (fsck_err_on(ret ||
			root_id != bch2_snapshot_root(c, root_id) ||
			st.k->p.offset != le32_to_cpu(s.tree),
			trans, snapshot_tree_to_missing_snapshot,
			"snapshot tree points to missing/incorrect snapshot:\n%s",
			(bch2_bkey_val_to_text(&buf, c, st.s_c),
			 prt_newline(&buf),
			 ret
			 ? prt_printf(&buf, "(%s)", bch2_err_str(ret))
			 : bch2_bkey_val_to_text(&buf, c, snapshot_k.s_c),
			 buf.buf))) {
		ret = bch2_btree_delete_at(trans, iter, 0);
		goto err;
	}

	if (!st.v->master_subvol)
		goto out;

	ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), false, &subvol);
	if (ret && !bch2_err_matches(ret, ENOENT))
		goto err;

	if (fsck_err_on(ret,
			trans, snapshot_tree_to_missing_subvol,
			"snapshot tree points to missing subvolume:\n%s",
			(printbuf_reset(&buf),
			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
	    fsck_err_on(!bch2_snapshot_is_ancestor(c,
						   le32_to_cpu(subvol.snapshot),
						   root_id),
			trans, snapshot_tree_to_wrong_subvol,
			"snapshot tree points to subvolume that does not point to snapshot in this tree:\n%s",
			(printbuf_reset(&buf),
			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
	    fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol),
			trans, snapshot_tree_to_snapshot_subvol,
			"snapshot tree points to snapshot subvolume:\n%s",
			(printbuf_reset(&buf),
			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
		struct bkey_i_snapshot_tree *u;
		u32 subvol_id;

		ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
		bch_err_fn(c, ret);

		if (bch2_err_matches(ret, ENOENT)) { /* nothing to be done here */
			ret = 0;
			goto err;
		}

		if (ret)
			goto err;

		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot_tree);
		ret = PTR_ERR_OR_ZERO(u);
		if (ret)
			goto err;

		u->v.master_subvol = cpu_to_le32(subvol_id);
		st = snapshot_tree_i_to_s_c(u);
	}
out:
err:
fsck_err:
	bch2_trans_iter_exit(trans, &snapshot_iter);
	printbuf_exit(&buf);
	return ret;
}

/*
 * For each snapshot_tree, make sure it points to the root of a snapshot tree
 * and that snapshot entry points back to it, or delete it.
 *
 * And, make sure it points to a subvolume within that snapshot tree, or correct
 * it to point to the oldest subvolume within that snapshot tree.
 */
int bch2_check_snapshot_trees(struct bch_fs *c)
{
	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
			BTREE_ID_snapshot_trees, POS_MIN,
			BTREE_ITER_prefetch, k,
			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		check_snapshot_tree(trans, &iter, k)));
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Look up snapshot tree for @tree_id and find root,
 * make sure @snap_id is a descendant:
 */
static int snapshot_tree_ptr_good(struct btree_trans *trans,
				  u32 snap_id, u32 tree_id)
{
	struct bch_snapshot_tree s_t;
	int ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t);

	if (bch2_err_matches(ret, ENOENT))
		return 0;
	if (ret)
		return ret;

	return bch2_snapshot_is_ancestor_early(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot));
}
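/*
 * Pick a skiplist entry for a new node: an ancestor of @id at a
 * uniformly random depth. These skip[] pointers are what let
 * bch2_snapshot_is_ancestor() walk up the tree in large strides:
 */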
u32 bch2_snapshot_skiplist_get(struct bch_fs *c, u32 id)
{
	if (!id)
		return 0;

	guard(rcu)();
	const struct snapshot_t *s = snapshot_t(c, id);
	return s->parent
		? bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth))
		: id;
}

static int snapshot_skiplist_good(struct btree_trans *trans, u32 id, struct bch_snapshot s)
{
	unsigned i;

	for (i = 0; i < 3; i++)
		if (!s.parent) {
			if (s.skip[i])
				return false;
		} else {
			if (!bch2_snapshot_is_ancestor_early(trans->c, id, le32_to_cpu(s.skip[i])))
				return false;
		}

	return true;
}

/*
 * snapshot_tree pointer was incorrect: look up root snapshot node, make sure
 * its snapshot_tree pointer is correct (allocate new one if necessary), then
 * update this node's pointer to root node's pointer:
 */
static int snapshot_tree_ptr_repair(struct btree_trans *trans,
				    struct btree_iter *iter,
				    struct bkey_s_c k,
				    struct bch_snapshot *s)
{
	struct bch_fs *c = trans->c;
	struct btree_iter root_iter;
	struct bch_snapshot_tree s_t;
	struct bkey_s_c_snapshot root;
	struct bkey_i_snapshot *u;
	u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id;
	int ret;

	root = bch2_bkey_get_iter_typed(trans, &root_iter,
			       BTREE_ID_snapshots, POS(0, root_id),
			       BTREE_ITER_with_updates, snapshot);
	ret = bkey_err(root);
	if (ret)
		goto err;

	tree_id = le32_to_cpu(root.v->tree);

	ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t);
	if (ret && !bch2_err_matches(ret, ENOENT))
		return ret;

	if (ret || le32_to_cpu(s_t.root_snapshot) != root_id) {
		u = bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot);
		ret =   PTR_ERR_OR_ZERO(u) ?:
			bch2_snapshot_tree_create(trans, root_id,
				bch2_snapshot_oldest_subvol(c, root_id, NULL),
				&tree_id);
		if (ret)
			goto err;

		u->v.tree = cpu_to_le32(tree_id);
		if (k.k->p.offset == root_id)
			*s = u->v;
	}

	if (k.k->p.offset != root_id) {
		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
		ret = PTR_ERR_OR_ZERO(u);
		if (ret)
			goto err;

		u->v.tree = cpu_to_le32(tree_id);
		*s = u->v;
	}
err:
	bch2_trans_iter_exit(trans, &root_iter);
	return ret;
}
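/*
 * Check one snapshot node: parent and children must point back at us,
 * and the subvol, snapshot_tree, depth and skiplist fields must all be
 * consistent - repairing what we can:
 */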
static int check_snapshot(struct btree_trans *trans,
			  struct btree_iter *iter,
			  struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct bch_snapshot s;
	struct bch_subvolume subvol;
	struct bch_snapshot v;
	struct bkey_i_snapshot *u;
	u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
	u32 real_depth;
	struct printbuf buf = PRINTBUF;
	u32 i, id;
	int ret = 0;

	if (k.k->type != KEY_TYPE_snapshot)
		return 0;

	memset(&s, 0, sizeof(s));
	memcpy(&s, k.v, min(sizeof(s), bkey_val_bytes(k.k)));

	if (BCH_SNAPSHOT_DELETED(&s))
		return 0;

	id = le32_to_cpu(s.parent);
	if (id) {
		ret = bch2_snapshot_lookup(trans, id, &v);
		if (bch2_err_matches(ret, ENOENT))
			bch_err(c, "snapshot with nonexistent parent:\n  %s",
				(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
		if (ret)
			goto err;

		if (le32_to_cpu(v.children[0]) != k.k->p.offset &&
		    le32_to_cpu(v.children[1]) != k.k->p.offset) {
			bch_err(c, "snapshot parent %u missing pointer to child %llu",
				id, k.k->p.offset);
			ret = -EINVAL;
			goto err;
		}
	}

	for (i = 0; i < 2 && s.children[i]; i++) {
		id = le32_to_cpu(s.children[i]);

		ret = bch2_snapshot_lookup(trans, id, &v);
		if (bch2_err_matches(ret, ENOENT))
			bch_err(c, "snapshot node %llu has nonexistent child %u",
				k.k->p.offset, id);
		if (ret)
			goto err;

		if (le32_to_cpu(v.parent) != k.k->p.offset) {
			bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)",
				id, le32_to_cpu(v.parent), k.k->p.offset);
			ret = -EINVAL;
			goto err;
		}
	}

	bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
		!BCH_SNAPSHOT_WILL_DELETE(&s);

	if (should_have_subvol) {
		id = le32_to_cpu(s.subvol);
		ret = bch2_subvolume_get(trans, id, false, &subvol);
		if (bch2_err_matches(ret, ENOENT))
			bch_err(c, "snapshot points to nonexistent subvolume:\n  %s",
				(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
		if (ret)
			goto err;

		if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) {
			bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
				k.k->p.offset);
			ret = -EINVAL;
			goto err;
		}
	} else {
		if (fsck_err_on(s.subvol,
				trans, snapshot_should_not_have_subvol,
				"snapshot should not point to subvol:\n%s",
				(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
			u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
			ret = PTR_ERR_OR_ZERO(u);
			if (ret)
				goto err;

			u->v.subvol = 0;
			s = u->v;
		}
	}

	ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree));
	if (ret < 0)
		goto err;

	if (fsck_err_on(!ret,
			trans, snapshot_to_bad_snapshot_tree,
			"snapshot points to missing/incorrect tree:\n%s",
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		ret = snapshot_tree_ptr_repair(trans, iter, k, &s);
		if (ret)
			goto err;
	}
	ret = 0;

	real_depth = bch2_snapshot_depth(c, parent_id);

	if (fsck_err_on(le32_to_cpu(s.depth) != real_depth,
			trans, snapshot_bad_depth,
			"snapshot with incorrect depth field, should be %u:\n%s",
			real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
		ret = PTR_ERR_OR_ZERO(u);
		if (ret)
			goto err;

		u->v.depth = cpu_to_le32(real_depth);
		s = u->v;
	}

	ret = snapshot_skiplist_good(trans, k.k->p.offset, s);
	if (ret < 0)
		goto err;

	if (fsck_err_on(!ret,
			trans, snapshot_bad_skiplist,
			"snapshot with bad skiplist field:\n%s",
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
		ret = PTR_ERR_OR_ZERO(u);
		if (ret)
			goto err;

		for (i = 0; i < ARRAY_SIZE(u->v.skip); i++)
			u->v.skip[i] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent_id));

		bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_le32);
		s = u->v;
	}
	ret = 0;
err:
fsck_err:
	printbuf_exit(&buf);
	return ret;
}

int bch2_check_snapshots(struct bch_fs *c)
{
	/*
	 * We iterate backwards as checking/fixing the depth field requires that
	 * the parent's depth already be correct:
	 */
	int ret = bch2_trans_run(c,
		for_each_btree_key_reverse_commit(trans, iter,
				BTREE_ID_snapshots, POS_MAX,
				BTREE_ITER_prefetch, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			check_snapshot(trans, &iter, k)));
	bch_err_fn(c, ret);
	return ret;
}
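/*
 * Recreate a missing snapshot node that keys still reference: find (or
 * create) its snapshot_tree entry, and relink any subvolume that points
 * at this snapshot ID:
 */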
static int check_snapshot_exists(struct btree_trans *trans, u32 id)
{
	struct bch_fs *c = trans->c;

	/* Do we need to reconstruct the snapshot_tree entry as well? */
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;
	u32 tree_id = 0;

	for_each_btree_key_norestart(trans, iter, BTREE_ID_snapshot_trees, POS_MIN,
				     0, k, ret) {
		if (k.k->type == KEY_TYPE_snapshot_tree &&
		    le32_to_cpu(bkey_s_c_to_snapshot_tree(k).v->root_snapshot) == id) {
			tree_id = k.k->p.offset;
			break;
		}
	}
	bch2_trans_iter_exit(trans, &iter);

	if (ret)
		return ret;

	if (!tree_id) {
		ret = bch2_snapshot_tree_create(trans, id, 0, &tree_id);
		if (ret)
			return ret;
	}

	struct bkey_i_snapshot *snapshot = bch2_trans_kmalloc(trans, sizeof(*snapshot));
	ret = PTR_ERR_OR_ZERO(snapshot);
	if (ret)
		return ret;

	bkey_snapshot_init(&snapshot->k_i);
	snapshot->k.p		= POS(0, id);
	snapshot->v.tree	= cpu_to_le32(tree_id);
	snapshot->v.btime.lo	= cpu_to_le64(bch2_current_time(c));

	for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN,
				     0, k, ret) {
		if (k.k->type == KEY_TYPE_subvolume &&
		    le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot) == id) {
			snapshot->v.subvol = cpu_to_le32(k.k->p.offset);
			SET_BCH_SNAPSHOT_SUBVOL(&snapshot->v, true);
			break;
		}
	}
	bch2_trans_iter_exit(trans, &iter);

	return  bch2_snapshot_table_make_room(c, id) ?:
		bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0);
}

/* Figure out which snapshot nodes belong in the same tree: */
struct snapshot_tree_reconstruct {
	enum btree_id			btree;
	struct bpos			cur_pos;
	snapshot_id_list		cur_ids;
	DARRAY(snapshot_id_list)	trees;
};

static void snapshot_tree_reconstruct_exit(struct snapshot_tree_reconstruct *r)
{
	darray_for_each(r->trees, i)
		darray_exit(i);
	darray_exit(&r->trees);
	darray_exit(&r->cur_ids);
}

static inline bool same_snapshot(struct snapshot_tree_reconstruct *r, struct bpos pos)
{
	return r->btree == BTREE_ID_inodes
		? r->cur_pos.offset == pos.offset
		: r->cur_pos.inode == pos.inode;
}

static inline bool snapshot_id_lists_have_common(snapshot_id_list *l, snapshot_id_list *r)
{
	return darray_find_p(*l, i, snapshot_list_has_id(r, *i)) != NULL;
}

static void snapshot_id_list_to_text(struct printbuf *out, snapshot_id_list *s)
{
	bool first = true;
	darray_for_each(*s, i) {
		if (!first)
			prt_char(out, ' ');
		first = false;
		prt_printf(out, "%u", *i);
	}
}

static int snapshot_tree_reconstruct_next(struct bch_fs *c, struct snapshot_tree_reconstruct *r)
{
	if (r->cur_ids.nr) {
		darray_for_each(r->trees, i)
			if (snapshot_id_lists_have_common(i, &r->cur_ids)) {
				int ret = snapshot_list_merge(c, i, &r->cur_ids);
				if (ret)
					return ret;
				goto out;
			}
		darray_push(&r->trees, r->cur_ids);
		darray_init(&r->cur_ids);
	}
out:
	r->cur_ids.nr = 0;
	return 0;
}

static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct *r, struct bpos pos)
{
	if (!same_snapshot(r, pos))
		snapshot_tree_reconstruct_next(c, r);
	r->cur_pos = pos;
	return snapshot_list_add_nodup(c, &r->cur_ids, pos.snapshot);
}
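/*
 * Scan every btree with snapshots: all the versions of a key at one
 * position must come from the same snapshot tree, so the snapshot IDs
 * seen at each position form a group, and groups that share an ID are
 * merged to recover the trees:
 */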
int bch2_reconstruct_snapshots(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct printbuf buf = PRINTBUF;
	struct snapshot_tree_reconstruct r = {};
	int ret = 0;

	for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
		if (btree_type_has_snapshots(btree)) {
			r.btree = btree;

			ret = for_each_btree_key(trans, iter, btree, POS_MIN,
					BTREE_ITER_all_snapshots|BTREE_ITER_prefetch, k, ({
				get_snapshot_trees(c, &r, k.k->p);
			}));
			if (ret)
				goto err;

			snapshot_tree_reconstruct_next(c, &r);
		}
	}

	darray_for_each(r.trees, t) {
		printbuf_reset(&buf);
		snapshot_id_list_to_text(&buf, t);

		darray_for_each(*t, id) {
			if (fsck_err_on(bch2_snapshot_id_state(c, *id) == SNAPSHOT_ID_empty,
					trans, snapshot_node_missing,
					"snapshot node %u from tree %s missing, recreate?", *id, buf.buf)) {
				if (t->nr > 1) {
					bch_err(c, "cannot reconstruct snapshot trees with multiple nodes");
					ret = bch_err_throw(c, fsck_repair_unimplemented);
					goto err;
				}

				ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
						check_snapshot_exists(trans, *id));
				if (ret)
					goto err;
			}
		}
	}
fsck_err:
err:
	bch2_trans_put(trans);
	snapshot_tree_reconstruct_exit(&r);
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}
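/*
 * Verify that a key's snapshot ID points to a live snapshot node.
 * Returns 1 if the key was deleted, so callers know to skip it:
 */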
int __bch2_check_key_has_snapshot(struct btree_trans *trans,
				  struct btree_iter *iter,
				  struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	int ret = 0;
	enum snapshot_id_state state = bch2_snapshot_id_state(c, k.k->p.snapshot);

	/* Snapshot was definitively deleted, this error is marked autofix */
	if (fsck_err_on(state == SNAPSHOT_ID_deleted,
			trans, bkey_in_deleted_snapshot,
			"key in deleted snapshot %s, delete?",
			(bch2_btree_id_to_text(&buf, iter->btree_id),
			 prt_char(&buf, ' '),
			 bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
		ret = bch2_btree_delete_at(trans, iter,
					   BTREE_UPDATE_internal_snapshot_node) ?: 1;

	if (state == SNAPSHOT_ID_empty) {
		/*
		 * Snapshot missing: we should have caught this with btree_lost_data and
		 * kicked off reconstruct_snapshots, so if we end up here we have no
		 * idea what happened.
		 *
		 * Do not delete unless we know that subvolumes and snapshots
		 * are consistent:
		 *
		 * XXX:
		 *
		 * We could be smarter here, and instead of using the generic
		 * recovery pass ratelimiting, track if there have been any
		 * changes to the snapshots or inodes btrees since those passes
		 * last ran.
		 */
		ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_check_snapshots) ?: ret;
		ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_check_subvols) ?: ret;

		if (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots))
			ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret;

		unsigned repair_flags = FSCK_CAN_IGNORE | (!ret ? FSCK_CAN_FIX : 0);

		if (__fsck_err(trans, repair_flags, bkey_in_missing_snapshot,
			       "key in missing snapshot %s, delete?",
			       (bch2_btree_id_to_text(&buf, iter->btree_id),
				prt_char(&buf, ' '),
				bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
			ret = bch2_btree_delete_at(trans, iter,
						   BTREE_UPDATE_internal_snapshot_node) ?: 1;
		}
	}
fsck_err:
	printbuf_exit(&buf);
	return ret;
}

int __bch2_get_snapshot_overwrites(struct btree_trans *trans,
				   enum btree_id btree, struct bpos pos,
				   snapshot_id_list *s)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	for_each_btree_key_reverse_norestart(trans, iter, btree, bpos_predecessor(pos),
					     BTREE_ITER_all_snapshots, k, ret) {
		if (!bkey_eq(k.k->p, pos))
			break;

		if (!bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot) ||
		    snapshot_list_has_ancestor(c, s, k.k->p.snapshot))
			continue;

		ret = snapshot_list_add(c, s, k.k->p.snapshot);
		if (ret)
			break;
	}
	bch2_trans_iter_exit(trans, &iter);
	if (ret)
		darray_exit(s);

	return ret;
}

/*
 * Mark a snapshot as deleted, for future cleanup:
 */
int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
{
	struct btree_iter iter;
	struct bkey_i_snapshot *s =
		bch2_bkey_get_mut_typed(trans, &iter,
				    BTREE_ID_snapshots, POS(0, id),
				    0, snapshot);
	int ret = PTR_ERR_OR_ZERO(s);
	if (unlikely(ret)) {
		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
					trans->c, "missing snapshot %u", id);
		return ret;
	}

	/* already deleted? */
	if (BCH_SNAPSHOT_WILL_DELETE(&s->v))
		goto err;

	SET_BCH_SNAPSHOT_WILL_DELETE(&s->v, true);
	SET_BCH_SNAPSHOT_SUBVOL(&s->v, false);
	s->v.subvol = 0;
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s)
{
	if (le32_to_cpu(s->children[0]) < le32_to_cpu(s->children[1]))
		swap(s->children[0], s->children[1]);
}
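/*
 * Fully remove a snapshot node: splice it out of the tree topology by
 * pointing its parent at its (at most one) child and vice versa, fixing
 * up the snapshot_tree entry if we were the root; with the
 * snapshot_deletion_v2 format the node is kept as an empty key flagged
 * BCH_SNAPSHOT_DELETED, otherwise it's deleted outright:
 */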
static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter, p_iter = {};
	struct btree_iter c_iter = {};
	struct btree_iter tree_iter = {};
	u32 parent_id, child_id;
	unsigned i;
	int ret = 0;

	struct bkey_i_snapshot *s =
		bch2_bkey_get_mut_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
					BTREE_ITER_intent, snapshot);
	ret = PTR_ERR_OR_ZERO(s);
	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
				"missing snapshot %u", id);

	if (ret)
		goto err;

	BUG_ON(BCH_SNAPSHOT_DELETED(&s->v));
	BUG_ON(s->v.children[1]);

	parent_id = le32_to_cpu(s->v.parent);
	child_id = le32_to_cpu(s->v.children[0]);

	if (parent_id) {
		struct bkey_i_snapshot *parent;

		parent = bch2_bkey_get_mut_typed(trans, &p_iter,
				     BTREE_ID_snapshots, POS(0, parent_id),
				     0, snapshot);
		ret = PTR_ERR_OR_ZERO(parent);
		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
					"missing snapshot %u", parent_id);
		if (unlikely(ret))
			goto err;

		/* find entry in parent->children for node being deleted */
		for (i = 0; i < 2; i++)
			if (le32_to_cpu(parent->v.children[i]) == id)
				break;

		if (bch2_fs_inconsistent_on(i == 2, c,
					"snapshot %u missing child pointer to %u",
					parent_id, id))
			goto err;

		parent->v.children[i] = cpu_to_le32(child_id);

		normalize_snapshot_child_pointers(&parent->v);
	}

	if (child_id) {
		struct bkey_i_snapshot *child;

		child = bch2_bkey_get_mut_typed(trans, &c_iter,
				     BTREE_ID_snapshots, POS(0, child_id),
				     0, snapshot);
		ret = PTR_ERR_OR_ZERO(child);
		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
					"missing snapshot %u", child_id);
		if (unlikely(ret))
			goto err;

		child->v.parent = cpu_to_le32(parent_id);

		if (!child->v.parent) {
			child->v.skip[0] = 0;
			child->v.skip[1] = 0;
			child->v.skip[2] = 0;
		}
	}

	if (!parent_id) {
		/*
		 * We're deleting the root of a snapshot tree: update the
		 * snapshot_tree entry to point to the new root, or delete it if
		 * this is the last snapshot ID in this tree:
		 */
		struct bkey_i_snapshot_tree *s_t;

		BUG_ON(s->v.children[1]);

		s_t = bch2_bkey_get_mut_typed(trans, &tree_iter,
				BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s->v.tree)),
				0, snapshot_tree);
		ret = PTR_ERR_OR_ZERO(s_t);
		if (ret)
			goto err;

		if (s->v.children[0]) {
			s_t->v.root_snapshot = s->v.children[0];
		} else {
			s_t->k.type = KEY_TYPE_deleted;
			set_bkey_val_u64s(&s_t->k, 0);
		}
	}

	if (!bch2_request_incompat_feature(c, bcachefs_metadata_version_snapshot_deletion_v2)) {
		SET_BCH_SNAPSHOT_DELETED(&s->v, true);
		s->v.parent		= 0;
		s->v.children[0]	= 0;
		s->v.children[1]	= 0;
		s->v.subvol		= 0;
		s->v.tree		= 0;
		s->v.depth		= 0;
		s->v.skip[0]		= 0;
		s->v.skip[1]		= 0;
		s->v.skip[2]		= 0;
	} else {
		s->k.type = KEY_TYPE_deleted;
		set_bkey_val_u64s(&s->k, 0);
	}
err:
	bch2_trans_iter_exit(trans, &tree_iter);
	bch2_trans_iter_exit(trans, &p_iter);
	bch2_trans_iter_exit(trans, &c_iter);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
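/*
 * Allocate @nr_snapids new snapshot nodes just below the lowest ID
 * currently in use, so IDs descend from U32_MAX and every node's ID is
 * strictly less than its parent's:
 */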
static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
			  u32 *new_snapids,
			  u32 *snapshot_subvols,
			  unsigned nr_snapids)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_i_snapshot *n;
	struct bkey_s_c k;
	unsigned i, j;
	u32 depth = bch2_snapshot_depth(c, parent);
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
			     POS_MIN, BTREE_ITER_intent);
	k = bch2_btree_iter_peek(trans, &iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	for (i = 0; i < nr_snapids; i++) {
		k = bch2_btree_iter_prev_slot(trans, &iter);
		ret = bkey_err(k);
		if (ret)
			goto err;

		if (!k.k || !k.k->p.offset) {
			ret = bch_err_throw(c, ENOSPC_snapshot_create);
			goto err;
		}

		n = bch2_bkey_alloc(trans, &iter, 0, snapshot);
		ret = PTR_ERR_OR_ZERO(n);
		if (ret)
			goto err;

		n->v.flags	= 0;
		n->v.parent	= cpu_to_le32(parent);
		n->v.subvol	= cpu_to_le32(snapshot_subvols[i]);
		n->v.tree	= cpu_to_le32(tree);
		n->v.depth	= cpu_to_le32(depth);
		n->v.btime.lo	= cpu_to_le64(bch2_current_time(c));
		n->v.btime.hi	= 0;

		for (j = 0; j < ARRAY_SIZE(n->v.skip); j++)
			n->v.skip[j] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent));

		bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_le32);
		SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);

		ret = __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
					   bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0);
		if (ret)
			goto err;

		new_snapids[i]	= iter.pos.offset;
	}
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Create new snapshot IDs as children of an existing snapshot ID:
 */
static int bch2_snapshot_node_create_children(struct btree_trans *trans, u32 parent,
					      u32 *new_snapids,
					      u32 *snapshot_subvols,
					      unsigned nr_snapids)
{
	struct btree_iter iter;
	struct bkey_i_snapshot *n_parent;
	int ret = 0;

	n_parent = bch2_bkey_get_mut_typed(trans, &iter,
			BTREE_ID_snapshots, POS(0, parent),
			0, snapshot);
	ret = PTR_ERR_OR_ZERO(n_parent);
	if (unlikely(ret)) {
		if (bch2_err_matches(ret, ENOENT))
			bch_err(trans->c, "snapshot %u not found", parent);
		return ret;
	}

	if (n_parent->v.children[0] || n_parent->v.children[1]) {
		bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children");
		ret = -EINVAL;
		goto err;
	}

	ret = create_snapids(trans, parent, le32_to_cpu(n_parent->v.tree),
			     new_snapids, snapshot_subvols, nr_snapids);
	if (ret)
		goto err;

	n_parent->v.children[0] = cpu_to_le32(new_snapids[0]);
	n_parent->v.children[1] = cpu_to_le32(new_snapids[1]);
	n_parent->v.subvol = 0;
	SET_BCH_SNAPSHOT_SUBVOL(&n_parent->v, false);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Create a snapshot node that is the root of a new tree:
 */
static int bch2_snapshot_node_create_tree(struct btree_trans *trans,
					  u32 *new_snapids,
					  u32 *snapshot_subvols,
					  unsigned nr_snapids)
{
	struct bkey_i_snapshot_tree *n_tree;
	int ret;

	n_tree = __bch2_snapshot_tree_create(trans);
	ret =   PTR_ERR_OR_ZERO(n_tree) ?:
		create_snapids(trans, 0, n_tree->k.p.offset,
			       new_snapids, snapshot_subvols, nr_snapids);
	if (ret)
		return ret;

	n_tree->v.master_subvol	= cpu_to_le32(snapshot_subvols[0]);
	n_tree->v.root_snapshot	= cpu_to_le32(new_snapids[0]);
	return 0;
}

int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
			      u32 *new_snapids,
			      u32 *snapshot_subvols,
			      unsigned nr_snapids)
{
	BUG_ON((parent == 0) != (nr_snapids == 1));
	BUG_ON((parent != 0) != (nr_snapids == 2));

	return parent
		? bch2_snapshot_node_create_children(trans, parent,
				new_snapids, snapshot_subvols, nr_snapids)
		: bch2_snapshot_node_create_tree(trans,
				new_snapids, snapshot_subvols, nr_snapids);

}

/*
 * If we have an unlinked inode in an internal snapshot node, and the inode
 * really has been deleted in all child snapshots, how does this get cleaned up?
 *
 * first there is the problem of how keys that have been overwritten in all
 * child snapshots get deleted (unimplemented?), but inodes may perhaps be
 * special?
 *
 * also: unlinked inode in internal snapshot appears to not be getting deleted
 * correctly if inode doesn't exist in leaf snapshots
 *
 * solution:
 *
 * for a key in an interior snapshot node that needs work to be done that
 * requires it to be mutated: iterate over all descendant leaf nodes and copy
 * that key to snapshot leaf nodes, where we can mutate it
 */
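/*
 * Deletion machinery: leaf snapshots being deleted simply have their
 * keys dropped, while interior nodes with a single live child have
 * their keys moved to that child (found by __live_child()) before the
 * node itself is removed:
 */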
static inline u32 interior_delete_has_id(interior_delete_list *l, u32 id)
{
	struct snapshot_interior_delete *i = darray_find_p(*l, i, i->id == id);
	return i ? i->live_child : 0;
}

static unsigned __live_child(struct snapshot_table *t, u32 id,
			     snapshot_id_list *delete_leaves,
			     interior_delete_list *delete_interior)
{
	struct snapshot_t *s = __snapshot_t(t, id);
	if (!s)
		return 0;

	for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++)
		if (s->children[i] &&
		    !snapshot_list_has_id(delete_leaves, s->children[i]) &&
		    !interior_delete_has_id(delete_interior, s->children[i]))
			return s->children[i];

	for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++) {
		u32 live_child = s->children[i]
			? __live_child(t, s->children[i], delete_leaves, delete_interior)
			: 0;
		if (live_child)
			return live_child;
	}

	return 0;
}

static unsigned live_child(struct bch_fs *c, u32 id)
{
	struct snapshot_delete *d = &c->snapshot_delete;

	guard(rcu)();
	return __live_child(rcu_dereference(c->snapshots), id,
			    &d->delete_leaves, &d->delete_interior);
}

static bool snapshot_id_dying(struct snapshot_delete *d, unsigned id)
{
	return snapshot_list_has_id(&d->delete_leaves, id) ||
		interior_delete_has_id(&d->delete_interior, id) != 0;
}

static int delete_dead_snapshots_process_key(struct btree_trans *trans,
					     struct btree_iter *iter,
					     struct bkey_s_c k)
{
	struct snapshot_delete *d = &trans->c->snapshot_delete;

	if (snapshot_list_has_id(&d->delete_leaves, k.k->p.snapshot))
		return bch2_btree_delete_at(trans, iter,
					    BTREE_UPDATE_internal_snapshot_node);

	u32 live_child = interior_delete_has_id(&d->delete_interior, k.k->p.snapshot);
	if (live_child) {
		struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
		int ret = PTR_ERR_OR_ZERO(new);
		if (ret)
			return ret;

		new->k.p.snapshot = live_child;

		struct btree_iter dst_iter;
		struct bkey_s_c dst_k = bch2_bkey_get_iter(trans, &dst_iter,
							   iter->btree_id, new->k.p,
							   BTREE_ITER_all_snapshots|
							   BTREE_ITER_intent);
		ret = bkey_err(dst_k);
		if (ret)
			return ret;

		ret =   (bkey_deleted(dst_k.k)
			 ? bch2_trans_update(trans, &dst_iter, new,
					     BTREE_UPDATE_internal_snapshot_node)
			 : 0) ?:
			bch2_btree_delete_at(trans, iter,
					     BTREE_UPDATE_internal_snapshot_node);
		bch2_trans_iter_exit(trans, &dst_iter);
		return ret;
	}

	return 0;
}
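/*
 * Big scans can skip whole inode ranges that can't contain keys in
 * dying snapshots: if the current key's snapshot tree isn't one we're
 * deleting from, advance the iterator past the rest of this inode:
 */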
static bool skip_unrelated_snapshot_tree(struct btree_trans *trans, struct btree_iter *iter, u64 *prev_inum)
{
	struct bch_fs *c = trans->c;
	struct snapshot_delete *d = &c->snapshot_delete;

	u64 inum = iter->btree_id != BTREE_ID_inodes
		? iter->pos.inode
		: iter->pos.offset;

	if (*prev_inum == inum)
		return false;

	*prev_inum = inum;

	bool ret = !snapshot_list_has_id(&d->deleting_from_trees,
					 bch2_snapshot_tree(c, iter->pos.snapshot));
	if (unlikely(ret)) {
		struct bpos pos = iter->pos;
		pos.snapshot = 0;
		if (iter->btree_id != BTREE_ID_inodes)
			pos.offset = U64_MAX;
		bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(pos));
	}

	return ret;
}

static int delete_dead_snapshot_keys_v1(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;
	struct snapshot_delete *d = &c->snapshot_delete;

	for (d->pos.btree = 0; d->pos.btree < BTREE_ID_NR; d->pos.btree++) {
		struct disk_reservation res = { 0 };
		u64 prev_inum = 0;

		d->pos.pos = POS_MIN;

		if (!btree_type_has_snapshots(d->pos.btree))
			continue;

		int ret = for_each_btree_key_commit(trans, iter,
				d->pos.btree, POS_MIN,
				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
				&res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
			d->pos.pos = iter.pos;

			if (skip_unrelated_snapshot_tree(trans, &iter, &prev_inum))
				continue;

			delete_dead_snapshots_process_key(trans, &iter, k);
		}));

		bch2_disk_reservation_put(c, &res);

		if (ret)
			return ret;
	}

	return 0;
}

static int delete_dead_snapshot_keys_range(struct btree_trans *trans, enum btree_id btree,
					   struct bpos start, struct bpos end)
{
	struct bch_fs *c = trans->c;
	struct snapshot_delete *d = &c->snapshot_delete;
	struct disk_reservation res = { 0 };

	d->pos.btree	= btree;
	d->pos.pos	= POS_MIN;

	int ret = for_each_btree_key_max_commit(trans, iter,
			btree, start, end,
			BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
			&res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
		d->pos.pos = iter.pos;
		delete_dead_snapshots_process_key(trans, &iter, k);
	}));

	bch2_disk_reservation_put(c, &res);
	return ret;
}
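/*
 * v2 walks the inodes btree and, when it finds an inode in a dying
 * snapshot, scans just that inode's extents, dirents and xattrs -
 * instead of v1's full sweep of every btree with snapshots:
 */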
static int delete_dead_snapshot_keys_v2(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;
	struct snapshot_delete *d = &c->snapshot_delete;
	struct disk_reservation res = { 0 };
	u64 prev_inum = 0;
	int ret = 0;

	struct btree_iter iter;
	bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, POS_MIN,
			     BTREE_ITER_prefetch|BTREE_ITER_all_snapshots);

	while (1) {
		struct bkey_s_c k;
		ret = lockrestart_do(trans,
				bkey_err(k = bch2_btree_iter_peek(trans, &iter)));
		if (ret)
			break;

		if (!k.k)
			break;

		d->pos.btree	= iter.btree_id;
		d->pos.pos	= iter.pos;

		if (skip_unrelated_snapshot_tree(trans, &iter, &prev_inum))
			continue;

		if (snapshot_id_dying(d, k.k->p.snapshot)) {
			struct bpos start	= POS(k.k->p.offset, 0);
			struct bpos end		= POS(k.k->p.offset, U64_MAX);

			ret =   delete_dead_snapshot_keys_range(trans, BTREE_ID_extents, start, end) ?:
				delete_dead_snapshot_keys_range(trans, BTREE_ID_dirents, start, end) ?:
				delete_dead_snapshot_keys_range(trans, BTREE_ID_xattrs, start, end);
			if (ret)
				break;

			bch2_btree_iter_set_pos(trans, &iter, POS(0, k.k->p.offset + 1));
		} else {
			bch2_btree_iter_advance(trans, &iter);
		}
	}
	bch2_trans_iter_exit(trans, &iter);

	if (ret)
		goto err;

	prev_inum = 0;
	ret = for_each_btree_key_commit(trans, iter,
			BTREE_ID_inodes, POS_MIN,
			BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
			&res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
		d->pos.btree	= iter.btree_id;
		d->pos.pos	= iter.pos;

		if (skip_unrelated_snapshot_tree(trans, &iter, &prev_inum))
			continue;

		delete_dead_snapshots_process_key(trans, &iter, k);
	}));
err:
	bch2_disk_reservation_put(c, &res);
	return ret;
}

/*
 * For a given snapshot, if it doesn't have a subvolume that points to it, and
 * it doesn't have child snapshot nodes - it's now redundant and we can mark it
 * as deleted.
 */
static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s_c k)
{
	if (k.k->type != KEY_TYPE_snapshot)
		return 0;

	struct bch_fs *c = trans->c;
	struct snapshot_delete *d = &c->snapshot_delete;
	struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
	unsigned live_children = 0;
	int ret = 0;

	if (BCH_SNAPSHOT_SUBVOL(s.v))
		return 0;

	if (BCH_SNAPSHOT_DELETED(s.v))
		return 0;

	mutex_lock(&d->progress_lock);
	for (unsigned i = 0; i < 2; i++) {
		u32 child = le32_to_cpu(s.v->children[i]);

		live_children += child &&
			!snapshot_list_has_id(&d->delete_leaves, child);
	}

	u32 tree = bch2_snapshot_tree(c, s.k->p.offset);

	if (live_children == 0) {
		ret =   snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?:
			snapshot_list_add(c, &d->delete_leaves, s.k->p.offset);
	} else if (live_children == 1) {
		struct snapshot_interior_delete n = {
			.id		= s.k->p.offset,
			.live_child	= live_child(c, s.k->p.offset),
		};

		if (!n.live_child) {
			bch_err(c, "error finding live child of snapshot %u", n.id);
			ret = -EINVAL;
		} else {
			ret =   snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?:
				darray_push(&d->delete_interior, n);
		}
	}
	mutex_unlock(&d->progress_lock);

	return ret;
}

static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
						interior_delete_list *skip)
{
	guard(rcu)();
	while (interior_delete_has_id(skip, id))
		id = __bch2_snapshot_parent(c, id);

	while (n--) {
		do {
			id = __bch2_snapshot_parent(c, id);
		} while (interior_delete_has_id(skip, id));
	}

	return id;
}
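/*
 * After interior nodes are deleted, surviving descendants need their
 * depth reduced by the number of ancestors that went away, and any
 * skiplist entries that pointed at deleted nodes re-pointed at live
 * ancestors:
 */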
static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
					      struct btree_iter *iter, struct bkey_s_c k,
					      interior_delete_list *deleted)
{
	struct bch_fs *c = trans->c;
	u32 nr_deleted_ancestors = 0;
	struct bkey_i_snapshot *s;
	int ret;

	if (!bch2_snapshot_exists(c, k.k->p.offset))
		return 0;

	if (k.k->type != KEY_TYPE_snapshot)
		return 0;

	if (interior_delete_has_id(deleted, k.k->p.offset))
		return 0;

	s = bch2_bkey_make_mut_noupdate_typed(trans, k, snapshot);
	ret = PTR_ERR_OR_ZERO(s);
	if (ret)
		return ret;

	darray_for_each(*deleted, i)
		nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, i->id);

	if (!nr_deleted_ancestors)
		return 0;

	le32_add_cpu(&s->v.depth, -nr_deleted_ancestors);

	if (!s->v.depth) {
		s->v.skip[0] = 0;
		s->v.skip[1] = 0;
		s->v.skip[2] = 0;
	} else {
		u32 depth = le32_to_cpu(s->v.depth);
		u32 parent = bch2_snapshot_parent(c, s->k.p.offset);

		for (unsigned j = 0; j < ARRAY_SIZE(s->v.skip); j++) {
			u32 id = le32_to_cpu(s->v.skip[j]);

			if (interior_delete_has_id(deleted, id)) {
				id = bch2_snapshot_nth_parent_skip(c,
							parent,
							depth > 1
							? get_random_u32_below(depth - 1)
							: 0,
							deleted);
				s->v.skip[j] = cpu_to_le32(id);
			}
		}

		bubble_sort(s->v.skip, ARRAY_SIZE(s->v.skip), cmp_le32);
	}

	return bch2_trans_update(trans, iter, &s->k_i, 0);
}

static void bch2_snapshot_delete_nodes_to_text(struct printbuf *out, struct snapshot_delete *d)
{
	prt_printf(out, "deleting from trees");
	darray_for_each(d->deleting_from_trees, i)
		prt_printf(out, " %u", *i);

	prt_printf(out, "deleting leaves");
	darray_for_each(d->delete_leaves, i)
		prt_printf(out, " %u", *i);
	prt_newline(out);

	prt_printf(out, "interior");
	darray_for_each(d->delete_interior, i)
		prt_printf(out, " %u->%u", i->id, i->live_child);
	prt_newline(out);
}
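/*
 * The main deletion path: find deletable nodes (no subvolume and no
 * live children, or a single live child), log them, delete or move
 * their keys, remove the leaves, fix up depth/skiplist fields in the
 * survivors, then remove the interior nodes:
 */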
int __bch2_delete_dead_snapshots(struct bch_fs *c)
{
	struct snapshot_delete *d = &c->snapshot_delete;
	int ret = 0;

	if (!mutex_trylock(&d->lock))
		return 0;

	if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
		goto out_unlock;

	struct btree_trans *trans = bch2_trans_get(c);

	/*
	 * For every snapshot node: If we have no live children and it's not
	 * pointed to by a subvolume, delete it:
	 */
	d->running = true;
	d->pos = BBPOS_MIN;

	ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k,
		check_should_delete_snapshot(trans, k));
	if (!bch2_err_matches(ret, EROFS))
		bch_err_msg(c, ret, "walking snapshots");
	if (ret)
		goto err;

	if (!d->delete_leaves.nr && !d->delete_interior.nr)
		goto err;

	{
		struct printbuf buf = PRINTBUF;
		bch2_snapshot_delete_nodes_to_text(&buf, d);

		ret = commit_do(trans, NULL, NULL, 0, bch2_trans_log_msg(trans, &buf));
		printbuf_exit(&buf);
		if (ret)
			goto err;
	}

	ret = !bch2_request_incompat_feature(c, bcachefs_metadata_version_snapshot_deletion_v2)
		? delete_dead_snapshot_keys_v2(trans)
		: delete_dead_snapshot_keys_v1(trans);
	if (!bch2_err_matches(ret, EROFS))
		bch_err_msg(c, ret, "deleting keys from dying snapshots");
	if (ret)
		goto err;

	darray_for_each(d->delete_leaves, i) {
		ret = commit_do(trans, NULL, NULL, 0,
			bch2_snapshot_node_delete(trans, *i));
		if (!bch2_err_matches(ret, EROFS))
			bch_err_msg(c, ret, "deleting snapshot %u", *i);
		if (ret)
			goto err;
	}

	/*
	 * Fixing children of deleted snapshots can't be done completely
	 * atomically, if we crash between here and when we delete the interior
	 * nodes some depth fields will be off:
	 */
	ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
				  BTREE_ITER_intent, k,
				  NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &d->delete_interior));
	if (ret)
		goto err;

	darray_for_each(d->delete_interior, i) {
		ret = commit_do(trans, NULL, NULL, 0,
			bch2_snapshot_node_delete(trans, i->id));
		if (!bch2_err_matches(ret, EROFS))
			bch_err_msg(c, ret, "deleting snapshot %u", i->id);
		if (ret)
			goto err;
	}
err:
	mutex_lock(&d->progress_lock);
	darray_exit(&d->deleting_from_trees);
	darray_exit(&d->delete_interior);
	darray_exit(&d->delete_leaves);
	d->running = false;
	mutex_unlock(&d->progress_lock);
	bch2_trans_put(trans);

	bch2_recovery_pass_set_no_ratelimit(c, BCH_RECOVERY_PASS_check_snapshots);
out_unlock:
	mutex_unlock(&d->lock);
	if (!bch2_err_matches(ret, EROFS))
		bch_err_fn(c, ret);
	return ret;
}

int bch2_delete_dead_snapshots(struct bch_fs *c)
{
	if (!c->opts.auto_snapshot_deletion)
		return 0;

	return __bch2_delete_dead_snapshots(c);
}

void bch2_delete_dead_snapshots_work(struct work_struct *work)
{
	struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete.work);

	set_worker_desc("bcachefs-delete-dead-snapshots/%s", c->name);

	bch2_delete_dead_snapshots(c);
	enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots);
}

void bch2_delete_dead_snapshots_async(struct bch_fs *c)
{
	if (!c->opts.auto_snapshot_deletion)
		return;

	if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_delete_dead_snapshots))
		return;

	BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags));

	if (!queue_work(system_long_wq, &c->snapshot_delete.work))
		enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots);
}

void bch2_snapshot_delete_status_to_text(struct printbuf *out, struct bch_fs *c)
{
	struct snapshot_delete *d = &c->snapshot_delete;

	if (!d->running) {
		prt_str(out, "(not running)");
		return;
	}

	mutex_lock(&d->progress_lock);
	bch2_snapshot_delete_nodes_to_text(out, d);

	bch2_bbpos_to_text(out, d->pos);
	mutex_unlock(&d->progress_lock);
}
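/*
 * Returns 1 if the key at @pos has been overwritten in a descendant
 * snapshot (descendants have smaller snapshot IDs, so they sort just
 * before @pos at the same position), 0 if not, or a negative error:
 */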
int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
				       enum btree_id id,
				       struct bpos pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	for_each_btree_key_reverse_norestart(trans, iter, id, bpos_predecessor(pos),
					     BTREE_ITER_not_extents|
					     BTREE_ITER_all_snapshots,
					     k, ret) {
		if (!bkey_eq(pos, k.k->p))
			break;

		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot)) {
			ret = 1;
			break;
		}
	}
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}

static bool interior_snapshot_needs_delete(struct bkey_s_c_snapshot snap)
{
	/* If there's one child, it's redundant and keys will be moved to the child */
	return !!snap.v->children[0] + !!snap.v->children[1] == 1;
}

static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k)
{
	if (k.k->type != KEY_TYPE_snapshot)
		return 0;

	struct bkey_s_c_snapshot snap = bkey_s_c_to_snapshot(k);
	if (BCH_SNAPSHOT_WILL_DELETE(snap.v) ||
	    interior_snapshot_needs_delete(snap))
		set_bit(BCH_FS_need_delete_dead_snapshots, &trans->c->flags);

	return 0;
}

int bch2_snapshots_read(struct bch_fs *c)
{
	/*
	 * Initializing the is_ancestor bitmaps requires ancestors to already be
	 * initialized - so mark in reverse:
	 */
	int ret = bch2_trans_run(c,
		for_each_btree_key_reverse(trans, iter, BTREE_ID_snapshots,
				   POS_MAX, 0, k,
			__bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
			bch2_check_snapshot_needs_deletion(trans, k)));
	bch_err_fn(c, ret);

	/*
	 * It's important that we check if we need to reconstruct snapshots
	 * before going RW, so we mark that pass as required in the superblock -
	 * otherwise, we could end up deleting keys with missing snapshot nodes
	 * instead
	 */
	BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
	       test_bit(BCH_FS_may_go_rw, &c->flags));

	return ret;
}

void bch2_fs_snapshots_exit(struct bch_fs *c)
{
	kvfree(rcu_dereference_protected(c->snapshots, true));
}

void bch2_fs_snapshots_init_early(struct bch_fs *c)
{
	INIT_WORK(&c->snapshot_delete.work, bch2_delete_dead_snapshots_work);
	mutex_init(&c->snapshot_delete.lock);
	mutex_init(&c->snapshot_delete.progress_lock);
	mutex_init(&c->snapshots_unlinked_lock);
}