1 // SPDX-License-Identifier: GPL-2.0 2 3 #include "bcachefs.h" 4 #include "btree_key_cache.h" 5 #include "btree_update.h" 6 #include "errcode.h" 7 #include "error.h" 8 #include "fs.h" 9 #include "snapshot.h" 10 #include "subvolume.h" 11 12 #include <linux/random.h> 13 14 static int bch2_subvolume_delete(struct btree_trans *, u32); 15 16 static struct bpos subvolume_children_pos(struct bkey_s_c k) 17 { 18 if (k.k->type != KEY_TYPE_subvolume) 19 return POS_MIN; 20 21 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); 22 if (!s.v->fs_path_parent) 23 return POS_MIN; 24 return POS(le32_to_cpu(s.v->fs_path_parent), s.k->p.offset); 25 } 26 27 static int check_subvol(struct btree_trans *trans, 28 struct btree_iter *iter, 29 struct bkey_s_c k) 30 { 31 struct bch_fs *c = trans->c; 32 struct bkey_s_c_subvolume subvol; 33 struct btree_iter subvol_children_iter = {}; 34 struct bch_snapshot snapshot; 35 struct printbuf buf = PRINTBUF; 36 unsigned snapid; 37 int ret = 0; 38 39 if (k.k->type != KEY_TYPE_subvolume) 40 return 0; 41 42 subvol = bkey_s_c_to_subvolume(k); 43 snapid = le32_to_cpu(subvol.v->snapshot); 44 ret = bch2_snapshot_lookup(trans, snapid, &snapshot); 45 46 if (bch2_err_matches(ret, ENOENT)) 47 bch_err(c, "subvolume %llu points to nonexistent snapshot %u", 48 k.k->p.offset, snapid); 49 if (ret) 50 return ret; 51 52 if (BCH_SUBVOLUME_UNLINKED(subvol.v)) { 53 ret = bch2_subvolume_delete(trans, iter->pos.offset); 54 bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset); 55 return ret ?: -BCH_ERR_transaction_restart_nested; 56 } 57 58 if (fsck_err_on(subvol.k->p.offset == BCACHEFS_ROOT_SUBVOL && 59 subvol.v->fs_path_parent, 60 c, subvol_root_fs_path_parent_nonzero, 61 "root subvolume has nonzero fs_path_parent\n%s", 62 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 63 struct bkey_i_subvolume *n = 64 bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume); 65 ret = PTR_ERR_OR_ZERO(n); 66 if (ret) 67 goto err; 68 69 n->v.fs_path_parent = 0; 70 } 71 72 if (subvol.v->fs_path_parent) { 73 struct bpos pos = subvolume_children_pos(k); 74 75 struct bkey_s_c subvol_children_k = 76 bch2_bkey_get_iter(trans, &subvol_children_iter, 77 BTREE_ID_subvolume_children, pos, 0); 78 ret = bkey_err(subvol_children_k); 79 if (ret) 80 goto err; 81 82 if (fsck_err_on(subvol_children_k.k->type != KEY_TYPE_set, 83 c, subvol_children_not_set, 84 "subvolume not set in subvolume_children btree at %llu:%llu\n%s", 85 pos.inode, pos.offset, 86 (printbuf_reset(&buf), 87 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 88 ret = bch2_btree_bit_mod(trans, BTREE_ID_subvolume_children, pos, true); 89 if (ret) 90 goto err; 91 } 92 } 93 94 struct bch_inode_unpacked inode; 95 struct btree_iter inode_iter = {}; 96 ret = bch2_inode_peek_nowarn(trans, &inode_iter, &inode, 97 (subvol_inum) { k.k->p.offset, le64_to_cpu(subvol.v->inode) }, 98 0); 99 bch2_trans_iter_exit(trans, &inode_iter); 100 101 if (ret && !bch2_err_matches(ret, ENOENT)) 102 return ret; 103 104 if (fsck_err_on(ret, c, subvol_to_missing_root, 105 "subvolume %llu points to missing subvolume root %llu:%u", 106 k.k->p.offset, le64_to_cpu(subvol.v->inode), 107 le32_to_cpu(subvol.v->snapshot))) { 108 ret = bch2_subvolume_delete(trans, iter->pos.offset); 109 bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset); 110 return ret ?: -BCH_ERR_transaction_restart_nested; 111 } 112 113 if (fsck_err_on(inode.bi_subvol != subvol.k->p.offset, 114 c, subvol_root_wrong_bi_subvol, 115 "subvol root %llu:%u has wrong bi_subvol field: got %u, should be %llu", 116 inode.bi_inum, inode_iter.k.p.snapshot, 117 inode.bi_subvol, subvol.k->p.offset)) { 118 inode.bi_subvol = subvol.k->p.offset; 119 ret = __bch2_fsck_write_inode(trans, &inode, le32_to_cpu(subvol.v->snapshot)); 120 if (ret) 121 goto err; 122 } 123 124 if (!BCH_SUBVOLUME_SNAP(subvol.v)) { 125 u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot)); 126 u32 snapshot_tree; 127 struct bch_snapshot_tree st; 128 129 rcu_read_lock(); 130 snapshot_tree = snapshot_t(c, snapshot_root)->tree; 131 rcu_read_unlock(); 132 133 ret = bch2_snapshot_tree_lookup(trans, snapshot_tree, &st); 134 135 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, 136 "%s: snapshot tree %u not found", __func__, snapshot_tree); 137 138 if (ret) 139 return ret; 140 141 if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, 142 c, subvol_not_master_and_not_snapshot, 143 "subvolume %llu is not set as snapshot but is not master subvolume", 144 k.k->p.offset)) { 145 struct bkey_i_subvolume *s = 146 bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume); 147 ret = PTR_ERR_OR_ZERO(s); 148 if (ret) 149 return ret; 150 151 SET_BCH_SUBVOLUME_SNAP(&s->v, true); 152 } 153 } 154 err: 155 fsck_err: 156 bch2_trans_iter_exit(trans, &subvol_children_iter); 157 printbuf_exit(&buf); 158 return ret; 159 } 160 161 int bch2_check_subvols(struct bch_fs *c) 162 { 163 int ret = bch2_trans_run(c, 164 for_each_btree_key_commit(trans, iter, 165 BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k, 166 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 167 check_subvol(trans, &iter, k))); 168 bch_err_fn(c, ret); 169 return ret; 170 } 171 172 static int check_subvol_child(struct btree_trans *trans, 173 struct btree_iter *child_iter, 174 struct bkey_s_c child_k) 175 { 176 struct bch_fs *c = trans->c; 177 struct bch_subvolume s; 178 int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, child_k.k->p.offset), 179 0, subvolume, &s); 180 if (ret && !bch2_err_matches(ret, ENOENT)) 181 return ret; 182 183 if (fsck_err_on(ret || 184 le32_to_cpu(s.fs_path_parent) != child_k.k->p.inode, 185 c, subvol_children_bad, 186 "incorrect entry in subvolume_children btree %llu:%llu", 187 child_k.k->p.inode, child_k.k->p.offset)) { 188 ret = bch2_btree_delete_at(trans, child_iter, 0); 189 if (ret) 190 goto err; 191 } 192 err: 193 fsck_err: 194 return ret; 195 } 196 197 int bch2_check_subvol_children(struct bch_fs *c) 198 { 199 int ret = bch2_trans_run(c, 200 for_each_btree_key_commit(trans, iter, 201 BTREE_ID_subvolume_children, POS_MIN, BTREE_ITER_PREFETCH, k, 202 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 203 check_subvol_child(trans, &iter, k))); 204 bch_err_fn(c, ret); 205 return 0; 206 } 207 208 /* Subvolumes: */ 209 210 int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k, 211 enum bkey_invalid_flags flags, struct printbuf *err) 212 { 213 int ret = 0; 214 215 bkey_fsck_err_on(bkey_lt(k.k->p, SUBVOL_POS_MIN) || 216 bkey_gt(k.k->p, SUBVOL_POS_MAX), c, err, 217 subvol_pos_bad, 218 "invalid pos"); 219 fsck_err: 220 return ret; 221 } 222 223 void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c, 224 struct bkey_s_c k) 225 { 226 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); 227 228 prt_printf(out, "root %llu snapshot id %u", 229 le64_to_cpu(s.v->inode), 230 le32_to_cpu(s.v->snapshot)); 231 232 if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, creation_parent)) { 233 prt_printf(out, " creation_parent %u", le32_to_cpu(s.v->creation_parent)); 234 prt_printf(out, " fs_parent %u", le32_to_cpu(s.v->fs_path_parent)); 235 } 236 } 237 238 static int subvolume_children_mod(struct btree_trans *trans, struct bpos pos, bool set) 239 { 240 return !bpos_eq(pos, POS_MIN) 241 ? bch2_btree_bit_mod(trans, BTREE_ID_subvolume_children, pos, set) 242 : 0; 243 } 244 245 int bch2_subvolume_trigger(struct btree_trans *trans, 246 enum btree_id btree_id, unsigned level, 247 struct bkey_s_c old, struct bkey_s new, 248 unsigned flags) 249 { 250 if (flags & BTREE_TRIGGER_TRANSACTIONAL) { 251 struct bpos children_pos_old = subvolume_children_pos(old); 252 struct bpos children_pos_new = subvolume_children_pos(new.s_c); 253 254 if (!bpos_eq(children_pos_old, children_pos_new)) { 255 int ret = subvolume_children_mod(trans, children_pos_old, false) ?: 256 subvolume_children_mod(trans, children_pos_new, true); 257 if (ret) 258 return ret; 259 } 260 } 261 262 return 0; 263 } 264 265 int bch2_subvol_has_children(struct btree_trans *trans, u32 subvol) 266 { 267 struct btree_iter iter; 268 269 bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolume_children, POS(subvol, 0), 0); 270 struct bkey_s_c k = bch2_btree_iter_peek(&iter); 271 bch2_trans_iter_exit(trans, &iter); 272 273 return bkey_err(k) ?: k.k && k.k->p.inode == subvol 274 ? -BCH_ERR_ENOTEMPTY_subvol_not_empty 275 : 0; 276 } 277 278 static __always_inline int 279 bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol, 280 bool inconsistent_if_not_found, 281 int iter_flags, 282 struct bch_subvolume *s) 283 { 284 int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol), 285 iter_flags, subvolume, s); 286 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT) && 287 inconsistent_if_not_found, 288 trans->c, "missing subvolume %u", subvol); 289 return ret; 290 } 291 292 int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol, 293 bool inconsistent_if_not_found, 294 int iter_flags, 295 struct bch_subvolume *s) 296 { 297 return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s); 298 } 299 300 int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol) 301 { 302 struct bch_subvolume s; 303 int ret = bch2_subvolume_get_inlined(trans, subvol, true, 0, &s); 304 if (ret) 305 return ret; 306 307 if (BCH_SUBVOLUME_RO(&s)) 308 return -EROFS; 309 return 0; 310 } 311 312 int bch2_subvol_is_ro(struct bch_fs *c, u32 subvol) 313 { 314 return bch2_trans_do(c, NULL, NULL, 0, 315 bch2_subvol_is_ro_trans(trans, subvol)); 316 } 317 318 int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, 319 struct bch_subvolume *subvol) 320 { 321 struct bch_snapshot snap; 322 323 return bch2_snapshot_lookup(trans, snapshot, &snap) ?: 324 bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol); 325 } 326 327 int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, 328 u32 *snapid) 329 { 330 struct btree_iter iter; 331 struct bkey_s_c_subvolume subvol; 332 int ret; 333 334 subvol = bch2_bkey_get_iter_typed(trans, &iter, 335 BTREE_ID_subvolumes, POS(0, subvolid), 336 BTREE_ITER_CACHED|BTREE_ITER_WITH_UPDATES, 337 subvolume); 338 ret = bkey_err(subvol); 339 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, 340 "missing subvolume %u", subvolid); 341 342 if (likely(!ret)) 343 *snapid = le32_to_cpu(subvol.v->snapshot); 344 bch2_trans_iter_exit(trans, &iter); 345 return ret; 346 } 347 348 static int bch2_subvolume_reparent(struct btree_trans *trans, 349 struct btree_iter *iter, 350 struct bkey_s_c k, 351 u32 old_parent, u32 new_parent) 352 { 353 struct bkey_i_subvolume *s; 354 int ret; 355 356 if (k.k->type != KEY_TYPE_subvolume) 357 return 0; 358 359 if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, creation_parent) && 360 le32_to_cpu(bkey_s_c_to_subvolume(k).v->creation_parent) != old_parent) 361 return 0; 362 363 s = bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume); 364 ret = PTR_ERR_OR_ZERO(s); 365 if (ret) 366 return ret; 367 368 s->v.creation_parent = cpu_to_le32(new_parent); 369 return 0; 370 } 371 372 /* 373 * Separate from the snapshot tree in the snapshots btree, we record the tree 374 * structure of how snapshot subvolumes were created - the parent subvolume of 375 * each snapshot subvolume. 376 * 377 * When a subvolume is deleted, we scan for child subvolumes and reparant them, 378 * to avoid dangling references: 379 */ 380 static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_delete) 381 { 382 struct bch_subvolume s; 383 384 return lockrestart_do(trans, 385 bch2_subvolume_get(trans, subvolid_to_delete, true, 386 BTREE_ITER_CACHED, &s)) ?: 387 for_each_btree_key_commit(trans, iter, 388 BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k, 389 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 390 bch2_subvolume_reparent(trans, &iter, k, 391 subvolid_to_delete, le32_to_cpu(s.creation_parent))); 392 } 393 394 /* 395 * Delete subvolume, mark snapshot ID as deleted, queue up snapshot 396 * deletion/cleanup: 397 */ 398 static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) 399 { 400 struct btree_iter iter; 401 struct bkey_s_c_subvolume subvol; 402 u32 snapid; 403 int ret = 0; 404 405 subvol = bch2_bkey_get_iter_typed(trans, &iter, 406 BTREE_ID_subvolumes, POS(0, subvolid), 407 BTREE_ITER_CACHED|BTREE_ITER_INTENT, 408 subvolume); 409 ret = bkey_err(subvol); 410 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, 411 "missing subvolume %u", subvolid); 412 if (ret) 413 return ret; 414 415 snapid = le32_to_cpu(subvol.v->snapshot); 416 417 ret = bch2_btree_delete_at(trans, &iter, 0) ?: 418 bch2_snapshot_node_set_deleted(trans, snapid); 419 bch2_trans_iter_exit(trans, &iter); 420 return ret; 421 } 422 423 static int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) 424 { 425 return bch2_subvolumes_reparent(trans, subvolid) ?: 426 commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 427 __bch2_subvolume_delete(trans, subvolid)); 428 } 429 430 static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work) 431 { 432 struct bch_fs *c = container_of(work, struct bch_fs, 433 snapshot_wait_for_pagecache_and_delete_work); 434 snapshot_id_list s; 435 u32 *id; 436 int ret = 0; 437 438 while (!ret) { 439 mutex_lock(&c->snapshots_unlinked_lock); 440 s = c->snapshots_unlinked; 441 darray_init(&c->snapshots_unlinked); 442 mutex_unlock(&c->snapshots_unlinked_lock); 443 444 if (!s.nr) 445 break; 446 447 bch2_evict_subvolume_inodes(c, &s); 448 449 for (id = s.data; id < s.data + s.nr; id++) { 450 ret = bch2_trans_run(c, bch2_subvolume_delete(trans, *id)); 451 bch_err_msg(c, ret, "deleting subvolume %u", *id); 452 if (ret) 453 break; 454 } 455 456 darray_exit(&s); 457 } 458 459 bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache); 460 } 461 462 struct subvolume_unlink_hook { 463 struct btree_trans_commit_hook h; 464 u32 subvol; 465 }; 466 467 static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans, 468 struct btree_trans_commit_hook *_h) 469 { 470 struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h); 471 struct bch_fs *c = trans->c; 472 int ret = 0; 473 474 mutex_lock(&c->snapshots_unlinked_lock); 475 if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol)) 476 ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol); 477 mutex_unlock(&c->snapshots_unlinked_lock); 478 479 if (ret) 480 return ret; 481 482 if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache)) 483 return -EROFS; 484 485 if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_delete_work)) 486 bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache); 487 return 0; 488 } 489 490 int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid) 491 { 492 struct btree_iter iter; 493 struct bkey_i_subvolume *n; 494 struct subvolume_unlink_hook *h; 495 int ret = 0; 496 497 h = bch2_trans_kmalloc(trans, sizeof(*h)); 498 ret = PTR_ERR_OR_ZERO(h); 499 if (ret) 500 return ret; 501 502 h->h.fn = bch2_subvolume_wait_for_pagecache_and_delete_hook; 503 h->subvol = subvolid; 504 bch2_trans_commit_hook(trans, &h->h); 505 506 n = bch2_bkey_get_mut_typed(trans, &iter, 507 BTREE_ID_subvolumes, POS(0, subvolid), 508 BTREE_ITER_CACHED, subvolume); 509 ret = PTR_ERR_OR_ZERO(n); 510 if (unlikely(ret)) { 511 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, 512 "missing subvolume %u", subvolid); 513 return ret; 514 } 515 516 SET_BCH_SUBVOLUME_UNLINKED(&n->v, true); 517 bch2_trans_iter_exit(trans, &iter); 518 return ret; 519 } 520 521 int bch2_subvolume_create(struct btree_trans *trans, u64 inode, 522 u32 parent_subvolid, 523 u32 src_subvolid, 524 u32 *new_subvolid, 525 u32 *new_snapshotid, 526 bool ro) 527 { 528 struct bch_fs *c = trans->c; 529 struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL }; 530 struct bkey_i_subvolume *new_subvol = NULL; 531 struct bkey_i_subvolume *src_subvol = NULL; 532 u32 parent = 0, new_nodes[2], snapshot_subvols[2]; 533 int ret = 0; 534 535 ret = bch2_bkey_get_empty_slot(trans, &dst_iter, 536 BTREE_ID_subvolumes, POS(0, U32_MAX)); 537 if (ret == -BCH_ERR_ENOSPC_btree_slot) 538 ret = -BCH_ERR_ENOSPC_subvolume_create; 539 if (ret) 540 return ret; 541 542 snapshot_subvols[0] = dst_iter.pos.offset; 543 snapshot_subvols[1] = src_subvolid; 544 545 if (src_subvolid) { 546 /* Creating a snapshot: */ 547 548 src_subvol = bch2_bkey_get_mut_typed(trans, &src_iter, 549 BTREE_ID_subvolumes, POS(0, src_subvolid), 550 BTREE_ITER_CACHED, subvolume); 551 ret = PTR_ERR_OR_ZERO(src_subvol); 552 if (unlikely(ret)) { 553 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, 554 "subvolume %u not found", src_subvolid); 555 goto err; 556 } 557 558 parent = le32_to_cpu(src_subvol->v.snapshot); 559 } 560 561 ret = bch2_snapshot_node_create(trans, parent, new_nodes, 562 snapshot_subvols, 563 src_subvolid ? 2 : 1); 564 if (ret) 565 goto err; 566 567 if (src_subvolid) { 568 src_subvol->v.snapshot = cpu_to_le32(new_nodes[1]); 569 ret = bch2_trans_update(trans, &src_iter, &src_subvol->k_i, 0); 570 if (ret) 571 goto err; 572 } 573 574 new_subvol = bch2_bkey_alloc(trans, &dst_iter, 0, subvolume); 575 ret = PTR_ERR_OR_ZERO(new_subvol); 576 if (ret) 577 goto err; 578 579 new_subvol->v.flags = 0; 580 new_subvol->v.snapshot = cpu_to_le32(new_nodes[0]); 581 new_subvol->v.inode = cpu_to_le64(inode); 582 new_subvol->v.creation_parent = cpu_to_le32(src_subvolid); 583 new_subvol->v.fs_path_parent = cpu_to_le32(parent_subvolid); 584 new_subvol->v.otime.lo = cpu_to_le64(bch2_current_time(c)); 585 new_subvol->v.otime.hi = 0; 586 587 SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro); 588 SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0); 589 590 *new_subvolid = new_subvol->k.p.offset; 591 *new_snapshotid = new_nodes[0]; 592 err: 593 bch2_trans_iter_exit(trans, &src_iter); 594 bch2_trans_iter_exit(trans, &dst_iter); 595 return ret; 596 } 597 598 int bch2_fs_subvolumes_init(struct bch_fs *c) 599 { 600 INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work); 601 INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work, 602 bch2_subvolume_wait_for_pagecache_and_delete); 603 mutex_init(&c->snapshots_unlinked_lock); 604 return 0; 605 } 606