// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "btree_key_cache.h"
#include "btree_update.h"
#include "errcode.h"
#include "error.h"
#include "fs.h"
#include "snapshot.h"
#include "subvolume.h"

#include <linux/random.h>

/* Forward declaration: check_subvol() deletes unlinked subvolumes. */
static int bch2_subvolume_delete(struct btree_trans *, u32);

/*
 * Check a single key from the subvolumes btree (called once per key by
 * bch2_check_subvols()):
 *
 *  - keys that are not KEY_TYPE_subvolume are skipped
 *  - the snapshot the subvolume points to must exist
 *  - a subvolume flagged as unlinked is deleted here
 *  - a subvolume not flagged as a snapshot must be the master subvolume of its
 *    snapshot tree; if it is not, the snapshot flag is set on it
 *
 * Returns 0 or a negative error code. After successfully deleting an unlinked
 * subvolume, -BCH_ERR_transaction_restart_nested is returned.
 */
static int check_subvol(struct btree_trans *trans,
			struct btree_iter *iter,
			struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct bkey_s_c_subvolume subvol;
	struct bch_snapshot snapshot;
	unsigned snapid;
	int ret = 0;

	if (k.k->type != KEY_TYPE_subvolume)
		return 0;

	subvol = bkey_s_c_to_subvolume(k);
	snapid = le32_to_cpu(subvol.v->snapshot);
	ret = bch2_snapshot_lookup(trans, snapid, &snapshot);

	/* Only the ENOENT case is logged; every error is returned to the caller: */
	if (bch2_err_matches(ret, ENOENT))
		bch_err(c, "subvolume %llu points to nonexistent snapshot %u",
			k.k->p.offset, snapid);
	if (ret)
		return ret;

	if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
		/*
		 * NOTE(review): the return value of bch2_fs_lazy_rw() is not
		 * checked here - confirm that is intentional.
		 */
		bch2_fs_lazy_rw(c);

		ret = bch2_subvolume_delete(trans, iter->pos.offset);
		if (ret)
			bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset);
		/*
		 * Even on success a restart code is returned; presumably
		 * because bch2_subvolume_delete() commits from within this
		 * transaction - TODO confirm.
		 */
		return ret ?: -BCH_ERR_transaction_restart_nested;
	}

	if (!BCH_SUBVOLUME_SNAP(subvol.v)) {
		u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot));
		u32 snapshot_tree;
		struct bch_snapshot_tree st;

		/* snapshot_t() is dereferenced under rcu_read_lock(): */
		rcu_read_lock();
		snapshot_tree = snapshot_t(c, snapshot_root)->tree;
		rcu_read_unlock();

		ret = bch2_snapshot_tree_lookup(trans, snapshot_tree, &st);

		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
				"%s: snapshot tree %u not found", __func__, snapshot_tree);

		if (ret)
			return ret;

		/*
		 * Not flagged as a snapshot, yet not the tree's master
		 * subvolume: repair by setting the snapshot flag.
		 */
		if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, c,
				"subvolume %llu is not set as snapshot but is not master subvolume",
				k.k->p.offset)) {
			struct bkey_i_subvolume *s =
				bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume);
			ret = PTR_ERR_OR_ZERO(s);
			if (ret)
				return ret;

			SET_BCH_SUBVOLUME_SNAP(&s->v, true);
		}
	}

	/* Presumably the jump target used by fsck_err_on() - defined elsewhere. */
fsck_err:
	return ret;
}

/*
 * Run check_subvol() on every key in the subvolumes btree, starting from
 * POS_MIN. Any error is logged via bch_err_fn() and returned.
 */
int bch2_check_subvols(struct bch_fs *c)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
			BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
			NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
		check_subvol(trans, &iter, k)));
	if (ret)
		bch_err_fn(c, ret);
	return ret;
}

/* Subvolumes: */

/*
 * Validate a subvolume key: its position must lie within
 * [SUBVOL_POS_MIN, SUBVOL_POS_MAX].
 *
 * On failure "invalid pos" is written to @err and -BCH_ERR_invalid_bkey is
 * returned; otherwise returns 0. @c and @flags are not used here.
 */
int bch2_subvolume_invalid(const struct bch_fs *c, struct bkey_s_c k,
			   enum bkey_invalid_flags flags, struct printbuf *err)
{
	if (bkey_lt(k.k->p, SUBVOL_POS_MIN) ||
	    bkey_gt(k.k->p, SUBVOL_POS_MAX)) {
		prt_printf(err, "invalid pos");
		return -BCH_ERR_invalid_bkey;
	}

	return 0;
}

/*
 * Print a subvolume key's root inode number and snapshot ID to @out, followed
 * by its parent when the value is large enough to contain that field.
 */
void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c,
			    struct bkey_s_c k)
{
	struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);

	prt_printf(out, "root %llu snapshot id %u",
		   le64_to_cpu(s.v->inode),
		   le32_to_cpu(s.v->snapshot));

	/* Only read ->parent if the value actually extends past its offset: */
	if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, parent))
		prt_printf(out, " parent %u", le32_to_cpu(s.v->parent));
}

/*
 * Read the subvolume key at POS(0, @subvol) into @s.
 *
 * If the lookup fails with an error matching ENOENT and
 * @inconsistent_if_not_found is set, the filesystem is reported inconsistent
 * ("missing subvolume"). The lookup's return value is passed back unchanged.
 */
static __always_inline int
bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol,
			   bool inconsistent_if_not_found,
			   int iter_flags,
			   struct bch_subvolume *s)
{
	int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol),
					  iter_flags, subvolume, s);
	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT) &&
				inconsistent_if_not_found,
				trans->c, "missing subvolume %u", subvol);
	return ret;
}

/* Out-of-line wrapper around bch2_subvolume_get_inlined(). */
int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol,
		       bool inconsistent_if_not_found,
		       int iter_flags,
		       struct bch_subvolume *s)
{
	return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s);
}

/*
 * Look up snapshot @snapshot, then read the subvolume recorded in its ->subvol
 * field into @subvol. The subvolume is only looked up if the snapshot lookup
 * returned 0; a missing subvolume is reported as an inconsistency.
 */
int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot,
			     struct bch_subvolume *subvol)
{
	struct bch_snapshot snap;

	return  bch2_snapshot_lookup(trans, snapshot, &snap) ?:
		bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol);
}

/*
 * Store the snapshot ID of subvolume @subvolid in *@snapid.
 *
 * *@snapid is only written on success. A lookup error matching ENOENT is
 * reported as a filesystem inconsistency. Returns 0 or the lookup error.
 */
int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid,
				u32 *snapid)
{
	struct btree_iter iter;
	struct bkey_s_c_subvolume subvol;
	int ret;

	subvol = bch2_bkey_get_iter_typed(trans, &iter,
				BTREE_ID_subvolumes, POS(0, subvolid),
				BTREE_ITER_CACHED|BTREE_ITER_WITH_UPDATES,
				subvolume);
	ret = bkey_err(subvol);
	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
				"missing subvolume %u", subvolid);

	if (likely(!ret))
		*snapid = le32_to_cpu(subvol.v->snapshot);
	/* The iterator is released on both the success and the error path: */
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Per-key callback for bch2_subvolumes_reparent(): if @k is a subvolume whose
 * parent is @old_parent, rewrite it with parent = @new_parent.
 *
 * Non-subvolume keys are skipped. Keys whose value contains the parent field
 * are skipped unless that field equals @old_parent.
 *
 * NOTE(review): a subvolume key whose value is too short to contain the parent
 * field is not skipped, and so gets @new_parent assigned - confirm this is
 * intended.
 */
static int bch2_subvolume_reparent(struct btree_trans *trans,
				   struct btree_iter *iter,
				   struct bkey_s_c k,
				   u32 old_parent, u32 new_parent)
{
	struct bkey_i_subvolume *s;
	int ret;

	if (k.k->type != KEY_TYPE_subvolume)
		return 0;

	if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, parent) &&
	    le32_to_cpu(bkey_s_c_to_subvolume(k).v->parent) != old_parent)
		return 0;

	s = bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume);
	ret = PTR_ERR_OR_ZERO(s);
	if (ret)
		return ret;

	s->v.parent = cpu_to_le32(new_parent);
	return 0;
}

/*
 * Separate from the snapshot tree in the snapshots btree, we record the tree
 * structure of how snapshot subvolumes were created - the parent subvolume of
 * each snapshot subvolume.
 *
 * When a subvolume is deleted, we scan for child subvolumes and reparent them,
 * to avoid dangling references:
 *
 * The children of @subvolid_to_delete are given that subvolume's own parent.
 * The scan only runs if reading @subvolid_to_delete succeeded.
 */
static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_delete)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bch_subvolume s;

	return lockrestart_do(trans,
			bch2_subvolume_get(trans, subvolid_to_delete, true,
				   BTREE_ITER_CACHED, &s)) ?:
		for_each_btree_key_commit(trans, iter,
				BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
				NULL, NULL, BTREE_INSERT_NOFAIL,
			bch2_subvolume_reparent(trans, &iter, k,
					subvolid_to_delete, le32_to_cpu(s.parent)));
}

/*
 * Delete subvolume, mark snapshot ID as deleted, queue up snapshot
 * deletion/cleanup:
 *
 * This only stages the updates and registers the commit hook in @trans; the
 * commit itself is done by the caller (see bch2_subvolume_delete()).
 */
static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
{
	struct btree_iter iter;
	struct bkey_s_c_subvolume subvol;
	struct btree_trans_commit_hook *h;
	u32 snapid;
	int ret = 0;

	subvol = bch2_bkey_get_iter_typed(trans, &iter,
				BTREE_ID_subvolumes, POS(0, subvolid),
				BTREE_ITER_CACHED|BTREE_ITER_INTENT,
				subvolume);
	ret = bkey_err(subvol);
	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
				"missing subvolume %u", subvolid);
	/*
	 * NOTE(review): this early return skips bch2_trans_iter_exit();
	 * presumably bch2_bkey_get_iter_typed() releases the iterator itself
	 * on error - confirm.
	 */
	if (ret)
		return ret;

	/* Read the snapshot ID before the key is deleted: */
	snapid = le32_to_cpu(subvol.v->snapshot);

	ret = bch2_btree_delete_at(trans, &iter, 0);
	if (ret)
		goto err;

	ret = bch2_snapshot_node_set_deleted(trans, snapid);
	if (ret)
		goto err;

	h = bch2_trans_kmalloc(trans, sizeof(*h));
	ret = PTR_ERR_OR_ZERO(h);
	if (ret)
		goto err;

	h->fn = bch2_delete_dead_snapshots_hook;
	bch2_trans_commit_hook(trans, h);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Delete subvolume @subvolid: first reparent its children, then - only if that
 * succeeded - delete the subvolume itself and commit.
 */
static int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
{
	return bch2_subvolumes_reparent(trans, subvolid) ?:
		commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
			  __bch2_subvolume_delete(trans, subvolid));
}

/*
 * Work function for c->snapshot_wait_for_pagecache_and_delete_work.
 *
 * Repeatedly takes over the list of unlinked subvolumes, evicts their inodes
 * and deletes each one, until the list is found empty or a deletion fails.
 * Drops the BCH_WRITE_REF_snapshot_delete_pagecache write ref on exit.
 */
static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work)
{
	struct bch_fs *c = container_of(work, struct bch_fs,
				snapshot_wait_for_pagecache_and_delete_work);
	snapshot_id_list s;
	u32 *id;
	int ret = 0;

	while (!ret) {
		/*
		 * Take over the pending list and reset the shared one while
		 * holding the lock, so the list can be processed after the
		 * lock is dropped:
		 */
		mutex_lock(&c->snapshots_unlinked_lock);
		s = c->snapshots_unlinked;
		darray_init(&c->snapshots_unlinked);
		mutex_unlock(&c->snapshots_unlinked_lock);

		if (!s.nr)
			break;

		bch2_evict_subvolume_inodes(c, &s);

		for (id = s.data; id < s.data + s.nr; id++) {
			ret = bch2_trans_run(c, bch2_subvolume_delete(trans, *id));
			if (ret) {
				bch_err_msg(c, ret, "deleting subvolume %u", *id);
				break;
			}
		}

		/* Free the local copy; on error the outer loop also ends: */
		darray_exit(&s);
	}

	bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
}

/* Commit hook carrying the ID of the subvolume being unlinked. */
struct subvolume_unlink_hook {
	struct btree_trans_commit_hook	h;
	u32				subvol;
};

/*
 * Commit hook registered by bch2_subvolume_unlink(): add the subvolume to
 * c->snapshots_unlinked (if not already present) and queue the work item that
 * deletes it.
 *
 * Returns the error from snapshot_list_add(), -EROFS if the write ref cannot
 * be taken, or 0.
 */
static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans,
						      struct btree_trans_commit_hook *_h)
{
	struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h);
	struct bch_fs *c = trans->c;
	int ret = 0;

	mutex_lock(&c->snapshots_unlinked_lock);
	if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol))
		ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol);
	mutex_unlock(&c->snapshots_unlinked_lock);

	if (ret)
		return ret;

	/* This ref is dropped by the work function when it finishes: */
	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache))
		return -EROFS;

	/* queue_work() returned false: nothing new was queued, drop our ref. */
	if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
		bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
	return 0;
}

/*
 * Flag subvolume @subvolid as unlinked, and register a commit hook that queues
 * it for deletion.
 *
 * Returns 0 or a negative error code; a lookup error matching ENOENT is
 * reported as a filesystem inconsistency.
 */
int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid)
{
	struct btree_iter iter;
	struct bkey_i_subvolume *n;
	struct subvolume_unlink_hook *h;
	int ret = 0;

	h = bch2_trans_kmalloc(trans, sizeof(*h));
	ret = PTR_ERR_OR_ZERO(h);
	if (ret)
		return ret;

	h->h.fn		= bch2_subvolume_wait_for_pagecache_and_delete_hook;
	h->subvol	= subvolid;
	bch2_trans_commit_hook(trans, &h->h);

	n = bch2_bkey_get_mut_typed(trans, &iter,
			BTREE_ID_subvolumes, POS(0, subvolid),
			BTREE_ITER_CACHED, subvolume);
	ret = PTR_ERR_OR_ZERO(n);
	if (unlikely(ret)) {
		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
					"missing subvolume %u", subvolid);
		/*
		 * NOTE(review): no bch2_trans_iter_exit() on this path;
		 * presumably bch2_bkey_get_mut_typed() releases the iterator
		 * on error - confirm.
		 */
		return ret;
	}

	SET_BCH_SUBVOLUME_UNLINKED(&n->v, true);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Create a new subvolume with root inode @inode.
 *
 * If @src_subvolid is nonzero the new subvolume is a snapshot of it: two new
 * snapshot nodes are created below the source's current snapshot, one for the
 * new subvolume and one that the source subvolume is switched to. Otherwise a
 * single new snapshot node is created with parent 0.
 *
 * On success the new subvolume ID and its snapshot ID are stored in
 * *@new_subvolid and *@new_snapshotid. @ro sets the read-only flag on the new
 * subvolume. Returns 0 or a negative error code.
 */
int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
			  u32 src_subvolid,
			  u32 *new_subvolid,
			  u32 *new_snapshotid,
			  bool ro)
{
	struct bch_fs *c = trans->c;
	struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL };
	struct bkey_i_subvolume *new_subvol = NULL;
	struct bkey_i_subvolume *src_subvol = NULL;
	u32 parent = 0, new_nodes[2], snapshot_subvols[2];
	int ret = 0;

	ret = bch2_bkey_get_empty_slot(trans, &dst_iter,
				BTREE_ID_subvolumes, POS(0, U32_MAX));
	/* Translate the generic "no free slot" error into the subvolume one: */
	if (ret == -BCH_ERR_ENOSPC_btree_slot)
		ret = -BCH_ERR_ENOSPC_subvolume_create;
	if (ret)
		return ret;

	/* Index 0 is the new subvolume, index 1 the source (snapshots only): */
	snapshot_subvols[0] = dst_iter.pos.offset;
	snapshot_subvols[1] = src_subvolid;

	if (src_subvolid) {
		/* Creating a snapshot: */

		src_subvol = bch2_bkey_get_mut_typed(trans, &src_iter,
				BTREE_ID_subvolumes, POS(0, src_subvolid),
				BTREE_ITER_CACHED, subvolume);
		ret = PTR_ERR_OR_ZERO(src_subvol);
		if (unlikely(ret)) {
			bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
						"subvolume %u not found", src_subvolid);
			goto err;
		}

		parent = le32_to_cpu(src_subvol->v.snapshot);
	}

	ret = bch2_snapshot_node_create(trans, parent, new_nodes,
					snapshot_subvols,
					src_subvolid ? 2 : 1);
	if (ret)
		goto err;

	if (src_subvolid) {
		/* The source subvolume moves to the second new snapshot node: */
		src_subvol->v.snapshot = cpu_to_le32(new_nodes[1]);
		ret = bch2_trans_update(trans, &src_iter, &src_subvol->k_i, 0);
		if (ret)
			goto err;
	}

	new_subvol = bch2_bkey_alloc(trans, &dst_iter, 0, subvolume);
	ret = PTR_ERR_OR_ZERO(new_subvol);
	if (ret)
		goto err;

	new_subvol->v.flags	= 0;
	new_subvol->v.snapshot	= cpu_to_le32(new_nodes[0]);
	new_subvol->v.inode	= cpu_to_le64(inode);
	new_subvol->v.parent	= cpu_to_le32(src_subvolid);
	new_subvol->v.otime.lo	= cpu_to_le64(bch2_current_time(c));
	new_subvol->v.otime.hi	= 0;

	SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro);
	SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0);

	*new_subvolid	= new_subvol->k.p.offset;
	*new_snapshotid = new_nodes[0];
err:
	/* src_iter was initialized to { NULL }, so it is exited even if unused: */
	bch2_trans_iter_exit(trans, &src_iter);
	bch2_trans_iter_exit(trans, &dst_iter);
	return ret;
}

/*
 * Initialize the subvolume/snapshot deletion work items and the lock
 * protecting the list of unlinked subvolumes. Always returns 0.
 */
int bch2_fs_subvolumes_init(struct bch_fs *c)
{
	INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
	INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work,
		  bch2_subvolume_wait_for_pagecache_and_delete);
	mutex_init(&c->snapshots_unlinked_lock);
	return 0;
}