1 // SPDX-License-Identifier: GPL-2.0 2 3 #include "bcachefs.h" 4 #include "btree_key_cache.h" 5 #include "btree_update.h" 6 #include "errcode.h" 7 #include "error.h" 8 #include "fs.h" 9 #include "snapshot.h" 10 #include "subvolume.h" 11 12 #include <linux/random.h> 13 14 static int bch2_subvolume_delete(struct btree_trans *, u32); 15 16 static int check_subvol(struct btree_trans *trans, 17 struct btree_iter *iter, 18 struct bkey_s_c k) 19 { 20 struct bch_fs *c = trans->c; 21 struct bkey_s_c_subvolume subvol; 22 struct bch_snapshot snapshot; 23 unsigned snapid; 24 int ret = 0; 25 26 if (k.k->type != KEY_TYPE_subvolume) 27 return 0; 28 29 subvol = bkey_s_c_to_subvolume(k); 30 snapid = le32_to_cpu(subvol.v->snapshot); 31 ret = bch2_snapshot_lookup(trans, snapid, &snapshot); 32 33 if (bch2_err_matches(ret, ENOENT)) 34 bch_err(c, "subvolume %llu points to nonexistent snapshot %u", 35 k.k->p.offset, snapid); 36 if (ret) 37 return ret; 38 39 if (BCH_SUBVOLUME_UNLINKED(subvol.v)) { 40 ret = bch2_subvolume_delete(trans, iter->pos.offset); 41 bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset); 42 return ret ?: -BCH_ERR_transaction_restart_nested; 43 } 44 45 if (!BCH_SUBVOLUME_SNAP(subvol.v)) { 46 u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot)); 47 u32 snapshot_tree; 48 struct bch_snapshot_tree st; 49 50 rcu_read_lock(); 51 snapshot_tree = snapshot_t(c, snapshot_root)->tree; 52 rcu_read_unlock(); 53 54 ret = bch2_snapshot_tree_lookup(trans, snapshot_tree, &st); 55 56 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, 57 "%s: snapshot tree %u not found", __func__, snapshot_tree); 58 59 if (ret) 60 return ret; 61 62 if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, 63 c, subvol_not_master_and_not_snapshot, 64 "subvolume %llu is not set as snapshot but is not master subvolume", 65 k.k->p.offset)) { 66 struct bkey_i_subvolume *s = 67 bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume); 68 ret = PTR_ERR_OR_ZERO(s); 69 if (ret) 70 return ret; 71 72 SET_BCH_SUBVOLUME_SNAP(&s->v, true); 73 } 74 } 75 76 fsck_err: 77 return ret; 78 } 79 80 int bch2_check_subvols(struct bch_fs *c) 81 { 82 int ret = bch2_trans_run(c, 83 for_each_btree_key_commit(trans, iter, 84 BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k, 85 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 86 check_subvol(trans, &iter, k))); 87 bch_err_fn(c, ret); 88 return ret; 89 } 90 91 /* Subvolumes: */ 92 93 int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k, 94 enum bkey_invalid_flags flags, struct printbuf *err) 95 { 96 int ret = 0; 97 98 bkey_fsck_err_on(bkey_lt(k.k->p, SUBVOL_POS_MIN) || 99 bkey_gt(k.k->p, SUBVOL_POS_MAX), c, err, 100 subvol_pos_bad, 101 "invalid pos"); 102 fsck_err: 103 return ret; 104 } 105 106 void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c, 107 struct bkey_s_c k) 108 { 109 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); 110 111 prt_printf(out, "root %llu snapshot id %u", 112 le64_to_cpu(s.v->inode), 113 le32_to_cpu(s.v->snapshot)); 114 115 if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, parent)) 116 prt_printf(out, " parent %u", le32_to_cpu(s.v->parent)); 117 } 118 119 static __always_inline int 120 bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol, 121 bool inconsistent_if_not_found, 122 int iter_flags, 123 struct bch_subvolume *s) 124 { 125 int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol), 126 iter_flags, subvolume, s); 127 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT) && 128 inconsistent_if_not_found, 129 trans->c, "missing subvolume %u", subvol); 130 return ret; 131 } 132 133 int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol, 134 bool inconsistent_if_not_found, 135 int iter_flags, 136 struct bch_subvolume *s) 137 { 138 return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s); 139 } 140 141 int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol) 142 { 143 struct bch_subvolume s; 144 int ret = bch2_subvolume_get_inlined(trans, subvol, true, 0, &s); 145 if (ret) 146 return ret; 147 148 if (BCH_SUBVOLUME_RO(&s)) 149 return -EROFS; 150 return 0; 151 } 152 153 int bch2_subvol_is_ro(struct bch_fs *c, u32 subvol) 154 { 155 return bch2_trans_do(c, NULL, NULL, 0, 156 bch2_subvol_is_ro_trans(trans, subvol)); 157 } 158 159 int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, 160 struct bch_subvolume *subvol) 161 { 162 struct bch_snapshot snap; 163 164 return bch2_snapshot_lookup(trans, snapshot, &snap) ?: 165 bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol); 166 } 167 168 int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, 169 u32 *snapid) 170 { 171 struct btree_iter iter; 172 struct bkey_s_c_subvolume subvol; 173 int ret; 174 175 subvol = bch2_bkey_get_iter_typed(trans, &iter, 176 BTREE_ID_subvolumes, POS(0, subvolid), 177 BTREE_ITER_CACHED|BTREE_ITER_WITH_UPDATES, 178 subvolume); 179 ret = bkey_err(subvol); 180 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, 181 "missing subvolume %u", subvolid); 182 183 if (likely(!ret)) 184 *snapid = le32_to_cpu(subvol.v->snapshot); 185 bch2_trans_iter_exit(trans, &iter); 186 return ret; 187 } 188 189 static int bch2_subvolume_reparent(struct btree_trans *trans, 190 struct btree_iter *iter, 191 struct bkey_s_c k, 192 u32 old_parent, u32 new_parent) 193 { 194 struct bkey_i_subvolume *s; 195 int ret; 196 197 if (k.k->type != KEY_TYPE_subvolume) 198 return 0; 199 200 if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, parent) && 201 le32_to_cpu(bkey_s_c_to_subvolume(k).v->parent) != old_parent) 202 return 0; 203 204 s = bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume); 205 ret = PTR_ERR_OR_ZERO(s); 206 if (ret) 207 return ret; 208 209 s->v.parent = cpu_to_le32(new_parent); 210 return 0; 211 } 212 213 /* 214 * Separate from the snapshot tree in the snapshots btree, we record the tree 215 * structure of how snapshot subvolumes were created - the parent subvolume of 216 * each snapshot subvolume. 217 * 218 * When a subvolume is deleted, we scan for child subvolumes and reparant them, 219 * to avoid dangling references: 220 */ 221 static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_delete) 222 { 223 struct bch_subvolume s; 224 225 return lockrestart_do(trans, 226 bch2_subvolume_get(trans, subvolid_to_delete, true, 227 BTREE_ITER_CACHED, &s)) ?: 228 for_each_btree_key_commit(trans, iter, 229 BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k, 230 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 231 bch2_subvolume_reparent(trans, &iter, k, 232 subvolid_to_delete, le32_to_cpu(s.parent))); 233 } 234 235 /* 236 * Delete subvolume, mark snapshot ID as deleted, queue up snapshot 237 * deletion/cleanup: 238 */ 239 static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) 240 { 241 struct btree_iter iter; 242 struct bkey_s_c_subvolume subvol; 243 u32 snapid; 244 int ret = 0; 245 246 subvol = bch2_bkey_get_iter_typed(trans, &iter, 247 BTREE_ID_subvolumes, POS(0, subvolid), 248 BTREE_ITER_CACHED|BTREE_ITER_INTENT, 249 subvolume); 250 ret = bkey_err(subvol); 251 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, 252 "missing subvolume %u", subvolid); 253 if (ret) 254 return ret; 255 256 snapid = le32_to_cpu(subvol.v->snapshot); 257 258 ret = bch2_btree_delete_at(trans, &iter, 0) ?: 259 bch2_snapshot_node_set_deleted(trans, snapid); 260 bch2_trans_iter_exit(trans, &iter); 261 return ret; 262 } 263 264 static int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) 265 { 266 return bch2_subvolumes_reparent(trans, subvolid) ?: 267 commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 268 __bch2_subvolume_delete(trans, subvolid)); 269 } 270 271 static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work) 272 { 273 struct bch_fs *c = container_of(work, struct bch_fs, 274 snapshot_wait_for_pagecache_and_delete_work); 275 snapshot_id_list s; 276 u32 *id; 277 int ret = 0; 278 279 while (!ret) { 280 mutex_lock(&c->snapshots_unlinked_lock); 281 s = c->snapshots_unlinked; 282 darray_init(&c->snapshots_unlinked); 283 mutex_unlock(&c->snapshots_unlinked_lock); 284 285 if (!s.nr) 286 break; 287 288 bch2_evict_subvolume_inodes(c, &s); 289 290 for (id = s.data; id < s.data + s.nr; id++) { 291 ret = bch2_trans_run(c, bch2_subvolume_delete(trans, *id)); 292 bch_err_msg(c, ret, "deleting subvolume %u", *id); 293 if (ret) 294 break; 295 } 296 297 darray_exit(&s); 298 } 299 300 bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache); 301 } 302 303 struct subvolume_unlink_hook { 304 struct btree_trans_commit_hook h; 305 u32 subvol; 306 }; 307 308 static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans, 309 struct btree_trans_commit_hook *_h) 310 { 311 struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h); 312 struct bch_fs *c = trans->c; 313 int ret = 0; 314 315 mutex_lock(&c->snapshots_unlinked_lock); 316 if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol)) 317 ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol); 318 mutex_unlock(&c->snapshots_unlinked_lock); 319 320 if (ret) 321 return ret; 322 323 if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache)) 324 return -EROFS; 325 326 if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_delete_work)) 327 bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache); 328 return 0; 329 } 330 331 int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid) 332 { 333 struct btree_iter iter; 334 struct bkey_i_subvolume *n; 335 struct subvolume_unlink_hook *h; 336 int ret = 0; 337 338 h = bch2_trans_kmalloc(trans, sizeof(*h)); 339 ret = PTR_ERR_OR_ZERO(h); 340 if (ret) 341 return ret; 342 343 h->h.fn = bch2_subvolume_wait_for_pagecache_and_delete_hook; 344 h->subvol = subvolid; 345 bch2_trans_commit_hook(trans, &h->h); 346 347 n = bch2_bkey_get_mut_typed(trans, &iter, 348 BTREE_ID_subvolumes, POS(0, subvolid), 349 BTREE_ITER_CACHED, subvolume); 350 ret = PTR_ERR_OR_ZERO(n); 351 if (unlikely(ret)) { 352 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, 353 "missing subvolume %u", subvolid); 354 return ret; 355 } 356 357 SET_BCH_SUBVOLUME_UNLINKED(&n->v, true); 358 bch2_trans_iter_exit(trans, &iter); 359 return ret; 360 } 361 362 int bch2_subvolume_create(struct btree_trans *trans, u64 inode, 363 u32 src_subvolid, 364 u32 *new_subvolid, 365 u32 *new_snapshotid, 366 bool ro) 367 { 368 struct bch_fs *c = trans->c; 369 struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL }; 370 struct bkey_i_subvolume *new_subvol = NULL; 371 struct bkey_i_subvolume *src_subvol = NULL; 372 u32 parent = 0, new_nodes[2], snapshot_subvols[2]; 373 int ret = 0; 374 375 ret = bch2_bkey_get_empty_slot(trans, &dst_iter, 376 BTREE_ID_subvolumes, POS(0, U32_MAX)); 377 if (ret == -BCH_ERR_ENOSPC_btree_slot) 378 ret = -BCH_ERR_ENOSPC_subvolume_create; 379 if (ret) 380 return ret; 381 382 snapshot_subvols[0] = dst_iter.pos.offset; 383 snapshot_subvols[1] = src_subvolid; 384 385 if (src_subvolid) { 386 /* Creating a snapshot: */ 387 388 src_subvol = bch2_bkey_get_mut_typed(trans, &src_iter, 389 BTREE_ID_subvolumes, POS(0, src_subvolid), 390 BTREE_ITER_CACHED, subvolume); 391 ret = PTR_ERR_OR_ZERO(src_subvol); 392 if (unlikely(ret)) { 393 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, 394 "subvolume %u not found", src_subvolid); 395 goto err; 396 } 397 398 parent = le32_to_cpu(src_subvol->v.snapshot); 399 } 400 401 ret = bch2_snapshot_node_create(trans, parent, new_nodes, 402 snapshot_subvols, 403 src_subvolid ? 2 : 1); 404 if (ret) 405 goto err; 406 407 if (src_subvolid) { 408 src_subvol->v.snapshot = cpu_to_le32(new_nodes[1]); 409 ret = bch2_trans_update(trans, &src_iter, &src_subvol->k_i, 0); 410 if (ret) 411 goto err; 412 } 413 414 new_subvol = bch2_bkey_alloc(trans, &dst_iter, 0, subvolume); 415 ret = PTR_ERR_OR_ZERO(new_subvol); 416 if (ret) 417 goto err; 418 419 new_subvol->v.flags = 0; 420 new_subvol->v.snapshot = cpu_to_le32(new_nodes[0]); 421 new_subvol->v.inode = cpu_to_le64(inode); 422 new_subvol->v.parent = cpu_to_le32(src_subvolid); 423 new_subvol->v.otime.lo = cpu_to_le64(bch2_current_time(c)); 424 new_subvol->v.otime.hi = 0; 425 426 SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro); 427 SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0); 428 429 *new_subvolid = new_subvol->k.p.offset; 430 *new_snapshotid = new_nodes[0]; 431 err: 432 bch2_trans_iter_exit(trans, &src_iter); 433 bch2_trans_iter_exit(trans, &dst_iter); 434 return ret; 435 } 436 437 int bch2_fs_subvolumes_init(struct bch_fs *c) 438 { 439 INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work); 440 INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work, 441 bch2_subvolume_wait_for_pagecache_and_delete); 442 mutex_init(&c->snapshots_unlinked_lock); 443 return 0; 444 } 445