// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "btree_key_cache.h"
#include "btree_write_buffer.h"
#include "bkey_methods.h"
#include "btree_update.h"
#include "buckets.h"
#include "compress.h"
#include "dirent.h"
#include "disk_accounting.h"
#include "error.h"
#include "extents.h"
#include "extent_update.h"
#include "fs.h"
#include "inode.h"
#include "namei.h"
#include "opts.h"
#include "str_hash.h"
#include "snapshot.h"
#include "subvolume.h"
#include "varint.h"

#include <linux/random.h>

#include <linux/unaligned.h>

#define x(name, ...)	#name,
const char * const bch2_inode_opts[] = {
	BCH_INODE_OPTS()
	NULL,
};

static const char * const bch2_inode_flag_strs[] = {
	BCH_INODE_FLAGS()
	NULL
};
#undef x

static int delete_ancestor_snapshot_inodes(struct btree_trans *, struct bpos);
static int may_delete_deleted_inum(struct btree_trans *, subvol_inum);

static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 };
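
/*
 * Old-style (pre-varint) field encoding, decoded by inode_decode_field()
 * below: the position of the highest set bit in the first byte selects an
 * entry in byte_table[], giving the total size of the field; the marker bit
 * itself is cleared and the remaining bytes are read as a big endian integer.
 *
 * For example, a first byte of 0b1xxxxxxx (__fls() == 7, shift == 1) is a one
 * byte field with 7 payload bits, 0b01xxxxxx is a two byte field with 14
 * payload bits, and the smallest leading byte, 0b00000001, selects the
 * largest encoding: byte_table[7] == 13 bytes.
 */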
static int inode_decode_field(const u8 *in, const u8 *end,
			      u64 out[2], unsigned *out_bits)
{
	__be64 be[2] = { 0, 0 };
	unsigned bytes, shift;
	u8 *p;

	if (in >= end)
		return -BCH_ERR_inode_unpack_error;

	if (!*in)
		return -BCH_ERR_inode_unpack_error;

	/*
	 * position of highest set bit indicates number of bytes:
	 * shift = number of bits to remove in high byte:
	 */
	shift	= 8 - __fls(*in); /* 1 <= shift <= 8 */
	bytes	= byte_table[shift - 1];

	if (in + bytes > end)
		return -BCH_ERR_inode_unpack_error;

	p = (u8 *) be + 16 - bytes;
	memcpy(p, in, bytes);
	*p ^= (1 << 8) >> shift;

	out[0] = be64_to_cpu(be[0]);
	out[1] = be64_to_cpu(be[1]);
	*out_bits = out[0] ? 64 + fls64(out[0]) : fls64(out[1]);

	return bytes;
}

static inline void bch2_inode_pack_inlined(struct bkey_inode_buf *packed,
					   const struct bch_inode_unpacked *inode)
{
	struct bkey_i_inode_v3 *k = &packed->inode;
	u8 *out = k->v.fields;
	u8 *end = (void *) &packed[1];
	u8 *last_nonzero_field = out;
	unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
	unsigned bytes;
	int ret;

	bkey_inode_v3_init(&packed->inode.k_i);
	packed->inode.k.p.offset	= inode->bi_inum;
	packed->inode.v.bi_journal_seq	= cpu_to_le64(inode->bi_journal_seq);
	packed->inode.v.bi_hash_seed	= inode->bi_hash_seed;
	packed->inode.v.bi_flags	= cpu_to_le64(inode->bi_flags);
	packed->inode.v.bi_sectors	= cpu_to_le64(inode->bi_sectors);
	packed->inode.v.bi_size		= cpu_to_le64(inode->bi_size);
	packed->inode.v.bi_version	= cpu_to_le64(inode->bi_version);
	SET_INODEv3_MODE(&packed->inode.v, inode->bi_mode);
	SET_INODEv3_FIELDS_START(&packed->inode.v, INODEv3_FIELDS_START_CUR);

#define x(_name, _bits)							\
	nr_fields++;							\
									\
	if (inode->_name) {						\
		ret = bch2_varint_encode_fast(out, inode->_name);	\
		out += ret;						\
									\
		if (_bits > 64)						\
			*out++ = 0;					\
									\
		last_nonzero_field = out;				\
		last_nonzero_fieldnr = nr_fields;			\
	} else {							\
		*out++ = 0;						\
									\
		if (_bits > 64)						\
			*out++ = 0;					\
	}

	BCH_INODE_FIELDS_v3()
#undef x
	BUG_ON(out > end);

	out = last_nonzero_field;
	nr_fields = last_nonzero_fieldnr;

	bytes = out - (u8 *) &packed->inode.v;
	set_bkey_val_bytes(&packed->inode.k, bytes);
	memset_u64s_tail(&packed->inode.v, 0, bytes);

	SET_INODEv3_NR_FIELDS(&k->v, nr_fields);

	if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
		struct bch_inode_unpacked unpacked;

		ret = bch2_inode_unpack(bkey_i_to_s_c(&packed->inode.k_i), &unpacked);
		BUG_ON(ret);
		BUG_ON(unpacked.bi_inum		!= inode->bi_inum);
		BUG_ON(unpacked.bi_hash_seed	!= inode->bi_hash_seed);
		BUG_ON(unpacked.bi_sectors	!= inode->bi_sectors);
		BUG_ON(unpacked.bi_size		!= inode->bi_size);
		BUG_ON(unpacked.bi_version	!= inode->bi_version);
		BUG_ON(unpacked.bi_mode		!= inode->bi_mode);

#define x(_name, _bits)	if (unpacked._name != inode->_name)		\
			panic("unpacked %llu should be %llu",		\
			      (u64) unpacked._name, (u64) inode->_name);
		BCH_INODE_FIELDS_v3()
#undef x
	}
}

void bch2_inode_pack(struct bkey_inode_buf *packed,
		     const struct bch_inode_unpacked *inode)
{
	bch2_inode_pack_inlined(packed, inode);
}

static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
					 struct bch_inode_unpacked *unpacked)
{
	const u8 *in = inode.v->fields;
	const u8 *end = bkey_val_end(inode);
	u64 field[2];
	unsigned fieldnr = 0, field_bits;
	int ret;

#define x(_name, _bits)							\
	if (fieldnr++ == INODEv1_NR_FIELDS(inode.v)) {			\
		unsigned offset = offsetof(struct bch_inode_unpacked, _name);\
		memset((void *) unpacked + offset, 0,			\
		       sizeof(*unpacked) - offset);			\
		return 0;						\
	}								\
									\
	ret = inode_decode_field(in, end, field, &field_bits);		\
	if (ret < 0)							\
		return ret;						\
									\
	if (field_bits > sizeof(unpacked->_name) * 8)			\
		return -BCH_ERR_inode_unpack_error;			\
									\
	unpacked->_name = field[1];					\
	in += ret;

	BCH_INODE_FIELDS_v2()
#undef x

	/* XXX: signal if there were more fields than expected? */
	return 0;
}
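
/*
 * New-style (v2/v3) packing encodes each field as a varint, with a second
 * varint following for fields wider than 64 bits. Fields past nr_fields
 * didn't exist when the key was packed and decode as zero; a nonzero high
 * word, or a value that doesn't fit the destination field, is an unpack
 * error.
 */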
static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked,
				const u8 *in, const u8 *end,
				unsigned nr_fields)
{
	unsigned fieldnr = 0;
	int ret;
	u64 v[2];

#define x(_name, _bits)							\
	if (fieldnr < nr_fields) {					\
		ret = bch2_varint_decode_fast(in, end, &v[0]);		\
		if (ret < 0)						\
			return ret;					\
		in += ret;						\
									\
		if (_bits > 64) {					\
			ret = bch2_varint_decode_fast(in, end, &v[1]);	\
			if (ret < 0)					\
				return ret;				\
			in += ret;					\
		} else {						\
			v[1] = 0;					\
		}							\
	} else {							\
		v[0] = v[1] = 0;					\
	}								\
									\
	unpacked->_name = v[0];						\
	if (v[1] || v[0] != unpacked->_name)				\
		return -BCH_ERR_inode_unpack_error;			\
	fieldnr++;

	BCH_INODE_FIELDS_v2()
#undef x

	/* XXX: signal if there were more fields than expected? */
	return 0;
}

static int bch2_inode_unpack_v3(struct bkey_s_c k,
				struct bch_inode_unpacked *unpacked)
{
	struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
	const u8 *in = inode.v->fields;
	const u8 *end = bkey_val_end(inode);
	unsigned nr_fields = INODEv3_NR_FIELDS(inode.v);
	unsigned fieldnr = 0;
	int ret;
	u64 v[2];

	unpacked->bi_inum	= inode.k->p.offset;
	unpacked->bi_snapshot	= inode.k->p.snapshot;
	unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq);
	unpacked->bi_hash_seed	= inode.v->bi_hash_seed;
	unpacked->bi_flags	= le64_to_cpu(inode.v->bi_flags);
	unpacked->bi_sectors	= le64_to_cpu(inode.v->bi_sectors);
	unpacked->bi_size	= le64_to_cpu(inode.v->bi_size);
	unpacked->bi_version	= le64_to_cpu(inode.v->bi_version);
	unpacked->bi_mode	= INODEv3_MODE(inode.v);

#define x(_name, _bits)							\
	if (fieldnr < nr_fields) {					\
		ret = bch2_varint_decode_fast(in, end, &v[0]);		\
		if (ret < 0)						\
			return ret;					\
		in += ret;						\
									\
		if (_bits > 64) {					\
			ret = bch2_varint_decode_fast(in, end, &v[1]);	\
			if (ret < 0)					\
				return ret;				\
			in += ret;					\
		} else {						\
			v[1] = 0;					\
		}							\
	} else {							\
		v[0] = v[1] = 0;					\
	}								\
									\
	unpacked->_name = v[0];						\
	if (v[1] || v[0] != unpacked->_name)				\
		return -BCH_ERR_inode_unpack_error;			\
	fieldnr++;

	BCH_INODE_FIELDS_v3()
#undef x

	/* XXX: signal if there were more fields than expected? */
	return 0;
}
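
/*
 * KEY_TYPE_inode_v3 is the current format and the likely case in
 * bch2_inode_unpack(); older v1 and v2 keys are decoded out of line here.
 */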
static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
					       struct bch_inode_unpacked *unpacked)
{
	memset(unpacked, 0, sizeof(*unpacked));

	switch (k.k->type) {
	case KEY_TYPE_inode: {
		struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);

		unpacked->bi_inum	= inode.k->p.offset;
		unpacked->bi_snapshot	= inode.k->p.snapshot;
		unpacked->bi_journal_seq= 0;
		unpacked->bi_hash_seed	= inode.v->bi_hash_seed;
		unpacked->bi_flags	= le32_to_cpu(inode.v->bi_flags);
		unpacked->bi_mode	= le16_to_cpu(inode.v->bi_mode);

		if (INODEv1_NEW_VARINT(inode.v)) {
			return bch2_inode_unpack_v2(unpacked, inode.v->fields,
						    bkey_val_end(inode),
						    INODEv1_NR_FIELDS(inode.v));
		} else {
			return bch2_inode_unpack_v1(inode, unpacked);
		}
		break;
	}
	case KEY_TYPE_inode_v2: {
		struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k);

		unpacked->bi_inum	= inode.k->p.offset;
		unpacked->bi_snapshot	= inode.k->p.snapshot;
		unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq);
		unpacked->bi_hash_seed	= inode.v->bi_hash_seed;
		unpacked->bi_flags	= le64_to_cpu(inode.v->bi_flags);
		unpacked->bi_mode	= le16_to_cpu(inode.v->bi_mode);

		return bch2_inode_unpack_v2(unpacked, inode.v->fields,
					    bkey_val_end(inode),
					    INODEv2_NR_FIELDS(inode.v));
	}
	default:
		BUG();
	}
}

int bch2_inode_unpack(struct bkey_s_c k,
		      struct bch_inode_unpacked *unpacked)
{
	return likely(k.k->type == KEY_TYPE_inode_v3)
		? bch2_inode_unpack_v3(k, unpacked)
		: bch2_inode_unpack_slowpath(k, unpacked);
}

int __bch2_inode_peek(struct btree_trans *trans,
		      struct btree_iter *iter,
		      struct bch_inode_unpacked *inode,
		      subvol_inum inum, unsigned flags,
		      bool warn)
{
	u32 snapshot;
	int ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, warn);
	if (ret)
		return ret;

	struct bkey_s_c k = bch2_bkey_get_iter(trans, iter, BTREE_ID_inodes,
					       SPOS(0, inum.inum, snapshot),
					       flags|BTREE_ITER_cached);
	ret = bkey_err(k);
	if (ret)
		return ret;

	ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode;
	if (ret)
		goto err;

	ret = bch2_inode_unpack(k, inode);
	if (ret)
		goto err;

	return 0;
err:
	if (warn)
		bch_err_msg(trans->c, ret, "looking up inum %llu:%llu:", inum.subvol, inum.inum);
	bch2_trans_iter_exit(trans, iter);
	return ret;
}

int bch2_inode_find_by_inum_snapshot(struct btree_trans *trans,
				     u64 inode_nr, u32 snapshot,
				     struct bch_inode_unpacked *inode,
				     unsigned flags)
{
	struct btree_iter iter;
	struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
					       SPOS(0, inode_nr, snapshot), flags);
	int ret = bkey_err(k);
	if (ret)
		goto err;

	ret = bkey_is_inode(k.k)
		? bch2_inode_unpack(k, inode)
		: -BCH_ERR_ENOENT_inode;
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

int bch2_inode_find_by_inum_nowarn_trans(struct btree_trans *trans,
					 subvol_inum inum,
					 struct bch_inode_unpacked *inode)
{
	struct btree_iter iter;
	int ret;

	ret = bch2_inode_peek_nowarn(trans, &iter, inode, inum, 0);
	if (!ret)
		bch2_trans_iter_exit(trans, &iter);
	return ret;
}

int bch2_inode_find_by_inum_trans(struct btree_trans *trans,
				  subvol_inum inum,
				  struct bch_inode_unpacked *inode)
{
	struct btree_iter iter;
	int ret;

	ret = bch2_inode_peek(trans, &iter, inode, inum, 0);
	if (!ret)
		bch2_trans_iter_exit(trans, &iter);
	return ret;
}

int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum,
			    struct bch_inode_unpacked *inode)
{
	return bch2_trans_do(c, bch2_inode_find_by_inum_trans(trans, inum, inode));
}

int bch2_inode_find_snapshot_root(struct btree_trans *trans, u64 inum,
				  struct bch_inode_unpacked *root)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes,
					     SPOS(0, inum, U32_MAX),
					     BTREE_ITER_all_snapshots, k, ret) {
		if (k.k->p.offset != inum)
			break;
		if (bkey_is_inode(k.k)) {
			ret = bch2_inode_unpack(k, root);
			goto out;
		}
	}
	/* We're only called when we know we have an inode for @inum */
	BUG_ON(!ret);
out:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

int bch2_inode_write_flags(struct btree_trans *trans,
			   struct btree_iter *iter,
			   struct bch_inode_unpacked *inode,
			   enum btree_iter_update_trigger_flags flags)
{
	struct bkey_inode_buf *inode_p;

	inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
	if (IS_ERR(inode_p))
		return PTR_ERR(inode_p);

	bch2_inode_pack_inlined(inode_p, inode);
	inode_p->inode.k.p.snapshot = iter->snapshot;
	return bch2_trans_update(trans, iter, &inode_p->inode.k_i, flags);
}

int __bch2_fsck_write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode)
{
	struct bkey_inode_buf *inode_p =
		bch2_trans_kmalloc(trans, sizeof(*inode_p));

	if (IS_ERR(inode_p))
		return PTR_ERR(inode_p);

	bch2_inode_pack(inode_p, inode);
	inode_p->inode.k.p.snapshot = inode->bi_snapshot;

	return bch2_btree_insert_nonextent(trans, BTREE_ID_inodes,
					   &inode_p->inode.k_i,
					   BTREE_UPDATE_internal_snapshot_node);
}

int bch2_fsck_write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode)
{
	int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			    __bch2_fsck_write_inode(trans, inode));
	bch_err_fn(trans->c, ret);
	return ret;
}

struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k)
{
	struct bch_inode_unpacked u;
	struct bkey_inode_buf *inode_p;
	int ret;

	if (!bkey_is_inode(&k->k))
		return ERR_PTR(-ENOENT);

	inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
	if (IS_ERR(inode_p))
		return ERR_CAST(inode_p);

	ret = bch2_inode_unpack(bkey_i_to_s_c(k), &u);
	if (ret)
		return ERR_PTR(ret);

	bch2_inode_pack(inode_p, &u);
	return &inode_p->inode.k_i;
}

static int __bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k,
				 struct bkey_validate_context from)
{
	struct bch_inode_unpacked unpacked;
	int ret = 0;

	bkey_fsck_err_on(k.k->p.inode,
			 c, inode_pos_inode_nonzero,
			 "nonzero k.p.inode");

	bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX,
			 c, inode_pos_blockdev_range,
			 "fs inode in blockdev range");

	bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked),
			 c, inode_unpack_error,
			 "invalid variable length fields");

	bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1,
			 c, inode_checksum_type_invalid,
			 "invalid data checksum type (%u >= %u)",
			 unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1);

	bkey_fsck_err_on(unpacked.bi_compression &&
			 !bch2_compression_opt_valid(unpacked.bi_compression - 1),
			 c, inode_compression_type_invalid,
			 "invalid compression opt %u", unpacked.bi_compression - 1);

	bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_unlinked) &&
			 unpacked.bi_nlink != 0,
			 c, inode_unlinked_but_nlink_nonzero,
			 "flagged as unlinked but bi_nlink != 0");

	bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode),
			 c, inode_subvol_root_but_not_dir,
			 "subvolume root but not a directory");
fsck_err:
	return ret;
}

int bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k,
			struct bkey_validate_context from)
{
	struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
	int ret = 0;

	bkey_fsck_err_on(INODEv1_STR_HASH(inode.v) >= BCH_STR_HASH_NR,
			 c, inode_str_hash_invalid,
			 "invalid str hash type (%llu >= %u)",
			 INODEv1_STR_HASH(inode.v), BCH_STR_HASH_NR);

	ret = __bch2_inode_validate(c, k, from);
fsck_err:
	return ret;
}

int bch2_inode_v2_validate(struct bch_fs *c, struct bkey_s_c k,
			   struct bkey_validate_context from)
{
	struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k);
	int ret = 0;

	bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR,
			 c, inode_str_hash_invalid,
			 "invalid str hash type (%llu >= %u)",
			 INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR);

	ret = __bch2_inode_validate(c, k, from);
fsck_err:
	return ret;
}

int bch2_inode_v3_validate(struct bch_fs *c, struct bkey_s_c k,
			   struct bkey_validate_context from)
{
	struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
	int ret = 0;

	bkey_fsck_err_on(INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL ||
			 INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k),
			 c, inode_v3_fields_start_bad,
			 "invalid fields_start (got %llu, min %u max %zu)",
			 INODEv3_FIELDS_START(inode.v),
			 INODEv3_FIELDS_START_INITIAL,
			 bkey_val_u64s(inode.k));

	bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR,
			 c, inode_str_hash_invalid,
			 "invalid str hash type (%llu >= %u)",
			 INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR);

	ret = __bch2_inode_validate(c, k, from);
fsck_err:
	return ret;
}

static void __bch2_inode_unpacked_to_text(struct printbuf *out,
					  struct bch_inode_unpacked *inode)
{
	prt_printf(out, "\n");
	printbuf_indent_add(out, 2);
	prt_printf(out, "mode=%o\n", inode->bi_mode);

	prt_str(out, "flags=");
	prt_bitflags(out, bch2_inode_flag_strs, inode->bi_flags & ((1U << 20) - 1));
	prt_printf(out, "(%x)\n", inode->bi_flags);

	prt_printf(out, "journal_seq=%llu\n",	inode->bi_journal_seq);
	prt_printf(out, "hash_seed=%llx\n",	inode->bi_hash_seed);
	prt_printf(out, "hash_type=");
	bch2_prt_str_hash_type(out, INODE_STR_HASH(inode));
	prt_newline(out);
	prt_printf(out, "bi_size=%llu\n",	inode->bi_size);
	prt_printf(out, "bi_sectors=%llu\n",	inode->bi_sectors);
	prt_printf(out, "bi_version=%llu\n",	inode->bi_version);

#define x(_name, _bits)							\
	prt_printf(out, #_name "=%llu\n", (u64) inode->_name);
	BCH_INODE_FIELDS_v3()
#undef x

	bch2_printbuf_strip_trailing_newline(out);
	printbuf_indent_sub(out, 2);
}

void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
{
	prt_printf(out, "inum: %llu:%u ", inode->bi_inum, inode->bi_snapshot);
	__bch2_inode_unpacked_to_text(out, inode);
}

void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
	struct bch_inode_unpacked inode;

	if (bch2_inode_unpack(k, &inode)) {
		prt_printf(out, "(unpack error)");
		return;
	}

	__bch2_inode_unpacked_to_text(out, &inode);
}

static inline u64 bkey_inode_flags(struct bkey_s_c k)
{
	switch (k.k->type) {
	case KEY_TYPE_inode:
		return le32_to_cpu(bkey_s_c_to_inode(k).v->bi_flags);
	case KEY_TYPE_inode_v2:
		return le64_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_flags);
	case KEY_TYPE_inode_v3:
		return le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_flags);
	default:
		return 0;
	}
}

static inline void bkey_inode_flags_set(struct bkey_s k, u64 f)
{
	switch (k.k->type) {
	case KEY_TYPE_inode:
		bkey_s_to_inode(k).v->bi_flags = cpu_to_le32(f);
		return;
	case KEY_TYPE_inode_v2:
		bkey_s_to_inode_v2(k).v->bi_flags = cpu_to_le64(f);
		return;
	case KEY_TYPE_inode_v3:
		bkey_s_to_inode_v3(k).v->bi_flags = cpu_to_le64(f);
		return;
	default:
		BUG();
	}
}

static inline bool bkey_is_unlinked_inode(struct bkey_s_c k)
{
	u64 f = bkey_inode_flags(k);

	return (f & BCH_INODE_unlinked) && !(f & BCH_INODE_has_child_snapshot);
}
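
/*
 * Find the version of this key visible in the parent snapshot: scan forwards
 * through higher snapshot IDs at the same position for a key whose snapshot
 * is an ancestor of pos.snapshot. The inode variant below skips over
 * non-inode keys (e.g. whiteouts), retrying from their position.
 */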
static struct bkey_s_c
bch2_bkey_get_iter_snapshot_parent(struct btree_trans *trans, struct btree_iter *iter,
				   enum btree_id btree, struct bpos pos,
				   unsigned flags)
{
	struct bch_fs *c = trans->c;
	struct bkey_s_c k;
	int ret = 0;

	for_each_btree_key_max_norestart(trans, *iter, btree,
					 bpos_successor(pos),
					 SPOS(pos.inode, pos.offset, U32_MAX),
					 flags|BTREE_ITER_all_snapshots, k, ret)
		if (bch2_snapshot_is_ancestor(c, pos.snapshot, k.k->p.snapshot))
			return k;

	bch2_trans_iter_exit(trans, iter);
	return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
}

static struct bkey_s_c
bch2_inode_get_iter_snapshot_parent(struct btree_trans *trans, struct btree_iter *iter,
				    struct bpos pos, unsigned flags)
{
	struct bkey_s_c k;
again:
	k = bch2_bkey_get_iter_snapshot_parent(trans, iter, BTREE_ID_inodes, pos, flags);
	if (!k.k ||
	    bkey_err(k) ||
	    bkey_is_inode(k.k))
		return k;

	bch2_trans_iter_exit(trans, iter);
	pos = k.k->p;
	goto again;
}

int __bch2_inode_has_child_snapshots(struct btree_trans *trans, struct bpos pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	for_each_btree_key_max_norestart(trans, iter,
			BTREE_ID_inodes, POS(0, pos.offset), bpos_predecessor(pos),
			BTREE_ITER_all_snapshots|
			BTREE_ITER_with_updates, k, ret)
		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot) &&
		    bkey_is_inode(k.k)) {
			ret = 1;
			break;
		}
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

static int update_inode_has_children(struct btree_trans *trans,
				     struct bkey_s k,
				     bool have_child)
{
	if (!have_child) {
		int ret = bch2_inode_has_child_snapshots(trans, k.k->p);
		if (ret)
			return ret < 0 ? ret : 0;
	}

	u64 f = bkey_inode_flags(k.s_c);
	if (have_child != !!(f & BCH_INODE_has_child_snapshot))
		bkey_inode_flags_set(k, f ^ BCH_INODE_has_child_snapshot);

	return 0;
}
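
/*
 * BCH_INODE_has_child_snapshot must be kept in sync as inodes are created
 * and deleted: when a version of an inode exists in a descendent snapshot,
 * the flag is set on the version in the parent snapshot, which keeps
 * unlinked inode deletion from removing an inode that a child snapshot
 * still depends on.
 */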
static int update_parent_inode_has_children(struct btree_trans *trans, struct bpos pos,
					    bool have_child)
{
	struct btree_iter iter;
	struct bkey_s_c k = bch2_inode_get_iter_snapshot_parent(trans,
						&iter, pos, BTREE_ITER_with_updates);
	int ret = bkey_err(k);
	if (ret)
		return ret;
	if (!k.k)
		return 0;

	if (!have_child) {
		ret = bch2_inode_has_child_snapshots(trans, k.k->p);
		if (ret) {
			ret = ret < 0 ? ret : 0;
			goto err;
		}
	}

	u64 f = bkey_inode_flags(k);
	if (have_child != !!(f & BCH_INODE_has_child_snapshot)) {
		struct bkey_i *update = bch2_bkey_make_mut(trans, &iter, &k,
					     BTREE_UPDATE_internal_snapshot_node);
		ret = PTR_ERR_OR_ZERO(update);
		if (ret)
			goto err;

		bkey_inode_flags_set(bkey_i_to_s(update), f ^ BCH_INODE_has_child_snapshot);
	}
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

int bch2_trigger_inode(struct btree_trans *trans,
		       enum btree_id btree_id, unsigned level,
		       struct bkey_s_c old,
		       struct bkey_s new,
		       enum btree_iter_update_trigger_flags flags)
{
	struct bch_fs *c = trans->c;

	if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) {
		BUG_ON(!trans->journal_res.seq);
		bkey_s_to_inode_v3(new).v->bi_journal_seq = cpu_to_le64(trans->journal_res.seq);
	}

	s64 nr[1] = { bkey_is_inode(new.k) - bkey_is_inode(old.k) };
	if ((flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) && nr[0]) {
		int ret = bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc, nr, nr_inodes);
		if (ret)
			return ret;
	}

	if (flags & BTREE_TRIGGER_transactional) {
		int unlinked_delta =	(int) bkey_is_unlinked_inode(new.s_c) -
					(int) bkey_is_unlinked_inode(old);
		if (unlinked_delta) {
			int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes,
							      new.k->p, unlinked_delta > 0);
			if (ret)
				return ret;
		}

		/*
		 * If we're creating or deleting an inode at this snapshot ID,
		 * and there might be an inode in a parent snapshot ID, we might
		 * need to set or clear the has_child_snapshot flag on the
		 * parent.
		 */
		int deleted_delta = (int) bkey_is_inode(new.k) -
				    (int) bkey_is_inode(old.k);
		if (deleted_delta &&
		    bch2_snapshot_parent(c, new.k->p.snapshot)) {
			int ret = update_parent_inode_has_children(trans, new.k->p,
								   deleted_delta > 0);
			if (ret)
				return ret;
		}

		/*
		 * When an inode is first updated in a new snapshot, we may need
		 * to clear has_child_snapshot
		 */
		if (deleted_delta > 0) {
			int ret = update_inode_has_children(trans, new, false);
			if (ret)
				return ret;
		}
	}

	return 0;
}

int bch2_inode_generation_validate(struct bch_fs *c, struct bkey_s_c k,
				   struct bkey_validate_context from)
{
	int ret = 0;

	bkey_fsck_err_on(k.k->p.inode,
			 c, inode_pos_inode_nonzero,
			 "nonzero k.p.inode");
fsck_err:
	return ret;
}

void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c,
				   struct bkey_s_c k)
{
	struct bkey_s_c_inode_generation gen = bkey_s_c_to_inode_generation(k);

	prt_printf(out, "generation: %u", le32_to_cpu(gen.v->bi_generation));
}

int bch2_inode_alloc_cursor_validate(struct bch_fs *c, struct bkey_s_c k,
				     struct bkey_validate_context from)
{
	int ret = 0;

	bkey_fsck_err_on(k.k->p.inode != LOGGED_OPS_INUM_inode_cursors,
			 c, inode_alloc_cursor_inode_bad,
			 "k.p.inode bad");
fsck_err:
	return ret;
}

void bch2_inode_alloc_cursor_to_text(struct printbuf *out, struct bch_fs *c,
				     struct bkey_s_c k)
{
	struct bkey_s_c_inode_alloc_cursor i = bkey_s_c_to_inode_alloc_cursor(k);

	prt_printf(out, "idx %llu generation %llu",
		   le64_to_cpu(i.v->idx),
		   le64_to_cpu(i.v->gen));
}

void bch2_inode_init_early(struct bch_fs *c,
			   struct bch_inode_unpacked *inode_u)
{
	enum bch_str_hash_type str_hash =
		bch2_str_hash_opt_to_type(c, c->opts.str_hash);

	memset(inode_u, 0, sizeof(*inode_u));

	SET_INODE_STR_HASH(inode_u, str_hash);
	get_random_bytes(&inode_u->bi_hash_seed, sizeof(inode_u->bi_hash_seed));
}
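
/*
 * Late init fills in timestamps, ownership and mode; when a parent directory
 * is given it also applies setgid inheritance and copies the parent's
 * per-inode IO options (BCH_INODE_OPTS()).
 */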
void bch2_inode_init_late(struct bch_fs *c,
			  struct bch_inode_unpacked *inode_u, u64 now,
			  uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
			  struct bch_inode_unpacked *parent)
{
	inode_u->bi_mode	= mode;
	inode_u->bi_uid		= uid;
	inode_u->bi_gid		= gid;
	inode_u->bi_dev		= rdev;
	inode_u->bi_atime	= now;
	inode_u->bi_mtime	= now;
	inode_u->bi_ctime	= now;
	inode_u->bi_otime	= now;

	if (parent && parent->bi_mode & S_ISGID) {
		inode_u->bi_gid = parent->bi_gid;
		if (S_ISDIR(mode))
			inode_u->bi_mode |= S_ISGID;
	}

	if (parent) {
#define x(_name, ...)	inode_u->bi_##_name = parent->bi_##_name;
		BCH_INODE_OPTS()
#undef x
	}

	if (!S_ISDIR(mode))
		inode_u->bi_casefold = 0;

	if (bch2_inode_casefold(c, inode_u))
		inode_u->bi_flags |= BCH_INODE_has_case_insensitive;
}

void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
		     uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
		     struct bch_inode_unpacked *parent)
{
	bch2_inode_init_early(c, inode_u);
	bch2_inode_init_late(c, inode_u, bch2_current_time(c),
			     uid, gid, mode, rdev, parent);
}
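
/*
 * Inode number allocation is sharded to reduce contention: with 32 bit inode
 * numbers there is a single cursor, otherwise cursors are sharded by CPU
 * (modulo 1 << shard_inode_numbers_bits), each shard owning a contiguous
 * range of the 64 bit number space above INT_MAX: e.g. with
 * shard_inode_numbers_bits == 4, bits == 59 and cpu 2 allocates from
 * [2 << 59, (2 << 59) | ~(~0ULL << 59)].
 */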
static struct bkey_i_inode_alloc_cursor *
bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *max)
{
	struct bch_fs *c = trans->c;

	u64 cursor_idx = c->opts.inodes_32bit ? 0 : cpu + 1;

	cursor_idx &= ~(~0ULL << c->opts.shard_inode_numbers_bits);

	struct btree_iter iter;
	struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter,
					BTREE_ID_logged_ops,
					POS(LOGGED_OPS_INUM_inode_cursors, cursor_idx),
					BTREE_ITER_cached);
	int ret = bkey_err(k);
	if (ret)
		return ERR_PTR(ret);

	struct bkey_i_inode_alloc_cursor *cursor =
		k.k->type == KEY_TYPE_inode_alloc_cursor
		? bch2_bkey_make_mut_typed(trans, &iter, &k, 0, inode_alloc_cursor)
		: bch2_bkey_alloc(trans, &iter, 0, inode_alloc_cursor);
	ret = PTR_ERR_OR_ZERO(cursor);
	if (ret)
		goto err;

	if (c->opts.inodes_32bit) {
		*min = BLOCKDEV_INODE_MAX;
		*max = INT_MAX;
	} else {
		cursor->v.bits = c->opts.shard_inode_numbers_bits;

		unsigned bits = 63 - c->opts.shard_inode_numbers_bits;

		*min = max(cpu << bits, (u64) INT_MAX + 1);
		*max = (cpu << bits) | ~(ULLONG_MAX << bits);
	}

	if (le64_to_cpu(cursor->v.idx) < *min)
		cursor->v.idx = cpu_to_le64(*min);

	if (le64_to_cpu(cursor->v.idx) >= *max) {
		cursor->v.idx = cpu_to_le64(*min);
		le32_add_cpu(&cursor->v.gen, 1);
	}
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret ? ERR_PTR(ret) : cursor;
}

/*
 * This just finds an empty slot:
 */
int bch2_inode_create(struct btree_trans *trans,
		      struct btree_iter *iter,
		      struct bch_inode_unpacked *inode_u,
		      u32 snapshot, u64 cpu)
{
	u64 min, max;
	struct bkey_i_inode_alloc_cursor *cursor =
		bch2_inode_alloc_cursor_get(trans, cpu, &min, &max);
	int ret = PTR_ERR_OR_ZERO(cursor);
	if (ret)
		return ret;

	u64 start = le64_to_cpu(cursor->v.idx);
	u64 pos = start;

	bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, pos),
			     BTREE_ITER_all_snapshots|
			     BTREE_ITER_intent);
	struct bkey_s_c k;
again:
	while ((k = bch2_btree_iter_peek(trans, iter)).k &&
	       !(ret = bkey_err(k)) &&
	       bkey_lt(k.k->p, POS(0, max))) {
		if (pos < iter->pos.offset)
			goto found_slot;

		/*
		 * We don't need to iterate over keys in every snapshot once
		 * we've found just one:
		 */
		pos = iter->pos.offset + 1;
		bch2_btree_iter_set_pos(trans, iter, POS(0, pos));
	}

	if (!ret && pos < max)
		goto found_slot;

	if (!ret && start == min)
		ret = bch_err_throw(trans->c, ENOSPC_inode_create);

	if (ret) {
		bch2_trans_iter_exit(trans, iter);
		return ret;
	}

	/* Retry from start */
	pos = start = min;
	bch2_btree_iter_set_pos(trans, iter, POS(0, pos));
	le32_add_cpu(&cursor->v.gen, 1);
	goto again;
found_slot:
	bch2_btree_iter_set_pos(trans, iter, SPOS(0, pos, snapshot));
	k = bch2_btree_iter_peek_slot(trans, iter);
	ret = bkey_err(k);
	if (ret) {
		bch2_trans_iter_exit(trans, iter);
		return ret;
	}

	inode_u->bi_inum	= k.k->p.offset;
	inode_u->bi_generation	= le64_to_cpu(cursor->v.gen);
	cursor->v.idx		= cpu_to_le64(k.k->p.offset + 1);
	return 0;
}

static int bch2_inode_delete_keys(struct btree_trans *trans,
				  subvol_inum inum, enum btree_id id)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bkey_i delete;
	struct bpos end = POS(inum.inum, U64_MAX);
	u32 snapshot;
	int ret = 0;

	/*
	 * We're never going to be deleting partial extents, no need to use an
	 * extent iterator:
	 */
	bch2_trans_iter_init(trans, &iter, id, POS(inum.inum, 0),
			     BTREE_ITER_intent);

	while (1) {
		bch2_trans_begin(trans);

		ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
		if (ret)
			goto err;

		bch2_btree_iter_set_snapshot(trans, &iter, snapshot);

		k = bch2_btree_iter_peek_max(trans, &iter, end);
		ret = bkey_err(k);
		if (ret)
			goto err;

		if (!k.k)
			break;

		bkey_init(&delete.k);
		delete.k.p = iter.pos;

		if (iter.flags & BTREE_ITER_is_extents)
			bch2_key_resize(&delete.k,
					bpos_min(end, k.k->p).offset -
					iter.pos.offset);

		ret   = bch2_trans_update(trans, &iter, &delete, 0) ?:
			bch2_trans_commit(trans, NULL, NULL,
					  BCH_TRANS_COMMIT_no_enospc);
err:
		if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
			break;
	}

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
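
/*
 * Unlinked inode deletion: delete the contents of each btree for this inode
 * (extents, xattrs, dirents) one commit at a time, then the inode key itself,
 * then any unlinked versions left in ancestor snapshots.
 */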
int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter iter = {};
	struct bkey_s_c k;
	u32 snapshot;
	int ret;

	ret = lockrestart_do(trans, may_delete_deleted_inum(trans, inum));
	if (ret)
		goto err2;

	/*
	 * If this was a directory, there shouldn't be any real dirents left -
	 * but there could be whiteouts (from hash collisions) that we should
	 * delete:
	 *
	 * XXX: the dirent code ideally would delete whiteouts when they're no
	 * longer needed
	 */
	ret   = bch2_inode_delete_keys(trans, inum, BTREE_ID_extents) ?:
		bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs) ?:
		bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents);
	if (ret)
		goto err2;
retry:
	bch2_trans_begin(trans);

	ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
	if (ret)
		goto err;

	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
			       SPOS(0, inum.inum, snapshot),
			       BTREE_ITER_intent|BTREE_ITER_cached);
	ret = bkey_err(k);
	if (ret)
		goto err;

	if (!bkey_is_inode(k.k)) {
		bch2_fs_inconsistent(c,
				     "inode %llu:%u not found when deleting",
				     inum.inum, snapshot);
		ret = bch_err_throw(c, ENOENT_inode);
		goto err;
	}

	ret   = bch2_btree_delete_at(trans, &iter, 0) ?:
		bch2_trans_commit(trans, NULL, NULL,
				  BCH_TRANS_COMMIT_no_enospc);
err:
	bch2_trans_iter_exit(trans, &iter);
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		goto retry;

	if (ret)
		goto err2;

	ret = delete_ancestor_snapshot_inodes(trans, SPOS(0, inum.inum, snapshot));
err2:
	bch2_trans_put(trans);
	return ret;
}

int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi)
{
	if (bi->bi_flags & BCH_INODE_unlinked)
		bi->bi_flags &= ~BCH_INODE_unlinked;
	else {
		if (bi->bi_nlink == U32_MAX)
			return -EINVAL;

		bi->bi_nlink++;
	}

	return 0;
}

void bch2_inode_nlink_dec(struct btree_trans *trans, struct bch_inode_unpacked *bi)
{
	if (bi->bi_nlink && (bi->bi_flags & BCH_INODE_unlinked)) {
		bch2_trans_inconsistent(trans, "inode %llu unlinked but link count nonzero",
					bi->bi_inum);
		return;
	}

	if (bi->bi_flags & BCH_INODE_unlinked) {
		bch2_trans_inconsistent(trans, "inode %llu link count underflow", bi->bi_inum);
		return;
	}

	if (bi->bi_nlink)
		bi->bi_nlink--;
	else
		bi->bi_flags |= BCH_INODE_unlinked;
}

struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *inode)
{
	struct bch_opts ret = { 0 };
#define x(_name, _bits)							\
	if (inode->bi_##_name)						\
		opt_set(ret, _name, inode->bi_##_name - 1);
	BCH_INODE_OPTS()
#undef x
	return ret;
}

void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c,
			 struct bch_inode_unpacked *inode)
{
#define x(_name, _bits)							\
	if ((inode)->bi_##_name) {					\
		opts->_name = inode->bi_##_name - 1;			\
		opts->_name##_from_inode = true;			\
	} else {							\
		opts->_name = c->opts._name;				\
		opts->_name##_from_inode = false;			\
	}
	BCH_INODE_OPTS()
#undef x

	bch2_io_opts_fixups(opts);
}

int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_io_opts *opts)
{
	struct bch_inode_unpacked inode;
	int ret = lockrestart_do(trans, bch2_inode_find_by_inum_trans(trans, inum, &inode));

	if (ret)
		return ret;

	bch2_inode_opts_get(opts, trans->c, &inode);
	return 0;
}

int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum,
			    struct bch_inode_unpacked *bi, unsigned v)
{
	struct bch_fs *c = trans->c;

#ifndef CONFIG_UNICODE
	bch_err(c, "Cannot use casefolding on a kernel without CONFIG_UNICODE");
	return -EOPNOTSUPP;
#endif

	if (c->opts.casefold_disabled)
		return -EOPNOTSUPP;

	int ret = 0;
	/* Not supported on individual files. */
	if (!S_ISDIR(bi->bi_mode))
		return -EOPNOTSUPP;

	/*
	 * Make sure the dir is empty, as otherwise we'd need to
	 * rehash everything and update the dirent keys.
	 */
	ret = bch2_empty_dir_trans(trans, inum);
	if (ret < 0)
		return ret;

	ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding);
	if (ret)
		return ret;

	bch2_check_set_feature(c, BCH_FEATURE_casefolding);

	bi->bi_casefold = v + 1;
	bi->bi_fields_set |= BIT(Inode_opt_casefold);

	return bch2_maybe_propagate_has_case_insensitive(trans, inum, bi);
}

static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter = {};
	struct bkey_i_inode_generation delete;
	struct bch_inode_unpacked inode_u;
	struct bkey_s_c k;
	int ret;

	do {
		ret   = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
						      SPOS(inum, 0, snapshot),
						      SPOS(inum, U64_MAX, snapshot),
						      0, NULL) ?:
			bch2_btree_delete_range_trans(trans, BTREE_ID_dirents,
						      SPOS(inum, 0, snapshot),
						      SPOS(inum, U64_MAX, snapshot),
						      0, NULL) ?:
			bch2_btree_delete_range_trans(trans, BTREE_ID_xattrs,
						      SPOS(inum, 0, snapshot),
						      SPOS(inum, U64_MAX, snapshot),
						      0, NULL);
	} while (ret == -BCH_ERR_transaction_restart_nested);
	if (ret)
		goto err;
retry:
	bch2_trans_begin(trans);

	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
			       SPOS(0, inum, snapshot), BTREE_ITER_intent);
	ret = bkey_err(k);
	if (ret)
		goto err;

	if (!bkey_is_inode(k.k)) {
		bch2_fs_inconsistent(c,
				     "inode %llu:%u not found when deleting",
				     inum, snapshot);
		ret = bch_err_throw(c, ENOENT_inode);
		goto err;
	}

	bch2_inode_unpack(k, &inode_u);

	/* Subvolume root? */
	if (inode_u.bi_subvol)
		bch_warn(c, "deleting inode %llu marked as unlinked, but also a subvolume root!?", inode_u.bi_inum);

	bkey_inode_generation_init(&delete.k_i);
	delete.k.p = iter.pos;
	delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);

	ret   = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?:
		bch2_trans_commit(trans, NULL, NULL,
				  BCH_TRANS_COMMIT_no_enospc);
err:
	bch2_trans_iter_exit(trans, &iter);
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		goto retry;

	return ret ?: -BCH_ERR_transaction_restart_nested;
}

/*
 * After deleting an inode, there may be versions in older snapshots that should
 * also be deleted - if they're not referenced by sibling snapshots and not open
 * in other subvolumes:
 */
static int delete_ancestor_snapshot_inodes(struct btree_trans *trans, struct bpos pos)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;
next_parent:
	ret = lockrestart_do(trans,
		bkey_err(k = bch2_inode_get_iter_snapshot_parent(trans, &iter, pos, 0)));
	if (ret || !k.k)
		return ret;

	bool unlinked = bkey_is_unlinked_inode(k);
	pos = k.k->p;
	bch2_trans_iter_exit(trans, &iter);

	if (!unlinked)
		return 0;

	ret = lockrestart_do(trans, bch2_inode_or_descendents_is_open(trans, pos));
	if (ret)
		return ret < 0 ? ret : 0;

	ret = __bch2_inode_rm_snapshot(trans, pos.offset, pos.snapshot);
	if (ret)
		return ret;
	goto next_parent;
}

int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot)
{
	return   __bch2_inode_rm_snapshot(trans, inum, snapshot) ?:
		delete_ancestor_snapshot_inodes(trans, SPOS(0, inum, snapshot));
}
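
/*
 * Check whether it's safe to delete: the inode must exist, be unlinked, have
 * no child snapshots, and directories must be empty. When called on behalf
 * of the deleted_inodes btree (@from_deleted_inodes), a failed check is a
 * fsck error and the stale deleted_inodes entry is removed.
 */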
static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos,
				    bool from_deleted_inodes)
{
	struct bch_fs *c = trans->c;
	struct btree_iter inode_iter;
	struct bkey_s_c k;
	struct bch_inode_unpacked inode;
	struct printbuf buf = PRINTBUF;
	int ret;

	k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, pos, BTREE_ITER_cached);
	ret = bkey_err(k);
	if (ret)
		return ret;

	ret = bkey_is_inode(k.k) ? 0 : bch_err_throw(c, ENOENT_inode);
	if (fsck_err_on(from_deleted_inodes && ret,
			trans, deleted_inode_missing,
			"nonexistent inode %llu:%u in deleted_inodes btree",
			pos.offset, pos.snapshot))
		goto delete;
	if (ret)
		goto out;

	ret = bch2_inode_unpack(k, &inode);
	if (ret)
		goto out;

	if (S_ISDIR(inode.bi_mode)) {
		ret = bch2_empty_dir_snapshot(trans, pos.offset, 0, pos.snapshot);
		if (fsck_err_on(from_deleted_inodes &&
				bch2_err_matches(ret, ENOTEMPTY),
				trans, deleted_inode_is_dir,
				"non empty directory %llu:%u in deleted_inodes btree",
				pos.offset, pos.snapshot))
			goto delete;
		if (ret)
			goto out;
	}

	ret = inode.bi_flags & BCH_INODE_unlinked ? 0 : bch_err_throw(c, inode_not_unlinked);
	if (fsck_err_on(from_deleted_inodes && ret,
			trans, deleted_inode_not_unlinked,
			"non-deleted inode %llu:%u in deleted_inodes btree",
			pos.offset, pos.snapshot))
		goto delete;
	if (ret)
		goto out;

	ret = !(inode.bi_flags & BCH_INODE_has_child_snapshot)
		? 0 : bch_err_throw(c, inode_has_child_snapshot);

	if (fsck_err_on(from_deleted_inodes && ret,
			trans, deleted_inode_has_child_snapshots,
			"inode with child snapshots %llu:%u in deleted_inodes btree",
			pos.offset, pos.snapshot))
		goto delete;
	if (ret)
		goto out;

	ret = bch2_inode_has_child_snapshots(trans, k.k->p);
	if (ret < 0)
		goto out;

	if (ret) {
		if (fsck_err(trans, inode_has_child_snapshots_wrong,
			     "inode has_child_snapshots flag wrong (should be set)\n%s",
			     (printbuf_reset(&buf),
			      bch2_inode_unpacked_to_text(&buf, &inode),
			      buf.buf))) {
			inode.bi_flags |= BCH_INODE_has_child_snapshot;
			ret = __bch2_fsck_write_inode(trans, &inode);
			if (ret)
				goto out;
		}

		if (!from_deleted_inodes) {
			ret   = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
				bch_err_throw(c, inode_has_child_snapshot);
			goto out;
		}

		goto delete;

	}

	if (from_deleted_inodes) {
		if (test_bit(BCH_FS_clean_recovery, &c->flags) &&
		    !fsck_err(trans, deleted_inode_but_clean,
			      "filesystem marked as clean but have deleted inode %llu:%u",
			      pos.offset, pos.snapshot)) {
			ret = 0;
			goto out;
		}

		ret = 1;
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &inode_iter);
	printbuf_exit(&buf);
	return ret;
delete:
	ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, pos, false);
	goto out;
}

static int may_delete_deleted_inum(struct btree_trans *trans, subvol_inum inum)
{
	u32 snapshot;

	return bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot) ?:
		may_delete_deleted_inode(trans, SPOS(0, inum.inum, snapshot), false);
}

int bch2_delete_dead_inodes(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	int ret;

	/*
	 * if we ran check_inodes() unlinked inodes will have already been
	 * cleaned up but the write buffer will be out of sync; therefore we
	 * always need a write buffer flush
	 */
	ret = bch2_btree_write_buffer_flush_sync(trans);
	if (ret)
		goto err;

	/*
	 * Weird transaction restart handling here because on successful delete,
	 * bch2_inode_rm_snapshot() will return a nested transaction restart,
	 * but we can't retry because the btree write buffer won't have been
	 * flushed and we'd spin:
	 */
	ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
					BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
					NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
		ret = may_delete_deleted_inode(trans, k.k->p, true);
		if (ret > 0) {
			bch_verbose_ratelimited(c, "deleting unlinked inode %llu:%u",
						k.k->p.offset, k.k->p.snapshot);

			ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot);
			/*
			 * We don't want to loop here: a transaction restart
			 * error here means we handled a transaction restart and
			 * we're actually done, but if we loop we'll retry the
			 * same key because the write buffer hasn't been flushed
			 * yet
			 */
			if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
				ret = 0;
				continue;
			}
		}

		ret;
	}));
err:
	bch2_trans_put(trans);
	bch_err_fn(c, ret);
	return ret;
}