// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "bkey_buf.h"
#include "bkey_methods.h"
#include "btree_update.h"
#include "extents.h"
#include "dirent.h"
#include "fs.h"
#include "keylist.h"
#include "str_hash.h"
#include "subvolume.h"

#include <linux/dcache.h>

/*
 * Returns the length of the dirent name, not counting the trailing NUL
 * padding out to the end of the value:
 */
static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
{
	if (bkey_val_bytes(d.k) < offsetof(struct bch_dirent, d_name))
		return 0;

	unsigned bkey_u64s = bkey_val_u64s(d.k);
	unsigned bkey_bytes = bkey_u64s * sizeof(u64);
	u64 last_u64 = ((u64*)d.v)[bkey_u64s - 1];
#if CPU_BIG_ENDIAN
	unsigned trailing_nuls = last_u64 ? __builtin_ctzll(last_u64) / 8 : 64 / 8;
#else
	unsigned trailing_nuls = last_u64 ? __builtin_clzll(last_u64) / 8 : 64 / 8;
#endif

	return bkey_bytes -
		offsetof(struct bch_dirent, d_name) -
		trailing_nuls;
}

struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d)
{
	return (struct qstr) QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d));
}

static u64 bch2_dirent_hash(const struct bch_hash_info *info,
			    const struct qstr *name)
{
	struct bch_str_hash_ctx ctx;

	bch2_str_hash_init(&ctx, info);
	bch2_str_hash_update(&ctx, info, name->name, name->len);

	/* [0,2) reserved for dots */
	return max_t(u64, bch2_str_hash_end(&ctx, info), 2);
}

static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key)
{
	return bch2_dirent_hash(info, key);
}

static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
{
	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
	struct qstr name = bch2_dirent_get_name(d);

	return bch2_dirent_hash(info, &name);
}

static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r)
{
	struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
	const struct qstr l_name = bch2_dirent_get_name(l);
	const struct qstr *r_name = _r;

	return !qstr_eq(l_name, *r_name);
}

static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
{
	struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
	struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r);
	const struct qstr l_name = bch2_dirent_get_name(l);
	const struct qstr r_name = bch2_dirent_get_name(r);

	return !qstr_eq(l_name, r_name);
}

/* Dirents that point to subvolumes are only visible from the parent subvolume: */
static bool dirent_is_visible(subvol_inum inum, struct bkey_s_c k)
{
	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);

	if (d.v->d_type == DT_SUBVOL)
		return le32_to_cpu(d.v->d_parent_subvol) == inum.subvol;
	return true;
}

const struct bch_hash_desc bch2_dirent_hash_desc = {
	.btree_id	= BTREE_ID_dirents,
	.key_type	= KEY_TYPE_dirent,
	.hash_key	= dirent_hash_key,
	.hash_bkey	= dirent_hash_bkey,
	.cmp_key	= dirent_cmp_key,
	.cmp_bkey	= dirent_cmp_bkey,
	.is_visible	= dirent_is_visible,
};

int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k,
			enum bch_validate_flags flags,
			struct printbuf *err)
{
	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
	struct qstr d_name = bch2_dirent_get_name(d);
	int ret = 0;

	bkey_fsck_err_on(!d_name.len, c, err,
			 dirent_empty_name,
			 "empty name");

	bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), c, err,
			 dirent_val_too_big,
			 "value too big (%zu > %u)",
			 bkey_val_u64s(k.k), dirent_val_u64s(d_name.len));

	/*
	 * Check that new keys don't exceed the max length
	 * (older keys may be larger):
	 */
	bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, c, err,
			 dirent_name_too_long,
			 "dirent name too big (%u > %u)",
			 d_name.len, BCH_NAME_MAX);

	bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), c, err,
			 dirent_name_embedded_nul,
			 "dirent has stray data after name's NUL");

	bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) ||
			 (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), c, err,
			 dirent_name_dot_or_dotdot,
			 "invalid name");

	bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), c, err,
			 dirent_name_has_slash,
			 "name with /");

	bkey_fsck_err_on(d.v->d_type != DT_SUBVOL &&
			 le64_to_cpu(d.v->d_inum) == d.k->p.inode, c, err,
			 dirent_to_itself,
			 "dirent points to own directory");
fsck_err:
	return ret;
}

void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
	struct qstr d_name = bch2_dirent_get_name(d);

	prt_printf(out, "%.*s -> ", d_name.len, d_name.name);

	if (d.v->d_type != DT_SUBVOL)
		prt_printf(out, "%llu", le64_to_cpu(d.v->d_inum));
	else
		prt_printf(out, "%u -> %u",
			   le32_to_cpu(d.v->d_parent_subvol),
			   le32_to_cpu(d.v->d_child_subvol));

	prt_printf(out, " type %s", bch2_d_type_str(d.v->d_type));
}

/*
 * Allocate a new dirent key in transaction memory and initialize the value;
 * the key's position is filled in later by the caller or the hash code:
 */
static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
				subvol_inum dir, u8 type,
				const struct qstr *name, u64 dst)
{
	struct bkey_i_dirent *dirent;
	unsigned u64s = BKEY_U64s + dirent_val_u64s(name->len);

	if (name->len > BCH_NAME_MAX)
		return ERR_PTR(-ENAMETOOLONG);

	BUG_ON(u64s > U8_MAX);

	dirent = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
	if (IS_ERR(dirent))
		return dirent;

	bkey_dirent_init(&dirent->k_i);
	dirent->k.u64s = u64s;

	if (type != DT_SUBVOL) {
		dirent->v.d_inum = cpu_to_le64(dst);
	} else {
		dirent->v.d_parent_subvol = cpu_to_le32(dir.subvol);
		dirent->v.d_child_subvol = cpu_to_le32(dst);
	}

	dirent->v.d_type = type;

	memcpy(dirent->v.d_name, name->name, name->len);
	memset(dirent->v.d_name + name->len, 0,
	       bkey_val_bytes(&dirent->k) -
	       offsetof(struct bch_dirent, d_name) -
	       name->len);

	EBUG_ON(bch2_dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len);

	return dirent;
}

int bch2_dirent_create_snapshot(struct btree_trans *trans,
			u32 dir_subvol, u64 dir, u32 snapshot,
			const struct bch_hash_info *hash_info,
			u8 type, const struct qstr *name, u64 dst_inum,
			u64 *dir_offset,
			enum btree_iter_update_trigger_flags flags)
{
	subvol_inum dir_inum = { .subvol = dir_subvol, .inum = dir };
	struct bkey_i_dirent *dirent;
	int ret;

	dirent = dirent_create_key(trans, dir_inum, type, name, dst_inum);
	ret = PTR_ERR_OR_ZERO(dirent);
	if (ret)
		return ret;

	dirent->k.p.inode	= dir;
	dirent->k.p.snapshot	= snapshot;

	ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
					dir_inum, snapshot, &dirent->k_i,
					flags|BTREE_UPDATE_internal_snapshot_node);
	*dir_offset = dirent->k.p.offset;

	return ret;
}

int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
		       const struct bch_hash_info *hash_info,
		       u8 type, const struct qstr *name, u64 dst_inum,
		       u64 *dir_offset,
		       enum btree_iter_update_trigger_flags flags)
{
	struct bkey_i_dirent *dirent;
	int ret;

	dirent = dirent_create_key(trans, dir, type, name, dst_inum);
	ret = PTR_ERR_OR_ZERO(dirent);
	if (ret)
		return ret;

	ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info,
			    dir, &dirent->k_i, flags);
	*dir_offset = dirent->k.p.offset;

	return ret;
}

static void dirent_copy_target(struct bkey_i_dirent *dst,
			       struct bkey_s_c_dirent src)
{
	dst->v.d_inum = src.v->d_inum;
	dst->v.d_type = src.v->d_type;
}

int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
			    struct bkey_s_c_dirent d, subvol_inum *target)
{
	struct bch_subvolume s;
	int ret = 0;

	if (d.v->d_type == DT_SUBVOL &&
	    le32_to_cpu(d.v->d_parent_subvol) != dir.subvol)
		return 1;

	if (likely(d.v->d_type != DT_SUBVOL)) {
		target->subvol	= dir.subvol;
		target->inum	= le64_to_cpu(d.v->d_inum);
	} else {
		target->subvol	= le32_to_cpu(d.v->d_child_subvol);

		ret = bch2_subvolume_get(trans, target->subvol, true, BTREE_ITER_cached, &s);

		target->inum	= le64_to_cpu(s.inode);
	}

	return ret;
}

int bch2_dirent_rename(struct btree_trans *trans,
		subvol_inum src_dir, struct bch_hash_info *src_hash,
		subvol_inum dst_dir, struct bch_hash_info *dst_hash,
		const struct qstr *src_name, subvol_inum *src_inum, u64 *src_offset,
		const struct qstr *dst_name, subvol_inum *dst_inum, u64 *dst_offset,
		enum bch_rename_mode mode)
{
	struct btree_iter src_iter = { NULL };
	struct btree_iter dst_iter = { NULL };
	struct bkey_s_c old_src, old_dst = bkey_s_c_null;
	struct bkey_i_dirent *new_src = NULL, *new_dst = NULL;
	struct bpos dst_pos =
		POS(dst_dir.inum, bch2_dirent_hash(dst_hash, dst_name));
	unsigned src_update_flags = 0;
	bool delete_src, delete_dst;
	int ret = 0;

	memset(src_inum, 0, sizeof(*src_inum));
	memset(dst_inum, 0, sizeof(*dst_inum));

	/* Lookup src: */
	old_src = bch2_hash_lookup(trans, &src_iter, bch2_dirent_hash_desc,
				   src_hash, src_dir, src_name,
				   BTREE_ITER_intent);
	ret = bkey_err(old_src);
	if (ret)
		goto out;

	ret = bch2_dirent_read_target(trans, src_dir,
				      bkey_s_c_to_dirent(old_src), src_inum);
	if (ret)
		goto out;

	/* Lookup dst: */
	if (mode == BCH_RENAME) {
		/*
		 * Note that we're _not_ checking if the target already exists -
		 * we're relying on the VFS to do that check for us for
		 * correctness:
		 */
		ret = bch2_hash_hole(trans, &dst_iter, bch2_dirent_hash_desc,
				     dst_hash, dst_dir, dst_name);
		if (ret)
			goto out;
	} else {
		old_dst = bch2_hash_lookup(trans, &dst_iter, bch2_dirent_hash_desc,
					   dst_hash, dst_dir, dst_name,
					   BTREE_ITER_intent);
		ret = bkey_err(old_dst);
		if (ret)
			goto out;

		ret = bch2_dirent_read_target(trans, dst_dir,
					      bkey_s_c_to_dirent(old_dst), dst_inum);
		if (ret)
			goto out;
	}

	if (mode != BCH_RENAME_EXCHANGE)
		*src_offset = dst_iter.pos.offset;

	/* Create new dst key: */
	new_dst = dirent_create_key(trans, dst_dir, 0, dst_name, 0);
	ret = PTR_ERR_OR_ZERO(new_dst);
	if (ret)
		goto out;

	dirent_copy_target(new_dst, bkey_s_c_to_dirent(old_src));
	new_dst->k.p = dst_iter.pos;

	/* Create new src key: */
	if (mode == BCH_RENAME_EXCHANGE) {
		new_src = dirent_create_key(trans, src_dir, 0, src_name, 0);
		ret = PTR_ERR_OR_ZERO(new_src);
		if (ret)
			goto out;

		dirent_copy_target(new_src, bkey_s_c_to_dirent(old_dst));
		new_src->k.p = src_iter.pos;
	} else {
		new_src = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
		ret = PTR_ERR_OR_ZERO(new_src);
		if (ret)
			goto out;

		bkey_init(&new_src->k);
		new_src->k.p = src_iter.pos;

		if (bkey_le(dst_pos, src_iter.pos) &&
		    bkey_lt(src_iter.pos, dst_iter.pos)) {
			/*
			 * We have a hash collision for the new dst key,
			 * and new_src - the key we're deleting - is between
			 * new_dst's hashed slot and the slot we're going to be
			 * inserting it into - oops.  This will break the hash
			 * table if we don't deal with it:
			 */
			if (mode == BCH_RENAME) {
				/*
				 * If we're not overwriting, we can just insert
				 * new_dst at the src position:
				 */
				new_src = new_dst;
				new_src->k.p = src_iter.pos;
				goto out_set_src;
			} else {
				/*
				 * If we're overwriting, we can't insert new_dst
				 * at a different slot because it has to
				 * overwrite old_dst - just make sure to use a
				 * whiteout when deleting src:
				 */
				new_src->k.type = KEY_TYPE_hash_whiteout;
			}
		} else {
			/* Check if we need a whiteout to delete src: */
			ret = bch2_hash_needs_whiteout(trans, bch2_dirent_hash_desc,
						       src_hash, &src_iter);
			if (ret < 0)
				goto out;

			if (ret)
				new_src->k.type = KEY_TYPE_hash_whiteout;
		}
	}

	if (new_dst->v.d_type == DT_SUBVOL)
		new_dst->v.d_parent_subvol = cpu_to_le32(dst_dir.subvol);

	if ((mode == BCH_RENAME_EXCHANGE) &&
	    new_src->v.d_type == DT_SUBVOL)
		new_src->v.d_parent_subvol = cpu_to_le32(src_dir.subvol);

	ret = bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0);
	if (ret)
		goto out;
out_set_src:
	/*
	 * If we're deleting a subvolume, we need to really delete the dirent,
	 * not just emit a whiteout in the current snapshot - there can only be
	 * a single dirent that points to a given subvolume.
	 *
	 * IOW, we don't maintain multiple versions in different snapshots of
	 * dirents that point to subvolumes - dirents that point to subvolumes
	 * are only visible in one particular subvolume, so it's not necessary,
	 * and it would be particularly confusing for fsck to have to deal with.
	 */
	delete_src = bkey_s_c_to_dirent(old_src).v->d_type == DT_SUBVOL &&
		new_src->k.p.snapshot != old_src.k->p.snapshot;

	delete_dst = old_dst.k &&
		bkey_s_c_to_dirent(old_dst).v->d_type == DT_SUBVOL &&
		new_dst->k.p.snapshot != old_dst.k->p.snapshot;

	if (!delete_src || !bkey_deleted(&new_src->k)) {
		ret = bch2_trans_update(trans, &src_iter, &new_src->k_i, src_update_flags);
		if (ret)
			goto out;
	}

	if (delete_src) {
		bch2_btree_iter_set_snapshot(&src_iter, old_src.k->p.snapshot);
		ret = bch2_btree_iter_traverse(&src_iter) ?:
			bch2_btree_delete_at(trans, &src_iter,
					     BTREE_UPDATE_internal_snapshot_node);
		if (ret)
			goto out;
	}

	if (delete_dst) {
		bch2_btree_iter_set_snapshot(&dst_iter, old_dst.k->p.snapshot);
		ret = bch2_btree_iter_traverse(&dst_iter) ?:
			bch2_btree_delete_at(trans, &dst_iter,
					     BTREE_UPDATE_internal_snapshot_node);
		if (ret)
			goto out;
	}

	if (mode == BCH_RENAME_EXCHANGE)
		*src_offset = new_src->k.p.offset;
	*dst_offset = new_dst->k.p.offset;
out:
	bch2_trans_iter_exit(trans, &src_iter);
	bch2_trans_iter_exit(trans, &dst_iter);
	return ret;
}

int bch2_dirent_lookup_trans(struct btree_trans *trans,
			     struct btree_iter *iter,
			     subvol_inum dir,
			     const struct bch_hash_info *hash_info,
			     const struct qstr *name, subvol_inum *inum,
			     unsigned flags)
{
	struct bkey_s_c k = bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc,
					     hash_info, dir, name, flags);
	int ret = bkey_err(k);
	if (ret)
		goto err;

	ret = bch2_dirent_read_target(trans, dir, bkey_s_c_to_dirent(k), inum);
	if (ret > 0)
		ret = -ENOENT;
err:
	if (ret)
		bch2_trans_iter_exit(trans, iter);
	return ret;
}

u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir,
		       const struct bch_hash_info *hash_info,
		       const struct qstr *name, subvol_inum *inum)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter iter = { NULL };

	int ret = lockrestart_do(trans,
		bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, name, inum, 0));
	bch2_trans_iter_exit(trans, &iter);
	bch2_trans_put(trans);
	return ret;
}

int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 subvol, u32 snapshot)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents,
					  SPOS(dir, 0, snapshot),
					  POS(dir, U64_MAX), 0, k, ret)
		if (k.k->type == KEY_TYPE_dirent) {
			struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
			if (d.v->d_type == DT_SUBVOL && le32_to_cpu(d.v->d_parent_subvol) != subvol)
				continue;
			ret = -BCH_ERR_ENOTEMPTY_dir_not_empty;
			break;
		}
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}

int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
{
	u32 snapshot;

	return bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot) ?:
		bch2_empty_dir_snapshot(trans, dir.inum, dir.subvol, snapshot);
}

static int bch2_dir_emit(struct dir_context *ctx, struct bkey_s_c_dirent d, subvol_inum target)
{
	struct qstr name = bch2_dirent_get_name(d);
	/*
	 * Although not required by the kernel code, updating ctx->pos is
	 * needed for the bcachefs FUSE driver. Without this update, the FUSE
	 * implementation gets stuck in an infinite loop when reading
	 * directories (via the bcachefs_fuse_readdir callback).
	 * In kernel space, ctx->pos is updated by the VFS code.
	 */
	ctx->pos = d.k->p.offset;
	bool ret = dir_emit(ctx, name.name,
			    name.len,
			    target.inum,
			    vfs_d_type(d.v->d_type));
	if (ret)
		ctx->pos = d.k->p.offset + 1;
	return ret;
}

int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter iter;
	struct bkey_s_c k;
	subvol_inum target;
	u32 snapshot;
	struct bkey_buf sk;
	int ret;

	bch2_bkey_buf_init(&sk);
retry:
	bch2_trans_begin(trans);

	ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
	if (ret)
		goto err;

	for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents,
					  SPOS(inum.inum, ctx->pos, snapshot),
					  POS(inum.inum, U64_MAX), 0, k, ret) {
		if (k.k->type != KEY_TYPE_dirent)
			continue;

		/* dir_emit() can fault and block: */
		bch2_bkey_buf_reassemble(&sk, c, k);
		struct bkey_s_c_dirent dirent = bkey_i_to_s_c_dirent(sk.k);

		ret = bch2_dirent_read_target(trans, inum, dirent, &target);
		if (ret < 0)
			break;
		if (ret)
			continue;

		/*
		 * read_target looks up subvolumes; we can overflow paths if
		 * the directory has many subvolumes in it.
		 *
		 * XXX: btree_trans_too_many_iters() is something we'd like to
		 * get rid of, and there's no good reason to be using it here
		 * except that we don't yet have a for_each_btree_key() helper
		 * that does subvolume_get_snapshot().
		 */
		ret = drop_locks_do(trans,
				bch2_dir_emit(ctx, dirent, target)) ?:
			btree_trans_too_many_iters(trans);
		if (ret) {
			ret = ret < 0 ? ret : 0;
			break;
		}
	}
	bch2_trans_iter_exit(trans, &iter);
err:
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		goto retry;

	bch2_trans_put(trans);
	bch2_bkey_buf_exit(&sk, c);

	return ret;
}