1 // SPDX-License-Identifier: GPL-2.0 2 3 #include "bcachefs.h" 4 #include "bkey_buf.h" 5 #include "bkey_methods.h" 6 #include "btree_update.h" 7 #include "extents.h" 8 #include "dirent.h" 9 #include "fs.h" 10 #include "keylist.h" 11 #include "str_hash.h" 12 #include "subvolume.h" 13 14 #include <linux/dcache.h> 15 16 static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) 17 { 18 unsigned bkey_u64s = bkey_val_u64s(d.k); 19 unsigned bkey_bytes = bkey_u64s * sizeof(u64); 20 u64 last_u64 = ((u64*)d.v)[bkey_u64s - 1]; 21 #if CPU_BIG_ENDIAN 22 unsigned trailing_nuls = last_u64 ? __builtin_ctzll(last_u64) / 8 : 64 / 8; 23 #else 24 unsigned trailing_nuls = last_u64 ? __builtin_clzll(last_u64) / 8 : 64 / 8; 25 #endif 26 27 return bkey_bytes - 28 offsetof(struct bch_dirent, d_name) - 29 trailing_nuls; 30 } 31 32 struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d) 33 { 34 return (struct qstr) QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d)); 35 } 36 37 static u64 bch2_dirent_hash(const struct bch_hash_info *info, 38 const struct qstr *name) 39 { 40 struct bch_str_hash_ctx ctx; 41 42 bch2_str_hash_init(&ctx, info); 43 bch2_str_hash_update(&ctx, info, name->name, name->len); 44 45 /* [0,2) reserved for dots */ 46 return max_t(u64, bch2_str_hash_end(&ctx, info), 2); 47 } 48 49 static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key) 50 { 51 return bch2_dirent_hash(info, key); 52 } 53 54 static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k) 55 { 56 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); 57 struct qstr name = bch2_dirent_get_name(d); 58 59 return bch2_dirent_hash(info, &name); 60 } 61 62 static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r) 63 { 64 struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l); 65 const struct qstr l_name = bch2_dirent_get_name(l); 66 const struct qstr *r_name = _r; 67 68 return l_name.len - r_name->len ?: memcmp(l_name.name, r_name->name, l_name.len); 69 } 70 71 static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) 72 { 73 struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l); 74 struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r); 75 const struct qstr l_name = bch2_dirent_get_name(l); 76 const struct qstr r_name = bch2_dirent_get_name(r); 77 78 return l_name.len - r_name.len ?: memcmp(l_name.name, r_name.name, l_name.len); 79 } 80 81 static bool dirent_is_visible(subvol_inum inum, struct bkey_s_c k) 82 { 83 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); 84 85 if (d.v->d_type == DT_SUBVOL) 86 return le32_to_cpu(d.v->d_parent_subvol) == inum.subvol; 87 return true; 88 } 89 90 const struct bch_hash_desc bch2_dirent_hash_desc = { 91 .btree_id = BTREE_ID_dirents, 92 .key_type = KEY_TYPE_dirent, 93 .hash_key = dirent_hash_key, 94 .hash_bkey = dirent_hash_bkey, 95 .cmp_key = dirent_cmp_key, 96 .cmp_bkey = dirent_cmp_bkey, 97 .is_visible = dirent_is_visible, 98 }; 99 100 int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k, 101 enum bkey_invalid_flags flags, 102 struct printbuf *err) 103 { 104 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); 105 struct qstr d_name = bch2_dirent_get_name(d); 106 int ret = 0; 107 108 bkey_fsck_err_on(!d_name.len, c, err, 109 dirent_empty_name, 110 "empty name"); 111 112 bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), c, err, 113 dirent_val_too_big, 114 "value too big (%zu > %u)", 115 bkey_val_u64s(k.k), dirent_val_u64s(d_name.len)); 116 117 /* 118 * Check new keys don't exceed the max length 119 * (older keys may be larger.) 120 */ 121 bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && d_name.len > BCH_NAME_MAX, c, err, 122 dirent_name_too_long, 123 "dirent name too big (%u > %u)", 124 d_name.len, BCH_NAME_MAX); 125 126 bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), c, err, 127 dirent_name_embedded_nul, 128 "dirent has stray data after name's NUL"); 129 130 bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) || 131 (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), c, err, 132 dirent_name_dot_or_dotdot, 133 "invalid name"); 134 135 bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), c, err, 136 dirent_name_has_slash, 137 "name with /"); 138 139 bkey_fsck_err_on(d.v->d_type != DT_SUBVOL && 140 le64_to_cpu(d.v->d_inum) == d.k->p.inode, c, err, 141 dirent_to_itself, 142 "dirent points to own directory"); 143 fsck_err: 144 return ret; 145 } 146 147 void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, 148 struct bkey_s_c k) 149 { 150 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); 151 struct qstr d_name = bch2_dirent_get_name(d); 152 153 prt_printf(out, "%.*s -> %llu type %s", 154 d_name.len, 155 d_name.name, 156 d.v->d_type != DT_SUBVOL 157 ? le64_to_cpu(d.v->d_inum) 158 : le32_to_cpu(d.v->d_child_subvol), 159 bch2_d_type_str(d.v->d_type)); 160 } 161 162 static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, 163 subvol_inum dir, u8 type, 164 const struct qstr *name, u64 dst) 165 { 166 struct bkey_i_dirent *dirent; 167 unsigned u64s = BKEY_U64s + dirent_val_u64s(name->len); 168 169 if (name->len > BCH_NAME_MAX) 170 return ERR_PTR(-ENAMETOOLONG); 171 172 BUG_ON(u64s > U8_MAX); 173 174 dirent = bch2_trans_kmalloc(trans, u64s * sizeof(u64)); 175 if (IS_ERR(dirent)) 176 return dirent; 177 178 bkey_dirent_init(&dirent->k_i); 179 dirent->k.u64s = u64s; 180 181 if (type != DT_SUBVOL) { 182 dirent->v.d_inum = cpu_to_le64(dst); 183 } else { 184 dirent->v.d_parent_subvol = cpu_to_le32(dir.subvol); 185 dirent->v.d_child_subvol = cpu_to_le32(dst); 186 } 187 188 dirent->v.d_type = type; 189 190 memcpy(dirent->v.d_name, name->name, name->len); 191 memset(dirent->v.d_name + name->len, 0, 192 bkey_val_bytes(&dirent->k) - 193 offsetof(struct bch_dirent, d_name) - 194 name->len); 195 196 EBUG_ON(bch2_dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len); 197 198 return dirent; 199 } 200 201 int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir, 202 const struct bch_hash_info *hash_info, 203 u8 type, const struct qstr *name, u64 dst_inum, 204 u64 *dir_offset, int flags) 205 { 206 struct bkey_i_dirent *dirent; 207 int ret; 208 209 dirent = dirent_create_key(trans, dir, type, name, dst_inum); 210 ret = PTR_ERR_OR_ZERO(dirent); 211 if (ret) 212 return ret; 213 214 ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info, 215 dir, &dirent->k_i, flags); 216 *dir_offset = dirent->k.p.offset; 217 218 return ret; 219 } 220 221 static void dirent_copy_target(struct bkey_i_dirent *dst, 222 struct bkey_s_c_dirent src) 223 { 224 dst->v.d_inum = src.v->d_inum; 225 dst->v.d_type = src.v->d_type; 226 } 227 228 int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir, 229 struct bkey_s_c_dirent d, subvol_inum *target) 230 { 231 struct bch_subvolume s; 232 int ret = 0; 233 234 if (d.v->d_type == DT_SUBVOL && 235 le32_to_cpu(d.v->d_parent_subvol) != dir.subvol) 236 return 1; 237 238 if (likely(d.v->d_type != DT_SUBVOL)) { 239 target->subvol = dir.subvol; 240 target->inum = le64_to_cpu(d.v->d_inum); 241 } else { 242 target->subvol = le32_to_cpu(d.v->d_child_subvol); 243 244 ret = bch2_subvolume_get(trans, target->subvol, true, BTREE_ITER_CACHED, &s); 245 246 target->inum = le64_to_cpu(s.inode); 247 } 248 249 return ret; 250 } 251 252 int bch2_dirent_rename(struct btree_trans *trans, 253 subvol_inum src_dir, struct bch_hash_info *src_hash, 254 subvol_inum dst_dir, struct bch_hash_info *dst_hash, 255 const struct qstr *src_name, subvol_inum *src_inum, u64 *src_offset, 256 const struct qstr *dst_name, subvol_inum *dst_inum, u64 *dst_offset, 257 enum bch_rename_mode mode) 258 { 259 struct btree_iter src_iter = { NULL }; 260 struct btree_iter dst_iter = { NULL }; 261 struct bkey_s_c old_src, old_dst = bkey_s_c_null; 262 struct bkey_i_dirent *new_src = NULL, *new_dst = NULL; 263 struct bpos dst_pos = 264 POS(dst_dir.inum, bch2_dirent_hash(dst_hash, dst_name)); 265 unsigned src_type = 0, dst_type = 0, src_update_flags = 0; 266 int ret = 0; 267 268 if (src_dir.subvol != dst_dir.subvol) 269 return -EXDEV; 270 271 memset(src_inum, 0, sizeof(*src_inum)); 272 memset(dst_inum, 0, sizeof(*dst_inum)); 273 274 /* Lookup src: */ 275 ret = bch2_hash_lookup(trans, &src_iter, bch2_dirent_hash_desc, 276 src_hash, src_dir, src_name, 277 BTREE_ITER_INTENT); 278 if (ret) 279 goto out; 280 281 old_src = bch2_btree_iter_peek_slot(&src_iter); 282 ret = bkey_err(old_src); 283 if (ret) 284 goto out; 285 286 ret = bch2_dirent_read_target(trans, src_dir, 287 bkey_s_c_to_dirent(old_src), src_inum); 288 if (ret) 289 goto out; 290 291 src_type = bkey_s_c_to_dirent(old_src).v->d_type; 292 293 if (src_type == DT_SUBVOL && mode == BCH_RENAME_EXCHANGE) 294 return -EOPNOTSUPP; 295 296 297 /* Lookup dst: */ 298 if (mode == BCH_RENAME) { 299 /* 300 * Note that we're _not_ checking if the target already exists - 301 * we're relying on the VFS to do that check for us for 302 * correctness: 303 */ 304 ret = bch2_hash_hole(trans, &dst_iter, bch2_dirent_hash_desc, 305 dst_hash, dst_dir, dst_name); 306 if (ret) 307 goto out; 308 } else { 309 ret = bch2_hash_lookup(trans, &dst_iter, bch2_dirent_hash_desc, 310 dst_hash, dst_dir, dst_name, 311 BTREE_ITER_INTENT); 312 if (ret) 313 goto out; 314 315 old_dst = bch2_btree_iter_peek_slot(&dst_iter); 316 ret = bkey_err(old_dst); 317 if (ret) 318 goto out; 319 320 ret = bch2_dirent_read_target(trans, dst_dir, 321 bkey_s_c_to_dirent(old_dst), dst_inum); 322 if (ret) 323 goto out; 324 325 dst_type = bkey_s_c_to_dirent(old_dst).v->d_type; 326 327 if (dst_type == DT_SUBVOL) 328 return -EOPNOTSUPP; 329 } 330 331 if (mode != BCH_RENAME_EXCHANGE) 332 *src_offset = dst_iter.pos.offset; 333 334 /* Create new dst key: */ 335 new_dst = dirent_create_key(trans, dst_dir, 0, dst_name, 0); 336 ret = PTR_ERR_OR_ZERO(new_dst); 337 if (ret) 338 goto out; 339 340 dirent_copy_target(new_dst, bkey_s_c_to_dirent(old_src)); 341 new_dst->k.p = dst_iter.pos; 342 343 /* Create new src key: */ 344 if (mode == BCH_RENAME_EXCHANGE) { 345 new_src = dirent_create_key(trans, src_dir, 0, src_name, 0); 346 ret = PTR_ERR_OR_ZERO(new_src); 347 if (ret) 348 goto out; 349 350 dirent_copy_target(new_src, bkey_s_c_to_dirent(old_dst)); 351 new_src->k.p = src_iter.pos; 352 } else { 353 new_src = bch2_trans_kmalloc(trans, sizeof(struct bkey_i)); 354 ret = PTR_ERR_OR_ZERO(new_src); 355 if (ret) 356 goto out; 357 358 bkey_init(&new_src->k); 359 new_src->k.p = src_iter.pos; 360 361 if (bkey_le(dst_pos, src_iter.pos) && 362 bkey_lt(src_iter.pos, dst_iter.pos)) { 363 /* 364 * We have a hash collision for the new dst key, 365 * and new_src - the key we're deleting - is between 366 * new_dst's hashed slot and the slot we're going to be 367 * inserting it into - oops. This will break the hash 368 * table if we don't deal with it: 369 */ 370 if (mode == BCH_RENAME) { 371 /* 372 * If we're not overwriting, we can just insert 373 * new_dst at the src position: 374 */ 375 new_src = new_dst; 376 new_src->k.p = src_iter.pos; 377 goto out_set_src; 378 } else { 379 /* If we're overwriting, we can't insert new_dst 380 * at a different slot because it has to 381 * overwrite old_dst - just make sure to use a 382 * whiteout when deleting src: 383 */ 384 new_src->k.type = KEY_TYPE_hash_whiteout; 385 } 386 } else { 387 /* Check if we need a whiteout to delete src: */ 388 ret = bch2_hash_needs_whiteout(trans, bch2_dirent_hash_desc, 389 src_hash, &src_iter); 390 if (ret < 0) 391 goto out; 392 393 if (ret) 394 new_src->k.type = KEY_TYPE_hash_whiteout; 395 } 396 } 397 398 ret = bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0); 399 if (ret) 400 goto out; 401 out_set_src: 402 403 /* 404 * If we're deleting a subvolume, we need to really delete the dirent, 405 * not just emit a whiteout in the current snapshot: 406 */ 407 if (src_type == DT_SUBVOL) { 408 bch2_btree_iter_set_snapshot(&src_iter, old_src.k->p.snapshot); 409 ret = bch2_btree_iter_traverse(&src_iter); 410 if (ret) 411 goto out; 412 413 new_src->k.p = src_iter.pos; 414 src_update_flags |= BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE; 415 } 416 417 ret = bch2_trans_update(trans, &src_iter, &new_src->k_i, src_update_flags); 418 if (ret) 419 goto out; 420 421 if (mode == BCH_RENAME_EXCHANGE) 422 *src_offset = new_src->k.p.offset; 423 *dst_offset = new_dst->k.p.offset; 424 out: 425 bch2_trans_iter_exit(trans, &src_iter); 426 bch2_trans_iter_exit(trans, &dst_iter); 427 return ret; 428 } 429 430 int __bch2_dirent_lookup_trans(struct btree_trans *trans, 431 struct btree_iter *iter, 432 subvol_inum dir, 433 const struct bch_hash_info *hash_info, 434 const struct qstr *name, subvol_inum *inum, 435 unsigned flags) 436 { 437 struct bkey_s_c k; 438 struct bkey_s_c_dirent d; 439 u32 snapshot; 440 int ret; 441 442 ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot); 443 if (ret) 444 return ret; 445 446 ret = bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc, 447 hash_info, dir, name, flags); 448 if (ret) 449 return ret; 450 451 k = bch2_btree_iter_peek_slot(iter); 452 ret = bkey_err(k); 453 if (ret) 454 goto err; 455 456 d = bkey_s_c_to_dirent(k); 457 458 ret = bch2_dirent_read_target(trans, dir, d, inum); 459 if (ret > 0) 460 ret = -ENOENT; 461 err: 462 if (ret) 463 bch2_trans_iter_exit(trans, iter); 464 465 return ret; 466 } 467 468 u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir, 469 const struct bch_hash_info *hash_info, 470 const struct qstr *name, subvol_inum *inum) 471 { 472 struct btree_trans *trans = bch2_trans_get(c); 473 struct btree_iter iter; 474 int ret; 475 retry: 476 bch2_trans_begin(trans); 477 478 ret = __bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, 479 name, inum, 0); 480 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 481 goto retry; 482 if (!ret) 483 bch2_trans_iter_exit(trans, &iter); 484 bch2_trans_put(trans); 485 return ret; 486 } 487 488 int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 snapshot) 489 { 490 struct btree_iter iter; 491 struct bkey_s_c k; 492 int ret; 493 494 for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents, 495 SPOS(dir, 0, snapshot), 496 POS(dir, U64_MAX), 0, k, ret) 497 if (k.k->type == KEY_TYPE_dirent) { 498 ret = -ENOTEMPTY; 499 break; 500 } 501 bch2_trans_iter_exit(trans, &iter); 502 503 return ret; 504 } 505 506 int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir) 507 { 508 u32 snapshot; 509 510 return bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot) ?: 511 bch2_empty_dir_snapshot(trans, dir.inum, snapshot); 512 } 513 514 int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) 515 { 516 struct btree_trans *trans = bch2_trans_get(c); 517 struct btree_iter iter; 518 struct bkey_s_c k; 519 struct bkey_s_c_dirent dirent; 520 subvol_inum target; 521 u32 snapshot; 522 struct bkey_buf sk; 523 struct qstr name; 524 int ret; 525 526 bch2_bkey_buf_init(&sk); 527 retry: 528 bch2_trans_begin(trans); 529 530 ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); 531 if (ret) 532 goto err; 533 534 for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents, 535 SPOS(inum.inum, ctx->pos, snapshot), 536 POS(inum.inum, U64_MAX), 0, k, ret) { 537 if (k.k->type != KEY_TYPE_dirent) 538 continue; 539 540 dirent = bkey_s_c_to_dirent(k); 541 542 ret = bch2_dirent_read_target(trans, inum, dirent, &target); 543 if (ret < 0) 544 break; 545 if (ret) 546 continue; 547 548 /* dir_emit() can fault and block: */ 549 bch2_bkey_buf_reassemble(&sk, c, k); 550 dirent = bkey_i_to_s_c_dirent(sk.k); 551 bch2_trans_unlock(trans); 552 553 name = bch2_dirent_get_name(dirent); 554 555 ctx->pos = dirent.k->p.offset; 556 if (!dir_emit(ctx, name.name, 557 name.len, 558 target.inum, 559 vfs_d_type(dirent.v->d_type))) 560 break; 561 ctx->pos = dirent.k->p.offset + 1; 562 563 /* 564 * read_target looks up subvolumes, we can overflow paths if the 565 * directory has many subvolumes in it 566 */ 567 ret = btree_trans_too_many_iters(trans); 568 if (ret) 569 break; 570 } 571 bch2_trans_iter_exit(trans, &iter); 572 err: 573 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 574 goto retry; 575 576 bch2_trans_put(trans); 577 bch2_bkey_buf_exit(&sk, c); 578 579 return ret; 580 } 581