1 // SPDX-License-Identifier: GPL-2.0 2 #ifndef NO_BCACHEFS_FS 3 4 #include "bcachefs.h" 5 #include "acl.h" 6 #include "bkey_buf.h" 7 #include "btree_update.h" 8 #include "buckets.h" 9 #include "chardev.h" 10 #include "dirent.h" 11 #include "errcode.h" 12 #include "extents.h" 13 #include "fs.h" 14 #include "fs-common.h" 15 #include "fs-io.h" 16 #include "fs-ioctl.h" 17 #include "fs-io-buffered.h" 18 #include "fs-io-direct.h" 19 #include "fs-io-pagecache.h" 20 #include "fsck.h" 21 #include "inode.h" 22 #include "io_read.h" 23 #include "journal.h" 24 #include "keylist.h" 25 #include "quota.h" 26 #include "snapshot.h" 27 #include "super.h" 28 #include "xattr.h" 29 30 #include <linux/aio.h> 31 #include <linux/backing-dev.h> 32 #include <linux/exportfs.h> 33 #include <linux/fiemap.h> 34 #include <linux/module.h> 35 #include <linux/pagemap.h> 36 #include <linux/posix_acl.h> 37 #include <linux/random.h> 38 #include <linux/seq_file.h> 39 #include <linux/statfs.h> 40 #include <linux/string.h> 41 #include <linux/xattr.h> 42 43 static struct kmem_cache *bch2_inode_cache; 44 45 static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum, 46 struct bch_inode_info *, 47 struct bch_inode_unpacked *, 48 struct bch_subvolume *); 49 50 void bch2_inode_update_after_write(struct btree_trans *trans, 51 struct bch_inode_info *inode, 52 struct bch_inode_unpacked *bi, 53 unsigned fields) 54 { 55 struct bch_fs *c = trans->c; 56 57 BUG_ON(bi->bi_inum != inode->v.i_ino); 58 59 bch2_assert_pos_locked(trans, BTREE_ID_inodes, 60 POS(0, bi->bi_inum), 61 c->opts.inodes_use_key_cache); 62 63 set_nlink(&inode->v, bch2_inode_nlink_get(bi)); 64 i_uid_write(&inode->v, bi->bi_uid); 65 i_gid_write(&inode->v, bi->bi_gid); 66 inode->v.i_mode = bi->bi_mode; 67 68 if (fields & ATTR_ATIME) 69 inode_set_atime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_atime)); 70 if (fields & ATTR_MTIME) 71 inode_set_mtime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_mtime)); 72 if (fields & ATTR_CTIME) 73 inode_set_ctime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_ctime)); 74 75 inode->ei_inode = *bi; 76 77 bch2_inode_flags_to_vfs(inode); 78 } 79 80 int __must_check bch2_write_inode(struct bch_fs *c, 81 struct bch_inode_info *inode, 82 inode_set_fn set, 83 void *p, unsigned fields) 84 { 85 struct btree_trans *trans = bch2_trans_get(c); 86 struct btree_iter iter = { NULL }; 87 struct bch_inode_unpacked inode_u; 88 int ret; 89 retry: 90 bch2_trans_begin(trans); 91 92 ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode), 93 BTREE_ITER_INTENT) ?: 94 (set ? set(trans, inode, &inode_u, p) : 0) ?: 95 bch2_inode_write(trans, &iter, &inode_u) ?: 96 bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); 97 98 /* 99 * the btree node lock protects inode->ei_inode, not ei_update_lock; 100 * this is important for inode updates via bchfs_write_index_update 101 */ 102 if (!ret) 103 bch2_inode_update_after_write(trans, inode, &inode_u, fields); 104 105 bch2_trans_iter_exit(trans, &iter); 106 107 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 108 goto retry; 109 110 bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c, 111 "inode %u:%llu not found when updating", 112 inode_inum(inode).subvol, 113 inode_inum(inode).inum); 114 115 bch2_trans_put(trans); 116 return ret < 0 ? ret : 0; 117 } 118 119 int bch2_fs_quota_transfer(struct bch_fs *c, 120 struct bch_inode_info *inode, 121 struct bch_qid new_qid, 122 unsigned qtypes, 123 enum quota_acct_mode mode) 124 { 125 unsigned i; 126 int ret; 127 128 qtypes &= enabled_qtypes(c); 129 130 for (i = 0; i < QTYP_NR; i++) 131 if (new_qid.q[i] == inode->ei_qid.q[i]) 132 qtypes &= ~(1U << i); 133 134 if (!qtypes) 135 return 0; 136 137 mutex_lock(&inode->ei_quota_lock); 138 139 ret = bch2_quota_transfer(c, qtypes, new_qid, 140 inode->ei_qid, 141 inode->v.i_blocks + 142 inode->ei_quota_reserved, 143 mode); 144 if (!ret) 145 for (i = 0; i < QTYP_NR; i++) 146 if (qtypes & (1 << i)) 147 inode->ei_qid.q[i] = new_qid.q[i]; 148 149 mutex_unlock(&inode->ei_quota_lock); 150 151 return ret; 152 } 153 154 static int bch2_iget5_test(struct inode *vinode, void *p) 155 { 156 struct bch_inode_info *inode = to_bch_ei(vinode); 157 subvol_inum *inum = p; 158 159 return inode->ei_subvol == inum->subvol && 160 inode->ei_inode.bi_inum == inum->inum; 161 } 162 163 static int bch2_iget5_set(struct inode *vinode, void *p) 164 { 165 struct bch_inode_info *inode = to_bch_ei(vinode); 166 subvol_inum *inum = p; 167 168 inode->v.i_ino = inum->inum; 169 inode->ei_subvol = inum->subvol; 170 inode->ei_inode.bi_inum = inum->inum; 171 return 0; 172 } 173 174 static unsigned bch2_inode_hash(subvol_inum inum) 175 { 176 return jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL); 177 } 178 179 struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) 180 { 181 struct bch_inode_unpacked inode_u; 182 struct bch_inode_info *inode; 183 struct btree_trans *trans; 184 struct bch_subvolume subvol; 185 int ret; 186 187 inode = to_bch_ei(iget5_locked(c->vfs_sb, 188 bch2_inode_hash(inum), 189 bch2_iget5_test, 190 bch2_iget5_set, 191 &inum)); 192 if (unlikely(!inode)) 193 return ERR_PTR(-ENOMEM); 194 if (!(inode->v.i_state & I_NEW)) 195 return &inode->v; 196 197 trans = bch2_trans_get(c); 198 ret = lockrestart_do(trans, 199 bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?: 200 bch2_inode_find_by_inum_trans(trans, inum, &inode_u)); 201 202 if (!ret) 203 bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol); 204 bch2_trans_put(trans); 205 206 if (ret) { 207 iget_failed(&inode->v); 208 return ERR_PTR(bch2_err_class(ret)); 209 } 210 211 mutex_lock(&c->vfs_inodes_lock); 212 list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list); 213 mutex_unlock(&c->vfs_inodes_lock); 214 215 unlock_new_inode(&inode->v); 216 217 return &inode->v; 218 } 219 220 struct bch_inode_info * 221 __bch2_create(struct mnt_idmap *idmap, 222 struct bch_inode_info *dir, struct dentry *dentry, 223 umode_t mode, dev_t rdev, subvol_inum snapshot_src, 224 unsigned flags) 225 { 226 struct bch_fs *c = dir->v.i_sb->s_fs_info; 227 struct btree_trans *trans; 228 struct bch_inode_unpacked dir_u; 229 struct bch_inode_info *inode, *old; 230 struct bch_inode_unpacked inode_u; 231 struct posix_acl *default_acl = NULL, *acl = NULL; 232 subvol_inum inum; 233 struct bch_subvolume subvol; 234 u64 journal_seq = 0; 235 int ret; 236 237 /* 238 * preallocate acls + vfs inode before btree transaction, so that 239 * nothing can fail after the transaction succeeds: 240 */ 241 #ifdef CONFIG_BCACHEFS_POSIX_ACL 242 ret = posix_acl_create(&dir->v, &mode, &default_acl, &acl); 243 if (ret) 244 return ERR_PTR(ret); 245 #endif 246 inode = to_bch_ei(new_inode(c->vfs_sb)); 247 if (unlikely(!inode)) { 248 inode = ERR_PTR(-ENOMEM); 249 goto err; 250 } 251 252 bch2_inode_init_early(c, &inode_u); 253 254 if (!(flags & BCH_CREATE_TMPFILE)) 255 mutex_lock(&dir->ei_update_lock); 256 257 trans = bch2_trans_get(c); 258 retry: 259 bch2_trans_begin(trans); 260 261 ret = bch2_create_trans(trans, 262 inode_inum(dir), &dir_u, &inode_u, 263 !(flags & BCH_CREATE_TMPFILE) 264 ? &dentry->d_name : NULL, 265 from_kuid(i_user_ns(&dir->v), current_fsuid()), 266 from_kgid(i_user_ns(&dir->v), current_fsgid()), 267 mode, rdev, 268 default_acl, acl, snapshot_src, flags) ?: 269 bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, 270 KEY_TYPE_QUOTA_PREALLOC); 271 if (unlikely(ret)) 272 goto err_before_quota; 273 274 inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol; 275 inum.inum = inode_u.bi_inum; 276 277 ret = bch2_subvolume_get(trans, inum.subvol, true, 278 BTREE_ITER_WITH_UPDATES, &subvol) ?: 279 bch2_trans_commit(trans, NULL, &journal_seq, 0); 280 if (unlikely(ret)) { 281 bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, 282 KEY_TYPE_QUOTA_WARN); 283 err_before_quota: 284 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 285 goto retry; 286 goto err_trans; 287 } 288 289 if (!(flags & BCH_CREATE_TMPFILE)) { 290 bch2_inode_update_after_write(trans, dir, &dir_u, 291 ATTR_MTIME|ATTR_CTIME); 292 mutex_unlock(&dir->ei_update_lock); 293 } 294 295 bch2_iget5_set(&inode->v, &inum); 296 bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol); 297 298 set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); 299 set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl); 300 301 /* 302 * we must insert the new inode into the inode cache before calling 303 * bch2_trans_exit() and dropping locks, else we could race with another 304 * thread pulling the inode in and modifying it: 305 */ 306 307 inode->v.i_state |= I_CREATING; 308 309 old = to_bch_ei(inode_insert5(&inode->v, 310 bch2_inode_hash(inum), 311 bch2_iget5_test, 312 bch2_iget5_set, 313 &inum)); 314 BUG_ON(!old); 315 316 if (unlikely(old != inode)) { 317 /* 318 * We raced, another process pulled the new inode into cache 319 * before us: 320 */ 321 make_bad_inode(&inode->v); 322 iput(&inode->v); 323 324 inode = old; 325 } else { 326 mutex_lock(&c->vfs_inodes_lock); 327 list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list); 328 mutex_unlock(&c->vfs_inodes_lock); 329 /* 330 * we really don't want insert_inode_locked2() to be setting 331 * I_NEW... 332 */ 333 unlock_new_inode(&inode->v); 334 } 335 336 bch2_trans_put(trans); 337 err: 338 posix_acl_release(default_acl); 339 posix_acl_release(acl); 340 return inode; 341 err_trans: 342 if (!(flags & BCH_CREATE_TMPFILE)) 343 mutex_unlock(&dir->ei_update_lock); 344 345 bch2_trans_put(trans); 346 make_bad_inode(&inode->v); 347 iput(&inode->v); 348 inode = ERR_PTR(ret); 349 goto err; 350 } 351 352 /* methods */ 353 354 static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, 355 unsigned int flags) 356 { 357 struct bch_fs *c = vdir->i_sb->s_fs_info; 358 struct bch_inode_info *dir = to_bch_ei(vdir); 359 struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode); 360 struct inode *vinode = NULL; 361 subvol_inum inum = { .subvol = 1 }; 362 int ret; 363 364 ret = bch2_dirent_lookup(c, inode_inum(dir), &hash, 365 &dentry->d_name, &inum); 366 367 if (!ret) 368 vinode = bch2_vfs_inode_get(c, inum); 369 370 return d_splice_alias(vinode, dentry); 371 } 372 373 static int bch2_mknod(struct mnt_idmap *idmap, 374 struct inode *vdir, struct dentry *dentry, 375 umode_t mode, dev_t rdev) 376 { 377 struct bch_inode_info *inode = 378 __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev, 379 (subvol_inum) { 0 }, 0); 380 381 if (IS_ERR(inode)) 382 return bch2_err_class(PTR_ERR(inode)); 383 384 d_instantiate(dentry, &inode->v); 385 return 0; 386 } 387 388 static int bch2_create(struct mnt_idmap *idmap, 389 struct inode *vdir, struct dentry *dentry, 390 umode_t mode, bool excl) 391 { 392 return bch2_mknod(idmap, vdir, dentry, mode|S_IFREG, 0); 393 } 394 395 static int __bch2_link(struct bch_fs *c, 396 struct bch_inode_info *inode, 397 struct bch_inode_info *dir, 398 struct dentry *dentry) 399 { 400 struct btree_trans *trans = bch2_trans_get(c); 401 struct bch_inode_unpacked dir_u, inode_u; 402 int ret; 403 404 mutex_lock(&inode->ei_update_lock); 405 406 ret = commit_do(trans, NULL, NULL, 0, 407 bch2_link_trans(trans, 408 inode_inum(dir), &dir_u, 409 inode_inum(inode), &inode_u, 410 &dentry->d_name)); 411 412 if (likely(!ret)) { 413 bch2_inode_update_after_write(trans, dir, &dir_u, 414 ATTR_MTIME|ATTR_CTIME); 415 bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME); 416 } 417 418 bch2_trans_put(trans); 419 mutex_unlock(&inode->ei_update_lock); 420 return ret; 421 } 422 423 static int bch2_link(struct dentry *old_dentry, struct inode *vdir, 424 struct dentry *dentry) 425 { 426 struct bch_fs *c = vdir->i_sb->s_fs_info; 427 struct bch_inode_info *dir = to_bch_ei(vdir); 428 struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode); 429 int ret; 430 431 lockdep_assert_held(&inode->v.i_rwsem); 432 433 ret = __bch2_link(c, inode, dir, dentry); 434 if (unlikely(ret)) 435 return ret; 436 437 ihold(&inode->v); 438 d_instantiate(dentry, &inode->v); 439 return 0; 440 } 441 442 int __bch2_unlink(struct inode *vdir, struct dentry *dentry, 443 bool deleting_snapshot) 444 { 445 struct bch_fs *c = vdir->i_sb->s_fs_info; 446 struct bch_inode_info *dir = to_bch_ei(vdir); 447 struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); 448 struct bch_inode_unpacked dir_u, inode_u; 449 struct btree_trans *trans = bch2_trans_get(c); 450 int ret; 451 452 bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); 453 454 ret = commit_do(trans, NULL, NULL, 455 BTREE_INSERT_NOFAIL, 456 bch2_unlink_trans(trans, 457 inode_inum(dir), &dir_u, 458 &inode_u, &dentry->d_name, 459 deleting_snapshot)); 460 if (unlikely(ret)) 461 goto err; 462 463 bch2_inode_update_after_write(trans, dir, &dir_u, 464 ATTR_MTIME|ATTR_CTIME); 465 bch2_inode_update_after_write(trans, inode, &inode_u, 466 ATTR_MTIME); 467 468 if (inode_u.bi_subvol) { 469 /* 470 * Subvolume deletion is asynchronous, but we still want to tell 471 * the VFS that it's been deleted here: 472 */ 473 set_nlink(&inode->v, 0); 474 } 475 err: 476 bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); 477 bch2_trans_put(trans); 478 479 return ret; 480 } 481 482 static int bch2_unlink(struct inode *vdir, struct dentry *dentry) 483 { 484 return __bch2_unlink(vdir, dentry, false); 485 } 486 487 static int bch2_symlink(struct mnt_idmap *idmap, 488 struct inode *vdir, struct dentry *dentry, 489 const char *symname) 490 { 491 struct bch_fs *c = vdir->i_sb->s_fs_info; 492 struct bch_inode_info *dir = to_bch_ei(vdir), *inode; 493 int ret; 494 495 inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0, 496 (subvol_inum) { 0 }, BCH_CREATE_TMPFILE); 497 if (IS_ERR(inode)) 498 return bch2_err_class(PTR_ERR(inode)); 499 500 inode_lock(&inode->v); 501 ret = page_symlink(&inode->v, symname, strlen(symname) + 1); 502 inode_unlock(&inode->v); 503 504 if (unlikely(ret)) 505 goto err; 506 507 ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX); 508 if (unlikely(ret)) 509 goto err; 510 511 ret = __bch2_link(c, inode, dir, dentry); 512 if (unlikely(ret)) 513 goto err; 514 515 d_instantiate(dentry, &inode->v); 516 return 0; 517 err: 518 iput(&inode->v); 519 return ret; 520 } 521 522 static int bch2_mkdir(struct mnt_idmap *idmap, 523 struct inode *vdir, struct dentry *dentry, umode_t mode) 524 { 525 return bch2_mknod(idmap, vdir, dentry, mode|S_IFDIR, 0); 526 } 527 528 static int bch2_rename2(struct mnt_idmap *idmap, 529 struct inode *src_vdir, struct dentry *src_dentry, 530 struct inode *dst_vdir, struct dentry *dst_dentry, 531 unsigned flags) 532 { 533 struct bch_fs *c = src_vdir->i_sb->s_fs_info; 534 struct bch_inode_info *src_dir = to_bch_ei(src_vdir); 535 struct bch_inode_info *dst_dir = to_bch_ei(dst_vdir); 536 struct bch_inode_info *src_inode = to_bch_ei(src_dentry->d_inode); 537 struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode); 538 struct bch_inode_unpacked dst_dir_u, src_dir_u; 539 struct bch_inode_unpacked src_inode_u, dst_inode_u; 540 struct btree_trans *trans; 541 enum bch_rename_mode mode = flags & RENAME_EXCHANGE 542 ? BCH_RENAME_EXCHANGE 543 : dst_dentry->d_inode 544 ? BCH_RENAME_OVERWRITE : BCH_RENAME; 545 int ret; 546 547 if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE)) 548 return -EINVAL; 549 550 if (mode == BCH_RENAME_OVERWRITE) { 551 ret = filemap_write_and_wait_range(src_inode->v.i_mapping, 552 0, LLONG_MAX); 553 if (ret) 554 return ret; 555 } 556 557 trans = bch2_trans_get(c); 558 559 bch2_lock_inodes(INODE_UPDATE_LOCK, 560 src_dir, 561 dst_dir, 562 src_inode, 563 dst_inode); 564 565 if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) { 566 ret = bch2_fs_quota_transfer(c, src_inode, 567 dst_dir->ei_qid, 568 1 << QTYP_PRJ, 569 KEY_TYPE_QUOTA_PREALLOC); 570 if (ret) 571 goto err; 572 } 573 574 if (mode == BCH_RENAME_EXCHANGE && 575 inode_attr_changing(src_dir, dst_inode, Inode_opt_project)) { 576 ret = bch2_fs_quota_transfer(c, dst_inode, 577 src_dir->ei_qid, 578 1 << QTYP_PRJ, 579 KEY_TYPE_QUOTA_PREALLOC); 580 if (ret) 581 goto err; 582 } 583 584 ret = commit_do(trans, NULL, NULL, 0, 585 bch2_rename_trans(trans, 586 inode_inum(src_dir), &src_dir_u, 587 inode_inum(dst_dir), &dst_dir_u, 588 &src_inode_u, 589 &dst_inode_u, 590 &src_dentry->d_name, 591 &dst_dentry->d_name, 592 mode)); 593 if (unlikely(ret)) 594 goto err; 595 596 BUG_ON(src_inode->v.i_ino != src_inode_u.bi_inum); 597 BUG_ON(dst_inode && 598 dst_inode->v.i_ino != dst_inode_u.bi_inum); 599 600 bch2_inode_update_after_write(trans, src_dir, &src_dir_u, 601 ATTR_MTIME|ATTR_CTIME); 602 603 if (src_dir != dst_dir) 604 bch2_inode_update_after_write(trans, dst_dir, &dst_dir_u, 605 ATTR_MTIME|ATTR_CTIME); 606 607 bch2_inode_update_after_write(trans, src_inode, &src_inode_u, 608 ATTR_CTIME); 609 610 if (dst_inode) 611 bch2_inode_update_after_write(trans, dst_inode, &dst_inode_u, 612 ATTR_CTIME); 613 err: 614 bch2_trans_put(trans); 615 616 bch2_fs_quota_transfer(c, src_inode, 617 bch_qid(&src_inode->ei_inode), 618 1 << QTYP_PRJ, 619 KEY_TYPE_QUOTA_NOCHECK); 620 if (dst_inode) 621 bch2_fs_quota_transfer(c, dst_inode, 622 bch_qid(&dst_inode->ei_inode), 623 1 << QTYP_PRJ, 624 KEY_TYPE_QUOTA_NOCHECK); 625 626 bch2_unlock_inodes(INODE_UPDATE_LOCK, 627 src_dir, 628 dst_dir, 629 src_inode, 630 dst_inode); 631 632 return ret; 633 } 634 635 static void bch2_setattr_copy(struct mnt_idmap *idmap, 636 struct bch_inode_info *inode, 637 struct bch_inode_unpacked *bi, 638 struct iattr *attr) 639 { 640 struct bch_fs *c = inode->v.i_sb->s_fs_info; 641 unsigned int ia_valid = attr->ia_valid; 642 643 if (ia_valid & ATTR_UID) 644 bi->bi_uid = from_kuid(i_user_ns(&inode->v), attr->ia_uid); 645 if (ia_valid & ATTR_GID) 646 bi->bi_gid = from_kgid(i_user_ns(&inode->v), attr->ia_gid); 647 648 if (ia_valid & ATTR_SIZE) 649 bi->bi_size = attr->ia_size; 650 651 if (ia_valid & ATTR_ATIME) 652 bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime); 653 if (ia_valid & ATTR_MTIME) 654 bi->bi_mtime = timespec_to_bch2_time(c, attr->ia_mtime); 655 if (ia_valid & ATTR_CTIME) 656 bi->bi_ctime = timespec_to_bch2_time(c, attr->ia_ctime); 657 658 if (ia_valid & ATTR_MODE) { 659 umode_t mode = attr->ia_mode; 660 kgid_t gid = ia_valid & ATTR_GID 661 ? attr->ia_gid 662 : inode->v.i_gid; 663 664 if (!in_group_p(gid) && 665 !capable_wrt_inode_uidgid(idmap, &inode->v, CAP_FSETID)) 666 mode &= ~S_ISGID; 667 bi->bi_mode = mode; 668 } 669 } 670 671 int bch2_setattr_nonsize(struct mnt_idmap *idmap, 672 struct bch_inode_info *inode, 673 struct iattr *attr) 674 { 675 struct bch_fs *c = inode->v.i_sb->s_fs_info; 676 struct bch_qid qid; 677 struct btree_trans *trans; 678 struct btree_iter inode_iter = { NULL }; 679 struct bch_inode_unpacked inode_u; 680 struct posix_acl *acl = NULL; 681 int ret; 682 683 mutex_lock(&inode->ei_update_lock); 684 685 qid = inode->ei_qid; 686 687 if (attr->ia_valid & ATTR_UID) 688 qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), attr->ia_uid); 689 690 if (attr->ia_valid & ATTR_GID) 691 qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), attr->ia_gid); 692 693 ret = bch2_fs_quota_transfer(c, inode, qid, ~0, 694 KEY_TYPE_QUOTA_PREALLOC); 695 if (ret) 696 goto err; 697 698 trans = bch2_trans_get(c); 699 retry: 700 bch2_trans_begin(trans); 701 kfree(acl); 702 acl = NULL; 703 704 ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode), 705 BTREE_ITER_INTENT); 706 if (ret) 707 goto btree_err; 708 709 bch2_setattr_copy(idmap, inode, &inode_u, attr); 710 711 if (attr->ia_valid & ATTR_MODE) { 712 ret = bch2_acl_chmod(trans, inode_inum(inode), &inode_u, 713 inode_u.bi_mode, &acl); 714 if (ret) 715 goto btree_err; 716 } 717 718 ret = bch2_inode_write(trans, &inode_iter, &inode_u) ?: 719 bch2_trans_commit(trans, NULL, NULL, 720 BTREE_INSERT_NOFAIL); 721 btree_err: 722 bch2_trans_iter_exit(trans, &inode_iter); 723 724 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 725 goto retry; 726 if (unlikely(ret)) 727 goto err_trans; 728 729 bch2_inode_update_after_write(trans, inode, &inode_u, attr->ia_valid); 730 731 if (acl) 732 set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); 733 err_trans: 734 bch2_trans_put(trans); 735 err: 736 mutex_unlock(&inode->ei_update_lock); 737 738 return bch2_err_class(ret); 739 } 740 741 static int bch2_getattr(struct mnt_idmap *idmap, 742 const struct path *path, struct kstat *stat, 743 u32 request_mask, unsigned query_flags) 744 { 745 struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry)); 746 struct bch_fs *c = inode->v.i_sb->s_fs_info; 747 748 stat->dev = inode->v.i_sb->s_dev; 749 stat->ino = inode->v.i_ino; 750 stat->mode = inode->v.i_mode; 751 stat->nlink = inode->v.i_nlink; 752 stat->uid = inode->v.i_uid; 753 stat->gid = inode->v.i_gid; 754 stat->rdev = inode->v.i_rdev; 755 stat->size = i_size_read(&inode->v); 756 stat->atime = inode_get_atime(&inode->v); 757 stat->mtime = inode_get_mtime(&inode->v); 758 stat->ctime = inode_get_ctime(&inode->v); 759 stat->blksize = block_bytes(c); 760 stat->blocks = inode->v.i_blocks; 761 762 if (request_mask & STATX_BTIME) { 763 stat->result_mask |= STATX_BTIME; 764 stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime); 765 } 766 767 if (inode->ei_inode.bi_flags & BCH_INODE_immutable) 768 stat->attributes |= STATX_ATTR_IMMUTABLE; 769 stat->attributes_mask |= STATX_ATTR_IMMUTABLE; 770 771 if (inode->ei_inode.bi_flags & BCH_INODE_append) 772 stat->attributes |= STATX_ATTR_APPEND; 773 stat->attributes_mask |= STATX_ATTR_APPEND; 774 775 if (inode->ei_inode.bi_flags & BCH_INODE_nodump) 776 stat->attributes |= STATX_ATTR_NODUMP; 777 stat->attributes_mask |= STATX_ATTR_NODUMP; 778 779 return 0; 780 } 781 782 static int bch2_setattr(struct mnt_idmap *idmap, 783 struct dentry *dentry, struct iattr *iattr) 784 { 785 struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); 786 int ret; 787 788 lockdep_assert_held(&inode->v.i_rwsem); 789 790 ret = setattr_prepare(idmap, dentry, iattr); 791 if (ret) 792 return ret; 793 794 return iattr->ia_valid & ATTR_SIZE 795 ? bchfs_truncate(idmap, inode, iattr) 796 : bch2_setattr_nonsize(idmap, inode, iattr); 797 } 798 799 static int bch2_tmpfile(struct mnt_idmap *idmap, 800 struct inode *vdir, struct file *file, umode_t mode) 801 { 802 struct bch_inode_info *inode = 803 __bch2_create(idmap, to_bch_ei(vdir), 804 file->f_path.dentry, mode, 0, 805 (subvol_inum) { 0 }, BCH_CREATE_TMPFILE); 806 807 if (IS_ERR(inode)) 808 return bch2_err_class(PTR_ERR(inode)); 809 810 d_mark_tmpfile(file, &inode->v); 811 d_instantiate(file->f_path.dentry, &inode->v); 812 return finish_open_simple(file, 0); 813 } 814 815 static int bch2_fill_extent(struct bch_fs *c, 816 struct fiemap_extent_info *info, 817 struct bkey_s_c k, unsigned flags) 818 { 819 if (bkey_extent_is_direct_data(k.k)) { 820 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 821 const union bch_extent_entry *entry; 822 struct extent_ptr_decoded p; 823 int ret; 824 825 if (k.k->type == KEY_TYPE_reflink_v) 826 flags |= FIEMAP_EXTENT_SHARED; 827 828 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 829 int flags2 = 0; 830 u64 offset = p.ptr.offset; 831 832 if (p.ptr.unwritten) 833 flags2 |= FIEMAP_EXTENT_UNWRITTEN; 834 835 if (p.crc.compression_type) 836 flags2 |= FIEMAP_EXTENT_ENCODED; 837 else 838 offset += p.crc.offset; 839 840 if ((offset & (block_sectors(c) - 1)) || 841 (k.k->size & (block_sectors(c) - 1))) 842 flags2 |= FIEMAP_EXTENT_NOT_ALIGNED; 843 844 ret = fiemap_fill_next_extent(info, 845 bkey_start_offset(k.k) << 9, 846 offset << 9, 847 k.k->size << 9, flags|flags2); 848 if (ret) 849 return ret; 850 } 851 852 return 0; 853 } else if (bkey_extent_is_inline_data(k.k)) { 854 return fiemap_fill_next_extent(info, 855 bkey_start_offset(k.k) << 9, 856 0, k.k->size << 9, 857 flags| 858 FIEMAP_EXTENT_DATA_INLINE); 859 } else if (k.k->type == KEY_TYPE_reservation) { 860 return fiemap_fill_next_extent(info, 861 bkey_start_offset(k.k) << 9, 862 0, k.k->size << 9, 863 flags| 864 FIEMAP_EXTENT_DELALLOC| 865 FIEMAP_EXTENT_UNWRITTEN); 866 } else { 867 BUG(); 868 } 869 } 870 871 static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, 872 u64 start, u64 len) 873 { 874 struct bch_fs *c = vinode->i_sb->s_fs_info; 875 struct bch_inode_info *ei = to_bch_ei(vinode); 876 struct btree_trans *trans; 877 struct btree_iter iter; 878 struct bkey_s_c k; 879 struct bkey_buf cur, prev; 880 struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); 881 unsigned offset_into_extent, sectors; 882 bool have_extent = false; 883 u32 snapshot; 884 int ret = 0; 885 886 ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC); 887 if (ret) 888 return ret; 889 890 if (start + len < start) 891 return -EINVAL; 892 893 start >>= 9; 894 895 bch2_bkey_buf_init(&cur); 896 bch2_bkey_buf_init(&prev); 897 trans = bch2_trans_get(c); 898 retry: 899 bch2_trans_begin(trans); 900 901 ret = bch2_subvolume_get_snapshot(trans, ei->ei_subvol, &snapshot); 902 if (ret) 903 goto err; 904 905 bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, 906 SPOS(ei->v.i_ino, start, snapshot), 0); 907 908 while (!(ret = btree_trans_too_many_iters(trans)) && 909 (k = bch2_btree_iter_peek_upto(&iter, end)).k && 910 !(ret = bkey_err(k))) { 911 enum btree_id data_btree = BTREE_ID_extents; 912 913 if (!bkey_extent_is_data(k.k) && 914 k.k->type != KEY_TYPE_reservation) { 915 bch2_btree_iter_advance(&iter); 916 continue; 917 } 918 919 offset_into_extent = iter.pos.offset - 920 bkey_start_offset(k.k); 921 sectors = k.k->size - offset_into_extent; 922 923 bch2_bkey_buf_reassemble(&cur, c, k); 924 925 ret = bch2_read_indirect_extent(trans, &data_btree, 926 &offset_into_extent, &cur); 927 if (ret) 928 break; 929 930 k = bkey_i_to_s_c(cur.k); 931 bch2_bkey_buf_realloc(&prev, c, k.k->u64s); 932 933 sectors = min(sectors, k.k->size - offset_into_extent); 934 935 bch2_cut_front(POS(k.k->p.inode, 936 bkey_start_offset(k.k) + 937 offset_into_extent), 938 cur.k); 939 bch2_key_resize(&cur.k->k, sectors); 940 cur.k->k.p = iter.pos; 941 cur.k->k.p.offset += cur.k->k.size; 942 943 if (have_extent) { 944 bch2_trans_unlock(trans); 945 ret = bch2_fill_extent(c, info, 946 bkey_i_to_s_c(prev.k), 0); 947 if (ret) 948 break; 949 } 950 951 bkey_copy(prev.k, cur.k); 952 have_extent = true; 953 954 bch2_btree_iter_set_pos(&iter, 955 POS(iter.pos.inode, iter.pos.offset + sectors)); 956 } 957 start = iter.pos.offset; 958 bch2_trans_iter_exit(trans, &iter); 959 err: 960 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 961 goto retry; 962 963 if (!ret && have_extent) { 964 bch2_trans_unlock(trans); 965 ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), 966 FIEMAP_EXTENT_LAST); 967 } 968 969 bch2_trans_put(trans); 970 bch2_bkey_buf_exit(&cur, c); 971 bch2_bkey_buf_exit(&prev, c); 972 return ret < 0 ? ret : 0; 973 } 974 975 static const struct vm_operations_struct bch_vm_ops = { 976 .fault = bch2_page_fault, 977 .map_pages = filemap_map_pages, 978 .page_mkwrite = bch2_page_mkwrite, 979 }; 980 981 static int bch2_mmap(struct file *file, struct vm_area_struct *vma) 982 { 983 file_accessed(file); 984 985 vma->vm_ops = &bch_vm_ops; 986 return 0; 987 } 988 989 /* Directories: */ 990 991 static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence) 992 { 993 return generic_file_llseek_size(file, offset, whence, 994 S64_MAX, S64_MAX); 995 } 996 997 static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx) 998 { 999 struct bch_inode_info *inode = file_bch_inode(file); 1000 struct bch_fs *c = inode->v.i_sb->s_fs_info; 1001 int ret; 1002 1003 if (!dir_emit_dots(file, ctx)) 1004 return 0; 1005 1006 ret = bch2_readdir(c, inode_inum(inode), ctx); 1007 if (ret) 1008 bch_err_fn(c, ret); 1009 1010 return bch2_err_class(ret); 1011 } 1012 1013 static const struct file_operations bch_file_operations = { 1014 .llseek = bch2_llseek, 1015 .read_iter = bch2_read_iter, 1016 .write_iter = bch2_write_iter, 1017 .mmap = bch2_mmap, 1018 .open = generic_file_open, 1019 .fsync = bch2_fsync, 1020 .splice_read = filemap_splice_read, 1021 .splice_write = iter_file_splice_write, 1022 .fallocate = bch2_fallocate_dispatch, 1023 .unlocked_ioctl = bch2_fs_file_ioctl, 1024 #ifdef CONFIG_COMPAT 1025 .compat_ioctl = bch2_compat_fs_ioctl, 1026 #endif 1027 .remap_file_range = bch2_remap_file_range, 1028 }; 1029 1030 static const struct inode_operations bch_file_inode_operations = { 1031 .getattr = bch2_getattr, 1032 .setattr = bch2_setattr, 1033 .fiemap = bch2_fiemap, 1034 .listxattr = bch2_xattr_list, 1035 #ifdef CONFIG_BCACHEFS_POSIX_ACL 1036 .get_acl = bch2_get_acl, 1037 .set_acl = bch2_set_acl, 1038 #endif 1039 }; 1040 1041 static const struct inode_operations bch_dir_inode_operations = { 1042 .lookup = bch2_lookup, 1043 .create = bch2_create, 1044 .link = bch2_link, 1045 .unlink = bch2_unlink, 1046 .symlink = bch2_symlink, 1047 .mkdir = bch2_mkdir, 1048 .rmdir = bch2_unlink, 1049 .mknod = bch2_mknod, 1050 .rename = bch2_rename2, 1051 .getattr = bch2_getattr, 1052 .setattr = bch2_setattr, 1053 .tmpfile = bch2_tmpfile, 1054 .listxattr = bch2_xattr_list, 1055 #ifdef CONFIG_BCACHEFS_POSIX_ACL 1056 .get_acl = bch2_get_acl, 1057 .set_acl = bch2_set_acl, 1058 #endif 1059 }; 1060 1061 static const struct file_operations bch_dir_file_operations = { 1062 .llseek = bch2_dir_llseek, 1063 .read = generic_read_dir, 1064 .iterate_shared = bch2_vfs_readdir, 1065 .fsync = bch2_fsync, 1066 .unlocked_ioctl = bch2_fs_file_ioctl, 1067 #ifdef CONFIG_COMPAT 1068 .compat_ioctl = bch2_compat_fs_ioctl, 1069 #endif 1070 }; 1071 1072 static const struct inode_operations bch_symlink_inode_operations = { 1073 .get_link = page_get_link, 1074 .getattr = bch2_getattr, 1075 .setattr = bch2_setattr, 1076 .listxattr = bch2_xattr_list, 1077 #ifdef CONFIG_BCACHEFS_POSIX_ACL 1078 .get_acl = bch2_get_acl, 1079 .set_acl = bch2_set_acl, 1080 #endif 1081 }; 1082 1083 static const struct inode_operations bch_special_inode_operations = { 1084 .getattr = bch2_getattr, 1085 .setattr = bch2_setattr, 1086 .listxattr = bch2_xattr_list, 1087 #ifdef CONFIG_BCACHEFS_POSIX_ACL 1088 .get_acl = bch2_get_acl, 1089 .set_acl = bch2_set_acl, 1090 #endif 1091 }; 1092 1093 static const struct address_space_operations bch_address_space_operations = { 1094 .read_folio = bch2_read_folio, 1095 .writepages = bch2_writepages, 1096 .readahead = bch2_readahead, 1097 .dirty_folio = filemap_dirty_folio, 1098 .write_begin = bch2_write_begin, 1099 .write_end = bch2_write_end, 1100 .invalidate_folio = bch2_invalidate_folio, 1101 .release_folio = bch2_release_folio, 1102 .direct_IO = noop_direct_IO, 1103 #ifdef CONFIG_MIGRATION 1104 .migrate_folio = filemap_migrate_folio, 1105 #endif 1106 .error_remove_page = generic_error_remove_page, 1107 }; 1108 1109 struct bcachefs_fid { 1110 u64 inum; 1111 u32 subvol; 1112 u32 gen; 1113 } __packed; 1114 1115 struct bcachefs_fid_with_parent { 1116 struct bcachefs_fid fid; 1117 struct bcachefs_fid dir; 1118 } __packed; 1119 1120 static int bcachefs_fid_valid(int fh_len, int fh_type) 1121 { 1122 switch (fh_type) { 1123 case FILEID_BCACHEFS_WITHOUT_PARENT: 1124 return fh_len == sizeof(struct bcachefs_fid) / sizeof(u32); 1125 case FILEID_BCACHEFS_WITH_PARENT: 1126 return fh_len == sizeof(struct bcachefs_fid_with_parent) / sizeof(u32); 1127 default: 1128 return false; 1129 } 1130 } 1131 1132 static struct bcachefs_fid bch2_inode_to_fid(struct bch_inode_info *inode) 1133 { 1134 return (struct bcachefs_fid) { 1135 .inum = inode->ei_inode.bi_inum, 1136 .subvol = inode->ei_subvol, 1137 .gen = inode->ei_inode.bi_generation, 1138 }; 1139 } 1140 1141 static int bch2_encode_fh(struct inode *vinode, u32 *fh, int *len, 1142 struct inode *vdir) 1143 { 1144 struct bch_inode_info *inode = to_bch_ei(vinode); 1145 struct bch_inode_info *dir = to_bch_ei(vdir); 1146 1147 if (*len < sizeof(struct bcachefs_fid_with_parent) / sizeof(u32)) 1148 return FILEID_INVALID; 1149 1150 if (!S_ISDIR(inode->v.i_mode) && dir) { 1151 struct bcachefs_fid_with_parent *fid = (void *) fh; 1152 1153 fid->fid = bch2_inode_to_fid(inode); 1154 fid->dir = bch2_inode_to_fid(dir); 1155 1156 *len = sizeof(*fid) / sizeof(u32); 1157 return FILEID_BCACHEFS_WITH_PARENT; 1158 } else { 1159 struct bcachefs_fid *fid = (void *) fh; 1160 1161 *fid = bch2_inode_to_fid(inode); 1162 1163 *len = sizeof(*fid) / sizeof(u32); 1164 return FILEID_BCACHEFS_WITHOUT_PARENT; 1165 } 1166 } 1167 1168 static struct inode *bch2_nfs_get_inode(struct super_block *sb, 1169 struct bcachefs_fid fid) 1170 { 1171 struct bch_fs *c = sb->s_fs_info; 1172 struct inode *vinode = bch2_vfs_inode_get(c, (subvol_inum) { 1173 .subvol = fid.subvol, 1174 .inum = fid.inum, 1175 }); 1176 if (!IS_ERR(vinode) && vinode->i_generation != fid.gen) { 1177 iput(vinode); 1178 vinode = ERR_PTR(-ESTALE); 1179 } 1180 return vinode; 1181 } 1182 1183 static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *_fid, 1184 int fh_len, int fh_type) 1185 { 1186 struct bcachefs_fid *fid = (void *) _fid; 1187 1188 if (!bcachefs_fid_valid(fh_len, fh_type)) 1189 return NULL; 1190 1191 return d_obtain_alias(bch2_nfs_get_inode(sb, *fid)); 1192 } 1193 1194 static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *_fid, 1195 int fh_len, int fh_type) 1196 { 1197 struct bcachefs_fid_with_parent *fid = (void *) _fid; 1198 1199 if (!bcachefs_fid_valid(fh_len, fh_type) || 1200 fh_type != FILEID_BCACHEFS_WITH_PARENT) 1201 return NULL; 1202 1203 return d_obtain_alias(bch2_nfs_get_inode(sb, fid->dir)); 1204 } 1205 1206 static struct dentry *bch2_get_parent(struct dentry *child) 1207 { 1208 struct bch_inode_info *inode = to_bch_ei(child->d_inode); 1209 struct bch_fs *c = inode->v.i_sb->s_fs_info; 1210 subvol_inum parent_inum = { 1211 .subvol = inode->ei_inode.bi_parent_subvol ?: 1212 inode->ei_subvol, 1213 .inum = inode->ei_inode.bi_dir, 1214 }; 1215 1216 return d_obtain_alias(bch2_vfs_inode_get(c, parent_inum)); 1217 } 1218 1219 static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child) 1220 { 1221 struct bch_inode_info *inode = to_bch_ei(child->d_inode); 1222 struct bch_inode_info *dir = to_bch_ei(parent->d_inode); 1223 struct bch_fs *c = inode->v.i_sb->s_fs_info; 1224 struct btree_trans *trans; 1225 struct btree_iter iter1; 1226 struct btree_iter iter2; 1227 struct bkey_s_c k; 1228 struct bkey_s_c_dirent d; 1229 struct bch_inode_unpacked inode_u; 1230 subvol_inum target; 1231 u32 snapshot; 1232 struct qstr dirent_name; 1233 unsigned name_len = 0; 1234 int ret; 1235 1236 if (!S_ISDIR(dir->v.i_mode)) 1237 return -EINVAL; 1238 1239 trans = bch2_trans_get(c); 1240 1241 bch2_trans_iter_init(trans, &iter1, BTREE_ID_dirents, 1242 POS(dir->ei_inode.bi_inum, 0), 0); 1243 bch2_trans_iter_init(trans, &iter2, BTREE_ID_dirents, 1244 POS(dir->ei_inode.bi_inum, 0), 0); 1245 retry: 1246 bch2_trans_begin(trans); 1247 1248 ret = bch2_subvolume_get_snapshot(trans, dir->ei_subvol, &snapshot); 1249 if (ret) 1250 goto err; 1251 1252 bch2_btree_iter_set_snapshot(&iter1, snapshot); 1253 bch2_btree_iter_set_snapshot(&iter2, snapshot); 1254 1255 ret = bch2_inode_find_by_inum_trans(trans, inode_inum(inode), &inode_u); 1256 if (ret) 1257 goto err; 1258 1259 if (inode_u.bi_dir == dir->ei_inode.bi_inum) { 1260 bch2_btree_iter_set_pos(&iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset)); 1261 1262 k = bch2_btree_iter_peek_slot(&iter1); 1263 ret = bkey_err(k); 1264 if (ret) 1265 goto err; 1266 1267 if (k.k->type != KEY_TYPE_dirent) { 1268 ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; 1269 goto err; 1270 } 1271 1272 d = bkey_s_c_to_dirent(k); 1273 ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target); 1274 if (ret > 0) 1275 ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; 1276 if (ret) 1277 goto err; 1278 1279 if (target.subvol == inode->ei_subvol && 1280 target.inum == inode->ei_inode.bi_inum) 1281 goto found; 1282 } else { 1283 /* 1284 * File with multiple hardlinks and our backref is to the wrong 1285 * directory - linear search: 1286 */ 1287 for_each_btree_key_continue_norestart(iter2, 0, k, ret) { 1288 if (k.k->p.inode > dir->ei_inode.bi_inum) 1289 break; 1290 1291 if (k.k->type != KEY_TYPE_dirent) 1292 continue; 1293 1294 d = bkey_s_c_to_dirent(k); 1295 ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target); 1296 if (ret < 0) 1297 break; 1298 if (ret) 1299 continue; 1300 1301 if (target.subvol == inode->ei_subvol && 1302 target.inum == inode->ei_inode.bi_inum) 1303 goto found; 1304 } 1305 } 1306 1307 ret = -ENOENT; 1308 goto err; 1309 found: 1310 dirent_name = bch2_dirent_get_name(d); 1311 1312 name_len = min_t(unsigned, dirent_name.len, NAME_MAX); 1313 memcpy(name, dirent_name.name, name_len); 1314 name[name_len] = '\0'; 1315 err: 1316 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 1317 goto retry; 1318 1319 bch2_trans_iter_exit(trans, &iter1); 1320 bch2_trans_iter_exit(trans, &iter2); 1321 bch2_trans_put(trans); 1322 1323 return ret; 1324 } 1325 1326 static const struct export_operations bch_export_ops = { 1327 .encode_fh = bch2_encode_fh, 1328 .fh_to_dentry = bch2_fh_to_dentry, 1329 .fh_to_parent = bch2_fh_to_parent, 1330 .get_parent = bch2_get_parent, 1331 .get_name = bch2_get_name, 1332 }; 1333 1334 static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum, 1335 struct bch_inode_info *inode, 1336 struct bch_inode_unpacked *bi, 1337 struct bch_subvolume *subvol) 1338 { 1339 bch2_inode_update_after_write(trans, inode, bi, ~0); 1340 1341 if (BCH_SUBVOLUME_SNAP(subvol)) 1342 set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags); 1343 else 1344 clear_bit(EI_INODE_SNAPSHOT, &inode->ei_flags); 1345 1346 inode->v.i_blocks = bi->bi_sectors; 1347 inode->v.i_ino = bi->bi_inum; 1348 inode->v.i_rdev = bi->bi_dev; 1349 inode->v.i_generation = bi->bi_generation; 1350 inode->v.i_size = bi->bi_size; 1351 1352 inode->ei_flags = 0; 1353 inode->ei_quota_reserved = 0; 1354 inode->ei_qid = bch_qid(bi); 1355 inode->ei_subvol = inum.subvol; 1356 1357 inode->v.i_mapping->a_ops = &bch_address_space_operations; 1358 1359 switch (inode->v.i_mode & S_IFMT) { 1360 case S_IFREG: 1361 inode->v.i_op = &bch_file_inode_operations; 1362 inode->v.i_fop = &bch_file_operations; 1363 break; 1364 case S_IFDIR: 1365 inode->v.i_op = &bch_dir_inode_operations; 1366 inode->v.i_fop = &bch_dir_file_operations; 1367 break; 1368 case S_IFLNK: 1369 inode_nohighmem(&inode->v); 1370 inode->v.i_op = &bch_symlink_inode_operations; 1371 break; 1372 default: 1373 init_special_inode(&inode->v, inode->v.i_mode, inode->v.i_rdev); 1374 inode->v.i_op = &bch_special_inode_operations; 1375 break; 1376 } 1377 1378 mapping_set_large_folios(inode->v.i_mapping); 1379 } 1380 1381 static struct inode *bch2_alloc_inode(struct super_block *sb) 1382 { 1383 struct bch_inode_info *inode; 1384 1385 inode = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS); 1386 if (!inode) 1387 return NULL; 1388 1389 inode_init_once(&inode->v); 1390 mutex_init(&inode->ei_update_lock); 1391 two_state_lock_init(&inode->ei_pagecache_lock); 1392 INIT_LIST_HEAD(&inode->ei_vfs_inode_list); 1393 mutex_init(&inode->ei_quota_lock); 1394 1395 return &inode->v; 1396 } 1397 1398 static void bch2_i_callback(struct rcu_head *head) 1399 { 1400 struct inode *vinode = container_of(head, struct inode, i_rcu); 1401 struct bch_inode_info *inode = to_bch_ei(vinode); 1402 1403 kmem_cache_free(bch2_inode_cache, inode); 1404 } 1405 1406 static void bch2_destroy_inode(struct inode *vinode) 1407 { 1408 call_rcu(&vinode->i_rcu, bch2_i_callback); 1409 } 1410 1411 static int inode_update_times_fn(struct btree_trans *trans, 1412 struct bch_inode_info *inode, 1413 struct bch_inode_unpacked *bi, 1414 void *p) 1415 { 1416 struct bch_fs *c = inode->v.i_sb->s_fs_info; 1417 1418 bi->bi_atime = timespec_to_bch2_time(c, inode_get_atime(&inode->v)); 1419 bi->bi_mtime = timespec_to_bch2_time(c, inode_get_mtime(&inode->v)); 1420 bi->bi_ctime = timespec_to_bch2_time(c, inode_get_ctime(&inode->v)); 1421 1422 return 0; 1423 } 1424 1425 static int bch2_vfs_write_inode(struct inode *vinode, 1426 struct writeback_control *wbc) 1427 { 1428 struct bch_fs *c = vinode->i_sb->s_fs_info; 1429 struct bch_inode_info *inode = to_bch_ei(vinode); 1430 int ret; 1431 1432 mutex_lock(&inode->ei_update_lock); 1433 ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, 1434 ATTR_ATIME|ATTR_MTIME|ATTR_CTIME); 1435 mutex_unlock(&inode->ei_update_lock); 1436 1437 return bch2_err_class(ret); 1438 } 1439 1440 static void bch2_evict_inode(struct inode *vinode) 1441 { 1442 struct bch_fs *c = vinode->i_sb->s_fs_info; 1443 struct bch_inode_info *inode = to_bch_ei(vinode); 1444 1445 truncate_inode_pages_final(&inode->v.i_data); 1446 1447 clear_inode(&inode->v); 1448 1449 BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved); 1450 1451 if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) { 1452 bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks), 1453 KEY_TYPE_QUOTA_WARN); 1454 bch2_quota_acct(c, inode->ei_qid, Q_INO, -1, 1455 KEY_TYPE_QUOTA_WARN); 1456 bch2_inode_rm(c, inode_inum(inode)); 1457 } 1458 1459 mutex_lock(&c->vfs_inodes_lock); 1460 list_del_init(&inode->ei_vfs_inode_list); 1461 mutex_unlock(&c->vfs_inodes_lock); 1462 } 1463 1464 void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s) 1465 { 1466 struct bch_inode_info *inode, **i; 1467 DARRAY(struct bch_inode_info *) grabbed; 1468 bool clean_pass = false, this_pass_clean; 1469 1470 /* 1471 * Initially, we scan for inodes without I_DONTCACHE, then mark them to 1472 * be pruned with d_mark_dontcache(). 1473 * 1474 * Once we've had a clean pass where we didn't find any inodes without 1475 * I_DONTCACHE, we wait for them to be freed: 1476 */ 1477 1478 darray_init(&grabbed); 1479 darray_make_room(&grabbed, 1024); 1480 again: 1481 cond_resched(); 1482 this_pass_clean = true; 1483 1484 mutex_lock(&c->vfs_inodes_lock); 1485 list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) { 1486 if (!snapshot_list_has_id(s, inode->ei_subvol)) 1487 continue; 1488 1489 if (!(inode->v.i_state & I_DONTCACHE) && 1490 !(inode->v.i_state & I_FREEING) && 1491 igrab(&inode->v)) { 1492 this_pass_clean = false; 1493 1494 if (darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN)) { 1495 iput(&inode->v); 1496 break; 1497 } 1498 } else if (clean_pass && this_pass_clean) { 1499 wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW); 1500 DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW); 1501 1502 prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); 1503 mutex_unlock(&c->vfs_inodes_lock); 1504 1505 schedule(); 1506 finish_wait(wq, &wait.wq_entry); 1507 goto again; 1508 } 1509 } 1510 mutex_unlock(&c->vfs_inodes_lock); 1511 1512 darray_for_each(grabbed, i) { 1513 inode = *i; 1514 d_mark_dontcache(&inode->v); 1515 d_prune_aliases(&inode->v); 1516 iput(&inode->v); 1517 } 1518 grabbed.nr = 0; 1519 1520 if (!clean_pass || !this_pass_clean) { 1521 clean_pass = this_pass_clean; 1522 goto again; 1523 } 1524 1525 darray_exit(&grabbed); 1526 } 1527 1528 static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) 1529 { 1530 struct super_block *sb = dentry->d_sb; 1531 struct bch_fs *c = sb->s_fs_info; 1532 struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c); 1533 unsigned shift = sb->s_blocksize_bits - 9; 1534 /* 1535 * this assumes inodes take up 64 bytes, which is a decent average 1536 * number: 1537 */ 1538 u64 avail_inodes = ((usage.capacity - usage.used) << 3); 1539 u64 fsid; 1540 1541 buf->f_type = BCACHEFS_STATFS_MAGIC; 1542 buf->f_bsize = sb->s_blocksize; 1543 buf->f_blocks = usage.capacity >> shift; 1544 buf->f_bfree = usage.free >> shift; 1545 buf->f_bavail = avail_factor(usage.free) >> shift; 1546 1547 buf->f_files = usage.nr_inodes + avail_inodes; 1548 buf->f_ffree = avail_inodes; 1549 1550 fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^ 1551 le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64)); 1552 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 1553 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 1554 buf->f_namelen = BCH_NAME_MAX; 1555 1556 return 0; 1557 } 1558 1559 static int bch2_sync_fs(struct super_block *sb, int wait) 1560 { 1561 struct bch_fs *c = sb->s_fs_info; 1562 int ret; 1563 1564 if (c->opts.journal_flush_disabled) 1565 return 0; 1566 1567 if (!wait) { 1568 bch2_journal_flush_async(&c->journal, NULL); 1569 return 0; 1570 } 1571 1572 ret = bch2_journal_flush(&c->journal); 1573 return bch2_err_class(ret); 1574 } 1575 1576 static struct bch_fs *bch2_path_to_fs(const char *path) 1577 { 1578 struct bch_fs *c; 1579 dev_t dev; 1580 int ret; 1581 1582 ret = lookup_bdev(path, &dev); 1583 if (ret) 1584 return ERR_PTR(ret); 1585 1586 c = bch2_dev_to_fs(dev); 1587 if (c) 1588 closure_put(&c->cl); 1589 return c ?: ERR_PTR(-ENOENT); 1590 } 1591 1592 static char **split_devs(const char *_dev_name, unsigned *nr) 1593 { 1594 char *dev_name = NULL, **devs = NULL, *s; 1595 size_t i = 0, nr_devs = 0; 1596 1597 dev_name = kstrdup(_dev_name, GFP_KERNEL); 1598 if (!dev_name) 1599 return NULL; 1600 1601 for (s = dev_name; s; s = strchr(s + 1, ':')) 1602 nr_devs++; 1603 1604 devs = kcalloc(nr_devs + 1, sizeof(const char *), GFP_KERNEL); 1605 if (!devs) { 1606 kfree(dev_name); 1607 return NULL; 1608 } 1609 1610 while ((s = strsep(&dev_name, ":"))) 1611 devs[i++] = s; 1612 1613 *nr = nr_devs; 1614 return devs; 1615 } 1616 1617 static int bch2_remount(struct super_block *sb, int *flags, char *data) 1618 { 1619 struct bch_fs *c = sb->s_fs_info; 1620 struct bch_opts opts = bch2_opts_empty(); 1621 int ret; 1622 1623 opt_set(opts, read_only, (*flags & SB_RDONLY) != 0); 1624 1625 ret = bch2_parse_mount_opts(c, &opts, data); 1626 if (ret) 1627 goto err; 1628 1629 if (opts.read_only != c->opts.read_only) { 1630 down_write(&c->state_lock); 1631 1632 if (opts.read_only) { 1633 bch2_fs_read_only(c); 1634 1635 sb->s_flags |= SB_RDONLY; 1636 } else { 1637 ret = bch2_fs_read_write(c); 1638 if (ret) { 1639 bch_err(c, "error going rw: %i", ret); 1640 up_write(&c->state_lock); 1641 ret = -EINVAL; 1642 goto err; 1643 } 1644 1645 sb->s_flags &= ~SB_RDONLY; 1646 } 1647 1648 c->opts.read_only = opts.read_only; 1649 1650 up_write(&c->state_lock); 1651 } 1652 1653 if (opt_defined(opts, errors)) 1654 c->opts.errors = opts.errors; 1655 err: 1656 return bch2_err_class(ret); 1657 } 1658 1659 static int bch2_show_devname(struct seq_file *seq, struct dentry *root) 1660 { 1661 struct bch_fs *c = root->d_sb->s_fs_info; 1662 struct bch_dev *ca; 1663 unsigned i; 1664 bool first = true; 1665 1666 for_each_online_member(ca, c, i) { 1667 if (!first) 1668 seq_putc(seq, ':'); 1669 first = false; 1670 seq_puts(seq, ca->disk_sb.sb_name); 1671 } 1672 1673 return 0; 1674 } 1675 1676 static int bch2_show_options(struct seq_file *seq, struct dentry *root) 1677 { 1678 struct bch_fs *c = root->d_sb->s_fs_info; 1679 enum bch_opt_id i; 1680 struct printbuf buf = PRINTBUF; 1681 int ret = 0; 1682 1683 for (i = 0; i < bch2_opts_nr; i++) { 1684 const struct bch_option *opt = &bch2_opt_table[i]; 1685 u64 v = bch2_opt_get_by_id(&c->opts, i); 1686 1687 if (!(opt->flags & OPT_MOUNT)) 1688 continue; 1689 1690 if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) 1691 continue; 1692 1693 printbuf_reset(&buf); 1694 bch2_opt_to_text(&buf, c, c->disk_sb.sb, opt, v, 1695 OPT_SHOW_MOUNT_STYLE); 1696 seq_putc(seq, ','); 1697 seq_puts(seq, buf.buf); 1698 } 1699 1700 if (buf.allocation_failure) 1701 ret = -ENOMEM; 1702 printbuf_exit(&buf); 1703 return ret; 1704 } 1705 1706 static void bch2_put_super(struct super_block *sb) 1707 { 1708 struct bch_fs *c = sb->s_fs_info; 1709 1710 __bch2_fs_stop(c); 1711 } 1712 1713 /* 1714 * bcachefs doesn't currently integrate intwrite freeze protection but the 1715 * internal write references serve the same purpose. Therefore reuse the 1716 * read-only transition code to perform the quiesce. The caveat is that we don't 1717 * currently have the ability to block tasks that want a write reference while 1718 * the superblock is frozen. This is fine for now, but we should either add 1719 * blocking support or find a way to integrate sb_start_intwrite() and friends. 1720 */ 1721 static int bch2_freeze(struct super_block *sb) 1722 { 1723 struct bch_fs *c = sb->s_fs_info; 1724 1725 down_write(&c->state_lock); 1726 bch2_fs_read_only(c); 1727 up_write(&c->state_lock); 1728 return 0; 1729 } 1730 1731 static int bch2_unfreeze(struct super_block *sb) 1732 { 1733 struct bch_fs *c = sb->s_fs_info; 1734 int ret; 1735 1736 down_write(&c->state_lock); 1737 ret = bch2_fs_read_write(c); 1738 up_write(&c->state_lock); 1739 return ret; 1740 } 1741 1742 static const struct super_operations bch_super_operations = { 1743 .alloc_inode = bch2_alloc_inode, 1744 .destroy_inode = bch2_destroy_inode, 1745 .write_inode = bch2_vfs_write_inode, 1746 .evict_inode = bch2_evict_inode, 1747 .sync_fs = bch2_sync_fs, 1748 .statfs = bch2_statfs, 1749 .show_devname = bch2_show_devname, 1750 .show_options = bch2_show_options, 1751 .remount_fs = bch2_remount, 1752 .put_super = bch2_put_super, 1753 .freeze_fs = bch2_freeze, 1754 .unfreeze_fs = bch2_unfreeze, 1755 }; 1756 1757 static int bch2_set_super(struct super_block *s, void *data) 1758 { 1759 s->s_fs_info = data; 1760 return 0; 1761 } 1762 1763 static int bch2_noset_super(struct super_block *s, void *data) 1764 { 1765 return -EBUSY; 1766 } 1767 1768 static int bch2_test_super(struct super_block *s, void *data) 1769 { 1770 struct bch_fs *c = s->s_fs_info; 1771 struct bch_fs **devs = data; 1772 unsigned i; 1773 1774 if (!c) 1775 return false; 1776 1777 for (i = 0; devs[i]; i++) 1778 if (c != devs[i]) 1779 return false; 1780 return true; 1781 } 1782 1783 static struct dentry *bch2_mount(struct file_system_type *fs_type, 1784 int flags, const char *dev_name, void *data) 1785 { 1786 struct bch_fs *c; 1787 struct bch_dev *ca; 1788 struct super_block *sb; 1789 struct inode *vinode; 1790 struct bch_opts opts = bch2_opts_empty(); 1791 char **devs; 1792 struct bch_fs **devs_to_fs = NULL; 1793 unsigned i, nr_devs; 1794 int ret; 1795 1796 opt_set(opts, read_only, (flags & SB_RDONLY) != 0); 1797 1798 ret = bch2_parse_mount_opts(NULL, &opts, data); 1799 if (ret) 1800 return ERR_PTR(ret); 1801 1802 if (!dev_name || strlen(dev_name) == 0) 1803 return ERR_PTR(-EINVAL); 1804 1805 devs = split_devs(dev_name, &nr_devs); 1806 if (!devs) 1807 return ERR_PTR(-ENOMEM); 1808 1809 devs_to_fs = kcalloc(nr_devs + 1, sizeof(void *), GFP_KERNEL); 1810 if (!devs_to_fs) { 1811 sb = ERR_PTR(-ENOMEM); 1812 goto got_sb; 1813 } 1814 1815 for (i = 0; i < nr_devs; i++) 1816 devs_to_fs[i] = bch2_path_to_fs(devs[i]); 1817 1818 sb = sget(fs_type, bch2_test_super, bch2_noset_super, 1819 flags|SB_NOSEC, devs_to_fs); 1820 if (!IS_ERR(sb)) 1821 goto got_sb; 1822 1823 c = bch2_fs_open(devs, nr_devs, opts); 1824 if (IS_ERR(c)) { 1825 sb = ERR_CAST(c); 1826 goto got_sb; 1827 } 1828 1829 /* Some options can't be parsed until after the fs is started: */ 1830 ret = bch2_parse_mount_opts(c, &opts, data); 1831 if (ret) { 1832 bch2_fs_stop(c); 1833 sb = ERR_PTR(ret); 1834 goto got_sb; 1835 } 1836 1837 bch2_opts_apply(&c->opts, opts); 1838 1839 sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c); 1840 if (IS_ERR(sb)) 1841 bch2_fs_stop(c); 1842 got_sb: 1843 kfree(devs_to_fs); 1844 kfree(devs[0]); 1845 kfree(devs); 1846 1847 if (IS_ERR(sb)) { 1848 ret = PTR_ERR(sb); 1849 ret = bch2_err_class(ret); 1850 return ERR_PTR(ret); 1851 } 1852 1853 c = sb->s_fs_info; 1854 1855 if (sb->s_root) { 1856 if ((flags ^ sb->s_flags) & SB_RDONLY) { 1857 ret = -EBUSY; 1858 goto err_put_super; 1859 } 1860 goto out; 1861 } 1862 1863 sb->s_blocksize = block_bytes(c); 1864 sb->s_blocksize_bits = ilog2(block_bytes(c)); 1865 sb->s_maxbytes = MAX_LFS_FILESIZE; 1866 sb->s_op = &bch_super_operations; 1867 sb->s_export_op = &bch_export_ops; 1868 #ifdef CONFIG_BCACHEFS_QUOTA 1869 sb->s_qcop = &bch2_quotactl_operations; 1870 sb->s_quota_types = QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ; 1871 #endif 1872 sb->s_xattr = bch2_xattr_handlers; 1873 sb->s_magic = BCACHEFS_STATFS_MAGIC; 1874 sb->s_time_gran = c->sb.nsec_per_time_unit; 1875 sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1; 1876 sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec); 1877 c->vfs_sb = sb; 1878 strscpy(sb->s_id, c->name, sizeof(sb->s_id)); 1879 1880 ret = super_setup_bdi(sb); 1881 if (ret) 1882 goto err_put_super; 1883 1884 sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; 1885 1886 for_each_online_member(ca, c, i) { 1887 struct block_device *bdev = ca->disk_sb.bdev; 1888 1889 /* XXX: create an anonymous device for multi device filesystems */ 1890 sb->s_bdev = bdev; 1891 sb->s_dev = bdev->bd_dev; 1892 percpu_ref_put(&ca->io_ref); 1893 break; 1894 } 1895 1896 c->dev = sb->s_dev; 1897 1898 #ifdef CONFIG_BCACHEFS_POSIX_ACL 1899 if (c->opts.acl) 1900 sb->s_flags |= SB_POSIXACL; 1901 #endif 1902 1903 sb->s_shrink->seeks = 0; 1904 1905 vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); 1906 ret = PTR_ERR_OR_ZERO(vinode); 1907 if (ret) { 1908 bch_err_msg(c, ret, "mounting: error getting root inode"); 1909 goto err_put_super; 1910 } 1911 1912 sb->s_root = d_make_root(vinode); 1913 if (!sb->s_root) { 1914 bch_err(c, "error mounting: error allocating root dentry"); 1915 ret = -ENOMEM; 1916 goto err_put_super; 1917 } 1918 1919 sb->s_flags |= SB_ACTIVE; 1920 out: 1921 return dget(sb->s_root); 1922 1923 err_put_super: 1924 deactivate_locked_super(sb); 1925 return ERR_PTR(bch2_err_class(ret)); 1926 } 1927 1928 static void bch2_kill_sb(struct super_block *sb) 1929 { 1930 struct bch_fs *c = sb->s_fs_info; 1931 1932 generic_shutdown_super(sb); 1933 bch2_fs_free(c); 1934 } 1935 1936 static struct file_system_type bcache_fs_type = { 1937 .owner = THIS_MODULE, 1938 .name = "bcachefs", 1939 .mount = bch2_mount, 1940 .kill_sb = bch2_kill_sb, 1941 .fs_flags = FS_REQUIRES_DEV, 1942 }; 1943 1944 MODULE_ALIAS_FS("bcachefs"); 1945 1946 void bch2_vfs_exit(void) 1947 { 1948 unregister_filesystem(&bcache_fs_type); 1949 kmem_cache_destroy(bch2_inode_cache); 1950 } 1951 1952 int __init bch2_vfs_init(void) 1953 { 1954 int ret = -ENOMEM; 1955 1956 bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT); 1957 if (!bch2_inode_cache) 1958 goto err; 1959 1960 ret = register_filesystem(&bcache_fs_type); 1961 if (ret) 1962 goto err; 1963 1964 return 0; 1965 err: 1966 bch2_vfs_exit(); 1967 return ret; 1968 } 1969 1970 #endif /* NO_BCACHEFS_FS */ 1971