1 // SPDX-License-Identifier: GPL-2.0 2 #ifndef NO_BCACHEFS_FS 3 4 #include "bcachefs.h" 5 #include "chardev.h" 6 #include "dirent.h" 7 #include "fs.h" 8 #include "fs-ioctl.h" 9 #include "namei.h" 10 #include "quota.h" 11 12 #include <linux/compat.h> 13 #include <linux/fsnotify.h> 14 #include <linux/mount.h> 15 #include <linux/namei.h> 16 #include <linux/security.h> 17 #include <linux/writeback.h> 18 19 #define FS_IOC_GOINGDOWN _IOR('X', 125, __u32) 20 #define FSOP_GOING_FLAGS_DEFAULT 0x0 /* going down */ 21 #define FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */ 22 #define FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ 23 24 struct flags_set { 25 unsigned mask; 26 unsigned flags; 27 28 unsigned projid; 29 30 bool set_projinherit; 31 bool projinherit; 32 }; 33 34 static int bch2_inode_flags_set(struct btree_trans *trans, 35 struct bch_inode_info *inode, 36 struct bch_inode_unpacked *bi, 37 void *p) 38 { 39 struct bch_fs *c = inode->v.i_sb->s_fs_info; 40 /* 41 * We're relying on btree locking here for exclusion with other ioctl 42 * calls - use the flags in the btree (@bi), not inode->i_flags: 43 */ 44 struct flags_set *s = p; 45 unsigned newflags = s->flags; 46 unsigned oldflags = bi->bi_flags & s->mask; 47 48 if (((newflags ^ oldflags) & (BCH_INODE_append|BCH_INODE_immutable)) && 49 !capable(CAP_LINUX_IMMUTABLE)) 50 return -EPERM; 51 52 if (!S_ISREG(bi->bi_mode) && 53 !S_ISDIR(bi->bi_mode) && 54 (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags) 55 return -EINVAL; 56 57 if ((newflags ^ oldflags) & BCH_INODE_casefolded) { 58 #ifdef CONFIG_UNICODE 59 int ret = 0; 60 /* Not supported on individual files. */ 61 if (!S_ISDIR(bi->bi_mode)) 62 return -EOPNOTSUPP; 63 64 /* 65 * Make sure the dir is empty, as otherwise we'd need to 66 * rehash everything and update the dirent keys. 67 */ 68 ret = bch2_empty_dir_trans(trans, inode_inum(inode)); 69 if (ret < 0) 70 return ret; 71 72 ret = bch2_request_incompat_feature(c,bcachefs_metadata_version_casefolding); 73 if (ret) 74 return ret; 75 76 bch2_check_set_feature(c, BCH_FEATURE_casefolding); 77 #else 78 printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n"); 79 return -EOPNOTSUPP; 80 #endif 81 } 82 83 if (s->set_projinherit) { 84 bi->bi_fields_set &= ~(1 << Inode_opt_project); 85 bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project); 86 } 87 88 bi->bi_flags &= ~s->mask; 89 bi->bi_flags |= newflags; 90 91 bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v)); 92 return 0; 93 } 94 95 static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg) 96 { 97 unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_inode.bi_flags); 98 99 return put_user(flags, arg); 100 } 101 102 static int bch2_ioc_setflags(struct bch_fs *c, 103 struct file *file, 104 struct bch_inode_info *inode, 105 void __user *arg) 106 { 107 struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) }; 108 unsigned uflags; 109 int ret; 110 111 if (get_user(uflags, (int __user *) arg)) 112 return -EFAULT; 113 114 s.flags = map_flags_rev(bch_flags_to_uflags, uflags); 115 if (uflags) 116 return -EOPNOTSUPP; 117 118 ret = mnt_want_write_file(file); 119 if (ret) 120 return ret; 121 122 inode_lock(&inode->v); 123 if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) { 124 ret = -EACCES; 125 goto setflags_out; 126 } 127 128 mutex_lock(&inode->ei_update_lock); 129 ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: 130 bch2_write_inode(c, inode, bch2_inode_flags_set, &s, 131 ATTR_CTIME); 132 mutex_unlock(&inode->ei_update_lock); 133 134 setflags_out: 135 inode_unlock(&inode->v); 136 mnt_drop_write_file(file); 137 return ret; 138 } 139 140 static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode, 141 struct fsxattr __user *arg) 142 { 143 struct fsxattr fa = { 0 }; 144 145 fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags); 146 147 if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project)) 148 fa.fsx_xflags |= FS_XFLAG_PROJINHERIT; 149 150 fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ]; 151 152 if (copy_to_user(arg, &fa, sizeof(fa))) 153 return -EFAULT; 154 155 return 0; 156 } 157 158 static int fssetxattr_inode_update_fn(struct btree_trans *trans, 159 struct bch_inode_info *inode, 160 struct bch_inode_unpacked *bi, 161 void *p) 162 { 163 struct flags_set *s = p; 164 165 if (s->projid != bi->bi_project) { 166 bi->bi_fields_set |= 1U << Inode_opt_project; 167 bi->bi_project = s->projid; 168 } 169 170 return bch2_inode_flags_set(trans, inode, bi, p); 171 } 172 173 static int bch2_ioc_fssetxattr(struct bch_fs *c, 174 struct file *file, 175 struct bch_inode_info *inode, 176 struct fsxattr __user *arg) 177 { 178 struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) }; 179 struct fsxattr fa; 180 int ret; 181 182 if (copy_from_user(&fa, arg, sizeof(fa))) 183 return -EFAULT; 184 185 s.set_projinherit = true; 186 s.projinherit = (fa.fsx_xflags & FS_XFLAG_PROJINHERIT) != 0; 187 fa.fsx_xflags &= ~FS_XFLAG_PROJINHERIT; 188 189 s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags); 190 if (fa.fsx_xflags) 191 return -EOPNOTSUPP; 192 193 if (fa.fsx_projid >= U32_MAX) 194 return -EINVAL; 195 196 /* 197 * inode fields accessible via the xattr interface are stored with a +1 198 * bias, so that 0 means unset: 199 */ 200 s.projid = fa.fsx_projid + 1; 201 202 ret = mnt_want_write_file(file); 203 if (ret) 204 return ret; 205 206 inode_lock(&inode->v); 207 if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) { 208 ret = -EACCES; 209 goto err; 210 } 211 212 mutex_lock(&inode->ei_update_lock); 213 ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: 214 bch2_set_projid(c, inode, fa.fsx_projid) ?: 215 bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, 216 ATTR_CTIME); 217 mutex_unlock(&inode->ei_update_lock); 218 err: 219 inode_unlock(&inode->v); 220 mnt_drop_write_file(file); 221 return ret; 222 } 223 224 static int bch2_reinherit_attrs_fn(struct btree_trans *trans, 225 struct bch_inode_info *inode, 226 struct bch_inode_unpacked *bi, 227 void *p) 228 { 229 struct bch_inode_info *dir = p; 230 231 return !bch2_reinherit_attrs(bi, &dir->ei_inode); 232 } 233 234 static int bch2_ioc_reinherit_attrs(struct bch_fs *c, 235 struct file *file, 236 struct bch_inode_info *src, 237 const char __user *name) 238 { 239 struct bch_hash_info hash = bch2_hash_info_init(c, &src->ei_inode); 240 struct bch_inode_info *dst; 241 struct inode *vinode = NULL; 242 char *kname = NULL; 243 struct qstr qstr; 244 int ret = 0; 245 subvol_inum inum; 246 247 kname = kmalloc(BCH_NAME_MAX, GFP_KERNEL); 248 if (!kname) 249 return -ENOMEM; 250 251 ret = strncpy_from_user(kname, name, BCH_NAME_MAX); 252 if (unlikely(ret < 0)) 253 goto err1; 254 255 qstr.len = ret; 256 qstr.name = kname; 257 258 ret = bch2_dirent_lookup(c, inode_inum(src), &hash, &qstr, &inum); 259 if (ret) 260 goto err1; 261 262 vinode = bch2_vfs_inode_get(c, inum); 263 ret = PTR_ERR_OR_ZERO(vinode); 264 if (ret) 265 goto err1; 266 267 dst = to_bch_ei(vinode); 268 269 ret = mnt_want_write_file(file); 270 if (ret) 271 goto err2; 272 273 bch2_lock_inodes(INODE_UPDATE_LOCK, src, dst); 274 275 if (inode_attr_changing(src, dst, Inode_opt_project)) { 276 ret = bch2_fs_quota_transfer(c, dst, 277 src->ei_qid, 278 1 << QTYP_PRJ, 279 KEY_TYPE_QUOTA_PREALLOC); 280 if (ret) 281 goto err3; 282 } 283 284 ret = bch2_write_inode(c, dst, bch2_reinherit_attrs_fn, src, 0); 285 err3: 286 bch2_unlock_inodes(INODE_UPDATE_LOCK, src, dst); 287 288 /* return true if we did work */ 289 if (ret >= 0) 290 ret = !ret; 291 292 mnt_drop_write_file(file); 293 err2: 294 iput(vinode); 295 err1: 296 kfree(kname); 297 298 return ret; 299 } 300 301 static int bch2_ioc_getversion(struct bch_inode_info *inode, u32 __user *arg) 302 { 303 return put_user(inode->v.i_generation, arg); 304 } 305 306 static int bch2_ioc_getlabel(struct bch_fs *c, char __user *user_label) 307 { 308 int ret; 309 size_t len; 310 char label[BCH_SB_LABEL_SIZE]; 311 312 BUILD_BUG_ON(BCH_SB_LABEL_SIZE >= FSLABEL_MAX); 313 314 mutex_lock(&c->sb_lock); 315 memcpy(label, c->disk_sb.sb->label, BCH_SB_LABEL_SIZE); 316 mutex_unlock(&c->sb_lock); 317 318 len = strnlen(label, BCH_SB_LABEL_SIZE); 319 if (len == BCH_SB_LABEL_SIZE) { 320 bch_warn(c, 321 "label is too long, return the first %zu bytes", 322 --len); 323 } 324 325 ret = copy_to_user(user_label, label, len); 326 327 return ret ? -EFAULT : 0; 328 } 329 330 static int bch2_ioc_setlabel(struct bch_fs *c, 331 struct file *file, 332 struct bch_inode_info *inode, 333 const char __user *user_label) 334 { 335 int ret; 336 char label[BCH_SB_LABEL_SIZE]; 337 338 if (!capable(CAP_SYS_ADMIN)) 339 return -EPERM; 340 341 if (copy_from_user(label, user_label, sizeof(label))) 342 return -EFAULT; 343 344 if (strnlen(label, BCH_SB_LABEL_SIZE) == BCH_SB_LABEL_SIZE) { 345 bch_err(c, 346 "unable to set label with more than %d bytes", 347 BCH_SB_LABEL_SIZE - 1); 348 return -EINVAL; 349 } 350 351 ret = mnt_want_write_file(file); 352 if (ret) 353 return ret; 354 355 mutex_lock(&c->sb_lock); 356 strscpy(c->disk_sb.sb->label, label, BCH_SB_LABEL_SIZE); 357 ret = bch2_write_super(c); 358 mutex_unlock(&c->sb_lock); 359 360 mnt_drop_write_file(file); 361 return ret; 362 } 363 364 static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg) 365 { 366 u32 flags; 367 int ret = 0; 368 369 if (!capable(CAP_SYS_ADMIN)) 370 return -EPERM; 371 372 if (get_user(flags, arg)) 373 return -EFAULT; 374 375 bch_notice(c, "shutdown by ioctl type %u", flags); 376 377 switch (flags) { 378 case FSOP_GOING_FLAGS_DEFAULT: 379 ret = bdev_freeze(c->vfs_sb->s_bdev); 380 if (ret) 381 break; 382 bch2_journal_flush(&c->journal); 383 bch2_fs_emergency_read_only(c); 384 bdev_thaw(c->vfs_sb->s_bdev); 385 break; 386 case FSOP_GOING_FLAGS_LOGFLUSH: 387 bch2_journal_flush(&c->journal); 388 fallthrough; 389 case FSOP_GOING_FLAGS_NOLOGFLUSH: 390 bch2_fs_emergency_read_only(c); 391 break; 392 default: 393 ret = -EINVAL; 394 break; 395 } 396 397 return ret; 398 } 399 400 static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, 401 struct bch_ioctl_subvolume arg) 402 { 403 struct inode *dir; 404 struct bch_inode_info *inode; 405 struct user_namespace *s_user_ns; 406 struct dentry *dst_dentry; 407 struct path src_path, dst_path; 408 int how = LOOKUP_FOLLOW; 409 int error; 410 subvol_inum snapshot_src = { 0 }; 411 unsigned lookup_flags = 0; 412 unsigned create_flags = BCH_CREATE_SUBVOL; 413 414 if (arg.flags & ~(BCH_SUBVOL_SNAPSHOT_CREATE| 415 BCH_SUBVOL_SNAPSHOT_RO)) 416 return -EINVAL; 417 418 if (!(arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) && 419 (arg.src_ptr || 420 (arg.flags & BCH_SUBVOL_SNAPSHOT_RO))) 421 return -EINVAL; 422 423 if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) 424 create_flags |= BCH_CREATE_SNAPSHOT; 425 426 if (arg.flags & BCH_SUBVOL_SNAPSHOT_RO) 427 create_flags |= BCH_CREATE_SNAPSHOT_RO; 428 429 if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) { 430 /* sync_inodes_sb enforce s_umount is locked */ 431 down_read(&c->vfs_sb->s_umount); 432 sync_inodes_sb(c->vfs_sb); 433 up_read(&c->vfs_sb->s_umount); 434 } 435 436 if (arg.src_ptr) { 437 error = user_path_at(arg.dirfd, 438 (const char __user *)(unsigned long)arg.src_ptr, 439 how, &src_path); 440 if (error) 441 goto err1; 442 443 if (src_path.dentry->d_sb->s_fs_info != c) { 444 path_put(&src_path); 445 error = -EXDEV; 446 goto err1; 447 } 448 449 snapshot_src = inode_inum(to_bch_ei(src_path.dentry->d_inode)); 450 } 451 452 dst_dentry = user_path_create(arg.dirfd, 453 (const char __user *)(unsigned long)arg.dst_ptr, 454 &dst_path, lookup_flags); 455 error = PTR_ERR_OR_ZERO(dst_dentry); 456 if (error) 457 goto err2; 458 459 if (dst_dentry->d_sb->s_fs_info != c) { 460 error = -EXDEV; 461 goto err3; 462 } 463 464 if (dst_dentry->d_inode) { 465 error = -BCH_ERR_EEXIST_subvolume_create; 466 goto err3; 467 } 468 469 dir = dst_path.dentry->d_inode; 470 if (IS_DEADDIR(dir)) { 471 error = -BCH_ERR_ENOENT_directory_dead; 472 goto err3; 473 } 474 475 s_user_ns = dir->i_sb->s_user_ns; 476 if (!kuid_has_mapping(s_user_ns, current_fsuid()) || 477 !kgid_has_mapping(s_user_ns, current_fsgid())) { 478 error = -EOVERFLOW; 479 goto err3; 480 } 481 482 error = inode_permission(file_mnt_idmap(filp), 483 dir, MAY_WRITE | MAY_EXEC); 484 if (error) 485 goto err3; 486 487 if (!IS_POSIXACL(dir)) 488 arg.mode &= ~current_umask(); 489 490 error = security_path_mkdir(&dst_path, dst_dentry, arg.mode); 491 if (error) 492 goto err3; 493 494 if ((arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) && 495 !arg.src_ptr) 496 snapshot_src.subvol = inode_inum(to_bch_ei(dir)).subvol; 497 498 down_write(&c->snapshot_create_lock); 499 inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir), 500 dst_dentry, arg.mode|S_IFDIR, 501 0, snapshot_src, create_flags); 502 up_write(&c->snapshot_create_lock); 503 504 error = PTR_ERR_OR_ZERO(inode); 505 if (error) 506 goto err3; 507 508 d_instantiate(dst_dentry, &inode->v); 509 fsnotify_mkdir(dir, dst_dentry); 510 err3: 511 done_path_create(&dst_path, dst_dentry); 512 err2: 513 if (arg.src_ptr) 514 path_put(&src_path); 515 err1: 516 return error; 517 } 518 519 static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp, 520 struct bch_ioctl_subvolume arg) 521 { 522 const char __user *name = (void __user *)(unsigned long)arg.dst_ptr; 523 struct path path; 524 struct inode *dir; 525 struct dentry *victim; 526 int ret = 0; 527 528 if (arg.flags) 529 return -EINVAL; 530 531 victim = user_path_locked_at(arg.dirfd, name, &path); 532 if (IS_ERR(victim)) 533 return PTR_ERR(victim); 534 535 dir = d_inode(path.dentry); 536 if (victim->d_sb->s_fs_info != c) { 537 ret = -EXDEV; 538 goto err; 539 } 540 541 ret = inode_permission(file_mnt_idmap(filp), d_inode(victim), MAY_WRITE) ?: 542 __bch2_unlink(dir, victim, true); 543 if (!ret) { 544 fsnotify_rmdir(dir, victim); 545 d_invalidate(victim); 546 } 547 err: 548 inode_unlock(dir); 549 dput(victim); 550 path_put(&path); 551 return ret; 552 } 553 554 long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg) 555 { 556 struct bch_inode_info *inode = file_bch_inode(file); 557 struct bch_fs *c = inode->v.i_sb->s_fs_info; 558 long ret; 559 560 switch (cmd) { 561 case FS_IOC_GETFLAGS: 562 ret = bch2_ioc_getflags(inode, (int __user *) arg); 563 break; 564 565 case FS_IOC_SETFLAGS: 566 ret = bch2_ioc_setflags(c, file, inode, (int __user *) arg); 567 break; 568 569 case FS_IOC_FSGETXATTR: 570 ret = bch2_ioc_fsgetxattr(inode, (void __user *) arg); 571 break; 572 573 case FS_IOC_FSSETXATTR: 574 ret = bch2_ioc_fssetxattr(c, file, inode, 575 (void __user *) arg); 576 break; 577 578 case BCHFS_IOC_REINHERIT_ATTRS: 579 ret = bch2_ioc_reinherit_attrs(c, file, inode, 580 (void __user *) arg); 581 break; 582 583 case FS_IOC_GETVERSION: 584 ret = bch2_ioc_getversion(inode, (u32 __user *) arg); 585 break; 586 587 case FS_IOC_SETVERSION: 588 ret = -ENOTTY; 589 break; 590 591 case FS_IOC_GETFSLABEL: 592 ret = bch2_ioc_getlabel(c, (void __user *) arg); 593 break; 594 595 case FS_IOC_SETFSLABEL: 596 ret = bch2_ioc_setlabel(c, file, inode, (const void __user *) arg); 597 break; 598 599 case FS_IOC_GOINGDOWN: 600 ret = bch2_ioc_goingdown(c, (u32 __user *) arg); 601 break; 602 603 case BCH_IOCTL_SUBVOLUME_CREATE: { 604 struct bch_ioctl_subvolume i; 605 606 ret = copy_from_user(&i, (void __user *) arg, sizeof(i)) 607 ? -EFAULT 608 : bch2_ioctl_subvolume_create(c, file, i); 609 break; 610 } 611 612 case BCH_IOCTL_SUBVOLUME_DESTROY: { 613 struct bch_ioctl_subvolume i; 614 615 ret = copy_from_user(&i, (void __user *) arg, sizeof(i)) 616 ? -EFAULT 617 : bch2_ioctl_subvolume_destroy(c, file, i); 618 break; 619 } 620 621 default: 622 ret = bch2_fs_ioctl(c, cmd, (void __user *) arg); 623 break; 624 } 625 626 return bch2_err_class(ret); 627 } 628 629 #ifdef CONFIG_COMPAT 630 long bch2_compat_fs_ioctl(struct file *file, unsigned cmd, unsigned long arg) 631 { 632 /* These are just misnamed, they actually get/put from/to user an int */ 633 switch (cmd) { 634 case FS_IOC32_GETFLAGS: 635 cmd = FS_IOC_GETFLAGS; 636 break; 637 case FS_IOC32_SETFLAGS: 638 cmd = FS_IOC_SETFLAGS; 639 break; 640 case FS_IOC32_GETVERSION: 641 cmd = FS_IOC_GETVERSION; 642 break; 643 case FS_IOC_GETFSLABEL: 644 case FS_IOC_SETFSLABEL: 645 break; 646 default: 647 return -ENOIOCTLCMD; 648 } 649 return bch2_fs_file_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); 650 } 651 #endif 652 653 #endif /* NO_BCACHEFS_FS */ 654