1 /* 2 FUSE: Filesystem in Userspace 3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> 4 5 This program can be distributed under the terms of the GNU GPL. 6 See the file COPYING. 7 */ 8 9 #include "fuse_i.h" 10 11 #include <linux/pagemap.h> 12 #include <linux/file.h> 13 #include <linux/fs_context.h> 14 #include <linux/moduleparam.h> 15 #include <linux/sched.h> 16 #include <linux/namei.h> 17 #include <linux/slab.h> 18 #include <linux/xattr.h> 19 #include <linux/iversion.h> 20 #include <linux/posix_acl.h> 21 #include <linux/security.h> 22 #include <linux/types.h> 23 #include <linux/kernel.h> 24 25 static bool __read_mostly allow_sys_admin_access; 26 module_param(allow_sys_admin_access, bool, 0644); 27 MODULE_PARM_DESC(allow_sys_admin_access, 28 "Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check"); 29 30 static void fuse_advise_use_readdirplus(struct inode *dir) 31 { 32 struct fuse_inode *fi = get_fuse_inode(dir); 33 34 set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state); 35 } 36 37 #if BITS_PER_LONG >= 64 38 static inline void __fuse_dentry_settime(struct dentry *entry, u64 time) 39 { 40 entry->d_fsdata = (void *) time; 41 } 42 43 static inline u64 fuse_dentry_time(const struct dentry *entry) 44 { 45 return (u64)entry->d_fsdata; 46 } 47 48 #else 49 union fuse_dentry { 50 u64 time; 51 struct rcu_head rcu; 52 }; 53 54 static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time) 55 { 56 ((union fuse_dentry *) dentry->d_fsdata)->time = time; 57 } 58 59 static inline u64 fuse_dentry_time(const struct dentry *entry) 60 { 61 return ((union fuse_dentry *) entry->d_fsdata)->time; 62 } 63 #endif 64 65 static void fuse_dentry_settime(struct dentry *dentry, u64 time) 66 { 67 struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb); 68 bool delete = !time && fc->delete_stale; 69 /* 70 * Mess with DCACHE_OP_DELETE because dput() will be faster without it. 71 * Don't care about races, either way it's just an optimization 72 */ 73 if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) || 74 (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) { 75 spin_lock(&dentry->d_lock); 76 if (!delete) 77 dentry->d_flags &= ~DCACHE_OP_DELETE; 78 else 79 dentry->d_flags |= DCACHE_OP_DELETE; 80 spin_unlock(&dentry->d_lock); 81 } 82 83 __fuse_dentry_settime(dentry, time); 84 } 85 86 /* 87 * FUSE caches dentries and attributes with separate timeout. The 88 * time in jiffies until the dentry/attributes are valid is stored in 89 * dentry->d_fsdata and fuse_inode->i_time respectively. 90 */ 91 92 /* 93 * Calculate the time in jiffies until a dentry/attributes are valid 94 */ 95 static u64 time_to_jiffies(u64 sec, u32 nsec) 96 { 97 if (sec || nsec) { 98 struct timespec64 ts = { 99 sec, 100 min_t(u32, nsec, NSEC_PER_SEC - 1) 101 }; 102 103 return get_jiffies_64() + timespec64_to_jiffies(&ts); 104 } else 105 return 0; 106 } 107 108 /* 109 * Set dentry and possibly attribute timeouts from the lookup/mk* 110 * replies 111 */ 112 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o) 113 { 114 fuse_dentry_settime(entry, 115 time_to_jiffies(o->entry_valid, o->entry_valid_nsec)); 116 } 117 118 static u64 attr_timeout(struct fuse_attr_out *o) 119 { 120 return time_to_jiffies(o->attr_valid, o->attr_valid_nsec); 121 } 122 123 u64 entry_attr_timeout(struct fuse_entry_out *o) 124 { 125 return time_to_jiffies(o->attr_valid, o->attr_valid_nsec); 126 } 127 128 void fuse_invalidate_attr_mask(struct inode *inode, u32 mask) 129 { 130 set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask); 131 } 132 133 /* 134 * Mark the attributes as stale, so that at the next call to 135 * ->getattr() they will be fetched from userspace 136 */ 137 void fuse_invalidate_attr(struct inode *inode) 138 { 139 fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS); 140 } 141 142 static void fuse_dir_changed(struct inode *dir) 143 { 144 fuse_invalidate_attr(dir); 145 inode_maybe_inc_iversion(dir, false); 146 } 147 148 /** 149 * Mark the attributes as stale due to an atime change. Avoid the invalidate if 150 * atime is not used. 151 */ 152 void fuse_invalidate_atime(struct inode *inode) 153 { 154 if (!IS_RDONLY(inode)) 155 fuse_invalidate_attr_mask(inode, STATX_ATIME); 156 } 157 158 /* 159 * Just mark the entry as stale, so that a next attempt to look it up 160 * will result in a new lookup call to userspace 161 * 162 * This is called when a dentry is about to become negative and the 163 * timeout is unknown (unlink, rmdir, rename and in some cases 164 * lookup) 165 */ 166 void fuse_invalidate_entry_cache(struct dentry *entry) 167 { 168 fuse_dentry_settime(entry, 0); 169 } 170 171 /* 172 * Same as fuse_invalidate_entry_cache(), but also try to remove the 173 * dentry from the hash 174 */ 175 static void fuse_invalidate_entry(struct dentry *entry) 176 { 177 d_invalidate(entry); 178 fuse_invalidate_entry_cache(entry); 179 } 180 181 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args, 182 u64 nodeid, const struct qstr *name, 183 struct fuse_entry_out *outarg) 184 { 185 memset(outarg, 0, sizeof(struct fuse_entry_out)); 186 args->opcode = FUSE_LOOKUP; 187 args->nodeid = nodeid; 188 args->in_numargs = 1; 189 args->in_args[0].size = name->len + 1; 190 args->in_args[0].value = name->name; 191 args->out_numargs = 1; 192 args->out_args[0].size = sizeof(struct fuse_entry_out); 193 args->out_args[0].value = outarg; 194 } 195 196 /* 197 * Check whether the dentry is still valid 198 * 199 * If the entry validity timeout has expired and the dentry is 200 * positive, try to redo the lookup. If the lookup results in a 201 * different inode, then let the VFS invalidate the dentry and redo 202 * the lookup once more. If the lookup results in the same inode, 203 * then refresh the attributes, timeouts and mark the dentry valid. 204 */ 205 static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) 206 { 207 struct inode *inode; 208 struct dentry *parent; 209 struct fuse_mount *fm; 210 struct fuse_inode *fi; 211 int ret; 212 213 inode = d_inode_rcu(entry); 214 if (inode && fuse_is_bad(inode)) 215 goto invalid; 216 else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) || 217 (flags & (LOOKUP_EXCL | LOOKUP_REVAL))) { 218 struct fuse_entry_out outarg; 219 FUSE_ARGS(args); 220 struct fuse_forget_link *forget; 221 u64 attr_version; 222 223 /* For negative dentries, always do a fresh lookup */ 224 if (!inode) 225 goto invalid; 226 227 ret = -ECHILD; 228 if (flags & LOOKUP_RCU) 229 goto out; 230 231 fm = get_fuse_mount(inode); 232 233 forget = fuse_alloc_forget(); 234 ret = -ENOMEM; 235 if (!forget) 236 goto out; 237 238 attr_version = fuse_get_attr_version(fm->fc); 239 240 parent = dget_parent(entry); 241 fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)), 242 &entry->d_name, &outarg); 243 ret = fuse_simple_request(fm, &args); 244 dput(parent); 245 /* Zero nodeid is same as -ENOENT */ 246 if (!ret && !outarg.nodeid) 247 ret = -ENOENT; 248 if (!ret) { 249 fi = get_fuse_inode(inode); 250 if (outarg.nodeid != get_node_id(inode) || 251 (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) { 252 fuse_queue_forget(fm->fc, forget, 253 outarg.nodeid, 1); 254 goto invalid; 255 } 256 spin_lock(&fi->lock); 257 fi->nlookup++; 258 spin_unlock(&fi->lock); 259 } 260 kfree(forget); 261 if (ret == -ENOMEM) 262 goto out; 263 if (ret || fuse_invalid_attr(&outarg.attr) || 264 fuse_stale_inode(inode, outarg.generation, &outarg.attr)) 265 goto invalid; 266 267 forget_all_cached_acls(inode); 268 fuse_change_attributes(inode, &outarg.attr, 269 entry_attr_timeout(&outarg), 270 attr_version); 271 fuse_change_entry_timeout(entry, &outarg); 272 } else if (inode) { 273 fi = get_fuse_inode(inode); 274 if (flags & LOOKUP_RCU) { 275 if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state)) 276 return -ECHILD; 277 } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) { 278 parent = dget_parent(entry); 279 fuse_advise_use_readdirplus(d_inode(parent)); 280 dput(parent); 281 } 282 } 283 ret = 1; 284 out: 285 return ret; 286 287 invalid: 288 ret = 0; 289 goto out; 290 } 291 292 #if BITS_PER_LONG < 64 293 static int fuse_dentry_init(struct dentry *dentry) 294 { 295 dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), 296 GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE); 297 298 return dentry->d_fsdata ? 0 : -ENOMEM; 299 } 300 static void fuse_dentry_release(struct dentry *dentry) 301 { 302 union fuse_dentry *fd = dentry->d_fsdata; 303 304 kfree_rcu(fd, rcu); 305 } 306 #endif 307 308 static int fuse_dentry_delete(const struct dentry *dentry) 309 { 310 return time_before64(fuse_dentry_time(dentry), get_jiffies_64()); 311 } 312 313 /* 314 * Create a fuse_mount object with a new superblock (with path->dentry 315 * as the root), and return that mount so it can be auto-mounted on 316 * @path. 317 */ 318 static struct vfsmount *fuse_dentry_automount(struct path *path) 319 { 320 struct fs_context *fsc; 321 struct vfsmount *mnt; 322 struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry)); 323 324 fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry); 325 if (IS_ERR(fsc)) 326 return ERR_CAST(fsc); 327 328 /* Pass the FUSE inode of the mount for fuse_get_tree_submount() */ 329 fsc->fs_private = mp_fi; 330 331 /* Create the submount */ 332 mnt = fc_mount(fsc); 333 if (!IS_ERR(mnt)) 334 mntget(mnt); 335 336 put_fs_context(fsc); 337 return mnt; 338 } 339 340 const struct dentry_operations fuse_dentry_operations = { 341 .d_revalidate = fuse_dentry_revalidate, 342 .d_delete = fuse_dentry_delete, 343 #if BITS_PER_LONG < 64 344 .d_init = fuse_dentry_init, 345 .d_release = fuse_dentry_release, 346 #endif 347 .d_automount = fuse_dentry_automount, 348 }; 349 350 const struct dentry_operations fuse_root_dentry_operations = { 351 #if BITS_PER_LONG < 64 352 .d_init = fuse_dentry_init, 353 .d_release = fuse_dentry_release, 354 #endif 355 }; 356 357 int fuse_valid_type(int m) 358 { 359 return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) || 360 S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m); 361 } 362 363 bool fuse_invalid_attr(struct fuse_attr *attr) 364 { 365 return !fuse_valid_type(attr->mode) || 366 attr->size > LLONG_MAX; 367 } 368 369 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name, 370 struct fuse_entry_out *outarg, struct inode **inode) 371 { 372 struct fuse_mount *fm = get_fuse_mount_super(sb); 373 FUSE_ARGS(args); 374 struct fuse_forget_link *forget; 375 u64 attr_version; 376 int err; 377 378 *inode = NULL; 379 err = -ENAMETOOLONG; 380 if (name->len > FUSE_NAME_MAX) 381 goto out; 382 383 384 forget = fuse_alloc_forget(); 385 err = -ENOMEM; 386 if (!forget) 387 goto out; 388 389 attr_version = fuse_get_attr_version(fm->fc); 390 391 fuse_lookup_init(fm->fc, &args, nodeid, name, outarg); 392 err = fuse_simple_request(fm, &args); 393 /* Zero nodeid is same as -ENOENT, but with valid timeout */ 394 if (err || !outarg->nodeid) 395 goto out_put_forget; 396 397 err = -EIO; 398 if (!outarg->nodeid) 399 goto out_put_forget; 400 if (fuse_invalid_attr(&outarg->attr)) 401 goto out_put_forget; 402 403 *inode = fuse_iget(sb, outarg->nodeid, outarg->generation, 404 &outarg->attr, entry_attr_timeout(outarg), 405 attr_version); 406 err = -ENOMEM; 407 if (!*inode) { 408 fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1); 409 goto out; 410 } 411 err = 0; 412 413 out_put_forget: 414 kfree(forget); 415 out: 416 return err; 417 } 418 419 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, 420 unsigned int flags) 421 { 422 int err; 423 struct fuse_entry_out outarg; 424 struct inode *inode; 425 struct dentry *newent; 426 bool outarg_valid = true; 427 bool locked; 428 429 if (fuse_is_bad(dir)) 430 return ERR_PTR(-EIO); 431 432 locked = fuse_lock_inode(dir); 433 err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, 434 &outarg, &inode); 435 fuse_unlock_inode(dir, locked); 436 if (err == -ENOENT) { 437 outarg_valid = false; 438 err = 0; 439 } 440 if (err) 441 goto out_err; 442 443 err = -EIO; 444 if (inode && get_node_id(inode) == FUSE_ROOT_ID) 445 goto out_iput; 446 447 newent = d_splice_alias(inode, entry); 448 err = PTR_ERR(newent); 449 if (IS_ERR(newent)) 450 goto out_err; 451 452 entry = newent ? newent : entry; 453 if (outarg_valid) 454 fuse_change_entry_timeout(entry, &outarg); 455 else 456 fuse_invalidate_entry_cache(entry); 457 458 if (inode) 459 fuse_advise_use_readdirplus(dir); 460 return newent; 461 462 out_iput: 463 iput(inode); 464 out_err: 465 return ERR_PTR(err); 466 } 467 468 static int get_security_context(struct dentry *entry, umode_t mode, 469 void **security_ctx, u32 *security_ctxlen) 470 { 471 struct fuse_secctx *fctx; 472 struct fuse_secctx_header *header; 473 void *ctx = NULL, *ptr; 474 u32 ctxlen, total_len = sizeof(*header); 475 int err, nr_ctx = 0; 476 const char *name; 477 size_t namelen; 478 479 err = security_dentry_init_security(entry, mode, &entry->d_name, 480 &name, &ctx, &ctxlen); 481 if (err) { 482 if (err != -EOPNOTSUPP) 483 goto out_err; 484 /* No LSM is supporting this security hook. Ignore error */ 485 ctxlen = 0; 486 ctx = NULL; 487 } 488 489 if (ctxlen) { 490 nr_ctx = 1; 491 namelen = strlen(name) + 1; 492 err = -EIO; 493 if (WARN_ON(namelen > XATTR_NAME_MAX + 1 || ctxlen > S32_MAX)) 494 goto out_err; 495 total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen + ctxlen); 496 } 497 498 err = -ENOMEM; 499 header = ptr = kzalloc(total_len, GFP_KERNEL); 500 if (!ptr) 501 goto out_err; 502 503 header->nr_secctx = nr_ctx; 504 header->size = total_len; 505 ptr += sizeof(*header); 506 if (nr_ctx) { 507 fctx = ptr; 508 fctx->size = ctxlen; 509 ptr += sizeof(*fctx); 510 511 strcpy(ptr, name); 512 ptr += namelen; 513 514 memcpy(ptr, ctx, ctxlen); 515 } 516 *security_ctxlen = total_len; 517 *security_ctx = header; 518 err = 0; 519 out_err: 520 kfree(ctx); 521 return err; 522 } 523 524 /* 525 * Atomic create+open operation 526 * 527 * If the filesystem doesn't support this, then fall back to separate 528 * 'mknod' + 'open' requests. 529 */ 530 static int fuse_create_open(struct inode *dir, struct dentry *entry, 531 struct file *file, unsigned int flags, 532 umode_t mode) 533 { 534 int err; 535 struct inode *inode; 536 struct fuse_mount *fm = get_fuse_mount(dir); 537 FUSE_ARGS(args); 538 struct fuse_forget_link *forget; 539 struct fuse_create_in inarg; 540 struct fuse_open_out outopen; 541 struct fuse_entry_out outentry; 542 struct fuse_inode *fi; 543 struct fuse_file *ff; 544 void *security_ctx = NULL; 545 u32 security_ctxlen; 546 bool trunc = flags & O_TRUNC; 547 548 /* Userspace expects S_IFREG in create mode */ 549 BUG_ON((mode & S_IFMT) != S_IFREG); 550 551 forget = fuse_alloc_forget(); 552 err = -ENOMEM; 553 if (!forget) 554 goto out_err; 555 556 err = -ENOMEM; 557 ff = fuse_file_alloc(fm); 558 if (!ff) 559 goto out_put_forget_req; 560 561 if (!fm->fc->dont_mask) 562 mode &= ~current_umask(); 563 564 flags &= ~O_NOCTTY; 565 memset(&inarg, 0, sizeof(inarg)); 566 memset(&outentry, 0, sizeof(outentry)); 567 inarg.flags = flags; 568 inarg.mode = mode; 569 inarg.umask = current_umask(); 570 571 if (fm->fc->handle_killpriv_v2 && trunc && 572 !(flags & O_EXCL) && !capable(CAP_FSETID)) { 573 inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID; 574 } 575 576 args.opcode = FUSE_CREATE; 577 args.nodeid = get_node_id(dir); 578 args.in_numargs = 2; 579 args.in_args[0].size = sizeof(inarg); 580 args.in_args[0].value = &inarg; 581 args.in_args[1].size = entry->d_name.len + 1; 582 args.in_args[1].value = entry->d_name.name; 583 args.out_numargs = 2; 584 args.out_args[0].size = sizeof(outentry); 585 args.out_args[0].value = &outentry; 586 args.out_args[1].size = sizeof(outopen); 587 args.out_args[1].value = &outopen; 588 589 if (fm->fc->init_security) { 590 err = get_security_context(entry, mode, &security_ctx, 591 &security_ctxlen); 592 if (err) 593 goto out_put_forget_req; 594 595 args.in_numargs = 3; 596 args.in_args[2].size = security_ctxlen; 597 args.in_args[2].value = security_ctx; 598 } 599 600 err = fuse_simple_request(fm, &args); 601 kfree(security_ctx); 602 if (err) 603 goto out_free_ff; 604 605 err = -EIO; 606 if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) || 607 fuse_invalid_attr(&outentry.attr)) 608 goto out_free_ff; 609 610 ff->fh = outopen.fh; 611 ff->nodeid = outentry.nodeid; 612 ff->open_flags = outopen.open_flags; 613 inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, 614 &outentry.attr, entry_attr_timeout(&outentry), 0); 615 if (!inode) { 616 flags &= ~(O_CREAT | O_EXCL | O_TRUNC); 617 fuse_sync_release(NULL, ff, flags); 618 fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); 619 err = -ENOMEM; 620 goto out_err; 621 } 622 kfree(forget); 623 d_instantiate(entry, inode); 624 fuse_change_entry_timeout(entry, &outentry); 625 fuse_dir_changed(dir); 626 err = finish_open(file, entry, generic_file_open); 627 if (err) { 628 fi = get_fuse_inode(inode); 629 fuse_sync_release(fi, ff, flags); 630 } else { 631 file->private_data = ff; 632 fuse_finish_open(inode, file); 633 if (fm->fc->atomic_o_trunc && trunc) 634 truncate_pagecache(inode, 0); 635 else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) 636 invalidate_inode_pages2(inode->i_mapping); 637 } 638 return err; 639 640 out_free_ff: 641 fuse_file_free(ff); 642 out_put_forget_req: 643 kfree(forget); 644 out_err: 645 return err; 646 } 647 648 static int fuse_mknod(struct user_namespace *, struct inode *, struct dentry *, 649 umode_t, dev_t); 650 static int fuse_atomic_open(struct inode *dir, struct dentry *entry, 651 struct file *file, unsigned flags, 652 umode_t mode) 653 { 654 int err; 655 struct fuse_conn *fc = get_fuse_conn(dir); 656 struct dentry *res = NULL; 657 658 if (fuse_is_bad(dir)) 659 return -EIO; 660 661 if (d_in_lookup(entry)) { 662 res = fuse_lookup(dir, entry, 0); 663 if (IS_ERR(res)) 664 return PTR_ERR(res); 665 666 if (res) 667 entry = res; 668 } 669 670 if (!(flags & O_CREAT) || d_really_is_positive(entry)) 671 goto no_open; 672 673 /* Only creates */ 674 file->f_mode |= FMODE_CREATED; 675 676 if (fc->no_create) 677 goto mknod; 678 679 err = fuse_create_open(dir, entry, file, flags, mode); 680 if (err == -ENOSYS) { 681 fc->no_create = 1; 682 goto mknod; 683 } 684 out_dput: 685 dput(res); 686 return err; 687 688 mknod: 689 err = fuse_mknod(&init_user_ns, dir, entry, mode, 0); 690 if (err) 691 goto out_dput; 692 no_open: 693 return finish_no_open(file, res); 694 } 695 696 /* 697 * Code shared between mknod, mkdir, symlink and link 698 */ 699 static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, 700 struct inode *dir, struct dentry *entry, 701 umode_t mode) 702 { 703 struct fuse_entry_out outarg; 704 struct inode *inode; 705 struct dentry *d; 706 int err; 707 struct fuse_forget_link *forget; 708 void *security_ctx = NULL; 709 u32 security_ctxlen; 710 711 if (fuse_is_bad(dir)) 712 return -EIO; 713 714 forget = fuse_alloc_forget(); 715 if (!forget) 716 return -ENOMEM; 717 718 memset(&outarg, 0, sizeof(outarg)); 719 args->nodeid = get_node_id(dir); 720 args->out_numargs = 1; 721 args->out_args[0].size = sizeof(outarg); 722 args->out_args[0].value = &outarg; 723 724 if (fm->fc->init_security && args->opcode != FUSE_LINK) { 725 err = get_security_context(entry, mode, &security_ctx, 726 &security_ctxlen); 727 if (err) 728 goto out_put_forget_req; 729 730 BUG_ON(args->in_numargs != 2); 731 732 args->in_numargs = 3; 733 args->in_args[2].size = security_ctxlen; 734 args->in_args[2].value = security_ctx; 735 } 736 737 err = fuse_simple_request(fm, args); 738 kfree(security_ctx); 739 if (err) 740 goto out_put_forget_req; 741 742 err = -EIO; 743 if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr)) 744 goto out_put_forget_req; 745 746 if ((outarg.attr.mode ^ mode) & S_IFMT) 747 goto out_put_forget_req; 748 749 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, 750 &outarg.attr, entry_attr_timeout(&outarg), 0); 751 if (!inode) { 752 fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1); 753 return -ENOMEM; 754 } 755 kfree(forget); 756 757 d_drop(entry); 758 d = d_splice_alias(inode, entry); 759 if (IS_ERR(d)) 760 return PTR_ERR(d); 761 762 if (d) { 763 fuse_change_entry_timeout(d, &outarg); 764 dput(d); 765 } else { 766 fuse_change_entry_timeout(entry, &outarg); 767 } 768 fuse_dir_changed(dir); 769 return 0; 770 771 out_put_forget_req: 772 kfree(forget); 773 return err; 774 } 775 776 static int fuse_mknod(struct user_namespace *mnt_userns, struct inode *dir, 777 struct dentry *entry, umode_t mode, dev_t rdev) 778 { 779 struct fuse_mknod_in inarg; 780 struct fuse_mount *fm = get_fuse_mount(dir); 781 FUSE_ARGS(args); 782 783 if (!fm->fc->dont_mask) 784 mode &= ~current_umask(); 785 786 memset(&inarg, 0, sizeof(inarg)); 787 inarg.mode = mode; 788 inarg.rdev = new_encode_dev(rdev); 789 inarg.umask = current_umask(); 790 args.opcode = FUSE_MKNOD; 791 args.in_numargs = 2; 792 args.in_args[0].size = sizeof(inarg); 793 args.in_args[0].value = &inarg; 794 args.in_args[1].size = entry->d_name.len + 1; 795 args.in_args[1].value = entry->d_name.name; 796 return create_new_entry(fm, &args, dir, entry, mode); 797 } 798 799 static int fuse_create(struct user_namespace *mnt_userns, struct inode *dir, 800 struct dentry *entry, umode_t mode, bool excl) 801 { 802 return fuse_mknod(&init_user_ns, dir, entry, mode, 0); 803 } 804 805 static int fuse_mkdir(struct user_namespace *mnt_userns, struct inode *dir, 806 struct dentry *entry, umode_t mode) 807 { 808 struct fuse_mkdir_in inarg; 809 struct fuse_mount *fm = get_fuse_mount(dir); 810 FUSE_ARGS(args); 811 812 if (!fm->fc->dont_mask) 813 mode &= ~current_umask(); 814 815 memset(&inarg, 0, sizeof(inarg)); 816 inarg.mode = mode; 817 inarg.umask = current_umask(); 818 args.opcode = FUSE_MKDIR; 819 args.in_numargs = 2; 820 args.in_args[0].size = sizeof(inarg); 821 args.in_args[0].value = &inarg; 822 args.in_args[1].size = entry->d_name.len + 1; 823 args.in_args[1].value = entry->d_name.name; 824 return create_new_entry(fm, &args, dir, entry, S_IFDIR); 825 } 826 827 static int fuse_symlink(struct user_namespace *mnt_userns, struct inode *dir, 828 struct dentry *entry, const char *link) 829 { 830 struct fuse_mount *fm = get_fuse_mount(dir); 831 unsigned len = strlen(link) + 1; 832 FUSE_ARGS(args); 833 834 args.opcode = FUSE_SYMLINK; 835 args.in_numargs = 2; 836 args.in_args[0].size = entry->d_name.len + 1; 837 args.in_args[0].value = entry->d_name.name; 838 args.in_args[1].size = len; 839 args.in_args[1].value = link; 840 return create_new_entry(fm, &args, dir, entry, S_IFLNK); 841 } 842 843 void fuse_flush_time_update(struct inode *inode) 844 { 845 int err = sync_inode_metadata(inode, 1); 846 847 mapping_set_error(inode->i_mapping, err); 848 } 849 850 static void fuse_update_ctime_in_cache(struct inode *inode) 851 { 852 if (!IS_NOCMTIME(inode)) { 853 inode->i_ctime = current_time(inode); 854 mark_inode_dirty_sync(inode); 855 fuse_flush_time_update(inode); 856 } 857 } 858 859 void fuse_update_ctime(struct inode *inode) 860 { 861 fuse_invalidate_attr_mask(inode, STATX_CTIME); 862 fuse_update_ctime_in_cache(inode); 863 } 864 865 static void fuse_entry_unlinked(struct dentry *entry) 866 { 867 struct inode *inode = d_inode(entry); 868 struct fuse_conn *fc = get_fuse_conn(inode); 869 struct fuse_inode *fi = get_fuse_inode(inode); 870 871 spin_lock(&fi->lock); 872 fi->attr_version = atomic64_inc_return(&fc->attr_version); 873 /* 874 * If i_nlink == 0 then unlink doesn't make sense, yet this can 875 * happen if userspace filesystem is careless. It would be 876 * difficult to enforce correct nlink usage so just ignore this 877 * condition here 878 */ 879 if (S_ISDIR(inode->i_mode)) 880 clear_nlink(inode); 881 else if (inode->i_nlink > 0) 882 drop_nlink(inode); 883 spin_unlock(&fi->lock); 884 fuse_invalidate_entry_cache(entry); 885 fuse_update_ctime(inode); 886 } 887 888 static int fuse_unlink(struct inode *dir, struct dentry *entry) 889 { 890 int err; 891 struct fuse_mount *fm = get_fuse_mount(dir); 892 FUSE_ARGS(args); 893 894 if (fuse_is_bad(dir)) 895 return -EIO; 896 897 args.opcode = FUSE_UNLINK; 898 args.nodeid = get_node_id(dir); 899 args.in_numargs = 1; 900 args.in_args[0].size = entry->d_name.len + 1; 901 args.in_args[0].value = entry->d_name.name; 902 err = fuse_simple_request(fm, &args); 903 if (!err) { 904 fuse_dir_changed(dir); 905 fuse_entry_unlinked(entry); 906 } else if (err == -EINTR) 907 fuse_invalidate_entry(entry); 908 return err; 909 } 910 911 static int fuse_rmdir(struct inode *dir, struct dentry *entry) 912 { 913 int err; 914 struct fuse_mount *fm = get_fuse_mount(dir); 915 FUSE_ARGS(args); 916 917 if (fuse_is_bad(dir)) 918 return -EIO; 919 920 args.opcode = FUSE_RMDIR; 921 args.nodeid = get_node_id(dir); 922 args.in_numargs = 1; 923 args.in_args[0].size = entry->d_name.len + 1; 924 args.in_args[0].value = entry->d_name.name; 925 err = fuse_simple_request(fm, &args); 926 if (!err) { 927 fuse_dir_changed(dir); 928 fuse_entry_unlinked(entry); 929 } else if (err == -EINTR) 930 fuse_invalidate_entry(entry); 931 return err; 932 } 933 934 static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, 935 struct inode *newdir, struct dentry *newent, 936 unsigned int flags, int opcode, size_t argsize) 937 { 938 int err; 939 struct fuse_rename2_in inarg; 940 struct fuse_mount *fm = get_fuse_mount(olddir); 941 FUSE_ARGS(args); 942 943 memset(&inarg, 0, argsize); 944 inarg.newdir = get_node_id(newdir); 945 inarg.flags = flags; 946 args.opcode = opcode; 947 args.nodeid = get_node_id(olddir); 948 args.in_numargs = 3; 949 args.in_args[0].size = argsize; 950 args.in_args[0].value = &inarg; 951 args.in_args[1].size = oldent->d_name.len + 1; 952 args.in_args[1].value = oldent->d_name.name; 953 args.in_args[2].size = newent->d_name.len + 1; 954 args.in_args[2].value = newent->d_name.name; 955 err = fuse_simple_request(fm, &args); 956 if (!err) { 957 /* ctime changes */ 958 fuse_update_ctime(d_inode(oldent)); 959 960 if (flags & RENAME_EXCHANGE) 961 fuse_update_ctime(d_inode(newent)); 962 963 fuse_dir_changed(olddir); 964 if (olddir != newdir) 965 fuse_dir_changed(newdir); 966 967 /* newent will end up negative */ 968 if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) 969 fuse_entry_unlinked(newent); 970 } else if (err == -EINTR) { 971 /* If request was interrupted, DEITY only knows if the 972 rename actually took place. If the invalidation 973 fails (e.g. some process has CWD under the renamed 974 directory), then there can be inconsistency between 975 the dcache and the real filesystem. Tough luck. */ 976 fuse_invalidate_entry(oldent); 977 if (d_really_is_positive(newent)) 978 fuse_invalidate_entry(newent); 979 } 980 981 return err; 982 } 983 984 static int fuse_rename2(struct user_namespace *mnt_userns, struct inode *olddir, 985 struct dentry *oldent, struct inode *newdir, 986 struct dentry *newent, unsigned int flags) 987 { 988 struct fuse_conn *fc = get_fuse_conn(olddir); 989 int err; 990 991 if (fuse_is_bad(olddir)) 992 return -EIO; 993 994 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) 995 return -EINVAL; 996 997 if (flags) { 998 if (fc->no_rename2 || fc->minor < 23) 999 return -EINVAL; 1000 1001 err = fuse_rename_common(olddir, oldent, newdir, newent, flags, 1002 FUSE_RENAME2, 1003 sizeof(struct fuse_rename2_in)); 1004 if (err == -ENOSYS) { 1005 fc->no_rename2 = 1; 1006 err = -EINVAL; 1007 } 1008 } else { 1009 err = fuse_rename_common(olddir, oldent, newdir, newent, 0, 1010 FUSE_RENAME, 1011 sizeof(struct fuse_rename_in)); 1012 } 1013 1014 return err; 1015 } 1016 1017 static int fuse_link(struct dentry *entry, struct inode *newdir, 1018 struct dentry *newent) 1019 { 1020 int err; 1021 struct fuse_link_in inarg; 1022 struct inode *inode = d_inode(entry); 1023 struct fuse_mount *fm = get_fuse_mount(inode); 1024 FUSE_ARGS(args); 1025 1026 memset(&inarg, 0, sizeof(inarg)); 1027 inarg.oldnodeid = get_node_id(inode); 1028 args.opcode = FUSE_LINK; 1029 args.in_numargs = 2; 1030 args.in_args[0].size = sizeof(inarg); 1031 args.in_args[0].value = &inarg; 1032 args.in_args[1].size = newent->d_name.len + 1; 1033 args.in_args[1].value = newent->d_name.name; 1034 err = create_new_entry(fm, &args, newdir, newent, inode->i_mode); 1035 if (!err) 1036 fuse_update_ctime_in_cache(inode); 1037 else if (err == -EINTR) 1038 fuse_invalidate_attr(inode); 1039 1040 return err; 1041 } 1042 1043 static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, 1044 struct kstat *stat) 1045 { 1046 unsigned int blkbits; 1047 struct fuse_conn *fc = get_fuse_conn(inode); 1048 1049 stat->dev = inode->i_sb->s_dev; 1050 stat->ino = attr->ino; 1051 stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 1052 stat->nlink = attr->nlink; 1053 stat->uid = make_kuid(fc->user_ns, attr->uid); 1054 stat->gid = make_kgid(fc->user_ns, attr->gid); 1055 stat->rdev = inode->i_rdev; 1056 stat->atime.tv_sec = attr->atime; 1057 stat->atime.tv_nsec = attr->atimensec; 1058 stat->mtime.tv_sec = attr->mtime; 1059 stat->mtime.tv_nsec = attr->mtimensec; 1060 stat->ctime.tv_sec = attr->ctime; 1061 stat->ctime.tv_nsec = attr->ctimensec; 1062 stat->size = attr->size; 1063 stat->blocks = attr->blocks; 1064 1065 if (attr->blksize != 0) 1066 blkbits = ilog2(attr->blksize); 1067 else 1068 blkbits = inode->i_sb->s_blocksize_bits; 1069 1070 stat->blksize = 1 << blkbits; 1071 } 1072 1073 static int fuse_do_getattr(struct inode *inode, struct kstat *stat, 1074 struct file *file) 1075 { 1076 int err; 1077 struct fuse_getattr_in inarg; 1078 struct fuse_attr_out outarg; 1079 struct fuse_mount *fm = get_fuse_mount(inode); 1080 FUSE_ARGS(args); 1081 u64 attr_version; 1082 1083 attr_version = fuse_get_attr_version(fm->fc); 1084 1085 memset(&inarg, 0, sizeof(inarg)); 1086 memset(&outarg, 0, sizeof(outarg)); 1087 /* Directories have separate file-handle space */ 1088 if (file && S_ISREG(inode->i_mode)) { 1089 struct fuse_file *ff = file->private_data; 1090 1091 inarg.getattr_flags |= FUSE_GETATTR_FH; 1092 inarg.fh = ff->fh; 1093 } 1094 args.opcode = FUSE_GETATTR; 1095 args.nodeid = get_node_id(inode); 1096 args.in_numargs = 1; 1097 args.in_args[0].size = sizeof(inarg); 1098 args.in_args[0].value = &inarg; 1099 args.out_numargs = 1; 1100 args.out_args[0].size = sizeof(outarg); 1101 args.out_args[0].value = &outarg; 1102 err = fuse_simple_request(fm, &args); 1103 if (!err) { 1104 if (fuse_invalid_attr(&outarg.attr) || 1105 inode_wrong_type(inode, outarg.attr.mode)) { 1106 fuse_make_bad(inode); 1107 err = -EIO; 1108 } else { 1109 fuse_change_attributes(inode, &outarg.attr, 1110 attr_timeout(&outarg), 1111 attr_version); 1112 if (stat) 1113 fuse_fillattr(inode, &outarg.attr, stat); 1114 } 1115 } 1116 return err; 1117 } 1118 1119 static int fuse_update_get_attr(struct inode *inode, struct file *file, 1120 struct kstat *stat, u32 request_mask, 1121 unsigned int flags) 1122 { 1123 struct fuse_inode *fi = get_fuse_inode(inode); 1124 int err = 0; 1125 bool sync; 1126 u32 inval_mask = READ_ONCE(fi->inval_mask); 1127 u32 cache_mask = fuse_get_cache_mask(inode); 1128 1129 if (flags & AT_STATX_FORCE_SYNC) 1130 sync = true; 1131 else if (flags & AT_STATX_DONT_SYNC) 1132 sync = false; 1133 else if (request_mask & inval_mask & ~cache_mask) 1134 sync = true; 1135 else 1136 sync = time_before64(fi->i_time, get_jiffies_64()); 1137 1138 if (sync) { 1139 forget_all_cached_acls(inode); 1140 err = fuse_do_getattr(inode, stat, file); 1141 } else if (stat) { 1142 generic_fillattr(&init_user_ns, inode, stat); 1143 stat->mode = fi->orig_i_mode; 1144 stat->ino = fi->orig_ino; 1145 } 1146 1147 return err; 1148 } 1149 1150 int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask) 1151 { 1152 return fuse_update_get_attr(inode, file, NULL, mask, 0); 1153 } 1154 1155 int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, 1156 u64 child_nodeid, struct qstr *name) 1157 { 1158 int err = -ENOTDIR; 1159 struct inode *parent; 1160 struct dentry *dir; 1161 struct dentry *entry; 1162 1163 parent = fuse_ilookup(fc, parent_nodeid, NULL); 1164 if (!parent) 1165 return -ENOENT; 1166 1167 inode_lock_nested(parent, I_MUTEX_PARENT); 1168 if (!S_ISDIR(parent->i_mode)) 1169 goto unlock; 1170 1171 err = -ENOENT; 1172 dir = d_find_alias(parent); 1173 if (!dir) 1174 goto unlock; 1175 1176 name->hash = full_name_hash(dir, name->name, name->len); 1177 entry = d_lookup(dir, name); 1178 dput(dir); 1179 if (!entry) 1180 goto unlock; 1181 1182 fuse_dir_changed(parent); 1183 fuse_invalidate_entry(entry); 1184 1185 if (child_nodeid != 0 && d_really_is_positive(entry)) { 1186 inode_lock(d_inode(entry)); 1187 if (get_node_id(d_inode(entry)) != child_nodeid) { 1188 err = -ENOENT; 1189 goto badentry; 1190 } 1191 if (d_mountpoint(entry)) { 1192 err = -EBUSY; 1193 goto badentry; 1194 } 1195 if (d_is_dir(entry)) { 1196 shrink_dcache_parent(entry); 1197 if (!simple_empty(entry)) { 1198 err = -ENOTEMPTY; 1199 goto badentry; 1200 } 1201 d_inode(entry)->i_flags |= S_DEAD; 1202 } 1203 dont_mount(entry); 1204 clear_nlink(d_inode(entry)); 1205 err = 0; 1206 badentry: 1207 inode_unlock(d_inode(entry)); 1208 if (!err) 1209 d_delete(entry); 1210 } else { 1211 err = 0; 1212 } 1213 dput(entry); 1214 1215 unlock: 1216 inode_unlock(parent); 1217 iput(parent); 1218 return err; 1219 } 1220 1221 /* 1222 * Calling into a user-controlled filesystem gives the filesystem 1223 * daemon ptrace-like capabilities over the current process. This 1224 * means, that the filesystem daemon is able to record the exact 1225 * filesystem operations performed, and can also control the behavior 1226 * of the requester process in otherwise impossible ways. For example 1227 * it can delay the operation for arbitrary length of time allowing 1228 * DoS against the requester. 1229 * 1230 * For this reason only those processes can call into the filesystem, 1231 * for which the owner of the mount has ptrace privilege. This 1232 * excludes processes started by other users, suid or sgid processes. 1233 */ 1234 int fuse_allow_current_process(struct fuse_conn *fc) 1235 { 1236 const struct cred *cred; 1237 1238 if (allow_sys_admin_access && capable(CAP_SYS_ADMIN)) 1239 return 1; 1240 1241 if (fc->allow_other) 1242 return current_in_userns(fc->user_ns); 1243 1244 cred = current_cred(); 1245 if (uid_eq(cred->euid, fc->user_id) && 1246 uid_eq(cred->suid, fc->user_id) && 1247 uid_eq(cred->uid, fc->user_id) && 1248 gid_eq(cred->egid, fc->group_id) && 1249 gid_eq(cred->sgid, fc->group_id) && 1250 gid_eq(cred->gid, fc->group_id)) 1251 return 1; 1252 1253 return 0; 1254 } 1255 1256 static int fuse_access(struct inode *inode, int mask) 1257 { 1258 struct fuse_mount *fm = get_fuse_mount(inode); 1259 FUSE_ARGS(args); 1260 struct fuse_access_in inarg; 1261 int err; 1262 1263 BUG_ON(mask & MAY_NOT_BLOCK); 1264 1265 if (fm->fc->no_access) 1266 return 0; 1267 1268 memset(&inarg, 0, sizeof(inarg)); 1269 inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC); 1270 args.opcode = FUSE_ACCESS; 1271 args.nodeid = get_node_id(inode); 1272 args.in_numargs = 1; 1273 args.in_args[0].size = sizeof(inarg); 1274 args.in_args[0].value = &inarg; 1275 err = fuse_simple_request(fm, &args); 1276 if (err == -ENOSYS) { 1277 fm->fc->no_access = 1; 1278 err = 0; 1279 } 1280 return err; 1281 } 1282 1283 static int fuse_perm_getattr(struct inode *inode, int mask) 1284 { 1285 if (mask & MAY_NOT_BLOCK) 1286 return -ECHILD; 1287 1288 forget_all_cached_acls(inode); 1289 return fuse_do_getattr(inode, NULL, NULL); 1290 } 1291 1292 /* 1293 * Check permission. The two basic access models of FUSE are: 1294 * 1295 * 1) Local access checking ('default_permissions' mount option) based 1296 * on file mode. This is the plain old disk filesystem permission 1297 * modell. 1298 * 1299 * 2) "Remote" access checking, where server is responsible for 1300 * checking permission in each inode operation. An exception to this 1301 * is if ->permission() was invoked from sys_access() in which case an 1302 * access request is sent. Execute permission is still checked 1303 * locally based on file mode. 1304 */ 1305 static int fuse_permission(struct user_namespace *mnt_userns, 1306 struct inode *inode, int mask) 1307 { 1308 struct fuse_conn *fc = get_fuse_conn(inode); 1309 bool refreshed = false; 1310 int err = 0; 1311 1312 if (fuse_is_bad(inode)) 1313 return -EIO; 1314 1315 if (!fuse_allow_current_process(fc)) 1316 return -EACCES; 1317 1318 /* 1319 * If attributes are needed, refresh them before proceeding 1320 */ 1321 if (fc->default_permissions || 1322 ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) { 1323 struct fuse_inode *fi = get_fuse_inode(inode); 1324 u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID; 1325 1326 if (perm_mask & READ_ONCE(fi->inval_mask) || 1327 time_before64(fi->i_time, get_jiffies_64())) { 1328 refreshed = true; 1329 1330 err = fuse_perm_getattr(inode, mask); 1331 if (err) 1332 return err; 1333 } 1334 } 1335 1336 if (fc->default_permissions) { 1337 err = generic_permission(&init_user_ns, inode, mask); 1338 1339 /* If permission is denied, try to refresh file 1340 attributes. This is also needed, because the root 1341 node will at first have no permissions */ 1342 if (err == -EACCES && !refreshed) { 1343 err = fuse_perm_getattr(inode, mask); 1344 if (!err) 1345 err = generic_permission(&init_user_ns, 1346 inode, mask); 1347 } 1348 1349 /* Note: the opposite of the above test does not 1350 exist. So if permissions are revoked this won't be 1351 noticed immediately, only after the attribute 1352 timeout has expired */ 1353 } else if (mask & (MAY_ACCESS | MAY_CHDIR)) { 1354 err = fuse_access(inode, mask); 1355 } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { 1356 if (!(inode->i_mode & S_IXUGO)) { 1357 if (refreshed) 1358 return -EACCES; 1359 1360 err = fuse_perm_getattr(inode, mask); 1361 if (!err && !(inode->i_mode & S_IXUGO)) 1362 return -EACCES; 1363 } 1364 } 1365 return err; 1366 } 1367 1368 static int fuse_readlink_page(struct inode *inode, struct page *page) 1369 { 1370 struct fuse_mount *fm = get_fuse_mount(inode); 1371 struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 }; 1372 struct fuse_args_pages ap = { 1373 .num_pages = 1, 1374 .pages = &page, 1375 .descs = &desc, 1376 }; 1377 char *link; 1378 ssize_t res; 1379 1380 ap.args.opcode = FUSE_READLINK; 1381 ap.args.nodeid = get_node_id(inode); 1382 ap.args.out_pages = true; 1383 ap.args.out_argvar = true; 1384 ap.args.page_zeroing = true; 1385 ap.args.out_numargs = 1; 1386 ap.args.out_args[0].size = desc.length; 1387 res = fuse_simple_request(fm, &ap.args); 1388 1389 fuse_invalidate_atime(inode); 1390 1391 if (res < 0) 1392 return res; 1393 1394 if (WARN_ON(res >= PAGE_SIZE)) 1395 return -EIO; 1396 1397 link = page_address(page); 1398 link[res] = '\0'; 1399 1400 return 0; 1401 } 1402 1403 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, 1404 struct delayed_call *callback) 1405 { 1406 struct fuse_conn *fc = get_fuse_conn(inode); 1407 struct page *page; 1408 int err; 1409 1410 err = -EIO; 1411 if (fuse_is_bad(inode)) 1412 goto out_err; 1413 1414 if (fc->cache_symlinks) 1415 return page_get_link(dentry, inode, callback); 1416 1417 err = -ECHILD; 1418 if (!dentry) 1419 goto out_err; 1420 1421 page = alloc_page(GFP_KERNEL); 1422 err = -ENOMEM; 1423 if (!page) 1424 goto out_err; 1425 1426 err = fuse_readlink_page(inode, page); 1427 if (err) { 1428 __free_page(page); 1429 goto out_err; 1430 } 1431 1432 set_delayed_call(callback, page_put_link, page); 1433 1434 return page_address(page); 1435 1436 out_err: 1437 return ERR_PTR(err); 1438 } 1439 1440 static int fuse_dir_open(struct inode *inode, struct file *file) 1441 { 1442 return fuse_open_common(inode, file, true); 1443 } 1444 1445 static int fuse_dir_release(struct inode *inode, struct file *file) 1446 { 1447 fuse_release_common(file, true); 1448 1449 return 0; 1450 } 1451 1452 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end, 1453 int datasync) 1454 { 1455 struct inode *inode = file->f_mapping->host; 1456 struct fuse_conn *fc = get_fuse_conn(inode); 1457 int err; 1458 1459 if (fuse_is_bad(inode)) 1460 return -EIO; 1461 1462 if (fc->no_fsyncdir) 1463 return 0; 1464 1465 inode_lock(inode); 1466 err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR); 1467 if (err == -ENOSYS) { 1468 fc->no_fsyncdir = 1; 1469 err = 0; 1470 } 1471 inode_unlock(inode); 1472 1473 return err; 1474 } 1475 1476 static long fuse_dir_ioctl(struct file *file, unsigned int cmd, 1477 unsigned long arg) 1478 { 1479 struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host); 1480 1481 /* FUSE_IOCTL_DIR only supported for API version >= 7.18 */ 1482 if (fc->minor < 18) 1483 return -ENOTTY; 1484 1485 return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR); 1486 } 1487 1488 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd, 1489 unsigned long arg) 1490 { 1491 struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host); 1492 1493 if (fc->minor < 18) 1494 return -ENOTTY; 1495 1496 return fuse_ioctl_common(file, cmd, arg, 1497 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR); 1498 } 1499 1500 static bool update_mtime(unsigned ivalid, bool trust_local_mtime) 1501 { 1502 /* Always update if mtime is explicitly set */ 1503 if (ivalid & ATTR_MTIME_SET) 1504 return true; 1505 1506 /* Or if kernel i_mtime is the official one */ 1507 if (trust_local_mtime) 1508 return true; 1509 1510 /* If it's an open(O_TRUNC) or an ftruncate(), don't update */ 1511 if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE))) 1512 return false; 1513 1514 /* In all other cases update */ 1515 return true; 1516 } 1517 1518 static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr, 1519 struct fuse_setattr_in *arg, bool trust_local_cmtime) 1520 { 1521 unsigned ivalid = iattr->ia_valid; 1522 1523 if (ivalid & ATTR_MODE) 1524 arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode; 1525 if (ivalid & ATTR_UID) 1526 arg->valid |= FATTR_UID, arg->uid = from_kuid(fc->user_ns, iattr->ia_uid); 1527 if (ivalid & ATTR_GID) 1528 arg->valid |= FATTR_GID, arg->gid = from_kgid(fc->user_ns, iattr->ia_gid); 1529 if (ivalid & ATTR_SIZE) 1530 arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size; 1531 if (ivalid & ATTR_ATIME) { 1532 arg->valid |= FATTR_ATIME; 1533 arg->atime = iattr->ia_atime.tv_sec; 1534 arg->atimensec = iattr->ia_atime.tv_nsec; 1535 if (!(ivalid & ATTR_ATIME_SET)) 1536 arg->valid |= FATTR_ATIME_NOW; 1537 } 1538 if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) { 1539 arg->valid |= FATTR_MTIME; 1540 arg->mtime = iattr->ia_mtime.tv_sec; 1541 arg->mtimensec = iattr->ia_mtime.tv_nsec; 1542 if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime) 1543 arg->valid |= FATTR_MTIME_NOW; 1544 } 1545 if ((ivalid & ATTR_CTIME) && trust_local_cmtime) { 1546 arg->valid |= FATTR_CTIME; 1547 arg->ctime = iattr->ia_ctime.tv_sec; 1548 arg->ctimensec = iattr->ia_ctime.tv_nsec; 1549 } 1550 } 1551 1552 /* 1553 * Prevent concurrent writepages on inode 1554 * 1555 * This is done by adding a negative bias to the inode write counter 1556 * and waiting for all pending writes to finish. 1557 */ 1558 void fuse_set_nowrite(struct inode *inode) 1559 { 1560 struct fuse_inode *fi = get_fuse_inode(inode); 1561 1562 BUG_ON(!inode_is_locked(inode)); 1563 1564 spin_lock(&fi->lock); 1565 BUG_ON(fi->writectr < 0); 1566 fi->writectr += FUSE_NOWRITE; 1567 spin_unlock(&fi->lock); 1568 wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE); 1569 } 1570 1571 /* 1572 * Allow writepages on inode 1573 * 1574 * Remove the bias from the writecounter and send any queued 1575 * writepages. 1576 */ 1577 static void __fuse_release_nowrite(struct inode *inode) 1578 { 1579 struct fuse_inode *fi = get_fuse_inode(inode); 1580 1581 BUG_ON(fi->writectr != FUSE_NOWRITE); 1582 fi->writectr = 0; 1583 fuse_flush_writepages(inode); 1584 } 1585 1586 void fuse_release_nowrite(struct inode *inode) 1587 { 1588 struct fuse_inode *fi = get_fuse_inode(inode); 1589 1590 spin_lock(&fi->lock); 1591 __fuse_release_nowrite(inode); 1592 spin_unlock(&fi->lock); 1593 } 1594 1595 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args, 1596 struct inode *inode, 1597 struct fuse_setattr_in *inarg_p, 1598 struct fuse_attr_out *outarg_p) 1599 { 1600 args->opcode = FUSE_SETATTR; 1601 args->nodeid = get_node_id(inode); 1602 args->in_numargs = 1; 1603 args->in_args[0].size = sizeof(*inarg_p); 1604 args->in_args[0].value = inarg_p; 1605 args->out_numargs = 1; 1606 args->out_args[0].size = sizeof(*outarg_p); 1607 args->out_args[0].value = outarg_p; 1608 } 1609 1610 /* 1611 * Flush inode->i_mtime to the server 1612 */ 1613 int fuse_flush_times(struct inode *inode, struct fuse_file *ff) 1614 { 1615 struct fuse_mount *fm = get_fuse_mount(inode); 1616 FUSE_ARGS(args); 1617 struct fuse_setattr_in inarg; 1618 struct fuse_attr_out outarg; 1619 1620 memset(&inarg, 0, sizeof(inarg)); 1621 memset(&outarg, 0, sizeof(outarg)); 1622 1623 inarg.valid = FATTR_MTIME; 1624 inarg.mtime = inode->i_mtime.tv_sec; 1625 inarg.mtimensec = inode->i_mtime.tv_nsec; 1626 if (fm->fc->minor >= 23) { 1627 inarg.valid |= FATTR_CTIME; 1628 inarg.ctime = inode->i_ctime.tv_sec; 1629 inarg.ctimensec = inode->i_ctime.tv_nsec; 1630 } 1631 if (ff) { 1632 inarg.valid |= FATTR_FH; 1633 inarg.fh = ff->fh; 1634 } 1635 fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg); 1636 1637 return fuse_simple_request(fm, &args); 1638 } 1639 1640 /* 1641 * Set attributes, and at the same time refresh them. 1642 * 1643 * Truncation is slightly complicated, because the 'truncate' request 1644 * may fail, in which case we don't want to touch the mapping. 1645 * vmtruncate() doesn't allow for this case, so do the rlimit checking 1646 * and the actual truncation by hand. 1647 */ 1648 int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, 1649 struct file *file) 1650 { 1651 struct inode *inode = d_inode(dentry); 1652 struct fuse_mount *fm = get_fuse_mount(inode); 1653 struct fuse_conn *fc = fm->fc; 1654 struct fuse_inode *fi = get_fuse_inode(inode); 1655 struct address_space *mapping = inode->i_mapping; 1656 FUSE_ARGS(args); 1657 struct fuse_setattr_in inarg; 1658 struct fuse_attr_out outarg; 1659 bool is_truncate = false; 1660 bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode); 1661 loff_t oldsize; 1662 int err; 1663 bool trust_local_cmtime = is_wb; 1664 bool fault_blocked = false; 1665 1666 if (!fc->default_permissions) 1667 attr->ia_valid |= ATTR_FORCE; 1668 1669 err = setattr_prepare(&init_user_ns, dentry, attr); 1670 if (err) 1671 return err; 1672 1673 if (attr->ia_valid & ATTR_SIZE) { 1674 if (WARN_ON(!S_ISREG(inode->i_mode))) 1675 return -EIO; 1676 is_truncate = true; 1677 } 1678 1679 if (FUSE_IS_DAX(inode) && is_truncate) { 1680 filemap_invalidate_lock(mapping); 1681 fault_blocked = true; 1682 err = fuse_dax_break_layouts(inode, 0, 0); 1683 if (err) { 1684 filemap_invalidate_unlock(mapping); 1685 return err; 1686 } 1687 } 1688 1689 if (attr->ia_valid & ATTR_OPEN) { 1690 /* This is coming from open(..., ... | O_TRUNC); */ 1691 WARN_ON(!(attr->ia_valid & ATTR_SIZE)); 1692 WARN_ON(attr->ia_size != 0); 1693 if (fc->atomic_o_trunc) { 1694 /* 1695 * No need to send request to userspace, since actual 1696 * truncation has already been done by OPEN. But still 1697 * need to truncate page cache. 1698 */ 1699 i_size_write(inode, 0); 1700 truncate_pagecache(inode, 0); 1701 goto out; 1702 } 1703 file = NULL; 1704 } 1705 1706 /* Flush dirty data/metadata before non-truncate SETATTR */ 1707 if (is_wb && 1708 attr->ia_valid & 1709 (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET | 1710 ATTR_TIMES_SET)) { 1711 err = write_inode_now(inode, true); 1712 if (err) 1713 return err; 1714 1715 fuse_set_nowrite(inode); 1716 fuse_release_nowrite(inode); 1717 } 1718 1719 if (is_truncate) { 1720 fuse_set_nowrite(inode); 1721 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 1722 if (trust_local_cmtime && attr->ia_size != inode->i_size) 1723 attr->ia_valid |= ATTR_MTIME | ATTR_CTIME; 1724 } 1725 1726 memset(&inarg, 0, sizeof(inarg)); 1727 memset(&outarg, 0, sizeof(outarg)); 1728 iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime); 1729 if (file) { 1730 struct fuse_file *ff = file->private_data; 1731 inarg.valid |= FATTR_FH; 1732 inarg.fh = ff->fh; 1733 } 1734 1735 /* Kill suid/sgid for non-directory chown unconditionally */ 1736 if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) && 1737 attr->ia_valid & (ATTR_UID | ATTR_GID)) 1738 inarg.valid |= FATTR_KILL_SUIDGID; 1739 1740 if (attr->ia_valid & ATTR_SIZE) { 1741 /* For mandatory locking in truncate */ 1742 inarg.valid |= FATTR_LOCKOWNER; 1743 inarg.lock_owner = fuse_lock_owner_id(fc, current->files); 1744 1745 /* Kill suid/sgid for truncate only if no CAP_FSETID */ 1746 if (fc->handle_killpriv_v2 && !capable(CAP_FSETID)) 1747 inarg.valid |= FATTR_KILL_SUIDGID; 1748 } 1749 fuse_setattr_fill(fc, &args, inode, &inarg, &outarg); 1750 err = fuse_simple_request(fm, &args); 1751 if (err) { 1752 if (err == -EINTR) 1753 fuse_invalidate_attr(inode); 1754 goto error; 1755 } 1756 1757 if (fuse_invalid_attr(&outarg.attr) || 1758 inode_wrong_type(inode, outarg.attr.mode)) { 1759 fuse_make_bad(inode); 1760 err = -EIO; 1761 goto error; 1762 } 1763 1764 spin_lock(&fi->lock); 1765 /* the kernel maintains i_mtime locally */ 1766 if (trust_local_cmtime) { 1767 if (attr->ia_valid & ATTR_MTIME) 1768 inode->i_mtime = attr->ia_mtime; 1769 if (attr->ia_valid & ATTR_CTIME) 1770 inode->i_ctime = attr->ia_ctime; 1771 /* FIXME: clear I_DIRTY_SYNC? */ 1772 } 1773 1774 fuse_change_attributes_common(inode, &outarg.attr, 1775 attr_timeout(&outarg), 1776 fuse_get_cache_mask(inode)); 1777 oldsize = inode->i_size; 1778 /* see the comment in fuse_change_attributes() */ 1779 if (!is_wb || is_truncate) 1780 i_size_write(inode, outarg.attr.size); 1781 1782 if (is_truncate) { 1783 /* NOTE: this may release/reacquire fi->lock */ 1784 __fuse_release_nowrite(inode); 1785 } 1786 spin_unlock(&fi->lock); 1787 1788 /* 1789 * Only call invalidate_inode_pages2() after removing 1790 * FUSE_NOWRITE, otherwise fuse_launder_folio() would deadlock. 1791 */ 1792 if ((is_truncate || !is_wb) && 1793 S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { 1794 truncate_pagecache(inode, outarg.attr.size); 1795 invalidate_inode_pages2(mapping); 1796 } 1797 1798 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 1799 out: 1800 if (fault_blocked) 1801 filemap_invalidate_unlock(mapping); 1802 1803 return 0; 1804 1805 error: 1806 if (is_truncate) 1807 fuse_release_nowrite(inode); 1808 1809 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 1810 1811 if (fault_blocked) 1812 filemap_invalidate_unlock(mapping); 1813 return err; 1814 } 1815 1816 static int fuse_setattr(struct user_namespace *mnt_userns, struct dentry *entry, 1817 struct iattr *attr) 1818 { 1819 struct inode *inode = d_inode(entry); 1820 struct fuse_conn *fc = get_fuse_conn(inode); 1821 struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL; 1822 int ret; 1823 1824 if (fuse_is_bad(inode)) 1825 return -EIO; 1826 1827 if (!fuse_allow_current_process(get_fuse_conn(inode))) 1828 return -EACCES; 1829 1830 if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) { 1831 attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID | 1832 ATTR_MODE); 1833 1834 /* 1835 * The only sane way to reliably kill suid/sgid is to do it in 1836 * the userspace filesystem 1837 * 1838 * This should be done on write(), truncate() and chown(). 1839 */ 1840 if (!fc->handle_killpriv && !fc->handle_killpriv_v2) { 1841 /* 1842 * ia_mode calculation may have used stale i_mode. 1843 * Refresh and recalculate. 1844 */ 1845 ret = fuse_do_getattr(inode, NULL, file); 1846 if (ret) 1847 return ret; 1848 1849 attr->ia_mode = inode->i_mode; 1850 if (inode->i_mode & S_ISUID) { 1851 attr->ia_valid |= ATTR_MODE; 1852 attr->ia_mode &= ~S_ISUID; 1853 } 1854 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { 1855 attr->ia_valid |= ATTR_MODE; 1856 attr->ia_mode &= ~S_ISGID; 1857 } 1858 } 1859 } 1860 if (!attr->ia_valid) 1861 return 0; 1862 1863 ret = fuse_do_setattr(entry, attr, file); 1864 if (!ret) { 1865 /* 1866 * If filesystem supports acls it may have updated acl xattrs in 1867 * the filesystem, so forget cached acls for the inode. 1868 */ 1869 if (fc->posix_acl) 1870 forget_all_cached_acls(inode); 1871 1872 /* Directory mode changed, may need to revalidate access */ 1873 if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE)) 1874 fuse_invalidate_entry_cache(entry); 1875 } 1876 return ret; 1877 } 1878 1879 static int fuse_getattr(struct user_namespace *mnt_userns, 1880 const struct path *path, struct kstat *stat, 1881 u32 request_mask, unsigned int flags) 1882 { 1883 struct inode *inode = d_inode(path->dentry); 1884 struct fuse_conn *fc = get_fuse_conn(inode); 1885 1886 if (fuse_is_bad(inode)) 1887 return -EIO; 1888 1889 if (!fuse_allow_current_process(fc)) { 1890 if (!request_mask) { 1891 /* 1892 * If user explicitly requested *nothing* then don't 1893 * error out, but return st_dev only. 1894 */ 1895 stat->result_mask = 0; 1896 stat->dev = inode->i_sb->s_dev; 1897 return 0; 1898 } 1899 return -EACCES; 1900 } 1901 1902 return fuse_update_get_attr(inode, NULL, stat, request_mask, flags); 1903 } 1904 1905 static const struct inode_operations fuse_dir_inode_operations = { 1906 .lookup = fuse_lookup, 1907 .mkdir = fuse_mkdir, 1908 .symlink = fuse_symlink, 1909 .unlink = fuse_unlink, 1910 .rmdir = fuse_rmdir, 1911 .rename = fuse_rename2, 1912 .link = fuse_link, 1913 .setattr = fuse_setattr, 1914 .create = fuse_create, 1915 .atomic_open = fuse_atomic_open, 1916 .mknod = fuse_mknod, 1917 .permission = fuse_permission, 1918 .getattr = fuse_getattr, 1919 .listxattr = fuse_listxattr, 1920 .get_acl = fuse_get_acl, 1921 .set_acl = fuse_set_acl, 1922 .fileattr_get = fuse_fileattr_get, 1923 .fileattr_set = fuse_fileattr_set, 1924 }; 1925 1926 static const struct file_operations fuse_dir_operations = { 1927 .llseek = generic_file_llseek, 1928 .read = generic_read_dir, 1929 .iterate_shared = fuse_readdir, 1930 .open = fuse_dir_open, 1931 .release = fuse_dir_release, 1932 .fsync = fuse_dir_fsync, 1933 .unlocked_ioctl = fuse_dir_ioctl, 1934 .compat_ioctl = fuse_dir_compat_ioctl, 1935 }; 1936 1937 static const struct inode_operations fuse_common_inode_operations = { 1938 .setattr = fuse_setattr, 1939 .permission = fuse_permission, 1940 .getattr = fuse_getattr, 1941 .listxattr = fuse_listxattr, 1942 .get_acl = fuse_get_acl, 1943 .set_acl = fuse_set_acl, 1944 .fileattr_get = fuse_fileattr_get, 1945 .fileattr_set = fuse_fileattr_set, 1946 }; 1947 1948 static const struct inode_operations fuse_symlink_inode_operations = { 1949 .setattr = fuse_setattr, 1950 .get_link = fuse_get_link, 1951 .getattr = fuse_getattr, 1952 .listxattr = fuse_listxattr, 1953 }; 1954 1955 void fuse_init_common(struct inode *inode) 1956 { 1957 inode->i_op = &fuse_common_inode_operations; 1958 } 1959 1960 void fuse_init_dir(struct inode *inode) 1961 { 1962 struct fuse_inode *fi = get_fuse_inode(inode); 1963 1964 inode->i_op = &fuse_dir_inode_operations; 1965 inode->i_fop = &fuse_dir_operations; 1966 1967 spin_lock_init(&fi->rdc.lock); 1968 fi->rdc.cached = false; 1969 fi->rdc.size = 0; 1970 fi->rdc.pos = 0; 1971 fi->rdc.version = 0; 1972 } 1973 1974 static int fuse_symlink_read_folio(struct file *null, struct folio *folio) 1975 { 1976 int err = fuse_readlink_page(folio->mapping->host, &folio->page); 1977 1978 if (!err) 1979 folio_mark_uptodate(folio); 1980 1981 folio_unlock(folio); 1982 1983 return err; 1984 } 1985 1986 static const struct address_space_operations fuse_symlink_aops = { 1987 .read_folio = fuse_symlink_read_folio, 1988 }; 1989 1990 void fuse_init_symlink(struct inode *inode) 1991 { 1992 inode->i_op = &fuse_symlink_inode_operations; 1993 inode->i_data.a_ops = &fuse_symlink_aops; 1994 inode_nohighmem(inode); 1995 } 1996