1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 FUSE: Filesystem in Userspace 4 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> 5 */ 6 7 #include "dev.h" 8 #include "fuse_i.h" 9 10 #include <linux/pagemap.h> 11 #include <linux/file.h> 12 #include <linux/fs_context.h> 13 #include <linux/moduleparam.h> 14 #include <linux/sched.h> 15 #include <linux/namei.h> 16 #include <linux/slab.h> 17 #include <linux/xattr.h> 18 #include <linux/iversion.h> 19 #include <linux/posix_acl.h> 20 #include <linux/security.h> 21 #include <linux/types.h> 22 #include <linux/kernel.h> 23 24 static bool __read_mostly allow_sys_admin_access; 25 module_param(allow_sys_admin_access, bool, 0644); 26 MODULE_PARM_DESC(allow_sys_admin_access, 27 "Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check"); 28 29 struct dentry_bucket { 30 struct rb_root tree; 31 spinlock_t lock; 32 }; 33 34 #define FUSE_HASH_BITS 5 35 #define FUSE_HASH_SIZE (1 << FUSE_HASH_BITS) 36 static struct dentry_bucket dentry_hash[FUSE_HASH_SIZE]; 37 struct delayed_work dentry_tree_work; 38 39 /* Minimum invalidation work queue frequency */ 40 #define FUSE_DENTRY_INVAL_FREQ_MIN 5 41 42 unsigned __read_mostly inval_wq; 43 static int inval_wq_set(const char *val, const struct kernel_param *kp) 44 { 45 unsigned int num; 46 unsigned int old = inval_wq; 47 int ret; 48 49 if (!val) 50 return -EINVAL; 51 52 ret = kstrtouint(val, 0, &num); 53 if (ret) 54 return ret; 55 56 if ((num < FUSE_DENTRY_INVAL_FREQ_MIN) && (num != 0)) 57 return -EINVAL; 58 59 /* This should prevent overflow in secs_to_jiffies() */ 60 if (num > USHRT_MAX) 61 return -EINVAL; 62 63 *((unsigned int *)kp->arg) = num; 64 65 if (num && !old) 66 schedule_delayed_work(&dentry_tree_work, 67 secs_to_jiffies(num)); 68 else if (!num && old) 69 cancel_delayed_work_sync(&dentry_tree_work); 70 71 return 0; 72 } 73 static const struct kernel_param_ops inval_wq_ops = { 74 .set = inval_wq_set, 75 .get = param_get_uint, 76 }; 77 module_param_cb(inval_wq, &inval_wq_ops, &inval_wq, 0644); 78 __MODULE_PARM_TYPE(inval_wq, "uint"); 79 MODULE_PARM_DESC(inval_wq, 80 "Dentries invalidation work queue period in secs (>= " 81 __stringify(FUSE_DENTRY_INVAL_FREQ_MIN) ")."); 82 83 static inline struct dentry_bucket *get_dentry_bucket(struct dentry *dentry) 84 { 85 int i = hash_ptr(dentry, FUSE_HASH_BITS); 86 87 return &dentry_hash[i]; 88 } 89 90 static void fuse_advise_use_readdirplus(struct inode *dir) 91 { 92 struct fuse_inode *fi = get_fuse_inode(dir); 93 94 set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state); 95 } 96 97 struct fuse_dentry { 98 u64 time; 99 union { 100 struct rcu_head rcu; 101 struct rb_node node; 102 }; 103 struct dentry *dentry; 104 }; 105 106 static void __fuse_dentry_tree_del_node(struct fuse_dentry *fd, 107 struct dentry_bucket *bucket) 108 { 109 if (!RB_EMPTY_NODE(&fd->node)) { 110 rb_erase(&fd->node, &bucket->tree); 111 RB_CLEAR_NODE(&fd->node); 112 } 113 } 114 115 static void fuse_dentry_tree_del_node(struct dentry *dentry) 116 { 117 struct fuse_dentry *fd = dentry->d_fsdata; 118 struct dentry_bucket *bucket = get_dentry_bucket(dentry); 119 120 spin_lock(&bucket->lock); 121 __fuse_dentry_tree_del_node(fd, bucket); 122 spin_unlock(&bucket->lock); 123 } 124 125 static void fuse_dentry_tree_add_node(struct dentry *dentry) 126 { 127 struct fuse_dentry *fd = dentry->d_fsdata; 128 struct dentry_bucket *bucket; 129 struct fuse_dentry *cur; 130 struct rb_node **p, *parent = NULL; 131 132 if (!inval_wq) 133 return; 134 135 bucket = get_dentry_bucket(dentry); 136 137 spin_lock(&bucket->lock); 138 139 __fuse_dentry_tree_del_node(fd, bucket); 140 141 p = &bucket->tree.rb_node; 142 while (*p) { 143 parent = *p; 144 cur = rb_entry(*p, struct fuse_dentry, node); 145 if (fd->time < cur->time) 146 p = &(*p)->rb_left; 147 else 148 p = &(*p)->rb_right; 149 } 150 rb_link_node(&fd->node, parent, p); 151 rb_insert_color(&fd->node, &bucket->tree); 152 spin_unlock(&bucket->lock); 153 } 154 155 /* 156 * work queue which, when enabled, will periodically check for expired dentries 157 * in the dentries tree. 158 */ 159 static void fuse_dentry_tree_work(struct work_struct *work) 160 { 161 LIST_HEAD(dispose); 162 struct fuse_dentry *fd; 163 struct rb_node *node; 164 int i; 165 166 for (i = 0; i < FUSE_HASH_SIZE; i++) { 167 spin_lock(&dentry_hash[i].lock); 168 node = rb_first(&dentry_hash[i].tree); 169 while (node) { 170 fd = rb_entry(node, struct fuse_dentry, node); 171 if (!time_before64(fd->time, get_jiffies_64())) 172 break; 173 174 rb_erase(&fd->node, &dentry_hash[i].tree); 175 RB_CLEAR_NODE(&fd->node); 176 spin_lock(&fd->dentry->d_lock); 177 /* If dentry is still referenced, let next dput release it */ 178 fd->dentry->d_flags |= DCACHE_OP_DELETE; 179 __move_to_shrink_list(fd->dentry, &dispose); 180 spin_unlock(&fd->dentry->d_lock); 181 if (need_resched()) { 182 spin_unlock(&dentry_hash[i].lock); 183 cond_resched(); 184 spin_lock(&dentry_hash[i].lock); 185 } 186 node = rb_first(&dentry_hash[i].tree); 187 } 188 spin_unlock(&dentry_hash[i].lock); 189 } 190 shrink_dentry_list(&dispose); 191 192 if (inval_wq) 193 schedule_delayed_work(&dentry_tree_work, 194 secs_to_jiffies(inval_wq)); 195 } 196 197 void fuse_epoch_work(struct work_struct *work) 198 { 199 struct fuse_conn *fc = container_of(work, struct fuse_conn, 200 epoch_work); 201 struct fuse_mount *fm; 202 struct inode *inode; 203 204 down_read(&fc->killsb); 205 206 inode = fuse_ilookup(fc, FUSE_ROOT_ID, &fm); 207 if (inode) { 208 iput(inode); 209 /* Remove all possible active references to cached inodes */ 210 shrink_dcache_sb(fm->sb); 211 } else 212 pr_warn("Failed to get root inode"); 213 214 up_read(&fc->killsb); 215 } 216 217 void fuse_dentry_tree_init(void) 218 { 219 int i; 220 221 for (i = 0; i < FUSE_HASH_SIZE; i++) { 222 spin_lock_init(&dentry_hash[i].lock); 223 dentry_hash[i].tree = RB_ROOT; 224 } 225 INIT_DELAYED_WORK(&dentry_tree_work, fuse_dentry_tree_work); 226 } 227 228 void fuse_dentry_tree_cleanup(void) 229 { 230 int i; 231 232 inval_wq = 0; 233 cancel_delayed_work_sync(&dentry_tree_work); 234 235 for (i = 0; i < FUSE_HASH_SIZE; i++) 236 WARN_ON_ONCE(!RB_EMPTY_ROOT(&dentry_hash[i].tree)); 237 } 238 239 static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time) 240 { 241 ((struct fuse_dentry *) dentry->d_fsdata)->time = time; 242 } 243 244 static inline u64 fuse_dentry_time(const struct dentry *entry) 245 { 246 return ((struct fuse_dentry *) entry->d_fsdata)->time; 247 } 248 249 static void fuse_dentry_settime(struct dentry *dentry, u64 time) 250 { 251 struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb); 252 bool delete = !time && fc->delete_stale; 253 /* 254 * Mess with DCACHE_OP_DELETE because dput() will be faster without it. 255 * Don't care about races, either way it's just an optimization 256 */ 257 if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) || 258 (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) { 259 spin_lock(&dentry->d_lock); 260 if (!delete) 261 dentry->d_flags &= ~DCACHE_OP_DELETE; 262 else 263 dentry->d_flags |= DCACHE_OP_DELETE; 264 spin_unlock(&dentry->d_lock); 265 } 266 267 __fuse_dentry_settime(dentry, time); 268 fuse_dentry_tree_add_node(dentry); 269 } 270 271 /* 272 * FUSE caches dentries and attributes with separate timeout. The 273 * time in jiffies until the dentry/attributes are valid is stored in 274 * dentry->d_fsdata and fuse_inode->i_time respectively. 275 */ 276 277 /* 278 * Calculate the time in jiffies until a dentry/attributes are valid 279 */ 280 u64 fuse_time_to_jiffies(u64 sec, u32 nsec) 281 { 282 if (sec || nsec) { 283 struct timespec64 ts = { 284 sec, 285 min_t(u32, nsec, NSEC_PER_SEC - 1) 286 }; 287 288 return get_jiffies_64() + timespec64_to_jiffies(&ts); 289 } else 290 return 0; 291 } 292 293 /* 294 * Set dentry and possibly attribute timeouts from the lookup/mk* 295 * replies 296 */ 297 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o) 298 { 299 fuse_dentry_settime(entry, 300 fuse_time_to_jiffies(o->entry_valid, o->entry_valid_nsec)); 301 } 302 303 void fuse_invalidate_attr_mask(struct inode *inode, u32 mask) 304 { 305 set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask); 306 } 307 308 /* 309 * Mark the attributes as stale, so that at the next call to 310 * ->getattr() they will be fetched from userspace 311 */ 312 void fuse_invalidate_attr(struct inode *inode) 313 { 314 fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS); 315 } 316 317 static void fuse_dir_changed(struct inode *dir) 318 { 319 fuse_invalidate_attr_mask(dir, FUSE_STATX_MODDIR); 320 inode_maybe_inc_iversion(dir, false); 321 } 322 323 /* 324 * Mark the attributes as stale due to an atime change. Avoid the invalidate if 325 * atime is not used. 326 */ 327 void fuse_invalidate_atime(struct inode *inode) 328 { 329 if (!IS_RDONLY(inode)) 330 fuse_invalidate_attr_mask(inode, STATX_ATIME); 331 } 332 333 /* 334 * Just mark the entry as stale, so that a next attempt to look it up 335 * will result in a new lookup call to userspace 336 * 337 * This is called when a dentry is about to become negative and the 338 * timeout is unknown (unlink, rmdir, rename and in some cases 339 * lookup) 340 */ 341 void fuse_invalidate_entry_cache(struct dentry *entry) 342 { 343 fuse_dentry_settime(entry, 0); 344 } 345 346 /* 347 * Same as fuse_invalidate_entry_cache(), but also try to remove the 348 * dentry from the hash 349 */ 350 static void fuse_invalidate_entry(struct dentry *entry) 351 { 352 d_invalidate(entry); 353 fuse_invalidate_entry_cache(entry); 354 } 355 356 static void fuse_lookup_init(struct fuse_args *args, u64 nodeid, 357 const struct qstr *name, 358 struct fuse_entry_out *outarg) 359 { 360 memset(outarg, 0, sizeof(struct fuse_entry_out)); 361 args->opcode = FUSE_LOOKUP; 362 args->nodeid = nodeid; 363 args->in_numargs = 3; 364 fuse_set_zero_arg0(args); 365 args->in_args[1].size = name->len; 366 args->in_args[1].value = name->name; 367 args->in_args[2].size = 1; 368 args->in_args[2].value = ""; 369 args->out_numargs = 1; 370 args->out_args[0].size = sizeof(struct fuse_entry_out); 371 args->out_args[0].value = outarg; 372 } 373 374 /* 375 * Check whether the dentry is still valid 376 * 377 * If the entry validity timeout has expired and the dentry is 378 * positive, try to redo the lookup. If the lookup results in a 379 * different inode, then let the VFS invalidate the dentry and redo 380 * the lookup once more. If the lookup results in the same inode, 381 * then refresh the attributes, timeouts and mark the dentry valid. 382 */ 383 static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name, 384 struct dentry *entry, unsigned int flags) 385 { 386 struct inode *inode; 387 struct fuse_mount *fm; 388 struct fuse_conn *fc; 389 struct fuse_inode *fi; 390 int ret; 391 392 fc = get_fuse_conn_super(dir->i_sb); 393 if (entry->d_time < atomic_read(&fc->epoch)) 394 goto invalid; 395 396 inode = d_inode_rcu(entry); 397 if (inode && fuse_is_bad(inode)) 398 goto invalid; 399 else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) || 400 (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) { 401 struct fuse_entry_out outarg; 402 FUSE_ARGS(args); 403 struct fuse_forget_link *forget; 404 u64 attr_version; 405 406 /* For negative dentries, always do a fresh lookup */ 407 if (!inode) 408 goto invalid; 409 410 ret = -ECHILD; 411 if (flags & LOOKUP_RCU) 412 goto out; 413 414 fm = get_fuse_mount(inode); 415 416 forget = fuse_alloc_forget(); 417 ret = -ENOMEM; 418 if (!forget) 419 goto out; 420 421 attr_version = fuse_get_attr_version(fm->fc); 422 423 fuse_lookup_init(&args, get_node_id(dir), name, &outarg); 424 ret = fuse_simple_request(fm, &args); 425 /* Zero nodeid is same as -ENOENT */ 426 if (!ret && !outarg.nodeid) 427 ret = -ENOENT; 428 if (!ret) { 429 fi = get_fuse_inode(inode); 430 if (outarg.nodeid != get_node_id(inode) || 431 (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) { 432 fuse_chan_queue_forget(fm->fc->chan, forget, 433 outarg.nodeid, 1); 434 goto invalid; 435 } 436 spin_lock(&fi->lock); 437 fi->nlookup++; 438 spin_unlock(&fi->lock); 439 } 440 kfree(forget); 441 if (ret == -ENOMEM || ret == -EINTR) 442 goto out; 443 if (ret || fuse_invalid_attr(&outarg.attr) || 444 fuse_stale_inode(inode, outarg.generation, &outarg.attr)) 445 goto invalid; 446 447 forget_all_cached_acls(inode); 448 fuse_change_attributes(inode, &outarg.attr, NULL, 449 ATTR_TIMEOUT(&outarg), 450 attr_version); 451 fuse_change_entry_timeout(entry, &outarg); 452 } else if (inode) { 453 fi = get_fuse_inode(inode); 454 if (flags & LOOKUP_RCU) { 455 if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state)) 456 return -ECHILD; 457 } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) { 458 fuse_advise_use_readdirplus(dir); 459 } 460 } 461 ret = 1; 462 out: 463 return ret; 464 465 invalid: 466 ret = 0; 467 goto out; 468 } 469 470 static int fuse_dentry_init(struct dentry *dentry) 471 { 472 struct fuse_dentry *fd; 473 474 fd = kzalloc_obj(struct fuse_dentry, 475 GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE); 476 if (!fd) 477 return -ENOMEM; 478 479 fd->dentry = dentry; 480 RB_CLEAR_NODE(&fd->node); 481 dentry->d_fsdata = fd; 482 /* 483 * Initialising d_time (epoch) to '0' ensures the dentry is invalid 484 * if compared to fc->epoch, which is initialized to '1'. 485 */ 486 dentry->d_time = 0; 487 488 return 0; 489 } 490 491 static void fuse_dentry_release(struct dentry *dentry) 492 { 493 struct fuse_dentry *fd = dentry->d_fsdata; 494 495 if (!RB_EMPTY_NODE(&fd->node)) 496 fuse_dentry_tree_del_node(dentry); 497 kfree_rcu(fd, rcu); 498 } 499 500 static int fuse_dentry_delete(const struct dentry *dentry) 501 { 502 return time_before64(fuse_dentry_time(dentry), get_jiffies_64()); 503 } 504 505 /* 506 * Create a fuse_mount object with a new superblock (with path->dentry 507 * as the root), and return that mount so it can be auto-mounted on 508 * @path. 509 */ 510 static struct vfsmount *fuse_dentry_automount(struct path *path) 511 { 512 struct fs_context *fsc; 513 struct vfsmount *mnt; 514 struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry)); 515 516 fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry); 517 if (IS_ERR(fsc)) 518 return ERR_CAST(fsc); 519 520 /* Pass the FUSE inode of the mount for fuse_get_tree_submount() */ 521 fsc->fs_private = mp_fi; 522 523 /* Create the submount */ 524 mnt = fc_mount(fsc); 525 put_fs_context(fsc); 526 return mnt; 527 } 528 529 const struct dentry_operations fuse_dentry_operations = { 530 .d_revalidate = fuse_dentry_revalidate, 531 .d_delete = fuse_dentry_delete, 532 .d_init = fuse_dentry_init, 533 .d_release = fuse_dentry_release, 534 .d_automount = fuse_dentry_automount, 535 }; 536 537 int fuse_valid_type(int m) 538 { 539 return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) || 540 S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m); 541 } 542 543 static bool fuse_valid_size(u64 size) 544 { 545 return size <= LLONG_MAX; 546 } 547 548 bool fuse_invalid_attr(struct fuse_attr *attr) 549 { 550 return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size); 551 } 552 553 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name, 554 struct fuse_entry_out *outarg, struct inode **inode) 555 { 556 struct fuse_mount *fm = get_fuse_mount_super(sb); 557 FUSE_ARGS(args); 558 struct fuse_forget_link *forget; 559 u64 attr_version, evict_ctr; 560 int err; 561 562 *inode = NULL; 563 err = -ENAMETOOLONG; 564 if (name->len > fm->fc->name_max) 565 goto out; 566 567 568 forget = fuse_alloc_forget(); 569 err = -ENOMEM; 570 if (!forget) 571 goto out; 572 573 attr_version = fuse_get_attr_version(fm->fc); 574 evict_ctr = fuse_get_evict_ctr(fm->fc); 575 576 fuse_lookup_init(&args, nodeid, name, outarg); 577 err = fuse_simple_request(fm, &args); 578 /* Zero nodeid is same as -ENOENT, but with valid timeout */ 579 if (err || !outarg->nodeid) 580 goto out_put_forget; 581 582 err = -EIO; 583 if (fuse_invalid_attr(&outarg->attr)) 584 goto out_put_forget; 585 if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) { 586 pr_warn_once("root generation should be zero\n"); 587 outarg->generation = 0; 588 } 589 590 *inode = fuse_iget(sb, outarg->nodeid, outarg->generation, 591 &outarg->attr, ATTR_TIMEOUT(outarg), 592 attr_version, evict_ctr); 593 err = -ENOMEM; 594 if (!*inode) { 595 fuse_chan_queue_forget(fm->fc->chan, forget, outarg->nodeid, 1); 596 goto out; 597 } 598 err = 0; 599 600 out_put_forget: 601 kfree(forget); 602 out: 603 return err; 604 } 605 606 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, 607 unsigned int flags) 608 { 609 struct fuse_entry_out outarg; 610 struct fuse_conn *fc; 611 struct inode *inode; 612 struct dentry *newent; 613 int err, epoch; 614 bool outarg_valid = true; 615 bool locked; 616 617 if (fuse_is_bad(dir)) 618 return ERR_PTR(-EIO); 619 620 fc = get_fuse_conn_super(dir->i_sb); 621 epoch = atomic_read(&fc->epoch); 622 623 locked = fuse_lock_inode(dir); 624 err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, 625 &outarg, &inode); 626 fuse_unlock_inode(dir, locked); 627 if (err == -ENOENT) { 628 outarg_valid = false; 629 err = 0; 630 } 631 if (err) 632 goto out_err; 633 634 err = -EIO; 635 if (inode && get_node_id(inode) == FUSE_ROOT_ID) 636 goto out_iput; 637 638 newent = d_splice_alias(inode, entry); 639 err = PTR_ERR(newent); 640 if (IS_ERR(newent)) 641 goto out_err; 642 643 entry = newent ? newent : entry; 644 entry->d_time = epoch; 645 if (outarg_valid) 646 fuse_change_entry_timeout(entry, &outarg); 647 else 648 fuse_invalidate_entry_cache(entry); 649 650 if (inode) 651 fuse_advise_use_readdirplus(dir); 652 return newent; 653 654 out_iput: 655 iput(inode); 656 out_err: 657 return ERR_PTR(err); 658 } 659 660 static int get_security_context(struct dentry *entry, umode_t mode, 661 struct fuse_in_arg *ext) 662 { 663 struct fuse_secctx *fctx; 664 struct fuse_secctx_header *header; 665 struct lsm_context lsmctx = { }; 666 void *ptr; 667 u32 total_len = sizeof(*header); 668 int err, nr_ctx = 0; 669 const char *name = NULL; 670 size_t namesize; 671 672 err = security_dentry_init_security(entry, mode, &entry->d_name, 673 &name, &lsmctx); 674 675 /* If no LSM is supporting this security hook ignore error */ 676 if (err && err != -EOPNOTSUPP) 677 goto out_err; 678 679 if (lsmctx.len) { 680 nr_ctx = 1; 681 namesize = strlen(name) + 1; 682 err = -EIO; 683 if (WARN_ON(namesize > XATTR_NAME_MAX + 1 || 684 lsmctx.len > S32_MAX)) 685 goto out_err; 686 total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namesize + 687 lsmctx.len); 688 } 689 690 err = -ENOMEM; 691 header = ptr = kzalloc(total_len, GFP_KERNEL); 692 if (!ptr) 693 goto out_err; 694 695 header->nr_secctx = nr_ctx; 696 header->size = total_len; 697 ptr += sizeof(*header); 698 if (nr_ctx) { 699 fctx = ptr; 700 fctx->size = lsmctx.len; 701 ptr += sizeof(*fctx); 702 703 strscpy(ptr, name, namesize); 704 ptr += namesize; 705 706 memcpy(ptr, lsmctx.context, lsmctx.len); 707 } 708 ext->size = total_len; 709 ext->value = header; 710 err = 0; 711 out_err: 712 if (nr_ctx) 713 security_release_secctx(&lsmctx); 714 return err; 715 } 716 717 static void *extend_arg(struct fuse_in_arg *buf, u32 bytes) 718 { 719 void *p; 720 u32 newlen = buf->size + bytes; 721 722 p = krealloc(buf->value, newlen, GFP_KERNEL); 723 if (!p) { 724 kfree(buf->value); 725 buf->size = 0; 726 buf->value = NULL; 727 return NULL; 728 } 729 730 memset(p + buf->size, 0, bytes); 731 buf->value = p; 732 buf->size = newlen; 733 734 return p + newlen - bytes; 735 } 736 737 static u32 fuse_ext_size(size_t size) 738 { 739 return FUSE_REC_ALIGN(sizeof(struct fuse_ext_header) + size); 740 } 741 742 /* 743 * This adds just a single supplementary group that matches the parent's group. 744 */ 745 static int get_create_supp_group(struct mnt_idmap *idmap, 746 struct inode *dir, 747 struct fuse_in_arg *ext) 748 { 749 struct fuse_conn *fc = get_fuse_conn(dir); 750 struct fuse_ext_header *xh; 751 struct fuse_supp_groups *sg; 752 kgid_t kgid = dir->i_gid; 753 vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns, kgid); 754 gid_t parent_gid = from_kgid(fc->user_ns, kgid); 755 756 u32 sg_len = fuse_ext_size(sizeof(*sg) + sizeof(sg->groups[0])); 757 758 if (parent_gid == (gid_t) -1 || vfsgid_eq_kgid(vfsgid, current_fsgid()) || 759 !vfsgid_in_group_p(vfsgid)) 760 return 0; 761 762 xh = extend_arg(ext, sg_len); 763 if (!xh) 764 return -ENOMEM; 765 766 xh->size = sg_len; 767 xh->type = FUSE_EXT_GROUPS; 768 769 sg = (struct fuse_supp_groups *) &xh[1]; 770 sg->nr_groups = 1; 771 sg->groups[0] = parent_gid; 772 773 return 0; 774 } 775 776 static int get_create_ext(struct mnt_idmap *idmap, 777 struct fuse_args *args, 778 struct inode *dir, struct dentry *dentry, 779 umode_t mode) 780 { 781 struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb); 782 struct fuse_in_arg ext = { .size = 0, .value = NULL }; 783 int err = 0; 784 785 if (fc->init_security) 786 err = get_security_context(dentry, mode, &ext); 787 if (!err && fc->create_supp_group) 788 err = get_create_supp_group(idmap, dir, &ext); 789 790 if (!err && ext.size) { 791 WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args)); 792 args->is_ext = true; 793 args->ext_idx = args->in_numargs++; 794 args->in_args[args->ext_idx] = ext; 795 } else { 796 kfree(ext.value); 797 } 798 799 return err; 800 } 801 802 static void free_ext_value(struct fuse_args *args) 803 { 804 if (args->is_ext) 805 kfree(args->in_args[args->ext_idx].value); 806 } 807 808 /* 809 * Atomic create+open operation 810 * 811 * If the filesystem doesn't support this, then fall back to separate 812 * 'mknod' + 'open' requests. 813 */ 814 static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir, 815 struct dentry *entry, struct file *file, 816 unsigned int flags, umode_t mode, u32 opcode) 817 { 818 struct inode *inode; 819 struct fuse_mount *fm = get_fuse_mount(dir); 820 FUSE_ARGS(args); 821 struct fuse_forget_link *forget; 822 struct fuse_create_in inarg; 823 struct fuse_open_out *outopenp; 824 struct fuse_entry_out outentry; 825 struct fuse_inode *fi; 826 struct fuse_file *ff; 827 int epoch, err; 828 bool trunc = flags & O_TRUNC; 829 830 /* Userspace expects S_IFREG in create mode */ 831 BUG_ON((mode & S_IFMT) != S_IFREG); 832 833 epoch = atomic_read(&fm->fc->epoch); 834 forget = fuse_alloc_forget(); 835 err = -ENOMEM; 836 if (!forget) 837 goto out_err; 838 839 ff = fuse_file_alloc(fm, true); 840 if (!ff) 841 goto out_put_forget_req; 842 843 if (!fm->fc->dont_mask) 844 mode &= ~current_umask(); 845 846 flags &= ~O_NOCTTY; 847 memset(&inarg, 0, sizeof(inarg)); 848 memset(&outentry, 0, sizeof(outentry)); 849 inarg.flags = flags; 850 inarg.mode = mode; 851 inarg.umask = current_umask(); 852 853 if (fm->fc->handle_killpriv_v2 && trunc && 854 !(flags & O_EXCL) && !capable(CAP_FSETID)) { 855 inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID; 856 } 857 858 args.opcode = opcode; 859 args.nodeid = get_node_id(dir); 860 args.in_numargs = 2; 861 args.in_args[0].size = sizeof(inarg); 862 args.in_args[0].value = &inarg; 863 args.in_args[1].size = entry->d_name.len + 1; 864 args.in_args[1].value = entry->d_name.name; 865 args.out_numargs = 2; 866 args.out_args[0].size = sizeof(outentry); 867 args.out_args[0].value = &outentry; 868 /* Store outarg for fuse_finish_open() */ 869 outopenp = &ff->args->open_outarg; 870 args.out_args[1].size = sizeof(*outopenp); 871 args.out_args[1].value = outopenp; 872 873 err = get_create_ext(idmap, &args, dir, entry, mode); 874 if (err) 875 goto out_free_ff; 876 877 err = fuse_simple_idmap_request(idmap, fm, &args); 878 free_ext_value(&args); 879 if (err) 880 goto out_free_ff; 881 882 err = -EIO; 883 if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) || 884 fuse_invalid_attr(&outentry.attr)) 885 goto out_free_ff; 886 887 ff->fh = outopenp->fh; 888 ff->nodeid = outentry.nodeid; 889 ff->open_flags = outopenp->open_flags; 890 inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, 891 &outentry.attr, ATTR_TIMEOUT(&outentry), 0, 0); 892 if (!inode) { 893 flags &= ~(O_CREAT | O_EXCL | O_TRUNC); 894 fuse_sync_release(NULL, ff, flags); 895 fuse_chan_queue_forget(fm->fc->chan, forget, outentry.nodeid, 1); 896 err = -ENOMEM; 897 goto out_err; 898 } 899 kfree(forget); 900 d_instantiate(entry, inode); 901 entry->d_time = epoch; 902 fuse_change_entry_timeout(entry, &outentry); 903 fuse_dir_changed(dir); 904 err = generic_file_open(inode, file); 905 if (!err) { 906 file->private_data = ff; 907 err = finish_open(file, entry, fuse_finish_open); 908 } 909 if (err) { 910 fi = get_fuse_inode(inode); 911 fuse_sync_release(fi, ff, flags); 912 } else { 913 if (fm->fc->atomic_o_trunc && trunc) 914 truncate_pagecache(inode, 0); 915 else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) 916 invalidate_inode_pages2(inode->i_mapping); 917 } 918 return err; 919 920 out_free_ff: 921 fuse_file_free(ff); 922 out_put_forget_req: 923 kfree(forget); 924 out_err: 925 return err; 926 } 927 928 static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *, 929 umode_t, dev_t); 930 static int fuse_atomic_open(struct inode *dir, struct dentry *entry, 931 struct file *file, unsigned flags, 932 umode_t mode) 933 { 934 int err; 935 struct mnt_idmap *idmap = file_mnt_idmap(file); 936 struct fuse_conn *fc = get_fuse_conn(dir); 937 938 if (fuse_is_bad(dir)) 939 return -EIO; 940 941 if (d_in_lookup(entry)) { 942 struct dentry *res = fuse_lookup(dir, entry, 0); 943 if (res || d_really_is_positive(entry)) 944 return finish_no_open(file, res); 945 } 946 947 if (!(flags & O_CREAT)) 948 return finish_no_open(file, NULL); 949 950 /* Only creates */ 951 file->f_mode |= FMODE_CREATED; 952 953 if (fc->no_create) 954 goto mknod; 955 956 err = fuse_create_open(idmap, dir, entry, file, flags, mode, FUSE_CREATE); 957 if (err == -ENOSYS) { 958 fc->no_create = 1; 959 goto mknod; 960 } else if (err == -EEXIST) 961 fuse_invalidate_entry(entry); 962 return err; 963 964 mknod: 965 err = fuse_mknod(idmap, dir, entry, mode, 0); 966 if (err) 967 return err; 968 return finish_no_open(file, NULL); 969 } 970 971 /* 972 * Code shared between mknod, mkdir, symlink and link 973 */ 974 static struct dentry *create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm, 975 struct fuse_args *args, struct inode *dir, 976 struct dentry *entry, umode_t mode) 977 { 978 struct fuse_entry_out outarg; 979 struct inode *inode; 980 struct dentry *d; 981 struct fuse_forget_link *forget; 982 int epoch, err; 983 984 if (fuse_is_bad(dir)) 985 return ERR_PTR(-EIO); 986 987 epoch = atomic_read(&fm->fc->epoch); 988 989 forget = fuse_alloc_forget(); 990 if (!forget) 991 return ERR_PTR(-ENOMEM); 992 993 memset(&outarg, 0, sizeof(outarg)); 994 args->nodeid = get_node_id(dir); 995 args->out_numargs = 1; 996 args->out_args[0].size = sizeof(outarg); 997 args->out_args[0].value = &outarg; 998 999 if (args->opcode != FUSE_LINK) { 1000 err = get_create_ext(idmap, args, dir, entry, mode); 1001 if (err) 1002 goto out_put_forget_req; 1003 } 1004 1005 err = fuse_simple_idmap_request(idmap, fm, args); 1006 free_ext_value(args); 1007 if (err) 1008 goto out_put_forget_req; 1009 1010 err = -EIO; 1011 if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr)) 1012 goto out_put_forget_req; 1013 1014 if ((outarg.attr.mode ^ mode) & S_IFMT) 1015 goto out_put_forget_req; 1016 1017 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, 1018 &outarg.attr, ATTR_TIMEOUT(&outarg), 0, 0); 1019 if (!inode) { 1020 fuse_chan_queue_forget(fm->fc->chan, forget, outarg.nodeid, 1); 1021 return ERR_PTR(-ENOMEM); 1022 } 1023 kfree(forget); 1024 1025 d_drop(entry); 1026 d = d_splice_alias(inode, entry); 1027 if (IS_ERR(d)) 1028 return d; 1029 1030 if (d) { 1031 d->d_time = epoch; 1032 fuse_change_entry_timeout(d, &outarg); 1033 } else { 1034 entry->d_time = epoch; 1035 fuse_change_entry_timeout(entry, &outarg); 1036 } 1037 fuse_dir_changed(dir); 1038 return d; 1039 1040 out_put_forget_req: 1041 if (err == -EEXIST) 1042 fuse_invalidate_entry(entry); 1043 kfree(forget); 1044 return ERR_PTR(err); 1045 } 1046 1047 static int create_new_nondir(struct mnt_idmap *idmap, struct fuse_mount *fm, 1048 struct fuse_args *args, struct inode *dir, 1049 struct dentry *entry, umode_t mode) 1050 { 1051 /* 1052 * Note that when creating anything other than a directory we 1053 * can be sure create_new_entry() will NOT return an alternate 1054 * dentry as d_splice_alias() only returns an alternate dentry 1055 * for directories. So we don't need to check for that case 1056 * when passing back the result. 1057 */ 1058 WARN_ON_ONCE(S_ISDIR(mode)); 1059 1060 return PTR_ERR(create_new_entry(idmap, fm, args, dir, entry, mode)); 1061 } 1062 1063 static int fuse_mknod(struct mnt_idmap *idmap, struct inode *dir, 1064 struct dentry *entry, umode_t mode, dev_t rdev) 1065 { 1066 struct fuse_mknod_in inarg; 1067 struct fuse_mount *fm = get_fuse_mount(dir); 1068 FUSE_ARGS(args); 1069 1070 if (!fm->fc->dont_mask) 1071 mode &= ~current_umask(); 1072 1073 memset(&inarg, 0, sizeof(inarg)); 1074 inarg.mode = mode; 1075 inarg.rdev = new_encode_dev(rdev); 1076 inarg.umask = current_umask(); 1077 args.opcode = FUSE_MKNOD; 1078 args.in_numargs = 2; 1079 args.in_args[0].size = sizeof(inarg); 1080 args.in_args[0].value = &inarg; 1081 args.in_args[1].size = entry->d_name.len + 1; 1082 args.in_args[1].value = entry->d_name.name; 1083 return create_new_nondir(idmap, fm, &args, dir, entry, mode); 1084 } 1085 1086 static int fuse_create(struct mnt_idmap *idmap, struct inode *dir, 1087 struct dentry *entry, umode_t mode, bool excl) 1088 { 1089 return fuse_mknod(idmap, dir, entry, mode, 0); 1090 } 1091 1092 static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir, 1093 struct file *file, umode_t mode) 1094 { 1095 struct fuse_conn *fc = get_fuse_conn(dir); 1096 int err; 1097 1098 if (fc->no_tmpfile) 1099 return -EOPNOTSUPP; 1100 1101 err = fuse_create_open(idmap, dir, file->f_path.dentry, file, 1102 file->f_flags, mode, FUSE_TMPFILE); 1103 if (err == -ENOSYS) { 1104 fc->no_tmpfile = 1; 1105 err = -EOPNOTSUPP; 1106 } 1107 return err; 1108 } 1109 1110 static struct dentry *fuse_mkdir(struct mnt_idmap *idmap, struct inode *dir, 1111 struct dentry *entry, umode_t mode) 1112 { 1113 struct fuse_mkdir_in inarg; 1114 struct fuse_mount *fm = get_fuse_mount(dir); 1115 FUSE_ARGS(args); 1116 1117 if (!fm->fc->dont_mask) 1118 mode &= ~current_umask(); 1119 1120 memset(&inarg, 0, sizeof(inarg)); 1121 inarg.mode = mode; 1122 inarg.umask = current_umask(); 1123 args.opcode = FUSE_MKDIR; 1124 args.in_numargs = 2; 1125 args.in_args[0].size = sizeof(inarg); 1126 args.in_args[0].value = &inarg; 1127 args.in_args[1].size = entry->d_name.len + 1; 1128 args.in_args[1].value = entry->d_name.name; 1129 return create_new_entry(idmap, fm, &args, dir, entry, S_IFDIR); 1130 } 1131 1132 static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir, 1133 struct dentry *entry, const char *link) 1134 { 1135 struct fuse_mount *fm = get_fuse_mount(dir); 1136 unsigned len = strlen(link) + 1; 1137 FUSE_ARGS(args); 1138 1139 args.opcode = FUSE_SYMLINK; 1140 args.in_numargs = 3; 1141 fuse_set_zero_arg0(&args); 1142 args.in_args[1].size = entry->d_name.len + 1; 1143 args.in_args[1].value = entry->d_name.name; 1144 args.in_args[2].size = len; 1145 args.in_args[2].value = link; 1146 return create_new_nondir(idmap, fm, &args, dir, entry, S_IFLNK); 1147 } 1148 1149 void fuse_flush_time_update(struct inode *inode) 1150 { 1151 int err = sync_inode_metadata(inode, 1); 1152 1153 mapping_set_error(inode->i_mapping, err); 1154 } 1155 1156 static void fuse_update_ctime_in_cache(struct inode *inode) 1157 { 1158 if (!IS_NOCMTIME(inode)) { 1159 inode_set_ctime_current(inode); 1160 mark_inode_dirty_sync(inode); 1161 fuse_flush_time_update(inode); 1162 } 1163 } 1164 1165 void fuse_update_ctime(struct inode *inode) 1166 { 1167 fuse_invalidate_attr_mask(inode, STATX_CTIME); 1168 fuse_update_ctime_in_cache(inode); 1169 } 1170 1171 static void fuse_entry_unlinked(struct dentry *entry) 1172 { 1173 struct inode *inode = d_inode(entry); 1174 struct fuse_conn *fc = get_fuse_conn(inode); 1175 struct fuse_inode *fi = get_fuse_inode(inode); 1176 1177 spin_lock(&fi->lock); 1178 fi->attr_version = atomic64_inc_return(&fc->attr_version); 1179 /* 1180 * If i_nlink == 0 then unlink doesn't make sense, yet this can 1181 * happen if userspace filesystem is careless. It would be 1182 * difficult to enforce correct nlink usage so just ignore this 1183 * condition here 1184 */ 1185 if (S_ISDIR(inode->i_mode)) 1186 clear_nlink(inode); 1187 else if (inode->i_nlink > 0) 1188 drop_nlink(inode); 1189 spin_unlock(&fi->lock); 1190 fuse_invalidate_entry_cache(entry); 1191 fuse_update_ctime(inode); 1192 } 1193 1194 static int fuse_unlink(struct inode *dir, struct dentry *entry) 1195 { 1196 int err; 1197 struct fuse_mount *fm = get_fuse_mount(dir); 1198 FUSE_ARGS(args); 1199 1200 if (fuse_is_bad(dir)) 1201 return -EIO; 1202 1203 args.opcode = FUSE_UNLINK; 1204 args.nodeid = get_node_id(dir); 1205 args.in_numargs = 2; 1206 fuse_set_zero_arg0(&args); 1207 args.in_args[1].size = entry->d_name.len + 1; 1208 args.in_args[1].value = entry->d_name.name; 1209 err = fuse_simple_request(fm, &args); 1210 if (!err) { 1211 fuse_dir_changed(dir); 1212 fuse_entry_unlinked(entry); 1213 } else if (err == -EINTR || err == -ENOENT) 1214 fuse_invalidate_entry(entry); 1215 return err; 1216 } 1217 1218 static int fuse_rmdir(struct inode *dir, struct dentry *entry) 1219 { 1220 int err; 1221 struct fuse_mount *fm = get_fuse_mount(dir); 1222 FUSE_ARGS(args); 1223 1224 if (fuse_is_bad(dir)) 1225 return -EIO; 1226 1227 args.opcode = FUSE_RMDIR; 1228 args.nodeid = get_node_id(dir); 1229 args.in_numargs = 2; 1230 fuse_set_zero_arg0(&args); 1231 args.in_args[1].size = entry->d_name.len + 1; 1232 args.in_args[1].value = entry->d_name.name; 1233 err = fuse_simple_request(fm, &args); 1234 if (!err) { 1235 fuse_dir_changed(dir); 1236 fuse_entry_unlinked(entry); 1237 } else if (err == -EINTR || err == -ENOENT) 1238 fuse_invalidate_entry(entry); 1239 return err; 1240 } 1241 1242 static int fuse_rename_common(struct mnt_idmap *idmap, struct inode *olddir, struct dentry *oldent, 1243 struct inode *newdir, struct dentry *newent, 1244 unsigned int flags, int opcode, size_t argsize) 1245 { 1246 int err; 1247 struct fuse_rename2_in inarg; 1248 struct fuse_mount *fm = get_fuse_mount(olddir); 1249 FUSE_ARGS(args); 1250 1251 memset(&inarg, 0, argsize); 1252 inarg.newdir = get_node_id(newdir); 1253 inarg.flags = flags; 1254 args.opcode = opcode; 1255 args.nodeid = get_node_id(olddir); 1256 args.in_numargs = 3; 1257 args.in_args[0].size = argsize; 1258 args.in_args[0].value = &inarg; 1259 args.in_args[1].size = oldent->d_name.len + 1; 1260 args.in_args[1].value = oldent->d_name.name; 1261 args.in_args[2].size = newent->d_name.len + 1; 1262 args.in_args[2].value = newent->d_name.name; 1263 err = fuse_simple_idmap_request(idmap, fm, &args); 1264 if (!err) { 1265 /* ctime changes */ 1266 fuse_update_ctime(d_inode(oldent)); 1267 1268 if (flags & RENAME_EXCHANGE) 1269 fuse_update_ctime(d_inode(newent)); 1270 1271 fuse_dir_changed(olddir); 1272 if (olddir != newdir) 1273 fuse_dir_changed(newdir); 1274 1275 /* newent will end up negative */ 1276 if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) 1277 fuse_entry_unlinked(newent); 1278 } else if (err == -EINTR || err == -ENOENT) { 1279 /* If request was interrupted, DEITY only knows if the 1280 rename actually took place. If the invalidation 1281 fails (e.g. some process has CWD under the renamed 1282 directory), then there can be inconsistency between 1283 the dcache and the real filesystem. Tough luck. */ 1284 fuse_invalidate_entry(oldent); 1285 if (d_really_is_positive(newent)) 1286 fuse_invalidate_entry(newent); 1287 } 1288 1289 return err; 1290 } 1291 1292 static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir, 1293 struct dentry *oldent, struct inode *newdir, 1294 struct dentry *newent, unsigned int flags) 1295 { 1296 struct fuse_conn *fc = get_fuse_conn(olddir); 1297 int err; 1298 1299 if (fuse_is_bad(olddir)) 1300 return -EIO; 1301 1302 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) 1303 return -EINVAL; 1304 1305 if (flags) { 1306 if (fc->no_rename2 || fc->minor < 23) 1307 return -EINVAL; 1308 1309 err = fuse_rename_common((flags & RENAME_WHITEOUT) ? idmap : &invalid_mnt_idmap, 1310 olddir, oldent, newdir, newent, flags, 1311 FUSE_RENAME2, 1312 sizeof(struct fuse_rename2_in)); 1313 if (err == -ENOSYS) { 1314 fc->no_rename2 = 1; 1315 err = -EINVAL; 1316 } 1317 } else { 1318 err = fuse_rename_common(&invalid_mnt_idmap, olddir, oldent, newdir, newent, 0, 1319 FUSE_RENAME, 1320 sizeof(struct fuse_rename_in)); 1321 } 1322 1323 return err; 1324 } 1325 1326 static int fuse_link(struct dentry *entry, struct inode *newdir, 1327 struct dentry *newent) 1328 { 1329 int err; 1330 struct fuse_link_in inarg; 1331 struct inode *inode = d_inode(entry); 1332 struct fuse_mount *fm = get_fuse_mount(inode); 1333 FUSE_ARGS(args); 1334 1335 if (fm->fc->no_link) 1336 goto out; 1337 1338 memset(&inarg, 0, sizeof(inarg)); 1339 inarg.oldnodeid = get_node_id(inode); 1340 args.opcode = FUSE_LINK; 1341 args.in_numargs = 2; 1342 args.in_args[0].size = sizeof(inarg); 1343 args.in_args[0].value = &inarg; 1344 args.in_args[1].size = newent->d_name.len + 1; 1345 args.in_args[1].value = newent->d_name.name; 1346 err = create_new_nondir(&invalid_mnt_idmap, fm, &args, newdir, newent, inode->i_mode); 1347 if (!err) 1348 fuse_update_ctime_in_cache(inode); 1349 else if (err == -EINTR) 1350 fuse_invalidate_attr(inode); 1351 1352 if (err == -ENOSYS) 1353 fm->fc->no_link = 1; 1354 out: 1355 if (fm->fc->no_link) 1356 return -EPERM; 1357 1358 return err; 1359 } 1360 1361 static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode, 1362 struct fuse_attr *attr, struct kstat *stat) 1363 { 1364 unsigned int blkbits; 1365 struct fuse_conn *fc = get_fuse_conn(inode); 1366 vfsuid_t vfsuid = make_vfsuid(idmap, fc->user_ns, 1367 make_kuid(fc->user_ns, attr->uid)); 1368 vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns, 1369 make_kgid(fc->user_ns, attr->gid)); 1370 1371 stat->dev = inode->i_sb->s_dev; 1372 stat->ino = attr->ino; 1373 stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 1374 stat->nlink = attr->nlink; 1375 stat->uid = vfsuid_into_kuid(vfsuid); 1376 stat->gid = vfsgid_into_kgid(vfsgid); 1377 stat->rdev = inode->i_rdev; 1378 stat->atime.tv_sec = attr->atime; 1379 stat->atime.tv_nsec = attr->atimensec; 1380 stat->mtime.tv_sec = attr->mtime; 1381 stat->mtime.tv_nsec = attr->mtimensec; 1382 stat->ctime.tv_sec = attr->ctime; 1383 stat->ctime.tv_nsec = attr->ctimensec; 1384 stat->size = attr->size; 1385 stat->blocks = attr->blocks; 1386 1387 if (attr->blksize != 0) 1388 blkbits = ilog2(attr->blksize); 1389 else 1390 blkbits = inode->i_sb->s_blocksize_bits; 1391 1392 stat->blksize = 1 << blkbits; 1393 } 1394 1395 static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr) 1396 { 1397 memset(attr, 0, sizeof(*attr)); 1398 attr->ino = sx->ino; 1399 attr->size = sx->size; 1400 attr->blocks = sx->blocks; 1401 attr->atime = sx->atime.tv_sec; 1402 attr->mtime = sx->mtime.tv_sec; 1403 attr->ctime = sx->ctime.tv_sec; 1404 attr->atimensec = sx->atime.tv_nsec; 1405 attr->mtimensec = sx->mtime.tv_nsec; 1406 attr->ctimensec = sx->ctime.tv_nsec; 1407 attr->mode = sx->mode; 1408 attr->nlink = sx->nlink; 1409 attr->uid = sx->uid; 1410 attr->gid = sx->gid; 1411 attr->rdev = new_encode_dev(MKDEV(sx->rdev_major, sx->rdev_minor)); 1412 attr->blksize = sx->blksize; 1413 } 1414 1415 static int fuse_do_statx(struct mnt_idmap *idmap, struct inode *inode, 1416 struct file *file, struct kstat *stat) 1417 { 1418 int err; 1419 struct fuse_attr attr; 1420 struct fuse_statx *sx; 1421 struct fuse_statx_in inarg; 1422 struct fuse_statx_out outarg; 1423 struct fuse_mount *fm = get_fuse_mount(inode); 1424 u64 attr_version = fuse_get_attr_version(fm->fc); 1425 FUSE_ARGS(args); 1426 1427 memset(&inarg, 0, sizeof(inarg)); 1428 memset(&outarg, 0, sizeof(outarg)); 1429 /* Directories have separate file-handle space */ 1430 if (file && S_ISREG(inode->i_mode)) { 1431 struct fuse_file *ff = file->private_data; 1432 1433 inarg.getattr_flags |= FUSE_GETATTR_FH; 1434 inarg.fh = ff->fh; 1435 } 1436 /* For now leave sync hints as the default, request all stats. */ 1437 inarg.sx_flags = 0; 1438 inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME; 1439 args.opcode = FUSE_STATX; 1440 args.nodeid = get_node_id(inode); 1441 args.in_numargs = 1; 1442 args.in_args[0].size = sizeof(inarg); 1443 args.in_args[0].value = &inarg; 1444 args.out_numargs = 1; 1445 args.out_args[0].size = sizeof(outarg); 1446 args.out_args[0].value = &outarg; 1447 err = fuse_simple_request(fm, &args); 1448 if (err) 1449 return err; 1450 1451 sx = &outarg.stat; 1452 if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) || 1453 ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) || 1454 inode_wrong_type(inode, sx->mode)))) { 1455 fuse_make_bad(inode); 1456 return -EIO; 1457 } 1458 1459 fuse_statx_to_attr(&outarg.stat, &attr); 1460 if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) { 1461 fuse_change_attributes(inode, &attr, &outarg.stat, 1462 ATTR_TIMEOUT(&outarg), attr_version); 1463 } 1464 1465 if (stat) { 1466 stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME); 1467 stat->btime.tv_sec = sx->btime.tv_sec; 1468 stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1); 1469 fuse_fillattr(idmap, inode, &attr, stat); 1470 stat->result_mask |= STATX_TYPE; 1471 } 1472 1473 return 0; 1474 } 1475 1476 static int fuse_do_getattr(struct mnt_idmap *idmap, struct inode *inode, 1477 struct kstat *stat, struct file *file) 1478 { 1479 int err; 1480 struct fuse_getattr_in inarg; 1481 struct fuse_attr_out outarg; 1482 struct fuse_mount *fm = get_fuse_mount(inode); 1483 FUSE_ARGS(args); 1484 u64 attr_version; 1485 1486 attr_version = fuse_get_attr_version(fm->fc); 1487 1488 memset(&inarg, 0, sizeof(inarg)); 1489 memset(&outarg, 0, sizeof(outarg)); 1490 /* Directories have separate file-handle space */ 1491 if (file && S_ISREG(inode->i_mode)) { 1492 struct fuse_file *ff = file->private_data; 1493 1494 inarg.getattr_flags |= FUSE_GETATTR_FH; 1495 inarg.fh = ff->fh; 1496 } 1497 args.opcode = FUSE_GETATTR; 1498 args.nodeid = get_node_id(inode); 1499 args.in_numargs = 1; 1500 args.in_args[0].size = sizeof(inarg); 1501 args.in_args[0].value = &inarg; 1502 args.out_numargs = 1; 1503 args.out_args[0].size = sizeof(outarg); 1504 args.out_args[0].value = &outarg; 1505 err = fuse_simple_request(fm, &args); 1506 if (!err) { 1507 if (fuse_invalid_attr(&outarg.attr) || 1508 inode_wrong_type(inode, outarg.attr.mode)) { 1509 fuse_make_bad(inode); 1510 err = -EIO; 1511 } else { 1512 fuse_change_attributes(inode, &outarg.attr, NULL, 1513 ATTR_TIMEOUT(&outarg), 1514 attr_version); 1515 if (stat) 1516 fuse_fillattr(idmap, inode, &outarg.attr, stat); 1517 } 1518 } 1519 return err; 1520 } 1521 1522 static int fuse_update_get_attr(struct mnt_idmap *idmap, struct inode *inode, 1523 struct file *file, struct kstat *stat, 1524 u32 request_mask, unsigned int flags) 1525 { 1526 struct fuse_inode *fi = get_fuse_inode(inode); 1527 struct fuse_conn *fc = get_fuse_conn(inode); 1528 int err = 0; 1529 bool sync; 1530 u32 inval_mask = READ_ONCE(fi->inval_mask); 1531 u32 cache_mask = fuse_get_cache_mask(inode); 1532 1533 1534 /* FUSE only supports basic stats and possibly btime */ 1535 request_mask &= STATX_BASIC_STATS | STATX_BTIME; 1536 retry: 1537 if (fc->no_statx) 1538 request_mask &= STATX_BASIC_STATS; 1539 1540 if (!request_mask) 1541 sync = false; 1542 else if (flags & AT_STATX_FORCE_SYNC) 1543 sync = true; 1544 else if (flags & AT_STATX_DONT_SYNC) 1545 sync = false; 1546 else if (request_mask & inval_mask & ~cache_mask) 1547 sync = true; 1548 else 1549 sync = time_before64(fi->i_time, get_jiffies_64()); 1550 1551 if (sync) { 1552 forget_all_cached_acls(inode); 1553 /* Try statx if BTIME is requested */ 1554 if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) { 1555 err = fuse_do_statx(idmap, inode, file, stat); 1556 if (err == -ENOSYS) { 1557 fc->no_statx = 1; 1558 err = 0; 1559 goto retry; 1560 } 1561 } else { 1562 err = fuse_do_getattr(idmap, inode, stat, file); 1563 } 1564 } else if (stat) { 1565 generic_fillattr(idmap, request_mask, inode, stat); 1566 stat->mode = fi->orig_i_mode; 1567 stat->ino = fi->orig_ino; 1568 stat->blksize = 1 << fi->cached_i_blkbits; 1569 if (test_bit(FUSE_I_BTIME, &fi->state)) { 1570 stat->btime = fi->i_btime; 1571 stat->result_mask |= STATX_BTIME; 1572 } 1573 } 1574 1575 return err; 1576 } 1577 1578 int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask) 1579 { 1580 return fuse_update_get_attr(&nop_mnt_idmap, inode, file, NULL, mask, 0); 1581 } 1582 1583 int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, 1584 u64 child_nodeid, struct qstr *name, u32 flags) 1585 { 1586 int err = -ENOTDIR; 1587 struct inode *parent; 1588 struct dentry *dir; 1589 struct dentry *entry; 1590 1591 parent = fuse_ilookup(fc, parent_nodeid, NULL); 1592 if (!parent) 1593 return -ENOENT; 1594 1595 inode_lock_nested(parent, I_MUTEX_PARENT); 1596 if (!S_ISDIR(parent->i_mode)) 1597 goto unlock; 1598 1599 err = -ENOENT; 1600 dir = d_find_alias(parent); 1601 if (!dir) 1602 goto unlock; 1603 1604 name->hash = full_name_hash(dir, name->name, name->len); 1605 entry = d_lookup(dir, name); 1606 dput(dir); 1607 if (!entry) 1608 goto unlock; 1609 1610 fuse_dir_changed(parent); 1611 if (!(flags & FUSE_EXPIRE_ONLY)) 1612 d_invalidate(entry); 1613 fuse_invalidate_entry_cache(entry); 1614 1615 if (child_nodeid != 0 && d_really_is_positive(entry)) { 1616 inode_lock(d_inode(entry)); 1617 if (get_node_id(d_inode(entry)) != child_nodeid) { 1618 err = -ENOENT; 1619 goto badentry; 1620 } 1621 if (d_mountpoint(entry)) { 1622 err = -EBUSY; 1623 goto badentry; 1624 } 1625 if (d_is_dir(entry)) { 1626 shrink_dcache_parent(entry); 1627 if (!simple_empty(entry)) { 1628 err = -ENOTEMPTY; 1629 goto badentry; 1630 } 1631 d_inode(entry)->i_flags |= S_DEAD; 1632 } 1633 dont_mount(entry); 1634 clear_nlink(d_inode(entry)); 1635 err = 0; 1636 badentry: 1637 inode_unlock(d_inode(entry)); 1638 if (!err) 1639 d_delete(entry); 1640 } else { 1641 err = 0; 1642 } 1643 dput(entry); 1644 1645 unlock: 1646 inode_unlock(parent); 1647 iput(parent); 1648 return err; 1649 } 1650 1651 static inline bool fuse_permissible_uidgid(struct fuse_conn *fc) 1652 { 1653 const struct cred *cred = current_cred(); 1654 1655 return (uid_eq(cred->euid, fc->user_id) && 1656 uid_eq(cred->suid, fc->user_id) && 1657 uid_eq(cred->uid, fc->user_id) && 1658 gid_eq(cred->egid, fc->group_id) && 1659 gid_eq(cred->sgid, fc->group_id) && 1660 gid_eq(cred->gid, fc->group_id)); 1661 } 1662 1663 /* 1664 * Calling into a user-controlled filesystem gives the filesystem 1665 * daemon ptrace-like capabilities over the current process. This 1666 * means, that the filesystem daemon is able to record the exact 1667 * filesystem operations performed, and can also control the behavior 1668 * of the requester process in otherwise impossible ways. For example 1669 * it can delay the operation for arbitrary length of time allowing 1670 * DoS against the requester. 1671 * 1672 * For this reason only those processes can call into the filesystem, 1673 * for which the owner of the mount has ptrace privilege. This 1674 * excludes processes started by other users, suid or sgid processes. 1675 */ 1676 bool fuse_allow_current_process(struct fuse_conn *fc) 1677 { 1678 bool allow; 1679 1680 if (fc->allow_other) 1681 allow = current_in_userns(fc->user_ns); 1682 else 1683 allow = fuse_permissible_uidgid(fc); 1684 1685 if (!allow && allow_sys_admin_access && capable(CAP_SYS_ADMIN)) 1686 allow = true; 1687 1688 return allow; 1689 } 1690 1691 static int fuse_access(struct inode *inode, int mask) 1692 { 1693 struct fuse_mount *fm = get_fuse_mount(inode); 1694 FUSE_ARGS(args); 1695 struct fuse_access_in inarg; 1696 int err; 1697 1698 BUG_ON(mask & MAY_NOT_BLOCK); 1699 1700 /* 1701 * We should not send FUSE_ACCESS to the userspace 1702 * when idmapped mounts are enabled as for this case 1703 * we have fc->default_permissions = 1 and access 1704 * permission checks are done on the kernel side. 1705 */ 1706 WARN_ON_ONCE(!(fm->sb->s_iflags & SB_I_NOIDMAP)); 1707 1708 if (fm->fc->no_access) 1709 return 0; 1710 1711 memset(&inarg, 0, sizeof(inarg)); 1712 inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC); 1713 args.opcode = FUSE_ACCESS; 1714 args.nodeid = get_node_id(inode); 1715 args.in_numargs = 1; 1716 args.in_args[0].size = sizeof(inarg); 1717 args.in_args[0].value = &inarg; 1718 err = fuse_simple_request(fm, &args); 1719 if (err == -ENOSYS) { 1720 fm->fc->no_access = 1; 1721 err = 0; 1722 } 1723 return err; 1724 } 1725 1726 static int fuse_perm_getattr(struct inode *inode, int mask) 1727 { 1728 if (mask & MAY_NOT_BLOCK) 1729 return -ECHILD; 1730 1731 forget_all_cached_acls(inode); 1732 return fuse_do_getattr(&nop_mnt_idmap, inode, NULL, NULL); 1733 } 1734 1735 /* 1736 * Check permission. The two basic access models of FUSE are: 1737 * 1738 * 1) Local access checking ('default_permissions' mount option) based 1739 * on file mode. This is the plain old disk filesystem permission 1740 * model. 1741 * 1742 * 2) "Remote" access checking, where server is responsible for 1743 * checking permission in each inode operation. An exception to this 1744 * is if ->permission() was invoked from sys_access() in which case an 1745 * access request is sent. Execute permission is still checked 1746 * locally based on file mode. 1747 */ 1748 static int fuse_permission(struct mnt_idmap *idmap, 1749 struct inode *inode, int mask) 1750 { 1751 struct fuse_conn *fc = get_fuse_conn(inode); 1752 bool refreshed = false; 1753 int err = 0; 1754 1755 if (fuse_is_bad(inode)) 1756 return -EIO; 1757 1758 if (!fuse_allow_current_process(fc)) 1759 return -EACCES; 1760 1761 /* 1762 * If attributes are needed, refresh them before proceeding 1763 */ 1764 if (fc->default_permissions || 1765 ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) { 1766 struct fuse_inode *fi = get_fuse_inode(inode); 1767 u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID; 1768 1769 if (perm_mask & READ_ONCE(fi->inval_mask) || 1770 time_before64(fi->i_time, get_jiffies_64())) { 1771 refreshed = true; 1772 1773 err = fuse_perm_getattr(inode, mask); 1774 if (err) 1775 return err; 1776 } 1777 } 1778 1779 if (fc->default_permissions) { 1780 err = generic_permission(idmap, inode, mask); 1781 1782 /* If permission is denied, try to refresh file 1783 attributes. This is also needed, because the root 1784 node will at first have no permissions */ 1785 if (err == -EACCES && !refreshed) { 1786 err = fuse_perm_getattr(inode, mask); 1787 if (!err) 1788 err = generic_permission(idmap, 1789 inode, mask); 1790 } 1791 1792 /* Note: the opposite of the above test does not 1793 exist. So if permissions are revoked this won't be 1794 noticed immediately, only after the attribute 1795 timeout has expired */ 1796 } else if (mask & (MAY_ACCESS | MAY_CHDIR)) { 1797 err = fuse_access(inode, mask); 1798 } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { 1799 if (!(inode->i_mode & S_IXUGO)) { 1800 if (refreshed) 1801 return -EACCES; 1802 1803 err = fuse_perm_getattr(inode, mask); 1804 if (!err && !(inode->i_mode & S_IXUGO)) 1805 return -EACCES; 1806 } 1807 } 1808 return err; 1809 } 1810 1811 static int fuse_readlink_folio(struct inode *inode, struct folio *folio) 1812 { 1813 struct fuse_mount *fm = get_fuse_mount(inode); 1814 struct fuse_folio_desc desc = { .length = folio_size(folio) - 1 }; 1815 struct fuse_args_pages ap = { 1816 .num_folios = 1, 1817 .folios = &folio, 1818 .descs = &desc, 1819 }; 1820 char *link; 1821 ssize_t res; 1822 1823 ap.args.opcode = FUSE_READLINK; 1824 ap.args.nodeid = get_node_id(inode); 1825 ap.args.out_pages = true; 1826 ap.args.out_argvar = true; 1827 ap.args.page_zeroing = true; 1828 ap.args.out_numargs = 1; 1829 ap.args.out_args[0].size = desc.length; 1830 res = fuse_simple_request(fm, &ap.args); 1831 1832 fuse_invalidate_atime(inode); 1833 1834 if (res < 0) 1835 return res; 1836 1837 if (WARN_ON(res >= PAGE_SIZE)) 1838 return -EIO; 1839 1840 link = folio_address(folio); 1841 link[res] = '\0'; 1842 1843 return 0; 1844 } 1845 1846 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, 1847 struct delayed_call *callback) 1848 { 1849 struct fuse_conn *fc = get_fuse_conn(inode); 1850 struct folio *folio; 1851 int err; 1852 1853 err = -EIO; 1854 if (fuse_is_bad(inode)) 1855 goto out_err; 1856 1857 if (fc->cache_symlinks) 1858 return page_get_link_raw(dentry, inode, callback); 1859 1860 err = -ECHILD; 1861 if (!dentry) 1862 goto out_err; 1863 1864 folio = folio_alloc(GFP_KERNEL, 0); 1865 err = -ENOMEM; 1866 if (!folio) 1867 goto out_err; 1868 1869 err = fuse_readlink_folio(inode, folio); 1870 if (err) { 1871 folio_put(folio); 1872 goto out_err; 1873 } 1874 1875 set_delayed_call(callback, page_put_link, folio); 1876 1877 return folio_address(folio); 1878 1879 out_err: 1880 return ERR_PTR(err); 1881 } 1882 1883 static int fuse_dir_open(struct inode *inode, struct file *file) 1884 { 1885 struct fuse_mount *fm = get_fuse_mount(inode); 1886 int err; 1887 1888 if (fuse_is_bad(inode)) 1889 return -EIO; 1890 1891 err = generic_file_open(inode, file); 1892 if (err) 1893 return err; 1894 1895 err = fuse_do_open(fm, get_node_id(inode), file, true); 1896 if (!err) { 1897 struct fuse_file *ff = file->private_data; 1898 1899 /* 1900 * Keep handling FOPEN_STREAM and FOPEN_NONSEEKABLE for 1901 * directories for backward compatibility, though it's unlikely 1902 * to be useful. 1903 */ 1904 if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE)) 1905 nonseekable_open(inode, file); 1906 if (!(ff->open_flags & FOPEN_KEEP_CACHE)) 1907 invalidate_inode_pages2(inode->i_mapping); 1908 } 1909 1910 return err; 1911 } 1912 1913 static int fuse_dir_release(struct inode *inode, struct file *file) 1914 { 1915 fuse_release_common(file, true); 1916 1917 return 0; 1918 } 1919 1920 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end, 1921 int datasync) 1922 { 1923 struct inode *inode = file->f_mapping->host; 1924 struct fuse_conn *fc = get_fuse_conn(inode); 1925 int err; 1926 1927 if (fuse_is_bad(inode)) 1928 return -EIO; 1929 1930 if (fc->no_fsyncdir) 1931 return 0; 1932 1933 inode_lock(inode); 1934 err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR); 1935 if (err == -ENOSYS) { 1936 fc->no_fsyncdir = 1; 1937 err = 0; 1938 } 1939 inode_unlock(inode); 1940 1941 return err; 1942 } 1943 1944 static long fuse_dir_ioctl(struct file *file, unsigned int cmd, 1945 unsigned long arg) 1946 { 1947 struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host); 1948 1949 /* FUSE_IOCTL_DIR only supported for API version >= 7.18 */ 1950 if (fc->minor < 18) 1951 return -ENOTTY; 1952 1953 return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR); 1954 } 1955 1956 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd, 1957 unsigned long arg) 1958 { 1959 struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host); 1960 1961 if (fc->minor < 18) 1962 return -ENOTTY; 1963 1964 return fuse_ioctl_common(file, cmd, arg, 1965 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR); 1966 } 1967 1968 static bool update_mtime(unsigned ivalid, bool trust_local_mtime) 1969 { 1970 /* Always update if mtime is explicitly set */ 1971 if (ivalid & ATTR_MTIME_SET) 1972 return true; 1973 1974 /* Or if kernel i_mtime is the official one */ 1975 if (trust_local_mtime) 1976 return true; 1977 1978 /* If it's an open(O_TRUNC) or an ftruncate(), don't update */ 1979 if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE))) 1980 return false; 1981 1982 /* In all other cases update */ 1983 return true; 1984 } 1985 1986 static void iattr_to_fattr(struct mnt_idmap *idmap, struct fuse_conn *fc, 1987 struct iattr *iattr, struct fuse_setattr_in *arg, 1988 bool trust_local_cmtime) 1989 { 1990 unsigned ivalid = iattr->ia_valid; 1991 1992 if (ivalid & ATTR_MODE) 1993 arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode; 1994 1995 if (ivalid & ATTR_UID) { 1996 kuid_t fsuid = from_vfsuid(idmap, fc->user_ns, iattr->ia_vfsuid); 1997 1998 arg->valid |= FATTR_UID; 1999 arg->uid = from_kuid(fc->user_ns, fsuid); 2000 } 2001 2002 if (ivalid & ATTR_GID) { 2003 kgid_t fsgid = from_vfsgid(idmap, fc->user_ns, iattr->ia_vfsgid); 2004 2005 arg->valid |= FATTR_GID; 2006 arg->gid = from_kgid(fc->user_ns, fsgid); 2007 } 2008 2009 if (ivalid & ATTR_SIZE) 2010 arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size; 2011 if (ivalid & ATTR_ATIME) { 2012 arg->valid |= FATTR_ATIME; 2013 arg->atime = iattr->ia_atime.tv_sec; 2014 arg->atimensec = iattr->ia_atime.tv_nsec; 2015 if (!(ivalid & ATTR_ATIME_SET)) 2016 arg->valid |= FATTR_ATIME_NOW; 2017 } 2018 if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) { 2019 arg->valid |= FATTR_MTIME; 2020 arg->mtime = iattr->ia_mtime.tv_sec; 2021 arg->mtimensec = iattr->ia_mtime.tv_nsec; 2022 if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime) 2023 arg->valid |= FATTR_MTIME_NOW; 2024 } 2025 if ((ivalid & ATTR_CTIME) && trust_local_cmtime) { 2026 arg->valid |= FATTR_CTIME; 2027 arg->ctime = iattr->ia_ctime.tv_sec; 2028 arg->ctimensec = iattr->ia_ctime.tv_nsec; 2029 } 2030 } 2031 2032 /* 2033 * Prevent concurrent writepages on inode 2034 * 2035 * This is done by adding a negative bias to the inode write counter 2036 * and waiting for all pending writes to finish. 2037 */ 2038 void fuse_set_nowrite(struct inode *inode) 2039 { 2040 struct fuse_inode *fi = get_fuse_inode(inode); 2041 2042 BUG_ON(!inode_is_locked(inode)); 2043 2044 spin_lock(&fi->lock); 2045 BUG_ON(fi->writectr < 0); 2046 fi->writectr += FUSE_NOWRITE; 2047 spin_unlock(&fi->lock); 2048 wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE); 2049 } 2050 2051 /* 2052 * Allow writepages on inode 2053 * 2054 * Remove the bias from the writecounter and send any queued 2055 * writepages. 2056 */ 2057 static void __fuse_release_nowrite(struct inode *inode) 2058 { 2059 struct fuse_inode *fi = get_fuse_inode(inode); 2060 2061 BUG_ON(fi->writectr != FUSE_NOWRITE); 2062 fi->writectr = 0; 2063 fuse_flush_writepages(inode); 2064 } 2065 2066 void fuse_release_nowrite(struct inode *inode) 2067 { 2068 struct fuse_inode *fi = get_fuse_inode(inode); 2069 2070 spin_lock(&fi->lock); 2071 __fuse_release_nowrite(inode); 2072 spin_unlock(&fi->lock); 2073 } 2074 2075 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args, 2076 struct inode *inode, 2077 struct fuse_setattr_in *inarg_p, 2078 struct fuse_attr_out *outarg_p) 2079 { 2080 args->opcode = FUSE_SETATTR; 2081 args->nodeid = get_node_id(inode); 2082 args->in_numargs = 1; 2083 args->in_args[0].size = sizeof(*inarg_p); 2084 args->in_args[0].value = inarg_p; 2085 args->out_numargs = 1; 2086 args->out_args[0].size = sizeof(*outarg_p); 2087 args->out_args[0].value = outarg_p; 2088 } 2089 2090 /* 2091 * Flush inode->i_mtime to the server 2092 */ 2093 int fuse_flush_times(struct inode *inode, struct fuse_file *ff) 2094 { 2095 struct fuse_mount *fm = get_fuse_mount(inode); 2096 FUSE_ARGS(args); 2097 struct fuse_setattr_in inarg; 2098 struct fuse_attr_out outarg; 2099 2100 memset(&inarg, 0, sizeof(inarg)); 2101 memset(&outarg, 0, sizeof(outarg)); 2102 2103 inarg.valid = FATTR_MTIME; 2104 inarg.mtime = inode_get_mtime_sec(inode); 2105 inarg.mtimensec = inode_get_mtime_nsec(inode); 2106 if (fm->fc->minor >= 23) { 2107 inarg.valid |= FATTR_CTIME; 2108 inarg.ctime = inode_get_ctime_sec(inode); 2109 inarg.ctimensec = inode_get_ctime_nsec(inode); 2110 } 2111 if (ff) { 2112 inarg.valid |= FATTR_FH; 2113 inarg.fh = ff->fh; 2114 } 2115 fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg); 2116 2117 return fuse_simple_request(fm, &args); 2118 } 2119 2120 /* 2121 * Set attributes, and at the same time refresh them. 2122 * 2123 * Truncation is slightly complicated, because the 'truncate' request 2124 * may fail, in which case we don't want to touch the mapping. 2125 * vmtruncate() doesn't allow for this case, so do the rlimit checking 2126 * and the actual truncation by hand. 2127 */ 2128 int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry, 2129 struct iattr *attr, struct file *file) 2130 { 2131 struct inode *inode = d_inode(dentry); 2132 struct fuse_mount *fm = get_fuse_mount(inode); 2133 struct fuse_conn *fc = fm->fc; 2134 struct fuse_inode *fi = get_fuse_inode(inode); 2135 struct address_space *mapping = inode->i_mapping; 2136 FUSE_ARGS(args); 2137 struct fuse_setattr_in inarg; 2138 struct fuse_attr_out outarg; 2139 bool is_truncate = false; 2140 bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode); 2141 loff_t oldsize; 2142 int err; 2143 bool trust_local_cmtime = is_wb; 2144 bool fault_blocked = false; 2145 u64 attr_version; 2146 2147 if (!fc->default_permissions) 2148 attr->ia_valid |= ATTR_FORCE; 2149 2150 err = setattr_prepare(idmap, dentry, attr); 2151 if (err) 2152 return err; 2153 2154 if (attr->ia_valid & ATTR_SIZE) { 2155 if (WARN_ON(!S_ISREG(inode->i_mode))) 2156 return -EIO; 2157 is_truncate = true; 2158 } 2159 2160 if (FUSE_IS_DAX(inode) && is_truncate) { 2161 filemap_invalidate_lock(mapping); 2162 fault_blocked = true; 2163 err = fuse_dax_break_layouts(inode, 0, -1); 2164 if (err) { 2165 filemap_invalidate_unlock(mapping); 2166 return err; 2167 } 2168 } 2169 2170 if (attr->ia_valid & ATTR_OPEN) { 2171 /* This is coming from open(..., ... | O_TRUNC); */ 2172 WARN_ON(!(attr->ia_valid & ATTR_SIZE)); 2173 WARN_ON(attr->ia_size != 0); 2174 if (fc->atomic_o_trunc) { 2175 /* 2176 * No need to send request to userspace, since actual 2177 * truncation has already been done by OPEN. But still 2178 * need to truncate page cache. 2179 */ 2180 i_size_write(inode, 0); 2181 truncate_pagecache(inode, 0); 2182 goto out; 2183 } 2184 file = NULL; 2185 } 2186 2187 /* Flush dirty data/metadata before non-truncate SETATTR */ 2188 if (is_wb && 2189 attr->ia_valid & 2190 (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET | 2191 ATTR_TIMES_SET)) { 2192 err = write_inode_now(inode, true); 2193 if (err) 2194 return err; 2195 2196 fuse_set_nowrite(inode); 2197 fuse_release_nowrite(inode); 2198 } 2199 2200 if (is_truncate) { 2201 fuse_set_nowrite(inode); 2202 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 2203 if (trust_local_cmtime && attr->ia_size != inode->i_size) 2204 attr->ia_valid |= ATTR_MTIME | ATTR_CTIME; 2205 } 2206 2207 memset(&inarg, 0, sizeof(inarg)); 2208 memset(&outarg, 0, sizeof(outarg)); 2209 iattr_to_fattr(idmap, fc, attr, &inarg, trust_local_cmtime); 2210 if (file) { 2211 struct fuse_file *ff = file->private_data; 2212 inarg.valid |= FATTR_FH; 2213 inarg.fh = ff->fh; 2214 } 2215 2216 /* Kill suid/sgid for non-directory chown unconditionally */ 2217 if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) && 2218 attr->ia_valid & (ATTR_UID | ATTR_GID)) 2219 inarg.valid |= FATTR_KILL_SUIDGID; 2220 2221 if (attr->ia_valid & ATTR_SIZE) { 2222 /* For mandatory locking in truncate */ 2223 inarg.valid |= FATTR_LOCKOWNER; 2224 inarg.lock_owner = fuse_lock_owner_id(fc, current->files); 2225 2226 /* Kill suid/sgid for truncate only if no CAP_FSETID */ 2227 if (fc->handle_killpriv_v2 && !capable(CAP_FSETID)) 2228 inarg.valid |= FATTR_KILL_SUIDGID; 2229 } 2230 2231 attr_version = fuse_get_attr_version(fm->fc); 2232 fuse_setattr_fill(fc, &args, inode, &inarg, &outarg); 2233 err = fuse_simple_request(fm, &args); 2234 if (err) { 2235 if (err == -EINTR) 2236 fuse_invalidate_attr(inode); 2237 goto error; 2238 } 2239 2240 if (fuse_invalid_attr(&outarg.attr) || 2241 inode_wrong_type(inode, outarg.attr.mode)) { 2242 fuse_make_bad(inode); 2243 err = -EIO; 2244 goto error; 2245 } 2246 2247 spin_lock(&fi->lock); 2248 /* the kernel maintains i_mtime locally */ 2249 if (trust_local_cmtime) { 2250 if (attr->ia_valid & ATTR_MTIME) 2251 inode_set_mtime_to_ts(inode, attr->ia_mtime); 2252 if (attr->ia_valid & ATTR_CTIME) 2253 inode_set_ctime_to_ts(inode, attr->ia_ctime); 2254 /* FIXME: clear I_DIRTY_SYNC? */ 2255 } 2256 2257 if (fi->attr_version > attr_version) { 2258 /* 2259 * Apply attributes, for example for fsnotify_change(), but set 2260 * attribute timeout to zero. 2261 */ 2262 outarg.attr_valid = outarg.attr_valid_nsec = 0; 2263 } 2264 2265 fuse_change_attributes_common(inode, &outarg.attr, NULL, 2266 ATTR_TIMEOUT(&outarg), 2267 fuse_get_cache_mask(inode), 0); 2268 oldsize = inode->i_size; 2269 /* see the comment in fuse_change_attributes() */ 2270 if (!is_wb || is_truncate) 2271 i_size_write(inode, outarg.attr.size); 2272 2273 if (is_truncate) { 2274 /* NOTE: this may release/reacquire fi->lock */ 2275 __fuse_release_nowrite(inode); 2276 } 2277 spin_unlock(&fi->lock); 2278 2279 /* 2280 * Only call invalidate_inode_pages2() after removing 2281 * FUSE_NOWRITE, otherwise fuse_launder_folio() would deadlock. 2282 */ 2283 if ((is_truncate || !is_wb) && 2284 S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { 2285 truncate_pagecache(inode, outarg.attr.size); 2286 invalidate_inode_pages2(mapping); 2287 } 2288 2289 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 2290 out: 2291 if (fault_blocked) 2292 filemap_invalidate_unlock(mapping); 2293 2294 return 0; 2295 2296 error: 2297 if (is_truncate) 2298 fuse_release_nowrite(inode); 2299 2300 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 2301 2302 if (fault_blocked) 2303 filemap_invalidate_unlock(mapping); 2304 return err; 2305 } 2306 2307 static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry, 2308 struct iattr *attr) 2309 { 2310 struct inode *inode = d_inode(entry); 2311 struct fuse_conn *fc = get_fuse_conn(inode); 2312 struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL; 2313 int ret; 2314 2315 if (fuse_is_bad(inode)) 2316 return -EIO; 2317 2318 if (!fuse_allow_current_process(get_fuse_conn(inode))) 2319 return -EACCES; 2320 2321 if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) { 2322 attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID | 2323 ATTR_MODE); 2324 2325 /* 2326 * The only sane way to reliably kill suid/sgid is to do it in 2327 * the userspace filesystem 2328 * 2329 * This should be done on write(), truncate() and chown(). 2330 */ 2331 if (!fc->handle_killpriv && !fc->handle_killpriv_v2) { 2332 /* 2333 * ia_mode calculation may have used stale i_mode. 2334 * Refresh and recalculate. 2335 */ 2336 ret = fuse_do_getattr(idmap, inode, NULL, file); 2337 if (ret) 2338 return ret; 2339 2340 attr->ia_mode = inode->i_mode; 2341 if (inode->i_mode & S_ISUID) { 2342 attr->ia_valid |= ATTR_MODE; 2343 attr->ia_mode &= ~S_ISUID; 2344 } 2345 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { 2346 attr->ia_valid |= ATTR_MODE; 2347 attr->ia_mode &= ~S_ISGID; 2348 } 2349 } 2350 } 2351 if (!attr->ia_valid) 2352 return 0; 2353 2354 ret = fuse_do_setattr(idmap, entry, attr, file); 2355 if (!ret) { 2356 /* 2357 * If filesystem supports acls it may have updated acl xattrs in 2358 * the filesystem, so forget cached acls for the inode. 2359 */ 2360 if (fc->posix_acl) 2361 forget_all_cached_acls(inode); 2362 2363 /* Directory mode changed, may need to revalidate access */ 2364 if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE)) 2365 fuse_invalidate_entry_cache(entry); 2366 } 2367 return ret; 2368 } 2369 2370 static int fuse_getattr(struct mnt_idmap *idmap, 2371 const struct path *path, struct kstat *stat, 2372 u32 request_mask, unsigned int flags) 2373 { 2374 struct inode *inode = d_inode(path->dentry); 2375 struct fuse_conn *fc = get_fuse_conn(inode); 2376 2377 if (fuse_is_bad(inode)) 2378 return -EIO; 2379 2380 if (!fuse_allow_current_process(fc)) { 2381 if (!request_mask) { 2382 /* 2383 * If user explicitly requested *nothing* then don't 2384 * error out, but return st_dev only. 2385 */ 2386 stat->result_mask = 0; 2387 stat->dev = inode->i_sb->s_dev; 2388 return 0; 2389 } 2390 return -EACCES; 2391 } 2392 2393 return fuse_update_get_attr(idmap, inode, NULL, stat, request_mask, flags); 2394 } 2395 2396 static const struct inode_operations fuse_dir_inode_operations = { 2397 .lookup = fuse_lookup, 2398 .mkdir = fuse_mkdir, 2399 .symlink = fuse_symlink, 2400 .unlink = fuse_unlink, 2401 .rmdir = fuse_rmdir, 2402 .rename = fuse_rename2, 2403 .link = fuse_link, 2404 .setattr = fuse_setattr, 2405 .create = fuse_create, 2406 .atomic_open = fuse_atomic_open, 2407 .tmpfile = fuse_tmpfile, 2408 .mknod = fuse_mknod, 2409 .permission = fuse_permission, 2410 .getattr = fuse_getattr, 2411 .listxattr = fuse_listxattr, 2412 .get_inode_acl = fuse_get_inode_acl, 2413 .get_acl = fuse_get_acl, 2414 .set_acl = fuse_set_acl, 2415 .fileattr_get = fuse_fileattr_get, 2416 .fileattr_set = fuse_fileattr_set, 2417 }; 2418 2419 static const struct file_operations fuse_dir_operations = { 2420 .llseek = generic_file_llseek, 2421 .read = generic_read_dir, 2422 .iterate_shared = fuse_readdir, 2423 .open = fuse_dir_open, 2424 .release = fuse_dir_release, 2425 .fsync = fuse_dir_fsync, 2426 .unlocked_ioctl = fuse_dir_ioctl, 2427 .compat_ioctl = fuse_dir_compat_ioctl, 2428 }; 2429 2430 static const struct inode_operations fuse_common_inode_operations = { 2431 .setattr = fuse_setattr, 2432 .permission = fuse_permission, 2433 .getattr = fuse_getattr, 2434 .listxattr = fuse_listxattr, 2435 .get_inode_acl = fuse_get_inode_acl, 2436 .get_acl = fuse_get_acl, 2437 .set_acl = fuse_set_acl, 2438 .fileattr_get = fuse_fileattr_get, 2439 .fileattr_set = fuse_fileattr_set, 2440 }; 2441 2442 static const struct inode_operations fuse_symlink_inode_operations = { 2443 .setattr = fuse_setattr, 2444 .get_link = fuse_get_link, 2445 .getattr = fuse_getattr, 2446 .listxattr = fuse_listxattr, 2447 }; 2448 2449 void fuse_init_common(struct inode *inode) 2450 { 2451 inode->i_op = &fuse_common_inode_operations; 2452 } 2453 2454 void fuse_init_dir(struct inode *inode) 2455 { 2456 struct fuse_inode *fi = get_fuse_inode(inode); 2457 2458 inode->i_op = &fuse_dir_inode_operations; 2459 inode->i_fop = &fuse_dir_operations; 2460 2461 spin_lock_init(&fi->rdc.lock); 2462 fi->rdc.cached = false; 2463 fi->rdc.size = 0; 2464 fi->rdc.pos = 0; 2465 fi->rdc.version = 0; 2466 } 2467 2468 static int fuse_symlink_read_folio(struct file *null, struct folio *folio) 2469 { 2470 int err = fuse_readlink_folio(folio->mapping->host, folio); 2471 2472 if (!err) 2473 folio_mark_uptodate(folio); 2474 2475 folio_unlock(folio); 2476 2477 return err; 2478 } 2479 2480 static const struct address_space_operations fuse_symlink_aops = { 2481 .read_folio = fuse_symlink_read_folio, 2482 }; 2483 2484 void fuse_init_symlink(struct inode *inode) 2485 { 2486 inode->i_op = &fuse_symlink_inode_operations; 2487 inode->i_data.a_ops = &fuse_symlink_aops; 2488 inode_nohighmem(inode); 2489 } 2490