1 /*
2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
4
5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING.
7 */
8
9 #include "fuse_i.h"
10
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/fs_context.h>
14 #include <linux/moduleparam.h>
15 #include <linux/sched.h>
16 #include <linux/namei.h>
17 #include <linux/slab.h>
18 #include <linux/xattr.h>
19 #include <linux/iversion.h>
20 #include <linux/posix_acl.h>
21 #include <linux/security.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24
/*
 * Module knob: when set, processes holding CAP_SYS_ADMIN in the initial
 * user namespace skip the "allow_other" mount-option access check.
 */
static bool __read_mostly allow_sys_admin_access;
module_param(allow_sys_admin_access, bool, 0644);
MODULE_PARM_DESC(allow_sys_admin_access,
		 "Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check");
29
/*
 * Hash table of rb-trees tracking dentries that carry a validity timeout,
 * so a periodic worker can evict the expired ones.  Buckets are selected
 * by hashing the dentry pointer (see get_dentry_bucket()).
 */
struct dentry_bucket {
	struct rb_root tree;	/* dentries ordered by expiry time (fd->time) */
	spinlock_t lock;	/* protects @tree */
};

#define HASH_BITS 5
#define HASH_SIZE (1 << HASH_BITS)
static struct dentry_bucket dentry_hash[HASH_SIZE];
/* Periodic worker (fuse_dentry_tree_work) that prunes expired dentries */
struct delayed_work dentry_tree_work;

/* Minimum invalidation work queue frequency */
#define FUSE_DENTRY_INVAL_FREQ_MIN 5

/* Worker period in seconds; 0 disables the worker entirely */
unsigned __read_mostly inval_wq;
/*
 * Setter for the "inval_wq" module parameter.
 *
 * Accepts 0 (disable the invalidation worker) or a period of at least
 * FUSE_DENTRY_INVAL_FREQ_MIN seconds, capped at USHRT_MAX.  Starts the
 * delayed work on a 0 -> non-zero transition and synchronously cancels
 * it on a non-zero -> 0 transition.
 */
static int inval_wq_set(const char *val, const struct kernel_param *kp)
{
	unsigned int num;
	unsigned int old = inval_wq;
	int ret;

	if (!val)
		return -EINVAL;

	ret = kstrtouint(val, 0, &num);
	if (ret)
		return ret;

	/* 0 means "disabled"; any other value must honour the minimum */
	if ((num < FUSE_DENTRY_INVAL_FREQ_MIN) && (num != 0))
		return -EINVAL;

	/* This should prevent overflow in secs_to_jiffies() */
	if (num > USHRT_MAX)
		return -EINVAL;

	/* Publish the new period before toggling the worker */
	*((unsigned int *)kp->arg) = num;

	if (num && !old)
		schedule_delayed_work(&dentry_tree_work,
				      secs_to_jiffies(num));
	else if (!num && old)
		cancel_delayed_work_sync(&dentry_tree_work);

	return 0;
}
/* Custom param ops: validated set (above), stock unsigned-int get. */
static const struct kernel_param_ops inval_wq_ops = {
	.set = inval_wq_set,
	.get = param_get_uint,
};
module_param_cb(inval_wq, &inval_wq_ops, &inval_wq, 0644);
__MODULE_PARM_TYPE(inval_wq, "uint");
MODULE_PARM_DESC(inval_wq,
		 "Dentries invalidation work queue period in secs (>= "
		 __stringify(FUSE_DENTRY_INVAL_FREQ_MIN) ").");
83
get_dentry_bucket(struct dentry * dentry)84 static inline struct dentry_bucket *get_dentry_bucket(struct dentry *dentry)
85 {
86 int i = hash_ptr(dentry, HASH_BITS);
87
88 return &dentry_hash[i];
89 }
90
fuse_advise_use_readdirplus(struct inode * dir)91 static void fuse_advise_use_readdirplus(struct inode *dir)
92 {
93 struct fuse_inode *fi = get_fuse_inode(dir);
94
95 set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
96 }
97
/* Per-dentry FUSE state, hung off dentry->d_fsdata. */
struct fuse_dentry {
	/* jiffies64 timestamp until which the dentry is valid; 0 = stale */
	u64 time;
	union {
		/* for RCU-deferred freeing in fuse_dentry_release() */
		struct rcu_head rcu;
		/* node in the per-bucket expiry tree, keyed by @time */
		struct rb_node node;
	};
	/* back-pointer to the owning dentry */
	struct dentry *dentry;
};
106
__fuse_dentry_tree_del_node(struct fuse_dentry * fd,struct dentry_bucket * bucket)107 static void __fuse_dentry_tree_del_node(struct fuse_dentry *fd,
108 struct dentry_bucket *bucket)
109 {
110 if (!RB_EMPTY_NODE(&fd->node)) {
111 rb_erase(&fd->node, &bucket->tree);
112 RB_CLEAR_NODE(&fd->node);
113 }
114 }
115
/* Remove @dentry from its invalidation bucket, taking the bucket lock. */
static void fuse_dentry_tree_del_node(struct dentry *dentry)
{
	struct fuse_dentry *fd = dentry->d_fsdata;
	struct dentry_bucket *bucket = get_dentry_bucket(dentry);

	spin_lock(&bucket->lock);
	__fuse_dentry_tree_del_node(fd, bucket);
	spin_unlock(&bucket->lock);
}
125
/*
 * (Re-)insert @dentry into its bucket's expiry tree, ordered by fd->time.
 * A node already present is first unlinked so its position reflects the
 * current timeout.  No-op while the invalidation worker is disabled.
 */
static void fuse_dentry_tree_add_node(struct dentry *dentry)
{
	struct fuse_dentry *fd = dentry->d_fsdata;
	struct dentry_bucket *bucket;
	struct fuse_dentry *cur;
	struct rb_node **p, *parent = NULL;

	if (!inval_wq)
		return;

	bucket = get_dentry_bucket(dentry);

	spin_lock(&bucket->lock);

	/* drop any stale position before re-inserting with the new time */
	__fuse_dentry_tree_del_node(fd, bucket);

	p = &bucket->tree.rb_node;
	while (*p) {
		parent = *p;
		cur = rb_entry(*p, struct fuse_dentry, node);
		if (fd->time < cur->time)
			p = &(*p)->rb_left;
		else
			p = &(*p)->rb_right;
	}
	rb_link_node(&fd->node, parent, p);
	rb_insert_color(&fd->node, &bucket->tree);
	spin_unlock(&bucket->lock);
}
155
156 /*
157 * work queue which, when enabled, will periodically check for expired dentries
158 * in the dentries tree.
159 */
/*
 * work queue which, when enabled, will periodically check for expired dentries
 * in the dentries tree.
 *
 * For each bucket, repeatedly pop the leftmost (oldest) node while it has
 * expired.  The bucket lock is dropped around d_dispose_if_unused() and
 * cond_resched(), so the scan restarts from rb_first() after each drop
 * because the tree may have changed meanwhile.
 */
static void fuse_dentry_tree_work(struct work_struct *work)
{
	LIST_HEAD(dispose);
	struct fuse_dentry *fd;
	struct rb_node *node;
	int i;

	for (i = 0; i < HASH_SIZE; i++) {
		spin_lock(&dentry_hash[i].lock);
		node = rb_first(&dentry_hash[i].tree);
		while (node) {
			fd = rb_entry(node, struct fuse_dentry, node);
			if (time_after64(get_jiffies_64(), fd->time)) {
				rb_erase(&fd->node, &dentry_hash[i].tree);
				RB_CLEAR_NODE(&fd->node);
				spin_unlock(&dentry_hash[i].lock);
				d_dispose_if_unused(fd->dentry, &dispose);
				cond_resched();
				spin_lock(&dentry_hash[i].lock);
			} else
				break;	/* leftmost not expired => none are */
			node = rb_first(&dentry_hash[i].tree);
		}
		spin_unlock(&dentry_hash[i].lock);
		shrink_dentry_list(&dispose);
	}

	/* re-arm unless the parameter was cleared meanwhile */
	if (inval_wq)
		schedule_delayed_work(&dentry_tree_work,
				      secs_to_jiffies(inval_wq));
}
191
fuse_epoch_work(struct work_struct * work)192 void fuse_epoch_work(struct work_struct *work)
193 {
194 struct fuse_conn *fc = container_of(work, struct fuse_conn,
195 epoch_work);
196 struct fuse_mount *fm;
197 struct inode *inode;
198
199 down_read(&fc->killsb);
200
201 inode = fuse_ilookup(fc, FUSE_ROOT_ID, &fm);
202 if (inode) {
203 iput(inode);
204 /* Remove all possible active references to cached inodes */
205 shrink_dcache_sb(fm->sb);
206 } else
207 pr_warn("Failed to get root inode");
208
209 up_read(&fc->killsb);
210 }
211
fuse_dentry_tree_init(void)212 void fuse_dentry_tree_init(void)
213 {
214 int i;
215
216 for (i = 0; i < HASH_SIZE; i++) {
217 spin_lock_init(&dentry_hash[i].lock);
218 dentry_hash[i].tree = RB_ROOT;
219 }
220 INIT_DELAYED_WORK(&dentry_tree_work, fuse_dentry_tree_work);
221 }
222
fuse_dentry_tree_cleanup(void)223 void fuse_dentry_tree_cleanup(void)
224 {
225 int i;
226
227 inval_wq = 0;
228 cancel_delayed_work_sync(&dentry_tree_work);
229
230 for (i = 0; i < HASH_SIZE; i++)
231 WARN_ON_ONCE(!RB_EMPTY_ROOT(&dentry_hash[i].tree));
232 }
233
/* Store the raw validity timestamp on the dentry's private data. */
static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
{
	struct fuse_dentry *fd = dentry->d_fsdata;

	fd->time = time;
}
238
fuse_dentry_time(const struct dentry * entry)239 static inline u64 fuse_dentry_time(const struct dentry *entry)
240 {
241 return ((struct fuse_dentry *) entry->d_fsdata)->time;
242 }
243
/*
 * Set the dentry validity timestamp, tune DCACHE_OP_DELETE accordingly,
 * and (re-)queue the dentry on the invalidation tree.
 */
static void fuse_dentry_settime(struct dentry *dentry, u64 time)
{
	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
	bool delete = !time && fc->delete_stale;
	/*
	 * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
	 * Don't care about races, either way it's just an optimization
	 */
	if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
	    (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
		spin_lock(&dentry->d_lock);
		if (!delete)
			dentry->d_flags &= ~DCACHE_OP_DELETE;
		else
			dentry->d_flags |= DCACHE_OP_DELETE;
		spin_unlock(&dentry->d_lock);
	}

	__fuse_dentry_settime(dentry, time);
	fuse_dentry_tree_add_node(dentry);
}
265
266 /*
267 * FUSE caches dentries and attributes with separate timeout. The
268 * time in jiffies until the dentry/attributes are valid is stored in
269 * dentry->d_fsdata and fuse_inode->i_time respectively.
270 */
271
272 /*
273 * Calculate the time in jiffies until a dentry/attributes are valid
274 */
/*
 * Convert a relative (sec, nsec) validity timeout into an absolute
 * jiffies64 deadline.  A zero timeout maps to 0 ("already invalid").
 */
u64 fuse_time_to_jiffies(u64 sec, u32 nsec)
{
	struct timespec64 ts;

	if (!sec && !nsec)
		return 0;

	ts.tv_sec = sec;
	/* clamp nanoseconds to a valid timespec value */
	ts.tv_nsec = min_t(u32, nsec, NSEC_PER_SEC - 1);

	return get_jiffies_64() + timespec64_to_jiffies(&ts);
}
287
288 /*
289 * Set dentry and possibly attribute timeouts from the lookup/mk*
290 * replies
291 */
/* Apply the entry timeout from a lookup/mk* reply to @entry. */
void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
{
	fuse_dentry_settime(entry,
		fuse_time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
}
297
/* Mark the STATX bits in @mask as stale for @inode (atomically OR'd in). */
void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
{
	set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask);
}
302
303 /*
304 * Mark the attributes as stale, so that at the next call to
305 * ->getattr() they will be fetched from userspace
306 */
/*
 * Mark the attributes as stale, so that at the next call to
 * ->getattr() they will be fetched from userspace
 */
void fuse_invalidate_attr(struct inode *inode)
{
	fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
}
311
/* Directory contents changed: drop cached attrs and bump i_version. */
static void fuse_dir_changed(struct inode *dir)
{
	fuse_invalidate_attr(dir);
	inode_maybe_inc_iversion(dir, false);
}
317
318 /*
319 * Mark the attributes as stale due to an atime change. Avoid the invalidate if
320 * atime is not used.
321 */
fuse_invalidate_atime(struct inode * inode)322 void fuse_invalidate_atime(struct inode *inode)
323 {
324 if (!IS_RDONLY(inode))
325 fuse_invalidate_attr_mask(inode, STATX_ATIME);
326 }
327
328 /*
329 * Just mark the entry as stale, so that a next attempt to look it up
330 * will result in a new lookup call to userspace
331 *
332 * This is called when a dentry is about to become negative and the
333 * timeout is unknown (unlink, rmdir, rename and in some cases
334 * lookup)
335 */
/*
 * Just mark the entry as stale, so that a next attempt to look it up
 * will result in a new lookup call to userspace
 *
 * This is called when a dentry is about to become negative and the
 * timeout is unknown (unlink, rmdir, rename and in some cases
 * lookup)
 */
void fuse_invalidate_entry_cache(struct dentry *entry)
{
	fuse_dentry_settime(entry, 0);
}
340
341 /*
342 * Same as fuse_invalidate_entry_cache(), but also try to remove the
343 * dentry from the hash
344 */
/*
 * Same as fuse_invalidate_entry_cache(), but also try to remove the
 * dentry from the hash
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);
	fuse_invalidate_entry_cache(entry);
}
350
/*
 * Prepare a FUSE_LOOKUP request for @name under @nodeid, with @outarg
 * as the (zeroed) reply buffer.  The name is sent NUL-terminated via a
 * separate one-byte argument.
 */
static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
			     u64 nodeid, const struct qstr *name,
			     struct fuse_entry_out *outarg)
{
	memset(outarg, 0, sizeof(struct fuse_entry_out));
	args->opcode = FUSE_LOOKUP;
	args->nodeid = nodeid;
	args->in_numargs = 3;
	fuse_set_zero_arg0(args);
	args->in_args[1].size = name->len;
	args->in_args[1].value = name->name;
	args->in_args[2].size = 1;
	args->in_args[2].value = "";
	args->out_numargs = 1;
	args->out_args[0].size = sizeof(struct fuse_entry_out);
	args->out_args[0].value = outarg;
}
368
369 /*
370 * Check whether the dentry is still valid
371 *
372 * If the entry validity timeout has expired and the dentry is
373 * positive, try to redo the lookup. If the lookup results in a
374 * different inode, then let the VFS invalidate the dentry and redo
375 * the lookup once more. If the lookup results in the same inode,
376 * then refresh the attributes, timeouts and mark the dentry valid.
377 */
/*
 * ->d_revalidate: returns 1 (valid), 0 (invalid) or a -errno.
 * A dentry created before the current connection epoch is always
 * invalid.  Expired positive dentries are re-looked-up; a matching
 * reply refreshes attributes and timeouts, a mismatch invalidates.
 */
static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
				  struct dentry *entry, unsigned int flags)
{
	struct inode *inode;
	struct fuse_mount *fm;
	struct fuse_conn *fc;
	struct fuse_inode *fi;
	int ret;

	fc = get_fuse_conn_super(dir->i_sb);
	/* dentry predates an epoch bump: force a fresh lookup */
	if (entry->d_time < atomic_read(&fc->epoch))
		goto invalid;

	inode = d_inode_rcu(entry);
	if (inode && fuse_is_bad(inode))
		goto invalid;
	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
		 (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) {
		struct fuse_entry_out outarg;
		FUSE_ARGS(args);
		struct fuse_forget_link *forget;
		u64 attr_version;

		/* For negative dentries, always do a fresh lookup */
		if (!inode)
			goto invalid;

		/* cannot issue a request in RCU-walk mode */
		ret = -ECHILD;
		if (flags & LOOKUP_RCU)
			goto out;

		fm = get_fuse_mount(inode);

		forget = fuse_alloc_forget();
		ret = -ENOMEM;
		if (!forget)
			goto out;

		attr_version = fuse_get_attr_version(fm->fc);

		fuse_lookup_init(fm->fc, &args, get_node_id(dir),
				 name, &outarg);
		ret = fuse_simple_request(fm, &args);
		/* Zero nodeid is same as -ENOENT */
		if (!ret && !outarg.nodeid)
			ret = -ENOENT;
		if (!ret) {
			fi = get_fuse_inode(inode);
			/* different inode or submount mismatch: drop it */
			if (outarg.nodeid != get_node_id(inode) ||
			    (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
				fuse_queue_forget(fm->fc, forget,
						  outarg.nodeid, 1);
				goto invalid;
			}
			spin_lock(&fi->lock);
			fi->nlookup++;
			spin_unlock(&fi->lock);
		}
		kfree(forget);
		if (ret == -ENOMEM || ret == -EINTR)
			goto out;
		if (ret || fuse_invalid_attr(&outarg.attr) ||
		    fuse_stale_inode(inode, outarg.generation, &outarg.attr))
			goto invalid;

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &outarg.attr, NULL,
				       ATTR_TIMEOUT(&outarg),
				       attr_version);
		fuse_change_entry_timeout(entry, &outarg);
	} else if (inode) {
		/* still valid: possibly advise readdirplus for the parent */
		fi = get_fuse_inode(inode);
		if (flags & LOOKUP_RCU) {
			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
				return -ECHILD;
		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
			fuse_advise_use_readdirplus(dir);
		}
	}
	ret = 1;
out:
	return ret;

invalid:
	ret = 0;
	goto out;
}
465
fuse_dentry_init(struct dentry * dentry)466 static int fuse_dentry_init(struct dentry *dentry)
467 {
468 struct fuse_dentry *fd;
469
470 fd = kzalloc(sizeof(struct fuse_dentry),
471 GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
472 if (!fd)
473 return -ENOMEM;
474
475 fd->dentry = dentry;
476 RB_CLEAR_NODE(&fd->node);
477 dentry->d_fsdata = fd;
478
479 return 0;
480 }
481
fuse_dentry_prune(struct dentry * dentry)482 static void fuse_dentry_prune(struct dentry *dentry)
483 {
484 struct fuse_dentry *fd = dentry->d_fsdata;
485
486 if (!RB_EMPTY_NODE(&fd->node))
487 fuse_dentry_tree_del_node(dentry);
488 }
489
/* ->d_release: free the fuse_dentry after an RCU grace period. */
static void fuse_dentry_release(struct dentry *dentry)
{
	struct fuse_dentry *fd = dentry->d_fsdata;

	kfree_rcu(fd, rcu);
}
496
/* ->d_delete: drop the dentry immediately once its timeout expired. */
static int fuse_dentry_delete(const struct dentry *dentry)
{
	return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
}
501
502 /*
503 * Create a fuse_mount object with a new superblock (with path->dentry
504 * as the root), and return that mount so it can be auto-mounted on
505 * @path.
506 */
/*
 * Create a fuse_mount object with a new superblock (with path->dentry
 * as the root), and return that mount so it can be auto-mounted on
 * @path.
 */
static struct vfsmount *fuse_dentry_automount(struct path *path)
{
	struct fs_context *fsc;
	struct vfsmount *mnt;
	struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));

	fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
	if (IS_ERR(fsc))
		return ERR_CAST(fsc);

	/* Pass the FUSE inode of the mount for fuse_get_tree_submount() */
	fsc->fs_private = mp_fi;

	/* Create the submount */
	mnt = fc_mount(fsc);
	put_fs_context(fsc);
	return mnt;
}
525
/* Dentry operations for regular (non-dax) FUSE mounts. */
const struct dentry_operations fuse_dentry_operations = {
	.d_revalidate	= fuse_dentry_revalidate,
	.d_delete	= fuse_dentry_delete,
	.d_init		= fuse_dentry_init,
	.d_prune	= fuse_dentry_prune,
	.d_release	= fuse_dentry_release,
	.d_automount	= fuse_dentry_automount,
};
534
fuse_valid_type(int m)535 int fuse_valid_type(int m)
536 {
537 return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
538 S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
539 }
540
/* A size reported by userspace must fit in loff_t (i.e. <= LLONG_MAX). */
static bool fuse_valid_size(u64 size)
{
	return size <= LLONG_MAX;
}
545
/* Sanity-check attributes received from the userspace server. */
bool fuse_invalid_attr(struct fuse_attr *attr)
{
	return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
}
550
/*
 * Send a FUSE_LOOKUP for @name under @nodeid and instantiate the result
 * as an inode in *@inode (NULL on failure/negative result).  On success
 * the server's lookup count is owned by the returned inode; on iget
 * failure a FORGET is queued to balance it.
 */
int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
		     struct fuse_entry_out *outarg, struct inode **inode)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	FUSE_ARGS(args);
	struct fuse_forget_link *forget;
	u64 attr_version, evict_ctr;
	int err;

	*inode = NULL;
	err = -ENAMETOOLONG;
	if (name->len > fm->fc->name_max)
		goto out;


	forget = fuse_alloc_forget();
	err = -ENOMEM;
	if (!forget)
		goto out;

	/* sample versions before the request so replies can be ordered */
	attr_version = fuse_get_attr_version(fm->fc);
	evict_ctr = fuse_get_evict_ctr(fm->fc);

	fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
	err = fuse_simple_request(fm, &args);
	/* Zero nodeid is same as -ENOENT, but with valid timeout */
	if (err || !outarg->nodeid)
		goto out_put_forget;

	err = -EIO;
	if (fuse_invalid_attr(&outarg->attr))
		goto out_put_forget;
	if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
		pr_warn_once("root generation should be zero\n");
		outarg->generation = 0;
	}

	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
			   &outarg->attr, ATTR_TIMEOUT(outarg),
			   attr_version, evict_ctr);
	err = -ENOMEM;
	if (!*inode) {
		/* balance the server-side lookup count we just took */
		fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
		goto out;
	}
	err = 0;

 out_put_forget:
	kfree(forget);
 out:
	return err;
}
603
/*
 * ->lookup: resolve @entry in @dir via FUSE_LOOKUP and splice the
 * result into the dcache.  A -ENOENT reply yields a cached negative
 * dentry (with the reply's entry timeout treated as invalid here).
 */
static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
				  unsigned int flags)
{
	struct fuse_entry_out outarg;
	struct fuse_conn *fc;
	struct inode *inode;
	struct dentry *newent;
	int err, epoch;
	bool outarg_valid = true;
	bool locked;

	if (fuse_is_bad(dir))
		return ERR_PTR(-EIO);

	/* sample the epoch before the request, see fuse_dentry_revalidate() */
	fc = get_fuse_conn_super(dir->i_sb);
	epoch = atomic_read(&fc->epoch);

	locked = fuse_lock_inode(dir);
	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
			       &outarg, &inode);
	fuse_unlock_inode(dir, locked);
	if (err == -ENOENT) {
		/* negative result: cache it without an entry timeout */
		outarg_valid = false;
		err = 0;
	}
	if (err)
		goto out_err;

	/* the root inode must never appear as a lookup result */
	err = -EIO;
	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
		goto out_iput;

	newent = d_splice_alias(inode, entry);
	err = PTR_ERR(newent);
	if (IS_ERR(newent))
		goto out_err;

	entry = newent ? newent : entry;
	entry->d_time = epoch;
	if (outarg_valid)
		fuse_change_entry_timeout(entry, &outarg);
	else
		fuse_invalidate_entry_cache(entry);

	if (inode)
		fuse_advise_use_readdirplus(dir);
	return newent;

 out_iput:
	iput(inode);
 out_err:
	return ERR_PTR(err);
}
657
/*
 * Build the FUSE security-context extension for a create-type request:
 * a fuse_secctx_header followed (if an LSM supplied a context) by one
 * fuse_secctx record, the NUL-terminated xattr name and the raw context
 * blob, padded to FUSE_REC_ALIGN.  Ownership of the buffer moves to
 * @ext on success; the LSM context is released before returning.
 */
static int get_security_context(struct dentry *entry, umode_t mode,
				struct fuse_in_arg *ext)
{
	struct fuse_secctx *fctx;
	struct fuse_secctx_header *header;
	struct lsm_context lsmctx = { };
	void *ptr;
	u32 total_len = sizeof(*header);
	int err, nr_ctx = 0;
	const char *name = NULL;
	size_t namesize;

	err = security_dentry_init_security(entry, mode, &entry->d_name,
					    &name, &lsmctx);

	/* If no LSM is supporting this security hook ignore error */
	if (err && err != -EOPNOTSUPP)
		goto out_err;

	if (lsmctx.len) {
		nr_ctx = 1;
		namesize = strlen(name) + 1;
		err = -EIO;
		if (WARN_ON(namesize > XATTR_NAME_MAX + 1 ||
			    lsmctx.len > S32_MAX))
			goto out_err;
		/* record is padded so the header stays naturally aligned */
		total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namesize +
					    lsmctx.len);
	}

	err = -ENOMEM;
	header = ptr = kzalloc(total_len, GFP_KERNEL);
	if (!ptr)
		goto out_err;

	header->nr_secctx = nr_ctx;
	header->size = total_len;
	ptr += sizeof(*header);
	if (nr_ctx) {
		/* layout: fuse_secctx | name\0 | context bytes */
		fctx = ptr;
		fctx->size = lsmctx.len;
		ptr += sizeof(*fctx);

		strscpy(ptr, name, namesize);
		ptr += namesize;

		memcpy(ptr, lsmctx.context, lsmctx.len);
	}
	ext->size = total_len;
	ext->value = header;
	err = 0;
out_err:
	/* only release if the LSM actually handed us a context */
	if (nr_ctx)
		security_release_secctx(&lsmctx);
	return err;
}
714
/*
 * Grow @buf by @bytes and return a pointer to the zeroed new tail.
 * On allocation failure the old buffer is freed, @buf is reset and
 * NULL is returned.
 */
static void *extend_arg(struct fuse_in_arg *buf, u32 bytes)
{
	u32 oldlen = buf->size;
	void *p = krealloc(buf->value, oldlen + bytes, GFP_KERNEL);

	if (!p) {
		kfree(buf->value);
		buf->value = NULL;
		buf->size = 0;
		return NULL;
	}

	memset(p + oldlen, 0, bytes);
	buf->value = p;
	buf->size = oldlen + bytes;

	return p + oldlen;
}
734
/* Total on-wire size of an extension record of @size payload bytes. */
static u32 fuse_ext_size(size_t size)
{
	return FUSE_REC_ALIGN(sizeof(struct fuse_ext_header) + size);
}
739
740 /*
741 * This adds just a single supplementary group that matches the parent's group.
742 */
/*
 * This adds just a single supplementary group that matches the parent's group.
 *
 * Appends a FUSE_EXT_GROUPS record carrying the parent directory's gid,
 * but only when the creating task's fsgid differs from it and the task
 * is nevertheless a member of that group (the setgid-directory case).
 */
static int get_create_supp_group(struct mnt_idmap *idmap,
				 struct inode *dir,
				 struct fuse_in_arg *ext)
{
	struct fuse_conn *fc = get_fuse_conn(dir);
	struct fuse_ext_header *xh;
	struct fuse_supp_groups *sg;
	kgid_t kgid = dir->i_gid;
	vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns, kgid);
	gid_t parent_gid = from_kgid(fc->user_ns, kgid);

	u32 sg_len = fuse_ext_size(sizeof(*sg) + sizeof(sg->groups[0]));

	/* unmapped gid, same fsgid, or not a member: nothing to add */
	if (parent_gid == (gid_t) -1 || vfsgid_eq_kgid(vfsgid, current_fsgid()) ||
	    !vfsgid_in_group_p(vfsgid))
		return 0;

	xh = extend_arg(ext, sg_len);
	if (!xh)
		return -ENOMEM;

	xh->size = sg_len;
	xh->type = FUSE_EXT_GROUPS;

	sg = (struct fuse_supp_groups *) &xh[1];
	sg->nr_groups = 1;
	sg->groups[0] = parent_gid;

	return 0;
}
773
/*
 * Collect optional extension payloads (security context, supplementary
 * group) for a create-type request and, if any were produced, append
 * them as the last input argument of @args.  On error or when empty,
 * any partially built buffer is freed here.
 */
static int get_create_ext(struct mnt_idmap *idmap,
			  struct fuse_args *args,
			  struct inode *dir, struct dentry *dentry,
			  umode_t mode)
{
	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
	struct fuse_in_arg ext = { .size = 0, .value = NULL };
	int err = 0;

	if (fc->init_security)
		err = get_security_context(dentry, mode, &ext);
	if (!err && fc->create_supp_group)
		err = get_create_supp_group(idmap, dir, &ext);

	if (!err && ext.size) {
		WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args));
		args->is_ext = true;
		/* remember the slot so free_ext_value() can release it */
		args->ext_idx = args->in_numargs++;
		args->in_args[args->ext_idx] = ext;
	} else {
		kfree(ext.value);
	}

	return err;
}
799
free_ext_value(struct fuse_args * args)800 static void free_ext_value(struct fuse_args *args)
801 {
802 if (args->is_ext)
803 kfree(args->in_args[args->ext_idx].value);
804 }
805
806 /*
807 * Atomic create+open operation
808 *
809 * If the filesystem doesn't support this, then fall back to separate
810 * 'mknod' + 'open' requests.
811 */
/*
 * Atomic create+open operation
 *
 * If the filesystem doesn't support this, then fall back to separate
 * 'mknod' + 'open' requests.
 *
 * Sends a single FUSE_CREATE/FUSE_TMPFILE request that both creates the
 * file and opens it, instantiates the resulting inode, and finishes the
 * open.  Error paths carefully release the fuse_file and either free or
 * queue the pre-allocated forget so the server's lookup count balances.
 */
static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
			    struct dentry *entry, struct file *file,
			    unsigned int flags, umode_t mode, u32 opcode)
{
	struct inode *inode;
	struct fuse_mount *fm = get_fuse_mount(dir);
	FUSE_ARGS(args);
	struct fuse_forget_link *forget;
	struct fuse_create_in inarg;
	struct fuse_open_out *outopenp;
	struct fuse_entry_out outentry;
	struct fuse_inode *fi;
	struct fuse_file *ff;
	int epoch, err;
	bool trunc = flags & O_TRUNC;

	/* Userspace expects S_IFREG in create mode */
	BUG_ON((mode & S_IFMT) != S_IFREG);

	epoch = atomic_read(&fm->fc->epoch);
	forget = fuse_alloc_forget();
	err = -ENOMEM;
	if (!forget)
		goto out_err;

	err = -ENOMEM;
	ff = fuse_file_alloc(fm, true);
	if (!ff)
		goto out_put_forget_req;

	if (!fm->fc->dont_mask)
		mode &= ~current_umask();

	flags &= ~O_NOCTTY;
	memset(&inarg, 0, sizeof(inarg));
	memset(&outentry, 0, sizeof(outentry));
	inarg.flags = flags;
	inarg.mode = mode;
	inarg.umask = current_umask();

	/* let the server strip suid/sgid when we may not keep them */
	if (fm->fc->handle_killpriv_v2 && trunc &&
	    !(flags & O_EXCL) && !capable(CAP_FSETID)) {
		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
	}

	args.opcode = opcode;
	args.nodeid = get_node_id(dir);
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.in_args[1].size = entry->d_name.len + 1;
	args.in_args[1].value = entry->d_name.name;
	args.out_numargs = 2;
	args.out_args[0].size = sizeof(outentry);
	args.out_args[0].value = &outentry;
	/* Store outarg for fuse_finish_open() */
	outopenp = &ff->args->open_outarg;
	args.out_args[1].size = sizeof(*outopenp);
	args.out_args[1].value = outopenp;

	err = get_create_ext(idmap, &args, dir, entry, mode);
	if (err)
		goto out_free_ff;

	err = fuse_simple_idmap_request(idmap, fm, &args);
	free_ext_value(&args);
	if (err)
		goto out_free_ff;

	err = -EIO;
	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
	    fuse_invalid_attr(&outentry.attr))
		goto out_free_ff;

	ff->fh = outopenp->fh;
	ff->nodeid = outentry.nodeid;
	ff->open_flags = outopenp->open_flags;
	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
			  &outentry.attr, ATTR_TIMEOUT(&outentry), 0, 0);
	if (!inode) {
		/* release the server-side open and lookup references */
		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
		fuse_sync_release(NULL, ff, flags);
		fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
		err = -ENOMEM;
		goto out_err;
	}
	kfree(forget);
	d_instantiate(entry, inode);
	entry->d_time = epoch;
	fuse_change_entry_timeout(entry, &outentry);
	fuse_dir_changed(dir);
	err = generic_file_open(inode, file);
	if (!err) {
		file->private_data = ff;
		err = finish_open(file, entry, fuse_finish_open);
	}
	if (err) {
		fi = get_fuse_inode(inode);
		fuse_sync_release(fi, ff, flags);
	} else {
		if (fm->fc->atomic_o_trunc && trunc)
			truncate_pagecache(inode, 0);
		else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
			invalidate_inode_pages2(inode->i_mapping);
	}
	return err;

out_free_ff:
	fuse_file_free(ff);
out_put_forget_req:
	kfree(forget);
out_err:
	return err;
}
926
static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
		      umode_t, dev_t);

/*
 * ->atomic_open: try FUSE_CREATE (create+open in one request); fall
 * back to separate mknod + open when the server lacks support.  A
 * dentry still in lookup is resolved first and the open declined if it
 * turns out positive or not a pure create.
 */
static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
			    struct file *file, unsigned flags,
			    umode_t mode)
{
	int err;
	struct mnt_idmap *idmap = file_mnt_idmap(file);
	struct fuse_conn *fc = get_fuse_conn(dir);

	if (fuse_is_bad(dir))
		return -EIO;

	if (d_in_lookup(entry)) {
		struct dentry *res = fuse_lookup(dir, entry, 0);
		if (res || d_really_is_positive(entry))
			return finish_no_open(file, res);
	}

	if (!(flags & O_CREAT))
		return finish_no_open(file, NULL);

	/* Only creates */
	file->f_mode |= FMODE_CREATED;

	if (fc->no_create)
		goto mknod;

	err = fuse_create_open(idmap, dir, entry, file, flags, mode, FUSE_CREATE);
	if (err == -ENOSYS) {
		/* server doesn't implement CREATE: remember and fall back */
		fc->no_create = 1;
		goto mknod;
	} else if (err == -EEXIST)
		fuse_invalidate_entry(entry);
	return err;

mknod:
	err = fuse_mknod(idmap, dir, entry, mode, 0);
	if (err)
		return err;
	return finish_no_open(file, NULL);
}
969
970 /*
971 * Code shared between mknod, mkdir, symlink and link
972 */
/*
 * Code shared between mknod, mkdir, symlink and link
 *
 * Sends the prepared request in @args, validates the reply, instantiates
 * the new inode and splices it into the dcache.  Returns the alternate
 * dentry from d_splice_alias() (NULL when @entry itself was used) or an
 * ERR_PTR.  On -EEXIST the existing entry is invalidated.
 */
static struct dentry *create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm,
				       struct fuse_args *args, struct inode *dir,
				       struct dentry *entry, umode_t mode)
{
	struct fuse_entry_out outarg;
	struct inode *inode;
	struct dentry *d;
	struct fuse_forget_link *forget;
	int epoch, err;

	if (fuse_is_bad(dir))
		return ERR_PTR(-EIO);

	epoch = atomic_read(&fm->fc->epoch);

	forget = fuse_alloc_forget();
	if (!forget)
		return ERR_PTR(-ENOMEM);

	memset(&outarg, 0, sizeof(outarg));
	args->nodeid = get_node_id(dir);
	args->out_numargs = 1;
	args->out_args[0].size = sizeof(outarg);
	args->out_args[0].value = &outarg;

	/* FUSE_LINK carries no create extensions */
	if (args->opcode != FUSE_LINK) {
		err = get_create_ext(idmap, args, dir, entry, mode);
		if (err)
			goto out_put_forget_req;
	}

	err = fuse_simple_idmap_request(idmap, fm, args);
	free_ext_value(args);
	if (err)
		goto out_put_forget_req;

	err = -EIO;
	if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
		goto out_put_forget_req;

	/* reply must report the file type we asked for */
	if ((outarg.attr.mode ^ mode) & S_IFMT)
		goto out_put_forget_req;

	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
			  &outarg.attr, ATTR_TIMEOUT(&outarg), 0, 0);
	if (!inode) {
		/* balance the server-side lookup count */
		fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
		return ERR_PTR(-ENOMEM);
	}
	kfree(forget);

	d_drop(entry);
	d = d_splice_alias(inode, entry);
	if (IS_ERR(d))
		return d;

	if (d) {
		d->d_time = epoch;
		fuse_change_entry_timeout(d, &outarg);
	} else {
		entry->d_time = epoch;
		fuse_change_entry_timeout(entry, &outarg);
	}
	fuse_dir_changed(dir);
	return d;

out_put_forget_req:
	if (err == -EEXIST)
		fuse_invalidate_entry(entry);
	kfree(forget);
	return ERR_PTR(err);
}
1045
/*
 * Wrapper around create_new_entry() for non-directory objects.
 *
 * Note that when creating anything other than a directory we
 * can be sure create_new_entry() will NOT return an alternate
 * dentry as d_splice_alias() only returns an alternate dentry
 * for directories. So we don't need to check for that case
 * when passing back the result.
 */
static int create_new_nondir(struct mnt_idmap *idmap, struct fuse_mount *fm,
			     struct fuse_args *args, struct inode *dir,
			     struct dentry *entry, umode_t mode)
{
	struct dentry *d;

	WARN_ON_ONCE(S_ISDIR(mode));

	d = create_new_entry(idmap, fm, args, dir, entry, mode);
	/* d is NULL on success, so PTR_ERR() yields 0 or the error */
	return PTR_ERR(d);
}
1061
/* ->mknod: create a non-directory object via FUSE_MKNOD. */
static int fuse_mknod(struct mnt_idmap *idmap, struct inode *dir,
		      struct dentry *entry, umode_t mode, dev_t rdev)
{
	struct fuse_mknod_in inarg;
	struct fuse_mount *fm = get_fuse_mount(dir);
	FUSE_ARGS(args);

	/* apply the umask locally unless the server wants the raw mode */
	if (!fm->fc->dont_mask)
		mode &= ~current_umask();

	memset(&inarg, 0, sizeof(inarg));
	inarg.mode = mode;
	inarg.rdev = new_encode_dev(rdev);
	inarg.umask = current_umask();
	args.opcode = FUSE_MKNOD;
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.in_args[1].size = entry->d_name.len + 1;
	args.in_args[1].value = entry->d_name.name;
	return create_new_nondir(idmap, fm, &args, dir, entry, mode);
}
1084
/* ->create(): a regular-file create is just a mknod with rdev == 0 */
static int fuse_create(struct mnt_idmap *idmap, struct inode *dir,
		       struct dentry *entry, umode_t mode, bool excl)
{
	return fuse_mknod(idmap, dir, entry, mode, 0);
}
1090
/*
 * ->tmpfile(): create an unnamed temporary file via FUSE_TMPFILE.
 * Lack of server support is cached in fc->no_tmpfile and reported as
 * -EOPNOTSUPP from then on.
 */
static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
			struct file *file, umode_t mode)
{
	struct fuse_conn *fc = get_fuse_conn(dir);
	int ret;

	/* Server already told us it does not implement FUSE_TMPFILE */
	if (fc->no_tmpfile)
		return -EOPNOTSUPP;

	ret = fuse_create_open(idmap, dir, file->f_path.dentry, file,
			       file->f_flags, mode, FUSE_TMPFILE);
	if (ret == -ENOSYS) {
		/* Remember the lack of support and report it uniformly */
		fc->no_tmpfile = 1;
		ret = -EOPNOTSUPP;
	}
	return ret;
}
1108
/*
 * ->mkdir(): send FUSE_MKDIR with mode/umask plus the entry name.
 * Returns the (possibly alternate) dentry from create_new_entry().
 */
static struct dentry *fuse_mkdir(struct mnt_idmap *idmap, struct inode *dir,
				 struct dentry *entry, umode_t mode)
{
	struct fuse_mount *fm = get_fuse_mount(dir);
	struct fuse_mkdir_in inarg;
	FUSE_ARGS(args);

	/* Apply the umask locally unless the server wants to do it itself */
	if (!fm->fc->dont_mask)
		mode &= ~current_umask();

	memset(&inarg, 0, sizeof(inarg));
	inarg.umask = current_umask();
	inarg.mode = mode;

	args.opcode = FUSE_MKDIR;
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.in_args[1].size = entry->d_name.len + 1;
	args.in_args[1].value = entry->d_name.name;

	return create_new_entry(idmap, fm, &args, dir, entry, S_IFDIR);
}
1130
fuse_symlink(struct mnt_idmap * idmap,struct inode * dir,struct dentry * entry,const char * link)1131 static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir,
1132 struct dentry *entry, const char *link)
1133 {
1134 struct fuse_mount *fm = get_fuse_mount(dir);
1135 unsigned len = strlen(link) + 1;
1136 FUSE_ARGS(args);
1137
1138 args.opcode = FUSE_SYMLINK;
1139 args.in_numargs = 3;
1140 fuse_set_zero_arg0(&args);
1141 args.in_args[1].size = entry->d_name.len + 1;
1142 args.in_args[1].value = entry->d_name.name;
1143 args.in_args[2].size = len;
1144 args.in_args[2].value = link;
1145 return create_new_nondir(idmap, fm, &args, dir, entry, S_IFLNK);
1146 }
1147
fuse_flush_time_update(struct inode * inode)1148 void fuse_flush_time_update(struct inode *inode)
1149 {
1150 int err = sync_inode_metadata(inode, 1);
1151
1152 mapping_set_error(inode->i_mapping, err);
1153 }
1154
/* Bump the locally cached ctime and flush it, unless suppressed */
static void fuse_update_ctime_in_cache(struct inode *inode)
{
	/* ctime/mtime updates disabled for this inode */
	if (IS_NOCMTIME(inode))
		return;

	inode_set_ctime_current(inode);
	mark_inode_dirty_sync(inode);
	fuse_flush_time_update(inode);
}
1163
/* Invalidate the cached server-side ctime, then refresh the local copy */
void fuse_update_ctime(struct inode *inode)
{
	fuse_invalidate_attr_mask(inode, STATX_CTIME);
	fuse_update_ctime_in_cache(inode);
}
1169
/*
 * Common fixups after a successful unlink/rmdir/rename-over: bump the
 * attribute version and drop the link count under fi->lock, then
 * invalidate the entry's lookup cache and refresh ctime.
 */
static void fuse_entry_unlinked(struct dentry *entry)
{
	struct inode *inode = d_inode(entry);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	spin_lock(&fi->lock);
	fi->attr_version = atomic64_inc_return(&fc->attr_version);
	/*
	 * If i_nlink == 0 then unlink doesn't make sense, yet this can
	 * happen if userspace filesystem is careless. It would be
	 * difficult to enforce correct nlink usage so just ignore this
	 * condition here
	 */
	if (S_ISDIR(inode->i_mode))
		clear_nlink(inode);
	else if (inode->i_nlink > 0)
		drop_nlink(inode);
	spin_unlock(&fi->lock);
	fuse_invalidate_entry_cache(entry);
	fuse_update_ctime(inode);
}
1192
fuse_unlink(struct inode * dir,struct dentry * entry)1193 static int fuse_unlink(struct inode *dir, struct dentry *entry)
1194 {
1195 int err;
1196 struct fuse_mount *fm = get_fuse_mount(dir);
1197 FUSE_ARGS(args);
1198
1199 if (fuse_is_bad(dir))
1200 return -EIO;
1201
1202 args.opcode = FUSE_UNLINK;
1203 args.nodeid = get_node_id(dir);
1204 args.in_numargs = 2;
1205 fuse_set_zero_arg0(&args);
1206 args.in_args[1].size = entry->d_name.len + 1;
1207 args.in_args[1].value = entry->d_name.name;
1208 err = fuse_simple_request(fm, &args);
1209 if (!err) {
1210 fuse_dir_changed(dir);
1211 fuse_entry_unlinked(entry);
1212 } else if (err == -EINTR || err == -ENOENT)
1213 fuse_invalidate_entry(entry);
1214 return err;
1215 }
1216
fuse_rmdir(struct inode * dir,struct dentry * entry)1217 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
1218 {
1219 int err;
1220 struct fuse_mount *fm = get_fuse_mount(dir);
1221 FUSE_ARGS(args);
1222
1223 if (fuse_is_bad(dir))
1224 return -EIO;
1225
1226 args.opcode = FUSE_RMDIR;
1227 args.nodeid = get_node_id(dir);
1228 args.in_numargs = 2;
1229 fuse_set_zero_arg0(&args);
1230 args.in_args[1].size = entry->d_name.len + 1;
1231 args.in_args[1].value = entry->d_name.name;
1232 err = fuse_simple_request(fm, &args);
1233 if (!err) {
1234 fuse_dir_changed(dir);
1235 fuse_entry_unlinked(entry);
1236 } else if (err == -EINTR || err == -ENOENT)
1237 fuse_invalidate_entry(entry);
1238 return err;
1239 }
1240
/*
 * Send a rename request and fix up the caches afterwards.
 *
 * @opcode selects FUSE_RENAME or FUSE_RENAME2 and @argsize the matching
 * input structure size; only the first @argsize bytes of @inarg are
 * zeroed and sent, so the legacy request omits the flags field.
 */
static int fuse_rename_common(struct mnt_idmap *idmap, struct inode *olddir, struct dentry *oldent,
			      struct inode *newdir, struct dentry *newent,
			      unsigned int flags, int opcode, size_t argsize)
{
	int err;
	struct fuse_rename2_in inarg;
	struct fuse_mount *fm = get_fuse_mount(olddir);
	FUSE_ARGS(args);

	memset(&inarg, 0, argsize);
	inarg.newdir = get_node_id(newdir);
	inarg.flags = flags;
	args.opcode = opcode;
	args.nodeid = get_node_id(olddir);
	args.in_numargs = 3;
	args.in_args[0].size = argsize;
	args.in_args[0].value = &inarg;
	args.in_args[1].size = oldent->d_name.len + 1;
	args.in_args[1].value = oldent->d_name.name;
	args.in_args[2].size = newent->d_name.len + 1;
	args.in_args[2].value = newent->d_name.name;
	err = fuse_simple_idmap_request(idmap, fm, &args);
	if (!err) {
		/* ctime changes */
		fuse_update_ctime(d_inode(oldent));

		if (flags & RENAME_EXCHANGE)
			fuse_update_ctime(d_inode(newent));

		fuse_dir_changed(olddir);
		if (olddir != newdir)
			fuse_dir_changed(newdir);

		/* newent will end up negative */
		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent))
			fuse_entry_unlinked(newent);
	} else if (err == -EINTR || err == -ENOENT) {
		/* If request was interrupted, DEITY only knows if the
		   rename actually took place. If the invalidation
		   fails (e.g. some process has CWD under the renamed
		   directory), then there can be inconsistency between
		   the dcache and the real filesystem. Tough luck. */
		fuse_invalidate_entry(oldent);
		if (d_really_is_positive(newent))
			fuse_invalidate_entry(newent);
	}

	return err;
}
1290
/*
 * ->rename2() entry point.
 *
 * Flagged renames need FUSE_RENAME2 (protocol >= 7.23); an -ENOSYS
 * reply is cached in fc->no_rename2 and mapped to -EINVAL, matching a
 * filesystem without rename2 support.  The idmap is only forwarded for
 * RENAME_WHITEOUT, the one variant that creates a new object.
 */
static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir,
			struct dentry *oldent, struct inode *newdir,
			struct dentry *newent, unsigned int flags)
{
	struct fuse_conn *fc = get_fuse_conn(olddir);
	int err;

	if (fuse_is_bad(olddir))
		return -EIO;

	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
		return -EINVAL;

	if (flags) {
		if (fc->no_rename2 || fc->minor < 23)
			return -EINVAL;

		err = fuse_rename_common((flags & RENAME_WHITEOUT) ? idmap : &invalid_mnt_idmap,
					 olddir, oldent, newdir, newent, flags,
					 FUSE_RENAME2,
					 sizeof(struct fuse_rename2_in));
		if (err == -ENOSYS) {
			fc->no_rename2 = 1;
			err = -EINVAL;
		}
	} else {
		/* Plain rename: legacy opcode with the smaller in-arg */
		err = fuse_rename_common(&invalid_mnt_idmap, olddir, oldent, newdir, newent, 0,
					 FUSE_RENAME,
					 sizeof(struct fuse_rename_in));
	}

	return err;
}
1324
/*
 * ->link(): create a hard link to @entry in @newdir as @newent.
 *
 * FUSE_LINK carries no ownership information, so the invalid idmap is
 * used.  An -ENOSYS reply sets fc->no_link, after which -EPERM is
 * returned (including for this call, via the check after the out
 * label).
 */
static int fuse_link(struct dentry *entry, struct inode *newdir,
		     struct dentry *newent)
{
	int err;
	struct fuse_link_in inarg;
	struct inode *inode = d_inode(entry);
	struct fuse_mount *fm = get_fuse_mount(inode);
	FUSE_ARGS(args);

	if (fm->fc->no_link)
		goto out;

	memset(&inarg, 0, sizeof(inarg));
	inarg.oldnodeid = get_node_id(inode);
	args.opcode = FUSE_LINK;
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.in_args[1].size = newent->d_name.len + 1;
	args.in_args[1].value = newent->d_name.name;
	err = create_new_nondir(&invalid_mnt_idmap, fm, &args, newdir, newent, inode->i_mode);
	if (!err)
		fuse_update_ctime_in_cache(inode);
	else if (err == -EINTR)
		/* Link may or may not have happened; nlink is uncertain */
		fuse_invalidate_attr(inode);

	if (err == -ENOSYS)
		fm->fc->no_link = 1;
out:
	if (fm->fc->no_link)
		return -EPERM;

	return err;
}
1359
fuse_fillattr(struct mnt_idmap * idmap,struct inode * inode,struct fuse_attr * attr,struct kstat * stat)1360 static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode,
1361 struct fuse_attr *attr, struct kstat *stat)
1362 {
1363 unsigned int blkbits;
1364 struct fuse_conn *fc = get_fuse_conn(inode);
1365 vfsuid_t vfsuid = make_vfsuid(idmap, fc->user_ns,
1366 make_kuid(fc->user_ns, attr->uid));
1367 vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns,
1368 make_kgid(fc->user_ns, attr->gid));
1369
1370 stat->dev = inode->i_sb->s_dev;
1371 stat->ino = attr->ino;
1372 stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
1373 stat->nlink = attr->nlink;
1374 stat->uid = vfsuid_into_kuid(vfsuid);
1375 stat->gid = vfsgid_into_kgid(vfsgid);
1376 stat->rdev = inode->i_rdev;
1377 stat->atime.tv_sec = attr->atime;
1378 stat->atime.tv_nsec = attr->atimensec;
1379 stat->mtime.tv_sec = attr->mtime;
1380 stat->mtime.tv_nsec = attr->mtimensec;
1381 stat->ctime.tv_sec = attr->ctime;
1382 stat->ctime.tv_nsec = attr->ctimensec;
1383 stat->size = attr->size;
1384 stat->blocks = attr->blocks;
1385
1386 if (attr->blksize != 0)
1387 blkbits = ilog2(attr->blksize);
1388 else
1389 blkbits = inode->i_sb->s_blocksize_bits;
1390
1391 stat->blksize = 1 << blkbits;
1392 }
1393
fuse_statx_to_attr(struct fuse_statx * sx,struct fuse_attr * attr)1394 static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr)
1395 {
1396 memset(attr, 0, sizeof(*attr));
1397 attr->ino = sx->ino;
1398 attr->size = sx->size;
1399 attr->blocks = sx->blocks;
1400 attr->atime = sx->atime.tv_sec;
1401 attr->mtime = sx->mtime.tv_sec;
1402 attr->ctime = sx->ctime.tv_sec;
1403 attr->atimensec = sx->atime.tv_nsec;
1404 attr->mtimensec = sx->mtime.tv_nsec;
1405 attr->ctimensec = sx->ctime.tv_nsec;
1406 attr->mode = sx->mode;
1407 attr->nlink = sx->nlink;
1408 attr->uid = sx->uid;
1409 attr->gid = sx->gid;
1410 attr->rdev = new_encode_dev(MKDEV(sx->rdev_major, sx->rdev_minor));
1411 attr->blksize = sx->blksize;
1412 }
1413
/*
 * Issue a FUSE_STATX request and refresh the cached attributes.
 *
 * The reply is sanity-checked (size and file type) and the inode is
 * marked bad on inconsistency.  Cached attributes are only updated
 * when all basic stats were returned; @stat, if non-NULL, is filled
 * for the caller either way.
 */
static int fuse_do_statx(struct mnt_idmap *idmap, struct inode *inode,
			 struct file *file, struct kstat *stat)
{
	int err;
	struct fuse_attr attr;
	struct fuse_statx *sx;
	struct fuse_statx_in inarg;
	struct fuse_statx_out outarg;
	struct fuse_mount *fm = get_fuse_mount(inode);
	u64 attr_version = fuse_get_attr_version(fm->fc);
	FUSE_ARGS(args);

	memset(&inarg, 0, sizeof(inarg));
	memset(&outarg, 0, sizeof(outarg));
	/* Directories have separate file-handle space */
	if (file && S_ISREG(inode->i_mode)) {
		struct fuse_file *ff = file->private_data;

		inarg.getattr_flags |= FUSE_GETATTR_FH;
		inarg.fh = ff->fh;
	}
	/* For now leave sync hints as the default, request all stats. */
	inarg.sx_flags = 0;
	inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME;
	args.opcode = FUSE_STATX;
	args.nodeid = get_node_id(inode);
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.out_numargs = 1;
	args.out_args[0].size = sizeof(outarg);
	args.out_args[0].value = &outarg;
	err = fuse_simple_request(fm, &args);
	if (err)
		return err;

	/* Reject nonsensical sizes and file-type changes */
	sx = &outarg.stat;
	if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
	    ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
					 inode_wrong_type(inode, sx->mode)))) {
		fuse_make_bad(inode);
		return -EIO;
	}

	fuse_statx_to_attr(&outarg.stat, &attr);
	if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) {
		fuse_change_attributes(inode, &attr, &outarg.stat,
				       ATTR_TIMEOUT(&outarg), attr_version);
	}

	if (stat) {
		stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME);
		stat->btime.tv_sec = sx->btime.tv_sec;
		stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
		fuse_fillattr(idmap, inode, &attr, stat);
		stat->result_mask |= STATX_TYPE;
	}

	return 0;
}
1474
/*
 * Issue a plain FUSE_GETATTR request and refresh the inode's cached
 * attributes, optionally filling @stat.  Marks the inode bad if the
 * server replies with invalid attributes or a changed file type.
 */
static int fuse_do_getattr(struct mnt_idmap *idmap, struct inode *inode,
			   struct kstat *stat, struct file *file)
{
	int err;
	struct fuse_getattr_in inarg;
	struct fuse_attr_out outarg;
	struct fuse_mount *fm = get_fuse_mount(inode);
	FUSE_ARGS(args);
	u64 attr_version;

	attr_version = fuse_get_attr_version(fm->fc);

	memset(&inarg, 0, sizeof(inarg));
	memset(&outarg, 0, sizeof(outarg));
	/* Directories have separate file-handle space */
	if (file && S_ISREG(inode->i_mode)) {
		struct fuse_file *ff = file->private_data;

		inarg.getattr_flags |= FUSE_GETATTR_FH;
		inarg.fh = ff->fh;
	}
	args.opcode = FUSE_GETATTR;
	args.nodeid = get_node_id(inode);
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.out_numargs = 1;
	args.out_args[0].size = sizeof(outarg);
	args.out_args[0].value = &outarg;
	err = fuse_simple_request(fm, &args);
	if (!err) {
		if (fuse_invalid_attr(&outarg.attr) ||
		    inode_wrong_type(inode, outarg.attr.mode)) {
			fuse_make_bad(inode);
			err = -EIO;
		} else {
			fuse_change_attributes(inode, &outarg.attr, NULL,
					       ATTR_TIMEOUT(&outarg),
					       attr_version);
			if (stat)
				fuse_fillattr(idmap, inode, &outarg.attr, stat);
		}
	}
	return err;
}
1520
/*
 * Refresh the inode's attributes if needed and optionally fill @stat.
 *
 * A synchronous round trip is done when forced via AT_STATX_FORCE_SYNC,
 * when a requested field has been invalidated and is not covered by the
 * cache mask, or when the attribute timeout has expired; otherwise the
 * cached attributes are used.  FUSE_STATX is tried when BTIME is wanted
 * and falls back to FUSE_GETATTR on servers that answer -ENOSYS.
 */
static int fuse_update_get_attr(struct mnt_idmap *idmap, struct inode *inode,
				struct file *file, struct kstat *stat,
				u32 request_mask, unsigned int flags)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_conn *fc = get_fuse_conn(inode);
	int err = 0;
	bool sync;
	u32 inval_mask = READ_ONCE(fi->inval_mask);
	u32 cache_mask = fuse_get_cache_mask(inode);


	/* FUSE only supports basic stats and possibly btime */
	request_mask &= STATX_BASIC_STATS | STATX_BTIME;
retry:
	if (fc->no_statx)
		request_mask &= STATX_BASIC_STATS;

	if (!request_mask)
		sync = false;
	else if (flags & AT_STATX_FORCE_SYNC)
		sync = true;
	else if (flags & AT_STATX_DONT_SYNC)
		sync = false;
	else if (request_mask & inval_mask & ~cache_mask)
		sync = true;
	else
		sync = time_before64(fi->i_time, get_jiffies_64());

	if (sync) {
		forget_all_cached_acls(inode);
		/* Try statx if BTIME is requested */
		if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) {
			err = fuse_do_statx(idmap, inode, file, stat);
			if (err == -ENOSYS) {
				fc->no_statx = 1;
				err = 0;
				goto retry;
			}
		} else {
			err = fuse_do_getattr(idmap, inode, stat, file);
		}
	} else if (stat) {
		/* Serve the request entirely from cached attributes */
		generic_fillattr(idmap, request_mask, inode, stat);
		stat->mode = fi->orig_i_mode;
		stat->ino = fi->orig_ino;
		stat->blksize = 1 << fi->cached_i_blkbits;
		if (test_bit(FUSE_I_BTIME, &fi->state)) {
			stat->btime = fi->i_btime;
			stat->result_mask |= STATX_BTIME;
		}
	}

	return err;
}
1576
/* Refresh the attributes in @mask if stale; no kstat output wanted */
int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask)
{
	return fuse_update_get_attr(&nop_mnt_idmap, inode, file, NULL, mask, 0);
}
1581
/*
 * Handle an entry-invalidation notification from the server: look up
 * the parent by nodeid, find the named child and invalidate it.  If
 * @child_nodeid is given, the child must match it and is then unhashed
 * as after unlink/rmdir (directories must be empty and unmounted).
 * With FUSE_EXPIRE_ONLY the entry is only marked for revalidation
 * instead of being dropped outright.
 */
int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
			     u64 child_nodeid, struct qstr *name, u32 flags)
{
	int err = -ENOTDIR;
	struct inode *parent;
	struct dentry *dir;
	struct dentry *entry;

	parent = fuse_ilookup(fc, parent_nodeid, NULL);
	if (!parent)
		return -ENOENT;

	if (!S_ISDIR(parent->i_mode))
		goto put_parent;

	err = -ENOENT;
	dir = d_find_alias(parent);
	if (!dir)
		goto put_parent;

	entry = start_removing_noperm(dir, name);
	dput(dir);
	if (IS_ERR(entry))
		goto put_parent;

	fuse_dir_changed(parent);
	if (!(flags & FUSE_EXPIRE_ONLY))
		d_invalidate(entry);
	fuse_invalidate_entry_cache(entry);

	if (child_nodeid != 0) {
		inode_lock(d_inode(entry));
		if (get_node_id(d_inode(entry)) != child_nodeid) {
			err = -ENOENT;
			goto badentry;
		}
		if (d_mountpoint(entry)) {
			err = -EBUSY;
			goto badentry;
		}
		if (d_is_dir(entry)) {
			shrink_dcache_parent(entry);
			if (!simple_empty(entry)) {
				err = -ENOTEMPTY;
				goto badentry;
			}
			d_inode(entry)->i_flags |= S_DEAD;
		}
		dont_mount(entry);
		clear_nlink(d_inode(entry));
		err = 0;
 badentry:
		inode_unlock(d_inode(entry));
		if (!err)
			d_delete(entry);
	} else {
		err = 0;
	}

	end_removing(entry);
 put_parent:
	iput(parent);
	return err;
}
1646
fuse_permissible_uidgid(struct fuse_conn * fc)1647 static inline bool fuse_permissible_uidgid(struct fuse_conn *fc)
1648 {
1649 const struct cred *cred = current_cred();
1650
1651 return (uid_eq(cred->euid, fc->user_id) &&
1652 uid_eq(cred->suid, fc->user_id) &&
1653 uid_eq(cred->uid, fc->user_id) &&
1654 gid_eq(cred->egid, fc->group_id) &&
1655 gid_eq(cred->sgid, fc->group_id) &&
1656 gid_eq(cred->gid, fc->group_id));
1657 }
1658
1659 /*
1660 * Calling into a user-controlled filesystem gives the filesystem
1661 * daemon ptrace-like capabilities over the current process. This
1662 * means, that the filesystem daemon is able to record the exact
1663 * filesystem operations performed, and can also control the behavior
1664 * of the requester process in otherwise impossible ways. For example
1665 * it can delay the operation for arbitrary length of time allowing
1666 * DoS against the requester.
1667 *
1668 * For this reason only those processes can call into the filesystem,
1669 * for which the owner of the mount has ptrace privilege. This
1670 * excludes processes started by other users, suid or sgid processes.
1671 */
fuse_allow_current_process(struct fuse_conn * fc)1672 bool fuse_allow_current_process(struct fuse_conn *fc)
1673 {
1674 bool allow;
1675
1676 if (fc->allow_other)
1677 allow = current_in_userns(fc->user_ns);
1678 else
1679 allow = fuse_permissible_uidgid(fc);
1680
1681 if (!allow && allow_sys_admin_access && capable(CAP_SYS_ADMIN))
1682 allow = true;
1683
1684 return allow;
1685 }
1686
fuse_access(struct inode * inode,int mask)1687 static int fuse_access(struct inode *inode, int mask)
1688 {
1689 struct fuse_mount *fm = get_fuse_mount(inode);
1690 FUSE_ARGS(args);
1691 struct fuse_access_in inarg;
1692 int err;
1693
1694 BUG_ON(mask & MAY_NOT_BLOCK);
1695
1696 /*
1697 * We should not send FUSE_ACCESS to the userspace
1698 * when idmapped mounts are enabled as for this case
1699 * we have fc->default_permissions = 1 and access
1700 * permission checks are done on the kernel side.
1701 */
1702 WARN_ON_ONCE(!(fm->sb->s_iflags & SB_I_NOIDMAP));
1703
1704 if (fm->fc->no_access)
1705 return 0;
1706
1707 memset(&inarg, 0, sizeof(inarg));
1708 inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1709 args.opcode = FUSE_ACCESS;
1710 args.nodeid = get_node_id(inode);
1711 args.in_numargs = 1;
1712 args.in_args[0].size = sizeof(inarg);
1713 args.in_args[0].value = &inarg;
1714 err = fuse_simple_request(fm, &args);
1715 if (err == -ENOSYS) {
1716 fm->fc->no_access = 1;
1717 err = 0;
1718 }
1719 return err;
1720 }
1721
/*
 * Refresh attributes for a permission check.  Not possible in RCU-walk
 * mode (MAY_NOT_BLOCK), so ask the VFS to retry in ref-walk mode.
 */
static int fuse_perm_getattr(struct inode *inode, int mask)
{
	if (mask & MAY_NOT_BLOCK)
		return -ECHILD;

	/* Cached ACLs may be stale along with mode/ownership */
	forget_all_cached_acls(inode);
	return fuse_do_getattr(&nop_mnt_idmap, inode, NULL, NULL);
}
1730
1731 /*
1732 * Check permission. The two basic access models of FUSE are:
1733 *
1734 * 1) Local access checking ('default_permissions' mount option) based
1735 * on file mode. This is the plain old disk filesystem permission
1736 * model.
1737 *
1738 * 2) "Remote" access checking, where server is responsible for
1739 * checking permission in each inode operation. An exception to this
1740 * is if ->permission() was invoked from sys_access() in which case an
1741 * access request is sent. Execute permission is still checked
1742 * locally based on file mode.
1743 */
static int fuse_permission(struct mnt_idmap *idmap,
			   struct inode *inode, int mask)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	bool refreshed = false;
	int err = 0;

	if (fuse_is_bad(inode))
		return -EIO;

	if (!fuse_allow_current_process(fc))
		return -EACCES;

	/*
	 * If attributes are needed, refresh them before proceeding
	 */
	if (fc->default_permissions ||
	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
		struct fuse_inode *fi = get_fuse_inode(inode);
		u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;

		/* Refresh when mode/uid/gid were invalidated or timed out */
		if (perm_mask & READ_ONCE(fi->inval_mask) ||
		    time_before64(fi->i_time, get_jiffies_64())) {
			refreshed = true;

			err = fuse_perm_getattr(inode, mask);
			if (err)
				return err;
		}
	}

	if (fc->default_permissions) {
		err = generic_permission(idmap, inode, mask);

		/* If permission is denied, try to refresh file
		   attributes. This is also needed, because the root
		   node will at first have no permissions */
		if (err == -EACCES && !refreshed) {
			err = fuse_perm_getattr(inode, mask);
			if (!err)
				err = generic_permission(idmap,
							 inode, mask);
		}

		/* Note: the opposite of the above test does not
		   exist. So if permissions are revoked this won't be
		   noticed immediately, only after the attribute
		   timeout has expired */
	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
		err = fuse_access(inode, mask);
	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
		/* Remote checking: exec still needs a local x bit set */
		if (!(inode->i_mode & S_IXUGO)) {
			if (refreshed)
				return -EACCES;

			err = fuse_perm_getattr(inode, mask);
			if (!err && !(inode->i_mode & S_IXUGO))
				return -EACCES;
		}
	}
	return err;
}
1806
/*
 * Fetch the symlink target via FUSE_READLINK into @folio and
 * NUL-terminate it.  One byte of the folio is reserved for the
 * terminator, so a maximum-length reply still fits.
 */
static int fuse_readlink_folio(struct inode *inode, struct folio *folio)
{
	struct fuse_mount *fm = get_fuse_mount(inode);
	struct fuse_folio_desc desc = { .length = folio_size(folio) - 1 };
	struct fuse_args_pages ap = {
		.num_folios = 1,
		.folios = &folio,
		.descs = &desc,
	};
	char *link;
	ssize_t res;

	ap.args.opcode = FUSE_READLINK;
	ap.args.nodeid = get_node_id(inode);
	ap.args.out_pages = true;
	ap.args.out_argvar = true;
	ap.args.page_zeroing = true;
	ap.args.out_numargs = 1;
	ap.args.out_args[0].size = desc.length;
	res = fuse_simple_request(fm, &ap.args);

	/* Reading the link target counts as an access */
	fuse_invalidate_atime(inode);

	if (res < 0)
		return res;

	/* res is the actual reply length (out_argvar) */
	if (WARN_ON(res >= PAGE_SIZE))
		return -EIO;

	link = folio_address(folio);
	link[res] = '\0';

	return 0;
}
1841
/*
 * ->get_link() implementation.
 *
 * When the server allows caching symlinks the target is served from
 * the page cache.  Otherwise a transient folio is allocated and filled
 * via FUSE_READLINK; that cannot happen in RCU-walk mode (no dentry),
 * so -ECHILD makes the VFS retry in ref-walk mode.
 */
static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
				 struct delayed_call *callback)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct folio *folio;
	int err;

	err = -EIO;
	if (fuse_is_bad(inode))
		goto out_err;

	if (fc->cache_symlinks)
		return page_get_link_raw(dentry, inode, callback);

	err = -ECHILD;
	if (!dentry)
		goto out_err;

	folio = folio_alloc(GFP_KERNEL, 0);
	err = -ENOMEM;
	if (!folio)
		goto out_err;

	err = fuse_readlink_folio(inode, folio);
	if (err) {
		folio_put(folio);
		goto out_err;
	}

	/* Folio is released via page_put_link() once the walk is done */
	set_delayed_call(callback, page_put_link, folio);

	return folio_address(folio);

out_err:
	return ERR_PTR(err);
}
1878
fuse_dir_open(struct inode * inode,struct file * file)1879 static int fuse_dir_open(struct inode *inode, struct file *file)
1880 {
1881 struct fuse_mount *fm = get_fuse_mount(inode);
1882 int err;
1883
1884 if (fuse_is_bad(inode))
1885 return -EIO;
1886
1887 err = generic_file_open(inode, file);
1888 if (err)
1889 return err;
1890
1891 err = fuse_do_open(fm, get_node_id(inode), file, true);
1892 if (!err) {
1893 struct fuse_file *ff = file->private_data;
1894
1895 /*
1896 * Keep handling FOPEN_STREAM and FOPEN_NONSEEKABLE for
1897 * directories for backward compatibility, though it's unlikely
1898 * to be useful.
1899 */
1900 if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE))
1901 nonseekable_open(inode, file);
1902 if (!(ff->open_flags & FOPEN_KEEP_CACHE))
1903 invalidate_inode_pages2(inode->i_mapping);
1904 }
1905
1906 return err;
1907 }
1908
/* ->release() for directories: isdir variant of the common release */
static int fuse_dir_release(struct inode *inode, struct file *file)
{
	fuse_release_common(file, true);

	return 0;
}
1915
/*
 * ->fsync() for directories: FUSE_FSYNCDIR under the inode lock.
 * An -ENOSYS reply is cached; from then on fsyncdir silently succeeds.
 */
static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
			  int datasync)
{
	struct inode *inode = file->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	int ret;

	if (fuse_is_bad(inode))
		return -EIO;

	if (fc->no_fsyncdir)
		return 0;

	inode_lock(inode);
	ret = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
	if (ret == -ENOSYS) {
		fc->no_fsyncdir = 1;
		ret = 0;
	}
	inode_unlock(inode);

	return ret;
}
1939
/* Directory ioctl: forwarded to the server with FUSE_IOCTL_DIR set */
static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
			   unsigned long arg)
{
	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);

	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
	if (fc->minor < 18)
		return -ENOTTY;

	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
}
1951
/* 32-bit compat variant of fuse_dir_ioctl() */
static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
				  unsigned long arg)
{
	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);

	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
	if (fc->minor < 18)
		return -ENOTTY;

	return fuse_ioctl_common(file, cmd, arg,
				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
}
1963
/*
 * Decide whether an mtime change should be sent to the server as part
 * of a setattr request.
 */
static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
{
	/* Explicitly set, or kernel i_mtime is the official one: send it */
	if ((ivalid & ATTR_MTIME_SET) || trust_local_mtime)
		return true;

	/* open(O_TRUNC) or ftruncate(): let the server update mtime */
	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
		return false;

	/* In all other cases update */
	return true;
}
1981
/*
 * Convert VFS setattr arguments (struct iattr) into the FUSE wire
 * format.  UIDs/GIDs are mapped through the mount idmap into the
 * connection's user namespace.  With @trust_local_cmtime the kernel's
 * c/mtime are authoritative and sent explicitly instead of letting the
 * server substitute its own "now".
 */
static void iattr_to_fattr(struct mnt_idmap *idmap, struct fuse_conn *fc,
			   struct iattr *iattr, struct fuse_setattr_in *arg,
			   bool trust_local_cmtime)
{
	unsigned ivalid = iattr->ia_valid;

	if (ivalid & ATTR_MODE)
		arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode;

	if (ivalid & ATTR_UID) {
		kuid_t fsuid = from_vfsuid(idmap, fc->user_ns, iattr->ia_vfsuid);

		arg->valid |= FATTR_UID;
		arg->uid = from_kuid(fc->user_ns, fsuid);
	}

	if (ivalid & ATTR_GID) {
		kgid_t fsgid = from_vfsgid(idmap, fc->user_ns, iattr->ia_vfsgid);

		arg->valid |= FATTR_GID;
		arg->gid = from_kgid(fc->user_ns, fsgid);
	}

	if (ivalid & ATTR_SIZE)
		arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
	if (ivalid & ATTR_ATIME) {
		arg->valid |= FATTR_ATIME;
		arg->atime = iattr->ia_atime.tv_sec;
		arg->atimensec = iattr->ia_atime.tv_nsec;
		/* No explicit time given: let the server use its "now" */
		if (!(ivalid & ATTR_ATIME_SET))
			arg->valid |= FATTR_ATIME_NOW;
	}
	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
		arg->valid |= FATTR_MTIME;
		arg->mtime = iattr->ia_mtime.tv_sec;
		arg->mtimensec = iattr->ia_mtime.tv_nsec;
		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
			arg->valid |= FATTR_MTIME_NOW;
	}
	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
		arg->valid |= FATTR_CTIME;
		arg->ctime = iattr->ia_ctime.tv_sec;
		arg->ctimensec = iattr->ia_ctime.tv_nsec;
	}
}
2027
2028 /*
2029 * Prevent concurrent writepages on inode
2030 *
2031 * This is done by adding a negative bias to the inode write counter
2032 * and waiting for all pending writes to finish.
2033 */
void fuse_set_nowrite(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	BUG_ON(!inode_is_locked(inode));

	spin_lock(&fi->lock);
	BUG_ON(fi->writectr < 0);
	/* Bias the write counter negative so new writepages get queued */
	fi->writectr += FUSE_NOWRITE;
	spin_unlock(&fi->lock);
	/* Counter reaches exactly FUSE_NOWRITE once pending writes drain */
	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
}
2046
2047 /*
2048 * Allow writepages on inode
2049 *
2050 * Remove the bias from the writecounter and send any queued
2051 * writepages.
2052 */
/*
 * Remove the FUSE_NOWRITE bias and restart queued writepages.
 * Caller must hold fi->lock (see fuse_release_nowrite()).
 */
static void __fuse_release_nowrite(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	BUG_ON(fi->writectr != FUSE_NOWRITE);
	fi->writectr = 0;
	fuse_flush_writepages(inode);
}
2061
/* Lift the nowrite bias set by fuse_set_nowrite(), under fi->lock */
void fuse_release_nowrite(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	spin_lock(&fi->lock);
	__fuse_release_nowrite(inode);
	spin_unlock(&fi->lock);
}
2070
fuse_setattr_fill(struct fuse_conn * fc,struct fuse_args * args,struct inode * inode,struct fuse_setattr_in * inarg_p,struct fuse_attr_out * outarg_p)2071 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
2072 struct inode *inode,
2073 struct fuse_setattr_in *inarg_p,
2074 struct fuse_attr_out *outarg_p)
2075 {
2076 args->opcode = FUSE_SETATTR;
2077 args->nodeid = get_node_id(inode);
2078 args->in_numargs = 1;
2079 args->in_args[0].size = sizeof(*inarg_p);
2080 args->in_args[0].value = inarg_p;
2081 args->out_numargs = 1;
2082 args->out_args[0].size = sizeof(*outarg_p);
2083 args->out_args[0].value = outarg_p;
2084 }
2085
2086 /*
2087 * Flush inode->i_mtime to the server
2088 */
int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
{
	struct fuse_mount *fm = get_fuse_mount(inode);
	FUSE_ARGS(args);
	struct fuse_setattr_in inarg;
	struct fuse_attr_out outarg;

	/* memset (not = {0}) so struct padding sent to the server is zeroed */
	memset(&inarg, 0, sizeof(inarg));
	memset(&outarg, 0, sizeof(outarg));

	inarg.valid = FATTR_MTIME;
	inarg.mtime = inode_get_mtime_sec(inode);
	inarg.mtimensec = inode_get_mtime_nsec(inode);
	/* ctime flushing requires FUSE protocol minor version >= 23 */
	if (fm->fc->minor >= 23) {
		inarg.valid |= FATTR_CTIME;
		inarg.ctime = inode_get_ctime_sec(inode);
		inarg.ctimensec = inode_get_ctime_nsec(inode);
	}
	/* Pass the open file handle to the server when one is available */
	if (ff) {
		inarg.valid |= FATTR_FH;
		inarg.fh = ff->fh;
	}
	fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);

	return fuse_simple_request(fm, &args);
}
2115
2116 /*
2117 * Set attributes, and at the same time refresh them.
2118 *
2119 * Truncation is slightly complicated, because the 'truncate' request
2120 * may fail, in which case we don't want to touch the mapping.
2121 * vmtruncate() doesn't allow for this case, so do the rlimit checking
2122 * and the actual truncation by hand.
2123 */
int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
		    struct iattr *attr, struct file *file)
{
	struct inode *inode = d_inode(dentry);
	struct fuse_mount *fm = get_fuse_mount(inode);
	struct fuse_conn *fc = fm->fc;
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct address_space *mapping = inode->i_mapping;
	FUSE_ARGS(args);
	struct fuse_setattr_in inarg;
	struct fuse_attr_out outarg;
	bool is_truncate = false;
	/* With writeback cache, the kernel's c/mtime are authoritative */
	bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode);
	loff_t oldsize;
	int err;
	bool trust_local_cmtime = is_wb;
	bool fault_blocked = false;
	u64 attr_version;

	if (!fc->default_permissions)
		attr->ia_valid |= ATTR_FORCE;

	err = setattr_prepare(idmap, dentry, attr);
	if (err)
		return err;

	if (attr->ia_valid & ATTR_SIZE) {
		/* Size changes only make sense on regular files */
		if (WARN_ON(!S_ISREG(inode->i_mode)))
			return -EIO;
		is_truncate = true;
	}

	/* DAX truncate: block page faults until the size change is done */
	if (FUSE_IS_DAX(inode) && is_truncate) {
		filemap_invalidate_lock(mapping);
		fault_blocked = true;
		err = fuse_dax_break_layouts(inode, 0, -1);
		if (err) {
			filemap_invalidate_unlock(mapping);
			return err;
		}
	}

	if (attr->ia_valid & ATTR_OPEN) {
		/* This is coming from open(..., ... | O_TRUNC); */
		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
		WARN_ON(attr->ia_size != 0);
		if (fc->atomic_o_trunc) {
			/*
			 * No need to send request to userspace, since actual
			 * truncation has already been done by OPEN. But still
			 * need to truncate page cache.
			 */
			i_size_write(inode, 0);
			truncate_pagecache(inode, 0);
			goto out;
		}
		file = NULL;
	}

	/* Flush dirty data/metadata before non-truncate SETATTR */
	if (is_wb &&
	    attr->ia_valid &
	    (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
	     ATTR_TIMES_SET)) {
		err = write_inode_now(inode, true);
		if (err)
			return err;

		/* Drain in-flight writepages; immediately re-allow writes */
		fuse_set_nowrite(inode);
		fuse_release_nowrite(inode);
	}

	if (is_truncate) {
		/* Block writepages for the duration of the truncate */
		fuse_set_nowrite(inode);
		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
		if (trust_local_cmtime && attr->ia_size != inode->i_size)
			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
	}

	memset(&inarg, 0, sizeof(inarg));
	memset(&outarg, 0, sizeof(outarg));
	iattr_to_fattr(idmap, fc, attr, &inarg, trust_local_cmtime);
	if (file) {
		struct fuse_file *ff = file->private_data;
		inarg.valid |= FATTR_FH;
		inarg.fh = ff->fh;
	}

	/* Kill suid/sgid for non-directory chown unconditionally */
	if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
	    attr->ia_valid & (ATTR_UID | ATTR_GID))
		inarg.valid |= FATTR_KILL_SUIDGID;

	if (attr->ia_valid & ATTR_SIZE) {
		/* For mandatory locking in truncate */
		inarg.valid |= FATTR_LOCKOWNER;
		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);

		/* Kill suid/sgid for truncate only if no CAP_FSETID */
		if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
			inarg.valid |= FATTR_KILL_SUIDGID;
	}

	/* Snapshot before the request, to detect concurrent attr refreshes */
	attr_version = fuse_get_attr_version(fm->fc);
	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
	err = fuse_simple_request(fm, &args);
	if (err) {
		/* Interrupted: server state is unknown, drop cached attrs */
		if (err == -EINTR)
			fuse_invalidate_attr(inode);
		goto error;
	}

	if (fuse_invalid_attr(&outarg.attr) ||
	    inode_wrong_type(inode, outarg.attr.mode)) {
		fuse_make_bad(inode);
		err = -EIO;
		goto error;
	}

	spin_lock(&fi->lock);
	/* the kernel maintains i_mtime locally */
	if (trust_local_cmtime) {
		if (attr->ia_valid & ATTR_MTIME)
			inode_set_mtime_to_ts(inode, attr->ia_mtime);
		if (attr->ia_valid & ATTR_CTIME)
			inode_set_ctime_to_ts(inode, attr->ia_ctime);
		/* FIXME: clear I_DIRTY_SYNC? */
	}

	if (fi->attr_version > attr_version) {
		/*
		 * Apply attributes, for example for fsnotify_change(), but set
		 * attribute timeout to zero.
		 */
		outarg.attr_valid = outarg.attr_valid_nsec = 0;
	}

	fuse_change_attributes_common(inode, &outarg.attr, NULL,
				      ATTR_TIMEOUT(&outarg),
				      fuse_get_cache_mask(inode), 0);
	oldsize = inode->i_size;
	/* see the comment in fuse_change_attributes() */
	if (!is_wb || is_truncate)
		i_size_write(inode, outarg.attr.size);

	if (is_truncate) {
		/* NOTE: this may release/reacquire fi->lock */
		__fuse_release_nowrite(inode);
	}
	spin_unlock(&fi->lock);

	/*
	 * Only call invalidate_inode_pages2() after removing
	 * FUSE_NOWRITE, otherwise fuse_launder_folio() would deadlock.
	 */
	if ((is_truncate || !is_wb) &&
	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
		truncate_pagecache(inode, outarg.attr.size);
		invalidate_inode_pages2(mapping);
	}

	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
out:
	if (fault_blocked)
		filemap_invalidate_unlock(mapping);

	return 0;

error:
	/* Undo the nowrite bias taken above before reporting failure */
	if (is_truncate)
		fuse_release_nowrite(inode);

	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);

	if (fault_blocked)
		filemap_invalidate_unlock(mapping);
	return err;
}
2302
fuse_setattr(struct mnt_idmap * idmap,struct dentry * entry,struct iattr * attr)2303 static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry,
2304 struct iattr *attr)
2305 {
2306 struct inode *inode = d_inode(entry);
2307 struct fuse_conn *fc = get_fuse_conn(inode);
2308 struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
2309 int ret;
2310
2311 if (fuse_is_bad(inode))
2312 return -EIO;
2313
2314 if (!fuse_allow_current_process(get_fuse_conn(inode)))
2315 return -EACCES;
2316
2317 if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
2318 attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
2319 ATTR_MODE);
2320
2321 /*
2322 * The only sane way to reliably kill suid/sgid is to do it in
2323 * the userspace filesystem
2324 *
2325 * This should be done on write(), truncate() and chown().
2326 */
2327 if (!fc->handle_killpriv && !fc->handle_killpriv_v2) {
2328 /*
2329 * ia_mode calculation may have used stale i_mode.
2330 * Refresh and recalculate.
2331 */
2332 ret = fuse_do_getattr(idmap, inode, NULL, file);
2333 if (ret)
2334 return ret;
2335
2336 attr->ia_mode = inode->i_mode;
2337 if (inode->i_mode & S_ISUID) {
2338 attr->ia_valid |= ATTR_MODE;
2339 attr->ia_mode &= ~S_ISUID;
2340 }
2341 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
2342 attr->ia_valid |= ATTR_MODE;
2343 attr->ia_mode &= ~S_ISGID;
2344 }
2345 }
2346 }
2347 if (!attr->ia_valid)
2348 return 0;
2349
2350 ret = fuse_do_setattr(idmap, entry, attr, file);
2351 if (!ret) {
2352 /*
2353 * If filesystem supports acls it may have updated acl xattrs in
2354 * the filesystem, so forget cached acls for the inode.
2355 */
2356 if (fc->posix_acl)
2357 forget_all_cached_acls(inode);
2358
2359 /* Directory mode changed, may need to revalidate access */
2360 if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
2361 fuse_invalidate_entry_cache(entry);
2362 }
2363 return ret;
2364 }
2365
/*
 * ->getattr() entry point.  Refuses unauthorized processes, except
 * that an empty request_mask is answered with st_dev alone rather
 * than an error.
 */
static int fuse_getattr(struct mnt_idmap *idmap,
			const struct path *path, struct kstat *stat,
			u32 request_mask, unsigned int flags)
{
	struct inode *inode = d_inode(path->dentry);
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (fuse_is_bad(inode))
		return -EIO;

	if (fuse_allow_current_process(fc))
		return fuse_update_get_attr(idmap, inode, NULL, stat,
					    request_mask, flags);

	/*
	 * Access denied.  If the user explicitly requested *nothing*
	 * then don't error out, but return st_dev only.
	 */
	if (request_mask)
		return -EACCES;

	stat->result_mask = 0;
	stat->dev = inode->i_sb->s_dev;
	return 0;
}
2391
/* Inode operations for FUSE directories */
static const struct inode_operations fuse_dir_inode_operations = {
	.lookup		= fuse_lookup,
	.mkdir		= fuse_mkdir,
	.symlink	= fuse_symlink,
	.unlink		= fuse_unlink,
	.rmdir		= fuse_rmdir,
	.rename		= fuse_rename2,
	.link		= fuse_link,
	.setattr	= fuse_setattr,
	.create		= fuse_create,
	.atomic_open	= fuse_atomic_open,
	.tmpfile	= fuse_tmpfile,
	.mknod		= fuse_mknod,
	.permission	= fuse_permission,
	.getattr	= fuse_getattr,
	.listxattr	= fuse_listxattr,
	.get_inode_acl	= fuse_get_inode_acl,
	.get_acl	= fuse_get_acl,
	.set_acl	= fuse_set_acl,
	.fileattr_get	= fuse_fileattr_get,
	.fileattr_set	= fuse_fileattr_set,
};
2414
/* File operations for opened FUSE directories */
static const struct file_operations fuse_dir_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.iterate_shared	= fuse_readdir,
	.open		= fuse_dir_open,
	.release	= fuse_dir_release,
	.fsync		= fuse_dir_fsync,
	.unlocked_ioctl	= fuse_dir_ioctl,
	.compat_ioctl	= fuse_dir_compat_ioctl,
	.setlease	= simple_nosetlease,
};
2426
/* Inode operations shared by regular files and special files */
static const struct inode_operations fuse_common_inode_operations = {
	.setattr	= fuse_setattr,
	.permission	= fuse_permission,
	.getattr	= fuse_getattr,
	.listxattr	= fuse_listxattr,
	.get_inode_acl	= fuse_get_inode_acl,
	.get_acl	= fuse_get_acl,
	.set_acl	= fuse_set_acl,
	.fileattr_get	= fuse_fileattr_get,
	.fileattr_set	= fuse_fileattr_set,
};
2438
/* Inode operations for FUSE symlinks */
static const struct inode_operations fuse_symlink_inode_operations = {
	.setattr	= fuse_setattr,
	.get_link	= fuse_get_link,
	.getattr	= fuse_getattr,
	.listxattr	= fuse_listxattr,
};
2445
/* Install the common inode operations (regular and special files) */
void fuse_init_common(struct inode *inode)
{
	inode->i_op = &fuse_common_inode_operations;
}
2450
fuse_init_dir(struct inode * inode)2451 void fuse_init_dir(struct inode *inode)
2452 {
2453 struct fuse_inode *fi = get_fuse_inode(inode);
2454
2455 inode->i_op = &fuse_dir_inode_operations;
2456 inode->i_fop = &fuse_dir_operations;
2457
2458 spin_lock_init(&fi->rdc.lock);
2459 fi->rdc.cached = false;
2460 fi->rdc.size = 0;
2461 fi->rdc.pos = 0;
2462 fi->rdc.version = 0;
2463 }
2464
fuse_symlink_read_folio(struct file * null,struct folio * folio)2465 static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
2466 {
2467 int err = fuse_readlink_folio(folio->mapping->host, folio);
2468
2469 if (!err)
2470 folio_mark_uptodate(folio);
2471
2472 folio_unlock(folio);
2473
2474 return err;
2475 }
2476
/* Address-space operations for symlink target pages */
static const struct address_space_operations fuse_symlink_aops = {
	.read_folio	= fuse_symlink_read_folio,
};
2480
/* Install symlink inode/address-space operations */
void fuse_init_symlink(struct inode *inode)
{
	inode->i_op = &fuse_symlink_inode_operations;
	inode->i_data.a_ops = &fuse_symlink_aops;
	/* Link targets are read via kmap; keep them out of highmem */
	inode_nohighmem(inode);
}
2487