xref: /linux/fs/fuse/dir.c (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3   FUSE: Filesystem in Userspace
4   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
5 */
6 
7 #include "dev.h"
8 #include "fuse_i.h"
9 
10 #include <linux/pagemap.h>
11 #include <linux/file.h>
12 #include <linux/fs_context.h>
13 #include <linux/moduleparam.h>
14 #include <linux/sched.h>
15 #include <linux/namei.h>
16 #include <linux/slab.h>
17 #include <linux/xattr.h>
18 #include <linux/iversion.h>
19 #include <linux/posix_acl.h>
20 #include <linux/security.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 
/*
 * Boolean module parameter, readable and writable at runtime (mode 0644).
 * Its meaning is given by the MODULE_PARM_DESC text below; the code that
 * consults it is not in this part of the file.
 */
static bool __read_mostly allow_sys_admin_access;
module_param(allow_sys_admin_access, bool, 0644);
MODULE_PARM_DESC(allow_sys_admin_access,
		 "Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check");
28 
/*
 * One bucket of the global dentry hash: a red-black tree of fuse_dentry
 * nodes ordered by their ->time, guarded by a per-bucket spinlock.
 */
struct dentry_bucket {
	struct rb_root tree;	/* fuse_dentry nodes, sorted by ->time */
	spinlock_t lock;	/* taken around every access to @tree */
};
33 
/* The dentry hash has 2^FUSE_HASH_BITS buckets, selected with hash_ptr() */
#define FUSE_HASH_BITS	5
#define FUSE_HASH_SIZE	(1 << FUSE_HASH_BITS)
static struct dentry_bucket dentry_hash[FUSE_HASH_SIZE];
/* Delayed work that runs fuse_dentry_tree_work() to drop expired dentries */
struct delayed_work dentry_tree_work;

/* Smallest non-zero period, in seconds, accepted for the inval_wq parameter */
#define FUSE_DENTRY_INVAL_FREQ_MIN 5
41 
42 unsigned __read_mostly inval_wq;
43 static int inval_wq_set(const char *val, const struct kernel_param *kp)
44 {
45 	unsigned int num;
46 	unsigned int old = inval_wq;
47 	int ret;
48 
49 	if (!val)
50 		return -EINVAL;
51 
52 	ret = kstrtouint(val, 0, &num);
53 	if (ret)
54 		return ret;
55 
56 	if ((num < FUSE_DENTRY_INVAL_FREQ_MIN) && (num != 0))
57 		return -EINVAL;
58 
59 	/* This should prevent overflow in secs_to_jiffies() */
60 	if (num > USHRT_MAX)
61 		return -EINVAL;
62 
63 	*((unsigned int *)kp->arg) = num;
64 
65 	if (num && !old)
66 		schedule_delayed_work(&dentry_tree_work,
67 				      secs_to_jiffies(num));
68 	else if (!num && old)
69 		cancel_delayed_work_sync(&dentry_tree_work);
70 
71 	return 0;
72 }
/*
 * inval_wq is registered with a custom setter (inval_wq_set) so that writes
 * are range-checked and the delayed work is started or cancelled; reads use
 * the stock unsigned-int getter.
 */
static const struct kernel_param_ops inval_wq_ops = {
	.set = inval_wq_set,
	.get = param_get_uint,
};
module_param_cb(inval_wq, &inval_wq_ops, &inval_wq, 0644);
__MODULE_PARM_TYPE(inval_wq, "uint");
MODULE_PARM_DESC(inval_wq,
		 "Dentries invalidation work queue period in secs (>= "
		 __stringify(FUSE_DENTRY_INVAL_FREQ_MIN) ").");
82 
83 static inline struct dentry_bucket *get_dentry_bucket(struct dentry *dentry)
84 {
85 	int i = hash_ptr(dentry, FUSE_HASH_BITS);
86 
87 	return &dentry_hash[i];
88 }
89 
90 static void fuse_advise_use_readdirplus(struct inode *dir)
91 {
92 	struct fuse_inode *fi = get_fuse_inode(dir);
93 
94 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
95 }
96 
/*
 * Per-dentry private data, allocated in fuse_dentry_init() and stored in
 * dentry->d_fsdata.
 */
struct fuse_dentry {
	u64 time;	/* jiffies64 value compared against get_jiffies_64() */
	union {
		struct rcu_head rcu;	/* used by kfree_rcu() on release */
		struct rb_node node;	/* link in a dentry_bucket tree */
	};
	struct dentry *dentry;	/* back pointer to the owning dentry */
};
105 
106 static void __fuse_dentry_tree_del_node(struct fuse_dentry *fd,
107 					struct dentry_bucket *bucket)
108 {
109 	if (!RB_EMPTY_NODE(&fd->node)) {
110 		rb_erase(&fd->node, &bucket->tree);
111 		RB_CLEAR_NODE(&fd->node);
112 	}
113 }
114 
115 static void fuse_dentry_tree_del_node(struct dentry *dentry)
116 {
117 	struct fuse_dentry *fd = dentry->d_fsdata;
118 	struct dentry_bucket *bucket = get_dentry_bucket(dentry);
119 
120 	spin_lock(&bucket->lock);
121 	__fuse_dentry_tree_del_node(fd, bucket);
122 	spin_unlock(&bucket->lock);
123 }
124 
125 static void fuse_dentry_tree_add_node(struct dentry *dentry)
126 {
127 	struct fuse_dentry *fd = dentry->d_fsdata;
128 	struct dentry_bucket *bucket;
129 	struct fuse_dentry *cur;
130 	struct rb_node **p, *parent = NULL;
131 
132 	if (!inval_wq)
133 		return;
134 
135 	bucket = get_dentry_bucket(dentry);
136 
137 	spin_lock(&bucket->lock);
138 
139 	__fuse_dentry_tree_del_node(fd, bucket);
140 
141 	p = &bucket->tree.rb_node;
142 	while (*p) {
143 		parent = *p;
144 		cur = rb_entry(*p, struct fuse_dentry, node);
145 		if (fd->time < cur->time)
146 			p = &(*p)->rb_left;
147 		else
148 			p = &(*p)->rb_right;
149 	}
150 	rb_link_node(&fd->node, parent, p);
151 	rb_insert_color(&fd->node, &bucket->tree);
152 	spin_unlock(&bucket->lock);
153 }
154 
/*
 * Delayed-work handler which, when enabled, periodically checks for expired
 * dentries in the dentries tree.
 *
 * Each bucket is scanned starting from its leftmost node (smallest ->time).
 * Nodes whose time is before the current jiffies are erased from the tree
 * and their dentries moved to a local dispose list; the scan of a bucket
 * stops at the first node that has not expired.  The dispose list is shrunk
 * once all buckets have been visited, and the work re-arms itself as long as
 * inval_wq is non-zero.
 */
static void fuse_dentry_tree_work(struct work_struct *work)
{
	LIST_HEAD(dispose);
	struct fuse_dentry *fd;
	struct rb_node *node;
	int i;

	for (i = 0; i < FUSE_HASH_SIZE; i++) {
		spin_lock(&dentry_hash[i].lock);
		node = rb_first(&dentry_hash[i].tree);
		while (node) {
			fd = rb_entry(node, struct fuse_dentry, node);
			/* Tree is time-ordered: first unexpired node ends the scan */
			if (!time_before64(fd->time, get_jiffies_64()))
				break;

			rb_erase(&fd->node, &dentry_hash[i].tree);
			RB_CLEAR_NODE(&fd->node);
			spin_lock(&fd->dentry->d_lock);
			/* If dentry is still referenced, let next dput release it */
			fd->dentry->d_flags |= DCACHE_OP_DELETE;
			__move_to_shrink_list(fd->dentry, &dispose);
			spin_unlock(&fd->dentry->d_lock);
			if (need_resched()) {
				/* Do not reschedule with the bucket lock held */
				spin_unlock(&dentry_hash[i].lock);
				cond_resched();
				spin_lock(&dentry_hash[i].lock);
			}
			/* Restart from the leftmost node after the erase */
			node = rb_first(&dentry_hash[i].tree);
		}
		spin_unlock(&dentry_hash[i].lock);
	}
	shrink_dentry_list(&dispose);

	if (inval_wq)
		schedule_delayed_work(&dentry_tree_work,
				      secs_to_jiffies(inval_wq));
}
196 
197 void fuse_epoch_work(struct work_struct *work)
198 {
199 	struct fuse_conn *fc = container_of(work, struct fuse_conn,
200 					    epoch_work);
201 	struct fuse_mount *fm;
202 	struct inode *inode;
203 
204 	down_read(&fc->killsb);
205 
206 	inode = fuse_ilookup(fc, FUSE_ROOT_ID, &fm);
207 	if (inode) {
208 		iput(inode);
209 		/* Remove all possible active references to cached inodes */
210 		shrink_dcache_sb(fm->sb);
211 	} else
212 		pr_warn("Failed to get root inode");
213 
214 	up_read(&fc->killsb);
215 }
216 
217 void fuse_dentry_tree_init(void)
218 {
219 	int i;
220 
221 	for (i = 0; i < FUSE_HASH_SIZE; i++) {
222 		spin_lock_init(&dentry_hash[i].lock);
223 		dentry_hash[i].tree = RB_ROOT;
224 	}
225 	INIT_DELAYED_WORK(&dentry_tree_work, fuse_dentry_tree_work);
226 }
227 
228 void fuse_dentry_tree_cleanup(void)
229 {
230 	int i;
231 
232 	inval_wq = 0;
233 	cancel_delayed_work_sync(&dentry_tree_work);
234 
235 	for (i = 0; i < FUSE_HASH_SIZE; i++)
236 		WARN_ON_ONCE(!RB_EMPTY_ROOT(&dentry_hash[i].tree));
237 }
238 
239 static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
240 {
241 	((struct fuse_dentry *) dentry->d_fsdata)->time = time;
242 }
243 
244 static inline u64 fuse_dentry_time(const struct dentry *entry)
245 {
246 	return ((struct fuse_dentry *) entry->d_fsdata)->time;
247 }
248 
249 static void fuse_dentry_settime(struct dentry *dentry, u64 time)
250 {
251 	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
252 	bool delete = !time && fc->delete_stale;
253 	/*
254 	 * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
255 	 * Don't care about races, either way it's just an optimization
256 	 */
257 	if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
258 	    (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
259 		spin_lock(&dentry->d_lock);
260 		if (!delete)
261 			dentry->d_flags &= ~DCACHE_OP_DELETE;
262 		else
263 			dentry->d_flags |= DCACHE_OP_DELETE;
264 		spin_unlock(&dentry->d_lock);
265 	}
266 
267 	__fuse_dentry_settime(dentry, time);
268 	fuse_dentry_tree_add_node(dentry);
269 }
270 
/*
 * FUSE caches dentries and attributes with separate timeouts.  The
 * time in jiffies until which the dentry/attributes are valid is stored
 * in the fuse_dentry attached to dentry->d_fsdata and in
 * fuse_inode->i_time respectively.
 */
276 
277 /*
278  * Calculate the time in jiffies until a dentry/attributes are valid
279  */
280 u64 fuse_time_to_jiffies(u64 sec, u32 nsec)
281 {
282 	if (sec || nsec) {
283 		struct timespec64 ts = {
284 			sec,
285 			min_t(u32, nsec, NSEC_PER_SEC - 1)
286 		};
287 
288 		return get_jiffies_64() + timespec64_to_jiffies(&ts);
289 	} else
290 		return 0;
291 }
292 
293 /*
294  * Set dentry and possibly attribute timeouts from the lookup/mk*
295  * replies
296  */
297 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
298 {
299 	fuse_dentry_settime(entry,
300 		fuse_time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
301 }
302 
303 void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
304 {
305 	set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask);
306 }
307 
/*
 * Mark the attributes as stale, so that at the next call to
 * ->getattr() they will be fetched from userspace.
 *
 * Implemented by invalidating every attribute in STATX_BASIC_STATS.
 */
void fuse_invalidate_attr(struct inode *inode)
{
	fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
}
316 
/*
 * Called after an operation that changed the contents of directory @dir:
 * invalidates the FUSE_STATX_MODDIR attributes and possibly bumps the
 * directory's i_version.
 */
static void fuse_dir_changed(struct inode *dir)
{
	fuse_invalidate_attr_mask(dir, FUSE_STATX_MODDIR);
	inode_maybe_inc_iversion(dir, false);
}
322 
323 /*
324  * Mark the attributes as stale due to an atime change.  Avoid the invalidate if
325  * atime is not used.
326  */
327 void fuse_invalidate_atime(struct inode *inode)
328 {
329 	if (!IS_RDONLY(inode))
330 		fuse_invalidate_attr_mask(inode, STATX_ATIME);
331 }
332 
/*
 * Just mark the entry as stale, so that the next attempt to look it up
 * will result in a new lookup call to userspace.
 *
 * This is called when a dentry is about to become negative and the
 * timeout is unknown (unlink, rmdir, rename and in some cases
 * lookup).
 *
 * Staleness is expressed by setting the entry time to 0.
 */
void fuse_invalidate_entry_cache(struct dentry *entry)
{
	fuse_dentry_settime(entry, 0);
}
345 
/*
 * Same as fuse_invalidate_entry_cache(), but also try to remove the
 * dentry from the hash (via d_invalidate()) before marking it stale.
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);
	fuse_invalidate_entry_cache(entry);
}
355 
356 static void fuse_lookup_init(struct fuse_args *args, u64 nodeid,
357 			     const struct qstr *name,
358 			     struct fuse_entry_out *outarg)
359 {
360 	memset(outarg, 0, sizeof(struct fuse_entry_out));
361 	args->opcode = FUSE_LOOKUP;
362 	args->nodeid = nodeid;
363 	args->in_numargs = 3;
364 	fuse_set_zero_arg0(args);
365 	args->in_args[1].size = name->len;
366 	args->in_args[1].value = name->name;
367 	args->in_args[2].size = 1;
368 	args->in_args[2].value = "";
369 	args->out_numargs = 1;
370 	args->out_args[0].size = sizeof(struct fuse_entry_out);
371 	args->out_args[0].value = outarg;
372 }
373 
/*
 * Check whether the dentry is still valid
 *
 * If the entry validity timeout has expired and the dentry is
 * positive, try to redo the lookup.  If the lookup results in a
 * different inode, then let the VFS invalidate the dentry and redo
 * the lookup once more.  If the lookup results in the same inode,
 * then refresh the attributes, timeouts and mark the dentry valid.
 *
 * Returns 1 if the dentry is valid, 0 if it is invalid, or a negative
 * errno: -ECHILD when a lookup (or readdirplus bookkeeping) would be needed
 * under LOOKUP_RCU, -ENOMEM when the forget link cannot be allocated, and
 * -ENOMEM/-EINTR passed through from the lookup request.
 */
static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
				  struct dentry *entry, unsigned int flags)
{
	struct inode *inode;
	struct fuse_mount *fm;
	struct fuse_conn *fc;
	struct fuse_inode *fi;
	int ret;

	/* A dentry stamped with an older epoch than the connection's is invalid */
	fc = get_fuse_conn_super(dir->i_sb);
	if (entry->d_time < atomic_read(&fc->epoch))
		goto invalid;

	inode = d_inode_rcu(entry);
	if (inode && fuse_is_bad(inode))
		goto invalid;
	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
		 (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) {
		struct fuse_entry_out outarg;
		FUSE_ARGS(args);
		struct fuse_forget_link *forget;
		u64 attr_version;

		/* For negative dentries, always do a fresh lookup */
		if (!inode)
			goto invalid;

		/* No request is sent in RCU mode; -ECHILD is returned instead */
		ret = -ECHILD;
		if (flags & LOOKUP_RCU)
			goto out;

		fm = get_fuse_mount(inode);

		forget = fuse_alloc_forget();
		ret = -ENOMEM;
		if (!forget)
			goto out;

		/* Sampled before the request; passed to fuse_change_attributes() */
		attr_version = fuse_get_attr_version(fm->fc);

		fuse_lookup_init(&args, get_node_id(dir), name, &outarg);
		ret = fuse_simple_request(fm, &args);
		/* Zero nodeid is same as -ENOENT */
		if (!ret && !outarg.nodeid)
			ret = -ENOENT;
		if (!ret) {
			fi = get_fuse_inode(inode);
			/*
			 * A different node, or a change in submount status:
			 * queue a forget for the nodeid just looked up (the
			 * forget link is handed over and not freed here) and
			 * report the dentry as invalid.
			 */
			if (outarg.nodeid != get_node_id(inode) ||
			    (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
				fuse_chan_queue_forget(fm->fc->chan, forget,
						  outarg.nodeid, 1);
				goto invalid;
			}
			/* Same node: account for the successful lookup */
			spin_lock(&fi->lock);
			fi->nlookup++;
			spin_unlock(&fi->lock);
		}
		kfree(forget);
		/* These two errors are returned as-is rather than "invalid" */
		if (ret == -ENOMEM || ret == -EINTR)
			goto out;
		if (ret || fuse_invalid_attr(&outarg.attr) ||
		    fuse_stale_inode(inode, outarg.generation, &outarg.attr))
			goto invalid;

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &outarg.attr, NULL,
				       ATTR_TIMEOUT(&outarg),
				       attr_version);
		fuse_change_entry_timeout(entry, &outarg);
	} else if (inode) {
		/* Still within the timeout: only readdirplus bookkeeping remains */
		fi = get_fuse_inode(inode);
		if (flags & LOOKUP_RCU) {
			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
				return -ECHILD;
		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
			fuse_advise_use_readdirplus(dir);
		}
	}
	ret = 1;
out:
	return ret;

invalid:
	ret = 0;
	goto out;
}
469 
470 static int fuse_dentry_init(struct dentry *dentry)
471 {
472 	struct fuse_dentry *fd;
473 
474 	fd = kzalloc_obj(struct fuse_dentry,
475 			 GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
476 	if (!fd)
477 		return -ENOMEM;
478 
479 	fd->dentry = dentry;
480 	RB_CLEAR_NODE(&fd->node);
481 	dentry->d_fsdata = fd;
482 	/*
483 	 * Initialising d_time (epoch) to '0' ensures the dentry is invalid
484 	 * if compared to fc->epoch, which is initialized to '1'.
485 	 */
486 	dentry->d_time = 0;
487 
488 	return 0;
489 }
490 
491 static void fuse_dentry_release(struct dentry *dentry)
492 {
493 	struct fuse_dentry *fd = dentry->d_fsdata;
494 
495 	if (!RB_EMPTY_NODE(&fd->node))
496 		fuse_dentry_tree_del_node(dentry);
497 	kfree_rcu(fd, rcu);
498 }
499 
500 static int fuse_dentry_delete(const struct dentry *dentry)
501 {
502 	return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
503 }
504 
505 /*
506  * Create a fuse_mount object with a new superblock (with path->dentry
507  * as the root), and return that mount so it can be auto-mounted on
508  * @path.
509  */
510 static struct vfsmount *fuse_dentry_automount(struct path *path)
511 {
512 	struct fs_context *fsc;
513 	struct vfsmount *mnt;
514 	struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
515 
516 	fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
517 	if (IS_ERR(fsc))
518 		return ERR_CAST(fsc);
519 
520 	/* Pass the FUSE inode of the mount for fuse_get_tree_submount() */
521 	fsc->fs_private = mp_fi;
522 
523 	/* Create the submount */
524 	mnt = fc_mount(fsc);
525 	put_fs_context(fsc);
526 	return mnt;
527 }
528 
/* Dentry operations table wiring up the FUSE callbacks defined above */
const struct dentry_operations fuse_dentry_operations = {
	.d_revalidate	= fuse_dentry_revalidate,
	.d_delete	= fuse_dentry_delete,
	.d_init		= fuse_dentry_init,
	.d_release	= fuse_dentry_release,
	.d_automount	= fuse_dentry_automount,
};
536 
/*
 * Return 1 if the file-type bits of mode @m name one of the seven known
 * types (regular, directory, symlink, char/block device, FIFO, socket),
 * 0 otherwise.
 */
int fuse_valid_type(int m)
{
	if (S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m))
		return 1;
	if (S_ISCHR(m) || S_ISBLK(m))
		return 1;
	if (S_ISFIFO(m) || S_ISSOCK(m))
		return 1;

	return 0;
}
542 
543 static bool fuse_valid_size(u64 size)
544 {
545 	return size <= LLONG_MAX;
546 }
547 
548 bool fuse_invalid_attr(struct fuse_attr *attr)
549 {
550 	return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
551 }
552 
/*
 * Send a FUSE_LOOKUP for @name under node @nodeid and, on success, obtain
 * the matching inode.
 *
 * @outarg receives the reply; *@inode is set to the inode obtained from
 * fuse_iget(), or left NULL when the reply carries a zero nodeid (a negative
 * entry) or on error.
 *
 * Returns 0 on success (including the zero-nodeid case), -ENAMETOOLONG if
 * the name exceeds fc->name_max, -ENOMEM on allocation failure, -EIO if the
 * reply carries invalid attributes, or the error from the request.
 */
int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
		     struct fuse_entry_out *outarg, struct inode **inode)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	FUSE_ARGS(args);
	struct fuse_forget_link *forget;
	u64 attr_version, evict_ctr;
	int err;

	*inode = NULL;
	err = -ENAMETOOLONG;
	if (name->len > fm->fc->name_max)
		goto out;


	/* Allocated up front so a forget can be queued if fuse_iget() fails */
	forget = fuse_alloc_forget();
	err = -ENOMEM;
	if (!forget)
		goto out;

	/* Both counters are sampled before the request is sent */
	attr_version = fuse_get_attr_version(fm->fc);
	evict_ctr = fuse_get_evict_ctr(fm->fc);

	fuse_lookup_init(&args, nodeid, name, outarg);
	err = fuse_simple_request(fm, &args);
	/* Zero nodeid is same as -ENOENT, but with valid timeout */
	if (err || !outarg->nodeid)
		goto out_put_forget;

	err = -EIO;
	if (fuse_invalid_attr(&outarg->attr))
		goto out_put_forget;
	if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
		pr_warn_once("root generation should be zero\n");
		outarg->generation = 0;
	}

	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
			   &outarg->attr, ATTR_TIMEOUT(outarg),
			   attr_version, evict_ctr);
	err = -ENOMEM;
	if (!*inode) {
		/* The forget link is handed to the queue, so skip the kfree() */
		fuse_chan_queue_forget(fm->fc->chan, forget, outarg->nodeid, 1);
		goto out;
	}
	err = 0;

 out_put_forget:
	kfree(forget);
 out:
	return err;
}
605 
/*
 * Look up @entry in directory @dir.
 *
 * Performs the lookup request, splices the resulting inode (possibly NULL
 * for a negative entry) into the dcache, stamps the dentry with the
 * connection epoch sampled before the request, and sets the entry timeout.
 *
 * Returns the value of d_splice_alias() (NULL or an alternate dentry) on
 * success, or an error pointer: -EIO for a bad directory or a reply naming
 * the root node, otherwise the error from fuse_lookup_name() or
 * d_splice_alias().
 */
static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
				  unsigned int flags)
{
	struct fuse_entry_out outarg;
	struct fuse_conn *fc;
	struct inode *inode;
	struct dentry *newent;
	int err, epoch;
	bool outarg_valid = true;
	bool locked;

	if (fuse_is_bad(dir))
		return ERR_PTR(-EIO);

	fc = get_fuse_conn_super(dir->i_sb);
	epoch = atomic_read(&fc->epoch);

	locked = fuse_lock_inode(dir);
	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
			       &outarg, &inode);
	fuse_unlock_inode(dir, locked);
	/* -ENOENT means a negative entry for which outarg has no timeout */
	if (err == -ENOENT) {
		outarg_valid = false;
		err = 0;
	}
	if (err)
		goto out_err;

	/* A child must never resolve to the root node */
	err = -EIO;
	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
		goto out_iput;

	newent = d_splice_alias(inode, entry);
	err = PTR_ERR(newent);
	if (IS_ERR(newent))
		goto out_err;

	/* From here on operate on whichever dentry ended up in the dcache */
	entry = newent ? newent : entry;
	entry->d_time = epoch;
	if (outarg_valid)
		fuse_change_entry_timeout(entry, &outarg);
	else
		fuse_invalidate_entry_cache(entry);

	if (inode)
		fuse_advise_use_readdirplus(dir);
	return newent;

 out_iput:
	iput(inode);
 out_err:
	return ERR_PTR(err);
}
659 
/*
 * Build the security-context extension for a create-type request.
 *
 * Asks the LSM layer for the initial security context of @entry and packs
 * it into a freshly allocated buffer laid out as:
 *
 *   struct fuse_secctx_header, then (if a context was returned)
 *   struct fuse_secctx, the NUL-terminated xattr name, the context bytes.
 *
 * On success the buffer and its length are stored in @ext and 0 is returned.
 * Returns the LSM hook's error (other than -EOPNOTSUPP, which is ignored),
 * -EIO if the name or context is implausibly large, or -ENOMEM.
 */
static int get_security_context(struct dentry *entry, umode_t mode,
				struct fuse_in_arg *ext)
{
	struct fuse_secctx *fctx;
	struct fuse_secctx_header *header;
	struct lsm_context lsmctx = { };
	void *ptr;
	u32 total_len = sizeof(*header);
	int err, nr_ctx = 0;
	const char *name = NULL;
	size_t namesize;

	err = security_dentry_init_security(entry, mode, &entry->d_name,
					    &name, &lsmctx);

	/* If no LSM is supporting this security hook ignore error */
	if (err && err != -EOPNOTSUPP)
		goto out_err;

	if (lsmctx.len) {
		/* From here on lsmctx must be released at out_err */
		nr_ctx = 1;
		namesize = strlen(name) + 1;
		err = -EIO;
		if (WARN_ON(namesize > XATTR_NAME_MAX + 1 ||
		    lsmctx.len > S32_MAX))
			goto out_err;
		total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namesize +
					    lsmctx.len);
	}

	err = -ENOMEM;
	header = ptr = kzalloc(total_len, GFP_KERNEL);
	if (!ptr)
		goto out_err;

	header->nr_secctx = nr_ctx;
	header->size = total_len;
	/* ptr walks through the buffer as each piece is written */
	ptr += sizeof(*header);
	if (nr_ctx) {
		fctx = ptr;
		fctx->size = lsmctx.len;
		ptr += sizeof(*fctx);

		strscpy(ptr, name, namesize);
		ptr += namesize;

		memcpy(ptr, lsmctx.context, lsmctx.len);
	}
	ext->size = total_len;
	ext->value = header;
	err = 0;
out_err:
	if (nr_ctx)
		security_release_secctx(&lsmctx);
	return err;
}
716 
717 static void *extend_arg(struct fuse_in_arg *buf, u32 bytes)
718 {
719 	void *p;
720 	u32 newlen = buf->size + bytes;
721 
722 	p = krealloc(buf->value, newlen, GFP_KERNEL);
723 	if (!p) {
724 		kfree(buf->value);
725 		buf->size = 0;
726 		buf->value = NULL;
727 		return NULL;
728 	}
729 
730 	memset(p + buf->size, 0, bytes);
731 	buf->value = p;
732 	buf->size = newlen;
733 
734 	return p + newlen - bytes;
735 }
736 
737 static u32 fuse_ext_size(size_t size)
738 {
739 	return FUSE_REC_ALIGN(sizeof(struct fuse_ext_header) + size);
740 }
741 
742 /*
743  * This adds just a single supplementary group that matches the parent's group.
744  */
745 static int get_create_supp_group(struct mnt_idmap *idmap,
746 				 struct inode *dir,
747 				 struct fuse_in_arg *ext)
748 {
749 	struct fuse_conn *fc = get_fuse_conn(dir);
750 	struct fuse_ext_header *xh;
751 	struct fuse_supp_groups *sg;
752 	kgid_t kgid = dir->i_gid;
753 	vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns, kgid);
754 	gid_t parent_gid = from_kgid(fc->user_ns, kgid);
755 
756 	u32 sg_len = fuse_ext_size(sizeof(*sg) + sizeof(sg->groups[0]));
757 
758 	if (parent_gid == (gid_t) -1 || vfsgid_eq_kgid(vfsgid, current_fsgid()) ||
759 	    !vfsgid_in_group_p(vfsgid))
760 		return 0;
761 
762 	xh = extend_arg(ext, sg_len);
763 	if (!xh)
764 		return -ENOMEM;
765 
766 	xh->size = sg_len;
767 	xh->type = FUSE_EXT_GROUPS;
768 
769 	sg = (struct fuse_supp_groups *) &xh[1];
770 	sg->nr_groups = 1;
771 	sg->groups[0] = parent_gid;
772 
773 	return 0;
774 }
775 
776 static int get_create_ext(struct mnt_idmap *idmap,
777 			  struct fuse_args *args,
778 			  struct inode *dir, struct dentry *dentry,
779 			  umode_t mode)
780 {
781 	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
782 	struct fuse_in_arg ext = { .size = 0, .value = NULL };
783 	int err = 0;
784 
785 	if (fc->init_security)
786 		err = get_security_context(dentry, mode, &ext);
787 	if (!err && fc->create_supp_group)
788 		err = get_create_supp_group(idmap, dir, &ext);
789 
790 	if (!err && ext.size) {
791 		WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args));
792 		args->is_ext = true;
793 		args->ext_idx = args->in_numargs++;
794 		args->in_args[args->ext_idx] = ext;
795 	} else {
796 		kfree(ext.value);
797 	}
798 
799 	return err;
800 }
801 
802 static void free_ext_value(struct fuse_args *args)
803 {
804 	if (args->is_ext)
805 		kfree(args->in_args[args->ext_idx].value);
806 }
807 
/*
 * Atomic create+open operation
 *
 * If the filesystem doesn't support this, then fall back to separate
 * 'mknod' + 'open' requests.
 *
 * Sends @opcode with the create parameters and the entry name, then
 * instantiates @entry with the returned inode and finishes the open on
 * @file.  Returns 0 on success or a negative errno; -EIO is returned when
 * the reply does not describe a valid regular file.
 */
static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
			    struct dentry *entry, struct file *file,
			    unsigned int flags, umode_t mode, u32 opcode)
{
	struct inode *inode;
	struct fuse_mount *fm = get_fuse_mount(dir);
	FUSE_ARGS(args);
	struct fuse_forget_link *forget;
	struct fuse_create_in inarg;
	struct fuse_open_out *outopenp;
	struct fuse_entry_out outentry;
	struct fuse_inode *fi;
	struct fuse_file *ff;
	int epoch, err;
	bool trunc = flags & O_TRUNC;

	/* Userspace expects S_IFREG in create mode */
	BUG_ON((mode & S_IFMT) != S_IFREG);

	/* Sampled before the request; stamped on the dentry on success */
	epoch = atomic_read(&fm->fc->epoch);
	forget = fuse_alloc_forget();
	err = -ENOMEM;
	if (!forget)
		goto out_err;

	ff = fuse_file_alloc(fm, true);
	if (!ff)
		goto out_put_forget_req;

	if (!fm->fc->dont_mask)
		mode &= ~current_umask();

	flags &= ~O_NOCTTY;
	memset(&inarg, 0, sizeof(inarg));
	memset(&outentry, 0, sizeof(outentry));
	inarg.flags = flags;
	inarg.mode = mode;
	inarg.umask = current_umask();

	if (fm->fc->handle_killpriv_v2 && trunc &&
	    !(flags & O_EXCL) && !capable(CAP_FSETID)) {
		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
	}

	args.opcode = opcode;
	args.nodeid = get_node_id(dir);
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	/* The name is sent including its terminating NUL */
	args.in_args[1].size = entry->d_name.len + 1;
	args.in_args[1].value = entry->d_name.name;
	args.out_numargs = 2;
	args.out_args[0].size = sizeof(outentry);
	args.out_args[0].value = &outentry;
	/* Store outarg for fuse_finish_open() */
	outopenp = &ff->args->open_outarg;
	args.out_args[1].size = sizeof(*outopenp);
	args.out_args[1].value = outopenp;

	err = get_create_ext(idmap, &args, dir, entry, mode);
	if (err)
		goto out_free_ff;

	err = fuse_simple_idmap_request(idmap, fm, &args);
	/* The extension buffer is only needed for the duration of the request */
	free_ext_value(&args);
	if (err)
		goto out_free_ff;

	err = -EIO;
	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
	    fuse_invalid_attr(&outentry.attr))
		goto out_free_ff;

	ff->fh = outopenp->fh;
	ff->nodeid = outentry.nodeid;
	ff->open_flags = outopenp->open_flags;
	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
			  &outentry.attr, ATTR_TIMEOUT(&outentry), 0, 0);
	if (!inode) {
		/*
		 * The file was opened on the server side: release it and
		 * queue a forget for the node.  Both ff and forget are
		 * handed off here, hence the jump past the cleanup labels.
		 */
		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
		fuse_sync_release(NULL, ff, flags);
		fuse_chan_queue_forget(fm->fc->chan, forget, outentry.nodeid, 1);
		err = -ENOMEM;
		goto out_err;
	}
	kfree(forget);
	d_instantiate(entry, inode);
	entry->d_time = epoch;
	fuse_change_entry_timeout(entry, &outentry);
	fuse_dir_changed(dir);
	err = generic_file_open(inode, file);
	if (!err) {
		file->private_data = ff;
		err = finish_open(file, entry, fuse_finish_open);
	}
	if (err) {
		fi = get_fuse_inode(inode);
		fuse_sync_release(fi, ff, flags);
	} else {
		if (fm->fc->atomic_o_trunc && trunc)
			truncate_pagecache(inode, 0);
		else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
			invalidate_inode_pages2(inode->i_mapping);
	}
	return err;

out_free_ff:
	fuse_file_free(ff);
out_put_forget_req:
	kfree(forget);
out_err:
	return err;
}
927 
/* Forward declaration: fuse_atomic_open() falls back to fuse_mknod() */
static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
		      umode_t, dev_t);
/*
 * ->atomic_open: look up the entry if that has not happened yet and, for
 * O_CREAT on a still-negative entry, create and open the file in one
 * request.
 *
 * When the connection is known not to support FUSE_CREATE (or reports
 * -ENOSYS now, which is remembered in fc->no_create), the file is created
 * with fuse_mknod() and the open is left to the caller via finish_no_open().
 */
static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
			    struct file *file, unsigned flags,
			    umode_t mode)
{
	int err;
	struct mnt_idmap *idmap = file_mnt_idmap(file);
	struct fuse_conn *fc = get_fuse_conn(dir);

	if (fuse_is_bad(dir))
		return -EIO;

	if (d_in_lookup(entry)) {
		struct dentry *res = fuse_lookup(dir, entry, 0);
		/* Non-NULL lookup result or positive entry: nothing to create */
		if (res || d_really_is_positive(entry))
			return finish_no_open(file, res);
	}

	if (!(flags & O_CREAT))
		return finish_no_open(file, NULL);

	/* Only creates */
	file->f_mode |= FMODE_CREATED;

	if (fc->no_create)
		goto mknod;

	err = fuse_create_open(idmap, dir, entry, file, flags, mode, FUSE_CREATE);
	if (err == -ENOSYS) {
		fc->no_create = 1;
		goto mknod;
	} else if (err == -EEXIST)
		fuse_invalidate_entry(entry);
	return err;

mknod:
	err = fuse_mknod(idmap, dir, entry, mode, 0);
	if (err)
		return err;
	return finish_no_open(file, NULL);
}
970 
/*
 * Code shared between mknod, mkdir, symlink and link
 *
 * The caller fills in the opcode and input arguments of @args; this helper
 * adds the node id and output argument, sends the request, and binds the
 * resulting inode to @entry.
 *
 * Returns NULL when @entry itself was used, an alternate dentry if
 * d_splice_alias() produced one, or an error pointer.  -EIO is returned when
 * the reply is invalid or its file type does not match @mode.
 */
static struct dentry *create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm,
				       struct fuse_args *args, struct inode *dir,
				       struct dentry *entry, umode_t mode)
{
	struct fuse_entry_out outarg;
	struct inode *inode;
	struct dentry *d;
	struct fuse_forget_link *forget;
	int epoch, err;

	if (fuse_is_bad(dir))
		return ERR_PTR(-EIO);

	/* Sampled before the request; stamped on the dentry on success */
	epoch = atomic_read(&fm->fc->epoch);

	forget = fuse_alloc_forget();
	if (!forget)
		return ERR_PTR(-ENOMEM);

	memset(&outarg, 0, sizeof(outarg));
	args->nodeid = get_node_id(dir);
	args->out_numargs = 1;
	args->out_args[0].size = sizeof(outarg);
	args->out_args[0].value = &outarg;

	/* FUSE_LINK requests get no create extensions */
	if (args->opcode != FUSE_LINK) {
		err = get_create_ext(idmap, args, dir, entry, mode);
		if (err)
			goto out_put_forget_req;
	}

	err = fuse_simple_idmap_request(idmap, fm, args);
	free_ext_value(args);
	if (err)
		goto out_put_forget_req;

	err = -EIO;
	if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
		goto out_put_forget_req;

	/* The file type in the reply must match the one that was requested */
	if ((outarg.attr.mode ^ mode) & S_IFMT)
		goto out_put_forget_req;

	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
			  &outarg.attr, ATTR_TIMEOUT(&outarg), 0, 0);
	if (!inode) {
		/* The forget link is handed to the queue, so it is not freed */
		fuse_chan_queue_forget(fm->fc->chan, forget, outarg.nodeid, 1);
		return ERR_PTR(-ENOMEM);
	}
	kfree(forget);

	d_drop(entry);
	d = d_splice_alias(inode, entry);
	if (IS_ERR(d))
		return d;

	/* Stamp whichever dentry now refers to the inode */
	if (d) {
		d->d_time = epoch;
		fuse_change_entry_timeout(d, &outarg);
	} else {
		entry->d_time = epoch;
		fuse_change_entry_timeout(entry, &outarg);
	}
	fuse_dir_changed(dir);
	return d;

 out_put_forget_req:
	if (err == -EEXIST)
		fuse_invalidate_entry(entry);
	kfree(forget);
	return ERR_PTR(err);
}
1046 
1047 static int create_new_nondir(struct mnt_idmap *idmap, struct fuse_mount *fm,
1048 			     struct fuse_args *args, struct inode *dir,
1049 			     struct dentry *entry, umode_t mode)
1050 {
1051 	/*
1052 	 * Note that when creating anything other than a directory we
1053 	 * can be sure create_new_entry() will NOT return an alternate
1054 	 * dentry as d_splice_alias() only returns an alternate dentry
1055 	 * for directories.  So we don't need to check for that case
1056 	 * when passing back the result.
1057 	 */
1058 	WARN_ON_ONCE(S_ISDIR(mode));
1059 
1060 	return PTR_ERR(create_new_entry(idmap, fm, args, dir, entry, mode));
1061 }
1062 
1063 static int fuse_mknod(struct mnt_idmap *idmap, struct inode *dir,
1064 		      struct dentry *entry, umode_t mode, dev_t rdev)
1065 {
1066 	struct fuse_mknod_in inarg;
1067 	struct fuse_mount *fm = get_fuse_mount(dir);
1068 	FUSE_ARGS(args);
1069 
1070 	if (!fm->fc->dont_mask)
1071 		mode &= ~current_umask();
1072 
1073 	memset(&inarg, 0, sizeof(inarg));
1074 	inarg.mode = mode;
1075 	inarg.rdev = new_encode_dev(rdev);
1076 	inarg.umask = current_umask();
1077 	args.opcode = FUSE_MKNOD;
1078 	args.in_numargs = 2;
1079 	args.in_args[0].size = sizeof(inarg);
1080 	args.in_args[0].value = &inarg;
1081 	args.in_args[1].size = entry->d_name.len + 1;
1082 	args.in_args[1].value = entry->d_name.name;
1083 	return create_new_nondir(idmap, fm, &args, dir, entry, mode);
1084 }
1085 
/*
 * ->create: implemented as a mknod with a zero device number.  @excl is
 * not used.
 */
static int fuse_create(struct mnt_idmap *idmap, struct inode *dir,
		       struct dentry *entry, umode_t mode, bool excl)
{
	return fuse_mknod(idmap, dir, entry, mode, 0);
}
1091 
1092 static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
1093 			struct file *file, umode_t mode)
1094 {
1095 	struct fuse_conn *fc = get_fuse_conn(dir);
1096 	int err;
1097 
1098 	if (fc->no_tmpfile)
1099 		return -EOPNOTSUPP;
1100 
1101 	err = fuse_create_open(idmap, dir, file->f_path.dentry, file,
1102 			       file->f_flags, mode, FUSE_TMPFILE);
1103 	if (err == -ENOSYS) {
1104 		fc->no_tmpfile = 1;
1105 		err = -EOPNOTSUPP;
1106 	}
1107 	return err;
1108 }
1109 
1110 static struct dentry *fuse_mkdir(struct mnt_idmap *idmap, struct inode *dir,
1111 				 struct dentry *entry, umode_t mode)
1112 {
1113 	struct fuse_mkdir_in inarg;
1114 	struct fuse_mount *fm = get_fuse_mount(dir);
1115 	FUSE_ARGS(args);
1116 
1117 	if (!fm->fc->dont_mask)
1118 		mode &= ~current_umask();
1119 
1120 	memset(&inarg, 0, sizeof(inarg));
1121 	inarg.mode = mode;
1122 	inarg.umask = current_umask();
1123 	args.opcode = FUSE_MKDIR;
1124 	args.in_numargs = 2;
1125 	args.in_args[0].size = sizeof(inarg);
1126 	args.in_args[0].value = &inarg;
1127 	args.in_args[1].size = entry->d_name.len + 1;
1128 	args.in_args[1].value = entry->d_name.name;
1129 	return create_new_entry(idmap, fm, &args, dir, entry, S_IFDIR);
1130 }
1131 
1132 static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir,
1133 			struct dentry *entry, const char *link)
1134 {
1135 	struct fuse_mount *fm = get_fuse_mount(dir);
1136 	unsigned len = strlen(link) + 1;
1137 	FUSE_ARGS(args);
1138 
1139 	args.opcode = FUSE_SYMLINK;
1140 	args.in_numargs = 3;
1141 	fuse_set_zero_arg0(&args);
1142 	args.in_args[1].size = entry->d_name.len + 1;
1143 	args.in_args[1].value = entry->d_name.name;
1144 	args.in_args[2].size = len;
1145 	args.in_args[2].value = link;
1146 	return create_new_nondir(idmap, fm, &args, dir, entry, S_IFLNK);
1147 }
1148 
1149 void fuse_flush_time_update(struct inode *inode)
1150 {
1151 	int err = sync_inode_metadata(inode, 1);
1152 
1153 	mapping_set_error(inode->i_mapping, err);
1154 }
1155 
/*
 * Set the cached ctime to "now", mark the inode dirty and flush the time
 * update.  Nothing is done for inodes flagged IS_NOCMTIME().
 */
static void fuse_update_ctime_in_cache(struct inode *inode)
{
	if (IS_NOCMTIME(inode))
		return;

	inode_set_ctime_current(inode);
	mark_inode_dirty_sync(inode);
	fuse_flush_time_update(inode);
}
1164 
1165 void fuse_update_ctime(struct inode *inode)
1166 {
1167 	fuse_invalidate_attr_mask(inode, STATX_CTIME);
1168 	fuse_update_ctime_in_cache(inode);
1169 }
1170 
1171 static void fuse_entry_unlinked(struct dentry *entry)
1172 {
1173 	struct inode *inode = d_inode(entry);
1174 	struct fuse_conn *fc = get_fuse_conn(inode);
1175 	struct fuse_inode *fi = get_fuse_inode(inode);
1176 
1177 	spin_lock(&fi->lock);
1178 	fi->attr_version = atomic64_inc_return(&fc->attr_version);
1179 	/*
1180 	 * If i_nlink == 0 then unlink doesn't make sense, yet this can
1181 	 * happen if userspace filesystem is careless.  It would be
1182 	 * difficult to enforce correct nlink usage so just ignore this
1183 	 * condition here
1184 	 */
1185 	if (S_ISDIR(inode->i_mode))
1186 		clear_nlink(inode);
1187 	else if (inode->i_nlink > 0)
1188 		drop_nlink(inode);
1189 	spin_unlock(&fi->lock);
1190 	fuse_invalidate_entry_cache(entry);
1191 	fuse_update_ctime(inode);
1192 }
1193 
1194 static int fuse_unlink(struct inode *dir, struct dentry *entry)
1195 {
1196 	int err;
1197 	struct fuse_mount *fm = get_fuse_mount(dir);
1198 	FUSE_ARGS(args);
1199 
1200 	if (fuse_is_bad(dir))
1201 		return -EIO;
1202 
1203 	args.opcode = FUSE_UNLINK;
1204 	args.nodeid = get_node_id(dir);
1205 	args.in_numargs = 2;
1206 	fuse_set_zero_arg0(&args);
1207 	args.in_args[1].size = entry->d_name.len + 1;
1208 	args.in_args[1].value = entry->d_name.name;
1209 	err = fuse_simple_request(fm, &args);
1210 	if (!err) {
1211 		fuse_dir_changed(dir);
1212 		fuse_entry_unlinked(entry);
1213 	} else if (err == -EINTR || err == -ENOENT)
1214 		fuse_invalidate_entry(entry);
1215 	return err;
1216 }
1217 
1218 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
1219 {
1220 	int err;
1221 	struct fuse_mount *fm = get_fuse_mount(dir);
1222 	FUSE_ARGS(args);
1223 
1224 	if (fuse_is_bad(dir))
1225 		return -EIO;
1226 
1227 	args.opcode = FUSE_RMDIR;
1228 	args.nodeid = get_node_id(dir);
1229 	args.in_numargs = 2;
1230 	fuse_set_zero_arg0(&args);
1231 	args.in_args[1].size = entry->d_name.len + 1;
1232 	args.in_args[1].value = entry->d_name.name;
1233 	err = fuse_simple_request(fm, &args);
1234 	if (!err) {
1235 		fuse_dir_changed(dir);
1236 		fuse_entry_unlinked(entry);
1237 	} else if (err == -EINTR || err == -ENOENT)
1238 		fuse_invalidate_entry(entry);
1239 	return err;
1240 }
1241 
1242 static int fuse_rename_common(struct mnt_idmap *idmap, struct inode *olddir, struct dentry *oldent,
1243 			      struct inode *newdir, struct dentry *newent,
1244 			      unsigned int flags, int opcode, size_t argsize)
1245 {
1246 	int err;
1247 	struct fuse_rename2_in inarg;
1248 	struct fuse_mount *fm = get_fuse_mount(olddir);
1249 	FUSE_ARGS(args);
1250 
1251 	memset(&inarg, 0, argsize);
1252 	inarg.newdir = get_node_id(newdir);
1253 	inarg.flags = flags;
1254 	args.opcode = opcode;
1255 	args.nodeid = get_node_id(olddir);
1256 	args.in_numargs = 3;
1257 	args.in_args[0].size = argsize;
1258 	args.in_args[0].value = &inarg;
1259 	args.in_args[1].size = oldent->d_name.len + 1;
1260 	args.in_args[1].value = oldent->d_name.name;
1261 	args.in_args[2].size = newent->d_name.len + 1;
1262 	args.in_args[2].value = newent->d_name.name;
1263 	err = fuse_simple_idmap_request(idmap, fm, &args);
1264 	if (!err) {
1265 		/* ctime changes */
1266 		fuse_update_ctime(d_inode(oldent));
1267 
1268 		if (flags & RENAME_EXCHANGE)
1269 			fuse_update_ctime(d_inode(newent));
1270 
1271 		fuse_dir_changed(olddir);
1272 		if (olddir != newdir)
1273 			fuse_dir_changed(newdir);
1274 
1275 		/* newent will end up negative */
1276 		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent))
1277 			fuse_entry_unlinked(newent);
1278 	} else if (err == -EINTR || err == -ENOENT) {
1279 		/* If request was interrupted, DEITY only knows if the
1280 		   rename actually took place.  If the invalidation
1281 		   fails (e.g. some process has CWD under the renamed
1282 		   directory), then there can be inconsistency between
1283 		   the dcache and the real filesystem.  Tough luck. */
1284 		fuse_invalidate_entry(oldent);
1285 		if (d_really_is_positive(newent))
1286 			fuse_invalidate_entry(newent);
1287 	}
1288 
1289 	return err;
1290 }
1291 
1292 static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir,
1293 			struct dentry *oldent, struct inode *newdir,
1294 			struct dentry *newent, unsigned int flags)
1295 {
1296 	struct fuse_conn *fc = get_fuse_conn(olddir);
1297 	int err;
1298 
1299 	if (fuse_is_bad(olddir))
1300 		return -EIO;
1301 
1302 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
1303 		return -EINVAL;
1304 
1305 	if (flags) {
1306 		if (fc->no_rename2 || fc->minor < 23)
1307 			return -EINVAL;
1308 
1309 		err = fuse_rename_common((flags & RENAME_WHITEOUT) ? idmap : &invalid_mnt_idmap,
1310 					 olddir, oldent, newdir, newent, flags,
1311 					 FUSE_RENAME2,
1312 					 sizeof(struct fuse_rename2_in));
1313 		if (err == -ENOSYS) {
1314 			fc->no_rename2 = 1;
1315 			err = -EINVAL;
1316 		}
1317 	} else {
1318 		err = fuse_rename_common(&invalid_mnt_idmap, olddir, oldent, newdir, newent, 0,
1319 					 FUSE_RENAME,
1320 					 sizeof(struct fuse_rename_in));
1321 	}
1322 
1323 	return err;
1324 }
1325 
1326 static int fuse_link(struct dentry *entry, struct inode *newdir,
1327 		     struct dentry *newent)
1328 {
1329 	int err;
1330 	struct fuse_link_in inarg;
1331 	struct inode *inode = d_inode(entry);
1332 	struct fuse_mount *fm = get_fuse_mount(inode);
1333 	FUSE_ARGS(args);
1334 
1335 	if (fm->fc->no_link)
1336 		goto out;
1337 
1338 	memset(&inarg, 0, sizeof(inarg));
1339 	inarg.oldnodeid = get_node_id(inode);
1340 	args.opcode = FUSE_LINK;
1341 	args.in_numargs = 2;
1342 	args.in_args[0].size = sizeof(inarg);
1343 	args.in_args[0].value = &inarg;
1344 	args.in_args[1].size = newent->d_name.len + 1;
1345 	args.in_args[1].value = newent->d_name.name;
1346 	err = create_new_nondir(&invalid_mnt_idmap, fm, &args, newdir, newent, inode->i_mode);
1347 	if (!err)
1348 		fuse_update_ctime_in_cache(inode);
1349 	else if (err == -EINTR)
1350 		fuse_invalidate_attr(inode);
1351 
1352 	if (err == -ENOSYS)
1353 		fm->fc->no_link = 1;
1354 out:
1355 	if (fm->fc->no_link)
1356 		return -EPERM;
1357 
1358 	return err;
1359 }
1360 
1361 static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode,
1362 			  struct fuse_attr *attr, struct kstat *stat)
1363 {
1364 	unsigned int blkbits;
1365 	struct fuse_conn *fc = get_fuse_conn(inode);
1366 	vfsuid_t vfsuid = make_vfsuid(idmap, fc->user_ns,
1367 				      make_kuid(fc->user_ns, attr->uid));
1368 	vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns,
1369 				      make_kgid(fc->user_ns, attr->gid));
1370 
1371 	stat->dev = inode->i_sb->s_dev;
1372 	stat->ino = attr->ino;
1373 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
1374 	stat->nlink = attr->nlink;
1375 	stat->uid = vfsuid_into_kuid(vfsuid);
1376 	stat->gid = vfsgid_into_kgid(vfsgid);
1377 	stat->rdev = inode->i_rdev;
1378 	stat->atime.tv_sec = attr->atime;
1379 	stat->atime.tv_nsec = attr->atimensec;
1380 	stat->mtime.tv_sec = attr->mtime;
1381 	stat->mtime.tv_nsec = attr->mtimensec;
1382 	stat->ctime.tv_sec = attr->ctime;
1383 	stat->ctime.tv_nsec = attr->ctimensec;
1384 	stat->size = attr->size;
1385 	stat->blocks = attr->blocks;
1386 
1387 	if (attr->blksize != 0)
1388 		blkbits = ilog2(attr->blksize);
1389 	else
1390 		blkbits = inode->i_sb->s_blocksize_bits;
1391 
1392 	stat->blksize = 1 << blkbits;
1393 }
1394 
1395 static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr)
1396 {
1397 	memset(attr, 0, sizeof(*attr));
1398 	attr->ino = sx->ino;
1399 	attr->size = sx->size;
1400 	attr->blocks = sx->blocks;
1401 	attr->atime = sx->atime.tv_sec;
1402 	attr->mtime = sx->mtime.tv_sec;
1403 	attr->ctime = sx->ctime.tv_sec;
1404 	attr->atimensec = sx->atime.tv_nsec;
1405 	attr->mtimensec = sx->mtime.tv_nsec;
1406 	attr->ctimensec = sx->ctime.tv_nsec;
1407 	attr->mode = sx->mode;
1408 	attr->nlink = sx->nlink;
1409 	attr->uid = sx->uid;
1410 	attr->gid = sx->gid;
1411 	attr->rdev = new_encode_dev(MKDEV(sx->rdev_major, sx->rdev_minor));
1412 	attr->blksize = sx->blksize;
1413 }
1414 
1415 static int fuse_do_statx(struct mnt_idmap *idmap, struct inode *inode,
1416 			 struct file *file, struct kstat *stat)
1417 {
1418 	int err;
1419 	struct fuse_attr attr;
1420 	struct fuse_statx *sx;
1421 	struct fuse_statx_in inarg;
1422 	struct fuse_statx_out outarg;
1423 	struct fuse_mount *fm = get_fuse_mount(inode);
1424 	u64 attr_version = fuse_get_attr_version(fm->fc);
1425 	FUSE_ARGS(args);
1426 
1427 	memset(&inarg, 0, sizeof(inarg));
1428 	memset(&outarg, 0, sizeof(outarg));
1429 	/* Directories have separate file-handle space */
1430 	if (file && S_ISREG(inode->i_mode)) {
1431 		struct fuse_file *ff = file->private_data;
1432 
1433 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1434 		inarg.fh = ff->fh;
1435 	}
1436 	/* For now leave sync hints as the default, request all stats. */
1437 	inarg.sx_flags = 0;
1438 	inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME;
1439 	args.opcode = FUSE_STATX;
1440 	args.nodeid = get_node_id(inode);
1441 	args.in_numargs = 1;
1442 	args.in_args[0].size = sizeof(inarg);
1443 	args.in_args[0].value = &inarg;
1444 	args.out_numargs = 1;
1445 	args.out_args[0].size = sizeof(outarg);
1446 	args.out_args[0].value = &outarg;
1447 	err = fuse_simple_request(fm, &args);
1448 	if (err)
1449 		return err;
1450 
1451 	sx = &outarg.stat;
1452 	if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
1453 	    ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
1454 					 inode_wrong_type(inode, sx->mode)))) {
1455 		fuse_make_bad(inode);
1456 		return -EIO;
1457 	}
1458 
1459 	fuse_statx_to_attr(&outarg.stat, &attr);
1460 	if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) {
1461 		fuse_change_attributes(inode, &attr, &outarg.stat,
1462 				       ATTR_TIMEOUT(&outarg), attr_version);
1463 	}
1464 
1465 	if (stat) {
1466 		stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME);
1467 		stat->btime.tv_sec = sx->btime.tv_sec;
1468 		stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
1469 		fuse_fillattr(idmap, inode, &attr, stat);
1470 		stat->result_mask |= STATX_TYPE;
1471 	}
1472 
1473 	return 0;
1474 }
1475 
1476 static int fuse_do_getattr(struct mnt_idmap *idmap, struct inode *inode,
1477 			   struct kstat *stat, struct file *file)
1478 {
1479 	int err;
1480 	struct fuse_getattr_in inarg;
1481 	struct fuse_attr_out outarg;
1482 	struct fuse_mount *fm = get_fuse_mount(inode);
1483 	FUSE_ARGS(args);
1484 	u64 attr_version;
1485 
1486 	attr_version = fuse_get_attr_version(fm->fc);
1487 
1488 	memset(&inarg, 0, sizeof(inarg));
1489 	memset(&outarg, 0, sizeof(outarg));
1490 	/* Directories have separate file-handle space */
1491 	if (file && S_ISREG(inode->i_mode)) {
1492 		struct fuse_file *ff = file->private_data;
1493 
1494 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1495 		inarg.fh = ff->fh;
1496 	}
1497 	args.opcode = FUSE_GETATTR;
1498 	args.nodeid = get_node_id(inode);
1499 	args.in_numargs = 1;
1500 	args.in_args[0].size = sizeof(inarg);
1501 	args.in_args[0].value = &inarg;
1502 	args.out_numargs = 1;
1503 	args.out_args[0].size = sizeof(outarg);
1504 	args.out_args[0].value = &outarg;
1505 	err = fuse_simple_request(fm, &args);
1506 	if (!err) {
1507 		if (fuse_invalid_attr(&outarg.attr) ||
1508 		    inode_wrong_type(inode, outarg.attr.mode)) {
1509 			fuse_make_bad(inode);
1510 			err = -EIO;
1511 		} else {
1512 			fuse_change_attributes(inode, &outarg.attr, NULL,
1513 					       ATTR_TIMEOUT(&outarg),
1514 					       attr_version);
1515 			if (stat)
1516 				fuse_fillattr(idmap, inode, &outarg.attr, stat);
1517 		}
1518 	}
1519 	return err;
1520 }
1521 
1522 static int fuse_update_get_attr(struct mnt_idmap *idmap, struct inode *inode,
1523 				struct file *file, struct kstat *stat,
1524 				u32 request_mask, unsigned int flags)
1525 {
1526 	struct fuse_inode *fi = get_fuse_inode(inode);
1527 	struct fuse_conn *fc = get_fuse_conn(inode);
1528 	int err = 0;
1529 	bool sync;
1530 	u32 inval_mask = READ_ONCE(fi->inval_mask);
1531 	u32 cache_mask = fuse_get_cache_mask(inode);
1532 
1533 
1534 	/* FUSE only supports basic stats and possibly btime */
1535 	request_mask &= STATX_BASIC_STATS | STATX_BTIME;
1536 retry:
1537 	if (fc->no_statx)
1538 		request_mask &= STATX_BASIC_STATS;
1539 
1540 	if (!request_mask)
1541 		sync = false;
1542 	else if (flags & AT_STATX_FORCE_SYNC)
1543 		sync = true;
1544 	else if (flags & AT_STATX_DONT_SYNC)
1545 		sync = false;
1546 	else if (request_mask & inval_mask & ~cache_mask)
1547 		sync = true;
1548 	else
1549 		sync = time_before64(fi->i_time, get_jiffies_64());
1550 
1551 	if (sync) {
1552 		forget_all_cached_acls(inode);
1553 		/* Try statx if BTIME is requested */
1554 		if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) {
1555 			err = fuse_do_statx(idmap, inode, file, stat);
1556 			if (err == -ENOSYS) {
1557 				fc->no_statx = 1;
1558 				err = 0;
1559 				goto retry;
1560 			}
1561 		} else {
1562 			err = fuse_do_getattr(idmap, inode, stat, file);
1563 		}
1564 	} else if (stat) {
1565 		generic_fillattr(idmap, request_mask, inode, stat);
1566 		stat->mode = fi->orig_i_mode;
1567 		stat->ino = fi->orig_ino;
1568 		stat->blksize = 1 << fi->cached_i_blkbits;
1569 		if (test_bit(FUSE_I_BTIME, &fi->state)) {
1570 			stat->btime = fi->i_btime;
1571 			stat->result_mask |= STATX_BTIME;
1572 		}
1573 	}
1574 
1575 	return err;
1576 }
1577 
1578 int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask)
1579 {
1580 	return fuse_update_get_attr(&nop_mnt_idmap, inode, file, NULL, mask, 0);
1581 }
1582 
1583 int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
1584 			     u64 child_nodeid, struct qstr *name, u32 flags)
1585 {
1586 	int err = -ENOTDIR;
1587 	struct inode *parent;
1588 	struct dentry *dir;
1589 	struct dentry *entry;
1590 
1591 	parent = fuse_ilookup(fc, parent_nodeid, NULL);
1592 	if (!parent)
1593 		return -ENOENT;
1594 
1595 	inode_lock_nested(parent, I_MUTEX_PARENT);
1596 	if (!S_ISDIR(parent->i_mode))
1597 		goto unlock;
1598 
1599 	err = -ENOENT;
1600 	dir = d_find_alias(parent);
1601 	if (!dir)
1602 		goto unlock;
1603 
1604 	name->hash = full_name_hash(dir, name->name, name->len);
1605 	entry = d_lookup(dir, name);
1606 	dput(dir);
1607 	if (!entry)
1608 		goto unlock;
1609 
1610 	fuse_dir_changed(parent);
1611 	if (!(flags & FUSE_EXPIRE_ONLY))
1612 		d_invalidate(entry);
1613 	fuse_invalidate_entry_cache(entry);
1614 
1615 	if (child_nodeid != 0 && d_really_is_positive(entry)) {
1616 		inode_lock(d_inode(entry));
1617 		if (get_node_id(d_inode(entry)) != child_nodeid) {
1618 			err = -ENOENT;
1619 			goto badentry;
1620 		}
1621 		if (d_mountpoint(entry)) {
1622 			err = -EBUSY;
1623 			goto badentry;
1624 		}
1625 		if (d_is_dir(entry)) {
1626 			shrink_dcache_parent(entry);
1627 			if (!simple_empty(entry)) {
1628 				err = -ENOTEMPTY;
1629 				goto badentry;
1630 			}
1631 			d_inode(entry)->i_flags |= S_DEAD;
1632 		}
1633 		dont_mount(entry);
1634 		clear_nlink(d_inode(entry));
1635 		err = 0;
1636  badentry:
1637 		inode_unlock(d_inode(entry));
1638 		if (!err)
1639 			d_delete(entry);
1640 	} else {
1641 		err = 0;
1642 	}
1643 	dput(entry);
1644 
1645  unlock:
1646 	inode_unlock(parent);
1647 	iput(parent);
1648 	return err;
1649 }
1650 
1651 static inline bool fuse_permissible_uidgid(struct fuse_conn *fc)
1652 {
1653 	const struct cred *cred = current_cred();
1654 
1655 	return (uid_eq(cred->euid, fc->user_id) &&
1656 		uid_eq(cred->suid, fc->user_id) &&
1657 		uid_eq(cred->uid,  fc->user_id) &&
1658 		gid_eq(cred->egid, fc->group_id) &&
1659 		gid_eq(cred->sgid, fc->group_id) &&
1660 		gid_eq(cred->gid,  fc->group_id));
1661 }
1662 
1663 /*
1664  * Calling into a user-controlled filesystem gives the filesystem
1665  * daemon ptrace-like capabilities over the current process.  This
1666  * means, that the filesystem daemon is able to record the exact
1667  * filesystem operations performed, and can also control the behavior
1668  * of the requester process in otherwise impossible ways.  For example
1669  * it can delay the operation for arbitrary length of time allowing
1670  * DoS against the requester.
1671  *
1672  * For this reason only those processes can call into the filesystem,
1673  * for which the owner of the mount has ptrace privilege.  This
1674  * excludes processes started by other users, suid or sgid processes.
1675  */
1676 bool fuse_allow_current_process(struct fuse_conn *fc)
1677 {
1678 	bool allow;
1679 
1680 	if (fc->allow_other)
1681 		allow = current_in_userns(fc->user_ns);
1682 	else
1683 		allow = fuse_permissible_uidgid(fc);
1684 
1685 	if (!allow && allow_sys_admin_access && capable(CAP_SYS_ADMIN))
1686 		allow = true;
1687 
1688 	return allow;
1689 }
1690 
1691 static int fuse_access(struct inode *inode, int mask)
1692 {
1693 	struct fuse_mount *fm = get_fuse_mount(inode);
1694 	FUSE_ARGS(args);
1695 	struct fuse_access_in inarg;
1696 	int err;
1697 
1698 	BUG_ON(mask & MAY_NOT_BLOCK);
1699 
1700 	/*
1701 	 * We should not send FUSE_ACCESS to the userspace
1702 	 * when idmapped mounts are enabled as for this case
1703 	 * we have fc->default_permissions = 1 and access
1704 	 * permission checks are done on the kernel side.
1705 	 */
1706 	WARN_ON_ONCE(!(fm->sb->s_iflags & SB_I_NOIDMAP));
1707 
1708 	if (fm->fc->no_access)
1709 		return 0;
1710 
1711 	memset(&inarg, 0, sizeof(inarg));
1712 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1713 	args.opcode = FUSE_ACCESS;
1714 	args.nodeid = get_node_id(inode);
1715 	args.in_numargs = 1;
1716 	args.in_args[0].size = sizeof(inarg);
1717 	args.in_args[0].value = &inarg;
1718 	err = fuse_simple_request(fm, &args);
1719 	if (err == -ENOSYS) {
1720 		fm->fc->no_access = 1;
1721 		err = 0;
1722 	}
1723 	return err;
1724 }
1725 
1726 static int fuse_perm_getattr(struct inode *inode, int mask)
1727 {
1728 	if (mask & MAY_NOT_BLOCK)
1729 		return -ECHILD;
1730 
1731 	forget_all_cached_acls(inode);
1732 	return fuse_do_getattr(&nop_mnt_idmap, inode, NULL, NULL);
1733 }
1734 
1735 /*
1736  * Check permission.  The two basic access models of FUSE are:
1737  *
1738  * 1) Local access checking ('default_permissions' mount option) based
1739  * on file mode.  This is the plain old disk filesystem permission
1740  * model.
1741  *
1742  * 2) "Remote" access checking, where server is responsible for
1743  * checking permission in each inode operation.  An exception to this
1744  * is if ->permission() was invoked from sys_access() in which case an
1745  * access request is sent.  Execute permission is still checked
1746  * locally based on file mode.
1747  */
1748 static int fuse_permission(struct mnt_idmap *idmap,
1749 			   struct inode *inode, int mask)
1750 {
1751 	struct fuse_conn *fc = get_fuse_conn(inode);
1752 	bool refreshed = false;
1753 	int err = 0;
1754 
1755 	if (fuse_is_bad(inode))
1756 		return -EIO;
1757 
1758 	if (!fuse_allow_current_process(fc))
1759 		return -EACCES;
1760 
1761 	/*
1762 	 * If attributes are needed, refresh them before proceeding
1763 	 */
1764 	if (fc->default_permissions ||
1765 	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1766 		struct fuse_inode *fi = get_fuse_inode(inode);
1767 		u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
1768 
1769 		if (perm_mask & READ_ONCE(fi->inval_mask) ||
1770 		    time_before64(fi->i_time, get_jiffies_64())) {
1771 			refreshed = true;
1772 
1773 			err = fuse_perm_getattr(inode, mask);
1774 			if (err)
1775 				return err;
1776 		}
1777 	}
1778 
1779 	if (fc->default_permissions) {
1780 		err = generic_permission(idmap, inode, mask);
1781 
1782 		/* If permission is denied, try to refresh file
1783 		   attributes.  This is also needed, because the root
1784 		   node will at first have no permissions */
1785 		if (err == -EACCES && !refreshed) {
1786 			err = fuse_perm_getattr(inode, mask);
1787 			if (!err)
1788 				err = generic_permission(idmap,
1789 							 inode, mask);
1790 		}
1791 
1792 		/* Note: the opposite of the above test does not
1793 		   exist.  So if permissions are revoked this won't be
1794 		   noticed immediately, only after the attribute
1795 		   timeout has expired */
1796 	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1797 		err = fuse_access(inode, mask);
1798 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1799 		if (!(inode->i_mode & S_IXUGO)) {
1800 			if (refreshed)
1801 				return -EACCES;
1802 
1803 			err = fuse_perm_getattr(inode, mask);
1804 			if (!err && !(inode->i_mode & S_IXUGO))
1805 				return -EACCES;
1806 		}
1807 	}
1808 	return err;
1809 }
1810 
1811 static int fuse_readlink_folio(struct inode *inode, struct folio *folio)
1812 {
1813 	struct fuse_mount *fm = get_fuse_mount(inode);
1814 	struct fuse_folio_desc desc = { .length = folio_size(folio) - 1 };
1815 	struct fuse_args_pages ap = {
1816 		.num_folios = 1,
1817 		.folios = &folio,
1818 		.descs = &desc,
1819 	};
1820 	char *link;
1821 	ssize_t res;
1822 
1823 	ap.args.opcode = FUSE_READLINK;
1824 	ap.args.nodeid = get_node_id(inode);
1825 	ap.args.out_pages = true;
1826 	ap.args.out_argvar = true;
1827 	ap.args.page_zeroing = true;
1828 	ap.args.out_numargs = 1;
1829 	ap.args.out_args[0].size = desc.length;
1830 	res = fuse_simple_request(fm, &ap.args);
1831 
1832 	fuse_invalidate_atime(inode);
1833 
1834 	if (res < 0)
1835 		return res;
1836 
1837 	if (WARN_ON(res >= PAGE_SIZE))
1838 		return -EIO;
1839 
1840 	link = folio_address(folio);
1841 	link[res] = '\0';
1842 
1843 	return 0;
1844 }
1845 
1846 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
1847 				 struct delayed_call *callback)
1848 {
1849 	struct fuse_conn *fc = get_fuse_conn(inode);
1850 	struct folio *folio;
1851 	int err;
1852 
1853 	err = -EIO;
1854 	if (fuse_is_bad(inode))
1855 		goto out_err;
1856 
1857 	if (fc->cache_symlinks)
1858 		return page_get_link_raw(dentry, inode, callback);
1859 
1860 	err = -ECHILD;
1861 	if (!dentry)
1862 		goto out_err;
1863 
1864 	folio = folio_alloc(GFP_KERNEL, 0);
1865 	err = -ENOMEM;
1866 	if (!folio)
1867 		goto out_err;
1868 
1869 	err = fuse_readlink_folio(inode, folio);
1870 	if (err) {
1871 		folio_put(folio);
1872 		goto out_err;
1873 	}
1874 
1875 	set_delayed_call(callback, page_put_link, folio);
1876 
1877 	return folio_address(folio);
1878 
1879 out_err:
1880 	return ERR_PTR(err);
1881 }
1882 
1883 static int fuse_dir_open(struct inode *inode, struct file *file)
1884 {
1885 	struct fuse_mount *fm = get_fuse_mount(inode);
1886 	int err;
1887 
1888 	if (fuse_is_bad(inode))
1889 		return -EIO;
1890 
1891 	err = generic_file_open(inode, file);
1892 	if (err)
1893 		return err;
1894 
1895 	err = fuse_do_open(fm, get_node_id(inode), file, true);
1896 	if (!err) {
1897 		struct fuse_file *ff = file->private_data;
1898 
1899 		/*
1900 		 * Keep handling FOPEN_STREAM and FOPEN_NONSEEKABLE for
1901 		 * directories for backward compatibility, though it's unlikely
1902 		 * to be useful.
1903 		 */
1904 		if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE))
1905 			nonseekable_open(inode, file);
1906 		if (!(ff->open_flags & FOPEN_KEEP_CACHE))
1907 			invalidate_inode_pages2(inode->i_mapping);
1908 	}
1909 
1910 	return err;
1911 }
1912 
1913 static int fuse_dir_release(struct inode *inode, struct file *file)
1914 {
1915 	fuse_release_common(file, true);
1916 
1917 	return 0;
1918 }
1919 
1920 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1921 			  int datasync)
1922 {
1923 	struct inode *inode = file->f_mapping->host;
1924 	struct fuse_conn *fc = get_fuse_conn(inode);
1925 	int err;
1926 
1927 	if (fuse_is_bad(inode))
1928 		return -EIO;
1929 
1930 	if (fc->no_fsyncdir)
1931 		return 0;
1932 
1933 	inode_lock(inode);
1934 	err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
1935 	if (err == -ENOSYS) {
1936 		fc->no_fsyncdir = 1;
1937 		err = 0;
1938 	}
1939 	inode_unlock(inode);
1940 
1941 	return err;
1942 }
1943 
1944 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1945 			    unsigned long arg)
1946 {
1947 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1948 
1949 	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1950 	if (fc->minor < 18)
1951 		return -ENOTTY;
1952 
1953 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1954 }
1955 
1956 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1957 				   unsigned long arg)
1958 {
1959 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1960 
1961 	if (fc->minor < 18)
1962 		return -ENOTTY;
1963 
1964 	return fuse_ioctl_common(file, cmd, arg,
1965 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1966 }
1967 
1968 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1969 {
1970 	/* Always update if mtime is explicitly set  */
1971 	if (ivalid & ATTR_MTIME_SET)
1972 		return true;
1973 
1974 	/* Or if kernel i_mtime is the official one */
1975 	if (trust_local_mtime)
1976 		return true;
1977 
1978 	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1979 	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1980 		return false;
1981 
1982 	/* In all other cases update */
1983 	return true;
1984 }
1985 
1986 static void iattr_to_fattr(struct mnt_idmap *idmap, struct fuse_conn *fc,
1987 			   struct iattr *iattr, struct fuse_setattr_in *arg,
1988 			   bool trust_local_cmtime)
1989 {
1990 	unsigned ivalid = iattr->ia_valid;
1991 
1992 	if (ivalid & ATTR_MODE)
1993 		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1994 
1995 	if (ivalid & ATTR_UID) {
1996 		kuid_t fsuid = from_vfsuid(idmap, fc->user_ns, iattr->ia_vfsuid);
1997 
1998 		arg->valid |= FATTR_UID;
1999 		arg->uid = from_kuid(fc->user_ns, fsuid);
2000 	}
2001 
2002 	if (ivalid & ATTR_GID) {
2003 		kgid_t fsgid = from_vfsgid(idmap, fc->user_ns, iattr->ia_vfsgid);
2004 
2005 		arg->valid |= FATTR_GID;
2006 		arg->gid = from_kgid(fc->user_ns, fsgid);
2007 	}
2008 
2009 	if (ivalid & ATTR_SIZE)
2010 		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
2011 	if (ivalid & ATTR_ATIME) {
2012 		arg->valid |= FATTR_ATIME;
2013 		arg->atime = iattr->ia_atime.tv_sec;
2014 		arg->atimensec = iattr->ia_atime.tv_nsec;
2015 		if (!(ivalid & ATTR_ATIME_SET))
2016 			arg->valid |= FATTR_ATIME_NOW;
2017 	}
2018 	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
2019 		arg->valid |= FATTR_MTIME;
2020 		arg->mtime = iattr->ia_mtime.tv_sec;
2021 		arg->mtimensec = iattr->ia_mtime.tv_nsec;
2022 		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
2023 			arg->valid |= FATTR_MTIME_NOW;
2024 	}
2025 	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
2026 		arg->valid |= FATTR_CTIME;
2027 		arg->ctime = iattr->ia_ctime.tv_sec;
2028 		arg->ctimensec = iattr->ia_ctime.tv_nsec;
2029 	}
2030 }
2031 
2032 /*
2033  * Prevent concurrent writepages on inode
2034  *
2035  * This is done by adding a negative bias to the inode write counter
2036  * and waiting for all pending writes to finish.
2037  */
2038 void fuse_set_nowrite(struct inode *inode)
2039 {
2040 	struct fuse_inode *fi = get_fuse_inode(inode);
2041 
2042 	BUG_ON(!inode_is_locked(inode));
2043 
2044 	spin_lock(&fi->lock);
2045 	BUG_ON(fi->writectr < 0);
2046 	fi->writectr += FUSE_NOWRITE;
2047 	spin_unlock(&fi->lock);
2048 	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
2049 }
2050 
2051 /*
2052  * Allow writepages on inode
2053  *
2054  * Remove the bias from the writecounter and send any queued
2055  * writepages.
2056  */
2057 static void __fuse_release_nowrite(struct inode *inode)
2058 {
2059 	struct fuse_inode *fi = get_fuse_inode(inode);
2060 
2061 	BUG_ON(fi->writectr != FUSE_NOWRITE);
2062 	fi->writectr = 0;
2063 	fuse_flush_writepages(inode);
2064 }
2065 
2066 void fuse_release_nowrite(struct inode *inode)
2067 {
2068 	struct fuse_inode *fi = get_fuse_inode(inode);
2069 
2070 	spin_lock(&fi->lock);
2071 	__fuse_release_nowrite(inode);
2072 	spin_unlock(&fi->lock);
2073 }
2074 
2075 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
2076 			      struct inode *inode,
2077 			      struct fuse_setattr_in *inarg_p,
2078 			      struct fuse_attr_out *outarg_p)
2079 {
2080 	args->opcode = FUSE_SETATTR;
2081 	args->nodeid = get_node_id(inode);
2082 	args->in_numargs = 1;
2083 	args->in_args[0].size = sizeof(*inarg_p);
2084 	args->in_args[0].value = inarg_p;
2085 	args->out_numargs = 1;
2086 	args->out_args[0].size = sizeof(*outarg_p);
2087 	args->out_args[0].value = outarg_p;
2088 }
2089 
2090 /*
2091  * Flush inode->i_mtime to the server
2092  */
2093 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
2094 {
2095 	struct fuse_mount *fm = get_fuse_mount(inode);
2096 	FUSE_ARGS(args);
2097 	struct fuse_setattr_in inarg;
2098 	struct fuse_attr_out outarg;
2099 
2100 	memset(&inarg, 0, sizeof(inarg));
2101 	memset(&outarg, 0, sizeof(outarg));
2102 
2103 	inarg.valid = FATTR_MTIME;
2104 	inarg.mtime = inode_get_mtime_sec(inode);
2105 	inarg.mtimensec = inode_get_mtime_nsec(inode);
2106 	if (fm->fc->minor >= 23) {
2107 		inarg.valid |= FATTR_CTIME;
2108 		inarg.ctime = inode_get_ctime_sec(inode);
2109 		inarg.ctimensec = inode_get_ctime_nsec(inode);
2110 	}
2111 	if (ff) {
2112 		inarg.valid |= FATTR_FH;
2113 		inarg.fh = ff->fh;
2114 	}
2115 	fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
2116 
2117 	return fuse_simple_request(fm, &args);
2118 }
2119 
2120 /*
2121  * Set attributes, and at the same time refresh them.
2122  *
2123  * Truncation is slightly complicated, because the 'truncate' request
2124  * may fail, in which case we don't want to touch the mapping.
2125  * vmtruncate() doesn't allow for this case, so do the rlimit checking
2126  * and the actual truncation by hand.
2127  */
2128 int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
2129 		    struct iattr *attr, struct file *file)
2130 {
2131 	struct inode *inode = d_inode(dentry);
2132 	struct fuse_mount *fm = get_fuse_mount(inode);
2133 	struct fuse_conn *fc = fm->fc;
2134 	struct fuse_inode *fi = get_fuse_inode(inode);
2135 	struct address_space *mapping = inode->i_mapping;
2136 	FUSE_ARGS(args);
2137 	struct fuse_setattr_in inarg;
2138 	struct fuse_attr_out outarg;
2139 	bool is_truncate = false;
2140 	bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode);
2141 	loff_t oldsize;
2142 	int err;
2143 	bool trust_local_cmtime = is_wb;
2144 	bool fault_blocked = false;
2145 	u64 attr_version;
2146 
2147 	if (!fc->default_permissions)
2148 		attr->ia_valid |= ATTR_FORCE;
2149 
2150 	err = setattr_prepare(idmap, dentry, attr);
2151 	if (err)
2152 		return err;
2153 
2154 	if (attr->ia_valid & ATTR_SIZE) {
2155 		if (WARN_ON(!S_ISREG(inode->i_mode)))
2156 			return -EIO;
2157 		is_truncate = true;
2158 	}
2159 
2160 	if (FUSE_IS_DAX(inode) && is_truncate) {
2161 		filemap_invalidate_lock(mapping);
2162 		fault_blocked = true;
2163 		err = fuse_dax_break_layouts(inode, 0, -1);
2164 		if (err) {
2165 			filemap_invalidate_unlock(mapping);
2166 			return err;
2167 		}
2168 	}
2169 
2170 	if (attr->ia_valid & ATTR_OPEN) {
2171 		/* This is coming from open(..., ... | O_TRUNC); */
2172 		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
2173 		WARN_ON(attr->ia_size != 0);
2174 		if (fc->atomic_o_trunc) {
2175 			/*
2176 			 * No need to send request to userspace, since actual
2177 			 * truncation has already been done by OPEN.  But still
2178 			 * need to truncate page cache.
2179 			 */
2180 			i_size_write(inode, 0);
2181 			truncate_pagecache(inode, 0);
2182 			goto out;
2183 		}
2184 		file = NULL;
2185 	}
2186 
2187 	/* Flush dirty data/metadata before non-truncate SETATTR */
2188 	if (is_wb &&
2189 	    attr->ia_valid &
2190 			(ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
2191 			 ATTR_TIMES_SET)) {
2192 		err = write_inode_now(inode, true);
2193 		if (err)
2194 			return err;
2195 
2196 		fuse_set_nowrite(inode);
2197 		fuse_release_nowrite(inode);
2198 	}
2199 
2200 	if (is_truncate) {
2201 		fuse_set_nowrite(inode);
2202 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2203 		if (trust_local_cmtime && attr->ia_size != inode->i_size)
2204 			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
2205 	}
2206 
2207 	memset(&inarg, 0, sizeof(inarg));
2208 	memset(&outarg, 0, sizeof(outarg));
2209 	iattr_to_fattr(idmap, fc, attr, &inarg, trust_local_cmtime);
2210 	if (file) {
2211 		struct fuse_file *ff = file->private_data;
2212 		inarg.valid |= FATTR_FH;
2213 		inarg.fh = ff->fh;
2214 	}
2215 
2216 	/* Kill suid/sgid for non-directory chown unconditionally */
2217 	if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
2218 	    attr->ia_valid & (ATTR_UID | ATTR_GID))
2219 		inarg.valid |= FATTR_KILL_SUIDGID;
2220 
2221 	if (attr->ia_valid & ATTR_SIZE) {
2222 		/* For mandatory locking in truncate */
2223 		inarg.valid |= FATTR_LOCKOWNER;
2224 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
2225 
2226 		/* Kill suid/sgid for truncate only if no CAP_FSETID */
2227 		if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
2228 			inarg.valid |= FATTR_KILL_SUIDGID;
2229 	}
2230 
2231 	attr_version = fuse_get_attr_version(fm->fc);
2232 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
2233 	err = fuse_simple_request(fm, &args);
2234 	if (err) {
2235 		if (err == -EINTR)
2236 			fuse_invalidate_attr(inode);
2237 		goto error;
2238 	}
2239 
2240 	if (fuse_invalid_attr(&outarg.attr) ||
2241 	    inode_wrong_type(inode, outarg.attr.mode)) {
2242 		fuse_make_bad(inode);
2243 		err = -EIO;
2244 		goto error;
2245 	}
2246 
2247 	spin_lock(&fi->lock);
2248 	/* the kernel maintains i_mtime locally */
2249 	if (trust_local_cmtime) {
2250 		if (attr->ia_valid & ATTR_MTIME)
2251 			inode_set_mtime_to_ts(inode, attr->ia_mtime);
2252 		if (attr->ia_valid & ATTR_CTIME)
2253 			inode_set_ctime_to_ts(inode, attr->ia_ctime);
2254 		/* FIXME: clear I_DIRTY_SYNC? */
2255 	}
2256 
2257 	if (fi->attr_version > attr_version) {
2258 		/*
2259 		 * Apply attributes, for example for fsnotify_change(), but set
2260 		 * attribute timeout to zero.
2261 		 */
2262 		outarg.attr_valid = outarg.attr_valid_nsec = 0;
2263 	}
2264 
2265 	fuse_change_attributes_common(inode, &outarg.attr, NULL,
2266 				      ATTR_TIMEOUT(&outarg),
2267 				      fuse_get_cache_mask(inode), 0);
2268 	oldsize = inode->i_size;
2269 	/* see the comment in fuse_change_attributes() */
2270 	if (!is_wb || is_truncate)
2271 		i_size_write(inode, outarg.attr.size);
2272 
2273 	if (is_truncate) {
2274 		/* NOTE: this may release/reacquire fi->lock */
2275 		__fuse_release_nowrite(inode);
2276 	}
2277 	spin_unlock(&fi->lock);
2278 
2279 	/*
2280 	 * Only call invalidate_inode_pages2() after removing
2281 	 * FUSE_NOWRITE, otherwise fuse_launder_folio() would deadlock.
2282 	 */
2283 	if ((is_truncate || !is_wb) &&
2284 	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
2285 		truncate_pagecache(inode, outarg.attr.size);
2286 		invalidate_inode_pages2(mapping);
2287 	}
2288 
2289 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2290 out:
2291 	if (fault_blocked)
2292 		filemap_invalidate_unlock(mapping);
2293 
2294 	return 0;
2295 
2296 error:
2297 	if (is_truncate)
2298 		fuse_release_nowrite(inode);
2299 
2300 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2301 
2302 	if (fault_blocked)
2303 		filemap_invalidate_unlock(mapping);
2304 	return err;
2305 }
2306 
2307 static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry,
2308 			struct iattr *attr)
2309 {
2310 	struct inode *inode = d_inode(entry);
2311 	struct fuse_conn *fc = get_fuse_conn(inode);
2312 	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
2313 	int ret;
2314 
2315 	if (fuse_is_bad(inode))
2316 		return -EIO;
2317 
2318 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
2319 		return -EACCES;
2320 
2321 	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
2322 		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
2323 				    ATTR_MODE);
2324 
2325 		/*
2326 		 * The only sane way to reliably kill suid/sgid is to do it in
2327 		 * the userspace filesystem
2328 		 *
2329 		 * This should be done on write(), truncate() and chown().
2330 		 */
2331 		if (!fc->handle_killpriv && !fc->handle_killpriv_v2) {
2332 			/*
2333 			 * ia_mode calculation may have used stale i_mode.
2334 			 * Refresh and recalculate.
2335 			 */
2336 			ret = fuse_do_getattr(idmap, inode, NULL, file);
2337 			if (ret)
2338 				return ret;
2339 
2340 			attr->ia_mode = inode->i_mode;
2341 			if (inode->i_mode & S_ISUID) {
2342 				attr->ia_valid |= ATTR_MODE;
2343 				attr->ia_mode &= ~S_ISUID;
2344 			}
2345 			if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
2346 				attr->ia_valid |= ATTR_MODE;
2347 				attr->ia_mode &= ~S_ISGID;
2348 			}
2349 		}
2350 	}
2351 	if (!attr->ia_valid)
2352 		return 0;
2353 
2354 	ret = fuse_do_setattr(idmap, entry, attr, file);
2355 	if (!ret) {
2356 		/*
2357 		 * If filesystem supports acls it may have updated acl xattrs in
2358 		 * the filesystem, so forget cached acls for the inode.
2359 		 */
2360 		if (fc->posix_acl)
2361 			forget_all_cached_acls(inode);
2362 
2363 		/* Directory mode changed, may need to revalidate access */
2364 		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
2365 			fuse_invalidate_entry_cache(entry);
2366 	}
2367 	return ret;
2368 }
2369 
2370 static int fuse_getattr(struct mnt_idmap *idmap,
2371 			const struct path *path, struct kstat *stat,
2372 			u32 request_mask, unsigned int flags)
2373 {
2374 	struct inode *inode = d_inode(path->dentry);
2375 	struct fuse_conn *fc = get_fuse_conn(inode);
2376 
2377 	if (fuse_is_bad(inode))
2378 		return -EIO;
2379 
2380 	if (!fuse_allow_current_process(fc)) {
2381 		if (!request_mask) {
2382 			/*
2383 			 * If user explicitly requested *nothing* then don't
2384 			 * error out, but return st_dev only.
2385 			 */
2386 			stat->result_mask = 0;
2387 			stat->dev = inode->i_sb->s_dev;
2388 			return 0;
2389 		}
2390 		return -EACCES;
2391 	}
2392 
2393 	return fuse_update_get_attr(idmap, inode, NULL, stat, request_mask, flags);
2394 }
2395 
2396 static const struct inode_operations fuse_dir_inode_operations = {
2397 	.lookup		= fuse_lookup,
2398 	.mkdir		= fuse_mkdir,
2399 	.symlink	= fuse_symlink,
2400 	.unlink		= fuse_unlink,
2401 	.rmdir		= fuse_rmdir,
2402 	.rename		= fuse_rename2,
2403 	.link		= fuse_link,
2404 	.setattr	= fuse_setattr,
2405 	.create		= fuse_create,
2406 	.atomic_open	= fuse_atomic_open,
2407 	.tmpfile	= fuse_tmpfile,
2408 	.mknod		= fuse_mknod,
2409 	.permission	= fuse_permission,
2410 	.getattr	= fuse_getattr,
2411 	.listxattr	= fuse_listxattr,
2412 	.get_inode_acl	= fuse_get_inode_acl,
2413 	.get_acl	= fuse_get_acl,
2414 	.set_acl	= fuse_set_acl,
2415 	.fileattr_get	= fuse_fileattr_get,
2416 	.fileattr_set	= fuse_fileattr_set,
2417 };
2418 
2419 static const struct file_operations fuse_dir_operations = {
2420 	.llseek		= generic_file_llseek,
2421 	.read		= generic_read_dir,
2422 	.iterate_shared	= fuse_readdir,
2423 	.open		= fuse_dir_open,
2424 	.release	= fuse_dir_release,
2425 	.fsync		= fuse_dir_fsync,
2426 	.unlocked_ioctl	= fuse_dir_ioctl,
2427 	.compat_ioctl	= fuse_dir_compat_ioctl,
2428 };
2429 
2430 static const struct inode_operations fuse_common_inode_operations = {
2431 	.setattr	= fuse_setattr,
2432 	.permission	= fuse_permission,
2433 	.getattr	= fuse_getattr,
2434 	.listxattr	= fuse_listxattr,
2435 	.get_inode_acl	= fuse_get_inode_acl,
2436 	.get_acl	= fuse_get_acl,
2437 	.set_acl	= fuse_set_acl,
2438 	.fileattr_get	= fuse_fileattr_get,
2439 	.fileattr_set	= fuse_fileattr_set,
2440 };
2441 
2442 static const struct inode_operations fuse_symlink_inode_operations = {
2443 	.setattr	= fuse_setattr,
2444 	.get_link	= fuse_get_link,
2445 	.getattr	= fuse_getattr,
2446 	.listxattr	= fuse_listxattr,
2447 };
2448 
2449 void fuse_init_common(struct inode *inode)
2450 {
2451 	inode->i_op = &fuse_common_inode_operations;
2452 }
2453 
2454 void fuse_init_dir(struct inode *inode)
2455 {
2456 	struct fuse_inode *fi = get_fuse_inode(inode);
2457 
2458 	inode->i_op = &fuse_dir_inode_operations;
2459 	inode->i_fop = &fuse_dir_operations;
2460 
2461 	spin_lock_init(&fi->rdc.lock);
2462 	fi->rdc.cached = false;
2463 	fi->rdc.size = 0;
2464 	fi->rdc.pos = 0;
2465 	fi->rdc.version = 0;
2466 }
2467 
2468 static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
2469 {
2470 	int err = fuse_readlink_folio(folio->mapping->host, folio);
2471 
2472 	if (!err)
2473 		folio_mark_uptodate(folio);
2474 
2475 	folio_unlock(folio);
2476 
2477 	return err;
2478 }
2479 
2480 static const struct address_space_operations fuse_symlink_aops = {
2481 	.read_folio	= fuse_symlink_read_folio,
2482 };
2483 
2484 void fuse_init_symlink(struct inode *inode)
2485 {
2486 	inode->i_op = &fuse_symlink_inode_operations;
2487 	inode->i_data.a_ops = &fuse_symlink_aops;
2488 	inode_nohighmem(inode);
2489 }
2490