xref: /linux/fs/fuse/dir.c (revision fb7399cf2d0b33825b8039f95c45395c7deba25c)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/fs_context.h>
14 #include <linux/moduleparam.h>
15 #include <linux/sched.h>
16 #include <linux/namei.h>
17 #include <linux/slab.h>
18 #include <linux/xattr.h>
19 #include <linux/iversion.h>
20 #include <linux/posix_acl.h>
21 #include <linux/security.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 
25 static bool __read_mostly allow_sys_admin_access;
26 module_param(allow_sys_admin_access, bool, 0644);
27 MODULE_PARM_DESC(allow_sys_admin_access,
28 		 "Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check");
29 
30 static void fuse_advise_use_readdirplus(struct inode *dir)
31 {
32 	struct fuse_inode *fi = get_fuse_inode(dir);
33 
34 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
35 }
36 
37 #if BITS_PER_LONG >= 64
38 static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
39 {
40 	entry->d_fsdata = (void *) time;
41 }
42 
43 static inline u64 fuse_dentry_time(const struct dentry *entry)
44 {
45 	return (u64)entry->d_fsdata;
46 }
47 
48 #else
49 union fuse_dentry {
50 	u64 time;
51 	struct rcu_head rcu;
52 };
53 
54 static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
55 {
56 	((union fuse_dentry *) dentry->d_fsdata)->time = time;
57 }
58 
59 static inline u64 fuse_dentry_time(const struct dentry *entry)
60 {
61 	return ((union fuse_dentry *) entry->d_fsdata)->time;
62 }
63 #endif
64 
65 static void fuse_dentry_settime(struct dentry *dentry, u64 time)
66 {
67 	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
68 	bool delete = !time && fc->delete_stale;
69 	/*
70 	 * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
71 	 * Don't care about races, either way it's just an optimization
72 	 */
73 	if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
74 	    (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
75 		spin_lock(&dentry->d_lock);
76 		if (!delete)
77 			dentry->d_flags &= ~DCACHE_OP_DELETE;
78 		else
79 			dentry->d_flags |= DCACHE_OP_DELETE;
80 		spin_unlock(&dentry->d_lock);
81 	}
82 
83 	__fuse_dentry_settime(dentry, time);
84 }
85 
86 /*
87  * FUSE caches dentries and attributes with separate timeout.  The
88  * time in jiffies until the dentry/attributes are valid is stored in
89  * dentry->d_fsdata and fuse_inode->i_time respectively.
90  */
91 
92 /*
93  * Calculate the time in jiffies until a dentry/attributes are valid
94  */
95 u64 fuse_time_to_jiffies(u64 sec, u32 nsec)
96 {
97 	if (sec || nsec) {
98 		struct timespec64 ts = {
99 			sec,
100 			min_t(u32, nsec, NSEC_PER_SEC - 1)
101 		};
102 
103 		return get_jiffies_64() + timespec64_to_jiffies(&ts);
104 	} else
105 		return 0;
106 }
107 
108 /*
109  * Set dentry and possibly attribute timeouts from the lookup/mk*
110  * replies
111  */
112 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
113 {
114 	fuse_dentry_settime(entry,
115 		fuse_time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
116 }
117 
118 void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
119 {
120 	set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask);
121 }
122 
123 /*
124  * Mark the attributes as stale, so that at the next call to
125  * ->getattr() they will be fetched from userspace
126  */
127 void fuse_invalidate_attr(struct inode *inode)
128 {
129 	fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
130 }
131 
132 static void fuse_dir_changed(struct inode *dir)
133 {
134 	fuse_invalidate_attr(dir);
135 	inode_maybe_inc_iversion(dir, false);
136 }
137 
138 /*
139  * Mark the attributes as stale due to an atime change.  Avoid the invalidate if
140  * atime is not used.
141  */
142 void fuse_invalidate_atime(struct inode *inode)
143 {
144 	if (!IS_RDONLY(inode))
145 		fuse_invalidate_attr_mask(inode, STATX_ATIME);
146 }
147 
148 /*
149  * Just mark the entry as stale, so that a next attempt to look it up
150  * will result in a new lookup call to userspace
151  *
152  * This is called when a dentry is about to become negative and the
153  * timeout is unknown (unlink, rmdir, rename and in some cases
154  * lookup)
155  */
156 void fuse_invalidate_entry_cache(struct dentry *entry)
157 {
158 	fuse_dentry_settime(entry, 0);
159 }
160 
/*
 * Stronger variant of fuse_invalidate_entry_cache(): first call
 * d_invalidate() to try to get the dentry out of the hash, then mark
 * the entry stale.
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);
	fuse_dentry_settime(entry, 0);
}
170 
171 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
172 			     u64 nodeid, const struct qstr *name,
173 			     struct fuse_entry_out *outarg)
174 {
175 	memset(outarg, 0, sizeof(struct fuse_entry_out));
176 	args->opcode = FUSE_LOOKUP;
177 	args->nodeid = nodeid;
178 	args->in_numargs = 3;
179 	fuse_set_zero_arg0(args);
180 	args->in_args[1].size = name->len;
181 	args->in_args[1].value = name->name;
182 	args->in_args[2].size = 1;
183 	args->in_args[2].value = "";
184 	args->out_numargs = 1;
185 	args->out_args[0].size = sizeof(struct fuse_entry_out);
186 	args->out_args[0].value = outarg;
187 }
188 
189 /*
190  * Check whether the dentry is still valid
191  *
192  * If the entry validity timeout has expired and the dentry is
193  * positive, try to redo the lookup.  If the lookup results in a
194  * different inode, then let the VFS invalidate the dentry and redo
195  * the lookup once more.  If the lookup results in the same inode,
196  * then refresh the attributes, timeouts and mark the dentry valid.
197  */
198 static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
199 				  struct dentry *entry, unsigned int flags)
200 {
201 	struct inode *inode;
202 	struct fuse_mount *fm;
203 	struct fuse_conn *fc;
204 	struct fuse_inode *fi;
205 	int ret;
206 
207 	fc = get_fuse_conn_super(dir->i_sb);
208 	if (entry->d_time < atomic_read(&fc->epoch))
209 		goto invalid;
210 
211 	inode = d_inode_rcu(entry);
212 	if (inode && fuse_is_bad(inode))
213 		goto invalid;
214 	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
215 		 (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) {
216 		struct fuse_entry_out outarg;
217 		FUSE_ARGS(args);
218 		struct fuse_forget_link *forget;
219 		u64 attr_version;
220 
221 		/* For negative dentries, always do a fresh lookup */
222 		if (!inode)
223 			goto invalid;
224 
225 		ret = -ECHILD;
226 		if (flags & LOOKUP_RCU)
227 			goto out;
228 
229 		fm = get_fuse_mount(inode);
230 
231 		forget = fuse_alloc_forget();
232 		ret = -ENOMEM;
233 		if (!forget)
234 			goto out;
235 
236 		attr_version = fuse_get_attr_version(fm->fc);
237 
238 		fuse_lookup_init(fm->fc, &args, get_node_id(dir),
239 				 name, &outarg);
240 		ret = fuse_simple_request(fm, &args);
241 		/* Zero nodeid is same as -ENOENT */
242 		if (!ret && !outarg.nodeid)
243 			ret = -ENOENT;
244 		if (!ret) {
245 			fi = get_fuse_inode(inode);
246 			if (outarg.nodeid != get_node_id(inode) ||
247 			    (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
248 				fuse_queue_forget(fm->fc, forget,
249 						  outarg.nodeid, 1);
250 				goto invalid;
251 			}
252 			spin_lock(&fi->lock);
253 			fi->nlookup++;
254 			spin_unlock(&fi->lock);
255 		}
256 		kfree(forget);
257 		if (ret == -ENOMEM || ret == -EINTR)
258 			goto out;
259 		if (ret || fuse_invalid_attr(&outarg.attr) ||
260 		    fuse_stale_inode(inode, outarg.generation, &outarg.attr))
261 			goto invalid;
262 
263 		forget_all_cached_acls(inode);
264 		fuse_change_attributes(inode, &outarg.attr, NULL,
265 				       ATTR_TIMEOUT(&outarg),
266 				       attr_version);
267 		fuse_change_entry_timeout(entry, &outarg);
268 	} else if (inode) {
269 		fi = get_fuse_inode(inode);
270 		if (flags & LOOKUP_RCU) {
271 			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
272 				return -ECHILD;
273 		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
274 			fuse_advise_use_readdirplus(dir);
275 		}
276 	}
277 	ret = 1;
278 out:
279 	return ret;
280 
281 invalid:
282 	ret = 0;
283 	goto out;
284 }
285 
286 #if BITS_PER_LONG < 64
287 static int fuse_dentry_init(struct dentry *dentry)
288 {
289 	dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
290 				   GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
291 
292 	return dentry->d_fsdata ? 0 : -ENOMEM;
293 }
294 static void fuse_dentry_release(struct dentry *dentry)
295 {
296 	union fuse_dentry *fd = dentry->d_fsdata;
297 
298 	kfree_rcu(fd, rcu);
299 }
300 #endif
301 
302 static int fuse_dentry_delete(const struct dentry *dentry)
303 {
304 	return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
305 }
306 
307 /*
308  * Create a fuse_mount object with a new superblock (with path->dentry
309  * as the root), and return that mount so it can be auto-mounted on
310  * @path.
311  */
312 static struct vfsmount *fuse_dentry_automount(struct path *path)
313 {
314 	struct fs_context *fsc;
315 	struct vfsmount *mnt;
316 	struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
317 
318 	fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
319 	if (IS_ERR(fsc))
320 		return ERR_CAST(fsc);
321 
322 	/* Pass the FUSE inode of the mount for fuse_get_tree_submount() */
323 	fsc->fs_private = mp_fi;
324 
325 	/* Create the submount */
326 	mnt = fc_mount(fsc);
327 	put_fs_context(fsc);
328 	return mnt;
329 }
330 
331 const struct dentry_operations fuse_dentry_operations = {
332 	.d_revalidate	= fuse_dentry_revalidate,
333 	.d_delete	= fuse_dentry_delete,
334 #if BITS_PER_LONG < 64
335 	.d_init		= fuse_dentry_init,
336 	.d_release	= fuse_dentry_release,
337 #endif
338 	.d_automount	= fuse_dentry_automount,
339 };
340 
341 const struct dentry_operations fuse_root_dentry_operations = {
342 #if BITS_PER_LONG < 64
343 	.d_init		= fuse_dentry_init,
344 	.d_release	= fuse_dentry_release,
345 #endif
346 };
347 
/*
 * Returns 1 if the file type bits of mode @m describe a regular file,
 * directory, symlink, character or block device, FIFO or socket, and 0
 * otherwise.
 */
int fuse_valid_type(int m)
{
	if (S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m))
		return 1;

	if (S_ISCHR(m) || S_ISBLK(m))
		return 1;

	return S_ISFIFO(m) || S_ISSOCK(m);
}
353 
/* A size is acceptable only if it does not exceed LLONG_MAX. */
static bool fuse_valid_size(u64 size)
{
	return !(size > LLONG_MAX);
}
358 
359 bool fuse_invalid_attr(struct fuse_attr *attr)
360 {
361 	return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
362 }
363 
364 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
365 		     struct fuse_entry_out *outarg, struct inode **inode)
366 {
367 	struct fuse_mount *fm = get_fuse_mount_super(sb);
368 	FUSE_ARGS(args);
369 	struct fuse_forget_link *forget;
370 	u64 attr_version, evict_ctr;
371 	int err;
372 
373 	*inode = NULL;
374 	err = -ENAMETOOLONG;
375 	if (name->len > fm->fc->name_max)
376 		goto out;
377 
378 
379 	forget = fuse_alloc_forget();
380 	err = -ENOMEM;
381 	if (!forget)
382 		goto out;
383 
384 	attr_version = fuse_get_attr_version(fm->fc);
385 	evict_ctr = fuse_get_evict_ctr(fm->fc);
386 
387 	fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
388 	err = fuse_simple_request(fm, &args);
389 	/* Zero nodeid is same as -ENOENT, but with valid timeout */
390 	if (err || !outarg->nodeid)
391 		goto out_put_forget;
392 
393 	err = -EIO;
394 	if (fuse_invalid_attr(&outarg->attr))
395 		goto out_put_forget;
396 	if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
397 		pr_warn_once("root generation should be zero\n");
398 		outarg->generation = 0;
399 	}
400 
401 	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
402 			   &outarg->attr, ATTR_TIMEOUT(outarg),
403 			   attr_version, evict_ctr);
404 	err = -ENOMEM;
405 	if (!*inode) {
406 		fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
407 		goto out;
408 	}
409 	err = 0;
410 
411  out_put_forget:
412 	kfree(forget);
413  out:
414 	return err;
415 }
416 
417 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
418 				  unsigned int flags)
419 {
420 	struct fuse_entry_out outarg;
421 	struct fuse_conn *fc;
422 	struct inode *inode;
423 	struct dentry *newent;
424 	int err, epoch;
425 	bool outarg_valid = true;
426 	bool locked;
427 
428 	if (fuse_is_bad(dir))
429 		return ERR_PTR(-EIO);
430 
431 	fc = get_fuse_conn_super(dir->i_sb);
432 	epoch = atomic_read(&fc->epoch);
433 
434 	locked = fuse_lock_inode(dir);
435 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
436 			       &outarg, &inode);
437 	fuse_unlock_inode(dir, locked);
438 	if (err == -ENOENT) {
439 		outarg_valid = false;
440 		err = 0;
441 	}
442 	if (err)
443 		goto out_err;
444 
445 	err = -EIO;
446 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
447 		goto out_iput;
448 
449 	newent = d_splice_alias(inode, entry);
450 	err = PTR_ERR(newent);
451 	if (IS_ERR(newent))
452 		goto out_err;
453 
454 	entry = newent ? newent : entry;
455 	entry->d_time = epoch;
456 	if (outarg_valid)
457 		fuse_change_entry_timeout(entry, &outarg);
458 	else
459 		fuse_invalidate_entry_cache(entry);
460 
461 	if (inode)
462 		fuse_advise_use_readdirplus(dir);
463 	return newent;
464 
465  out_iput:
466 	iput(inode);
467  out_err:
468 	return ERR_PTR(err);
469 }
470 
471 static int get_security_context(struct dentry *entry, umode_t mode,
472 				struct fuse_in_arg *ext)
473 {
474 	struct fuse_secctx *fctx;
475 	struct fuse_secctx_header *header;
476 	struct lsm_context lsmctx = { };
477 	void *ptr;
478 	u32 total_len = sizeof(*header);
479 	int err, nr_ctx = 0;
480 	const char *name = NULL;
481 	size_t namelen;
482 
483 	err = security_dentry_init_security(entry, mode, &entry->d_name,
484 					    &name, &lsmctx);
485 
486 	/* If no LSM is supporting this security hook ignore error */
487 	if (err && err != -EOPNOTSUPP)
488 		goto out_err;
489 
490 	if (lsmctx.len) {
491 		nr_ctx = 1;
492 		namelen = strlen(name) + 1;
493 		err = -EIO;
494 		if (WARN_ON(namelen > XATTR_NAME_MAX + 1 ||
495 		    lsmctx.len > S32_MAX))
496 			goto out_err;
497 		total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen +
498 					    lsmctx.len);
499 	}
500 
501 	err = -ENOMEM;
502 	header = ptr = kzalloc(total_len, GFP_KERNEL);
503 	if (!ptr)
504 		goto out_err;
505 
506 	header->nr_secctx = nr_ctx;
507 	header->size = total_len;
508 	ptr += sizeof(*header);
509 	if (nr_ctx) {
510 		fctx = ptr;
511 		fctx->size = lsmctx.len;
512 		ptr += sizeof(*fctx);
513 
514 		strcpy(ptr, name);
515 		ptr += namelen;
516 
517 		memcpy(ptr, lsmctx.context, lsmctx.len);
518 	}
519 	ext->size = total_len;
520 	ext->value = header;
521 	err = 0;
522 out_err:
523 	if (nr_ctx)
524 		security_release_secctx(&lsmctx);
525 	return err;
526 }
527 
528 static void *extend_arg(struct fuse_in_arg *buf, u32 bytes)
529 {
530 	void *p;
531 	u32 newlen = buf->size + bytes;
532 
533 	p = krealloc(buf->value, newlen, GFP_KERNEL);
534 	if (!p) {
535 		kfree(buf->value);
536 		buf->size = 0;
537 		buf->value = NULL;
538 		return NULL;
539 	}
540 
541 	memset(p + buf->size, 0, bytes);
542 	buf->value = p;
543 	buf->size = newlen;
544 
545 	return p + newlen - bytes;
546 }
547 
548 static u32 fuse_ext_size(size_t size)
549 {
550 	return FUSE_REC_ALIGN(sizeof(struct fuse_ext_header) + size);
551 }
552 
553 /*
554  * This adds just a single supplementary group that matches the parent's group.
555  */
556 static int get_create_supp_group(struct mnt_idmap *idmap,
557 				 struct inode *dir,
558 				 struct fuse_in_arg *ext)
559 {
560 	struct fuse_conn *fc = get_fuse_conn(dir);
561 	struct fuse_ext_header *xh;
562 	struct fuse_supp_groups *sg;
563 	kgid_t kgid = dir->i_gid;
564 	vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns, kgid);
565 	gid_t parent_gid = from_kgid(fc->user_ns, kgid);
566 
567 	u32 sg_len = fuse_ext_size(sizeof(*sg) + sizeof(sg->groups[0]));
568 
569 	if (parent_gid == (gid_t) -1 || vfsgid_eq_kgid(vfsgid, current_fsgid()) ||
570 	    !vfsgid_in_group_p(vfsgid))
571 		return 0;
572 
573 	xh = extend_arg(ext, sg_len);
574 	if (!xh)
575 		return -ENOMEM;
576 
577 	xh->size = sg_len;
578 	xh->type = FUSE_EXT_GROUPS;
579 
580 	sg = (struct fuse_supp_groups *) &xh[1];
581 	sg->nr_groups = 1;
582 	sg->groups[0] = parent_gid;
583 
584 	return 0;
585 }
586 
587 static int get_create_ext(struct mnt_idmap *idmap,
588 			  struct fuse_args *args,
589 			  struct inode *dir, struct dentry *dentry,
590 			  umode_t mode)
591 {
592 	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
593 	struct fuse_in_arg ext = { .size = 0, .value = NULL };
594 	int err = 0;
595 
596 	if (fc->init_security)
597 		err = get_security_context(dentry, mode, &ext);
598 	if (!err && fc->create_supp_group)
599 		err = get_create_supp_group(idmap, dir, &ext);
600 
601 	if (!err && ext.size) {
602 		WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args));
603 		args->is_ext = true;
604 		args->ext_idx = args->in_numargs++;
605 		args->in_args[args->ext_idx] = ext;
606 	} else {
607 		kfree(ext.value);
608 	}
609 
610 	return err;
611 }
612 
613 static void free_ext_value(struct fuse_args *args)
614 {
615 	if (args->is_ext)
616 		kfree(args->in_args[args->ext_idx].value);
617 }
618 
619 /*
620  * Atomic create+open operation
621  *
622  * If the filesystem doesn't support this, then fall back to separate
623  * 'mknod' + 'open' requests.
624  */
625 static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
626 			    struct dentry *entry, struct file *file,
627 			    unsigned int flags, umode_t mode, u32 opcode)
628 {
629 	struct inode *inode;
630 	struct fuse_mount *fm = get_fuse_mount(dir);
631 	FUSE_ARGS(args);
632 	struct fuse_forget_link *forget;
633 	struct fuse_create_in inarg;
634 	struct fuse_open_out *outopenp;
635 	struct fuse_entry_out outentry;
636 	struct fuse_inode *fi;
637 	struct fuse_file *ff;
638 	int epoch, err;
639 	bool trunc = flags & O_TRUNC;
640 
641 	/* Userspace expects S_IFREG in create mode */
642 	BUG_ON((mode & S_IFMT) != S_IFREG);
643 
644 	epoch = atomic_read(&fm->fc->epoch);
645 	forget = fuse_alloc_forget();
646 	err = -ENOMEM;
647 	if (!forget)
648 		goto out_err;
649 
650 	err = -ENOMEM;
651 	ff = fuse_file_alloc(fm, true);
652 	if (!ff)
653 		goto out_put_forget_req;
654 
655 	if (!fm->fc->dont_mask)
656 		mode &= ~current_umask();
657 
658 	flags &= ~O_NOCTTY;
659 	memset(&inarg, 0, sizeof(inarg));
660 	memset(&outentry, 0, sizeof(outentry));
661 	inarg.flags = flags;
662 	inarg.mode = mode;
663 	inarg.umask = current_umask();
664 
665 	if (fm->fc->handle_killpriv_v2 && trunc &&
666 	    !(flags & O_EXCL) && !capable(CAP_FSETID)) {
667 		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
668 	}
669 
670 	args.opcode = opcode;
671 	args.nodeid = get_node_id(dir);
672 	args.in_numargs = 2;
673 	args.in_args[0].size = sizeof(inarg);
674 	args.in_args[0].value = &inarg;
675 	args.in_args[1].size = entry->d_name.len + 1;
676 	args.in_args[1].value = entry->d_name.name;
677 	args.out_numargs = 2;
678 	args.out_args[0].size = sizeof(outentry);
679 	args.out_args[0].value = &outentry;
680 	/* Store outarg for fuse_finish_open() */
681 	outopenp = &ff->args->open_outarg;
682 	args.out_args[1].size = sizeof(*outopenp);
683 	args.out_args[1].value = outopenp;
684 
685 	err = get_create_ext(idmap, &args, dir, entry, mode);
686 	if (err)
687 		goto out_free_ff;
688 
689 	err = fuse_simple_idmap_request(idmap, fm, &args);
690 	free_ext_value(&args);
691 	if (err)
692 		goto out_free_ff;
693 
694 	err = -EIO;
695 	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
696 	    fuse_invalid_attr(&outentry.attr))
697 		goto out_free_ff;
698 
699 	ff->fh = outopenp->fh;
700 	ff->nodeid = outentry.nodeid;
701 	ff->open_flags = outopenp->open_flags;
702 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
703 			  &outentry.attr, ATTR_TIMEOUT(&outentry), 0, 0);
704 	if (!inode) {
705 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
706 		fuse_sync_release(NULL, ff, flags);
707 		fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
708 		err = -ENOMEM;
709 		goto out_err;
710 	}
711 	kfree(forget);
712 	d_instantiate(entry, inode);
713 	entry->d_time = epoch;
714 	fuse_change_entry_timeout(entry, &outentry);
715 	fuse_dir_changed(dir);
716 	err = generic_file_open(inode, file);
717 	if (!err) {
718 		file->private_data = ff;
719 		err = finish_open(file, entry, fuse_finish_open);
720 	}
721 	if (err) {
722 		fi = get_fuse_inode(inode);
723 		fuse_sync_release(fi, ff, flags);
724 	} else {
725 		if (fm->fc->atomic_o_trunc && trunc)
726 			truncate_pagecache(inode, 0);
727 		else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
728 			invalidate_inode_pages2(inode->i_mapping);
729 	}
730 	return err;
731 
732 out_free_ff:
733 	fuse_file_free(ff);
734 out_put_forget_req:
735 	kfree(forget);
736 out_err:
737 	return err;
738 }
739 
740 static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
741 		      umode_t, dev_t);
742 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
743 			    struct file *file, unsigned flags,
744 			    umode_t mode)
745 {
746 	int err;
747 	struct mnt_idmap *idmap = file_mnt_idmap(file);
748 	struct fuse_conn *fc = get_fuse_conn(dir);
749 	struct dentry *res = NULL;
750 
751 	if (fuse_is_bad(dir))
752 		return -EIO;
753 
754 	if (d_in_lookup(entry)) {
755 		res = fuse_lookup(dir, entry, 0);
756 		if (IS_ERR(res))
757 			return PTR_ERR(res);
758 
759 		if (res)
760 			entry = res;
761 	}
762 
763 	if (!(flags & O_CREAT) || d_really_is_positive(entry))
764 		goto no_open;
765 
766 	/* Only creates */
767 	file->f_mode |= FMODE_CREATED;
768 
769 	if (fc->no_create)
770 		goto mknod;
771 
772 	err = fuse_create_open(idmap, dir, entry, file, flags, mode, FUSE_CREATE);
773 	if (err == -ENOSYS) {
774 		fc->no_create = 1;
775 		goto mknod;
776 	} else if (err == -EEXIST)
777 		fuse_invalidate_entry(entry);
778 out_dput:
779 	dput(res);
780 	return err;
781 
782 mknod:
783 	err = fuse_mknod(idmap, dir, entry, mode, 0);
784 	if (err)
785 		goto out_dput;
786 no_open:
787 	return finish_no_open(file, res);
788 }
789 
790 /*
791  * Code shared between mknod, mkdir, symlink and link
792  */
793 static struct dentry *create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm,
794 				       struct fuse_args *args, struct inode *dir,
795 				       struct dentry *entry, umode_t mode)
796 {
797 	struct fuse_entry_out outarg;
798 	struct inode *inode;
799 	struct dentry *d;
800 	struct fuse_forget_link *forget;
801 	int epoch, err;
802 
803 	if (fuse_is_bad(dir))
804 		return ERR_PTR(-EIO);
805 
806 	epoch = atomic_read(&fm->fc->epoch);
807 
808 	forget = fuse_alloc_forget();
809 	if (!forget)
810 		return ERR_PTR(-ENOMEM);
811 
812 	memset(&outarg, 0, sizeof(outarg));
813 	args->nodeid = get_node_id(dir);
814 	args->out_numargs = 1;
815 	args->out_args[0].size = sizeof(outarg);
816 	args->out_args[0].value = &outarg;
817 
818 	if (args->opcode != FUSE_LINK) {
819 		err = get_create_ext(idmap, args, dir, entry, mode);
820 		if (err)
821 			goto out_put_forget_req;
822 	}
823 
824 	err = fuse_simple_idmap_request(idmap, fm, args);
825 	free_ext_value(args);
826 	if (err)
827 		goto out_put_forget_req;
828 
829 	err = -EIO;
830 	if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
831 		goto out_put_forget_req;
832 
833 	if ((outarg.attr.mode ^ mode) & S_IFMT)
834 		goto out_put_forget_req;
835 
836 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
837 			  &outarg.attr, ATTR_TIMEOUT(&outarg), 0, 0);
838 	if (!inode) {
839 		fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
840 		return ERR_PTR(-ENOMEM);
841 	}
842 	kfree(forget);
843 
844 	d_drop(entry);
845 	d = d_splice_alias(inode, entry);
846 	if (IS_ERR(d))
847 		return d;
848 
849 	if (d) {
850 		d->d_time = epoch;
851 		fuse_change_entry_timeout(d, &outarg);
852 	} else {
853 		entry->d_time = epoch;
854 		fuse_change_entry_timeout(entry, &outarg);
855 	}
856 	fuse_dir_changed(dir);
857 	return d;
858 
859  out_put_forget_req:
860 	if (err == -EEXIST)
861 		fuse_invalidate_entry(entry);
862 	kfree(forget);
863 	return ERR_PTR(err);
864 }
865 
866 static int create_new_nondir(struct mnt_idmap *idmap, struct fuse_mount *fm,
867 			     struct fuse_args *args, struct inode *dir,
868 			     struct dentry *entry, umode_t mode)
869 {
870 	/*
871 	 * Note that when creating anything other than a directory we
872 	 * can be sure create_new_entry() will NOT return an alternate
873 	 * dentry as d_splice_alias() only returns an alternate dentry
874 	 * for directories.  So we don't need to check for that case
875 	 * when passing back the result.
876 	 */
877 	WARN_ON_ONCE(S_ISDIR(mode));
878 
879 	return PTR_ERR(create_new_entry(idmap, fm, args, dir, entry, mode));
880 }
881 
882 static int fuse_mknod(struct mnt_idmap *idmap, struct inode *dir,
883 		      struct dentry *entry, umode_t mode, dev_t rdev)
884 {
885 	struct fuse_mknod_in inarg;
886 	struct fuse_mount *fm = get_fuse_mount(dir);
887 	FUSE_ARGS(args);
888 
889 	if (!fm->fc->dont_mask)
890 		mode &= ~current_umask();
891 
892 	memset(&inarg, 0, sizeof(inarg));
893 	inarg.mode = mode;
894 	inarg.rdev = new_encode_dev(rdev);
895 	inarg.umask = current_umask();
896 	args.opcode = FUSE_MKNOD;
897 	args.in_numargs = 2;
898 	args.in_args[0].size = sizeof(inarg);
899 	args.in_args[0].value = &inarg;
900 	args.in_args[1].size = entry->d_name.len + 1;
901 	args.in_args[1].value = entry->d_name.name;
902 	return create_new_nondir(idmap, fm, &args, dir, entry, mode);
903 }
904 
905 static int fuse_create(struct mnt_idmap *idmap, struct inode *dir,
906 		       struct dentry *entry, umode_t mode, bool excl)
907 {
908 	return fuse_mknod(idmap, dir, entry, mode, 0);
909 }
910 
911 static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
912 			struct file *file, umode_t mode)
913 {
914 	struct fuse_conn *fc = get_fuse_conn(dir);
915 	int err;
916 
917 	if (fc->no_tmpfile)
918 		return -EOPNOTSUPP;
919 
920 	err = fuse_create_open(idmap, dir, file->f_path.dentry, file,
921 			       file->f_flags, mode, FUSE_TMPFILE);
922 	if (err == -ENOSYS) {
923 		fc->no_tmpfile = 1;
924 		err = -EOPNOTSUPP;
925 	}
926 	return err;
927 }
928 
929 static struct dentry *fuse_mkdir(struct mnt_idmap *idmap, struct inode *dir,
930 				 struct dentry *entry, umode_t mode)
931 {
932 	struct fuse_mkdir_in inarg;
933 	struct fuse_mount *fm = get_fuse_mount(dir);
934 	FUSE_ARGS(args);
935 
936 	if (!fm->fc->dont_mask)
937 		mode &= ~current_umask();
938 
939 	memset(&inarg, 0, sizeof(inarg));
940 	inarg.mode = mode;
941 	inarg.umask = current_umask();
942 	args.opcode = FUSE_MKDIR;
943 	args.in_numargs = 2;
944 	args.in_args[0].size = sizeof(inarg);
945 	args.in_args[0].value = &inarg;
946 	args.in_args[1].size = entry->d_name.len + 1;
947 	args.in_args[1].value = entry->d_name.name;
948 	return create_new_entry(idmap, fm, &args, dir, entry, S_IFDIR);
949 }
950 
951 static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir,
952 			struct dentry *entry, const char *link)
953 {
954 	struct fuse_mount *fm = get_fuse_mount(dir);
955 	unsigned len = strlen(link) + 1;
956 	FUSE_ARGS(args);
957 
958 	args.opcode = FUSE_SYMLINK;
959 	args.in_numargs = 3;
960 	fuse_set_zero_arg0(&args);
961 	args.in_args[1].size = entry->d_name.len + 1;
962 	args.in_args[1].value = entry->d_name.name;
963 	args.in_args[2].size = len;
964 	args.in_args[2].value = link;
965 	return create_new_nondir(idmap, fm, &args, dir, entry, S_IFLNK);
966 }
967 
968 void fuse_flush_time_update(struct inode *inode)
969 {
970 	int err = sync_inode_metadata(inode, 1);
971 
972 	mapping_set_error(inode->i_mapping, err);
973 }
974 
/*
 * Set the cached ctime of @inode to the current time, mark the inode
 * dirty and flush the time update.  Does nothing when IS_NOCMTIME()
 * holds for the inode.
 */
static void fuse_update_ctime_in_cache(struct inode *inode)
{
	if (IS_NOCMTIME(inode))
		return;

	inode_set_ctime_current(inode);
	mark_inode_dirty_sync(inode);
	fuse_flush_time_update(inode);
}
983 
984 void fuse_update_ctime(struct inode *inode)
985 {
986 	fuse_invalidate_attr_mask(inode, STATX_CTIME);
987 	fuse_update_ctime_in_cache(inode);
988 }
989 
990 static void fuse_entry_unlinked(struct dentry *entry)
991 {
992 	struct inode *inode = d_inode(entry);
993 	struct fuse_conn *fc = get_fuse_conn(inode);
994 	struct fuse_inode *fi = get_fuse_inode(inode);
995 
996 	spin_lock(&fi->lock);
997 	fi->attr_version = atomic64_inc_return(&fc->attr_version);
998 	/*
999 	 * If i_nlink == 0 then unlink doesn't make sense, yet this can
1000 	 * happen if userspace filesystem is careless.  It would be
1001 	 * difficult to enforce correct nlink usage so just ignore this
1002 	 * condition here
1003 	 */
1004 	if (S_ISDIR(inode->i_mode))
1005 		clear_nlink(inode);
1006 	else if (inode->i_nlink > 0)
1007 		drop_nlink(inode);
1008 	spin_unlock(&fi->lock);
1009 	fuse_invalidate_entry_cache(entry);
1010 	fuse_update_ctime(inode);
1011 }
1012 
1013 static int fuse_unlink(struct inode *dir, struct dentry *entry)
1014 {
1015 	int err;
1016 	struct fuse_mount *fm = get_fuse_mount(dir);
1017 	FUSE_ARGS(args);
1018 
1019 	if (fuse_is_bad(dir))
1020 		return -EIO;
1021 
1022 	args.opcode = FUSE_UNLINK;
1023 	args.nodeid = get_node_id(dir);
1024 	args.in_numargs = 2;
1025 	fuse_set_zero_arg0(&args);
1026 	args.in_args[1].size = entry->d_name.len + 1;
1027 	args.in_args[1].value = entry->d_name.name;
1028 	err = fuse_simple_request(fm, &args);
1029 	if (!err) {
1030 		fuse_dir_changed(dir);
1031 		fuse_entry_unlinked(entry);
1032 	} else if (err == -EINTR || err == -ENOENT)
1033 		fuse_invalidate_entry(entry);
1034 	return err;
1035 }
1036 
1037 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
1038 {
1039 	int err;
1040 	struct fuse_mount *fm = get_fuse_mount(dir);
1041 	FUSE_ARGS(args);
1042 
1043 	if (fuse_is_bad(dir))
1044 		return -EIO;
1045 
1046 	args.opcode = FUSE_RMDIR;
1047 	args.nodeid = get_node_id(dir);
1048 	args.in_numargs = 2;
1049 	fuse_set_zero_arg0(&args);
1050 	args.in_args[1].size = entry->d_name.len + 1;
1051 	args.in_args[1].value = entry->d_name.name;
1052 	err = fuse_simple_request(fm, &args);
1053 	if (!err) {
1054 		fuse_dir_changed(dir);
1055 		fuse_entry_unlinked(entry);
1056 	} else if (err == -EINTR || err == -ENOENT)
1057 		fuse_invalidate_entry(entry);
1058 	return err;
1059 }
1060 
1061 static int fuse_rename_common(struct mnt_idmap *idmap, struct inode *olddir, struct dentry *oldent,
1062 			      struct inode *newdir, struct dentry *newent,
1063 			      unsigned int flags, int opcode, size_t argsize)
1064 {
1065 	int err;
1066 	struct fuse_rename2_in inarg;
1067 	struct fuse_mount *fm = get_fuse_mount(olddir);
1068 	FUSE_ARGS(args);
1069 
1070 	memset(&inarg, 0, argsize);
1071 	inarg.newdir = get_node_id(newdir);
1072 	inarg.flags = flags;
1073 	args.opcode = opcode;
1074 	args.nodeid = get_node_id(olddir);
1075 	args.in_numargs = 3;
1076 	args.in_args[0].size = argsize;
1077 	args.in_args[0].value = &inarg;
1078 	args.in_args[1].size = oldent->d_name.len + 1;
1079 	args.in_args[1].value = oldent->d_name.name;
1080 	args.in_args[2].size = newent->d_name.len + 1;
1081 	args.in_args[2].value = newent->d_name.name;
1082 	err = fuse_simple_idmap_request(idmap, fm, &args);
1083 	if (!err) {
1084 		/* ctime changes */
1085 		fuse_update_ctime(d_inode(oldent));
1086 
1087 		if (flags & RENAME_EXCHANGE)
1088 			fuse_update_ctime(d_inode(newent));
1089 
1090 		fuse_dir_changed(olddir);
1091 		if (olddir != newdir)
1092 			fuse_dir_changed(newdir);
1093 
1094 		/* newent will end up negative */
1095 		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent))
1096 			fuse_entry_unlinked(newent);
1097 	} else if (err == -EINTR || err == -ENOENT) {
1098 		/* If request was interrupted, DEITY only knows if the
1099 		   rename actually took place.  If the invalidation
1100 		   fails (e.g. some process has CWD under the renamed
1101 		   directory), then there can be inconsistency between
1102 		   the dcache and the real filesystem.  Tough luck. */
1103 		fuse_invalidate_entry(oldent);
1104 		if (d_really_is_positive(newent))
1105 			fuse_invalidate_entry(newent);
1106 	}
1107 
1108 	return err;
1109 }
1110 
1111 static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir,
1112 			struct dentry *oldent, struct inode *newdir,
1113 			struct dentry *newent, unsigned int flags)
1114 {
1115 	struct fuse_conn *fc = get_fuse_conn(olddir);
1116 	int err;
1117 
1118 	if (fuse_is_bad(olddir))
1119 		return -EIO;
1120 
1121 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
1122 		return -EINVAL;
1123 
1124 	if (flags) {
1125 		if (fc->no_rename2 || fc->minor < 23)
1126 			return -EINVAL;
1127 
1128 		err = fuse_rename_common((flags & RENAME_WHITEOUT) ? idmap : &invalid_mnt_idmap,
1129 					 olddir, oldent, newdir, newent, flags,
1130 					 FUSE_RENAME2,
1131 					 sizeof(struct fuse_rename2_in));
1132 		if (err == -ENOSYS) {
1133 			fc->no_rename2 = 1;
1134 			err = -EINVAL;
1135 		}
1136 	} else {
1137 		err = fuse_rename_common(&invalid_mnt_idmap, olddir, oldent, newdir, newent, 0,
1138 					 FUSE_RENAME,
1139 					 sizeof(struct fuse_rename_in));
1140 	}
1141 
1142 	return err;
1143 }
1144 
1145 static int fuse_link(struct dentry *entry, struct inode *newdir,
1146 		     struct dentry *newent)
1147 {
1148 	int err;
1149 	struct fuse_link_in inarg;
1150 	struct inode *inode = d_inode(entry);
1151 	struct fuse_mount *fm = get_fuse_mount(inode);
1152 	FUSE_ARGS(args);
1153 
1154 	if (fm->fc->no_link)
1155 		goto out;
1156 
1157 	memset(&inarg, 0, sizeof(inarg));
1158 	inarg.oldnodeid = get_node_id(inode);
1159 	args.opcode = FUSE_LINK;
1160 	args.in_numargs = 2;
1161 	args.in_args[0].size = sizeof(inarg);
1162 	args.in_args[0].value = &inarg;
1163 	args.in_args[1].size = newent->d_name.len + 1;
1164 	args.in_args[1].value = newent->d_name.name;
1165 	err = create_new_nondir(&invalid_mnt_idmap, fm, &args, newdir, newent, inode->i_mode);
1166 	if (!err)
1167 		fuse_update_ctime_in_cache(inode);
1168 	else if (err == -EINTR)
1169 		fuse_invalidate_attr(inode);
1170 
1171 	if (err == -ENOSYS)
1172 		fm->fc->no_link = 1;
1173 out:
1174 	if (fm->fc->no_link)
1175 		return -EPERM;
1176 
1177 	return err;
1178 }
1179 
1180 static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode,
1181 			  struct fuse_attr *attr, struct kstat *stat)
1182 {
1183 	unsigned int blkbits;
1184 	struct fuse_conn *fc = get_fuse_conn(inode);
1185 	vfsuid_t vfsuid = make_vfsuid(idmap, fc->user_ns,
1186 				      make_kuid(fc->user_ns, attr->uid));
1187 	vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns,
1188 				      make_kgid(fc->user_ns, attr->gid));
1189 
1190 	stat->dev = inode->i_sb->s_dev;
1191 	stat->ino = attr->ino;
1192 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
1193 	stat->nlink = attr->nlink;
1194 	stat->uid = vfsuid_into_kuid(vfsuid);
1195 	stat->gid = vfsgid_into_kgid(vfsgid);
1196 	stat->rdev = inode->i_rdev;
1197 	stat->atime.tv_sec = attr->atime;
1198 	stat->atime.tv_nsec = attr->atimensec;
1199 	stat->mtime.tv_sec = attr->mtime;
1200 	stat->mtime.tv_nsec = attr->mtimensec;
1201 	stat->ctime.tv_sec = attr->ctime;
1202 	stat->ctime.tv_nsec = attr->ctimensec;
1203 	stat->size = attr->size;
1204 	stat->blocks = attr->blocks;
1205 
1206 	if (attr->blksize != 0)
1207 		blkbits = ilog2(attr->blksize);
1208 	else
1209 		blkbits = inode->i_sb->s_blocksize_bits;
1210 
1211 	stat->blksize = 1 << blkbits;
1212 }
1213 
1214 static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr)
1215 {
1216 	memset(attr, 0, sizeof(*attr));
1217 	attr->ino = sx->ino;
1218 	attr->size = sx->size;
1219 	attr->blocks = sx->blocks;
1220 	attr->atime = sx->atime.tv_sec;
1221 	attr->mtime = sx->mtime.tv_sec;
1222 	attr->ctime = sx->ctime.tv_sec;
1223 	attr->atimensec = sx->atime.tv_nsec;
1224 	attr->mtimensec = sx->mtime.tv_nsec;
1225 	attr->ctimensec = sx->ctime.tv_nsec;
1226 	attr->mode = sx->mode;
1227 	attr->nlink = sx->nlink;
1228 	attr->uid = sx->uid;
1229 	attr->gid = sx->gid;
1230 	attr->rdev = new_encode_dev(MKDEV(sx->rdev_major, sx->rdev_minor));
1231 	attr->blksize = sx->blksize;
1232 }
1233 
1234 static int fuse_do_statx(struct mnt_idmap *idmap, struct inode *inode,
1235 			 struct file *file, struct kstat *stat)
1236 {
1237 	int err;
1238 	struct fuse_attr attr;
1239 	struct fuse_statx *sx;
1240 	struct fuse_statx_in inarg;
1241 	struct fuse_statx_out outarg;
1242 	struct fuse_mount *fm = get_fuse_mount(inode);
1243 	u64 attr_version = fuse_get_attr_version(fm->fc);
1244 	FUSE_ARGS(args);
1245 
1246 	memset(&inarg, 0, sizeof(inarg));
1247 	memset(&outarg, 0, sizeof(outarg));
1248 	/* Directories have separate file-handle space */
1249 	if (file && S_ISREG(inode->i_mode)) {
1250 		struct fuse_file *ff = file->private_data;
1251 
1252 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1253 		inarg.fh = ff->fh;
1254 	}
1255 	/* For now leave sync hints as the default, request all stats. */
1256 	inarg.sx_flags = 0;
1257 	inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME;
1258 	args.opcode = FUSE_STATX;
1259 	args.nodeid = get_node_id(inode);
1260 	args.in_numargs = 1;
1261 	args.in_args[0].size = sizeof(inarg);
1262 	args.in_args[0].value = &inarg;
1263 	args.out_numargs = 1;
1264 	args.out_args[0].size = sizeof(outarg);
1265 	args.out_args[0].value = &outarg;
1266 	err = fuse_simple_request(fm, &args);
1267 	if (err)
1268 		return err;
1269 
1270 	sx = &outarg.stat;
1271 	if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
1272 	    ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
1273 					 inode_wrong_type(inode, sx->mode)))) {
1274 		fuse_make_bad(inode);
1275 		return -EIO;
1276 	}
1277 
1278 	fuse_statx_to_attr(&outarg.stat, &attr);
1279 	if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) {
1280 		fuse_change_attributes(inode, &attr, &outarg.stat,
1281 				       ATTR_TIMEOUT(&outarg), attr_version);
1282 	}
1283 
1284 	if (stat) {
1285 		stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME);
1286 		stat->btime.tv_sec = sx->btime.tv_sec;
1287 		stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
1288 		fuse_fillattr(idmap, inode, &attr, stat);
1289 		stat->result_mask |= STATX_TYPE;
1290 	}
1291 
1292 	return 0;
1293 }
1294 
1295 static int fuse_do_getattr(struct mnt_idmap *idmap, struct inode *inode,
1296 			   struct kstat *stat, struct file *file)
1297 {
1298 	int err;
1299 	struct fuse_getattr_in inarg;
1300 	struct fuse_attr_out outarg;
1301 	struct fuse_mount *fm = get_fuse_mount(inode);
1302 	FUSE_ARGS(args);
1303 	u64 attr_version;
1304 
1305 	attr_version = fuse_get_attr_version(fm->fc);
1306 
1307 	memset(&inarg, 0, sizeof(inarg));
1308 	memset(&outarg, 0, sizeof(outarg));
1309 	/* Directories have separate file-handle space */
1310 	if (file && S_ISREG(inode->i_mode)) {
1311 		struct fuse_file *ff = file->private_data;
1312 
1313 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1314 		inarg.fh = ff->fh;
1315 	}
1316 	args.opcode = FUSE_GETATTR;
1317 	args.nodeid = get_node_id(inode);
1318 	args.in_numargs = 1;
1319 	args.in_args[0].size = sizeof(inarg);
1320 	args.in_args[0].value = &inarg;
1321 	args.out_numargs = 1;
1322 	args.out_args[0].size = sizeof(outarg);
1323 	args.out_args[0].value = &outarg;
1324 	err = fuse_simple_request(fm, &args);
1325 	if (!err) {
1326 		if (fuse_invalid_attr(&outarg.attr) ||
1327 		    inode_wrong_type(inode, outarg.attr.mode)) {
1328 			fuse_make_bad(inode);
1329 			err = -EIO;
1330 		} else {
1331 			fuse_change_attributes(inode, &outarg.attr, NULL,
1332 					       ATTR_TIMEOUT(&outarg),
1333 					       attr_version);
1334 			if (stat)
1335 				fuse_fillattr(idmap, inode, &outarg.attr, stat);
1336 		}
1337 	}
1338 	return err;
1339 }
1340 
1341 static int fuse_update_get_attr(struct mnt_idmap *idmap, struct inode *inode,
1342 				struct file *file, struct kstat *stat,
1343 				u32 request_mask, unsigned int flags)
1344 {
1345 	struct fuse_inode *fi = get_fuse_inode(inode);
1346 	struct fuse_conn *fc = get_fuse_conn(inode);
1347 	int err = 0;
1348 	bool sync;
1349 	u32 inval_mask = READ_ONCE(fi->inval_mask);
1350 	u32 cache_mask = fuse_get_cache_mask(inode);
1351 
1352 
1353 	/* FUSE only supports basic stats and possibly btime */
1354 	request_mask &= STATX_BASIC_STATS | STATX_BTIME;
1355 retry:
1356 	if (fc->no_statx)
1357 		request_mask &= STATX_BASIC_STATS;
1358 
1359 	if (!request_mask)
1360 		sync = false;
1361 	else if (flags & AT_STATX_FORCE_SYNC)
1362 		sync = true;
1363 	else if (flags & AT_STATX_DONT_SYNC)
1364 		sync = false;
1365 	else if (request_mask & inval_mask & ~cache_mask)
1366 		sync = true;
1367 	else
1368 		sync = time_before64(fi->i_time, get_jiffies_64());
1369 
1370 	if (sync) {
1371 		forget_all_cached_acls(inode);
1372 		/* Try statx if BTIME is requested */
1373 		if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) {
1374 			err = fuse_do_statx(idmap, inode, file, stat);
1375 			if (err == -ENOSYS) {
1376 				fc->no_statx = 1;
1377 				err = 0;
1378 				goto retry;
1379 			}
1380 		} else {
1381 			err = fuse_do_getattr(idmap, inode, stat, file);
1382 		}
1383 	} else if (stat) {
1384 		generic_fillattr(idmap, request_mask, inode, stat);
1385 		stat->mode = fi->orig_i_mode;
1386 		stat->ino = fi->orig_ino;
1387 		if (test_bit(FUSE_I_BTIME, &fi->state)) {
1388 			stat->btime = fi->i_btime;
1389 			stat->result_mask |= STATX_BTIME;
1390 		}
1391 	}
1392 
1393 	return err;
1394 }
1395 
1396 int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask)
1397 {
1398 	return fuse_update_get_attr(&nop_mnt_idmap, inode, file, NULL, mask, 0);
1399 }
1400 
1401 int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
1402 			     u64 child_nodeid, struct qstr *name, u32 flags)
1403 {
1404 	int err = -ENOTDIR;
1405 	struct inode *parent;
1406 	struct dentry *dir;
1407 	struct dentry *entry;
1408 
1409 	parent = fuse_ilookup(fc, parent_nodeid, NULL);
1410 	if (!parent)
1411 		return -ENOENT;
1412 
1413 	inode_lock_nested(parent, I_MUTEX_PARENT);
1414 	if (!S_ISDIR(parent->i_mode))
1415 		goto unlock;
1416 
1417 	err = -ENOENT;
1418 	dir = d_find_alias(parent);
1419 	if (!dir)
1420 		goto unlock;
1421 
1422 	name->hash = full_name_hash(dir, name->name, name->len);
1423 	entry = d_lookup(dir, name);
1424 	dput(dir);
1425 	if (!entry)
1426 		goto unlock;
1427 
1428 	fuse_dir_changed(parent);
1429 	if (!(flags & FUSE_EXPIRE_ONLY))
1430 		d_invalidate(entry);
1431 	fuse_invalidate_entry_cache(entry);
1432 
1433 	if (child_nodeid != 0 && d_really_is_positive(entry)) {
1434 		inode_lock(d_inode(entry));
1435 		if (get_node_id(d_inode(entry)) != child_nodeid) {
1436 			err = -ENOENT;
1437 			goto badentry;
1438 		}
1439 		if (d_mountpoint(entry)) {
1440 			err = -EBUSY;
1441 			goto badentry;
1442 		}
1443 		if (d_is_dir(entry)) {
1444 			shrink_dcache_parent(entry);
1445 			if (!simple_empty(entry)) {
1446 				err = -ENOTEMPTY;
1447 				goto badentry;
1448 			}
1449 			d_inode(entry)->i_flags |= S_DEAD;
1450 		}
1451 		dont_mount(entry);
1452 		clear_nlink(d_inode(entry));
1453 		err = 0;
1454  badentry:
1455 		inode_unlock(d_inode(entry));
1456 		if (!err)
1457 			d_delete(entry);
1458 	} else {
1459 		err = 0;
1460 	}
1461 	dput(entry);
1462 
1463  unlock:
1464 	inode_unlock(parent);
1465 	iput(parent);
1466 	return err;
1467 }
1468 
1469 static inline bool fuse_permissible_uidgid(struct fuse_conn *fc)
1470 {
1471 	const struct cred *cred = current_cred();
1472 
1473 	return (uid_eq(cred->euid, fc->user_id) &&
1474 		uid_eq(cred->suid, fc->user_id) &&
1475 		uid_eq(cred->uid,  fc->user_id) &&
1476 		gid_eq(cred->egid, fc->group_id) &&
1477 		gid_eq(cred->sgid, fc->group_id) &&
1478 		gid_eq(cred->gid,  fc->group_id));
1479 }
1480 
1481 /*
1482  * Calling into a user-controlled filesystem gives the filesystem
1483  * daemon ptrace-like capabilities over the current process.  This
1484  * means, that the filesystem daemon is able to record the exact
1485  * filesystem operations performed, and can also control the behavior
1486  * of the requester process in otherwise impossible ways.  For example
1487  * it can delay the operation for arbitrary length of time allowing
1488  * DoS against the requester.
1489  *
1490  * For this reason only those processes can call into the filesystem,
1491  * for which the owner of the mount has ptrace privilege.  This
1492  * excludes processes started by other users, suid or sgid processes.
1493  */
1494 bool fuse_allow_current_process(struct fuse_conn *fc)
1495 {
1496 	bool allow;
1497 
1498 	if (fc->allow_other)
1499 		allow = current_in_userns(fc->user_ns);
1500 	else
1501 		allow = fuse_permissible_uidgid(fc);
1502 
1503 	if (!allow && allow_sys_admin_access && capable(CAP_SYS_ADMIN))
1504 		allow = true;
1505 
1506 	return allow;
1507 }
1508 
1509 static int fuse_access(struct inode *inode, int mask)
1510 {
1511 	struct fuse_mount *fm = get_fuse_mount(inode);
1512 	FUSE_ARGS(args);
1513 	struct fuse_access_in inarg;
1514 	int err;
1515 
1516 	BUG_ON(mask & MAY_NOT_BLOCK);
1517 
1518 	/*
1519 	 * We should not send FUSE_ACCESS to the userspace
1520 	 * when idmapped mounts are enabled as for this case
1521 	 * we have fc->default_permissions = 1 and access
1522 	 * permission checks are done on the kernel side.
1523 	 */
1524 	WARN_ON_ONCE(!(fm->sb->s_iflags & SB_I_NOIDMAP));
1525 
1526 	if (fm->fc->no_access)
1527 		return 0;
1528 
1529 	memset(&inarg, 0, sizeof(inarg));
1530 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1531 	args.opcode = FUSE_ACCESS;
1532 	args.nodeid = get_node_id(inode);
1533 	args.in_numargs = 1;
1534 	args.in_args[0].size = sizeof(inarg);
1535 	args.in_args[0].value = &inarg;
1536 	err = fuse_simple_request(fm, &args);
1537 	if (err == -ENOSYS) {
1538 		fm->fc->no_access = 1;
1539 		err = 0;
1540 	}
1541 	return err;
1542 }
1543 
1544 static int fuse_perm_getattr(struct inode *inode, int mask)
1545 {
1546 	if (mask & MAY_NOT_BLOCK)
1547 		return -ECHILD;
1548 
1549 	forget_all_cached_acls(inode);
1550 	return fuse_do_getattr(&nop_mnt_idmap, inode, NULL, NULL);
1551 }
1552 
1553 /*
1554  * Check permission.  The two basic access models of FUSE are:
1555  *
1556  * 1) Local access checking ('default_permissions' mount option) based
1557  * on file mode.  This is the plain old disk filesystem permission
1558  * model.
1559  *
1560  * 2) "Remote" access checking, where server is responsible for
1561  * checking permission in each inode operation.  An exception to this
1562  * is if ->permission() was invoked from sys_access() in which case an
1563  * access request is sent.  Execute permission is still checked
1564  * locally based on file mode.
1565  */
1566 static int fuse_permission(struct mnt_idmap *idmap,
1567 			   struct inode *inode, int mask)
1568 {
1569 	struct fuse_conn *fc = get_fuse_conn(inode);
1570 	bool refreshed = false;
1571 	int err = 0;
1572 
1573 	if (fuse_is_bad(inode))
1574 		return -EIO;
1575 
1576 	if (!fuse_allow_current_process(fc))
1577 		return -EACCES;
1578 
1579 	/*
1580 	 * If attributes are needed, refresh them before proceeding
1581 	 */
1582 	if (fc->default_permissions ||
1583 	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1584 		struct fuse_inode *fi = get_fuse_inode(inode);
1585 		u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
1586 
1587 		if (perm_mask & READ_ONCE(fi->inval_mask) ||
1588 		    time_before64(fi->i_time, get_jiffies_64())) {
1589 			refreshed = true;
1590 
1591 			err = fuse_perm_getattr(inode, mask);
1592 			if (err)
1593 				return err;
1594 		}
1595 	}
1596 
1597 	if (fc->default_permissions) {
1598 		err = generic_permission(idmap, inode, mask);
1599 
1600 		/* If permission is denied, try to refresh file
1601 		   attributes.  This is also needed, because the root
1602 		   node will at first have no permissions */
1603 		if (err == -EACCES && !refreshed) {
1604 			err = fuse_perm_getattr(inode, mask);
1605 			if (!err)
1606 				err = generic_permission(idmap,
1607 							 inode, mask);
1608 		}
1609 
1610 		/* Note: the opposite of the above test does not
1611 		   exist.  So if permissions are revoked this won't be
1612 		   noticed immediately, only after the attribute
1613 		   timeout has expired */
1614 	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1615 		err = fuse_access(inode, mask);
1616 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1617 		if (!(inode->i_mode & S_IXUGO)) {
1618 			if (refreshed)
1619 				return -EACCES;
1620 
1621 			err = fuse_perm_getattr(inode, mask);
1622 			if (!err && !(inode->i_mode & S_IXUGO))
1623 				return -EACCES;
1624 		}
1625 	}
1626 	return err;
1627 }
1628 
1629 static int fuse_readlink_folio(struct inode *inode, struct folio *folio)
1630 {
1631 	struct fuse_mount *fm = get_fuse_mount(inode);
1632 	struct fuse_folio_desc desc = { .length = folio_size(folio) - 1 };
1633 	struct fuse_args_pages ap = {
1634 		.num_folios = 1,
1635 		.folios = &folio,
1636 		.descs = &desc,
1637 	};
1638 	char *link;
1639 	ssize_t res;
1640 
1641 	ap.args.opcode = FUSE_READLINK;
1642 	ap.args.nodeid = get_node_id(inode);
1643 	ap.args.out_pages = true;
1644 	ap.args.out_argvar = true;
1645 	ap.args.page_zeroing = true;
1646 	ap.args.out_numargs = 1;
1647 	ap.args.out_args[0].size = desc.length;
1648 	res = fuse_simple_request(fm, &ap.args);
1649 
1650 	fuse_invalidate_atime(inode);
1651 
1652 	if (res < 0)
1653 		return res;
1654 
1655 	if (WARN_ON(res >= PAGE_SIZE))
1656 		return -EIO;
1657 
1658 	link = folio_address(folio);
1659 	link[res] = '\0';
1660 
1661 	return 0;
1662 }
1663 
1664 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
1665 				 struct delayed_call *callback)
1666 {
1667 	struct fuse_conn *fc = get_fuse_conn(inode);
1668 	struct folio *folio;
1669 	int err;
1670 
1671 	err = -EIO;
1672 	if (fuse_is_bad(inode))
1673 		goto out_err;
1674 
1675 	if (fc->cache_symlinks)
1676 		return page_get_link_raw(dentry, inode, callback);
1677 
1678 	err = -ECHILD;
1679 	if (!dentry)
1680 		goto out_err;
1681 
1682 	folio = folio_alloc(GFP_KERNEL, 0);
1683 	err = -ENOMEM;
1684 	if (!folio)
1685 		goto out_err;
1686 
1687 	err = fuse_readlink_folio(inode, folio);
1688 	if (err) {
1689 		folio_put(folio);
1690 		goto out_err;
1691 	}
1692 
1693 	set_delayed_call(callback, page_put_link, folio);
1694 
1695 	return folio_address(folio);
1696 
1697 out_err:
1698 	return ERR_PTR(err);
1699 }
1700 
1701 static int fuse_dir_open(struct inode *inode, struct file *file)
1702 {
1703 	struct fuse_mount *fm = get_fuse_mount(inode);
1704 	int err;
1705 
1706 	if (fuse_is_bad(inode))
1707 		return -EIO;
1708 
1709 	err = generic_file_open(inode, file);
1710 	if (err)
1711 		return err;
1712 
1713 	err = fuse_do_open(fm, get_node_id(inode), file, true);
1714 	if (!err) {
1715 		struct fuse_file *ff = file->private_data;
1716 
1717 		/*
1718 		 * Keep handling FOPEN_STREAM and FOPEN_NONSEEKABLE for
1719 		 * directories for backward compatibility, though it's unlikely
1720 		 * to be useful.
1721 		 */
1722 		if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE))
1723 			nonseekable_open(inode, file);
1724 		if (!(ff->open_flags & FOPEN_KEEP_CACHE))
1725 			invalidate_inode_pages2(inode->i_mapping);
1726 	}
1727 
1728 	return err;
1729 }
1730 
1731 static int fuse_dir_release(struct inode *inode, struct file *file)
1732 {
1733 	fuse_release_common(file, true);
1734 
1735 	return 0;
1736 }
1737 
1738 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1739 			  int datasync)
1740 {
1741 	struct inode *inode = file->f_mapping->host;
1742 	struct fuse_conn *fc = get_fuse_conn(inode);
1743 	int err;
1744 
1745 	if (fuse_is_bad(inode))
1746 		return -EIO;
1747 
1748 	if (fc->no_fsyncdir)
1749 		return 0;
1750 
1751 	inode_lock(inode);
1752 	err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
1753 	if (err == -ENOSYS) {
1754 		fc->no_fsyncdir = 1;
1755 		err = 0;
1756 	}
1757 	inode_unlock(inode);
1758 
1759 	return err;
1760 }
1761 
1762 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1763 			    unsigned long arg)
1764 {
1765 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1766 
1767 	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1768 	if (fc->minor < 18)
1769 		return -ENOTTY;
1770 
1771 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1772 }
1773 
1774 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1775 				   unsigned long arg)
1776 {
1777 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1778 
1779 	if (fc->minor < 18)
1780 		return -ENOTTY;
1781 
1782 	return fuse_ioctl_common(file, cmd, arg,
1783 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1784 }
1785 
1786 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1787 {
1788 	/* Always update if mtime is explicitly set  */
1789 	if (ivalid & ATTR_MTIME_SET)
1790 		return true;
1791 
1792 	/* Or if kernel i_mtime is the official one */
1793 	if (trust_local_mtime)
1794 		return true;
1795 
1796 	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1797 	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1798 		return false;
1799 
1800 	/* In all other cases update */
1801 	return true;
1802 }
1803 
1804 static void iattr_to_fattr(struct mnt_idmap *idmap, struct fuse_conn *fc,
1805 			   struct iattr *iattr, struct fuse_setattr_in *arg,
1806 			   bool trust_local_cmtime)
1807 {
1808 	unsigned ivalid = iattr->ia_valid;
1809 
1810 	if (ivalid & ATTR_MODE)
1811 		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1812 
1813 	if (ivalid & ATTR_UID) {
1814 		kuid_t fsuid = from_vfsuid(idmap, fc->user_ns, iattr->ia_vfsuid);
1815 
1816 		arg->valid |= FATTR_UID;
1817 		arg->uid = from_kuid(fc->user_ns, fsuid);
1818 	}
1819 
1820 	if (ivalid & ATTR_GID) {
1821 		kgid_t fsgid = from_vfsgid(idmap, fc->user_ns, iattr->ia_vfsgid);
1822 
1823 		arg->valid |= FATTR_GID;
1824 		arg->gid = from_kgid(fc->user_ns, fsgid);
1825 	}
1826 
1827 	if (ivalid & ATTR_SIZE)
1828 		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1829 	if (ivalid & ATTR_ATIME) {
1830 		arg->valid |= FATTR_ATIME;
1831 		arg->atime = iattr->ia_atime.tv_sec;
1832 		arg->atimensec = iattr->ia_atime.tv_nsec;
1833 		if (!(ivalid & ATTR_ATIME_SET))
1834 			arg->valid |= FATTR_ATIME_NOW;
1835 	}
1836 	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1837 		arg->valid |= FATTR_MTIME;
1838 		arg->mtime = iattr->ia_mtime.tv_sec;
1839 		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1840 		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1841 			arg->valid |= FATTR_MTIME_NOW;
1842 	}
1843 	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1844 		arg->valid |= FATTR_CTIME;
1845 		arg->ctime = iattr->ia_ctime.tv_sec;
1846 		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1847 	}
1848 }
1849 
1850 /*
1851  * Prevent concurrent writepages on inode
1852  *
1853  * This is done by adding a negative bias to the inode write counter
1854  * and waiting for all pending writes to finish.
1855  */
1856 void fuse_set_nowrite(struct inode *inode)
1857 {
1858 	struct fuse_inode *fi = get_fuse_inode(inode);
1859 
1860 	BUG_ON(!inode_is_locked(inode));
1861 
1862 	spin_lock(&fi->lock);
1863 	BUG_ON(fi->writectr < 0);
1864 	fi->writectr += FUSE_NOWRITE;
1865 	spin_unlock(&fi->lock);
1866 	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1867 }
1868 
1869 /*
1870  * Allow writepages on inode
1871  *
1872  * Remove the bias from the writecounter and send any queued
1873  * writepages.
1874  */
1875 static void __fuse_release_nowrite(struct inode *inode)
1876 {
1877 	struct fuse_inode *fi = get_fuse_inode(inode);
1878 
1879 	BUG_ON(fi->writectr != FUSE_NOWRITE);
1880 	fi->writectr = 0;
1881 	fuse_flush_writepages(inode);
1882 }
1883 
1884 void fuse_release_nowrite(struct inode *inode)
1885 {
1886 	struct fuse_inode *fi = get_fuse_inode(inode);
1887 
1888 	spin_lock(&fi->lock);
1889 	__fuse_release_nowrite(inode);
1890 	spin_unlock(&fi->lock);
1891 }
1892 
1893 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1894 			      struct inode *inode,
1895 			      struct fuse_setattr_in *inarg_p,
1896 			      struct fuse_attr_out *outarg_p)
1897 {
1898 	args->opcode = FUSE_SETATTR;
1899 	args->nodeid = get_node_id(inode);
1900 	args->in_numargs = 1;
1901 	args->in_args[0].size = sizeof(*inarg_p);
1902 	args->in_args[0].value = inarg_p;
1903 	args->out_numargs = 1;
1904 	args->out_args[0].size = sizeof(*outarg_p);
1905 	args->out_args[0].value = outarg_p;
1906 }
1907 
1908 /*
1909  * Flush inode->i_mtime to the server
1910  */
1911 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1912 {
1913 	struct fuse_mount *fm = get_fuse_mount(inode);
1914 	FUSE_ARGS(args);
1915 	struct fuse_setattr_in inarg;
1916 	struct fuse_attr_out outarg;
1917 
1918 	memset(&inarg, 0, sizeof(inarg));
1919 	memset(&outarg, 0, sizeof(outarg));
1920 
1921 	inarg.valid = FATTR_MTIME;
1922 	inarg.mtime = inode_get_mtime_sec(inode);
1923 	inarg.mtimensec = inode_get_mtime_nsec(inode);
1924 	if (fm->fc->minor >= 23) {
1925 		inarg.valid |= FATTR_CTIME;
1926 		inarg.ctime = inode_get_ctime_sec(inode);
1927 		inarg.ctimensec = inode_get_ctime_nsec(inode);
1928 	}
1929 	if (ff) {
1930 		inarg.valid |= FATTR_FH;
1931 		inarg.fh = ff->fh;
1932 	}
1933 	fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
1934 
1935 	return fuse_simple_request(fm, &args);
1936 }
1937 
1938 /*
1939  * Set attributes, and at the same time refresh them.
1940  *
1941  * Truncation is slightly complicated, because the 'truncate' request
1942  * may fail, in which case we don't want to touch the mapping.
1943  * vmtruncate() doesn't allow for this case, so do the rlimit checking
1944  * and the actual truncation by hand.
1945  */
1946 int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
1947 		    struct iattr *attr, struct file *file)
1948 {
1949 	struct inode *inode = d_inode(dentry);
1950 	struct fuse_mount *fm = get_fuse_mount(inode);
1951 	struct fuse_conn *fc = fm->fc;
1952 	struct fuse_inode *fi = get_fuse_inode(inode);
1953 	struct address_space *mapping = inode->i_mapping;
1954 	FUSE_ARGS(args);
1955 	struct fuse_setattr_in inarg;
1956 	struct fuse_attr_out outarg;
1957 	bool is_truncate = false;
1958 	bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode);
1959 	loff_t oldsize;
1960 	int err;
1961 	bool trust_local_cmtime = is_wb;
1962 	bool fault_blocked = false;
1963 	u64 attr_version;
1964 
1965 	if (!fc->default_permissions)
1966 		attr->ia_valid |= ATTR_FORCE;
1967 
1968 	err = setattr_prepare(idmap, dentry, attr);
1969 	if (err)
1970 		return err;
1971 
1972 	if (attr->ia_valid & ATTR_SIZE) {
1973 		if (WARN_ON(!S_ISREG(inode->i_mode)))
1974 			return -EIO;
1975 		is_truncate = true;
1976 	}
1977 
1978 	if (FUSE_IS_DAX(inode) && is_truncate) {
1979 		filemap_invalidate_lock(mapping);
1980 		fault_blocked = true;
1981 		err = fuse_dax_break_layouts(inode, 0, -1);
1982 		if (err) {
1983 			filemap_invalidate_unlock(mapping);
1984 			return err;
1985 		}
1986 	}
1987 
1988 	if (attr->ia_valid & ATTR_OPEN) {
1989 		/* This is coming from open(..., ... | O_TRUNC); */
1990 		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
1991 		WARN_ON(attr->ia_size != 0);
1992 		if (fc->atomic_o_trunc) {
1993 			/*
1994 			 * No need to send request to userspace, since actual
1995 			 * truncation has already been done by OPEN.  But still
1996 			 * need to truncate page cache.
1997 			 */
1998 			i_size_write(inode, 0);
1999 			truncate_pagecache(inode, 0);
2000 			goto out;
2001 		}
2002 		file = NULL;
2003 	}
2004 
2005 	/* Flush dirty data/metadata before non-truncate SETATTR */
2006 	if (is_wb &&
2007 	    attr->ia_valid &
2008 			(ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
2009 			 ATTR_TIMES_SET)) {
2010 		err = write_inode_now(inode, true);
2011 		if (err)
2012 			return err;
2013 
2014 		fuse_set_nowrite(inode);
2015 		fuse_release_nowrite(inode);
2016 	}
2017 
2018 	if (is_truncate) {
2019 		fuse_set_nowrite(inode);
2020 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2021 		if (trust_local_cmtime && attr->ia_size != inode->i_size)
2022 			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
2023 	}
2024 
2025 	memset(&inarg, 0, sizeof(inarg));
2026 	memset(&outarg, 0, sizeof(outarg));
2027 	iattr_to_fattr(idmap, fc, attr, &inarg, trust_local_cmtime);
2028 	if (file) {
2029 		struct fuse_file *ff = file->private_data;
2030 		inarg.valid |= FATTR_FH;
2031 		inarg.fh = ff->fh;
2032 	}
2033 
2034 	/* Kill suid/sgid for non-directory chown unconditionally */
2035 	if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
2036 	    attr->ia_valid & (ATTR_UID | ATTR_GID))
2037 		inarg.valid |= FATTR_KILL_SUIDGID;
2038 
2039 	if (attr->ia_valid & ATTR_SIZE) {
2040 		/* For mandatory locking in truncate */
2041 		inarg.valid |= FATTR_LOCKOWNER;
2042 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
2043 
2044 		/* Kill suid/sgid for truncate only if no CAP_FSETID */
2045 		if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
2046 			inarg.valid |= FATTR_KILL_SUIDGID;
2047 	}
2048 
2049 	attr_version = fuse_get_attr_version(fm->fc);
2050 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
2051 	err = fuse_simple_request(fm, &args);
2052 	if (err) {
2053 		if (err == -EINTR)
2054 			fuse_invalidate_attr(inode);
2055 		goto error;
2056 	}
2057 
2058 	if (fuse_invalid_attr(&outarg.attr) ||
2059 	    inode_wrong_type(inode, outarg.attr.mode)) {
2060 		fuse_make_bad(inode);
2061 		err = -EIO;
2062 		goto error;
2063 	}
2064 
2065 	spin_lock(&fi->lock);
2066 	/* the kernel maintains i_mtime locally */
2067 	if (trust_local_cmtime) {
2068 		if (attr->ia_valid & ATTR_MTIME)
2069 			inode_set_mtime_to_ts(inode, attr->ia_mtime);
2070 		if (attr->ia_valid & ATTR_CTIME)
2071 			inode_set_ctime_to_ts(inode, attr->ia_ctime);
2072 		/* FIXME: clear I_DIRTY_SYNC? */
2073 	}
2074 
2075 	if (fi->attr_version > attr_version) {
2076 		/*
2077 		 * Apply attributes, for example for fsnotify_change(), but set
2078 		 * attribute timeout to zero.
2079 		 */
2080 		outarg.attr_valid = outarg.attr_valid_nsec = 0;
2081 	}
2082 
2083 	fuse_change_attributes_common(inode, &outarg.attr, NULL,
2084 				      ATTR_TIMEOUT(&outarg),
2085 				      fuse_get_cache_mask(inode), 0);
2086 	oldsize = inode->i_size;
2087 	/* see the comment in fuse_change_attributes() */
2088 	if (!is_wb || is_truncate)
2089 		i_size_write(inode, outarg.attr.size);
2090 
2091 	if (is_truncate) {
2092 		/* NOTE: this may release/reacquire fi->lock */
2093 		__fuse_release_nowrite(inode);
2094 	}
2095 	spin_unlock(&fi->lock);
2096 
2097 	/*
2098 	 * Only call invalidate_inode_pages2() after removing
2099 	 * FUSE_NOWRITE, otherwise fuse_launder_folio() would deadlock.
2100 	 */
2101 	if ((is_truncate || !is_wb) &&
2102 	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
2103 		truncate_pagecache(inode, outarg.attr.size);
2104 		invalidate_inode_pages2(mapping);
2105 	}
2106 
2107 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2108 out:
2109 	if (fault_blocked)
2110 		filemap_invalidate_unlock(mapping);
2111 
2112 	return 0;
2113 
2114 error:
2115 	if (is_truncate)
2116 		fuse_release_nowrite(inode);
2117 
2118 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2119 
2120 	if (fault_blocked)
2121 		filemap_invalidate_unlock(mapping);
2122 	return err;
2123 }
2124 
2125 static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry,
2126 			struct iattr *attr)
2127 {
2128 	struct inode *inode = d_inode(entry);
2129 	struct fuse_conn *fc = get_fuse_conn(inode);
2130 	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
2131 	int ret;
2132 
2133 	if (fuse_is_bad(inode))
2134 		return -EIO;
2135 
2136 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
2137 		return -EACCES;
2138 
2139 	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
2140 		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
2141 				    ATTR_MODE);
2142 
2143 		/*
2144 		 * The only sane way to reliably kill suid/sgid is to do it in
2145 		 * the userspace filesystem
2146 		 *
2147 		 * This should be done on write(), truncate() and chown().
2148 		 */
2149 		if (!fc->handle_killpriv && !fc->handle_killpriv_v2) {
2150 			/*
2151 			 * ia_mode calculation may have used stale i_mode.
2152 			 * Refresh and recalculate.
2153 			 */
2154 			ret = fuse_do_getattr(idmap, inode, NULL, file);
2155 			if (ret)
2156 				return ret;
2157 
2158 			attr->ia_mode = inode->i_mode;
2159 			if (inode->i_mode & S_ISUID) {
2160 				attr->ia_valid |= ATTR_MODE;
2161 				attr->ia_mode &= ~S_ISUID;
2162 			}
2163 			if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
2164 				attr->ia_valid |= ATTR_MODE;
2165 				attr->ia_mode &= ~S_ISGID;
2166 			}
2167 		}
2168 	}
2169 	if (!attr->ia_valid)
2170 		return 0;
2171 
2172 	ret = fuse_do_setattr(idmap, entry, attr, file);
2173 	if (!ret) {
2174 		/*
2175 		 * If filesystem supports acls it may have updated acl xattrs in
2176 		 * the filesystem, so forget cached acls for the inode.
2177 		 */
2178 		if (fc->posix_acl)
2179 			forget_all_cached_acls(inode);
2180 
2181 		/* Directory mode changed, may need to revalidate access */
2182 		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
2183 			fuse_invalidate_entry_cache(entry);
2184 	}
2185 	return ret;
2186 }
2187 
2188 static int fuse_getattr(struct mnt_idmap *idmap,
2189 			const struct path *path, struct kstat *stat,
2190 			u32 request_mask, unsigned int flags)
2191 {
2192 	struct inode *inode = d_inode(path->dentry);
2193 	struct fuse_conn *fc = get_fuse_conn(inode);
2194 
2195 	if (fuse_is_bad(inode))
2196 		return -EIO;
2197 
2198 	if (!fuse_allow_current_process(fc)) {
2199 		if (!request_mask) {
2200 			/*
2201 			 * If user explicitly requested *nothing* then don't
2202 			 * error out, but return st_dev only.
2203 			 */
2204 			stat->result_mask = 0;
2205 			stat->dev = inode->i_sb->s_dev;
2206 			return 0;
2207 		}
2208 		return -EACCES;
2209 	}
2210 
2211 	return fuse_update_get_attr(idmap, inode, NULL, stat, request_mask, flags);
2212 }
2213 
2214 static const struct inode_operations fuse_dir_inode_operations = {
2215 	.lookup		= fuse_lookup,
2216 	.mkdir		= fuse_mkdir,
2217 	.symlink	= fuse_symlink,
2218 	.unlink		= fuse_unlink,
2219 	.rmdir		= fuse_rmdir,
2220 	.rename		= fuse_rename2,
2221 	.link		= fuse_link,
2222 	.setattr	= fuse_setattr,
2223 	.create		= fuse_create,
2224 	.atomic_open	= fuse_atomic_open,
2225 	.tmpfile	= fuse_tmpfile,
2226 	.mknod		= fuse_mknod,
2227 	.permission	= fuse_permission,
2228 	.getattr	= fuse_getattr,
2229 	.listxattr	= fuse_listxattr,
2230 	.get_inode_acl	= fuse_get_inode_acl,
2231 	.get_acl	= fuse_get_acl,
2232 	.set_acl	= fuse_set_acl,
2233 	.fileattr_get	= fuse_fileattr_get,
2234 	.fileattr_set	= fuse_fileattr_set,
2235 };
2236 
2237 static const struct file_operations fuse_dir_operations = {
2238 	.llseek		= generic_file_llseek,
2239 	.read		= generic_read_dir,
2240 	.iterate_shared	= fuse_readdir,
2241 	.open		= fuse_dir_open,
2242 	.release	= fuse_dir_release,
2243 	.fsync		= fuse_dir_fsync,
2244 	.unlocked_ioctl	= fuse_dir_ioctl,
2245 	.compat_ioctl	= fuse_dir_compat_ioctl,
2246 };
2247 
2248 static const struct inode_operations fuse_common_inode_operations = {
2249 	.setattr	= fuse_setattr,
2250 	.permission	= fuse_permission,
2251 	.getattr	= fuse_getattr,
2252 	.listxattr	= fuse_listxattr,
2253 	.get_inode_acl	= fuse_get_inode_acl,
2254 	.get_acl	= fuse_get_acl,
2255 	.set_acl	= fuse_set_acl,
2256 	.fileattr_get	= fuse_fileattr_get,
2257 	.fileattr_set	= fuse_fileattr_set,
2258 };
2259 
2260 static const struct inode_operations fuse_symlink_inode_operations = {
2261 	.setattr	= fuse_setattr,
2262 	.get_link	= fuse_get_link,
2263 	.getattr	= fuse_getattr,
2264 	.listxattr	= fuse_listxattr,
2265 };
2266 
2267 void fuse_init_common(struct inode *inode)
2268 {
2269 	inode->i_op = &fuse_common_inode_operations;
2270 }
2271 
2272 void fuse_init_dir(struct inode *inode)
2273 {
2274 	struct fuse_inode *fi = get_fuse_inode(inode);
2275 
2276 	inode->i_op = &fuse_dir_inode_operations;
2277 	inode->i_fop = &fuse_dir_operations;
2278 
2279 	spin_lock_init(&fi->rdc.lock);
2280 	fi->rdc.cached = false;
2281 	fi->rdc.size = 0;
2282 	fi->rdc.pos = 0;
2283 	fi->rdc.version = 0;
2284 }
2285 
2286 static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
2287 {
2288 	int err = fuse_readlink_folio(folio->mapping->host, folio);
2289 
2290 	if (!err)
2291 		folio_mark_uptodate(folio);
2292 
2293 	folio_unlock(folio);
2294 
2295 	return err;
2296 }
2297 
2298 static const struct address_space_operations fuse_symlink_aops = {
2299 	.read_folio	= fuse_symlink_read_folio,
2300 };
2301 
2302 void fuse_init_symlink(struct inode *inode)
2303 {
2304 	inode->i_op = &fuse_symlink_inode_operations;
2305 	inode->i_data.a_ops = &fuse_symlink_aops;
2306 	inode_nohighmem(inode);
2307 }
2308