xref: /linux/fs/fuse/dir.c (revision 9f10e7fb6a06bce4f81de5fd0f2f0390f99e89e4)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/fs_context.h>
14 #include <linux/moduleparam.h>
15 #include <linux/sched.h>
16 #include <linux/namei.h>
17 #include <linux/slab.h>
18 #include <linux/xattr.h>
19 #include <linux/iversion.h>
20 #include <linux/posix_acl.h>
21 #include <linux/security.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 
25 static bool __read_mostly allow_sys_admin_access;
26 module_param(allow_sys_admin_access, bool, 0644);
27 MODULE_PARM_DESC(allow_sys_admin_access,
28 		 "Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check");
29 
30 static void fuse_advise_use_readdirplus(struct inode *dir)
31 {
32 	struct fuse_inode *fi = get_fuse_inode(dir);
33 
34 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
35 }
36 
37 #if BITS_PER_LONG >= 64
38 static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
39 {
40 	entry->d_fsdata = (void *) time;
41 }
42 
43 static inline u64 fuse_dentry_time(const struct dentry *entry)
44 {
45 	return (u64)entry->d_fsdata;
46 }
47 
48 #else
49 union fuse_dentry {
50 	u64 time;
51 	struct rcu_head rcu;
52 };
53 
54 static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
55 {
56 	((union fuse_dentry *) dentry->d_fsdata)->time = time;
57 }
58 
59 static inline u64 fuse_dentry_time(const struct dentry *entry)
60 {
61 	return ((union fuse_dentry *) entry->d_fsdata)->time;
62 }
63 #endif
64 
65 static void fuse_dentry_settime(struct dentry *dentry, u64 time)
66 {
67 	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
68 	bool delete = !time && fc->delete_stale;
69 	/*
70 	 * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
71 	 * Don't care about races, either way it's just an optimization
72 	 */
73 	if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
74 	    (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
75 		spin_lock(&dentry->d_lock);
76 		if (!delete)
77 			dentry->d_flags &= ~DCACHE_OP_DELETE;
78 		else
79 			dentry->d_flags |= DCACHE_OP_DELETE;
80 		spin_unlock(&dentry->d_lock);
81 	}
82 
83 	__fuse_dentry_settime(dentry, time);
84 }
85 
/*
 * FUSE caches dentries and attributes with separate timeouts.  The
 * time in jiffies until which the dentry/attributes are valid is stored
 * in dentry->d_fsdata and fuse_inode->i_time respectively.
 */
91 
92 /*
93  * Calculate the time in jiffies until a dentry/attributes are valid
94  */
95 u64 fuse_time_to_jiffies(u64 sec, u32 nsec)
96 {
97 	if (sec || nsec) {
98 		struct timespec64 ts = {
99 			sec,
100 			min_t(u32, nsec, NSEC_PER_SEC - 1)
101 		};
102 
103 		return get_jiffies_64() + timespec64_to_jiffies(&ts);
104 	} else
105 		return 0;
106 }
107 
108 /*
109  * Set dentry and possibly attribute timeouts from the lookup/mk*
110  * replies
111  */
112 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
113 {
114 	fuse_dentry_settime(entry,
115 		fuse_time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
116 }
117 
118 void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
119 {
120 	set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask);
121 }
122 
123 /*
124  * Mark the attributes as stale, so that at the next call to
125  * ->getattr() they will be fetched from userspace
126  */
127 void fuse_invalidate_attr(struct inode *inode)
128 {
129 	fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
130 }
131 
132 static void fuse_dir_changed(struct inode *dir)
133 {
134 	fuse_invalidate_attr(dir);
135 	inode_maybe_inc_iversion(dir, false);
136 }
137 
138 /*
139  * Mark the attributes as stale due to an atime change.  Avoid the invalidate if
140  * atime is not used.
141  */
142 void fuse_invalidate_atime(struct inode *inode)
143 {
144 	if (!IS_RDONLY(inode))
145 		fuse_invalidate_attr_mask(inode, STATX_ATIME);
146 }
147 
148 /*
149  * Just mark the entry as stale, so that a next attempt to look it up
150  * will result in a new lookup call to userspace
151  *
152  * This is called when a dentry is about to become negative and the
153  * timeout is unknown (unlink, rmdir, rename and in some cases
154  * lookup)
155  */
156 void fuse_invalidate_entry_cache(struct dentry *entry)
157 {
158 	fuse_dentry_settime(entry, 0);
159 }
160 
/*
 * Like fuse_invalidate_entry_cache(), but first call d_invalidate() on the
 * dentry to try to remove it from the hash.
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);

	/* Reset the validity time as well */
	fuse_invalidate_entry_cache(entry);
}
170 
171 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
172 			     u64 nodeid, const struct qstr *name,
173 			     struct fuse_entry_out *outarg)
174 {
175 	memset(outarg, 0, sizeof(struct fuse_entry_out));
176 	args->opcode = FUSE_LOOKUP;
177 	args->nodeid = nodeid;
178 	args->in_numargs = 2;
179 	fuse_set_zero_arg0(args);
180 	args->in_args[1].size = name->len + 1;
181 	args->in_args[1].value = name->name;
182 	args->out_numargs = 1;
183 	args->out_args[0].size = sizeof(struct fuse_entry_out);
184 	args->out_args[0].value = outarg;
185 }
186 
187 /*
188  * Check whether the dentry is still valid
189  *
190  * If the entry validity timeout has expired and the dentry is
191  * positive, try to redo the lookup.  If the lookup results in a
192  * different inode, then let the VFS invalidate the dentry and redo
193  * the lookup once more.  If the lookup results in the same inode,
194  * then refresh the attributes, timeouts and mark the dentry valid.
195  */
196 static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
197 {
198 	struct inode *inode;
199 	struct dentry *parent;
200 	struct fuse_mount *fm;
201 	struct fuse_inode *fi;
202 	int ret;
203 
204 	inode = d_inode_rcu(entry);
205 	if (inode && fuse_is_bad(inode))
206 		goto invalid;
207 	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
208 		 (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) {
209 		struct fuse_entry_out outarg;
210 		FUSE_ARGS(args);
211 		struct fuse_forget_link *forget;
212 		u64 attr_version;
213 
214 		/* For negative dentries, always do a fresh lookup */
215 		if (!inode)
216 			goto invalid;
217 
218 		ret = -ECHILD;
219 		if (flags & LOOKUP_RCU)
220 			goto out;
221 
222 		fm = get_fuse_mount(inode);
223 
224 		forget = fuse_alloc_forget();
225 		ret = -ENOMEM;
226 		if (!forget)
227 			goto out;
228 
229 		attr_version = fuse_get_attr_version(fm->fc);
230 
231 		parent = dget_parent(entry);
232 		fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)),
233 				 &entry->d_name, &outarg);
234 		ret = fuse_simple_request(fm, &args);
235 		dput(parent);
236 		/* Zero nodeid is same as -ENOENT */
237 		if (!ret && !outarg.nodeid)
238 			ret = -ENOENT;
239 		if (!ret) {
240 			fi = get_fuse_inode(inode);
241 			if (outarg.nodeid != get_node_id(inode) ||
242 			    (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
243 				fuse_queue_forget(fm->fc, forget,
244 						  outarg.nodeid, 1);
245 				goto invalid;
246 			}
247 			spin_lock(&fi->lock);
248 			fi->nlookup++;
249 			spin_unlock(&fi->lock);
250 		}
251 		kfree(forget);
252 		if (ret == -ENOMEM || ret == -EINTR)
253 			goto out;
254 		if (ret || fuse_invalid_attr(&outarg.attr) ||
255 		    fuse_stale_inode(inode, outarg.generation, &outarg.attr))
256 			goto invalid;
257 
258 		forget_all_cached_acls(inode);
259 		fuse_change_attributes(inode, &outarg.attr, NULL,
260 				       ATTR_TIMEOUT(&outarg),
261 				       attr_version);
262 		fuse_change_entry_timeout(entry, &outarg);
263 	} else if (inode) {
264 		fi = get_fuse_inode(inode);
265 		if (flags & LOOKUP_RCU) {
266 			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
267 				return -ECHILD;
268 		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
269 			parent = dget_parent(entry);
270 			fuse_advise_use_readdirplus(d_inode(parent));
271 			dput(parent);
272 		}
273 	}
274 	ret = 1;
275 out:
276 	return ret;
277 
278 invalid:
279 	ret = 0;
280 	goto out;
281 }
282 
283 #if BITS_PER_LONG < 64
284 static int fuse_dentry_init(struct dentry *dentry)
285 {
286 	dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
287 				   GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
288 
289 	return dentry->d_fsdata ? 0 : -ENOMEM;
290 }
291 static void fuse_dentry_release(struct dentry *dentry)
292 {
293 	union fuse_dentry *fd = dentry->d_fsdata;
294 
295 	kfree_rcu(fd, rcu);
296 }
297 #endif
298 
299 static int fuse_dentry_delete(const struct dentry *dentry)
300 {
301 	return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
302 }
303 
304 /*
305  * Create a fuse_mount object with a new superblock (with path->dentry
306  * as the root), and return that mount so it can be auto-mounted on
307  * @path.
308  */
309 static struct vfsmount *fuse_dentry_automount(struct path *path)
310 {
311 	struct fs_context *fsc;
312 	struct vfsmount *mnt;
313 	struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
314 
315 	fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
316 	if (IS_ERR(fsc))
317 		return ERR_CAST(fsc);
318 
319 	/* Pass the FUSE inode of the mount for fuse_get_tree_submount() */
320 	fsc->fs_private = mp_fi;
321 
322 	/* Create the submount */
323 	mnt = fc_mount(fsc);
324 	if (!IS_ERR(mnt))
325 		mntget(mnt);
326 
327 	put_fs_context(fsc);
328 	return mnt;
329 }
330 
331 const struct dentry_operations fuse_dentry_operations = {
332 	.d_revalidate	= fuse_dentry_revalidate,
333 	.d_delete	= fuse_dentry_delete,
334 #if BITS_PER_LONG < 64
335 	.d_init		= fuse_dentry_init,
336 	.d_release	= fuse_dentry_release,
337 #endif
338 	.d_automount	= fuse_dentry_automount,
339 };
340 
341 const struct dentry_operations fuse_root_dentry_operations = {
342 #if BITS_PER_LONG < 64
343 	.d_init		= fuse_dentry_init,
344 	.d_release	= fuse_dentry_release,
345 #endif
346 };
347 
/*
 * Returns 1 if the file type encoded in mode @m is one of: regular file,
 * directory, symlink, character device, block device, FIFO or socket;
 * 0 otherwise.
 */
int fuse_valid_type(int m)
{
	if (S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m))
		return 1;

	if (S_ISCHR(m) || S_ISBLK(m))
		return 1;

	return S_ISFIFO(m) || S_ISSOCK(m);
}
353 
354 static bool fuse_valid_size(u64 size)
355 {
356 	return size <= LLONG_MAX;
357 }
358 
359 bool fuse_invalid_attr(struct fuse_attr *attr)
360 {
361 	return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
362 }
363 
364 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
365 		     struct fuse_entry_out *outarg, struct inode **inode)
366 {
367 	struct fuse_mount *fm = get_fuse_mount_super(sb);
368 	FUSE_ARGS(args);
369 	struct fuse_forget_link *forget;
370 	u64 attr_version, evict_ctr;
371 	int err;
372 
373 	*inode = NULL;
374 	err = -ENAMETOOLONG;
375 	if (name->len > FUSE_NAME_MAX)
376 		goto out;
377 
378 
379 	forget = fuse_alloc_forget();
380 	err = -ENOMEM;
381 	if (!forget)
382 		goto out;
383 
384 	attr_version = fuse_get_attr_version(fm->fc);
385 	evict_ctr = fuse_get_evict_ctr(fm->fc);
386 
387 	fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
388 	err = fuse_simple_request(fm, &args);
389 	/* Zero nodeid is same as -ENOENT, but with valid timeout */
390 	if (err || !outarg->nodeid)
391 		goto out_put_forget;
392 
393 	err = -EIO;
394 	if (fuse_invalid_attr(&outarg->attr))
395 		goto out_put_forget;
396 	if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
397 		pr_warn_once("root generation should be zero\n");
398 		outarg->generation = 0;
399 	}
400 
401 	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
402 			   &outarg->attr, ATTR_TIMEOUT(outarg),
403 			   attr_version, evict_ctr);
404 	err = -ENOMEM;
405 	if (!*inode) {
406 		fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
407 		goto out;
408 	}
409 	err = 0;
410 
411  out_put_forget:
412 	kfree(forget);
413  out:
414 	return err;
415 }
416 
417 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
418 				  unsigned int flags)
419 {
420 	int err;
421 	struct fuse_entry_out outarg;
422 	struct inode *inode;
423 	struct dentry *newent;
424 	bool outarg_valid = true;
425 	bool locked;
426 
427 	if (fuse_is_bad(dir))
428 		return ERR_PTR(-EIO);
429 
430 	locked = fuse_lock_inode(dir);
431 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
432 			       &outarg, &inode);
433 	fuse_unlock_inode(dir, locked);
434 	if (err == -ENOENT) {
435 		outarg_valid = false;
436 		err = 0;
437 	}
438 	if (err)
439 		goto out_err;
440 
441 	err = -EIO;
442 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
443 		goto out_iput;
444 
445 	newent = d_splice_alias(inode, entry);
446 	err = PTR_ERR(newent);
447 	if (IS_ERR(newent))
448 		goto out_err;
449 
450 	entry = newent ? newent : entry;
451 	if (outarg_valid)
452 		fuse_change_entry_timeout(entry, &outarg);
453 	else
454 		fuse_invalidate_entry_cache(entry);
455 
456 	if (inode)
457 		fuse_advise_use_readdirplus(dir);
458 	return newent;
459 
460  out_iput:
461 	iput(inode);
462  out_err:
463 	return ERR_PTR(err);
464 }
465 
466 static int get_security_context(struct dentry *entry, umode_t mode,
467 				struct fuse_in_arg *ext)
468 {
469 	struct fuse_secctx *fctx;
470 	struct fuse_secctx_header *header;
471 	struct lsm_context lsmctx = { };
472 	void *ptr;
473 	u32 total_len = sizeof(*header);
474 	int err, nr_ctx = 0;
475 	const char *name = NULL;
476 	size_t namelen;
477 
478 	err = security_dentry_init_security(entry, mode, &entry->d_name,
479 					    &name, &lsmctx);
480 
481 	/* If no LSM is supporting this security hook ignore error */
482 	if (err && err != -EOPNOTSUPP)
483 		goto out_err;
484 
485 	if (lsmctx.len) {
486 		nr_ctx = 1;
487 		namelen = strlen(name) + 1;
488 		err = -EIO;
489 		if (WARN_ON(namelen > XATTR_NAME_MAX + 1 ||
490 		    lsmctx.len > S32_MAX))
491 			goto out_err;
492 		total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen +
493 					    lsmctx.len);
494 	}
495 
496 	err = -ENOMEM;
497 	header = ptr = kzalloc(total_len, GFP_KERNEL);
498 	if (!ptr)
499 		goto out_err;
500 
501 	header->nr_secctx = nr_ctx;
502 	header->size = total_len;
503 	ptr += sizeof(*header);
504 	if (nr_ctx) {
505 		fctx = ptr;
506 		fctx->size = lsmctx.len;
507 		ptr += sizeof(*fctx);
508 
509 		strcpy(ptr, name);
510 		ptr += namelen;
511 
512 		memcpy(ptr, lsmctx.context, lsmctx.len);
513 	}
514 	ext->size = total_len;
515 	ext->value = header;
516 	err = 0;
517 out_err:
518 	if (nr_ctx)
519 		security_release_secctx(&lsmctx);
520 	return err;
521 }
522 
523 static void *extend_arg(struct fuse_in_arg *buf, u32 bytes)
524 {
525 	void *p;
526 	u32 newlen = buf->size + bytes;
527 
528 	p = krealloc(buf->value, newlen, GFP_KERNEL);
529 	if (!p) {
530 		kfree(buf->value);
531 		buf->size = 0;
532 		buf->value = NULL;
533 		return NULL;
534 	}
535 
536 	memset(p + buf->size, 0, bytes);
537 	buf->value = p;
538 	buf->size = newlen;
539 
540 	return p + newlen - bytes;
541 }
542 
543 static u32 fuse_ext_size(size_t size)
544 {
545 	return FUSE_REC_ALIGN(sizeof(struct fuse_ext_header) + size);
546 }
547 
548 /*
549  * This adds just a single supplementary group that matches the parent's group.
550  */
551 static int get_create_supp_group(struct mnt_idmap *idmap,
552 				 struct inode *dir,
553 				 struct fuse_in_arg *ext)
554 {
555 	struct fuse_conn *fc = get_fuse_conn(dir);
556 	struct fuse_ext_header *xh;
557 	struct fuse_supp_groups *sg;
558 	kgid_t kgid = dir->i_gid;
559 	vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns, kgid);
560 	gid_t parent_gid = from_kgid(fc->user_ns, kgid);
561 
562 	u32 sg_len = fuse_ext_size(sizeof(*sg) + sizeof(sg->groups[0]));
563 
564 	if (parent_gid == (gid_t) -1 || vfsgid_eq_kgid(vfsgid, current_fsgid()) ||
565 	    !vfsgid_in_group_p(vfsgid))
566 		return 0;
567 
568 	xh = extend_arg(ext, sg_len);
569 	if (!xh)
570 		return -ENOMEM;
571 
572 	xh->size = sg_len;
573 	xh->type = FUSE_EXT_GROUPS;
574 
575 	sg = (struct fuse_supp_groups *) &xh[1];
576 	sg->nr_groups = 1;
577 	sg->groups[0] = parent_gid;
578 
579 	return 0;
580 }
581 
582 static int get_create_ext(struct mnt_idmap *idmap,
583 			  struct fuse_args *args,
584 			  struct inode *dir, struct dentry *dentry,
585 			  umode_t mode)
586 {
587 	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
588 	struct fuse_in_arg ext = { .size = 0, .value = NULL };
589 	int err = 0;
590 
591 	if (fc->init_security)
592 		err = get_security_context(dentry, mode, &ext);
593 	if (!err && fc->create_supp_group)
594 		err = get_create_supp_group(idmap, dir, &ext);
595 
596 	if (!err && ext.size) {
597 		WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args));
598 		args->is_ext = true;
599 		args->ext_idx = args->in_numargs++;
600 		args->in_args[args->ext_idx] = ext;
601 	} else {
602 		kfree(ext.value);
603 	}
604 
605 	return err;
606 }
607 
608 static void free_ext_value(struct fuse_args *args)
609 {
610 	if (args->is_ext)
611 		kfree(args->in_args[args->ext_idx].value);
612 }
613 
614 /*
615  * Atomic create+open operation
616  *
617  * If the filesystem doesn't support this, then fall back to separate
618  * 'mknod' + 'open' requests.
619  */
620 static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
621 			    struct dentry *entry, struct file *file,
622 			    unsigned int flags, umode_t mode, u32 opcode)
623 {
624 	int err;
625 	struct inode *inode;
626 	struct fuse_mount *fm = get_fuse_mount(dir);
627 	FUSE_ARGS(args);
628 	struct fuse_forget_link *forget;
629 	struct fuse_create_in inarg;
630 	struct fuse_open_out *outopenp;
631 	struct fuse_entry_out outentry;
632 	struct fuse_inode *fi;
633 	struct fuse_file *ff;
634 	bool trunc = flags & O_TRUNC;
635 
636 	/* Userspace expects S_IFREG in create mode */
637 	BUG_ON((mode & S_IFMT) != S_IFREG);
638 
639 	forget = fuse_alloc_forget();
640 	err = -ENOMEM;
641 	if (!forget)
642 		goto out_err;
643 
644 	err = -ENOMEM;
645 	ff = fuse_file_alloc(fm, true);
646 	if (!ff)
647 		goto out_put_forget_req;
648 
649 	if (!fm->fc->dont_mask)
650 		mode &= ~current_umask();
651 
652 	flags &= ~O_NOCTTY;
653 	memset(&inarg, 0, sizeof(inarg));
654 	memset(&outentry, 0, sizeof(outentry));
655 	inarg.flags = flags;
656 	inarg.mode = mode;
657 	inarg.umask = current_umask();
658 
659 	if (fm->fc->handle_killpriv_v2 && trunc &&
660 	    !(flags & O_EXCL) && !capable(CAP_FSETID)) {
661 		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
662 	}
663 
664 	args.opcode = opcode;
665 	args.nodeid = get_node_id(dir);
666 	args.in_numargs = 2;
667 	args.in_args[0].size = sizeof(inarg);
668 	args.in_args[0].value = &inarg;
669 	args.in_args[1].size = entry->d_name.len + 1;
670 	args.in_args[1].value = entry->d_name.name;
671 	args.out_numargs = 2;
672 	args.out_args[0].size = sizeof(outentry);
673 	args.out_args[0].value = &outentry;
674 	/* Store outarg for fuse_finish_open() */
675 	outopenp = &ff->args->open_outarg;
676 	args.out_args[1].size = sizeof(*outopenp);
677 	args.out_args[1].value = outopenp;
678 
679 	err = get_create_ext(idmap, &args, dir, entry, mode);
680 	if (err)
681 		goto out_free_ff;
682 
683 	err = fuse_simple_idmap_request(idmap, fm, &args);
684 	free_ext_value(&args);
685 	if (err)
686 		goto out_free_ff;
687 
688 	err = -EIO;
689 	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
690 	    fuse_invalid_attr(&outentry.attr))
691 		goto out_free_ff;
692 
693 	ff->fh = outopenp->fh;
694 	ff->nodeid = outentry.nodeid;
695 	ff->open_flags = outopenp->open_flags;
696 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
697 			  &outentry.attr, ATTR_TIMEOUT(&outentry), 0, 0);
698 	if (!inode) {
699 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
700 		fuse_sync_release(NULL, ff, flags);
701 		fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
702 		err = -ENOMEM;
703 		goto out_err;
704 	}
705 	kfree(forget);
706 	d_instantiate(entry, inode);
707 	fuse_change_entry_timeout(entry, &outentry);
708 	fuse_dir_changed(dir);
709 	err = generic_file_open(inode, file);
710 	if (!err) {
711 		file->private_data = ff;
712 		err = finish_open(file, entry, fuse_finish_open);
713 	}
714 	if (err) {
715 		fi = get_fuse_inode(inode);
716 		fuse_sync_release(fi, ff, flags);
717 	} else {
718 		if (fm->fc->atomic_o_trunc && trunc)
719 			truncate_pagecache(inode, 0);
720 		else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
721 			invalidate_inode_pages2(inode->i_mapping);
722 	}
723 	return err;
724 
725 out_free_ff:
726 	fuse_file_free(ff);
727 out_put_forget_req:
728 	kfree(forget);
729 out_err:
730 	return err;
731 }
732 
733 static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
734 		      umode_t, dev_t);
735 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
736 			    struct file *file, unsigned flags,
737 			    umode_t mode)
738 {
739 	int err;
740 	struct mnt_idmap *idmap = file_mnt_idmap(file);
741 	struct fuse_conn *fc = get_fuse_conn(dir);
742 	struct dentry *res = NULL;
743 
744 	if (fuse_is_bad(dir))
745 		return -EIO;
746 
747 	if (d_in_lookup(entry)) {
748 		res = fuse_lookup(dir, entry, 0);
749 		if (IS_ERR(res))
750 			return PTR_ERR(res);
751 
752 		if (res)
753 			entry = res;
754 	}
755 
756 	if (!(flags & O_CREAT) || d_really_is_positive(entry))
757 		goto no_open;
758 
759 	/* Only creates */
760 	file->f_mode |= FMODE_CREATED;
761 
762 	if (fc->no_create)
763 		goto mknod;
764 
765 	err = fuse_create_open(idmap, dir, entry, file, flags, mode, FUSE_CREATE);
766 	if (err == -ENOSYS) {
767 		fc->no_create = 1;
768 		goto mknod;
769 	} else if (err == -EEXIST)
770 		fuse_invalidate_entry(entry);
771 out_dput:
772 	dput(res);
773 	return err;
774 
775 mknod:
776 	err = fuse_mknod(idmap, dir, entry, mode, 0);
777 	if (err)
778 		goto out_dput;
779 no_open:
780 	return finish_no_open(file, res);
781 }
782 
783 /*
784  * Code shared between mknod, mkdir, symlink and link
785  */
786 static int create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm,
787 			    struct fuse_args *args, struct inode *dir,
788 			    struct dentry *entry, umode_t mode)
789 {
790 	struct fuse_entry_out outarg;
791 	struct inode *inode;
792 	struct dentry *d;
793 	int err;
794 	struct fuse_forget_link *forget;
795 
796 	if (fuse_is_bad(dir))
797 		return -EIO;
798 
799 	forget = fuse_alloc_forget();
800 	if (!forget)
801 		return -ENOMEM;
802 
803 	memset(&outarg, 0, sizeof(outarg));
804 	args->nodeid = get_node_id(dir);
805 	args->out_numargs = 1;
806 	args->out_args[0].size = sizeof(outarg);
807 	args->out_args[0].value = &outarg;
808 
809 	if (args->opcode != FUSE_LINK) {
810 		err = get_create_ext(idmap, args, dir, entry, mode);
811 		if (err)
812 			goto out_put_forget_req;
813 	}
814 
815 	err = fuse_simple_idmap_request(idmap, fm, args);
816 	free_ext_value(args);
817 	if (err)
818 		goto out_put_forget_req;
819 
820 	err = -EIO;
821 	if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
822 		goto out_put_forget_req;
823 
824 	if ((outarg.attr.mode ^ mode) & S_IFMT)
825 		goto out_put_forget_req;
826 
827 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
828 			  &outarg.attr, ATTR_TIMEOUT(&outarg), 0, 0);
829 	if (!inode) {
830 		fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
831 		return -ENOMEM;
832 	}
833 	kfree(forget);
834 
835 	d_drop(entry);
836 	d = d_splice_alias(inode, entry);
837 	if (IS_ERR(d))
838 		return PTR_ERR(d);
839 
840 	if (d) {
841 		fuse_change_entry_timeout(d, &outarg);
842 		dput(d);
843 	} else {
844 		fuse_change_entry_timeout(entry, &outarg);
845 	}
846 	fuse_dir_changed(dir);
847 	return 0;
848 
849  out_put_forget_req:
850 	if (err == -EEXIST)
851 		fuse_invalidate_entry(entry);
852 	kfree(forget);
853 	return err;
854 }
855 
856 static int fuse_mknod(struct mnt_idmap *idmap, struct inode *dir,
857 		      struct dentry *entry, umode_t mode, dev_t rdev)
858 {
859 	struct fuse_mknod_in inarg;
860 	struct fuse_mount *fm = get_fuse_mount(dir);
861 	FUSE_ARGS(args);
862 
863 	if (!fm->fc->dont_mask)
864 		mode &= ~current_umask();
865 
866 	memset(&inarg, 0, sizeof(inarg));
867 	inarg.mode = mode;
868 	inarg.rdev = new_encode_dev(rdev);
869 	inarg.umask = current_umask();
870 	args.opcode = FUSE_MKNOD;
871 	args.in_numargs = 2;
872 	args.in_args[0].size = sizeof(inarg);
873 	args.in_args[0].value = &inarg;
874 	args.in_args[1].size = entry->d_name.len + 1;
875 	args.in_args[1].value = entry->d_name.name;
876 	return create_new_entry(idmap, fm, &args, dir, entry, mode);
877 }
878 
879 static int fuse_create(struct mnt_idmap *idmap, struct inode *dir,
880 		       struct dentry *entry, umode_t mode, bool excl)
881 {
882 	return fuse_mknod(idmap, dir, entry, mode, 0);
883 }
884 
885 static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
886 			struct file *file, umode_t mode)
887 {
888 	struct fuse_conn *fc = get_fuse_conn(dir);
889 	int err;
890 
891 	if (fc->no_tmpfile)
892 		return -EOPNOTSUPP;
893 
894 	err = fuse_create_open(idmap, dir, file->f_path.dentry, file,
895 			       file->f_flags, mode, FUSE_TMPFILE);
896 	if (err == -ENOSYS) {
897 		fc->no_tmpfile = 1;
898 		err = -EOPNOTSUPP;
899 	}
900 	return err;
901 }
902 
903 static int fuse_mkdir(struct mnt_idmap *idmap, struct inode *dir,
904 		      struct dentry *entry, umode_t mode)
905 {
906 	struct fuse_mkdir_in inarg;
907 	struct fuse_mount *fm = get_fuse_mount(dir);
908 	FUSE_ARGS(args);
909 
910 	if (!fm->fc->dont_mask)
911 		mode &= ~current_umask();
912 
913 	memset(&inarg, 0, sizeof(inarg));
914 	inarg.mode = mode;
915 	inarg.umask = current_umask();
916 	args.opcode = FUSE_MKDIR;
917 	args.in_numargs = 2;
918 	args.in_args[0].size = sizeof(inarg);
919 	args.in_args[0].value = &inarg;
920 	args.in_args[1].size = entry->d_name.len + 1;
921 	args.in_args[1].value = entry->d_name.name;
922 	return create_new_entry(idmap, fm, &args, dir, entry, S_IFDIR);
923 }
924 
925 static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir,
926 			struct dentry *entry, const char *link)
927 {
928 	struct fuse_mount *fm = get_fuse_mount(dir);
929 	unsigned len = strlen(link) + 1;
930 	FUSE_ARGS(args);
931 
932 	args.opcode = FUSE_SYMLINK;
933 	args.in_numargs = 3;
934 	fuse_set_zero_arg0(&args);
935 	args.in_args[1].size = entry->d_name.len + 1;
936 	args.in_args[1].value = entry->d_name.name;
937 	args.in_args[2].size = len;
938 	args.in_args[2].value = link;
939 	return create_new_entry(idmap, fm, &args, dir, entry, S_IFLNK);
940 }
941 
942 void fuse_flush_time_update(struct inode *inode)
943 {
944 	int err = sync_inode_metadata(inode, 1);
945 
946 	mapping_set_error(inode->i_mapping, err);
947 }
948 
/*
 * Set the cached ctime of @inode to the current time, mark the inode dirty
 * and flush the time update.  Does nothing if IS_NOCMTIME() holds.
 */
static void fuse_update_ctime_in_cache(struct inode *inode)
{
	if (IS_NOCMTIME(inode))
		return;

	inode_set_ctime_current(inode);
	mark_inode_dirty_sync(inode);
	fuse_flush_time_update(inode);
}
957 
958 void fuse_update_ctime(struct inode *inode)
959 {
960 	fuse_invalidate_attr_mask(inode, STATX_CTIME);
961 	fuse_update_ctime_in_cache(inode);
962 }
963 
964 static void fuse_entry_unlinked(struct dentry *entry)
965 {
966 	struct inode *inode = d_inode(entry);
967 	struct fuse_conn *fc = get_fuse_conn(inode);
968 	struct fuse_inode *fi = get_fuse_inode(inode);
969 
970 	spin_lock(&fi->lock);
971 	fi->attr_version = atomic64_inc_return(&fc->attr_version);
972 	/*
973 	 * If i_nlink == 0 then unlink doesn't make sense, yet this can
974 	 * happen if userspace filesystem is careless.  It would be
975 	 * difficult to enforce correct nlink usage so just ignore this
976 	 * condition here
977 	 */
978 	if (S_ISDIR(inode->i_mode))
979 		clear_nlink(inode);
980 	else if (inode->i_nlink > 0)
981 		drop_nlink(inode);
982 	spin_unlock(&fi->lock);
983 	fuse_invalidate_entry_cache(entry);
984 	fuse_update_ctime(inode);
985 }
986 
987 static int fuse_unlink(struct inode *dir, struct dentry *entry)
988 {
989 	int err;
990 	struct fuse_mount *fm = get_fuse_mount(dir);
991 	FUSE_ARGS(args);
992 
993 	if (fuse_is_bad(dir))
994 		return -EIO;
995 
996 	args.opcode = FUSE_UNLINK;
997 	args.nodeid = get_node_id(dir);
998 	args.in_numargs = 2;
999 	fuse_set_zero_arg0(&args);
1000 	args.in_args[1].size = entry->d_name.len + 1;
1001 	args.in_args[1].value = entry->d_name.name;
1002 	err = fuse_simple_request(fm, &args);
1003 	if (!err) {
1004 		fuse_dir_changed(dir);
1005 		fuse_entry_unlinked(entry);
1006 	} else if (err == -EINTR || err == -ENOENT)
1007 		fuse_invalidate_entry(entry);
1008 	return err;
1009 }
1010 
1011 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
1012 {
1013 	int err;
1014 	struct fuse_mount *fm = get_fuse_mount(dir);
1015 	FUSE_ARGS(args);
1016 
1017 	if (fuse_is_bad(dir))
1018 		return -EIO;
1019 
1020 	args.opcode = FUSE_RMDIR;
1021 	args.nodeid = get_node_id(dir);
1022 	args.in_numargs = 2;
1023 	fuse_set_zero_arg0(&args);
1024 	args.in_args[1].size = entry->d_name.len + 1;
1025 	args.in_args[1].value = entry->d_name.name;
1026 	err = fuse_simple_request(fm, &args);
1027 	if (!err) {
1028 		fuse_dir_changed(dir);
1029 		fuse_entry_unlinked(entry);
1030 	} else if (err == -EINTR || err == -ENOENT)
1031 		fuse_invalidate_entry(entry);
1032 	return err;
1033 }
1034 
1035 static int fuse_rename_common(struct mnt_idmap *idmap, struct inode *olddir, struct dentry *oldent,
1036 			      struct inode *newdir, struct dentry *newent,
1037 			      unsigned int flags, int opcode, size_t argsize)
1038 {
1039 	int err;
1040 	struct fuse_rename2_in inarg;
1041 	struct fuse_mount *fm = get_fuse_mount(olddir);
1042 	FUSE_ARGS(args);
1043 
1044 	memset(&inarg, 0, argsize);
1045 	inarg.newdir = get_node_id(newdir);
1046 	inarg.flags = flags;
1047 	args.opcode = opcode;
1048 	args.nodeid = get_node_id(olddir);
1049 	args.in_numargs = 3;
1050 	args.in_args[0].size = argsize;
1051 	args.in_args[0].value = &inarg;
1052 	args.in_args[1].size = oldent->d_name.len + 1;
1053 	args.in_args[1].value = oldent->d_name.name;
1054 	args.in_args[2].size = newent->d_name.len + 1;
1055 	args.in_args[2].value = newent->d_name.name;
1056 	err = fuse_simple_idmap_request(idmap, fm, &args);
1057 	if (!err) {
1058 		/* ctime changes */
1059 		fuse_update_ctime(d_inode(oldent));
1060 
1061 		if (flags & RENAME_EXCHANGE)
1062 			fuse_update_ctime(d_inode(newent));
1063 
1064 		fuse_dir_changed(olddir);
1065 		if (olddir != newdir)
1066 			fuse_dir_changed(newdir);
1067 
1068 		/* newent will end up negative */
1069 		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent))
1070 			fuse_entry_unlinked(newent);
1071 	} else if (err == -EINTR || err == -ENOENT) {
1072 		/* If request was interrupted, DEITY only knows if the
1073 		   rename actually took place.  If the invalidation
1074 		   fails (e.g. some process has CWD under the renamed
1075 		   directory), then there can be inconsistency between
1076 		   the dcache and the real filesystem.  Tough luck. */
1077 		fuse_invalidate_entry(oldent);
1078 		if (d_really_is_positive(newent))
1079 			fuse_invalidate_entry(newent);
1080 	}
1081 
1082 	return err;
1083 }
1084 
1085 static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir,
1086 			struct dentry *oldent, struct inode *newdir,
1087 			struct dentry *newent, unsigned int flags)
1088 {
1089 	struct fuse_conn *fc = get_fuse_conn(olddir);
1090 	int err;
1091 
1092 	if (fuse_is_bad(olddir))
1093 		return -EIO;
1094 
1095 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
1096 		return -EINVAL;
1097 
1098 	if (flags) {
1099 		if (fc->no_rename2 || fc->minor < 23)
1100 			return -EINVAL;
1101 
1102 		err = fuse_rename_common((flags & RENAME_WHITEOUT) ? idmap : &invalid_mnt_idmap,
1103 					 olddir, oldent, newdir, newent, flags,
1104 					 FUSE_RENAME2,
1105 					 sizeof(struct fuse_rename2_in));
1106 		if (err == -ENOSYS) {
1107 			fc->no_rename2 = 1;
1108 			err = -EINVAL;
1109 		}
1110 	} else {
1111 		err = fuse_rename_common(&invalid_mnt_idmap, olddir, oldent, newdir, newent, 0,
1112 					 FUSE_RENAME,
1113 					 sizeof(struct fuse_rename_in));
1114 	}
1115 
1116 	return err;
1117 }
1118 
1119 static int fuse_link(struct dentry *entry, struct inode *newdir,
1120 		     struct dentry *newent)
1121 {
1122 	int err;
1123 	struct fuse_link_in inarg;
1124 	struct inode *inode = d_inode(entry);
1125 	struct fuse_mount *fm = get_fuse_mount(inode);
1126 	FUSE_ARGS(args);
1127 
1128 	memset(&inarg, 0, sizeof(inarg));
1129 	inarg.oldnodeid = get_node_id(inode);
1130 	args.opcode = FUSE_LINK;
1131 	args.in_numargs = 2;
1132 	args.in_args[0].size = sizeof(inarg);
1133 	args.in_args[0].value = &inarg;
1134 	args.in_args[1].size = newent->d_name.len + 1;
1135 	args.in_args[1].value = newent->d_name.name;
1136 	err = create_new_entry(&invalid_mnt_idmap, fm, &args, newdir, newent, inode->i_mode);
1137 	if (!err)
1138 		fuse_update_ctime_in_cache(inode);
1139 	else if (err == -EINTR)
1140 		fuse_invalidate_attr(inode);
1141 
1142 	return err;
1143 }
1144 
1145 static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode,
1146 			  struct fuse_attr *attr, struct kstat *stat)
1147 {
1148 	unsigned int blkbits;
1149 	struct fuse_conn *fc = get_fuse_conn(inode);
1150 	vfsuid_t vfsuid = make_vfsuid(idmap, fc->user_ns,
1151 				      make_kuid(fc->user_ns, attr->uid));
1152 	vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns,
1153 				      make_kgid(fc->user_ns, attr->gid));
1154 
1155 	stat->dev = inode->i_sb->s_dev;
1156 	stat->ino = attr->ino;
1157 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
1158 	stat->nlink = attr->nlink;
1159 	stat->uid = vfsuid_into_kuid(vfsuid);
1160 	stat->gid = vfsgid_into_kgid(vfsgid);
1161 	stat->rdev = inode->i_rdev;
1162 	stat->atime.tv_sec = attr->atime;
1163 	stat->atime.tv_nsec = attr->atimensec;
1164 	stat->mtime.tv_sec = attr->mtime;
1165 	stat->mtime.tv_nsec = attr->mtimensec;
1166 	stat->ctime.tv_sec = attr->ctime;
1167 	stat->ctime.tv_nsec = attr->ctimensec;
1168 	stat->size = attr->size;
1169 	stat->blocks = attr->blocks;
1170 
1171 	if (attr->blksize != 0)
1172 		blkbits = ilog2(attr->blksize);
1173 	else
1174 		blkbits = inode->i_sb->s_blocksize_bits;
1175 
1176 	stat->blksize = 1 << blkbits;
1177 }
1178 
1179 static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr)
1180 {
1181 	memset(attr, 0, sizeof(*attr));
1182 	attr->ino = sx->ino;
1183 	attr->size = sx->size;
1184 	attr->blocks = sx->blocks;
1185 	attr->atime = sx->atime.tv_sec;
1186 	attr->mtime = sx->mtime.tv_sec;
1187 	attr->ctime = sx->ctime.tv_sec;
1188 	attr->atimensec = sx->atime.tv_nsec;
1189 	attr->mtimensec = sx->mtime.tv_nsec;
1190 	attr->ctimensec = sx->ctime.tv_nsec;
1191 	attr->mode = sx->mode;
1192 	attr->nlink = sx->nlink;
1193 	attr->uid = sx->uid;
1194 	attr->gid = sx->gid;
1195 	attr->rdev = new_encode_dev(MKDEV(sx->rdev_major, sx->rdev_minor));
1196 	attr->blksize = sx->blksize;
1197 }
1198 
1199 static int fuse_do_statx(struct mnt_idmap *idmap, struct inode *inode,
1200 			 struct file *file, struct kstat *stat)
1201 {
1202 	int err;
1203 	struct fuse_attr attr;
1204 	struct fuse_statx *sx;
1205 	struct fuse_statx_in inarg;
1206 	struct fuse_statx_out outarg;
1207 	struct fuse_mount *fm = get_fuse_mount(inode);
1208 	u64 attr_version = fuse_get_attr_version(fm->fc);
1209 	FUSE_ARGS(args);
1210 
1211 	memset(&inarg, 0, sizeof(inarg));
1212 	memset(&outarg, 0, sizeof(outarg));
1213 	/* Directories have separate file-handle space */
1214 	if (file && S_ISREG(inode->i_mode)) {
1215 		struct fuse_file *ff = file->private_data;
1216 
1217 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1218 		inarg.fh = ff->fh;
1219 	}
1220 	/* For now leave sync hints as the default, request all stats. */
1221 	inarg.sx_flags = 0;
1222 	inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME;
1223 	args.opcode = FUSE_STATX;
1224 	args.nodeid = get_node_id(inode);
1225 	args.in_numargs = 1;
1226 	args.in_args[0].size = sizeof(inarg);
1227 	args.in_args[0].value = &inarg;
1228 	args.out_numargs = 1;
1229 	args.out_args[0].size = sizeof(outarg);
1230 	args.out_args[0].value = &outarg;
1231 	err = fuse_simple_request(fm, &args);
1232 	if (err)
1233 		return err;
1234 
1235 	sx = &outarg.stat;
1236 	if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
1237 	    ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
1238 					 inode_wrong_type(inode, sx->mode)))) {
1239 		fuse_make_bad(inode);
1240 		return -EIO;
1241 	}
1242 
1243 	fuse_statx_to_attr(&outarg.stat, &attr);
1244 	if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) {
1245 		fuse_change_attributes(inode, &attr, &outarg.stat,
1246 				       ATTR_TIMEOUT(&outarg), attr_version);
1247 	}
1248 
1249 	if (stat) {
1250 		stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME);
1251 		stat->btime.tv_sec = sx->btime.tv_sec;
1252 		stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
1253 		fuse_fillattr(idmap, inode, &attr, stat);
1254 		stat->result_mask |= STATX_TYPE;
1255 	}
1256 
1257 	return 0;
1258 }
1259 
1260 static int fuse_do_getattr(struct mnt_idmap *idmap, struct inode *inode,
1261 			   struct kstat *stat, struct file *file)
1262 {
1263 	int err;
1264 	struct fuse_getattr_in inarg;
1265 	struct fuse_attr_out outarg;
1266 	struct fuse_mount *fm = get_fuse_mount(inode);
1267 	FUSE_ARGS(args);
1268 	u64 attr_version;
1269 
1270 	attr_version = fuse_get_attr_version(fm->fc);
1271 
1272 	memset(&inarg, 0, sizeof(inarg));
1273 	memset(&outarg, 0, sizeof(outarg));
1274 	/* Directories have separate file-handle space */
1275 	if (file && S_ISREG(inode->i_mode)) {
1276 		struct fuse_file *ff = file->private_data;
1277 
1278 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1279 		inarg.fh = ff->fh;
1280 	}
1281 	args.opcode = FUSE_GETATTR;
1282 	args.nodeid = get_node_id(inode);
1283 	args.in_numargs = 1;
1284 	args.in_args[0].size = sizeof(inarg);
1285 	args.in_args[0].value = &inarg;
1286 	args.out_numargs = 1;
1287 	args.out_args[0].size = sizeof(outarg);
1288 	args.out_args[0].value = &outarg;
1289 	err = fuse_simple_request(fm, &args);
1290 	if (!err) {
1291 		if (fuse_invalid_attr(&outarg.attr) ||
1292 		    inode_wrong_type(inode, outarg.attr.mode)) {
1293 			fuse_make_bad(inode);
1294 			err = -EIO;
1295 		} else {
1296 			fuse_change_attributes(inode, &outarg.attr, NULL,
1297 					       ATTR_TIMEOUT(&outarg),
1298 					       attr_version);
1299 			if (stat)
1300 				fuse_fillattr(idmap, inode, &outarg.attr, stat);
1301 		}
1302 	}
1303 	return err;
1304 }
1305 
1306 static int fuse_update_get_attr(struct mnt_idmap *idmap, struct inode *inode,
1307 				struct file *file, struct kstat *stat,
1308 				u32 request_mask, unsigned int flags)
1309 {
1310 	struct fuse_inode *fi = get_fuse_inode(inode);
1311 	struct fuse_conn *fc = get_fuse_conn(inode);
1312 	int err = 0;
1313 	bool sync;
1314 	u32 inval_mask = READ_ONCE(fi->inval_mask);
1315 	u32 cache_mask = fuse_get_cache_mask(inode);
1316 
1317 
1318 	/* FUSE only supports basic stats and possibly btime */
1319 	request_mask &= STATX_BASIC_STATS | STATX_BTIME;
1320 retry:
1321 	if (fc->no_statx)
1322 		request_mask &= STATX_BASIC_STATS;
1323 
1324 	if (!request_mask)
1325 		sync = false;
1326 	else if (flags & AT_STATX_FORCE_SYNC)
1327 		sync = true;
1328 	else if (flags & AT_STATX_DONT_SYNC)
1329 		sync = false;
1330 	else if (request_mask & inval_mask & ~cache_mask)
1331 		sync = true;
1332 	else
1333 		sync = time_before64(fi->i_time, get_jiffies_64());
1334 
1335 	if (sync) {
1336 		forget_all_cached_acls(inode);
1337 		/* Try statx if BTIME is requested */
1338 		if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) {
1339 			err = fuse_do_statx(idmap, inode, file, stat);
1340 			if (err == -ENOSYS) {
1341 				fc->no_statx = 1;
1342 				err = 0;
1343 				goto retry;
1344 			}
1345 		} else {
1346 			err = fuse_do_getattr(idmap, inode, stat, file);
1347 		}
1348 	} else if (stat) {
1349 		generic_fillattr(idmap, request_mask, inode, stat);
1350 		stat->mode = fi->orig_i_mode;
1351 		stat->ino = fi->orig_ino;
1352 		if (test_bit(FUSE_I_BTIME, &fi->state)) {
1353 			stat->btime = fi->i_btime;
1354 			stat->result_mask |= STATX_BTIME;
1355 		}
1356 	}
1357 
1358 	return err;
1359 }
1360 
1361 int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask)
1362 {
1363 	return fuse_update_get_attr(&nop_mnt_idmap, inode, file, NULL, mask, 0);
1364 }
1365 
1366 int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
1367 			     u64 child_nodeid, struct qstr *name, u32 flags)
1368 {
1369 	int err = -ENOTDIR;
1370 	struct inode *parent;
1371 	struct dentry *dir;
1372 	struct dentry *entry;
1373 
1374 	parent = fuse_ilookup(fc, parent_nodeid, NULL);
1375 	if (!parent)
1376 		return -ENOENT;
1377 
1378 	inode_lock_nested(parent, I_MUTEX_PARENT);
1379 	if (!S_ISDIR(parent->i_mode))
1380 		goto unlock;
1381 
1382 	err = -ENOENT;
1383 	dir = d_find_alias(parent);
1384 	if (!dir)
1385 		goto unlock;
1386 
1387 	name->hash = full_name_hash(dir, name->name, name->len);
1388 	entry = d_lookup(dir, name);
1389 	dput(dir);
1390 	if (!entry)
1391 		goto unlock;
1392 
1393 	fuse_dir_changed(parent);
1394 	if (!(flags & FUSE_EXPIRE_ONLY))
1395 		d_invalidate(entry);
1396 	fuse_invalidate_entry_cache(entry);
1397 
1398 	if (child_nodeid != 0 && d_really_is_positive(entry)) {
1399 		inode_lock(d_inode(entry));
1400 		if (get_node_id(d_inode(entry)) != child_nodeid) {
1401 			err = -ENOENT;
1402 			goto badentry;
1403 		}
1404 		if (d_mountpoint(entry)) {
1405 			err = -EBUSY;
1406 			goto badentry;
1407 		}
1408 		if (d_is_dir(entry)) {
1409 			shrink_dcache_parent(entry);
1410 			if (!simple_empty(entry)) {
1411 				err = -ENOTEMPTY;
1412 				goto badentry;
1413 			}
1414 			d_inode(entry)->i_flags |= S_DEAD;
1415 		}
1416 		dont_mount(entry);
1417 		clear_nlink(d_inode(entry));
1418 		err = 0;
1419  badentry:
1420 		inode_unlock(d_inode(entry));
1421 		if (!err)
1422 			d_delete(entry);
1423 	} else {
1424 		err = 0;
1425 	}
1426 	dput(entry);
1427 
1428  unlock:
1429 	inode_unlock(parent);
1430 	iput(parent);
1431 	return err;
1432 }
1433 
1434 static inline bool fuse_permissible_uidgid(struct fuse_conn *fc)
1435 {
1436 	const struct cred *cred = current_cred();
1437 
1438 	return (uid_eq(cred->euid, fc->user_id) &&
1439 		uid_eq(cred->suid, fc->user_id) &&
1440 		uid_eq(cred->uid,  fc->user_id) &&
1441 		gid_eq(cred->egid, fc->group_id) &&
1442 		gid_eq(cred->sgid, fc->group_id) &&
1443 		gid_eq(cred->gid,  fc->group_id));
1444 }
1445 
1446 /*
1447  * Calling into a user-controlled filesystem gives the filesystem
1448  * daemon ptrace-like capabilities over the current process.  This
1449  * means, that the filesystem daemon is able to record the exact
1450  * filesystem operations performed, and can also control the behavior
1451  * of the requester process in otherwise impossible ways.  For example
1452  * it can delay the operation for arbitrary length of time allowing
1453  * DoS against the requester.
1454  *
1455  * For this reason only those processes can call into the filesystem,
1456  * for which the owner of the mount has ptrace privilege.  This
1457  * excludes processes started by other users, suid or sgid processes.
1458  */
1459 bool fuse_allow_current_process(struct fuse_conn *fc)
1460 {
1461 	bool allow;
1462 
1463 	if (fc->allow_other)
1464 		allow = current_in_userns(fc->user_ns);
1465 	else
1466 		allow = fuse_permissible_uidgid(fc);
1467 
1468 	if (!allow && allow_sys_admin_access && capable(CAP_SYS_ADMIN))
1469 		allow = true;
1470 
1471 	return allow;
1472 }
1473 
1474 static int fuse_access(struct inode *inode, int mask)
1475 {
1476 	struct fuse_mount *fm = get_fuse_mount(inode);
1477 	FUSE_ARGS(args);
1478 	struct fuse_access_in inarg;
1479 	int err;
1480 
1481 	BUG_ON(mask & MAY_NOT_BLOCK);
1482 
1483 	/*
1484 	 * We should not send FUSE_ACCESS to the userspace
1485 	 * when idmapped mounts are enabled as for this case
1486 	 * we have fc->default_permissions = 1 and access
1487 	 * permission checks are done on the kernel side.
1488 	 */
1489 	WARN_ON_ONCE(!(fm->sb->s_iflags & SB_I_NOIDMAP));
1490 
1491 	if (fm->fc->no_access)
1492 		return 0;
1493 
1494 	memset(&inarg, 0, sizeof(inarg));
1495 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1496 	args.opcode = FUSE_ACCESS;
1497 	args.nodeid = get_node_id(inode);
1498 	args.in_numargs = 1;
1499 	args.in_args[0].size = sizeof(inarg);
1500 	args.in_args[0].value = &inarg;
1501 	err = fuse_simple_request(fm, &args);
1502 	if (err == -ENOSYS) {
1503 		fm->fc->no_access = 1;
1504 		err = 0;
1505 	}
1506 	return err;
1507 }
1508 
1509 static int fuse_perm_getattr(struct inode *inode, int mask)
1510 {
1511 	if (mask & MAY_NOT_BLOCK)
1512 		return -ECHILD;
1513 
1514 	forget_all_cached_acls(inode);
1515 	return fuse_do_getattr(&nop_mnt_idmap, inode, NULL, NULL);
1516 }
1517 
1518 /*
1519  * Check permission.  The two basic access models of FUSE are:
1520  *
1521  * 1) Local access checking ('default_permissions' mount option) based
1522  * on file mode.  This is the plain old disk filesystem permission
1523  * model.
1524  *
1525  * 2) "Remote" access checking, where server is responsible for
1526  * checking permission in each inode operation.  An exception to this
1527  * is if ->permission() was invoked from sys_access() in which case an
1528  * access request is sent.  Execute permission is still checked
1529  * locally based on file mode.
1530  */
1531 static int fuse_permission(struct mnt_idmap *idmap,
1532 			   struct inode *inode, int mask)
1533 {
1534 	struct fuse_conn *fc = get_fuse_conn(inode);
1535 	bool refreshed = false;
1536 	int err = 0;
1537 
1538 	if (fuse_is_bad(inode))
1539 		return -EIO;
1540 
1541 	if (!fuse_allow_current_process(fc))
1542 		return -EACCES;
1543 
1544 	/*
1545 	 * If attributes are needed, refresh them before proceeding
1546 	 */
1547 	if (fc->default_permissions ||
1548 	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1549 		struct fuse_inode *fi = get_fuse_inode(inode);
1550 		u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
1551 
1552 		if (perm_mask & READ_ONCE(fi->inval_mask) ||
1553 		    time_before64(fi->i_time, get_jiffies_64())) {
1554 			refreshed = true;
1555 
1556 			err = fuse_perm_getattr(inode, mask);
1557 			if (err)
1558 				return err;
1559 		}
1560 	}
1561 
1562 	if (fc->default_permissions) {
1563 		err = generic_permission(idmap, inode, mask);
1564 
1565 		/* If permission is denied, try to refresh file
1566 		   attributes.  This is also needed, because the root
1567 		   node will at first have no permissions */
1568 		if (err == -EACCES && !refreshed) {
1569 			err = fuse_perm_getattr(inode, mask);
1570 			if (!err)
1571 				err = generic_permission(idmap,
1572 							 inode, mask);
1573 		}
1574 
1575 		/* Note: the opposite of the above test does not
1576 		   exist.  So if permissions are revoked this won't be
1577 		   noticed immediately, only after the attribute
1578 		   timeout has expired */
1579 	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1580 		err = fuse_access(inode, mask);
1581 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1582 		if (!(inode->i_mode & S_IXUGO)) {
1583 			if (refreshed)
1584 				return -EACCES;
1585 
1586 			err = fuse_perm_getattr(inode, mask);
1587 			if (!err && !(inode->i_mode & S_IXUGO))
1588 				return -EACCES;
1589 		}
1590 	}
1591 	return err;
1592 }
1593 
1594 static int fuse_readlink_page(struct inode *inode, struct folio *folio)
1595 {
1596 	struct fuse_mount *fm = get_fuse_mount(inode);
1597 	struct fuse_folio_desc desc = { .length = PAGE_SIZE - 1 };
1598 	struct fuse_args_pages ap = {
1599 		.num_folios = 1,
1600 		.folios = &folio,
1601 		.descs = &desc,
1602 	};
1603 	char *link;
1604 	ssize_t res;
1605 
1606 	ap.args.opcode = FUSE_READLINK;
1607 	ap.args.nodeid = get_node_id(inode);
1608 	ap.args.out_pages = true;
1609 	ap.args.out_argvar = true;
1610 	ap.args.page_zeroing = true;
1611 	ap.args.out_numargs = 1;
1612 	ap.args.out_args[0].size = desc.length;
1613 	res = fuse_simple_request(fm, &ap.args);
1614 
1615 	fuse_invalidate_atime(inode);
1616 
1617 	if (res < 0)
1618 		return res;
1619 
1620 	if (WARN_ON(res >= PAGE_SIZE))
1621 		return -EIO;
1622 
1623 	link = folio_address(folio);
1624 	link[res] = '\0';
1625 
1626 	return 0;
1627 }
1628 
1629 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
1630 				 struct delayed_call *callback)
1631 {
1632 	struct fuse_conn *fc = get_fuse_conn(inode);
1633 	struct folio *folio;
1634 	int err;
1635 
1636 	err = -EIO;
1637 	if (fuse_is_bad(inode))
1638 		goto out_err;
1639 
1640 	if (fc->cache_symlinks)
1641 		return page_get_link(dentry, inode, callback);
1642 
1643 	err = -ECHILD;
1644 	if (!dentry)
1645 		goto out_err;
1646 
1647 	folio = folio_alloc(GFP_KERNEL, 0);
1648 	err = -ENOMEM;
1649 	if (!folio)
1650 		goto out_err;
1651 
1652 	err = fuse_readlink_page(inode, folio);
1653 	if (err) {
1654 		folio_put(folio);
1655 		goto out_err;
1656 	}
1657 
1658 	set_delayed_call(callback, page_put_link, &folio->page);
1659 
1660 	return folio_address(folio);
1661 
1662 out_err:
1663 	return ERR_PTR(err);
1664 }
1665 
1666 static int fuse_dir_open(struct inode *inode, struct file *file)
1667 {
1668 	struct fuse_mount *fm = get_fuse_mount(inode);
1669 	int err;
1670 
1671 	if (fuse_is_bad(inode))
1672 		return -EIO;
1673 
1674 	err = generic_file_open(inode, file);
1675 	if (err)
1676 		return err;
1677 
1678 	err = fuse_do_open(fm, get_node_id(inode), file, true);
1679 	if (!err) {
1680 		struct fuse_file *ff = file->private_data;
1681 
1682 		/*
1683 		 * Keep handling FOPEN_STREAM and FOPEN_NONSEEKABLE for
1684 		 * directories for backward compatibility, though it's unlikely
1685 		 * to be useful.
1686 		 */
1687 		if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE))
1688 			nonseekable_open(inode, file);
1689 		if (!(ff->open_flags & FOPEN_KEEP_CACHE))
1690 			invalidate_inode_pages2(inode->i_mapping);
1691 	}
1692 
1693 	return err;
1694 }
1695 
1696 static int fuse_dir_release(struct inode *inode, struct file *file)
1697 {
1698 	fuse_release_common(file, true);
1699 
1700 	return 0;
1701 }
1702 
1703 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1704 			  int datasync)
1705 {
1706 	struct inode *inode = file->f_mapping->host;
1707 	struct fuse_conn *fc = get_fuse_conn(inode);
1708 	int err;
1709 
1710 	if (fuse_is_bad(inode))
1711 		return -EIO;
1712 
1713 	if (fc->no_fsyncdir)
1714 		return 0;
1715 
1716 	inode_lock(inode);
1717 	err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
1718 	if (err == -ENOSYS) {
1719 		fc->no_fsyncdir = 1;
1720 		err = 0;
1721 	}
1722 	inode_unlock(inode);
1723 
1724 	return err;
1725 }
1726 
1727 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1728 			    unsigned long arg)
1729 {
1730 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1731 
1732 	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1733 	if (fc->minor < 18)
1734 		return -ENOTTY;
1735 
1736 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1737 }
1738 
1739 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1740 				   unsigned long arg)
1741 {
1742 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1743 
1744 	if (fc->minor < 18)
1745 		return -ENOTTY;
1746 
1747 	return fuse_ioctl_common(file, cmd, arg,
1748 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1749 }
1750 
1751 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1752 {
1753 	/* Always update if mtime is explicitly set  */
1754 	if (ivalid & ATTR_MTIME_SET)
1755 		return true;
1756 
1757 	/* Or if kernel i_mtime is the official one */
1758 	if (trust_local_mtime)
1759 		return true;
1760 
1761 	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1762 	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1763 		return false;
1764 
1765 	/* In all other cases update */
1766 	return true;
1767 }
1768 
1769 static void iattr_to_fattr(struct mnt_idmap *idmap, struct fuse_conn *fc,
1770 			   struct iattr *iattr, struct fuse_setattr_in *arg,
1771 			   bool trust_local_cmtime)
1772 {
1773 	unsigned ivalid = iattr->ia_valid;
1774 
1775 	if (ivalid & ATTR_MODE)
1776 		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1777 
1778 	if (ivalid & ATTR_UID) {
1779 		kuid_t fsuid = from_vfsuid(idmap, fc->user_ns, iattr->ia_vfsuid);
1780 
1781 		arg->valid |= FATTR_UID;
1782 		arg->uid = from_kuid(fc->user_ns, fsuid);
1783 	}
1784 
1785 	if (ivalid & ATTR_GID) {
1786 		kgid_t fsgid = from_vfsgid(idmap, fc->user_ns, iattr->ia_vfsgid);
1787 
1788 		arg->valid |= FATTR_GID;
1789 		arg->gid = from_kgid(fc->user_ns, fsgid);
1790 	}
1791 
1792 	if (ivalid & ATTR_SIZE)
1793 		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1794 	if (ivalid & ATTR_ATIME) {
1795 		arg->valid |= FATTR_ATIME;
1796 		arg->atime = iattr->ia_atime.tv_sec;
1797 		arg->atimensec = iattr->ia_atime.tv_nsec;
1798 		if (!(ivalid & ATTR_ATIME_SET))
1799 			arg->valid |= FATTR_ATIME_NOW;
1800 	}
1801 	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1802 		arg->valid |= FATTR_MTIME;
1803 		arg->mtime = iattr->ia_mtime.tv_sec;
1804 		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1805 		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1806 			arg->valid |= FATTR_MTIME_NOW;
1807 	}
1808 	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1809 		arg->valid |= FATTR_CTIME;
1810 		arg->ctime = iattr->ia_ctime.tv_sec;
1811 		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1812 	}
1813 }
1814 
1815 /*
1816  * Prevent concurrent writepages on inode
1817  *
1818  * This is done by adding a negative bias to the inode write counter
1819  * and waiting for all pending writes to finish.
1820  */
1821 void fuse_set_nowrite(struct inode *inode)
1822 {
1823 	struct fuse_inode *fi = get_fuse_inode(inode);
1824 
1825 	BUG_ON(!inode_is_locked(inode));
1826 
1827 	spin_lock(&fi->lock);
1828 	BUG_ON(fi->writectr < 0);
1829 	fi->writectr += FUSE_NOWRITE;
1830 	spin_unlock(&fi->lock);
1831 	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1832 }
1833 
1834 /*
1835  * Allow writepages on inode
1836  *
1837  * Remove the bias from the writecounter and send any queued
1838  * writepages.
1839  */
1840 static void __fuse_release_nowrite(struct inode *inode)
1841 {
1842 	struct fuse_inode *fi = get_fuse_inode(inode);
1843 
1844 	BUG_ON(fi->writectr != FUSE_NOWRITE);
1845 	fi->writectr = 0;
1846 	fuse_flush_writepages(inode);
1847 }
1848 
1849 void fuse_release_nowrite(struct inode *inode)
1850 {
1851 	struct fuse_inode *fi = get_fuse_inode(inode);
1852 
1853 	spin_lock(&fi->lock);
1854 	__fuse_release_nowrite(inode);
1855 	spin_unlock(&fi->lock);
1856 }
1857 
1858 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1859 			      struct inode *inode,
1860 			      struct fuse_setattr_in *inarg_p,
1861 			      struct fuse_attr_out *outarg_p)
1862 {
1863 	args->opcode = FUSE_SETATTR;
1864 	args->nodeid = get_node_id(inode);
1865 	args->in_numargs = 1;
1866 	args->in_args[0].size = sizeof(*inarg_p);
1867 	args->in_args[0].value = inarg_p;
1868 	args->out_numargs = 1;
1869 	args->out_args[0].size = sizeof(*outarg_p);
1870 	args->out_args[0].value = outarg_p;
1871 }
1872 
1873 /*
1874  * Flush inode->i_mtime to the server
1875  */
1876 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1877 {
1878 	struct fuse_mount *fm = get_fuse_mount(inode);
1879 	FUSE_ARGS(args);
1880 	struct fuse_setattr_in inarg;
1881 	struct fuse_attr_out outarg;
1882 
1883 	memset(&inarg, 0, sizeof(inarg));
1884 	memset(&outarg, 0, sizeof(outarg));
1885 
1886 	inarg.valid = FATTR_MTIME;
1887 	inarg.mtime = inode_get_mtime_sec(inode);
1888 	inarg.mtimensec = inode_get_mtime_nsec(inode);
1889 	if (fm->fc->minor >= 23) {
1890 		inarg.valid |= FATTR_CTIME;
1891 		inarg.ctime = inode_get_ctime_sec(inode);
1892 		inarg.ctimensec = inode_get_ctime_nsec(inode);
1893 	}
1894 	if (ff) {
1895 		inarg.valid |= FATTR_FH;
1896 		inarg.fh = ff->fh;
1897 	}
1898 	fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
1899 
1900 	return fuse_simple_request(fm, &args);
1901 }
1902 
1903 /*
1904  * Set attributes, and at the same time refresh them.
1905  *
1906  * Truncation is slightly complicated, because the 'truncate' request
1907  * may fail, in which case we don't want to touch the mapping.
1908  * vmtruncate() doesn't allow for this case, so do the rlimit checking
1909  * and the actual truncation by hand.
1910  */
1911 int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
1912 		    struct iattr *attr, struct file *file)
1913 {
1914 	struct inode *inode = d_inode(dentry);
1915 	struct fuse_mount *fm = get_fuse_mount(inode);
1916 	struct fuse_conn *fc = fm->fc;
1917 	struct fuse_inode *fi = get_fuse_inode(inode);
1918 	struct address_space *mapping = inode->i_mapping;
1919 	FUSE_ARGS(args);
1920 	struct fuse_setattr_in inarg;
1921 	struct fuse_attr_out outarg;
1922 	bool is_truncate = false;
1923 	bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode);
1924 	loff_t oldsize;
1925 	int err;
1926 	bool trust_local_cmtime = is_wb;
1927 	bool fault_blocked = false;
1928 
1929 	if (!fc->default_permissions)
1930 		attr->ia_valid |= ATTR_FORCE;
1931 
1932 	err = setattr_prepare(idmap, dentry, attr);
1933 	if (err)
1934 		return err;
1935 
1936 	if (attr->ia_valid & ATTR_SIZE) {
1937 		if (WARN_ON(!S_ISREG(inode->i_mode)))
1938 			return -EIO;
1939 		is_truncate = true;
1940 	}
1941 
1942 	if (FUSE_IS_DAX(inode) && is_truncate) {
1943 		filemap_invalidate_lock(mapping);
1944 		fault_blocked = true;
1945 		err = fuse_dax_break_layouts(inode, 0, 0);
1946 		if (err) {
1947 			filemap_invalidate_unlock(mapping);
1948 			return err;
1949 		}
1950 	}
1951 
1952 	if (attr->ia_valid & ATTR_OPEN) {
1953 		/* This is coming from open(..., ... | O_TRUNC); */
1954 		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
1955 		WARN_ON(attr->ia_size != 0);
1956 		if (fc->atomic_o_trunc) {
1957 			/*
1958 			 * No need to send request to userspace, since actual
1959 			 * truncation has already been done by OPEN.  But still
1960 			 * need to truncate page cache.
1961 			 */
1962 			i_size_write(inode, 0);
1963 			truncate_pagecache(inode, 0);
1964 			goto out;
1965 		}
1966 		file = NULL;
1967 	}
1968 
1969 	/* Flush dirty data/metadata before non-truncate SETATTR */
1970 	if (is_wb &&
1971 	    attr->ia_valid &
1972 			(ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
1973 			 ATTR_TIMES_SET)) {
1974 		err = write_inode_now(inode, true);
1975 		if (err)
1976 			return err;
1977 
1978 		fuse_set_nowrite(inode);
1979 		fuse_release_nowrite(inode);
1980 	}
1981 
1982 	if (is_truncate) {
1983 		fuse_set_nowrite(inode);
1984 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1985 		if (trust_local_cmtime && attr->ia_size != inode->i_size)
1986 			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1987 	}
1988 
1989 	memset(&inarg, 0, sizeof(inarg));
1990 	memset(&outarg, 0, sizeof(outarg));
1991 	iattr_to_fattr(idmap, fc, attr, &inarg, trust_local_cmtime);
1992 	if (file) {
1993 		struct fuse_file *ff = file->private_data;
1994 		inarg.valid |= FATTR_FH;
1995 		inarg.fh = ff->fh;
1996 	}
1997 
1998 	/* Kill suid/sgid for non-directory chown unconditionally */
1999 	if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
2000 	    attr->ia_valid & (ATTR_UID | ATTR_GID))
2001 		inarg.valid |= FATTR_KILL_SUIDGID;
2002 
2003 	if (attr->ia_valid & ATTR_SIZE) {
2004 		/* For mandatory locking in truncate */
2005 		inarg.valid |= FATTR_LOCKOWNER;
2006 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
2007 
2008 		/* Kill suid/sgid for truncate only if no CAP_FSETID */
2009 		if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
2010 			inarg.valid |= FATTR_KILL_SUIDGID;
2011 	}
2012 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
2013 	err = fuse_simple_request(fm, &args);
2014 	if (err) {
2015 		if (err == -EINTR)
2016 			fuse_invalidate_attr(inode);
2017 		goto error;
2018 	}
2019 
2020 	if (fuse_invalid_attr(&outarg.attr) ||
2021 	    inode_wrong_type(inode, outarg.attr.mode)) {
2022 		fuse_make_bad(inode);
2023 		err = -EIO;
2024 		goto error;
2025 	}
2026 
2027 	spin_lock(&fi->lock);
2028 	/* the kernel maintains i_mtime locally */
2029 	if (trust_local_cmtime) {
2030 		if (attr->ia_valid & ATTR_MTIME)
2031 			inode_set_mtime_to_ts(inode, attr->ia_mtime);
2032 		if (attr->ia_valid & ATTR_CTIME)
2033 			inode_set_ctime_to_ts(inode, attr->ia_ctime);
2034 		/* FIXME: clear I_DIRTY_SYNC? */
2035 	}
2036 
2037 	fuse_change_attributes_common(inode, &outarg.attr, NULL,
2038 				      ATTR_TIMEOUT(&outarg),
2039 				      fuse_get_cache_mask(inode), 0);
2040 	oldsize = inode->i_size;
2041 	/* see the comment in fuse_change_attributes() */
2042 	if (!is_wb || is_truncate)
2043 		i_size_write(inode, outarg.attr.size);
2044 
2045 	if (is_truncate) {
2046 		/* NOTE: this may release/reacquire fi->lock */
2047 		__fuse_release_nowrite(inode);
2048 	}
2049 	spin_unlock(&fi->lock);
2050 
2051 	/*
2052 	 * Only call invalidate_inode_pages2() after removing
2053 	 * FUSE_NOWRITE, otherwise fuse_launder_folio() would deadlock.
2054 	 */
2055 	if ((is_truncate || !is_wb) &&
2056 	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
2057 		truncate_pagecache(inode, outarg.attr.size);
2058 		invalidate_inode_pages2(mapping);
2059 	}
2060 
2061 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2062 out:
2063 	if (fault_blocked)
2064 		filemap_invalidate_unlock(mapping);
2065 
2066 	return 0;
2067 
2068 error:
2069 	if (is_truncate)
2070 		fuse_release_nowrite(inode);
2071 
2072 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2073 
2074 	if (fault_blocked)
2075 		filemap_invalidate_unlock(mapping);
2076 	return err;
2077 }
2078 
2079 static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry,
2080 			struct iattr *attr)
2081 {
2082 	struct inode *inode = d_inode(entry);
2083 	struct fuse_conn *fc = get_fuse_conn(inode);
2084 	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
2085 	int ret;
2086 
2087 	if (fuse_is_bad(inode))
2088 		return -EIO;
2089 
2090 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
2091 		return -EACCES;
2092 
2093 	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
2094 		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
2095 				    ATTR_MODE);
2096 
2097 		/*
2098 		 * The only sane way to reliably kill suid/sgid is to do it in
2099 		 * the userspace filesystem
2100 		 *
2101 		 * This should be done on write(), truncate() and chown().
2102 		 */
2103 		if (!fc->handle_killpriv && !fc->handle_killpriv_v2) {
2104 			/*
2105 			 * ia_mode calculation may have used stale i_mode.
2106 			 * Refresh and recalculate.
2107 			 */
2108 			ret = fuse_do_getattr(idmap, inode, NULL, file);
2109 			if (ret)
2110 				return ret;
2111 
2112 			attr->ia_mode = inode->i_mode;
2113 			if (inode->i_mode & S_ISUID) {
2114 				attr->ia_valid |= ATTR_MODE;
2115 				attr->ia_mode &= ~S_ISUID;
2116 			}
2117 			if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
2118 				attr->ia_valid |= ATTR_MODE;
2119 				attr->ia_mode &= ~S_ISGID;
2120 			}
2121 		}
2122 	}
2123 	if (!attr->ia_valid)
2124 		return 0;
2125 
2126 	ret = fuse_do_setattr(idmap, entry, attr, file);
2127 	if (!ret) {
2128 		/*
2129 		 * If filesystem supports acls it may have updated acl xattrs in
2130 		 * the filesystem, so forget cached acls for the inode.
2131 		 */
2132 		if (fc->posix_acl)
2133 			forget_all_cached_acls(inode);
2134 
2135 		/* Directory mode changed, may need to revalidate access */
2136 		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
2137 			fuse_invalidate_entry_cache(entry);
2138 	}
2139 	return ret;
2140 }
2141 
2142 static int fuse_getattr(struct mnt_idmap *idmap,
2143 			const struct path *path, struct kstat *stat,
2144 			u32 request_mask, unsigned int flags)
2145 {
2146 	struct inode *inode = d_inode(path->dentry);
2147 	struct fuse_conn *fc = get_fuse_conn(inode);
2148 
2149 	if (fuse_is_bad(inode))
2150 		return -EIO;
2151 
2152 	if (!fuse_allow_current_process(fc)) {
2153 		if (!request_mask) {
2154 			/*
2155 			 * If user explicitly requested *nothing* then don't
2156 			 * error out, but return st_dev only.
2157 			 */
2158 			stat->result_mask = 0;
2159 			stat->dev = inode->i_sb->s_dev;
2160 			return 0;
2161 		}
2162 		return -EACCES;
2163 	}
2164 
2165 	return fuse_update_get_attr(idmap, inode, NULL, stat, request_mask, flags);
2166 }
2167 
2168 static const struct inode_operations fuse_dir_inode_operations = {
2169 	.lookup		= fuse_lookup,
2170 	.mkdir		= fuse_mkdir,
2171 	.symlink	= fuse_symlink,
2172 	.unlink		= fuse_unlink,
2173 	.rmdir		= fuse_rmdir,
2174 	.rename		= fuse_rename2,
2175 	.link		= fuse_link,
2176 	.setattr	= fuse_setattr,
2177 	.create		= fuse_create,
2178 	.atomic_open	= fuse_atomic_open,
2179 	.tmpfile	= fuse_tmpfile,
2180 	.mknod		= fuse_mknod,
2181 	.permission	= fuse_permission,
2182 	.getattr	= fuse_getattr,
2183 	.listxattr	= fuse_listxattr,
2184 	.get_inode_acl	= fuse_get_inode_acl,
2185 	.get_acl	= fuse_get_acl,
2186 	.set_acl	= fuse_set_acl,
2187 	.fileattr_get	= fuse_fileattr_get,
2188 	.fileattr_set	= fuse_fileattr_set,
2189 };
2190 
2191 static const struct file_operations fuse_dir_operations = {
2192 	.llseek		= generic_file_llseek,
2193 	.read		= generic_read_dir,
2194 	.iterate_shared	= fuse_readdir,
2195 	.open		= fuse_dir_open,
2196 	.release	= fuse_dir_release,
2197 	.fsync		= fuse_dir_fsync,
2198 	.unlocked_ioctl	= fuse_dir_ioctl,
2199 	.compat_ioctl	= fuse_dir_compat_ioctl,
2200 };
2201 
2202 static const struct inode_operations fuse_common_inode_operations = {
2203 	.setattr	= fuse_setattr,
2204 	.permission	= fuse_permission,
2205 	.getattr	= fuse_getattr,
2206 	.listxattr	= fuse_listxattr,
2207 	.get_inode_acl	= fuse_get_inode_acl,
2208 	.get_acl	= fuse_get_acl,
2209 	.set_acl	= fuse_set_acl,
2210 	.fileattr_get	= fuse_fileattr_get,
2211 	.fileattr_set	= fuse_fileattr_set,
2212 };
2213 
2214 static const struct inode_operations fuse_symlink_inode_operations = {
2215 	.setattr	= fuse_setattr,
2216 	.get_link	= fuse_get_link,
2217 	.getattr	= fuse_getattr,
2218 	.listxattr	= fuse_listxattr,
2219 };
2220 
2221 void fuse_init_common(struct inode *inode)
2222 {
2223 	inode->i_op = &fuse_common_inode_operations;
2224 }
2225 
2226 void fuse_init_dir(struct inode *inode)
2227 {
2228 	struct fuse_inode *fi = get_fuse_inode(inode);
2229 
2230 	inode->i_op = &fuse_dir_inode_operations;
2231 	inode->i_fop = &fuse_dir_operations;
2232 
2233 	spin_lock_init(&fi->rdc.lock);
2234 	fi->rdc.cached = false;
2235 	fi->rdc.size = 0;
2236 	fi->rdc.pos = 0;
2237 	fi->rdc.version = 0;
2238 }
2239 
2240 static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
2241 {
2242 	int err = fuse_readlink_page(folio->mapping->host, folio);
2243 
2244 	if (!err)
2245 		folio_mark_uptodate(folio);
2246 
2247 	folio_unlock(folio);
2248 
2249 	return err;
2250 }
2251 
2252 static const struct address_space_operations fuse_symlink_aops = {
2253 	.read_folio	= fuse_symlink_read_folio,
2254 };
2255 
2256 void fuse_init_symlink(struct inode *inode)
2257 {
2258 	inode->i_op = &fuse_symlink_inode_operations;
2259 	inode->i_data.a_ops = &fuse_symlink_aops;
2260 	inode_nohighmem(inode);
2261 }
2262