xref: /linux/fs/fuse/dir.c (revision 566ab427f827b0256d3e8ce0235d088e6a9c28bd)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/fs_context.h>
14 #include <linux/moduleparam.h>
15 #include <linux/sched.h>
16 #include <linux/namei.h>
17 #include <linux/slab.h>
18 #include <linux/xattr.h>
19 #include <linux/iversion.h>
20 #include <linux/posix_acl.h>
21 #include <linux/security.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 
/*
 * Boolean module parameter, exposed with mode 0644.  Being a static with no
 * initializer it starts out false.  Its meaning is given by the
 * MODULE_PARM_DESC() text below.
 */
static bool __read_mostly allow_sys_admin_access;
module_param(allow_sys_admin_access, bool, 0644);
MODULE_PARM_DESC(allow_sys_admin_access,
		 "Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check");
29 
30 static void fuse_advise_use_readdirplus(struct inode *dir)
31 {
32 	struct fuse_inode *fi = get_fuse_inode(dir);
33 
34 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
35 }
36 
/*
 * Storage of the per-dentry validity time.
 *
 * When BITS_PER_LONG is at least 64 the u64 time value is stored directly in
 * the d_fsdata pointer itself (cast to and from void *).  Otherwise d_fsdata
 * points to a union fuse_dentry and the time lives in its 'time' member; the
 * union overlays that value with an rcu_head.
 */
#if BITS_PER_LONG >= 64
/* Store @time in the d_fsdata pointer value of @entry. */
static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
{
	entry->d_fsdata = (void *) time;
}

/* Read back the time stored in the d_fsdata pointer value of @entry. */
static inline u64 fuse_dentry_time(const struct dentry *entry)
{
	return (u64)entry->d_fsdata;
}

#else
union fuse_dentry {
	u64 time;		/* validity time of the dentry */
	struct rcu_head rcu;	/* shares storage with 'time' */
};

/* Store @time in the union fuse_dentry that d_fsdata points to. */
static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
{
	((union fuse_dentry *) dentry->d_fsdata)->time = time;
}

/* Read the time from the union fuse_dentry that d_fsdata points to. */
static inline u64 fuse_dentry_time(const struct dentry *entry)
{
	return ((union fuse_dentry *) entry->d_fsdata)->time;
}
#endif
64 
65 static void fuse_dentry_settime(struct dentry *dentry, u64 time)
66 {
67 	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
68 	bool delete = !time && fc->delete_stale;
69 	/*
70 	 * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
71 	 * Don't care about races, either way it's just an optimization
72 	 */
73 	if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
74 	    (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
75 		spin_lock(&dentry->d_lock);
76 		if (!delete)
77 			dentry->d_flags &= ~DCACHE_OP_DELETE;
78 		else
79 			dentry->d_flags |= DCACHE_OP_DELETE;
80 		spin_unlock(&dentry->d_lock);
81 	}
82 
83 	__fuse_dentry_settime(dentry, time);
84 }
85 
86 /*
87  * FUSE caches dentries and attributes with separate timeout.  The
88  * time in jiffies until the dentry/attributes are valid is stored in
89  * dentry->d_fsdata and fuse_inode->i_time respectively.
90  */
91 
92 /*
93  * Calculate the time in jiffies until a dentry/attributes are valid
94  */
95 u64 fuse_time_to_jiffies(u64 sec, u32 nsec)
96 {
97 	if (sec || nsec) {
98 		struct timespec64 ts = {
99 			sec,
100 			min_t(u32, nsec, NSEC_PER_SEC - 1)
101 		};
102 
103 		return get_jiffies_64() + timespec64_to_jiffies(&ts);
104 	} else
105 		return 0;
106 }
107 
108 /*
109  * Set dentry and possibly attribute timeouts from the lookup/mk*
110  * replies
111  */
112 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
113 {
114 	fuse_dentry_settime(entry,
115 		fuse_time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
116 }
117 
118 void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
119 {
120 	set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask);
121 }
122 
/*
 * Mark the attributes as stale, so that at the next call to
 * ->getattr() they will be fetched from userspace
 *
 * This invalidates every attribute covered by STATX_BASIC_STATS.
 */
void fuse_invalidate_attr(struct inode *inode)
{
	fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
}
131 
/*
 * Note that the contents of directory @dir changed: invalidate its cached
 * attributes and give its i_version a chance to be incremented
 * (inode_maybe_inc_iversion() is called with force == false).
 */
static void fuse_dir_changed(struct inode *dir)
{
	fuse_invalidate_attr(dir);
	inode_maybe_inc_iversion(dir, false);
}
137 
138 /*
139  * Mark the attributes as stale due to an atime change.  Avoid the invalidate if
140  * atime is not used.
141  */
142 void fuse_invalidate_atime(struct inode *inode)
143 {
144 	if (!IS_RDONLY(inode))
145 		fuse_invalidate_attr_mask(inode, STATX_ATIME);
146 }
147 
/*
 * Just mark the entry as stale, so that a next attempt to look it up
 * will result in a new lookup call to userspace
 *
 * This is called when a dentry is about to become negative and the
 * timeout is unknown (unlink, rmdir, rename and in some cases
 * lookup)
 *
 * Implemented by setting the dentry's validity time to 0.
 */
void fuse_invalidate_entry_cache(struct dentry *entry)
{
	fuse_dentry_settime(entry, 0);
}
160 
/*
 * Same as fuse_invalidate_entry_cache(), but also try to remove the
 * dentry from the hash
 *
 * d_invalidate() is called first, then the validity time is reset.
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);
	fuse_invalidate_entry_cache(entry);
}
170 
171 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
172 			     u64 nodeid, const struct qstr *name,
173 			     struct fuse_entry_out *outarg)
174 {
175 	memset(outarg, 0, sizeof(struct fuse_entry_out));
176 	args->opcode = FUSE_LOOKUP;
177 	args->nodeid = nodeid;
178 	args->in_numargs = 1;
179 	args->in_args[0].size = name->len + 1;
180 	args->in_args[0].value = name->name;
181 	args->out_numargs = 1;
182 	args->out_args[0].size = sizeof(struct fuse_entry_out);
183 	args->out_args[0].value = outarg;
184 }
185 
/*
 * Check whether the dentry is still valid
 *
 * If the entry validity timeout has expired and the dentry is
 * positive, try to redo the lookup.  If the lookup results in a
 * different inode, then let the VFS invalidate the dentry and redo
 * the lookup once more.  If the lookup results in the same inode,
 * then refresh the attributes, timeouts and mark the dentry valid.
 *
 * Returns 1 if the dentry is valid, 0 if it is invalid, or a negative
 * errno: -ECHILD when a lookup request would be needed (or the
 * FUSE_I_INIT_RDPLUS bit is set) while LOOKUP_RCU is in @flags, -ENOMEM
 * if the forget link cannot be allocated, and -ENOMEM/-EINTR passed on
 * from the lookup request.
 */
static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
{
	struct inode *inode;
	struct dentry *parent;
	struct fuse_mount *fm;
	struct fuse_inode *fi;
	int ret;

	inode = d_inode_rcu(entry);
	if (inode && fuse_is_bad(inode))
		goto invalid;
	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
		 (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) {
		/* Timeout expired, or the caller's flags force a re-lookup */
		struct fuse_entry_out outarg;
		FUSE_ARGS(args);
		struct fuse_forget_link *forget;
		u64 attr_version;

		/* For negative dentries, always do a fresh lookup */
		if (!inode)
			goto invalid;

		/* A request to userspace cannot be sent in RCU mode */
		ret = -ECHILD;
		if (flags & LOOKUP_RCU)
			goto out;

		fm = get_fuse_mount(inode);

		forget = fuse_alloc_forget();
		ret = -ENOMEM;
		if (!forget)
			goto out;

		/* Sampled before the request; passed to fuse_change_attributes() */
		attr_version = fuse_get_attr_version(fm->fc);

		parent = dget_parent(entry);
		fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)),
				 &entry->d_name, &outarg);
		ret = fuse_simple_request(fm, &args);
		dput(parent);
		/* Zero nodeid is same as -ENOENT */
		if (!ret && !outarg.nodeid)
			ret = -ENOENT;
		if (!ret) {
			fi = get_fuse_inode(inode);
			if (outarg.nodeid != get_node_id(inode) ||
			    (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
				/*
				 * The name now refers to a different node (or
				 * its submount status changed).  The forget
				 * link is handed to fuse_queue_forget() and is
				 * not freed by this function on this path.
				 */
				fuse_queue_forget(fm->fc, forget,
						  outarg.nodeid, 1);
				goto invalid;
			}
			/* Same node: count the successful lookup on the inode */
			spin_lock(&fi->lock);
			fi->nlookup++;
			spin_unlock(&fi->lock);
		}
		kfree(forget);
		/* These two errors are reported instead of invalidating */
		if (ret == -ENOMEM || ret == -EINTR)
			goto out;
		if (ret || fuse_invalid_attr(&outarg.attr) ||
		    fuse_stale_inode(inode, outarg.generation, &outarg.attr))
			goto invalid;

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &outarg.attr, NULL,
				       ATTR_TIMEOUT(&outarg),
				       attr_version);
		fuse_change_entry_timeout(entry, &outarg);
	} else if (inode) {
		/* Still within the timeout: only handle FUSE_I_INIT_RDPLUS */
		fi = get_fuse_inode(inode);
		if (flags & LOOKUP_RCU) {
			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
				return -ECHILD;
		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
			parent = dget_parent(entry);
			fuse_advise_use_readdirplus(d_inode(parent));
			dput(parent);
		}
	}
	ret = 1;
out:
	return ret;

invalid:
	ret = 0;
	goto out;
}
281 
282 #if BITS_PER_LONG < 64
283 static int fuse_dentry_init(struct dentry *dentry)
284 {
285 	dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
286 				   GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
287 
288 	return dentry->d_fsdata ? 0 : -ENOMEM;
289 }
290 static void fuse_dentry_release(struct dentry *dentry)
291 {
292 	union fuse_dentry *fd = dentry->d_fsdata;
293 
294 	kfree_rcu(fd, rcu);
295 }
296 #endif
297 
298 static int fuse_dentry_delete(const struct dentry *dentry)
299 {
300 	return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
301 }
302 
303 /*
304  * Create a fuse_mount object with a new superblock (with path->dentry
305  * as the root), and return that mount so it can be auto-mounted on
306  * @path.
307  */
308 static struct vfsmount *fuse_dentry_automount(struct path *path)
309 {
310 	struct fs_context *fsc;
311 	struct vfsmount *mnt;
312 	struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
313 
314 	fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
315 	if (IS_ERR(fsc))
316 		return ERR_CAST(fsc);
317 
318 	/* Pass the FUSE inode of the mount for fuse_get_tree_submount() */
319 	fsc->fs_private = mp_fi;
320 
321 	/* Create the submount */
322 	mnt = fc_mount(fsc);
323 	if (!IS_ERR(mnt))
324 		mntget(mnt);
325 
326 	put_fs_context(fsc);
327 	return mnt;
328 }
329 
/*
 * Dentry operations table wiring up the revalidate, delete and automount
 * callbacks defined in this file.  The d_init/d_release pair is only present
 * when BITS_PER_LONG < 64, i.e. when d_fsdata points to an allocated
 * union fuse_dentry.
 */
const struct dentry_operations fuse_dentry_operations = {
	.d_revalidate	= fuse_dentry_revalidate,
	.d_delete	= fuse_dentry_delete,
#if BITS_PER_LONG < 64
	.d_init		= fuse_dentry_init,
	.d_release	= fuse_dentry_release,
#endif
	.d_automount	= fuse_dentry_automount,
};
339 
/*
 * Dentry operations table that carries only the d_fsdata allocation and
 * release hooks (and is therefore empty when BITS_PER_LONG >= 64).  It has
 * no revalidate, delete or automount callback.
 */
const struct dentry_operations fuse_root_dentry_operations = {
#if BITS_PER_LONG < 64
	.d_init		= fuse_dentry_init,
	.d_release	= fuse_dentry_release,
#endif
};
346 
/*
 * Return 1 if the file-type bits of mode @m denote one of the seven known
 * file types (regular, directory, symlink, character device, block device,
 * FIFO, socket) and 0 otherwise.
 */
int fuse_valid_type(int m)
{
	if (S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m))
		return 1;

	if (S_ISCHR(m) || S_ISBLK(m))
		return 1;

	return S_ISFIFO(m) || S_ISSOCK(m);
}
352 
353 static bool fuse_valid_size(u64 size)
354 {
355 	return size <= LLONG_MAX;
356 }
357 
358 bool fuse_invalid_attr(struct fuse_attr *attr)
359 {
360 	return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
361 }
362 
/*
 * Send a FUSE_LOOKUP for @name below node @nodeid and, for a positive
 * reply, obtain the matching inode.
 *
 * @outarg receives the reply.  *@inode is set to NULL first and only
 * replaced when the reply names a node and fuse_iget() succeeds.
 *
 * Returns 0 on success.  A reply with a zero nodeid also returns 0, with
 * *@inode left NULL.  Errors: -ENAMETOOLONG if the name exceeds
 * FUSE_NAME_MAX, -ENOMEM on allocation failure, -EIO if the returned
 * attributes are invalid, or whatever error the request itself returned.
 */
int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
		     struct fuse_entry_out *outarg, struct inode **inode)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	FUSE_ARGS(args);
	struct fuse_forget_link *forget;
	u64 attr_version;
	int err;

	*inode = NULL;
	err = -ENAMETOOLONG;
	if (name->len > FUSE_NAME_MAX)
		goto out;


	/* Allocated up front so a forget can be queued if fuse_iget() fails */
	forget = fuse_alloc_forget();
	err = -ENOMEM;
	if (!forget)
		goto out;

	attr_version = fuse_get_attr_version(fm->fc);

	fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
	err = fuse_simple_request(fm, &args);
	/* Zero nodeid is same as -ENOENT, but with valid timeout */
	if (err || !outarg->nodeid)
		goto out_put_forget;

	err = -EIO;
	if (fuse_invalid_attr(&outarg->attr))
		goto out_put_forget;
	/* Tolerate, but warn once about, a non-zero generation for the root */
	if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
		pr_warn_once("root generation should be zero\n");
		outarg->generation = 0;
	}

	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
			   &outarg->attr, ATTR_TIMEOUT(outarg),
			   attr_version);
	err = -ENOMEM;
	if (!*inode) {
		/*
		 * The forget link is passed to fuse_queue_forget(), hence the
		 * jump past the kfree() below.
		 */
		fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
		goto out;
	}
	err = 0;

 out_put_forget:
	kfree(forget);
 out:
	return err;
}
414 
/*
 * Look up @entry in directory @dir by asking userspace.
 *
 * A -ENOENT result from fuse_lookup_name() is not treated as an error: the
 * dentry is spliced without an inode and its validity time is set to zero
 * because no usable timeout came back.  A reply that names FUSE_ROOT_ID is
 * rejected with -EIO.
 *
 * Returns the result of d_splice_alias() (which may be NULL) on success, or
 * an ERR_PTR() on failure.
 */
static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
				  unsigned int flags)
{
	int err;
	struct fuse_entry_out outarg;
	struct inode *inode;
	struct dentry *newent;
	bool outarg_valid = true;
	bool locked;

	if (fuse_is_bad(dir))
		return ERR_PTR(-EIO);

	locked = fuse_lock_inode(dir);
	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
			       &outarg, &inode);
	fuse_unlock_inode(dir, locked);
	if (err == -ENOENT) {
		/* No reply data to take a timeout from */
		outarg_valid = false;
		err = 0;
	}
	if (err)
		goto out_err;

	/* A child must never resolve to the root node */
	err = -EIO;
	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
		goto out_iput;

	newent = d_splice_alias(inode, entry);
	err = PTR_ERR(newent);
	if (IS_ERR(newent))
		goto out_err;

	/* Apply the timeout to whichever dentry ends up being used */
	entry = newent ? newent : entry;
	if (outarg_valid)
		fuse_change_entry_timeout(entry, &outarg);
	else
		fuse_invalidate_entry_cache(entry);

	if (inode)
		fuse_advise_use_readdirplus(dir);
	return newent;

 out_iput:
	iput(inode);
 out_err:
	return ERR_PTR(err);
}
463 
464 static int get_security_context(struct dentry *entry, umode_t mode,
465 				struct fuse_in_arg *ext)
466 {
467 	struct fuse_secctx *fctx;
468 	struct fuse_secctx_header *header;
469 	void *ctx = NULL, *ptr;
470 	u32 ctxlen, total_len = sizeof(*header);
471 	int err, nr_ctx = 0;
472 	const char *name;
473 	size_t namelen;
474 
475 	err = security_dentry_init_security(entry, mode, &entry->d_name,
476 					    &name, &ctx, &ctxlen);
477 	if (err) {
478 		if (err != -EOPNOTSUPP)
479 			goto out_err;
480 		/* No LSM is supporting this security hook. Ignore error */
481 		ctxlen = 0;
482 		ctx = NULL;
483 	}
484 
485 	if (ctxlen) {
486 		nr_ctx = 1;
487 		namelen = strlen(name) + 1;
488 		err = -EIO;
489 		if (WARN_ON(namelen > XATTR_NAME_MAX + 1 || ctxlen > S32_MAX))
490 			goto out_err;
491 		total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen + ctxlen);
492 	}
493 
494 	err = -ENOMEM;
495 	header = ptr = kzalloc(total_len, GFP_KERNEL);
496 	if (!ptr)
497 		goto out_err;
498 
499 	header->nr_secctx = nr_ctx;
500 	header->size = total_len;
501 	ptr += sizeof(*header);
502 	if (nr_ctx) {
503 		fctx = ptr;
504 		fctx->size = ctxlen;
505 		ptr += sizeof(*fctx);
506 
507 		strcpy(ptr, name);
508 		ptr += namelen;
509 
510 		memcpy(ptr, ctx, ctxlen);
511 	}
512 	ext->size = total_len;
513 	ext->value = header;
514 	err = 0;
515 out_err:
516 	kfree(ctx);
517 	return err;
518 }
519 
520 static void *extend_arg(struct fuse_in_arg *buf, u32 bytes)
521 {
522 	void *p;
523 	u32 newlen = buf->size + bytes;
524 
525 	p = krealloc(buf->value, newlen, GFP_KERNEL);
526 	if (!p) {
527 		kfree(buf->value);
528 		buf->size = 0;
529 		buf->value = NULL;
530 		return NULL;
531 	}
532 
533 	memset(p + buf->size, 0, bytes);
534 	buf->value = p;
535 	buf->size = newlen;
536 
537 	return p + newlen - bytes;
538 }
539 
540 static u32 fuse_ext_size(size_t size)
541 {
542 	return FUSE_REC_ALIGN(sizeof(struct fuse_ext_header) + size);
543 }
544 
545 /*
546  * This adds just a single supplementary group that matches the parent's group.
547  */
548 static int get_create_supp_group(struct mnt_idmap *idmap,
549 				 struct inode *dir,
550 				 struct fuse_in_arg *ext)
551 {
552 	struct fuse_conn *fc = get_fuse_conn(dir);
553 	struct fuse_ext_header *xh;
554 	struct fuse_supp_groups *sg;
555 	kgid_t kgid = dir->i_gid;
556 	vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns, kgid);
557 	gid_t parent_gid = from_kgid(fc->user_ns, kgid);
558 
559 	u32 sg_len = fuse_ext_size(sizeof(*sg) + sizeof(sg->groups[0]));
560 
561 	if (parent_gid == (gid_t) -1 || vfsgid_eq_kgid(vfsgid, current_fsgid()) ||
562 	    !vfsgid_in_group_p(vfsgid))
563 		return 0;
564 
565 	xh = extend_arg(ext, sg_len);
566 	if (!xh)
567 		return -ENOMEM;
568 
569 	xh->size = sg_len;
570 	xh->type = FUSE_EXT_GROUPS;
571 
572 	sg = (struct fuse_supp_groups *) &xh[1];
573 	sg->nr_groups = 1;
574 	sg->groups[0] = parent_gid;
575 
576 	return 0;
577 }
578 
579 static int get_create_ext(struct mnt_idmap *idmap,
580 			  struct fuse_args *args,
581 			  struct inode *dir, struct dentry *dentry,
582 			  umode_t mode)
583 {
584 	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
585 	struct fuse_in_arg ext = { .size = 0, .value = NULL };
586 	int err = 0;
587 
588 	if (fc->init_security)
589 		err = get_security_context(dentry, mode, &ext);
590 	if (!err && fc->create_supp_group)
591 		err = get_create_supp_group(idmap, dir, &ext);
592 
593 	if (!err && ext.size) {
594 		WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args));
595 		args->is_ext = true;
596 		args->ext_idx = args->in_numargs++;
597 		args->in_args[args->ext_idx] = ext;
598 	} else {
599 		kfree(ext.value);
600 	}
601 
602 	return err;
603 }
604 
605 static void free_ext_value(struct fuse_args *args)
606 {
607 	if (args->is_ext)
608 		kfree(args->in_args[args->ext_idx].value);
609 }
610 
/*
 * Atomic create+open operation
 *
 * If the filesystem doesn't support this, then fall back to separate
 * 'mknod' + 'open' requests.
 *
 * Sends a request with the given @opcode (the callers in this file pass
 * FUSE_CREATE or FUSE_TMPFILE) that creates a regular file named after
 * @entry in @dir and opens it in one step, then instantiates @entry and
 * finishes opening @file.
 *
 * Returns 0 on success or a negative errno: -ENOMEM on allocation failure,
 * -EIO if the reply does not describe a valid regular file, or the error of
 * the request, generic_file_open() or finish_open().
 */
static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
			    struct dentry *entry, struct file *file,
			    unsigned int flags, umode_t mode, u32 opcode)
{
	int err;
	struct inode *inode;
	struct fuse_mount *fm = get_fuse_mount(dir);
	FUSE_ARGS(args);
	struct fuse_forget_link *forget;
	struct fuse_create_in inarg;
	struct fuse_open_out *outopenp;
	struct fuse_entry_out outentry;
	struct fuse_inode *fi;
	struct fuse_file *ff;
	/* Remembered before 'flags' is modified further down */
	bool trunc = flags & O_TRUNC;

	/* Userspace expects S_IFREG in create mode */
	BUG_ON((mode & S_IFMT) != S_IFREG);

	forget = fuse_alloc_forget();
	err = -ENOMEM;
	if (!forget)
		goto out_err;

	err = -ENOMEM;
	ff = fuse_file_alloc(fm, true);
	if (!ff)
		goto out_put_forget_req;

	/* Apply the umask here unless the connection asked not to */
	if (!fm->fc->dont_mask)
		mode &= ~current_umask();

	flags &= ~O_NOCTTY;
	memset(&inarg, 0, sizeof(inarg));
	memset(&outentry, 0, sizeof(outentry));
	inarg.flags = flags;
	inarg.mode = mode;
	inarg.umask = current_umask();

	if (fm->fc->handle_killpriv_v2 && trunc &&
	    !(flags & O_EXCL) && !capable(CAP_FSETID)) {
		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
	}

	/* Inputs: the create parameters and the name; outputs: entry + open */
	args.opcode = opcode;
	args.nodeid = get_node_id(dir);
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.in_args[1].size = entry->d_name.len + 1;
	args.in_args[1].value = entry->d_name.name;
	args.out_numargs = 2;
	args.out_args[0].size = sizeof(outentry);
	args.out_args[0].value = &outentry;
	/* Store outarg for fuse_finish_open() */
	outopenp = &ff->args->open_outarg;
	args.out_args[1].size = sizeof(*outopenp);
	args.out_args[1].value = outopenp;

	err = get_create_ext(idmap, &args, dir, entry, mode);
	if (err)
		goto out_free_ff;

	err = fuse_simple_idmap_request(idmap, fm, &args);
	/* The extension buffer is no longer needed once the request is done */
	free_ext_value(&args);
	if (err)
		goto out_free_ff;

	/* Sanity-check the reply: it must describe a valid regular file */
	err = -EIO;
	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
	    fuse_invalid_attr(&outentry.attr))
		goto out_free_ff;

	ff->fh = outopenp->fh;
	ff->nodeid = outentry.nodeid;
	ff->open_flags = outopenp->open_flags;
	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
			  &outentry.attr, ATTR_TIMEOUT(&outentry), 0);
	if (!inode) {
		/*
		 * The file was created and opened but no inode could be set
		 * up for it: release the open file and queue a forget for the
		 * node.  'ff' and 'forget' are passed to those calls, which
		 * is why this path jumps past both cleanup labels.
		 */
		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
		fuse_sync_release(NULL, ff, flags);
		fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
		err = -ENOMEM;
		goto out_err;
	}
	kfree(forget);
	d_instantiate(entry, inode);
	fuse_change_entry_timeout(entry, &outentry);
	fuse_dir_changed(dir);
	err = generic_file_open(inode, file);
	if (!err) {
		file->private_data = ff;
		err = finish_open(file, entry, fuse_finish_open);
	}
	if (err) {
		/* The open could not be completed: release the file again */
		fi = get_fuse_inode(inode);
		fuse_sync_release(fi, ff, flags);
	} else {
		if (fm->fc->atomic_o_trunc && trunc)
			truncate_pagecache(inode, 0);
		else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
			invalidate_inode_pages2(inode->i_mapping);
	}
	return err;

out_free_ff:
	fuse_file_free(ff);
out_put_forget_req:
	kfree(forget);
out_err:
	return err;
}
729 
/* Forward declaration: fuse_atomic_open() falls back to fuse_mknod() */
static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
		      umode_t, dev_t);

/*
 * Atomic open: look up @entry if that is still pending, then create and
 * open it in a single request when O_CREAT is given and the entry is
 * negative.
 *
 * When nothing has to be created, the function finishes with
 * finish_no_open().  If the connection is known not to support FUSE_CREATE
 * (fc->no_create), or the create request returns -ENOSYS, the file is
 * created with fuse_mknod() instead and the open is likewise left to
 * finish_no_open().
 *
 * Returns the result of finish_no_open() on those paths, otherwise 0 or a
 * negative errno.
 */
static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
			    struct file *file, unsigned flags,
			    umode_t mode)
{
	int err;
	struct mnt_idmap *idmap = file_mnt_idmap(file);
	struct fuse_conn *fc = get_fuse_conn(dir);
	struct dentry *res = NULL;

	if (fuse_is_bad(dir))
		return -EIO;

	if (d_in_lookup(entry)) {
		res = fuse_lookup(dir, entry, 0);
		if (IS_ERR(res))
			return PTR_ERR(res);

		/* Continue with the dentry the lookup returned, if any */
		if (res)
			entry = res;
	}

	if (!(flags & O_CREAT) || d_really_is_positive(entry))
		goto no_open;

	/* Only creates */
	file->f_mode |= FMODE_CREATED;

	if (fc->no_create)
		goto mknod;

	err = fuse_create_open(idmap, dir, entry, file, flags, mode, FUSE_CREATE);
	if (err == -ENOSYS) {
		/* Remember that FUSE_CREATE is unsupported on this connection */
		fc->no_create = 1;
		goto mknod;
	} else if (err == -EEXIST)
		fuse_invalidate_entry(entry);
out_dput:
	/* 'res' is NULL unless the lookup above returned a dentry */
	dput(res);
	return err;

mknod:
	err = fuse_mknod(idmap, dir, entry, mode, 0);
	if (err)
		goto out_dput;
no_open:
	return finish_no_open(file, res);
}
779 
/*
 * Code shared between mknod, mkdir, symlink and link
 *
 * The caller has filled in args->opcode and the input arguments.  This
 * function adds the node id of @dir and a struct fuse_entry_out output
 * argument, attaches the create extensions (for every opcode except
 * FUSE_LINK), sends the request and splices the resulting inode into
 * @entry.
 *
 * @mode is used for the extensions and to verify that the file type in the
 * reply matches the one that was requested.
 *
 * Returns 0 on success or a negative errno: -EIO for a bad directory or an
 * invalid/mismatching reply, -ENOMEM on allocation failure, or the error of
 * the request or of d_splice_alias().
 */
static int create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm,
			    struct fuse_args *args, struct inode *dir,
			    struct dentry *entry, umode_t mode)
{
	struct fuse_entry_out outarg;
	struct inode *inode;
	struct dentry *d;
	int err;
	struct fuse_forget_link *forget;

	if (fuse_is_bad(dir))
		return -EIO;

	forget = fuse_alloc_forget();
	if (!forget)
		return -ENOMEM;

	memset(&outarg, 0, sizeof(outarg));
	args->nodeid = get_node_id(dir);
	args->out_numargs = 1;
	args->out_args[0].size = sizeof(outarg);
	args->out_args[0].value = &outarg;

	if (args->opcode != FUSE_LINK) {
		err = get_create_ext(idmap, args, dir, entry, mode);
		if (err)
			goto out_put_forget_req;
	}

	err = fuse_simple_idmap_request(idmap, fm, args);
	free_ext_value(args);
	if (err)
		goto out_put_forget_req;

	err = -EIO;
	if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
		goto out_put_forget_req;

	/* The file type in the reply must match the requested one */
	if ((outarg.attr.mode ^ mode) & S_IFMT)
		goto out_put_forget_req;

	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
			  &outarg.attr, ATTR_TIMEOUT(&outarg), 0);
	if (!inode) {
		/* The forget link is passed on here, so it is not freed */
		fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
		return -ENOMEM;
	}
	kfree(forget);

	d_drop(entry);
	d = d_splice_alias(inode, entry);
	if (IS_ERR(d))
		return PTR_ERR(d);

	/* Set the timeout on whichever dentry now refers to the inode */
	if (d) {
		fuse_change_entry_timeout(d, &outarg);
		dput(d);
	} else {
		fuse_change_entry_timeout(entry, &outarg);
	}
	fuse_dir_changed(dir);
	return 0;

 out_put_forget_req:
	if (err == -EEXIST)
		fuse_invalidate_entry(entry);
	kfree(forget);
	return err;
}
852 
853 static int fuse_mknod(struct mnt_idmap *idmap, struct inode *dir,
854 		      struct dentry *entry, umode_t mode, dev_t rdev)
855 {
856 	struct fuse_mknod_in inarg;
857 	struct fuse_mount *fm = get_fuse_mount(dir);
858 	FUSE_ARGS(args);
859 
860 	if (!fm->fc->dont_mask)
861 		mode &= ~current_umask();
862 
863 	memset(&inarg, 0, sizeof(inarg));
864 	inarg.mode = mode;
865 	inarg.rdev = new_encode_dev(rdev);
866 	inarg.umask = current_umask();
867 	args.opcode = FUSE_MKNOD;
868 	args.in_numargs = 2;
869 	args.in_args[0].size = sizeof(inarg);
870 	args.in_args[0].value = &inarg;
871 	args.in_args[1].size = entry->d_name.len + 1;
872 	args.in_args[1].value = entry->d_name.name;
873 	return create_new_entry(idmap, fm, &args, dir, entry, mode);
874 }
875 
/*
 * Create a file through fuse_mknod() with a device number of 0.  The @excl
 * argument is not used.
 */
static int fuse_create(struct mnt_idmap *idmap, struct inode *dir,
		       struct dentry *entry, umode_t mode, bool excl)
{
	return fuse_mknod(idmap, dir, entry, mode, 0);
}
881 
882 static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
883 			struct file *file, umode_t mode)
884 {
885 	struct fuse_conn *fc = get_fuse_conn(dir);
886 	int err;
887 
888 	if (fc->no_tmpfile)
889 		return -EOPNOTSUPP;
890 
891 	err = fuse_create_open(idmap, dir, file->f_path.dentry, file,
892 			       file->f_flags, mode, FUSE_TMPFILE);
893 	if (err == -ENOSYS) {
894 		fc->no_tmpfile = 1;
895 		err = -EOPNOTSUPP;
896 	}
897 	return err;
898 }
899 
900 static int fuse_mkdir(struct mnt_idmap *idmap, struct inode *dir,
901 		      struct dentry *entry, umode_t mode)
902 {
903 	struct fuse_mkdir_in inarg;
904 	struct fuse_mount *fm = get_fuse_mount(dir);
905 	FUSE_ARGS(args);
906 
907 	if (!fm->fc->dont_mask)
908 		mode &= ~current_umask();
909 
910 	memset(&inarg, 0, sizeof(inarg));
911 	inarg.mode = mode;
912 	inarg.umask = current_umask();
913 	args.opcode = FUSE_MKDIR;
914 	args.in_numargs = 2;
915 	args.in_args[0].size = sizeof(inarg);
916 	args.in_args[0].value = &inarg;
917 	args.in_args[1].size = entry->d_name.len + 1;
918 	args.in_args[1].value = entry->d_name.name;
919 	return create_new_entry(idmap, fm, &args, dir, entry, S_IFDIR);
920 }
921 
922 static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir,
923 			struct dentry *entry, const char *link)
924 {
925 	struct fuse_mount *fm = get_fuse_mount(dir);
926 	unsigned len = strlen(link) + 1;
927 	FUSE_ARGS(args);
928 
929 	args.opcode = FUSE_SYMLINK;
930 	args.in_numargs = 2;
931 	args.in_args[0].size = entry->d_name.len + 1;
932 	args.in_args[0].value = entry->d_name.name;
933 	args.in_args[1].size = len;
934 	args.in_args[1].value = link;
935 	return create_new_entry(idmap, fm, &args, dir, entry, S_IFLNK);
936 }
937 
938 void fuse_flush_time_update(struct inode *inode)
939 {
940 	int err = sync_inode_metadata(inode, 1);
941 
942 	mapping_set_error(inode->i_mapping, err);
943 }
944 
/*
 * Set the cached ctime of @inode to the current time, mark the inode dirty
 * and flush the time update.  Nothing happens if IS_NOCMTIME(inode) is true.
 */
static void fuse_update_ctime_in_cache(struct inode *inode)
{
	if (IS_NOCMTIME(inode))
		return;

	inode_set_ctime_current(inode);
	mark_inode_dirty_sync(inode);
	fuse_flush_time_update(inode);
}
953 
/*
 * Note a ctime change of @inode: invalidate the cached STATX_CTIME
 * attribute and then update the ctime held in the inode cache.
 */
void fuse_update_ctime(struct inode *inode)
{
	fuse_invalidate_attr_mask(inode, STATX_CTIME);
	fuse_update_ctime_in_cache(inode);
}
959 
960 static void fuse_entry_unlinked(struct dentry *entry)
961 {
962 	struct inode *inode = d_inode(entry);
963 	struct fuse_conn *fc = get_fuse_conn(inode);
964 	struct fuse_inode *fi = get_fuse_inode(inode);
965 
966 	spin_lock(&fi->lock);
967 	fi->attr_version = atomic64_inc_return(&fc->attr_version);
968 	/*
969 	 * If i_nlink == 0 then unlink doesn't make sense, yet this can
970 	 * happen if userspace filesystem is careless.  It would be
971 	 * difficult to enforce correct nlink usage so just ignore this
972 	 * condition here
973 	 */
974 	if (S_ISDIR(inode->i_mode))
975 		clear_nlink(inode);
976 	else if (inode->i_nlink > 0)
977 		drop_nlink(inode);
978 	spin_unlock(&fi->lock);
979 	fuse_invalidate_entry_cache(entry);
980 	fuse_update_ctime(inode);
981 }
982 
983 static int fuse_unlink(struct inode *dir, struct dentry *entry)
984 {
985 	int err;
986 	struct fuse_mount *fm = get_fuse_mount(dir);
987 	FUSE_ARGS(args);
988 
989 	if (fuse_is_bad(dir))
990 		return -EIO;
991 
992 	args.opcode = FUSE_UNLINK;
993 	args.nodeid = get_node_id(dir);
994 	args.in_numargs = 1;
995 	args.in_args[0].size = entry->d_name.len + 1;
996 	args.in_args[0].value = entry->d_name.name;
997 	err = fuse_simple_request(fm, &args);
998 	if (!err) {
999 		fuse_dir_changed(dir);
1000 		fuse_entry_unlinked(entry);
1001 	} else if (err == -EINTR || err == -ENOENT)
1002 		fuse_invalidate_entry(entry);
1003 	return err;
1004 }
1005 
1006 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
1007 {
1008 	int err;
1009 	struct fuse_mount *fm = get_fuse_mount(dir);
1010 	FUSE_ARGS(args);
1011 
1012 	if (fuse_is_bad(dir))
1013 		return -EIO;
1014 
1015 	args.opcode = FUSE_RMDIR;
1016 	args.nodeid = get_node_id(dir);
1017 	args.in_numargs = 1;
1018 	args.in_args[0].size = entry->d_name.len + 1;
1019 	args.in_args[0].value = entry->d_name.name;
1020 	err = fuse_simple_request(fm, &args);
1021 	if (!err) {
1022 		fuse_dir_changed(dir);
1023 		fuse_entry_unlinked(entry);
1024 	} else if (err == -EINTR || err == -ENOENT)
1025 		fuse_invalidate_entry(entry);
1026 	return err;
1027 }
1028 
/*
 * Send a rename request and update the caches according to its result.
 *
 * @opcode and @argsize select the request variant; the callers in this file
 * pass FUSE_RENAME with sizeof(struct fuse_rename_in) or FUSE_RENAME2 with
 * sizeof(struct fuse_rename2_in).  Only the first @argsize bytes of the
 * local struct fuse_rename2_in are zeroed and sent.
 *
 * Returns the result of the request.
 */
static int fuse_rename_common(struct mnt_idmap *idmap, struct inode *olddir, struct dentry *oldent,
			      struct inode *newdir, struct dentry *newent,
			      unsigned int flags, int opcode, size_t argsize)
{
	int err;
	struct fuse_rename2_in inarg;
	struct fuse_mount *fm = get_fuse_mount(olddir);
	FUSE_ARGS(args);

	memset(&inarg, 0, argsize);
	inarg.newdir = get_node_id(newdir);
	inarg.flags = flags;
	/* Inputs: the rename parameters, the old name and the new name */
	args.opcode = opcode;
	args.nodeid = get_node_id(olddir);
	args.in_numargs = 3;
	args.in_args[0].size = argsize;
	args.in_args[0].value = &inarg;
	args.in_args[1].size = oldent->d_name.len + 1;
	args.in_args[1].value = oldent->d_name.name;
	args.in_args[2].size = newent->d_name.len + 1;
	args.in_args[2].value = newent->d_name.name;
	err = fuse_simple_idmap_request(idmap, fm, &args);
	if (!err) {
		/* ctime changes */
		fuse_update_ctime(d_inode(oldent));

		/* With RENAME_EXCHANGE the target inode is renamed as well */
		if (flags & RENAME_EXCHANGE)
			fuse_update_ctime(d_inode(newent));

		fuse_dir_changed(olddir);
		if (olddir != newdir)
			fuse_dir_changed(newdir);

		/* newent will end up negative */
		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent))
			fuse_entry_unlinked(newent);
	} else if (err == -EINTR || err == -ENOENT) {
		/* If request was interrupted, DEITY only knows if the
		   rename actually took place.  If the invalidation
		   fails (e.g. some process has CWD under the renamed
		   directory), then there can be inconsistency between
		   the dcache and the real filesystem.  Tough luck. */
		fuse_invalidate_entry(oldent);
		if (d_really_is_positive(newent))
			fuse_invalidate_entry(newent);
	}

	return err;
}
1078 
1079 static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir,
1080 			struct dentry *oldent, struct inode *newdir,
1081 			struct dentry *newent, unsigned int flags)
1082 {
1083 	struct fuse_conn *fc = get_fuse_conn(olddir);
1084 	int err;
1085 
1086 	if (fuse_is_bad(olddir))
1087 		return -EIO;
1088 
1089 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
1090 		return -EINVAL;
1091 
1092 	if (flags) {
1093 		if (fc->no_rename2 || fc->minor < 23)
1094 			return -EINVAL;
1095 
1096 		err = fuse_rename_common((flags & RENAME_WHITEOUT) ? idmap : &invalid_mnt_idmap,
1097 					 olddir, oldent, newdir, newent, flags,
1098 					 FUSE_RENAME2,
1099 					 sizeof(struct fuse_rename2_in));
1100 		if (err == -ENOSYS) {
1101 			fc->no_rename2 = 1;
1102 			err = -EINVAL;
1103 		}
1104 	} else {
1105 		err = fuse_rename_common(&invalid_mnt_idmap, olddir, oldent, newdir, newent, 0,
1106 					 FUSE_RENAME,
1107 					 sizeof(struct fuse_rename_in));
1108 	}
1109 
1110 	return err;
1111 }
1112 
1113 static int fuse_link(struct dentry *entry, struct inode *newdir,
1114 		     struct dentry *newent)
1115 {
1116 	int err;
1117 	struct fuse_link_in inarg;
1118 	struct inode *inode = d_inode(entry);
1119 	struct fuse_mount *fm = get_fuse_mount(inode);
1120 	FUSE_ARGS(args);
1121 
1122 	memset(&inarg, 0, sizeof(inarg));
1123 	inarg.oldnodeid = get_node_id(inode);
1124 	args.opcode = FUSE_LINK;
1125 	args.in_numargs = 2;
1126 	args.in_args[0].size = sizeof(inarg);
1127 	args.in_args[0].value = &inarg;
1128 	args.in_args[1].size = newent->d_name.len + 1;
1129 	args.in_args[1].value = newent->d_name.name;
1130 	err = create_new_entry(&invalid_mnt_idmap, fm, &args, newdir, newent, inode->i_mode);
1131 	if (!err)
1132 		fuse_update_ctime_in_cache(inode);
1133 	else if (err == -EINTR)
1134 		fuse_invalidate_attr(inode);
1135 
1136 	return err;
1137 }
1138 
1139 static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode,
1140 			  struct fuse_attr *attr, struct kstat *stat)
1141 {
1142 	unsigned int blkbits;
1143 	struct fuse_conn *fc = get_fuse_conn(inode);
1144 	vfsuid_t vfsuid = make_vfsuid(idmap, fc->user_ns,
1145 				      make_kuid(fc->user_ns, attr->uid));
1146 	vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns,
1147 				      make_kgid(fc->user_ns, attr->gid));
1148 
1149 	stat->dev = inode->i_sb->s_dev;
1150 	stat->ino = attr->ino;
1151 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
1152 	stat->nlink = attr->nlink;
1153 	stat->uid = vfsuid_into_kuid(vfsuid);
1154 	stat->gid = vfsgid_into_kgid(vfsgid);
1155 	stat->rdev = inode->i_rdev;
1156 	stat->atime.tv_sec = attr->atime;
1157 	stat->atime.tv_nsec = attr->atimensec;
1158 	stat->mtime.tv_sec = attr->mtime;
1159 	stat->mtime.tv_nsec = attr->mtimensec;
1160 	stat->ctime.tv_sec = attr->ctime;
1161 	stat->ctime.tv_nsec = attr->ctimensec;
1162 	stat->size = attr->size;
1163 	stat->blocks = attr->blocks;
1164 
1165 	if (attr->blksize != 0)
1166 		blkbits = ilog2(attr->blksize);
1167 	else
1168 		blkbits = inode->i_sb->s_blocksize_bits;
1169 
1170 	stat->blksize = 1 << blkbits;
1171 }
1172 
1173 static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr)
1174 {
1175 	memset(attr, 0, sizeof(*attr));
1176 	attr->ino = sx->ino;
1177 	attr->size = sx->size;
1178 	attr->blocks = sx->blocks;
1179 	attr->atime = sx->atime.tv_sec;
1180 	attr->mtime = sx->mtime.tv_sec;
1181 	attr->ctime = sx->ctime.tv_sec;
1182 	attr->atimensec = sx->atime.tv_nsec;
1183 	attr->mtimensec = sx->mtime.tv_nsec;
1184 	attr->ctimensec = sx->ctime.tv_nsec;
1185 	attr->mode = sx->mode;
1186 	attr->nlink = sx->nlink;
1187 	attr->uid = sx->uid;
1188 	attr->gid = sx->gid;
1189 	attr->rdev = new_encode_dev(MKDEV(sx->rdev_major, sx->rdev_minor));
1190 	attr->blksize = sx->blksize;
1191 }
1192 
1193 static int fuse_do_statx(struct mnt_idmap *idmap, struct inode *inode,
1194 			 struct file *file, struct kstat *stat)
1195 {
1196 	int err;
1197 	struct fuse_attr attr;
1198 	struct fuse_statx *sx;
1199 	struct fuse_statx_in inarg;
1200 	struct fuse_statx_out outarg;
1201 	struct fuse_mount *fm = get_fuse_mount(inode);
1202 	u64 attr_version = fuse_get_attr_version(fm->fc);
1203 	FUSE_ARGS(args);
1204 
1205 	memset(&inarg, 0, sizeof(inarg));
1206 	memset(&outarg, 0, sizeof(outarg));
1207 	/* Directories have separate file-handle space */
1208 	if (file && S_ISREG(inode->i_mode)) {
1209 		struct fuse_file *ff = file->private_data;
1210 
1211 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1212 		inarg.fh = ff->fh;
1213 	}
1214 	/* For now leave sync hints as the default, request all stats. */
1215 	inarg.sx_flags = 0;
1216 	inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME;
1217 	args.opcode = FUSE_STATX;
1218 	args.nodeid = get_node_id(inode);
1219 	args.in_numargs = 1;
1220 	args.in_args[0].size = sizeof(inarg);
1221 	args.in_args[0].value = &inarg;
1222 	args.out_numargs = 1;
1223 	args.out_args[0].size = sizeof(outarg);
1224 	args.out_args[0].value = &outarg;
1225 	err = fuse_simple_request(fm, &args);
1226 	if (err)
1227 		return err;
1228 
1229 	sx = &outarg.stat;
1230 	if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
1231 	    ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
1232 					 inode_wrong_type(inode, sx->mode)))) {
1233 		fuse_make_bad(inode);
1234 		return -EIO;
1235 	}
1236 
1237 	fuse_statx_to_attr(&outarg.stat, &attr);
1238 	if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) {
1239 		fuse_change_attributes(inode, &attr, &outarg.stat,
1240 				       ATTR_TIMEOUT(&outarg), attr_version);
1241 	}
1242 
1243 	if (stat) {
1244 		stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME);
1245 		stat->btime.tv_sec = sx->btime.tv_sec;
1246 		stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
1247 		fuse_fillattr(idmap, inode, &attr, stat);
1248 		stat->result_mask |= STATX_TYPE;
1249 	}
1250 
1251 	return 0;
1252 }
1253 
1254 static int fuse_do_getattr(struct mnt_idmap *idmap, struct inode *inode,
1255 			   struct kstat *stat, struct file *file)
1256 {
1257 	int err;
1258 	struct fuse_getattr_in inarg;
1259 	struct fuse_attr_out outarg;
1260 	struct fuse_mount *fm = get_fuse_mount(inode);
1261 	FUSE_ARGS(args);
1262 	u64 attr_version;
1263 
1264 	attr_version = fuse_get_attr_version(fm->fc);
1265 
1266 	memset(&inarg, 0, sizeof(inarg));
1267 	memset(&outarg, 0, sizeof(outarg));
1268 	/* Directories have separate file-handle space */
1269 	if (file && S_ISREG(inode->i_mode)) {
1270 		struct fuse_file *ff = file->private_data;
1271 
1272 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1273 		inarg.fh = ff->fh;
1274 	}
1275 	args.opcode = FUSE_GETATTR;
1276 	args.nodeid = get_node_id(inode);
1277 	args.in_numargs = 1;
1278 	args.in_args[0].size = sizeof(inarg);
1279 	args.in_args[0].value = &inarg;
1280 	args.out_numargs = 1;
1281 	args.out_args[0].size = sizeof(outarg);
1282 	args.out_args[0].value = &outarg;
1283 	err = fuse_simple_request(fm, &args);
1284 	if (!err) {
1285 		if (fuse_invalid_attr(&outarg.attr) ||
1286 		    inode_wrong_type(inode, outarg.attr.mode)) {
1287 			fuse_make_bad(inode);
1288 			err = -EIO;
1289 		} else {
1290 			fuse_change_attributes(inode, &outarg.attr, NULL,
1291 					       ATTR_TIMEOUT(&outarg),
1292 					       attr_version);
1293 			if (stat)
1294 				fuse_fillattr(idmap, inode, &outarg.attr, stat);
1295 		}
1296 	}
1297 	return err;
1298 }
1299 
1300 static int fuse_update_get_attr(struct mnt_idmap *idmap, struct inode *inode,
1301 				struct file *file, struct kstat *stat,
1302 				u32 request_mask, unsigned int flags)
1303 {
1304 	struct fuse_inode *fi = get_fuse_inode(inode);
1305 	struct fuse_conn *fc = get_fuse_conn(inode);
1306 	int err = 0;
1307 	bool sync;
1308 	u32 inval_mask = READ_ONCE(fi->inval_mask);
1309 	u32 cache_mask = fuse_get_cache_mask(inode);
1310 
1311 
1312 	/* FUSE only supports basic stats and possibly btime */
1313 	request_mask &= STATX_BASIC_STATS | STATX_BTIME;
1314 retry:
1315 	if (fc->no_statx)
1316 		request_mask &= STATX_BASIC_STATS;
1317 
1318 	if (!request_mask)
1319 		sync = false;
1320 	else if (flags & AT_STATX_FORCE_SYNC)
1321 		sync = true;
1322 	else if (flags & AT_STATX_DONT_SYNC)
1323 		sync = false;
1324 	else if (request_mask & inval_mask & ~cache_mask)
1325 		sync = true;
1326 	else
1327 		sync = time_before64(fi->i_time, get_jiffies_64());
1328 
1329 	if (sync) {
1330 		forget_all_cached_acls(inode);
1331 		/* Try statx if BTIME is requested */
1332 		if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) {
1333 			err = fuse_do_statx(idmap, inode, file, stat);
1334 			if (err == -ENOSYS) {
1335 				fc->no_statx = 1;
1336 				err = 0;
1337 				goto retry;
1338 			}
1339 		} else {
1340 			err = fuse_do_getattr(idmap, inode, stat, file);
1341 		}
1342 	} else if (stat) {
1343 		generic_fillattr(idmap, request_mask, inode, stat);
1344 		stat->mode = fi->orig_i_mode;
1345 		stat->ino = fi->orig_ino;
1346 		if (test_bit(FUSE_I_BTIME, &fi->state)) {
1347 			stat->btime = fi->i_btime;
1348 			stat->result_mask |= STATX_BTIME;
1349 		}
1350 	}
1351 
1352 	return err;
1353 }
1354 
1355 int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask)
1356 {
1357 	return fuse_update_get_attr(&nop_mnt_idmap, inode, file, NULL, mask, 0);
1358 }
1359 
1360 int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
1361 			     u64 child_nodeid, struct qstr *name, u32 flags)
1362 {
1363 	int err = -ENOTDIR;
1364 	struct inode *parent;
1365 	struct dentry *dir;
1366 	struct dentry *entry;
1367 
1368 	parent = fuse_ilookup(fc, parent_nodeid, NULL);
1369 	if (!parent)
1370 		return -ENOENT;
1371 
1372 	inode_lock_nested(parent, I_MUTEX_PARENT);
1373 	if (!S_ISDIR(parent->i_mode))
1374 		goto unlock;
1375 
1376 	err = -ENOENT;
1377 	dir = d_find_alias(parent);
1378 	if (!dir)
1379 		goto unlock;
1380 
1381 	name->hash = full_name_hash(dir, name->name, name->len);
1382 	entry = d_lookup(dir, name);
1383 	dput(dir);
1384 	if (!entry)
1385 		goto unlock;
1386 
1387 	fuse_dir_changed(parent);
1388 	if (!(flags & FUSE_EXPIRE_ONLY))
1389 		d_invalidate(entry);
1390 	fuse_invalidate_entry_cache(entry);
1391 
1392 	if (child_nodeid != 0 && d_really_is_positive(entry)) {
1393 		inode_lock(d_inode(entry));
1394 		if (get_node_id(d_inode(entry)) != child_nodeid) {
1395 			err = -ENOENT;
1396 			goto badentry;
1397 		}
1398 		if (d_mountpoint(entry)) {
1399 			err = -EBUSY;
1400 			goto badentry;
1401 		}
1402 		if (d_is_dir(entry)) {
1403 			shrink_dcache_parent(entry);
1404 			if (!simple_empty(entry)) {
1405 				err = -ENOTEMPTY;
1406 				goto badentry;
1407 			}
1408 			d_inode(entry)->i_flags |= S_DEAD;
1409 		}
1410 		dont_mount(entry);
1411 		clear_nlink(d_inode(entry));
1412 		err = 0;
1413  badentry:
1414 		inode_unlock(d_inode(entry));
1415 		if (!err)
1416 			d_delete(entry);
1417 	} else {
1418 		err = 0;
1419 	}
1420 	dput(entry);
1421 
1422  unlock:
1423 	inode_unlock(parent);
1424 	iput(parent);
1425 	return err;
1426 }
1427 
1428 static inline bool fuse_permissible_uidgid(struct fuse_conn *fc)
1429 {
1430 	const struct cred *cred = current_cred();
1431 
1432 	return (uid_eq(cred->euid, fc->user_id) &&
1433 		uid_eq(cred->suid, fc->user_id) &&
1434 		uid_eq(cred->uid,  fc->user_id) &&
1435 		gid_eq(cred->egid, fc->group_id) &&
1436 		gid_eq(cred->sgid, fc->group_id) &&
1437 		gid_eq(cred->gid,  fc->group_id));
1438 }
1439 
1440 /*
1441  * Calling into a user-controlled filesystem gives the filesystem
1442  * daemon ptrace-like capabilities over the current process.  This
1443  * means, that the filesystem daemon is able to record the exact
1444  * filesystem operations performed, and can also control the behavior
1445  * of the requester process in otherwise impossible ways.  For example
1446  * it can delay the operation for arbitrary length of time allowing
1447  * DoS against the requester.
1448  *
1449  * For this reason only those processes can call into the filesystem,
1450  * for which the owner of the mount has ptrace privilege.  This
1451  * excludes processes started by other users, suid or sgid processes.
1452  */
1453 bool fuse_allow_current_process(struct fuse_conn *fc)
1454 {
1455 	bool allow;
1456 
1457 	if (fc->allow_other)
1458 		allow = current_in_userns(fc->user_ns);
1459 	else
1460 		allow = fuse_permissible_uidgid(fc);
1461 
1462 	if (!allow && allow_sys_admin_access && capable(CAP_SYS_ADMIN))
1463 		allow = true;
1464 
1465 	return allow;
1466 }
1467 
1468 static int fuse_access(struct inode *inode, int mask)
1469 {
1470 	struct fuse_mount *fm = get_fuse_mount(inode);
1471 	FUSE_ARGS(args);
1472 	struct fuse_access_in inarg;
1473 	int err;
1474 
1475 	BUG_ON(mask & MAY_NOT_BLOCK);
1476 
1477 	/*
1478 	 * We should not send FUSE_ACCESS to the userspace
1479 	 * when idmapped mounts are enabled as for this case
1480 	 * we have fc->default_permissions = 1 and access
1481 	 * permission checks are done on the kernel side.
1482 	 */
1483 	WARN_ON_ONCE(!(fm->sb->s_iflags & SB_I_NOIDMAP));
1484 
1485 	if (fm->fc->no_access)
1486 		return 0;
1487 
1488 	memset(&inarg, 0, sizeof(inarg));
1489 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1490 	args.opcode = FUSE_ACCESS;
1491 	args.nodeid = get_node_id(inode);
1492 	args.in_numargs = 1;
1493 	args.in_args[0].size = sizeof(inarg);
1494 	args.in_args[0].value = &inarg;
1495 	err = fuse_simple_request(fm, &args);
1496 	if (err == -ENOSYS) {
1497 		fm->fc->no_access = 1;
1498 		err = 0;
1499 	}
1500 	return err;
1501 }
1502 
1503 static int fuse_perm_getattr(struct inode *inode, int mask)
1504 {
1505 	if (mask & MAY_NOT_BLOCK)
1506 		return -ECHILD;
1507 
1508 	forget_all_cached_acls(inode);
1509 	return fuse_do_getattr(&nop_mnt_idmap, inode, NULL, NULL);
1510 }
1511 
1512 /*
1513  * Check permission.  The two basic access models of FUSE are:
1514  *
1515  * 1) Local access checking ('default_permissions' mount option) based
1516  * on file mode.  This is the plain old disk filesystem permission
1517  * model.
1518  *
1519  * 2) "Remote" access checking, where server is responsible for
1520  * checking permission in each inode operation.  An exception to this
1521  * is if ->permission() was invoked from sys_access() in which case an
1522  * access request is sent.  Execute permission is still checked
1523  * locally based on file mode.
1524  */
1525 static int fuse_permission(struct mnt_idmap *idmap,
1526 			   struct inode *inode, int mask)
1527 {
1528 	struct fuse_conn *fc = get_fuse_conn(inode);
1529 	bool refreshed = false;
1530 	int err = 0;
1531 
1532 	if (fuse_is_bad(inode))
1533 		return -EIO;
1534 
1535 	if (!fuse_allow_current_process(fc))
1536 		return -EACCES;
1537 
1538 	/*
1539 	 * If attributes are needed, refresh them before proceeding
1540 	 */
1541 	if (fc->default_permissions ||
1542 	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1543 		struct fuse_inode *fi = get_fuse_inode(inode);
1544 		u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
1545 
1546 		if (perm_mask & READ_ONCE(fi->inval_mask) ||
1547 		    time_before64(fi->i_time, get_jiffies_64())) {
1548 			refreshed = true;
1549 
1550 			err = fuse_perm_getattr(inode, mask);
1551 			if (err)
1552 				return err;
1553 		}
1554 	}
1555 
1556 	if (fc->default_permissions) {
1557 		err = generic_permission(idmap, inode, mask);
1558 
1559 		/* If permission is denied, try to refresh file
1560 		   attributes.  This is also needed, because the root
1561 		   node will at first have no permissions */
1562 		if (err == -EACCES && !refreshed) {
1563 			err = fuse_perm_getattr(inode, mask);
1564 			if (!err)
1565 				err = generic_permission(idmap,
1566 							 inode, mask);
1567 		}
1568 
1569 		/* Note: the opposite of the above test does not
1570 		   exist.  So if permissions are revoked this won't be
1571 		   noticed immediately, only after the attribute
1572 		   timeout has expired */
1573 	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1574 		err = fuse_access(inode, mask);
1575 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1576 		if (!(inode->i_mode & S_IXUGO)) {
1577 			if (refreshed)
1578 				return -EACCES;
1579 
1580 			err = fuse_perm_getattr(inode, mask);
1581 			if (!err && !(inode->i_mode & S_IXUGO))
1582 				return -EACCES;
1583 		}
1584 	}
1585 	return err;
1586 }
1587 
1588 static int fuse_readlink_page(struct inode *inode, struct page *page)
1589 {
1590 	struct fuse_mount *fm = get_fuse_mount(inode);
1591 	struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
1592 	struct fuse_args_pages ap = {
1593 		.num_pages = 1,
1594 		.pages = &page,
1595 		.descs = &desc,
1596 	};
1597 	char *link;
1598 	ssize_t res;
1599 
1600 	ap.args.opcode = FUSE_READLINK;
1601 	ap.args.nodeid = get_node_id(inode);
1602 	ap.args.out_pages = true;
1603 	ap.args.out_argvar = true;
1604 	ap.args.page_zeroing = true;
1605 	ap.args.out_numargs = 1;
1606 	ap.args.out_args[0].size = desc.length;
1607 	res = fuse_simple_request(fm, &ap.args);
1608 
1609 	fuse_invalidate_atime(inode);
1610 
1611 	if (res < 0)
1612 		return res;
1613 
1614 	if (WARN_ON(res >= PAGE_SIZE))
1615 		return -EIO;
1616 
1617 	link = page_address(page);
1618 	link[res] = '\0';
1619 
1620 	return 0;
1621 }
1622 
1623 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
1624 				 struct delayed_call *callback)
1625 {
1626 	struct fuse_conn *fc = get_fuse_conn(inode);
1627 	struct page *page;
1628 	int err;
1629 
1630 	err = -EIO;
1631 	if (fuse_is_bad(inode))
1632 		goto out_err;
1633 
1634 	if (fc->cache_symlinks)
1635 		return page_get_link(dentry, inode, callback);
1636 
1637 	err = -ECHILD;
1638 	if (!dentry)
1639 		goto out_err;
1640 
1641 	page = alloc_page(GFP_KERNEL);
1642 	err = -ENOMEM;
1643 	if (!page)
1644 		goto out_err;
1645 
1646 	err = fuse_readlink_page(inode, page);
1647 	if (err) {
1648 		__free_page(page);
1649 		goto out_err;
1650 	}
1651 
1652 	set_delayed_call(callback, page_put_link, page);
1653 
1654 	return page_address(page);
1655 
1656 out_err:
1657 	return ERR_PTR(err);
1658 }
1659 
1660 static int fuse_dir_open(struct inode *inode, struct file *file)
1661 {
1662 	struct fuse_mount *fm = get_fuse_mount(inode);
1663 	int err;
1664 
1665 	if (fuse_is_bad(inode))
1666 		return -EIO;
1667 
1668 	err = generic_file_open(inode, file);
1669 	if (err)
1670 		return err;
1671 
1672 	err = fuse_do_open(fm, get_node_id(inode), file, true);
1673 	if (!err) {
1674 		struct fuse_file *ff = file->private_data;
1675 
1676 		/*
1677 		 * Keep handling FOPEN_STREAM and FOPEN_NONSEEKABLE for
1678 		 * directories for backward compatibility, though it's unlikely
1679 		 * to be useful.
1680 		 */
1681 		if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE))
1682 			nonseekable_open(inode, file);
1683 	}
1684 
1685 	return err;
1686 }
1687 
/*
 * ->release() for FUSE directories: hand the file to the common release
 * path, flagged as a directory.  Always returns 0.
 */
static int fuse_dir_release(struct inode *inode, struct file *file)
{
	const bool isdir = true;

	fuse_release_common(file, isdir);

	return 0;
}
1694 
1695 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1696 			  int datasync)
1697 {
1698 	struct inode *inode = file->f_mapping->host;
1699 	struct fuse_conn *fc = get_fuse_conn(inode);
1700 	int err;
1701 
1702 	if (fuse_is_bad(inode))
1703 		return -EIO;
1704 
1705 	if (fc->no_fsyncdir)
1706 		return 0;
1707 
1708 	inode_lock(inode);
1709 	err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
1710 	if (err == -ENOSYS) {
1711 		fc->no_fsyncdir = 1;
1712 		err = 0;
1713 	}
1714 	inode_unlock(inode);
1715 
1716 	return err;
1717 }
1718 
1719 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1720 			    unsigned long arg)
1721 {
1722 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1723 
1724 	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1725 	if (fc->minor < 18)
1726 		return -ENOTTY;
1727 
1728 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1729 }
1730 
1731 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1732 				   unsigned long arg)
1733 {
1734 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1735 
1736 	if (fc->minor < 18)
1737 		return -ENOTTY;
1738 
1739 	return fuse_ioctl_common(file, cmd, arg,
1740 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1741 }
1742 
1743 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1744 {
1745 	/* Always update if mtime is explicitly set  */
1746 	if (ivalid & ATTR_MTIME_SET)
1747 		return true;
1748 
1749 	/* Or if kernel i_mtime is the official one */
1750 	if (trust_local_mtime)
1751 		return true;
1752 
1753 	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1754 	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1755 		return false;
1756 
1757 	/* In all other cases update */
1758 	return true;
1759 }
1760 
1761 static void iattr_to_fattr(struct mnt_idmap *idmap, struct fuse_conn *fc,
1762 			   struct iattr *iattr, struct fuse_setattr_in *arg,
1763 			   bool trust_local_cmtime)
1764 {
1765 	unsigned ivalid = iattr->ia_valid;
1766 
1767 	if (ivalid & ATTR_MODE)
1768 		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1769 
1770 	if (ivalid & ATTR_UID) {
1771 		kuid_t fsuid = from_vfsuid(idmap, fc->user_ns, iattr->ia_vfsuid);
1772 
1773 		arg->valid |= FATTR_UID;
1774 		arg->uid = from_kuid(fc->user_ns, fsuid);
1775 	}
1776 
1777 	if (ivalid & ATTR_GID) {
1778 		kgid_t fsgid = from_vfsgid(idmap, fc->user_ns, iattr->ia_vfsgid);
1779 
1780 		arg->valid |= FATTR_GID;
1781 		arg->gid = from_kgid(fc->user_ns, fsgid);
1782 	}
1783 
1784 	if (ivalid & ATTR_SIZE)
1785 		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1786 	if (ivalid & ATTR_ATIME) {
1787 		arg->valid |= FATTR_ATIME;
1788 		arg->atime = iattr->ia_atime.tv_sec;
1789 		arg->atimensec = iattr->ia_atime.tv_nsec;
1790 		if (!(ivalid & ATTR_ATIME_SET))
1791 			arg->valid |= FATTR_ATIME_NOW;
1792 	}
1793 	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1794 		arg->valid |= FATTR_MTIME;
1795 		arg->mtime = iattr->ia_mtime.tv_sec;
1796 		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1797 		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1798 			arg->valid |= FATTR_MTIME_NOW;
1799 	}
1800 	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1801 		arg->valid |= FATTR_CTIME;
1802 		arg->ctime = iattr->ia_ctime.tv_sec;
1803 		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1804 	}
1805 }
1806 
1807 /*
1808  * Prevent concurrent writepages on inode
1809  *
1810  * This is done by adding a negative bias to the inode write counter
1811  * and waiting for all pending writes to finish.
1812  */
1813 void fuse_set_nowrite(struct inode *inode)
1814 {
1815 	struct fuse_inode *fi = get_fuse_inode(inode);
1816 
1817 	BUG_ON(!inode_is_locked(inode));
1818 
1819 	spin_lock(&fi->lock);
1820 	BUG_ON(fi->writectr < 0);
1821 	fi->writectr += FUSE_NOWRITE;
1822 	spin_unlock(&fi->lock);
1823 	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1824 }
1825 
1826 /*
1827  * Allow writepages on inode
1828  *
1829  * Remove the bias from the writecounter and send any queued
1830  * writepages.
1831  */
1832 static void __fuse_release_nowrite(struct inode *inode)
1833 {
1834 	struct fuse_inode *fi = get_fuse_inode(inode);
1835 
1836 	BUG_ON(fi->writectr != FUSE_NOWRITE);
1837 	fi->writectr = 0;
1838 	fuse_flush_writepages(inode);
1839 }
1840 
1841 void fuse_release_nowrite(struct inode *inode)
1842 {
1843 	struct fuse_inode *fi = get_fuse_inode(inode);
1844 
1845 	spin_lock(&fi->lock);
1846 	__fuse_release_nowrite(inode);
1847 	spin_unlock(&fi->lock);
1848 }
1849 
1850 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1851 			      struct inode *inode,
1852 			      struct fuse_setattr_in *inarg_p,
1853 			      struct fuse_attr_out *outarg_p)
1854 {
1855 	args->opcode = FUSE_SETATTR;
1856 	args->nodeid = get_node_id(inode);
1857 	args->in_numargs = 1;
1858 	args->in_args[0].size = sizeof(*inarg_p);
1859 	args->in_args[0].value = inarg_p;
1860 	args->out_numargs = 1;
1861 	args->out_args[0].size = sizeof(*outarg_p);
1862 	args->out_args[0].value = outarg_p;
1863 }
1864 
1865 /*
1866  * Flush inode->i_mtime to the server
1867  */
1868 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1869 {
1870 	struct fuse_mount *fm = get_fuse_mount(inode);
1871 	FUSE_ARGS(args);
1872 	struct fuse_setattr_in inarg;
1873 	struct fuse_attr_out outarg;
1874 
1875 	memset(&inarg, 0, sizeof(inarg));
1876 	memset(&outarg, 0, sizeof(outarg));
1877 
1878 	inarg.valid = FATTR_MTIME;
1879 	inarg.mtime = inode_get_mtime_sec(inode);
1880 	inarg.mtimensec = inode_get_mtime_nsec(inode);
1881 	if (fm->fc->minor >= 23) {
1882 		inarg.valid |= FATTR_CTIME;
1883 		inarg.ctime = inode_get_ctime_sec(inode);
1884 		inarg.ctimensec = inode_get_ctime_nsec(inode);
1885 	}
1886 	if (ff) {
1887 		inarg.valid |= FATTR_FH;
1888 		inarg.fh = ff->fh;
1889 	}
1890 	fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
1891 
1892 	return fuse_simple_request(fm, &args);
1893 }
1894 
1895 /*
1896  * Set attributes, and at the same time refresh them.
1897  *
1898  * Truncation is slightly complicated, because the 'truncate' request
1899  * may fail, in which case we don't want to touch the mapping.
1900  * vmtruncate() doesn't allow for this case, so do the rlimit checking
1901  * and the actual truncation by hand.
1902  */
1903 int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
1904 		    struct iattr *attr, struct file *file)
1905 {
1906 	struct inode *inode = d_inode(dentry);
1907 	struct fuse_mount *fm = get_fuse_mount(inode);
1908 	struct fuse_conn *fc = fm->fc;
1909 	struct fuse_inode *fi = get_fuse_inode(inode);
1910 	struct address_space *mapping = inode->i_mapping;
1911 	FUSE_ARGS(args);
1912 	struct fuse_setattr_in inarg;
1913 	struct fuse_attr_out outarg;
1914 	bool is_truncate = false;
1915 	bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode);
1916 	loff_t oldsize;
1917 	int err;
1918 	bool trust_local_cmtime = is_wb;
1919 	bool fault_blocked = false;
1920 
1921 	if (!fc->default_permissions)
1922 		attr->ia_valid |= ATTR_FORCE;
1923 
1924 	err = setattr_prepare(idmap, dentry, attr);
1925 	if (err)
1926 		return err;
1927 
1928 	if (attr->ia_valid & ATTR_SIZE) {
1929 		if (WARN_ON(!S_ISREG(inode->i_mode)))
1930 			return -EIO;
1931 		is_truncate = true;
1932 	}
1933 
1934 	if (FUSE_IS_DAX(inode) && is_truncate) {
1935 		filemap_invalidate_lock(mapping);
1936 		fault_blocked = true;
1937 		err = fuse_dax_break_layouts(inode, 0, 0);
1938 		if (err) {
1939 			filemap_invalidate_unlock(mapping);
1940 			return err;
1941 		}
1942 	}
1943 
1944 	if (attr->ia_valid & ATTR_OPEN) {
1945 		/* This is coming from open(..., ... | O_TRUNC); */
1946 		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
1947 		WARN_ON(attr->ia_size != 0);
1948 		if (fc->atomic_o_trunc) {
1949 			/*
1950 			 * No need to send request to userspace, since actual
1951 			 * truncation has already been done by OPEN.  But still
1952 			 * need to truncate page cache.
1953 			 */
1954 			i_size_write(inode, 0);
1955 			truncate_pagecache(inode, 0);
1956 			goto out;
1957 		}
1958 		file = NULL;
1959 	}
1960 
1961 	/* Flush dirty data/metadata before non-truncate SETATTR */
1962 	if (is_wb &&
1963 	    attr->ia_valid &
1964 			(ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
1965 			 ATTR_TIMES_SET)) {
1966 		err = write_inode_now(inode, true);
1967 		if (err)
1968 			return err;
1969 
1970 		fuse_set_nowrite(inode);
1971 		fuse_release_nowrite(inode);
1972 	}
1973 
1974 	if (is_truncate) {
1975 		fuse_set_nowrite(inode);
1976 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1977 		if (trust_local_cmtime && attr->ia_size != inode->i_size)
1978 			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1979 	}
1980 
1981 	memset(&inarg, 0, sizeof(inarg));
1982 	memset(&outarg, 0, sizeof(outarg));
1983 	iattr_to_fattr(idmap, fc, attr, &inarg, trust_local_cmtime);
1984 	if (file) {
1985 		struct fuse_file *ff = file->private_data;
1986 		inarg.valid |= FATTR_FH;
1987 		inarg.fh = ff->fh;
1988 	}
1989 
1990 	/* Kill suid/sgid for non-directory chown unconditionally */
1991 	if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
1992 	    attr->ia_valid & (ATTR_UID | ATTR_GID))
1993 		inarg.valid |= FATTR_KILL_SUIDGID;
1994 
1995 	if (attr->ia_valid & ATTR_SIZE) {
1996 		/* For mandatory locking in truncate */
1997 		inarg.valid |= FATTR_LOCKOWNER;
1998 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1999 
2000 		/* Kill suid/sgid for truncate only if no CAP_FSETID */
2001 		if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
2002 			inarg.valid |= FATTR_KILL_SUIDGID;
2003 	}
2004 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
2005 	err = fuse_simple_request(fm, &args);
2006 	if (err) {
2007 		if (err == -EINTR)
2008 			fuse_invalidate_attr(inode);
2009 		goto error;
2010 	}
2011 
2012 	if (fuse_invalid_attr(&outarg.attr) ||
2013 	    inode_wrong_type(inode, outarg.attr.mode)) {
2014 		fuse_make_bad(inode);
2015 		err = -EIO;
2016 		goto error;
2017 	}
2018 
2019 	spin_lock(&fi->lock);
2020 	/* the kernel maintains i_mtime locally */
2021 	if (trust_local_cmtime) {
2022 		if (attr->ia_valid & ATTR_MTIME)
2023 			inode_set_mtime_to_ts(inode, attr->ia_mtime);
2024 		if (attr->ia_valid & ATTR_CTIME)
2025 			inode_set_ctime_to_ts(inode, attr->ia_ctime);
2026 		/* FIXME: clear I_DIRTY_SYNC? */
2027 	}
2028 
2029 	fuse_change_attributes_common(inode, &outarg.attr, NULL,
2030 				      ATTR_TIMEOUT(&outarg),
2031 				      fuse_get_cache_mask(inode));
2032 	oldsize = inode->i_size;
2033 	/* see the comment in fuse_change_attributes() */
2034 	if (!is_wb || is_truncate)
2035 		i_size_write(inode, outarg.attr.size);
2036 
2037 	if (is_truncate) {
2038 		/* NOTE: this may release/reacquire fi->lock */
2039 		__fuse_release_nowrite(inode);
2040 	}
2041 	spin_unlock(&fi->lock);
2042 
2043 	/*
2044 	 * Only call invalidate_inode_pages2() after removing
2045 	 * FUSE_NOWRITE, otherwise fuse_launder_folio() would deadlock.
2046 	 */
2047 	if ((is_truncate || !is_wb) &&
2048 	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
2049 		truncate_pagecache(inode, outarg.attr.size);
2050 		invalidate_inode_pages2(mapping);
2051 	}
2052 
2053 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2054 out:
2055 	if (fault_blocked)
2056 		filemap_invalidate_unlock(mapping);
2057 
2058 	return 0;
2059 
2060 error:
2061 	if (is_truncate)
2062 		fuse_release_nowrite(inode);
2063 
2064 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2065 
2066 	if (fault_blocked)
2067 		filemap_invalidate_unlock(mapping);
2068 	return err;
2069 }
2070 
2071 static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry,
2072 			struct iattr *attr)
2073 {
2074 	struct inode *inode = d_inode(entry);
2075 	struct fuse_conn *fc = get_fuse_conn(inode);
2076 	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
2077 	int ret;
2078 
2079 	if (fuse_is_bad(inode))
2080 		return -EIO;
2081 
2082 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
2083 		return -EACCES;
2084 
2085 	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
2086 		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
2087 				    ATTR_MODE);
2088 
2089 		/*
2090 		 * The only sane way to reliably kill suid/sgid is to do it in
2091 		 * the userspace filesystem
2092 		 *
2093 		 * This should be done on write(), truncate() and chown().
2094 		 */
2095 		if (!fc->handle_killpriv && !fc->handle_killpriv_v2) {
2096 			/*
2097 			 * ia_mode calculation may have used stale i_mode.
2098 			 * Refresh and recalculate.
2099 			 */
2100 			ret = fuse_do_getattr(idmap, inode, NULL, file);
2101 			if (ret)
2102 				return ret;
2103 
2104 			attr->ia_mode = inode->i_mode;
2105 			if (inode->i_mode & S_ISUID) {
2106 				attr->ia_valid |= ATTR_MODE;
2107 				attr->ia_mode &= ~S_ISUID;
2108 			}
2109 			if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
2110 				attr->ia_valid |= ATTR_MODE;
2111 				attr->ia_mode &= ~S_ISGID;
2112 			}
2113 		}
2114 	}
2115 	if (!attr->ia_valid)
2116 		return 0;
2117 
2118 	ret = fuse_do_setattr(idmap, entry, attr, file);
2119 	if (!ret) {
2120 		/*
2121 		 * If filesystem supports acls it may have updated acl xattrs in
2122 		 * the filesystem, so forget cached acls for the inode.
2123 		 */
2124 		if (fc->posix_acl)
2125 			forget_all_cached_acls(inode);
2126 
2127 		/* Directory mode changed, may need to revalidate access */
2128 		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
2129 			fuse_invalidate_entry_cache(entry);
2130 	}
2131 	return ret;
2132 }
2133 
2134 static int fuse_getattr(struct mnt_idmap *idmap,
2135 			const struct path *path, struct kstat *stat,
2136 			u32 request_mask, unsigned int flags)
2137 {
2138 	struct inode *inode = d_inode(path->dentry);
2139 	struct fuse_conn *fc = get_fuse_conn(inode);
2140 
2141 	if (fuse_is_bad(inode))
2142 		return -EIO;
2143 
2144 	if (!fuse_allow_current_process(fc)) {
2145 		if (!request_mask) {
2146 			/*
2147 			 * If user explicitly requested *nothing* then don't
2148 			 * error out, but return st_dev only.
2149 			 */
2150 			stat->result_mask = 0;
2151 			stat->dev = inode->i_sb->s_dev;
2152 			return 0;
2153 		}
2154 		return -EACCES;
2155 	}
2156 
2157 	return fuse_update_get_attr(idmap, inode, NULL, stat, request_mask, flags);
2158 }
2159 
2160 static const struct inode_operations fuse_dir_inode_operations = {
2161 	.lookup		= fuse_lookup,
2162 	.mkdir		= fuse_mkdir,
2163 	.symlink	= fuse_symlink,
2164 	.unlink		= fuse_unlink,
2165 	.rmdir		= fuse_rmdir,
2166 	.rename		= fuse_rename2,
2167 	.link		= fuse_link,
2168 	.setattr	= fuse_setattr,
2169 	.create		= fuse_create,
2170 	.atomic_open	= fuse_atomic_open,
2171 	.tmpfile	= fuse_tmpfile,
2172 	.mknod		= fuse_mknod,
2173 	.permission	= fuse_permission,
2174 	.getattr	= fuse_getattr,
2175 	.listxattr	= fuse_listxattr,
2176 	.get_inode_acl	= fuse_get_inode_acl,
2177 	.get_acl	= fuse_get_acl,
2178 	.set_acl	= fuse_set_acl,
2179 	.fileattr_get	= fuse_fileattr_get,
2180 	.fileattr_set	= fuse_fileattr_set,
2181 };
2182 
2183 static const struct file_operations fuse_dir_operations = {
2184 	.llseek		= generic_file_llseek,
2185 	.read		= generic_read_dir,
2186 	.iterate_shared	= fuse_readdir,
2187 	.open		= fuse_dir_open,
2188 	.release	= fuse_dir_release,
2189 	.fsync		= fuse_dir_fsync,
2190 	.unlocked_ioctl	= fuse_dir_ioctl,
2191 	.compat_ioctl	= fuse_dir_compat_ioctl,
2192 };
2193 
2194 static const struct inode_operations fuse_common_inode_operations = {
2195 	.setattr	= fuse_setattr,
2196 	.permission	= fuse_permission,
2197 	.getattr	= fuse_getattr,
2198 	.listxattr	= fuse_listxattr,
2199 	.get_inode_acl	= fuse_get_inode_acl,
2200 	.get_acl	= fuse_get_acl,
2201 	.set_acl	= fuse_set_acl,
2202 	.fileattr_get	= fuse_fileattr_get,
2203 	.fileattr_set	= fuse_fileattr_set,
2204 };
2205 
2206 static const struct inode_operations fuse_symlink_inode_operations = {
2207 	.setattr	= fuse_setattr,
2208 	.get_link	= fuse_get_link,
2209 	.getattr	= fuse_getattr,
2210 	.listxattr	= fuse_listxattr,
2211 };
2212 
2213 void fuse_init_common(struct inode *inode)
2214 {
2215 	inode->i_op = &fuse_common_inode_operations;
2216 }
2217 
2218 void fuse_init_dir(struct inode *inode)
2219 {
2220 	struct fuse_inode *fi = get_fuse_inode(inode);
2221 
2222 	inode->i_op = &fuse_dir_inode_operations;
2223 	inode->i_fop = &fuse_dir_operations;
2224 
2225 	spin_lock_init(&fi->rdc.lock);
2226 	fi->rdc.cached = false;
2227 	fi->rdc.size = 0;
2228 	fi->rdc.pos = 0;
2229 	fi->rdc.version = 0;
2230 }
2231 
2232 static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
2233 {
2234 	int err = fuse_readlink_page(folio->mapping->host, &folio->page);
2235 
2236 	if (!err)
2237 		folio_mark_uptodate(folio);
2238 
2239 	folio_unlock(folio);
2240 
2241 	return err;
2242 }
2243 
2244 static const struct address_space_operations fuse_symlink_aops = {
2245 	.read_folio	= fuse_symlink_read_folio,
2246 };
2247 
2248 void fuse_init_symlink(struct inode *inode)
2249 {
2250 	inode->i_op = &fuse_symlink_inode_operations;
2251 	inode->i_data.a_ops = &fuse_symlink_aops;
2252 	inode_nohighmem(inode);
2253 }
2254