xref: /linux/fs/fuse/dir.c (revision dee264c16a6334dcdbea5c186f5ff35f98b1df42)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/fs_context.h>
14 #include <linux/moduleparam.h>
15 #include <linux/sched.h>
16 #include <linux/namei.h>
17 #include <linux/slab.h>
18 #include <linux/xattr.h>
19 #include <linux/iversion.h>
20 #include <linux/posix_acl.h>
21 #include <linux/security.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 
/*
 * Boolean module parameter, exposed with mode 0644.  Its effect is the
 * one described in the parameter description string below; the code that
 * consults it is not part of this section of the file.
 */
static bool __read_mostly allow_sys_admin_access;
module_param(allow_sys_admin_access, bool, 0644);
MODULE_PARM_DESC(allow_sys_admin_access,
		 "Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check");
29 
30 static void fuse_advise_use_readdirplus(struct inode *dir)
31 {
32 	struct fuse_inode *fi = get_fuse_inode(dir);
33 
34 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
35 }
36 
37 #if BITS_PER_LONG >= 64
38 static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
39 {
40 	entry->d_fsdata = (void *) time;
41 }
42 
43 static inline u64 fuse_dentry_time(const struct dentry *entry)
44 {
45 	return (u64)entry->d_fsdata;
46 }
47 
48 #else
49 union fuse_dentry {
50 	u64 time;
51 	struct rcu_head rcu;
52 };
53 
54 static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
55 {
56 	((union fuse_dentry *) dentry->d_fsdata)->time = time;
57 }
58 
59 static inline u64 fuse_dentry_time(const struct dentry *entry)
60 {
61 	return ((union fuse_dentry *) entry->d_fsdata)->time;
62 }
63 #endif
64 
65 static void fuse_dentry_settime(struct dentry *dentry, u64 time)
66 {
67 	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
68 	bool delete = !time && fc->delete_stale;
69 	/*
70 	 * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
71 	 * Don't care about races, either way it's just an optimization
72 	 */
73 	if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
74 	    (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
75 		spin_lock(&dentry->d_lock);
76 		if (!delete)
77 			dentry->d_flags &= ~DCACHE_OP_DELETE;
78 		else
79 			dentry->d_flags |= DCACHE_OP_DELETE;
80 		spin_unlock(&dentry->d_lock);
81 	}
82 
83 	__fuse_dentry_settime(dentry, time);
84 }
85 
86 /*
87  * FUSE caches dentries and attributes with separate timeout.  The
88  * time in jiffies until the dentry/attributes are valid is stored in
89  * dentry->d_fsdata and fuse_inode->i_time respectively.
90  */
91 
92 /*
93  * Calculate the time in jiffies until a dentry/attributes are valid
94  */
95 u64 fuse_time_to_jiffies(u64 sec, u32 nsec)
96 {
97 	if (sec || nsec) {
98 		struct timespec64 ts = {
99 			sec,
100 			min_t(u32, nsec, NSEC_PER_SEC - 1)
101 		};
102 
103 		return get_jiffies_64() + timespec64_to_jiffies(&ts);
104 	} else
105 		return 0;
106 }
107 
108 /*
109  * Set dentry and possibly attribute timeouts from the lookup/mk*
110  * replies
111  */
112 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
113 {
114 	fuse_dentry_settime(entry,
115 		fuse_time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
116 }
117 
118 void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
119 {
120 	set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask);
121 }
122 
123 /*
124  * Mark the attributes as stale, so that at the next call to
125  * ->getattr() they will be fetched from userspace
126  */
127 void fuse_invalidate_attr(struct inode *inode)
128 {
129 	fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
130 }
131 
132 static void fuse_dir_changed(struct inode *dir)
133 {
134 	fuse_invalidate_attr(dir);
135 	inode_maybe_inc_iversion(dir, false);
136 }
137 
138 /*
139  * Mark the attributes as stale due to an atime change.  Avoid the invalidate if
140  * atime is not used.
141  */
142 void fuse_invalidate_atime(struct inode *inode)
143 {
144 	if (!IS_RDONLY(inode))
145 		fuse_invalidate_attr_mask(inode, STATX_ATIME);
146 }
147 
148 /*
149  * Just mark the entry as stale, so that a next attempt to look it up
150  * will result in a new lookup call to userspace
151  *
152  * This is called when a dentry is about to become negative and the
153  * timeout is unknown (unlink, rmdir, rename and in some cases
154  * lookup)
155  */
156 void fuse_invalidate_entry_cache(struct dentry *entry)
157 {
158 	fuse_dentry_settime(entry, 0);
159 }
160 
/*
 * Like fuse_invalidate_entry_cache(), but first ask the dcache to
 * invalidate @entry (try to remove it from the hash).
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);
	/* Same effect as fuse_invalidate_entry_cache(entry) */
	fuse_dentry_settime(entry, 0);
}
170 
171 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
172 			     u64 nodeid, const struct qstr *name,
173 			     struct fuse_entry_out *outarg)
174 {
175 	memset(outarg, 0, sizeof(struct fuse_entry_out));
176 	args->opcode = FUSE_LOOKUP;
177 	args->nodeid = nodeid;
178 	args->in_numargs = 3;
179 	fuse_set_zero_arg0(args);
180 	args->in_args[1].size = name->len;
181 	args->in_args[1].value = name->name;
182 	args->in_args[2].size = 1;
183 	args->in_args[2].value = "";
184 	args->out_numargs = 1;
185 	args->out_args[0].size = sizeof(struct fuse_entry_out);
186 	args->out_args[0].value = outarg;
187 }
188 
/*
 * Check whether the dentry is still valid
 *
 * If the entry validity timeout has expired and the dentry is
 * positive, try to redo the lookup.  If the lookup results in a
 * different inode, then let the VFS invalidate the dentry and redo
 * the lookup once more.  If the lookup results in the same inode,
 * then refresh the attributes, timeouts and mark the dentry valid.
 *
 * Returns 1 if the dentry is valid, 0 if it is invalid, -ECHILD if
 * LOOKUP_RCU is set and the check cannot be completed in that mode,
 * -ENOMEM if the forget link cannot be allocated, or the request's
 * own error when that is -ENOMEM or -EINTR.
 */
static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
				  struct dentry *entry, unsigned int flags)
{
	struct inode *inode;
	struct fuse_mount *fm;
	struct fuse_inode *fi;
	int ret;

	inode = d_inode_rcu(entry);
	if (inode && fuse_is_bad(inode))
		goto invalid;
	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
		 (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) {
		/* Timeout expired, or the caller's flags force a re-lookup */
		struct fuse_entry_out outarg;
		FUSE_ARGS(args);
		struct fuse_forget_link *forget;
		u64 attr_version;

		/* For negative dentries, always do a fresh lookup */
		if (!inode)
			goto invalid;

		/* Sending a request is not done in RCU mode */
		ret = -ECHILD;
		if (flags & LOOKUP_RCU)
			goto out;

		fm = get_fuse_mount(inode);

		/* Allocated up front so a forget can be queued on mismatch */
		forget = fuse_alloc_forget();
		ret = -ENOMEM;
		if (!forget)
			goto out;

		/* Sampled before the request; passed to fuse_change_attributes() */
		attr_version = fuse_get_attr_version(fm->fc);

		fuse_lookup_init(fm->fc, &args, get_node_id(dir),
				 name, &outarg);
		ret = fuse_simple_request(fm, &args);
		/* Zero nodeid is same as -ENOENT */
		if (!ret && !outarg.nodeid)
			ret = -ENOENT;
		if (!ret) {
			fi = get_fuse_inode(inode);
			/*
			 * Different node, or automount state disagrees with
			 * FUSE_ATTR_SUBMOUNT: queue a forget for the node
			 * just looked up (this consumes 'forget') and fail.
			 */
			if (outarg.nodeid != get_node_id(inode) ||
			    (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
				fuse_queue_forget(fm->fc, forget,
						  outarg.nodeid, 1);
				goto invalid;
			}
			/* Same node: account for the successful lookup */
			spin_lock(&fi->lock);
			fi->nlookup++;
			spin_unlock(&fi->lock);
		}
		/* The forget link was not queued on this path */
		kfree(forget);
		/* These two errors are returned as-is instead of "invalid" */
		if (ret == -ENOMEM || ret == -EINTR)
			goto out;
		if (ret || fuse_invalid_attr(&outarg.attr) ||
		    fuse_stale_inode(inode, outarg.generation, &outarg.attr))
			goto invalid;

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &outarg.attr, NULL,
				       ATTR_TIMEOUT(&outarg),
				       attr_version);
		fuse_change_entry_timeout(entry, &outarg);
	} else if (inode) {
		/* Still within the timeout: only FUSE_I_INIT_RDPLUS handling */
		fi = get_fuse_inode(inode);
		if (flags & LOOKUP_RCU) {
			/* The bit is only tested here; clearing is left to non-RCU mode */
			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
				return -ECHILD;
		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
			fuse_advise_use_readdirplus(dir);
		}
	}
	ret = 1;
out:
	return ret;

invalid:
	ret = 0;
	goto out;
}
280 
281 #if BITS_PER_LONG < 64
282 static int fuse_dentry_init(struct dentry *dentry)
283 {
284 	dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
285 				   GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
286 
287 	return dentry->d_fsdata ? 0 : -ENOMEM;
288 }
289 static void fuse_dentry_release(struct dentry *dentry)
290 {
291 	union fuse_dentry *fd = dentry->d_fsdata;
292 
293 	kfree_rcu(fd, rcu);
294 }
295 #endif
296 
297 static int fuse_dentry_delete(const struct dentry *dentry)
298 {
299 	return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
300 }
301 
302 /*
303  * Create a fuse_mount object with a new superblock (with path->dentry
304  * as the root), and return that mount so it can be auto-mounted on
305  * @path.
306  */
307 static struct vfsmount *fuse_dentry_automount(struct path *path)
308 {
309 	struct fs_context *fsc;
310 	struct vfsmount *mnt;
311 	struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
312 
313 	fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
314 	if (IS_ERR(fsc))
315 		return ERR_CAST(fsc);
316 
317 	/* Pass the FUSE inode of the mount for fuse_get_tree_submount() */
318 	fsc->fs_private = mp_fi;
319 
320 	/* Create the submount */
321 	mnt = fc_mount(fsc);
322 	put_fs_context(fsc);
323 	return mnt;
324 }
325 
/*
 * Dentry operations table with revalidate, delete and automount hooks.
 * The init/release hooks are only present when BITS_PER_LONG < 64,
 * where d_fsdata points to an allocated union fuse_dentry.
 */
const struct dentry_operations fuse_dentry_operations = {
	.d_revalidate	= fuse_dentry_revalidate,
	.d_delete	= fuse_dentry_delete,
#if BITS_PER_LONG < 64
	.d_init		= fuse_dentry_init,
	.d_release	= fuse_dentry_release,
#endif
	.d_automount	= fuse_dentry_automount,
};
335 
/*
 * Dentry operations table without revalidate, delete or automount hooks
 * (judging by the name, meant for the root dentry).  It only carries the
 * init/release pair, and only when BITS_PER_LONG < 64.
 */
const struct dentry_operations fuse_root_dentry_operations = {
#if BITS_PER_LONG < 64
	.d_init		= fuse_dentry_init,
	.d_release	= fuse_dentry_release,
#endif
};
342 
/*
 * Returns 1 if the file-type bits of mode @m denote a regular file,
 * directory, symlink, character device, block device, FIFO or socket;
 * returns 0 for anything else.
 */
int fuse_valid_type(int m)
{
	if (S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m))
		return 1;
	if (S_ISCHR(m) || S_ISBLK(m))
		return 1;
	if (S_ISFIFO(m) || S_ISSOCK(m))
		return 1;

	return 0;
}
348 
349 static bool fuse_valid_size(u64 size)
350 {
351 	return size <= LLONG_MAX;
352 }
353 
354 bool fuse_invalid_attr(struct fuse_attr *attr)
355 {
356 	return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
357 }
358 
359 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
360 		     struct fuse_entry_out *outarg, struct inode **inode)
361 {
362 	struct fuse_mount *fm = get_fuse_mount_super(sb);
363 	FUSE_ARGS(args);
364 	struct fuse_forget_link *forget;
365 	u64 attr_version, evict_ctr;
366 	int err;
367 
368 	*inode = NULL;
369 	err = -ENAMETOOLONG;
370 	if (name->len > fm->fc->name_max)
371 		goto out;
372 
373 
374 	forget = fuse_alloc_forget();
375 	err = -ENOMEM;
376 	if (!forget)
377 		goto out;
378 
379 	attr_version = fuse_get_attr_version(fm->fc);
380 	evict_ctr = fuse_get_evict_ctr(fm->fc);
381 
382 	fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
383 	err = fuse_simple_request(fm, &args);
384 	/* Zero nodeid is same as -ENOENT, but with valid timeout */
385 	if (err || !outarg->nodeid)
386 		goto out_put_forget;
387 
388 	err = -EIO;
389 	if (fuse_invalid_attr(&outarg->attr))
390 		goto out_put_forget;
391 	if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
392 		pr_warn_once("root generation should be zero\n");
393 		outarg->generation = 0;
394 	}
395 
396 	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
397 			   &outarg->attr, ATTR_TIMEOUT(outarg),
398 			   attr_version, evict_ctr);
399 	err = -ENOMEM;
400 	if (!*inode) {
401 		fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
402 		goto out;
403 	}
404 	err = 0;
405 
406  out_put_forget:
407 	kfree(forget);
408  out:
409 	return err;
410 }
411 
412 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
413 				  unsigned int flags)
414 {
415 	int err;
416 	struct fuse_entry_out outarg;
417 	struct inode *inode;
418 	struct dentry *newent;
419 	bool outarg_valid = true;
420 	bool locked;
421 
422 	if (fuse_is_bad(dir))
423 		return ERR_PTR(-EIO);
424 
425 	locked = fuse_lock_inode(dir);
426 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
427 			       &outarg, &inode);
428 	fuse_unlock_inode(dir, locked);
429 	if (err == -ENOENT) {
430 		outarg_valid = false;
431 		err = 0;
432 	}
433 	if (err)
434 		goto out_err;
435 
436 	err = -EIO;
437 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
438 		goto out_iput;
439 
440 	newent = d_splice_alias(inode, entry);
441 	err = PTR_ERR(newent);
442 	if (IS_ERR(newent))
443 		goto out_err;
444 
445 	entry = newent ? newent : entry;
446 	if (outarg_valid)
447 		fuse_change_entry_timeout(entry, &outarg);
448 	else
449 		fuse_invalidate_entry_cache(entry);
450 
451 	if (inode)
452 		fuse_advise_use_readdirplus(dir);
453 	return newent;
454 
455  out_iput:
456 	iput(inode);
457  out_err:
458 	return ERR_PTR(err);
459 }
460 
461 static int get_security_context(struct dentry *entry, umode_t mode,
462 				struct fuse_in_arg *ext)
463 {
464 	struct fuse_secctx *fctx;
465 	struct fuse_secctx_header *header;
466 	struct lsm_context lsmctx = { };
467 	void *ptr;
468 	u32 total_len = sizeof(*header);
469 	int err, nr_ctx = 0;
470 	const char *name = NULL;
471 	size_t namelen;
472 
473 	err = security_dentry_init_security(entry, mode, &entry->d_name,
474 					    &name, &lsmctx);
475 
476 	/* If no LSM is supporting this security hook ignore error */
477 	if (err && err != -EOPNOTSUPP)
478 		goto out_err;
479 
480 	if (lsmctx.len) {
481 		nr_ctx = 1;
482 		namelen = strlen(name) + 1;
483 		err = -EIO;
484 		if (WARN_ON(namelen > XATTR_NAME_MAX + 1 ||
485 		    lsmctx.len > S32_MAX))
486 			goto out_err;
487 		total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen +
488 					    lsmctx.len);
489 	}
490 
491 	err = -ENOMEM;
492 	header = ptr = kzalloc(total_len, GFP_KERNEL);
493 	if (!ptr)
494 		goto out_err;
495 
496 	header->nr_secctx = nr_ctx;
497 	header->size = total_len;
498 	ptr += sizeof(*header);
499 	if (nr_ctx) {
500 		fctx = ptr;
501 		fctx->size = lsmctx.len;
502 		ptr += sizeof(*fctx);
503 
504 		strcpy(ptr, name);
505 		ptr += namelen;
506 
507 		memcpy(ptr, lsmctx.context, lsmctx.len);
508 	}
509 	ext->size = total_len;
510 	ext->value = header;
511 	err = 0;
512 out_err:
513 	if (nr_ctx)
514 		security_release_secctx(&lsmctx);
515 	return err;
516 }
517 
518 static void *extend_arg(struct fuse_in_arg *buf, u32 bytes)
519 {
520 	void *p;
521 	u32 newlen = buf->size + bytes;
522 
523 	p = krealloc(buf->value, newlen, GFP_KERNEL);
524 	if (!p) {
525 		kfree(buf->value);
526 		buf->size = 0;
527 		buf->value = NULL;
528 		return NULL;
529 	}
530 
531 	memset(p + buf->size, 0, bytes);
532 	buf->value = p;
533 	buf->size = newlen;
534 
535 	return p + newlen - bytes;
536 }
537 
538 static u32 fuse_ext_size(size_t size)
539 {
540 	return FUSE_REC_ALIGN(sizeof(struct fuse_ext_header) + size);
541 }
542 
543 /*
544  * This adds just a single supplementary group that matches the parent's group.
545  */
546 static int get_create_supp_group(struct mnt_idmap *idmap,
547 				 struct inode *dir,
548 				 struct fuse_in_arg *ext)
549 {
550 	struct fuse_conn *fc = get_fuse_conn(dir);
551 	struct fuse_ext_header *xh;
552 	struct fuse_supp_groups *sg;
553 	kgid_t kgid = dir->i_gid;
554 	vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns, kgid);
555 	gid_t parent_gid = from_kgid(fc->user_ns, kgid);
556 
557 	u32 sg_len = fuse_ext_size(sizeof(*sg) + sizeof(sg->groups[0]));
558 
559 	if (parent_gid == (gid_t) -1 || vfsgid_eq_kgid(vfsgid, current_fsgid()) ||
560 	    !vfsgid_in_group_p(vfsgid))
561 		return 0;
562 
563 	xh = extend_arg(ext, sg_len);
564 	if (!xh)
565 		return -ENOMEM;
566 
567 	xh->size = sg_len;
568 	xh->type = FUSE_EXT_GROUPS;
569 
570 	sg = (struct fuse_supp_groups *) &xh[1];
571 	sg->nr_groups = 1;
572 	sg->groups[0] = parent_gid;
573 
574 	return 0;
575 }
576 
577 static int get_create_ext(struct mnt_idmap *idmap,
578 			  struct fuse_args *args,
579 			  struct inode *dir, struct dentry *dentry,
580 			  umode_t mode)
581 {
582 	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
583 	struct fuse_in_arg ext = { .size = 0, .value = NULL };
584 	int err = 0;
585 
586 	if (fc->init_security)
587 		err = get_security_context(dentry, mode, &ext);
588 	if (!err && fc->create_supp_group)
589 		err = get_create_supp_group(idmap, dir, &ext);
590 
591 	if (!err && ext.size) {
592 		WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args));
593 		args->is_ext = true;
594 		args->ext_idx = args->in_numargs++;
595 		args->in_args[args->ext_idx] = ext;
596 	} else {
597 		kfree(ext.value);
598 	}
599 
600 	return err;
601 }
602 
603 static void free_ext_value(struct fuse_args *args)
604 {
605 	if (args->is_ext)
606 		kfree(args->in_args[args->ext_idx].value);
607 }
608 
/*
 * Atomic create+open operation
 *
 * If the filesystem doesn't support this, then fall back to separate
 * 'mknod' + 'open' requests.
 *
 * Sends @opcode with the create parameters and the name of @entry,
 * then instantiates the dentry and finishes opening @file.
 *
 * Returns 0 on success; -ENOMEM on allocation failure; -EIO if the
 * reply is not a valid regular file entry; otherwise the error from
 * building the extensions, from the request, or from opening the file.
 */
static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
			    struct dentry *entry, struct file *file,
			    unsigned int flags, umode_t mode, u32 opcode)
{
	int err;
	struct inode *inode;
	struct fuse_mount *fm = get_fuse_mount(dir);
	FUSE_ARGS(args);
	struct fuse_forget_link *forget;
	struct fuse_create_in inarg;
	struct fuse_open_out *outopenp;
	struct fuse_entry_out outentry;
	struct fuse_inode *fi;
	struct fuse_file *ff;
	/* Remembered up front for the page cache handling at the end */
	bool trunc = flags & O_TRUNC;

	/* Userspace expects S_IFREG in create mode */
	BUG_ON((mode & S_IFMT) != S_IFREG);

	forget = fuse_alloc_forget();
	err = -ENOMEM;
	if (!forget)
		goto out_err;

	err = -ENOMEM;
	ff = fuse_file_alloc(fm, true);
	if (!ff)
		goto out_put_forget_req;

	/* Apply the umask here unless the connection has dont_mask set */
	if (!fm->fc->dont_mask)
		mode &= ~current_umask();

	flags &= ~O_NOCTTY;
	memset(&inarg, 0, sizeof(inarg));
	memset(&outentry, 0, sizeof(outentry));
	inarg.flags = flags;
	inarg.mode = mode;
	inarg.umask = current_umask();

	if (fm->fc->handle_killpriv_v2 && trunc &&
	    !(flags & O_EXCL) && !capable(CAP_FSETID)) {
		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
	}

	args.opcode = opcode;
	args.nodeid = get_node_id(dir);
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	/* The name is sent including its terminating NUL */
	args.in_args[1].size = entry->d_name.len + 1;
	args.in_args[1].value = entry->d_name.name;
	args.out_numargs = 2;
	args.out_args[0].size = sizeof(outentry);
	args.out_args[0].value = &outentry;
	/* Store outarg for fuse_finish_open() */
	outopenp = &ff->args->open_outarg;
	args.out_args[1].size = sizeof(*outopenp);
	args.out_args[1].value = outopenp;

	err = get_create_ext(idmap, &args, dir, entry, mode);
	if (err)
		goto out_free_ff;

	err = fuse_simple_idmap_request(idmap, fm, &args);
	/* The extension buffer is no longer needed once the request is done */
	free_ext_value(&args);
	if (err)
		goto out_free_ff;

	/* Reject replies that are not a valid regular file entry */
	err = -EIO;
	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
	    fuse_invalid_attr(&outentry.attr))
		goto out_free_ff;

	ff->fh = outopenp->fh;
	ff->nodeid = outentry.nodeid;
	ff->open_flags = outopenp->open_flags;
	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
			  &outentry.attr, ATTR_TIMEOUT(&outentry), 0, 0);
	if (!inode) {
		/*
		 * No inode could be obtained: release the file and queue a
		 * forget for the node.  Both 'ff' and 'forget' are handed
		 * over here, hence the jump past the freeing labels.
		 */
		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
		fuse_sync_release(NULL, ff, flags);
		fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
		err = -ENOMEM;
		goto out_err;
	}
	kfree(forget);
	d_instantiate(entry, inode);
	fuse_change_entry_timeout(entry, &outentry);
	fuse_dir_changed(dir);
	err = generic_file_open(inode, file);
	if (!err) {
		file->private_data = ff;
		err = finish_open(file, entry, fuse_finish_open);
	}
	if (err) {
		/* Opening failed after the create succeeded: release the file */
		fi = get_fuse_inode(inode);
		fuse_sync_release(fi, ff, flags);
	} else {
		if (fm->fc->atomic_o_trunc && trunc)
			truncate_pagecache(inode, 0);
		else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
			invalidate_inode_pages2(inode->i_mapping);
	}
	return err;

out_free_ff:
	fuse_file_free(ff);
out_put_forget_req:
	kfree(forget);
out_err:
	return err;
}
727 
728 static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
729 		      umode_t, dev_t);
730 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
731 			    struct file *file, unsigned flags,
732 			    umode_t mode)
733 {
734 	int err;
735 	struct mnt_idmap *idmap = file_mnt_idmap(file);
736 	struct fuse_conn *fc = get_fuse_conn(dir);
737 	struct dentry *res = NULL;
738 
739 	if (fuse_is_bad(dir))
740 		return -EIO;
741 
742 	if (d_in_lookup(entry)) {
743 		res = fuse_lookup(dir, entry, 0);
744 		if (IS_ERR(res))
745 			return PTR_ERR(res);
746 
747 		if (res)
748 			entry = res;
749 	}
750 
751 	if (!(flags & O_CREAT) || d_really_is_positive(entry))
752 		goto no_open;
753 
754 	/* Only creates */
755 	file->f_mode |= FMODE_CREATED;
756 
757 	if (fc->no_create)
758 		goto mknod;
759 
760 	err = fuse_create_open(idmap, dir, entry, file, flags, mode, FUSE_CREATE);
761 	if (err == -ENOSYS) {
762 		fc->no_create = 1;
763 		goto mknod;
764 	} else if (err == -EEXIST)
765 		fuse_invalidate_entry(entry);
766 out_dput:
767 	dput(res);
768 	return err;
769 
770 mknod:
771 	err = fuse_mknod(idmap, dir, entry, mode, 0);
772 	if (err)
773 		goto out_dput;
774 no_open:
775 	return finish_no_open(file, res);
776 }
777 
778 /*
779  * Code shared between mknod, mkdir, symlink and link
780  */
781 static struct dentry *create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm,
782 				       struct fuse_args *args, struct inode *dir,
783 				       struct dentry *entry, umode_t mode)
784 {
785 	struct fuse_entry_out outarg;
786 	struct inode *inode;
787 	struct dentry *d;
788 	int err;
789 	struct fuse_forget_link *forget;
790 
791 	if (fuse_is_bad(dir))
792 		return ERR_PTR(-EIO);
793 
794 	forget = fuse_alloc_forget();
795 	if (!forget)
796 		return ERR_PTR(-ENOMEM);
797 
798 	memset(&outarg, 0, sizeof(outarg));
799 	args->nodeid = get_node_id(dir);
800 	args->out_numargs = 1;
801 	args->out_args[0].size = sizeof(outarg);
802 	args->out_args[0].value = &outarg;
803 
804 	if (args->opcode != FUSE_LINK) {
805 		err = get_create_ext(idmap, args, dir, entry, mode);
806 		if (err)
807 			goto out_put_forget_req;
808 	}
809 
810 	err = fuse_simple_idmap_request(idmap, fm, args);
811 	free_ext_value(args);
812 	if (err)
813 		goto out_put_forget_req;
814 
815 	err = -EIO;
816 	if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
817 		goto out_put_forget_req;
818 
819 	if ((outarg.attr.mode ^ mode) & S_IFMT)
820 		goto out_put_forget_req;
821 
822 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
823 			  &outarg.attr, ATTR_TIMEOUT(&outarg), 0, 0);
824 	if (!inode) {
825 		fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
826 		return ERR_PTR(-ENOMEM);
827 	}
828 	kfree(forget);
829 
830 	d_drop(entry);
831 	d = d_splice_alias(inode, entry);
832 	if (IS_ERR(d))
833 		return d;
834 
835 	if (d)
836 		fuse_change_entry_timeout(d, &outarg);
837 	else
838 		fuse_change_entry_timeout(entry, &outarg);
839 	fuse_dir_changed(dir);
840 	return d;
841 
842  out_put_forget_req:
843 	if (err == -EEXIST)
844 		fuse_invalidate_entry(entry);
845 	kfree(forget);
846 	return ERR_PTR(err);
847 }
848 
849 static int create_new_nondir(struct mnt_idmap *idmap, struct fuse_mount *fm,
850 			     struct fuse_args *args, struct inode *dir,
851 			     struct dentry *entry, umode_t mode)
852 {
853 	/*
854 	 * Note that when creating anything other than a directory we
855 	 * can be sure create_new_entry() will NOT return an alternate
856 	 * dentry as d_splice_alias() only returns an alternate dentry
857 	 * for directories.  So we don't need to check for that case
858 	 * when passing back the result.
859 	 */
860 	WARN_ON_ONCE(S_ISDIR(mode));
861 
862 	return PTR_ERR(create_new_entry(idmap, fm, args, dir, entry, mode));
863 }
864 
865 static int fuse_mknod(struct mnt_idmap *idmap, struct inode *dir,
866 		      struct dentry *entry, umode_t mode, dev_t rdev)
867 {
868 	struct fuse_mknod_in inarg;
869 	struct fuse_mount *fm = get_fuse_mount(dir);
870 	FUSE_ARGS(args);
871 
872 	if (!fm->fc->dont_mask)
873 		mode &= ~current_umask();
874 
875 	memset(&inarg, 0, sizeof(inarg));
876 	inarg.mode = mode;
877 	inarg.rdev = new_encode_dev(rdev);
878 	inarg.umask = current_umask();
879 	args.opcode = FUSE_MKNOD;
880 	args.in_numargs = 2;
881 	args.in_args[0].size = sizeof(inarg);
882 	args.in_args[0].value = &inarg;
883 	args.in_args[1].size = entry->d_name.len + 1;
884 	args.in_args[1].value = entry->d_name.name;
885 	return create_new_nondir(idmap, fm, &args, dir, entry, mode);
886 }
887 
888 static int fuse_create(struct mnt_idmap *idmap, struct inode *dir,
889 		       struct dentry *entry, umode_t mode, bool excl)
890 {
891 	return fuse_mknod(idmap, dir, entry, mode, 0);
892 }
893 
894 static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
895 			struct file *file, umode_t mode)
896 {
897 	struct fuse_conn *fc = get_fuse_conn(dir);
898 	int err;
899 
900 	if (fc->no_tmpfile)
901 		return -EOPNOTSUPP;
902 
903 	err = fuse_create_open(idmap, dir, file->f_path.dentry, file,
904 			       file->f_flags, mode, FUSE_TMPFILE);
905 	if (err == -ENOSYS) {
906 		fc->no_tmpfile = 1;
907 		err = -EOPNOTSUPP;
908 	}
909 	return err;
910 }
911 
912 static struct dentry *fuse_mkdir(struct mnt_idmap *idmap, struct inode *dir,
913 				 struct dentry *entry, umode_t mode)
914 {
915 	struct fuse_mkdir_in inarg;
916 	struct fuse_mount *fm = get_fuse_mount(dir);
917 	FUSE_ARGS(args);
918 
919 	if (!fm->fc->dont_mask)
920 		mode &= ~current_umask();
921 
922 	memset(&inarg, 0, sizeof(inarg));
923 	inarg.mode = mode;
924 	inarg.umask = current_umask();
925 	args.opcode = FUSE_MKDIR;
926 	args.in_numargs = 2;
927 	args.in_args[0].size = sizeof(inarg);
928 	args.in_args[0].value = &inarg;
929 	args.in_args[1].size = entry->d_name.len + 1;
930 	args.in_args[1].value = entry->d_name.name;
931 	return create_new_entry(idmap, fm, &args, dir, entry, S_IFDIR);
932 }
933 
934 static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir,
935 			struct dentry *entry, const char *link)
936 {
937 	struct fuse_mount *fm = get_fuse_mount(dir);
938 	unsigned len = strlen(link) + 1;
939 	FUSE_ARGS(args);
940 
941 	args.opcode = FUSE_SYMLINK;
942 	args.in_numargs = 3;
943 	fuse_set_zero_arg0(&args);
944 	args.in_args[1].size = entry->d_name.len + 1;
945 	args.in_args[1].value = entry->d_name.name;
946 	args.in_args[2].size = len;
947 	args.in_args[2].value = link;
948 	return create_new_nondir(idmap, fm, &args, dir, entry, S_IFLNK);
949 }
950 
951 void fuse_flush_time_update(struct inode *inode)
952 {
953 	int err = sync_inode_metadata(inode, 1);
954 
955 	mapping_set_error(inode->i_mapping, err);
956 }
957 
/*
 * Set the cached ctime of @inode to the current time, mark the inode
 * dirty and flush the time update.  Does nothing when IS_NOCMTIME() is
 * true for the inode.
 */
static void fuse_update_ctime_in_cache(struct inode *inode)
{
	if (IS_NOCMTIME(inode))
		return;

	inode_set_ctime_current(inode);
	mark_inode_dirty_sync(inode);
	fuse_flush_time_update(inode);
}
966 
967 void fuse_update_ctime(struct inode *inode)
968 {
969 	fuse_invalidate_attr_mask(inode, STATX_CTIME);
970 	fuse_update_ctime_in_cache(inode);
971 }
972 
973 static void fuse_entry_unlinked(struct dentry *entry)
974 {
975 	struct inode *inode = d_inode(entry);
976 	struct fuse_conn *fc = get_fuse_conn(inode);
977 	struct fuse_inode *fi = get_fuse_inode(inode);
978 
979 	spin_lock(&fi->lock);
980 	fi->attr_version = atomic64_inc_return(&fc->attr_version);
981 	/*
982 	 * If i_nlink == 0 then unlink doesn't make sense, yet this can
983 	 * happen if userspace filesystem is careless.  It would be
984 	 * difficult to enforce correct nlink usage so just ignore this
985 	 * condition here
986 	 */
987 	if (S_ISDIR(inode->i_mode))
988 		clear_nlink(inode);
989 	else if (inode->i_nlink > 0)
990 		drop_nlink(inode);
991 	spin_unlock(&fi->lock);
992 	fuse_invalidate_entry_cache(entry);
993 	fuse_update_ctime(inode);
994 }
995 
996 static int fuse_unlink(struct inode *dir, struct dentry *entry)
997 {
998 	int err;
999 	struct fuse_mount *fm = get_fuse_mount(dir);
1000 	FUSE_ARGS(args);
1001 
1002 	if (fuse_is_bad(dir))
1003 		return -EIO;
1004 
1005 	args.opcode = FUSE_UNLINK;
1006 	args.nodeid = get_node_id(dir);
1007 	args.in_numargs = 2;
1008 	fuse_set_zero_arg0(&args);
1009 	args.in_args[1].size = entry->d_name.len + 1;
1010 	args.in_args[1].value = entry->d_name.name;
1011 	err = fuse_simple_request(fm, &args);
1012 	if (!err) {
1013 		fuse_dir_changed(dir);
1014 		fuse_entry_unlinked(entry);
1015 	} else if (err == -EINTR || err == -ENOENT)
1016 		fuse_invalidate_entry(entry);
1017 	return err;
1018 }
1019 
1020 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
1021 {
1022 	int err;
1023 	struct fuse_mount *fm = get_fuse_mount(dir);
1024 	FUSE_ARGS(args);
1025 
1026 	if (fuse_is_bad(dir))
1027 		return -EIO;
1028 
1029 	args.opcode = FUSE_RMDIR;
1030 	args.nodeid = get_node_id(dir);
1031 	args.in_numargs = 2;
1032 	fuse_set_zero_arg0(&args);
1033 	args.in_args[1].size = entry->d_name.len + 1;
1034 	args.in_args[1].value = entry->d_name.name;
1035 	err = fuse_simple_request(fm, &args);
1036 	if (!err) {
1037 		fuse_dir_changed(dir);
1038 		fuse_entry_unlinked(entry);
1039 	} else if (err == -EINTR || err == -ENOENT)
1040 		fuse_invalidate_entry(entry);
1041 	return err;
1042 }
1043 
1044 static int fuse_rename_common(struct mnt_idmap *idmap, struct inode *olddir, struct dentry *oldent,
1045 			      struct inode *newdir, struct dentry *newent,
1046 			      unsigned int flags, int opcode, size_t argsize)
1047 {
1048 	int err;
1049 	struct fuse_rename2_in inarg;
1050 	struct fuse_mount *fm = get_fuse_mount(olddir);
1051 	FUSE_ARGS(args);
1052 
1053 	memset(&inarg, 0, argsize);
1054 	inarg.newdir = get_node_id(newdir);
1055 	inarg.flags = flags;
1056 	args.opcode = opcode;
1057 	args.nodeid = get_node_id(olddir);
1058 	args.in_numargs = 3;
1059 	args.in_args[0].size = argsize;
1060 	args.in_args[0].value = &inarg;
1061 	args.in_args[1].size = oldent->d_name.len + 1;
1062 	args.in_args[1].value = oldent->d_name.name;
1063 	args.in_args[2].size = newent->d_name.len + 1;
1064 	args.in_args[2].value = newent->d_name.name;
1065 	err = fuse_simple_idmap_request(idmap, fm, &args);
1066 	if (!err) {
1067 		/* ctime changes */
1068 		fuse_update_ctime(d_inode(oldent));
1069 
1070 		if (flags & RENAME_EXCHANGE)
1071 			fuse_update_ctime(d_inode(newent));
1072 
1073 		fuse_dir_changed(olddir);
1074 		if (olddir != newdir)
1075 			fuse_dir_changed(newdir);
1076 
1077 		/* newent will end up negative */
1078 		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent))
1079 			fuse_entry_unlinked(newent);
1080 	} else if (err == -EINTR || err == -ENOENT) {
1081 		/* If request was interrupted, DEITY only knows if the
1082 		   rename actually took place.  If the invalidation
1083 		   fails (e.g. some process has CWD under the renamed
1084 		   directory), then there can be inconsistency between
1085 		   the dcache and the real filesystem.  Tough luck. */
1086 		fuse_invalidate_entry(oldent);
1087 		if (d_really_is_positive(newent))
1088 			fuse_invalidate_entry(newent);
1089 	}
1090 
1091 	return err;
1092 }
1093 
1094 static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir,
1095 			struct dentry *oldent, struct inode *newdir,
1096 			struct dentry *newent, unsigned int flags)
1097 {
1098 	struct fuse_conn *fc = get_fuse_conn(olddir);
1099 	int err;
1100 
1101 	if (fuse_is_bad(olddir))
1102 		return -EIO;
1103 
1104 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
1105 		return -EINVAL;
1106 
1107 	if (flags) {
1108 		if (fc->no_rename2 || fc->minor < 23)
1109 			return -EINVAL;
1110 
1111 		err = fuse_rename_common((flags & RENAME_WHITEOUT) ? idmap : &invalid_mnt_idmap,
1112 					 olddir, oldent, newdir, newent, flags,
1113 					 FUSE_RENAME2,
1114 					 sizeof(struct fuse_rename2_in));
1115 		if (err == -ENOSYS) {
1116 			fc->no_rename2 = 1;
1117 			err = -EINVAL;
1118 		}
1119 	} else {
1120 		err = fuse_rename_common(&invalid_mnt_idmap, olddir, oldent, newdir, newent, 0,
1121 					 FUSE_RENAME,
1122 					 sizeof(struct fuse_rename_in));
1123 	}
1124 
1125 	return err;
1126 }
1127 
1128 static int fuse_link(struct dentry *entry, struct inode *newdir,
1129 		     struct dentry *newent)
1130 {
1131 	int err;
1132 	struct fuse_link_in inarg;
1133 	struct inode *inode = d_inode(entry);
1134 	struct fuse_mount *fm = get_fuse_mount(inode);
1135 	FUSE_ARGS(args);
1136 
1137 	if (fm->fc->no_link)
1138 		goto out;
1139 
1140 	memset(&inarg, 0, sizeof(inarg));
1141 	inarg.oldnodeid = get_node_id(inode);
1142 	args.opcode = FUSE_LINK;
1143 	args.in_numargs = 2;
1144 	args.in_args[0].size = sizeof(inarg);
1145 	args.in_args[0].value = &inarg;
1146 	args.in_args[1].size = newent->d_name.len + 1;
1147 	args.in_args[1].value = newent->d_name.name;
1148 	err = create_new_nondir(&invalid_mnt_idmap, fm, &args, newdir, newent, inode->i_mode);
1149 	if (!err)
1150 		fuse_update_ctime_in_cache(inode);
1151 	else if (err == -EINTR)
1152 		fuse_invalidate_attr(inode);
1153 
1154 	if (err == -ENOSYS)
1155 		fm->fc->no_link = 1;
1156 out:
1157 	if (fm->fc->no_link)
1158 		return -EPERM;
1159 
1160 	return err;
1161 }
1162 
1163 static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode,
1164 			  struct fuse_attr *attr, struct kstat *stat)
1165 {
1166 	unsigned int blkbits;
1167 	struct fuse_conn *fc = get_fuse_conn(inode);
1168 	vfsuid_t vfsuid = make_vfsuid(idmap, fc->user_ns,
1169 				      make_kuid(fc->user_ns, attr->uid));
1170 	vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns,
1171 				      make_kgid(fc->user_ns, attr->gid));
1172 
1173 	stat->dev = inode->i_sb->s_dev;
1174 	stat->ino = attr->ino;
1175 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
1176 	stat->nlink = attr->nlink;
1177 	stat->uid = vfsuid_into_kuid(vfsuid);
1178 	stat->gid = vfsgid_into_kgid(vfsgid);
1179 	stat->rdev = inode->i_rdev;
1180 	stat->atime.tv_sec = attr->atime;
1181 	stat->atime.tv_nsec = attr->atimensec;
1182 	stat->mtime.tv_sec = attr->mtime;
1183 	stat->mtime.tv_nsec = attr->mtimensec;
1184 	stat->ctime.tv_sec = attr->ctime;
1185 	stat->ctime.tv_nsec = attr->ctimensec;
1186 	stat->size = attr->size;
1187 	stat->blocks = attr->blocks;
1188 
1189 	if (attr->blksize != 0)
1190 		blkbits = ilog2(attr->blksize);
1191 	else
1192 		blkbits = inode->i_sb->s_blocksize_bits;
1193 
1194 	stat->blksize = 1 << blkbits;
1195 }
1196 
1197 static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr)
1198 {
1199 	memset(attr, 0, sizeof(*attr));
1200 	attr->ino = sx->ino;
1201 	attr->size = sx->size;
1202 	attr->blocks = sx->blocks;
1203 	attr->atime = sx->atime.tv_sec;
1204 	attr->mtime = sx->mtime.tv_sec;
1205 	attr->ctime = sx->ctime.tv_sec;
1206 	attr->atimensec = sx->atime.tv_nsec;
1207 	attr->mtimensec = sx->mtime.tv_nsec;
1208 	attr->ctimensec = sx->ctime.tv_nsec;
1209 	attr->mode = sx->mode;
1210 	attr->nlink = sx->nlink;
1211 	attr->uid = sx->uid;
1212 	attr->gid = sx->gid;
1213 	attr->rdev = new_encode_dev(MKDEV(sx->rdev_major, sx->rdev_minor));
1214 	attr->blksize = sx->blksize;
1215 }
1216 
1217 static int fuse_do_statx(struct mnt_idmap *idmap, struct inode *inode,
1218 			 struct file *file, struct kstat *stat)
1219 {
1220 	int err;
1221 	struct fuse_attr attr;
1222 	struct fuse_statx *sx;
1223 	struct fuse_statx_in inarg;
1224 	struct fuse_statx_out outarg;
1225 	struct fuse_mount *fm = get_fuse_mount(inode);
1226 	u64 attr_version = fuse_get_attr_version(fm->fc);
1227 	FUSE_ARGS(args);
1228 
1229 	memset(&inarg, 0, sizeof(inarg));
1230 	memset(&outarg, 0, sizeof(outarg));
1231 	/* Directories have separate file-handle space */
1232 	if (file && S_ISREG(inode->i_mode)) {
1233 		struct fuse_file *ff = file->private_data;
1234 
1235 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1236 		inarg.fh = ff->fh;
1237 	}
1238 	/* For now leave sync hints as the default, request all stats. */
1239 	inarg.sx_flags = 0;
1240 	inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME;
1241 	args.opcode = FUSE_STATX;
1242 	args.nodeid = get_node_id(inode);
1243 	args.in_numargs = 1;
1244 	args.in_args[0].size = sizeof(inarg);
1245 	args.in_args[0].value = &inarg;
1246 	args.out_numargs = 1;
1247 	args.out_args[0].size = sizeof(outarg);
1248 	args.out_args[0].value = &outarg;
1249 	err = fuse_simple_request(fm, &args);
1250 	if (err)
1251 		return err;
1252 
1253 	sx = &outarg.stat;
1254 	if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
1255 	    ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
1256 					 inode_wrong_type(inode, sx->mode)))) {
1257 		fuse_make_bad(inode);
1258 		return -EIO;
1259 	}
1260 
1261 	fuse_statx_to_attr(&outarg.stat, &attr);
1262 	if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) {
1263 		fuse_change_attributes(inode, &attr, &outarg.stat,
1264 				       ATTR_TIMEOUT(&outarg), attr_version);
1265 	}
1266 
1267 	if (stat) {
1268 		stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME);
1269 		stat->btime.tv_sec = sx->btime.tv_sec;
1270 		stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
1271 		fuse_fillattr(idmap, inode, &attr, stat);
1272 		stat->result_mask |= STATX_TYPE;
1273 	}
1274 
1275 	return 0;
1276 }
1277 
1278 static int fuse_do_getattr(struct mnt_idmap *idmap, struct inode *inode,
1279 			   struct kstat *stat, struct file *file)
1280 {
1281 	int err;
1282 	struct fuse_getattr_in inarg;
1283 	struct fuse_attr_out outarg;
1284 	struct fuse_mount *fm = get_fuse_mount(inode);
1285 	FUSE_ARGS(args);
1286 	u64 attr_version;
1287 
1288 	attr_version = fuse_get_attr_version(fm->fc);
1289 
1290 	memset(&inarg, 0, sizeof(inarg));
1291 	memset(&outarg, 0, sizeof(outarg));
1292 	/* Directories have separate file-handle space */
1293 	if (file && S_ISREG(inode->i_mode)) {
1294 		struct fuse_file *ff = file->private_data;
1295 
1296 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1297 		inarg.fh = ff->fh;
1298 	}
1299 	args.opcode = FUSE_GETATTR;
1300 	args.nodeid = get_node_id(inode);
1301 	args.in_numargs = 1;
1302 	args.in_args[0].size = sizeof(inarg);
1303 	args.in_args[0].value = &inarg;
1304 	args.out_numargs = 1;
1305 	args.out_args[0].size = sizeof(outarg);
1306 	args.out_args[0].value = &outarg;
1307 	err = fuse_simple_request(fm, &args);
1308 	if (!err) {
1309 		if (fuse_invalid_attr(&outarg.attr) ||
1310 		    inode_wrong_type(inode, outarg.attr.mode)) {
1311 			fuse_make_bad(inode);
1312 			err = -EIO;
1313 		} else {
1314 			fuse_change_attributes(inode, &outarg.attr, NULL,
1315 					       ATTR_TIMEOUT(&outarg),
1316 					       attr_version);
1317 			if (stat)
1318 				fuse_fillattr(idmap, inode, &outarg.attr, stat);
1319 		}
1320 	}
1321 	return err;
1322 }
1323 
1324 static int fuse_update_get_attr(struct mnt_idmap *idmap, struct inode *inode,
1325 				struct file *file, struct kstat *stat,
1326 				u32 request_mask, unsigned int flags)
1327 {
1328 	struct fuse_inode *fi = get_fuse_inode(inode);
1329 	struct fuse_conn *fc = get_fuse_conn(inode);
1330 	int err = 0;
1331 	bool sync;
1332 	u32 inval_mask = READ_ONCE(fi->inval_mask);
1333 	u32 cache_mask = fuse_get_cache_mask(inode);
1334 
1335 
1336 	/* FUSE only supports basic stats and possibly btime */
1337 	request_mask &= STATX_BASIC_STATS | STATX_BTIME;
1338 retry:
1339 	if (fc->no_statx)
1340 		request_mask &= STATX_BASIC_STATS;
1341 
1342 	if (!request_mask)
1343 		sync = false;
1344 	else if (flags & AT_STATX_FORCE_SYNC)
1345 		sync = true;
1346 	else if (flags & AT_STATX_DONT_SYNC)
1347 		sync = false;
1348 	else if (request_mask & inval_mask & ~cache_mask)
1349 		sync = true;
1350 	else
1351 		sync = time_before64(fi->i_time, get_jiffies_64());
1352 
1353 	if (sync) {
1354 		forget_all_cached_acls(inode);
1355 		/* Try statx if BTIME is requested */
1356 		if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) {
1357 			err = fuse_do_statx(idmap, inode, file, stat);
1358 			if (err == -ENOSYS) {
1359 				fc->no_statx = 1;
1360 				err = 0;
1361 				goto retry;
1362 			}
1363 		} else {
1364 			err = fuse_do_getattr(idmap, inode, stat, file);
1365 		}
1366 	} else if (stat) {
1367 		generic_fillattr(idmap, request_mask, inode, stat);
1368 		stat->mode = fi->orig_i_mode;
1369 		stat->ino = fi->orig_ino;
1370 		if (test_bit(FUSE_I_BTIME, &fi->state)) {
1371 			stat->btime = fi->i_btime;
1372 			stat->result_mask |= STATX_BTIME;
1373 		}
1374 	}
1375 
1376 	return err;
1377 }
1378 
1379 int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask)
1380 {
1381 	return fuse_update_get_attr(&nop_mnt_idmap, inode, file, NULL, mask, 0);
1382 }
1383 
1384 int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
1385 			     u64 child_nodeid, struct qstr *name, u32 flags)
1386 {
1387 	int err = -ENOTDIR;
1388 	struct inode *parent;
1389 	struct dentry *dir;
1390 	struct dentry *entry;
1391 
1392 	parent = fuse_ilookup(fc, parent_nodeid, NULL);
1393 	if (!parent)
1394 		return -ENOENT;
1395 
1396 	inode_lock_nested(parent, I_MUTEX_PARENT);
1397 	if (!S_ISDIR(parent->i_mode))
1398 		goto unlock;
1399 
1400 	err = -ENOENT;
1401 	dir = d_find_alias(parent);
1402 	if (!dir)
1403 		goto unlock;
1404 
1405 	name->hash = full_name_hash(dir, name->name, name->len);
1406 	entry = d_lookup(dir, name);
1407 	dput(dir);
1408 	if (!entry)
1409 		goto unlock;
1410 
1411 	fuse_dir_changed(parent);
1412 	if (!(flags & FUSE_EXPIRE_ONLY))
1413 		d_invalidate(entry);
1414 	fuse_invalidate_entry_cache(entry);
1415 
1416 	if (child_nodeid != 0 && d_really_is_positive(entry)) {
1417 		inode_lock(d_inode(entry));
1418 		if (get_node_id(d_inode(entry)) != child_nodeid) {
1419 			err = -ENOENT;
1420 			goto badentry;
1421 		}
1422 		if (d_mountpoint(entry)) {
1423 			err = -EBUSY;
1424 			goto badentry;
1425 		}
1426 		if (d_is_dir(entry)) {
1427 			shrink_dcache_parent(entry);
1428 			if (!simple_empty(entry)) {
1429 				err = -ENOTEMPTY;
1430 				goto badentry;
1431 			}
1432 			d_inode(entry)->i_flags |= S_DEAD;
1433 		}
1434 		dont_mount(entry);
1435 		clear_nlink(d_inode(entry));
1436 		err = 0;
1437  badentry:
1438 		inode_unlock(d_inode(entry));
1439 		if (!err)
1440 			d_delete(entry);
1441 	} else {
1442 		err = 0;
1443 	}
1444 	dput(entry);
1445 
1446  unlock:
1447 	inode_unlock(parent);
1448 	iput(parent);
1449 	return err;
1450 }
1451 
1452 static inline bool fuse_permissible_uidgid(struct fuse_conn *fc)
1453 {
1454 	const struct cred *cred = current_cred();
1455 
1456 	return (uid_eq(cred->euid, fc->user_id) &&
1457 		uid_eq(cred->suid, fc->user_id) &&
1458 		uid_eq(cred->uid,  fc->user_id) &&
1459 		gid_eq(cred->egid, fc->group_id) &&
1460 		gid_eq(cred->sgid, fc->group_id) &&
1461 		gid_eq(cred->gid,  fc->group_id));
1462 }
1463 
1464 /*
1465  * Calling into a user-controlled filesystem gives the filesystem
1466  * daemon ptrace-like capabilities over the current process.  This
1467  * means, that the filesystem daemon is able to record the exact
1468  * filesystem operations performed, and can also control the behavior
1469  * of the requester process in otherwise impossible ways.  For example
1470  * it can delay the operation for arbitrary length of time allowing
1471  * DoS against the requester.
1472  *
1473  * For this reason only those processes can call into the filesystem,
1474  * for which the owner of the mount has ptrace privilege.  This
1475  * excludes processes started by other users, suid or sgid processes.
1476  */
1477 bool fuse_allow_current_process(struct fuse_conn *fc)
1478 {
1479 	bool allow;
1480 
1481 	if (fc->allow_other)
1482 		allow = current_in_userns(fc->user_ns);
1483 	else
1484 		allow = fuse_permissible_uidgid(fc);
1485 
1486 	if (!allow && allow_sys_admin_access && capable(CAP_SYS_ADMIN))
1487 		allow = true;
1488 
1489 	return allow;
1490 }
1491 
1492 static int fuse_access(struct inode *inode, int mask)
1493 {
1494 	struct fuse_mount *fm = get_fuse_mount(inode);
1495 	FUSE_ARGS(args);
1496 	struct fuse_access_in inarg;
1497 	int err;
1498 
1499 	BUG_ON(mask & MAY_NOT_BLOCK);
1500 
1501 	/*
1502 	 * We should not send FUSE_ACCESS to the userspace
1503 	 * when idmapped mounts are enabled as for this case
1504 	 * we have fc->default_permissions = 1 and access
1505 	 * permission checks are done on the kernel side.
1506 	 */
1507 	WARN_ON_ONCE(!(fm->sb->s_iflags & SB_I_NOIDMAP));
1508 
1509 	if (fm->fc->no_access)
1510 		return 0;
1511 
1512 	memset(&inarg, 0, sizeof(inarg));
1513 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1514 	args.opcode = FUSE_ACCESS;
1515 	args.nodeid = get_node_id(inode);
1516 	args.in_numargs = 1;
1517 	args.in_args[0].size = sizeof(inarg);
1518 	args.in_args[0].value = &inarg;
1519 	err = fuse_simple_request(fm, &args);
1520 	if (err == -ENOSYS) {
1521 		fm->fc->no_access = 1;
1522 		err = 0;
1523 	}
1524 	return err;
1525 }
1526 
1527 static int fuse_perm_getattr(struct inode *inode, int mask)
1528 {
1529 	if (mask & MAY_NOT_BLOCK)
1530 		return -ECHILD;
1531 
1532 	forget_all_cached_acls(inode);
1533 	return fuse_do_getattr(&nop_mnt_idmap, inode, NULL, NULL);
1534 }
1535 
1536 /*
1537  * Check permission.  The two basic access models of FUSE are:
1538  *
1539  * 1) Local access checking ('default_permissions' mount option) based
1540  * on file mode.  This is the plain old disk filesystem permission
1541  * model.
1542  *
1543  * 2) "Remote" access checking, where server is responsible for
1544  * checking permission in each inode operation.  An exception to this
1545  * is if ->permission() was invoked from sys_access() in which case an
1546  * access request is sent.  Execute permission is still checked
1547  * locally based on file mode.
1548  */
1549 static int fuse_permission(struct mnt_idmap *idmap,
1550 			   struct inode *inode, int mask)
1551 {
1552 	struct fuse_conn *fc = get_fuse_conn(inode);
1553 	bool refreshed = false;
1554 	int err = 0;
1555 
1556 	if (fuse_is_bad(inode))
1557 		return -EIO;
1558 
1559 	if (!fuse_allow_current_process(fc))
1560 		return -EACCES;
1561 
1562 	/*
1563 	 * If attributes are needed, refresh them before proceeding
1564 	 */
1565 	if (fc->default_permissions ||
1566 	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1567 		struct fuse_inode *fi = get_fuse_inode(inode);
1568 		u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
1569 
1570 		if (perm_mask & READ_ONCE(fi->inval_mask) ||
1571 		    time_before64(fi->i_time, get_jiffies_64())) {
1572 			refreshed = true;
1573 
1574 			err = fuse_perm_getattr(inode, mask);
1575 			if (err)
1576 				return err;
1577 		}
1578 	}
1579 
1580 	if (fc->default_permissions) {
1581 		err = generic_permission(idmap, inode, mask);
1582 
1583 		/* If permission is denied, try to refresh file
1584 		   attributes.  This is also needed, because the root
1585 		   node will at first have no permissions */
1586 		if (err == -EACCES && !refreshed) {
1587 			err = fuse_perm_getattr(inode, mask);
1588 			if (!err)
1589 				err = generic_permission(idmap,
1590 							 inode, mask);
1591 		}
1592 
1593 		/* Note: the opposite of the above test does not
1594 		   exist.  So if permissions are revoked this won't be
1595 		   noticed immediately, only after the attribute
1596 		   timeout has expired */
1597 	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1598 		err = fuse_access(inode, mask);
1599 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1600 		if (!(inode->i_mode & S_IXUGO)) {
1601 			if (refreshed)
1602 				return -EACCES;
1603 
1604 			err = fuse_perm_getattr(inode, mask);
1605 			if (!err && !(inode->i_mode & S_IXUGO))
1606 				return -EACCES;
1607 		}
1608 	}
1609 	return err;
1610 }
1611 
1612 static int fuse_readlink_page(struct inode *inode, struct folio *folio)
1613 {
1614 	struct fuse_mount *fm = get_fuse_mount(inode);
1615 	struct fuse_folio_desc desc = { .length = PAGE_SIZE - 1 };
1616 	struct fuse_args_pages ap = {
1617 		.num_folios = 1,
1618 		.folios = &folio,
1619 		.descs = &desc,
1620 	};
1621 	char *link;
1622 	ssize_t res;
1623 
1624 	ap.args.opcode = FUSE_READLINK;
1625 	ap.args.nodeid = get_node_id(inode);
1626 	ap.args.out_pages = true;
1627 	ap.args.out_argvar = true;
1628 	ap.args.page_zeroing = true;
1629 	ap.args.out_numargs = 1;
1630 	ap.args.out_args[0].size = desc.length;
1631 	res = fuse_simple_request(fm, &ap.args);
1632 
1633 	fuse_invalidate_atime(inode);
1634 
1635 	if (res < 0)
1636 		return res;
1637 
1638 	if (WARN_ON(res >= PAGE_SIZE))
1639 		return -EIO;
1640 
1641 	link = folio_address(folio);
1642 	link[res] = '\0';
1643 
1644 	return 0;
1645 }
1646 
1647 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
1648 				 struct delayed_call *callback)
1649 {
1650 	struct fuse_conn *fc = get_fuse_conn(inode);
1651 	struct folio *folio;
1652 	int err;
1653 
1654 	err = -EIO;
1655 	if (fuse_is_bad(inode))
1656 		goto out_err;
1657 
1658 	if (fc->cache_symlinks)
1659 		return page_get_link_raw(dentry, inode, callback);
1660 
1661 	err = -ECHILD;
1662 	if (!dentry)
1663 		goto out_err;
1664 
1665 	folio = folio_alloc(GFP_KERNEL, 0);
1666 	err = -ENOMEM;
1667 	if (!folio)
1668 		goto out_err;
1669 
1670 	err = fuse_readlink_page(inode, folio);
1671 	if (err) {
1672 		folio_put(folio);
1673 		goto out_err;
1674 	}
1675 
1676 	set_delayed_call(callback, page_put_link, folio);
1677 
1678 	return folio_address(folio);
1679 
1680 out_err:
1681 	return ERR_PTR(err);
1682 }
1683 
1684 static int fuse_dir_open(struct inode *inode, struct file *file)
1685 {
1686 	struct fuse_mount *fm = get_fuse_mount(inode);
1687 	int err;
1688 
1689 	if (fuse_is_bad(inode))
1690 		return -EIO;
1691 
1692 	err = generic_file_open(inode, file);
1693 	if (err)
1694 		return err;
1695 
1696 	err = fuse_do_open(fm, get_node_id(inode), file, true);
1697 	if (!err) {
1698 		struct fuse_file *ff = file->private_data;
1699 
1700 		/*
1701 		 * Keep handling FOPEN_STREAM and FOPEN_NONSEEKABLE for
1702 		 * directories for backward compatibility, though it's unlikely
1703 		 * to be useful.
1704 		 */
1705 		if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE))
1706 			nonseekable_open(inode, file);
1707 		if (!(ff->open_flags & FOPEN_KEEP_CACHE))
1708 			invalidate_inode_pages2(inode->i_mapping);
1709 	}
1710 
1711 	return err;
1712 }
1713 
1714 static int fuse_dir_release(struct inode *inode, struct file *file)
1715 {
1716 	fuse_release_common(file, true);
1717 
1718 	return 0;
1719 }
1720 
1721 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1722 			  int datasync)
1723 {
1724 	struct inode *inode = file->f_mapping->host;
1725 	struct fuse_conn *fc = get_fuse_conn(inode);
1726 	int err;
1727 
1728 	if (fuse_is_bad(inode))
1729 		return -EIO;
1730 
1731 	if (fc->no_fsyncdir)
1732 		return 0;
1733 
1734 	inode_lock(inode);
1735 	err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
1736 	if (err == -ENOSYS) {
1737 		fc->no_fsyncdir = 1;
1738 		err = 0;
1739 	}
1740 	inode_unlock(inode);
1741 
1742 	return err;
1743 }
1744 
1745 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1746 			    unsigned long arg)
1747 {
1748 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1749 
1750 	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1751 	if (fc->minor < 18)
1752 		return -ENOTTY;
1753 
1754 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1755 }
1756 
1757 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1758 				   unsigned long arg)
1759 {
1760 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1761 
1762 	if (fc->minor < 18)
1763 		return -ENOTTY;
1764 
1765 	return fuse_ioctl_common(file, cmd, arg,
1766 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1767 }
1768 
/*
 * Decide whether a setattr with validity mask @ivalid should carry an
 * mtime update to the server.
 *
 * The update is sent if mtime was set explicitly or if the kernel's
 * mtime is the authoritative one (@trust_local_mtime).  Otherwise it is
 * suppressed only for size changes coming from open(O_TRUNC) or
 * ftruncate(); every other case updates.
 */
static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
{
	bool size_change_via_fd;

	if ((ivalid & ATTR_MTIME_SET) || trust_local_mtime)
		return true;

	size_change_via_fd = (ivalid & ATTR_SIZE) &&
			     (ivalid & (ATTR_OPEN | ATTR_FILE));

	return !size_change_via_fd;
}
1786 
1787 static void iattr_to_fattr(struct mnt_idmap *idmap, struct fuse_conn *fc,
1788 			   struct iattr *iattr, struct fuse_setattr_in *arg,
1789 			   bool trust_local_cmtime)
1790 {
1791 	unsigned ivalid = iattr->ia_valid;
1792 
1793 	if (ivalid & ATTR_MODE)
1794 		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1795 
1796 	if (ivalid & ATTR_UID) {
1797 		kuid_t fsuid = from_vfsuid(idmap, fc->user_ns, iattr->ia_vfsuid);
1798 
1799 		arg->valid |= FATTR_UID;
1800 		arg->uid = from_kuid(fc->user_ns, fsuid);
1801 	}
1802 
1803 	if (ivalid & ATTR_GID) {
1804 		kgid_t fsgid = from_vfsgid(idmap, fc->user_ns, iattr->ia_vfsgid);
1805 
1806 		arg->valid |= FATTR_GID;
1807 		arg->gid = from_kgid(fc->user_ns, fsgid);
1808 	}
1809 
1810 	if (ivalid & ATTR_SIZE)
1811 		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1812 	if (ivalid & ATTR_ATIME) {
1813 		arg->valid |= FATTR_ATIME;
1814 		arg->atime = iattr->ia_atime.tv_sec;
1815 		arg->atimensec = iattr->ia_atime.tv_nsec;
1816 		if (!(ivalid & ATTR_ATIME_SET))
1817 			arg->valid |= FATTR_ATIME_NOW;
1818 	}
1819 	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1820 		arg->valid |= FATTR_MTIME;
1821 		arg->mtime = iattr->ia_mtime.tv_sec;
1822 		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1823 		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1824 			arg->valid |= FATTR_MTIME_NOW;
1825 	}
1826 	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1827 		arg->valid |= FATTR_CTIME;
1828 		arg->ctime = iattr->ia_ctime.tv_sec;
1829 		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1830 	}
1831 }
1832 
1833 /*
1834  * Prevent concurrent writepages on inode
1835  *
1836  * This is done by adding a negative bias to the inode write counter
1837  * and waiting for all pending writes to finish.
1838  */
1839 void fuse_set_nowrite(struct inode *inode)
1840 {
1841 	struct fuse_inode *fi = get_fuse_inode(inode);
1842 
1843 	BUG_ON(!inode_is_locked(inode));
1844 
1845 	spin_lock(&fi->lock);
1846 	BUG_ON(fi->writectr < 0);
1847 	fi->writectr += FUSE_NOWRITE;
1848 	spin_unlock(&fi->lock);
1849 	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1850 }
1851 
1852 /*
1853  * Allow writepages on inode
1854  *
1855  * Remove the bias from the writecounter and send any queued
1856  * writepages.
1857  */
1858 static void __fuse_release_nowrite(struct inode *inode)
1859 {
1860 	struct fuse_inode *fi = get_fuse_inode(inode);
1861 
1862 	BUG_ON(fi->writectr != FUSE_NOWRITE);
1863 	fi->writectr = 0;
1864 	fuse_flush_writepages(inode);
1865 }
1866 
1867 void fuse_release_nowrite(struct inode *inode)
1868 {
1869 	struct fuse_inode *fi = get_fuse_inode(inode);
1870 
1871 	spin_lock(&fi->lock);
1872 	__fuse_release_nowrite(inode);
1873 	spin_unlock(&fi->lock);
1874 }
1875 
1876 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1877 			      struct inode *inode,
1878 			      struct fuse_setattr_in *inarg_p,
1879 			      struct fuse_attr_out *outarg_p)
1880 {
1881 	args->opcode = FUSE_SETATTR;
1882 	args->nodeid = get_node_id(inode);
1883 	args->in_numargs = 1;
1884 	args->in_args[0].size = sizeof(*inarg_p);
1885 	args->in_args[0].value = inarg_p;
1886 	args->out_numargs = 1;
1887 	args->out_args[0].size = sizeof(*outarg_p);
1888 	args->out_args[0].value = outarg_p;
1889 }
1890 
1891 /*
1892  * Flush inode->i_mtime to the server
1893  */
1894 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1895 {
1896 	struct fuse_mount *fm = get_fuse_mount(inode);
1897 	FUSE_ARGS(args);
1898 	struct fuse_setattr_in inarg;
1899 	struct fuse_attr_out outarg;
1900 
1901 	memset(&inarg, 0, sizeof(inarg));
1902 	memset(&outarg, 0, sizeof(outarg));
1903 
1904 	inarg.valid = FATTR_MTIME;
1905 	inarg.mtime = inode_get_mtime_sec(inode);
1906 	inarg.mtimensec = inode_get_mtime_nsec(inode);
1907 	if (fm->fc->minor >= 23) {
1908 		inarg.valid |= FATTR_CTIME;
1909 		inarg.ctime = inode_get_ctime_sec(inode);
1910 		inarg.ctimensec = inode_get_ctime_nsec(inode);
1911 	}
1912 	if (ff) {
1913 		inarg.valid |= FATTR_FH;
1914 		inarg.fh = ff->fh;
1915 	}
1916 	fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
1917 
1918 	return fuse_simple_request(fm, &args);
1919 }
1920 
1921 /*
1922  * Set attributes, and at the same time refresh them.
1923  *
1924  * Truncation is slightly complicated, because the 'truncate' request
1925  * may fail, in which case we don't want to touch the mapping.
1926  * vmtruncate() doesn't allow for this case, so do the rlimit checking
1927  * and the actual truncation by hand.
1928  */
1929 int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
1930 		    struct iattr *attr, struct file *file)
1931 {
1932 	struct inode *inode = d_inode(dentry);
1933 	struct fuse_mount *fm = get_fuse_mount(inode);
1934 	struct fuse_conn *fc = fm->fc;
1935 	struct fuse_inode *fi = get_fuse_inode(inode);
1936 	struct address_space *mapping = inode->i_mapping;
1937 	FUSE_ARGS(args);
1938 	struct fuse_setattr_in inarg;
1939 	struct fuse_attr_out outarg;
1940 	bool is_truncate = false;
1941 	bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode);
1942 	loff_t oldsize;
1943 	int err;
1944 	bool trust_local_cmtime = is_wb;
1945 	bool fault_blocked = false;
1946 
1947 	if (!fc->default_permissions)
1948 		attr->ia_valid |= ATTR_FORCE;
1949 
1950 	err = setattr_prepare(idmap, dentry, attr);
1951 	if (err)
1952 		return err;
1953 
1954 	if (attr->ia_valid & ATTR_SIZE) {
1955 		if (WARN_ON(!S_ISREG(inode->i_mode)))
1956 			return -EIO;
1957 		is_truncate = true;
1958 	}
1959 
1960 	if (FUSE_IS_DAX(inode) && is_truncate) {
1961 		filemap_invalidate_lock(mapping);
1962 		fault_blocked = true;
1963 		err = fuse_dax_break_layouts(inode, 0, -1);
1964 		if (err) {
1965 			filemap_invalidate_unlock(mapping);
1966 			return err;
1967 		}
1968 	}
1969 
1970 	if (attr->ia_valid & ATTR_OPEN) {
1971 		/* This is coming from open(..., ... | O_TRUNC); */
1972 		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
1973 		WARN_ON(attr->ia_size != 0);
1974 		if (fc->atomic_o_trunc) {
1975 			/*
1976 			 * No need to send request to userspace, since actual
1977 			 * truncation has already been done by OPEN.  But still
1978 			 * need to truncate page cache.
1979 			 */
1980 			i_size_write(inode, 0);
1981 			truncate_pagecache(inode, 0);
1982 			goto out;
1983 		}
1984 		file = NULL;
1985 	}
1986 
1987 	/* Flush dirty data/metadata before non-truncate SETATTR */
1988 	if (is_wb &&
1989 	    attr->ia_valid &
1990 			(ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
1991 			 ATTR_TIMES_SET)) {
1992 		err = write_inode_now(inode, true);
1993 		if (err)
1994 			return err;
1995 
1996 		fuse_set_nowrite(inode);
1997 		fuse_release_nowrite(inode);
1998 	}
1999 
2000 	if (is_truncate) {
2001 		fuse_set_nowrite(inode);
2002 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2003 		if (trust_local_cmtime && attr->ia_size != inode->i_size)
2004 			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
2005 	}
2006 
2007 	memset(&inarg, 0, sizeof(inarg));
2008 	memset(&outarg, 0, sizeof(outarg));
2009 	iattr_to_fattr(idmap, fc, attr, &inarg, trust_local_cmtime);
2010 	if (file) {
2011 		struct fuse_file *ff = file->private_data;
2012 		inarg.valid |= FATTR_FH;
2013 		inarg.fh = ff->fh;
2014 	}
2015 
2016 	/* Kill suid/sgid for non-directory chown unconditionally */
2017 	if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
2018 	    attr->ia_valid & (ATTR_UID | ATTR_GID))
2019 		inarg.valid |= FATTR_KILL_SUIDGID;
2020 
2021 	if (attr->ia_valid & ATTR_SIZE) {
2022 		/* For mandatory locking in truncate */
2023 		inarg.valid |= FATTR_LOCKOWNER;
2024 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
2025 
2026 		/* Kill suid/sgid for truncate only if no CAP_FSETID */
2027 		if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
2028 			inarg.valid |= FATTR_KILL_SUIDGID;
2029 	}
2030 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
2031 	err = fuse_simple_request(fm, &args);
2032 	if (err) {
2033 		if (err == -EINTR)
2034 			fuse_invalidate_attr(inode);
2035 		goto error;
2036 	}
2037 
2038 	if (fuse_invalid_attr(&outarg.attr) ||
2039 	    inode_wrong_type(inode, outarg.attr.mode)) {
2040 		fuse_make_bad(inode);
2041 		err = -EIO;
2042 		goto error;
2043 	}
2044 
2045 	spin_lock(&fi->lock);
2046 	/* the kernel maintains i_mtime locally */
2047 	if (trust_local_cmtime) {
2048 		if (attr->ia_valid & ATTR_MTIME)
2049 			inode_set_mtime_to_ts(inode, attr->ia_mtime);
2050 		if (attr->ia_valid & ATTR_CTIME)
2051 			inode_set_ctime_to_ts(inode, attr->ia_ctime);
2052 		/* FIXME: clear I_DIRTY_SYNC? */
2053 	}
2054 
2055 	fuse_change_attributes_common(inode, &outarg.attr, NULL,
2056 				      ATTR_TIMEOUT(&outarg),
2057 				      fuse_get_cache_mask(inode), 0);
2058 	oldsize = inode->i_size;
2059 	/* see the comment in fuse_change_attributes() */
2060 	if (!is_wb || is_truncate)
2061 		i_size_write(inode, outarg.attr.size);
2062 
2063 	if (is_truncate) {
2064 		/* NOTE: this may release/reacquire fi->lock */
2065 		__fuse_release_nowrite(inode);
2066 	}
2067 	spin_unlock(&fi->lock);
2068 
2069 	/*
2070 	 * Only call invalidate_inode_pages2() after removing
2071 	 * FUSE_NOWRITE, otherwise fuse_launder_folio() would deadlock.
2072 	 */
2073 	if ((is_truncate || !is_wb) &&
2074 	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
2075 		truncate_pagecache(inode, outarg.attr.size);
2076 		invalidate_inode_pages2(mapping);
2077 	}
2078 
2079 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2080 out:
2081 	if (fault_blocked)
2082 		filemap_invalidate_unlock(mapping);
2083 
2084 	return 0;
2085 
2086 error:
2087 	if (is_truncate)
2088 		fuse_release_nowrite(inode);
2089 
2090 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2091 
2092 	if (fault_blocked)
2093 		filemap_invalidate_unlock(mapping);
2094 	return err;
2095 }
2096 
2097 static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry,
2098 			struct iattr *attr)
2099 {
2100 	struct inode *inode = d_inode(entry);
2101 	struct fuse_conn *fc = get_fuse_conn(inode);
2102 	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
2103 	int ret;
2104 
2105 	if (fuse_is_bad(inode))
2106 		return -EIO;
2107 
2108 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
2109 		return -EACCES;
2110 
2111 	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
2112 		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
2113 				    ATTR_MODE);
2114 
2115 		/*
2116 		 * The only sane way to reliably kill suid/sgid is to do it in
2117 		 * the userspace filesystem
2118 		 *
2119 		 * This should be done on write(), truncate() and chown().
2120 		 */
2121 		if (!fc->handle_killpriv && !fc->handle_killpriv_v2) {
2122 			/*
2123 			 * ia_mode calculation may have used stale i_mode.
2124 			 * Refresh and recalculate.
2125 			 */
2126 			ret = fuse_do_getattr(idmap, inode, NULL, file);
2127 			if (ret)
2128 				return ret;
2129 
2130 			attr->ia_mode = inode->i_mode;
2131 			if (inode->i_mode & S_ISUID) {
2132 				attr->ia_valid |= ATTR_MODE;
2133 				attr->ia_mode &= ~S_ISUID;
2134 			}
2135 			if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
2136 				attr->ia_valid |= ATTR_MODE;
2137 				attr->ia_mode &= ~S_ISGID;
2138 			}
2139 		}
2140 	}
2141 	if (!attr->ia_valid)
2142 		return 0;
2143 
2144 	ret = fuse_do_setattr(idmap, entry, attr, file);
2145 	if (!ret) {
2146 		/*
2147 		 * If filesystem supports acls it may have updated acl xattrs in
2148 		 * the filesystem, so forget cached acls for the inode.
2149 		 */
2150 		if (fc->posix_acl)
2151 			forget_all_cached_acls(inode);
2152 
2153 		/* Directory mode changed, may need to revalidate access */
2154 		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
2155 			fuse_invalidate_entry_cache(entry);
2156 	}
2157 	return ret;
2158 }
2159 
2160 static int fuse_getattr(struct mnt_idmap *idmap,
2161 			const struct path *path, struct kstat *stat,
2162 			u32 request_mask, unsigned int flags)
2163 {
2164 	struct inode *inode = d_inode(path->dentry);
2165 	struct fuse_conn *fc = get_fuse_conn(inode);
2166 
2167 	if (fuse_is_bad(inode))
2168 		return -EIO;
2169 
2170 	if (!fuse_allow_current_process(fc)) {
2171 		if (!request_mask) {
2172 			/*
2173 			 * If user explicitly requested *nothing* then don't
2174 			 * error out, but return st_dev only.
2175 			 */
2176 			stat->result_mask = 0;
2177 			stat->dev = inode->i_sb->s_dev;
2178 			return 0;
2179 		}
2180 		return -EACCES;
2181 	}
2182 
2183 	return fuse_update_get_attr(idmap, inode, NULL, stat, request_mask, flags);
2184 }
2185 
2186 static const struct inode_operations fuse_dir_inode_operations = {
2187 	.lookup		= fuse_lookup,
2188 	.mkdir		= fuse_mkdir,
2189 	.symlink	= fuse_symlink,
2190 	.unlink		= fuse_unlink,
2191 	.rmdir		= fuse_rmdir,
2192 	.rename		= fuse_rename2,
2193 	.link		= fuse_link,
2194 	.setattr	= fuse_setattr,
2195 	.create		= fuse_create,
2196 	.atomic_open	= fuse_atomic_open,
2197 	.tmpfile	= fuse_tmpfile,
2198 	.mknod		= fuse_mknod,
2199 	.permission	= fuse_permission,
2200 	.getattr	= fuse_getattr,
2201 	.listxattr	= fuse_listxattr,
2202 	.get_inode_acl	= fuse_get_inode_acl,
2203 	.get_acl	= fuse_get_acl,
2204 	.set_acl	= fuse_set_acl,
2205 	.fileattr_get	= fuse_fileattr_get,
2206 	.fileattr_set	= fuse_fileattr_set,
2207 };
2208 
2209 static const struct file_operations fuse_dir_operations = {
2210 	.llseek		= generic_file_llseek,
2211 	.read		= generic_read_dir,
2212 	.iterate_shared	= fuse_readdir,
2213 	.open		= fuse_dir_open,
2214 	.release	= fuse_dir_release,
2215 	.fsync		= fuse_dir_fsync,
2216 	.unlocked_ioctl	= fuse_dir_ioctl,
2217 	.compat_ioctl	= fuse_dir_compat_ioctl,
2218 };
2219 
2220 static const struct inode_operations fuse_common_inode_operations = {
2221 	.setattr	= fuse_setattr,
2222 	.permission	= fuse_permission,
2223 	.getattr	= fuse_getattr,
2224 	.listxattr	= fuse_listxattr,
2225 	.get_inode_acl	= fuse_get_inode_acl,
2226 	.get_acl	= fuse_get_acl,
2227 	.set_acl	= fuse_set_acl,
2228 	.fileattr_get	= fuse_fileattr_get,
2229 	.fileattr_set	= fuse_fileattr_set,
2230 };
2231 
2232 static const struct inode_operations fuse_symlink_inode_operations = {
2233 	.setattr	= fuse_setattr,
2234 	.get_link	= fuse_get_link,
2235 	.getattr	= fuse_getattr,
2236 	.listxattr	= fuse_listxattr,
2237 };
2238 
2239 void fuse_init_common(struct inode *inode)
2240 {
2241 	inode->i_op = &fuse_common_inode_operations;
2242 }
2243 
2244 void fuse_init_dir(struct inode *inode)
2245 {
2246 	struct fuse_inode *fi = get_fuse_inode(inode);
2247 
2248 	inode->i_op = &fuse_dir_inode_operations;
2249 	inode->i_fop = &fuse_dir_operations;
2250 
2251 	spin_lock_init(&fi->rdc.lock);
2252 	fi->rdc.cached = false;
2253 	fi->rdc.size = 0;
2254 	fi->rdc.pos = 0;
2255 	fi->rdc.version = 0;
2256 }
2257 
2258 static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
2259 {
2260 	int err = fuse_readlink_page(folio->mapping->host, folio);
2261 
2262 	if (!err)
2263 		folio_mark_uptodate(folio);
2264 
2265 	folio_unlock(folio);
2266 
2267 	return err;
2268 }
2269 
2270 static const struct address_space_operations fuse_symlink_aops = {
2271 	.read_folio	= fuse_symlink_read_folio,
2272 };
2273 
2274 void fuse_init_symlink(struct inode *inode)
2275 {
2276 	inode->i_op = &fuse_symlink_inode_operations;
2277 	inode->i_data.a_ops = &fuse_symlink_aops;
2278 	inode_nohighmem(inode);
2279 }
2280