xref: /linux/fs/fuse/dir.c (revision 7fc2cd2e4b398c57c9cf961cfea05eadbf34c05c)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/fs_context.h>
14 #include <linux/moduleparam.h>
15 #include <linux/sched.h>
16 #include <linux/namei.h>
17 #include <linux/slab.h>
18 #include <linux/xattr.h>
19 #include <linux/iversion.h>
20 #include <linux/posix_acl.h>
21 #include <linux/security.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 
25 static bool __read_mostly allow_sys_admin_access;
26 module_param(allow_sys_admin_access, bool, 0644);
27 MODULE_PARM_DESC(allow_sys_admin_access,
28 		 "Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check");
29 
30 static void fuse_advise_use_readdirplus(struct inode *dir)
31 {
32 	struct fuse_inode *fi = get_fuse_inode(dir);
33 
34 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
35 }
36 
37 #if BITS_PER_LONG >= 64
38 static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
39 {
40 	entry->d_fsdata = (void *) time;
41 }
42 
43 static inline u64 fuse_dentry_time(const struct dentry *entry)
44 {
45 	return (u64)entry->d_fsdata;
46 }
47 
48 #else
49 union fuse_dentry {
50 	u64 time;
51 	struct rcu_head rcu;
52 };
53 
54 static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
55 {
56 	((union fuse_dentry *) dentry->d_fsdata)->time = time;
57 }
58 
59 static inline u64 fuse_dentry_time(const struct dentry *entry)
60 {
61 	return ((union fuse_dentry *) entry->d_fsdata)->time;
62 }
63 #endif
64 
65 static void fuse_dentry_settime(struct dentry *dentry, u64 time)
66 {
67 	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
68 	bool delete = !time && fc->delete_stale;
69 	/*
70 	 * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
71 	 * Don't care about races, either way it's just an optimization
72 	 */
73 	if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
74 	    (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
75 		spin_lock(&dentry->d_lock);
76 		if (!delete)
77 			dentry->d_flags &= ~DCACHE_OP_DELETE;
78 		else
79 			dentry->d_flags |= DCACHE_OP_DELETE;
80 		spin_unlock(&dentry->d_lock);
81 	}
82 
83 	__fuse_dentry_settime(dentry, time);
84 }
85 
/*
 * FUSE caches dentries and attributes with separate timeouts.  The
 * time, in jiffies, until which the dentry or the attributes remain
 * valid is stored in dentry->d_fsdata and fuse_inode->i_time
 * respectively.
 */
91 
92 /*
93  * Calculate the time in jiffies until a dentry/attributes are valid
94  */
95 u64 fuse_time_to_jiffies(u64 sec, u32 nsec)
96 {
97 	if (sec || nsec) {
98 		struct timespec64 ts = {
99 			sec,
100 			min_t(u32, nsec, NSEC_PER_SEC - 1)
101 		};
102 
103 		return get_jiffies_64() + timespec64_to_jiffies(&ts);
104 	} else
105 		return 0;
106 }
107 
108 /*
109  * Set dentry and possibly attribute timeouts from the lookup/mk*
110  * replies
111  */
112 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
113 {
114 	fuse_dentry_settime(entry,
115 		fuse_time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
116 }
117 
118 void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
119 {
120 	set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask);
121 }
122 
123 /*
124  * Mark the attributes as stale, so that at the next call to
125  * ->getattr() they will be fetched from userspace
126  */
127 void fuse_invalidate_attr(struct inode *inode)
128 {
129 	fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
130 }
131 
132 static void fuse_dir_changed(struct inode *dir)
133 {
134 	fuse_invalidate_attr(dir);
135 	inode_maybe_inc_iversion(dir, false);
136 }
137 
138 /*
139  * Mark the attributes as stale due to an atime change.  Avoid the invalidate if
140  * atime is not used.
141  */
142 void fuse_invalidate_atime(struct inode *inode)
143 {
144 	if (!IS_RDONLY(inode))
145 		fuse_invalidate_attr_mask(inode, STATX_ATIME);
146 }
147 
148 /*
149  * Just mark the entry as stale, so that a next attempt to look it up
150  * will result in a new lookup call to userspace
151  *
152  * This is called when a dentry is about to become negative and the
153  * timeout is unknown (unlink, rmdir, rename and in some cases
154  * lookup)
155  */
156 void fuse_invalidate_entry_cache(struct dentry *entry)
157 {
158 	fuse_dentry_settime(entry, 0);
159 }
160 
/*
 * Like fuse_invalidate_entry_cache(), but first ask the dcache to
 * invalidate the dentry (d_invalidate()) so it may drop out of the hash
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);

	/* Zero the timeout regardless of what d_invalidate() achieved */
	fuse_invalidate_entry_cache(entry);
}
170 
171 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
172 			     u64 nodeid, const struct qstr *name,
173 			     struct fuse_entry_out *outarg)
174 {
175 	memset(outarg, 0, sizeof(struct fuse_entry_out));
176 	args->opcode = FUSE_LOOKUP;
177 	args->nodeid = nodeid;
178 	args->in_numargs = 3;
179 	fuse_set_zero_arg0(args);
180 	args->in_args[1].size = name->len;
181 	args->in_args[1].value = name->name;
182 	args->in_args[2].size = 1;
183 	args->in_args[2].value = "";
184 	args->out_numargs = 1;
185 	args->out_args[0].size = sizeof(struct fuse_entry_out);
186 	args->out_args[0].value = outarg;
187 }
188 
189 /*
190  * Check whether the dentry is still valid
191  *
192  * If the entry validity timeout has expired and the dentry is
193  * positive, try to redo the lookup.  If the lookup results in a
194  * different inode, then let the VFS invalidate the dentry and redo
195  * the lookup once more.  If the lookup results in the same inode,
196  * then refresh the attributes, timeouts and mark the dentry valid.
197  */
198 static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
199 				  struct dentry *entry, unsigned int flags)
200 {
201 	struct inode *inode;
202 	struct fuse_mount *fm;
203 	struct fuse_conn *fc;
204 	struct fuse_inode *fi;
205 	int ret;
206 
207 	fc = get_fuse_conn_super(dir->i_sb);
208 	if (entry->d_time < atomic_read(&fc->epoch))
209 		goto invalid;
210 
211 	inode = d_inode_rcu(entry);
212 	if (inode && fuse_is_bad(inode))
213 		goto invalid;
214 	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
215 		 (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) {
216 		struct fuse_entry_out outarg;
217 		FUSE_ARGS(args);
218 		struct fuse_forget_link *forget;
219 		u64 attr_version;
220 
221 		/* For negative dentries, always do a fresh lookup */
222 		if (!inode)
223 			goto invalid;
224 
225 		ret = -ECHILD;
226 		if (flags & LOOKUP_RCU)
227 			goto out;
228 
229 		fm = get_fuse_mount(inode);
230 
231 		forget = fuse_alloc_forget();
232 		ret = -ENOMEM;
233 		if (!forget)
234 			goto out;
235 
236 		attr_version = fuse_get_attr_version(fm->fc);
237 
238 		fuse_lookup_init(fm->fc, &args, get_node_id(dir),
239 				 name, &outarg);
240 		ret = fuse_simple_request(fm, &args);
241 		/* Zero nodeid is same as -ENOENT */
242 		if (!ret && !outarg.nodeid)
243 			ret = -ENOENT;
244 		if (!ret) {
245 			fi = get_fuse_inode(inode);
246 			if (outarg.nodeid != get_node_id(inode) ||
247 			    (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
248 				fuse_queue_forget(fm->fc, forget,
249 						  outarg.nodeid, 1);
250 				goto invalid;
251 			}
252 			spin_lock(&fi->lock);
253 			fi->nlookup++;
254 			spin_unlock(&fi->lock);
255 		}
256 		kfree(forget);
257 		if (ret == -ENOMEM || ret == -EINTR)
258 			goto out;
259 		if (ret || fuse_invalid_attr(&outarg.attr) ||
260 		    fuse_stale_inode(inode, outarg.generation, &outarg.attr))
261 			goto invalid;
262 
263 		forget_all_cached_acls(inode);
264 		fuse_change_attributes(inode, &outarg.attr, NULL,
265 				       ATTR_TIMEOUT(&outarg),
266 				       attr_version);
267 		fuse_change_entry_timeout(entry, &outarg);
268 	} else if (inode) {
269 		fi = get_fuse_inode(inode);
270 		if (flags & LOOKUP_RCU) {
271 			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
272 				return -ECHILD;
273 		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
274 			fuse_advise_use_readdirplus(dir);
275 		}
276 	}
277 	ret = 1;
278 out:
279 	return ret;
280 
281 invalid:
282 	ret = 0;
283 	goto out;
284 }
285 
286 #if BITS_PER_LONG < 64
287 static int fuse_dentry_init(struct dentry *dentry)
288 {
289 	dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
290 				   GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
291 
292 	return dentry->d_fsdata ? 0 : -ENOMEM;
293 }
294 static void fuse_dentry_release(struct dentry *dentry)
295 {
296 	union fuse_dentry *fd = dentry->d_fsdata;
297 
298 	kfree_rcu(fd, rcu);
299 }
300 #endif
301 
302 static int fuse_dentry_delete(const struct dentry *dentry)
303 {
304 	return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
305 }
306 
307 /*
308  * Create a fuse_mount object with a new superblock (with path->dentry
309  * as the root), and return that mount so it can be auto-mounted on
310  * @path.
311  */
312 static struct vfsmount *fuse_dentry_automount(struct path *path)
313 {
314 	struct fs_context *fsc;
315 	struct vfsmount *mnt;
316 	struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
317 
318 	fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
319 	if (IS_ERR(fsc))
320 		return ERR_CAST(fsc);
321 
322 	/* Pass the FUSE inode of the mount for fuse_get_tree_submount() */
323 	fsc->fs_private = mp_fi;
324 
325 	/* Create the submount */
326 	mnt = fc_mount(fsc);
327 	put_fs_context(fsc);
328 	return mnt;
329 }
330 
331 const struct dentry_operations fuse_dentry_operations = {
332 	.d_revalidate	= fuse_dentry_revalidate,
333 	.d_delete	= fuse_dentry_delete,
334 #if BITS_PER_LONG < 64
335 	.d_init		= fuse_dentry_init,
336 	.d_release	= fuse_dentry_release,
337 #endif
338 	.d_automount	= fuse_dentry_automount,
339 };
340 
341 int fuse_valid_type(int m)
342 {
343 	return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
344 		S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
345 }
346 
347 static bool fuse_valid_size(u64 size)
348 {
349 	return size <= LLONG_MAX;
350 }
351 
352 bool fuse_invalid_attr(struct fuse_attr *attr)
353 {
354 	return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
355 }
356 
357 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
358 		     struct fuse_entry_out *outarg, struct inode **inode)
359 {
360 	struct fuse_mount *fm = get_fuse_mount_super(sb);
361 	FUSE_ARGS(args);
362 	struct fuse_forget_link *forget;
363 	u64 attr_version, evict_ctr;
364 	int err;
365 
366 	*inode = NULL;
367 	err = -ENAMETOOLONG;
368 	if (name->len > fm->fc->name_max)
369 		goto out;
370 
371 
372 	forget = fuse_alloc_forget();
373 	err = -ENOMEM;
374 	if (!forget)
375 		goto out;
376 
377 	attr_version = fuse_get_attr_version(fm->fc);
378 	evict_ctr = fuse_get_evict_ctr(fm->fc);
379 
380 	fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
381 	err = fuse_simple_request(fm, &args);
382 	/* Zero nodeid is same as -ENOENT, but with valid timeout */
383 	if (err || !outarg->nodeid)
384 		goto out_put_forget;
385 
386 	err = -EIO;
387 	if (fuse_invalid_attr(&outarg->attr))
388 		goto out_put_forget;
389 	if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
390 		pr_warn_once("root generation should be zero\n");
391 		outarg->generation = 0;
392 	}
393 
394 	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
395 			   &outarg->attr, ATTR_TIMEOUT(outarg),
396 			   attr_version, evict_ctr);
397 	err = -ENOMEM;
398 	if (!*inode) {
399 		fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
400 		goto out;
401 	}
402 	err = 0;
403 
404  out_put_forget:
405 	kfree(forget);
406  out:
407 	return err;
408 }
409 
410 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
411 				  unsigned int flags)
412 {
413 	struct fuse_entry_out outarg;
414 	struct fuse_conn *fc;
415 	struct inode *inode;
416 	struct dentry *newent;
417 	int err, epoch;
418 	bool outarg_valid = true;
419 	bool locked;
420 
421 	if (fuse_is_bad(dir))
422 		return ERR_PTR(-EIO);
423 
424 	fc = get_fuse_conn_super(dir->i_sb);
425 	epoch = atomic_read(&fc->epoch);
426 
427 	locked = fuse_lock_inode(dir);
428 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
429 			       &outarg, &inode);
430 	fuse_unlock_inode(dir, locked);
431 	if (err == -ENOENT) {
432 		outarg_valid = false;
433 		err = 0;
434 	}
435 	if (err)
436 		goto out_err;
437 
438 	err = -EIO;
439 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
440 		goto out_iput;
441 
442 	newent = d_splice_alias(inode, entry);
443 	err = PTR_ERR(newent);
444 	if (IS_ERR(newent))
445 		goto out_err;
446 
447 	entry = newent ? newent : entry;
448 	entry->d_time = epoch;
449 	if (outarg_valid)
450 		fuse_change_entry_timeout(entry, &outarg);
451 	else
452 		fuse_invalidate_entry_cache(entry);
453 
454 	if (inode)
455 		fuse_advise_use_readdirplus(dir);
456 	return newent;
457 
458  out_iput:
459 	iput(inode);
460  out_err:
461 	return ERR_PTR(err);
462 }
463 
464 static int get_security_context(struct dentry *entry, umode_t mode,
465 				struct fuse_in_arg *ext)
466 {
467 	struct fuse_secctx *fctx;
468 	struct fuse_secctx_header *header;
469 	struct lsm_context lsmctx = { };
470 	void *ptr;
471 	u32 total_len = sizeof(*header);
472 	int err, nr_ctx = 0;
473 	const char *name = NULL;
474 	size_t namelen;
475 
476 	err = security_dentry_init_security(entry, mode, &entry->d_name,
477 					    &name, &lsmctx);
478 
479 	/* If no LSM is supporting this security hook ignore error */
480 	if (err && err != -EOPNOTSUPP)
481 		goto out_err;
482 
483 	if (lsmctx.len) {
484 		nr_ctx = 1;
485 		namelen = strlen(name) + 1;
486 		err = -EIO;
487 		if (WARN_ON(namelen > XATTR_NAME_MAX + 1 ||
488 		    lsmctx.len > S32_MAX))
489 			goto out_err;
490 		total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen +
491 					    lsmctx.len);
492 	}
493 
494 	err = -ENOMEM;
495 	header = ptr = kzalloc(total_len, GFP_KERNEL);
496 	if (!ptr)
497 		goto out_err;
498 
499 	header->nr_secctx = nr_ctx;
500 	header->size = total_len;
501 	ptr += sizeof(*header);
502 	if (nr_ctx) {
503 		fctx = ptr;
504 		fctx->size = lsmctx.len;
505 		ptr += sizeof(*fctx);
506 
507 		strcpy(ptr, name);
508 		ptr += namelen;
509 
510 		memcpy(ptr, lsmctx.context, lsmctx.len);
511 	}
512 	ext->size = total_len;
513 	ext->value = header;
514 	err = 0;
515 out_err:
516 	if (nr_ctx)
517 		security_release_secctx(&lsmctx);
518 	return err;
519 }
520 
521 static void *extend_arg(struct fuse_in_arg *buf, u32 bytes)
522 {
523 	void *p;
524 	u32 newlen = buf->size + bytes;
525 
526 	p = krealloc(buf->value, newlen, GFP_KERNEL);
527 	if (!p) {
528 		kfree(buf->value);
529 		buf->size = 0;
530 		buf->value = NULL;
531 		return NULL;
532 	}
533 
534 	memset(p + buf->size, 0, bytes);
535 	buf->value = p;
536 	buf->size = newlen;
537 
538 	return p + newlen - bytes;
539 }
540 
541 static u32 fuse_ext_size(size_t size)
542 {
543 	return FUSE_REC_ALIGN(sizeof(struct fuse_ext_header) + size);
544 }
545 
546 /*
547  * This adds just a single supplementary group that matches the parent's group.
548  */
549 static int get_create_supp_group(struct mnt_idmap *idmap,
550 				 struct inode *dir,
551 				 struct fuse_in_arg *ext)
552 {
553 	struct fuse_conn *fc = get_fuse_conn(dir);
554 	struct fuse_ext_header *xh;
555 	struct fuse_supp_groups *sg;
556 	kgid_t kgid = dir->i_gid;
557 	vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns, kgid);
558 	gid_t parent_gid = from_kgid(fc->user_ns, kgid);
559 
560 	u32 sg_len = fuse_ext_size(sizeof(*sg) + sizeof(sg->groups[0]));
561 
562 	if (parent_gid == (gid_t) -1 || vfsgid_eq_kgid(vfsgid, current_fsgid()) ||
563 	    !vfsgid_in_group_p(vfsgid))
564 		return 0;
565 
566 	xh = extend_arg(ext, sg_len);
567 	if (!xh)
568 		return -ENOMEM;
569 
570 	xh->size = sg_len;
571 	xh->type = FUSE_EXT_GROUPS;
572 
573 	sg = (struct fuse_supp_groups *) &xh[1];
574 	sg->nr_groups = 1;
575 	sg->groups[0] = parent_gid;
576 
577 	return 0;
578 }
579 
580 static int get_create_ext(struct mnt_idmap *idmap,
581 			  struct fuse_args *args,
582 			  struct inode *dir, struct dentry *dentry,
583 			  umode_t mode)
584 {
585 	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
586 	struct fuse_in_arg ext = { .size = 0, .value = NULL };
587 	int err = 0;
588 
589 	if (fc->init_security)
590 		err = get_security_context(dentry, mode, &ext);
591 	if (!err && fc->create_supp_group)
592 		err = get_create_supp_group(idmap, dir, &ext);
593 
594 	if (!err && ext.size) {
595 		WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args));
596 		args->is_ext = true;
597 		args->ext_idx = args->in_numargs++;
598 		args->in_args[args->ext_idx] = ext;
599 	} else {
600 		kfree(ext.value);
601 	}
602 
603 	return err;
604 }
605 
606 static void free_ext_value(struct fuse_args *args)
607 {
608 	if (args->is_ext)
609 		kfree(args->in_args[args->ext_idx].value);
610 }
611 
612 /*
613  * Atomic create+open operation
614  *
615  * If the filesystem doesn't support this, then fall back to separate
616  * 'mknod' + 'open' requests.
617  */
618 static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
619 			    struct dentry *entry, struct file *file,
620 			    unsigned int flags, umode_t mode, u32 opcode)
621 {
622 	struct inode *inode;
623 	struct fuse_mount *fm = get_fuse_mount(dir);
624 	FUSE_ARGS(args);
625 	struct fuse_forget_link *forget;
626 	struct fuse_create_in inarg;
627 	struct fuse_open_out *outopenp;
628 	struct fuse_entry_out outentry;
629 	struct fuse_inode *fi;
630 	struct fuse_file *ff;
631 	int epoch, err;
632 	bool trunc = flags & O_TRUNC;
633 
634 	/* Userspace expects S_IFREG in create mode */
635 	BUG_ON((mode & S_IFMT) != S_IFREG);
636 
637 	epoch = atomic_read(&fm->fc->epoch);
638 	forget = fuse_alloc_forget();
639 	err = -ENOMEM;
640 	if (!forget)
641 		goto out_err;
642 
643 	err = -ENOMEM;
644 	ff = fuse_file_alloc(fm, true);
645 	if (!ff)
646 		goto out_put_forget_req;
647 
648 	if (!fm->fc->dont_mask)
649 		mode &= ~current_umask();
650 
651 	flags &= ~O_NOCTTY;
652 	memset(&inarg, 0, sizeof(inarg));
653 	memset(&outentry, 0, sizeof(outentry));
654 	inarg.flags = flags;
655 	inarg.mode = mode;
656 	inarg.umask = current_umask();
657 
658 	if (fm->fc->handle_killpriv_v2 && trunc &&
659 	    !(flags & O_EXCL) && !capable(CAP_FSETID)) {
660 		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
661 	}
662 
663 	args.opcode = opcode;
664 	args.nodeid = get_node_id(dir);
665 	args.in_numargs = 2;
666 	args.in_args[0].size = sizeof(inarg);
667 	args.in_args[0].value = &inarg;
668 	args.in_args[1].size = entry->d_name.len + 1;
669 	args.in_args[1].value = entry->d_name.name;
670 	args.out_numargs = 2;
671 	args.out_args[0].size = sizeof(outentry);
672 	args.out_args[0].value = &outentry;
673 	/* Store outarg for fuse_finish_open() */
674 	outopenp = &ff->args->open_outarg;
675 	args.out_args[1].size = sizeof(*outopenp);
676 	args.out_args[1].value = outopenp;
677 
678 	err = get_create_ext(idmap, &args, dir, entry, mode);
679 	if (err)
680 		goto out_free_ff;
681 
682 	err = fuse_simple_idmap_request(idmap, fm, &args);
683 	free_ext_value(&args);
684 	if (err)
685 		goto out_free_ff;
686 
687 	err = -EIO;
688 	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
689 	    fuse_invalid_attr(&outentry.attr))
690 		goto out_free_ff;
691 
692 	ff->fh = outopenp->fh;
693 	ff->nodeid = outentry.nodeid;
694 	ff->open_flags = outopenp->open_flags;
695 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
696 			  &outentry.attr, ATTR_TIMEOUT(&outentry), 0, 0);
697 	if (!inode) {
698 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
699 		fuse_sync_release(NULL, ff, flags);
700 		fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
701 		err = -ENOMEM;
702 		goto out_err;
703 	}
704 	kfree(forget);
705 	d_instantiate(entry, inode);
706 	entry->d_time = epoch;
707 	fuse_change_entry_timeout(entry, &outentry);
708 	fuse_dir_changed(dir);
709 	err = generic_file_open(inode, file);
710 	if (!err) {
711 		file->private_data = ff;
712 		err = finish_open(file, entry, fuse_finish_open);
713 	}
714 	if (err) {
715 		fi = get_fuse_inode(inode);
716 		fuse_sync_release(fi, ff, flags);
717 	} else {
718 		if (fm->fc->atomic_o_trunc && trunc)
719 			truncate_pagecache(inode, 0);
720 		else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
721 			invalidate_inode_pages2(inode->i_mapping);
722 	}
723 	return err;
724 
725 out_free_ff:
726 	fuse_file_free(ff);
727 out_put_forget_req:
728 	kfree(forget);
729 out_err:
730 	return err;
731 }
732 
733 static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
734 		      umode_t, dev_t);
735 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
736 			    struct file *file, unsigned flags,
737 			    umode_t mode)
738 {
739 	int err;
740 	struct mnt_idmap *idmap = file_mnt_idmap(file);
741 	struct fuse_conn *fc = get_fuse_conn(dir);
742 
743 	if (fuse_is_bad(dir))
744 		return -EIO;
745 
746 	if (d_in_lookup(entry)) {
747 		struct dentry *res = fuse_lookup(dir, entry, 0);
748 		if (res || d_really_is_positive(entry))
749 			return finish_no_open(file, res);
750 	}
751 
752 	if (!(flags & O_CREAT))
753 		return finish_no_open(file, NULL);
754 
755 	/* Only creates */
756 	file->f_mode |= FMODE_CREATED;
757 
758 	if (fc->no_create)
759 		goto mknod;
760 
761 	err = fuse_create_open(idmap, dir, entry, file, flags, mode, FUSE_CREATE);
762 	if (err == -ENOSYS) {
763 		fc->no_create = 1;
764 		goto mknod;
765 	} else if (err == -EEXIST)
766 		fuse_invalidate_entry(entry);
767 	return err;
768 
769 mknod:
770 	err = fuse_mknod(idmap, dir, entry, mode, 0);
771 	if (err)
772 		return err;
773 	return finish_no_open(file, NULL);
774 }
775 
776 /*
777  * Code shared between mknod, mkdir, symlink and link
778  */
779 static struct dentry *create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm,
780 				       struct fuse_args *args, struct inode *dir,
781 				       struct dentry *entry, umode_t mode)
782 {
783 	struct fuse_entry_out outarg;
784 	struct inode *inode;
785 	struct dentry *d;
786 	struct fuse_forget_link *forget;
787 	int epoch, err;
788 
789 	if (fuse_is_bad(dir))
790 		return ERR_PTR(-EIO);
791 
792 	epoch = atomic_read(&fm->fc->epoch);
793 
794 	forget = fuse_alloc_forget();
795 	if (!forget)
796 		return ERR_PTR(-ENOMEM);
797 
798 	memset(&outarg, 0, sizeof(outarg));
799 	args->nodeid = get_node_id(dir);
800 	args->out_numargs = 1;
801 	args->out_args[0].size = sizeof(outarg);
802 	args->out_args[0].value = &outarg;
803 
804 	if (args->opcode != FUSE_LINK) {
805 		err = get_create_ext(idmap, args, dir, entry, mode);
806 		if (err)
807 			goto out_put_forget_req;
808 	}
809 
810 	err = fuse_simple_idmap_request(idmap, fm, args);
811 	free_ext_value(args);
812 	if (err)
813 		goto out_put_forget_req;
814 
815 	err = -EIO;
816 	if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
817 		goto out_put_forget_req;
818 
819 	if ((outarg.attr.mode ^ mode) & S_IFMT)
820 		goto out_put_forget_req;
821 
822 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
823 			  &outarg.attr, ATTR_TIMEOUT(&outarg), 0, 0);
824 	if (!inode) {
825 		fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
826 		return ERR_PTR(-ENOMEM);
827 	}
828 	kfree(forget);
829 
830 	d_drop(entry);
831 	d = d_splice_alias(inode, entry);
832 	if (IS_ERR(d))
833 		return d;
834 
835 	if (d) {
836 		d->d_time = epoch;
837 		fuse_change_entry_timeout(d, &outarg);
838 	} else {
839 		entry->d_time = epoch;
840 		fuse_change_entry_timeout(entry, &outarg);
841 	}
842 	fuse_dir_changed(dir);
843 	return d;
844 
845  out_put_forget_req:
846 	if (err == -EEXIST)
847 		fuse_invalidate_entry(entry);
848 	kfree(forget);
849 	return ERR_PTR(err);
850 }
851 
852 static int create_new_nondir(struct mnt_idmap *idmap, struct fuse_mount *fm,
853 			     struct fuse_args *args, struct inode *dir,
854 			     struct dentry *entry, umode_t mode)
855 {
856 	/*
857 	 * Note that when creating anything other than a directory we
858 	 * can be sure create_new_entry() will NOT return an alternate
859 	 * dentry as d_splice_alias() only returns an alternate dentry
860 	 * for directories.  So we don't need to check for that case
861 	 * when passing back the result.
862 	 */
863 	WARN_ON_ONCE(S_ISDIR(mode));
864 
865 	return PTR_ERR(create_new_entry(idmap, fm, args, dir, entry, mode));
866 }
867 
868 static int fuse_mknod(struct mnt_idmap *idmap, struct inode *dir,
869 		      struct dentry *entry, umode_t mode, dev_t rdev)
870 {
871 	struct fuse_mknod_in inarg;
872 	struct fuse_mount *fm = get_fuse_mount(dir);
873 	FUSE_ARGS(args);
874 
875 	if (!fm->fc->dont_mask)
876 		mode &= ~current_umask();
877 
878 	memset(&inarg, 0, sizeof(inarg));
879 	inarg.mode = mode;
880 	inarg.rdev = new_encode_dev(rdev);
881 	inarg.umask = current_umask();
882 	args.opcode = FUSE_MKNOD;
883 	args.in_numargs = 2;
884 	args.in_args[0].size = sizeof(inarg);
885 	args.in_args[0].value = &inarg;
886 	args.in_args[1].size = entry->d_name.len + 1;
887 	args.in_args[1].value = entry->d_name.name;
888 	return create_new_nondir(idmap, fm, &args, dir, entry, mode);
889 }
890 
891 static int fuse_create(struct mnt_idmap *idmap, struct inode *dir,
892 		       struct dentry *entry, umode_t mode, bool excl)
893 {
894 	return fuse_mknod(idmap, dir, entry, mode, 0);
895 }
896 
897 static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
898 			struct file *file, umode_t mode)
899 {
900 	struct fuse_conn *fc = get_fuse_conn(dir);
901 	int err;
902 
903 	if (fc->no_tmpfile)
904 		return -EOPNOTSUPP;
905 
906 	err = fuse_create_open(idmap, dir, file->f_path.dentry, file,
907 			       file->f_flags, mode, FUSE_TMPFILE);
908 	if (err == -ENOSYS) {
909 		fc->no_tmpfile = 1;
910 		err = -EOPNOTSUPP;
911 	}
912 	return err;
913 }
914 
915 static struct dentry *fuse_mkdir(struct mnt_idmap *idmap, struct inode *dir,
916 				 struct dentry *entry, umode_t mode)
917 {
918 	struct fuse_mkdir_in inarg;
919 	struct fuse_mount *fm = get_fuse_mount(dir);
920 	FUSE_ARGS(args);
921 
922 	if (!fm->fc->dont_mask)
923 		mode &= ~current_umask();
924 
925 	memset(&inarg, 0, sizeof(inarg));
926 	inarg.mode = mode;
927 	inarg.umask = current_umask();
928 	args.opcode = FUSE_MKDIR;
929 	args.in_numargs = 2;
930 	args.in_args[0].size = sizeof(inarg);
931 	args.in_args[0].value = &inarg;
932 	args.in_args[1].size = entry->d_name.len + 1;
933 	args.in_args[1].value = entry->d_name.name;
934 	return create_new_entry(idmap, fm, &args, dir, entry, S_IFDIR);
935 }
936 
937 static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir,
938 			struct dentry *entry, const char *link)
939 {
940 	struct fuse_mount *fm = get_fuse_mount(dir);
941 	unsigned len = strlen(link) + 1;
942 	FUSE_ARGS(args);
943 
944 	args.opcode = FUSE_SYMLINK;
945 	args.in_numargs = 3;
946 	fuse_set_zero_arg0(&args);
947 	args.in_args[1].size = entry->d_name.len + 1;
948 	args.in_args[1].value = entry->d_name.name;
949 	args.in_args[2].size = len;
950 	args.in_args[2].value = link;
951 	return create_new_nondir(idmap, fm, &args, dir, entry, S_IFLNK);
952 }
953 
954 void fuse_flush_time_update(struct inode *inode)
955 {
956 	int err = sync_inode_metadata(inode, 1);
957 
958 	mapping_set_error(inode->i_mapping, err);
959 }
960 
/*
 * Set the cached ctime to the current time, mark the inode dirty and
 * flush the time update.  Does nothing if IS_NOCMTIME() holds.
 */
static void fuse_update_ctime_in_cache(struct inode *inode)
{
	if (IS_NOCMTIME(inode))
		return;

	inode_set_ctime_current(inode);
	mark_inode_dirty_sync(inode);
	fuse_flush_time_update(inode);
}
969 
970 void fuse_update_ctime(struct inode *inode)
971 {
972 	fuse_invalidate_attr_mask(inode, STATX_CTIME);
973 	fuse_update_ctime_in_cache(inode);
974 }
975 
976 static void fuse_entry_unlinked(struct dentry *entry)
977 {
978 	struct inode *inode = d_inode(entry);
979 	struct fuse_conn *fc = get_fuse_conn(inode);
980 	struct fuse_inode *fi = get_fuse_inode(inode);
981 
982 	spin_lock(&fi->lock);
983 	fi->attr_version = atomic64_inc_return(&fc->attr_version);
984 	/*
985 	 * If i_nlink == 0 then unlink doesn't make sense, yet this can
986 	 * happen if userspace filesystem is careless.  It would be
987 	 * difficult to enforce correct nlink usage so just ignore this
988 	 * condition here
989 	 */
990 	if (S_ISDIR(inode->i_mode))
991 		clear_nlink(inode);
992 	else if (inode->i_nlink > 0)
993 		drop_nlink(inode);
994 	spin_unlock(&fi->lock);
995 	fuse_invalidate_entry_cache(entry);
996 	fuse_update_ctime(inode);
997 }
998 
999 static int fuse_unlink(struct inode *dir, struct dentry *entry)
1000 {
1001 	int err;
1002 	struct fuse_mount *fm = get_fuse_mount(dir);
1003 	FUSE_ARGS(args);
1004 
1005 	if (fuse_is_bad(dir))
1006 		return -EIO;
1007 
1008 	args.opcode = FUSE_UNLINK;
1009 	args.nodeid = get_node_id(dir);
1010 	args.in_numargs = 2;
1011 	fuse_set_zero_arg0(&args);
1012 	args.in_args[1].size = entry->d_name.len + 1;
1013 	args.in_args[1].value = entry->d_name.name;
1014 	err = fuse_simple_request(fm, &args);
1015 	if (!err) {
1016 		fuse_dir_changed(dir);
1017 		fuse_entry_unlinked(entry);
1018 	} else if (err == -EINTR || err == -ENOENT)
1019 		fuse_invalidate_entry(entry);
1020 	return err;
1021 }
1022 
1023 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
1024 {
1025 	int err;
1026 	struct fuse_mount *fm = get_fuse_mount(dir);
1027 	FUSE_ARGS(args);
1028 
1029 	if (fuse_is_bad(dir))
1030 		return -EIO;
1031 
1032 	args.opcode = FUSE_RMDIR;
1033 	args.nodeid = get_node_id(dir);
1034 	args.in_numargs = 2;
1035 	fuse_set_zero_arg0(&args);
1036 	args.in_args[1].size = entry->d_name.len + 1;
1037 	args.in_args[1].value = entry->d_name.name;
1038 	err = fuse_simple_request(fm, &args);
1039 	if (!err) {
1040 		fuse_dir_changed(dir);
1041 		fuse_entry_unlinked(entry);
1042 	} else if (err == -EINTR || err == -ENOENT)
1043 		fuse_invalidate_entry(entry);
1044 	return err;
1045 }
1046 
1047 static int fuse_rename_common(struct mnt_idmap *idmap, struct inode *olddir, struct dentry *oldent,
1048 			      struct inode *newdir, struct dentry *newent,
1049 			      unsigned int flags, int opcode, size_t argsize)
1050 {
1051 	int err;
1052 	struct fuse_rename2_in inarg;
1053 	struct fuse_mount *fm = get_fuse_mount(olddir);
1054 	FUSE_ARGS(args);
1055 
1056 	memset(&inarg, 0, argsize);
1057 	inarg.newdir = get_node_id(newdir);
1058 	inarg.flags = flags;
1059 	args.opcode = opcode;
1060 	args.nodeid = get_node_id(olddir);
1061 	args.in_numargs = 3;
1062 	args.in_args[0].size = argsize;
1063 	args.in_args[0].value = &inarg;
1064 	args.in_args[1].size = oldent->d_name.len + 1;
1065 	args.in_args[1].value = oldent->d_name.name;
1066 	args.in_args[2].size = newent->d_name.len + 1;
1067 	args.in_args[2].value = newent->d_name.name;
1068 	err = fuse_simple_idmap_request(idmap, fm, &args);
1069 	if (!err) {
1070 		/* ctime changes */
1071 		fuse_update_ctime(d_inode(oldent));
1072 
1073 		if (flags & RENAME_EXCHANGE)
1074 			fuse_update_ctime(d_inode(newent));
1075 
1076 		fuse_dir_changed(olddir);
1077 		if (olddir != newdir)
1078 			fuse_dir_changed(newdir);
1079 
1080 		/* newent will end up negative */
1081 		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent))
1082 			fuse_entry_unlinked(newent);
1083 	} else if (err == -EINTR || err == -ENOENT) {
1084 		/* If request was interrupted, DEITY only knows if the
1085 		   rename actually took place.  If the invalidation
1086 		   fails (e.g. some process has CWD under the renamed
1087 		   directory), then there can be inconsistency between
1088 		   the dcache and the real filesystem.  Tough luck. */
1089 		fuse_invalidate_entry(oldent);
1090 		if (d_really_is_positive(newent))
1091 			fuse_invalidate_entry(newent);
1092 	}
1093 
1094 	return err;
1095 }
1096 
1097 static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir,
1098 			struct dentry *oldent, struct inode *newdir,
1099 			struct dentry *newent, unsigned int flags)
1100 {
1101 	struct fuse_conn *fc = get_fuse_conn(olddir);
1102 	int err;
1103 
1104 	if (fuse_is_bad(olddir))
1105 		return -EIO;
1106 
1107 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
1108 		return -EINVAL;
1109 
1110 	if (flags) {
1111 		if (fc->no_rename2 || fc->minor < 23)
1112 			return -EINVAL;
1113 
1114 		err = fuse_rename_common((flags & RENAME_WHITEOUT) ? idmap : &invalid_mnt_idmap,
1115 					 olddir, oldent, newdir, newent, flags,
1116 					 FUSE_RENAME2,
1117 					 sizeof(struct fuse_rename2_in));
1118 		if (err == -ENOSYS) {
1119 			fc->no_rename2 = 1;
1120 			err = -EINVAL;
1121 		}
1122 	} else {
1123 		err = fuse_rename_common(&invalid_mnt_idmap, olddir, oldent, newdir, newent, 0,
1124 					 FUSE_RENAME,
1125 					 sizeof(struct fuse_rename_in));
1126 	}
1127 
1128 	return err;
1129 }
1130 
1131 static int fuse_link(struct dentry *entry, struct inode *newdir,
1132 		     struct dentry *newent)
1133 {
1134 	int err;
1135 	struct fuse_link_in inarg;
1136 	struct inode *inode = d_inode(entry);
1137 	struct fuse_mount *fm = get_fuse_mount(inode);
1138 	FUSE_ARGS(args);
1139 
1140 	if (fm->fc->no_link)
1141 		goto out;
1142 
1143 	memset(&inarg, 0, sizeof(inarg));
1144 	inarg.oldnodeid = get_node_id(inode);
1145 	args.opcode = FUSE_LINK;
1146 	args.in_numargs = 2;
1147 	args.in_args[0].size = sizeof(inarg);
1148 	args.in_args[0].value = &inarg;
1149 	args.in_args[1].size = newent->d_name.len + 1;
1150 	args.in_args[1].value = newent->d_name.name;
1151 	err = create_new_nondir(&invalid_mnt_idmap, fm, &args, newdir, newent, inode->i_mode);
1152 	if (!err)
1153 		fuse_update_ctime_in_cache(inode);
1154 	else if (err == -EINTR)
1155 		fuse_invalidate_attr(inode);
1156 
1157 	if (err == -ENOSYS)
1158 		fm->fc->no_link = 1;
1159 out:
1160 	if (fm->fc->no_link)
1161 		return -EPERM;
1162 
1163 	return err;
1164 }
1165 
1166 static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode,
1167 			  struct fuse_attr *attr, struct kstat *stat)
1168 {
1169 	unsigned int blkbits;
1170 	struct fuse_conn *fc = get_fuse_conn(inode);
1171 	vfsuid_t vfsuid = make_vfsuid(idmap, fc->user_ns,
1172 				      make_kuid(fc->user_ns, attr->uid));
1173 	vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns,
1174 				      make_kgid(fc->user_ns, attr->gid));
1175 
1176 	stat->dev = inode->i_sb->s_dev;
1177 	stat->ino = attr->ino;
1178 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
1179 	stat->nlink = attr->nlink;
1180 	stat->uid = vfsuid_into_kuid(vfsuid);
1181 	stat->gid = vfsgid_into_kgid(vfsgid);
1182 	stat->rdev = inode->i_rdev;
1183 	stat->atime.tv_sec = attr->atime;
1184 	stat->atime.tv_nsec = attr->atimensec;
1185 	stat->mtime.tv_sec = attr->mtime;
1186 	stat->mtime.tv_nsec = attr->mtimensec;
1187 	stat->ctime.tv_sec = attr->ctime;
1188 	stat->ctime.tv_nsec = attr->ctimensec;
1189 	stat->size = attr->size;
1190 	stat->blocks = attr->blocks;
1191 
1192 	if (attr->blksize != 0)
1193 		blkbits = ilog2(attr->blksize);
1194 	else
1195 		blkbits = inode->i_sb->s_blocksize_bits;
1196 
1197 	stat->blksize = 1 << blkbits;
1198 }
1199 
1200 static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr)
1201 {
1202 	memset(attr, 0, sizeof(*attr));
1203 	attr->ino = sx->ino;
1204 	attr->size = sx->size;
1205 	attr->blocks = sx->blocks;
1206 	attr->atime = sx->atime.tv_sec;
1207 	attr->mtime = sx->mtime.tv_sec;
1208 	attr->ctime = sx->ctime.tv_sec;
1209 	attr->atimensec = sx->atime.tv_nsec;
1210 	attr->mtimensec = sx->mtime.tv_nsec;
1211 	attr->ctimensec = sx->ctime.tv_nsec;
1212 	attr->mode = sx->mode;
1213 	attr->nlink = sx->nlink;
1214 	attr->uid = sx->uid;
1215 	attr->gid = sx->gid;
1216 	attr->rdev = new_encode_dev(MKDEV(sx->rdev_major, sx->rdev_minor));
1217 	attr->blksize = sx->blksize;
1218 }
1219 
1220 static int fuse_do_statx(struct mnt_idmap *idmap, struct inode *inode,
1221 			 struct file *file, struct kstat *stat)
1222 {
1223 	int err;
1224 	struct fuse_attr attr;
1225 	struct fuse_statx *sx;
1226 	struct fuse_statx_in inarg;
1227 	struct fuse_statx_out outarg;
1228 	struct fuse_mount *fm = get_fuse_mount(inode);
1229 	u64 attr_version = fuse_get_attr_version(fm->fc);
1230 	FUSE_ARGS(args);
1231 
1232 	memset(&inarg, 0, sizeof(inarg));
1233 	memset(&outarg, 0, sizeof(outarg));
1234 	/* Directories have separate file-handle space */
1235 	if (file && S_ISREG(inode->i_mode)) {
1236 		struct fuse_file *ff = file->private_data;
1237 
1238 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1239 		inarg.fh = ff->fh;
1240 	}
1241 	/* For now leave sync hints as the default, request all stats. */
1242 	inarg.sx_flags = 0;
1243 	inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME;
1244 	args.opcode = FUSE_STATX;
1245 	args.nodeid = get_node_id(inode);
1246 	args.in_numargs = 1;
1247 	args.in_args[0].size = sizeof(inarg);
1248 	args.in_args[0].value = &inarg;
1249 	args.out_numargs = 1;
1250 	args.out_args[0].size = sizeof(outarg);
1251 	args.out_args[0].value = &outarg;
1252 	err = fuse_simple_request(fm, &args);
1253 	if (err)
1254 		return err;
1255 
1256 	sx = &outarg.stat;
1257 	if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
1258 	    ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
1259 					 inode_wrong_type(inode, sx->mode)))) {
1260 		fuse_make_bad(inode);
1261 		return -EIO;
1262 	}
1263 
1264 	fuse_statx_to_attr(&outarg.stat, &attr);
1265 	if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) {
1266 		fuse_change_attributes(inode, &attr, &outarg.stat,
1267 				       ATTR_TIMEOUT(&outarg), attr_version);
1268 	}
1269 
1270 	if (stat) {
1271 		stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME);
1272 		stat->btime.tv_sec = sx->btime.tv_sec;
1273 		stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
1274 		fuse_fillattr(idmap, inode, &attr, stat);
1275 		stat->result_mask |= STATX_TYPE;
1276 	}
1277 
1278 	return 0;
1279 }
1280 
1281 static int fuse_do_getattr(struct mnt_idmap *idmap, struct inode *inode,
1282 			   struct kstat *stat, struct file *file)
1283 {
1284 	int err;
1285 	struct fuse_getattr_in inarg;
1286 	struct fuse_attr_out outarg;
1287 	struct fuse_mount *fm = get_fuse_mount(inode);
1288 	FUSE_ARGS(args);
1289 	u64 attr_version;
1290 
1291 	attr_version = fuse_get_attr_version(fm->fc);
1292 
1293 	memset(&inarg, 0, sizeof(inarg));
1294 	memset(&outarg, 0, sizeof(outarg));
1295 	/* Directories have separate file-handle space */
1296 	if (file && S_ISREG(inode->i_mode)) {
1297 		struct fuse_file *ff = file->private_data;
1298 
1299 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1300 		inarg.fh = ff->fh;
1301 	}
1302 	args.opcode = FUSE_GETATTR;
1303 	args.nodeid = get_node_id(inode);
1304 	args.in_numargs = 1;
1305 	args.in_args[0].size = sizeof(inarg);
1306 	args.in_args[0].value = &inarg;
1307 	args.out_numargs = 1;
1308 	args.out_args[0].size = sizeof(outarg);
1309 	args.out_args[0].value = &outarg;
1310 	err = fuse_simple_request(fm, &args);
1311 	if (!err) {
1312 		if (fuse_invalid_attr(&outarg.attr) ||
1313 		    inode_wrong_type(inode, outarg.attr.mode)) {
1314 			fuse_make_bad(inode);
1315 			err = -EIO;
1316 		} else {
1317 			fuse_change_attributes(inode, &outarg.attr, NULL,
1318 					       ATTR_TIMEOUT(&outarg),
1319 					       attr_version);
1320 			if (stat)
1321 				fuse_fillattr(idmap, inode, &outarg.attr, stat);
1322 		}
1323 	}
1324 	return err;
1325 }
1326 
1327 static int fuse_update_get_attr(struct mnt_idmap *idmap, struct inode *inode,
1328 				struct file *file, struct kstat *stat,
1329 				u32 request_mask, unsigned int flags)
1330 {
1331 	struct fuse_inode *fi = get_fuse_inode(inode);
1332 	struct fuse_conn *fc = get_fuse_conn(inode);
1333 	int err = 0;
1334 	bool sync;
1335 	u32 inval_mask = READ_ONCE(fi->inval_mask);
1336 	u32 cache_mask = fuse_get_cache_mask(inode);
1337 
1338 
1339 	/* FUSE only supports basic stats and possibly btime */
1340 	request_mask &= STATX_BASIC_STATS | STATX_BTIME;
1341 retry:
1342 	if (fc->no_statx)
1343 		request_mask &= STATX_BASIC_STATS;
1344 
1345 	if (!request_mask)
1346 		sync = false;
1347 	else if (flags & AT_STATX_FORCE_SYNC)
1348 		sync = true;
1349 	else if (flags & AT_STATX_DONT_SYNC)
1350 		sync = false;
1351 	else if (request_mask & inval_mask & ~cache_mask)
1352 		sync = true;
1353 	else
1354 		sync = time_before64(fi->i_time, get_jiffies_64());
1355 
1356 	if (sync) {
1357 		forget_all_cached_acls(inode);
1358 		/* Try statx if BTIME is requested */
1359 		if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) {
1360 			err = fuse_do_statx(idmap, inode, file, stat);
1361 			if (err == -ENOSYS) {
1362 				fc->no_statx = 1;
1363 				err = 0;
1364 				goto retry;
1365 			}
1366 		} else {
1367 			err = fuse_do_getattr(idmap, inode, stat, file);
1368 		}
1369 	} else if (stat) {
1370 		generic_fillattr(idmap, request_mask, inode, stat);
1371 		stat->mode = fi->orig_i_mode;
1372 		stat->ino = fi->orig_ino;
1373 		stat->blksize = 1 << fi->cached_i_blkbits;
1374 		if (test_bit(FUSE_I_BTIME, &fi->state)) {
1375 			stat->btime = fi->i_btime;
1376 			stat->result_mask |= STATX_BTIME;
1377 		}
1378 	}
1379 
1380 	return err;
1381 }
1382 
1383 int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask)
1384 {
1385 	return fuse_update_get_attr(&nop_mnt_idmap, inode, file, NULL, mask, 0);
1386 }
1387 
1388 int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
1389 			     u64 child_nodeid, struct qstr *name, u32 flags)
1390 {
1391 	int err = -ENOTDIR;
1392 	struct inode *parent;
1393 	struct dentry *dir;
1394 	struct dentry *entry;
1395 
1396 	parent = fuse_ilookup(fc, parent_nodeid, NULL);
1397 	if (!parent)
1398 		return -ENOENT;
1399 
1400 	if (!S_ISDIR(parent->i_mode))
1401 		goto put_parent;
1402 
1403 	err = -ENOENT;
1404 	dir = d_find_alias(parent);
1405 	if (!dir)
1406 		goto put_parent;
1407 
1408 	entry = start_removing_noperm(dir, name);
1409 	dput(dir);
1410 	if (IS_ERR(entry))
1411 		goto put_parent;
1412 
1413 	fuse_dir_changed(parent);
1414 	if (!(flags & FUSE_EXPIRE_ONLY))
1415 		d_invalidate(entry);
1416 	fuse_invalidate_entry_cache(entry);
1417 
1418 	if (child_nodeid != 0) {
1419 		inode_lock(d_inode(entry));
1420 		if (get_node_id(d_inode(entry)) != child_nodeid) {
1421 			err = -ENOENT;
1422 			goto badentry;
1423 		}
1424 		if (d_mountpoint(entry)) {
1425 			err = -EBUSY;
1426 			goto badentry;
1427 		}
1428 		if (d_is_dir(entry)) {
1429 			shrink_dcache_parent(entry);
1430 			if (!simple_empty(entry)) {
1431 				err = -ENOTEMPTY;
1432 				goto badentry;
1433 			}
1434 			d_inode(entry)->i_flags |= S_DEAD;
1435 		}
1436 		dont_mount(entry);
1437 		clear_nlink(d_inode(entry));
1438 		err = 0;
1439  badentry:
1440 		inode_unlock(d_inode(entry));
1441 		if (!err)
1442 			d_delete(entry);
1443 	} else {
1444 		err = 0;
1445 	}
1446 
1447 	end_removing(entry);
1448  put_parent:
1449 	iput(parent);
1450 	return err;
1451 }
1452 
1453 static inline bool fuse_permissible_uidgid(struct fuse_conn *fc)
1454 {
1455 	const struct cred *cred = current_cred();
1456 
1457 	return (uid_eq(cred->euid, fc->user_id) &&
1458 		uid_eq(cred->suid, fc->user_id) &&
1459 		uid_eq(cred->uid,  fc->user_id) &&
1460 		gid_eq(cred->egid, fc->group_id) &&
1461 		gid_eq(cred->sgid, fc->group_id) &&
1462 		gid_eq(cred->gid,  fc->group_id));
1463 }
1464 
1465 /*
1466  * Calling into a user-controlled filesystem gives the filesystem
1467  * daemon ptrace-like capabilities over the current process.  This
1468  * means, that the filesystem daemon is able to record the exact
1469  * filesystem operations performed, and can also control the behavior
1470  * of the requester process in otherwise impossible ways.  For example
1471  * it can delay the operation for arbitrary length of time allowing
1472  * DoS against the requester.
1473  *
1474  * For this reason only those processes can call into the filesystem,
1475  * for which the owner of the mount has ptrace privilege.  This
1476  * excludes processes started by other users, suid or sgid processes.
1477  */
1478 bool fuse_allow_current_process(struct fuse_conn *fc)
1479 {
1480 	bool allow;
1481 
1482 	if (fc->allow_other)
1483 		allow = current_in_userns(fc->user_ns);
1484 	else
1485 		allow = fuse_permissible_uidgid(fc);
1486 
1487 	if (!allow && allow_sys_admin_access && capable(CAP_SYS_ADMIN))
1488 		allow = true;
1489 
1490 	return allow;
1491 }
1492 
1493 static int fuse_access(struct inode *inode, int mask)
1494 {
1495 	struct fuse_mount *fm = get_fuse_mount(inode);
1496 	FUSE_ARGS(args);
1497 	struct fuse_access_in inarg;
1498 	int err;
1499 
1500 	BUG_ON(mask & MAY_NOT_BLOCK);
1501 
1502 	/*
1503 	 * We should not send FUSE_ACCESS to the userspace
1504 	 * when idmapped mounts are enabled as for this case
1505 	 * we have fc->default_permissions = 1 and access
1506 	 * permission checks are done on the kernel side.
1507 	 */
1508 	WARN_ON_ONCE(!(fm->sb->s_iflags & SB_I_NOIDMAP));
1509 
1510 	if (fm->fc->no_access)
1511 		return 0;
1512 
1513 	memset(&inarg, 0, sizeof(inarg));
1514 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1515 	args.opcode = FUSE_ACCESS;
1516 	args.nodeid = get_node_id(inode);
1517 	args.in_numargs = 1;
1518 	args.in_args[0].size = sizeof(inarg);
1519 	args.in_args[0].value = &inarg;
1520 	err = fuse_simple_request(fm, &args);
1521 	if (err == -ENOSYS) {
1522 		fm->fc->no_access = 1;
1523 		err = 0;
1524 	}
1525 	return err;
1526 }
1527 
1528 static int fuse_perm_getattr(struct inode *inode, int mask)
1529 {
1530 	if (mask & MAY_NOT_BLOCK)
1531 		return -ECHILD;
1532 
1533 	forget_all_cached_acls(inode);
1534 	return fuse_do_getattr(&nop_mnt_idmap, inode, NULL, NULL);
1535 }
1536 
1537 /*
1538  * Check permission.  The two basic access models of FUSE are:
1539  *
1540  * 1) Local access checking ('default_permissions' mount option) based
1541  * on file mode.  This is the plain old disk filesystem permission
1542  * model.
1543  *
1544  * 2) "Remote" access checking, where server is responsible for
1545  * checking permission in each inode operation.  An exception to this
1546  * is if ->permission() was invoked from sys_access() in which case an
1547  * access request is sent.  Execute permission is still checked
1548  * locally based on file mode.
1549  */
1550 static int fuse_permission(struct mnt_idmap *idmap,
1551 			   struct inode *inode, int mask)
1552 {
1553 	struct fuse_conn *fc = get_fuse_conn(inode);
1554 	bool refreshed = false;
1555 	int err = 0;
1556 
1557 	if (fuse_is_bad(inode))
1558 		return -EIO;
1559 
1560 	if (!fuse_allow_current_process(fc))
1561 		return -EACCES;
1562 
1563 	/*
1564 	 * If attributes are needed, refresh them before proceeding
1565 	 */
1566 	if (fc->default_permissions ||
1567 	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1568 		struct fuse_inode *fi = get_fuse_inode(inode);
1569 		u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
1570 
1571 		if (perm_mask & READ_ONCE(fi->inval_mask) ||
1572 		    time_before64(fi->i_time, get_jiffies_64())) {
1573 			refreshed = true;
1574 
1575 			err = fuse_perm_getattr(inode, mask);
1576 			if (err)
1577 				return err;
1578 		}
1579 	}
1580 
1581 	if (fc->default_permissions) {
1582 		err = generic_permission(idmap, inode, mask);
1583 
1584 		/* If permission is denied, try to refresh file
1585 		   attributes.  This is also needed, because the root
1586 		   node will at first have no permissions */
1587 		if (err == -EACCES && !refreshed) {
1588 			err = fuse_perm_getattr(inode, mask);
1589 			if (!err)
1590 				err = generic_permission(idmap,
1591 							 inode, mask);
1592 		}
1593 
1594 		/* Note: the opposite of the above test does not
1595 		   exist.  So if permissions are revoked this won't be
1596 		   noticed immediately, only after the attribute
1597 		   timeout has expired */
1598 	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1599 		err = fuse_access(inode, mask);
1600 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1601 		if (!(inode->i_mode & S_IXUGO)) {
1602 			if (refreshed)
1603 				return -EACCES;
1604 
1605 			err = fuse_perm_getattr(inode, mask);
1606 			if (!err && !(inode->i_mode & S_IXUGO))
1607 				return -EACCES;
1608 		}
1609 	}
1610 	return err;
1611 }
1612 
1613 static int fuse_readlink_folio(struct inode *inode, struct folio *folio)
1614 {
1615 	struct fuse_mount *fm = get_fuse_mount(inode);
1616 	struct fuse_folio_desc desc = { .length = folio_size(folio) - 1 };
1617 	struct fuse_args_pages ap = {
1618 		.num_folios = 1,
1619 		.folios = &folio,
1620 		.descs = &desc,
1621 	};
1622 	char *link;
1623 	ssize_t res;
1624 
1625 	ap.args.opcode = FUSE_READLINK;
1626 	ap.args.nodeid = get_node_id(inode);
1627 	ap.args.out_pages = true;
1628 	ap.args.out_argvar = true;
1629 	ap.args.page_zeroing = true;
1630 	ap.args.out_numargs = 1;
1631 	ap.args.out_args[0].size = desc.length;
1632 	res = fuse_simple_request(fm, &ap.args);
1633 
1634 	fuse_invalidate_atime(inode);
1635 
1636 	if (res < 0)
1637 		return res;
1638 
1639 	if (WARN_ON(res >= PAGE_SIZE))
1640 		return -EIO;
1641 
1642 	link = folio_address(folio);
1643 	link[res] = '\0';
1644 
1645 	return 0;
1646 }
1647 
1648 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
1649 				 struct delayed_call *callback)
1650 {
1651 	struct fuse_conn *fc = get_fuse_conn(inode);
1652 	struct folio *folio;
1653 	int err;
1654 
1655 	err = -EIO;
1656 	if (fuse_is_bad(inode))
1657 		goto out_err;
1658 
1659 	if (fc->cache_symlinks)
1660 		return page_get_link_raw(dentry, inode, callback);
1661 
1662 	err = -ECHILD;
1663 	if (!dentry)
1664 		goto out_err;
1665 
1666 	folio = folio_alloc(GFP_KERNEL, 0);
1667 	err = -ENOMEM;
1668 	if (!folio)
1669 		goto out_err;
1670 
1671 	err = fuse_readlink_folio(inode, folio);
1672 	if (err) {
1673 		folio_put(folio);
1674 		goto out_err;
1675 	}
1676 
1677 	set_delayed_call(callback, page_put_link, folio);
1678 
1679 	return folio_address(folio);
1680 
1681 out_err:
1682 	return ERR_PTR(err);
1683 }
1684 
1685 static int fuse_dir_open(struct inode *inode, struct file *file)
1686 {
1687 	struct fuse_mount *fm = get_fuse_mount(inode);
1688 	int err;
1689 
1690 	if (fuse_is_bad(inode))
1691 		return -EIO;
1692 
1693 	err = generic_file_open(inode, file);
1694 	if (err)
1695 		return err;
1696 
1697 	err = fuse_do_open(fm, get_node_id(inode), file, true);
1698 	if (!err) {
1699 		struct fuse_file *ff = file->private_data;
1700 
1701 		/*
1702 		 * Keep handling FOPEN_STREAM and FOPEN_NONSEEKABLE for
1703 		 * directories for backward compatibility, though it's unlikely
1704 		 * to be useful.
1705 		 */
1706 		if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE))
1707 			nonseekable_open(inode, file);
1708 		if (!(ff->open_flags & FOPEN_KEEP_CACHE))
1709 			invalidate_inode_pages2(inode->i_mapping);
1710 	}
1711 
1712 	return err;
1713 }
1714 
1715 static int fuse_dir_release(struct inode *inode, struct file *file)
1716 {
1717 	fuse_release_common(file, true);
1718 
1719 	return 0;
1720 }
1721 
1722 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1723 			  int datasync)
1724 {
1725 	struct inode *inode = file->f_mapping->host;
1726 	struct fuse_conn *fc = get_fuse_conn(inode);
1727 	int err;
1728 
1729 	if (fuse_is_bad(inode))
1730 		return -EIO;
1731 
1732 	if (fc->no_fsyncdir)
1733 		return 0;
1734 
1735 	inode_lock(inode);
1736 	err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
1737 	if (err == -ENOSYS) {
1738 		fc->no_fsyncdir = 1;
1739 		err = 0;
1740 	}
1741 	inode_unlock(inode);
1742 
1743 	return err;
1744 }
1745 
1746 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1747 			    unsigned long arg)
1748 {
1749 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1750 
1751 	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1752 	if (fc->minor < 18)
1753 		return -ENOTTY;
1754 
1755 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1756 }
1757 
1758 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1759 				   unsigned long arg)
1760 {
1761 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1762 
1763 	if (fc->minor < 18)
1764 		return -ENOTTY;
1765 
1766 	return fuse_ioctl_common(file, cmd, arg,
1767 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1768 }
1769 
1770 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1771 {
1772 	/* Always update if mtime is explicitly set  */
1773 	if (ivalid & ATTR_MTIME_SET)
1774 		return true;
1775 
1776 	/* Or if kernel i_mtime is the official one */
1777 	if (trust_local_mtime)
1778 		return true;
1779 
1780 	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1781 	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1782 		return false;
1783 
1784 	/* In all other cases update */
1785 	return true;
1786 }
1787 
1788 static void iattr_to_fattr(struct mnt_idmap *idmap, struct fuse_conn *fc,
1789 			   struct iattr *iattr, struct fuse_setattr_in *arg,
1790 			   bool trust_local_cmtime)
1791 {
1792 	unsigned ivalid = iattr->ia_valid;
1793 
1794 	if (ivalid & ATTR_MODE)
1795 		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1796 
1797 	if (ivalid & ATTR_UID) {
1798 		kuid_t fsuid = from_vfsuid(idmap, fc->user_ns, iattr->ia_vfsuid);
1799 
1800 		arg->valid |= FATTR_UID;
1801 		arg->uid = from_kuid(fc->user_ns, fsuid);
1802 	}
1803 
1804 	if (ivalid & ATTR_GID) {
1805 		kgid_t fsgid = from_vfsgid(idmap, fc->user_ns, iattr->ia_vfsgid);
1806 
1807 		arg->valid |= FATTR_GID;
1808 		arg->gid = from_kgid(fc->user_ns, fsgid);
1809 	}
1810 
1811 	if (ivalid & ATTR_SIZE)
1812 		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1813 	if (ivalid & ATTR_ATIME) {
1814 		arg->valid |= FATTR_ATIME;
1815 		arg->atime = iattr->ia_atime.tv_sec;
1816 		arg->atimensec = iattr->ia_atime.tv_nsec;
1817 		if (!(ivalid & ATTR_ATIME_SET))
1818 			arg->valid |= FATTR_ATIME_NOW;
1819 	}
1820 	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1821 		arg->valid |= FATTR_MTIME;
1822 		arg->mtime = iattr->ia_mtime.tv_sec;
1823 		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1824 		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1825 			arg->valid |= FATTR_MTIME_NOW;
1826 	}
1827 	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1828 		arg->valid |= FATTR_CTIME;
1829 		arg->ctime = iattr->ia_ctime.tv_sec;
1830 		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1831 	}
1832 }
1833 
1834 /*
1835  * Prevent concurrent writepages on inode
1836  *
1837  * This is done by adding a negative bias to the inode write counter
1838  * and waiting for all pending writes to finish.
1839  */
1840 void fuse_set_nowrite(struct inode *inode)
1841 {
1842 	struct fuse_inode *fi = get_fuse_inode(inode);
1843 
1844 	BUG_ON(!inode_is_locked(inode));
1845 
1846 	spin_lock(&fi->lock);
1847 	BUG_ON(fi->writectr < 0);
1848 	fi->writectr += FUSE_NOWRITE;
1849 	spin_unlock(&fi->lock);
1850 	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1851 }
1852 
1853 /*
1854  * Allow writepages on inode
1855  *
1856  * Remove the bias from the writecounter and send any queued
1857  * writepages.
1858  */
1859 static void __fuse_release_nowrite(struct inode *inode)
1860 {
1861 	struct fuse_inode *fi = get_fuse_inode(inode);
1862 
1863 	BUG_ON(fi->writectr != FUSE_NOWRITE);
1864 	fi->writectr = 0;
1865 	fuse_flush_writepages(inode);
1866 }
1867 
1868 void fuse_release_nowrite(struct inode *inode)
1869 {
1870 	struct fuse_inode *fi = get_fuse_inode(inode);
1871 
1872 	spin_lock(&fi->lock);
1873 	__fuse_release_nowrite(inode);
1874 	spin_unlock(&fi->lock);
1875 }
1876 
1877 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1878 			      struct inode *inode,
1879 			      struct fuse_setattr_in *inarg_p,
1880 			      struct fuse_attr_out *outarg_p)
1881 {
1882 	args->opcode = FUSE_SETATTR;
1883 	args->nodeid = get_node_id(inode);
1884 	args->in_numargs = 1;
1885 	args->in_args[0].size = sizeof(*inarg_p);
1886 	args->in_args[0].value = inarg_p;
1887 	args->out_numargs = 1;
1888 	args->out_args[0].size = sizeof(*outarg_p);
1889 	args->out_args[0].value = outarg_p;
1890 }
1891 
1892 /*
1893  * Flush inode->i_mtime to the server
1894  */
1895 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1896 {
1897 	struct fuse_mount *fm = get_fuse_mount(inode);
1898 	FUSE_ARGS(args);
1899 	struct fuse_setattr_in inarg;
1900 	struct fuse_attr_out outarg;
1901 
1902 	memset(&inarg, 0, sizeof(inarg));
1903 	memset(&outarg, 0, sizeof(outarg));
1904 
1905 	inarg.valid = FATTR_MTIME;
1906 	inarg.mtime = inode_get_mtime_sec(inode);
1907 	inarg.mtimensec = inode_get_mtime_nsec(inode);
1908 	if (fm->fc->minor >= 23) {
1909 		inarg.valid |= FATTR_CTIME;
1910 		inarg.ctime = inode_get_ctime_sec(inode);
1911 		inarg.ctimensec = inode_get_ctime_nsec(inode);
1912 	}
1913 	if (ff) {
1914 		inarg.valid |= FATTR_FH;
1915 		inarg.fh = ff->fh;
1916 	}
1917 	fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
1918 
1919 	return fuse_simple_request(fm, &args);
1920 }
1921 
1922 /*
1923  * Set attributes, and at the same time refresh them.
1924  *
1925  * Truncation is slightly complicated, because the 'truncate' request
1926  * may fail, in which case we don't want to touch the mapping.
1927  * vmtruncate() doesn't allow for this case, so do the rlimit checking
1928  * and the actual truncation by hand.
1929  */
1930 int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
1931 		    struct iattr *attr, struct file *file)
1932 {
1933 	struct inode *inode = d_inode(dentry);
1934 	struct fuse_mount *fm = get_fuse_mount(inode);
1935 	struct fuse_conn *fc = fm->fc;
1936 	struct fuse_inode *fi = get_fuse_inode(inode);
1937 	struct address_space *mapping = inode->i_mapping;
1938 	FUSE_ARGS(args);
1939 	struct fuse_setattr_in inarg;
1940 	struct fuse_attr_out outarg;
1941 	bool is_truncate = false;
1942 	bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode);
1943 	loff_t oldsize;
1944 	int err;
1945 	bool trust_local_cmtime = is_wb;
1946 	bool fault_blocked = false;
1947 	u64 attr_version;
1948 
1949 	if (!fc->default_permissions)
1950 		attr->ia_valid |= ATTR_FORCE;
1951 
1952 	err = setattr_prepare(idmap, dentry, attr);
1953 	if (err)
1954 		return err;
1955 
1956 	if (attr->ia_valid & ATTR_SIZE) {
1957 		if (WARN_ON(!S_ISREG(inode->i_mode)))
1958 			return -EIO;
1959 		is_truncate = true;
1960 	}
1961 
1962 	if (FUSE_IS_DAX(inode) && is_truncate) {
1963 		filemap_invalidate_lock(mapping);
1964 		fault_blocked = true;
1965 		err = fuse_dax_break_layouts(inode, 0, -1);
1966 		if (err) {
1967 			filemap_invalidate_unlock(mapping);
1968 			return err;
1969 		}
1970 	}
1971 
1972 	if (attr->ia_valid & ATTR_OPEN) {
1973 		/* This is coming from open(..., ... | O_TRUNC); */
1974 		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
1975 		WARN_ON(attr->ia_size != 0);
1976 		if (fc->atomic_o_trunc) {
1977 			/*
1978 			 * No need to send request to userspace, since actual
1979 			 * truncation has already been done by OPEN.  But still
1980 			 * need to truncate page cache.
1981 			 */
1982 			i_size_write(inode, 0);
1983 			truncate_pagecache(inode, 0);
1984 			goto out;
1985 		}
1986 		file = NULL;
1987 	}
1988 
1989 	/* Flush dirty data/metadata before non-truncate SETATTR */
1990 	if (is_wb &&
1991 	    attr->ia_valid &
1992 			(ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
1993 			 ATTR_TIMES_SET)) {
1994 		err = write_inode_now(inode, true);
1995 		if (err)
1996 			return err;
1997 
1998 		fuse_set_nowrite(inode);
1999 		fuse_release_nowrite(inode);
2000 	}
2001 
2002 	if (is_truncate) {
2003 		fuse_set_nowrite(inode);
2004 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2005 		if (trust_local_cmtime && attr->ia_size != inode->i_size)
2006 			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
2007 	}
2008 
2009 	memset(&inarg, 0, sizeof(inarg));
2010 	memset(&outarg, 0, sizeof(outarg));
2011 	iattr_to_fattr(idmap, fc, attr, &inarg, trust_local_cmtime);
2012 	if (file) {
2013 		struct fuse_file *ff = file->private_data;
2014 		inarg.valid |= FATTR_FH;
2015 		inarg.fh = ff->fh;
2016 	}
2017 
2018 	/* Kill suid/sgid for non-directory chown unconditionally */
2019 	if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
2020 	    attr->ia_valid & (ATTR_UID | ATTR_GID))
2021 		inarg.valid |= FATTR_KILL_SUIDGID;
2022 
2023 	if (attr->ia_valid & ATTR_SIZE) {
2024 		/* For mandatory locking in truncate */
2025 		inarg.valid |= FATTR_LOCKOWNER;
2026 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
2027 
2028 		/* Kill suid/sgid for truncate only if no CAP_FSETID */
2029 		if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
2030 			inarg.valid |= FATTR_KILL_SUIDGID;
2031 	}
2032 
2033 	attr_version = fuse_get_attr_version(fm->fc);
2034 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
2035 	err = fuse_simple_request(fm, &args);
2036 	if (err) {
2037 		if (err == -EINTR)
2038 			fuse_invalidate_attr(inode);
2039 		goto error;
2040 	}
2041 
2042 	if (fuse_invalid_attr(&outarg.attr) ||
2043 	    inode_wrong_type(inode, outarg.attr.mode)) {
2044 		fuse_make_bad(inode);
2045 		err = -EIO;
2046 		goto error;
2047 	}
2048 
2049 	spin_lock(&fi->lock);
2050 	/* the kernel maintains i_mtime locally */
2051 	if (trust_local_cmtime) {
2052 		if (attr->ia_valid & ATTR_MTIME)
2053 			inode_set_mtime_to_ts(inode, attr->ia_mtime);
2054 		if (attr->ia_valid & ATTR_CTIME)
2055 			inode_set_ctime_to_ts(inode, attr->ia_ctime);
2056 		/* FIXME: clear I_DIRTY_SYNC? */
2057 	}
2058 
2059 	if (fi->attr_version > attr_version) {
2060 		/*
2061 		 * Apply attributes, for example for fsnotify_change(), but set
2062 		 * attribute timeout to zero.
2063 		 */
2064 		outarg.attr_valid = outarg.attr_valid_nsec = 0;
2065 	}
2066 
2067 	fuse_change_attributes_common(inode, &outarg.attr, NULL,
2068 				      ATTR_TIMEOUT(&outarg),
2069 				      fuse_get_cache_mask(inode), 0);
2070 	oldsize = inode->i_size;
2071 	/* see the comment in fuse_change_attributes() */
2072 	if (!is_wb || is_truncate)
2073 		i_size_write(inode, outarg.attr.size);
2074 
2075 	if (is_truncate) {
2076 		/* NOTE: this may release/reacquire fi->lock */
2077 		__fuse_release_nowrite(inode);
2078 	}
2079 	spin_unlock(&fi->lock);
2080 
2081 	/*
2082 	 * Only call invalidate_inode_pages2() after removing
2083 	 * FUSE_NOWRITE, otherwise fuse_launder_folio() would deadlock.
2084 	 */
2085 	if ((is_truncate || !is_wb) &&
2086 	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
2087 		truncate_pagecache(inode, outarg.attr.size);
2088 		invalidate_inode_pages2(mapping);
2089 	}
2090 
2091 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2092 out:
2093 	if (fault_blocked)
2094 		filemap_invalidate_unlock(mapping);
2095 
2096 	return 0;
2097 
2098 error:
2099 	if (is_truncate)
2100 		fuse_release_nowrite(inode);
2101 
2102 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2103 
2104 	if (fault_blocked)
2105 		filemap_invalidate_unlock(mapping);
2106 	return err;
2107 }
2108 
2109 static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry,
2110 			struct iattr *attr)
2111 {
2112 	struct inode *inode = d_inode(entry);
2113 	struct fuse_conn *fc = get_fuse_conn(inode);
2114 	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
2115 	int ret;
2116 
2117 	if (fuse_is_bad(inode))
2118 		return -EIO;
2119 
2120 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
2121 		return -EACCES;
2122 
2123 	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
2124 		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
2125 				    ATTR_MODE);
2126 
2127 		/*
2128 		 * The only sane way to reliably kill suid/sgid is to do it in
2129 		 * the userspace filesystem
2130 		 *
2131 		 * This should be done on write(), truncate() and chown().
2132 		 */
2133 		if (!fc->handle_killpriv && !fc->handle_killpriv_v2) {
2134 			/*
2135 			 * ia_mode calculation may have used stale i_mode.
2136 			 * Refresh and recalculate.
2137 			 */
2138 			ret = fuse_do_getattr(idmap, inode, NULL, file);
2139 			if (ret)
2140 				return ret;
2141 
2142 			attr->ia_mode = inode->i_mode;
2143 			if (inode->i_mode & S_ISUID) {
2144 				attr->ia_valid |= ATTR_MODE;
2145 				attr->ia_mode &= ~S_ISUID;
2146 			}
2147 			if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
2148 				attr->ia_valid |= ATTR_MODE;
2149 				attr->ia_mode &= ~S_ISGID;
2150 			}
2151 		}
2152 	}
2153 	if (!attr->ia_valid)
2154 		return 0;
2155 
2156 	ret = fuse_do_setattr(idmap, entry, attr, file);
2157 	if (!ret) {
2158 		/*
2159 		 * If filesystem supports acls it may have updated acl xattrs in
2160 		 * the filesystem, so forget cached acls for the inode.
2161 		 */
2162 		if (fc->posix_acl)
2163 			forget_all_cached_acls(inode);
2164 
2165 		/* Directory mode changed, may need to revalidate access */
2166 		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
2167 			fuse_invalidate_entry_cache(entry);
2168 	}
2169 	return ret;
2170 }
2171 
2172 static int fuse_getattr(struct mnt_idmap *idmap,
2173 			const struct path *path, struct kstat *stat,
2174 			u32 request_mask, unsigned int flags)
2175 {
2176 	struct inode *inode = d_inode(path->dentry);
2177 	struct fuse_conn *fc = get_fuse_conn(inode);
2178 
2179 	if (fuse_is_bad(inode))
2180 		return -EIO;
2181 
2182 	if (!fuse_allow_current_process(fc)) {
2183 		if (!request_mask) {
2184 			/*
2185 			 * If user explicitly requested *nothing* then don't
2186 			 * error out, but return st_dev only.
2187 			 */
2188 			stat->result_mask = 0;
2189 			stat->dev = inode->i_sb->s_dev;
2190 			return 0;
2191 		}
2192 		return -EACCES;
2193 	}
2194 
2195 	return fuse_update_get_attr(idmap, inode, NULL, stat, request_mask, flags);
2196 }
2197 
2198 static const struct inode_operations fuse_dir_inode_operations = {
2199 	.lookup		= fuse_lookup,
2200 	.mkdir		= fuse_mkdir,
2201 	.symlink	= fuse_symlink,
2202 	.unlink		= fuse_unlink,
2203 	.rmdir		= fuse_rmdir,
2204 	.rename		= fuse_rename2,
2205 	.link		= fuse_link,
2206 	.setattr	= fuse_setattr,
2207 	.create		= fuse_create,
2208 	.atomic_open	= fuse_atomic_open,
2209 	.tmpfile	= fuse_tmpfile,
2210 	.mknod		= fuse_mknod,
2211 	.permission	= fuse_permission,
2212 	.getattr	= fuse_getattr,
2213 	.listxattr	= fuse_listxattr,
2214 	.get_inode_acl	= fuse_get_inode_acl,
2215 	.get_acl	= fuse_get_acl,
2216 	.set_acl	= fuse_set_acl,
2217 	.fileattr_get	= fuse_fileattr_get,
2218 	.fileattr_set	= fuse_fileattr_set,
2219 };
2220 
2221 static const struct file_operations fuse_dir_operations = {
2222 	.llseek		= generic_file_llseek,
2223 	.read		= generic_read_dir,
2224 	.iterate_shared	= fuse_readdir,
2225 	.open		= fuse_dir_open,
2226 	.release	= fuse_dir_release,
2227 	.fsync		= fuse_dir_fsync,
2228 	.unlocked_ioctl	= fuse_dir_ioctl,
2229 	.compat_ioctl	= fuse_dir_compat_ioctl,
2230 	.setlease	= simple_nosetlease,
2231 };
2232 
2233 static const struct inode_operations fuse_common_inode_operations = {
2234 	.setattr	= fuse_setattr,
2235 	.permission	= fuse_permission,
2236 	.getattr	= fuse_getattr,
2237 	.listxattr	= fuse_listxattr,
2238 	.get_inode_acl	= fuse_get_inode_acl,
2239 	.get_acl	= fuse_get_acl,
2240 	.set_acl	= fuse_set_acl,
2241 	.fileattr_get	= fuse_fileattr_get,
2242 	.fileattr_set	= fuse_fileattr_set,
2243 };
2244 
2245 static const struct inode_operations fuse_symlink_inode_operations = {
2246 	.setattr	= fuse_setattr,
2247 	.get_link	= fuse_get_link,
2248 	.getattr	= fuse_getattr,
2249 	.listxattr	= fuse_listxattr,
2250 };
2251 
2252 void fuse_init_common(struct inode *inode)
2253 {
2254 	inode->i_op = &fuse_common_inode_operations;
2255 }
2256 
2257 void fuse_init_dir(struct inode *inode)
2258 {
2259 	struct fuse_inode *fi = get_fuse_inode(inode);
2260 
2261 	inode->i_op = &fuse_dir_inode_operations;
2262 	inode->i_fop = &fuse_dir_operations;
2263 
2264 	spin_lock_init(&fi->rdc.lock);
2265 	fi->rdc.cached = false;
2266 	fi->rdc.size = 0;
2267 	fi->rdc.pos = 0;
2268 	fi->rdc.version = 0;
2269 }
2270 
2271 static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
2272 {
2273 	int err = fuse_readlink_folio(folio->mapping->host, folio);
2274 
2275 	if (!err)
2276 		folio_mark_uptodate(folio);
2277 
2278 	folio_unlock(folio);
2279 
2280 	return err;
2281 }
2282 
2283 static const struct address_space_operations fuse_symlink_aops = {
2284 	.read_folio	= fuse_symlink_read_folio,
2285 };
2286 
2287 void fuse_init_symlink(struct inode *inode)
2288 {
2289 	inode->i_op = &fuse_symlink_inode_operations;
2290 	inode->i_data.a_ops = &fuse_symlink_aops;
2291 	inode_nohighmem(inode);
2292 }
2293