xref: /linux/fs/fuse/dir.c (revision 357660d7596bd40d1004762739e426b1fbe10a14)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/fs_context.h>
14 #include <linux/moduleparam.h>
15 #include <linux/sched.h>
16 #include <linux/namei.h>
17 #include <linux/slab.h>
18 #include <linux/xattr.h>
19 #include <linux/iversion.h>
20 #include <linux/posix_acl.h>
21 #include <linux/security.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 
/*
 * Boolean module parameter, registered with permission bits 0644.  What it
 * controls is described by the MODULE_PARM_DESC() text below.
 */
static bool __read_mostly allow_sys_admin_access;
module_param(allow_sys_admin_access, bool, 0644);
MODULE_PARM_DESC(allow_sys_admin_access,
		 "Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check");
29 
fuse_advise_use_readdirplus(struct inode * dir)30 static void fuse_advise_use_readdirplus(struct inode *dir)
31 {
32 	struct fuse_inode *fi = get_fuse_inode(dir);
33 
34 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
35 }
36 
37 #if BITS_PER_LONG >= 64
/* BITS_PER_LONG >= 64: the time value is stored directly in d_fsdata. */
static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
{
	void *encoded = (void *) time;

	entry->d_fsdata = encoded;
}
42 
fuse_dentry_time(const struct dentry * entry)43 static inline u64 fuse_dentry_time(const struct dentry *entry)
44 {
45 	return (u64)entry->d_fsdata;
46 }
47 
48 #else
/*
 * Per-dentry private data used when BITS_PER_LONG < 64; dentry->d_fsdata
 * points at one of these (allocated in fuse_dentry_init()).
 */
union fuse_dentry {
	/* value stored by __fuse_dentry_settime() */
	u64 time;
	/* handed to kfree_rcu() in fuse_dentry_release() */
	struct rcu_head rcu;
};
53 
/* BITS_PER_LONG < 64: store the time in the union that d_fsdata points to. */
static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
{
	union fuse_dentry *fd = dentry->d_fsdata;

	fd->time = time;
}
58 
fuse_dentry_time(const struct dentry * entry)59 static inline u64 fuse_dentry_time(const struct dentry *entry)
60 {
61 	return ((union fuse_dentry *) entry->d_fsdata)->time;
62 }
63 #endif
64 
/*
 * Record @time on @dentry and keep DCACHE_OP_DELETE in step with it: the
 * flag is wanted only for a zero @time on a connection with delete_stale.
 */
static void fuse_dentry_settime(struct dentry *dentry, u64 time)
{
	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
	bool want_delete = !time && fc->delete_stale;
	bool have_delete = dentry->d_flags & DCACHE_OP_DELETE;

	/*
	 * DCACHE_OP_DELETE is toggled because dput() is faster without it.
	 * The unlocked peek above can race; that is acceptable since this
	 * is purely an optimization.
	 */
	if (want_delete != have_delete) {
		spin_lock(&dentry->d_lock);
		if (want_delete)
			dentry->d_flags |= DCACHE_OP_DELETE;
		else
			dentry->d_flags &= ~DCACHE_OP_DELETE;
		spin_unlock(&dentry->d_lock);
	}

	__fuse_dentry_settime(dentry, time);
}
85 
86 /*
87  * FUSE caches dentries and attributes with separate timeout.  The
88  * time in jiffies until the dentry/attributes are valid is stored in
89  * dentry->d_fsdata and fuse_inode->i_time respectively.
90  */
91 
92 /*
93  * Calculate the time in jiffies until a dentry/attributes are valid
94  */
u64 fuse_time_to_jiffies(u64 sec, u32 nsec)
{
	struct timespec64 ts;

	/* A timeout of exactly zero yields 0, not the current jiffies value */
	if (!sec && !nsec)
		return 0;

	ts.tv_sec = sec;
	/* Cap the nanosecond part at NSEC_PER_SEC - 1 */
	ts.tv_nsec = min_t(u32, nsec, NSEC_PER_SEC - 1);

	return get_jiffies_64() + timespec64_to_jiffies(&ts);
}
107 
108 /*
109  * Set dentry and possibly attribute timeouts from the lookup/mk*
110  * replies
111  */
fuse_change_entry_timeout(struct dentry * entry,struct fuse_entry_out * o)112 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
113 {
114 	fuse_dentry_settime(entry,
115 		fuse_time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
116 }
117 
/* Add the bits in @mask to the inode's inval_mask. */
void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	/* Clear-mask of 0: nothing is removed, only @mask is set */
	set_mask_bits(&fi->inval_mask, 0, mask);
}
122 
123 /*
124  * Mark the attributes as stale, so that at the next call to
125  * ->getattr() they will be fetched from userspace
126  */
fuse_invalidate_attr(struct inode * inode)127 void fuse_invalidate_attr(struct inode *inode)
128 {
129 	fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
130 }
131 
fuse_dir_changed(struct inode * dir)132 static void fuse_dir_changed(struct inode *dir)
133 {
134 	fuse_invalidate_attr(dir);
135 	inode_maybe_inc_iversion(dir, false);
136 }
137 
138 /*
139  * Mark the attributes as stale due to an atime change.  Avoid the invalidate if
140  * atime is not used.
141  */
fuse_invalidate_atime(struct inode * inode)142 void fuse_invalidate_atime(struct inode *inode)
143 {
144 	if (!IS_RDONLY(inode))
145 		fuse_invalidate_attr_mask(inode, STATX_ATIME);
146 }
147 
148 /*
149  * Just mark the entry as stale, so that a next attempt to look it up
150  * will result in a new lookup call to userspace
151  *
152  * This is called when a dentry is about to become negative and the
153  * timeout is unknown (unlink, rmdir, rename and in some cases
154  * lookup)
155  */
fuse_invalidate_entry_cache(struct dentry * entry)156 void fuse_invalidate_entry_cache(struct dentry *entry)
157 {
158 	fuse_dentry_settime(entry, 0);
159 }
160 
161 /*
162  * Same as fuse_invalidate_entry_cache(), but also try to remove the
163  * dentry from the hash
164  */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);
	/* Same effect as fuse_invalidate_entry_cache(): zero the stored time */
	fuse_dentry_settime(entry, 0);
}
170 
/*
 * Fill @args for a FUSE_LOOKUP of @name under @nodeid, with the reply
 * directed into @outarg.  @fc is not referenced by this function.
 */
static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
			     u64 nodeid, const struct qstr *name,
			     struct fuse_entry_out *outarg)
{
	/* Reply side: a single, pre-zeroed fuse_entry_out */
	memset(outarg, 0, sizeof(*outarg));
	args->out_numargs = 1;
	args->out_args[0].value = outarg;
	args->out_args[0].size = sizeof(*outarg);

	args->opcode = FUSE_LOOKUP;
	args->nodeid = nodeid;

	/*
	 * Request side: arg0 via fuse_set_zero_arg0(), then the name bytes
	 * (name->len, no terminator) followed by a separate one-byte "".
	 */
	args->in_numargs = 3;
	fuse_set_zero_arg0(args);
	args->in_args[1].value = name->name;
	args->in_args[1].size = name->len;
	args->in_args[2].value = "";
	args->in_args[2].size = 1;
}
188 
189 /*
190  * Check whether the dentry is still valid
191  *
192  * If the entry validity timeout has expired and the dentry is
193  * positive, try to redo the lookup.  If the lookup results in a
194  * different inode, then let the VFS invalidate the dentry and redo
195  * the lookup once more.  If the lookup results in the same inode,
196  * then refresh the attributes, timeouts and mark the dentry valid.
197  */
static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
				  struct dentry *entry, unsigned int flags)
{
	struct inode *inode;
	struct fuse_mount *fm;
	struct fuse_inode *fi;
	int ret;

	/*
	 * Result: 1 when the dentry is considered valid, 0 when it is not,
	 * or a negative errno (-ECHILD, -ENOMEM, -EINTR) when no verdict
	 * was reached.
	 */
	inode = d_inode_rcu(entry);
	if (inode && fuse_is_bad(inode))
		goto invalid;
	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
		 (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) {
		/* Timeout expired, or the caller's flags force a re-lookup */
		struct fuse_entry_out outarg;
		FUSE_ARGS(args);
		struct fuse_forget_link *forget;
		u64 attr_version;

		/* For negative dentries, always do a fresh lookup */
		if (!inode)
			goto invalid;

		/* No request is sent under LOOKUP_RCU; -ECHILD is returned */
		ret = -ECHILD;
		if (flags & LOOKUP_RCU)
			goto out;

		fm = get_fuse_mount(inode);

		/* Allocated up front so a forget can be queued further down */
		forget = fuse_alloc_forget();
		ret = -ENOMEM;
		if (!forget)
			goto out;

		/* Sampled before the request, passed to fuse_change_attributes() */
		attr_version = fuse_get_attr_version(fm->fc);

		fuse_lookup_init(fm->fc, &args, get_node_id(dir),
				 name, &outarg);
		ret = fuse_simple_request(fm, &args);
		/* Zero nodeid is same as -ENOENT */
		if (!ret && !outarg.nodeid)
			ret = -ENOENT;
		if (!ret) {
			fi = get_fuse_inode(inode);
			/*
			 * The name now resolves to a different node, or its
			 * submount status differs from the cached inode:
			 * queue a forget for the looked-up node and report
			 * the dentry invalid.  'forget' is passed to
			 * fuse_queue_forget() and not freed here.
			 */
			if (outarg.nodeid != get_node_id(inode) ||
			    (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
				fuse_queue_forget(fm->fc, forget,
						  outarg.nodeid, 1);
				goto invalid;
			}
			/* Same node: count the additional successful lookup */
			spin_lock(&fi->lock);
			fi->nlookup++;
			spin_unlock(&fi->lock);
		}
		/* The forget link was not queued on this path */
		kfree(forget);
		/* These two errors are returned as-is instead of invalidating */
		if (ret == -ENOMEM || ret == -EINTR)
			goto out;
		if (ret || fuse_invalid_attr(&outarg.attr) ||
		    fuse_stale_inode(inode, outarg.generation, &outarg.attr))
			goto invalid;

		/* Still valid: refresh cached ACLs, attributes and timeout */
		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &outarg.attr, NULL,
				       ATTR_TIMEOUT(&outarg),
				       attr_version);
		fuse_change_entry_timeout(entry, &outarg);
	} else if (inode) {
		/* Timeout not expired and dentry positive: no request needed */
		fi = get_fuse_inode(inode);
		if (flags & LOOKUP_RCU) {
			/* Bit is only tested here; -ECHILD is returned if set */
			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
				return -ECHILD;
		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
			fuse_advise_use_readdirplus(dir);
		}
	}
	ret = 1;
out:
	return ret;

invalid:
	ret = 0;
	goto out;
}
280 
281 #if BITS_PER_LONG < 64
fuse_dentry_init(struct dentry * dentry)282 static int fuse_dentry_init(struct dentry *dentry)
283 {
284 	dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
285 				   GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
286 
287 	return dentry->d_fsdata ? 0 : -ENOMEM;
288 }
fuse_dentry_release(struct dentry * dentry)289 static void fuse_dentry_release(struct dentry *dentry)
290 {
291 	union fuse_dentry *fd = dentry->d_fsdata;
292 
293 	kfree_rcu(fd, rcu);
294 }
295 #endif
296 
fuse_dentry_delete(const struct dentry * dentry)297 static int fuse_dentry_delete(const struct dentry *dentry)
298 {
299 	return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
300 }
301 
302 /*
303  * Create a fuse_mount object with a new superblock (with path->dentry
304  * as the root), and return that mount so it can be auto-mounted on
305  * @path.
306  */
fuse_dentry_automount(struct path * path)307 static struct vfsmount *fuse_dentry_automount(struct path *path)
308 {
309 	struct fs_context *fsc;
310 	struct vfsmount *mnt;
311 	struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
312 
313 	fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
314 	if (IS_ERR(fsc))
315 		return ERR_CAST(fsc);
316 
317 	/* Pass the FUSE inode of the mount for fuse_get_tree_submount() */
318 	fsc->fs_private = mp_fi;
319 
320 	/* Create the submount */
321 	mnt = fc_mount(fsc);
322 	if (!IS_ERR(mnt))
323 		mntget(mnt);
324 
325 	put_fs_context(fsc);
326 	return mnt;
327 }
328 
/*
 * Dentry operations table with the revalidate, delete and automount hooks.
 * The init/release hooks are only present when BITS_PER_LONG < 64, i.e.
 * when d_fsdata points to an allocated union fuse_dentry.
 */
const struct dentry_operations fuse_dentry_operations = {
	.d_revalidate	= fuse_dentry_revalidate,
	.d_delete	= fuse_dentry_delete,
#if BITS_PER_LONG < 64
	.d_init		= fuse_dentry_init,
	.d_release	= fuse_dentry_release,
#endif
	.d_automount	= fuse_dentry_automount,
};
338 
/*
 * Reduced operations table: no revalidate/delete/automount hooks.  Only the
 * init/release pair is set, and only when BITS_PER_LONG < 64; otherwise the
 * table has no hooks at all.
 */
const struct dentry_operations fuse_root_dentry_operations = {
#if BITS_PER_LONG < 64
	.d_init		= fuse_dentry_init,
	.d_release	= fuse_dentry_release,
#endif
};
345 
/*
 * Return 1 if the file-type bits of mode @m name one of the seven known
 * types (regular, directory, symlink, char/block device, FIFO, socket),
 * otherwise 0.
 */
int fuse_valid_type(int m)
{
	if (S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m))
		return 1;
	if (S_ISCHR(m) || S_ISBLK(m))
		return 1;

	return S_ISFIFO(m) || S_ISSOCK(m);
}
351 
fuse_valid_size(u64 size)352 static bool fuse_valid_size(u64 size)
353 {
354 	return size <= LLONG_MAX;
355 }
356 
fuse_invalid_attr(struct fuse_attr * attr)357 bool fuse_invalid_attr(struct fuse_attr *attr)
358 {
359 	return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
360 }
361 
/*
 * Send a FUSE_LOOKUP for @name under directory node @nodeid and, on
 * success, obtain the matching inode through fuse_iget().
 *
 * @outarg receives the reply.  *@inode is set to NULL first and only
 * becomes non-NULL when 0 is returned for a non-zero nodeid.
 *
 * Returns 0 on success (including a reply with nodeid zero, in which case
 * *@inode stays NULL), -ENAMETOOLONG if @name exceeds FUSE_NAME_MAX,
 * -ENOMEM on allocation failure, -EIO if the returned attributes are
 * invalid, or the error from fuse_simple_request().
 */
int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
		     struct fuse_entry_out *outarg, struct inode **inode)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	FUSE_ARGS(args);
	struct fuse_forget_link *forget;
	u64 attr_version, evict_ctr;
	int err;

	*inode = NULL;
	err = -ENAMETOOLONG;
	if (name->len > FUSE_NAME_MAX)
		goto out;


	/* Allocated before the request so a forget can be queued afterwards */
	forget = fuse_alloc_forget();
	err = -ENOMEM;
	if (!forget)
		goto out;

	/* Both values are sampled before the request and given to fuse_iget() */
	attr_version = fuse_get_attr_version(fm->fc);
	evict_ctr = fuse_get_evict_ctr(fm->fc);

	fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
	err = fuse_simple_request(fm, &args);
	/* Zero nodeid is same as -ENOENT, but with valid timeout */
	if (err || !outarg->nodeid)
		goto out_put_forget;

	err = -EIO;
	if (fuse_invalid_attr(&outarg->attr))
		goto out_put_forget;
	/* A non-zero generation on the root node is warned about and zeroed */
	if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
		pr_warn_once("root generation should be zero\n");
		outarg->generation = 0;
	}

	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
			   &outarg->attr, ATTR_TIMEOUT(outarg),
			   attr_version, evict_ctr);
	err = -ENOMEM;
	if (!*inode) {
		/*
		 * 'forget' is passed to fuse_queue_forget(), so this path
		 * jumps past the kfree() below.
		 */
		fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
		goto out;
	}
	err = 0;

 out_put_forget:
	kfree(forget);
 out:
	return err;
}
414 
/*
 * Look up @entry in @dir and splice the resulting inode (or a negative
 * result) into the dcache.
 *
 * Returns what d_splice_alias() returned (possibly NULL) on success, or an
 * ERR_PTR(): -EIO if @dir is bad or the lookup resolved to FUSE_ROOT_ID,
 * otherwise the error from fuse_lookup_name() or d_splice_alias().
 * @flags is not used in this function.
 */
static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
				  unsigned int flags)
{
	int err;
	struct fuse_entry_out outarg;
	struct inode *inode;
	struct dentry *newent;
	bool outarg_valid = true;
	bool locked;

	if (fuse_is_bad(dir))
		return ERR_PTR(-EIO);

	locked = fuse_lock_inode(dir);
	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
			       &outarg, &inode);
	fuse_unlock_inode(dir, locked);
	/*
	 * -ENOENT is not an error for lookup: carry on with a NULL inode,
	 * but remember that outarg holds no usable timeout.
	 */
	if (err == -ENOENT) {
		outarg_valid = false;
		err = 0;
	}
	if (err)
		goto out_err;

	/* A lookup below a directory must not resolve to the root node */
	err = -EIO;
	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
		goto out_iput;

	newent = d_splice_alias(inode, entry);
	err = PTR_ERR(newent);
	if (IS_ERR(newent))
		goto out_err;

	/* Apply the timeout to whichever dentry ended up being used */
	entry = newent ? newent : entry;
	if (outarg_valid)
		fuse_change_entry_timeout(entry, &outarg);
	else
		fuse_invalidate_entry_cache(entry);

	if (inode)
		fuse_advise_use_readdirplus(dir);
	return newent;

 out_iput:
	iput(inode);
 out_err:
	return ERR_PTR(err);
}
463 
/*
 * Build the security-context payload for a create-type request.
 *
 * The buffer starts with a struct fuse_secctx_header.  If the LSM supplied
 * a context (lsmctx.len != 0) it is followed by one struct fuse_secctx, the
 * NUL-terminated xattr name and the raw context bytes, with that record
 * padded via FUSE_REC_ALIGN().
 *
 * On success the kzalloc()ed buffer and its length are stored in @ext and
 * 0 is returned.  On failure @ext is left untouched and a negative errno is
 * returned: the error from security_dentry_init_security() (other than
 * -EOPNOTSUPP, which is ignored), -EIO for an oversized name or context,
 * or -ENOMEM.
 */
static int get_security_context(struct dentry *entry, umode_t mode,
				struct fuse_in_arg *ext)
{
	struct fuse_secctx *fctx;
	struct fuse_secctx_header *header;
	struct lsm_context lsmctx = { };
	void *ptr;
	u32 total_len = sizeof(*header);
	int err, nr_ctx = 0;
	const char *name = NULL;
	size_t namelen;

	err = security_dentry_init_security(entry, mode, &entry->d_name,
					    &name, &lsmctx);

	/* If no LSM is supporting this security hook ignore error */
	if (err && err != -EOPNOTSUPP)
		goto out_err;

	if (lsmctx.len) {
		nr_ctx = 1;
		/* namelen includes the terminating NUL */
		namelen = strlen(name) + 1;
		err = -EIO;
		if (WARN_ON(namelen > XATTR_NAME_MAX + 1 ||
		    lsmctx.len > S32_MAX))
			goto out_err;
		total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen +
					    lsmctx.len);
	}

	err = -ENOMEM;
	/* Zeroed allocation, so alignment padding is already zero */
	header = ptr = kzalloc(total_len, GFP_KERNEL);
	if (!ptr)
		goto out_err;

	header->nr_secctx = nr_ctx;
	header->size = total_len;
	ptr += sizeof(*header);
	if (nr_ctx) {
		fctx = ptr;
		fctx->size = lsmctx.len;
		ptr += sizeof(*fctx);

		/* namelen was bounds-checked above and sized into total_len */
		strcpy(ptr, name);
		ptr += namelen;

		memcpy(ptr, lsmctx.context, lsmctx.len);
	}
	ext->size = total_len;
	ext->value = header;
	err = 0;
out_err:
	/* nr_ctx is only set once the LSM has returned a context */
	if (nr_ctx)
		security_release_secctx(&lsmctx);
	return err;
}
520 
/*
 * Grow @buf by @bytes zeroed bytes and return a pointer to the start of the
 * newly appended region.  If the reallocation fails, the existing buffer is
 * freed, @buf is reset to empty and NULL is returned.
 */
static void *extend_arg(struct fuse_in_arg *buf, u32 bytes)
{
	u32 newlen = buf->size + bytes;
	void *grown = krealloc(buf->value, newlen, GFP_KERNEL);

	if (!grown) {
		kfree(buf->value);
		buf->value = NULL;
		buf->size = 0;
		return NULL;
	}

	/* Only the tail past the old size needs clearing */
	memset(grown + buf->size, 0, bytes);
	buf->size = newlen;
	buf->value = grown;

	return grown + newlen - bytes;
}
540 
fuse_ext_size(size_t size)541 static u32 fuse_ext_size(size_t size)
542 {
543 	return FUSE_REC_ALIGN(sizeof(struct fuse_ext_header) + size);
544 }
545 
546 /*
547  * This adds just a single supplementary group that matches the parent's group.
548  */
get_create_supp_group(struct mnt_idmap * idmap,struct inode * dir,struct fuse_in_arg * ext)549 static int get_create_supp_group(struct mnt_idmap *idmap,
550 				 struct inode *dir,
551 				 struct fuse_in_arg *ext)
552 {
553 	struct fuse_conn *fc = get_fuse_conn(dir);
554 	struct fuse_ext_header *xh;
555 	struct fuse_supp_groups *sg;
556 	kgid_t kgid = dir->i_gid;
557 	vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns, kgid);
558 	gid_t parent_gid = from_kgid(fc->user_ns, kgid);
559 
560 	u32 sg_len = fuse_ext_size(sizeof(*sg) + sizeof(sg->groups[0]));
561 
562 	if (parent_gid == (gid_t) -1 || vfsgid_eq_kgid(vfsgid, current_fsgid()) ||
563 	    !vfsgid_in_group_p(vfsgid))
564 		return 0;
565 
566 	xh = extend_arg(ext, sg_len);
567 	if (!xh)
568 		return -ENOMEM;
569 
570 	xh->size = sg_len;
571 	xh->type = FUSE_EXT_GROUPS;
572 
573 	sg = (struct fuse_supp_groups *) &xh[1];
574 	sg->nr_groups = 1;
575 	sg->groups[0] = parent_gid;
576 
577 	return 0;
578 }
579 
/*
 * Collect the optional extensions (security context, supplementary group)
 * for a create-type request and, if any were produced, append them to
 * @args as one extra input argument.  Returns 0 or a negative errno.
 */
static int get_create_ext(struct mnt_idmap *idmap,
			  struct fuse_args *args,
			  struct inode *dir, struct dentry *dentry,
			  umode_t mode)
{
	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
	struct fuse_in_arg ext = { .value = NULL, .size = 0 };
	int err = 0;

	if (fc->init_security)
		err = get_security_context(dentry, mode, &ext);
	if (!err && fc->create_supp_group)
		err = get_create_supp_group(idmap, dir, &ext);

	/* Failure, or nothing to attach: drop whatever buffer exists */
	if (err || !ext.size) {
		kfree(ext.value);
		return err;
	}

	/* The buffer now belongs to @args; see free_ext_value() */
	WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args));
	args->is_ext = true;
	args->ext_idx = args->in_numargs++;
	args->in_args[args->ext_idx] = ext;

	return err;
}
605 
free_ext_value(struct fuse_args * args)606 static void free_ext_value(struct fuse_args *args)
607 {
608 	if (args->is_ext)
609 		kfree(args->in_args[args->ext_idx].value);
610 }
611 
612 /*
613  * Atomic create+open operation
614  *
615  * If the filesystem doesn't support this, then fall back to separate
616  * 'mknod' + 'open' requests.
617  */
/*
 * Issue a combined create+open request (@opcode) for @entry in @dir and, on
 * success, instantiate the dentry and finish opening @file.
 *
 * Returns 0 on success or a negative errno: -ENOMEM on allocation failure,
 * -EIO if the reply is not a valid regular file, or the error from the
 * request, generic_file_open() or finish_open().
 */
static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
			    struct dentry *entry, struct file *file,
			    unsigned int flags, umode_t mode, u32 opcode)
{
	int err;
	struct inode *inode;
	struct fuse_mount *fm = get_fuse_mount(dir);
	FUSE_ARGS(args);
	struct fuse_forget_link *forget;
	struct fuse_create_in inarg;
	struct fuse_open_out *outopenp;
	struct fuse_entry_out outentry;
	struct fuse_inode *fi;
	struct fuse_file *ff;
	/* Captured up front; needed again after the request completes */
	bool trunc = flags & O_TRUNC;

	/* Userspace expects S_IFREG in create mode */
	BUG_ON((mode & S_IFMT) != S_IFREG);

	forget = fuse_alloc_forget();
	err = -ENOMEM;
	if (!forget)
		goto out_err;

	err = -ENOMEM;
	ff = fuse_file_alloc(fm, true);
	if (!ff)
		goto out_put_forget_req;

	/* The umask is applied here unless the connection has dont_mask set */
	if (!fm->fc->dont_mask)
		mode &= ~current_umask();

	flags &= ~O_NOCTTY;
	memset(&inarg, 0, sizeof(inarg));
	memset(&outentry, 0, sizeof(outentry));
	inarg.flags = flags;
	inarg.mode = mode;
	inarg.umask = current_umask();

	/*
	 * FUSE_OPEN_KILL_SUIDGID is requested only for a truncating,
	 * non-exclusive open by a caller lacking CAP_FSETID, and only when
	 * the connection has handle_killpriv_v2 set.
	 */
	if (fm->fc->handle_killpriv_v2 && trunc &&
	    !(flags & O_EXCL) && !capable(CAP_FSETID)) {
		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
	}

	/* Input: create parameters + NUL-terminated name; output: entry + open */
	args.opcode = opcode;
	args.nodeid = get_node_id(dir);
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.in_args[1].size = entry->d_name.len + 1;
	args.in_args[1].value = entry->d_name.name;
	args.out_numargs = 2;
	args.out_args[0].size = sizeof(outentry);
	args.out_args[0].value = &outentry;
	/* Store outarg for fuse_finish_open() */
	outopenp = &ff->args->open_outarg;
	args.out_args[1].size = sizeof(*outopenp);
	args.out_args[1].value = outopenp;

	err = get_create_ext(idmap, &args, dir, entry, mode);
	if (err)
		goto out_free_ff;

	err = fuse_simple_idmap_request(idmap, fm, &args);
	/* The extension buffer is released whether or not the request failed */
	free_ext_value(&args);
	if (err)
		goto out_free_ff;

	/* Reject replies that are not a regular file with sane id/attributes */
	err = -EIO;
	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
	    fuse_invalid_attr(&outentry.attr))
		goto out_free_ff;

	ff->fh = outopenp->fh;
	ff->nodeid = outentry.nodeid;
	ff->open_flags = outopenp->open_flags;
	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
			  &outentry.attr, ATTR_TIMEOUT(&outentry), 0, 0);
	if (!inode) {
		/*
		 * ff and forget are handed to fuse_sync_release() and
		 * fuse_queue_forget() respectively, so this path goes to
		 * out_err and frees neither.
		 */
		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
		fuse_sync_release(NULL, ff, flags);
		fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
		err = -ENOMEM;
		goto out_err;
	}
	/* Inode obtained: the forget link is not needed */
	kfree(forget);
	d_instantiate(entry, inode);
	fuse_change_entry_timeout(entry, &outentry);
	fuse_dir_changed(dir);
	err = generic_file_open(inode, file);
	if (!err) {
		file->private_data = ff;
		err = finish_open(file, entry, fuse_finish_open);
	}
	if (err) {
		/* Opening failed after the create succeeded: release ff */
		fi = get_fuse_inode(inode);
		fuse_sync_release(fi, ff, flags);
	} else {
		if (fm->fc->atomic_o_trunc && trunc)
			truncate_pagecache(inode, 0);
		else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
			invalidate_inode_pages2(inode->i_mapping);
	}
	return err;

out_free_ff:
	fuse_file_free(ff);
out_put_forget_req:
	kfree(forget);
out_err:
	return err;
}
730 
731 static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
732 		      umode_t, dev_t);
/*
 * Atomic-open entry point: perform the lookup if @entry is still in lookup,
 * then try FUSE_CREATE when O_CREAT is set and the dentry is negative.  If
 * the connection has no_create set, or FUSE_CREATE returns -ENOSYS, fall
 * back to fuse_mknod() and finish via finish_no_open().
 */
static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
			    struct file *file, unsigned flags,
			    umode_t mode)
{
	int err;
	struct mnt_idmap *idmap = file_mnt_idmap(file);
	struct fuse_conn *fc = get_fuse_conn(dir);
	/* Alternate dentry from fuse_lookup(), if any; NULL otherwise */
	struct dentry *res = NULL;

	if (fuse_is_bad(dir))
		return -EIO;

	if (d_in_lookup(entry)) {
		res = fuse_lookup(dir, entry, 0);
		if (IS_ERR(res))
			return PTR_ERR(res);

		/* Continue with the dentry that the lookup substituted */
		if (res)
			entry = res;
	}

	/* Not a create, or the file already exists: no open is done here */
	if (!(flags & O_CREAT) || d_really_is_positive(entry))
		goto no_open;

	/* Only creates */
	file->f_mode |= FMODE_CREATED;

	if (fc->no_create)
		goto mknod;

	err = fuse_create_open(idmap, dir, entry, file, flags, mode, FUSE_CREATE);
	if (err == -ENOSYS) {
		/* Remember the missing FUSE_CREATE so later calls skip it */
		fc->no_create = 1;
		goto mknod;
	} else if (err == -EEXIST)
		fuse_invalidate_entry(entry);
out_dput:
	/* res is dropped here; on the no_open path it goes to finish_no_open() */
	dput(res);
	return err;

mknod:
	err = fuse_mknod(idmap, dir, entry, mode, 0);
	if (err)
		goto out_dput;
no_open:
	return finish_no_open(file, res);
}
780 
781 /*
782  * Code shared between mknod, mkdir, symlink and link
783  */
/*
 * Send the prepared request in @args (the caller has filled in the opcode
 * and input arguments), validate the fuse_entry_out reply and splice the
 * new inode onto @entry.
 *
 * Returns the result of d_splice_alias() (possibly NULL) on success, or an
 * ERR_PTR(): -EIO for a bad @dir, an invalid reply or a file type that does
 * not match @mode, -ENOMEM on allocation failure, otherwise the error from
 * get_create_ext() or the request.
 */
static struct dentry *create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm,
				       struct fuse_args *args, struct inode *dir,
				       struct dentry *entry, umode_t mode)
{
	struct fuse_entry_out outarg;
	struct inode *inode;
	struct dentry *d;
	int err;
	struct fuse_forget_link *forget;

	if (fuse_is_bad(dir))
		return ERR_PTR(-EIO);

	forget = fuse_alloc_forget();
	if (!forget)
		return ERR_PTR(-ENOMEM);

	memset(&outarg, 0, sizeof(outarg));
	args->nodeid = get_node_id(dir);
	args->out_numargs = 1;
	args->out_args[0].size = sizeof(outarg);
	args->out_args[0].value = &outarg;

	/* Create extensions are attached for every opcode except FUSE_LINK */
	if (args->opcode != FUSE_LINK) {
		err = get_create_ext(idmap, args, dir, entry, mode);
		if (err)
			goto out_put_forget_req;
	}

	err = fuse_simple_idmap_request(idmap, fm, args);
	/* The extension buffer is released whether or not the request failed */
	free_ext_value(args);
	if (err)
		goto out_put_forget_req;

	err = -EIO;
	if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
		goto out_put_forget_req;

	/* The returned file type must match the requested one */
	if ((outarg.attr.mode ^ mode) & S_IFMT)
		goto out_put_forget_req;

	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
			  &outarg.attr, ATTR_TIMEOUT(&outarg), 0, 0);
	if (!inode) {
		/* forget is passed to fuse_queue_forget(), not freed here */
		fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
		return ERR_PTR(-ENOMEM);
	}
	kfree(forget);

	d_drop(entry);
	d = d_splice_alias(inode, entry);
	if (IS_ERR(d))
		return d;

	/* Set the timeout on the dentry that now refers to the inode */
	if (d)
		fuse_change_entry_timeout(d, &outarg);
	else
		fuse_change_entry_timeout(entry, &outarg);
	fuse_dir_changed(dir);
	return d;

 out_put_forget_req:
	/* -EEXIST shows the cached dentry is out of date: invalidate it */
	if (err == -EEXIST)
		fuse_invalidate_entry(entry);
	kfree(forget);
	return ERR_PTR(err);
}
851 
/* Wrapper around create_new_entry() for non-directories; returns an errno. */
static int create_new_nondir(struct mnt_idmap *idmap, struct fuse_mount *fm,
			     struct fuse_args *args, struct inode *dir,
			     struct dentry *entry, umode_t mode)
{
	struct dentry *res;

	WARN_ON_ONCE(S_ISDIR(mode));

	/*
	 * d_splice_alias() only hands back an alternate dentry for
	 * directories, so for anything else create_new_entry() will NOT
	 * return one and the result can be reduced to an error code
	 * without checking for that case.
	 */
	res = create_new_entry(idmap, fm, args, dir, entry, mode);

	return PTR_ERR(res);
}
867 
/* Build and send a FUSE_MKNOD request for @entry in @dir. */
static int fuse_mknod(struct mnt_idmap *idmap, struct inode *dir,
		      struct dentry *entry, umode_t mode, dev_t rdev)
{
	struct fuse_mount *fm = get_fuse_mount(dir);
	struct fuse_mknod_in inarg;
	FUSE_ARGS(args);

	/* The umask is applied here unless the connection has dont_mask set */
	if (!fm->fc->dont_mask)
		mode &= ~current_umask();

	memset(&inarg, 0, sizeof(inarg));
	inarg.umask = current_umask();
	inarg.rdev = new_encode_dev(rdev);
	inarg.mode = mode;

	/* Two inputs: the mknod parameters and the NUL-terminated name */
	args.opcode = FUSE_MKNOD;
	args.in_numargs = 2;
	args.in_args[0].value = &inarg;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[1].value = entry->d_name.name;
	args.in_args[1].size = entry->d_name.len + 1;

	return create_new_nondir(idmap, fm, &args, dir, entry, mode);
}
890 
/* Create by delegating to fuse_mknod() with rdev 0; @excl is not used. */
static int fuse_create(struct mnt_idmap *idmap, struct inode *dir,
		       struct dentry *entry, umode_t mode, bool excl)
{
	const dev_t no_dev = 0;

	return fuse_mknod(idmap, dir, entry, mode, no_dev);
}
896 
/*
 * Handle tmpfile creation through FUSE_TMPFILE.  Returns -EOPNOTSUPP once
 * the connection is known (no_tmpfile) not to implement it.
 */
static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
			struct file *file, umode_t mode)
{
	struct fuse_conn *fc = get_fuse_conn(dir);
	int err;

	if (fc->no_tmpfile)
		return -EOPNOTSUPP;

	err = fuse_create_open(idmap, dir, file->f_path.dentry, file,
			       file->f_flags, mode, FUSE_TMPFILE);
	if (err != -ENOSYS)
		return err;

	/* -ENOSYS: remember it on the connection and report -EOPNOTSUPP */
	fc->no_tmpfile = 1;
	return -EOPNOTSUPP;
}
914 
/* Build and send a FUSE_MKDIR request for @entry in @dir. */
static struct dentry *fuse_mkdir(struct mnt_idmap *idmap, struct inode *dir,
				 struct dentry *entry, umode_t mode)
{
	struct fuse_mount *fm = get_fuse_mount(dir);
	struct fuse_mkdir_in inarg;
	FUSE_ARGS(args);

	/* The umask is applied here unless the connection has dont_mask set */
	if (!fm->fc->dont_mask)
		mode &= ~current_umask();

	memset(&inarg, 0, sizeof(inarg));
	inarg.umask = current_umask();
	inarg.mode = mode;

	/* Two inputs: the mkdir parameters and the NUL-terminated name */
	args.opcode = FUSE_MKDIR;
	args.in_numargs = 2;
	args.in_args[0].value = &inarg;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[1].value = entry->d_name.name;
	args.in_args[1].size = entry->d_name.len + 1;

	/* The reply's file type is checked against S_IFDIR */
	return create_new_entry(idmap, fm, &args, dir, entry, S_IFDIR);
}
936 
fuse_symlink(struct mnt_idmap * idmap,struct inode * dir,struct dentry * entry,const char * link)937 static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir,
938 			struct dentry *entry, const char *link)
939 {
940 	struct fuse_mount *fm = get_fuse_mount(dir);
941 	unsigned len = strlen(link) + 1;
942 	FUSE_ARGS(args);
943 
944 	args.opcode = FUSE_SYMLINK;
945 	args.in_numargs = 3;
946 	fuse_set_zero_arg0(&args);
947 	args.in_args[1].size = entry->d_name.len + 1;
948 	args.in_args[1].value = entry->d_name.name;
949 	args.in_args[2].size = len;
950 	args.in_args[2].value = link;
951 	return create_new_nondir(idmap, fm, &args, dir, entry, S_IFLNK);
952 }
953 
fuse_flush_time_update(struct inode * inode)954 void fuse_flush_time_update(struct inode *inode)
955 {
956 	int err = sync_inode_metadata(inode, 1);
957 
958 	mapping_set_error(inode->i_mapping, err);
959 }
960 
/* Set the cached ctime to the current time and flush it, unless IS_NOCMTIME. */
static void fuse_update_ctime_in_cache(struct inode *inode)
{
	if (IS_NOCMTIME(inode))
		return;

	inode_set_ctime_current(inode);
	mark_inode_dirty_sync(inode);
	fuse_flush_time_update(inode);
}
969 
fuse_update_ctime(struct inode * inode)970 void fuse_update_ctime(struct inode *inode)
971 {
972 	fuse_invalidate_attr_mask(inode, STATX_CTIME);
973 	fuse_update_ctime_in_cache(inode);
974 }
975 
fuse_entry_unlinked(struct dentry * entry)976 static void fuse_entry_unlinked(struct dentry *entry)
977 {
978 	struct inode *inode = d_inode(entry);
979 	struct fuse_conn *fc = get_fuse_conn(inode);
980 	struct fuse_inode *fi = get_fuse_inode(inode);
981 
982 	spin_lock(&fi->lock);
983 	fi->attr_version = atomic64_inc_return(&fc->attr_version);
984 	/*
985 	 * If i_nlink == 0 then unlink doesn't make sense, yet this can
986 	 * happen if userspace filesystem is careless.  It would be
987 	 * difficult to enforce correct nlink usage so just ignore this
988 	 * condition here
989 	 */
990 	if (S_ISDIR(inode->i_mode))
991 		clear_nlink(inode);
992 	else if (inode->i_nlink > 0)
993 		drop_nlink(inode);
994 	spin_unlock(&fi->lock);
995 	fuse_invalidate_entry_cache(entry);
996 	fuse_update_ctime(inode);
997 }
998 
fuse_unlink(struct inode * dir,struct dentry * entry)999 static int fuse_unlink(struct inode *dir, struct dentry *entry)
1000 {
1001 	int err;
1002 	struct fuse_mount *fm = get_fuse_mount(dir);
1003 	FUSE_ARGS(args);
1004 
1005 	if (fuse_is_bad(dir))
1006 		return -EIO;
1007 
1008 	args.opcode = FUSE_UNLINK;
1009 	args.nodeid = get_node_id(dir);
1010 	args.in_numargs = 2;
1011 	fuse_set_zero_arg0(&args);
1012 	args.in_args[1].size = entry->d_name.len + 1;
1013 	args.in_args[1].value = entry->d_name.name;
1014 	err = fuse_simple_request(fm, &args);
1015 	if (!err) {
1016 		fuse_dir_changed(dir);
1017 		fuse_entry_unlinked(entry);
1018 	} else if (err == -EINTR || err == -ENOENT)
1019 		fuse_invalidate_entry(entry);
1020 	return err;
1021 }
1022 
fuse_rmdir(struct inode * dir,struct dentry * entry)1023 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
1024 {
1025 	int err;
1026 	struct fuse_mount *fm = get_fuse_mount(dir);
1027 	FUSE_ARGS(args);
1028 
1029 	if (fuse_is_bad(dir))
1030 		return -EIO;
1031 
1032 	args.opcode = FUSE_RMDIR;
1033 	args.nodeid = get_node_id(dir);
1034 	args.in_numargs = 2;
1035 	fuse_set_zero_arg0(&args);
1036 	args.in_args[1].size = entry->d_name.len + 1;
1037 	args.in_args[1].value = entry->d_name.name;
1038 	err = fuse_simple_request(fm, &args);
1039 	if (!err) {
1040 		fuse_dir_changed(dir);
1041 		fuse_entry_unlinked(entry);
1042 	} else if (err == -EINTR || err == -ENOENT)
1043 		fuse_invalidate_entry(entry);
1044 	return err;
1045 }
1046 
/*
 * Issue a rename request and update the local state to match.
 *
 * @opcode selects the request type and @argsize the size of the leading
 * argument structure (only the first @argsize bytes of the local
 * fuse_rename2_in are zeroed and sent).  The old and new names follow as
 * NUL-terminated strings.
 *
 * Returns the result of the request.
 */
static int fuse_rename_common(struct mnt_idmap *idmap, struct inode *olddir, struct dentry *oldent,
			      struct inode *newdir, struct dentry *newent,
			      unsigned int flags, int opcode, size_t argsize)
{
	struct fuse_mount *fm = get_fuse_mount(olddir);
	const bool exchange = flags & RENAME_EXCHANGE;
	struct fuse_rename2_in inarg;
	FUSE_ARGS(args);
	int ret;

	memset(&inarg, 0, argsize);
	inarg.newdir = get_node_id(newdir);
	inarg.flags = flags;

	args.opcode = opcode;
	args.nodeid = get_node_id(olddir);
	args.in_numargs = 3;
	args.in_args[0].size = argsize;
	args.in_args[0].value = &inarg;
	args.in_args[1].size = oldent->d_name.len + 1;
	args.in_args[1].value = oldent->d_name.name;
	args.in_args[2].size = newent->d_name.len + 1;
	args.in_args[2].value = newent->d_name.name;

	ret = fuse_simple_idmap_request(idmap, fm, &args);
	if (ret) {
		if (ret == -EINTR || ret == -ENOENT) {
			/*
			 * If the request was interrupted there is no way to
			 * know whether the rename actually took place.  If
			 * the invalidation fails (e.g. some process has its
			 * CWD under the renamed directory), the dcache and
			 * the real filesystem can end up inconsistent.
			 * Tough luck.
			 */
			fuse_invalidate_entry(oldent);
			if (d_really_is_positive(newent))
				fuse_invalidate_entry(newent);
		}
		return ret;
	}

	/* The ctime of the renamed inode(s) changes */
	fuse_update_ctime(d_inode(oldent));
	if (exchange)
		fuse_update_ctime(d_inode(newent));

	fuse_dir_changed(olddir);
	if (olddir != newdir)
		fuse_dir_changed(newdir);

	/* Without an exchange, newent will end up negative */
	if (!exchange && d_really_is_positive(newent))
		fuse_entry_unlinked(newent);

	return 0;
}
1096 
fuse_rename2(struct mnt_idmap * idmap,struct inode * olddir,struct dentry * oldent,struct inode * newdir,struct dentry * newent,unsigned int flags)1097 static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir,
1098 			struct dentry *oldent, struct inode *newdir,
1099 			struct dentry *newent, unsigned int flags)
1100 {
1101 	struct fuse_conn *fc = get_fuse_conn(olddir);
1102 	int err;
1103 
1104 	if (fuse_is_bad(olddir))
1105 		return -EIO;
1106 
1107 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
1108 		return -EINVAL;
1109 
1110 	if (flags) {
1111 		if (fc->no_rename2 || fc->minor < 23)
1112 			return -EINVAL;
1113 
1114 		err = fuse_rename_common((flags & RENAME_WHITEOUT) ? idmap : &invalid_mnt_idmap,
1115 					 olddir, oldent, newdir, newent, flags,
1116 					 FUSE_RENAME2,
1117 					 sizeof(struct fuse_rename2_in));
1118 		if (err == -ENOSYS) {
1119 			fc->no_rename2 = 1;
1120 			err = -EINVAL;
1121 		}
1122 	} else {
1123 		err = fuse_rename_common(&invalid_mnt_idmap, olddir, oldent, newdir, newent, 0,
1124 					 FUSE_RENAME,
1125 					 sizeof(struct fuse_rename_in));
1126 	}
1127 
1128 	return err;
1129 }
1130 
fuse_link(struct dentry * entry,struct inode * newdir,struct dentry * newent)1131 static int fuse_link(struct dentry *entry, struct inode *newdir,
1132 		     struct dentry *newent)
1133 {
1134 	int err;
1135 	struct fuse_link_in inarg;
1136 	struct inode *inode = d_inode(entry);
1137 	struct fuse_mount *fm = get_fuse_mount(inode);
1138 	FUSE_ARGS(args);
1139 
1140 	memset(&inarg, 0, sizeof(inarg));
1141 	inarg.oldnodeid = get_node_id(inode);
1142 	args.opcode = FUSE_LINK;
1143 	args.in_numargs = 2;
1144 	args.in_args[0].size = sizeof(inarg);
1145 	args.in_args[0].value = &inarg;
1146 	args.in_args[1].size = newent->d_name.len + 1;
1147 	args.in_args[1].value = newent->d_name.name;
1148 	err = create_new_nondir(&invalid_mnt_idmap, fm, &args, newdir, newent, inode->i_mode);
1149 	if (!err)
1150 		fuse_update_ctime_in_cache(inode);
1151 	else if (err == -EINTR)
1152 		fuse_invalidate_attr(inode);
1153 
1154 	return err;
1155 }
1156 
fuse_fillattr(struct mnt_idmap * idmap,struct inode * inode,struct fuse_attr * attr,struct kstat * stat)1157 static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode,
1158 			  struct fuse_attr *attr, struct kstat *stat)
1159 {
1160 	unsigned int blkbits;
1161 	struct fuse_conn *fc = get_fuse_conn(inode);
1162 	vfsuid_t vfsuid = make_vfsuid(idmap, fc->user_ns,
1163 				      make_kuid(fc->user_ns, attr->uid));
1164 	vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns,
1165 				      make_kgid(fc->user_ns, attr->gid));
1166 
1167 	stat->dev = inode->i_sb->s_dev;
1168 	stat->ino = attr->ino;
1169 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
1170 	stat->nlink = attr->nlink;
1171 	stat->uid = vfsuid_into_kuid(vfsuid);
1172 	stat->gid = vfsgid_into_kgid(vfsgid);
1173 	stat->rdev = inode->i_rdev;
1174 	stat->atime.tv_sec = attr->atime;
1175 	stat->atime.tv_nsec = attr->atimensec;
1176 	stat->mtime.tv_sec = attr->mtime;
1177 	stat->mtime.tv_nsec = attr->mtimensec;
1178 	stat->ctime.tv_sec = attr->ctime;
1179 	stat->ctime.tv_nsec = attr->ctimensec;
1180 	stat->size = attr->size;
1181 	stat->blocks = attr->blocks;
1182 
1183 	if (attr->blksize != 0)
1184 		blkbits = ilog2(attr->blksize);
1185 	else
1186 		blkbits = inode->i_sb->s_blocksize_bits;
1187 
1188 	stat->blksize = 1 << blkbits;
1189 }
1190 
fuse_statx_to_attr(struct fuse_statx * sx,struct fuse_attr * attr)1191 static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr)
1192 {
1193 	memset(attr, 0, sizeof(*attr));
1194 	attr->ino = sx->ino;
1195 	attr->size = sx->size;
1196 	attr->blocks = sx->blocks;
1197 	attr->atime = sx->atime.tv_sec;
1198 	attr->mtime = sx->mtime.tv_sec;
1199 	attr->ctime = sx->ctime.tv_sec;
1200 	attr->atimensec = sx->atime.tv_nsec;
1201 	attr->mtimensec = sx->mtime.tv_nsec;
1202 	attr->ctimensec = sx->ctime.tv_nsec;
1203 	attr->mode = sx->mode;
1204 	attr->nlink = sx->nlink;
1205 	attr->uid = sx->uid;
1206 	attr->gid = sx->gid;
1207 	attr->rdev = new_encode_dev(MKDEV(sx->rdev_major, sx->rdev_minor));
1208 	attr->blksize = sx->blksize;
1209 }
1210 
fuse_do_statx(struct mnt_idmap * idmap,struct inode * inode,struct file * file,struct kstat * stat)1211 static int fuse_do_statx(struct mnt_idmap *idmap, struct inode *inode,
1212 			 struct file *file, struct kstat *stat)
1213 {
1214 	int err;
1215 	struct fuse_attr attr;
1216 	struct fuse_statx *sx;
1217 	struct fuse_statx_in inarg;
1218 	struct fuse_statx_out outarg;
1219 	struct fuse_mount *fm = get_fuse_mount(inode);
1220 	u64 attr_version = fuse_get_attr_version(fm->fc);
1221 	FUSE_ARGS(args);
1222 
1223 	memset(&inarg, 0, sizeof(inarg));
1224 	memset(&outarg, 0, sizeof(outarg));
1225 	/* Directories have separate file-handle space */
1226 	if (file && S_ISREG(inode->i_mode)) {
1227 		struct fuse_file *ff = file->private_data;
1228 
1229 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1230 		inarg.fh = ff->fh;
1231 	}
1232 	/* For now leave sync hints as the default, request all stats. */
1233 	inarg.sx_flags = 0;
1234 	inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME;
1235 	args.opcode = FUSE_STATX;
1236 	args.nodeid = get_node_id(inode);
1237 	args.in_numargs = 1;
1238 	args.in_args[0].size = sizeof(inarg);
1239 	args.in_args[0].value = &inarg;
1240 	args.out_numargs = 1;
1241 	args.out_args[0].size = sizeof(outarg);
1242 	args.out_args[0].value = &outarg;
1243 	err = fuse_simple_request(fm, &args);
1244 	if (err)
1245 		return err;
1246 
1247 	sx = &outarg.stat;
1248 	if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
1249 	    ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
1250 					 inode_wrong_type(inode, sx->mode)))) {
1251 		fuse_make_bad(inode);
1252 		return -EIO;
1253 	}
1254 
1255 	fuse_statx_to_attr(&outarg.stat, &attr);
1256 	if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) {
1257 		fuse_change_attributes(inode, &attr, &outarg.stat,
1258 				       ATTR_TIMEOUT(&outarg), attr_version);
1259 	}
1260 
1261 	if (stat) {
1262 		stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME);
1263 		stat->btime.tv_sec = sx->btime.tv_sec;
1264 		stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
1265 		fuse_fillattr(idmap, inode, &attr, stat);
1266 		stat->result_mask |= STATX_TYPE;
1267 	}
1268 
1269 	return 0;
1270 }
1271 
fuse_do_getattr(struct mnt_idmap * idmap,struct inode * inode,struct kstat * stat,struct file * file)1272 static int fuse_do_getattr(struct mnt_idmap *idmap, struct inode *inode,
1273 			   struct kstat *stat, struct file *file)
1274 {
1275 	int err;
1276 	struct fuse_getattr_in inarg;
1277 	struct fuse_attr_out outarg;
1278 	struct fuse_mount *fm = get_fuse_mount(inode);
1279 	FUSE_ARGS(args);
1280 	u64 attr_version;
1281 
1282 	attr_version = fuse_get_attr_version(fm->fc);
1283 
1284 	memset(&inarg, 0, sizeof(inarg));
1285 	memset(&outarg, 0, sizeof(outarg));
1286 	/* Directories have separate file-handle space */
1287 	if (file && S_ISREG(inode->i_mode)) {
1288 		struct fuse_file *ff = file->private_data;
1289 
1290 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1291 		inarg.fh = ff->fh;
1292 	}
1293 	args.opcode = FUSE_GETATTR;
1294 	args.nodeid = get_node_id(inode);
1295 	args.in_numargs = 1;
1296 	args.in_args[0].size = sizeof(inarg);
1297 	args.in_args[0].value = &inarg;
1298 	args.out_numargs = 1;
1299 	args.out_args[0].size = sizeof(outarg);
1300 	args.out_args[0].value = &outarg;
1301 	err = fuse_simple_request(fm, &args);
1302 	if (!err) {
1303 		if (fuse_invalid_attr(&outarg.attr) ||
1304 		    inode_wrong_type(inode, outarg.attr.mode)) {
1305 			fuse_make_bad(inode);
1306 			err = -EIO;
1307 		} else {
1308 			fuse_change_attributes(inode, &outarg.attr, NULL,
1309 					       ATTR_TIMEOUT(&outarg),
1310 					       attr_version);
1311 			if (stat)
1312 				fuse_fillattr(idmap, inode, &outarg.attr, stat);
1313 		}
1314 	}
1315 	return err;
1316 }
1317 
/*
 * Get attributes for @inode, refreshing them from the server if needed.
 *
 * A refresh is made when something is requested and either
 * AT_STATX_FORCE_SYNC is set, or (without AT_STATX_DONT_SYNC) a requested
 * attribute is invalidated and not covered by the cache mask, or
 * fi->i_time lies before the current jiffies.  The refresh uses FUSE_STATX
 * when more than the basic stats is wanted and statx is available; an
 * -ENOSYS reply sets no_statx and the whole decision is retried.
 *
 * Without a refresh, @stat (if given) is filled from the cached inode.
 */
static int fuse_update_get_attr(struct mnt_idmap *idmap, struct inode *inode,
				struct file *file, struct kstat *stat,
				u32 request_mask, unsigned int flags)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_conn *fc = get_fuse_conn(inode);
	u32 inval_mask = READ_ONCE(fi->inval_mask);
	u32 cache_mask = fuse_get_cache_mask(inode);
	bool sync;
	int err = 0;

	/* FUSE only supports basic stats and possibly btime */
	request_mask &= STATX_BASIC_STATS | STATX_BTIME;
retry:
	if (fc->no_statx)
		request_mask &= STATX_BASIC_STATS;

	sync = false;
	if (request_mask) {
		if (flags & AT_STATX_FORCE_SYNC)
			sync = true;
		else if (!(flags & AT_STATX_DONT_SYNC))
			sync = (request_mask & inval_mask & ~cache_mask) ||
			       time_before64(fi->i_time, get_jiffies_64());
	}

	if (!sync) {
		/* Serve the request from the cached inode */
		if (stat) {
			generic_fillattr(idmap, request_mask, inode, stat);
			stat->mode = fi->orig_i_mode;
			stat->ino = fi->orig_ino;
			if (test_bit(FUSE_I_BTIME, &fi->state)) {
				stat->btime = fi->i_btime;
				stat->result_mask |= STATX_BTIME;
			}
		}
		return err;
	}

	forget_all_cached_acls(inode);

	/* Plain getattr unless something beyond basic stats is wanted */
	if (fc->no_statx || !(request_mask & ~STATX_BASIC_STATS))
		return fuse_do_getattr(idmap, inode, stat, file);

	/* Try statx if BTIME is requested */
	err = fuse_do_statx(idmap, inode, file, stat);
	if (err == -ENOSYS) {
		fc->no_statx = 1;
		err = 0;
		goto retry;
	}

	return err;
}
1372 
/*
 * Make sure the attributes in @mask are up to date for @inode.
 *
 * Wrapper around fuse_update_get_attr() that uses nop_mnt_idmap, passes no
 * kstat to fill and no AT_STATX_* flags.
 */
int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask)
{
	struct kstat *no_stat = NULL;
	unsigned int no_flags = 0;

	return fuse_update_get_attr(&nop_mnt_idmap, inode, file, no_stat,
				    mask, no_flags);
}
1377 
/*
 * Invalidate (or only expire) the dentry @name in the directory with node
 * id @parent_nodeid.
 *
 * The parent is looked up and locked for the duration.  The entry is
 * invalidated with d_invalidate() unless FUSE_EXPIRE_ONLY is set in
 * @flags, and its cached entry is always invalidated.  If @child_nodeid is
 * non-zero and the dentry is positive, the child is additionally deleted,
 * provided that its node id matches, it is not a mountpoint and, for a
 * directory, it is empty.
 *
 * Returns 0 on success, -ENOENT if the parent, an alias of it or the entry
 * cannot be found or the child's node id differs, -ENOTDIR if the parent is
 * not a directory, -EBUSY for a mountpoint and -ENOTEMPTY for a non-empty
 * directory.
 */
int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
			     u64 child_nodeid, struct qstr *name, u32 flags)
{
	struct inode *parent;
	struct dentry *parent_dentry;
	struct dentry *entry;
	int err;

	parent = fuse_ilookup(fc, parent_nodeid, NULL);
	if (!parent)
		return -ENOENT;

	inode_lock_nested(parent, I_MUTEX_PARENT);
	if (!S_ISDIR(parent->i_mode)) {
		err = -ENOTDIR;
		goto out_unlock;
	}

	parent_dentry = d_find_alias(parent);
	if (!parent_dentry) {
		err = -ENOENT;
		goto out_unlock;
	}

	name->hash = full_name_hash(parent_dentry, name->name, name->len);
	entry = d_lookup(parent_dentry, name);
	dput(parent_dentry);
	if (!entry) {
		err = -ENOENT;
		goto out_unlock;
	}

	fuse_dir_changed(parent);
	if (!(flags & FUSE_EXPIRE_ONLY))
		d_invalidate(entry);
	fuse_invalidate_entry_cache(entry);

	err = 0;
	if (child_nodeid != 0 && d_really_is_positive(entry)) {
		/* The caller also wants the child itself deleted */
		inode_lock(d_inode(entry));
		if (get_node_id(d_inode(entry)) != child_nodeid) {
			err = -ENOENT;
		} else if (d_mountpoint(entry)) {
			err = -EBUSY;
		} else {
			if (d_is_dir(entry)) {
				shrink_dcache_parent(entry);
				if (simple_empty(entry))
					d_inode(entry)->i_flags |= S_DEAD;
				else
					err = -ENOTEMPTY;
			}
			if (!err) {
				dont_mount(entry);
				clear_nlink(d_inode(entry));
			}
		}
		inode_unlock(d_inode(entry));
		if (!err)
			d_delete(entry);
	}
	dput(entry);

out_unlock:
	inode_unlock(parent);
	iput(parent);
	return err;
}
1445 
fuse_permissible_uidgid(struct fuse_conn * fc)1446 static inline bool fuse_permissible_uidgid(struct fuse_conn *fc)
1447 {
1448 	const struct cred *cred = current_cred();
1449 
1450 	return (uid_eq(cred->euid, fc->user_id) &&
1451 		uid_eq(cred->suid, fc->user_id) &&
1452 		uid_eq(cred->uid,  fc->user_id) &&
1453 		gid_eq(cred->egid, fc->group_id) &&
1454 		gid_eq(cred->sgid, fc->group_id) &&
1455 		gid_eq(cred->gid,  fc->group_id));
1456 }
1457 
1458 /*
1459  * Calling into a user-controlled filesystem gives the filesystem
1460  * daemon ptrace-like capabilities over the current process.  This
1461  * means, that the filesystem daemon is able to record the exact
1462  * filesystem operations performed, and can also control the behavior
1463  * of the requester process in otherwise impossible ways.  For example
1464  * it can delay the operation for arbitrary length of time allowing
1465  * DoS against the requester.
1466  *
1467  * For this reason only those processes can call into the filesystem,
1468  * for which the owner of the mount has ptrace privilege.  This
1469  * excludes processes started by other users, suid or sgid processes.
1470  */
fuse_allow_current_process(struct fuse_conn * fc)1471 bool fuse_allow_current_process(struct fuse_conn *fc)
1472 {
1473 	bool allow;
1474 
1475 	if (fc->allow_other)
1476 		allow = current_in_userns(fc->user_ns);
1477 	else
1478 		allow = fuse_permissible_uidgid(fc);
1479 
1480 	if (!allow && allow_sys_admin_access && capable(CAP_SYS_ADMIN))
1481 		allow = true;
1482 
1483 	return allow;
1484 }
1485 
fuse_access(struct inode * inode,int mask)1486 static int fuse_access(struct inode *inode, int mask)
1487 {
1488 	struct fuse_mount *fm = get_fuse_mount(inode);
1489 	FUSE_ARGS(args);
1490 	struct fuse_access_in inarg;
1491 	int err;
1492 
1493 	BUG_ON(mask & MAY_NOT_BLOCK);
1494 
1495 	/*
1496 	 * We should not send FUSE_ACCESS to the userspace
1497 	 * when idmapped mounts are enabled as for this case
1498 	 * we have fc->default_permissions = 1 and access
1499 	 * permission checks are done on the kernel side.
1500 	 */
1501 	WARN_ON_ONCE(!(fm->sb->s_iflags & SB_I_NOIDMAP));
1502 
1503 	if (fm->fc->no_access)
1504 		return 0;
1505 
1506 	memset(&inarg, 0, sizeof(inarg));
1507 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1508 	args.opcode = FUSE_ACCESS;
1509 	args.nodeid = get_node_id(inode);
1510 	args.in_numargs = 1;
1511 	args.in_args[0].size = sizeof(inarg);
1512 	args.in_args[0].value = &inarg;
1513 	err = fuse_simple_request(fm, &args);
1514 	if (err == -ENOSYS) {
1515 		fm->fc->no_access = 1;
1516 		err = 0;
1517 	}
1518 	return err;
1519 }
1520 
fuse_perm_getattr(struct inode * inode,int mask)1521 static int fuse_perm_getattr(struct inode *inode, int mask)
1522 {
1523 	if (mask & MAY_NOT_BLOCK)
1524 		return -ECHILD;
1525 
1526 	forget_all_cached_acls(inode);
1527 	return fuse_do_getattr(&nop_mnt_idmap, inode, NULL, NULL);
1528 }
1529 
1530 /*
1531  * Check permission.  The two basic access models of FUSE are:
1532  *
1533  * 1) Local access checking ('default_permissions' mount option) based
1534  * on file mode.  This is the plain old disk filesystem permission
1535  * model.
1536  *
1537  * 2) "Remote" access checking, where server is responsible for
1538  * checking permission in each inode operation.  An exception to this
1539  * is if ->permission() was invoked from sys_access() in which case an
1540  * access request is sent.  Execute permission is still checked
1541  * locally based on file mode.
1542  */
fuse_permission(struct mnt_idmap * idmap,struct inode * inode,int mask)1543 static int fuse_permission(struct mnt_idmap *idmap,
1544 			   struct inode *inode, int mask)
1545 {
1546 	struct fuse_conn *fc = get_fuse_conn(inode);
1547 	bool refreshed = false;
1548 	int err = 0;
1549 
1550 	if (fuse_is_bad(inode))
1551 		return -EIO;
1552 
1553 	if (!fuse_allow_current_process(fc))
1554 		return -EACCES;
1555 
1556 	/*
1557 	 * If attributes are needed, refresh them before proceeding
1558 	 */
1559 	if (fc->default_permissions ||
1560 	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1561 		struct fuse_inode *fi = get_fuse_inode(inode);
1562 		u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
1563 
1564 		if (perm_mask & READ_ONCE(fi->inval_mask) ||
1565 		    time_before64(fi->i_time, get_jiffies_64())) {
1566 			refreshed = true;
1567 
1568 			err = fuse_perm_getattr(inode, mask);
1569 			if (err)
1570 				return err;
1571 		}
1572 	}
1573 
1574 	if (fc->default_permissions) {
1575 		err = generic_permission(idmap, inode, mask);
1576 
1577 		/* If permission is denied, try to refresh file
1578 		   attributes.  This is also needed, because the root
1579 		   node will at first have no permissions */
1580 		if (err == -EACCES && !refreshed) {
1581 			err = fuse_perm_getattr(inode, mask);
1582 			if (!err)
1583 				err = generic_permission(idmap,
1584 							 inode, mask);
1585 		}
1586 
1587 		/* Note: the opposite of the above test does not
1588 		   exist.  So if permissions are revoked this won't be
1589 		   noticed immediately, only after the attribute
1590 		   timeout has expired */
1591 	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1592 		err = fuse_access(inode, mask);
1593 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1594 		if (!(inode->i_mode & S_IXUGO)) {
1595 			if (refreshed)
1596 				return -EACCES;
1597 
1598 			err = fuse_perm_getattr(inode, mask);
1599 			if (!err && !(inode->i_mode & S_IXUGO))
1600 				return -EACCES;
1601 		}
1602 	}
1603 	return err;
1604 }
1605 
fuse_readlink_page(struct inode * inode,struct folio * folio)1606 static int fuse_readlink_page(struct inode *inode, struct folio *folio)
1607 {
1608 	struct fuse_mount *fm = get_fuse_mount(inode);
1609 	struct fuse_folio_desc desc = { .length = PAGE_SIZE - 1 };
1610 	struct fuse_args_pages ap = {
1611 		.num_folios = 1,
1612 		.folios = &folio,
1613 		.descs = &desc,
1614 	};
1615 	char *link;
1616 	ssize_t res;
1617 
1618 	ap.args.opcode = FUSE_READLINK;
1619 	ap.args.nodeid = get_node_id(inode);
1620 	ap.args.out_pages = true;
1621 	ap.args.out_argvar = true;
1622 	ap.args.page_zeroing = true;
1623 	ap.args.out_numargs = 1;
1624 	ap.args.out_args[0].size = desc.length;
1625 	res = fuse_simple_request(fm, &ap.args);
1626 
1627 	fuse_invalidate_atime(inode);
1628 
1629 	if (res < 0)
1630 		return res;
1631 
1632 	if (WARN_ON(res >= PAGE_SIZE))
1633 		return -EIO;
1634 
1635 	link = folio_address(folio);
1636 	link[res] = '\0';
1637 
1638 	return 0;
1639 }
1640 
fuse_get_link(struct dentry * dentry,struct inode * inode,struct delayed_call * callback)1641 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
1642 				 struct delayed_call *callback)
1643 {
1644 	struct fuse_conn *fc = get_fuse_conn(inode);
1645 	struct folio *folio;
1646 	int err;
1647 
1648 	err = -EIO;
1649 	if (fuse_is_bad(inode))
1650 		goto out_err;
1651 
1652 	if (fc->cache_symlinks)
1653 		return page_get_link_raw(dentry, inode, callback);
1654 
1655 	err = -ECHILD;
1656 	if (!dentry)
1657 		goto out_err;
1658 
1659 	folio = folio_alloc(GFP_KERNEL, 0);
1660 	err = -ENOMEM;
1661 	if (!folio)
1662 		goto out_err;
1663 
1664 	err = fuse_readlink_page(inode, folio);
1665 	if (err) {
1666 		folio_put(folio);
1667 		goto out_err;
1668 	}
1669 
1670 	set_delayed_call(callback, page_put_link, &folio->page);
1671 
1672 	return folio_address(folio);
1673 
1674 out_err:
1675 	return ERR_PTR(err);
1676 }
1677 
fuse_dir_open(struct inode * inode,struct file * file)1678 static int fuse_dir_open(struct inode *inode, struct file *file)
1679 {
1680 	struct fuse_mount *fm = get_fuse_mount(inode);
1681 	int err;
1682 
1683 	if (fuse_is_bad(inode))
1684 		return -EIO;
1685 
1686 	err = generic_file_open(inode, file);
1687 	if (err)
1688 		return err;
1689 
1690 	err = fuse_do_open(fm, get_node_id(inode), file, true);
1691 	if (!err) {
1692 		struct fuse_file *ff = file->private_data;
1693 
1694 		/*
1695 		 * Keep handling FOPEN_STREAM and FOPEN_NONSEEKABLE for
1696 		 * directories for backward compatibility, though it's unlikely
1697 		 * to be useful.
1698 		 */
1699 		if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE))
1700 			nonseekable_open(inode, file);
1701 		if (!(ff->open_flags & FOPEN_KEEP_CACHE))
1702 			invalidate_inode_pages2(inode->i_mapping);
1703 	}
1704 
1705 	return err;
1706 }
1707 
fuse_dir_release(struct inode * inode,struct file * file)1708 static int fuse_dir_release(struct inode *inode, struct file *file)
1709 {
1710 	fuse_release_common(file, true);
1711 
1712 	return 0;
1713 }
1714 
/*
 * fsync a directory with FUSE_FSYNCDIR, holding the inode lock around the
 * request.
 *
 * Returns -EIO for a bad inode and 0 without sending anything when the
 * connection has no_fsyncdir set.  An -ENOSYS reply sets no_fsyncdir and
 * is reported as success.
 */
static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
			  int datasync)
{
	struct inode *dir = file->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(dir);
	int ret;

	if (fuse_is_bad(dir))
		return -EIO;

	if (fc->no_fsyncdir)
		return 0;

	inode_lock(dir);
	ret = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
	if (ret == -ENOSYS) {
		/* Not implemented by the server: don't ask again */
		fc->no_fsyncdir = 1;
		ret = 0;
	}
	inode_unlock(dir);

	return ret;
}
1738 
fuse_dir_ioctl(struct file * file,unsigned int cmd,unsigned long arg)1739 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1740 			    unsigned long arg)
1741 {
1742 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1743 
1744 	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1745 	if (fc->minor < 18)
1746 		return -ENOTTY;
1747 
1748 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1749 }
1750 
fuse_dir_compat_ioctl(struct file * file,unsigned int cmd,unsigned long arg)1751 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1752 				   unsigned long arg)
1753 {
1754 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1755 
1756 	if (fc->minor < 18)
1757 		return -ENOTTY;
1758 
1759 	return fuse_ioctl_common(file, cmd, arg,
1760 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1761 }
1762 
/*
 * Decide whether a setattr with the validity bits @ivalid should carry
 * the mtime.
 *
 * The mtime is always sent when it is set explicitly (ATTR_MTIME_SET) or
 * when the kernel's i_mtime is the official one (@trust_local_mtime).
 * Otherwise it is sent in all cases except an open(O_TRUNC) or an
 * ftruncate(), i.e. ATTR_SIZE combined with ATTR_OPEN or ATTR_FILE.
 */
static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
{
	if ((ivalid & ATTR_MTIME_SET) || trust_local_mtime)
		return true;

	return !((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)));
}
1780 
/*
 * Translate the VFS attribute change @iattr into the FUSE_SETATTR
 * argument @arg.
 *
 * For every attribute selected in iattr->ia_valid the matching FATTR_*
 * bit is set in arg->valid and the value is copied.  The uid and gid are
 * mapped through @idmap into the connection's user namespace.  The mtime
 * is only sent when update_mtime() agrees, and the ctime only when
 * @trust_local_cmtime is set.
 */
static void iattr_to_fattr(struct mnt_idmap *idmap, struct fuse_conn *fc,
			   struct iattr *iattr, struct fuse_setattr_in *arg,
			   bool trust_local_cmtime)
{
	const unsigned ivalid = iattr->ia_valid;

	if (ivalid & ATTR_MODE) {
		arg->valid |= FATTR_MODE;
		arg->mode = iattr->ia_mode;
	}

	if (ivalid & ATTR_UID) {
		kuid_t fsuid = from_vfsuid(idmap, fc->user_ns, iattr->ia_vfsuid);

		arg->valid |= FATTR_UID;
		arg->uid = from_kuid(fc->user_ns, fsuid);
	}

	if (ivalid & ATTR_GID) {
		kgid_t fsgid = from_vfsgid(idmap, fc->user_ns, iattr->ia_vfsgid);

		arg->valid |= FATTR_GID;
		arg->gid = from_kgid(fc->user_ns, fsgid);
	}

	if (ivalid & ATTR_SIZE) {
		arg->valid |= FATTR_SIZE;
		arg->size = iattr->ia_size;
	}

	if (ivalid & ATTR_ATIME) {
		arg->valid |= FATTR_ATIME;
		arg->atime = iattr->ia_atime.tv_sec;
		arg->atimensec = iattr->ia_atime.tv_nsec;
		/* No explicit time given: flag the atime as "now" */
		if (!(ivalid & ATTR_ATIME_SET))
			arg->valid |= FATTR_ATIME_NOW;
	}

	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
		arg->valid |= FATTR_MTIME;
		arg->mtime = iattr->ia_mtime.tv_sec;
		arg->mtimensec = iattr->ia_mtime.tv_nsec;
		/* "Now" is only flagged when the local mtime is not trusted */
		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
			arg->valid |= FATTR_MTIME_NOW;
	}

	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
		arg->valid |= FATTR_CTIME;
		arg->ctime = iattr->ia_ctime.tv_sec;
		arg->ctimensec = iattr->ia_ctime.tv_nsec;
	}
}
1826 
1827 /*
1828  * Prevent concurrent writepages on inode
1829  *
1830  * This is done by adding a negative bias to the inode write counter
1831  * and waiting for all pending writes to finish.
1832  */
fuse_set_nowrite(struct inode * inode)1833 void fuse_set_nowrite(struct inode *inode)
1834 {
1835 	struct fuse_inode *fi = get_fuse_inode(inode);
1836 
1837 	BUG_ON(!inode_is_locked(inode));
1838 
1839 	spin_lock(&fi->lock);
1840 	BUG_ON(fi->writectr < 0);
1841 	fi->writectr += FUSE_NOWRITE;
1842 	spin_unlock(&fi->lock);
1843 	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1844 }
1845 
1846 /*
1847  * Allow writepages on inode
1848  *
1849  * Remove the bias from the writecounter and send any queued
1850  * writepages.
1851  */
__fuse_release_nowrite(struct inode * inode)1852 static void __fuse_release_nowrite(struct inode *inode)
1853 {
1854 	struct fuse_inode *fi = get_fuse_inode(inode);
1855 
1856 	BUG_ON(fi->writectr != FUSE_NOWRITE);
1857 	fi->writectr = 0;
1858 	fuse_flush_writepages(inode);
1859 }
1860 
fuse_release_nowrite(struct inode * inode)1861 void fuse_release_nowrite(struct inode *inode)
1862 {
1863 	struct fuse_inode *fi = get_fuse_inode(inode);
1864 
1865 	spin_lock(&fi->lock);
1866 	__fuse_release_nowrite(inode);
1867 	spin_unlock(&fi->lock);
1868 }
1869 
fuse_setattr_fill(struct fuse_conn * fc,struct fuse_args * args,struct inode * inode,struct fuse_setattr_in * inarg_p,struct fuse_attr_out * outarg_p)1870 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1871 			      struct inode *inode,
1872 			      struct fuse_setattr_in *inarg_p,
1873 			      struct fuse_attr_out *outarg_p)
1874 {
1875 	args->opcode = FUSE_SETATTR;
1876 	args->nodeid = get_node_id(inode);
1877 	args->in_numargs = 1;
1878 	args->in_args[0].size = sizeof(*inarg_p);
1879 	args->in_args[0].value = inarg_p;
1880 	args->out_numargs = 1;
1881 	args->out_args[0].size = sizeof(*outarg_p);
1882 	args->out_args[0].value = outarg_p;
1883 }
1884 
1885 /*
1886  * Flush inode->i_mtime to the server
1887  */
fuse_flush_times(struct inode * inode,struct fuse_file * ff)1888 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1889 {
1890 	struct fuse_mount *fm = get_fuse_mount(inode);
1891 	FUSE_ARGS(args);
1892 	struct fuse_setattr_in inarg;
1893 	struct fuse_attr_out outarg;
1894 
1895 	memset(&inarg, 0, sizeof(inarg));
1896 	memset(&outarg, 0, sizeof(outarg));
1897 
1898 	inarg.valid = FATTR_MTIME;
1899 	inarg.mtime = inode_get_mtime_sec(inode);
1900 	inarg.mtimensec = inode_get_mtime_nsec(inode);
1901 	if (fm->fc->minor >= 23) {
1902 		inarg.valid |= FATTR_CTIME;
1903 		inarg.ctime = inode_get_ctime_sec(inode);
1904 		inarg.ctimensec = inode_get_ctime_nsec(inode);
1905 	}
1906 	if (ff) {
1907 		inarg.valid |= FATTR_FH;
1908 		inarg.fh = ff->fh;
1909 	}
1910 	fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
1911 
1912 	return fuse_simple_request(fm, &args);
1913 }
1914 
1915 /*
1916  * Set attributes, and at the same time refresh them.
1917  *
1918  * Truncation is slightly complicated, because the 'truncate' request
1919  * may fail, in which case we don't want to touch the mapping.
1920  * vmtruncate() doesn't allow for this case, so do the rlimit checking
1921  * and the actual truncation by hand.
1922  */
fuse_do_setattr(struct mnt_idmap * idmap,struct dentry * dentry,struct iattr * attr,struct file * file)1923 int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
1924 		    struct iattr *attr, struct file *file)
1925 {
1926 	struct inode *inode = d_inode(dentry);
1927 	struct fuse_mount *fm = get_fuse_mount(inode);
1928 	struct fuse_conn *fc = fm->fc;
1929 	struct fuse_inode *fi = get_fuse_inode(inode);
1930 	struct address_space *mapping = inode->i_mapping;
1931 	FUSE_ARGS(args);
1932 	struct fuse_setattr_in inarg;
1933 	struct fuse_attr_out outarg;
1934 	bool is_truncate = false;
1935 	bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode);
1936 	loff_t oldsize;
1937 	int err;
1938 	bool trust_local_cmtime = is_wb;
1939 	bool fault_blocked = false;
1940 
1941 	if (!fc->default_permissions)
1942 		attr->ia_valid |= ATTR_FORCE;
1943 
1944 	err = setattr_prepare(idmap, dentry, attr);
1945 	if (err)
1946 		return err;
1947 
1948 	if (attr->ia_valid & ATTR_SIZE) {
1949 		if (WARN_ON(!S_ISREG(inode->i_mode)))
1950 			return -EIO;
1951 		is_truncate = true;
1952 	}
1953 
1954 	if (FUSE_IS_DAX(inode) && is_truncate) {
1955 		filemap_invalidate_lock(mapping);
1956 		fault_blocked = true;
1957 		err = fuse_dax_break_layouts(inode, 0, 0);
1958 		if (err) {
1959 			filemap_invalidate_unlock(mapping);
1960 			return err;
1961 		}
1962 	}
1963 
1964 	if (attr->ia_valid & ATTR_OPEN) {
1965 		/* This is coming from open(..., ... | O_TRUNC); */
1966 		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
1967 		WARN_ON(attr->ia_size != 0);
1968 		if (fc->atomic_o_trunc) {
1969 			/*
1970 			 * No need to send request to userspace, since actual
1971 			 * truncation has already been done by OPEN.  But still
1972 			 * need to truncate page cache.
1973 			 */
1974 			i_size_write(inode, 0);
1975 			truncate_pagecache(inode, 0);
1976 			goto out;
1977 		}
1978 		file = NULL;
1979 	}
1980 
1981 	/* Flush dirty data/metadata before non-truncate SETATTR */
1982 	if (is_wb &&
1983 	    attr->ia_valid &
1984 			(ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
1985 			 ATTR_TIMES_SET)) {
1986 		err = write_inode_now(inode, true);
1987 		if (err)
1988 			return err;
1989 
1990 		fuse_set_nowrite(inode);
1991 		fuse_release_nowrite(inode);
1992 	}
1993 
1994 	if (is_truncate) {
1995 		fuse_set_nowrite(inode);
1996 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1997 		if (trust_local_cmtime && attr->ia_size != inode->i_size)
1998 			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1999 	}
2000 
2001 	memset(&inarg, 0, sizeof(inarg));
2002 	memset(&outarg, 0, sizeof(outarg));
2003 	iattr_to_fattr(idmap, fc, attr, &inarg, trust_local_cmtime);
2004 	if (file) {
2005 		struct fuse_file *ff = file->private_data;
2006 		inarg.valid |= FATTR_FH;
2007 		inarg.fh = ff->fh;
2008 	}
2009 
2010 	/* Kill suid/sgid for non-directory chown unconditionally */
2011 	if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
2012 	    attr->ia_valid & (ATTR_UID | ATTR_GID))
2013 		inarg.valid |= FATTR_KILL_SUIDGID;
2014 
2015 	if (attr->ia_valid & ATTR_SIZE) {
2016 		/* For mandatory locking in truncate */
2017 		inarg.valid |= FATTR_LOCKOWNER;
2018 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
2019 
2020 		/* Kill suid/sgid for truncate only if no CAP_FSETID */
2021 		if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
2022 			inarg.valid |= FATTR_KILL_SUIDGID;
2023 	}
2024 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
2025 	err = fuse_simple_request(fm, &args);
2026 	if (err) {
2027 		if (err == -EINTR)
2028 			fuse_invalidate_attr(inode);
2029 		goto error;
2030 	}
2031 
2032 	if (fuse_invalid_attr(&outarg.attr) ||
2033 	    inode_wrong_type(inode, outarg.attr.mode)) {
2034 		fuse_make_bad(inode);
2035 		err = -EIO;
2036 		goto error;
2037 	}
2038 
2039 	spin_lock(&fi->lock);
2040 	/* the kernel maintains i_mtime locally */
2041 	if (trust_local_cmtime) {
2042 		if (attr->ia_valid & ATTR_MTIME)
2043 			inode_set_mtime_to_ts(inode, attr->ia_mtime);
2044 		if (attr->ia_valid & ATTR_CTIME)
2045 			inode_set_ctime_to_ts(inode, attr->ia_ctime);
2046 		/* FIXME: clear I_DIRTY_SYNC? */
2047 	}
2048 
2049 	fuse_change_attributes_common(inode, &outarg.attr, NULL,
2050 				      ATTR_TIMEOUT(&outarg),
2051 				      fuse_get_cache_mask(inode), 0);
2052 	oldsize = inode->i_size;
2053 	/* see the comment in fuse_change_attributes() */
2054 	if (!is_wb || is_truncate)
2055 		i_size_write(inode, outarg.attr.size);
2056 
2057 	if (is_truncate) {
2058 		/* NOTE: this may release/reacquire fi->lock */
2059 		__fuse_release_nowrite(inode);
2060 	}
2061 	spin_unlock(&fi->lock);
2062 
2063 	/*
2064 	 * Only call invalidate_inode_pages2() after removing
2065 	 * FUSE_NOWRITE, otherwise fuse_launder_folio() would deadlock.
2066 	 */
2067 	if ((is_truncate || !is_wb) &&
2068 	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
2069 		truncate_pagecache(inode, outarg.attr.size);
2070 		invalidate_inode_pages2(mapping);
2071 	}
2072 
2073 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2074 out:
2075 	if (fault_blocked)
2076 		filemap_invalidate_unlock(mapping);
2077 
2078 	return 0;
2079 
2080 error:
2081 	if (is_truncate)
2082 		fuse_release_nowrite(inode);
2083 
2084 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2085 
2086 	if (fault_blocked)
2087 		filemap_invalidate_unlock(mapping);
2088 	return err;
2089 }
2090 
fuse_setattr(struct mnt_idmap * idmap,struct dentry * entry,struct iattr * attr)2091 static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry,
2092 			struct iattr *attr)
2093 {
2094 	struct inode *inode = d_inode(entry);
2095 	struct fuse_conn *fc = get_fuse_conn(inode);
2096 	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
2097 	int ret;
2098 
2099 	if (fuse_is_bad(inode))
2100 		return -EIO;
2101 
2102 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
2103 		return -EACCES;
2104 
2105 	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
2106 		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
2107 				    ATTR_MODE);
2108 
2109 		/*
2110 		 * The only sane way to reliably kill suid/sgid is to do it in
2111 		 * the userspace filesystem
2112 		 *
2113 		 * This should be done on write(), truncate() and chown().
2114 		 */
2115 		if (!fc->handle_killpriv && !fc->handle_killpriv_v2) {
2116 			/*
2117 			 * ia_mode calculation may have used stale i_mode.
2118 			 * Refresh and recalculate.
2119 			 */
2120 			ret = fuse_do_getattr(idmap, inode, NULL, file);
2121 			if (ret)
2122 				return ret;
2123 
2124 			attr->ia_mode = inode->i_mode;
2125 			if (inode->i_mode & S_ISUID) {
2126 				attr->ia_valid |= ATTR_MODE;
2127 				attr->ia_mode &= ~S_ISUID;
2128 			}
2129 			if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
2130 				attr->ia_valid |= ATTR_MODE;
2131 				attr->ia_mode &= ~S_ISGID;
2132 			}
2133 		}
2134 	}
2135 	if (!attr->ia_valid)
2136 		return 0;
2137 
2138 	ret = fuse_do_setattr(idmap, entry, attr, file);
2139 	if (!ret) {
2140 		/*
2141 		 * If filesystem supports acls it may have updated acl xattrs in
2142 		 * the filesystem, so forget cached acls for the inode.
2143 		 */
2144 		if (fc->posix_acl)
2145 			forget_all_cached_acls(inode);
2146 
2147 		/* Directory mode changed, may need to revalidate access */
2148 		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
2149 			fuse_invalidate_entry_cache(entry);
2150 	}
2151 	return ret;
2152 }
2153 
/*
 * getattr inode operation shared by the FUSE inode operation tables.
 *
 * Returns -EIO for a bad inode.  A caller that is not allowed to access the
 * connection gets -EACCES, unless it asked for no attributes at all, in
 * which case only stat->dev is filled in and 0 is returned.  Otherwise the
 * result of fuse_update_get_attr() is returned.
 */
static int fuse_getattr(struct mnt_idmap *idmap,
			const struct path *path, struct kstat *stat,
			u32 request_mask, unsigned int flags)
{
	struct inode *inode = d_inode(path->dentry);
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (fuse_is_bad(inode))
		return -EIO;

	if (fuse_allow_current_process(fc))
		return fuse_update_get_attr(idmap, inode, NULL, stat,
					    request_mask, flags);

	if (request_mask)
		return -EACCES;

	/*
	 * If user explicitly requested *nothing* then don't
	 * error out, but return st_dev only.
	 */
	stat->result_mask = 0;
	stat->dev = inode->i_sb->s_dev;
	return 0;
}
2179 
2180 static const struct inode_operations fuse_dir_inode_operations = {
2181 	.lookup		= fuse_lookup,
2182 	.mkdir		= fuse_mkdir,
2183 	.symlink	= fuse_symlink,
2184 	.unlink		= fuse_unlink,
2185 	.rmdir		= fuse_rmdir,
2186 	.rename		= fuse_rename2,
2187 	.link		= fuse_link,
2188 	.setattr	= fuse_setattr,
2189 	.create		= fuse_create,
2190 	.atomic_open	= fuse_atomic_open,
2191 	.tmpfile	= fuse_tmpfile,
2192 	.mknod		= fuse_mknod,
2193 	.permission	= fuse_permission,
2194 	.getattr	= fuse_getattr,
2195 	.listxattr	= fuse_listxattr,
2196 	.get_inode_acl	= fuse_get_inode_acl,
2197 	.get_acl	= fuse_get_acl,
2198 	.set_acl	= fuse_set_acl,
2199 	.fileattr_get	= fuse_fileattr_get,
2200 	.fileattr_set	= fuse_fileattr_set,
2201 };
2202 
2203 static const struct file_operations fuse_dir_operations = {
2204 	.llseek		= generic_file_llseek,
2205 	.read		= generic_read_dir,
2206 	.iterate_shared	= fuse_readdir,
2207 	.open		= fuse_dir_open,
2208 	.release	= fuse_dir_release,
2209 	.fsync		= fuse_dir_fsync,
2210 	.unlocked_ioctl	= fuse_dir_ioctl,
2211 	.compat_ioctl	= fuse_dir_compat_ioctl,
2212 };
2213 
2214 static const struct inode_operations fuse_common_inode_operations = {
2215 	.setattr	= fuse_setattr,
2216 	.permission	= fuse_permission,
2217 	.getattr	= fuse_getattr,
2218 	.listxattr	= fuse_listxattr,
2219 	.get_inode_acl	= fuse_get_inode_acl,
2220 	.get_acl	= fuse_get_acl,
2221 	.set_acl	= fuse_set_acl,
2222 	.fileattr_get	= fuse_fileattr_get,
2223 	.fileattr_set	= fuse_fileattr_set,
2224 };
2225 
2226 static const struct inode_operations fuse_symlink_inode_operations = {
2227 	.setattr	= fuse_setattr,
2228 	.get_link	= fuse_get_link,
2229 	.getattr	= fuse_getattr,
2230 	.listxattr	= fuse_listxattr,
2231 };
2232 
/* Install the common FUSE inode operations (fuse_common_inode_operations) on @inode. */
void fuse_init_common(struct inode *inode)
{
	inode->i_op = &fuse_common_inode_operations;
}
2237 
fuse_init_dir(struct inode * inode)2238 void fuse_init_dir(struct inode *inode)
2239 {
2240 	struct fuse_inode *fi = get_fuse_inode(inode);
2241 
2242 	inode->i_op = &fuse_dir_inode_operations;
2243 	inode->i_fop = &fuse_dir_operations;
2244 
2245 	spin_lock_init(&fi->rdc.lock);
2246 	fi->rdc.cached = false;
2247 	fi->rdc.size = 0;
2248 	fi->rdc.pos = 0;
2249 	fi->rdc.version = 0;
2250 }
2251 
fuse_symlink_read_folio(struct file * null,struct folio * folio)2252 static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
2253 {
2254 	int err = fuse_readlink_page(folio->mapping->host, folio);
2255 
2256 	if (!err)
2257 		folio_mark_uptodate(folio);
2258 
2259 	folio_unlock(folio);
2260 
2261 	return err;
2262 }
2263 
/* Address space operations installed on symlink inodes by fuse_init_symlink() */
static const struct address_space_operations fuse_symlink_aops = {
	.read_folio	= fuse_symlink_read_folio,
};
2267 
fuse_init_symlink(struct inode * inode)2268 void fuse_init_symlink(struct inode *inode)
2269 {
2270 	inode->i_op = &fuse_symlink_inode_operations;
2271 	inode->i_data.a_ops = &fuse_symlink_aops;
2272 	inode_nohighmem(inode);
2273 }
2274