xref: /linux/fs/fuse/dir.c (revision 829745b75a1af25bfb0c7dc36640548c98c57169)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/fs_context.h>
14 #include <linux/moduleparam.h>
15 #include <linux/sched.h>
16 #include <linux/namei.h>
17 #include <linux/slab.h>
18 #include <linux/xattr.h>
19 #include <linux/iversion.h>
20 #include <linux/posix_acl.h>
21 #include <linux/security.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 
/*
 * Writable module parameter (mode 0644), false unless set.  Its description
 * string below states the intent; the access check that consults it is not
 * in this part of the file.
 */
static bool __read_mostly allow_sys_admin_access;
module_param(allow_sys_admin_access, bool, 0644);
MODULE_PARM_DESC(allow_sys_admin_access,
		 "Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check");
29 
fuse_advise_use_readdirplus(struct inode * dir)30 static void fuse_advise_use_readdirplus(struct inode *dir)
31 {
32 	struct fuse_inode *fi = get_fuse_inode(dir);
33 
34 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
35 }
36 
37 #if BITS_PER_LONG >= 64
__fuse_dentry_settime(struct dentry * entry,u64 time)38 static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
39 {
40 	entry->d_fsdata = (void *) time;
41 }
42 
fuse_dentry_time(const struct dentry * entry)43 static inline u64 fuse_dentry_time(const struct dentry *entry)
44 {
45 	return (u64)entry->d_fsdata;
46 }
47 
48 #else
49 union fuse_dentry {
50 	u64 time;
51 	struct rcu_head rcu;
52 };
53 
__fuse_dentry_settime(struct dentry * dentry,u64 time)54 static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
55 {
56 	((union fuse_dentry *) dentry->d_fsdata)->time = time;
57 }
58 
fuse_dentry_time(const struct dentry * entry)59 static inline u64 fuse_dentry_time(const struct dentry *entry)
60 {
61 	return ((union fuse_dentry *) entry->d_fsdata)->time;
62 }
63 #endif
64 
/*
 * Store @time as the dentry's timeout and keep DCACHE_OP_DELETE in sync:
 * the flag is wanted only when @time is zero and the connection has
 * delete_stale set.
 */
static void fuse_dentry_settime(struct dentry *dentry, u64 time)
{
	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
	bool want_delete = !time && fc->delete_stale;
	bool has_delete = dentry->d_flags & DCACHE_OP_DELETE;

	/*
	 * Toggling DCACHE_OP_DELETE is purely an optimization (dput() is
	 * faster without it), so the unlocked peek above is allowed to race;
	 * d_lock is only taken when the flag actually has to change.
	 */
	if (want_delete != has_delete) {
		spin_lock(&dentry->d_lock);
		if (want_delete)
			dentry->d_flags |= DCACHE_OP_DELETE;
		else
			dentry->d_flags &= ~DCACHE_OP_DELETE;
		spin_unlock(&dentry->d_lock);
	}

	__fuse_dentry_settime(dentry, time);
}
85 
/*
 * FUSE caches dentries and attributes with separate timeouts.  The
 * time in jiffies until which the dentry/attributes are valid is stored
 * in dentry->d_fsdata and fuse_inode->i_time respectively.
 */
91 
92 /*
93  * Calculate the time in jiffies until a dentry/attributes are valid
94  */
fuse_time_to_jiffies(u64 sec,u32 nsec)95 u64 fuse_time_to_jiffies(u64 sec, u32 nsec)
96 {
97 	if (sec || nsec) {
98 		struct timespec64 ts = {
99 			sec,
100 			min_t(u32, nsec, NSEC_PER_SEC - 1)
101 		};
102 
103 		return get_jiffies_64() + timespec64_to_jiffies(&ts);
104 	} else
105 		return 0;
106 }
107 
108 /*
109  * Set dentry and possibly attribute timeouts from the lookup/mk*
110  * replies
111  */
fuse_change_entry_timeout(struct dentry * entry,struct fuse_entry_out * o)112 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
113 {
114 	fuse_dentry_settime(entry,
115 		fuse_time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
116 }
117 
/*
 * Add the bits in @mask to the inode's inval_mask.  Zero is passed as the
 * clear-mask argument of set_mask_bits().
 */
void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	set_mask_bits(&fi->inval_mask, 0, mask);
}
122 
123 /*
124  * Mark the attributes as stale, so that at the next call to
125  * ->getattr() they will be fetched from userspace
126  */
fuse_invalidate_attr(struct inode * inode)127 void fuse_invalidate_attr(struct inode *inode)
128 {
129 	fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
130 }
131 
fuse_dir_changed(struct inode * dir)132 static void fuse_dir_changed(struct inode *dir)
133 {
134 	fuse_invalidate_attr(dir);
135 	inode_maybe_inc_iversion(dir, false);
136 }
137 
138 /*
139  * Mark the attributes as stale due to an atime change.  Avoid the invalidate if
140  * atime is not used.
141  */
fuse_invalidate_atime(struct inode * inode)142 void fuse_invalidate_atime(struct inode *inode)
143 {
144 	if (!IS_RDONLY(inode))
145 		fuse_invalidate_attr_mask(inode, STATX_ATIME);
146 }
147 
148 /*
149  * Just mark the entry as stale, so that a next attempt to look it up
150  * will result in a new lookup call to userspace
151  *
152  * This is called when a dentry is about to become negative and the
153  * timeout is unknown (unlink, rmdir, rename and in some cases
154  * lookup)
155  */
fuse_invalidate_entry_cache(struct dentry * entry)156 void fuse_invalidate_entry_cache(struct dentry *entry)
157 {
158 	fuse_dentry_settime(entry, 0);
159 }
160 
/*
 * Like fuse_invalidate_entry_cache(), but first ask the dcache to
 * invalidate the dentry as well (d_invalidate()).
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);
	/* Zero timeout, i.e. what fuse_invalidate_entry_cache() does */
	fuse_dentry_settime(entry, 0);
}
170 
/*
 * Fill in @args for a FUSE_LOOKUP of @name inside the directory @nodeid.
 *
 * @outarg is cleared and registered as the single reply argument.  @fc is
 * not used by this function.
 */
static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
			     u64 nodeid, const struct qstr *name,
			     struct fuse_entry_out *outarg)
{
	/* Reply side: one zeroed fuse_entry_out */
	memset(outarg, 0, sizeof(*outarg));
	args->out_numargs = 1;
	args->out_args[0].value = outarg;
	args->out_args[0].size = sizeof(*outarg);

	/* Request side: three input arguments */
	args->opcode = FUSE_LOOKUP;
	args->nodeid = nodeid;
	args->in_numargs = 3;
	fuse_set_zero_arg0(args);
	/* The name goes out as exactly name->len bytes ... */
	args->in_args[1].value = name->name;
	args->in_args[1].size = name->len;
	/* ... followed by a separate one-byte argument holding "" */
	args->in_args[2].value = "";
	args->in_args[2].size = 1;
}
188 
/*
 * Check whether the dentry is still valid
 *
 * If the entry validity timeout has expired and the dentry is
 * positive, try to redo the lookup.  If the lookup results in a
 * different inode, then let the VFS invalidate the dentry and redo
 * the lookup once more.  If the lookup results in the same inode,
 * then refresh the attributes, timeouts and mark the dentry valid.
 *
 * Returns 1 if the dentry is valid, 0 if it is invalid, or a negative
 * errno: -ECHILD when a request would be needed under LOOKUP_RCU (or
 * FUSE_I_INIT_RDPLUS is set under LOOKUP_RCU), -ENOMEM if the forget
 * link cannot be allocated, and -ENOMEM/-EINTR passed through from the
 * lookup request.
 */
static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
				  struct dentry *entry, unsigned int flags)
{
	struct inode *inode;
	struct fuse_mount *fm;
	struct fuse_conn *fc;
	struct fuse_inode *fi;
	int ret;

	/* A dentry stamped with an older epoch than the connection's is invalid */
	fc = get_fuse_conn_super(dir->i_sb);
	if (entry->d_time < atomic_read(&fc->epoch))
		goto invalid;

	inode = d_inode_rcu(entry);
	if (inode && fuse_is_bad(inode))
		goto invalid;
	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
		 (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) {
		/* Timeout expired, or the caller's flags force a re-lookup */
		struct fuse_entry_out outarg;
		FUSE_ARGS(args);
		struct fuse_forget_link *forget;
		u64 attr_version;

		/* For negative dentries, always do a fresh lookup */
		if (!inode)
			goto invalid;

		/* No request is sent while LOOKUP_RCU is set */
		ret = -ECHILD;
		if (flags & LOOKUP_RCU)
			goto out;

		fm = get_fuse_mount(inode);

		forget = fuse_alloc_forget();
		ret = -ENOMEM;
		if (!forget)
			goto out;

		/* Sampled before the request; passed to fuse_change_attributes() */
		attr_version = fuse_get_attr_version(fm->fc);

		fuse_lookup_init(fm->fc, &args, get_node_id(dir),
				 name, &outarg);
		ret = fuse_simple_request(fm, &args);
		/* Zero nodeid is same as -ENOENT */
		if (!ret && !outarg.nodeid)
			ret = -ENOENT;
		if (!ret) {
			fi = get_fuse_inode(inode);
			/*
			 * The name now resolves to a different node, or its
			 * submount status no longer matches: queue a forget
			 * for the node just looked up (the forget link is
			 * passed on here, not freed) and invalidate.
			 */
			if (outarg.nodeid != get_node_id(inode) ||
			    (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
				fuse_queue_forget(fm->fc, forget,
						  outarg.nodeid, 1);
				goto invalid;
			}
			/* Same node: account for the extra lookup reference */
			spin_lock(&fi->lock);
			fi->nlookup++;
			spin_unlock(&fi->lock);
		}
		kfree(forget);
		/* These two errors are returned as-is instead of "invalid" */
		if (ret == -ENOMEM || ret == -EINTR)
			goto out;
		if (ret || fuse_invalid_attr(&outarg.attr) ||
		    fuse_stale_inode(inode, outarg.generation, &outarg.attr))
			goto invalid;

		/* Still the same inode: refresh ACL cache, attributes and timeout */
		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &outarg.attr, NULL,
				       ATTR_TIMEOUT(&outarg),
				       attr_version);
		fuse_change_entry_timeout(entry, &outarg);
	} else if (inode) {
		/* Timeout still running on a positive dentry */
		fi = get_fuse_inode(inode);
		if (flags & LOOKUP_RCU) {
			/* The bit is only tested here, not cleared */
			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
				return -ECHILD;
		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
			fuse_advise_use_readdirplus(dir);
		}
	}
	ret = 1;
out:
	return ret;

invalid:
	ret = 0;
	goto out;
}
285 
286 #if BITS_PER_LONG < 64
fuse_dentry_init(struct dentry * dentry)287 static int fuse_dentry_init(struct dentry *dentry)
288 {
289 	dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
290 				   GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
291 
292 	return dentry->d_fsdata ? 0 : -ENOMEM;
293 }
fuse_dentry_release(struct dentry * dentry)294 static void fuse_dentry_release(struct dentry *dentry)
295 {
296 	union fuse_dentry *fd = dentry->d_fsdata;
297 
298 	kfree_rcu(fd, rcu);
299 }
300 #endif
301 
fuse_dentry_delete(const struct dentry * dentry)302 static int fuse_dentry_delete(const struct dentry *dentry)
303 {
304 	return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
305 }
306 
307 /*
308  * Create a fuse_mount object with a new superblock (with path->dentry
309  * as the root), and return that mount so it can be auto-mounted on
310  * @path.
311  */
fuse_dentry_automount(struct path * path)312 static struct vfsmount *fuse_dentry_automount(struct path *path)
313 {
314 	struct fs_context *fsc;
315 	struct vfsmount *mnt;
316 	struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
317 
318 	fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
319 	if (IS_ERR(fsc))
320 		return ERR_CAST(fsc);
321 
322 	/* Pass the FUSE inode of the mount for fuse_get_tree_submount() */
323 	fsc->fs_private = mp_fi;
324 
325 	/* Create the submount */
326 	mnt = fc_mount(fsc);
327 	put_fs_context(fsc);
328 	return mnt;
329 }
330 
/*
 * Dentry operations for FUSE.  The d_init/d_release hooks are only wired up
 * when BITS_PER_LONG < 64, the same condition under which
 * fuse_dentry_init() and fuse_dentry_release() are defined.
 */
const struct dentry_operations fuse_dentry_operations = {
	.d_revalidate	= fuse_dentry_revalidate,
	.d_delete	= fuse_dentry_delete,
#if BITS_PER_LONG < 64
	.d_init		= fuse_dentry_init,
	.d_release	= fuse_dentry_release,
#endif
	.d_automount	= fuse_dentry_automount,
};
340 
/*
 * Returns nonzero (1) if mode @m denotes one of the seven accepted file
 * types: regular, directory, symlink, char/block device, FIFO or socket.
 */
int fuse_valid_type(int m)
{
	if (S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m))
		return 1;
	if (S_ISCHR(m) || S_ISBLK(m))
		return 1;

	return S_ISFIFO(m) || S_ISSOCK(m);
}
346 
fuse_valid_size(u64 size)347 static bool fuse_valid_size(u64 size)
348 {
349 	return size <= LLONG_MAX;
350 }
351 
fuse_invalid_attr(struct fuse_attr * attr)352 bool fuse_invalid_attr(struct fuse_attr *attr)
353 {
354 	return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
355 }
356 
/*
 * Send a FUSE_LOOKUP for @name under the directory node @nodeid and, when
 * the reply names a node, obtain the matching inode through fuse_iget().
 *
 * On return *@inode is the inode or NULL.  Returns 0 on success, which
 * includes a reply with a zero nodeid (*@inode stays NULL in that case),
 * -ENAMETOOLONG if the name exceeds the connection's name_max, -ENOMEM on
 * allocation failure, -EIO for invalid attributes in the reply, or the
 * error from the request itself.
 */
int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
		     struct fuse_entry_out *outarg, struct inode **inode)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct fuse_conn *fc = fm->fc;
	struct fuse_forget_link *forget;
	u64 attr_version, evict_ctr;
	FUSE_ARGS(args);
	int err;

	*inode = NULL;
	if (name->len > fc->name_max)
		return -ENAMETOOLONG;

	forget = fuse_alloc_forget();
	if (!forget)
		return -ENOMEM;

	/* Both counters are sampled before the request is sent */
	attr_version = fuse_get_attr_version(fc);
	evict_ctr = fuse_get_evict_ctr(fc);

	fuse_lookup_init(fc, &args, nodeid, name, outarg);
	err = fuse_simple_request(fm, &args);
	/* Zero nodeid is same as -ENOENT, but with valid timeout */
	if (err || !outarg->nodeid)
		goto free_forget;

	if (fuse_invalid_attr(&outarg->attr)) {
		err = -EIO;
		goto free_forget;
	}
	if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
		pr_warn_once("root generation should be zero\n");
		outarg->generation = 0;
	}

	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
			   &outarg->attr, ATTR_TIMEOUT(outarg),
			   attr_version, evict_ctr);
	if (!*inode) {
		/* The forget link goes to fuse_queue_forget(); it is not freed here */
		fuse_queue_forget(fc, forget, outarg->nodeid, 1);
		return -ENOMEM;
	}
	/* err is still 0 from the successful request */

free_forget:
	kfree(forget);
	return err;
}
409 
/*
 * Look up @entry in @dir by asking userspace, then splice the resulting
 * inode (or NULL, for a negative result) into the dcache.
 *
 * Returns the value of d_splice_alias() on success (which may be NULL), or
 * an ERR_PTR: -EIO if @dir is bad or the lookup resolved to FUSE_ROOT_ID,
 * otherwise the error from fuse_lookup_name()/d_splice_alias().
 * @flags is not used by this function.
 */
static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
				  unsigned int flags)
{
	struct fuse_entry_out outarg;
	struct fuse_conn *fc;
	struct inode *inode;
	struct dentry *newent;
	int err, epoch;
	bool outarg_valid = true;
	bool locked;

	if (fuse_is_bad(dir))
		return ERR_PTR(-EIO);

	/* Sample the epoch before the request; it is stamped on the dentry below */
	fc = get_fuse_conn_super(dir->i_sb);
	epoch = atomic_read(&fc->epoch);

	locked = fuse_lock_inode(dir);
	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
			       &outarg, &inode);
	fuse_unlock_inode(dir, locked);
	/*
	 * -ENOENT is not an error here: it becomes a negative dentry, but the
	 * timeout in outarg is not used for it.
	 */
	if (err == -ENOENT) {
		outarg_valid = false;
		err = 0;
	}
	if (err)
		goto out_err;

	/* A lookup inside a directory must never resolve to the root node */
	err = -EIO;
	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
		goto out_iput;

	newent = d_splice_alias(inode, entry);
	err = PTR_ERR(newent);
	if (IS_ERR(newent))
		goto out_err;

	/* Timestamps go on whichever dentry ended up representing the name */
	entry = newent ? newent : entry;
	entry->d_time = epoch;
	if (outarg_valid)
		fuse_change_entry_timeout(entry, &outarg);
	else
		fuse_invalidate_entry_cache(entry);

	if (inode)
		fuse_advise_use_readdirplus(dir);
	return newent;

 out_iput:
	iput(inode);
 out_err:
	return ERR_PTR(err);
}
463 
/*
 * Build the security-context extension for a create-type request.
 *
 * Asks security_dentry_init_security() for the initial security context of
 * @entry and packs it into a freshly allocated, zeroed buffer: a
 * struct fuse_secctx_header, followed (only when a context was returned) by
 * one struct fuse_secctx, the xattr name including its NUL terminator, and
 * the raw context bytes.  On success the buffer and its total length are
 * stored in @ext; this function does not free it.
 *
 * Returns 0 on success, -EIO if the name or context length is out of range,
 * -ENOMEM on allocation failure, or the error reported by
 * security_dentry_init_security() (except -EOPNOTSUPP, which is ignored).
 */
static int get_security_context(struct dentry *entry, umode_t mode,
				struct fuse_in_arg *ext)
{
	struct fuse_secctx *fctx;
	struct fuse_secctx_header *header;
	struct lsm_context lsmctx = { };
	void *ptr;
	u32 total_len = sizeof(*header);
	int err, nr_ctx = 0;
	const char *name = NULL;
	size_t namelen;

	err = security_dentry_init_security(entry, mode, &entry->d_name,
					    &name, &lsmctx);

	/* If no LSM is supporting this security hook ignore error */
	if (err && err != -EOPNOTSUPP)
		goto out_err;

	if (lsmctx.len) {
		nr_ctx = 1;
		/* namelen counts the terminating NUL */
		namelen = strlen(name) + 1;
		err = -EIO;
		if (WARN_ON(namelen > XATTR_NAME_MAX + 1 ||
		    lsmctx.len > S32_MAX))
			goto out_err;
		total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen +
					    lsmctx.len);
	}

	err = -ENOMEM;
	header = ptr = kzalloc(total_len, GFP_KERNEL);
	if (!ptr)
		goto out_err;

	header->nr_secctx = nr_ctx;
	header->size = total_len;
	ptr += sizeof(*header);
	if (nr_ctx) {
		fctx = ptr;
		fctx->size = lsmctx.len;
		ptr += sizeof(*fctx);

		/*
		 * Bounded copy of the name plus its NUL: namelen was
		 * validated above and is what total_len was sized with.
		 */
		memcpy(ptr, name, namelen);
		ptr += namelen;

		memcpy(ptr, lsmctx.context, lsmctx.len);
	}
	ext->size = total_len;
	ext->value = header;
	err = 0;
out_err:
	/* nr_ctx is only set once a non-empty context has been obtained */
	if (nr_ctx)
		security_release_secctx(&lsmctx);
	return err;
}
520 
/*
 * Grow the buffer described by @buf by @bytes zero-filled bytes.
 *
 * Returns a pointer to the start of the newly added region.  If the
 * reallocation fails the old buffer is freed, @buf is reset to empty
 * (NULL, size 0) and NULL is returned.
 */
static void *extend_arg(struct fuse_in_arg *buf, u32 bytes)
{
	u32 oldlen = buf->size;
	u32 newlen = oldlen + bytes;
	void *grown = krealloc(buf->value, newlen, GFP_KERNEL);

	if (grown) {
		/* Only the appended tail is cleared; existing data is kept */
		memset(grown + oldlen, 0, bytes);
		buf->value = grown;
		buf->size = newlen;

		return grown + newlen - bytes;
	}

	kfree(buf->value);
	buf->value = NULL;
	buf->size = 0;

	return NULL;
}
540 
fuse_ext_size(size_t size)541 static u32 fuse_ext_size(size_t size)
542 {
543 	return FUSE_REC_ALIGN(sizeof(struct fuse_ext_header) + size);
544 }
545 
546 /*
547  * This adds just a single supplementary group that matches the parent's group.
548  */
get_create_supp_group(struct mnt_idmap * idmap,struct inode * dir,struct fuse_in_arg * ext)549 static int get_create_supp_group(struct mnt_idmap *idmap,
550 				 struct inode *dir,
551 				 struct fuse_in_arg *ext)
552 {
553 	struct fuse_conn *fc = get_fuse_conn(dir);
554 	struct fuse_ext_header *xh;
555 	struct fuse_supp_groups *sg;
556 	kgid_t kgid = dir->i_gid;
557 	vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns, kgid);
558 	gid_t parent_gid = from_kgid(fc->user_ns, kgid);
559 
560 	u32 sg_len = fuse_ext_size(sizeof(*sg) + sizeof(sg->groups[0]));
561 
562 	if (parent_gid == (gid_t) -1 || vfsgid_eq_kgid(vfsgid, current_fsgid()) ||
563 	    !vfsgid_in_group_p(vfsgid))
564 		return 0;
565 
566 	xh = extend_arg(ext, sg_len);
567 	if (!xh)
568 		return -ENOMEM;
569 
570 	xh->size = sg_len;
571 	xh->type = FUSE_EXT_GROUPS;
572 
573 	sg = (struct fuse_supp_groups *) &xh[1];
574 	sg->nr_groups = 1;
575 	sg->groups[0] = parent_gid;
576 
577 	return 0;
578 }
579 
/*
 * Collect the optional extensions for a create-type request and attach them
 * to @args as one extra input argument.
 *
 * Depending on the connection flags this gathers the security context
 * (init_security) and the parent's group as supplementary group
 * (create_supp_group).  If any extension data was produced, it is stored in
 * the next free in_args[] slot, args->ext_idx records that slot and
 * args->is_ext is set; the buffer then has to be released with
 * free_ext_value().  Otherwise the (possibly NULL) buffer is freed here.
 *
 * Returns 0 on success, the error from collecting an extension, or -EIO if
 * @args has no free input slot left for the extension.
 */
static int get_create_ext(struct mnt_idmap *idmap,
			  struct fuse_args *args,
			  struct inode *dir, struct dentry *dentry,
			  umode_t mode)
{
	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
	struct fuse_in_arg ext = { .size = 0, .value = NULL };
	int err = 0;

	if (fc->init_security)
		err = get_security_context(dentry, mode, &ext);
	if (!err && fc->create_supp_group)
		err = get_create_supp_group(idmap, dir, &ext);

	if (!err && ext.size) {
		/*
		 * Without a free slot, storing the extension would write
		 * past the end of in_args[]; warn and fail the request
		 * instead of corrupting memory.
		 */
		if (WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args))) {
			kfree(ext.value);
			return -EIO;
		}
		args->is_ext = true;
		args->ext_idx = args->in_numargs++;
		args->in_args[args->ext_idx] = ext;
	} else {
		kfree(ext.value);
	}

	return err;
}
605 
free_ext_value(struct fuse_args * args)606 static void free_ext_value(struct fuse_args *args)
607 {
608 	if (args->is_ext)
609 		kfree(args->in_args[args->ext_idx].value);
610 }
611 
/*
 * Atomic create+open operation
 *
 * If the filesystem doesn't support this, then fall back to separate
 * 'mknod' + 'open' requests.
 *
 * Sends @opcode with a fuse_create_in plus the entry name and expects a
 * fuse_entry_out and a fuse_open_out in reply.  On success the new inode is
 * instantiated on @entry and the file is opened via finish_open().
 *
 * Returns 0 on success or a negative errno: -ENOMEM on allocation failure,
 * -EIO if the reply is not a valid regular file, or the error from the
 * request, generic_file_open() or finish_open().
 */
static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
			    struct dentry *entry, struct file *file,
			    unsigned int flags, umode_t mode, u32 opcode)
{
	struct inode *inode;
	struct fuse_mount *fm = get_fuse_mount(dir);
	FUSE_ARGS(args);
	struct fuse_forget_link *forget;
	struct fuse_create_in inarg;
	struct fuse_open_out *outopenp;
	struct fuse_entry_out outentry;
	struct fuse_inode *fi;
	struct fuse_file *ff;
	int epoch, err;
	/* Remember O_TRUNC now; 'flags' is modified further down */
	bool trunc = flags & O_TRUNC;

	/* Userspace expects S_IFREG in create mode */
	BUG_ON((mode & S_IFMT) != S_IFREG);

	/* Sampled before the request; stamped on the dentry on success */
	epoch = atomic_read(&fm->fc->epoch);
	forget = fuse_alloc_forget();
	err = -ENOMEM;
	if (!forget)
		goto out_err;

	err = -ENOMEM;
	ff = fuse_file_alloc(fm, true);
	if (!ff)
		goto out_put_forget_req;

	/* Apply the umask here unless the connection has dont_mask set */
	if (!fm->fc->dont_mask)
		mode &= ~current_umask();

	flags &= ~O_NOCTTY;
	memset(&inarg, 0, sizeof(inarg));
	memset(&outentry, 0, sizeof(outentry));
	inarg.flags = flags;
	inarg.mode = mode;
	inarg.umask = current_umask();

	/*
	 * Request suid/sgid killing only for a truncating, non-exclusive
	 * open by a caller without CAP_FSETID, and only if the connection
	 * has handle_killpriv_v2 set.
	 */
	if (fm->fc->handle_killpriv_v2 && trunc &&
	    !(flags & O_EXCL) && !capable(CAP_FSETID)) {
		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
	}

	args.opcode = opcode;
	args.nodeid = get_node_id(dir);
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	/* len + 1 bytes of the name are sent */
	args.in_args[1].size = entry->d_name.len + 1;
	args.in_args[1].value = entry->d_name.name;
	args.out_numargs = 2;
	args.out_args[0].size = sizeof(outentry);
	args.out_args[0].value = &outentry;
	/* Store outarg for fuse_finish_open() */
	outopenp = &ff->args->open_outarg;
	args.out_args[1].size = sizeof(*outopenp);
	args.out_args[1].value = outopenp;

	err = get_create_ext(idmap, &args, dir, entry, mode);
	if (err)
		goto out_free_ff;

	err = fuse_simple_idmap_request(idmap, fm, &args);
	/* The extension buffer is released whether or not the request failed */
	free_ext_value(&args);
	if (err)
		goto out_free_ff;

	/* The reply must describe a regular file with a valid node id */
	err = -EIO;
	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
	    fuse_invalid_attr(&outentry.attr))
		goto out_free_ff;

	ff->fh = outopenp->fh;
	ff->nodeid = outentry.nodeid;
	ff->open_flags = outopenp->open_flags;
	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
			  &outentry.attr, ATTR_TIMEOUT(&outentry), 0, 0);
	if (!inode) {
		/*
		 * No inode: release the already opened file and queue a
		 * forget for the node.  ff and forget are both passed on
		 * here, so the cleanup labels that free them are skipped.
		 */
		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
		fuse_sync_release(NULL, ff, flags);
		fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
		err = -ENOMEM;
		goto out_err;
	}
	kfree(forget);
	d_instantiate(entry, inode);
	entry->d_time = epoch;
	fuse_change_entry_timeout(entry, &outentry);
	fuse_dir_changed(dir);
	err = generic_file_open(inode, file);
	if (!err) {
		file->private_data = ff;
		err = finish_open(file, entry, fuse_finish_open);
	}
	if (err) {
		/* The open failed after the create: release the file handle */
		fi = get_fuse_inode(inode);
		fuse_sync_release(fi, ff, flags);
	} else {
		if (fm->fc->atomic_o_trunc && trunc)
			truncate_pagecache(inode, 0);
		else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
			invalidate_inode_pages2(inode->i_mapping);
	}
	return err;

out_free_ff:
	fuse_file_free(ff);
out_put_forget_req:
	kfree(forget);
out_err:
	return err;
}
732 
733 static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
734 		      umode_t, dev_t);
/*
 * ->atomic_open: look up @entry if that is still pending, and for O_CREAT
 * create the file, preferably with a combined FUSE_CREATE request.
 *
 * When fuse_create_open() answers -ENOSYS this is remembered in
 * fc->no_create and creation falls back to fuse_mknod() followed by
 * finish_no_open().  Returns 0 or a negative errno.
 */
static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
			    struct file *file, unsigned flags,
			    umode_t mode)
{
	struct fuse_conn *fc = get_fuse_conn(dir);
	struct mnt_idmap *idmap = file_mnt_idmap(file);
	int err;

	if (fuse_is_bad(dir))
		return -EIO;

	if (d_in_lookup(entry)) {
		struct dentry *found = fuse_lookup(dir, entry, 0);

		/* Lookup result other than NULL, or the entry exists: no create */
		if (found || d_really_is_positive(entry))
			return finish_no_open(file, found);
	}

	if (!(flags & O_CREAT))
		return finish_no_open(file, NULL);

	/* Only creates */
	file->f_mode |= FMODE_CREATED;

	if (!fc->no_create) {
		err = fuse_create_open(idmap, dir, entry, file, flags, mode,
				       FUSE_CREATE);
		if (err != -ENOSYS) {
			if (err == -EEXIST)
				fuse_invalidate_entry(entry);
			return err;
		}
		/* Do not try the combined request again on this connection */
		fc->no_create = 1;
	}

	/* Fallback: plain mknod, the open is left to the caller */
	err = fuse_mknod(idmap, dir, entry, mode, 0);
	if (err)
		return err;

	return finish_no_open(file, NULL);
}
775 
/*
 * Code shared between mknod, mkdir, symlink and link
 *
 * The caller fills in the opcode and input arguments of @args; this
 * function adds the node id of @dir and the fuse_entry_out reply buffer,
 * attaches the create extensions (except for FUSE_LINK), sends the request
 * and instantiates the resulting inode on @entry.
 *
 * Returns the value of d_splice_alias() on success (which may be NULL), or
 * an ERR_PTR: -EIO if @dir is bad or the reply is invalid or of the wrong
 * file type, -ENOMEM on allocation failure, or the request's error.
 */
static struct dentry *create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm,
				       struct fuse_args *args, struct inode *dir,
				       struct dentry *entry, umode_t mode)
{
	struct fuse_entry_out outarg;
	struct inode *inode;
	struct dentry *d;
	struct fuse_forget_link *forget;
	int epoch, err;

	if (fuse_is_bad(dir))
		return ERR_PTR(-EIO);

	/* Sampled before the request; stamped on the dentry on success */
	epoch = atomic_read(&fm->fc->epoch);

	forget = fuse_alloc_forget();
	if (!forget)
		return ERR_PTR(-ENOMEM);

	memset(&outarg, 0, sizeof(outarg));
	args->nodeid = get_node_id(dir);
	args->out_numargs = 1;
	args->out_args[0].size = sizeof(outarg);
	args->out_args[0].value = &outarg;

	/* FUSE_LINK is sent without create extensions */
	if (args->opcode != FUSE_LINK) {
		err = get_create_ext(idmap, args, dir, entry, mode);
		if (err)
			goto out_put_forget_req;
	}

	err = fuse_simple_idmap_request(idmap, fm, args);
	/* The extension buffer is released whether or not the request failed */
	free_ext_value(args);
	if (err)
		goto out_put_forget_req;

	err = -EIO;
	if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
		goto out_put_forget_req;

	/* The file type in the reply must match the requested one */
	if ((outarg.attr.mode ^ mode) & S_IFMT)
		goto out_put_forget_req;

	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
			  &outarg.attr, ATTR_TIMEOUT(&outarg), 0, 0);
	if (!inode) {
		/* The forget link is passed on here, so it is not freed */
		fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
		return ERR_PTR(-ENOMEM);
	}
	kfree(forget);

	d_drop(entry);
	d = d_splice_alias(inode, entry);
	if (IS_ERR(d))
		return d;

	/* Timestamps go on the alias if one was returned, else on @entry */
	if (d) {
		d->d_time = epoch;
		fuse_change_entry_timeout(d, &outarg);
	} else {
		entry->d_time = epoch;
		fuse_change_entry_timeout(entry, &outarg);
	}
	fuse_dir_changed(dir);
	return d;

 out_put_forget_req:
	/* -EEXIST: invalidate the cached entry for this name */
	if (err == -EEXIST)
		fuse_invalidate_entry(entry);
	kfree(forget);
	return ERR_PTR(err);
}
851 
/*
 * Wrapper around create_new_entry() for everything that is not a
 * directory; converts its pointer result into an int error code.
 */
static int create_new_nondir(struct mnt_idmap *idmap, struct fuse_mount *fm,
			     struct fuse_args *args, struct inode *dir,
			     struct dentry *entry, umode_t mode)
{
	struct dentry *res;

	/*
	 * d_splice_alias() only hands back an alternate dentry for
	 * directories, so for a non-directory create_new_entry() is not
	 * going to return one and the result needs no further handling
	 * beyond extracting the error code.
	 */
	WARN_ON_ONCE(S_ISDIR(mode));

	res = create_new_entry(idmap, fm, args, dir, entry, mode);

	return PTR_ERR(res);
}
867 
/*
 * Create a non-directory node named by @entry in @dir with a FUSE_MKNOD
 * request.  Returns 0 or a negative errno from create_new_nondir().
 */
static int fuse_mknod(struct mnt_idmap *idmap, struct inode *dir,
		      struct dentry *entry, umode_t mode, dev_t rdev)
{
	struct fuse_mount *fm = get_fuse_mount(dir);
	struct fuse_mknod_in inarg;
	FUSE_ARGS(args);

	/* Apply the umask here unless the connection has dont_mask set */
	if (!fm->fc->dont_mask)
		mode &= ~current_umask();

	memset(&inarg, 0, sizeof(inarg));
	inarg.umask = current_umask();
	inarg.rdev = new_encode_dev(rdev);
	inarg.mode = mode;

	args.opcode = FUSE_MKNOD;
	args.in_numargs = 2;
	args.in_args[0].value = &inarg;
	args.in_args[0].size = sizeof(inarg);
	/* len + 1 bytes of the name are sent */
	args.in_args[1].value = entry->d_name.name;
	args.in_args[1].size = entry->d_name.len + 1;

	return create_new_nondir(idmap, fm, &args, dir, entry, mode);
}
890 
/*
 * ->create: implemented as fuse_mknod() with a zero device number.
 * @excl is not used.
 */
static int fuse_create(struct mnt_idmap *idmap, struct inode *dir,
		       struct dentry *entry, umode_t mode, bool excl)
{
	const dev_t no_dev = 0;

	return fuse_mknod(idmap, dir, entry, mode, no_dev);
}
896 
/*
 * ->tmpfile: create and open an unnamed file with a FUSE_TMPFILE request.
 *
 * An -ENOSYS reply is remembered in fc->no_tmpfile and reported, now and on
 * every later call, as -EOPNOTSUPP.
 */
static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
			struct file *file, umode_t mode)
{
	struct fuse_conn *fc = get_fuse_conn(dir);
	int err;

	if (fc->no_tmpfile)
		return -EOPNOTSUPP;

	err = fuse_create_open(idmap, dir, file->f_path.dentry, file,
			       file->f_flags, mode, FUSE_TMPFILE);
	if (err != -ENOSYS)
		return err;

	fc->no_tmpfile = 1;
	return -EOPNOTSUPP;
}
914 
/*
 * ->mkdir: create the directory named by @entry in @dir with a FUSE_MKDIR
 * request.  Returns whatever create_new_entry() returns.
 */
static struct dentry *fuse_mkdir(struct mnt_idmap *idmap, struct inode *dir,
				 struct dentry *entry, umode_t mode)
{
	struct fuse_mount *fm = get_fuse_mount(dir);
	struct fuse_mkdir_in inarg;
	FUSE_ARGS(args);

	/* Apply the umask here unless the connection has dont_mask set */
	if (!fm->fc->dont_mask)
		mode &= ~current_umask();

	memset(&inarg, 0, sizeof(inarg));
	inarg.umask = current_umask();
	inarg.mode = mode;

	args.opcode = FUSE_MKDIR;
	args.in_numargs = 2;
	args.in_args[0].value = &inarg;
	args.in_args[0].size = sizeof(inarg);
	/* len + 1 bytes of the name are sent */
	args.in_args[1].value = entry->d_name.name;
	args.in_args[1].size = entry->d_name.len + 1;

	/* Only the file type is passed on for the reply check */
	return create_new_entry(idmap, fm, &args, dir, entry, S_IFDIR);
}
936 
fuse_symlink(struct mnt_idmap * idmap,struct inode * dir,struct dentry * entry,const char * link)937 static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir,
938 			struct dentry *entry, const char *link)
939 {
940 	struct fuse_mount *fm = get_fuse_mount(dir);
941 	unsigned len = strlen(link) + 1;
942 	FUSE_ARGS(args);
943 
944 	args.opcode = FUSE_SYMLINK;
945 	args.in_numargs = 3;
946 	fuse_set_zero_arg0(&args);
947 	args.in_args[1].size = entry->d_name.len + 1;
948 	args.in_args[1].value = entry->d_name.name;
949 	args.in_args[2].size = len;
950 	args.in_args[2].value = link;
951 	return create_new_nondir(idmap, fm, &args, dir, entry, S_IFLNK);
952 }
953 
fuse_flush_time_update(struct inode * inode)954 void fuse_flush_time_update(struct inode *inode)
955 {
956 	int err = sync_inode_metadata(inode, 1);
957 
958 	mapping_set_error(inode->i_mapping, err);
959 }
960 
/*
 * Set the cached ctime of @inode to the current time, mark the inode dirty
 * and flush the time update.  Does nothing for IS_NOCMTIME inodes.
 */
static void fuse_update_ctime_in_cache(struct inode *inode)
{
	if (IS_NOCMTIME(inode))
		return;

	inode_set_ctime_current(inode);
	mark_inode_dirty_sync(inode);
	fuse_flush_time_update(inode);
}
969 
fuse_update_ctime(struct inode * inode)970 void fuse_update_ctime(struct inode *inode)
971 {
972 	fuse_invalidate_attr_mask(inode, STATX_CTIME);
973 	fuse_update_ctime_in_cache(inode);
974 }
975 
fuse_entry_unlinked(struct dentry * entry)976 static void fuse_entry_unlinked(struct dentry *entry)
977 {
978 	struct inode *inode = d_inode(entry);
979 	struct fuse_conn *fc = get_fuse_conn(inode);
980 	struct fuse_inode *fi = get_fuse_inode(inode);
981 
982 	spin_lock(&fi->lock);
983 	fi->attr_version = atomic64_inc_return(&fc->attr_version);
984 	/*
985 	 * If i_nlink == 0 then unlink doesn't make sense, yet this can
986 	 * happen if userspace filesystem is careless.  It would be
987 	 * difficult to enforce correct nlink usage so just ignore this
988 	 * condition here
989 	 */
990 	if (S_ISDIR(inode->i_mode))
991 		clear_nlink(inode);
992 	else if (inode->i_nlink > 0)
993 		drop_nlink(inode);
994 	spin_unlock(&fi->lock);
995 	fuse_invalidate_entry_cache(entry);
996 	fuse_update_ctime(inode);
997 }
998 
fuse_unlink(struct inode * dir,struct dentry * entry)999 static int fuse_unlink(struct inode *dir, struct dentry *entry)
1000 {
1001 	int err;
1002 	struct fuse_mount *fm = get_fuse_mount(dir);
1003 	FUSE_ARGS(args);
1004 
1005 	if (fuse_is_bad(dir))
1006 		return -EIO;
1007 
1008 	args.opcode = FUSE_UNLINK;
1009 	args.nodeid = get_node_id(dir);
1010 	args.in_numargs = 2;
1011 	fuse_set_zero_arg0(&args);
1012 	args.in_args[1].size = entry->d_name.len + 1;
1013 	args.in_args[1].value = entry->d_name.name;
1014 	err = fuse_simple_request(fm, &args);
1015 	if (!err) {
1016 		fuse_dir_changed(dir);
1017 		fuse_entry_unlinked(entry);
1018 	} else if (err == -EINTR || err == -ENOENT)
1019 		fuse_invalidate_entry(entry);
1020 	return err;
1021 }
1022 
fuse_rmdir(struct inode * dir,struct dentry * entry)1023 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
1024 {
1025 	int err;
1026 	struct fuse_mount *fm = get_fuse_mount(dir);
1027 	FUSE_ARGS(args);
1028 
1029 	if (fuse_is_bad(dir))
1030 		return -EIO;
1031 
1032 	args.opcode = FUSE_RMDIR;
1033 	args.nodeid = get_node_id(dir);
1034 	args.in_numargs = 2;
1035 	fuse_set_zero_arg0(&args);
1036 	args.in_args[1].size = entry->d_name.len + 1;
1037 	args.in_args[1].value = entry->d_name.name;
1038 	err = fuse_simple_request(fm, &args);
1039 	if (!err) {
1040 		fuse_dir_changed(dir);
1041 		fuse_entry_unlinked(entry);
1042 	} else if (err == -EINTR || err == -ENOENT)
1043 		fuse_invalidate_entry(entry);
1044 	return err;
1045 }
1046 
fuse_rename_common(struct mnt_idmap * idmap,struct inode * olddir,struct dentry * oldent,struct inode * newdir,struct dentry * newent,unsigned int flags,int opcode,size_t argsize)1047 static int fuse_rename_common(struct mnt_idmap *idmap, struct inode *olddir, struct dentry *oldent,
1048 			      struct inode *newdir, struct dentry *newent,
1049 			      unsigned int flags, int opcode, size_t argsize)
1050 {
1051 	int err;
1052 	struct fuse_rename2_in inarg;
1053 	struct fuse_mount *fm = get_fuse_mount(olddir);
1054 	FUSE_ARGS(args);
1055 
1056 	memset(&inarg, 0, argsize);
1057 	inarg.newdir = get_node_id(newdir);
1058 	inarg.flags = flags;
1059 	args.opcode = opcode;
1060 	args.nodeid = get_node_id(olddir);
1061 	args.in_numargs = 3;
1062 	args.in_args[0].size = argsize;
1063 	args.in_args[0].value = &inarg;
1064 	args.in_args[1].size = oldent->d_name.len + 1;
1065 	args.in_args[1].value = oldent->d_name.name;
1066 	args.in_args[2].size = newent->d_name.len + 1;
1067 	args.in_args[2].value = newent->d_name.name;
1068 	err = fuse_simple_idmap_request(idmap, fm, &args);
1069 	if (!err) {
1070 		/* ctime changes */
1071 		fuse_update_ctime(d_inode(oldent));
1072 
1073 		if (flags & RENAME_EXCHANGE)
1074 			fuse_update_ctime(d_inode(newent));
1075 
1076 		fuse_dir_changed(olddir);
1077 		if (olddir != newdir)
1078 			fuse_dir_changed(newdir);
1079 
1080 		/* newent will end up negative */
1081 		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent))
1082 			fuse_entry_unlinked(newent);
1083 	} else if (err == -EINTR || err == -ENOENT) {
1084 		/* If request was interrupted, DEITY only knows if the
1085 		   rename actually took place.  If the invalidation
1086 		   fails (e.g. some process has CWD under the renamed
1087 		   directory), then there can be inconsistency between
1088 		   the dcache and the real filesystem.  Tough luck. */
1089 		fuse_invalidate_entry(oldent);
1090 		if (d_really_is_positive(newent))
1091 			fuse_invalidate_entry(newent);
1092 	}
1093 
1094 	return err;
1095 }
1096 
fuse_rename2(struct mnt_idmap * idmap,struct inode * olddir,struct dentry * oldent,struct inode * newdir,struct dentry * newent,unsigned int flags)1097 static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir,
1098 			struct dentry *oldent, struct inode *newdir,
1099 			struct dentry *newent, unsigned int flags)
1100 {
1101 	struct fuse_conn *fc = get_fuse_conn(olddir);
1102 	int err;
1103 
1104 	if (fuse_is_bad(olddir))
1105 		return -EIO;
1106 
1107 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
1108 		return -EINVAL;
1109 
1110 	if (flags) {
1111 		if (fc->no_rename2 || fc->minor < 23)
1112 			return -EINVAL;
1113 
1114 		err = fuse_rename_common((flags & RENAME_WHITEOUT) ? idmap : &invalid_mnt_idmap,
1115 					 olddir, oldent, newdir, newent, flags,
1116 					 FUSE_RENAME2,
1117 					 sizeof(struct fuse_rename2_in));
1118 		if (err == -ENOSYS) {
1119 			fc->no_rename2 = 1;
1120 			err = -EINVAL;
1121 		}
1122 	} else {
1123 		err = fuse_rename_common(&invalid_mnt_idmap, olddir, oldent, newdir, newent, 0,
1124 					 FUSE_RENAME,
1125 					 sizeof(struct fuse_rename_in));
1126 	}
1127 
1128 	return err;
1129 }
1130 
fuse_link(struct dentry * entry,struct inode * newdir,struct dentry * newent)1131 static int fuse_link(struct dentry *entry, struct inode *newdir,
1132 		     struct dentry *newent)
1133 {
1134 	int err;
1135 	struct fuse_link_in inarg;
1136 	struct inode *inode = d_inode(entry);
1137 	struct fuse_mount *fm = get_fuse_mount(inode);
1138 	FUSE_ARGS(args);
1139 
1140 	if (fm->fc->no_link)
1141 		goto out;
1142 
1143 	memset(&inarg, 0, sizeof(inarg));
1144 	inarg.oldnodeid = get_node_id(inode);
1145 	args.opcode = FUSE_LINK;
1146 	args.in_numargs = 2;
1147 	args.in_args[0].size = sizeof(inarg);
1148 	args.in_args[0].value = &inarg;
1149 	args.in_args[1].size = newent->d_name.len + 1;
1150 	args.in_args[1].value = newent->d_name.name;
1151 	err = create_new_nondir(&invalid_mnt_idmap, fm, &args, newdir, newent, inode->i_mode);
1152 	if (!err)
1153 		fuse_update_ctime_in_cache(inode);
1154 	else if (err == -EINTR)
1155 		fuse_invalidate_attr(inode);
1156 
1157 	if (err == -ENOSYS)
1158 		fm->fc->no_link = 1;
1159 out:
1160 	if (fm->fc->no_link)
1161 		return -EPERM;
1162 
1163 	return err;
1164 }
1165 
fuse_fillattr(struct mnt_idmap * idmap,struct inode * inode,struct fuse_attr * attr,struct kstat * stat)1166 static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode,
1167 			  struct fuse_attr *attr, struct kstat *stat)
1168 {
1169 	unsigned int blkbits;
1170 	struct fuse_conn *fc = get_fuse_conn(inode);
1171 	vfsuid_t vfsuid = make_vfsuid(idmap, fc->user_ns,
1172 				      make_kuid(fc->user_ns, attr->uid));
1173 	vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns,
1174 				      make_kgid(fc->user_ns, attr->gid));
1175 
1176 	stat->dev = inode->i_sb->s_dev;
1177 	stat->ino = attr->ino;
1178 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
1179 	stat->nlink = attr->nlink;
1180 	stat->uid = vfsuid_into_kuid(vfsuid);
1181 	stat->gid = vfsgid_into_kgid(vfsgid);
1182 	stat->rdev = inode->i_rdev;
1183 	stat->atime.tv_sec = attr->atime;
1184 	stat->atime.tv_nsec = attr->atimensec;
1185 	stat->mtime.tv_sec = attr->mtime;
1186 	stat->mtime.tv_nsec = attr->mtimensec;
1187 	stat->ctime.tv_sec = attr->ctime;
1188 	stat->ctime.tv_nsec = attr->ctimensec;
1189 	stat->size = attr->size;
1190 	stat->blocks = attr->blocks;
1191 
1192 	if (attr->blksize != 0)
1193 		blkbits = ilog2(attr->blksize);
1194 	else
1195 		blkbits = fc->blkbits;
1196 
1197 	stat->blksize = 1 << blkbits;
1198 }
1199 
fuse_statx_to_attr(struct fuse_statx * sx,struct fuse_attr * attr)1200 static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr)
1201 {
1202 	memset(attr, 0, sizeof(*attr));
1203 	attr->ino = sx->ino;
1204 	attr->size = sx->size;
1205 	attr->blocks = sx->blocks;
1206 	attr->atime = sx->atime.tv_sec;
1207 	attr->mtime = sx->mtime.tv_sec;
1208 	attr->ctime = sx->ctime.tv_sec;
1209 	attr->atimensec = sx->atime.tv_nsec;
1210 	attr->mtimensec = sx->mtime.tv_nsec;
1211 	attr->ctimensec = sx->ctime.tv_nsec;
1212 	attr->mode = sx->mode;
1213 	attr->nlink = sx->nlink;
1214 	attr->uid = sx->uid;
1215 	attr->gid = sx->gid;
1216 	attr->rdev = new_encode_dev(MKDEV(sx->rdev_major, sx->rdev_minor));
1217 	attr->blksize = sx->blksize;
1218 }
1219 
fuse_do_statx(struct mnt_idmap * idmap,struct inode * inode,struct file * file,struct kstat * stat)1220 static int fuse_do_statx(struct mnt_idmap *idmap, struct inode *inode,
1221 			 struct file *file, struct kstat *stat)
1222 {
1223 	int err;
1224 	struct fuse_attr attr;
1225 	struct fuse_statx *sx;
1226 	struct fuse_statx_in inarg;
1227 	struct fuse_statx_out outarg;
1228 	struct fuse_mount *fm = get_fuse_mount(inode);
1229 	u64 attr_version = fuse_get_attr_version(fm->fc);
1230 	FUSE_ARGS(args);
1231 
1232 	memset(&inarg, 0, sizeof(inarg));
1233 	memset(&outarg, 0, sizeof(outarg));
1234 	/* Directories have separate file-handle space */
1235 	if (file && S_ISREG(inode->i_mode)) {
1236 		struct fuse_file *ff = file->private_data;
1237 
1238 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1239 		inarg.fh = ff->fh;
1240 	}
1241 	/* For now leave sync hints as the default, request all stats. */
1242 	inarg.sx_flags = 0;
1243 	inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME;
1244 	args.opcode = FUSE_STATX;
1245 	args.nodeid = get_node_id(inode);
1246 	args.in_numargs = 1;
1247 	args.in_args[0].size = sizeof(inarg);
1248 	args.in_args[0].value = &inarg;
1249 	args.out_numargs = 1;
1250 	args.out_args[0].size = sizeof(outarg);
1251 	args.out_args[0].value = &outarg;
1252 	err = fuse_simple_request(fm, &args);
1253 	if (err)
1254 		return err;
1255 
1256 	sx = &outarg.stat;
1257 	if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
1258 	    ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
1259 					 inode_wrong_type(inode, sx->mode)))) {
1260 		fuse_make_bad(inode);
1261 		return -EIO;
1262 	}
1263 
1264 	fuse_statx_to_attr(&outarg.stat, &attr);
1265 	if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) {
1266 		fuse_change_attributes(inode, &attr, &outarg.stat,
1267 				       ATTR_TIMEOUT(&outarg), attr_version);
1268 	}
1269 
1270 	if (stat) {
1271 		stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME);
1272 		stat->btime.tv_sec = sx->btime.tv_sec;
1273 		stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
1274 		fuse_fillattr(idmap, inode, &attr, stat);
1275 		stat->result_mask |= STATX_TYPE;
1276 	}
1277 
1278 	return 0;
1279 }
1280 
fuse_do_getattr(struct mnt_idmap * idmap,struct inode * inode,struct kstat * stat,struct file * file)1281 static int fuse_do_getattr(struct mnt_idmap *idmap, struct inode *inode,
1282 			   struct kstat *stat, struct file *file)
1283 {
1284 	int err;
1285 	struct fuse_getattr_in inarg;
1286 	struct fuse_attr_out outarg;
1287 	struct fuse_mount *fm = get_fuse_mount(inode);
1288 	FUSE_ARGS(args);
1289 	u64 attr_version;
1290 
1291 	attr_version = fuse_get_attr_version(fm->fc);
1292 
1293 	memset(&inarg, 0, sizeof(inarg));
1294 	memset(&outarg, 0, sizeof(outarg));
1295 	/* Directories have separate file-handle space */
1296 	if (file && S_ISREG(inode->i_mode)) {
1297 		struct fuse_file *ff = file->private_data;
1298 
1299 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1300 		inarg.fh = ff->fh;
1301 	}
1302 	args.opcode = FUSE_GETATTR;
1303 	args.nodeid = get_node_id(inode);
1304 	args.in_numargs = 1;
1305 	args.in_args[0].size = sizeof(inarg);
1306 	args.in_args[0].value = &inarg;
1307 	args.out_numargs = 1;
1308 	args.out_args[0].size = sizeof(outarg);
1309 	args.out_args[0].value = &outarg;
1310 	err = fuse_simple_request(fm, &args);
1311 	if (!err) {
1312 		if (fuse_invalid_attr(&outarg.attr) ||
1313 		    inode_wrong_type(inode, outarg.attr.mode)) {
1314 			fuse_make_bad(inode);
1315 			err = -EIO;
1316 		} else {
1317 			fuse_change_attributes(inode, &outarg.attr, NULL,
1318 					       ATTR_TIMEOUT(&outarg),
1319 					       attr_version);
1320 			if (stat)
1321 				fuse_fillattr(idmap, inode, &outarg.attr, stat);
1322 		}
1323 	}
1324 	return err;
1325 }
1326 
/*
 * Return attributes for @inode, asking the server for fresh ones when the
 * cached copy cannot be used.
 *
 * @stat may be NULL, in which case the cache is only refreshed (if needed).
 * @request_mask is limited to STATX_BASIC_STATS | STATX_BTIME, and to
 * STATX_BASIC_STATS once the connection is known not to support statx.
 * @flags carries the AT_STATX_* sync hints.
 *
 * Returns 0 on success or the error from the attribute request.
 */
static int fuse_update_get_attr(struct mnt_idmap *idmap, struct inode *inode,
				struct file *file, struct kstat *stat,
				u32 request_mask, unsigned int flags)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_conn *fc = get_fuse_conn(inode);
	int err = 0;
	bool sync;
	/* Snapshot taken once; it is not re-read on retry */
	u32 inval_mask = READ_ONCE(fi->inval_mask);
	u32 cache_mask = fuse_get_cache_mask(inode);


	/* FUSE only supports basic stats and possibly btime */
	request_mask &= STATX_BASIC_STATS | STATX_BTIME;
retry:
	if (fc->no_statx)
		request_mask &= STATX_BASIC_STATS;

	/*
	 * Decide whether the server must be asked, in priority order:
	 * nothing requested, explicit force, explicit don't-sync, a requested
	 * attribute that is invalidated and not covered by cache_mask, and
	 * finally expiry of the attribute timeout.
	 */
	if (!request_mask)
		sync = false;
	else if (flags & AT_STATX_FORCE_SYNC)
		sync = true;
	else if (flags & AT_STATX_DONT_SYNC)
		sync = false;
	else if (request_mask & inval_mask & ~cache_mask)
		sync = true;
	else
		sync = time_before64(fi->i_time, get_jiffies_64());

	if (sync) {
		forget_all_cached_acls(inode);
		/* Try statx if BTIME is requested */
		if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) {
			err = fuse_do_statx(idmap, inode, file, stat);
			if (err == -ENOSYS) {
				/*
				 * Server has no statx: remember that and
				 * start over; the retry drops BTIME from
				 * request_mask.
				 */
				fc->no_statx = 1;
				err = 0;
				goto retry;
			}
		} else {
			err = fuse_do_getattr(idmap, inode, stat, file);
		}
	} else if (stat) {
		/* Serve from the cache, using the values saved in fi */
		generic_fillattr(idmap, request_mask, inode, stat);
		stat->mode = fi->orig_i_mode;
		stat->ino = fi->orig_ino;
		stat->blksize = 1 << fi->cached_i_blkbits;
		if (test_bit(FUSE_I_BTIME, &fi->state)) {
			stat->btime = fi->i_btime;
			stat->result_mask |= STATX_BTIME;
		}
	}

	return err;
}
1382 
/*
 * Refresh the cached attributes selected by @mask if needed.  Thin wrapper
 * around fuse_update_get_attr() with the nop idmap, no kstat to fill and no
 * AT_STATX_* flags.
 */
int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask)
{
	return fuse_update_get_attr(&nop_mnt_idmap, inode, file, NULL, mask, 0);
}
1387 
/*
 * Invalidate (or only expire, with FUSE_EXPIRE_ONLY) the dentry @name in
 * the directory identified by @parent_nodeid.
 *
 * If @child_nodeid is non-zero and the dentry is positive, the entry is
 * additionally treated as deleted: its link count is cleared and the dentry
 * is removed with d_delete(), provided the node id matches, it is not a
 * mountpoint and, for directories, it is empty.
 *
 * Returns 0 on success, -ENOENT if the parent, its alias, the entry or a
 * matching child is not found, -ENOTDIR if the parent is not a directory,
 * -EBUSY for a mountpoint and -ENOTEMPTY for a non-empty directory.
 *
 * Note: name->hash is written by this function.
 */
int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
			     u64 child_nodeid, struct qstr *name, u32 flags)
{
	int err = -ENOTDIR;
	struct inode *parent;
	struct dentry *dir;
	struct dentry *entry;

	parent = fuse_ilookup(fc, parent_nodeid, NULL);
	if (!parent)
		return -ENOENT;

	inode_lock_nested(parent, I_MUTEX_PARENT);
	if (!S_ISDIR(parent->i_mode))
		goto unlock;

	err = -ENOENT;
	dir = d_find_alias(parent);
	if (!dir)
		goto unlock;

	name->hash = full_name_hash(dir, name->name, name->len);
	entry = d_lookup(dir, name);
	/* The alias was only needed for the lookup */
	dput(dir);
	if (!entry)
		goto unlock;

	fuse_dir_changed(parent);
	if (!(flags & FUSE_EXPIRE_ONLY))
		d_invalidate(entry);
	fuse_invalidate_entry_cache(entry);

	if (child_nodeid != 0 && d_really_is_positive(entry)) {
		/* Deletion path: runs with the child inode locked too */
		inode_lock(d_inode(entry));
		if (get_node_id(d_inode(entry)) != child_nodeid) {
			err = -ENOENT;
			goto badentry;
		}
		if (d_mountpoint(entry)) {
			err = -EBUSY;
			goto badentry;
		}
		if (d_is_dir(entry)) {
			shrink_dcache_parent(entry);
			if (!simple_empty(entry)) {
				err = -ENOTEMPTY;
				goto badentry;
			}
			d_inode(entry)->i_flags |= S_DEAD;
		}
		dont_mount(entry);
		clear_nlink(d_inode(entry));
		err = 0;
 badentry:
		inode_unlock(d_inode(entry));
		/* d_delete() is called only after the child lock is dropped */
		if (!err)
			d_delete(entry);
	} else {
		err = 0;
	}
	dput(entry);

 unlock:
	inode_unlock(parent);
	iput(parent);
	return err;
}
1455 
fuse_permissible_uidgid(struct fuse_conn * fc)1456 static inline bool fuse_permissible_uidgid(struct fuse_conn *fc)
1457 {
1458 	const struct cred *cred = current_cred();
1459 
1460 	return (uid_eq(cred->euid, fc->user_id) &&
1461 		uid_eq(cred->suid, fc->user_id) &&
1462 		uid_eq(cred->uid,  fc->user_id) &&
1463 		gid_eq(cred->egid, fc->group_id) &&
1464 		gid_eq(cred->sgid, fc->group_id) &&
1465 		gid_eq(cred->gid,  fc->group_id));
1466 }
1467 
1468 /*
1469  * Calling into a user-controlled filesystem gives the filesystem
1470  * daemon ptrace-like capabilities over the current process.  This
1471  * means, that the filesystem daemon is able to record the exact
1472  * filesystem operations performed, and can also control the behavior
1473  * of the requester process in otherwise impossible ways.  For example
1474  * it can delay the operation for arbitrary length of time allowing
1475  * DoS against the requester.
1476  *
1477  * For this reason only those processes can call into the filesystem,
1478  * for which the owner of the mount has ptrace privilege.  This
1479  * excludes processes started by other users, suid or sgid processes.
1480  */
fuse_allow_current_process(struct fuse_conn * fc)1481 bool fuse_allow_current_process(struct fuse_conn *fc)
1482 {
1483 	bool allow;
1484 
1485 	if (fc->allow_other)
1486 		allow = current_in_userns(fc->user_ns);
1487 	else
1488 		allow = fuse_permissible_uidgid(fc);
1489 
1490 	if (!allow && allow_sys_admin_access && capable(CAP_SYS_ADMIN))
1491 		allow = true;
1492 
1493 	return allow;
1494 }
1495 
fuse_access(struct inode * inode,int mask)1496 static int fuse_access(struct inode *inode, int mask)
1497 {
1498 	struct fuse_mount *fm = get_fuse_mount(inode);
1499 	FUSE_ARGS(args);
1500 	struct fuse_access_in inarg;
1501 	int err;
1502 
1503 	BUG_ON(mask & MAY_NOT_BLOCK);
1504 
1505 	/*
1506 	 * We should not send FUSE_ACCESS to the userspace
1507 	 * when idmapped mounts are enabled as for this case
1508 	 * we have fc->default_permissions = 1 and access
1509 	 * permission checks are done on the kernel side.
1510 	 */
1511 	WARN_ON_ONCE(!(fm->sb->s_iflags & SB_I_NOIDMAP));
1512 
1513 	if (fm->fc->no_access)
1514 		return 0;
1515 
1516 	memset(&inarg, 0, sizeof(inarg));
1517 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1518 	args.opcode = FUSE_ACCESS;
1519 	args.nodeid = get_node_id(inode);
1520 	args.in_numargs = 1;
1521 	args.in_args[0].size = sizeof(inarg);
1522 	args.in_args[0].value = &inarg;
1523 	err = fuse_simple_request(fm, &args);
1524 	if (err == -ENOSYS) {
1525 		fm->fc->no_access = 1;
1526 		err = 0;
1527 	}
1528 	return err;
1529 }
1530 
fuse_perm_getattr(struct inode * inode,int mask)1531 static int fuse_perm_getattr(struct inode *inode, int mask)
1532 {
1533 	if (mask & MAY_NOT_BLOCK)
1534 		return -ECHILD;
1535 
1536 	forget_all_cached_acls(inode);
1537 	return fuse_do_getattr(&nop_mnt_idmap, inode, NULL, NULL);
1538 }
1539 
/*
 * Check permission.  The two basic access models of FUSE are:
 *
 * 1) Local access checking ('default_permissions' mount option) based
 * on file mode.  This is the plain old disk filesystem permission
 * model.
 *
 * 2) "Remote" access checking, where server is responsible for
 * checking permission in each inode operation.  An exception to this
 * is if ->permission() was invoked from sys_access() in which case an
 * access request is sent.  Execute permission is still checked
 * locally based on file mode.
 *
 * Returns 0 if access is allowed, -EIO for a bad inode, -EACCES if the
 * calling process may not use this mount or access is denied, or an error
 * from refreshing attributes / the access request.
 */
static int fuse_permission(struct mnt_idmap *idmap,
			   struct inode *inode, int mask)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	/* Set once attributes have been re-fetched in this call */
	bool refreshed = false;
	int err = 0;

	if (fuse_is_bad(inode))
		return -EIO;

	if (!fuse_allow_current_process(fc))
		return -EACCES;

	/*
	 * If attributes are needed, refresh them before proceeding
	 */
	if (fc->default_permissions ||
	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
		struct fuse_inode *fi = get_fuse_inode(inode);
		u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;

		/* Refresh if mode/uid/gid are invalidated or the timeout expired */
		if (perm_mask & READ_ONCE(fi->inval_mask) ||
		    time_before64(fi->i_time, get_jiffies_64())) {
			refreshed = true;

			err = fuse_perm_getattr(inode, mask);
			if (err)
				return err;
		}
	}

	if (fc->default_permissions) {
		err = generic_permission(idmap, inode, mask);

		/* If permission is denied, try to refresh file
		   attributes.  This is also needed, because the root
		   node will at first have no permissions */
		if (err == -EACCES && !refreshed) {
			err = fuse_perm_getattr(inode, mask);
			if (!err)
				err = generic_permission(idmap,
							 inode, mask);
		}

		/* Note: the opposite of the above test does not
		   exist.  So if permissions are revoked this won't be
		   noticed immediately, only after the attribute
		   timeout has expired */
	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
		err = fuse_access(inode, mask);
	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
		/* Exec on a regular file needs at least one execute bit */
		if (!(inode->i_mode & S_IXUGO)) {
			if (refreshed)
				return -EACCES;

			/* Cached mode may be stale: refresh once and re-check */
			err = fuse_perm_getattr(inode, mask);
			if (!err && !(inode->i_mode & S_IXUGO))
				return -EACCES;
		}
	}
	return err;
}
1615 
fuse_readlink_folio(struct inode * inode,struct folio * folio)1616 static int fuse_readlink_folio(struct inode *inode, struct folio *folio)
1617 {
1618 	struct fuse_mount *fm = get_fuse_mount(inode);
1619 	struct fuse_folio_desc desc = { .length = folio_size(folio) - 1 };
1620 	struct fuse_args_pages ap = {
1621 		.num_folios = 1,
1622 		.folios = &folio,
1623 		.descs = &desc,
1624 	};
1625 	char *link;
1626 	ssize_t res;
1627 
1628 	ap.args.opcode = FUSE_READLINK;
1629 	ap.args.nodeid = get_node_id(inode);
1630 	ap.args.out_pages = true;
1631 	ap.args.out_argvar = true;
1632 	ap.args.page_zeroing = true;
1633 	ap.args.out_numargs = 1;
1634 	ap.args.out_args[0].size = desc.length;
1635 	res = fuse_simple_request(fm, &ap.args);
1636 
1637 	fuse_invalidate_atime(inode);
1638 
1639 	if (res < 0)
1640 		return res;
1641 
1642 	if (WARN_ON(res >= PAGE_SIZE))
1643 		return -EIO;
1644 
1645 	link = folio_address(folio);
1646 	link[res] = '\0';
1647 
1648 	return 0;
1649 }
1650 
fuse_get_link(struct dentry * dentry,struct inode * inode,struct delayed_call * callback)1651 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
1652 				 struct delayed_call *callback)
1653 {
1654 	struct fuse_conn *fc = get_fuse_conn(inode);
1655 	struct folio *folio;
1656 	int err;
1657 
1658 	err = -EIO;
1659 	if (fuse_is_bad(inode))
1660 		goto out_err;
1661 
1662 	if (fc->cache_symlinks)
1663 		return page_get_link_raw(dentry, inode, callback);
1664 
1665 	err = -ECHILD;
1666 	if (!dentry)
1667 		goto out_err;
1668 
1669 	folio = folio_alloc(GFP_KERNEL, 0);
1670 	err = -ENOMEM;
1671 	if (!folio)
1672 		goto out_err;
1673 
1674 	err = fuse_readlink_folio(inode, folio);
1675 	if (err) {
1676 		folio_put(folio);
1677 		goto out_err;
1678 	}
1679 
1680 	set_delayed_call(callback, page_put_link, folio);
1681 
1682 	return folio_address(folio);
1683 
1684 out_err:
1685 	return ERR_PTR(err);
1686 }
1687 
fuse_dir_open(struct inode * inode,struct file * file)1688 static int fuse_dir_open(struct inode *inode, struct file *file)
1689 {
1690 	struct fuse_mount *fm = get_fuse_mount(inode);
1691 	int err;
1692 
1693 	if (fuse_is_bad(inode))
1694 		return -EIO;
1695 
1696 	err = generic_file_open(inode, file);
1697 	if (err)
1698 		return err;
1699 
1700 	err = fuse_do_open(fm, get_node_id(inode), file, true);
1701 	if (!err) {
1702 		struct fuse_file *ff = file->private_data;
1703 
1704 		/*
1705 		 * Keep handling FOPEN_STREAM and FOPEN_NONSEEKABLE for
1706 		 * directories for backward compatibility, though it's unlikely
1707 		 * to be useful.
1708 		 */
1709 		if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE))
1710 			nonseekable_open(inode, file);
1711 		if (!(ff->open_flags & FOPEN_KEEP_CACHE))
1712 			invalidate_inode_pages2(inode->i_mapping);
1713 	}
1714 
1715 	return err;
1716 }
1717 
/*
 * ->release for directories: hands @file to fuse_release_common() with its
 * second argument set to true (presumably the "is directory" flag - confirm
 * against fuse_release_common()).  Always returns 0.
 */
static int fuse_dir_release(struct inode *inode, struct file *file)
{
	fuse_release_common(file, true);

	return 0;
}
1724 
/*
 * ->fsync for directories (FUSE_FSYNCDIR), performed under the inode lock.
 *
 * A server that answers -ENOSYS is remembered in fc->no_fsyncdir; from then
 * on the call succeeds without sending a request.
 *
 * Returns 0 on success, -EIO for a bad inode, or the fsync error.
 */
static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
			  int datasync)
{
	struct inode *dir = file->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(dir);
	int ret;

	if (fuse_is_bad(dir))
		return -EIO;

	if (fc->no_fsyncdir)
		return 0;

	inode_lock(dir);
	ret = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
	if (ret == -ENOSYS) {
		fc->no_fsyncdir = 1;
		ret = 0;
	}
	inode_unlock(dir);

	return ret;
}
1748 
fuse_dir_ioctl(struct file * file,unsigned int cmd,unsigned long arg)1749 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1750 			    unsigned long arg)
1751 {
1752 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1753 
1754 	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1755 	if (fc->minor < 18)
1756 		return -ENOTTY;
1757 
1758 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1759 }
1760 
fuse_dir_compat_ioctl(struct file * file,unsigned int cmd,unsigned long arg)1761 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1762 				   unsigned long arg)
1763 {
1764 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1765 
1766 	if (fc->minor < 18)
1767 		return -ENOTTY;
1768 
1769 	return fuse_ioctl_common(file, cmd, arg,
1770 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1771 }
1772 
update_mtime(unsigned ivalid,bool trust_local_mtime)1773 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1774 {
1775 	/* Always update if mtime is explicitly set  */
1776 	if (ivalid & ATTR_MTIME_SET)
1777 		return true;
1778 
1779 	/* Or if kernel i_mtime is the official one */
1780 	if (trust_local_mtime)
1781 		return true;
1782 
1783 	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1784 	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1785 		return false;
1786 
1787 	/* In all other cases update */
1788 	return true;
1789 }
1790 
iattr_to_fattr(struct mnt_idmap * idmap,struct fuse_conn * fc,struct iattr * iattr,struct fuse_setattr_in * arg,bool trust_local_cmtime)1791 static void iattr_to_fattr(struct mnt_idmap *idmap, struct fuse_conn *fc,
1792 			   struct iattr *iattr, struct fuse_setattr_in *arg,
1793 			   bool trust_local_cmtime)
1794 {
1795 	unsigned ivalid = iattr->ia_valid;
1796 
1797 	if (ivalid & ATTR_MODE)
1798 		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1799 
1800 	if (ivalid & ATTR_UID) {
1801 		kuid_t fsuid = from_vfsuid(idmap, fc->user_ns, iattr->ia_vfsuid);
1802 
1803 		arg->valid |= FATTR_UID;
1804 		arg->uid = from_kuid(fc->user_ns, fsuid);
1805 	}
1806 
1807 	if (ivalid & ATTR_GID) {
1808 		kgid_t fsgid = from_vfsgid(idmap, fc->user_ns, iattr->ia_vfsgid);
1809 
1810 		arg->valid |= FATTR_GID;
1811 		arg->gid = from_kgid(fc->user_ns, fsgid);
1812 	}
1813 
1814 	if (ivalid & ATTR_SIZE)
1815 		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1816 	if (ivalid & ATTR_ATIME) {
1817 		arg->valid |= FATTR_ATIME;
1818 		arg->atime = iattr->ia_atime.tv_sec;
1819 		arg->atimensec = iattr->ia_atime.tv_nsec;
1820 		if (!(ivalid & ATTR_ATIME_SET))
1821 			arg->valid |= FATTR_ATIME_NOW;
1822 	}
1823 	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1824 		arg->valid |= FATTR_MTIME;
1825 		arg->mtime = iattr->ia_mtime.tv_sec;
1826 		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1827 		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1828 			arg->valid |= FATTR_MTIME_NOW;
1829 	}
1830 	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1831 		arg->valid |= FATTR_CTIME;
1832 		arg->ctime = iattr->ia_ctime.tv_sec;
1833 		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1834 	}
1835 }
1836 
1837 /*
1838  * Prevent concurrent writepages on inode
1839  *
1840  * This is done by adding a negative bias to the inode write counter
1841  * and waiting for all pending writes to finish.
1842  */
fuse_set_nowrite(struct inode * inode)1843 void fuse_set_nowrite(struct inode *inode)
1844 {
1845 	struct fuse_inode *fi = get_fuse_inode(inode);
1846 
1847 	BUG_ON(!inode_is_locked(inode));
1848 
1849 	spin_lock(&fi->lock);
1850 	BUG_ON(fi->writectr < 0);
1851 	fi->writectr += FUSE_NOWRITE;
1852 	spin_unlock(&fi->lock);
1853 	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1854 }
1855 
1856 /*
1857  * Allow writepages on inode
1858  *
1859  * Remove the bias from the writecounter and send any queued
1860  * writepages.
1861  */
__fuse_release_nowrite(struct inode * inode)1862 static void __fuse_release_nowrite(struct inode *inode)
1863 {
1864 	struct fuse_inode *fi = get_fuse_inode(inode);
1865 
1866 	BUG_ON(fi->writectr != FUSE_NOWRITE);
1867 	fi->writectr = 0;
1868 	fuse_flush_writepages(inode);
1869 }
1870 
fuse_release_nowrite(struct inode * inode)1871 void fuse_release_nowrite(struct inode *inode)
1872 {
1873 	struct fuse_inode *fi = get_fuse_inode(inode);
1874 
1875 	spin_lock(&fi->lock);
1876 	__fuse_release_nowrite(inode);
1877 	spin_unlock(&fi->lock);
1878 }
1879 
fuse_setattr_fill(struct fuse_conn * fc,struct fuse_args * args,struct inode * inode,struct fuse_setattr_in * inarg_p,struct fuse_attr_out * outarg_p)1880 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1881 			      struct inode *inode,
1882 			      struct fuse_setattr_in *inarg_p,
1883 			      struct fuse_attr_out *outarg_p)
1884 {
1885 	args->opcode = FUSE_SETATTR;
1886 	args->nodeid = get_node_id(inode);
1887 	args->in_numargs = 1;
1888 	args->in_args[0].size = sizeof(*inarg_p);
1889 	args->in_args[0].value = inarg_p;
1890 	args->out_numargs = 1;
1891 	args->out_args[0].size = sizeof(*outarg_p);
1892 	args->out_args[0].value = outarg_p;
1893 }
1894 
1895 /*
1896  * Flush inode->i_mtime to the server
1897  */
fuse_flush_times(struct inode * inode,struct fuse_file * ff)1898 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1899 {
1900 	struct fuse_mount *fm = get_fuse_mount(inode);
1901 	FUSE_ARGS(args);
1902 	struct fuse_setattr_in inarg;
1903 	struct fuse_attr_out outarg;
1904 
1905 	memset(&inarg, 0, sizeof(inarg));
1906 	memset(&outarg, 0, sizeof(outarg));
1907 
1908 	inarg.valid = FATTR_MTIME;
1909 	inarg.mtime = inode_get_mtime_sec(inode);
1910 	inarg.mtimensec = inode_get_mtime_nsec(inode);
1911 	if (fm->fc->minor >= 23) {
1912 		inarg.valid |= FATTR_CTIME;
1913 		inarg.ctime = inode_get_ctime_sec(inode);
1914 		inarg.ctimensec = inode_get_ctime_nsec(inode);
1915 	}
1916 	if (ff) {
1917 		inarg.valid |= FATTR_FH;
1918 		inarg.fh = ff->fh;
1919 	}
1920 	fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
1921 
1922 	return fuse_simple_request(fm, &args);
1923 }
1924 
1925 /*
1926  * Set attributes, and at the same time refresh them.
1927  *
1928  * Truncation is slightly complicated, because the 'truncate' request
1929  * may fail, in which case we don't want to touch the mapping.
1930  * vmtruncate() doesn't allow for this case, so do the rlimit checking
1931  * and the actual truncation by hand.
1932  */
fuse_do_setattr(struct mnt_idmap * idmap,struct dentry * dentry,struct iattr * attr,struct file * file)1933 int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
1934 		    struct iattr *attr, struct file *file)
1935 {
1936 	struct inode *inode = d_inode(dentry);
1937 	struct fuse_mount *fm = get_fuse_mount(inode);
1938 	struct fuse_conn *fc = fm->fc;
1939 	struct fuse_inode *fi = get_fuse_inode(inode);
1940 	struct address_space *mapping = inode->i_mapping;
1941 	FUSE_ARGS(args);
1942 	struct fuse_setattr_in inarg;
1943 	struct fuse_attr_out outarg;
1944 	bool is_truncate = false;
1945 	bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode);
1946 	loff_t oldsize;
1947 	int err;
1948 	bool trust_local_cmtime = is_wb;
1949 	bool fault_blocked = false;
1950 	u64 attr_version;
1951 
1952 	if (!fc->default_permissions)
1953 		attr->ia_valid |= ATTR_FORCE;
1954 
1955 	err = setattr_prepare(idmap, dentry, attr);
1956 	if (err)
1957 		return err;
1958 
1959 	if (attr->ia_valid & ATTR_SIZE) {
1960 		if (WARN_ON(!S_ISREG(inode->i_mode)))
1961 			return -EIO;
1962 		is_truncate = true;
1963 	}
1964 
1965 	if (FUSE_IS_DAX(inode) && is_truncate) {
1966 		filemap_invalidate_lock(mapping);
1967 		fault_blocked = true;
1968 		err = fuse_dax_break_layouts(inode, 0, -1);
1969 		if (err) {
1970 			filemap_invalidate_unlock(mapping);
1971 			return err;
1972 		}
1973 	}
1974 
1975 	if (attr->ia_valid & ATTR_OPEN) {
1976 		/* This is coming from open(..., ... | O_TRUNC); */
1977 		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
1978 		WARN_ON(attr->ia_size != 0);
1979 		if (fc->atomic_o_trunc) {
1980 			/*
1981 			 * No need to send request to userspace, since actual
1982 			 * truncation has already been done by OPEN.  But still
1983 			 * need to truncate page cache.
1984 			 */
1985 			i_size_write(inode, 0);
1986 			truncate_pagecache(inode, 0);
1987 			goto out;
1988 		}
1989 		file = NULL;
1990 	}
1991 
1992 	/* Flush dirty data/metadata before non-truncate SETATTR */
1993 	if (is_wb &&
1994 	    attr->ia_valid &
1995 			(ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
1996 			 ATTR_TIMES_SET)) {
1997 		err = write_inode_now(inode, true);
1998 		if (err)
1999 			return err;
2000 
2001 		fuse_set_nowrite(inode);
2002 		fuse_release_nowrite(inode);
2003 	}
2004 
2005 	if (is_truncate) {
2006 		fuse_set_nowrite(inode);
2007 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2008 		if (trust_local_cmtime && attr->ia_size != inode->i_size)
2009 			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
2010 	}
2011 
2012 	memset(&inarg, 0, sizeof(inarg));
2013 	memset(&outarg, 0, sizeof(outarg));
2014 	iattr_to_fattr(idmap, fc, attr, &inarg, trust_local_cmtime);
2015 	if (file) {
2016 		struct fuse_file *ff = file->private_data;
2017 		inarg.valid |= FATTR_FH;
2018 		inarg.fh = ff->fh;
2019 	}
2020 
2021 	/* Kill suid/sgid for non-directory chown unconditionally */
2022 	if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
2023 	    attr->ia_valid & (ATTR_UID | ATTR_GID))
2024 		inarg.valid |= FATTR_KILL_SUIDGID;
2025 
2026 	if (attr->ia_valid & ATTR_SIZE) {
2027 		/* For mandatory locking in truncate */
2028 		inarg.valid |= FATTR_LOCKOWNER;
2029 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
2030 
2031 		/* Kill suid/sgid for truncate only if no CAP_FSETID */
2032 		if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
2033 			inarg.valid |= FATTR_KILL_SUIDGID;
2034 	}
2035 
2036 	attr_version = fuse_get_attr_version(fm->fc);
2037 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
2038 	err = fuse_simple_request(fm, &args);
2039 	if (err) {
2040 		if (err == -EINTR)
2041 			fuse_invalidate_attr(inode);
2042 		goto error;
2043 	}
2044 
2045 	if (fuse_invalid_attr(&outarg.attr) ||
2046 	    inode_wrong_type(inode, outarg.attr.mode)) {
2047 		fuse_make_bad(inode);
2048 		err = -EIO;
2049 		goto error;
2050 	}
2051 
2052 	spin_lock(&fi->lock);
2053 	/* the kernel maintains i_mtime locally */
2054 	if (trust_local_cmtime) {
2055 		if (attr->ia_valid & ATTR_MTIME)
2056 			inode_set_mtime_to_ts(inode, attr->ia_mtime);
2057 		if (attr->ia_valid & ATTR_CTIME)
2058 			inode_set_ctime_to_ts(inode, attr->ia_ctime);
2059 		/* FIXME: clear I_DIRTY_SYNC? */
2060 	}
2061 
2062 	if (fi->attr_version > attr_version) {
2063 		/*
2064 		 * Apply attributes, for example for fsnotify_change(), but set
2065 		 * attribute timeout to zero.
2066 		 */
2067 		outarg.attr_valid = outarg.attr_valid_nsec = 0;
2068 	}
2069 
2070 	fuse_change_attributes_common(inode, &outarg.attr, NULL,
2071 				      ATTR_TIMEOUT(&outarg),
2072 				      fuse_get_cache_mask(inode), 0);
2073 	oldsize = inode->i_size;
2074 	/* see the comment in fuse_change_attributes() */
2075 	if (!is_wb || is_truncate)
2076 		i_size_write(inode, outarg.attr.size);
2077 
2078 	if (is_truncate) {
2079 		/* NOTE: this may release/reacquire fi->lock */
2080 		__fuse_release_nowrite(inode);
2081 	}
2082 	spin_unlock(&fi->lock);
2083 
2084 	/*
2085 	 * Only call invalidate_inode_pages2() after removing
2086 	 * FUSE_NOWRITE, otherwise fuse_launder_folio() would deadlock.
2087 	 */
2088 	if ((is_truncate || !is_wb) &&
2089 	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
2090 		truncate_pagecache(inode, outarg.attr.size);
2091 		invalidate_inode_pages2(mapping);
2092 	}
2093 
2094 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2095 out:
2096 	if (fault_blocked)
2097 		filemap_invalidate_unlock(mapping);
2098 
2099 	return 0;
2100 
2101 error:
2102 	if (is_truncate)
2103 		fuse_release_nowrite(inode);
2104 
2105 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2106 
2107 	if (fault_blocked)
2108 		filemap_invalidate_unlock(mapping);
2109 	return err;
2110 }
2111 
fuse_setattr(struct mnt_idmap * idmap,struct dentry * entry,struct iattr * attr)2112 static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry,
2113 			struct iattr *attr)
2114 {
2115 	struct inode *inode = d_inode(entry);
2116 	struct fuse_conn *fc = get_fuse_conn(inode);
2117 	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
2118 	int ret;
2119 
2120 	if (fuse_is_bad(inode))
2121 		return -EIO;
2122 
2123 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
2124 		return -EACCES;
2125 
2126 	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
2127 		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
2128 				    ATTR_MODE);
2129 
2130 		/*
2131 		 * The only sane way to reliably kill suid/sgid is to do it in
2132 		 * the userspace filesystem
2133 		 *
2134 		 * This should be done on write(), truncate() and chown().
2135 		 */
2136 		if (!fc->handle_killpriv && !fc->handle_killpriv_v2) {
2137 			/*
2138 			 * ia_mode calculation may have used stale i_mode.
2139 			 * Refresh and recalculate.
2140 			 */
2141 			ret = fuse_do_getattr(idmap, inode, NULL, file);
2142 			if (ret)
2143 				return ret;
2144 
2145 			attr->ia_mode = inode->i_mode;
2146 			if (inode->i_mode & S_ISUID) {
2147 				attr->ia_valid |= ATTR_MODE;
2148 				attr->ia_mode &= ~S_ISUID;
2149 			}
2150 			if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
2151 				attr->ia_valid |= ATTR_MODE;
2152 				attr->ia_mode &= ~S_ISGID;
2153 			}
2154 		}
2155 	}
2156 	if (!attr->ia_valid)
2157 		return 0;
2158 
2159 	ret = fuse_do_setattr(idmap, entry, attr, file);
2160 	if (!ret) {
2161 		/*
2162 		 * If filesystem supports acls it may have updated acl xattrs in
2163 		 * the filesystem, so forget cached acls for the inode.
2164 		 */
2165 		if (fc->posix_acl)
2166 			forget_all_cached_acls(inode);
2167 
2168 		/* Directory mode changed, may need to revalidate access */
2169 		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
2170 			fuse_invalidate_entry_cache(entry);
2171 	}
2172 	return ret;
2173 }
2174 
/*
 * ->getattr handler.
 *
 * Returns -EIO for a bad inode.  A process that is not allowed on this
 * connection gets -EACCES, unless it asked for nothing (@request_mask == 0),
 * in which case only stat->dev is filled in and 0 is returned.  Everyone
 * else gets the result of fuse_update_get_attr().
 */
static int fuse_getattr(struct mnt_idmap *idmap,
			const struct path *path, struct kstat *stat,
			u32 request_mask, unsigned int flags)
{
	struct inode *inode = d_inode(path->dentry);
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (fuse_is_bad(inode))
		return -EIO;

	if (fuse_allow_current_process(fc))
		return fuse_update_get_attr(idmap, inode, NULL, stat,
					    request_mask, flags);

	if (request_mask)
		return -EACCES;

	/*
	 * If user explicitly requested *nothing* then don't
	 * error out, but return st_dev only.
	 */
	stat->result_mask = 0;
	stat->dev = inode->i_sb->s_dev;
	return 0;
}
2200 
2201 static const struct inode_operations fuse_dir_inode_operations = {
2202 	.lookup		= fuse_lookup,
2203 	.mkdir		= fuse_mkdir,
2204 	.symlink	= fuse_symlink,
2205 	.unlink		= fuse_unlink,
2206 	.rmdir		= fuse_rmdir,
2207 	.rename		= fuse_rename2,
2208 	.link		= fuse_link,
2209 	.setattr	= fuse_setattr,
2210 	.create		= fuse_create,
2211 	.atomic_open	= fuse_atomic_open,
2212 	.tmpfile	= fuse_tmpfile,
2213 	.mknod		= fuse_mknod,
2214 	.permission	= fuse_permission,
2215 	.getattr	= fuse_getattr,
2216 	.listxattr	= fuse_listxattr,
2217 	.get_inode_acl	= fuse_get_inode_acl,
2218 	.get_acl	= fuse_get_acl,
2219 	.set_acl	= fuse_set_acl,
2220 	.fileattr_get	= fuse_fileattr_get,
2221 	.fileattr_set	= fuse_fileattr_set,
2222 };
2223 
2224 static const struct file_operations fuse_dir_operations = {
2225 	.llseek		= generic_file_llseek,
2226 	.read		= generic_read_dir,
2227 	.iterate_shared	= fuse_readdir,
2228 	.open		= fuse_dir_open,
2229 	.release	= fuse_dir_release,
2230 	.fsync		= fuse_dir_fsync,
2231 	.unlocked_ioctl	= fuse_dir_ioctl,
2232 	.compat_ioctl	= fuse_dir_compat_ioctl,
2233 };
2234 
2235 static const struct inode_operations fuse_common_inode_operations = {
2236 	.setattr	= fuse_setattr,
2237 	.permission	= fuse_permission,
2238 	.getattr	= fuse_getattr,
2239 	.listxattr	= fuse_listxattr,
2240 	.get_inode_acl	= fuse_get_inode_acl,
2241 	.get_acl	= fuse_get_acl,
2242 	.set_acl	= fuse_set_acl,
2243 	.fileattr_get	= fuse_fileattr_get,
2244 	.fileattr_set	= fuse_fileattr_set,
2245 };
2246 
2247 static const struct inode_operations fuse_symlink_inode_operations = {
2248 	.setattr	= fuse_setattr,
2249 	.get_link	= fuse_get_link,
2250 	.getattr	= fuse_getattr,
2251 	.listxattr	= fuse_listxattr,
2252 };
2253 
fuse_init_common(struct inode * inode)2254 void fuse_init_common(struct inode *inode)
2255 {
2256 	inode->i_op = &fuse_common_inode_operations;
2257 }
2258 
fuse_init_dir(struct inode * inode)2259 void fuse_init_dir(struct inode *inode)
2260 {
2261 	struct fuse_inode *fi = get_fuse_inode(inode);
2262 
2263 	inode->i_op = &fuse_dir_inode_operations;
2264 	inode->i_fop = &fuse_dir_operations;
2265 
2266 	spin_lock_init(&fi->rdc.lock);
2267 	fi->rdc.cached = false;
2268 	fi->rdc.size = 0;
2269 	fi->rdc.pos = 0;
2270 	fi->rdc.version = 0;
2271 }
2272 
fuse_symlink_read_folio(struct file * null,struct folio * folio)2273 static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
2274 {
2275 	int err = fuse_readlink_folio(folio->mapping->host, folio);
2276 
2277 	if (!err)
2278 		folio_mark_uptodate(folio);
2279 
2280 	folio_unlock(folio);
2281 
2282 	return err;
2283 }
2284 
2285 static const struct address_space_operations fuse_symlink_aops = {
2286 	.read_folio	= fuse_symlink_read_folio,
2287 };
2288 
fuse_init_symlink(struct inode * inode)2289 void fuse_init_symlink(struct inode *inode)
2290 {
2291 	inode->i_op = &fuse_symlink_inode_operations;
2292 	inode->i_data.a_ops = &fuse_symlink_aops;
2293 	inode_nohighmem(inode);
2294 }
2295