xref: /linux/fs/fuse/inode.c (revision 5cfe477f6a3f9a4d9b2906d442964f2115b0403f)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/pagemap.h>
12 #include <linux/slab.h>
13 #include <linux/file.h>
14 #include <linux/seq_file.h>
15 #include <linux/init.h>
16 #include <linux/module.h>
17 #include <linux/moduleparam.h>
18 #include <linux/fs_context.h>
19 #include <linux/fs_parser.h>
20 #include <linux/statfs.h>
21 #include <linux/random.h>
22 #include <linux/sched.h>
23 #include <linux/exportfs.h>
24 #include <linux/posix_acl.h>
25 #include <linux/pid_namespace.h>
26 #include <uapi/linux/magic.h>
27 
28 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
29 MODULE_DESCRIPTION("Filesystem in Userspace");
30 MODULE_LICENSE("GPL");
31 
/* Slab cache used by fuse_alloc_inode() and fuse_free_inode() in this file */
static struct kmem_cache *fuse_inode_cachep;
/*
 * NOTE(review): presumably the global list of fuse connections, with
 * fuse_mutex guarding it -- neither is used in this part of the file,
 * confirm against their users.
 */
struct list_head fuse_conn_list;
DEFINE_MUTEX(fuse_mutex);

/* Setter shared by the two module parameters below; defined further down */
static int set_global_limit(const char *val, const struct kernel_param *kp);

/* Cap applied to max_background for callers without CAP_SYS_ADMIN */
unsigned max_user_bgreq;
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
		  &max_user_bgreq, 0644);
__MODULE_PARM_TYPE(max_user_bgreq, "uint");
MODULE_PARM_DESC(max_user_bgreq,
 "Global limit for the maximum number of backgrounded requests an "
 "unprivileged user can set");

/* Cap applied to congestion_threshold for callers without CAP_SYS_ADMIN */
unsigned max_user_congthresh;
module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
		  &max_user_congthresh, 0644);
__MODULE_PARM_TYPE(max_user_congthresh, "uint");
MODULE_PARM_DESC(max_user_congthresh,
 "Global limit for the maximum congestion threshold an "
 "unprivileged user can set");

/* Block size that fuse_show_options() treats as "not worth printing" */
#define FUSE_DEFAULT_BLKSIZE 512

/** Maximum number of outstanding background requests */
#define FUSE_DEFAULT_MAX_BACKGROUND 12

/** Congestion starts at 75% of maximum */
#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)

#ifdef CONFIG_BLOCK
/* Forward declaration; the definition is not in this part of the file */
static struct file_system_type fuseblk_fs_type;
#endif
65 
66 struct fuse_forget_link *fuse_alloc_forget(void)
67 {
68 	return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT);
69 }
70 
71 static struct inode *fuse_alloc_inode(struct super_block *sb)
72 {
73 	struct fuse_inode *fi;
74 
75 	fi = alloc_inode_sb(sb, fuse_inode_cachep, GFP_KERNEL);
76 	if (!fi)
77 		return NULL;
78 
79 	fi->i_time = 0;
80 	fi->inval_mask = 0;
81 	fi->nodeid = 0;
82 	fi->nlookup = 0;
83 	fi->attr_version = 0;
84 	fi->orig_ino = 0;
85 	fi->state = 0;
86 	mutex_init(&fi->mutex);
87 	spin_lock_init(&fi->lock);
88 	fi->forget = fuse_alloc_forget();
89 	if (!fi->forget)
90 		goto out_free;
91 
92 	if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi))
93 		goto out_free_forget;
94 
95 	return &fi->inode;
96 
97 out_free_forget:
98 	kfree(fi->forget);
99 out_free:
100 	kmem_cache_free(fuse_inode_cachep, fi);
101 	return NULL;
102 }
103 
104 static void fuse_free_inode(struct inode *inode)
105 {
106 	struct fuse_inode *fi = get_fuse_inode(inode);
107 
108 	mutex_destroy(&fi->mutex);
109 	kfree(fi->forget);
110 #ifdef CONFIG_FUSE_DAX
111 	kfree(fi->dax);
112 #endif
113 	kmem_cache_free(fuse_inode_cachep, fi);
114 }
115 
116 static void fuse_evict_inode(struct inode *inode)
117 {
118 	struct fuse_inode *fi = get_fuse_inode(inode);
119 
120 	/* Will write inode on close/munmap and in all other dirtiers */
121 	WARN_ON(inode->i_state & I_DIRTY_INODE);
122 
123 	truncate_inode_pages_final(&inode->i_data);
124 	clear_inode(inode);
125 	if (inode->i_sb->s_flags & SB_ACTIVE) {
126 		struct fuse_conn *fc = get_fuse_conn(inode);
127 
128 		if (FUSE_IS_DAX(inode))
129 			fuse_dax_inode_cleanup(inode);
130 		if (fi->nlookup) {
131 			fuse_queue_forget(fc, fi->forget, fi->nodeid,
132 					  fi->nlookup);
133 			fi->forget = NULL;
134 		}
135 	}
136 	if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
137 		WARN_ON(!list_empty(&fi->write_files));
138 		WARN_ON(!list_empty(&fi->queued_writes));
139 	}
140 }
141 
142 static int fuse_reconfigure(struct fs_context *fsc)
143 {
144 	struct super_block *sb = fsc->root->d_sb;
145 
146 	sync_filesystem(sb);
147 	if (fsc->sb_flags & SB_MANDLOCK)
148 		return -EINVAL;
149 
150 	return 0;
151 }
152 
153 /*
154  * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
155  * so that it will fit.
156  */
157 static ino_t fuse_squash_ino(u64 ino64)
158 {
159 	ino_t ino = (ino_t) ino64;
160 	if (sizeof(ino_t) < sizeof(u64))
161 		ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8;
162 	return ino;
163 }
164 
165 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
166 				   u64 attr_valid, u32 cache_mask)
167 {
168 	struct fuse_conn *fc = get_fuse_conn(inode);
169 	struct fuse_inode *fi = get_fuse_inode(inode);
170 
171 	lockdep_assert_held(&fi->lock);
172 
173 	fi->attr_version = atomic64_inc_return(&fc->attr_version);
174 	fi->i_time = attr_valid;
175 	WRITE_ONCE(fi->inval_mask, 0);
176 
177 	inode->i_ino     = fuse_squash_ino(attr->ino);
178 	inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
179 	set_nlink(inode, attr->nlink);
180 	inode->i_uid     = make_kuid(fc->user_ns, attr->uid);
181 	inode->i_gid     = make_kgid(fc->user_ns, attr->gid);
182 	inode->i_blocks  = attr->blocks;
183 	inode->i_atime.tv_sec   = attr->atime;
184 	inode->i_atime.tv_nsec  = attr->atimensec;
185 	/* mtime from server may be stale due to local buffered write */
186 	if (!(cache_mask & STATX_MTIME)) {
187 		inode->i_mtime.tv_sec   = attr->mtime;
188 		inode->i_mtime.tv_nsec  = attr->mtimensec;
189 	}
190 	if (!(cache_mask & STATX_CTIME)) {
191 		inode->i_ctime.tv_sec   = attr->ctime;
192 		inode->i_ctime.tv_nsec  = attr->ctimensec;
193 	}
194 
195 	if (attr->blksize != 0)
196 		inode->i_blkbits = ilog2(attr->blksize);
197 	else
198 		inode->i_blkbits = inode->i_sb->s_blocksize_bits;
199 
200 	/*
201 	 * Don't set the sticky bit in i_mode, unless we want the VFS
202 	 * to check permissions.  This prevents failures due to the
203 	 * check in may_delete().
204 	 */
205 	fi->orig_i_mode = inode->i_mode;
206 	if (!fc->default_permissions)
207 		inode->i_mode &= ~S_ISVTX;
208 
209 	fi->orig_ino = attr->ino;
210 
211 	/*
212 	 * We are refreshing inode data and it is possible that another
213 	 * client set suid/sgid or security.capability xattr. So clear
214 	 * S_NOSEC. Ideally, we could have cleared it only if suid/sgid
215 	 * was set or if security.capability xattr was set. But we don't
216 	 * know if security.capability has been set or not. So clear it
217 	 * anyway. Its less efficient but should be safe.
218 	 */
219 	inode->i_flags &= ~S_NOSEC;
220 }
221 
222 u32 fuse_get_cache_mask(struct inode *inode)
223 {
224 	struct fuse_conn *fc = get_fuse_conn(inode);
225 
226 	if (!fc->writeback_cache || !S_ISREG(inode->i_mode))
227 		return 0;
228 
229 	return STATX_MTIME | STATX_CTIME | STATX_SIZE;
230 }
231 
232 void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
233 			    u64 attr_valid, u64 attr_version)
234 {
235 	struct fuse_conn *fc = get_fuse_conn(inode);
236 	struct fuse_inode *fi = get_fuse_inode(inode);
237 	u32 cache_mask;
238 	loff_t oldsize;
239 	struct timespec64 old_mtime;
240 
241 	spin_lock(&fi->lock);
242 	/*
243 	 * In case of writeback_cache enabled, writes update mtime, ctime and
244 	 * may update i_size.  In these cases trust the cached value in the
245 	 * inode.
246 	 */
247 	cache_mask = fuse_get_cache_mask(inode);
248 	if (cache_mask & STATX_SIZE)
249 		attr->size = i_size_read(inode);
250 
251 	if (cache_mask & STATX_MTIME) {
252 		attr->mtime = inode->i_mtime.tv_sec;
253 		attr->mtimensec = inode->i_mtime.tv_nsec;
254 	}
255 	if (cache_mask & STATX_CTIME) {
256 		attr->ctime = inode->i_ctime.tv_sec;
257 		attr->ctimensec = inode->i_ctime.tv_nsec;
258 	}
259 
260 	if ((attr_version != 0 && fi->attr_version > attr_version) ||
261 	    test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
262 		spin_unlock(&fi->lock);
263 		return;
264 	}
265 
266 	old_mtime = inode->i_mtime;
267 	fuse_change_attributes_common(inode, attr, attr_valid, cache_mask);
268 
269 	oldsize = inode->i_size;
270 	/*
271 	 * In case of writeback_cache enabled, the cached writes beyond EOF
272 	 * extend local i_size without keeping userspace server in sync. So,
273 	 * attr->size coming from server can be stale. We cannot trust it.
274 	 */
275 	if (!(cache_mask & STATX_SIZE))
276 		i_size_write(inode, attr->size);
277 	spin_unlock(&fi->lock);
278 
279 	if (!cache_mask && S_ISREG(inode->i_mode)) {
280 		bool inval = false;
281 
282 		if (oldsize != attr->size) {
283 			truncate_pagecache(inode, attr->size);
284 			if (!fc->explicit_inval_data)
285 				inval = true;
286 		} else if (fc->auto_inval_data) {
287 			struct timespec64 new_mtime = {
288 				.tv_sec = attr->mtime,
289 				.tv_nsec = attr->mtimensec,
290 			};
291 
292 			/*
293 			 * Auto inval mode also checks and invalidates if mtime
294 			 * has changed.
295 			 */
296 			if (!timespec64_equal(&old_mtime, &new_mtime))
297 				inval = true;
298 		}
299 
300 		if (inval)
301 			invalidate_inode_pages2(inode->i_mapping);
302 	}
303 
304 	if (IS_ENABLED(CONFIG_FUSE_DAX))
305 		fuse_dax_dontcache(inode, attr->flags);
306 }
307 
308 static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
309 {
310 	inode->i_mode = attr->mode & S_IFMT;
311 	inode->i_size = attr->size;
312 	inode->i_mtime.tv_sec  = attr->mtime;
313 	inode->i_mtime.tv_nsec = attr->mtimensec;
314 	inode->i_ctime.tv_sec  = attr->ctime;
315 	inode->i_ctime.tv_nsec = attr->ctimensec;
316 	if (S_ISREG(inode->i_mode)) {
317 		fuse_init_common(inode);
318 		fuse_init_file_inode(inode, attr->flags);
319 	} else if (S_ISDIR(inode->i_mode))
320 		fuse_init_dir(inode);
321 	else if (S_ISLNK(inode->i_mode))
322 		fuse_init_symlink(inode);
323 	else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
324 		 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
325 		fuse_init_common(inode);
326 		init_special_inode(inode, inode->i_mode,
327 				   new_decode_dev(attr->rdev));
328 	} else
329 		BUG();
330 }
331 
332 static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
333 {
334 	u64 nodeid = *(u64 *) _nodeidp;
335 	if (get_node_id(inode) == nodeid)
336 		return 1;
337 	else
338 		return 0;
339 }
340 
341 static int fuse_inode_set(struct inode *inode, void *_nodeidp)
342 {
343 	u64 nodeid = *(u64 *) _nodeidp;
344 	get_fuse_inode(inode)->nodeid = nodeid;
345 	return 0;
346 }
347 
348 struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
349 			int generation, struct fuse_attr *attr,
350 			u64 attr_valid, u64 attr_version)
351 {
352 	struct inode *inode;
353 	struct fuse_inode *fi;
354 	struct fuse_conn *fc = get_fuse_conn_super(sb);
355 
356 	/*
357 	 * Auto mount points get their node id from the submount root, which is
358 	 * not a unique identifier within this filesystem.
359 	 *
360 	 * To avoid conflicts, do not place submount points into the inode hash
361 	 * table.
362 	 */
363 	if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) &&
364 	    S_ISDIR(attr->mode)) {
365 		inode = new_inode(sb);
366 		if (!inode)
367 			return NULL;
368 
369 		fuse_init_inode(inode, attr);
370 		get_fuse_inode(inode)->nodeid = nodeid;
371 		inode->i_flags |= S_AUTOMOUNT;
372 		goto done;
373 	}
374 
375 retry:
376 	inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
377 	if (!inode)
378 		return NULL;
379 
380 	if ((inode->i_state & I_NEW)) {
381 		inode->i_flags |= S_NOATIME;
382 		if (!fc->writeback_cache || !S_ISREG(attr->mode))
383 			inode->i_flags |= S_NOCMTIME;
384 		inode->i_generation = generation;
385 		fuse_init_inode(inode, attr);
386 		unlock_new_inode(inode);
387 	} else if (fuse_stale_inode(inode, generation, attr)) {
388 		/* nodeid was reused, any I/O on the old inode should fail */
389 		fuse_make_bad(inode);
390 		iput(inode);
391 		goto retry;
392 	}
393 done:
394 	fi = get_fuse_inode(inode);
395 	spin_lock(&fi->lock);
396 	fi->nlookup++;
397 	spin_unlock(&fi->lock);
398 	fuse_change_attributes(inode, attr, attr_valid, attr_version);
399 
400 	return inode;
401 }
402 
403 struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
404 			   struct fuse_mount **fm)
405 {
406 	struct fuse_mount *fm_iter;
407 	struct inode *inode;
408 
409 	WARN_ON(!rwsem_is_locked(&fc->killsb));
410 	list_for_each_entry(fm_iter, &fc->mounts, fc_entry) {
411 		if (!fm_iter->sb)
412 			continue;
413 
414 		inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &nodeid);
415 		if (inode) {
416 			if (fm)
417 				*fm = fm_iter;
418 			return inode;
419 		}
420 	}
421 
422 	return NULL;
423 }
424 
425 int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
426 			     loff_t offset, loff_t len)
427 {
428 	struct fuse_inode *fi;
429 	struct inode *inode;
430 	pgoff_t pg_start;
431 	pgoff_t pg_end;
432 
433 	inode = fuse_ilookup(fc, nodeid, NULL);
434 	if (!inode)
435 		return -ENOENT;
436 
437 	fi = get_fuse_inode(inode);
438 	spin_lock(&fi->lock);
439 	fi->attr_version = atomic64_inc_return(&fc->attr_version);
440 	spin_unlock(&fi->lock);
441 
442 	fuse_invalidate_attr(inode);
443 	forget_all_cached_acls(inode);
444 	if (offset >= 0) {
445 		pg_start = offset >> PAGE_SHIFT;
446 		if (len <= 0)
447 			pg_end = -1;
448 		else
449 			pg_end = (offset + len - 1) >> PAGE_SHIFT;
450 		invalidate_inode_pages2_range(inode->i_mapping,
451 					      pg_start, pg_end);
452 	}
453 	iput(inode);
454 	return 0;
455 }
456 
457 bool fuse_lock_inode(struct inode *inode)
458 {
459 	bool locked = false;
460 
461 	if (!get_fuse_conn(inode)->parallel_dirops) {
462 		mutex_lock(&get_fuse_inode(inode)->mutex);
463 		locked = true;
464 	}
465 
466 	return locked;
467 }
468 
469 void fuse_unlock_inode(struct inode *inode, bool locked)
470 {
471 	if (locked)
472 		mutex_unlock(&get_fuse_inode(inode)->mutex);
473 }
474 
475 static void fuse_umount_begin(struct super_block *sb)
476 {
477 	struct fuse_conn *fc = get_fuse_conn_super(sb);
478 
479 	if (!fc->no_force_umount)
480 		fuse_abort_conn(fc);
481 }
482 
483 static void fuse_send_destroy(struct fuse_mount *fm)
484 {
485 	if (fm->fc->conn_init) {
486 		FUSE_ARGS(args);
487 
488 		args.opcode = FUSE_DESTROY;
489 		args.force = true;
490 		args.nocreds = true;
491 		fuse_simple_request(fm, &args);
492 	}
493 }
494 
495 static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
496 {
497 	stbuf->f_type    = FUSE_SUPER_MAGIC;
498 	stbuf->f_bsize   = attr->bsize;
499 	stbuf->f_frsize  = attr->frsize;
500 	stbuf->f_blocks  = attr->blocks;
501 	stbuf->f_bfree   = attr->bfree;
502 	stbuf->f_bavail  = attr->bavail;
503 	stbuf->f_files   = attr->files;
504 	stbuf->f_ffree   = attr->ffree;
505 	stbuf->f_namelen = attr->namelen;
506 	/* fsid is left zero */
507 }
508 
509 static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
510 {
511 	struct super_block *sb = dentry->d_sb;
512 	struct fuse_mount *fm = get_fuse_mount_super(sb);
513 	FUSE_ARGS(args);
514 	struct fuse_statfs_out outarg;
515 	int err;
516 
517 	if (!fuse_allow_current_process(fm->fc)) {
518 		buf->f_type = FUSE_SUPER_MAGIC;
519 		return 0;
520 	}
521 
522 	memset(&outarg, 0, sizeof(outarg));
523 	args.in_numargs = 0;
524 	args.opcode = FUSE_STATFS;
525 	args.nodeid = get_node_id(d_inode(dentry));
526 	args.out_numargs = 1;
527 	args.out_args[0].size = sizeof(outarg);
528 	args.out_args[0].value = &outarg;
529 	err = fuse_simple_request(fm, &args);
530 	if (!err)
531 		convert_fuse_statfs(buf, &outarg.st);
532 	return err;
533 }
534 
535 static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void)
536 {
537 	struct fuse_sync_bucket *bucket;
538 
539 	bucket = kzalloc(sizeof(*bucket), GFP_KERNEL | __GFP_NOFAIL);
540 	if (bucket) {
541 		init_waitqueue_head(&bucket->waitq);
542 		/* Initial active count */
543 		atomic_set(&bucket->count, 1);
544 	}
545 	return bucket;
546 }
547 
548 static void fuse_sync_fs_writes(struct fuse_conn *fc)
549 {
550 	struct fuse_sync_bucket *bucket, *new_bucket;
551 	int count;
552 
553 	new_bucket = fuse_sync_bucket_alloc();
554 	spin_lock(&fc->lock);
555 	bucket = rcu_dereference_protected(fc->curr_bucket, 1);
556 	count = atomic_read(&bucket->count);
557 	WARN_ON(count < 1);
558 	/* No outstanding writes? */
559 	if (count == 1) {
560 		spin_unlock(&fc->lock);
561 		kfree(new_bucket);
562 		return;
563 	}
564 
565 	/*
566 	 * Completion of new bucket depends on completion of this bucket, so add
567 	 * one more count.
568 	 */
569 	atomic_inc(&new_bucket->count);
570 	rcu_assign_pointer(fc->curr_bucket, new_bucket);
571 	spin_unlock(&fc->lock);
572 	/*
573 	 * Drop initial active count.  At this point if all writes in this and
574 	 * ancestor buckets complete, the count will go to zero and this task
575 	 * will be woken up.
576 	 */
577 	atomic_dec(&bucket->count);
578 
579 	wait_event(bucket->waitq, atomic_read(&bucket->count) == 0);
580 
581 	/* Drop temp count on descendant bucket */
582 	fuse_sync_bucket_dec(new_bucket);
583 	kfree_rcu(bucket, rcu);
584 }
585 
586 static int fuse_sync_fs(struct super_block *sb, int wait)
587 {
588 	struct fuse_mount *fm = get_fuse_mount_super(sb);
589 	struct fuse_conn *fc = fm->fc;
590 	struct fuse_syncfs_in inarg;
591 	FUSE_ARGS(args);
592 	int err;
593 
594 	/*
595 	 * Userspace cannot handle the wait == 0 case.  Avoid a
596 	 * gratuitous roundtrip.
597 	 */
598 	if (!wait)
599 		return 0;
600 
601 	/* The filesystem is being unmounted.  Nothing to do. */
602 	if (!sb->s_root)
603 		return 0;
604 
605 	if (!fc->sync_fs)
606 		return 0;
607 
608 	fuse_sync_fs_writes(fc);
609 
610 	memset(&inarg, 0, sizeof(inarg));
611 	args.in_numargs = 1;
612 	args.in_args[0].size = sizeof(inarg);
613 	args.in_args[0].value = &inarg;
614 	args.opcode = FUSE_SYNCFS;
615 	args.nodeid = get_node_id(sb->s_root->d_inode);
616 	args.out_numargs = 0;
617 
618 	err = fuse_simple_request(fm, &args);
619 	if (err == -ENOSYS) {
620 		fc->sync_fs = 0;
621 		err = 0;
622 	}
623 
624 	return err;
625 }
626 
/*
 * Mount option identifiers.  fuse_fs_parameters[] maps option names to
 * these values and fuse_parse_param() switches on them.
 */
enum {
	OPT_SOURCE,
	OPT_SUBTYPE,
	OPT_FD,
	OPT_ROOTMODE,
	OPT_USER_ID,
	OPT_GROUP_ID,
	OPT_DEFAULT_PERMISSIONS,
	OPT_ALLOW_OTHER,
	OPT_MAX_READ,
	OPT_BLKSIZE,
	/* NOTE(review): not referenced by the parameter table or the parser */
	OPT_ERR
};
640 
/*
 * Option table passed to fs_parse() by fuse_parse_param(): one entry per
 * accepted mount option, giving its value type and OPT_* identifier.  The
 * empty entry terminates the table.
 */
static const struct fs_parameter_spec fuse_fs_parameters[] = {
	fsparam_string	("source",		OPT_SOURCE),
	fsparam_u32	("fd",			OPT_FD),
	fsparam_u32oct	("rootmode",		OPT_ROOTMODE),
	fsparam_u32	("user_id",		OPT_USER_ID),
	fsparam_u32	("group_id",		OPT_GROUP_ID),
	fsparam_flag	("default_permissions",	OPT_DEFAULT_PERMISSIONS),
	fsparam_flag	("allow_other",		OPT_ALLOW_OTHER),
	fsparam_u32	("max_read",		OPT_MAX_READ),
	fsparam_u32	("blksize",		OPT_BLKSIZE),
	fsparam_string	("subtype",		OPT_SUBTYPE),
	{}
};
654 
655 static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
656 {
657 	struct fs_parse_result result;
658 	struct fuse_fs_context *ctx = fsc->fs_private;
659 	int opt;
660 
661 	if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
662 		/*
663 		 * Ignore options coming from mount(MS_REMOUNT) for backward
664 		 * compatibility.
665 		 */
666 		if (fsc->oldapi)
667 			return 0;
668 
669 		return invalfc(fsc, "No changes allowed in reconfigure");
670 	}
671 
672 	opt = fs_parse(fsc, fuse_fs_parameters, param, &result);
673 	if (opt < 0)
674 		return opt;
675 
676 	switch (opt) {
677 	case OPT_SOURCE:
678 		if (fsc->source)
679 			return invalfc(fsc, "Multiple sources specified");
680 		fsc->source = param->string;
681 		param->string = NULL;
682 		break;
683 
684 	case OPT_SUBTYPE:
685 		if (ctx->subtype)
686 			return invalfc(fsc, "Multiple subtypes specified");
687 		ctx->subtype = param->string;
688 		param->string = NULL;
689 		return 0;
690 
691 	case OPT_FD:
692 		ctx->fd = result.uint_32;
693 		ctx->fd_present = true;
694 		break;
695 
696 	case OPT_ROOTMODE:
697 		if (!fuse_valid_type(result.uint_32))
698 			return invalfc(fsc, "Invalid rootmode");
699 		ctx->rootmode = result.uint_32;
700 		ctx->rootmode_present = true;
701 		break;
702 
703 	case OPT_USER_ID:
704 		ctx->user_id = make_kuid(fsc->user_ns, result.uint_32);
705 		if (!uid_valid(ctx->user_id))
706 			return invalfc(fsc, "Invalid user_id");
707 		ctx->user_id_present = true;
708 		break;
709 
710 	case OPT_GROUP_ID:
711 		ctx->group_id = make_kgid(fsc->user_ns, result.uint_32);
712 		if (!gid_valid(ctx->group_id))
713 			return invalfc(fsc, "Invalid group_id");
714 		ctx->group_id_present = true;
715 		break;
716 
717 	case OPT_DEFAULT_PERMISSIONS:
718 		ctx->default_permissions = true;
719 		break;
720 
721 	case OPT_ALLOW_OTHER:
722 		ctx->allow_other = true;
723 		break;
724 
725 	case OPT_MAX_READ:
726 		ctx->max_read = result.uint_32;
727 		break;
728 
729 	case OPT_BLKSIZE:
730 		if (!ctx->is_bdev)
731 			return invalfc(fsc, "blksize only supported for fuseblk");
732 		ctx->blksize = result.uint_32;
733 		break;
734 
735 	default:
736 		return -EINVAL;
737 	}
738 
739 	return 0;
740 }
741 
742 static void fuse_free_fsc(struct fs_context *fsc)
743 {
744 	struct fuse_fs_context *ctx = fsc->fs_private;
745 
746 	if (ctx) {
747 		kfree(ctx->subtype);
748 		kfree(ctx);
749 	}
750 }
751 
752 static int fuse_show_options(struct seq_file *m, struct dentry *root)
753 {
754 	struct super_block *sb = root->d_sb;
755 	struct fuse_conn *fc = get_fuse_conn_super(sb);
756 
757 	if (fc->legacy_opts_show) {
758 		seq_printf(m, ",user_id=%u",
759 			   from_kuid_munged(fc->user_ns, fc->user_id));
760 		seq_printf(m, ",group_id=%u",
761 			   from_kgid_munged(fc->user_ns, fc->group_id));
762 		if (fc->default_permissions)
763 			seq_puts(m, ",default_permissions");
764 		if (fc->allow_other)
765 			seq_puts(m, ",allow_other");
766 		if (fc->max_read != ~0)
767 			seq_printf(m, ",max_read=%u", fc->max_read);
768 		if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
769 			seq_printf(m, ",blksize=%lu", sb->s_blocksize);
770 	}
771 #ifdef CONFIG_FUSE_DAX
772 	if (fc->dax_mode == FUSE_DAX_ALWAYS)
773 		seq_puts(m, ",dax=always");
774 	else if (fc->dax_mode == FUSE_DAX_NEVER)
775 		seq_puts(m, ",dax=never");
776 	else if (fc->dax_mode == FUSE_DAX_INODE_USER)
777 		seq_puts(m, ",dax=inode");
778 #endif
779 
780 	return 0;
781 }
782 
783 static void fuse_iqueue_init(struct fuse_iqueue *fiq,
784 			     const struct fuse_iqueue_ops *ops,
785 			     void *priv)
786 {
787 	memset(fiq, 0, sizeof(struct fuse_iqueue));
788 	spin_lock_init(&fiq->lock);
789 	init_waitqueue_head(&fiq->waitq);
790 	INIT_LIST_HEAD(&fiq->pending);
791 	INIT_LIST_HEAD(&fiq->interrupts);
792 	fiq->forget_list_tail = &fiq->forget_list_head;
793 	fiq->connected = 1;
794 	fiq->ops = ops;
795 	fiq->priv = priv;
796 }
797 
798 static void fuse_pqueue_init(struct fuse_pqueue *fpq)
799 {
800 	unsigned int i;
801 
802 	spin_lock_init(&fpq->lock);
803 	for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
804 		INIT_LIST_HEAD(&fpq->processing[i]);
805 	INIT_LIST_HEAD(&fpq->io);
806 	fpq->connected = 1;
807 }
808 
809 void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
810 		    struct user_namespace *user_ns,
811 		    const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv)
812 {
813 	memset(fc, 0, sizeof(*fc));
814 	spin_lock_init(&fc->lock);
815 	spin_lock_init(&fc->bg_lock);
816 	init_rwsem(&fc->killsb);
817 	refcount_set(&fc->count, 1);
818 	atomic_set(&fc->dev_count, 1);
819 	init_waitqueue_head(&fc->blocked_waitq);
820 	fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv);
821 	INIT_LIST_HEAD(&fc->bg_queue);
822 	INIT_LIST_HEAD(&fc->entry);
823 	INIT_LIST_HEAD(&fc->devices);
824 	atomic_set(&fc->num_waiting, 0);
825 	fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
826 	fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
827 	atomic64_set(&fc->khctr, 0);
828 	fc->polled_files = RB_ROOT;
829 	fc->blocked = 0;
830 	fc->initialized = 0;
831 	fc->connected = 1;
832 	atomic64_set(&fc->attr_version, 1);
833 	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
834 	fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
835 	fc->user_ns = get_user_ns(user_ns);
836 	fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
837 	fc->max_pages_limit = FUSE_MAX_MAX_PAGES;
838 
839 	INIT_LIST_HEAD(&fc->mounts);
840 	list_add(&fm->fc_entry, &fc->mounts);
841 	fm->fc = fc;
842 }
843 EXPORT_SYMBOL_GPL(fuse_conn_init);
844 
845 void fuse_conn_put(struct fuse_conn *fc)
846 {
847 	if (refcount_dec_and_test(&fc->count)) {
848 		struct fuse_iqueue *fiq = &fc->iq;
849 		struct fuse_sync_bucket *bucket;
850 
851 		if (IS_ENABLED(CONFIG_FUSE_DAX))
852 			fuse_dax_conn_free(fc);
853 		if (fiq->ops->release)
854 			fiq->ops->release(fiq);
855 		put_pid_ns(fc->pid_ns);
856 		put_user_ns(fc->user_ns);
857 		bucket = rcu_dereference_protected(fc->curr_bucket, 1);
858 		if (bucket) {
859 			WARN_ON(atomic_read(&bucket->count) != 1);
860 			kfree(bucket);
861 		}
862 		fc->release(fc);
863 	}
864 }
865 EXPORT_SYMBOL_GPL(fuse_conn_put);
866 
867 struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
868 {
869 	refcount_inc(&fc->count);
870 	return fc;
871 }
872 EXPORT_SYMBOL_GPL(fuse_conn_get);
873 
874 static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
875 {
876 	struct fuse_attr attr;
877 	memset(&attr, 0, sizeof(attr));
878 
879 	attr.mode = mode;
880 	attr.ino = FUSE_ROOT_ID;
881 	attr.nlink = 1;
882 	return fuse_iget(sb, 1, 0, &attr, 0, 0);
883 }
884 
/*
 * Decoded form of one inode reference from a file handle, as built by
 * fuse_fh_to_dentry() / fuse_fh_to_parent() and consumed by
 * fuse_get_dentry().
 */
struct fuse_inode_handle {
	u64 nodeid;		/* fuse node id; 0 is rejected as stale */
	u32 generation;		/* must equal the inode's i_generation */
};
889 
890 static struct dentry *fuse_get_dentry(struct super_block *sb,
891 				      struct fuse_inode_handle *handle)
892 {
893 	struct fuse_conn *fc = get_fuse_conn_super(sb);
894 	struct inode *inode;
895 	struct dentry *entry;
896 	int err = -ESTALE;
897 
898 	if (handle->nodeid == 0)
899 		goto out_err;
900 
901 	inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
902 	if (!inode) {
903 		struct fuse_entry_out outarg;
904 		const struct qstr name = QSTR_INIT(".", 1);
905 
906 		if (!fc->export_support)
907 			goto out_err;
908 
909 		err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
910 				       &inode);
911 		if (err && err != -ENOENT)
912 			goto out_err;
913 		if (err || !inode) {
914 			err = -ESTALE;
915 			goto out_err;
916 		}
917 		err = -EIO;
918 		if (get_node_id(inode) != handle->nodeid)
919 			goto out_iput;
920 	}
921 	err = -ESTALE;
922 	if (inode->i_generation != handle->generation)
923 		goto out_iput;
924 
925 	entry = d_obtain_alias(inode);
926 	if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID)
927 		fuse_invalidate_entry_cache(entry);
928 
929 	return entry;
930 
931  out_iput:
932 	iput(inode);
933  out_err:
934 	return ERR_PTR(err);
935 }
936 
937 static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len,
938 			   struct inode *parent)
939 {
940 	int len = parent ? 6 : 3;
941 	u64 nodeid;
942 	u32 generation;
943 
944 	if (*max_len < len) {
945 		*max_len = len;
946 		return  FILEID_INVALID;
947 	}
948 
949 	nodeid = get_fuse_inode(inode)->nodeid;
950 	generation = inode->i_generation;
951 
952 	fh[0] = (u32)(nodeid >> 32);
953 	fh[1] = (u32)(nodeid & 0xffffffff);
954 	fh[2] = generation;
955 
956 	if (parent) {
957 		nodeid = get_fuse_inode(parent)->nodeid;
958 		generation = parent->i_generation;
959 
960 		fh[3] = (u32)(nodeid >> 32);
961 		fh[4] = (u32)(nodeid & 0xffffffff);
962 		fh[5] = generation;
963 	}
964 
965 	*max_len = len;
966 	return parent ? 0x82 : 0x81;
967 }
968 
969 static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
970 		struct fid *fid, int fh_len, int fh_type)
971 {
972 	struct fuse_inode_handle handle;
973 
974 	if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
975 		return NULL;
976 
977 	handle.nodeid = (u64) fid->raw[0] << 32;
978 	handle.nodeid |= (u64) fid->raw[1];
979 	handle.generation = fid->raw[2];
980 	return fuse_get_dentry(sb, &handle);
981 }
982 
983 static struct dentry *fuse_fh_to_parent(struct super_block *sb,
984 		struct fid *fid, int fh_len, int fh_type)
985 {
986 	struct fuse_inode_handle parent;
987 
988 	if (fh_type != 0x82 || fh_len < 6)
989 		return NULL;
990 
991 	parent.nodeid = (u64) fid->raw[3] << 32;
992 	parent.nodeid |= (u64) fid->raw[4];
993 	parent.generation = fid->raw[5];
994 	return fuse_get_dentry(sb, &parent);
995 }
996 
997 static struct dentry *fuse_get_parent(struct dentry *child)
998 {
999 	struct inode *child_inode = d_inode(child);
1000 	struct fuse_conn *fc = get_fuse_conn(child_inode);
1001 	struct inode *inode;
1002 	struct dentry *parent;
1003 	struct fuse_entry_out outarg;
1004 	int err;
1005 
1006 	if (!fc->export_support)
1007 		return ERR_PTR(-ESTALE);
1008 
1009 	err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
1010 			       &dotdot_name, &outarg, &inode);
1011 	if (err) {
1012 		if (err == -ENOENT)
1013 			return ERR_PTR(-ESTALE);
1014 		return ERR_PTR(err);
1015 	}
1016 
1017 	parent = d_obtain_alias(inode);
1018 	if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID)
1019 		fuse_invalidate_entry_cache(parent);
1020 
1021 	return parent;
1022 }
1023 
/*
 * File-handle export callbacks: encode a handle from an inode, and decode
 * a handle (or a child dentry) back into dentries.
 */
static const struct export_operations fuse_export_operations = {
	.fh_to_dentry	= fuse_fh_to_dentry,
	.fh_to_parent	= fuse_fh_to_parent,
	.encode_fh	= fuse_encode_fh,
	.get_parent	= fuse_get_parent,
};
1030 
/*
 * Superblock operations for fuse.  fuse_write_inode and
 * generic_delete_inode are defined outside this part of the file; the
 * other callbacks are the static functions above.
 */
static const struct super_operations fuse_super_operations = {
	.alloc_inode    = fuse_alloc_inode,
	.free_inode     = fuse_free_inode,
	.evict_inode	= fuse_evict_inode,
	.write_inode	= fuse_write_inode,
	.drop_inode	= generic_delete_inode,
	.umount_begin	= fuse_umount_begin,
	.statfs		= fuse_statfs,
	.sync_fs	= fuse_sync_fs,
	.show_options	= fuse_show_options,
};
1042 
1043 static void sanitize_global_limit(unsigned *limit)
1044 {
1045 	/*
1046 	 * The default maximum number of async requests is calculated to consume
1047 	 * 1/2^13 of the total memory, assuming 392 bytes per request.
1048 	 */
1049 	if (*limit == 0)
1050 		*limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392;
1051 
1052 	if (*limit >= 1 << 16)
1053 		*limit = (1 << 16) - 1;
1054 }
1055 
1056 static int set_global_limit(const char *val, const struct kernel_param *kp)
1057 {
1058 	int rv;
1059 
1060 	rv = param_set_uint(val, kp);
1061 	if (rv)
1062 		return rv;
1063 
1064 	sanitize_global_limit((unsigned *)kp->arg);
1065 
1066 	return 0;
1067 }
1068 
1069 static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
1070 {
1071 	int cap_sys_admin = capable(CAP_SYS_ADMIN);
1072 
1073 	if (arg->minor < 13)
1074 		return;
1075 
1076 	sanitize_global_limit(&max_user_bgreq);
1077 	sanitize_global_limit(&max_user_congthresh);
1078 
1079 	spin_lock(&fc->bg_lock);
1080 	if (arg->max_background) {
1081 		fc->max_background = arg->max_background;
1082 
1083 		if (!cap_sys_admin && fc->max_background > max_user_bgreq)
1084 			fc->max_background = max_user_bgreq;
1085 	}
1086 	if (arg->congestion_threshold) {
1087 		fc->congestion_threshold = arg->congestion_threshold;
1088 
1089 		if (!cap_sys_admin &&
1090 		    fc->congestion_threshold > max_user_congthresh)
1091 			fc->congestion_threshold = max_user_congthresh;
1092 	}
1093 	spin_unlock(&fc->bg_lock);
1094 }
1095 
/*
 * State of one INIT request.  process_init_reply() gets back to this
 * structure from its embedded @args via container_of().
 */
struct fuse_init_args {
	struct fuse_args args;		/* generic request arguments */
	struct fuse_init_in in;		/* NOTE(review): presumably the INIT request payload -- confirm */
	struct fuse_init_out out;	/* reply, read by process_init_reply() */
};
1101 
/*
 * Completion handler for FUSE_INIT.
 *
 * @fm:    mount the INIT request was sent for
 * @args:  the request arguments; embedded in a struct fuse_init_args, which
 *         is freed here
 * @error: non-zero if the request failed
 *
 * On success with a matching major version, the negotiated limits and
 * feature flags are copied into the connection and superblock and
 * conn_init is set.  On any failure conn_error is set instead.  In both
 * cases the connection is marked initialized and waiters on blocked_waitq
 * are woken.
 */
static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
			       int error)
{
	struct fuse_conn *fc = fm->fc;
	struct fuse_init_args *ia = container_of(args, typeof(*ia), args);
	struct fuse_init_out *arg = &ia->out;
	bool ok = true;

	if (error || arg->major != FUSE_KERNEL_VERSION)
		ok = false;
	else {
		unsigned long ra_pages;

		process_init_limits(fc, arg);

		if (arg->minor >= 6) {
			/* Combine the low and high flag words into one 64-bit set */
			u64 flags = arg->flags | (u64) arg->flags2 << 32;

			ra_pages = arg->max_readahead / PAGE_SIZE;
			if (flags & FUSE_ASYNC_READ)
				fc->async_read = 1;
			if (!(flags & FUSE_POSIX_LOCKS))
				fc->no_lock = 1;
			/*
			 * From minor 17 on flock support has its own flag;
			 * before that it follows FUSE_POSIX_LOCKS.
			 */
			if (arg->minor >= 17) {
				if (!(flags & FUSE_FLOCK_LOCKS))
					fc->no_flock = 1;
			} else {
				if (!(flags & FUSE_POSIX_LOCKS))
					fc->no_flock = 1;
			}
			if (flags & FUSE_ATOMIC_O_TRUNC)
				fc->atomic_o_trunc = 1;
			if (arg->minor >= 9) {
				/* LOOKUP has dependency on proto version */
				if (flags & FUSE_EXPORT_SUPPORT)
					fc->export_support = 1;
			}
			if (flags & FUSE_BIG_WRITES)
				fc->big_writes = 1;
			if (flags & FUSE_DONT_MASK)
				fc->dont_mask = 1;
			/* Automatic invalidation wins if both flags are set */
			if (flags & FUSE_AUTO_INVAL_DATA)
				fc->auto_inval_data = 1;
			else if (flags & FUSE_EXPLICIT_INVAL_DATA)
				fc->explicit_inval_data = 1;
			/* READDIRPLUS_AUTO is only honoured with DO_READDIRPLUS */
			if (flags & FUSE_DO_READDIRPLUS) {
				fc->do_readdirplus = 1;
				if (flags & FUSE_READDIRPLUS_AUTO)
					fc->readdirplus_auto = 1;
			}
			if (flags & FUSE_ASYNC_DIO)
				fc->async_dio = 1;
			if (flags & FUSE_WRITEBACK_CACHE)
				fc->writeback_cache = 1;
			if (flags & FUSE_PARALLEL_DIROPS)
				fc->parallel_dirops = 1;
			if (flags & FUSE_HANDLE_KILLPRIV)
				fc->handle_killpriv = 1;
			/* Accept only a non-zero granularity of at most 10^9 */
			if (arg->time_gran && arg->time_gran <= 1000000000)
				fm->sb->s_time_gran = arg->time_gran;
			/* POSIX ACL support also turns on default_permissions */
			if ((flags & FUSE_POSIX_ACL)) {
				fc->default_permissions = 1;
				fc->posix_acl = 1;
				fm->sb->s_xattr = fuse_acl_xattr_handlers;
			}
			if (flags & FUSE_CACHE_SYMLINKS)
				fc->cache_symlinks = 1;
			if (flags & FUSE_ABORT_ERROR)
				fc->abort_err = 1;
			/* Clamp the requested max_pages to [1, max_pages_limit] */
			if (flags & FUSE_MAX_PAGES) {
				fc->max_pages =
					min_t(unsigned int, fc->max_pages_limit,
					max_t(unsigned int, arg->max_pages, 1));
			}
			if (IS_ENABLED(CONFIG_FUSE_DAX)) {
				/* A rejected map alignment fails the whole INIT */
				if (flags & FUSE_MAP_ALIGNMENT &&
				    !fuse_dax_check_alignment(fc, arg->map_alignment)) {
					ok = false;
				}
				if (flags & FUSE_HAS_INODE_DAX)
					fc->inode_dax = 1;
			}
			if (flags & FUSE_HANDLE_KILLPRIV_V2) {
				fc->handle_killpriv_v2 = 1;
				fm->sb->s_flags |= SB_NOSEC;
			}
			if (flags & FUSE_SETXATTR_EXT)
				fc->setxattr_ext = 1;
			if (flags & FUSE_SECURITY_CTX)
				fc->init_security = 1;
		} else {
			/* Minor < 6: no flags, no locking support */
			ra_pages = fc->max_read / PAGE_SIZE;
			fc->no_lock = 1;
			fc->no_flock = 1;
		}

		/* Readahead may only shrink relative to the current bdi value */
		fm->sb->s_bdi->ra_pages =
				min(fm->sb->s_bdi->ra_pages, ra_pages);
		fc->minor = arg->minor;
		/* max_write is 4096 before minor 5 and never below 4096 */
		fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
		fc->max_write = max_t(unsigned, 4096, fc->max_write);
		fc->conn_init = 1;
	}
	/* @arg points into @ia and must not be used past this point */
	kfree(ia);

	if (!ok) {
		fc->conn_init = 0;
		fc->conn_error = 1;
	}

	fuse_set_initialized(fc);
	wake_up_all(&fc->blocked_waitq);
}
1215 
1216 void fuse_send_init(struct fuse_mount *fm)
1217 {
1218 	struct fuse_init_args *ia;
1219 	u64 flags;
1220 
1221 	ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL);
1222 
1223 	ia->in.major = FUSE_KERNEL_VERSION;
1224 	ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
1225 	ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE;
1226 	flags =
1227 		FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
1228 		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
1229 		FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
1230 		FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
1231 		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
1232 		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
1233 		FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
1234 		FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
1235 		FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
1236 		FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
1237 		FUSE_SECURITY_CTX;
1238 #ifdef CONFIG_FUSE_DAX
1239 	if (fm->fc->dax)
1240 		flags |= FUSE_MAP_ALIGNMENT;
1241 	if (fuse_is_inode_dax_mode(fm->fc->dax_mode))
1242 		flags |= FUSE_HAS_INODE_DAX;
1243 #endif
1244 	if (fm->fc->auto_submounts)
1245 		flags |= FUSE_SUBMOUNTS;
1246 
1247 	ia->in.flags = flags;
1248 	ia->in.flags2 = flags >> 32;
1249 
1250 	ia->args.opcode = FUSE_INIT;
1251 	ia->args.in_numargs = 1;
1252 	ia->args.in_args[0].size = sizeof(ia->in);
1253 	ia->args.in_args[0].value = &ia->in;
1254 	ia->args.out_numargs = 1;
1255 	/* Variable length argument used for backward compatibility
1256 	   with interface version < 7.5.  Rest of init_out is zeroed
1257 	   by do_get_request(), so a short reply is not a problem */
1258 	ia->args.out_argvar = true;
1259 	ia->args.out_args[0].size = sizeof(ia->out);
1260 	ia->args.out_args[0].value = &ia->out;
1261 	ia->args.force = true;
1262 	ia->args.nocreds = true;
1263 	ia->args.end = process_init_reply;
1264 
1265 	if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0)
1266 		process_init_reply(fm, &ia->args, -ENOTCONN);
1267 }
1268 EXPORT_SYMBOL_GPL(fuse_send_init);
1269 
/*
 * Free a connection via kfree_rcu().  Warns if the connection's device list
 * is not empty at this point.  fuse_get_tree() installs this function as
 * fc->release.
 */
void fuse_free_conn(struct fuse_conn *fc)
{
	WARN_ON(!list_empty(&fc->devices));
	kfree_rcu(fc, rcu);
}
EXPORT_SYMBOL_GPL(fuse_free_conn);
1276 
1277 static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
1278 {
1279 	int err;
1280 	char *suffix = "";
1281 
1282 	if (sb->s_bdev) {
1283 		suffix = "-fuseblk";
1284 		/*
1285 		 * sb->s_bdi points to blkdev's bdi however we want to redirect
1286 		 * it to our private bdi...
1287 		 */
1288 		bdi_put(sb->s_bdi);
1289 		sb->s_bdi = &noop_backing_dev_info;
1290 	}
1291 	err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev),
1292 				   MINOR(fc->dev), suffix);
1293 	if (err)
1294 		return err;
1295 
1296 	/* fuse does it's own writeback accounting */
1297 	sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT;
1298 	sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT;
1299 
1300 	/*
1301 	 * For a single fuse filesystem use max 1% of dirty +
1302 	 * writeback threshold.
1303 	 *
1304 	 * This gives about 1M of write buffer for memory maps on a
1305 	 * machine with 1G and 10% dirty_ratio, which should be more
1306 	 * than enough.
1307 	 *
1308 	 * Privileged users can raise it by writing to
1309 	 *
1310 	 *    /sys/class/bdi/<bdi>/max_ratio
1311 	 */
1312 	bdi_set_max_ratio(sb->s_bdi, 1);
1313 
1314 	return 0;
1315 }
1316 
1317 struct fuse_dev *fuse_dev_alloc(void)
1318 {
1319 	struct fuse_dev *fud;
1320 	struct list_head *pq;
1321 
1322 	fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL);
1323 	if (!fud)
1324 		return NULL;
1325 
1326 	pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
1327 	if (!pq) {
1328 		kfree(fud);
1329 		return NULL;
1330 	}
1331 
1332 	fud->pq.processing = pq;
1333 	fuse_pqueue_init(&fud->pq);
1334 
1335 	return fud;
1336 }
1337 EXPORT_SYMBOL_GPL(fuse_dev_alloc);
1338 
/*
 * Attach @fud to connection @fc: store the pointer returned by
 * fuse_conn_get() in fud->fc and append the device to fc->devices under
 * fc->lock.  fuse_dev_free() undoes both steps.
 */
void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc)
{
	fud->fc = fuse_conn_get(fc);
	spin_lock(&fc->lock);
	list_add_tail(&fud->entry, &fc->devices);
	spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_dev_install);
1347 
/*
 * Convenience wrapper: allocate a fuse_dev and, if that succeeded, attach
 * it to @fc.  Returns the device or NULL on allocation failure.
 */
struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc)
{
	struct fuse_dev *fud = fuse_dev_alloc();

	if (fud)
		fuse_dev_install(fud, fc);

	return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc_install);
1360 
1361 void fuse_dev_free(struct fuse_dev *fud)
1362 {
1363 	struct fuse_conn *fc = fud->fc;
1364 
1365 	if (fc) {
1366 		spin_lock(&fc->lock);
1367 		list_del(&fud->entry);
1368 		spin_unlock(&fc->lock);
1369 
1370 		fuse_conn_put(fc);
1371 	}
1372 	kfree(fud->pq.processing);
1373 	kfree(fud);
1374 }
1375 EXPORT_SYMBOL_GPL(fuse_dev_free);
1376 
1377 static void fuse_fill_attr_from_inode(struct fuse_attr *attr,
1378 				      const struct fuse_inode *fi)
1379 {
1380 	*attr = (struct fuse_attr){
1381 		.ino		= fi->inode.i_ino,
1382 		.size		= fi->inode.i_size,
1383 		.blocks		= fi->inode.i_blocks,
1384 		.atime		= fi->inode.i_atime.tv_sec,
1385 		.mtime		= fi->inode.i_mtime.tv_sec,
1386 		.ctime		= fi->inode.i_ctime.tv_sec,
1387 		.atimensec	= fi->inode.i_atime.tv_nsec,
1388 		.mtimensec	= fi->inode.i_mtime.tv_nsec,
1389 		.ctimensec	= fi->inode.i_ctime.tv_nsec,
1390 		.mode		= fi->inode.i_mode,
1391 		.nlink		= fi->inode.i_nlink,
1392 		.uid		= fi->inode.i_uid.val,
1393 		.gid		= fi->inode.i_gid.val,
1394 		.rdev		= fi->inode.i_rdev,
1395 		.blksize	= 1u << fi->inode.i_blkbits,
1396 	};
1397 }
1398 
1399 static void fuse_sb_defaults(struct super_block *sb)
1400 {
1401 	sb->s_magic = FUSE_SUPER_MAGIC;
1402 	sb->s_op = &fuse_super_operations;
1403 	sb->s_xattr = fuse_xattr_handlers;
1404 	sb->s_maxbytes = MAX_LFS_FILESIZE;
1405 	sb->s_time_gran = 1;
1406 	sb->s_export_op = &fuse_export_operations;
1407 	sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
1408 	if (sb->s_user_ns != &init_user_ns)
1409 		sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
1410 	sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
1411 
1412 	/*
1413 	 * If we are not in the initial user namespace posix
1414 	 * acls must be translated.
1415 	 */
1416 	if (sb->s_user_ns != &init_user_ns)
1417 		sb->s_xattr = fuse_no_acl_xattr_handlers;
1418 }
1419 
1420 static int fuse_fill_super_submount(struct super_block *sb,
1421 				    struct fuse_inode *parent_fi)
1422 {
1423 	struct fuse_mount *fm = get_fuse_mount_super(sb);
1424 	struct super_block *parent_sb = parent_fi->inode.i_sb;
1425 	struct fuse_attr root_attr;
1426 	struct inode *root;
1427 
1428 	fuse_sb_defaults(sb);
1429 	fm->sb = sb;
1430 
1431 	WARN_ON(sb->s_bdi != &noop_backing_dev_info);
1432 	sb->s_bdi = bdi_get(parent_sb->s_bdi);
1433 
1434 	sb->s_xattr = parent_sb->s_xattr;
1435 	sb->s_time_gran = parent_sb->s_time_gran;
1436 	sb->s_blocksize = parent_sb->s_blocksize;
1437 	sb->s_blocksize_bits = parent_sb->s_blocksize_bits;
1438 	sb->s_subtype = kstrdup(parent_sb->s_subtype, GFP_KERNEL);
1439 	if (parent_sb->s_subtype && !sb->s_subtype)
1440 		return -ENOMEM;
1441 
1442 	fuse_fill_attr_from_inode(&root_attr, parent_fi);
1443 	root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0);
1444 	/*
1445 	 * This inode is just a duplicate, so it is not looked up and
1446 	 * its nlookup should not be incremented.  fuse_iget() does
1447 	 * that, though, so undo it here.
1448 	 */
1449 	get_fuse_inode(root)->nlookup--;
1450 	sb->s_d_op = &fuse_dentry_operations;
1451 	sb->s_root = d_make_root(root);
1452 	if (!sb->s_root)
1453 		return -ENOMEM;
1454 
1455 	return 0;
1456 }
1457 
/*
 * .get_tree for submounts.
 *
 * Filesystem context private data holds the FUSE inode of the mount point.
 * A new fuse_mount sharing that inode's connection is allocated, a new
 * anonymous superblock is obtained for it and filled in, and the mount is
 * added to the connection's mount list.
 *
 * Returns 0 on success or a negative errno.
 */
static int fuse_get_tree_submount(struct fs_context *fsc)
{
	struct fuse_mount *fm;
	struct fuse_inode *mp_fi = fsc->fs_private;
	struct fuse_conn *fc = get_fuse_conn(&mp_fi->inode);
	struct super_block *sb;
	int err;

	fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
	if (!fm)
		return -ENOMEM;

	fm->fc = fuse_conn_get(fc);
	fsc->s_fs_info = fm;
	sb = sget_fc(fsc, NULL, set_anon_super_fc);
	/*
	 * If fsc->s_fs_info is still set, @fm was not taken over and must be
	 * released here.  NOTE(review): presumably sget_fc() clears
	 * fsc->s_fs_info when it hands it to a new superblock - confirm.
	 */
	if (fsc->s_fs_info)
		fuse_mount_destroy(fm);
	if (IS_ERR(sb))
		return PTR_ERR(sb);

	/* Initialize superblock, making @mp_fi its root */
	err = fuse_fill_super_submount(sb, mp_fi);
	if (err) {
		deactivate_locked_super(sb);
		return err;
	}

	/* Publish the new mount on the connection's list of mounts */
	down_write(&fc->killsb);
	list_add_tail(&fm->fc_entry, &fc->mounts);
	up_write(&fc->killsb);

	sb->s_flags |= SB_ACTIVE;
	fsc->root = dget(sb->s_root);

	return 0;
}
1495 
/* Context operations for submounts: only .get_tree is provided */
static const struct fs_context_operations fuse_context_submount_ops = {
	.get_tree	= fuse_get_tree_submount,
};
1499 
/*
 * Point a filesystem context at the submount operations.  Always returns 0.
 */
int fuse_init_fs_context_submount(struct fs_context *fsc)
{
	fsc->ops = &fuse_context_submount_ops;
	return 0;
}
EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount);
1506 
/*
 * Common superblock setup for a new fuse connection.
 *
 * @sb:  superblock being filled; its fuse_mount and connection are already
 *       attached
 * @ctx: parsed mount options.  ctx->subtype is moved into the superblock.
 *       If ctx->fudptr is set, a fuse_dev is allocated, installed on the
 *       connection and stored through that pointer on success.
 *
 * Returns 0 on success or a negative errno; on failure the fuse_dev and the
 * DAX state allocated here are released again.
 */
int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
{
	struct fuse_dev *fud = NULL;
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct fuse_conn *fc = fm->fc;
	struct inode *root;
	struct dentry *root_dentry;
	int err;

	/* Mounts with SB_MANDLOCK are rejected */
	err = -EINVAL;
	if (sb->s_flags & SB_MANDLOCK)
		goto err;

	rcu_assign_pointer(fc->curr_bucket, fuse_sync_bucket_alloc());
	fuse_sb_defaults(sb);

	if (ctx->is_bdev) {
#ifdef CONFIG_BLOCK
		err = -EINVAL;
		if (!sb_set_blocksize(sb, ctx->blksize))
			goto err;
#endif
	} else {
		sb->s_blocksize = PAGE_SIZE;
		sb->s_blocksize_bits = PAGE_SHIFT;
	}

	/* The superblock takes over the subtype string from the context */
	sb->s_subtype = ctx->subtype;
	ctx->subtype = NULL;
	if (IS_ENABLED(CONFIG_FUSE_DAX)) {
		err = fuse_dax_conn_alloc(fc, ctx->dax_mode, ctx->dax_dev);
		if (err)
			goto err;
	}

	if (ctx->fudptr) {
		err = -ENOMEM;
		fud = fuse_dev_alloc_install(fc);
		if (!fud)
			goto err_free_dax;
	}

	fc->dev = sb->s_dev;
	fm->sb = sb;
	err = fuse_bdi_init(fc, sb);
	if (err)
		goto err_dev_free;

	/* Handle umasking inside the fuse code */
	if (sb->s_flags & SB_POSIXACL)
		fc->dont_mask = 1;
	sb->s_flags |= SB_POSIXACL;

	/* Copy the mount options into the connection */
	fc->default_permissions = ctx->default_permissions;
	fc->allow_other = ctx->allow_other;
	fc->user_id = ctx->user_id;
	fc->group_id = ctx->group_id;
	fc->legacy_opts_show = ctx->legacy_opts_show;
	fc->max_read = max_t(unsigned int, 4096, ctx->max_read);
	fc->destroy = ctx->destroy;
	fc->no_control = ctx->no_control;
	fc->no_force_umount = ctx->no_force_umount;

	err = -ENOMEM;
	root = fuse_get_root_inode(sb, ctx->rootmode);
	sb->s_d_op = &fuse_root_dentry_operations;
	root_dentry = d_make_root(root);
	if (!root_dentry)
		goto err_dev_free;
	/* Root dentry doesn't have .d_revalidate */
	sb->s_d_op = &fuse_dentry_operations;

	mutex_lock(&fuse_mutex);
	/* Refuse if the target pointer already holds a device */
	err = -EINVAL;
	if (ctx->fudptr && *ctx->fudptr)
		goto err_unlock;

	err = fuse_ctl_add_conn(fc);
	if (err)
		goto err_unlock;

	/* Everything succeeded: publish the connection, root and device */
	list_add_tail(&fc->entry, &fuse_conn_list);
	sb->s_root = root_dentry;
	if (ctx->fudptr)
		*ctx->fudptr = fud;
	mutex_unlock(&fuse_mutex);
	return 0;

	/* Error unwinding, in reverse order of setup */
 err_unlock:
	mutex_unlock(&fuse_mutex);
	dput(root_dentry);
 err_dev_free:
	if (fud)
		fuse_dev_free(fud);
 err_free_dax:
	if (IS_ENABLED(CONFIG_FUSE_DAX))
		fuse_dax_conn_free(fc);
 err:
	return err;
}
EXPORT_SYMBOL_GPL(fuse_fill_super_common);
1608 
1609 static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
1610 {
1611 	struct fuse_fs_context *ctx = fsc->fs_private;
1612 	int err;
1613 
1614 	if (!ctx->file || !ctx->rootmode_present ||
1615 	    !ctx->user_id_present || !ctx->group_id_present)
1616 		return -EINVAL;
1617 
1618 	/*
1619 	 * Require mount to happen from the same user namespace which
1620 	 * opened /dev/fuse to prevent potential attacks.
1621 	 */
1622 	if ((ctx->file->f_op != &fuse_dev_operations) ||
1623 	    (ctx->file->f_cred->user_ns != sb->s_user_ns))
1624 		return -EINVAL;
1625 	ctx->fudptr = &ctx->file->private_data;
1626 
1627 	err = fuse_fill_super_common(sb, ctx);
1628 	if (err)
1629 		return err;
1630 	/* file->private_data shall be visible on all CPUs after this */
1631 	smp_mb();
1632 	fuse_send_init(get_fuse_mount_super(sb));
1633 	return 0;
1634 }
1635 
/*
 * This is the path where the user supplied an already initialized fuse dev.
 * In this case never create a new super if the old one is gone: this "set"
 * callback for sget_fc() always fails with -ENOTCONN.
 */
static int fuse_set_no_super(struct super_block *sb, struct fs_context *fsc)
{
	return -ENOTCONN;
}
1644 
1645 static int fuse_test_super(struct super_block *sb, struct fs_context *fsc)
1646 {
1647 
1648 	return fsc->sget_key == get_fuse_conn_super(sb);
1649 }
1650 
/*
 * .get_tree for fuse and fuseblk.
 *
 * Allocates a new connection and mount, then takes one of three paths:
 *  - block device mounts go through get_tree_bdev();
 *  - if the supplied fd is a fuse device that already has a fuse_dev
 *    attached, the existing superblock for that connection is looked up
 *    (a new one is never created on this path);
 *  - otherwise a new superblock is created with get_tree_nodev().
 *
 * Returns 0 on success or a negative errno.
 */
static int fuse_get_tree(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;
	struct fuse_dev *fud;
	struct fuse_conn *fc;
	struct fuse_mount *fm;
	struct super_block *sb;
	int err;

	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
	if (!fc)
		return -ENOMEM;

	fm = kzalloc(sizeof(*fm), GFP_KERNEL);
	if (!fm) {
		kfree(fc);
		return -ENOMEM;
	}

	fuse_conn_init(fc, fm, fsc->user_ns, &fuse_dev_fiq_ops, NULL);
	fc->release = fuse_free_conn;

	fsc->s_fs_info = fm;

	/* The reference taken here is dropped at "out" below */
	if (ctx->fd_present)
		ctx->file = fget(ctx->fd);

	if (IS_ENABLED(CONFIG_BLOCK) && ctx->is_bdev) {
		err = get_tree_bdev(fsc, fuse_fill_super);
		goto out;
	}
	/*
	 * While block dev mount can be initialized with a dummy device fd
	 * (found by device name), normal fuse mounts can't
	 */
	err = -EINVAL;
	if (!ctx->file)
		goto out;

	/*
	 * Allow creating a fuse mount with an already initialized fuse
	 * connection
	 */
	fud = READ_ONCE(ctx->file->private_data);
	if (ctx->file->f_op == &fuse_dev_operations && fud) {
		fsc->sget_key = fud->fc;
		sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super);
		err = PTR_ERR_OR_ZERO(sb);
		if (!IS_ERR(sb))
			fsc->root = dget(sb->s_root);
	} else {
		err = get_tree_nodev(fsc, fuse_fill_super);
	}
out:
	/*
	 * If fsc->s_fs_info is still set, @fm (and the connection reference it
	 * holds) was not taken over by a superblock and is released here.
	 */
	if (fsc->s_fs_info)
		fuse_mount_destroy(fm);
	if (ctx->file)
		fput(ctx->file);
	return err;
}
1711 
/* Context operations for regular fuse and fuseblk mounts */
static const struct fs_context_operations fuse_context_ops = {
	.free		= fuse_free_fsc,
	.parse_param	= fuse_parse_param,
	.reconfigure	= fuse_reconfigure,
	.get_tree	= fuse_get_tree,
};
1718 
1719 /*
1720  * Set up the filesystem mount context.
1721  */
1722 static int fuse_init_fs_context(struct fs_context *fsc)
1723 {
1724 	struct fuse_fs_context *ctx;
1725 
1726 	ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
1727 	if (!ctx)
1728 		return -ENOMEM;
1729 
1730 	ctx->max_read = ~0;
1731 	ctx->blksize = FUSE_DEFAULT_BLKSIZE;
1732 	ctx->legacy_opts_show = true;
1733 
1734 #ifdef CONFIG_BLOCK
1735 	if (fsc->fs_type == &fuseblk_fs_type) {
1736 		ctx->is_bdev = true;
1737 		ctx->destroy = true;
1738 	}
1739 #endif
1740 
1741 	fsc->fs_private = ctx;
1742 	fsc->ops = &fuse_context_ops;
1743 	return 0;
1744 }
1745 
1746 bool fuse_mount_remove(struct fuse_mount *fm)
1747 {
1748 	struct fuse_conn *fc = fm->fc;
1749 	bool last = false;
1750 
1751 	down_write(&fc->killsb);
1752 	list_del_init(&fm->fc_entry);
1753 	if (list_empty(&fc->mounts))
1754 		last = true;
1755 	up_write(&fc->killsb);
1756 
1757 	return last;
1758 }
1759 EXPORT_SYMBOL_GPL(fuse_mount_remove);
1760 
1761 void fuse_conn_destroy(struct fuse_mount *fm)
1762 {
1763 	struct fuse_conn *fc = fm->fc;
1764 
1765 	if (fc->destroy)
1766 		fuse_send_destroy(fm);
1767 
1768 	fuse_abort_conn(fc);
1769 	fuse_wait_aborted(fc);
1770 
1771 	if (!list_empty(&fc->entry)) {
1772 		mutex_lock(&fuse_mutex);
1773 		list_del(&fc->entry);
1774 		fuse_ctl_remove_conn(fc);
1775 		mutex_unlock(&fuse_mutex);
1776 	}
1777 }
1778 EXPORT_SYMBOL_GPL(fuse_conn_destroy);
1779 
1780 static void fuse_sb_destroy(struct super_block *sb)
1781 {
1782 	struct fuse_mount *fm = get_fuse_mount_super(sb);
1783 	bool last;
1784 
1785 	if (sb->s_root) {
1786 		last = fuse_mount_remove(fm);
1787 		if (last)
1788 			fuse_conn_destroy(fm);
1789 	}
1790 }
1791 
/*
 * Drop the mount's reference on its connection and free the fuse_mount.
 */
void fuse_mount_destroy(struct fuse_mount *fm)
{
	fuse_conn_put(fm->fc);
	kfree(fm);
}
EXPORT_SYMBOL(fuse_mount_destroy);
1798 
/*
 * .kill_sb for "fuse": detach from the connection first, then let
 * kill_anon_super() tear down the superblock, and only afterwards free the
 * fuse_mount that get_fuse_mount_super() returns for it.
 */
static void fuse_kill_sb_anon(struct super_block *sb)
{
	fuse_sb_destroy(sb);
	kill_anon_super(sb);
	fuse_mount_destroy(get_fuse_mount_super(sb));
}
1805 
/*
 * The "fuse" filesystem type: supports subtypes and is flagged
 * FS_USERNS_MOUNT.
 */
static struct file_system_type fuse_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuse",
	.fs_flags	= FS_HAS_SUBTYPE | FS_USERNS_MOUNT,
	.init_fs_context = fuse_init_fs_context,
	.parameters	= fuse_fs_parameters,
	.kill_sb	= fuse_kill_sb_anon,
};
MODULE_ALIAS_FS("fuse");
1815 
1816 #ifdef CONFIG_BLOCK
/*
 * .kill_sb for "fuseblk": same sequence as fuse_kill_sb_anon(), but the
 * superblock is torn down with kill_block_super().
 */
static void fuse_kill_sb_blk(struct super_block *sb)
{
	fuse_sb_destroy(sb);
	kill_block_super(sb);
	fuse_mount_destroy(get_fuse_mount_super(sb));
}
1823 
/*
 * The "fuseblk" filesystem type: requires a block device (FS_REQUIRES_DEV)
 * and supports subtypes.  Unlike "fuse" it does not set FS_USERNS_MOUNT.
 */
static struct file_system_type fuseblk_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuseblk",
	.init_fs_context = fuse_init_fs_context,
	.parameters	= fuse_fs_parameters,
	.kill_sb	= fuse_kill_sb_blk,
	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
};
MODULE_ALIAS_FS("fuseblk");
1833 
/* Register the "fuseblk" filesystem type (CONFIG_BLOCK variant) */
static inline int register_fuseblk(void)
{
	return register_filesystem(&fuseblk_fs_type);
}
1838 
/* Unregister the "fuseblk" filesystem type (CONFIG_BLOCK variant) */
static inline void unregister_fuseblk(void)
{
	unregister_filesystem(&fuseblk_fs_type);
}
1843 #else
/* Without CONFIG_BLOCK there is no "fuseblk" type; report success */
static inline int register_fuseblk(void)
{
	return 0;
}
1848 
/* Without CONFIG_BLOCK there is nothing to unregister */
static inline void unregister_fuseblk(void)
{
}
1852 #endif
1853 
/*
 * Slab constructor for the fuse_inode cache: @foo is passed straight to
 * inode_init_once() as a struct inode pointer.
 */
static void fuse_inode_init_once(void *foo)
{
	inode_init_once((struct inode *)foo);
}
1860 
1861 static int __init fuse_fs_init(void)
1862 {
1863 	int err;
1864 
1865 	fuse_inode_cachep = kmem_cache_create("fuse_inode",
1866 			sizeof(struct fuse_inode), 0,
1867 			SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT,
1868 			fuse_inode_init_once);
1869 	err = -ENOMEM;
1870 	if (!fuse_inode_cachep)
1871 		goto out;
1872 
1873 	err = register_fuseblk();
1874 	if (err)
1875 		goto out2;
1876 
1877 	err = register_filesystem(&fuse_fs_type);
1878 	if (err)
1879 		goto out3;
1880 
1881 	return 0;
1882 
1883  out3:
1884 	unregister_fuseblk();
1885  out2:
1886 	kmem_cache_destroy(fuse_inode_cachep);
1887  out:
1888 	return err;
1889 }
1890 
/*
 * Undo fuse_fs_init(): unregister both filesystem types and destroy the
 * inode cache.
 */
static void fuse_fs_cleanup(void)
{
	unregister_filesystem(&fuse_fs_type);
	unregister_fuseblk();

	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(fuse_inode_cachep);
}
1903 
/* The "fuse" kobject created under fs_kobj by fuse_sysfs_init() */
static struct kobject *fuse_kobj;
1905 
1906 static int fuse_sysfs_init(void)
1907 {
1908 	int err;
1909 
1910 	fuse_kobj = kobject_create_and_add("fuse", fs_kobj);
1911 	if (!fuse_kobj) {
1912 		err = -ENOMEM;
1913 		goto out_err;
1914 	}
1915 
1916 	err = sysfs_create_mount_point(fuse_kobj, "connections");
1917 	if (err)
1918 		goto out_fuse_unregister;
1919 
1920 	return 0;
1921 
1922  out_fuse_unregister:
1923 	kobject_put(fuse_kobj);
1924  out_err:
1925 	return err;
1926 }
1927 
/* Undo fuse_sysfs_init(): remove the mount point, then drop the kobject */
static void fuse_sysfs_cleanup(void)
{
	sysfs_remove_mount_point(fuse_kobj, "connections");
	kobject_put(fuse_kobj);
}
1933 
1934 static int __init fuse_init(void)
1935 {
1936 	int res;
1937 
1938 	pr_info("init (API version %i.%i)\n",
1939 		FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
1940 
1941 	INIT_LIST_HEAD(&fuse_conn_list);
1942 	res = fuse_fs_init();
1943 	if (res)
1944 		goto err;
1945 
1946 	res = fuse_dev_init();
1947 	if (res)
1948 		goto err_fs_cleanup;
1949 
1950 	res = fuse_sysfs_init();
1951 	if (res)
1952 		goto err_dev_cleanup;
1953 
1954 	res = fuse_ctl_init();
1955 	if (res)
1956 		goto err_sysfs_cleanup;
1957 
1958 	sanitize_global_limit(&max_user_bgreq);
1959 	sanitize_global_limit(&max_user_congthresh);
1960 
1961 	return 0;
1962 
1963  err_sysfs_cleanup:
1964 	fuse_sysfs_cleanup();
1965  err_dev_cleanup:
1966 	fuse_dev_cleanup();
1967  err_fs_cleanup:
1968 	fuse_fs_cleanup();
1969  err:
1970 	return res;
1971 }
1972 
/*
 * Module exit: tear down the control filesystem, the sysfs entries, the
 * filesystem types and finally the fuse device.
 */
static void __exit fuse_exit(void)
{
	pr_debug("exit\n");

	fuse_ctl_cleanup();
	fuse_sysfs_cleanup();
	fuse_fs_cleanup();
	fuse_dev_cleanup();
}
1982 
/* Hook fuse_init()/fuse_exit() up as the module's entry and exit points */
module_init(fuse_init);
module_exit(fuse_exit);
1985