1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 FUSE: Filesystem in Userspace 4 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> 5 */ 6 7 #include "dev.h" 8 #include "fuse_i.h" 9 10 #include <linux/dax.h> 11 #include <linux/pagemap.h> 12 #include <linux/slab.h> 13 #include <linux/file.h> 14 #include <linux/seq_file.h> 15 #include <linux/init.h> 16 #include <linux/module.h> 17 #include <linux/moduleparam.h> 18 #include <linux/fs_context.h> 19 #include <linux/fs_parser.h> 20 #include <linux/statfs.h> 21 #include <linux/random.h> 22 #include <linux/sched.h> 23 #include <linux/exportfs.h> 24 #include <linux/posix_acl.h> 25 #include <linux/pid_namespace.h> 26 #include <uapi/linux/magic.h> 27 28 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 29 MODULE_DESCRIPTION("Filesystem in Userspace"); 30 MODULE_LICENSE("GPL"); 31 32 static struct kmem_cache *fuse_inode_cachep; 33 struct list_head fuse_conn_list; 34 DEFINE_MUTEX(fuse_mutex); 35 36 static int set_global_limit(const char *val, const struct kernel_param *kp); 37 38 unsigned int fuse_max_pages_limit = 256; 39 /* default is no timeout */ 40 41 unsigned int max_user_bgreq; 42 module_param_call(max_user_bgreq, set_global_limit, param_get_uint, 43 &max_user_bgreq, 0644); 44 __MODULE_PARM_TYPE(max_user_bgreq, "uint"); 45 MODULE_PARM_DESC(max_user_bgreq, 46 "Global limit for the maximum number of backgrounded requests an " 47 "unprivileged user can set"); 48 49 unsigned int max_user_congthresh; 50 module_param_call(max_user_congthresh, set_global_limit, param_get_uint, 51 &max_user_congthresh, 0644); 52 __MODULE_PARM_TYPE(max_user_congthresh, "uint"); 53 MODULE_PARM_DESC(max_user_congthresh, 54 "Global limit for the maximum congestion threshold an " 55 "unprivileged user can set"); 56 57 #define FUSE_DEFAULT_BLKSIZE 512 58 59 /** Congestion starts at 75% of maximum */ 60 #define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) 61 62 #ifdef CONFIG_BLOCK 63 static struct file_system_type 
fuseblk_fs_type; 64 #endif 65 66 static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void) 67 { 68 struct fuse_submount_lookup *sl; 69 70 sl = kzalloc_obj(struct fuse_submount_lookup, GFP_KERNEL_ACCOUNT); 71 if (!sl) 72 return NULL; 73 sl->forget = fuse_alloc_forget(); 74 if (!sl->forget) 75 goto out_free; 76 77 return sl; 78 79 out_free: 80 kfree(sl); 81 return NULL; 82 } 83 84 static struct inode *fuse_alloc_inode(struct super_block *sb) 85 { 86 struct fuse_inode *fi; 87 88 fi = alloc_inode_sb(sb, fuse_inode_cachep, GFP_KERNEL); 89 if (!fi) 90 return NULL; 91 92 /* Initialize private data (i.e. everything except fi->inode) */ 93 BUILD_BUG_ON(offsetof(struct fuse_inode, inode) != 0); 94 memset((void *) fi + sizeof(fi->inode), 0, sizeof(*fi) - sizeof(fi->inode)); 95 96 fi->inval_mask = ~0; 97 mutex_init(&fi->mutex); 98 spin_lock_init(&fi->lock); 99 fi->forget = fuse_alloc_forget(); 100 if (!fi->forget) 101 goto out_free; 102 103 if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi)) 104 goto out_free_forget; 105 106 if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) 107 fuse_inode_backing_set(fi, NULL); 108 109 return &fi->inode; 110 111 out_free_forget: 112 kfree(fi->forget); 113 out_free: 114 kmem_cache_free(fuse_inode_cachep, fi); 115 return NULL; 116 } 117 118 static void fuse_free_inode(struct inode *inode) 119 { 120 struct fuse_inode *fi = get_fuse_inode(inode); 121 122 mutex_destroy(&fi->mutex); 123 kfree(fi->forget); 124 #ifdef CONFIG_FUSE_DAX 125 kfree(fi->dax); 126 #endif 127 if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) 128 fuse_backing_put(fuse_inode_backing(fi)); 129 130 kmem_cache_free(fuse_inode_cachep, fi); 131 } 132 133 static void fuse_cleanup_submount_lookup(struct fuse_conn *fc, 134 struct fuse_submount_lookup *sl) 135 { 136 if (!refcount_dec_and_test(&sl->count)) 137 return; 138 139 fuse_chan_queue_forget(fc->chan, sl->forget, sl->nodeid, 1); 140 sl->forget = NULL; 141 kfree(sl); 142 } 143 144 static void fuse_evict_inode(struct inode 
*inode) 145 { 146 struct fuse_inode *fi = get_fuse_inode(inode); 147 148 /* Will write inode on close/munmap and in all other dirtiers */ 149 WARN_ON(inode_state_read_once(inode) & I_DIRTY_INODE); 150 151 if (FUSE_IS_DAX(inode)) 152 dax_break_layout_final(inode); 153 154 truncate_inode_pages_final(&inode->i_data); 155 clear_inode(inode); 156 if (inode->i_sb->s_flags & SB_ACTIVE) { 157 struct fuse_conn *fc = get_fuse_conn(inode); 158 159 if (FUSE_IS_DAX(inode)) 160 fuse_dax_inode_cleanup(inode); 161 if (fi->nlookup) { 162 fuse_chan_queue_forget(fc->chan, fi->forget, fi->nodeid, 163 fi->nlookup); 164 fi->forget = NULL; 165 } 166 167 if (fi->submount_lookup) { 168 fuse_cleanup_submount_lookup(fc, fi->submount_lookup); 169 fi->submount_lookup = NULL; 170 } 171 /* 172 * Evict of non-deleted inode may race with outstanding 173 * LOOKUP/READDIRPLUS requests and result in inconsistency when 174 * the request finishes. Deal with that here by bumping a 175 * counter that can be compared to the starting value. 176 */ 177 if (inode->i_nlink > 0) 178 atomic64_inc(&fc->evict_ctr); 179 } 180 if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) { 181 WARN_ON(fi->iocachectr != 0); 182 WARN_ON(!list_empty(&fi->write_files)); 183 WARN_ON(!list_empty(&fi->queued_writes)); 184 } 185 } 186 187 static int fuse_reconfigure(struct fs_context *fsc) 188 { 189 struct super_block *sb = fsc->root->d_sb; 190 191 sync_filesystem(sb); 192 if (fsc->sb_flags & SB_MANDLOCK) 193 return -EINVAL; 194 195 return 0; 196 } 197 198 /* 199 * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down 200 * so that it will fit. 
201 */ 202 static ino_t fuse_squash_ino(u64 ino64) 203 { 204 ino_t ino = (ino_t) ino64; 205 if (sizeof(ino_t) < sizeof(u64)) 206 ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8; 207 return ino; 208 } 209 210 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, 211 struct fuse_statx *sx, 212 u64 attr_valid, u32 cache_mask, 213 u64 evict_ctr) 214 { 215 struct fuse_conn *fc = get_fuse_conn(inode); 216 struct fuse_inode *fi = get_fuse_inode(inode); 217 218 lockdep_assert_held(&fi->lock); 219 220 /* 221 * Clear basic stats from invalid mask. 222 * 223 * Don't do this if this is coming from a fuse_iget() call and there 224 * might have been a racing evict which would've invalidated the result 225 * if the attr_version would've been preserved. 226 * 227 * !evict_ctr -> this is create 228 * fi->attr_version != 0 -> this is not a new inode 229 * evict_ctr == fuse_get_evict_ctr() -> no evicts while during request 230 */ 231 if (!evict_ctr || fi->attr_version || evict_ctr == fuse_get_evict_ctr(fc)) 232 set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0); 233 234 fi->attr_version = atomic64_inc_return(&fc->attr_version); 235 fi->i_time = attr_valid; 236 237 inode->i_ino = fuse_squash_ino(attr->ino); 238 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 239 set_nlink(inode, attr->nlink); 240 inode->i_uid = make_kuid(fc->user_ns, attr->uid); 241 inode->i_gid = make_kgid(fc->user_ns, attr->gid); 242 inode->i_blocks = attr->blocks; 243 244 /* Sanitize nsecs */ 245 attr->atimensec = min_t(u32, attr->atimensec, NSEC_PER_SEC - 1); 246 attr->mtimensec = min_t(u32, attr->mtimensec, NSEC_PER_SEC - 1); 247 attr->ctimensec = min_t(u32, attr->ctimensec, NSEC_PER_SEC - 1); 248 249 inode_set_atime(inode, attr->atime, attr->atimensec); 250 /* mtime from server may be stale due to local buffered write */ 251 if (!(cache_mask & STATX_MTIME)) { 252 inode_set_mtime(inode, attr->mtime, attr->mtimensec); 253 } 254 if (!(cache_mask & STATX_CTIME)) { 255 
inode_set_ctime(inode, attr->ctime, attr->ctimensec); 256 } 257 if (sx) { 258 /* Sanitize nsecs */ 259 sx->btime.tv_nsec = 260 min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1); 261 262 /* 263 * Btime has been queried, cache is valid (whether or not btime 264 * is available or not) so clear STATX_BTIME from inval_mask. 265 * 266 * Availability of the btime attribute is indicated in 267 * FUSE_I_BTIME 268 */ 269 set_mask_bits(&fi->inval_mask, STATX_BTIME, 0); 270 if (sx->mask & STATX_BTIME) { 271 set_bit(FUSE_I_BTIME, &fi->state); 272 fi->i_btime.tv_sec = sx->btime.tv_sec; 273 fi->i_btime.tv_nsec = sx->btime.tv_nsec; 274 } 275 } 276 277 if (attr->blksize) 278 fi->cached_i_blkbits = ilog2(attr->blksize); 279 else 280 fi->cached_i_blkbits = inode->i_sb->s_blocksize_bits; 281 282 /* 283 * Don't set the sticky bit in i_mode, unless we want the VFS 284 * to check permissions. This prevents failures due to the 285 * check in may_delete(). 286 */ 287 fi->orig_i_mode = inode->i_mode; 288 if (!fc->default_permissions) 289 inode->i_mode &= ~S_ISVTX; 290 291 fi->orig_ino = attr->ino; 292 293 /* 294 * We are refreshing inode data and it is possible that another 295 * client set suid/sgid or security.capability xattr. So clear 296 * S_NOSEC. Ideally, we could have cleared it only if suid/sgid 297 * was set or if security.capability xattr was set. But we don't 298 * know if security.capability has been set or not. So clear it 299 * anyway. Its less efficient but should be safe. 
300 */ 301 inode->i_flags &= ~S_NOSEC; 302 } 303 304 u32 fuse_get_cache_mask(struct inode *inode) 305 { 306 struct fuse_conn *fc = get_fuse_conn(inode); 307 308 if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) 309 return 0; 310 311 return STATX_MTIME | STATX_CTIME | STATX_SIZE; 312 } 313 314 static void fuse_change_attributes_i(struct inode *inode, struct fuse_attr *attr, 315 struct fuse_statx *sx, u64 attr_valid, 316 u64 attr_version, u64 evict_ctr) 317 { 318 struct fuse_conn *fc = get_fuse_conn(inode); 319 struct fuse_inode *fi = get_fuse_inode(inode); 320 u32 cache_mask; 321 loff_t oldsize; 322 struct timespec64 old_mtime; 323 324 spin_lock(&fi->lock); 325 /* 326 * In case of writeback_cache enabled, writes update mtime, ctime and 327 * may update i_size. In these cases trust the cached value in the 328 * inode. 329 */ 330 cache_mask = fuse_get_cache_mask(inode); 331 if (cache_mask & STATX_SIZE) 332 attr->size = i_size_read(inode); 333 334 if (cache_mask & STATX_MTIME) { 335 attr->mtime = inode_get_mtime_sec(inode); 336 attr->mtimensec = inode_get_mtime_nsec(inode); 337 } 338 if (cache_mask & STATX_CTIME) { 339 attr->ctime = inode_get_ctime_sec(inode); 340 attr->ctimensec = inode_get_ctime_nsec(inode); 341 } 342 343 if ((attr_version != 0 && fi->attr_version > attr_version) || 344 test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { 345 spin_unlock(&fi->lock); 346 return; 347 } 348 349 old_mtime = inode_get_mtime(inode); 350 fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask, 351 evict_ctr); 352 353 oldsize = inode->i_size; 354 /* 355 * In case of writeback_cache enabled, the cached writes beyond EOF 356 * extend local i_size without keeping userspace server in sync. So, 357 * attr->size coming from server can be stale. We cannot trust it. 
/*
 * Update @inode from @attr (and from @sx when it is non-NULL).
 *
 * Thin wrapper around fuse_change_attributes_i() that always passes an
 * evict_ctr of 0.  fuse_change_attributes_common() treats a zero evict_ctr
 * as "no racing evict to worry about", so the basic stats are always cleared
 * from the inode's invalidation mask on this path.
 *
 * @attr_valid:   stored in the inode as the attribute expiry (fi->i_time)
 * @attr_version: 0, or the connection attr_version sampled before the
 *                request; a reply older than the inode's current version is
 *                dropped by fuse_change_attributes_i()
 *
 * Note that @attr may be modified: cached size/mtime/ctime can be written
 * back into it and its nanosecond fields are clamped.
 */
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
			    struct fuse_statx *sx, u64 attr_valid,
			    u64 attr_version)
{
	fuse_change_attributes_i(inode, attr, sx, attr_valid, attr_version, 0);
}
/*
 * Find or create the in-core inode for @nodeid on @sb and apply @attr to it.
 *
 * Two paths:
 *  - Submount roots (auto_submounts enabled, FUSE_ATTR_SUBMOUNT set, and a
 *    directory) get a fresh, unhashed inode from new_inode() plus a
 *    fuse_submount_lookup; fi->nlookup is not incremented on this path.
 *  - Everything else goes through iget5_locked() keyed on @nodeid.  A new
 *    inode is initialised from @attr; an existing one that
 *    fuse_stale_inode() reports as stale is marked bad and, unless it is the
 *    root inode, unhashed and dropped before retrying.  fi->nlookup is then
 *    incremented under fi->lock.
 *
 * Both paths finish by calling fuse_change_attributes_i() with a NULL statx
 * pointer.
 *
 * Returns the inode, or NULL if new_inode(), iget5_locked() or the submount
 * lookup allocation fails.
 */
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
			int generation, struct fuse_attr *attr,
			u64 attr_valid, u64 attr_version,
			u64 evict_ctr)
{
	struct inode *inode;
	struct fuse_inode *fi;
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	bool is_new_inode = false;

	/*
	 * Auto mount points get their node id from the submount root, which is
	 * not a unique identifier within this filesystem.
	 *
	 * To avoid conflicts, do not place submount points into the inode hash
	 * table.
	 */
	if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) &&
	    S_ISDIR(attr->mode)) {
		/* Shadows the function-scope fi; only used within this branch. */
		struct fuse_inode *fi;

		inode = new_inode(sb);
		if (!inode)
			return NULL;

		fuse_init_inode(inode, attr, fc);
		fi = get_fuse_inode(inode);
		fi->nodeid = nodeid;
		fi->submount_lookup = fuse_alloc_submount_lookup();
		if (!fi->submount_lookup) {
			iput(inode);
			return NULL;
		}
		/* Records nodeid and sets the refcount of fi->submount_lookup to 1 */
		fuse_init_submount_lookup(fi->submount_lookup, nodeid);
		inode->i_flags |= S_AUTOMOUNT;
		/* is_new_inode stays false, so unlock_new_inode() is skipped below */
		goto done;
	}

retry:
	inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
	if (!inode)
		return NULL;

	is_new_inode = inode_state_read_once(inode) & I_NEW;
	if (is_new_inode) {
		inode->i_flags |= S_NOATIME;
		/*
		 * S_NOCMTIME is set unless this is a regular file on a
		 * writeback_cache connection.
		 */
		if (!fc->writeback_cache || !S_ISREG(attr->mode))
			inode->i_flags |= S_NOCMTIME;
		inode->i_generation = generation;
		fuse_init_inode(inode, attr, fc);
	} else if (fuse_stale_inode(inode, generation, attr)) {
		/* nodeid was reused, any I/O on the old inode should fail */
		fuse_make_bad(inode);
		/*
		 * A stale non-root inode is unhashed and released so the
		 * retry allocates a fresh one; the root inode is kept (now
		 * marked bad) and used as is.
		 */
		if (inode != d_inode(sb->s_root)) {
			remove_inode_hash(inode);
			iput(inode);
			goto retry;
		}
	}
	fi = get_fuse_inode(inode);
	/* Count this lookup; fuse_evict_inode() passes nlookup to the forget. */
	spin_lock(&fi->lock);
	fi->nlookup++;
	spin_unlock(&fi->lock);
done:
	fuse_change_attributes_i(inode, attr, NULL, attr_valid, attr_version,
				 evict_ctr);
	if (is_new_inode)
		unlock_new_inode(inode);
	return inode;
}
} 541 542 return NULL; 543 } 544 545 int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, 546 loff_t offset, loff_t len) 547 { 548 struct fuse_inode *fi; 549 struct inode *inode; 550 pgoff_t pg_start; 551 pgoff_t pg_end; 552 553 inode = fuse_ilookup(fc, nodeid, NULL); 554 if (!inode) 555 return -ENOENT; 556 557 fi = get_fuse_inode(inode); 558 spin_lock(&fi->lock); 559 fi->attr_version = atomic64_inc_return(&fc->attr_version); 560 spin_unlock(&fi->lock); 561 562 fuse_invalidate_attr(inode); 563 forget_all_cached_acls(inode); 564 if (offset >= 0) { 565 pg_start = offset >> PAGE_SHIFT; 566 if (len <= 0) 567 pg_end = -1; 568 else 569 pg_end = (offset + len - 1) >> PAGE_SHIFT; 570 invalidate_inode_pages2_range(inode->i_mapping, 571 pg_start, pg_end); 572 } 573 iput(inode); 574 return 0; 575 } 576 577 void fuse_try_prune_one_inode(struct fuse_conn *fc, u64 nodeid) 578 { 579 struct inode *inode; 580 581 inode = fuse_ilookup(fc, nodeid, NULL); 582 if (!inode) 583 return; 584 d_prune_aliases(inode); 585 iput(inode); 586 } 587 588 bool fuse_lock_inode(struct inode *inode) 589 { 590 bool locked = false; 591 592 if (!get_fuse_conn(inode)->parallel_dirops) { 593 mutex_lock(&get_fuse_inode(inode)->mutex); 594 locked = true; 595 } 596 597 return locked; 598 } 599 600 void fuse_unlock_inode(struct inode *inode, bool locked) 601 { 602 if (locked) 603 mutex_unlock(&get_fuse_inode(inode)->mutex); 604 } 605 606 static void fuse_umount_begin(struct super_block *sb) 607 { 608 struct fuse_conn *fc = get_fuse_conn_super(sb); 609 610 if (fc->no_force_umount) 611 return; 612 613 fuse_chan_abort(fc->chan, false); 614 615 // Only retire block-device-based superblocks. 
616 if (sb->s_bdev != NULL) 617 retire_super(sb); 618 } 619 620 static void fuse_send_destroy(struct fuse_mount *fm) 621 { 622 if (fm->fc->conn_init) { 623 FUSE_ARGS(args); 624 625 args.opcode = FUSE_DESTROY; 626 args.force = true; 627 args.nocreds = true; 628 fuse_simple_request(fm, &args); 629 } 630 } 631 632 static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) 633 { 634 stbuf->f_type = FUSE_SUPER_MAGIC; 635 stbuf->f_bsize = attr->bsize; 636 stbuf->f_frsize = attr->frsize; 637 stbuf->f_blocks = attr->blocks; 638 stbuf->f_bfree = attr->bfree; 639 stbuf->f_bavail = attr->bavail; 640 stbuf->f_files = attr->files; 641 stbuf->f_ffree = attr->ffree; 642 stbuf->f_namelen = attr->namelen; 643 /* fsid is left zero */ 644 } 645 646 static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) 647 { 648 struct super_block *sb = dentry->d_sb; 649 struct fuse_mount *fm = get_fuse_mount_super(sb); 650 FUSE_ARGS(args); 651 struct fuse_statfs_out outarg; 652 int err; 653 654 if (!fuse_allow_current_process(fm->fc)) { 655 buf->f_type = FUSE_SUPER_MAGIC; 656 return 0; 657 } 658 659 memset(&outarg, 0, sizeof(outarg)); 660 args.in_numargs = 0; 661 args.opcode = FUSE_STATFS; 662 args.nodeid = get_node_id(d_inode(dentry)); 663 args.out_numargs = 1; 664 args.out_args[0].size = sizeof(outarg); 665 args.out_args[0].value = &outarg; 666 err = fuse_simple_request(fm, &args); 667 if (!err) 668 convert_fuse_statfs(buf, &outarg.st); 669 return err; 670 } 671 672 static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void) 673 { 674 struct fuse_sync_bucket *bucket; 675 676 bucket = kzalloc_obj(*bucket, GFP_KERNEL | __GFP_NOFAIL); 677 init_waitqueue_head(&bucket->waitq); 678 /* Initial active count */ 679 atomic_set(&bucket->count, 1); 680 return bucket; 681 } 682 683 static void fuse_sync_fs_writes(struct fuse_conn *fc) 684 { 685 struct fuse_sync_bucket *bucket, *new_bucket; 686 int count; 687 688 new_bucket = fuse_sync_bucket_alloc(); 689 
spin_lock(&fc->lock); 690 bucket = rcu_dereference_protected(fc->curr_bucket, 1); 691 count = atomic_read(&bucket->count); 692 WARN_ON(count < 1); 693 /* No outstanding writes? */ 694 if (count == 1) { 695 spin_unlock(&fc->lock); 696 kfree(new_bucket); 697 return; 698 } 699 700 /* 701 * Completion of new bucket depends on completion of this bucket, so add 702 * one more count. 703 */ 704 atomic_inc(&new_bucket->count); 705 rcu_assign_pointer(fc->curr_bucket, new_bucket); 706 spin_unlock(&fc->lock); 707 /* 708 * Drop initial active count. At this point if all writes in this and 709 * ancestor buckets complete, the count will go to zero and this task 710 * will be woken up. 711 */ 712 atomic_dec(&bucket->count); 713 714 wait_event(bucket->waitq, atomic_read(&bucket->count) == 0); 715 716 /* Drop temp count on descendant bucket */ 717 fuse_sync_bucket_dec(new_bucket); 718 kfree_rcu(bucket, rcu); 719 } 720 721 static int fuse_sync_fs(struct super_block *sb, int wait) 722 { 723 struct fuse_mount *fm = get_fuse_mount_super(sb); 724 struct fuse_conn *fc = fm->fc; 725 struct fuse_syncfs_in inarg; 726 FUSE_ARGS(args); 727 int err; 728 729 /* 730 * Userspace cannot handle the wait == 0 case. Avoid a 731 * gratuitous roundtrip. 732 */ 733 if (!wait) 734 return 0; 735 736 /* The filesystem is being unmounted. Nothing to do. 
*/ 737 if (!sb->s_root) 738 return 0; 739 740 if (!fc->sync_fs) 741 return 0; 742 743 fuse_sync_fs_writes(fc); 744 745 memset(&inarg, 0, sizeof(inarg)); 746 args.in_numargs = 1; 747 args.in_args[0].size = sizeof(inarg); 748 args.in_args[0].value = &inarg; 749 args.opcode = FUSE_SYNCFS; 750 args.nodeid = get_node_id(sb->s_root->d_inode); 751 args.out_numargs = 0; 752 753 err = fuse_simple_request(fm, &args); 754 if (err == -ENOSYS) { 755 fc->sync_fs = 0; 756 err = 0; 757 } 758 759 return err; 760 } 761 762 enum { 763 OPT_SOURCE, 764 OPT_SUBTYPE, 765 OPT_FD, 766 OPT_ROOTMODE, 767 OPT_USER_ID, 768 OPT_GROUP_ID, 769 OPT_DEFAULT_PERMISSIONS, 770 OPT_ALLOW_OTHER, 771 OPT_MAX_READ, 772 OPT_BLKSIZE, 773 OPT_ERR 774 }; 775 776 static const struct fs_parameter_spec fuse_fs_parameters[] = { 777 fsparam_string ("source", OPT_SOURCE), 778 fsparam_fd ("fd", OPT_FD), 779 fsparam_u32oct ("rootmode", OPT_ROOTMODE), 780 fsparam_uid ("user_id", OPT_USER_ID), 781 fsparam_gid ("group_id", OPT_GROUP_ID), 782 fsparam_flag ("default_permissions", OPT_DEFAULT_PERMISSIONS), 783 fsparam_flag ("allow_other", OPT_ALLOW_OTHER), 784 fsparam_u32 ("max_read", OPT_MAX_READ), 785 fsparam_u32 ("blksize", OPT_BLKSIZE), 786 fsparam_string ("subtype", OPT_SUBTYPE), 787 {} 788 }; 789 790 static int fuse_opt_fd(struct fs_context *fsc, struct file *file) 791 { 792 struct fuse_fs_context *ctx = fsc->fs_private; 793 794 if (file->f_op != &fuse_dev_operations) 795 return invalfc(fsc, "fd is not a fuse device"); 796 /* 797 * Require mount to happen from the same user namespace which 798 * opened /dev/fuse to prevent potential attacks. 
799 */ 800 if (file->f_cred->user_ns != fsc->user_ns) 801 return invalfc(fsc, "wrong user namespace for fuse device"); 802 803 ctx->fud = fuse_dev_grab(file); 804 805 return 0; 806 } 807 808 static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param) 809 { 810 struct fs_parse_result result; 811 struct fuse_fs_context *ctx = fsc->fs_private; 812 int opt; 813 kuid_t kuid; 814 kgid_t kgid; 815 816 if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { 817 /* 818 * Ignore options coming from mount(MS_REMOUNT) for backward 819 * compatibility. 820 */ 821 if (fsc->oldapi) 822 return 0; 823 824 return invalfc(fsc, "No changes allowed in reconfigure"); 825 } 826 827 opt = fs_parse(fsc, fuse_fs_parameters, param, &result); 828 if (opt < 0) 829 return opt; 830 831 switch (opt) { 832 case OPT_SOURCE: 833 if (fsc->source) 834 return invalfc(fsc, "Multiple sources specified"); 835 fsc->source = param->string; 836 param->string = NULL; 837 break; 838 839 case OPT_SUBTYPE: 840 if (ctx->subtype) 841 return invalfc(fsc, "Multiple subtypes specified"); 842 ctx->subtype = param->string; 843 param->string = NULL; 844 return 0; 845 846 case OPT_FD: 847 if (param->type == fs_value_is_file) { 848 return fuse_opt_fd(fsc, param->file); 849 } else { 850 struct file *file __free(fput) = fget(result.uint_32); 851 if (!file) 852 return -EBADF; 853 854 return fuse_opt_fd(fsc, file); 855 } 856 857 case OPT_ROOTMODE: 858 if (!fuse_valid_type(result.uint_32)) 859 return invalfc(fsc, "Invalid rootmode"); 860 ctx->rootmode = result.uint_32; 861 ctx->rootmode_present = true; 862 break; 863 864 case OPT_USER_ID: 865 kuid = result.uid; 866 /* 867 * The requested uid must be representable in the 868 * filesystem's idmapping. 
869 */ 870 if (!kuid_has_mapping(fsc->user_ns, kuid)) 871 return invalfc(fsc, "Invalid user_id"); 872 ctx->user_id = kuid; 873 ctx->user_id_present = true; 874 break; 875 876 case OPT_GROUP_ID: 877 kgid = result.gid; 878 /* 879 * The requested gid must be representable in the 880 * filesystem's idmapping. 881 */ 882 if (!kgid_has_mapping(fsc->user_ns, kgid)) 883 return invalfc(fsc, "Invalid group_id"); 884 ctx->group_id = kgid; 885 ctx->group_id_present = true; 886 break; 887 888 case OPT_DEFAULT_PERMISSIONS: 889 ctx->default_permissions = true; 890 break; 891 892 case OPT_ALLOW_OTHER: 893 ctx->allow_other = true; 894 break; 895 896 case OPT_MAX_READ: 897 ctx->max_read = result.uint_32; 898 break; 899 900 case OPT_BLKSIZE: 901 if (!ctx->is_bdev) 902 return invalfc(fsc, "blksize only supported for fuseblk"); 903 ctx->blksize = result.uint_32; 904 break; 905 906 default: 907 return -EINVAL; 908 } 909 910 return 0; 911 } 912 913 static void fuse_free_fsc(struct fs_context *fsc) 914 { 915 struct fuse_fs_context *ctx = fsc->fs_private; 916 917 if (ctx) { 918 if (ctx->fud) 919 fuse_dev_put(ctx->fud); 920 kfree(ctx->subtype); 921 kfree(ctx); 922 } 923 } 924 925 static int fuse_show_options(struct seq_file *m, struct dentry *root) 926 { 927 struct super_block *sb = root->d_sb; 928 struct fuse_conn *fc = get_fuse_conn_super(sb); 929 930 if (fc->legacy_opts_show) { 931 seq_printf(m, ",user_id=%u", 932 from_kuid_munged(fc->user_ns, fc->user_id)); 933 seq_printf(m, ",group_id=%u", 934 from_kgid_munged(fc->user_ns, fc->group_id)); 935 if (fc->default_permissions) 936 seq_puts(m, ",default_permissions"); 937 if (fc->allow_other) 938 seq_puts(m, ",allow_other"); 939 if (fc->max_read != ~0) 940 seq_printf(m, ",max_read=%u", fc->max_read); 941 if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE) 942 seq_printf(m, ",blksize=%lu", sb->s_blocksize); 943 } 944 #ifdef CONFIG_FUSE_DAX 945 if (fc->dax_mode == FUSE_DAX_ALWAYS) 946 seq_puts(m, ",dax=always"); 947 else if (fc->dax_mode 
/*
 * Initialise a fuse_conn and bind it to its first mount and its channel.
 *
 * @fc:      connection to initialise; its previous contents are wiped
 * @fm:      mount that becomes the first entry on fc->mounts
 * @user_ns: user namespace for the connection; a reference is taken here
 * @fch:     channel to associate with the connection
 *
 * The connection starts with a reference count of 1.  References are also
 * taken on the current task's active pid namespace and on @user_ns; they are
 * dropped again in fuse_conn_put() and delayed_release() respectively.
 */
void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
		    struct user_namespace *user_ns, struct fuse_chan *fch)
{
	/* Start from all-zero so only non-zero defaults need setting below. */
	memset(fc, 0, sizeof(*fc));
	spin_lock_init(&fc->lock);
	init_rwsem(&fc->killsb);
	refcount_set(&fc->count, 1);
	atomic_set(&fc->epoch, 1);
	INIT_WORK(&fc->epoch_work, fuse_epoch_work);
	INIT_LIST_HEAD(&fc->entry);
	fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
	atomic64_set(&fc->khctr, 0);
	fc->polled_files = RB_ROOT;
	/*
	 * Both counters start at 1: callers use an attr_version or evict_ctr
	 * of 0 to mean "none" (see fuse_change_attributes_i() and
	 * fuse_change_attributes_common()).
	 */
	atomic64_set(&fc->attr_version, 1);
	atomic64_set(&fc->evict_ctr, 1);
	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
	fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
	fc->user_ns = get_user_ns(user_ns);
	fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
	fc->max_pages_limit = fuse_max_pages_limit;
	fc->name_max = FUSE_NAME_LOW_MAX;

	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
		fuse_backing_files_init(fc);

	/* Link the mount and the connection to each other. */
	INIT_LIST_HEAD(&fc->mounts);
	list_add(&fm->fc_entry, &fc->mounts);
	fm->fc = fc;
	/* Link the channel and the connection to each other. */
	fuse_chan_set_fc(fch, fc);
	fc->chan = fch;
}
EXPORT_SYMBOL_GPL(fuse_conn_init);
1014 WARN_ON(atomic_read(&bucket->count) != 1); 1015 kfree(bucket); 1016 } 1017 if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) 1018 fuse_backing_files_free(fc); 1019 call_rcu(&fc->rcu, delayed_release); 1020 } 1021 EXPORT_SYMBOL_GPL(fuse_conn_put); 1022 1023 struct fuse_conn *fuse_conn_get(struct fuse_conn *fc) 1024 { 1025 refcount_inc(&fc->count); 1026 return fc; 1027 } 1028 EXPORT_SYMBOL_GPL(fuse_conn_get); 1029 1030 dev_t fuse_conn_get_id(struct fuse_conn *fc) 1031 { 1032 return fc->dev; 1033 } 1034 1035 static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned int mode) 1036 { 1037 struct fuse_attr attr; 1038 memset(&attr, 0, sizeof(attr)); 1039 1040 attr.mode = mode; 1041 attr.ino = FUSE_ROOT_ID; 1042 attr.nlink = 1; 1043 return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0, 0); 1044 } 1045 1046 struct fuse_inode_handle { 1047 u64 nodeid; 1048 u32 generation; 1049 }; 1050 1051 static struct dentry *fuse_get_dentry(struct super_block *sb, 1052 struct fuse_inode_handle *handle) 1053 { 1054 struct fuse_conn *fc = get_fuse_conn_super(sb); 1055 struct inode *inode; 1056 struct dentry *entry; 1057 int err = -ESTALE; 1058 1059 if (handle->nodeid == 0) 1060 goto out_err; 1061 1062 inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid); 1063 if (!inode) { 1064 struct fuse_entry_out outarg; 1065 1066 if (!fc->export_support) 1067 goto out_err; 1068 1069 err = fuse_lookup_name(sb, handle->nodeid, &QSTR("."), &outarg, 1070 &inode); 1071 if (err && err != -ENOENT) 1072 goto out_err; 1073 if (err || !inode) { 1074 err = -ESTALE; 1075 goto out_err; 1076 } 1077 err = -EIO; 1078 if (get_node_id(inode) != handle->nodeid) 1079 goto out_iput; 1080 } 1081 err = -ESTALE; 1082 if (inode->i_generation != handle->generation) 1083 goto out_iput; 1084 1085 entry = d_obtain_alias(inode); 1086 if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) 1087 fuse_invalidate_entry_cache(entry); 1088 1089 return entry; 1090 1091 out_iput: 1092 iput(inode); 1093 out_err: 
1094 return ERR_PTR(err); 1095 } 1096 1097 static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len, 1098 struct inode *parent) 1099 { 1100 int len = parent ? 6 : 3; 1101 u64 nodeid; 1102 u32 generation; 1103 1104 if (*max_len < len) { 1105 *max_len = len; 1106 return FILEID_INVALID; 1107 } 1108 1109 nodeid = get_fuse_inode(inode)->nodeid; 1110 generation = inode->i_generation; 1111 1112 fh[0] = (u32)(nodeid >> 32); 1113 fh[1] = (u32)(nodeid & 0xffffffff); 1114 fh[2] = generation; 1115 1116 if (parent) { 1117 nodeid = get_fuse_inode(parent)->nodeid; 1118 generation = parent->i_generation; 1119 1120 fh[3] = (u32)(nodeid >> 32); 1121 fh[4] = (u32)(nodeid & 0xffffffff); 1122 fh[5] = generation; 1123 } 1124 1125 *max_len = len; 1126 return parent ? FILEID_INO64_GEN_PARENT : FILEID_INO64_GEN; 1127 } 1128 1129 static struct dentry *fuse_fh_to_dentry(struct super_block *sb, 1130 struct fid *fid, int fh_len, int fh_type) 1131 { 1132 struct fuse_inode_handle handle; 1133 1134 if ((fh_type != FILEID_INO64_GEN && 1135 fh_type != FILEID_INO64_GEN_PARENT) || fh_len < 3) 1136 return NULL; 1137 1138 handle.nodeid = (u64) fid->raw[0] << 32; 1139 handle.nodeid |= (u64) fid->raw[1]; 1140 handle.generation = fid->raw[2]; 1141 return fuse_get_dentry(sb, &handle); 1142 } 1143 1144 static struct dentry *fuse_fh_to_parent(struct super_block *sb, 1145 struct fid *fid, int fh_len, int fh_type) 1146 { 1147 struct fuse_inode_handle parent; 1148 1149 if (fh_type != FILEID_INO64_GEN_PARENT || fh_len < 6) 1150 return NULL; 1151 1152 parent.nodeid = (u64) fid->raw[3] << 32; 1153 parent.nodeid |= (u64) fid->raw[4]; 1154 parent.generation = fid->raw[5]; 1155 return fuse_get_dentry(sb, &parent); 1156 } 1157 1158 static struct dentry *fuse_get_parent(struct dentry *child) 1159 { 1160 struct inode *child_inode = d_inode(child); 1161 struct fuse_conn *fc = get_fuse_conn(child_inode); 1162 struct inode *inode; 1163 struct dentry *parent; 1164 struct fuse_entry_out outarg; 1165 int err; 1166 
1167 if (!fc->export_support) 1168 return ERR_PTR(-ESTALE); 1169 1170 err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode), 1171 &dotdot_name, &outarg, &inode); 1172 if (err) { 1173 if (err == -ENOENT) 1174 return ERR_PTR(-ESTALE); 1175 return ERR_PTR(err); 1176 } 1177 1178 parent = d_obtain_alias(inode); 1179 if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) 1180 fuse_invalidate_entry_cache(parent); 1181 1182 return parent; 1183 } 1184 1185 /* only for fid encoding; no support for file handle */ 1186 static const struct export_operations fuse_export_fid_operations = { 1187 .encode_fh = fuse_encode_fh, 1188 }; 1189 1190 static const struct export_operations fuse_export_operations = { 1191 .fh_to_dentry = fuse_fh_to_dentry, 1192 .fh_to_parent = fuse_fh_to_parent, 1193 .encode_fh = fuse_encode_fh, 1194 .get_parent = fuse_get_parent, 1195 }; 1196 1197 static const struct super_operations fuse_super_operations = { 1198 .alloc_inode = fuse_alloc_inode, 1199 .free_inode = fuse_free_inode, 1200 .evict_inode = fuse_evict_inode, 1201 .write_inode = fuse_write_inode, 1202 .drop_inode = inode_just_drop, 1203 .umount_begin = fuse_umount_begin, 1204 .statfs = fuse_statfs, 1205 .sync_fs = fuse_sync_fs, 1206 .show_options = fuse_show_options, 1207 }; 1208 1209 static void sanitize_global_limit(unsigned int *limit) 1210 { 1211 /* 1212 * The default maximum number of async requests is calculated to consume 1213 * 1/2^13 of the total memory, assuming 392 bytes per request. 
1214 */ 1215 if (*limit == 0) 1216 *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392; 1217 1218 if (*limit >= 1 << 16) 1219 *limit = (1 << 16) - 1; 1220 } 1221 1222 static int set_global_limit(const char *val, const struct kernel_param *kp) 1223 { 1224 int rv; 1225 1226 rv = param_set_uint(val, kp); 1227 if (rv) 1228 return rv; 1229 1230 sanitize_global_limit((unsigned int *)kp->arg); 1231 1232 return 0; 1233 } 1234 1235 static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg) 1236 { 1237 int cap_sys_admin = capable(CAP_SYS_ADMIN); 1238 1239 if (arg->minor < 13) 1240 return; 1241 1242 sanitize_global_limit(&max_user_bgreq); 1243 sanitize_global_limit(&max_user_congthresh); 1244 1245 if (arg->max_background) { 1246 unsigned int max_background = arg->max_background; 1247 1248 if (!cap_sys_admin && max_background > max_user_bgreq) 1249 max_background = max_user_bgreq; 1250 1251 fuse_chan_max_background_set(fc->chan, max_background); 1252 } 1253 if (arg->congestion_threshold) { 1254 fc->congestion_threshold = arg->congestion_threshold; 1255 1256 if (!cap_sys_admin && 1257 fc->congestion_threshold > max_user_congthresh) 1258 fc->congestion_threshold = max_user_congthresh; 1259 } 1260 } 1261 1262 struct fuse_init_args { 1263 struct fuse_args args; 1264 struct fuse_init_in in; 1265 struct fuse_init_out out; 1266 struct fuse_mount *fm; 1267 }; 1268 1269 static void process_init_reply(struct fuse_args *args, int error) 1270 { 1271 struct fuse_init_args *ia = container_of(args, typeof(*ia), args); 1272 struct fuse_mount *fm = ia->fm; 1273 struct fuse_conn *fc = fm->fc; 1274 struct fuse_init_out *arg = &ia->out; 1275 bool ok = true; 1276 1277 if (error || arg->major != FUSE_KERNEL_VERSION) 1278 ok = false; 1279 else { 1280 unsigned long ra_pages; 1281 unsigned int timeout = 0; 1282 1283 process_init_limits(fc, arg); 1284 1285 if (arg->minor >= 6) { 1286 u64 flags = arg->flags; 1287 1288 if (flags & FUSE_INIT_EXT) 1289 flags |= (u64) arg->flags2 
<< 32; 1290 1291 ra_pages = arg->max_readahead / PAGE_SIZE; 1292 if (flags & FUSE_ASYNC_READ) 1293 fc->async_read = 1; 1294 if (!(flags & FUSE_POSIX_LOCKS)) 1295 fc->no_lock = 1; 1296 if (arg->minor >= 17) { 1297 if (!(flags & FUSE_FLOCK_LOCKS)) 1298 fc->no_flock = 1; 1299 } else { 1300 if (!(flags & FUSE_POSIX_LOCKS)) 1301 fc->no_flock = 1; 1302 } 1303 if (flags & FUSE_ATOMIC_O_TRUNC) 1304 fc->atomic_o_trunc = 1; 1305 if (arg->minor >= 9) { 1306 /* LOOKUP has dependency on proto version */ 1307 if (flags & FUSE_EXPORT_SUPPORT) 1308 fc->export_support = 1; 1309 } 1310 if (flags & FUSE_BIG_WRITES) 1311 fc->big_writes = 1; 1312 if (flags & FUSE_DONT_MASK) 1313 fc->dont_mask = 1; 1314 if (flags & FUSE_AUTO_INVAL_DATA) 1315 fc->auto_inval_data = 1; 1316 else if (flags & FUSE_EXPLICIT_INVAL_DATA) 1317 fc->explicit_inval_data = 1; 1318 if (flags & FUSE_DO_READDIRPLUS) { 1319 fc->do_readdirplus = 1; 1320 if (flags & FUSE_READDIRPLUS_AUTO) 1321 fc->readdirplus_auto = 1; 1322 } 1323 if (flags & FUSE_ASYNC_DIO) 1324 fc->async_dio = 1; 1325 if (flags & FUSE_WRITEBACK_CACHE) 1326 fc->writeback_cache = 1; 1327 if (flags & FUSE_PARALLEL_DIROPS) 1328 fc->parallel_dirops = 1; 1329 if (flags & FUSE_HANDLE_KILLPRIV) 1330 fc->handle_killpriv = 1; 1331 if (arg->time_gran && arg->time_gran <= 1000000000) 1332 fm->sb->s_time_gran = arg->time_gran; 1333 if ((flags & FUSE_POSIX_ACL)) { 1334 fc->default_permissions = 1; 1335 fc->posix_acl = 1; 1336 } 1337 if (flags & FUSE_CACHE_SYMLINKS) 1338 fc->cache_symlinks = 1; 1339 if (flags & FUSE_ABORT_ERROR) 1340 fc->abort_err = 1; 1341 if (flags & FUSE_MAX_PAGES) { 1342 fc->max_pages = 1343 min_t(unsigned int, fc->max_pages_limit, 1344 max_t(unsigned int, arg->max_pages, 1)); 1345 1346 /* 1347 * PATH_MAX file names might need two pages for 1348 * ops like rename 1349 */ 1350 if (fc->max_pages > 1) 1351 fc->name_max = FUSE_NAME_MAX; 1352 } 1353 if (IS_ENABLED(CONFIG_FUSE_DAX)) { 1354 if (flags & FUSE_MAP_ALIGNMENT && 1355 
!fuse_dax_check_alignment(fc, arg->map_alignment)) { 1356 ok = false; 1357 } 1358 if (flags & FUSE_HAS_INODE_DAX) 1359 fc->inode_dax = 1; 1360 } 1361 if (flags & FUSE_HANDLE_KILLPRIV_V2) { 1362 fc->handle_killpriv_v2 = 1; 1363 fm->sb->s_flags |= SB_NOSEC; 1364 } 1365 if (flags & FUSE_SETXATTR_EXT) 1366 fc->setxattr_ext = 1; 1367 if (flags & FUSE_SECURITY_CTX) 1368 fc->init_security = 1; 1369 if (flags & FUSE_CREATE_SUPP_GROUP) 1370 fc->create_supp_group = 1; 1371 if (flags & FUSE_DIRECT_IO_ALLOW_MMAP) 1372 fc->direct_io_allow_mmap = 1; 1373 /* 1374 * max_stack_depth is the max stack depth of FUSE fs, 1375 * so it has to be at least 1 to support passthrough 1376 * to backing files. 1377 * 1378 * with max_stack_depth > 1, the backing files can be 1379 * on a stacked fs (e.g. overlayfs) themselves and with 1380 * max_stack_depth == 1, FUSE fs can be stacked as the 1381 * underlying fs of a stacked fs (e.g. overlayfs). 1382 * 1383 * Also don't allow the combination of FUSE_PASSTHROUGH 1384 * and FUSE_WRITEBACK_CACHE, current design doesn't handle 1385 * them together. 
1386 */ 1387 if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH) && 1388 (flags & FUSE_PASSTHROUGH) && 1389 arg->max_stack_depth > 0 && 1390 arg->max_stack_depth <= FILESYSTEM_MAX_STACK_DEPTH && 1391 !(flags & FUSE_WRITEBACK_CACHE)) { 1392 fc->passthrough = 1; 1393 fc->max_stack_depth = arg->max_stack_depth; 1394 fm->sb->s_stack_depth = arg->max_stack_depth; 1395 } 1396 if (flags & FUSE_NO_EXPORT_SUPPORT) 1397 fm->sb->s_export_op = &fuse_export_fid_operations; 1398 if (flags & FUSE_ALLOW_IDMAP) { 1399 if (fc->default_permissions) 1400 fm->sb->s_iflags &= ~SB_I_NOIDMAP; 1401 else 1402 ok = false; 1403 } 1404 if (flags & FUSE_OVER_IO_URING && fuse_uring_enabled()) 1405 fuse_chan_io_uring_enable(fc->chan); 1406 1407 if (flags & FUSE_REQUEST_TIMEOUT) 1408 timeout = arg->request_timeout; 1409 } else { 1410 ra_pages = fc->max_read / PAGE_SIZE; 1411 fc->no_lock = 1; 1412 fc->no_flock = 1; 1413 } 1414 1415 fuse_init_server_timeout(fc->chan, timeout); 1416 1417 fm->sb->s_bdi->ra_pages = 1418 min(fm->sb->s_bdi->ra_pages, ra_pages); 1419 fc->minor = arg->minor; 1420 fc->max_write = arg->minor < 5 ? 
4096 : arg->max_write; 1421 fc->max_write = max_t(unsigned, 4096, fc->max_write); 1422 fc->conn_init = 1; 1423 } 1424 kfree(ia); 1425 1426 if (!ok) { 1427 fc->conn_init = 0; 1428 fc->conn_error = 1; 1429 fuse_chan_set_initialized(fc->chan, NULL); 1430 } else { 1431 struct fuse_chan_param cp = { 1432 .minor = fc->minor, 1433 .max_write = fc->max_write, 1434 .max_pages = fc->max_pages, 1435 }; 1436 fuse_chan_set_initialized(fc->chan, &cp); 1437 } 1438 } 1439 1440 static struct fuse_init_args *fuse_new_init(struct fuse_mount *fm) 1441 { 1442 struct fuse_init_args *ia; 1443 u64 flags; 1444 1445 ia = kzalloc_obj(*ia, GFP_KERNEL | __GFP_NOFAIL); 1446 1447 ia->fm = fm; 1448 ia->in.major = FUSE_KERNEL_VERSION; 1449 ia->in.minor = FUSE_KERNEL_MINOR_VERSION; 1450 ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE; 1451 flags = 1452 FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | 1453 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | 1454 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | 1455 FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | 1456 FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | 1457 FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT | 1458 FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL | 1459 FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | 1460 FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA | 1461 FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | 1462 FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP | 1463 FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP | 1464 FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | FUSE_ALLOW_IDMAP | 1465 FUSE_REQUEST_TIMEOUT; 1466 #ifdef CONFIG_FUSE_DAX 1467 if (fm->fc->dax) 1468 flags |= FUSE_MAP_ALIGNMENT; 1469 if (fuse_is_inode_dax_mode(fm->fc->dax_mode)) 1470 flags |= FUSE_HAS_INODE_DAX; 1471 #endif 1472 if (fm->fc->auto_submounts) 1473 flags |= FUSE_SUBMOUNTS; 1474 if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) 1475 flags |= FUSE_PASSTHROUGH; 1476 
1477 /* 1478 * This is just an information flag for fuse server. No need to check 1479 * the reply - server is either sending IORING_OP_URING_CMD or not. 1480 */ 1481 if (fuse_uring_enabled()) 1482 flags |= FUSE_OVER_IO_URING; 1483 1484 ia->in.flags = flags; 1485 ia->in.flags2 = flags >> 32; 1486 1487 ia->args.opcode = FUSE_INIT; 1488 ia->args.in_numargs = 1; 1489 ia->args.in_args[0].size = sizeof(ia->in); 1490 ia->args.in_args[0].value = &ia->in; 1491 ia->args.out_numargs = 1; 1492 /* Variable length argument used for backward compatibility 1493 with interface version < 7.5. Rest of init_out is zeroed 1494 by do_get_request(), so a short reply is not a problem */ 1495 ia->args.out_argvar = true; 1496 ia->args.out_args[0].size = sizeof(ia->out); 1497 ia->args.out_args[0].value = &ia->out; 1498 ia->args.force = true; 1499 ia->args.nocreds = true; 1500 1501 return ia; 1502 } 1503 1504 int fuse_send_init(struct fuse_mount *fm) 1505 { 1506 struct fuse_init_args *ia = fuse_new_init(fm); 1507 int err; 1508 1509 if (fm->fc->sync_init) { 1510 ia->args.abort_on_kill = true; 1511 err = fuse_simple_request(fm, &ia->args); 1512 /* Ignore size of init reply */ 1513 if (err > 0) 1514 err = 0; 1515 } else { 1516 ia->args.end = process_init_reply; 1517 err = fuse_simple_background(fm, &ia->args, GFP_KERNEL); 1518 if (!err) 1519 return 0; 1520 } 1521 process_init_reply(&ia->args, err); 1522 if (fm->fc->conn_error) 1523 return -ENOTCONN; 1524 return 0; 1525 } 1526 EXPORT_SYMBOL_GPL(fuse_send_init); 1527 1528 void fuse_free_conn(struct fuse_conn *fc) 1529 { 1530 kfree(fc); 1531 } 1532 EXPORT_SYMBOL_GPL(fuse_free_conn); 1533 1534 static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) 1535 { 1536 int err; 1537 char *suffix = ""; 1538 1539 if (sb->s_bdev) { 1540 suffix = "-fuseblk"; 1541 /* 1542 * sb->s_bdi points to blkdev's bdi however we want to redirect 1543 * it to our private bdi... 
1544 */ 1545 bdi_put(sb->s_bdi); 1546 sb->s_bdi = &noop_backing_dev_info; 1547 } 1548 err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev), 1549 MINOR(fc->dev), suffix); 1550 if (err) 1551 return err; 1552 1553 sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT; 1554 1555 /* 1556 * For a single fuse filesystem use max 1% of dirty + 1557 * writeback threshold. 1558 * 1559 * This gives about 1M of write buffer for memory maps on a 1560 * machine with 1G and 10% dirty_ratio, which should be more 1561 * than enough. 1562 * 1563 * Privileged users can raise it by writing to 1564 * 1565 * /sys/class/bdi/<bdi>/max_ratio 1566 */ 1567 bdi_set_max_ratio(sb->s_bdi, 1); 1568 1569 return 0; 1570 } 1571 1572 static void fuse_fill_attr_from_inode(struct fuse_attr *attr, 1573 const struct fuse_inode *fi) 1574 { 1575 struct timespec64 atime = inode_get_atime(&fi->inode); 1576 struct timespec64 mtime = inode_get_mtime(&fi->inode); 1577 struct timespec64 ctime = inode_get_ctime(&fi->inode); 1578 1579 *attr = (struct fuse_attr){ 1580 .ino = fi->inode.i_ino, 1581 .size = fi->inode.i_size, 1582 .blocks = fi->inode.i_blocks, 1583 .atime = atime.tv_sec, 1584 .mtime = mtime.tv_sec, 1585 .ctime = ctime.tv_sec, 1586 .atimensec = atime.tv_nsec, 1587 .mtimensec = mtime.tv_nsec, 1588 .ctimensec = ctime.tv_nsec, 1589 .mode = fi->inode.i_mode, 1590 .nlink = fi->inode.i_nlink, 1591 .uid = __kuid_val(fi->inode.i_uid), 1592 .gid = __kgid_val(fi->inode.i_gid), 1593 .rdev = fi->inode.i_rdev, 1594 .blksize = 1u << fi->inode.i_blkbits, 1595 }; 1596 } 1597 1598 static void fuse_sb_defaults(struct super_block *sb) 1599 { 1600 sb->s_magic = FUSE_SUPER_MAGIC; 1601 sb->s_op = &fuse_super_operations; 1602 sb->s_xattr = fuse_xattr_handlers; 1603 sb->s_maxbytes = MAX_LFS_FILESIZE; 1604 sb->s_time_gran = 1; 1605 sb->s_export_op = &fuse_export_operations; 1606 sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE; 1607 sb->s_iflags |= SB_I_NOIDMAP; 1608 sb->s_iflags |= SB_I_NO_DATA_INTEGRITY; 1609 if (sb->s_user_ns != 
&init_user_ns) 1610 sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; 1611 sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); 1612 } 1613 1614 static int fuse_fill_super_submount(struct super_block *sb, 1615 struct fuse_inode *parent_fi) 1616 { 1617 struct fuse_mount *fm = get_fuse_mount_super(sb); 1618 struct super_block *parent_sb = parent_fi->inode.i_sb; 1619 struct fuse_attr root_attr; 1620 struct inode *root; 1621 struct fuse_submount_lookup *sl; 1622 struct fuse_inode *fi; 1623 1624 fuse_sb_defaults(sb); 1625 fm->sb = sb; 1626 1627 WARN_ON(sb->s_bdi != &noop_backing_dev_info); 1628 sb->s_bdi = bdi_get(parent_sb->s_bdi); 1629 1630 sb->s_xattr = parent_sb->s_xattr; 1631 sb->s_export_op = parent_sb->s_export_op; 1632 sb->s_time_gran = parent_sb->s_time_gran; 1633 sb->s_blocksize = parent_sb->s_blocksize; 1634 sb->s_blocksize_bits = parent_sb->s_blocksize_bits; 1635 sb->s_subtype = kstrdup(parent_sb->s_subtype, GFP_KERNEL); 1636 if (parent_sb->s_subtype && !sb->s_subtype) 1637 return -ENOMEM; 1638 1639 fuse_fill_attr_from_inode(&root_attr, parent_fi); 1640 root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0, 1641 fuse_get_evict_ctr(fm->fc)); 1642 /* 1643 * This inode is just a duplicate, so it is not looked up and 1644 * its nlookup should not be incremented. fuse_iget() does 1645 * that, though, so undo it here. 1646 */ 1647 fi = get_fuse_inode(root); 1648 fi->nlookup--; 1649 1650 set_default_d_op(sb, &fuse_dentry_operations); 1651 sb->s_root = d_make_root(root); 1652 if (!sb->s_root) 1653 return -ENOMEM; 1654 1655 /* 1656 * Grab the parent's submount_lookup pointer and take a 1657 * reference on the shared nlookup from the parent. This is to 1658 * prevent the last forget for this nodeid from getting 1659 * triggered until all users have finished with it. 
1660 */ 1661 sl = parent_fi->submount_lookup; 1662 WARN_ON(!sl); 1663 if (sl) { 1664 refcount_inc(&sl->count); 1665 fi->submount_lookup = sl; 1666 } 1667 1668 return 0; 1669 } 1670 1671 /* Filesystem context private data holds the FUSE inode of the mount point */ 1672 static int fuse_get_tree_submount(struct fs_context *fsc) 1673 { 1674 struct fuse_mount *fm; 1675 struct fuse_inode *mp_fi = fsc->fs_private; 1676 struct fuse_conn *fc = get_fuse_conn(&mp_fi->inode); 1677 struct super_block *sb; 1678 int err; 1679 1680 fm = kzalloc_obj(struct fuse_mount); 1681 if (!fm) 1682 return -ENOMEM; 1683 1684 fm->fc = fuse_conn_get(fc); 1685 fsc->s_fs_info = fm; 1686 sb = sget_fc(fsc, NULL, set_anon_super_fc); 1687 if (fsc->s_fs_info) 1688 fuse_mount_destroy(fm); 1689 if (IS_ERR(sb)) 1690 return PTR_ERR(sb); 1691 1692 /* Initialize superblock, making @mp_fi its root */ 1693 err = fuse_fill_super_submount(sb, mp_fi); 1694 if (err) { 1695 deactivate_locked_super(sb); 1696 return err; 1697 } 1698 1699 down_write(&fc->killsb); 1700 list_add_tail(&fm->fc_entry, &fc->mounts); 1701 up_write(&fc->killsb); 1702 1703 sb->s_flags |= SB_ACTIVE; 1704 fsc->root = dget(sb->s_root); 1705 1706 return 0; 1707 } 1708 1709 static const struct fs_context_operations fuse_context_submount_ops = { 1710 .get_tree = fuse_get_tree_submount, 1711 }; 1712 1713 int fuse_init_fs_context_submount(struct fs_context *fsc) 1714 { 1715 fsc->ops = &fuse_context_submount_ops; 1716 return 0; 1717 } 1718 EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount); 1719 1720 int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) 1721 { 1722 struct fuse_dev *fud = ctx->fud; 1723 struct fuse_mount *fm = get_fuse_mount_super(sb); 1724 struct fuse_conn *fc = fm->fc; 1725 struct inode *root; 1726 struct dentry *root_dentry; 1727 int err; 1728 1729 err = -EINVAL; 1730 if (sb->s_flags & SB_MANDLOCK) 1731 goto err; 1732 1733 rcu_assign_pointer(fc->curr_bucket, fuse_sync_bucket_alloc()); 1734 
fuse_sb_defaults(sb); 1735 1736 if (ctx->is_bdev) { 1737 #ifdef CONFIG_BLOCK 1738 err = -EINVAL; 1739 if (!sb_set_blocksize(sb, ctx->blksize)) 1740 goto err; 1741 #endif 1742 fc->sync_fs = 1; 1743 } else { 1744 sb->s_blocksize = PAGE_SIZE; 1745 sb->s_blocksize_bits = PAGE_SHIFT; 1746 } 1747 1748 sb->s_subtype = ctx->subtype; 1749 ctx->subtype = NULL; 1750 if (IS_ENABLED(CONFIG_FUSE_DAX)) { 1751 err = fuse_dax_conn_alloc(fc, ctx->dax_mode, ctx->dax_dev); 1752 if (err) 1753 goto err; 1754 } 1755 1756 fc->dev = sb->s_dev; 1757 fm->sb = sb; 1758 err = fuse_bdi_init(fc, sb); 1759 if (err) 1760 goto err_free_dax; 1761 1762 /* Handle umasking inside the fuse code */ 1763 if (sb->s_flags & SB_POSIXACL) 1764 fc->dont_mask = 1; 1765 sb->s_flags |= SB_POSIXACL; 1766 1767 fc->default_permissions = ctx->default_permissions; 1768 fc->allow_other = ctx->allow_other; 1769 fc->user_id = ctx->user_id; 1770 fc->group_id = ctx->group_id; 1771 fc->legacy_opts_show = ctx->legacy_opts_show; 1772 fc->max_read = max_t(unsigned int, 4096, ctx->max_read); 1773 fc->destroy = ctx->destroy; 1774 fc->no_control = ctx->no_control; 1775 fc->no_force_umount = ctx->no_force_umount; 1776 1777 err = -ENOMEM; 1778 root = fuse_get_root_inode(sb, ctx->rootmode); 1779 set_default_d_op(sb, &fuse_dentry_operations); 1780 root_dentry = d_make_root(root); 1781 if (!root_dentry) 1782 goto err_free_dax; 1783 1784 mutex_lock(&fuse_mutex); 1785 err = -EINVAL; 1786 if (fud) { 1787 if (fuse_dev_is_installed(fud)) 1788 goto err_unlock; 1789 if (fuse_dev_is_sync_init(fud)) 1790 fc->sync_init = 1; 1791 } 1792 1793 err = fuse_ctl_add_conn(fc); 1794 if (err) 1795 goto err_unlock; 1796 1797 list_add_tail(&fc->entry, &fuse_conn_list); 1798 sb->s_root = root_dentry; 1799 if (fud) 1800 fuse_dev_install(fud, fc->chan); 1801 1802 mutex_unlock(&fuse_mutex); 1803 return 0; 1804 1805 err_unlock: 1806 mutex_unlock(&fuse_mutex); 1807 dput(root_dentry); 1808 err_free_dax: 1809 if (IS_ENABLED(CONFIG_FUSE_DAX)) 1810 
fuse_dax_conn_free(fc); 1811 err: 1812 return err; 1813 } 1814 EXPORT_SYMBOL_GPL(fuse_fill_super_common); 1815 1816 static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) 1817 { 1818 struct fuse_fs_context *ctx = fsc->fs_private; 1819 struct fuse_mount *fm; 1820 int err; 1821 1822 if (!ctx->fud || !ctx->rootmode_present || 1823 !ctx->user_id_present || !ctx->group_id_present) 1824 return -EINVAL; 1825 1826 err = fuse_fill_super_common(sb, ctx); 1827 if (err) 1828 return err; 1829 1830 fm = get_fuse_mount_super(sb); 1831 1832 return fuse_send_init(fm); 1833 } 1834 1835 /* 1836 * This is the path where user supplied an already initialized fuse dev. In 1837 * this case never create a new super if the old one is gone. 1838 */ 1839 static int fuse_set_no_super(struct super_block *sb, struct fs_context *fsc) 1840 { 1841 return -ENOTCONN; 1842 } 1843 1844 static int fuse_test_super(struct super_block *sb, struct fs_context *fsc) 1845 { 1846 return fuse_dev_verify(fsc->sget_key, get_fuse_conn_super(sb)->chan); 1847 } 1848 1849 static int fuse_get_tree(struct fs_context *fsc) 1850 { 1851 struct fuse_fs_context *ctx = fsc->fs_private; 1852 struct fuse_conn *fc; 1853 struct fuse_mount *fm; 1854 struct super_block *sb; 1855 struct fuse_chan *fch __free(fuse_chan_free) = fuse_dev_chan_new(); 1856 int err; 1857 1858 if (!fch) 1859 return -ENOMEM; 1860 1861 fc = kmalloc_obj(*fc); 1862 if (!fc) 1863 return -ENOMEM; 1864 1865 fm = kzalloc_obj(*fm); 1866 if (!fm) { 1867 kfree(fc); 1868 return -ENOMEM; 1869 } 1870 1871 fuse_conn_init(fc, fm, fsc->user_ns, no_free_ptr(fch)); 1872 fc->release = fuse_free_conn; 1873 1874 fsc->s_fs_info = fm; 1875 1876 if (IS_ENABLED(CONFIG_BLOCK) && ctx->is_bdev) { 1877 err = get_tree_bdev(fsc, fuse_fill_super); 1878 goto out; 1879 } 1880 /* 1881 * While block dev mount can be initialized with a dummy device fd 1882 * (found by device name), normal fuse mounts can't 1883 */ 1884 err = -EINVAL; 1885 if (!ctx->fud) 1886 goto out; 1887 
1888 /* 1889 * Allow creating a fuse mount with an already initialized fuse 1890 * connection 1891 */ 1892 if (fuse_dev_is_installed(ctx->fud)) { 1893 fsc->sget_key = ctx->fud; 1894 sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super); 1895 err = PTR_ERR_OR_ZERO(sb); 1896 if (!IS_ERR(sb)) 1897 fsc->root = dget(sb->s_root); 1898 } else { 1899 err = get_tree_nodev(fsc, fuse_fill_super); 1900 } 1901 out: 1902 if (fsc->s_fs_info) 1903 fuse_mount_destroy(fm); 1904 return err; 1905 } 1906 1907 static const struct fs_context_operations fuse_context_ops = { 1908 .free = fuse_free_fsc, 1909 .parse_param = fuse_parse_param, 1910 .reconfigure = fuse_reconfigure, 1911 .get_tree = fuse_get_tree, 1912 }; 1913 1914 /* 1915 * Set up the filesystem mount context. 1916 */ 1917 static int fuse_init_fs_context(struct fs_context *fsc) 1918 { 1919 struct fuse_fs_context *ctx; 1920 1921 ctx = kzalloc_obj(struct fuse_fs_context); 1922 if (!ctx) 1923 return -ENOMEM; 1924 1925 ctx->max_read = ~0; 1926 ctx->blksize = FUSE_DEFAULT_BLKSIZE; 1927 ctx->legacy_opts_show = true; 1928 1929 #ifdef CONFIG_BLOCK 1930 if (fsc->fs_type == &fuseblk_fs_type) { 1931 ctx->is_bdev = true; 1932 ctx->destroy = true; 1933 } 1934 #endif 1935 1936 fsc->fs_private = ctx; 1937 fsc->ops = &fuse_context_ops; 1938 return 0; 1939 } 1940 1941 bool fuse_mount_remove(struct fuse_mount *fm) 1942 { 1943 struct fuse_conn *fc = fm->fc; 1944 bool last = false; 1945 1946 down_write(&fc->killsb); 1947 list_del_init(&fm->fc_entry); 1948 if (list_empty(&fc->mounts)) 1949 last = true; 1950 up_write(&fc->killsb); 1951 1952 return last; 1953 } 1954 EXPORT_SYMBOL_GPL(fuse_mount_remove); 1955 1956 void fuse_conn_destroy(struct fuse_mount *fm) 1957 { 1958 struct fuse_conn *fc = fm->fc; 1959 1960 if (fc->destroy) 1961 fuse_send_destroy(fm); 1962 1963 fuse_chan_abort(fc->chan, false); 1964 fuse_chan_wait_aborted(fc->chan); 1965 1966 if (!list_empty(&fc->entry)) { 1967 mutex_lock(&fuse_mutex); 1968 list_del(&fc->entry); 1969 
fuse_ctl_remove_conn(fc); 1970 mutex_unlock(&fuse_mutex); 1971 } 1972 } 1973 EXPORT_SYMBOL_GPL(fuse_conn_destroy); 1974 1975 static void fuse_sb_destroy(struct super_block *sb) 1976 { 1977 struct fuse_mount *fm = get_fuse_mount_super(sb); 1978 bool last; 1979 1980 if (sb->s_root) { 1981 last = fuse_mount_remove(fm); 1982 if (last) 1983 fuse_conn_destroy(fm); 1984 } 1985 } 1986 1987 void fuse_mount_destroy(struct fuse_mount *fm) 1988 { 1989 fuse_conn_put(fm->fc); 1990 kfree_rcu(fm, rcu); 1991 } 1992 EXPORT_SYMBOL(fuse_mount_destroy); 1993 1994 static void fuse_kill_sb_anon(struct super_block *sb) 1995 { 1996 fuse_sb_destroy(sb); 1997 kill_anon_super(sb); 1998 fuse_mount_destroy(get_fuse_mount_super(sb)); 1999 } 2000 2001 static struct file_system_type fuse_fs_type = { 2002 .owner = THIS_MODULE, 2003 .name = "fuse", 2004 .fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT | FS_ALLOW_IDMAP, 2005 .init_fs_context = fuse_init_fs_context, 2006 .parameters = fuse_fs_parameters, 2007 .kill_sb = fuse_kill_sb_anon, 2008 }; 2009 MODULE_ALIAS_FS("fuse"); 2010 2011 #ifdef CONFIG_BLOCK 2012 static void fuse_kill_sb_blk(struct super_block *sb) 2013 { 2014 fuse_sb_destroy(sb); 2015 kill_block_super(sb); 2016 fuse_mount_destroy(get_fuse_mount_super(sb)); 2017 } 2018 2019 static struct file_system_type fuseblk_fs_type = { 2020 .owner = THIS_MODULE, 2021 .name = "fuseblk", 2022 .init_fs_context = fuse_init_fs_context, 2023 .parameters = fuse_fs_parameters, 2024 .kill_sb = fuse_kill_sb_blk, 2025 .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE | FS_ALLOW_IDMAP, 2026 }; 2027 MODULE_ALIAS_FS("fuseblk"); 2028 2029 static inline int register_fuseblk(void) 2030 { 2031 return register_filesystem(&fuseblk_fs_type); 2032 } 2033 2034 static inline void unregister_fuseblk(void) 2035 { 2036 unregister_filesystem(&fuseblk_fs_type); 2037 } 2038 #else 2039 static inline int register_fuseblk(void) 2040 { 2041 return 0; 2042 } 2043 2044 static inline void unregister_fuseblk(void) 2045 { 2046 } 2047 #endif 2048 
2049 static void fuse_inode_init_once(void *foo) 2050 { 2051 struct inode *inode = foo; 2052 2053 inode_init_once(inode); 2054 } 2055 2056 static int __init fuse_fs_init(void) 2057 { 2058 int err; 2059 2060 fuse_inode_cachep = kmem_cache_create("fuse_inode", 2061 sizeof(struct fuse_inode), 0, 2062 SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT, 2063 fuse_inode_init_once); 2064 err = -ENOMEM; 2065 if (!fuse_inode_cachep) 2066 goto out; 2067 2068 err = register_fuseblk(); 2069 if (err) 2070 goto out2; 2071 2072 err = register_filesystem(&fuse_fs_type); 2073 if (err) 2074 goto out3; 2075 2076 err = fuse_sysctl_register(); 2077 if (err) 2078 goto out4; 2079 2080 return 0; 2081 2082 out4: 2083 unregister_filesystem(&fuse_fs_type); 2084 out3: 2085 unregister_fuseblk(); 2086 out2: 2087 kmem_cache_destroy(fuse_inode_cachep); 2088 out: 2089 return err; 2090 } 2091 2092 static void fuse_fs_cleanup(void) 2093 { 2094 fuse_sysctl_unregister(); 2095 unregister_filesystem(&fuse_fs_type); 2096 unregister_fuseblk(); 2097 2098 /* 2099 * Make sure all delayed rcu free inodes are flushed before we 2100 * destroy cache. 
2101 */ 2102 rcu_barrier(); 2103 kmem_cache_destroy(fuse_inode_cachep); 2104 } 2105 2106 static struct kobject *fuse_kobj; 2107 2108 static int fuse_sysfs_init(void) 2109 { 2110 int err; 2111 2112 fuse_kobj = kobject_create_and_add("fuse", fs_kobj); 2113 if (!fuse_kobj) { 2114 err = -ENOMEM; 2115 goto out_err; 2116 } 2117 2118 err = sysfs_create_mount_point(fuse_kobj, "connections"); 2119 if (err) 2120 goto out_fuse_unregister; 2121 2122 return 0; 2123 2124 out_fuse_unregister: 2125 kobject_put(fuse_kobj); 2126 out_err: 2127 return err; 2128 } 2129 2130 static void fuse_sysfs_cleanup(void) 2131 { 2132 sysfs_remove_mount_point(fuse_kobj, "connections"); 2133 kobject_put(fuse_kobj); 2134 } 2135 2136 static int __init fuse_init(void) 2137 { 2138 int res; 2139 2140 pr_info("init (API version %i.%i)\n", 2141 FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); 2142 2143 INIT_LIST_HEAD(&fuse_conn_list); 2144 res = fuse_fs_init(); 2145 if (res) 2146 goto err; 2147 2148 res = fuse_dev_init(); 2149 if (res) 2150 goto err_fs_cleanup; 2151 2152 res = fuse_sysfs_init(); 2153 if (res) 2154 goto err_dev_cleanup; 2155 2156 res = fuse_ctl_init(); 2157 if (res) 2158 goto err_sysfs_cleanup; 2159 2160 fuse_dentry_tree_init(); 2161 2162 sanitize_global_limit(&max_user_bgreq); 2163 sanitize_global_limit(&max_user_congthresh); 2164 2165 return 0; 2166 2167 err_sysfs_cleanup: 2168 fuse_sysfs_cleanup(); 2169 err_dev_cleanup: 2170 fuse_dev_cleanup(); 2171 err_fs_cleanup: 2172 fuse_fs_cleanup(); 2173 err: 2174 return res; 2175 } 2176 2177 static void __exit fuse_exit(void) 2178 { 2179 pr_debug("exit\n"); 2180 2181 fuse_dentry_tree_cleanup(); 2182 fuse_ctl_cleanup(); 2183 fuse_sysfs_cleanup(); 2184 fuse_fs_cleanup(); 2185 fuse_dev_cleanup(); 2186 } 2187 2188 module_init(fuse_init); 2189 module_exit(fuse_exit); 2190