/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/statfs.h>
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/exportfs.h>
#include <linux/posix_acl.h>
#include <linux/pid_namespace.h>
#include <uapi/linux/magic.h>

MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
MODULE_LICENSE("GPL");

static struct kmem_cache *fuse_inode_cachep;
struct list_head fuse_conn_list;
DEFINE_MUTEX(fuse_mutex);

static int set_global_limit(const char *val, const struct kernel_param *kp);

unsigned int fuse_max_pages_limit = 256;

unsigned max_user_bgreq;
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
                  &max_user_bgreq, 0644);
__MODULE_PARM_TYPE(max_user_bgreq, "uint");
MODULE_PARM_DESC(max_user_bgreq,
 "Global limit for the maximum number of backgrounded requests an "
 "unprivileged user can set");

unsigned max_user_congthresh;
module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
                  &max_user_congthresh, 0644);
__MODULE_PARM_TYPE(max_user_congthresh, "uint");
MODULE_PARM_DESC(max_user_congthresh,
 "Global limit for the maximum congestion threshold an "
 "unprivileged user can set");

#define FUSE_DEFAULT_BLKSIZE 512

/** Maximum number of outstanding background requests */
#define FUSE_DEFAULT_MAX_BACKGROUND 12

/** Congestion starts at 75% of maximum */
#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)

#ifdef CONFIG_BLOCK
static struct file_system_type fuseblk_fs_type;
#endif

struct fuse_forget_link *fuse_alloc_forget(void)
{
        return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT);
}

static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void)
{
        struct fuse_submount_lookup *sl;

        sl = kzalloc(sizeof(struct fuse_submount_lookup), GFP_KERNEL_ACCOUNT);
        if (!sl)
                return NULL;
        sl->forget = fuse_alloc_forget();
        if (!sl->forget)
                goto out_free;

        return sl;

out_free:
        kfree(sl);
        return NULL;
}

static struct inode *fuse_alloc_inode(struct super_block *sb)
{
        struct fuse_inode *fi;

        fi = alloc_inode_sb(sb, fuse_inode_cachep, GFP_KERNEL);
        if (!fi)
                return NULL;

        fi->i_time = 0;
        fi->inval_mask = ~0;
        fi->nodeid = 0;
        fi->nlookup = 0;
        fi->attr_version = 0;
        fi->orig_ino = 0;
        fi->state = 0;
        fi->submount_lookup = NULL;
        mutex_init(&fi->mutex);
        spin_lock_init(&fi->lock);
        fi->forget = fuse_alloc_forget();
        if (!fi->forget)
                goto out_free;

        if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi))
                goto out_free_forget;

        if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
                fuse_inode_backing_set(fi, NULL);

        return &fi->inode;

out_free_forget:
        kfree(fi->forget);
out_free:
        kmem_cache_free(fuse_inode_cachep, fi);
        return NULL;
}

static void fuse_free_inode(struct inode *inode)
{
        struct fuse_inode *fi = get_fuse_inode(inode);

        mutex_destroy(&fi->mutex);
        kfree(fi->forget);
#ifdef CONFIG_FUSE_DAX
        kfree(fi->dax);
#endif
        if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
                fuse_backing_put(fuse_inode_backing(fi));

        kmem_cache_free(fuse_inode_cachep, fi);
}

static void fuse_cleanup_submount_lookup(struct fuse_conn *fc,
                                         struct fuse_submount_lookup *sl)
{
        if (!refcount_dec_and_test(&sl->count))
                return;

        fuse_queue_forget(fc, sl->forget, sl->nodeid, 1);
        sl->forget = NULL;
        kfree(sl);
}

static void fuse_evict_inode(struct inode *inode)
{
        struct fuse_inode *fi = get_fuse_inode(inode);

        /* Will write inode on close/munmap and in all other dirtiers */
        WARN_ON(inode->i_state & I_DIRTY_INODE);

        truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
        if (inode->i_sb->s_flags & SB_ACTIVE) {
                struct fuse_conn *fc = get_fuse_conn(inode);

                if (FUSE_IS_DAX(inode))
                        fuse_dax_inode_cleanup(inode);
                if (fi->nlookup) {
                        fuse_queue_forget(fc, fi->forget, fi->nodeid,
                                          fi->nlookup);
                        fi->forget = NULL;
                }

                if (fi->submount_lookup) {
                        fuse_cleanup_submount_lookup(fc, fi->submount_lookup);
                        fi->submount_lookup = NULL;
                }
                /*
                 * Evict of non-deleted inode may race with outstanding
                 * LOOKUP/READDIRPLUS requests and result in inconsistency when
                 * the request finishes. Deal with that here by bumping a
                 * counter that can be compared to the starting value.
                 */
                if (inode->i_nlink > 0)
                        atomic64_inc(&fc->evict_ctr);
        }
        if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
                WARN_ON(fi->iocachectr != 0);
                WARN_ON(!list_empty(&fi->write_files));
                WARN_ON(!list_empty(&fi->queued_writes));
        }
}

static int fuse_reconfigure(struct fs_context *fsc)
{
        struct super_block *sb = fsc->root->d_sb;

        sync_filesystem(sb);
        if (fsc->sb_flags & SB_MANDLOCK)
                return -EINVAL;

        return 0;
}

/*
 * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
 * so that it will fit.
 */
static ino_t fuse_squash_ino(u64 ino64)
{
        ino_t ino = (ino_t) ino64;
        if (sizeof(ino_t) < sizeof(u64))
                ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8;
        return ino;
}

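/*
 * Illustrative example (added commentary, not from the original source):
 * on a 32-bit arch, nodeid 0x0000000100000000 truncates to 0x00000000 and
 * then XORs in the high word, yielding ino 0x00000001.  Folding the top
 * bits in this way keeps distinct 64-bit nodeids likely to map to
 * distinct 32-bit inode numbers.
 */
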
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
                                   struct fuse_statx *sx,
                                   u64 attr_valid, u32 cache_mask,
                                   u64 evict_ctr)
{
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_inode *fi = get_fuse_inode(inode);

        lockdep_assert_held(&fi->lock);

        /*
         * Clear basic stats from invalid mask.
         *
         * Don't do this if this is coming from a fuse_iget() call and there
         * might have been a racing evict which would've invalidated the result
         * if the attr_version would've been preserved.
         *
         * !evict_ctr -> this is create
         * fi->attr_version != 0 -> this is not a new inode
         * evict_ctr == fuse_get_evict_ctr() -> no evicts during the request
         */
        if (!evict_ctr || fi->attr_version || evict_ctr == fuse_get_evict_ctr(fc))
                set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);

        fi->attr_version = atomic64_inc_return(&fc->attr_version);
        fi->i_time = attr_valid;

        inode->i_ino = fuse_squash_ino(attr->ino);
        inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
        set_nlink(inode, attr->nlink);
        inode->i_uid = make_kuid(fc->user_ns, attr->uid);
        inode->i_gid = make_kgid(fc->user_ns, attr->gid);
        inode->i_blocks = attr->blocks;

        /* Sanitize nsecs */
        attr->atimensec = min_t(u32, attr->atimensec, NSEC_PER_SEC - 1);
        attr->mtimensec = min_t(u32, attr->mtimensec, NSEC_PER_SEC - 1);
        attr->ctimensec = min_t(u32, attr->ctimensec, NSEC_PER_SEC - 1);

        inode_set_atime(inode, attr->atime, attr->atimensec);
        /* mtime from server may be stale due to local buffered write */
        if (!(cache_mask & STATX_MTIME)) {
                inode_set_mtime(inode, attr->mtime, attr->mtimensec);
        }
        if (!(cache_mask & STATX_CTIME)) {
                inode_set_ctime(inode, attr->ctime, attr->ctimensec);
        }

        if (sx) {
                /* Sanitize nsecs */
                sx->btime.tv_nsec =
                        min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);

                /*
                 * Btime has been queried, cache is valid (whether or not btime
                 * is available) so clear STATX_BTIME from inval_mask.
                 *
                 * Availability of the btime attribute is indicated in
                 * FUSE_I_BTIME
                 */
                set_mask_bits(&fi->inval_mask, STATX_BTIME, 0);
                if (sx->mask & STATX_BTIME) {
                        set_bit(FUSE_I_BTIME, &fi->state);
                        fi->i_btime.tv_sec = sx->btime.tv_sec;
                        fi->i_btime.tv_nsec = sx->btime.tv_nsec;
                }
        }

        if (attr->blksize != 0)
                inode->i_blkbits = ilog2(attr->blksize);
        else
                inode->i_blkbits = inode->i_sb->s_blocksize_bits;

        /*
         * Don't set the sticky bit in i_mode, unless we want the VFS
         * to check permissions. This prevents failures due to the
         * check in may_delete().
         */
        fi->orig_i_mode = inode->i_mode;
        if (!fc->default_permissions)
                inode->i_mode &= ~S_ISVTX;

        fi->orig_ino = attr->ino;

        /*
         * We are refreshing inode data and it is possible that another
         * client set suid/sgid or security.capability xattr. So clear
         * S_NOSEC. Ideally, we could have cleared it only if suid/sgid
         * was set or if security.capability xattr was set. But we don't
         * know if security.capability has been set or not. So clear it
         * anyway. It's less efficient but should be safe.
         */
        inode->i_flags &= ~S_NOSEC;
}

u32 fuse_get_cache_mask(struct inode *inode)
{
        struct fuse_conn *fc = get_fuse_conn(inode);

        if (!fc->writeback_cache || !S_ISREG(inode->i_mode))
                return 0;

        return STATX_MTIME | STATX_CTIME | STATX_SIZE;
}

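/*
 * Note (added commentary): with writeback caching enabled on a regular
 * file the kernel, not the server, is authoritative for size, mtime and
 * ctime, so fuse_change_attributes_i() below overwrites the server-sent
 * values with the locally cached ones before applying the rest.
 */
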
static void fuse_change_attributes_i(struct inode *inode, struct fuse_attr *attr,
                                     struct fuse_statx *sx, u64 attr_valid,
                                     u64 attr_version, u64 evict_ctr)
{
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_inode *fi = get_fuse_inode(inode);
        u32 cache_mask;
        loff_t oldsize;
        struct timespec64 old_mtime;

        spin_lock(&fi->lock);
        /*
         * In case of writeback_cache enabled, writes update mtime, ctime and
         * may update i_size. In these cases trust the cached value in the
         * inode.
         */
        cache_mask = fuse_get_cache_mask(inode);
        if (cache_mask & STATX_SIZE)
                attr->size = i_size_read(inode);

        if (cache_mask & STATX_MTIME) {
                attr->mtime = inode_get_mtime_sec(inode);
                attr->mtimensec = inode_get_mtime_nsec(inode);
        }
        if (cache_mask & STATX_CTIME) {
                attr->ctime = inode_get_ctime_sec(inode);
                attr->ctimensec = inode_get_ctime_nsec(inode);
        }

        if ((attr_version != 0 && fi->attr_version > attr_version) ||
            test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
                spin_unlock(&fi->lock);
                return;
        }

        old_mtime = inode_get_mtime(inode);
        fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask,
                                      evict_ctr);

        oldsize = inode->i_size;
        /*
         * In case of writeback_cache enabled, the cached writes beyond EOF
         * extend local i_size without keeping userspace server in sync. So,
         * attr->size coming from server can be stale. We cannot trust it.
         */
        if (!(cache_mask & STATX_SIZE))
                i_size_write(inode, attr->size);
        spin_unlock(&fi->lock);

        if (!cache_mask && S_ISREG(inode->i_mode)) {
                bool inval = false;

                if (oldsize != attr->size) {
                        truncate_pagecache(inode, attr->size);
                        if (!fc->explicit_inval_data)
                                inval = true;
                } else if (fc->auto_inval_data) {
                        struct timespec64 new_mtime = {
                                .tv_sec = attr->mtime,
                                .tv_nsec = attr->mtimensec,
                        };

                        /*
                         * Auto inval mode also checks and invalidates if mtime
                         * has changed.
                         */
                        if (!timespec64_equal(&old_mtime, &new_mtime))
                                inval = true;
                }

                if (inval)
                        invalidate_inode_pages2(inode->i_mapping);
        }

        if (IS_ENABLED(CONFIG_FUSE_DAX))
                fuse_dax_dontcache(inode, attr->flags);
}

void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
                            struct fuse_statx *sx, u64 attr_valid,
                            u64 attr_version)
{
        fuse_change_attributes_i(inode, attr, sx, attr_valid, attr_version, 0);
}

static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl,
                                      u64 nodeid)
{
        sl->nodeid = nodeid;
        refcount_set(&sl->count, 1);
}

static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr,
                            struct fuse_conn *fc)
{
        inode->i_mode = attr->mode & S_IFMT;
        inode->i_size = attr->size;
        inode_set_mtime(inode, attr->mtime, attr->mtimensec);
        inode_set_ctime(inode, attr->ctime, attr->ctimensec);
        if (S_ISREG(inode->i_mode)) {
                fuse_init_common(inode);
                fuse_init_file_inode(inode, attr->flags);
        } else if (S_ISDIR(inode->i_mode))
                fuse_init_dir(inode);
        else if (S_ISLNK(inode->i_mode))
                fuse_init_symlink(inode);
        else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
                 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
                fuse_init_common(inode);
                init_special_inode(inode, inode->i_mode,
                                   new_decode_dev(attr->rdev));
        } else
                BUG();
        /*
         * Ensure that we don't cache acls for daemons without FUSE_POSIX_ACL
         * so they see the exact same behavior as before.
         */
        if (!fc->posix_acl)
                inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
}

static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
{
        u64 nodeid = *(u64 *) _nodeidp;
        if (get_node_id(inode) == nodeid)
                return 1;
        else
                return 0;
}

static int fuse_inode_set(struct inode *inode, void *_nodeidp)
{
        u64 nodeid = *(u64 *) _nodeidp;
        get_fuse_inode(inode)->nodeid = nodeid;
        return 0;
}

struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
                        int generation, struct fuse_attr *attr,
                        u64 attr_valid, u64 attr_version,
                        u64 evict_ctr)
{
        struct inode *inode;
        struct fuse_inode *fi;
        struct fuse_conn *fc = get_fuse_conn_super(sb);

        /*
         * Auto mount points get their node id from the submount root, which is
         * not a unique identifier within this filesystem.
         *
         * To avoid conflicts, do not place submount points into the inode hash
         * table.
         */
        if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) &&
            S_ISDIR(attr->mode)) {
                struct fuse_inode *fi;

                inode = new_inode(sb);
                if (!inode)
                        return NULL;

                fuse_init_inode(inode, attr, fc);
                fi = get_fuse_inode(inode);
                fi->nodeid = nodeid;
                fi->submount_lookup = fuse_alloc_submount_lookup();
                if (!fi->submount_lookup) {
                        iput(inode);
                        return NULL;
                }
                /* Sets nlookup = 1 on fi->submount_lookup->nlookup */
                fuse_init_submount_lookup(fi->submount_lookup, nodeid);
                inode->i_flags |= S_AUTOMOUNT;
                goto done;
        }

retry:
        inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
        if (!inode)
                return NULL;

        if ((inode->i_state & I_NEW)) {
                inode->i_flags |= S_NOATIME;
                if (!fc->writeback_cache || !S_ISREG(attr->mode))
                        inode->i_flags |= S_NOCMTIME;
                inode->i_generation = generation;
                fuse_init_inode(inode, attr, fc);
                unlock_new_inode(inode);
        } else if (fuse_stale_inode(inode, generation, attr)) {
                /* nodeid was reused, any I/O on the old inode should fail */
                fuse_make_bad(inode);
                if (inode != d_inode(sb->s_root)) {
                        remove_inode_hash(inode);
                        iput(inode);
                        goto retry;
                }
        }
        fi = get_fuse_inode(inode);
        spin_lock(&fi->lock);
        fi->nlookup++;
        spin_unlock(&fi->lock);
done:
        fuse_change_attributes_i(inode, attr, NULL, attr_valid, attr_version,
                                 evict_ctr);
        return inode;
}

struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
                           struct fuse_mount **fm)
{
        struct fuse_mount *fm_iter;
        struct inode *inode;

        WARN_ON(!rwsem_is_locked(&fc->killsb));
        list_for_each_entry(fm_iter, &fc->mounts, fc_entry) {
                if (!fm_iter->sb)
                        continue;

                inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &nodeid);
                if (inode) {
                        if (fm)
                                *fm = fm_iter;
                        return inode;
                }
        }

        return NULL;
}

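/*
 * Note (added commentary): fuse_reverse_inval_inode() below serves the
 * FUSE_NOTIFY_INVAL_INODE path: a negative @offset invalidates attributes
 * only, and @len <= 0 invalidates pages from @offset through EOF.
 */
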
int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
                             loff_t offset, loff_t len)
{
        struct fuse_inode *fi;
        struct inode *inode;
        pgoff_t pg_start;
        pgoff_t pg_end;

        inode = fuse_ilookup(fc, nodeid, NULL);
        if (!inode)
                return -ENOENT;

        fi = get_fuse_inode(inode);
        spin_lock(&fi->lock);
        fi->attr_version = atomic64_inc_return(&fc->attr_version);
        spin_unlock(&fi->lock);

        fuse_invalidate_attr(inode);
        forget_all_cached_acls(inode);
        if (offset >= 0) {
                pg_start = offset >> PAGE_SHIFT;
                if (len <= 0)
                        pg_end = -1;
                else
                        pg_end = (offset + len - 1) >> PAGE_SHIFT;
                invalidate_inode_pages2_range(inode->i_mapping,
                                              pg_start, pg_end);
        }
        iput(inode);
        return 0;
}

bool fuse_lock_inode(struct inode *inode)
{
        bool locked = false;

        if (!get_fuse_conn(inode)->parallel_dirops) {
                mutex_lock(&get_fuse_inode(inode)->mutex);
                locked = true;
        }

        return locked;
}

void fuse_unlock_inode(struct inode *inode, bool locked)
{
        if (locked)
                mutex_unlock(&get_fuse_inode(inode)->mutex);
}

static void fuse_umount_begin(struct super_block *sb)
{
        struct fuse_conn *fc = get_fuse_conn_super(sb);

        if (fc->no_force_umount)
                return;

        fuse_abort_conn(fc);

        // Only retire block-device-based superblocks.
        if (sb->s_bdev != NULL)
                retire_super(sb);
}

static void fuse_send_destroy(struct fuse_mount *fm)
{
        if (fm->fc->conn_init) {
                FUSE_ARGS(args);

                args.opcode = FUSE_DESTROY;
                args.force = true;
                args.nocreds = true;
                fuse_simple_request(fm, &args);
        }
}

static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
{
        stbuf->f_type    = FUSE_SUPER_MAGIC;
        stbuf->f_bsize   = attr->bsize;
        stbuf->f_frsize  = attr->frsize;
        stbuf->f_blocks  = attr->blocks;
        stbuf->f_bfree   = attr->bfree;
        stbuf->f_bavail  = attr->bavail;
        stbuf->f_files   = attr->files;
        stbuf->f_ffree   = attr->ffree;
        stbuf->f_namelen = attr->namelen;
        /* fsid is left zero */
}

static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
{
        struct super_block *sb = dentry->d_sb;
        struct fuse_mount *fm = get_fuse_mount_super(sb);
        FUSE_ARGS(args);
        struct fuse_statfs_out outarg;
        int err;

        if (!fuse_allow_current_process(fm->fc)) {
                buf->f_type = FUSE_SUPER_MAGIC;
                return 0;
        }

        memset(&outarg, 0, sizeof(outarg));
        args.in_numargs = 0;
        args.opcode = FUSE_STATFS;
        args.nodeid = get_node_id(d_inode(dentry));
        args.out_numargs = 1;
        args.out_args[0].size = sizeof(outarg);
        args.out_args[0].value = &outarg;
        err = fuse_simple_request(fm, &args);
        if (!err)
                convert_fuse_statfs(buf, &outarg.st);
        return err;
}

static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void)
{
        struct fuse_sync_bucket *bucket;

        bucket = kzalloc(sizeof(*bucket), GFP_KERNEL | __GFP_NOFAIL);
        if (bucket) {
                init_waitqueue_head(&bucket->waitq);
                /* Initial active count */
                atomic_set(&bucket->count, 1);
        }
        return bucket;
}

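/*
 * Summary of the sync bucket scheme (added commentary): every in-flight
 * write holds a count on fc->curr_bucket.  To wait for all writes issued
 * so far, fuse_sync_fs_writes() installs a fresh bucket, drops the old
 * bucket's initial count, and sleeps until that count reaches zero.
 */
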
static void fuse_sync_fs_writes(struct fuse_conn *fc)
{
        struct fuse_sync_bucket *bucket, *new_bucket;
        int count;

        new_bucket = fuse_sync_bucket_alloc();
        spin_lock(&fc->lock);
        bucket = rcu_dereference_protected(fc->curr_bucket, 1);
        count = atomic_read(&bucket->count);
        WARN_ON(count < 1);
        /* No outstanding writes? */
        if (count == 1) {
                spin_unlock(&fc->lock);
                kfree(new_bucket);
                return;
        }

        /*
         * Completion of new bucket depends on completion of this bucket, so add
         * one more count.
         */
        atomic_inc(&new_bucket->count);
        rcu_assign_pointer(fc->curr_bucket, new_bucket);
        spin_unlock(&fc->lock);
        /*
         * Drop initial active count. At this point if all writes in this and
         * ancestor buckets complete, the count will go to zero and this task
         * will be woken up.
         */
        atomic_dec(&bucket->count);

        wait_event(bucket->waitq, atomic_read(&bucket->count) == 0);

        /* Drop temp count on descendant bucket */
        fuse_sync_bucket_dec(new_bucket);
        kfree_rcu(bucket, rcu);
}

static int fuse_sync_fs(struct super_block *sb, int wait)
{
        struct fuse_mount *fm = get_fuse_mount_super(sb);
        struct fuse_conn *fc = fm->fc;
        struct fuse_syncfs_in inarg;
        FUSE_ARGS(args);
        int err;

        /*
         * Userspace cannot handle the wait == 0 case. Avoid a
         * gratuitous roundtrip.
         */
        if (!wait)
                return 0;

        /* The filesystem is being unmounted. Nothing to do. */
        if (!sb->s_root)
                return 0;

        if (!fc->sync_fs)
                return 0;

        fuse_sync_fs_writes(fc);

        memset(&inarg, 0, sizeof(inarg));
        args.in_numargs = 1;
        args.in_args[0].size = sizeof(inarg);
        args.in_args[0].value = &inarg;
        args.opcode = FUSE_SYNCFS;
        args.nodeid = get_node_id(sb->s_root->d_inode);
        args.out_numargs = 0;

        err = fuse_simple_request(fm, &args);
        if (err == -ENOSYS) {
                fc->sync_fs = 0;
                err = 0;
        }

        return err;
}

enum {
        OPT_SOURCE,
        OPT_SUBTYPE,
        OPT_FD,
        OPT_ROOTMODE,
        OPT_USER_ID,
        OPT_GROUP_ID,
        OPT_DEFAULT_PERMISSIONS,
        OPT_ALLOW_OTHER,
        OPT_MAX_READ,
        OPT_BLKSIZE,
        OPT_ERR
};

static const struct fs_parameter_spec fuse_fs_parameters[] = {
        fsparam_string  ("source",              OPT_SOURCE),
        fsparam_u32     ("fd",                  OPT_FD),
        fsparam_u32oct  ("rootmode",            OPT_ROOTMODE),
        fsparam_uid     ("user_id",             OPT_USER_ID),
        fsparam_gid     ("group_id",            OPT_GROUP_ID),
        fsparam_flag    ("default_permissions", OPT_DEFAULT_PERMISSIONS),
        fsparam_flag    ("allow_other",         OPT_ALLOW_OTHER),
        fsparam_u32     ("max_read",            OPT_MAX_READ),
        fsparam_u32     ("blksize",             OPT_BLKSIZE),
        fsparam_string  ("subtype",             OPT_SUBTYPE),
        {}
};

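/*
 * Illustrative userspace sketch (added commentary; an assumption, not part
 * of this file): a daemon typically opens /dev/fuse and passes the fd
 * together with the options fuse_fill_super() requires (fd, rootmode,
 * user_id, group_id), e.g.:
 *
 *      int fd = open("/dev/fuse", O_RDWR);
 *      char opts[64];
 *
 *      snprintf(opts, sizeof(opts),
 *               "fd=%d,rootmode=40000,user_id=0,group_id=0", fd);
 *      mount("myfs", "/mnt/point", "fuse", 0, opts);
 */
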
static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
{
        struct fs_parse_result result;
        struct fuse_fs_context *ctx = fsc->fs_private;
        int opt;
        kuid_t kuid;
        kgid_t kgid;

        if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
                /*
                 * Ignore options coming from mount(MS_REMOUNT) for backward
                 * compatibility.
                 */
                if (fsc->oldapi)
                        return 0;

                return invalfc(fsc, "No changes allowed in reconfigure");
        }

        opt = fs_parse(fsc, fuse_fs_parameters, param, &result);
        if (opt < 0)
                return opt;

        switch (opt) {
        case OPT_SOURCE:
                if (fsc->source)
                        return invalfc(fsc, "Multiple sources specified");
                fsc->source = param->string;
                param->string = NULL;
                break;

        case OPT_SUBTYPE:
                if (ctx->subtype)
                        return invalfc(fsc, "Multiple subtypes specified");
                ctx->subtype = param->string;
                param->string = NULL;
                return 0;

        case OPT_FD:
                ctx->fd = result.uint_32;
                ctx->fd_present = true;
                break;

        case OPT_ROOTMODE:
                if (!fuse_valid_type(result.uint_32))
                        return invalfc(fsc, "Invalid rootmode");
                ctx->rootmode = result.uint_32;
                ctx->rootmode_present = true;
                break;

        case OPT_USER_ID:
                kuid = result.uid;
                /*
                 * The requested uid must be representable in the
                 * filesystem's idmapping.
                 */
                if (!kuid_has_mapping(fsc->user_ns, kuid))
                        return invalfc(fsc, "Invalid user_id");
                ctx->user_id = kuid;
                ctx->user_id_present = true;
                break;

        case OPT_GROUP_ID:
                kgid = result.gid;
                /*
                 * The requested gid must be representable in the
                 * filesystem's idmapping.
                 */
                if (!kgid_has_mapping(fsc->user_ns, kgid))
                        return invalfc(fsc, "Invalid group_id");
                ctx->group_id = kgid;
                ctx->group_id_present = true;
                break;

        case OPT_DEFAULT_PERMISSIONS:
                ctx->default_permissions = true;
                break;

        case OPT_ALLOW_OTHER:
                ctx->allow_other = true;
                break;

        case OPT_MAX_READ:
                ctx->max_read = result.uint_32;
                break;

        case OPT_BLKSIZE:
                if (!ctx->is_bdev)
                        return invalfc(fsc, "blksize only supported for fuseblk");
                ctx->blksize = result.uint_32;
                break;

        default:
                return -EINVAL;
        }

        return 0;
}

static void fuse_free_fsc(struct fs_context *fsc)
{
        struct fuse_fs_context *ctx = fsc->fs_private;

        if (ctx) {
                kfree(ctx->subtype);
                kfree(ctx);
        }
}

static int fuse_show_options(struct seq_file *m, struct dentry *root)
{
        struct super_block *sb = root->d_sb;
        struct fuse_conn *fc = get_fuse_conn_super(sb);

        if (fc->legacy_opts_show) {
                seq_printf(m, ",user_id=%u",
                           from_kuid_munged(fc->user_ns, fc->user_id));
                seq_printf(m, ",group_id=%u",
                           from_kgid_munged(fc->user_ns, fc->group_id));
                if (fc->default_permissions)
                        seq_puts(m, ",default_permissions");
                if (fc->allow_other)
                        seq_puts(m, ",allow_other");
                if (fc->max_read != ~0)
                        seq_printf(m, ",max_read=%u", fc->max_read);
                if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
                        seq_printf(m, ",blksize=%lu", sb->s_blocksize);
        }
#ifdef CONFIG_FUSE_DAX
        if (fc->dax_mode == FUSE_DAX_ALWAYS)
                seq_puts(m, ",dax=always");
        else if (fc->dax_mode == FUSE_DAX_NEVER)
                seq_puts(m, ",dax=never");
        else if (fc->dax_mode == FUSE_DAX_INODE_USER)
                seq_puts(m, ",dax=inode");
#endif

        return 0;
}

static void fuse_iqueue_init(struct fuse_iqueue *fiq,
                             const struct fuse_iqueue_ops *ops,
                             void *priv)
{
        memset(fiq, 0, sizeof(struct fuse_iqueue));
        spin_lock_init(&fiq->lock);
        init_waitqueue_head(&fiq->waitq);
        INIT_LIST_HEAD(&fiq->pending);
        INIT_LIST_HEAD(&fiq->interrupts);
        fiq->forget_list_tail = &fiq->forget_list_head;
        fiq->connected = 1;
        fiq->ops = ops;
        fiq->priv = priv;
}

static void fuse_pqueue_init(struct fuse_pqueue *fpq)
{
        unsigned int i;

        spin_lock_init(&fpq->lock);
        for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
                INIT_LIST_HEAD(&fpq->processing[i]);
        INIT_LIST_HEAD(&fpq->io);
        fpq->connected = 1;
}

void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
                    struct user_namespace *user_ns,
                    const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv)
{
        memset(fc, 0, sizeof(*fc));
        spin_lock_init(&fc->lock);
        spin_lock_init(&fc->bg_lock);
        init_rwsem(&fc->killsb);
        refcount_set(&fc->count, 1);
        atomic_set(&fc->dev_count, 1);
        init_waitqueue_head(&fc->blocked_waitq);
        fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv);
        INIT_LIST_HEAD(&fc->bg_queue);
        INIT_LIST_HEAD(&fc->entry);
        INIT_LIST_HEAD(&fc->devices);
        atomic_set(&fc->num_waiting, 0);
        fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
        fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
        atomic64_set(&fc->khctr, 0);
        fc->polled_files = RB_ROOT;
        fc->blocked = 0;
        fc->initialized = 0;
        fc->connected = 1;
        atomic64_set(&fc->attr_version, 1);
        atomic64_set(&fc->evict_ctr, 1);
        get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
        fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
        fc->user_ns = get_user_ns(user_ns);
        fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
        fc->max_pages_limit = fuse_max_pages_limit;

        if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
                fuse_backing_files_init(fc);

        INIT_LIST_HEAD(&fc->mounts);
        list_add(&fm->fc_entry, &fc->mounts);
        fm->fc = fc;
}
EXPORT_SYMBOL_GPL(fuse_conn_init);

static void delayed_release(struct rcu_head *p)
{
        struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu);

        put_user_ns(fc->user_ns);
        fc->release(fc);
}

void fuse_conn_put(struct fuse_conn *fc)
{
        if (refcount_dec_and_test(&fc->count)) {
                struct fuse_iqueue *fiq = &fc->iq;
                struct fuse_sync_bucket *bucket;

                if (IS_ENABLED(CONFIG_FUSE_DAX))
                        fuse_dax_conn_free(fc);
                if (fiq->ops->release)
                        fiq->ops->release(fiq);
                put_pid_ns(fc->pid_ns);
                bucket = rcu_dereference_protected(fc->curr_bucket, 1);
                if (bucket) {
                        WARN_ON(atomic_read(&bucket->count) != 1);
                        kfree(bucket);
                }
                if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
                        fuse_backing_files_free(fc);
                call_rcu(&fc->rcu, delayed_release);
        }
}
EXPORT_SYMBOL_GPL(fuse_conn_put);

struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
{
        refcount_inc(&fc->count);
        return fc;
}
EXPORT_SYMBOL_GPL(fuse_conn_get);

static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
{
        struct fuse_attr attr;
        memset(&attr, 0, sizeof(attr));

        attr.mode = mode;
        attr.ino = FUSE_ROOT_ID;
        attr.nlink = 1;
        return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0, 0);
}

struct fuse_inode_handle {
        u64 nodeid;
        u32 generation;
};

static struct dentry *fuse_get_dentry(struct super_block *sb,
                                      struct fuse_inode_handle *handle)
{
        struct fuse_conn *fc = get_fuse_conn_super(sb);
        struct inode *inode;
        struct dentry *entry;
        int err = -ESTALE;

        if (handle->nodeid == 0)
                goto out_err;

        inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
        if (!inode) {
                struct fuse_entry_out outarg;
                const struct qstr name = QSTR_INIT(".", 1);

                if (!fc->export_support)
                        goto out_err;

                err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
                                       &inode);
                if (err && err != -ENOENT)
                        goto out_err;
                if (err || !inode) {
                        err = -ESTALE;
                        goto out_err;
                }
                err = -EIO;
                if (get_node_id(inode) != handle->nodeid)
                        goto out_iput;
        }
        err = -ESTALE;
        if (inode->i_generation != handle->generation)
                goto out_iput;

        entry = d_obtain_alias(inode);
        if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID)
                fuse_invalidate_entry_cache(entry);

        return entry;

out_iput:
        iput(inode);
out_err:
        return ERR_PTR(err);
}

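/*
 * File handle layout used below (added commentary):
 *      fh[0] = nodeid >> 32, fh[1] = nodeid & 0xffffffff, fh[2] = generation,
 * and for FILEID_INO64_GEN_PARENT handles fh[3..5] repeat the same triple
 * for the parent inode.
 */
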
static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len,
                          struct inode *parent)
{
        int len = parent ? 6 : 3;
        u64 nodeid;
        u32 generation;

        if (*max_len < len) {
                *max_len = len;
                return FILEID_INVALID;
        }

        nodeid = get_fuse_inode(inode)->nodeid;
        generation = inode->i_generation;

        fh[0] = (u32)(nodeid >> 32);
        fh[1] = (u32)(nodeid & 0xffffffff);
        fh[2] = generation;

        if (parent) {
                nodeid = get_fuse_inode(parent)->nodeid;
                generation = parent->i_generation;

                fh[3] = (u32)(nodeid >> 32);
                fh[4] = (u32)(nodeid & 0xffffffff);
                fh[5] = generation;
        }

        *max_len = len;
        return parent ? FILEID_INO64_GEN_PARENT : FILEID_INO64_GEN;
}

static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
                                        struct fid *fid, int fh_len,
                                        int fh_type)
{
        struct fuse_inode_handle handle;

        if ((fh_type != FILEID_INO64_GEN &&
             fh_type != FILEID_INO64_GEN_PARENT) || fh_len < 3)
                return NULL;

        handle.nodeid = (u64) fid->raw[0] << 32;
        handle.nodeid |= (u64) fid->raw[1];
        handle.generation = fid->raw[2];
        return fuse_get_dentry(sb, &handle);
}

static struct dentry *fuse_fh_to_parent(struct super_block *sb,
                                        struct fid *fid, int fh_len,
                                        int fh_type)
{
        struct fuse_inode_handle parent;

        if (fh_type != FILEID_INO64_GEN_PARENT || fh_len < 6)
                return NULL;

        parent.nodeid = (u64) fid->raw[3] << 32;
        parent.nodeid |= (u64) fid->raw[4];
        parent.generation = fid->raw[5];
        return fuse_get_dentry(sb, &parent);
}

static struct dentry *fuse_get_parent(struct dentry *child)
{
        struct inode *child_inode = d_inode(child);
        struct fuse_conn *fc = get_fuse_conn(child_inode);
        struct inode *inode;
        struct dentry *parent;
        struct fuse_entry_out outarg;
        int err;

        if (!fc->export_support)
                return ERR_PTR(-ESTALE);

        err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
                               &dotdot_name, &outarg, &inode);
        if (err) {
                if (err == -ENOENT)
                        return ERR_PTR(-ESTALE);
                return ERR_PTR(err);
        }

        parent = d_obtain_alias(inode);
        if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID)
                fuse_invalidate_entry_cache(parent);

        return parent;
}

/* only for fid encoding; no support for file handle */
static const struct export_operations fuse_export_fid_operations = {
        .encode_fh      = fuse_encode_fh,
};

static const struct export_operations fuse_export_operations = {
        .fh_to_dentry   = fuse_fh_to_dentry,
        .fh_to_parent   = fuse_fh_to_parent,
        .encode_fh      = fuse_encode_fh,
        .get_parent     = fuse_get_parent,
};

static const struct super_operations fuse_super_operations = {
        .alloc_inode    = fuse_alloc_inode,
        .free_inode     = fuse_free_inode,
        .evict_inode    = fuse_evict_inode,
        .write_inode    = fuse_write_inode,
        .drop_inode     = generic_delete_inode,
        .umount_begin   = fuse_umount_begin,
        .statfs         = fuse_statfs,
        .sync_fs        = fuse_sync_fs,
        .show_options   = fuse_show_options,
};

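/*
 * Worked example for the default computed below (added commentary,
 * illustrative): with 1 GiB of RAM, ((1 << 30) >> 13) / 392 =
 * 131072 / 392 ~= 334, so max_user_bgreq and max_user_congthresh each
 * default to roughly 334 when left unset.
 */
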
static void sanitize_global_limit(unsigned *limit)
{
        /*
         * The default maximum number of async requests is calculated to consume
         * 1/2^13 of the total memory, assuming 392 bytes per request.
         */
        if (*limit == 0)
                *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392;

        if (*limit >= 1 << 16)
                *limit = (1 << 16) - 1;
}

static int set_global_limit(const char *val, const struct kernel_param *kp)
{
        int rv;

        rv = param_set_uint(val, kp);
        if (rv)
                return rv;

        sanitize_global_limit((unsigned *)kp->arg);

        return 0;
}

static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
{
        int cap_sys_admin = capable(CAP_SYS_ADMIN);

        if (arg->minor < 13)
                return;

        sanitize_global_limit(&max_user_bgreq);
        sanitize_global_limit(&max_user_congthresh);

        spin_lock(&fc->bg_lock);
        if (arg->max_background) {
                fc->max_background = arg->max_background;

                if (!cap_sys_admin && fc->max_background > max_user_bgreq)
                        fc->max_background = max_user_bgreq;
        }
        if (arg->congestion_threshold) {
                fc->congestion_threshold = arg->congestion_threshold;

                if (!cap_sys_admin &&
                    fc->congestion_threshold > max_user_congthresh)
                        fc->congestion_threshold = max_user_congthresh;
        }
        spin_unlock(&fc->bg_lock);
}

struct fuse_init_args {
        struct fuse_args args;
        struct fuse_init_in in;
        struct fuse_init_out out;
};

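/*
 * Note (added commentary): process_init_reply() below folds the daemon's
 * advertised feature flags into the fc->* bits.  With FUSE_INIT_EXT the
 * flag word is 64 bits wide, split across out.flags and out.flags2 (high
 * 32 bits), mirroring the split performed in fuse_send_init().
 */
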
static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
                               int error)
{
        struct fuse_conn *fc = fm->fc;
        struct fuse_init_args *ia = container_of(args, typeof(*ia), args);
        struct fuse_init_out *arg = &ia->out;
        bool ok = true;

        if (error || arg->major != FUSE_KERNEL_VERSION)
                ok = false;
        else {
                unsigned long ra_pages;

                process_init_limits(fc, arg);

                if (arg->minor >= 6) {
                        u64 flags = arg->flags;

                        if (flags & FUSE_INIT_EXT)
                                flags |= (u64) arg->flags2 << 32;

                        ra_pages = arg->max_readahead / PAGE_SIZE;
                        if (flags & FUSE_ASYNC_READ)
                                fc->async_read = 1;
                        if (!(flags & FUSE_POSIX_LOCKS))
                                fc->no_lock = 1;
                        if (arg->minor >= 17) {
                                if (!(flags & FUSE_FLOCK_LOCKS))
                                        fc->no_flock = 1;
                        } else {
                                if (!(flags & FUSE_POSIX_LOCKS))
                                        fc->no_flock = 1;
                        }
                        if (flags & FUSE_ATOMIC_O_TRUNC)
                                fc->atomic_o_trunc = 1;
                        if (arg->minor >= 9) {
                                /* LOOKUP has dependency on proto version */
                                if (flags & FUSE_EXPORT_SUPPORT)
                                        fc->export_support = 1;
                        }
                        if (flags & FUSE_BIG_WRITES)
                                fc->big_writes = 1;
                        if (flags & FUSE_DONT_MASK)
                                fc->dont_mask = 1;
                        if (flags & FUSE_AUTO_INVAL_DATA)
                                fc->auto_inval_data = 1;
                        else if (flags & FUSE_EXPLICIT_INVAL_DATA)
                                fc->explicit_inval_data = 1;
                        if (flags & FUSE_DO_READDIRPLUS) {
                                fc->do_readdirplus = 1;
                                if (flags & FUSE_READDIRPLUS_AUTO)
                                        fc->readdirplus_auto = 1;
                        }
                        if (flags & FUSE_ASYNC_DIO)
                                fc->async_dio = 1;
                        if (flags & FUSE_WRITEBACK_CACHE)
                                fc->writeback_cache = 1;
                        if (flags & FUSE_PARALLEL_DIROPS)
                                fc->parallel_dirops = 1;
                        if (flags & FUSE_HANDLE_KILLPRIV)
                                fc->handle_killpriv = 1;
                        if (arg->time_gran && arg->time_gran <= 1000000000)
                                fm->sb->s_time_gran = arg->time_gran;
                        if ((flags & FUSE_POSIX_ACL)) {
                                fc->default_permissions = 1;
                                fc->posix_acl = 1;
                        }
                        if (flags & FUSE_CACHE_SYMLINKS)
                                fc->cache_symlinks = 1;
                        if (flags & FUSE_ABORT_ERROR)
                                fc->abort_err = 1;
                        if (flags & FUSE_MAX_PAGES) {
                                fc->max_pages =
                                        min_t(unsigned int, fc->max_pages_limit,
                                        max_t(unsigned int, arg->max_pages, 1));
                        }
                        if (IS_ENABLED(CONFIG_FUSE_DAX)) {
                                if (flags & FUSE_MAP_ALIGNMENT &&
                                    !fuse_dax_check_alignment(fc, arg->map_alignment)) {
                                        ok = false;
                                }
                                if (flags & FUSE_HAS_INODE_DAX)
                                        fc->inode_dax = 1;
                        }
                        if (flags & FUSE_HANDLE_KILLPRIV_V2) {
                                fc->handle_killpriv_v2 = 1;
                                fm->sb->s_flags |= SB_NOSEC;
                        }
                        if (flags & FUSE_SETXATTR_EXT)
                                fc->setxattr_ext = 1;
                        if (flags & FUSE_SECURITY_CTX)
                                fc->init_security = 1;
                        if (flags & FUSE_CREATE_SUPP_GROUP)
                                fc->create_supp_group = 1;
                        if (flags & FUSE_DIRECT_IO_ALLOW_MMAP)
                                fc->direct_io_allow_mmap = 1;
                        /*
                         * max_stack_depth is the max stack depth of FUSE fs,
                         * so it has to be at least 1 to support passthrough
                         * to backing files.
                         *
                         * with max_stack_depth > 1, the backing files can be
                         * on a stacked fs (e.g. overlayfs) themselves and with
                         * max_stack_depth == 1, FUSE fs can be stacked as the
                         * underlying fs of a stacked fs (e.g. overlayfs).
                         *
                         * Also don't allow the combination of FUSE_PASSTHROUGH
                         * and FUSE_WRITEBACK_CACHE, current design doesn't handle
                         * them together.
                         */
                        if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH) &&
                            (flags & FUSE_PASSTHROUGH) &&
                            arg->max_stack_depth > 0 &&
                            arg->max_stack_depth <= FILESYSTEM_MAX_STACK_DEPTH &&
                            !(flags & FUSE_WRITEBACK_CACHE)) {
                                fc->passthrough = 1;
                                fc->max_stack_depth = arg->max_stack_depth;
                                fm->sb->s_stack_depth = arg->max_stack_depth;
                        }
                        if (flags & FUSE_NO_EXPORT_SUPPORT)
                                fm->sb->s_export_op = &fuse_export_fid_operations;
                        if (flags & FUSE_ALLOW_IDMAP) {
                                if (fc->default_permissions)
                                        fm->sb->s_iflags &= ~SB_I_NOIDMAP;
                                else
                                        ok = false;
                        }
                } else {
                        ra_pages = fc->max_read / PAGE_SIZE;
                        fc->no_lock = 1;
                        fc->no_flock = 1;
                }

                fm->sb->s_bdi->ra_pages =
                                min(fm->sb->s_bdi->ra_pages, ra_pages);
                fc->minor = arg->minor;
                fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
                fc->max_write = max_t(unsigned, 4096, fc->max_write);
                fc->conn_init = 1;
        }
        kfree(ia);

        if (!ok) {
                fc->conn_init = 0;
                fc->conn_error = 1;
        }

        fuse_set_initialized(fc);
        wake_up_all(&fc->blocked_waitq);
}

void fuse_send_init(struct fuse_mount *fm)
{
        struct fuse_init_args *ia;
        u64 flags;

        ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL);

        ia->in.major = FUSE_KERNEL_VERSION;
        ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
        ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE;
        flags =
                FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
                FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
                FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
                FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
                FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
                FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
                FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
                FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
                FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
                FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
                FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP |
                FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP |
                FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | FUSE_ALLOW_IDMAP;
#ifdef CONFIG_FUSE_DAX
        if (fm->fc->dax)
                flags |= FUSE_MAP_ALIGNMENT;
        if (fuse_is_inode_dax_mode(fm->fc->dax_mode))
                flags |= FUSE_HAS_INODE_DAX;
#endif
        if (fm->fc->auto_submounts)
                flags |= FUSE_SUBMOUNTS;
        if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
                flags |= FUSE_PASSTHROUGH;

        ia->in.flags = flags;
        ia->in.flags2 = flags >> 32;

        ia->args.opcode = FUSE_INIT;
        ia->args.in_numargs = 1;
        ia->args.in_args[0].size = sizeof(ia->in);
        ia->args.in_args[0].value = &ia->in;
        ia->args.out_numargs = 1;
        /* Variable length argument used for backward compatibility
           with interface version < 7.5.  Rest of init_out is zeroed
           by do_get_request(), so a short reply is not a problem */
        ia->args.out_argvar = true;
        ia->args.out_args[0].size = sizeof(ia->out);
        ia->args.out_args[0].value = &ia->out;
        ia->args.force = true;
        ia->args.nocreds = true;
        ia->args.end = process_init_reply;

        if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0)
                process_init_reply(fm, &ia->args, -ENOTCONN);
}
EXPORT_SYMBOL_GPL(fuse_send_init);

void fuse_free_conn(struct fuse_conn *fc)
{
        WARN_ON(!list_empty(&fc->devices));
        kfree(fc);
}
EXPORT_SYMBOL_GPL(fuse_free_conn);

static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
{
        int err;
        char *suffix = "";

        if (sb->s_bdev) {
                suffix = "-fuseblk";
                /*
                 * sb->s_bdi points to blkdev's bdi however we want to redirect
                 * it to our private bdi...
                 */
                bdi_put(sb->s_bdi);
                sb->s_bdi = &noop_backing_dev_info;
        }
        err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev),
                                   MINOR(fc->dev), suffix);
        if (err)
                return err;

        /* fuse does its own writeback accounting */
        sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT;
        sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT;

        /*
         * For a single fuse filesystem use max 1% of dirty +
         * writeback threshold.
         *
         * This gives about 1M of write buffer for memory maps on a
         * machine with 1G and 10% dirty_ratio, which should be more
         * than enough.
         *
         * Privileged users can raise it by writing to
         *
         *    /sys/class/bdi/<bdi>/max_ratio
         */
        bdi_set_max_ratio(sb->s_bdi, 1);

        return 0;
}

struct fuse_dev *fuse_dev_alloc(void)
{
        struct fuse_dev *fud;
        struct list_head *pq;

        fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL);
        if (!fud)
                return NULL;

        pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
        if (!pq) {
                kfree(fud);
                return NULL;
        }

        fud->pq.processing = pq;
        fuse_pqueue_init(&fud->pq);

        return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc);

void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc)
{
        fud->fc = fuse_conn_get(fc);
        spin_lock(&fc->lock);
        list_add_tail(&fud->entry, &fc->devices);
        spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_dev_install);

struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc)
{
        struct fuse_dev *fud;

        fud = fuse_dev_alloc();
        if (!fud)
                return NULL;

        fuse_dev_install(fud, fc);
        return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc_install);

void fuse_dev_free(struct fuse_dev *fud)
{
        struct fuse_conn *fc = fud->fc;

        if (fc) {
                spin_lock(&fc->lock);
                list_del(&fud->entry);
                spin_unlock(&fc->lock);

                fuse_conn_put(fc);
        }
        kfree(fud->pq.processing);
        kfree(fud);
}
EXPORT_SYMBOL_GPL(fuse_dev_free);

static void fuse_fill_attr_from_inode(struct fuse_attr *attr,
                                      const struct fuse_inode *fi)
{
        struct timespec64 atime = inode_get_atime(&fi->inode);
        struct timespec64 mtime = inode_get_mtime(&fi->inode);
        struct timespec64 ctime = inode_get_ctime(&fi->inode);

        *attr = (struct fuse_attr){
                .ino            = fi->inode.i_ino,
                .size           = fi->inode.i_size,
                .blocks         = fi->inode.i_blocks,
                .atime          = atime.tv_sec,
                .mtime          = mtime.tv_sec,
                .ctime          = ctime.tv_sec,
                .atimensec      = atime.tv_nsec,
                .mtimensec      = mtime.tv_nsec,
                .ctimensec      = ctime.tv_nsec,
                .mode           = fi->inode.i_mode,
                .nlink          = fi->inode.i_nlink,
                .uid            = __kuid_val(fi->inode.i_uid),
                .gid            = __kgid_val(fi->inode.i_gid),
                .rdev           = fi->inode.i_rdev,
                .blksize        = 1u << fi->inode.i_blkbits,
        };
}

static void fuse_sb_defaults(struct super_block *sb)
{
        sb->s_magic = FUSE_SUPER_MAGIC;
        sb->s_op = &fuse_super_operations;
        sb->s_xattr = fuse_xattr_handlers;
        sb->s_maxbytes = MAX_LFS_FILESIZE;
        sb->s_time_gran = 1;
        sb->s_export_op = &fuse_export_operations;
        sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
        sb->s_iflags |= SB_I_NOIDMAP;
        if (sb->s_user_ns != &init_user_ns)
                sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
        sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
}

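/*
 * Note (added commentary): a submount superblock built below is a view of
 * an existing connection, so it borrows the parent's bdi, xattr handlers,
 * export ops and block size rather than negotiating its own.
 */
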
static int fuse_fill_super_submount(struct super_block *sb,
                                    struct fuse_inode *parent_fi)
{
        struct fuse_mount *fm = get_fuse_mount_super(sb);
        struct super_block *parent_sb = parent_fi->inode.i_sb;
        struct fuse_attr root_attr;
        struct inode *root;
        struct fuse_submount_lookup *sl;
        struct fuse_inode *fi;

        fuse_sb_defaults(sb);
        fm->sb = sb;

        WARN_ON(sb->s_bdi != &noop_backing_dev_info);
        sb->s_bdi = bdi_get(parent_sb->s_bdi);

        sb->s_xattr = parent_sb->s_xattr;
        sb->s_export_op = parent_sb->s_export_op;
        sb->s_time_gran = parent_sb->s_time_gran;
        sb->s_blocksize = parent_sb->s_blocksize;
        sb->s_blocksize_bits = parent_sb->s_blocksize_bits;
        sb->s_subtype = kstrdup(parent_sb->s_subtype, GFP_KERNEL);
        if (parent_sb->s_subtype && !sb->s_subtype)
                return -ENOMEM;

        fuse_fill_attr_from_inode(&root_attr, parent_fi);
        root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0,
                         fuse_get_evict_ctr(fm->fc));
        /*
         * This inode is just a duplicate, so it is not looked up and
         * its nlookup should not be incremented.  fuse_iget() does
         * that, though, so undo it here.
         */
        fi = get_fuse_inode(root);
        fi->nlookup--;

        sb->s_d_op = &fuse_dentry_operations;
        sb->s_root = d_make_root(root);
        if (!sb->s_root)
                return -ENOMEM;

        /*
         * Grab the parent's submount_lookup pointer and take a
         * reference on the shared nlookup from the parent.  This is to
         * prevent the last forget for this nodeid from getting
         * triggered until all users have finished with it.
         */
        sl = parent_fi->submount_lookup;
        WARN_ON(!sl);
        if (sl) {
                refcount_inc(&sl->count);
                fi->submount_lookup = sl;
        }

        return 0;
}

/* Filesystem context private data holds the FUSE inode of the mount point */
static int fuse_get_tree_submount(struct fs_context *fsc)
{
        struct fuse_mount *fm;
        struct fuse_inode *mp_fi = fsc->fs_private;
        struct fuse_conn *fc = get_fuse_conn(&mp_fi->inode);
        struct super_block *sb;
        int err;

        fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
        if (!fm)
                return -ENOMEM;

        fm->fc = fuse_conn_get(fc);
        fsc->s_fs_info = fm;
        sb = sget_fc(fsc, NULL, set_anon_super_fc);
        if (fsc->s_fs_info)
                fuse_mount_destroy(fm);
        if (IS_ERR(sb))
                return PTR_ERR(sb);

        /* Initialize superblock, making @mp_fi its root */
        err = fuse_fill_super_submount(sb, mp_fi);
        if (err) {
                deactivate_locked_super(sb);
                return err;
        }

        down_write(&fc->killsb);
        list_add_tail(&fm->fc_entry, &fc->mounts);
        up_write(&fc->killsb);

        sb->s_flags |= SB_ACTIVE;
        fsc->root = dget(sb->s_root);

        return 0;
}

static const struct fs_context_operations fuse_context_submount_ops = {
        .get_tree       = fuse_get_tree_submount,
};

int fuse_init_fs_context_submount(struct fs_context *fsc)
{
        fsc->ops = &fuse_context_submount_ops;
        return 0;
}
EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount);

int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
{
        struct fuse_dev *fud = NULL;
        struct fuse_mount *fm = get_fuse_mount_super(sb);
        struct fuse_conn *fc = fm->fc;
        struct inode *root;
        struct dentry *root_dentry;
        int err;

        err = -EINVAL;
        if (sb->s_flags & SB_MANDLOCK)
                goto err;

        rcu_assign_pointer(fc->curr_bucket, fuse_sync_bucket_alloc());
        fuse_sb_defaults(sb);

        if (ctx->is_bdev) {
#ifdef CONFIG_BLOCK
                err = -EINVAL;
                if (!sb_set_blocksize(sb, ctx->blksize))
                        goto err;
#endif
        } else {
                sb->s_blocksize = PAGE_SIZE;
                sb->s_blocksize_bits = PAGE_SHIFT;
        }

        sb->s_subtype = ctx->subtype;
        ctx->subtype = NULL;
        if (IS_ENABLED(CONFIG_FUSE_DAX)) {
                err = fuse_dax_conn_alloc(fc, ctx->dax_mode, ctx->dax_dev);
                if (err)
                        goto err;
        }

        if (ctx->fudptr) {
                err = -ENOMEM;
                fud = fuse_dev_alloc_install(fc);
                if (!fud)
                        goto err_free_dax;
        }

        fc->dev = sb->s_dev;
        fm->sb = sb;
        err = fuse_bdi_init(fc, sb);
        if (err)
                goto err_dev_free;

        /* Handle umasking inside the fuse code */
        if (sb->s_flags & SB_POSIXACL)
                fc->dont_mask = 1;
        sb->s_flags |= SB_POSIXACL;

        fc->default_permissions = ctx->default_permissions;
        fc->allow_other = ctx->allow_other;
        fc->user_id = ctx->user_id;
        fc->group_id = ctx->group_id;
        fc->legacy_opts_show = ctx->legacy_opts_show;
        fc->max_read = max_t(unsigned int, 4096, ctx->max_read);
        fc->destroy = ctx->destroy;
        fc->no_control = ctx->no_control;
        fc->no_force_umount = ctx->no_force_umount;

        err = -ENOMEM;
        root = fuse_get_root_inode(sb, ctx->rootmode);
        sb->s_d_op = &fuse_root_dentry_operations;
        root_dentry = d_make_root(root);
        if (!root_dentry)
                goto err_dev_free;
        /* Root dentry doesn't have .d_revalidate */
        sb->s_d_op = &fuse_dentry_operations;

        mutex_lock(&fuse_mutex);
        err = -EINVAL;
        if (ctx->fudptr && *ctx->fudptr)
                goto err_unlock;

        err = fuse_ctl_add_conn(fc);
        if (err)
                goto err_unlock;

        list_add_tail(&fc->entry, &fuse_conn_list);
        sb->s_root = root_dentry;
        if (ctx->fudptr)
                *ctx->fudptr = fud;
        mutex_unlock(&fuse_mutex);
        return 0;

err_unlock:
        mutex_unlock(&fuse_mutex);
        dput(root_dentry);
err_dev_free:
        if (fud)
                fuse_dev_free(fud);
err_free_dax:
        if (IS_ENABLED(CONFIG_FUSE_DAX))
                fuse_dax_conn_free(fc);
err:
        return err;
}
EXPORT_SYMBOL_GPL(fuse_fill_super_common);

static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
{
        struct fuse_fs_context *ctx = fsc->fs_private;
        int err;

        if (!ctx->file || !ctx->rootmode_present ||
            !ctx->user_id_present || !ctx->group_id_present)
                return -EINVAL;

        /*
         * Require mount to happen from the same user namespace which
         * opened /dev/fuse to prevent potential attacks.
         */
        if ((ctx->file->f_op != &fuse_dev_operations) ||
            (ctx->file->f_cred->user_ns != sb->s_user_ns))
                return -EINVAL;
        ctx->fudptr = &ctx->file->private_data;

        err = fuse_fill_super_common(sb, ctx);
        if (err)
                return err;
        /* file->private_data shall be visible on all CPUs after this */
        smp_mb();
        fuse_send_init(get_fuse_mount_super(sb));
        return 0;
}

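/*
 * Lifetime note (added commentary): sget_fc() consumes fsc->s_fs_info when
 * it creates a new superblock; if it is still non-NULL afterwards, the
 * fuse_mount was not attached, and fuse_get_tree() below (like
 * fuse_get_tree_submount() above) destroys it itself.
 */
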
/*
 * This is the path where user supplied an already initialized fuse dev.  In
 * this case never create a new super if the old one is gone.
 */
static int fuse_set_no_super(struct super_block *sb, struct fs_context *fsc)
{
        return -ENOTCONN;
}

static int fuse_test_super(struct super_block *sb, struct fs_context *fsc)
{
        return fsc->sget_key == get_fuse_conn_super(sb);
}

static int fuse_get_tree(struct fs_context *fsc)
{
        struct fuse_fs_context *ctx = fsc->fs_private;
        struct fuse_dev *fud;
        struct fuse_conn *fc;
        struct fuse_mount *fm;
        struct super_block *sb;
        int err;

        fc = kmalloc(sizeof(*fc), GFP_KERNEL);
        if (!fc)
                return -ENOMEM;

        fm = kzalloc(sizeof(*fm), GFP_KERNEL);
        if (!fm) {
                kfree(fc);
                return -ENOMEM;
        }

        fuse_conn_init(fc, fm, fsc->user_ns, &fuse_dev_fiq_ops, NULL);
        fc->release = fuse_free_conn;

        fsc->s_fs_info = fm;

        if (ctx->fd_present)
                ctx->file = fget(ctx->fd);

        if (IS_ENABLED(CONFIG_BLOCK) && ctx->is_bdev) {
                err = get_tree_bdev(fsc, fuse_fill_super);
                goto out;
        }
        /*
         * While block dev mount can be initialized with a dummy device fd
         * (found by device name), normal fuse mounts can't
         */
        err = -EINVAL;
        if (!ctx->file)
                goto out;

        /*
         * Allow creating a fuse mount with an already initialized fuse
         * connection
         */
        fud = READ_ONCE(ctx->file->private_data);
        if (ctx->file->f_op == &fuse_dev_operations && fud) {
                fsc->sget_key = fud->fc;
                sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super);
                err = PTR_ERR_OR_ZERO(sb);
                if (!IS_ERR(sb))
                        fsc->root = dget(sb->s_root);
        } else {
                err = get_tree_nodev(fsc, fuse_fill_super);
        }
out:
        if (fsc->s_fs_info)
                fuse_mount_destroy(fm);
        if (ctx->file)
                fput(ctx->file);
        return err;
}

static const struct fs_context_operations fuse_context_ops = {
        .free           = fuse_free_fsc,
        .parse_param    = fuse_parse_param,
        .reconfigure    = fuse_reconfigure,
        .get_tree       = fuse_get_tree,
};

/*
 * Set up the filesystem mount context.
 */
static int fuse_init_fs_context(struct fs_context *fsc)
{
        struct fuse_fs_context *ctx;

        ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
        if (!ctx)
                return -ENOMEM;

        ctx->max_read = ~0;
        ctx->blksize = FUSE_DEFAULT_BLKSIZE;
        ctx->legacy_opts_show = true;

#ifdef CONFIG_BLOCK
        if (fsc->fs_type == &fuseblk_fs_type) {
                ctx->is_bdev = true;
                ctx->destroy = true;
        }
#endif

        fsc->fs_private = ctx;
        fsc->ops = &fuse_context_ops;
        return 0;
}

bool fuse_mount_remove(struct fuse_mount *fm)
{
        struct fuse_conn *fc = fm->fc;
        bool last = false;

        down_write(&fc->killsb);
        list_del_init(&fm->fc_entry);
        if (list_empty(&fc->mounts))
                last = true;
        up_write(&fc->killsb);

        return last;
}
EXPORT_SYMBOL_GPL(fuse_mount_remove);

void fuse_conn_destroy(struct fuse_mount *fm)
{
        struct fuse_conn *fc = fm->fc;

        if (fc->destroy)
                fuse_send_destroy(fm);

        fuse_abort_conn(fc);
        fuse_wait_aborted(fc);

        if (!list_empty(&fc->entry)) {
                mutex_lock(&fuse_mutex);
                list_del(&fc->entry);
                fuse_ctl_remove_conn(fc);
                mutex_unlock(&fuse_mutex);
        }
}
EXPORT_SYMBOL_GPL(fuse_conn_destroy);

static void fuse_sb_destroy(struct super_block *sb)
{
        struct fuse_mount *fm = get_fuse_mount_super(sb);
        bool last;

        if (sb->s_root) {
                last = fuse_mount_remove(fm);
                if (last)
                        fuse_conn_destroy(fm);
        }
}

void fuse_mount_destroy(struct fuse_mount *fm)
{
        fuse_conn_put(fm->fc);
        kfree_rcu(fm, rcu);
}
EXPORT_SYMBOL(fuse_mount_destroy);

static void fuse_kill_sb_anon(struct super_block *sb)
{
        fuse_sb_destroy(sb);
        kill_anon_super(sb);
        fuse_mount_destroy(get_fuse_mount_super(sb));
}

static struct file_system_type fuse_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "fuse",
        .fs_flags       = FS_HAS_SUBTYPE | FS_USERNS_MOUNT | FS_ALLOW_IDMAP,
        .init_fs_context = fuse_init_fs_context,
        .parameters     = fuse_fs_parameters,
        .kill_sb        = fuse_kill_sb_anon,
};
MODULE_ALIAS_FS("fuse");

#ifdef CONFIG_BLOCK
static void fuse_kill_sb_blk(struct super_block *sb)
{
        fuse_sb_destroy(sb);
        kill_block_super(sb);
        fuse_mount_destroy(get_fuse_mount_super(sb));
}

static struct file_system_type fuseblk_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "fuseblk",
        .init_fs_context = fuse_init_fs_context,
        .parameters     = fuse_fs_parameters,
        .kill_sb        = fuse_kill_sb_blk,
        .fs_flags       = FS_REQUIRES_DEV | FS_HAS_SUBTYPE | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("fuseblk");

static inline int register_fuseblk(void)
{
        return register_filesystem(&fuseblk_fs_type);
}

static inline void unregister_fuseblk(void)
{
        unregister_filesystem(&fuseblk_fs_type);
}
#else
static inline int register_fuseblk(void)
{
        return 0;
}

static inline void unregister_fuseblk(void)
{
}
#endif

static void fuse_inode_init_once(void *foo)
{
        struct inode *inode = foo;

        inode_init_once(inode);
}

static int __init fuse_fs_init(void)
{
        int err;

        fuse_inode_cachep = kmem_cache_create("fuse_inode",
                        sizeof(struct fuse_inode), 0,
                        SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT,
                        fuse_inode_init_once);
        err = -ENOMEM;
        if (!fuse_inode_cachep)
                goto out;

        err = register_fuseblk();
        if (err)
                goto out2;

        err = register_filesystem(&fuse_fs_type);
        if (err)
                goto out3;

        err = fuse_sysctl_register();
        if (err)
                goto out4;

        return 0;

out4:
        unregister_filesystem(&fuse_fs_type);
out3:
        unregister_fuseblk();
out2:
        kmem_cache_destroy(fuse_inode_cachep);
out:
        return err;
}

static void fuse_fs_cleanup(void)
{
        fuse_sysctl_unregister();
        unregister_filesystem(&fuse_fs_type);
        unregister_fuseblk();

        /*
         * Make sure all delayed rcu free inodes are flushed before we
         * destroy cache.
         */
        rcu_barrier();
        kmem_cache_destroy(fuse_inode_cachep);
}

static struct kobject *fuse_kobj;

static int fuse_sysfs_init(void)
{
        int err;

        fuse_kobj = kobject_create_and_add("fuse", fs_kobj);
        if (!fuse_kobj) {
                err = -ENOMEM;
                goto out_err;
        }

        err = sysfs_create_mount_point(fuse_kobj, "connections");
        if (err)
                goto out_fuse_unregister;

        return 0;

out_fuse_unregister:
        kobject_put(fuse_kobj);
out_err:
        return err;
}

static void fuse_sysfs_cleanup(void)
{
        sysfs_remove_mount_point(fuse_kobj, "connections");
        kobject_put(fuse_kobj);
}

static int __init fuse_init(void)
{
        int res;

        pr_info("init (API version %i.%i)\n",
                FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);

        INIT_LIST_HEAD(&fuse_conn_list);
        res = fuse_fs_init();
        if (res)
                goto err;

        res = fuse_dev_init();
        if (res)
                goto err_fs_cleanup;

        res = fuse_sysfs_init();
        if (res)
                goto err_dev_cleanup;

        res = fuse_ctl_init();
        if (res)
                goto err_sysfs_cleanup;

        sanitize_global_limit(&max_user_bgreq);
        sanitize_global_limit(&max_user_congthresh);

        return 0;

err_sysfs_cleanup:
        fuse_sysfs_cleanup();
err_dev_cleanup:
        fuse_dev_cleanup();
err_fs_cleanup:
        fuse_fs_cleanup();
err:
        return res;
}

static void __exit fuse_exit(void)
{
        pr_debug("exit\n");

        fuse_ctl_cleanup();
        fuse_sysfs_cleanup();
        fuse_fs_cleanup();
        fuse_dev_cleanup();
}

module_init(fuse_init);
module_exit(fuse_exit);