/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"
#include "dev_uring_i.h"

#include <linux/dax.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/statfs.h>
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/exportfs.h>
#include <linux/posix_acl.h>
#include <linux/pid_namespace.h>
#include <uapi/linux/magic.h>

MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
MODULE_LICENSE("GPL");

static struct kmem_cache *fuse_inode_cachep;
struct list_head fuse_conn_list;
DEFINE_MUTEX(fuse_mutex);

static int set_global_limit(const char *val, const struct kernel_param *kp);

unsigned int fuse_max_pages_limit = 256;
/* default is no timeout */
unsigned int fuse_default_req_timeout;
unsigned int fuse_max_req_timeout;

unsigned int max_user_bgreq;
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
		  &max_user_bgreq, 0644);
__MODULE_PARM_TYPE(max_user_bgreq, "uint");
MODULE_PARM_DESC(max_user_bgreq,
 "Global limit for the maximum number of backgrounded requests an "
 "unprivileged user can set");

unsigned int max_user_congthresh;
module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
		  &max_user_congthresh, 0644);
__MODULE_PARM_TYPE(max_user_congthresh, "uint");
MODULE_PARM_DESC(max_user_congthresh,
 "Global limit for the maximum congestion threshold an "
 "unprivileged user can set");

#define FUSE_DEFAULT_BLKSIZE 512

/** Maximum number of outstanding background requests */
#define FUSE_DEFAULT_MAX_BACKGROUND 12

/** Congestion starts at 75% of maximum */
#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)

#ifdef CONFIG_BLOCK
static struct file_system_type fuseblk_fs_type;
#endif

struct fuse_forget_link *fuse_alloc_forget(void)
{
	return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT);
}

static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void)
{
	struct fuse_submount_lookup *sl;

	sl = kzalloc(sizeof(struct fuse_submount_lookup), GFP_KERNEL_ACCOUNT);
	if (!sl)
		return NULL;
	sl->forget = fuse_alloc_forget();
	if (!sl->forget)
		goto out_free;

	return sl;

out_free:
	kfree(sl);
	return NULL;
}

static struct inode *fuse_alloc_inode(struct super_block *sb)
{
	struct fuse_inode *fi;

	fi = alloc_inode_sb(sb, fuse_inode_cachep, GFP_KERNEL);
	if (!fi)
		return NULL;

	fi->i_time = 0;
	fi->inval_mask = ~0;
	fi->nodeid = 0;
	fi->nlookup = 0;
	fi->attr_version = 0;
	fi->orig_ino = 0;
	fi->state = 0;
	fi->submount_lookup = NULL;
	mutex_init(&fi->mutex);
	spin_lock_init(&fi->lock);
	fi->forget = fuse_alloc_forget();
	if (!fi->forget)
		goto out_free;

	if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi))
		goto out_free_forget;

	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
		fuse_inode_backing_set(fi, NULL);

	return &fi->inode;

out_free_forget:
	kfree(fi->forget);
out_free:
	kmem_cache_free(fuse_inode_cachep, fi);
	return NULL;
}

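/*
 * Counterpart of fuse_alloc_inode(): tear down everything the
 * allocation path set up before returning the object to the cache.
 */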
static void fuse_free_inode(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	mutex_destroy(&fi->mutex);
	kfree(fi->forget);
#ifdef CONFIG_FUSE_DAX
	kfree(fi->dax);
#endif
	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
		fuse_backing_put(fuse_inode_backing(fi));

	kmem_cache_free(fuse_inode_cachep, fi);
}

static void fuse_cleanup_submount_lookup(struct fuse_conn *fc,
					 struct fuse_submount_lookup *sl)
{
	if (!refcount_dec_and_test(&sl->count))
		return;

	fuse_queue_forget(fc, sl->forget, sl->nodeid, 1);
	sl->forget = NULL;
	kfree(sl);
}

static void fuse_evict_inode(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	/* Will write inode on close/munmap and in all other dirtiers */
	WARN_ON(inode->i_state & I_DIRTY_INODE);

	if (FUSE_IS_DAX(inode))
		dax_break_layout_final(inode);

	truncate_inode_pages_final(&inode->i_data);
	clear_inode(inode);
	if (inode->i_sb->s_flags & SB_ACTIVE) {
		struct fuse_conn *fc = get_fuse_conn(inode);

		if (FUSE_IS_DAX(inode))
			fuse_dax_inode_cleanup(inode);
		if (fi->nlookup) {
			fuse_queue_forget(fc, fi->forget, fi->nodeid,
					  fi->nlookup);
			fi->forget = NULL;
		}

		if (fi->submount_lookup) {
			fuse_cleanup_submount_lookup(fc, fi->submount_lookup);
			fi->submount_lookup = NULL;
		}
		/*
		 * Eviction of a non-deleted inode may race with outstanding
		 * LOOKUP/READDIRPLUS requests and result in inconsistency when
		 * the request finishes.  Deal with that here by bumping a
		 * counter that can be compared to the starting value.
		 */
		if (inode->i_nlink > 0)
			atomic64_inc(&fc->evict_ctr);
	}
	if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
		WARN_ON(fi->iocachectr != 0);
		WARN_ON(!list_empty(&fi->write_files));
		WARN_ON(!list_empty(&fi->queued_writes));
	}
}

static int fuse_reconfigure(struct fs_context *fsc)
{
	struct super_block *sb = fsc->root->d_sb;

	sync_filesystem(sb);
	if (fsc->sb_flags & SB_MANDLOCK)
		return -EINVAL;

	return 0;
}

/*
 * ino_t is 32 bits on a 32-bit arch.  We have to squash the 64-bit value down
 * so that it will fit.
 */

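/*
 * E.g. (illustrative values) with a 32-bit ino_t, a server-supplied
 * inode number of 0x123456789abcdef0 folds into
 * 0x9abcdef0 ^ 0x12345678 == 0x88888888.
 */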
static ino_t fuse_squash_ino(u64 ino64)
{
	ino_t ino = (ino_t) ino64;
	if (sizeof(ino_t) < sizeof(u64))
		ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8;
	return ino;
}

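/*
 * Refresh the cached attributes from a server reply.  Called with
 * fi->lock held; cache_mask names the attributes whose locally cached
 * values (under writeback_cache) take precedence over the server's.
 */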
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
				   struct fuse_statx *sx,
				   u64 attr_valid, u32 cache_mask,
				   u64 evict_ctr)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	lockdep_assert_held(&fi->lock);

	/*
	 * Clear basic stats from invalid mask.
	 *
	 * Don't do this if this is coming from a fuse_iget() call and there
	 * might have been a racing evict which would've invalidated the result
	 * if the attr_version would've been preserved.
	 *
	 * !evict_ctr -> this is create
	 * fi->attr_version != 0 -> this is not a new inode
	 * evict_ctr == fuse_get_evict_ctr() -> no evicts while the request
	 * was pending
	 */
	if (!evict_ctr || fi->attr_version || evict_ctr == fuse_get_evict_ctr(fc))
		set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);

	fi->attr_version = atomic64_inc_return(&fc->attr_version);
	fi->i_time = attr_valid;

	inode->i_ino = fuse_squash_ino(attr->ino);
	inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
	set_nlink(inode, attr->nlink);
	inode->i_uid = make_kuid(fc->user_ns, attr->uid);
	inode->i_gid = make_kgid(fc->user_ns, attr->gid);
	inode->i_blocks = attr->blocks;

	/* Sanitize nsecs */
	attr->atimensec = min_t(u32, attr->atimensec, NSEC_PER_SEC - 1);
	attr->mtimensec = min_t(u32, attr->mtimensec, NSEC_PER_SEC - 1);
	attr->ctimensec = min_t(u32, attr->ctimensec, NSEC_PER_SEC - 1);

	inode_set_atime(inode, attr->atime, attr->atimensec);
	/* mtime from server may be stale due to local buffered write */
	if (!(cache_mask & STATX_MTIME)) {
		inode_set_mtime(inode, attr->mtime, attr->mtimensec);
	}
	if (!(cache_mask & STATX_CTIME)) {
		inode_set_ctime(inode, attr->ctime, attr->ctimensec);
	}
	if (sx) {
		/* Sanitize nsecs */
		sx->btime.tv_nsec =
			min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);

		/*
		 * Btime has been queried, cache is valid (whether or not btime
		 * is available) so clear STATX_BTIME from inval_mask.
		 *
		 * Availability of the btime attribute is indicated in
		 * FUSE_I_BTIME
		 */
		set_mask_bits(&fi->inval_mask, STATX_BTIME, 0);
		if (sx->mask & STATX_BTIME) {
			set_bit(FUSE_I_BTIME, &fi->state);
			fi->i_btime.tv_sec = sx->btime.tv_sec;
			fi->i_btime.tv_nsec = sx->btime.tv_nsec;
		}
	}

	/*
	 * Don't set the sticky bit in i_mode, unless we want the VFS
	 * to check permissions.  This prevents failures due to the
	 * check in may_delete().
	 */
	fi->orig_i_mode = inode->i_mode;
	if (!fc->default_permissions)
		inode->i_mode &= ~S_ISVTX;

	fi->orig_ino = attr->ino;

	/*
	 * We are refreshing inode data and it is possible that another
	 * client set suid/sgid or security.capability xattr.  So clear
	 * S_NOSEC.  Ideally, we could have cleared it only if suid/sgid
	 * was set or if security.capability xattr was set.  But we don't
	 * know if security.capability has been set or not.  So clear it
	 * anyway.  It's less efficient but should be safe.
	 */
	inode->i_flags &= ~S_NOSEC;
}

u32 fuse_get_cache_mask(struct inode *inode)
{
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (!fc->writeback_cache || !S_ISREG(inode->i_mode))
		return 0;

	return STATX_MTIME | STATX_CTIME | STATX_SIZE;
}

static void fuse_change_attributes_i(struct inode *inode, struct fuse_attr *attr,
				     struct fuse_statx *sx, u64 attr_valid,
				     u64 attr_version, u64 evict_ctr)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	u32 cache_mask;
	loff_t oldsize;
	struct timespec64 old_mtime;

	spin_lock(&fi->lock);
	/*
	 * In case of writeback_cache enabled, writes update mtime, ctime and
	 * may update i_size.  In these cases trust the cached value in the
	 * inode.
	 */
	cache_mask = fuse_get_cache_mask(inode);
	if (cache_mask & STATX_SIZE)
		attr->size = i_size_read(inode);

	if (cache_mask & STATX_MTIME) {
		attr->mtime = inode_get_mtime_sec(inode);
		attr->mtimensec = inode_get_mtime_nsec(inode);
	}
	if (cache_mask & STATX_CTIME) {
		attr->ctime = inode_get_ctime_sec(inode);
		attr->ctimensec = inode_get_ctime_nsec(inode);
	}

	if ((attr_version != 0 && fi->attr_version > attr_version) ||
	    test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
		spin_unlock(&fi->lock);
		return;
	}

	old_mtime = inode_get_mtime(inode);
	fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask,
				      evict_ctr);

	oldsize = inode->i_size;
	/*
	 * In case of writeback_cache enabled, the cached writes beyond EOF
	 * extend local i_size without keeping userspace server in sync.  So,
	 * attr->size coming from server can be stale.  We cannot trust it.
	 */
	if (!(cache_mask & STATX_SIZE))
		i_size_write(inode, attr->size);
	spin_unlock(&fi->lock);

	if (!cache_mask && S_ISREG(inode->i_mode)) {
		bool inval = false;

		if (oldsize != attr->size) {
			truncate_pagecache(inode, attr->size);
			if (!fc->explicit_inval_data)
				inval = true;
		} else if (fc->auto_inval_data) {
			struct timespec64 new_mtime = {
				.tv_sec = attr->mtime,
				.tv_nsec = attr->mtimensec,
			};

			/*
			 * Auto inval mode also checks and invalidates if mtime
			 * has changed.
			 */
			if (!timespec64_equal(&old_mtime, &new_mtime))
				inval = true;
		}

		if (inval)
			invalidate_inode_pages2(inode->i_mapping);
	}

	if (IS_ENABLED(CONFIG_FUSE_DAX))
		fuse_dax_dontcache(inode, attr->flags);
}

void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
			    struct fuse_statx *sx, u64 attr_valid,
			    u64 attr_version)
{
	fuse_change_attributes_i(inode, attr, sx, attr_valid, attr_version, 0);
}

static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl,
				      u64 nodeid)
{
	sl->nodeid = nodeid;
	refcount_set(&sl->count, 1);
}

static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr,
			    struct fuse_conn *fc)
{
	inode->i_mode = attr->mode & S_IFMT;
	inode->i_size = attr->size;
	inode_set_mtime(inode, attr->mtime, attr->mtimensec);
	inode_set_ctime(inode, attr->ctime, attr->ctimensec);
	if (S_ISREG(inode->i_mode)) {
		fuse_init_common(inode);
		fuse_init_file_inode(inode, attr->flags);
	} else if (S_ISDIR(inode->i_mode))
		fuse_init_dir(inode);
	else if (S_ISLNK(inode->i_mode))
		fuse_init_symlink(inode);
	else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
		 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
		fuse_init_common(inode);
		init_special_inode(inode, inode->i_mode,
				   new_decode_dev(attr->rdev));
	} else
		BUG();
	/*
	 * Ensure that we don't cache acls for daemons without FUSE_POSIX_ACL
	 * so they see the exact same behavior as before.
	 */
	if (!fc->posix_acl)
		inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
}

static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
{
	u64 nodeid = *(u64 *) _nodeidp;
	if (get_node_id(inode) == nodeid)
		return 1;
	else
		return 0;
}

static int fuse_inode_set(struct inode *inode, void *_nodeidp)
{
	u64 nodeid = *(u64 *) _nodeidp;
	get_fuse_inode(inode)->nodeid = nodeid;
	return 0;
}

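/*
 * Look up (or create) the inode for @nodeid, keyed by fuse_inode_eq()
 * above.  Every successful return accounts for one lookup: the server
 * is owed a matching FORGET, tracked in fi->nlookup.
 */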
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
			int generation, struct fuse_attr *attr,
			u64 attr_valid, u64 attr_version,
			u64 evict_ctr)
{
	struct inode *inode;
	struct fuse_inode *fi;
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	/*
	 * Auto mount points get their node id from the submount root, which is
	 * not a unique identifier within this filesystem.
	 *
	 * To avoid conflicts, do not place submount points into the inode hash
	 * table.
	 */
	if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) &&
	    S_ISDIR(attr->mode)) {
		struct fuse_inode *fi;

		inode = new_inode(sb);
		if (!inode)
			return NULL;

		fuse_init_inode(inode, attr, fc);
		fi = get_fuse_inode(inode);
		fi->nodeid = nodeid;
		fi->submount_lookup = fuse_alloc_submount_lookup();
		if (!fi->submount_lookup) {
			iput(inode);
			return NULL;
		}
		/* Sets nlookup = 1 on fi->submount_lookup->nlookup */
		fuse_init_submount_lookup(fi->submount_lookup, nodeid);
		inode->i_flags |= S_AUTOMOUNT;
		goto done;
	}

retry:
	inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
	if (!inode)
		return NULL;

	if ((inode->i_state & I_NEW)) {
		inode->i_flags |= S_NOATIME;
		if (!fc->writeback_cache || !S_ISREG(attr->mode))
			inode->i_flags |= S_NOCMTIME;
		inode->i_generation = generation;
		fuse_init_inode(inode, attr, fc);
		unlock_new_inode(inode);
	} else if (fuse_stale_inode(inode, generation, attr)) {
		/* nodeid was reused, any I/O on the old inode should fail */
		fuse_make_bad(inode);
		if (inode != d_inode(sb->s_root)) {
			remove_inode_hash(inode);
			iput(inode);
			goto retry;
		}
	}
	fi = get_fuse_inode(inode);
	spin_lock(&fi->lock);
	fi->nlookup++;
	spin_unlock(&fi->lock);
done:
	fuse_change_attributes_i(inode, attr, NULL, attr_valid, attr_version,
				 evict_ctr);
	return inode;
}

struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
			   struct fuse_mount **fm)
{
	struct fuse_mount *fm_iter;
	struct inode *inode;

	WARN_ON(!rwsem_is_locked(&fc->killsb));
	list_for_each_entry(fm_iter, &fc->mounts, fc_entry) {
		if (!fm_iter->sb)
			continue;

		inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &nodeid);
		if (inode) {
			if (fm)
				*fm = fm_iter;
			return inode;
		}
	}

	return NULL;
}

int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
			     loff_t offset, loff_t len)
{
	struct fuse_inode *fi;
	struct inode *inode;
	pgoff_t pg_start;
	pgoff_t pg_end;

	inode = fuse_ilookup(fc, nodeid, NULL);
	if (!inode)
		return -ENOENT;

	fi = get_fuse_inode(inode);
	spin_lock(&fi->lock);
	fi->attr_version = atomic64_inc_return(&fc->attr_version);
	spin_unlock(&fi->lock);

	fuse_invalidate_attr(inode);
	forget_all_cached_acls(inode);
	if (offset >= 0) {
		pg_start = offset >> PAGE_SHIFT;
		if (len <= 0)
			pg_end = -1;
		else
			pg_end = (offset + len - 1) >> PAGE_SHIFT;
		invalidate_inode_pages2_range(inode->i_mapping,
					      pg_start, pg_end);
	}
	iput(inode);
	return 0;
}

bool fuse_lock_inode(struct inode *inode)
{
	bool locked = false;

	if (!get_fuse_conn(inode)->parallel_dirops) {
		mutex_lock(&get_fuse_inode(inode)->mutex);
		locked = true;
	}

	return locked;
}

void fuse_unlock_inode(struct inode *inode, bool locked)
{
	if (locked)
		mutex_unlock(&get_fuse_inode(inode)->mutex);
}

static void fuse_umount_begin(struct super_block *sb)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	if (fc->no_force_umount)
		return;

	fuse_abort_conn(fc);

	// Only retire block-device-based superblocks.
	if (sb->s_bdev != NULL)
		retire_super(sb);
}

static void fuse_send_destroy(struct fuse_mount *fm)
{
	if (fm->fc->conn_init) {
		FUSE_ARGS(args);

		args.opcode = FUSE_DESTROY;
		args.force = true;
		args.nocreds = true;
		fuse_simple_request(fm, &args);
	}
}

static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
{
	stbuf->f_type    = FUSE_SUPER_MAGIC;
	stbuf->f_bsize   = attr->bsize;
	stbuf->f_frsize  = attr->frsize;
	stbuf->f_blocks  = attr->blocks;
	stbuf->f_bfree   = attr->bfree;
	stbuf->f_bavail  = attr->bavail;
	stbuf->f_files   = attr->files;
	stbuf->f_ffree   = attr->ffree;
	stbuf->f_namelen = attr->namelen;
	/* fsid is left zero */
}

static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	FUSE_ARGS(args);
	struct fuse_statfs_out outarg;
	int err;

	if (!fuse_allow_current_process(fm->fc)) {
		buf->f_type = FUSE_SUPER_MAGIC;
		return 0;
	}

	memset(&outarg, 0, sizeof(outarg));
	args.in_numargs = 0;
	args.opcode = FUSE_STATFS;
	args.nodeid = get_node_id(d_inode(dentry));
	args.out_numargs = 1;
	args.out_args[0].size = sizeof(outarg);
	args.out_args[0].value = &outarg;
	err = fuse_simple_request(fm, &args);
	if (!err)
		convert_fuse_statfs(buf, &outarg.st);
	return err;
}

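/*
 * Write-sync "buckets": every in-flight write holds a count on the
 * current bucket.  syncfs swaps in a fresh bucket and then waits for
 * the old bucket's count to drain to zero, i.e. for all writes that
 * started before the syncfs to complete.
 */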
static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void)
{
	struct fuse_sync_bucket *bucket;

	bucket = kzalloc(sizeof(*bucket), GFP_KERNEL | __GFP_NOFAIL);
	if (bucket) {
		init_waitqueue_head(&bucket->waitq);
		/* Initial active count */
		atomic_set(&bucket->count, 1);
	}
	return bucket;
}

static void fuse_sync_fs_writes(struct fuse_conn *fc)
{
	struct fuse_sync_bucket *bucket, *new_bucket;
	int count;

	new_bucket = fuse_sync_bucket_alloc();
	spin_lock(&fc->lock);
	bucket = rcu_dereference_protected(fc->curr_bucket, 1);
	count = atomic_read(&bucket->count);
	WARN_ON(count < 1);
	/* No outstanding writes? */
	if (count == 1) {
		spin_unlock(&fc->lock);
		kfree(new_bucket);
		return;
	}

	/*
	 * Completion of the new bucket depends on completion of this bucket,
	 * so add one more count.
	 */
	atomic_inc(&new_bucket->count);
	rcu_assign_pointer(fc->curr_bucket, new_bucket);
	spin_unlock(&fc->lock);
	/*
	 * Drop initial active count.  At this point if all writes in this and
	 * ancestor buckets complete, the count will go to zero and this task
	 * will be woken up.
	 */
	atomic_dec(&bucket->count);

	wait_event(bucket->waitq, atomic_read(&bucket->count) == 0);

	/* Drop temp count on descendant bucket */
	fuse_sync_bucket_dec(new_bucket);
	kfree_rcu(bucket, rcu);
}

static int fuse_sync_fs(struct super_block *sb, int wait)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct fuse_conn *fc = fm->fc;
	struct fuse_syncfs_in inarg;
	FUSE_ARGS(args);
	int err;

	/*
	 * Userspace cannot handle the wait == 0 case.  Avoid a
	 * gratuitous roundtrip.
	 */
	if (!wait)
		return 0;

	/* The filesystem is being unmounted.  Nothing to do. */
	if (!sb->s_root)
		return 0;

	if (!fc->sync_fs)
		return 0;

	fuse_sync_fs_writes(fc);

	memset(&inarg, 0, sizeof(inarg));
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.opcode = FUSE_SYNCFS;
	args.nodeid = get_node_id(sb->s_root->d_inode);
	args.out_numargs = 0;

	err = fuse_simple_request(fm, &args);
	if (err == -ENOSYS) {
		fc->sync_fs = 0;
		err = 0;
	}

	return err;
}

enum {
	OPT_SOURCE,
	OPT_SUBTYPE,
	OPT_FD,
	OPT_ROOTMODE,
	OPT_USER_ID,
	OPT_GROUP_ID,
	OPT_DEFAULT_PERMISSIONS,
	OPT_ALLOW_OTHER,
	OPT_MAX_READ,
	OPT_BLKSIZE,
	OPT_ERR
};

static const struct fs_parameter_spec fuse_fs_parameters[] = {
	fsparam_string	("source",		OPT_SOURCE),
	fsparam_u32	("fd",			OPT_FD),
	fsparam_u32oct	("rootmode",		OPT_ROOTMODE),
	fsparam_uid	("user_id",		OPT_USER_ID),
	fsparam_gid	("group_id",		OPT_GROUP_ID),
	fsparam_flag	("default_permissions",	OPT_DEFAULT_PERMISSIONS),
	fsparam_flag	("allow_other",		OPT_ALLOW_OTHER),
	fsparam_u32	("max_read",		OPT_MAX_READ),
	fsparam_u32	("blksize",		OPT_BLKSIZE),
	fsparam_string	("subtype",		OPT_SUBTYPE),
	{}
};

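/*
 * A typical option string, as passed in by fusermount/libfuse, looks
 * something like (illustrative values):
 *
 *	fd=4,rootmode=40000,user_id=1000,group_id=1000
 */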
static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
{
	struct fs_parse_result result;
	struct fuse_fs_context *ctx = fsc->fs_private;
	int opt;
	kuid_t kuid;
	kgid_t kgid;

	if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
		/*
		 * Ignore options coming from mount(MS_REMOUNT) for backward
		 * compatibility.
		 */
		if (fsc->oldapi)
			return 0;

		return invalfc(fsc, "No changes allowed in reconfigure");
	}

	opt = fs_parse(fsc, fuse_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case OPT_SOURCE:
		if (fsc->source)
			return invalfc(fsc, "Multiple sources specified");
		fsc->source = param->string;
		param->string = NULL;
		break;

	case OPT_SUBTYPE:
		if (ctx->subtype)
			return invalfc(fsc, "Multiple subtypes specified");
		ctx->subtype = param->string;
		param->string = NULL;
		return 0;

	case OPT_FD:
		ctx->fd = result.uint_32;
		ctx->fd_present = true;
		break;

	case OPT_ROOTMODE:
		if (!fuse_valid_type(result.uint_32))
			return invalfc(fsc, "Invalid rootmode");
		ctx->rootmode = result.uint_32;
		ctx->rootmode_present = true;
		break;

	case OPT_USER_ID:
		kuid = result.uid;
		/*
		 * The requested uid must be representable in the
		 * filesystem's idmapping.
		 */
		if (!kuid_has_mapping(fsc->user_ns, kuid))
			return invalfc(fsc, "Invalid user_id");
		ctx->user_id = kuid;
		ctx->user_id_present = true;
		break;

	case OPT_GROUP_ID:
		kgid = result.gid;
		/*
		 * The requested gid must be representable in the
		 * filesystem's idmapping.
		 */
		if (!kgid_has_mapping(fsc->user_ns, kgid))
			return invalfc(fsc, "Invalid group_id");
		ctx->group_id = kgid;
		ctx->group_id_present = true;
		break;

	case OPT_DEFAULT_PERMISSIONS:
		ctx->default_permissions = true;
		break;

	case OPT_ALLOW_OTHER:
		ctx->allow_other = true;
		break;

	case OPT_MAX_READ:
		ctx->max_read = result.uint_32;
		break;

	case OPT_BLKSIZE:
		if (!ctx->is_bdev)
			return invalfc(fsc, "blksize only supported for fuseblk");
		ctx->blksize = result.uint_32;
		break;

	default:
		return -EINVAL;
	}

	return 0;
}

static void fuse_free_fsc(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;

	if (ctx) {
		kfree(ctx->subtype);
		kfree(ctx);
	}
}

static int fuse_show_options(struct seq_file *m, struct dentry *root)
{
	struct super_block *sb = root->d_sb;
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	if (fc->legacy_opts_show) {
		seq_printf(m, ",user_id=%u",
			   from_kuid_munged(fc->user_ns, fc->user_id));
		seq_printf(m, ",group_id=%u",
			   from_kgid_munged(fc->user_ns, fc->group_id));
		if (fc->default_permissions)
			seq_puts(m, ",default_permissions");
		if (fc->allow_other)
			seq_puts(m, ",allow_other");
		if (fc->max_read != ~0)
			seq_printf(m, ",max_read=%u", fc->max_read);
		if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
			seq_printf(m, ",blksize=%lu", sb->s_blocksize);
	}
#ifdef CONFIG_FUSE_DAX
	if (fc->dax_mode == FUSE_DAX_ALWAYS)
		seq_puts(m, ",dax=always");
	else if (fc->dax_mode == FUSE_DAX_NEVER)
		seq_puts(m, ",dax=never");
	else if (fc->dax_mode == FUSE_DAX_INODE_USER)
		seq_puts(m, ",dax=inode");
#endif

	return 0;
}

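/*
 * The input queue (fuse_iqueue) is shared per connection and holds
 * requests not yet read by any server thread; each /dev/fuse clone gets
 * its own processing queue (fuse_pqueue) for requests it has picked up.
 */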
static void fuse_iqueue_init(struct fuse_iqueue *fiq,
			     const struct fuse_iqueue_ops *ops,
			     void *priv)
{
	memset(fiq, 0, sizeof(struct fuse_iqueue));
	spin_lock_init(&fiq->lock);
	init_waitqueue_head(&fiq->waitq);
	INIT_LIST_HEAD(&fiq->pending);
	INIT_LIST_HEAD(&fiq->interrupts);
	fiq->forget_list_tail = &fiq->forget_list_head;
	fiq->connected = 1;
	fiq->ops = ops;
	fiq->priv = priv;
}

void fuse_pqueue_init(struct fuse_pqueue *fpq)
{
	unsigned int i;

	spin_lock_init(&fpq->lock);
	for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
		INIT_LIST_HEAD(&fpq->processing[i]);
	INIT_LIST_HEAD(&fpq->io);
	fpq->connected = 1;
}

void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
		    struct user_namespace *user_ns,
		    const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv)
{
	memset(fc, 0, sizeof(*fc));
	spin_lock_init(&fc->lock);
	spin_lock_init(&fc->bg_lock);
	init_rwsem(&fc->killsb);
	refcount_set(&fc->count, 1);
	atomic_set(&fc->dev_count, 1);
	atomic_set(&fc->epoch, 1);
	init_waitqueue_head(&fc->blocked_waitq);
	fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv);
	INIT_LIST_HEAD(&fc->bg_queue);
	INIT_LIST_HEAD(&fc->entry);
	INIT_LIST_HEAD(&fc->devices);
	atomic_set(&fc->num_waiting, 0);
	fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
	fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
	atomic64_set(&fc->khctr, 0);
	fc->polled_files = RB_ROOT;
	fc->blocked = 0;
	fc->initialized = 0;
	fc->connected = 1;
	atomic64_set(&fc->attr_version, 1);
	atomic64_set(&fc->evict_ctr, 1);
	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
	fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
	fc->user_ns = get_user_ns(user_ns);
	fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
	fc->max_pages_limit = fuse_max_pages_limit;
	fc->name_max = FUSE_NAME_LOW_MAX;
	fc->timeout.req_timeout = 0;

	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
		fuse_backing_files_init(fc);

	INIT_LIST_HEAD(&fc->mounts);
	list_add(&fm->fc_entry, &fc->mounts);
	fm->fc = fc;
}
EXPORT_SYMBOL_GPL(fuse_conn_init);

static void delayed_release(struct rcu_head *p)
{
	struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu);

	fuse_uring_destruct(fc);

	put_user_ns(fc->user_ns);
	fc->release(fc);
}

void fuse_conn_put(struct fuse_conn *fc)
{
	if (refcount_dec_and_test(&fc->count)) {
		struct fuse_iqueue *fiq = &fc->iq;
		struct fuse_sync_bucket *bucket;

		if (IS_ENABLED(CONFIG_FUSE_DAX))
			fuse_dax_conn_free(fc);
		if (fc->timeout.req_timeout)
			cancel_delayed_work_sync(&fc->timeout.work);
		if (fiq->ops->release)
			fiq->ops->release(fiq);
		put_pid_ns(fc->pid_ns);
		bucket = rcu_dereference_protected(fc->curr_bucket, 1);
		if (bucket) {
			WARN_ON(atomic_read(&bucket->count) != 1);
			kfree(bucket);
		}
		if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
			fuse_backing_files_free(fc);
		call_rcu(&fc->rcu, delayed_release);
	}
}
EXPORT_SYMBOL_GPL(fuse_conn_put);

struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
{
	refcount_inc(&fc->count);
	return fc;
}
EXPORT_SYMBOL_GPL(fuse_conn_get);

static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned int mode)
{
	struct fuse_attr attr;
	memset(&attr, 0, sizeof(attr));

	attr.mode = mode;
	attr.ino = FUSE_ROOT_ID;
	attr.nlink = 1;
	return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0, 0);
}

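/*
 * NFS export support: file handles encode the FUSE nodeid plus the
 * inode generation, so a handle can be decoded back to an inode even
 * after the dentry cache has been dropped.
 */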
struct fuse_inode_handle {
	u64 nodeid;
	u32 generation;
};

static struct dentry *fuse_get_dentry(struct super_block *sb,
				      struct fuse_inode_handle *handle)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	struct inode *inode;
	struct dentry *entry;
	int err = -ESTALE;

	if (handle->nodeid == 0)
		goto out_err;

	inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
	if (!inode) {
		struct fuse_entry_out outarg;
		const struct qstr name = QSTR_INIT(".", 1);

		if (!fc->export_support)
			goto out_err;

		err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
				       &inode);
		if (err && err != -ENOENT)
			goto out_err;
		if (err || !inode) {
			err = -ESTALE;
			goto out_err;
		}
		err = -EIO;
		if (get_node_id(inode) != handle->nodeid)
			goto out_iput;
	}
	err = -ESTALE;
	if (inode->i_generation != handle->generation)
		goto out_iput;

	entry = d_obtain_alias(inode);
	if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID)
		fuse_invalidate_entry_cache(entry);

	return entry;

out_iput:
	iput(inode);
out_err:
	return ERR_PTR(err);
}

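/*
 * Handle layout (32-bit words): fh[0] = nodeid high, fh[1] = nodeid
 * low, fh[2] = generation; fh[3..5] repeat the triple for the parent
 * when one is supplied.
 */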
static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len,
			  struct inode *parent)
{
	int len = parent ? 6 : 3;
	u64 nodeid;
	u32 generation;

	if (*max_len < len) {
		*max_len = len;
		return FILEID_INVALID;
	}

	nodeid = get_fuse_inode(inode)->nodeid;
	generation = inode->i_generation;

	fh[0] = (u32)(nodeid >> 32);
	fh[1] = (u32)(nodeid & 0xffffffff);
	fh[2] = generation;

	if (parent) {
		nodeid = get_fuse_inode(parent)->nodeid;
		generation = parent->i_generation;

		fh[3] = (u32)(nodeid >> 32);
		fh[4] = (u32)(nodeid & 0xffffffff);
		fh[5] = generation;
	}

	*max_len = len;
	return parent ? FILEID_INO64_GEN_PARENT : FILEID_INO64_GEN;
}

static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
					struct fid *fid, int fh_len,
					int fh_type)
{
	struct fuse_inode_handle handle;

	if ((fh_type != FILEID_INO64_GEN &&
	     fh_type != FILEID_INO64_GEN_PARENT) || fh_len < 3)
		return NULL;

	handle.nodeid = (u64) fid->raw[0] << 32;
	handle.nodeid |= (u64) fid->raw[1];
	handle.generation = fid->raw[2];
	return fuse_get_dentry(sb, &handle);
}

static struct dentry *fuse_fh_to_parent(struct super_block *sb,
					struct fid *fid, int fh_len,
					int fh_type)
{
	struct fuse_inode_handle parent;

	if (fh_type != FILEID_INO64_GEN_PARENT || fh_len < 6)
		return NULL;

	parent.nodeid = (u64) fid->raw[3] << 32;
	parent.nodeid |= (u64) fid->raw[4];
	parent.generation = fid->raw[5];
	return fuse_get_dentry(sb, &parent);
}

static struct dentry *fuse_get_parent(struct dentry *child)
{
	struct inode *child_inode = d_inode(child);
	struct fuse_conn *fc = get_fuse_conn(child_inode);
	struct inode *inode;
	struct dentry *parent;
	struct fuse_entry_out outarg;
	int err;

	if (!fc->export_support)
		return ERR_PTR(-ESTALE);

	err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
			       &dotdot_name, &outarg, &inode);
	if (err) {
		if (err == -ENOENT)
			return ERR_PTR(-ESTALE);
		return ERR_PTR(err);
	}

	parent = d_obtain_alias(inode);
	if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID)
		fuse_invalidate_entry_cache(parent);

	return parent;
}

/* only for fid encoding; no support for file handle */
static const struct export_operations fuse_export_fid_operations = {
	.encode_fh	= fuse_encode_fh,
};

static const struct export_operations fuse_export_operations = {
	.fh_to_dentry	= fuse_fh_to_dentry,
	.fh_to_parent	= fuse_fh_to_parent,
	.encode_fh	= fuse_encode_fh,
	.get_parent	= fuse_get_parent,
};

static const struct super_operations fuse_super_operations = {
	.alloc_inode	= fuse_alloc_inode,
	.free_inode	= fuse_free_inode,
	.evict_inode	= fuse_evict_inode,
	.write_inode	= fuse_write_inode,
	.drop_inode	= generic_delete_inode,
	.umount_begin	= fuse_umount_begin,
	.statfs		= fuse_statfs,
	.sync_fs	= fuse_sync_fs,
	.show_options	= fuse_show_options,
};

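/*
 * Worked example (illustrative): on a machine with 1 GiB of RAM the
 * default below comes out to (2^30 >> 13) / 392 = 131072 / 392 = 334
 * background requests (integer division), before the 2^16 - 1 cap is
 * applied.
 */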
static void sanitize_global_limit(unsigned int *limit)
{
	/*
	 * The default maximum number of async requests is calculated to
	 * consume 1/2^13 of the total memory, assuming 392 bytes per request.
	 */
	if (*limit == 0)
		*limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392;

	if (*limit >= 1 << 16)
		*limit = (1 << 16) - 1;
}

static int set_global_limit(const char *val, const struct kernel_param *kp)
{
	int rv;

	rv = param_set_uint(val, kp);
	if (rv)
		return rv;

	sanitize_global_limit((unsigned int *)kp->arg);

	return 0;
}

static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
{
	int cap_sys_admin = capable(CAP_SYS_ADMIN);

	if (arg->minor < 13)
		return;

	sanitize_global_limit(&max_user_bgreq);
	sanitize_global_limit(&max_user_congthresh);

	spin_lock(&fc->bg_lock);
	if (arg->max_background) {
		fc->max_background = arg->max_background;

		if (!cap_sys_admin && fc->max_background > max_user_bgreq)
			fc->max_background = max_user_bgreq;
	}
	if (arg->congestion_threshold) {
		fc->congestion_threshold = arg->congestion_threshold;

		if (!cap_sys_admin &&
		    fc->congestion_threshold > max_user_congthresh)
			fc->congestion_threshold = max_user_congthresh;
	}
	spin_unlock(&fc->bg_lock);
}

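/*
 * Request timeout selection: a server-requested timeout wins, the
 * fuse_default_req_timeout module parameter fills in when the server
 * asked for none, fuse_max_req_timeout caps either value, and the
 * result is never below the timer's checking frequency.
 */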
static void set_request_timeout(struct fuse_conn *fc, unsigned int timeout)
{
	fc->timeout.req_timeout = secs_to_jiffies(timeout);
	INIT_DELAYED_WORK(&fc->timeout.work, fuse_check_timeout);
	queue_delayed_work(system_wq, &fc->timeout.work,
			   fuse_timeout_timer_freq);
}

static void init_server_timeout(struct fuse_conn *fc, unsigned int timeout)
{
	if (!timeout && !fuse_max_req_timeout && !fuse_default_req_timeout)
		return;

	if (!timeout)
		timeout = fuse_default_req_timeout;

	if (fuse_max_req_timeout) {
		if (timeout)
			timeout = min(fuse_max_req_timeout, timeout);
		else
			timeout = fuse_max_req_timeout;
	}

	timeout = max(FUSE_TIMEOUT_TIMER_FREQ, timeout);

	set_request_timeout(fc, timeout);
}

struct fuse_init_args {
	struct fuse_args args;
	struct fuse_init_in in;
	struct fuse_init_out out;
};

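/*
 * Process the INIT reply: each feature bit the server echoes back from
 * the set advertised in fuse_send_init() switches on the corresponding
 * connection capability; an unexpected major version or a bad reply
 * marks the connection as failed.
 */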
static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
			       int error)
{
	struct fuse_conn *fc = fm->fc;
	struct fuse_init_args *ia = container_of(args, typeof(*ia), args);
	struct fuse_init_out *arg = &ia->out;
	bool ok = true;

	if (error || arg->major != FUSE_KERNEL_VERSION)
		ok = false;
	else {
		unsigned long ra_pages;
		unsigned int timeout = 0;

		process_init_limits(fc, arg);

		if (arg->minor >= 6) {
			u64 flags = arg->flags;

			if (flags & FUSE_INIT_EXT)
				flags |= (u64) arg->flags2 << 32;

			ra_pages = arg->max_readahead / PAGE_SIZE;
			if (flags & FUSE_ASYNC_READ)
				fc->async_read = 1;
			if (!(flags & FUSE_POSIX_LOCKS))
				fc->no_lock = 1;
			if (arg->minor >= 17) {
				if (!(flags & FUSE_FLOCK_LOCKS))
					fc->no_flock = 1;
			} else {
				if (!(flags & FUSE_POSIX_LOCKS))
					fc->no_flock = 1;
			}
			if (flags & FUSE_ATOMIC_O_TRUNC)
				fc->atomic_o_trunc = 1;
			if (arg->minor >= 9) {
				/* LOOKUP has dependency on proto version */
				if (flags & FUSE_EXPORT_SUPPORT)
					fc->export_support = 1;
			}
			if (flags & FUSE_BIG_WRITES)
				fc->big_writes = 1;
			if (flags & FUSE_DONT_MASK)
				fc->dont_mask = 1;
			if (flags & FUSE_AUTO_INVAL_DATA)
				fc->auto_inval_data = 1;
			else if (flags & FUSE_EXPLICIT_INVAL_DATA)
				fc->explicit_inval_data = 1;
			if (flags & FUSE_DO_READDIRPLUS) {
				fc->do_readdirplus = 1;
				if (flags & FUSE_READDIRPLUS_AUTO)
					fc->readdirplus_auto = 1;
			}
			if (flags & FUSE_ASYNC_DIO)
				fc->async_dio = 1;
			if (flags & FUSE_WRITEBACK_CACHE)
				fc->writeback_cache = 1;
			if (flags & FUSE_PARALLEL_DIROPS)
				fc->parallel_dirops = 1;
			if (flags & FUSE_HANDLE_KILLPRIV)
				fc->handle_killpriv = 1;
			if (arg->time_gran && arg->time_gran <= 1000000000)
				fm->sb->s_time_gran = arg->time_gran;
			if ((flags & FUSE_POSIX_ACL)) {
				fc->default_permissions = 1;
				fc->posix_acl = 1;
			}
			if (flags & FUSE_CACHE_SYMLINKS)
				fc->cache_symlinks = 1;
			if (flags & FUSE_ABORT_ERROR)
				fc->abort_err = 1;
			if (flags & FUSE_MAX_PAGES) {
				fc->max_pages =
					min_t(unsigned int, fc->max_pages_limit,
					max_t(unsigned int, arg->max_pages, 1));

				/*
				 * PATH_MAX file names might need two pages for
				 * ops like rename
				 */
				if (fc->max_pages > 1)
					fc->name_max = FUSE_NAME_MAX;
			}
			if (IS_ENABLED(CONFIG_FUSE_DAX)) {
				if (flags & FUSE_MAP_ALIGNMENT &&
				    !fuse_dax_check_alignment(fc, arg->map_alignment)) {
					ok = false;
				}
				if (flags & FUSE_HAS_INODE_DAX)
					fc->inode_dax = 1;
			}
			if (flags & FUSE_HANDLE_KILLPRIV_V2) {
				fc->handle_killpriv_v2 = 1;
				fm->sb->s_flags |= SB_NOSEC;
			}
			if (flags & FUSE_SETXATTR_EXT)
				fc->setxattr_ext = 1;
			if (flags & FUSE_SECURITY_CTX)
				fc->init_security = 1;
			if (flags & FUSE_CREATE_SUPP_GROUP)
				fc->create_supp_group = 1;
			if (flags & FUSE_DIRECT_IO_ALLOW_MMAP)
				fc->direct_io_allow_mmap = 1;
			/*
			 * max_stack_depth is the max stack depth of FUSE fs,
			 * so it has to be at least 1 to support passthrough
			 * to backing files.
			 *
			 * with max_stack_depth > 1, the backing files can be
			 * on a stacked fs (e.g. overlayfs) themselves and with
			 * max_stack_depth == 1, FUSE fs can be stacked as the
			 * underlying fs of a stacked fs (e.g. overlayfs).
			 *
			 * Also don't allow the combination of FUSE_PASSTHROUGH
			 * and FUSE_WRITEBACK_CACHE, current design doesn't handle
			 * them together.
			 */
			if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH) &&
			    (flags & FUSE_PASSTHROUGH) &&
			    arg->max_stack_depth > 0 &&
			    arg->max_stack_depth <= FILESYSTEM_MAX_STACK_DEPTH &&
			    !(flags & FUSE_WRITEBACK_CACHE)) {
				fc->passthrough = 1;
				fc->max_stack_depth = arg->max_stack_depth;
				fm->sb->s_stack_depth = arg->max_stack_depth;
			}
			if (flags & FUSE_NO_EXPORT_SUPPORT)
				fm->sb->s_export_op = &fuse_export_fid_operations;
			if (flags & FUSE_ALLOW_IDMAP) {
				if (fc->default_permissions)
					fm->sb->s_iflags &= ~SB_I_NOIDMAP;
				else
					ok = false;
			}
			if (flags & FUSE_OVER_IO_URING && fuse_uring_enabled())
				fc->io_uring = 1;

			if (flags & FUSE_REQUEST_TIMEOUT)
				timeout = arg->request_timeout;
		} else {
			ra_pages = fc->max_read / PAGE_SIZE;
			fc->no_lock = 1;
			fc->no_flock = 1;
		}

		init_server_timeout(fc, timeout);

		fm->sb->s_bdi->ra_pages =
				min(fm->sb->s_bdi->ra_pages, ra_pages);
		fc->minor = arg->minor;
		fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
		fc->max_write = max_t(unsigned, 4096, fc->max_write);
		fc->conn_init = 1;
	}
	kfree(ia);

	if (!ok) {
		fc->conn_init = 0;
		fc->conn_error = 1;
	}

	fuse_set_initialized(fc);
	wake_up_all(&fc->blocked_waitq);
}

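/*
 * Send FUSE_INIT in the background; the reply is handled by
 * process_init_reply() above.  Until it arrives, ordinary requests
 * wait on fc->blocked_waitq for the connection to become initialized.
 */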
void fuse_send_init(struct fuse_mount *fm)
{
	struct fuse_init_args *ia;
	u64 flags;

	ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL);

	ia->in.major = FUSE_KERNEL_VERSION;
	ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
	ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE;
	flags =
		FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
		FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
		FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
		FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
		FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
		FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
		FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
		FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP |
		FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP |
		FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | FUSE_ALLOW_IDMAP |
		FUSE_REQUEST_TIMEOUT;
#ifdef CONFIG_FUSE_DAX
	if (fm->fc->dax)
		flags |= FUSE_MAP_ALIGNMENT;
	if (fuse_is_inode_dax_mode(fm->fc->dax_mode))
		flags |= FUSE_HAS_INODE_DAX;
#endif
	if (fm->fc->auto_submounts)
		flags |= FUSE_SUBMOUNTS;
	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
		flags |= FUSE_PASSTHROUGH;

	/*
	 * This is just an information flag for the fuse server.  No need to
	 * check the reply - the server is either sending IORING_OP_URING_CMD
	 * or not.
	 */
	if (fuse_uring_enabled())
		flags |= FUSE_OVER_IO_URING;

	ia->in.flags = flags;
	ia->in.flags2 = flags >> 32;

	ia->args.opcode = FUSE_INIT;
	ia->args.in_numargs = 1;
	ia->args.in_args[0].size = sizeof(ia->in);
	ia->args.in_args[0].value = &ia->in;
	ia->args.out_numargs = 1;
	/*
	 * Variable length argument used for backward compatibility
	 * with interface version < 7.5.  Rest of init_out is zeroed
	 * by do_get_request(), so a short reply is not a problem.
	 */
	ia->args.out_argvar = true;
	ia->args.out_args[0].size = sizeof(ia->out);
	ia->args.out_args[0].value = &ia->out;
	ia->args.force = true;
	ia->args.nocreds = true;
	ia->args.end = process_init_reply;

	if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0)
		process_init_reply(fm, &ia->args, -ENOTCONN);
}
EXPORT_SYMBOL_GPL(fuse_send_init);

void fuse_free_conn(struct fuse_conn *fc)
{
	WARN_ON(!list_empty(&fc->devices));
	kfree(fc);
}
EXPORT_SYMBOL_GPL(fuse_free_conn);

static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
{
	int err;
	char *suffix = "";

	if (sb->s_bdev) {
		suffix = "-fuseblk";
		/*
		 * sb->s_bdi points to blkdev's bdi however we want to redirect
		 * it to our private bdi...
		 */
		bdi_put(sb->s_bdi);
		sb->s_bdi = &noop_backing_dev_info;
	}
	err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev),
				   MINOR(fc->dev), suffix);
	if (err)
		return err;

	/* fuse does its own writeback accounting */
	sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT;
	sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT;

	/*
	 * For a single fuse filesystem use max 1% of dirty +
	 * writeback threshold.
	 *
	 * This gives about 1M of write buffer for memory maps on a
	 * machine with 1G and 10% dirty_ratio, which should be more
	 * than enough.
	 *
	 * Privileged users can raise it by writing to
	 *
	 *    /sys/class/bdi/<bdi>/max_ratio
	 */
	bdi_set_max_ratio(sb->s_bdi, 1);

	return 0;
}

struct fuse_dev *fuse_dev_alloc(void)
{
	struct fuse_dev *fud;
	struct list_head *pq;

	fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL);
	if (!fud)
		return NULL;

	pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
	if (!pq) {
		kfree(fud);
		return NULL;
	}

	fud->pq.processing = pq;
	fuse_pqueue_init(&fud->pq);

	return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc);

void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc)
{
	fud->fc = fuse_conn_get(fc);
	spin_lock(&fc->lock);
	list_add_tail(&fud->entry, &fc->devices);
	spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_dev_install);

struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc)
{
	struct fuse_dev *fud;

	fud = fuse_dev_alloc();
	if (!fud)
		return NULL;

	fuse_dev_install(fud, fc);
	return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc_install);

void fuse_dev_free(struct fuse_dev *fud)
{
	struct fuse_conn *fc = fud->fc;

	if (fc) {
		spin_lock(&fc->lock);
		list_del(&fud->entry);
		spin_unlock(&fc->lock);

		fuse_conn_put(fc);
	}
	kfree(fud->pq.processing);
	kfree(fud);
}
EXPORT_SYMBOL_GPL(fuse_dev_free);

static void fuse_fill_attr_from_inode(struct fuse_attr *attr,
				      const struct fuse_inode *fi)
{
	struct timespec64 atime = inode_get_atime(&fi->inode);
	struct timespec64 mtime = inode_get_mtime(&fi->inode);
	struct timespec64 ctime = inode_get_ctime(&fi->inode);

	*attr = (struct fuse_attr){
		.ino		= fi->inode.i_ino,
		.size		= fi->inode.i_size,
		.blocks		= fi->inode.i_blocks,
		.atime		= atime.tv_sec,
		.mtime		= mtime.tv_sec,
		.ctime		= ctime.tv_sec,
		.atimensec	= atime.tv_nsec,
		.mtimensec	= mtime.tv_nsec,
		.ctimensec	= ctime.tv_nsec,
		.mode		= fi->inode.i_mode,
		.nlink		= fi->inode.i_nlink,
		.uid		= __kuid_val(fi->inode.i_uid),
		.gid		= __kgid_val(fi->inode.i_gid),
		.rdev		= fi->inode.i_rdev,
		.blksize	= 1u << fi->inode.i_blkbits,
	};
}

static void fuse_sb_defaults(struct super_block *sb)
{
	sb->s_magic = FUSE_SUPER_MAGIC;
	sb->s_op = &fuse_super_operations;
	sb->s_xattr = fuse_xattr_handlers;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_time_gran = 1;
	sb->s_export_op = &fuse_export_operations;
	sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
	sb->s_iflags |= SB_I_NOIDMAP;
	if (sb->s_user_ns != &init_user_ns)
		sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
	sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
}

static int fuse_fill_super_submount(struct super_block *sb,
				    struct fuse_inode *parent_fi)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct super_block *parent_sb = parent_fi->inode.i_sb;
	struct fuse_attr root_attr;
	struct inode *root;
	struct fuse_submount_lookup *sl;
	struct fuse_inode *fi;

	fuse_sb_defaults(sb);
	fm->sb = sb;

	WARN_ON(sb->s_bdi != &noop_backing_dev_info);
	sb->s_bdi = bdi_get(parent_sb->s_bdi);

	sb->s_xattr = parent_sb->s_xattr;
	sb->s_export_op = parent_sb->s_export_op;
	sb->s_time_gran = parent_sb->s_time_gran;
	sb->s_blocksize = parent_sb->s_blocksize;
	sb->s_blocksize_bits = parent_sb->s_blocksize_bits;
	sb->s_subtype = kstrdup(parent_sb->s_subtype, GFP_KERNEL);
	if (parent_sb->s_subtype && !sb->s_subtype)
		return -ENOMEM;

	fuse_fill_attr_from_inode(&root_attr, parent_fi);
	root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0,
			 fuse_get_evict_ctr(fm->fc));
	/*
	 * This inode is just a duplicate, so it is not looked up and
	 * its nlookup should not be incremented.  fuse_iget() does
	 * that, though, so undo it here.
	 */
	fi = get_fuse_inode(root);
	fi->nlookup--;

	set_default_d_op(sb, &fuse_dentry_operations);
	sb->s_root = d_make_root(root);
	if (!sb->s_root)
		return -ENOMEM;

	/*
	 * Grab the parent's submount_lookup pointer and take a
	 * reference on the shared nlookup from the parent.  This is to
	 * prevent the last forget for this nodeid from getting
	 * triggered until all users have finished with it.
	 */
	sl = parent_fi->submount_lookup;
	WARN_ON(!sl);
	if (sl) {
		refcount_inc(&sl->count);
		fi->submount_lookup = sl;
	}

	return 0;
}

/* Filesystem context private data holds the FUSE inode of the mount point */
static int fuse_get_tree_submount(struct fs_context *fsc)
{
	struct fuse_mount *fm;
	struct fuse_inode *mp_fi = fsc->fs_private;
	struct fuse_conn *fc = get_fuse_conn(&mp_fi->inode);
	struct super_block *sb;
	int err;

	fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
	if (!fm)
		return -ENOMEM;

	fm->fc = fuse_conn_get(fc);
	fsc->s_fs_info = fm;
	sb = sget_fc(fsc, NULL, set_anon_super_fc);
	if (fsc->s_fs_info)
		fuse_mount_destroy(fm);
	if (IS_ERR(sb))
		return PTR_ERR(sb);

	/* Initialize superblock, making @mp_fi its root */
	err = fuse_fill_super_submount(sb, mp_fi);
	if (err) {
		deactivate_locked_super(sb);
		return err;
	}

	down_write(&fc->killsb);
	list_add_tail(&fm->fc_entry, &fc->mounts);
	up_write(&fc->killsb);

	sb->s_flags |= SB_ACTIVE;
	fsc->root = dget(sb->s_root);

	return 0;
}

static const struct fs_context_operations fuse_context_submount_ops = {
	.get_tree	= fuse_get_tree_submount,
};

int fuse_init_fs_context_submount(struct fs_context *fsc)
{
	fsc->ops = &fuse_context_submount_ops;
	return 0;
}
EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount);

int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
{
	struct fuse_dev *fud = NULL;
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct fuse_conn *fc = fm->fc;
	struct inode *root;
	struct dentry *root_dentry;
	int err;

	err = -EINVAL;
	if (sb->s_flags & SB_MANDLOCK)
		goto err;

	rcu_assign_pointer(fc->curr_bucket, fuse_sync_bucket_alloc());
	fuse_sb_defaults(sb);

	if (ctx->is_bdev) {
#ifdef CONFIG_BLOCK
		err = -EINVAL;
		if (!sb_set_blocksize(sb, ctx->blksize))
			goto err;
#endif
	} else {
		sb->s_blocksize = PAGE_SIZE;
		sb->s_blocksize_bits = PAGE_SHIFT;
	}

	sb->s_subtype = ctx->subtype;
	ctx->subtype = NULL;
	if (IS_ENABLED(CONFIG_FUSE_DAX)) {
		err = fuse_dax_conn_alloc(fc, ctx->dax_mode, ctx->dax_dev);
		if (err)
			goto err;
	}

	if (ctx->fudptr) {
		err = -ENOMEM;
		fud = fuse_dev_alloc_install(fc);
		if (!fud)
			goto err_free_dax;
	}

	fc->dev = sb->s_dev;
	fm->sb = sb;
	err = fuse_bdi_init(fc, sb);
	if (err)
		goto err_dev_free;

	/* Handle umasking inside the fuse code */
	if (sb->s_flags & SB_POSIXACL)
		fc->dont_mask = 1;
	sb->s_flags |= SB_POSIXACL;

	fc->default_permissions = ctx->default_permissions;
	fc->allow_other = ctx->allow_other;
	fc->user_id = ctx->user_id;
	fc->group_id = ctx->group_id;
	fc->legacy_opts_show = ctx->legacy_opts_show;
	fc->max_read = max_t(unsigned int, 4096, ctx->max_read);
	fc->destroy = ctx->destroy;
	fc->no_control = ctx->no_control;
	fc->no_force_umount = ctx->no_force_umount;

	err = -ENOMEM;
	root = fuse_get_root_inode(sb, ctx->rootmode);
	set_default_d_op(sb, &fuse_dentry_operations);
	root_dentry = d_make_root(root);
	if (!root_dentry)
		goto err_dev_free;

	mutex_lock(&fuse_mutex);
	err = -EINVAL;
	if (ctx->fudptr && *ctx->fudptr)
		goto err_unlock;

	err = fuse_ctl_add_conn(fc);
	if (err)
		goto err_unlock;

	list_add_tail(&fc->entry, &fuse_conn_list);
	sb->s_root = root_dentry;
	if (ctx->fudptr)
		*ctx->fudptr = fud;
	mutex_unlock(&fuse_mutex);
	return 0;

err_unlock:
	mutex_unlock(&fuse_mutex);
	dput(root_dentry);
err_dev_free:
	if (fud)
		fuse_dev_free(fud);
err_free_dax:
	if (IS_ENABLED(CONFIG_FUSE_DAX))
		fuse_dax_conn_free(fc);
err:
	return err;
}
EXPORT_SYMBOL_GPL(fuse_fill_super_common);

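/*
 * Fill a new superblock for a regular (non-submount) fuse mount.  The
 * /dev/fuse file supplied via the "fd=" option becomes the channel;
 * its private_data slot is claimed under fuse_mutex inside
 * fuse_fill_super_common() so that two mounts cannot share it.
 */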
static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;
	int err;

	if (!ctx->file || !ctx->rootmode_present ||
	    !ctx->user_id_present || !ctx->group_id_present)
		return -EINVAL;

	/*
	 * Require mount to happen from the same user namespace which
	 * opened /dev/fuse to prevent potential attacks.
	 */
	if ((ctx->file->f_op != &fuse_dev_operations) ||
	    (ctx->file->f_cred->user_ns != sb->s_user_ns))
		return -EINVAL;
	ctx->fudptr = &ctx->file->private_data;

	err = fuse_fill_super_common(sb, ctx);
	if (err)
		return err;
	/* file->private_data shall be visible on all CPUs after this */
	smp_mb();
	fuse_send_init(get_fuse_mount_super(sb));
	return 0;
}

/*
 * This is the path where user supplied an already initialized fuse dev.  In
 * this case never create a new super if the old one is gone.
 */
static int fuse_set_no_super(struct super_block *sb, struct fs_context *fsc)
{
	return -ENOTCONN;
}

static int fuse_test_super(struct super_block *sb, struct fs_context *fsc)
{
	return fsc->sget_key == get_fuse_conn_super(sb);
}

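/*
 * Three ways to reach a superblock here: a block-device mount
 * (fuseblk), a mount that shares an already-initialized /dev/fuse
 * connection, and a fresh nodev mount.
 */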
static int fuse_get_tree(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;
	struct fuse_dev *fud;
	struct fuse_conn *fc;
	struct fuse_mount *fm;
	struct super_block *sb;
	int err;

	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
	if (!fc)
		return -ENOMEM;

	fm = kzalloc(sizeof(*fm), GFP_KERNEL);
	if (!fm) {
		kfree(fc);
		return -ENOMEM;
	}

	fuse_conn_init(fc, fm, fsc->user_ns, &fuse_dev_fiq_ops, NULL);
	fc->release = fuse_free_conn;

	fsc->s_fs_info = fm;

	if (ctx->fd_present)
		ctx->file = fget(ctx->fd);

	if (IS_ENABLED(CONFIG_BLOCK) && ctx->is_bdev) {
		err = get_tree_bdev(fsc, fuse_fill_super);
		goto out;
	}
	/*
	 * While block dev mount can be initialized with a dummy device fd
	 * (found by device name), normal fuse mounts can't
	 */
	err = -EINVAL;
	if (!ctx->file)
		goto out;

	/*
	 * Allow creating a fuse mount with an already initialized fuse
	 * connection
	 */
	fud = READ_ONCE(ctx->file->private_data);
	if (ctx->file->f_op == &fuse_dev_operations && fud) {
		fsc->sget_key = fud->fc;
		sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super);
		err = PTR_ERR_OR_ZERO(sb);
		if (!IS_ERR(sb))
			fsc->root = dget(sb->s_root);
	} else {
		err = get_tree_nodev(fsc, fuse_fill_super);
	}
out:
	if (fsc->s_fs_info)
		fuse_mount_destroy(fm);
	if (ctx->file)
		fput(ctx->file);
	return err;
}

static const struct fs_context_operations fuse_context_ops = {
	.free		= fuse_free_fsc,
	.parse_param	= fuse_parse_param,
	.reconfigure	= fuse_reconfigure,
	.get_tree	= fuse_get_tree,
};

/*
 * Set up the filesystem mount context.
 */
static int fuse_init_fs_context(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx;

	ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	ctx->max_read = ~0;
	ctx->blksize = FUSE_DEFAULT_BLKSIZE;
	ctx->legacy_opts_show = true;

#ifdef CONFIG_BLOCK
	if (fsc->fs_type == &fuseblk_fs_type) {
		ctx->is_bdev = true;
		ctx->destroy = true;
	}
#endif

	fsc->fs_private = ctx;
	fsc->ops = &fuse_context_ops;
	return 0;
}

bool fuse_mount_remove(struct fuse_mount *fm)
{
	struct fuse_conn *fc = fm->fc;
	bool last = false;

	down_write(&fc->killsb);
	list_del_init(&fm->fc_entry);
	if (list_empty(&fc->mounts))
		last = true;
	up_write(&fc->killsb);

	return last;
}
EXPORT_SYMBOL_GPL(fuse_mount_remove);

void fuse_conn_destroy(struct fuse_mount *fm)
{
	struct fuse_conn *fc = fm->fc;

	if (fc->destroy)
		fuse_send_destroy(fm);

	fuse_abort_conn(fc);
	fuse_wait_aborted(fc);

	if (!list_empty(&fc->entry)) {
		mutex_lock(&fuse_mutex);
		list_del(&fc->entry);
		fuse_ctl_remove_conn(fc);
		mutex_unlock(&fuse_mutex);
	}
}
EXPORT_SYMBOL_GPL(fuse_conn_destroy);

static void fuse_sb_destroy(struct super_block *sb)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	bool last;

	if (sb->s_root) {
		last = fuse_mount_remove(fm);
		if (last)
			fuse_conn_destroy(fm);
	}
}

void fuse_mount_destroy(struct fuse_mount *fm)
{
	fuse_conn_put(fm->fc);
	kfree_rcu(fm, rcu);
}
EXPORT_SYMBOL(fuse_mount_destroy);

static void fuse_kill_sb_anon(struct super_block *sb)
{
	fuse_sb_destroy(sb);
	kill_anon_super(sb);
	fuse_mount_destroy(get_fuse_mount_super(sb));
}

static struct file_system_type fuse_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuse",
	.fs_flags	= FS_HAS_SUBTYPE | FS_USERNS_MOUNT | FS_ALLOW_IDMAP,
	.init_fs_context = fuse_init_fs_context,
	.parameters	= fuse_fs_parameters,
	.kill_sb	= fuse_kill_sb_anon,
};
MODULE_ALIAS_FS("fuse");

#ifdef CONFIG_BLOCK
static void fuse_kill_sb_blk(struct super_block *sb)
{
	fuse_sb_destroy(sb);
	kill_block_super(sb);
	fuse_mount_destroy(get_fuse_mount_super(sb));
}

static struct file_system_type fuseblk_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuseblk",
	.init_fs_context = fuse_init_fs_context,
	.parameters	= fuse_fs_parameters,
	.kill_sb	= fuse_kill_sb_blk,
	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("fuseblk");

static inline int register_fuseblk(void)
{
	return register_filesystem(&fuseblk_fs_type);
}

static inline void unregister_fuseblk(void)
{
	unregister_filesystem(&fuseblk_fs_type);
}
#else
static inline int register_fuseblk(void)
{
	return 0;
}

static inline void unregister_fuseblk(void)
{
}
#endif

static void fuse_inode_init_once(void *foo)
{
	struct inode *inode = foo;

	inode_init_once(inode);
}

static int __init fuse_fs_init(void)
{
	int err;

	fuse_inode_cachep = kmem_cache_create("fuse_inode",
			sizeof(struct fuse_inode), 0,
			SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT,
			fuse_inode_init_once);
	err = -ENOMEM;
	if (!fuse_inode_cachep)
		goto out;

	err = register_fuseblk();
	if (err)
		goto out2;

	err = register_filesystem(&fuse_fs_type);
	if (err)
		goto out3;

	err = fuse_sysctl_register();
	if (err)
		goto out4;

	return 0;

out4:
	unregister_filesystem(&fuse_fs_type);
out3:
	unregister_fuseblk();
out2:
	kmem_cache_destroy(fuse_inode_cachep);
out:
	return err;
}

static void fuse_fs_cleanup(void)
{
	fuse_sysctl_unregister();
	unregister_filesystem(&fuse_fs_type);
	unregister_fuseblk();

	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(fuse_inode_cachep);
}

static struct kobject *fuse_kobj;

static int fuse_sysfs_init(void)
{
	int err;

	fuse_kobj = kobject_create_and_add("fuse", fs_kobj);
	if (!fuse_kobj) {
		err = -ENOMEM;
		goto out_err;
	}

	err = sysfs_create_mount_point(fuse_kobj, "connections");
	if (err)
		goto out_fuse_unregister;

	return 0;

out_fuse_unregister:
	kobject_put(fuse_kobj);
out_err:
	return err;
}

static void fuse_sysfs_cleanup(void)
{
	sysfs_remove_mount_point(fuse_kobj, "connections");
	kobject_put(fuse_kobj);
}

static int __init fuse_init(void)
{
	int res;

	pr_info("init (API version %i.%i)\n",
		FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);

	INIT_LIST_HEAD(&fuse_conn_list);
	res = fuse_fs_init();
	if (res)
		goto err;

	res = fuse_dev_init();
	if (res)
		goto err_fs_cleanup;

	res = fuse_sysfs_init();
	if (res)
		goto err_dev_cleanup;

	res = fuse_ctl_init();
	if (res)
		goto err_sysfs_cleanup;

	sanitize_global_limit(&max_user_bgreq);
	sanitize_global_limit(&max_user_congthresh);

	return 0;

err_sysfs_cleanup:
	fuse_sysfs_cleanup();
err_dev_cleanup:
	fuse_dev_cleanup();
err_fs_cleanup:
	fuse_fs_cleanup();
err:
	return res;
}

static void __exit fuse_exit(void)
{
	pr_debug("exit\n");

	fuse_ctl_cleanup();
	fuse_sysfs_cleanup();
	fuse_fs_cleanup();
	fuse_dev_cleanup();
}

module_init(fuse_init);
module_exit(fuse_exit);