1 /* 2 FUSE: Filesystem in Userspace 3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> 4 5 This program can be distributed under the terms of the GNU GPL. 6 See the file COPYING. 7 */ 8 9 #include "fuse_i.h" 10 11 #include <linux/pagemap.h> 12 #include <linux/slab.h> 13 #include <linux/file.h> 14 #include <linux/seq_file.h> 15 #include <linux/init.h> 16 #include <linux/module.h> 17 #include <linux/moduleparam.h> 18 #include <linux/fs_context.h> 19 #include <linux/fs_parser.h> 20 #include <linux/statfs.h> 21 #include <linux/random.h> 22 #include <linux/sched.h> 23 #include <linux/exportfs.h> 24 #include <linux/posix_acl.h> 25 #include <linux/pid_namespace.h> 26 27 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 28 MODULE_DESCRIPTION("Filesystem in Userspace"); 29 MODULE_LICENSE("GPL"); 30 31 static struct kmem_cache *fuse_inode_cachep; 32 struct list_head fuse_conn_list; 33 DEFINE_MUTEX(fuse_mutex); 34 35 static int set_global_limit(const char *val, const struct kernel_param *kp); 36 37 unsigned max_user_bgreq; 38 module_param_call(max_user_bgreq, set_global_limit, param_get_uint, 39 &max_user_bgreq, 0644); 40 __MODULE_PARM_TYPE(max_user_bgreq, "uint"); 41 MODULE_PARM_DESC(max_user_bgreq, 42 "Global limit for the maximum number of backgrounded requests an " 43 "unprivileged user can set"); 44 45 unsigned max_user_congthresh; 46 module_param_call(max_user_congthresh, set_global_limit, param_get_uint, 47 &max_user_congthresh, 0644); 48 __MODULE_PARM_TYPE(max_user_congthresh, "uint"); 49 MODULE_PARM_DESC(max_user_congthresh, 50 "Global limit for the maximum congestion threshold an " 51 "unprivileged user can set"); 52 53 #define FUSE_SUPER_MAGIC 0x65735546 54 55 #define FUSE_DEFAULT_BLKSIZE 512 56 57 /** Maximum number of outstanding background requests */ 58 #define FUSE_DEFAULT_MAX_BACKGROUND 12 59 60 /** Congestion starts at 75% of maximum */ 61 #define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) 62 63 #ifdef CONFIG_BLOCK 
64 static struct file_system_type fuseblk_fs_type; 65 #endif 66 67 struct fuse_forget_link *fuse_alloc_forget(void) 68 { 69 return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT); 70 } 71 72 static struct inode *fuse_alloc_inode(struct super_block *sb) 73 { 74 struct fuse_inode *fi; 75 76 fi = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL); 77 if (!fi) 78 return NULL; 79 80 fi->i_time = 0; 81 fi->inval_mask = 0; 82 fi->nodeid = 0; 83 fi->nlookup = 0; 84 fi->attr_version = 0; 85 fi->orig_ino = 0; 86 fi->state = 0; 87 mutex_init(&fi->mutex); 88 spin_lock_init(&fi->lock); 89 fi->forget = fuse_alloc_forget(); 90 if (!fi->forget) 91 goto out_free; 92 93 if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi)) 94 goto out_free_forget; 95 96 return &fi->inode; 97 98 out_free_forget: 99 kfree(fi->forget); 100 out_free: 101 kmem_cache_free(fuse_inode_cachep, fi); 102 return NULL; 103 } 104 105 static void fuse_free_inode(struct inode *inode) 106 { 107 struct fuse_inode *fi = get_fuse_inode(inode); 108 109 mutex_destroy(&fi->mutex); 110 kfree(fi->forget); 111 #ifdef CONFIG_FUSE_DAX 112 kfree(fi->dax); 113 #endif 114 kmem_cache_free(fuse_inode_cachep, fi); 115 } 116 117 static void fuse_evict_inode(struct inode *inode) 118 { 119 struct fuse_inode *fi = get_fuse_inode(inode); 120 121 /* Will write inode on close/munmap and in all other dirtiers */ 122 WARN_ON(inode->i_state & I_DIRTY_INODE); 123 124 truncate_inode_pages_final(&inode->i_data); 125 clear_inode(inode); 126 if (inode->i_sb->s_flags & SB_ACTIVE) { 127 struct fuse_conn *fc = get_fuse_conn(inode); 128 129 if (FUSE_IS_DAX(inode)) 130 fuse_dax_inode_cleanup(inode); 131 if (fi->nlookup) { 132 fuse_queue_forget(fc, fi->forget, fi->nodeid, 133 fi->nlookup); 134 fi->forget = NULL; 135 } 136 } 137 if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) { 138 WARN_ON(!list_empty(&fi->write_files)); 139 WARN_ON(!list_empty(&fi->queued_writes)); 140 } 141 } 142 143 static int fuse_reconfigure(struct 
fs_context *fsc) 144 { 145 struct super_block *sb = fsc->root->d_sb; 146 147 sync_filesystem(sb); 148 if (fsc->sb_flags & SB_MANDLOCK) 149 return -EINVAL; 150 151 return 0; 152 } 153 154 /* 155 * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down 156 * so that it will fit. 157 */ 158 static ino_t fuse_squash_ino(u64 ino64) 159 { 160 ino_t ino = (ino_t) ino64; 161 if (sizeof(ino_t) < sizeof(u64)) 162 ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8; 163 return ino; 164 } 165 166 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, 167 u64 attr_valid, u32 cache_mask) 168 { 169 struct fuse_conn *fc = get_fuse_conn(inode); 170 struct fuse_inode *fi = get_fuse_inode(inode); 171 172 lockdep_assert_held(&fi->lock); 173 174 fi->attr_version = atomic64_inc_return(&fc->attr_version); 175 fi->i_time = attr_valid; 176 WRITE_ONCE(fi->inval_mask, 0); 177 178 inode->i_ino = fuse_squash_ino(attr->ino); 179 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 180 set_nlink(inode, attr->nlink); 181 inode->i_uid = make_kuid(fc->user_ns, attr->uid); 182 inode->i_gid = make_kgid(fc->user_ns, attr->gid); 183 inode->i_blocks = attr->blocks; 184 inode->i_atime.tv_sec = attr->atime; 185 inode->i_atime.tv_nsec = attr->atimensec; 186 /* mtime from server may be stale due to local buffered write */ 187 if (!(cache_mask & STATX_MTIME)) { 188 inode->i_mtime.tv_sec = attr->mtime; 189 inode->i_mtime.tv_nsec = attr->mtimensec; 190 } 191 if (!(cache_mask & STATX_CTIME)) { 192 inode->i_ctime.tv_sec = attr->ctime; 193 inode->i_ctime.tv_nsec = attr->ctimensec; 194 } 195 196 if (attr->blksize != 0) 197 inode->i_blkbits = ilog2(attr->blksize); 198 else 199 inode->i_blkbits = inode->i_sb->s_blocksize_bits; 200 201 /* 202 * Don't set the sticky bit in i_mode, unless we want the VFS 203 * to check permissions. This prevents failures due to the 204 * check in may_delete(). 
205 */ 206 fi->orig_i_mode = inode->i_mode; 207 if (!fc->default_permissions) 208 inode->i_mode &= ~S_ISVTX; 209 210 fi->orig_ino = attr->ino; 211 212 /* 213 * We are refreshing inode data and it is possible that another 214 * client set suid/sgid or security.capability xattr. So clear 215 * S_NOSEC. Ideally, we could have cleared it only if suid/sgid 216 * was set or if security.capability xattr was set. But we don't 217 * know if security.capability has been set or not. So clear it 218 * anyway. Its less efficient but should be safe. 219 */ 220 inode->i_flags &= ~S_NOSEC; 221 } 222 223 u32 fuse_get_cache_mask(struct inode *inode) 224 { 225 struct fuse_conn *fc = get_fuse_conn(inode); 226 227 if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) 228 return 0; 229 230 return STATX_MTIME | STATX_CTIME | STATX_SIZE; 231 } 232 233 void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, 234 u64 attr_valid, u64 attr_version) 235 { 236 struct fuse_conn *fc = get_fuse_conn(inode); 237 struct fuse_inode *fi = get_fuse_inode(inode); 238 u32 cache_mask; 239 loff_t oldsize; 240 struct timespec64 old_mtime; 241 242 spin_lock(&fi->lock); 243 /* 244 * In case of writeback_cache enabled, writes update mtime, ctime and 245 * may update i_size. In these cases trust the cached value in the 246 * inode. 
247 */ 248 cache_mask = fuse_get_cache_mask(inode); 249 if (cache_mask & STATX_SIZE) 250 attr->size = i_size_read(inode); 251 252 if (cache_mask & STATX_MTIME) { 253 attr->mtime = inode->i_mtime.tv_sec; 254 attr->mtimensec = inode->i_mtime.tv_nsec; 255 } 256 if (cache_mask & STATX_CTIME) { 257 attr->ctime = inode->i_ctime.tv_sec; 258 attr->ctimensec = inode->i_ctime.tv_nsec; 259 } 260 261 if ((attr_version != 0 && fi->attr_version > attr_version) || 262 test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { 263 spin_unlock(&fi->lock); 264 return; 265 } 266 267 old_mtime = inode->i_mtime; 268 fuse_change_attributes_common(inode, attr, attr_valid, cache_mask); 269 270 oldsize = inode->i_size; 271 /* 272 * In case of writeback_cache enabled, the cached writes beyond EOF 273 * extend local i_size without keeping userspace server in sync. So, 274 * attr->size coming from server can be stale. We cannot trust it. 275 */ 276 if (!(cache_mask & STATX_SIZE)) 277 i_size_write(inode, attr->size); 278 spin_unlock(&fi->lock); 279 280 if (!cache_mask && S_ISREG(inode->i_mode)) { 281 bool inval = false; 282 283 if (oldsize != attr->size) { 284 truncate_pagecache(inode, attr->size); 285 if (!fc->explicit_inval_data) 286 inval = true; 287 } else if (fc->auto_inval_data) { 288 struct timespec64 new_mtime = { 289 .tv_sec = attr->mtime, 290 .tv_nsec = attr->mtimensec, 291 }; 292 293 /* 294 * Auto inval mode also checks and invalidates if mtime 295 * has changed. 
296 */ 297 if (!timespec64_equal(&old_mtime, &new_mtime)) 298 inval = true; 299 } 300 301 if (inval) 302 invalidate_inode_pages2(inode->i_mapping); 303 } 304 305 if (IS_ENABLED(CONFIG_FUSE_DAX)) 306 fuse_dax_dontcache(inode, attr->flags); 307 } 308 309 static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) 310 { 311 inode->i_mode = attr->mode & S_IFMT; 312 inode->i_size = attr->size; 313 inode->i_mtime.tv_sec = attr->mtime; 314 inode->i_mtime.tv_nsec = attr->mtimensec; 315 inode->i_ctime.tv_sec = attr->ctime; 316 inode->i_ctime.tv_nsec = attr->ctimensec; 317 if (S_ISREG(inode->i_mode)) { 318 fuse_init_common(inode); 319 fuse_init_file_inode(inode, attr->flags); 320 } else if (S_ISDIR(inode->i_mode)) 321 fuse_init_dir(inode); 322 else if (S_ISLNK(inode->i_mode)) 323 fuse_init_symlink(inode); 324 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || 325 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { 326 fuse_init_common(inode); 327 init_special_inode(inode, inode->i_mode, 328 new_decode_dev(attr->rdev)); 329 } else 330 BUG(); 331 } 332 333 static int fuse_inode_eq(struct inode *inode, void *_nodeidp) 334 { 335 u64 nodeid = *(u64 *) _nodeidp; 336 if (get_node_id(inode) == nodeid) 337 return 1; 338 else 339 return 0; 340 } 341 342 static int fuse_inode_set(struct inode *inode, void *_nodeidp) 343 { 344 u64 nodeid = *(u64 *) _nodeidp; 345 get_fuse_inode(inode)->nodeid = nodeid; 346 return 0; 347 } 348 349 struct inode *fuse_iget(struct super_block *sb, u64 nodeid, 350 int generation, struct fuse_attr *attr, 351 u64 attr_valid, u64 attr_version) 352 { 353 struct inode *inode; 354 struct fuse_inode *fi; 355 struct fuse_conn *fc = get_fuse_conn_super(sb); 356 357 /* 358 * Auto mount points get their node id from the submount root, which is 359 * not a unique identifier within this filesystem. 360 * 361 * To avoid conflicts, do not place submount points into the inode hash 362 * table. 
363 */ 364 if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) && 365 S_ISDIR(attr->mode)) { 366 inode = new_inode(sb); 367 if (!inode) 368 return NULL; 369 370 fuse_init_inode(inode, attr); 371 get_fuse_inode(inode)->nodeid = nodeid; 372 inode->i_flags |= S_AUTOMOUNT; 373 goto done; 374 } 375 376 retry: 377 inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid); 378 if (!inode) 379 return NULL; 380 381 if ((inode->i_state & I_NEW)) { 382 inode->i_flags |= S_NOATIME; 383 if (!fc->writeback_cache || !S_ISREG(attr->mode)) 384 inode->i_flags |= S_NOCMTIME; 385 inode->i_generation = generation; 386 fuse_init_inode(inode, attr); 387 unlock_new_inode(inode); 388 } else if (fuse_stale_inode(inode, generation, attr)) { 389 /* nodeid was reused, any I/O on the old inode should fail */ 390 fuse_make_bad(inode); 391 iput(inode); 392 goto retry; 393 } 394 done: 395 fi = get_fuse_inode(inode); 396 spin_lock(&fi->lock); 397 fi->nlookup++; 398 spin_unlock(&fi->lock); 399 fuse_change_attributes(inode, attr, attr_valid, attr_version); 400 401 return inode; 402 } 403 404 struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid, 405 struct fuse_mount **fm) 406 { 407 struct fuse_mount *fm_iter; 408 struct inode *inode; 409 410 WARN_ON(!rwsem_is_locked(&fc->killsb)); 411 list_for_each_entry(fm_iter, &fc->mounts, fc_entry) { 412 if (!fm_iter->sb) 413 continue; 414 415 inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &nodeid); 416 if (inode) { 417 if (fm) 418 *fm = fm_iter; 419 return inode; 420 } 421 } 422 423 return NULL; 424 } 425 426 int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, 427 loff_t offset, loff_t len) 428 { 429 struct fuse_inode *fi; 430 struct inode *inode; 431 pgoff_t pg_start; 432 pgoff_t pg_end; 433 434 inode = fuse_ilookup(fc, nodeid, NULL); 435 if (!inode) 436 return -ENOENT; 437 438 fi = get_fuse_inode(inode); 439 spin_lock(&fi->lock); 440 fi->attr_version = atomic64_inc_return(&fc->attr_version); 441 
spin_unlock(&fi->lock); 442 443 fuse_invalidate_attr(inode); 444 forget_all_cached_acls(inode); 445 if (offset >= 0) { 446 pg_start = offset >> PAGE_SHIFT; 447 if (len <= 0) 448 pg_end = -1; 449 else 450 pg_end = (offset + len - 1) >> PAGE_SHIFT; 451 invalidate_inode_pages2_range(inode->i_mapping, 452 pg_start, pg_end); 453 } 454 iput(inode); 455 return 0; 456 } 457 458 bool fuse_lock_inode(struct inode *inode) 459 { 460 bool locked = false; 461 462 if (!get_fuse_conn(inode)->parallel_dirops) { 463 mutex_lock(&get_fuse_inode(inode)->mutex); 464 locked = true; 465 } 466 467 return locked; 468 } 469 470 void fuse_unlock_inode(struct inode *inode, bool locked) 471 { 472 if (locked) 473 mutex_unlock(&get_fuse_inode(inode)->mutex); 474 } 475 476 static void fuse_umount_begin(struct super_block *sb) 477 { 478 struct fuse_conn *fc = get_fuse_conn_super(sb); 479 480 if (!fc->no_force_umount) 481 fuse_abort_conn(fc); 482 } 483 484 static void fuse_send_destroy(struct fuse_mount *fm) 485 { 486 if (fm->fc->conn_init) { 487 FUSE_ARGS(args); 488 489 args.opcode = FUSE_DESTROY; 490 args.force = true; 491 args.nocreds = true; 492 fuse_simple_request(fm, &args); 493 } 494 } 495 496 static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) 497 { 498 stbuf->f_type = FUSE_SUPER_MAGIC; 499 stbuf->f_bsize = attr->bsize; 500 stbuf->f_frsize = attr->frsize; 501 stbuf->f_blocks = attr->blocks; 502 stbuf->f_bfree = attr->bfree; 503 stbuf->f_bavail = attr->bavail; 504 stbuf->f_files = attr->files; 505 stbuf->f_ffree = attr->ffree; 506 stbuf->f_namelen = attr->namelen; 507 /* fsid is left zero */ 508 } 509 510 static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) 511 { 512 struct super_block *sb = dentry->d_sb; 513 struct fuse_mount *fm = get_fuse_mount_super(sb); 514 FUSE_ARGS(args); 515 struct fuse_statfs_out outarg; 516 int err; 517 518 if (!fuse_allow_current_process(fm->fc)) { 519 buf->f_type = FUSE_SUPER_MAGIC; 520 return 0; 521 } 522 523 
memset(&outarg, 0, sizeof(outarg)); 524 args.in_numargs = 0; 525 args.opcode = FUSE_STATFS; 526 args.nodeid = get_node_id(d_inode(dentry)); 527 args.out_numargs = 1; 528 args.out_args[0].size = sizeof(outarg); 529 args.out_args[0].value = &outarg; 530 err = fuse_simple_request(fm, &args); 531 if (!err) 532 convert_fuse_statfs(buf, &outarg.st); 533 return err; 534 } 535 536 static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void) 537 { 538 struct fuse_sync_bucket *bucket; 539 540 bucket = kzalloc(sizeof(*bucket), GFP_KERNEL | __GFP_NOFAIL); 541 if (bucket) { 542 init_waitqueue_head(&bucket->waitq); 543 /* Initial active count */ 544 atomic_set(&bucket->count, 1); 545 } 546 return bucket; 547 } 548 549 static void fuse_sync_fs_writes(struct fuse_conn *fc) 550 { 551 struct fuse_sync_bucket *bucket, *new_bucket; 552 int count; 553 554 new_bucket = fuse_sync_bucket_alloc(); 555 spin_lock(&fc->lock); 556 bucket = rcu_dereference_protected(fc->curr_bucket, 1); 557 count = atomic_read(&bucket->count); 558 WARN_ON(count < 1); 559 /* No outstanding writes? */ 560 if (count == 1) { 561 spin_unlock(&fc->lock); 562 kfree(new_bucket); 563 return; 564 } 565 566 /* 567 * Completion of new bucket depends on completion of this bucket, so add 568 * one more count. 569 */ 570 atomic_inc(&new_bucket->count); 571 rcu_assign_pointer(fc->curr_bucket, new_bucket); 572 spin_unlock(&fc->lock); 573 /* 574 * Drop initial active count. At this point if all writes in this and 575 * ancestor buckets complete, the count will go to zero and this task 576 * will be woken up. 
577 */ 578 atomic_dec(&bucket->count); 579 580 wait_event(bucket->waitq, atomic_read(&bucket->count) == 0); 581 582 /* Drop temp count on descendant bucket */ 583 fuse_sync_bucket_dec(new_bucket); 584 kfree_rcu(bucket, rcu); 585 } 586 587 static int fuse_sync_fs(struct super_block *sb, int wait) 588 { 589 struct fuse_mount *fm = get_fuse_mount_super(sb); 590 struct fuse_conn *fc = fm->fc; 591 struct fuse_syncfs_in inarg; 592 FUSE_ARGS(args); 593 int err; 594 595 /* 596 * Userspace cannot handle the wait == 0 case. Avoid a 597 * gratuitous roundtrip. 598 */ 599 if (!wait) 600 return 0; 601 602 /* The filesystem is being unmounted. Nothing to do. */ 603 if (!sb->s_root) 604 return 0; 605 606 if (!fc->sync_fs) 607 return 0; 608 609 fuse_sync_fs_writes(fc); 610 611 memset(&inarg, 0, sizeof(inarg)); 612 args.in_numargs = 1; 613 args.in_args[0].size = sizeof(inarg); 614 args.in_args[0].value = &inarg; 615 args.opcode = FUSE_SYNCFS; 616 args.nodeid = get_node_id(sb->s_root->d_inode); 617 args.out_numargs = 0; 618 619 err = fuse_simple_request(fm, &args); 620 if (err == -ENOSYS) { 621 fc->sync_fs = 0; 622 err = 0; 623 } 624 625 return err; 626 } 627 628 enum { 629 OPT_SOURCE, 630 OPT_SUBTYPE, 631 OPT_FD, 632 OPT_ROOTMODE, 633 OPT_USER_ID, 634 OPT_GROUP_ID, 635 OPT_DEFAULT_PERMISSIONS, 636 OPT_ALLOW_OTHER, 637 OPT_MAX_READ, 638 OPT_BLKSIZE, 639 OPT_ERR 640 }; 641 642 static const struct fs_parameter_spec fuse_fs_parameters[] = { 643 fsparam_string ("source", OPT_SOURCE), 644 fsparam_u32 ("fd", OPT_FD), 645 fsparam_u32oct ("rootmode", OPT_ROOTMODE), 646 fsparam_u32 ("user_id", OPT_USER_ID), 647 fsparam_u32 ("group_id", OPT_GROUP_ID), 648 fsparam_flag ("default_permissions", OPT_DEFAULT_PERMISSIONS), 649 fsparam_flag ("allow_other", OPT_ALLOW_OTHER), 650 fsparam_u32 ("max_read", OPT_MAX_READ), 651 fsparam_u32 ("blksize", OPT_BLKSIZE), 652 fsparam_string ("subtype", OPT_SUBTYPE), 653 {} 654 }; 655 656 static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter 
*param) 657 { 658 struct fs_parse_result result; 659 struct fuse_fs_context *ctx = fsc->fs_private; 660 int opt; 661 662 if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { 663 /* 664 * Ignore options coming from mount(MS_REMOUNT) for backward 665 * compatibility. 666 */ 667 if (fsc->oldapi) 668 return 0; 669 670 return invalfc(fsc, "No changes allowed in reconfigure"); 671 } 672 673 opt = fs_parse(fsc, fuse_fs_parameters, param, &result); 674 if (opt < 0) 675 return opt; 676 677 switch (opt) { 678 case OPT_SOURCE: 679 if (fsc->source) 680 return invalfc(fsc, "Multiple sources specified"); 681 fsc->source = param->string; 682 param->string = NULL; 683 break; 684 685 case OPT_SUBTYPE: 686 if (ctx->subtype) 687 return invalfc(fsc, "Multiple subtypes specified"); 688 ctx->subtype = param->string; 689 param->string = NULL; 690 return 0; 691 692 case OPT_FD: 693 ctx->fd = result.uint_32; 694 ctx->fd_present = true; 695 break; 696 697 case OPT_ROOTMODE: 698 if (!fuse_valid_type(result.uint_32)) 699 return invalfc(fsc, "Invalid rootmode"); 700 ctx->rootmode = result.uint_32; 701 ctx->rootmode_present = true; 702 break; 703 704 case OPT_USER_ID: 705 ctx->user_id = make_kuid(fsc->user_ns, result.uint_32); 706 if (!uid_valid(ctx->user_id)) 707 return invalfc(fsc, "Invalid user_id"); 708 ctx->user_id_present = true; 709 break; 710 711 case OPT_GROUP_ID: 712 ctx->group_id = make_kgid(fsc->user_ns, result.uint_32); 713 if (!gid_valid(ctx->group_id)) 714 return invalfc(fsc, "Invalid group_id"); 715 ctx->group_id_present = true; 716 break; 717 718 case OPT_DEFAULT_PERMISSIONS: 719 ctx->default_permissions = true; 720 break; 721 722 case OPT_ALLOW_OTHER: 723 ctx->allow_other = true; 724 break; 725 726 case OPT_MAX_READ: 727 ctx->max_read = result.uint_32; 728 break; 729 730 case OPT_BLKSIZE: 731 if (!ctx->is_bdev) 732 return invalfc(fsc, "blksize only supported for fuseblk"); 733 ctx->blksize = result.uint_32; 734 break; 735 736 default: 737 return -EINVAL; 738 } 739 740 return 0; 
741 } 742 743 static void fuse_free_fsc(struct fs_context *fsc) 744 { 745 struct fuse_fs_context *ctx = fsc->fs_private; 746 747 if (ctx) { 748 kfree(ctx->subtype); 749 kfree(ctx); 750 } 751 } 752 753 static int fuse_show_options(struct seq_file *m, struct dentry *root) 754 { 755 struct super_block *sb = root->d_sb; 756 struct fuse_conn *fc = get_fuse_conn_super(sb); 757 758 if (fc->legacy_opts_show) { 759 seq_printf(m, ",user_id=%u", 760 from_kuid_munged(fc->user_ns, fc->user_id)); 761 seq_printf(m, ",group_id=%u", 762 from_kgid_munged(fc->user_ns, fc->group_id)); 763 if (fc->default_permissions) 764 seq_puts(m, ",default_permissions"); 765 if (fc->allow_other) 766 seq_puts(m, ",allow_other"); 767 if (fc->max_read != ~0) 768 seq_printf(m, ",max_read=%u", fc->max_read); 769 if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE) 770 seq_printf(m, ",blksize=%lu", sb->s_blocksize); 771 } 772 #ifdef CONFIG_FUSE_DAX 773 if (fc->dax_mode == FUSE_DAX_ALWAYS) 774 seq_puts(m, ",dax=always"); 775 else if (fc->dax_mode == FUSE_DAX_NEVER) 776 seq_puts(m, ",dax=never"); 777 else if (fc->dax_mode == FUSE_DAX_INODE_USER) 778 seq_puts(m, ",dax=inode"); 779 #endif 780 781 return 0; 782 } 783 784 static void fuse_iqueue_init(struct fuse_iqueue *fiq, 785 const struct fuse_iqueue_ops *ops, 786 void *priv) 787 { 788 memset(fiq, 0, sizeof(struct fuse_iqueue)); 789 spin_lock_init(&fiq->lock); 790 init_waitqueue_head(&fiq->waitq); 791 INIT_LIST_HEAD(&fiq->pending); 792 INIT_LIST_HEAD(&fiq->interrupts); 793 fiq->forget_list_tail = &fiq->forget_list_head; 794 fiq->connected = 1; 795 fiq->ops = ops; 796 fiq->priv = priv; 797 } 798 799 static void fuse_pqueue_init(struct fuse_pqueue *fpq) 800 { 801 unsigned int i; 802 803 spin_lock_init(&fpq->lock); 804 for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) 805 INIT_LIST_HEAD(&fpq->processing[i]); 806 INIT_LIST_HEAD(&fpq->io); 807 fpq->connected = 1; 808 } 809 810 void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, 811 struct user_namespace 
*user_ns, 812 const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv) 813 { 814 memset(fc, 0, sizeof(*fc)); 815 spin_lock_init(&fc->lock); 816 spin_lock_init(&fc->bg_lock); 817 init_rwsem(&fc->killsb); 818 refcount_set(&fc->count, 1); 819 atomic_set(&fc->dev_count, 1); 820 init_waitqueue_head(&fc->blocked_waitq); 821 fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv); 822 INIT_LIST_HEAD(&fc->bg_queue); 823 INIT_LIST_HEAD(&fc->entry); 824 INIT_LIST_HEAD(&fc->devices); 825 atomic_set(&fc->num_waiting, 0); 826 fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; 827 fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; 828 atomic64_set(&fc->khctr, 0); 829 fc->polled_files = RB_ROOT; 830 fc->blocked = 0; 831 fc->initialized = 0; 832 fc->connected = 1; 833 atomic64_set(&fc->attr_version, 1); 834 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); 835 fc->pid_ns = get_pid_ns(task_active_pid_ns(current)); 836 fc->user_ns = get_user_ns(user_ns); 837 fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; 838 fc->max_pages_limit = FUSE_MAX_MAX_PAGES; 839 840 INIT_LIST_HEAD(&fc->mounts); 841 list_add(&fm->fc_entry, &fc->mounts); 842 fm->fc = fc; 843 } 844 EXPORT_SYMBOL_GPL(fuse_conn_init); 845 846 void fuse_conn_put(struct fuse_conn *fc) 847 { 848 if (refcount_dec_and_test(&fc->count)) { 849 struct fuse_iqueue *fiq = &fc->iq; 850 struct fuse_sync_bucket *bucket; 851 852 if (IS_ENABLED(CONFIG_FUSE_DAX)) 853 fuse_dax_conn_free(fc); 854 if (fiq->ops->release) 855 fiq->ops->release(fiq); 856 put_pid_ns(fc->pid_ns); 857 put_user_ns(fc->user_ns); 858 bucket = rcu_dereference_protected(fc->curr_bucket, 1); 859 if (bucket) { 860 WARN_ON(atomic_read(&bucket->count) != 1); 861 kfree(bucket); 862 } 863 fc->release(fc); 864 } 865 } 866 EXPORT_SYMBOL_GPL(fuse_conn_put); 867 868 struct fuse_conn *fuse_conn_get(struct fuse_conn *fc) 869 { 870 refcount_inc(&fc->count); 871 return fc; 872 } 873 EXPORT_SYMBOL_GPL(fuse_conn_get); 874 875 static struct inode *fuse_get_root_inode(struct 
super_block *sb, unsigned mode) 876 { 877 struct fuse_attr attr; 878 memset(&attr, 0, sizeof(attr)); 879 880 attr.mode = mode; 881 attr.ino = FUSE_ROOT_ID; 882 attr.nlink = 1; 883 return fuse_iget(sb, 1, 0, &attr, 0, 0); 884 } 885 886 struct fuse_inode_handle { 887 u64 nodeid; 888 u32 generation; 889 }; 890 891 static struct dentry *fuse_get_dentry(struct super_block *sb, 892 struct fuse_inode_handle *handle) 893 { 894 struct fuse_conn *fc = get_fuse_conn_super(sb); 895 struct inode *inode; 896 struct dentry *entry; 897 int err = -ESTALE; 898 899 if (handle->nodeid == 0) 900 goto out_err; 901 902 inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid); 903 if (!inode) { 904 struct fuse_entry_out outarg; 905 const struct qstr name = QSTR_INIT(".", 1); 906 907 if (!fc->export_support) 908 goto out_err; 909 910 err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg, 911 &inode); 912 if (err && err != -ENOENT) 913 goto out_err; 914 if (err || !inode) { 915 err = -ESTALE; 916 goto out_err; 917 } 918 err = -EIO; 919 if (get_node_id(inode) != handle->nodeid) 920 goto out_iput; 921 } 922 err = -ESTALE; 923 if (inode->i_generation != handle->generation) 924 goto out_iput; 925 926 entry = d_obtain_alias(inode); 927 if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) 928 fuse_invalidate_entry_cache(entry); 929 930 return entry; 931 932 out_iput: 933 iput(inode); 934 out_err: 935 return ERR_PTR(err); 936 } 937 938 static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len, 939 struct inode *parent) 940 { 941 int len = parent ? 
6 : 3; 942 u64 nodeid; 943 u32 generation; 944 945 if (*max_len < len) { 946 *max_len = len; 947 return FILEID_INVALID; 948 } 949 950 nodeid = get_fuse_inode(inode)->nodeid; 951 generation = inode->i_generation; 952 953 fh[0] = (u32)(nodeid >> 32); 954 fh[1] = (u32)(nodeid & 0xffffffff); 955 fh[2] = generation; 956 957 if (parent) { 958 nodeid = get_fuse_inode(parent)->nodeid; 959 generation = parent->i_generation; 960 961 fh[3] = (u32)(nodeid >> 32); 962 fh[4] = (u32)(nodeid & 0xffffffff); 963 fh[5] = generation; 964 } 965 966 *max_len = len; 967 return parent ? 0x82 : 0x81; 968 } 969 970 static struct dentry *fuse_fh_to_dentry(struct super_block *sb, 971 struct fid *fid, int fh_len, int fh_type) 972 { 973 struct fuse_inode_handle handle; 974 975 if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3) 976 return NULL; 977 978 handle.nodeid = (u64) fid->raw[0] << 32; 979 handle.nodeid |= (u64) fid->raw[1]; 980 handle.generation = fid->raw[2]; 981 return fuse_get_dentry(sb, &handle); 982 } 983 984 static struct dentry *fuse_fh_to_parent(struct super_block *sb, 985 struct fid *fid, int fh_len, int fh_type) 986 { 987 struct fuse_inode_handle parent; 988 989 if (fh_type != 0x82 || fh_len < 6) 990 return NULL; 991 992 parent.nodeid = (u64) fid->raw[3] << 32; 993 parent.nodeid |= (u64) fid->raw[4]; 994 parent.generation = fid->raw[5]; 995 return fuse_get_dentry(sb, &parent); 996 } 997 998 static struct dentry *fuse_get_parent(struct dentry *child) 999 { 1000 struct inode *child_inode = d_inode(child); 1001 struct fuse_conn *fc = get_fuse_conn(child_inode); 1002 struct inode *inode; 1003 struct dentry *parent; 1004 struct fuse_entry_out outarg; 1005 int err; 1006 1007 if (!fc->export_support) 1008 return ERR_PTR(-ESTALE); 1009 1010 err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode), 1011 &dotdot_name, &outarg, &inode); 1012 if (err) { 1013 if (err == -ENOENT) 1014 return ERR_PTR(-ESTALE); 1015 return ERR_PTR(err); 1016 } 1017 1018 parent = 
d_obtain_alias(inode); 1019 if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) 1020 fuse_invalidate_entry_cache(parent); 1021 1022 return parent; 1023 } 1024 1025 static const struct export_operations fuse_export_operations = { 1026 .fh_to_dentry = fuse_fh_to_dentry, 1027 .fh_to_parent = fuse_fh_to_parent, 1028 .encode_fh = fuse_encode_fh, 1029 .get_parent = fuse_get_parent, 1030 }; 1031 1032 static const struct super_operations fuse_super_operations = { 1033 .alloc_inode = fuse_alloc_inode, 1034 .free_inode = fuse_free_inode, 1035 .evict_inode = fuse_evict_inode, 1036 .write_inode = fuse_write_inode, 1037 .drop_inode = generic_delete_inode, 1038 .umount_begin = fuse_umount_begin, 1039 .statfs = fuse_statfs, 1040 .sync_fs = fuse_sync_fs, 1041 .show_options = fuse_show_options, 1042 }; 1043 1044 static void sanitize_global_limit(unsigned *limit) 1045 { 1046 /* 1047 * The default maximum number of async requests is calculated to consume 1048 * 1/2^13 of the total memory, assuming 392 bytes per request. 
1049 */ 1050 if (*limit == 0) 1051 *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392; 1052 1053 if (*limit >= 1 << 16) 1054 *limit = (1 << 16) - 1; 1055 } 1056 1057 static int set_global_limit(const char *val, const struct kernel_param *kp) 1058 { 1059 int rv; 1060 1061 rv = param_set_uint(val, kp); 1062 if (rv) 1063 return rv; 1064 1065 sanitize_global_limit((unsigned *)kp->arg); 1066 1067 return 0; 1068 } 1069 1070 static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg) 1071 { 1072 int cap_sys_admin = capable(CAP_SYS_ADMIN); 1073 1074 if (arg->minor < 13) 1075 return; 1076 1077 sanitize_global_limit(&max_user_bgreq); 1078 sanitize_global_limit(&max_user_congthresh); 1079 1080 spin_lock(&fc->bg_lock); 1081 if (arg->max_background) { 1082 fc->max_background = arg->max_background; 1083 1084 if (!cap_sys_admin && fc->max_background > max_user_bgreq) 1085 fc->max_background = max_user_bgreq; 1086 } 1087 if (arg->congestion_threshold) { 1088 fc->congestion_threshold = arg->congestion_threshold; 1089 1090 if (!cap_sys_admin && 1091 fc->congestion_threshold > max_user_congthresh) 1092 fc->congestion_threshold = max_user_congthresh; 1093 } 1094 spin_unlock(&fc->bg_lock); 1095 } 1096 1097 struct fuse_init_args { 1098 struct fuse_args args; 1099 struct fuse_init_in in; 1100 struct fuse_init_out out; 1101 }; 1102 1103 static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, 1104 int error) 1105 { 1106 struct fuse_conn *fc = fm->fc; 1107 struct fuse_init_args *ia = container_of(args, typeof(*ia), args); 1108 struct fuse_init_out *arg = &ia->out; 1109 bool ok = true; 1110 1111 if (error || arg->major != FUSE_KERNEL_VERSION) 1112 ok = false; 1113 else { 1114 unsigned long ra_pages; 1115 1116 process_init_limits(fc, arg); 1117 1118 if (arg->minor >= 6) { 1119 u64 flags = arg->flags | (u64) arg->flags2 << 32; 1120 1121 ra_pages = arg->max_readahead / PAGE_SIZE; 1122 if (flags & FUSE_ASYNC_READ) 1123 fc->async_read = 1; 1124 if 
(!(flags & FUSE_POSIX_LOCKS)) 1125 fc->no_lock = 1; 1126 if (arg->minor >= 17) { 1127 if (!(flags & FUSE_FLOCK_LOCKS)) 1128 fc->no_flock = 1; 1129 } else { 1130 if (!(flags & FUSE_POSIX_LOCKS)) 1131 fc->no_flock = 1; 1132 } 1133 if (flags & FUSE_ATOMIC_O_TRUNC) 1134 fc->atomic_o_trunc = 1; 1135 if (arg->minor >= 9) { 1136 /* LOOKUP has dependency on proto version */ 1137 if (flags & FUSE_EXPORT_SUPPORT) 1138 fc->export_support = 1; 1139 } 1140 if (flags & FUSE_BIG_WRITES) 1141 fc->big_writes = 1; 1142 if (flags & FUSE_DONT_MASK) 1143 fc->dont_mask = 1; 1144 if (flags & FUSE_AUTO_INVAL_DATA) 1145 fc->auto_inval_data = 1; 1146 else if (flags & FUSE_EXPLICIT_INVAL_DATA) 1147 fc->explicit_inval_data = 1; 1148 if (flags & FUSE_DO_READDIRPLUS) { 1149 fc->do_readdirplus = 1; 1150 if (flags & FUSE_READDIRPLUS_AUTO) 1151 fc->readdirplus_auto = 1; 1152 } 1153 if (flags & FUSE_ASYNC_DIO) 1154 fc->async_dio = 1; 1155 if (flags & FUSE_WRITEBACK_CACHE) 1156 fc->writeback_cache = 1; 1157 if (flags & FUSE_PARALLEL_DIROPS) 1158 fc->parallel_dirops = 1; 1159 if (flags & FUSE_HANDLE_KILLPRIV) 1160 fc->handle_killpriv = 1; 1161 if (arg->time_gran && arg->time_gran <= 1000000000) 1162 fm->sb->s_time_gran = arg->time_gran; 1163 if ((flags & FUSE_POSIX_ACL)) { 1164 fc->default_permissions = 1; 1165 fc->posix_acl = 1; 1166 fm->sb->s_xattr = fuse_acl_xattr_handlers; 1167 } 1168 if (flags & FUSE_CACHE_SYMLINKS) 1169 fc->cache_symlinks = 1; 1170 if (flags & FUSE_ABORT_ERROR) 1171 fc->abort_err = 1; 1172 if (flags & FUSE_MAX_PAGES) { 1173 fc->max_pages = 1174 min_t(unsigned int, fc->max_pages_limit, 1175 max_t(unsigned int, arg->max_pages, 1)); 1176 } 1177 if (IS_ENABLED(CONFIG_FUSE_DAX)) { 1178 if (flags & FUSE_MAP_ALIGNMENT && 1179 !fuse_dax_check_alignment(fc, arg->map_alignment)) { 1180 ok = false; 1181 } 1182 if (flags & FUSE_HAS_INODE_DAX) 1183 fc->inode_dax = 1; 1184 } 1185 if (flags & FUSE_HANDLE_KILLPRIV_V2) { 1186 fc->handle_killpriv_v2 = 1; 1187 fm->sb->s_flags |= SB_NOSEC; 1188 } 
1189 if (flags & FUSE_SETXATTR_EXT) 1190 fc->setxattr_ext = 1; 1191 if (flags & FUSE_SECURITY_CTX) 1192 fc->init_security = 1; 1193 } else { 1194 ra_pages = fc->max_read / PAGE_SIZE; 1195 fc->no_lock = 1; 1196 fc->no_flock = 1; 1197 } 1198 1199 fm->sb->s_bdi->ra_pages = 1200 min(fm->sb->s_bdi->ra_pages, ra_pages); 1201 fc->minor = arg->minor; 1202 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; 1203 fc->max_write = max_t(unsigned, 4096, fc->max_write); 1204 fc->conn_init = 1; 1205 } 1206 kfree(ia); 1207 1208 if (!ok) { 1209 fc->conn_init = 0; 1210 fc->conn_error = 1; 1211 } 1212 1213 fuse_set_initialized(fc); 1214 wake_up_all(&fc->blocked_waitq); 1215 } 1216 1217 void fuse_send_init(struct fuse_mount *fm) 1218 { 1219 struct fuse_init_args *ia; 1220 u64 flags; 1221 1222 ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL); 1223 1224 ia->in.major = FUSE_KERNEL_VERSION; 1225 ia->in.minor = FUSE_KERNEL_MINOR_VERSION; 1226 ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE; 1227 flags = 1228 FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | 1229 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | 1230 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | 1231 FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | 1232 FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | 1233 FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT | 1234 FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL | 1235 FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | 1236 FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA | 1237 FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | 1238 FUSE_SECURITY_CTX; 1239 #ifdef CONFIG_FUSE_DAX 1240 if (fm->fc->dax) 1241 flags |= FUSE_MAP_ALIGNMENT; 1242 if (fuse_is_inode_dax_mode(fm->fc->dax_mode)) 1243 flags |= FUSE_HAS_INODE_DAX; 1244 #endif 1245 if (fm->fc->auto_submounts) 1246 flags |= FUSE_SUBMOUNTS; 1247 1248 ia->in.flags = flags; 1249 ia->in.flags2 = flags >> 32; 1250 1251 
ia->args.opcode = FUSE_INIT; 1252 ia->args.in_numargs = 1; 1253 ia->args.in_args[0].size = sizeof(ia->in); 1254 ia->args.in_args[0].value = &ia->in; 1255 ia->args.out_numargs = 1; 1256 /* Variable length argument used for backward compatibility 1257 with interface version < 7.5. Rest of init_out is zeroed 1258 by do_get_request(), so a short reply is not a problem */ 1259 ia->args.out_argvar = true; 1260 ia->args.out_args[0].size = sizeof(ia->out); 1261 ia->args.out_args[0].value = &ia->out; 1262 ia->args.force = true; 1263 ia->args.nocreds = true; 1264 ia->args.end = process_init_reply; 1265 1266 if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0) 1267 process_init_reply(fm, &ia->args, -ENOTCONN); 1268 } 1269 EXPORT_SYMBOL_GPL(fuse_send_init); 1270 1271 void fuse_free_conn(struct fuse_conn *fc) 1272 { 1273 WARN_ON(!list_empty(&fc->devices)); 1274 kfree_rcu(fc, rcu); 1275 } 1276 EXPORT_SYMBOL_GPL(fuse_free_conn); 1277 1278 static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) 1279 { 1280 int err; 1281 char *suffix = ""; 1282 1283 if (sb->s_bdev) { 1284 suffix = "-fuseblk"; 1285 /* 1286 * sb->s_bdi points to blkdev's bdi however we want to redirect 1287 * it to our private bdi... 1288 */ 1289 bdi_put(sb->s_bdi); 1290 sb->s_bdi = &noop_backing_dev_info; 1291 } 1292 err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev), 1293 MINOR(fc->dev), suffix); 1294 if (err) 1295 return err; 1296 1297 /* fuse does it's own writeback accounting */ 1298 sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT; 1299 sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT; 1300 1301 /* 1302 * For a single fuse filesystem use max 1% of dirty + 1303 * writeback threshold. 1304 * 1305 * This gives about 1M of write buffer for memory maps on a 1306 * machine with 1G and 10% dirty_ratio, which should be more 1307 * than enough. 
1308 * 1309 * Privileged users can raise it by writing to 1310 * 1311 * /sys/class/bdi/<bdi>/max_ratio 1312 */ 1313 bdi_set_max_ratio(sb->s_bdi, 1); 1314 1315 return 0; 1316 } 1317 1318 struct fuse_dev *fuse_dev_alloc(void) 1319 { 1320 struct fuse_dev *fud; 1321 struct list_head *pq; 1322 1323 fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL); 1324 if (!fud) 1325 return NULL; 1326 1327 pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL); 1328 if (!pq) { 1329 kfree(fud); 1330 return NULL; 1331 } 1332 1333 fud->pq.processing = pq; 1334 fuse_pqueue_init(&fud->pq); 1335 1336 return fud; 1337 } 1338 EXPORT_SYMBOL_GPL(fuse_dev_alloc); 1339 1340 void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc) 1341 { 1342 fud->fc = fuse_conn_get(fc); 1343 spin_lock(&fc->lock); 1344 list_add_tail(&fud->entry, &fc->devices); 1345 spin_unlock(&fc->lock); 1346 } 1347 EXPORT_SYMBOL_GPL(fuse_dev_install); 1348 1349 struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc) 1350 { 1351 struct fuse_dev *fud; 1352 1353 fud = fuse_dev_alloc(); 1354 if (!fud) 1355 return NULL; 1356 1357 fuse_dev_install(fud, fc); 1358 return fud; 1359 } 1360 EXPORT_SYMBOL_GPL(fuse_dev_alloc_install); 1361 1362 void fuse_dev_free(struct fuse_dev *fud) 1363 { 1364 struct fuse_conn *fc = fud->fc; 1365 1366 if (fc) { 1367 spin_lock(&fc->lock); 1368 list_del(&fud->entry); 1369 spin_unlock(&fc->lock); 1370 1371 fuse_conn_put(fc); 1372 } 1373 kfree(fud->pq.processing); 1374 kfree(fud); 1375 } 1376 EXPORT_SYMBOL_GPL(fuse_dev_free); 1377 1378 static void fuse_fill_attr_from_inode(struct fuse_attr *attr, 1379 const struct fuse_inode *fi) 1380 { 1381 *attr = (struct fuse_attr){ 1382 .ino = fi->inode.i_ino, 1383 .size = fi->inode.i_size, 1384 .blocks = fi->inode.i_blocks, 1385 .atime = fi->inode.i_atime.tv_sec, 1386 .mtime = fi->inode.i_mtime.tv_sec, 1387 .ctime = fi->inode.i_ctime.tv_sec, 1388 .atimensec = fi->inode.i_atime.tv_nsec, 1389 .mtimensec = fi->inode.i_mtime.tv_nsec, 1390 
.ctimensec = fi->inode.i_ctime.tv_nsec, 1391 .mode = fi->inode.i_mode, 1392 .nlink = fi->inode.i_nlink, 1393 .uid = fi->inode.i_uid.val, 1394 .gid = fi->inode.i_gid.val, 1395 .rdev = fi->inode.i_rdev, 1396 .blksize = 1u << fi->inode.i_blkbits, 1397 }; 1398 } 1399 1400 static void fuse_sb_defaults(struct super_block *sb) 1401 { 1402 sb->s_magic = FUSE_SUPER_MAGIC; 1403 sb->s_op = &fuse_super_operations; 1404 sb->s_xattr = fuse_xattr_handlers; 1405 sb->s_maxbytes = MAX_LFS_FILESIZE; 1406 sb->s_time_gran = 1; 1407 sb->s_export_op = &fuse_export_operations; 1408 sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE; 1409 if (sb->s_user_ns != &init_user_ns) 1410 sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; 1411 sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); 1412 1413 /* 1414 * If we are not in the initial user namespace posix 1415 * acls must be translated. 1416 */ 1417 if (sb->s_user_ns != &init_user_ns) 1418 sb->s_xattr = fuse_no_acl_xattr_handlers; 1419 } 1420 1421 static int fuse_fill_super_submount(struct super_block *sb, 1422 struct fuse_inode *parent_fi) 1423 { 1424 struct fuse_mount *fm = get_fuse_mount_super(sb); 1425 struct super_block *parent_sb = parent_fi->inode.i_sb; 1426 struct fuse_attr root_attr; 1427 struct inode *root; 1428 1429 fuse_sb_defaults(sb); 1430 fm->sb = sb; 1431 1432 WARN_ON(sb->s_bdi != &noop_backing_dev_info); 1433 sb->s_bdi = bdi_get(parent_sb->s_bdi); 1434 1435 sb->s_xattr = parent_sb->s_xattr; 1436 sb->s_time_gran = parent_sb->s_time_gran; 1437 sb->s_blocksize = parent_sb->s_blocksize; 1438 sb->s_blocksize_bits = parent_sb->s_blocksize_bits; 1439 sb->s_subtype = kstrdup(parent_sb->s_subtype, GFP_KERNEL); 1440 if (parent_sb->s_subtype && !sb->s_subtype) 1441 return -ENOMEM; 1442 1443 fuse_fill_attr_from_inode(&root_attr, parent_fi); 1444 root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0); 1445 /* 1446 * This inode is just a duplicate, so it is not looked up and 1447 * its nlookup should not be incremented. 
fuse_iget() does 1448 * that, though, so undo it here. 1449 */ 1450 get_fuse_inode(root)->nlookup--; 1451 sb->s_d_op = &fuse_dentry_operations; 1452 sb->s_root = d_make_root(root); 1453 if (!sb->s_root) 1454 return -ENOMEM; 1455 1456 return 0; 1457 } 1458 1459 /* Filesystem context private data holds the FUSE inode of the mount point */ 1460 static int fuse_get_tree_submount(struct fs_context *fsc) 1461 { 1462 struct fuse_mount *fm; 1463 struct fuse_inode *mp_fi = fsc->fs_private; 1464 struct fuse_conn *fc = get_fuse_conn(&mp_fi->inode); 1465 struct super_block *sb; 1466 int err; 1467 1468 fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL); 1469 if (!fm) 1470 return -ENOMEM; 1471 1472 fm->fc = fuse_conn_get(fc); 1473 fsc->s_fs_info = fm; 1474 sb = sget_fc(fsc, NULL, set_anon_super_fc); 1475 if (fsc->s_fs_info) 1476 fuse_mount_destroy(fm); 1477 if (IS_ERR(sb)) 1478 return PTR_ERR(sb); 1479 1480 /* Initialize superblock, making @mp_fi its root */ 1481 err = fuse_fill_super_submount(sb, mp_fi); 1482 if (err) { 1483 deactivate_locked_super(sb); 1484 return err; 1485 } 1486 1487 down_write(&fc->killsb); 1488 list_add_tail(&fm->fc_entry, &fc->mounts); 1489 up_write(&fc->killsb); 1490 1491 sb->s_flags |= SB_ACTIVE; 1492 fsc->root = dget(sb->s_root); 1493 1494 return 0; 1495 } 1496 1497 static const struct fs_context_operations fuse_context_submount_ops = { 1498 .get_tree = fuse_get_tree_submount, 1499 }; 1500 1501 int fuse_init_fs_context_submount(struct fs_context *fsc) 1502 { 1503 fsc->ops = &fuse_context_submount_ops; 1504 return 0; 1505 } 1506 EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount); 1507 1508 int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) 1509 { 1510 struct fuse_dev *fud = NULL; 1511 struct fuse_mount *fm = get_fuse_mount_super(sb); 1512 struct fuse_conn *fc = fm->fc; 1513 struct inode *root; 1514 struct dentry *root_dentry; 1515 int err; 1516 1517 err = -EINVAL; 1518 if (sb->s_flags & SB_MANDLOCK) 1519 goto err; 1520 1521 
rcu_assign_pointer(fc->curr_bucket, fuse_sync_bucket_alloc()); 1522 fuse_sb_defaults(sb); 1523 1524 if (ctx->is_bdev) { 1525 #ifdef CONFIG_BLOCK 1526 err = -EINVAL; 1527 if (!sb_set_blocksize(sb, ctx->blksize)) 1528 goto err; 1529 #endif 1530 } else { 1531 sb->s_blocksize = PAGE_SIZE; 1532 sb->s_blocksize_bits = PAGE_SHIFT; 1533 } 1534 1535 sb->s_subtype = ctx->subtype; 1536 ctx->subtype = NULL; 1537 if (IS_ENABLED(CONFIG_FUSE_DAX)) { 1538 err = fuse_dax_conn_alloc(fc, ctx->dax_mode, ctx->dax_dev); 1539 if (err) 1540 goto err; 1541 } 1542 1543 if (ctx->fudptr) { 1544 err = -ENOMEM; 1545 fud = fuse_dev_alloc_install(fc); 1546 if (!fud) 1547 goto err_free_dax; 1548 } 1549 1550 fc->dev = sb->s_dev; 1551 fm->sb = sb; 1552 err = fuse_bdi_init(fc, sb); 1553 if (err) 1554 goto err_dev_free; 1555 1556 /* Handle umasking inside the fuse code */ 1557 if (sb->s_flags & SB_POSIXACL) 1558 fc->dont_mask = 1; 1559 sb->s_flags |= SB_POSIXACL; 1560 1561 fc->default_permissions = ctx->default_permissions; 1562 fc->allow_other = ctx->allow_other; 1563 fc->user_id = ctx->user_id; 1564 fc->group_id = ctx->group_id; 1565 fc->legacy_opts_show = ctx->legacy_opts_show; 1566 fc->max_read = max_t(unsigned int, 4096, ctx->max_read); 1567 fc->destroy = ctx->destroy; 1568 fc->no_control = ctx->no_control; 1569 fc->no_force_umount = ctx->no_force_umount; 1570 1571 err = -ENOMEM; 1572 root = fuse_get_root_inode(sb, ctx->rootmode); 1573 sb->s_d_op = &fuse_root_dentry_operations; 1574 root_dentry = d_make_root(root); 1575 if (!root_dentry) 1576 goto err_dev_free; 1577 /* Root dentry doesn't have .d_revalidate */ 1578 sb->s_d_op = &fuse_dentry_operations; 1579 1580 mutex_lock(&fuse_mutex); 1581 err = -EINVAL; 1582 if (ctx->fudptr && *ctx->fudptr) 1583 goto err_unlock; 1584 1585 err = fuse_ctl_add_conn(fc); 1586 if (err) 1587 goto err_unlock; 1588 1589 list_add_tail(&fc->entry, &fuse_conn_list); 1590 sb->s_root = root_dentry; 1591 if (ctx->fudptr) 1592 *ctx->fudptr = fud; 1593 
mutex_unlock(&fuse_mutex); 1594 return 0; 1595 1596 err_unlock: 1597 mutex_unlock(&fuse_mutex); 1598 dput(root_dentry); 1599 err_dev_free: 1600 if (fud) 1601 fuse_dev_free(fud); 1602 err_free_dax: 1603 if (IS_ENABLED(CONFIG_FUSE_DAX)) 1604 fuse_dax_conn_free(fc); 1605 err: 1606 return err; 1607 } 1608 EXPORT_SYMBOL_GPL(fuse_fill_super_common); 1609 1610 static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) 1611 { 1612 struct fuse_fs_context *ctx = fsc->fs_private; 1613 int err; 1614 1615 if (!ctx->file || !ctx->rootmode_present || 1616 !ctx->user_id_present || !ctx->group_id_present) 1617 return -EINVAL; 1618 1619 /* 1620 * Require mount to happen from the same user namespace which 1621 * opened /dev/fuse to prevent potential attacks. 1622 */ 1623 if ((ctx->file->f_op != &fuse_dev_operations) || 1624 (ctx->file->f_cred->user_ns != sb->s_user_ns)) 1625 return -EINVAL; 1626 ctx->fudptr = &ctx->file->private_data; 1627 1628 err = fuse_fill_super_common(sb, ctx); 1629 if (err) 1630 return err; 1631 /* file->private_data shall be visible on all CPUs after this */ 1632 smp_mb(); 1633 fuse_send_init(get_fuse_mount_super(sb)); 1634 return 0; 1635 } 1636 1637 /* 1638 * This is the path where user supplied an already initialized fuse dev. In 1639 * this case never create a new super if the old one is gone. 
1640 */ 1641 static int fuse_set_no_super(struct super_block *sb, struct fs_context *fsc) 1642 { 1643 return -ENOTCONN; 1644 } 1645 1646 static int fuse_test_super(struct super_block *sb, struct fs_context *fsc) 1647 { 1648 1649 return fsc->sget_key == get_fuse_conn_super(sb); 1650 } 1651 1652 static int fuse_get_tree(struct fs_context *fsc) 1653 { 1654 struct fuse_fs_context *ctx = fsc->fs_private; 1655 struct fuse_dev *fud; 1656 struct fuse_conn *fc; 1657 struct fuse_mount *fm; 1658 struct super_block *sb; 1659 int err; 1660 1661 fc = kmalloc(sizeof(*fc), GFP_KERNEL); 1662 if (!fc) 1663 return -ENOMEM; 1664 1665 fm = kzalloc(sizeof(*fm), GFP_KERNEL); 1666 if (!fm) { 1667 kfree(fc); 1668 return -ENOMEM; 1669 } 1670 1671 fuse_conn_init(fc, fm, fsc->user_ns, &fuse_dev_fiq_ops, NULL); 1672 fc->release = fuse_free_conn; 1673 1674 fsc->s_fs_info = fm; 1675 1676 if (ctx->fd_present) 1677 ctx->file = fget(ctx->fd); 1678 1679 if (IS_ENABLED(CONFIG_BLOCK) && ctx->is_bdev) { 1680 err = get_tree_bdev(fsc, fuse_fill_super); 1681 goto out; 1682 } 1683 /* 1684 * While block dev mount can be initialized with a dummy device fd 1685 * (found by device name), normal fuse mounts can't 1686 */ 1687 err = -EINVAL; 1688 if (!ctx->file) 1689 goto out; 1690 1691 /* 1692 * Allow creating a fuse mount with an already initialized fuse 1693 * connection 1694 */ 1695 fud = READ_ONCE(ctx->file->private_data); 1696 if (ctx->file->f_op == &fuse_dev_operations && fud) { 1697 fsc->sget_key = fud->fc; 1698 sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super); 1699 err = PTR_ERR_OR_ZERO(sb); 1700 if (!IS_ERR(sb)) 1701 fsc->root = dget(sb->s_root); 1702 } else { 1703 err = get_tree_nodev(fsc, fuse_fill_super); 1704 } 1705 out: 1706 if (fsc->s_fs_info) 1707 fuse_mount_destroy(fm); 1708 if (ctx->file) 1709 fput(ctx->file); 1710 return err; 1711 } 1712 1713 static const struct fs_context_operations fuse_context_ops = { 1714 .free = fuse_free_fsc, 1715 .parse_param = fuse_parse_param, 1716 .reconfigure 
= fuse_reconfigure, 1717 .get_tree = fuse_get_tree, 1718 }; 1719 1720 /* 1721 * Set up the filesystem mount context. 1722 */ 1723 static int fuse_init_fs_context(struct fs_context *fsc) 1724 { 1725 struct fuse_fs_context *ctx; 1726 1727 ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL); 1728 if (!ctx) 1729 return -ENOMEM; 1730 1731 ctx->max_read = ~0; 1732 ctx->blksize = FUSE_DEFAULT_BLKSIZE; 1733 ctx->legacy_opts_show = true; 1734 1735 #ifdef CONFIG_BLOCK 1736 if (fsc->fs_type == &fuseblk_fs_type) { 1737 ctx->is_bdev = true; 1738 ctx->destroy = true; 1739 } 1740 #endif 1741 1742 fsc->fs_private = ctx; 1743 fsc->ops = &fuse_context_ops; 1744 return 0; 1745 } 1746 1747 bool fuse_mount_remove(struct fuse_mount *fm) 1748 { 1749 struct fuse_conn *fc = fm->fc; 1750 bool last = false; 1751 1752 down_write(&fc->killsb); 1753 list_del_init(&fm->fc_entry); 1754 if (list_empty(&fc->mounts)) 1755 last = true; 1756 up_write(&fc->killsb); 1757 1758 return last; 1759 } 1760 EXPORT_SYMBOL_GPL(fuse_mount_remove); 1761 1762 void fuse_conn_destroy(struct fuse_mount *fm) 1763 { 1764 struct fuse_conn *fc = fm->fc; 1765 1766 if (fc->destroy) 1767 fuse_send_destroy(fm); 1768 1769 fuse_abort_conn(fc); 1770 fuse_wait_aborted(fc); 1771 1772 if (!list_empty(&fc->entry)) { 1773 mutex_lock(&fuse_mutex); 1774 list_del(&fc->entry); 1775 fuse_ctl_remove_conn(fc); 1776 mutex_unlock(&fuse_mutex); 1777 } 1778 } 1779 EXPORT_SYMBOL_GPL(fuse_conn_destroy); 1780 1781 static void fuse_sb_destroy(struct super_block *sb) 1782 { 1783 struct fuse_mount *fm = get_fuse_mount_super(sb); 1784 bool last; 1785 1786 if (sb->s_root) { 1787 last = fuse_mount_remove(fm); 1788 if (last) 1789 fuse_conn_destroy(fm); 1790 } 1791 } 1792 1793 void fuse_mount_destroy(struct fuse_mount *fm) 1794 { 1795 fuse_conn_put(fm->fc); 1796 kfree(fm); 1797 } 1798 EXPORT_SYMBOL(fuse_mount_destroy); 1799 1800 static void fuse_kill_sb_anon(struct super_block *sb) 1801 { 1802 fuse_sb_destroy(sb); 1803 kill_anon_super(sb); 1804 
fuse_mount_destroy(get_fuse_mount_super(sb)); 1805 } 1806 1807 static struct file_system_type fuse_fs_type = { 1808 .owner = THIS_MODULE, 1809 .name = "fuse", 1810 .fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT, 1811 .init_fs_context = fuse_init_fs_context, 1812 .parameters = fuse_fs_parameters, 1813 .kill_sb = fuse_kill_sb_anon, 1814 }; 1815 MODULE_ALIAS_FS("fuse"); 1816 1817 #ifdef CONFIG_BLOCK 1818 static void fuse_kill_sb_blk(struct super_block *sb) 1819 { 1820 fuse_sb_destroy(sb); 1821 kill_block_super(sb); 1822 fuse_mount_destroy(get_fuse_mount_super(sb)); 1823 } 1824 1825 static struct file_system_type fuseblk_fs_type = { 1826 .owner = THIS_MODULE, 1827 .name = "fuseblk", 1828 .init_fs_context = fuse_init_fs_context, 1829 .parameters = fuse_fs_parameters, 1830 .kill_sb = fuse_kill_sb_blk, 1831 .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, 1832 }; 1833 MODULE_ALIAS_FS("fuseblk"); 1834 1835 static inline int register_fuseblk(void) 1836 { 1837 return register_filesystem(&fuseblk_fs_type); 1838 } 1839 1840 static inline void unregister_fuseblk(void) 1841 { 1842 unregister_filesystem(&fuseblk_fs_type); 1843 } 1844 #else 1845 static inline int register_fuseblk(void) 1846 { 1847 return 0; 1848 } 1849 1850 static inline void unregister_fuseblk(void) 1851 { 1852 } 1853 #endif 1854 1855 static void fuse_inode_init_once(void *foo) 1856 { 1857 struct inode *inode = foo; 1858 1859 inode_init_once(inode); 1860 } 1861 1862 static int __init fuse_fs_init(void) 1863 { 1864 int err; 1865 1866 fuse_inode_cachep = kmem_cache_create("fuse_inode", 1867 sizeof(struct fuse_inode), 0, 1868 SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT, 1869 fuse_inode_init_once); 1870 err = -ENOMEM; 1871 if (!fuse_inode_cachep) 1872 goto out; 1873 1874 err = register_fuseblk(); 1875 if (err) 1876 goto out2; 1877 1878 err = register_filesystem(&fuse_fs_type); 1879 if (err) 1880 goto out3; 1881 1882 return 0; 1883 1884 out3: 1885 unregister_fuseblk(); 1886 out2: 1887 
kmem_cache_destroy(fuse_inode_cachep); 1888 out: 1889 return err; 1890 } 1891 1892 static void fuse_fs_cleanup(void) 1893 { 1894 unregister_filesystem(&fuse_fs_type); 1895 unregister_fuseblk(); 1896 1897 /* 1898 * Make sure all delayed rcu free inodes are flushed before we 1899 * destroy cache. 1900 */ 1901 rcu_barrier(); 1902 kmem_cache_destroy(fuse_inode_cachep); 1903 } 1904 1905 static struct kobject *fuse_kobj; 1906 1907 static int fuse_sysfs_init(void) 1908 { 1909 int err; 1910 1911 fuse_kobj = kobject_create_and_add("fuse", fs_kobj); 1912 if (!fuse_kobj) { 1913 err = -ENOMEM; 1914 goto out_err; 1915 } 1916 1917 err = sysfs_create_mount_point(fuse_kobj, "connections"); 1918 if (err) 1919 goto out_fuse_unregister; 1920 1921 return 0; 1922 1923 out_fuse_unregister: 1924 kobject_put(fuse_kobj); 1925 out_err: 1926 return err; 1927 } 1928 1929 static void fuse_sysfs_cleanup(void) 1930 { 1931 sysfs_remove_mount_point(fuse_kobj, "connections"); 1932 kobject_put(fuse_kobj); 1933 } 1934 1935 static int __init fuse_init(void) 1936 { 1937 int res; 1938 1939 pr_info("init (API version %i.%i)\n", 1940 FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); 1941 1942 INIT_LIST_HEAD(&fuse_conn_list); 1943 res = fuse_fs_init(); 1944 if (res) 1945 goto err; 1946 1947 res = fuse_dev_init(); 1948 if (res) 1949 goto err_fs_cleanup; 1950 1951 res = fuse_sysfs_init(); 1952 if (res) 1953 goto err_dev_cleanup; 1954 1955 res = fuse_ctl_init(); 1956 if (res) 1957 goto err_sysfs_cleanup; 1958 1959 sanitize_global_limit(&max_user_bgreq); 1960 sanitize_global_limit(&max_user_congthresh); 1961 1962 return 0; 1963 1964 err_sysfs_cleanup: 1965 fuse_sysfs_cleanup(); 1966 err_dev_cleanup: 1967 fuse_dev_cleanup(); 1968 err_fs_cleanup: 1969 fuse_fs_cleanup(); 1970 err: 1971 return res; 1972 } 1973 1974 static void __exit fuse_exit(void) 1975 { 1976 pr_debug("exit\n"); 1977 1978 fuse_ctl_cleanup(); 1979 fuse_sysfs_cleanup(); 1980 fuse_fs_cleanup(); 1981 fuse_dev_cleanup(); 1982 } 1983 1984 
/* Hook fuse_init() and fuse_exit() up as the module's init/exit routines. */
module_init(fuse_init);
module_exit(fuse_exit);