1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/blkdev.h> 20 #include <linux/module.h> 21 #include <linux/buffer_head.h> 22 #include <linux/fs.h> 23 #include <linux/pagemap.h> 24 #include <linux/highmem.h> 25 #include <linux/time.h> 26 #include <linux/init.h> 27 #include <linux/string.h> 28 #include <linux/smp_lock.h> 29 #include <linux/backing-dev.h> 30 #include <linux/mount.h> 31 #include <linux/mpage.h> 32 #include <linux/swap.h> 33 #include <linux/writeback.h> 34 #include <linux/statfs.h> 35 #include <linux/compat.h> 36 #include <linux/parser.h> 37 #include <linux/ctype.h> 38 #include <linux/namei.h> 39 #include <linux/miscdevice.h> 40 #include <linux/magic.h> 41 #include "compat.h" 42 #include "ctree.h" 43 #include "disk-io.h" 44 #include "transaction.h" 45 #include "btrfs_inode.h" 46 #include "ioctl.h" 47 #include "print-tree.h" 48 #include "xattr.h" 49 #include "volumes.h" 50 #include "version.h" 51 #include "export.h" 52 #include "compression.h" 53 54 55 static struct super_operations btrfs_super_ops; 56 57 static void btrfs_put_super(struct super_block *sb) 58 { 59 struct btrfs_root *root = btrfs_sb(sb); 60 int ret; 61 62 ret = close_ctree(root); 63 sb->s_fs_info = NULL; 64 } 65 66 enum { 67 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, 68 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, 69 Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_err, 70 }; 71 72 static match_table_t tokens = { 73 {Opt_degraded, "degraded"}, 74 {Opt_subvol, "subvol=%s"}, 75 {Opt_device, "device=%s"}, 76 {Opt_nodatasum, "nodatasum"}, 77 {Opt_nodatacow, "nodatacow"}, 78 {Opt_nobarrier, "nobarrier"}, 79 {Opt_max_extent, "max_extent=%s"}, 80 {Opt_max_inline, "max_inline=%s"}, 81 {Opt_alloc_start, "alloc_start=%s"}, 82 {Opt_thread_pool, "thread_pool=%d"}, 83 {Opt_compress, "compress"}, 84 {Opt_ssd, "ssd"}, 85 {Opt_noacl, "noacl"}, 86 {Opt_err, NULL}, 87 }; 88 89 u64 btrfs_parse_size(char *str) 90 { 91 u64 res; 92 int mult = 1; 93 char *end; 94 char last; 95 96 res = simple_strtoul(str, &end, 10); 97 98 last = end[0]; 99 if (isalpha(last)) { 100 last = tolower(last); 101 switch (last) { 102 case 'g': 103 mult *= 1024; 104 case 'm': 105 mult *= 1024; 106 case 'k': 107 mult *= 1024; 108 } 109 res = res * mult; 110 } 111 return res; 112 } 113 114 /* 115 * Regular mount options parser. Everything that is needed only when 116 * reading in a new superblock is parsed here. 117 */ 118 int btrfs_parse_options(struct btrfs_root *root, char *options) 119 { 120 struct btrfs_fs_info *info = root->fs_info; 121 substring_t args[MAX_OPT_ARGS]; 122 char *p, *num; 123 int intarg; 124 125 if (!options) 126 return 0; 127 128 /* 129 * strsep changes the string, duplicate it because parse_options 130 * gets called twice 131 */ 132 options = kstrdup(options, GFP_NOFS); 133 if (!options) 134 return -ENOMEM; 135 136 137 while ((p = strsep(&options, ",")) != NULL) { 138 int token; 139 if (!*p) 140 continue; 141 142 token = match_token(p, tokens, args); 143 switch (token) { 144 case Opt_degraded: 145 printk(KERN_INFO "btrfs: allowing degraded mounts\n"); 146 btrfs_set_opt(info->mount_opt, DEGRADED); 147 break; 148 case Opt_subvol: 149 case Opt_device: 150 /* 151 * These are parsed by btrfs_parse_early_options 152 * and can be happily ignored here. 153 */ 154 break; 155 case Opt_nodatasum: 156 printk(KERN_INFO "btrfs: setting nodatacsum\n"); 157 btrfs_set_opt(info->mount_opt, NODATASUM); 158 break; 159 case Opt_nodatacow: 160 printk(KERN_INFO "btrfs: setting nodatacow\n"); 161 btrfs_set_opt(info->mount_opt, NODATACOW); 162 btrfs_set_opt(info->mount_opt, NODATASUM); 163 break; 164 case Opt_compress: 165 printk(KERN_INFO "btrfs: use compression\n"); 166 btrfs_set_opt(info->mount_opt, COMPRESS); 167 break; 168 case Opt_ssd: 169 printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); 170 btrfs_set_opt(info->mount_opt, SSD); 171 break; 172 case Opt_nobarrier: 173 printk(KERN_INFO "btrfs: turning off barriers\n"); 174 btrfs_set_opt(info->mount_opt, NOBARRIER); 175 break; 176 case Opt_thread_pool: 177 intarg = 0; 178 match_int(&args[0], &intarg); 179 if (intarg) { 180 info->thread_pool_size = intarg; 181 printk(KERN_INFO "btrfs: thread pool %d\n", 182 info->thread_pool_size); 183 } 184 break; 185 case Opt_max_extent: 186 num = match_strdup(&args[0]); 187 if (num) { 188 info->max_extent = btrfs_parse_size(num); 189 kfree(num); 190 191 info->max_extent = max_t(u64, 192 info->max_extent, root->sectorsize); 193 printk(KERN_INFO "btrfs: max_extent at %llu\n", 194 info->max_extent); 195 } 196 break; 197 case Opt_max_inline: 198 num = match_strdup(&args[0]); 199 if (num) { 200 info->max_inline = btrfs_parse_size(num); 201 kfree(num); 202 203 if (info->max_inline) { 204 info->max_inline = max_t(u64, 205 info->max_inline, 206 root->sectorsize); 207 } 208 printk(KERN_INFO "btrfs: max_inline at %llu\n", 209 info->max_inline); 210 } 211 break; 212 case Opt_alloc_start: 213 num = match_strdup(&args[0]); 214 if (num) { 215 info->alloc_start = btrfs_parse_size(num); 216 kfree(num); 217 printk(KERN_INFO 218 "btrfs: allocations start at %llu\n", 219 info->alloc_start); 220 } 221 break; 222 case Opt_noacl: 223 root->fs_info->sb->s_flags &= ~MS_POSIXACL; 224 break; 225 default: 226 break; 227 } 228 } 229 kfree(options); 230 return 0; 231 } 232 233 /* 234 * Parse mount options that are required early in the mount process. 235 * 236 * All other options will be parsed on much later in the mount process and 237 * only when we need to allocate a new super block. 238 */ 239 static int btrfs_parse_early_options(const char *options, fmode_t flags, 240 void *holder, char **subvol_name, 241 struct btrfs_fs_devices **fs_devices) 242 { 243 substring_t args[MAX_OPT_ARGS]; 244 char *opts, *p; 245 int error = 0; 246 247 if (!options) 248 goto out; 249 250 /* 251 * strsep changes the string, duplicate it because parse_options 252 * gets called twice 253 */ 254 opts = kstrdup(options, GFP_KERNEL); 255 if (!opts) 256 return -ENOMEM; 257 258 while ((p = strsep(&opts, ",")) != NULL) { 259 int token; 260 if (!*p) 261 continue; 262 263 token = match_token(p, tokens, args); 264 switch (token) { 265 case Opt_subvol: 266 *subvol_name = match_strdup(&args[0]); 267 break; 268 case Opt_device: 269 error = btrfs_scan_one_device(match_strdup(&args[0]), 270 flags, holder, fs_devices); 271 if (error) 272 goto out_free_opts; 273 break; 274 default: 275 break; 276 } 277 } 278 279 out_free_opts: 280 kfree(opts); 281 out: 282 /* 283 * If no subvolume name is specified we use the default one. Allocate 284 * a copy of the string "." here so that code later in the 285 * mount path doesn't care if it's the default volume or another one. 286 */ 287 if (!*subvol_name) { 288 *subvol_name = kstrdup(".", GFP_KERNEL); 289 if (!*subvol_name) 290 return -ENOMEM; 291 } 292 return error; 293 } 294 295 static int btrfs_fill_super(struct super_block *sb, 296 struct btrfs_fs_devices *fs_devices, 297 void *data, int silent) 298 { 299 struct inode *inode; 300 struct dentry *root_dentry; 301 struct btrfs_super_block *disk_super; 302 struct btrfs_root *tree_root; 303 struct btrfs_inode *bi; 304 int err; 305 306 sb->s_maxbytes = MAX_LFS_FILESIZE; 307 sb->s_magic = BTRFS_SUPER_MAGIC; 308 sb->s_op = &btrfs_super_ops; 309 sb->s_export_op = &btrfs_export_ops; 310 sb->s_xattr = btrfs_xattr_handlers; 311 sb->s_time_gran = 1; 312 sb->s_flags |= MS_POSIXACL; 313 314 tree_root = open_ctree(sb, fs_devices, (char *)data); 315 316 if (IS_ERR(tree_root)) { 317 printk("btrfs: open_ctree failed\n"); 318 return PTR_ERR(tree_root); 319 } 320 sb->s_fs_info = tree_root; 321 disk_super = &tree_root->fs_info->super_copy; 322 inode = btrfs_iget_locked(sb, BTRFS_FIRST_FREE_OBJECTID, 323 tree_root->fs_info->fs_root); 324 bi = BTRFS_I(inode); 325 bi->location.objectid = inode->i_ino; 326 bi->location.offset = 0; 327 bi->root = tree_root->fs_info->fs_root; 328 329 btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); 330 331 if (!inode) { 332 err = -ENOMEM; 333 goto fail_close; 334 } 335 if (inode->i_state & I_NEW) { 336 btrfs_read_locked_inode(inode); 337 unlock_new_inode(inode); 338 } 339 340 root_dentry = d_alloc_root(inode); 341 if (!root_dentry) { 342 iput(inode); 343 err = -ENOMEM; 344 goto fail_close; 345 } 346 #if 0 347 /* this does the super kobj at the same time */ 348 err = btrfs_sysfs_add_super(tree_root->fs_info); 349 if (err) 350 goto fail_close; 351 #endif 352 353 sb->s_root = root_dentry; 354 355 save_mount_options(sb, data); 356 return 0; 357 358 fail_close: 359 close_ctree(tree_root); 360 return err; 361 } 362 363 int btrfs_sync_fs(struct super_block *sb, int wait) 364 { 365 struct btrfs_trans_handle *trans; 366 struct btrfs_root *root; 367 int ret; 368 root = btrfs_sb(sb); 369 370 if (sb->s_flags & MS_RDONLY) 371 return 0; 372 373 sb->s_dirt = 0; 374 if (!wait) { 375 filemap_flush(root->fs_info->btree_inode->i_mapping); 376 return 0; 377 } 378 379 btrfs_start_delalloc_inodes(root); 380 btrfs_wait_ordered_extents(root, 0); 381 382 btrfs_clean_old_snapshots(root); 383 trans = btrfs_start_transaction(root, 1); 384 ret = btrfs_commit_transaction(trans, root); 385 sb->s_dirt = 0; 386 return ret; 387 } 388 389 static void btrfs_write_super(struct super_block *sb) 390 { 391 sb->s_dirt = 0; 392 } 393 394 static int btrfs_test_super(struct super_block *s, void *data) 395 { 396 struct btrfs_fs_devices *test_fs_devices = data; 397 struct btrfs_root *root = btrfs_sb(s); 398 399 return root->fs_info->fs_devices == test_fs_devices; 400 } 401 402 /* 403 * Find a superblock for the given device / mount point. 404 * 405 * Note: This is based on get_sb_bdev from fs/super.c with a few additions 406 * for multiple device setup. Make sure to keep it in sync. 407 */ 408 static int btrfs_get_sb(struct file_system_type *fs_type, int flags, 409 const char *dev_name, void *data, struct vfsmount *mnt) 410 { 411 char *subvol_name = NULL; 412 struct block_device *bdev = NULL; 413 struct super_block *s; 414 struct dentry *root; 415 struct btrfs_fs_devices *fs_devices = NULL; 416 fmode_t mode = FMODE_READ; 417 int error = 0; 418 419 if (!(flags & MS_RDONLY)) 420 mode |= FMODE_WRITE; 421 422 error = btrfs_parse_early_options(data, mode, fs_type, 423 &subvol_name, &fs_devices); 424 if (error) 425 return error; 426 427 error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices); 428 if (error) 429 goto error_free_subvol_name; 430 431 error = btrfs_open_devices(fs_devices, mode, fs_type); 432 if (error) 433 goto error_free_subvol_name; 434 435 if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) { 436 error = -EACCES; 437 goto error_close_devices; 438 } 439 440 bdev = fs_devices->latest_bdev; 441 s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); 442 if (IS_ERR(s)) 443 goto error_s; 444 445 if (s->s_root) { 446 if ((flags ^ s->s_flags) & MS_RDONLY) { 447 up_write(&s->s_umount); 448 deactivate_super(s); 449 error = -EBUSY; 450 goto error_close_devices; 451 } 452 453 btrfs_close_devices(fs_devices); 454 } else { 455 char b[BDEVNAME_SIZE]; 456 457 s->s_flags = flags; 458 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); 459 error = btrfs_fill_super(s, fs_devices, data, 460 flags & MS_SILENT ? 1 : 0); 461 if (error) { 462 up_write(&s->s_umount); 463 deactivate_super(s); 464 goto error_free_subvol_name; 465 } 466 467 btrfs_sb(s)->fs_info->bdev_holder = fs_type; 468 s->s_flags |= MS_ACTIVE; 469 } 470 471 if (!strcmp(subvol_name, ".")) 472 root = dget(s->s_root); 473 else { 474 mutex_lock(&s->s_root->d_inode->i_mutex); 475 root = lookup_one_len(subvol_name, s->s_root, 476 strlen(subvol_name)); 477 mutex_unlock(&s->s_root->d_inode->i_mutex); 478 479 if (IS_ERR(root)) { 480 up_write(&s->s_umount); 481 deactivate_super(s); 482 error = PTR_ERR(root); 483 goto error_free_subvol_name; 484 } 485 if (!root->d_inode) { 486 dput(root); 487 up_write(&s->s_umount); 488 deactivate_super(s); 489 error = -ENXIO; 490 goto error_free_subvol_name; 491 } 492 } 493 494 mnt->mnt_sb = s; 495 mnt->mnt_root = root; 496 497 kfree(subvol_name); 498 return 0; 499 500 error_s: 501 error = PTR_ERR(s); 502 error_close_devices: 503 btrfs_close_devices(fs_devices); 504 error_free_subvol_name: 505 kfree(subvol_name); 506 return error; 507 } 508 509 static int btrfs_remount(struct super_block *sb, int *flags, char *data) 510 { 511 struct btrfs_root *root = btrfs_sb(sb); 512 int ret; 513 514 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 515 return 0; 516 517 if (*flags & MS_RDONLY) { 518 sb->s_flags |= MS_RDONLY; 519 520 ret = btrfs_commit_super(root); 521 WARN_ON(ret); 522 } else { 523 if (root->fs_info->fs_devices->rw_devices == 0) 524 return -EACCES; 525 526 if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) 527 return -EINVAL; 528 529 ret = btrfs_cleanup_reloc_trees(root); 530 WARN_ON(ret); 531 532 ret = btrfs_cleanup_fs_roots(root->fs_info); 533 WARN_ON(ret); 534 535 sb->s_flags &= ~MS_RDONLY; 536 } 537 538 return 0; 539 } 540 541 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) 542 { 543 struct btrfs_root *root = btrfs_sb(dentry->d_sb); 544 struct btrfs_super_block *disk_super = &root->fs_info->super_copy; 545 int bits = dentry->d_sb->s_blocksize_bits; 546 __be32 *fsid = (__be32 *)root->fs_info->fsid; 547 548 buf->f_namelen = BTRFS_NAME_LEN; 549 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; 550 buf->f_bfree = buf->f_blocks - 551 (btrfs_super_bytes_used(disk_super) >> bits); 552 buf->f_bavail = buf->f_bfree; 553 buf->f_bsize = dentry->d_sb->s_blocksize; 554 buf->f_type = BTRFS_SUPER_MAGIC; 555 556 /* We treat it as constant endianness (it doesn't matter _which_) 557 because we want the fsid to come out the same whether mounted 558 on a big-endian or little-endian host */ 559 buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]); 560 buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]); 561 /* Mask in the root object ID too, to disambiguate subvols */ 562 buf->f_fsid.val[0] ^= BTRFS_I(dentry->d_inode)->root->objectid >> 32; 563 buf->f_fsid.val[1] ^= BTRFS_I(dentry->d_inode)->root->objectid; 564 565 return 0; 566 } 567 568 static struct file_system_type btrfs_fs_type = { 569 .owner = THIS_MODULE, 570 .name = "btrfs", 571 .get_sb = btrfs_get_sb, 572 .kill_sb = kill_anon_super, 573 .fs_flags = FS_REQUIRES_DEV, 574 }; 575 576 /* 577 * used by btrfsctl to scan devices when no FS is mounted 578 */ 579 static long btrfs_control_ioctl(struct file *file, unsigned int cmd, 580 unsigned long arg) 581 { 582 struct btrfs_ioctl_vol_args *vol; 583 struct btrfs_fs_devices *fs_devices; 584 int ret = -ENOTTY; 585 586 if (!capable(CAP_SYS_ADMIN)) 587 return -EPERM; 588 589 vol = kmalloc(sizeof(*vol), GFP_KERNEL); 590 if (!vol) 591 return -ENOMEM; 592 593 if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) { 594 ret = -EFAULT; 595 goto out; 596 } 597 598 switch (cmd) { 599 case BTRFS_IOC_SCAN_DEV: 600 ret = btrfs_scan_one_device(vol->name, FMODE_READ, 601 &btrfs_fs_type, &fs_devices); 602 break; 603 } 604 out: 605 kfree(vol); 606 return ret; 607 } 608 609 static int btrfs_freeze(struct super_block *sb) 610 { 611 struct btrfs_root *root = btrfs_sb(sb); 612 mutex_lock(&root->fs_info->transaction_kthread_mutex); 613 mutex_lock(&root->fs_info->cleaner_mutex); 614 return 0; 615 } 616 617 static int btrfs_unfreeze(struct super_block *sb) 618 { 619 struct btrfs_root *root = btrfs_sb(sb); 620 mutex_unlock(&root->fs_info->cleaner_mutex); 621 mutex_unlock(&root->fs_info->transaction_kthread_mutex); 622 return 0; 623 } 624 625 static struct super_operations btrfs_super_ops = { 626 .delete_inode = btrfs_delete_inode, 627 .put_super = btrfs_put_super, 628 .write_super = btrfs_write_super, 629 .sync_fs = btrfs_sync_fs, 630 .show_options = generic_show_options, 631 .write_inode = btrfs_write_inode, 632 .dirty_inode = btrfs_dirty_inode, 633 .alloc_inode = btrfs_alloc_inode, 634 .destroy_inode = btrfs_destroy_inode, 635 .statfs = btrfs_statfs, 636 .remount_fs = btrfs_remount, 637 .freeze_fs = btrfs_freeze, 638 .unfreeze_fs = btrfs_unfreeze, 639 }; 640 641 static const struct file_operations btrfs_ctl_fops = { 642 .unlocked_ioctl = btrfs_control_ioctl, 643 .compat_ioctl = btrfs_control_ioctl, 644 .owner = THIS_MODULE, 645 }; 646 647 static struct miscdevice btrfs_misc = { 648 .minor = MISC_DYNAMIC_MINOR, 649 .name = "btrfs-control", 650 .fops = &btrfs_ctl_fops 651 }; 652 653 static int btrfs_interface_init(void) 654 { 655 return misc_register(&btrfs_misc); 656 } 657 658 static void btrfs_interface_exit(void) 659 { 660 if (misc_deregister(&btrfs_misc) < 0) 661 printk(KERN_INFO "misc_deregister failed for control device"); 662 } 663 664 static int __init init_btrfs_fs(void) 665 { 666 int err; 667 668 err = btrfs_init_sysfs(); 669 if (err) 670 return err; 671 672 err = btrfs_init_cachep(); 673 if (err) 674 goto free_sysfs; 675 676 err = extent_io_init(); 677 if (err) 678 goto free_cachep; 679 680 err = extent_map_init(); 681 if (err) 682 goto free_extent_io; 683 684 err = btrfs_interface_init(); 685 if (err) 686 goto free_extent_map; 687 688 err = register_filesystem(&btrfs_fs_type); 689 if (err) 690 goto unregister_ioctl; 691 692 printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION); 693 return 0; 694 695 unregister_ioctl: 696 btrfs_interface_exit(); 697 free_extent_map: 698 extent_map_exit(); 699 free_extent_io: 700 extent_io_exit(); 701 free_cachep: 702 btrfs_destroy_cachep(); 703 free_sysfs: 704 btrfs_exit_sysfs(); 705 return err; 706 } 707 708 static void __exit exit_btrfs_fs(void) 709 { 710 btrfs_destroy_cachep(); 711 extent_map_exit(); 712 extent_io_exit(); 713 btrfs_interface_exit(); 714 unregister_filesystem(&btrfs_fs_type); 715 btrfs_exit_sysfs(); 716 btrfs_cleanup_fs_uuids(); 717 btrfs_zlib_exit(); 718 } 719 720 module_init(init_btrfs_fs) 721 module_exit(exit_btrfs_fs) 722 723 MODULE_LICENSE("GPL"); 724