1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/blkdev.h> 20 #include <linux/module.h> 21 #include <linux/buffer_head.h> 22 #include <linux/fs.h> 23 #include <linux/pagemap.h> 24 #include <linux/highmem.h> 25 #include <linux/time.h> 26 #include <linux/init.h> 27 #include <linux/string.h> 28 #include <linux/smp_lock.h> 29 #include <linux/backing-dev.h> 30 #include <linux/mount.h> 31 #include <linux/mpage.h> 32 #include <linux/swap.h> 33 #include <linux/writeback.h> 34 #include <linux/statfs.h> 35 #include <linux/compat.h> 36 #include <linux/parser.h> 37 #include <linux/ctype.h> 38 #include <linux/namei.h> 39 #include <linux/miscdevice.h> 40 #include <linux/magic.h> 41 #include "compat.h" 42 #include "ctree.h" 43 #include "disk-io.h" 44 #include "transaction.h" 45 #include "btrfs_inode.h" 46 #include "ioctl.h" 47 #include "print-tree.h" 48 #include "xattr.h" 49 #include "volumes.h" 50 #include "version.h" 51 #include "export.h" 52 #include "compression.h" 53 54 55 static struct super_operations btrfs_super_ops; 56 57 static void btrfs_put_super(struct super_block *sb) 58 { 59 struct btrfs_root *root = btrfs_sb(sb); 60 int ret; 61 62 ret = close_ctree(root); 63 sb->s_fs_info = NULL; 64 } 65 66 enum { 67 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, 68 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, 69 Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_err, 70 }; 71 72 static match_table_t tokens = { 73 {Opt_degraded, "degraded"}, 74 {Opt_subvol, "subvol=%s"}, 75 {Opt_device, "device=%s"}, 76 {Opt_nodatasum, "nodatasum"}, 77 {Opt_nodatacow, "nodatacow"}, 78 {Opt_nobarrier, "nobarrier"}, 79 {Opt_max_extent, "max_extent=%s"}, 80 {Opt_max_inline, "max_inline=%s"}, 81 {Opt_alloc_start, "alloc_start=%s"}, 82 {Opt_thread_pool, "thread_pool=%d"}, 83 {Opt_compress, "compress"}, 84 {Opt_ssd, "ssd"}, 85 {Opt_noacl, "noacl"}, 86 {Opt_err, NULL}, 87 }; 88 89 u64 btrfs_parse_size(char *str) 90 { 91 u64 res; 92 int mult = 1; 93 char *end; 94 char last; 95 96 res = simple_strtoul(str, &end, 10); 97 98 last = end[0]; 99 if (isalpha(last)) { 100 last = tolower(last); 101 switch (last) { 102 case 'g': 103 mult *= 1024; 104 case 'm': 105 mult *= 1024; 106 case 'k': 107 mult *= 1024; 108 } 109 res = res * mult; 110 } 111 return res; 112 } 113 114 /* 115 * Regular mount options parser. Everything that is needed only when 116 * reading in a new superblock is parsed here. 117 */ 118 int btrfs_parse_options(struct btrfs_root *root, char *options) 119 { 120 struct btrfs_fs_info *info = root->fs_info; 121 substring_t args[MAX_OPT_ARGS]; 122 char *p, *num; 123 int intarg; 124 125 if (!options) 126 return 0; 127 128 /* 129 * strsep changes the string, duplicate it because parse_options 130 * gets called twice 131 */ 132 options = kstrdup(options, GFP_NOFS); 133 if (!options) 134 return -ENOMEM; 135 136 137 while ((p = strsep(&options, ",")) != NULL) { 138 int token; 139 if (!*p) 140 continue; 141 142 token = match_token(p, tokens, args); 143 switch (token) { 144 case Opt_degraded: 145 printk(KERN_INFO "btrfs: allowing degraded mounts\n"); 146 btrfs_set_opt(info->mount_opt, DEGRADED); 147 break; 148 case Opt_subvol: 149 case Opt_device: 150 /* 151 * These are parsed by btrfs_parse_early_options 152 * and can be happily ignored here. 153 */ 154 break; 155 case Opt_nodatasum: 156 printk(KERN_INFO "btrfs: setting nodatacsum\n"); 157 btrfs_set_opt(info->mount_opt, NODATASUM); 158 break; 159 case Opt_nodatacow: 160 printk(KERN_INFO "btrfs: setting nodatacow\n"); 161 btrfs_set_opt(info->mount_opt, NODATACOW); 162 btrfs_set_opt(info->mount_opt, NODATASUM); 163 break; 164 case Opt_compress: 165 printk(KERN_INFO "btrfs: use compression\n"); 166 btrfs_set_opt(info->mount_opt, COMPRESS); 167 break; 168 case Opt_ssd: 169 printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); 170 btrfs_set_opt(info->mount_opt, SSD); 171 break; 172 case Opt_nobarrier: 173 printk(KERN_INFO "btrfs: turning off barriers\n"); 174 btrfs_set_opt(info->mount_opt, NOBARRIER); 175 break; 176 case Opt_thread_pool: 177 intarg = 0; 178 match_int(&args[0], &intarg); 179 if (intarg) { 180 info->thread_pool_size = intarg; 181 printk(KERN_INFO "btrfs: thread pool %d\n", 182 info->thread_pool_size); 183 } 184 break; 185 case Opt_max_extent: 186 num = match_strdup(&args[0]); 187 if (num) { 188 info->max_extent = btrfs_parse_size(num); 189 kfree(num); 190 191 info->max_extent = max_t(u64, 192 info->max_extent, root->sectorsize); 193 printk(KERN_INFO "btrfs: max_extent at %llu\n", 194 info->max_extent); 195 } 196 break; 197 case Opt_max_inline: 198 num = match_strdup(&args[0]); 199 if (num) { 200 info->max_inline = btrfs_parse_size(num); 201 kfree(num); 202 203 if (info->max_inline) { 204 info->max_inline = max_t(u64, 205 info->max_inline, 206 root->sectorsize); 207 } 208 printk(KERN_INFO "btrfs: max_inline at %llu\n", 209 info->max_inline); 210 } 211 break; 212 case Opt_alloc_start: 213 num = match_strdup(&args[0]); 214 if (num) { 215 info->alloc_start = btrfs_parse_size(num); 216 kfree(num); 217 printk(KERN_INFO 218 "btrfs: allocations start at %llu\n", 219 info->alloc_start); 220 } 221 break; 222 case Opt_noacl: 223 root->fs_info->sb->s_flags &= ~MS_POSIXACL; 224 break; 225 default: 226 break; 227 } 228 } 229 kfree(options); 230 return 0; 231 } 232 233 /* 234 * Parse mount options that are required early in the mount process. 235 * 236 * All other options will be parsed on much later in the mount process and 237 * only when we need to allocate a new super block. 238 */ 239 static int btrfs_parse_early_options(const char *options, fmode_t flags, 240 void *holder, char **subvol_name, 241 struct btrfs_fs_devices **fs_devices) 242 { 243 substring_t args[MAX_OPT_ARGS]; 244 char *opts, *p; 245 int error = 0; 246 247 if (!options) 248 goto out; 249 250 /* 251 * strsep changes the string, duplicate it because parse_options 252 * gets called twice 253 */ 254 opts = kstrdup(options, GFP_KERNEL); 255 if (!opts) 256 return -ENOMEM; 257 258 while ((p = strsep(&opts, ",")) != NULL) { 259 int token; 260 if (!*p) 261 continue; 262 263 token = match_token(p, tokens, args); 264 switch (token) { 265 case Opt_subvol: 266 *subvol_name = match_strdup(&args[0]); 267 break; 268 case Opt_device: 269 error = btrfs_scan_one_device(match_strdup(&args[0]), 270 flags, holder, fs_devices); 271 if (error) 272 goto out_free_opts; 273 break; 274 default: 275 break; 276 } 277 } 278 279 out_free_opts: 280 kfree(opts); 281 out: 282 /* 283 * If no subvolume name is specified we use the default one. Allocate 284 * a copy of the string "." here so that code later in the 285 * mount path doesn't care if it's the default volume or another one. 286 */ 287 if (!*subvol_name) { 288 *subvol_name = kstrdup(".", GFP_KERNEL); 289 if (!*subvol_name) 290 return -ENOMEM; 291 } 292 return error; 293 } 294 295 static int btrfs_fill_super(struct super_block *sb, 296 struct btrfs_fs_devices *fs_devices, 297 void *data, int silent) 298 { 299 struct inode *inode; 300 struct dentry *root_dentry; 301 struct btrfs_super_block *disk_super; 302 struct btrfs_root *tree_root; 303 struct btrfs_inode *bi; 304 int err; 305 306 sb->s_maxbytes = MAX_LFS_FILESIZE; 307 sb->s_magic = BTRFS_SUPER_MAGIC; 308 sb->s_op = &btrfs_super_ops; 309 sb->s_export_op = &btrfs_export_ops; 310 sb->s_xattr = btrfs_xattr_handlers; 311 sb->s_time_gran = 1; 312 sb->s_flags |= MS_POSIXACL; 313 314 tree_root = open_ctree(sb, fs_devices, (char *)data); 315 316 if (IS_ERR(tree_root)) { 317 printk("btrfs: open_ctree failed\n"); 318 return PTR_ERR(tree_root); 319 } 320 sb->s_fs_info = tree_root; 321 disk_super = &tree_root->fs_info->super_copy; 322 inode = btrfs_iget_locked(sb, BTRFS_FIRST_FREE_OBJECTID, 323 tree_root->fs_info->fs_root); 324 bi = BTRFS_I(inode); 325 bi->location.objectid = inode->i_ino; 326 bi->location.offset = 0; 327 bi->root = tree_root->fs_info->fs_root; 328 329 btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); 330 331 if (!inode) { 332 err = -ENOMEM; 333 goto fail_close; 334 } 335 if (inode->i_state & I_NEW) { 336 btrfs_read_locked_inode(inode); 337 unlock_new_inode(inode); 338 } 339 340 root_dentry = d_alloc_root(inode); 341 if (!root_dentry) { 342 iput(inode); 343 err = -ENOMEM; 344 goto fail_close; 345 } 346 #if 0 347 /* this does the super kobj at the same time */ 348 err = btrfs_sysfs_add_super(tree_root->fs_info); 349 if (err) 350 goto fail_close; 351 #endif 352 353 sb->s_root = root_dentry; 354 355 save_mount_options(sb, data); 356 return 0; 357 358 fail_close: 359 close_ctree(tree_root); 360 return err; 361 } 362 363 int btrfs_sync_fs(struct super_block *sb, int wait) 364 { 365 struct btrfs_trans_handle *trans; 366 struct btrfs_root *root; 367 int ret; 368 root = btrfs_sb(sb); 369 370 if (sb->s_flags & MS_RDONLY) 371 return 0; 372 373 sb->s_dirt = 0; 374 if (!wait) { 375 filemap_flush(root->fs_info->btree_inode->i_mapping); 376 return 0; 377 } 378 379 btrfs_start_delalloc_inodes(root); 380 btrfs_wait_ordered_extents(root, 0); 381 382 trans = btrfs_start_transaction(root, 1); 383 ret = btrfs_commit_transaction(trans, root); 384 sb->s_dirt = 0; 385 return ret; 386 } 387 388 static void btrfs_write_super(struct super_block *sb) 389 { 390 sb->s_dirt = 0; 391 } 392 393 static int btrfs_test_super(struct super_block *s, void *data) 394 { 395 struct btrfs_fs_devices *test_fs_devices = data; 396 struct btrfs_root *root = btrfs_sb(s); 397 398 return root->fs_info->fs_devices == test_fs_devices; 399 } 400 401 /* 402 * Find a superblock for the given device / mount point. 403 * 404 * Note: This is based on get_sb_bdev from fs/super.c with a few additions 405 * for multiple device setup. Make sure to keep it in sync. 406 */ 407 static int btrfs_get_sb(struct file_system_type *fs_type, int flags, 408 const char *dev_name, void *data, struct vfsmount *mnt) 409 { 410 char *subvol_name = NULL; 411 struct block_device *bdev = NULL; 412 struct super_block *s; 413 struct dentry *root; 414 struct btrfs_fs_devices *fs_devices = NULL; 415 fmode_t mode = FMODE_READ; 416 int error = 0; 417 418 if (!(flags & MS_RDONLY)) 419 mode |= FMODE_WRITE; 420 421 error = btrfs_parse_early_options(data, mode, fs_type, 422 &subvol_name, &fs_devices); 423 if (error) 424 return error; 425 426 error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices); 427 if (error) 428 goto error_free_subvol_name; 429 430 error = btrfs_open_devices(fs_devices, mode, fs_type); 431 if (error) 432 goto error_free_subvol_name; 433 434 if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) { 435 error = -EACCES; 436 goto error_close_devices; 437 } 438 439 bdev = fs_devices->latest_bdev; 440 s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); 441 if (IS_ERR(s)) 442 goto error_s; 443 444 if (s->s_root) { 445 if ((flags ^ s->s_flags) & MS_RDONLY) { 446 up_write(&s->s_umount); 447 deactivate_super(s); 448 error = -EBUSY; 449 goto error_close_devices; 450 } 451 452 btrfs_close_devices(fs_devices); 453 } else { 454 char b[BDEVNAME_SIZE]; 455 456 s->s_flags = flags; 457 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); 458 error = btrfs_fill_super(s, fs_devices, data, 459 flags & MS_SILENT ? 1 : 0); 460 if (error) { 461 up_write(&s->s_umount); 462 deactivate_super(s); 463 goto error_free_subvol_name; 464 } 465 466 btrfs_sb(s)->fs_info->bdev_holder = fs_type; 467 s->s_flags |= MS_ACTIVE; 468 } 469 470 if (!strcmp(subvol_name, ".")) 471 root = dget(s->s_root); 472 else { 473 mutex_lock(&s->s_root->d_inode->i_mutex); 474 root = lookup_one_len(subvol_name, s->s_root, 475 strlen(subvol_name)); 476 mutex_unlock(&s->s_root->d_inode->i_mutex); 477 478 if (IS_ERR(root)) { 479 up_write(&s->s_umount); 480 deactivate_super(s); 481 error = PTR_ERR(root); 482 goto error_free_subvol_name; 483 } 484 if (!root->d_inode) { 485 dput(root); 486 up_write(&s->s_umount); 487 deactivate_super(s); 488 error = -ENXIO; 489 goto error_free_subvol_name; 490 } 491 } 492 493 mnt->mnt_sb = s; 494 mnt->mnt_root = root; 495 496 kfree(subvol_name); 497 return 0; 498 499 error_s: 500 error = PTR_ERR(s); 501 error_close_devices: 502 btrfs_close_devices(fs_devices); 503 error_free_subvol_name: 504 kfree(subvol_name); 505 return error; 506 } 507 508 static int btrfs_remount(struct super_block *sb, int *flags, char *data) 509 { 510 struct btrfs_root *root = btrfs_sb(sb); 511 int ret; 512 513 ret = btrfs_parse_options(root, data); 514 if (ret) 515 return -EINVAL; 516 517 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 518 return 0; 519 520 if (*flags & MS_RDONLY) { 521 sb->s_flags |= MS_RDONLY; 522 523 ret = btrfs_commit_super(root); 524 WARN_ON(ret); 525 } else { 526 if (root->fs_info->fs_devices->rw_devices == 0) 527 return -EACCES; 528 529 if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) 530 return -EINVAL; 531 532 ret = btrfs_cleanup_reloc_trees(root); 533 WARN_ON(ret); 534 535 ret = btrfs_cleanup_fs_roots(root->fs_info); 536 WARN_ON(ret); 537 538 sb->s_flags &= ~MS_RDONLY; 539 } 540 541 return 0; 542 } 543 544 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) 545 { 546 struct btrfs_root *root = btrfs_sb(dentry->d_sb); 547 struct btrfs_super_block *disk_super = &root->fs_info->super_copy; 548 int bits = dentry->d_sb->s_blocksize_bits; 549 __be32 *fsid = (__be32 *)root->fs_info->fsid; 550 551 buf->f_namelen = BTRFS_NAME_LEN; 552 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; 553 buf->f_bfree = buf->f_blocks - 554 (btrfs_super_bytes_used(disk_super) >> bits); 555 buf->f_bavail = buf->f_bfree; 556 buf->f_bsize = dentry->d_sb->s_blocksize; 557 buf->f_type = BTRFS_SUPER_MAGIC; 558 559 /* We treat it as constant endianness (it doesn't matter _which_) 560 because we want the fsid to come out the same whether mounted 561 on a big-endian or little-endian host */ 562 buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]); 563 buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]); 564 /* Mask in the root object ID too, to disambiguate subvols */ 565 buf->f_fsid.val[0] ^= BTRFS_I(dentry->d_inode)->root->objectid >> 32; 566 buf->f_fsid.val[1] ^= BTRFS_I(dentry->d_inode)->root->objectid; 567 568 return 0; 569 } 570 571 static struct file_system_type btrfs_fs_type = { 572 .owner = THIS_MODULE, 573 .name = "btrfs", 574 .get_sb = btrfs_get_sb, 575 .kill_sb = kill_anon_super, 576 .fs_flags = FS_REQUIRES_DEV, 577 }; 578 579 /* 580 * used by btrfsctl to scan devices when no FS is mounted 581 */ 582 static long btrfs_control_ioctl(struct file *file, unsigned int cmd, 583 unsigned long arg) 584 { 585 struct btrfs_ioctl_vol_args *vol; 586 struct btrfs_fs_devices *fs_devices; 587 int ret = -ENOTTY; 588 589 if (!capable(CAP_SYS_ADMIN)) 590 return -EPERM; 591 592 vol = kmalloc(sizeof(*vol), GFP_KERNEL); 593 if (!vol) 594 return -ENOMEM; 595 596 if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) { 597 ret = -EFAULT; 598 goto out; 599 } 600 601 switch (cmd) { 602 case BTRFS_IOC_SCAN_DEV: 603 ret = btrfs_scan_one_device(vol->name, FMODE_READ, 604 &btrfs_fs_type, &fs_devices); 605 break; 606 } 607 out: 608 kfree(vol); 609 return ret; 610 } 611 612 static int btrfs_freeze(struct super_block *sb) 613 { 614 struct btrfs_root *root = btrfs_sb(sb); 615 mutex_lock(&root->fs_info->transaction_kthread_mutex); 616 mutex_lock(&root->fs_info->cleaner_mutex); 617 return 0; 618 } 619 620 static int btrfs_unfreeze(struct super_block *sb) 621 { 622 struct btrfs_root *root = btrfs_sb(sb); 623 mutex_unlock(&root->fs_info->cleaner_mutex); 624 mutex_unlock(&root->fs_info->transaction_kthread_mutex); 625 return 0; 626 } 627 628 static struct super_operations btrfs_super_ops = { 629 .delete_inode = btrfs_delete_inode, 630 .put_super = btrfs_put_super, 631 .write_super = btrfs_write_super, 632 .sync_fs = btrfs_sync_fs, 633 .show_options = generic_show_options, 634 .write_inode = btrfs_write_inode, 635 .dirty_inode = btrfs_dirty_inode, 636 .alloc_inode = btrfs_alloc_inode, 637 .destroy_inode = btrfs_destroy_inode, 638 .statfs = btrfs_statfs, 639 .remount_fs = btrfs_remount, 640 .freeze_fs = btrfs_freeze, 641 .unfreeze_fs = btrfs_unfreeze, 642 }; 643 644 static const struct file_operations btrfs_ctl_fops = { 645 .unlocked_ioctl = btrfs_control_ioctl, 646 .compat_ioctl = btrfs_control_ioctl, 647 .owner = THIS_MODULE, 648 }; 649 650 static struct miscdevice btrfs_misc = { 651 .minor = MISC_DYNAMIC_MINOR, 652 .name = "btrfs-control", 653 .fops = &btrfs_ctl_fops 654 }; 655 656 static int btrfs_interface_init(void) 657 { 658 return misc_register(&btrfs_misc); 659 } 660 661 static void btrfs_interface_exit(void) 662 { 663 if (misc_deregister(&btrfs_misc) < 0) 664 printk(KERN_INFO "misc_deregister failed for control device"); 665 } 666 667 static int __init init_btrfs_fs(void) 668 { 669 int err; 670 671 err = btrfs_init_sysfs(); 672 if (err) 673 return err; 674 675 err = btrfs_init_cachep(); 676 if (err) 677 goto free_sysfs; 678 679 err = extent_io_init(); 680 if (err) 681 goto free_cachep; 682 683 err = extent_map_init(); 684 if (err) 685 goto free_extent_io; 686 687 err = btrfs_interface_init(); 688 if (err) 689 goto free_extent_map; 690 691 err = register_filesystem(&btrfs_fs_type); 692 if (err) 693 goto unregister_ioctl; 694 695 printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION); 696 return 0; 697 698 unregister_ioctl: 699 btrfs_interface_exit(); 700 free_extent_map: 701 extent_map_exit(); 702 free_extent_io: 703 extent_io_exit(); 704 free_cachep: 705 btrfs_destroy_cachep(); 706 free_sysfs: 707 btrfs_exit_sysfs(); 708 return err; 709 } 710 711 static void __exit exit_btrfs_fs(void) 712 { 713 btrfs_destroy_cachep(); 714 extent_map_exit(); 715 extent_io_exit(); 716 btrfs_interface_exit(); 717 unregister_filesystem(&btrfs_fs_type); 718 btrfs_exit_sysfs(); 719 btrfs_cleanup_fs_uuids(); 720 btrfs_zlib_exit(); 721 } 722 723 module_init(init_btrfs_fs) 724 module_exit(exit_btrfs_fs) 725 726 MODULE_LICENSE("GPL"); 727