1 /* 2 * linux/fs/super.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 * 6 * super.c contains code to handle: - mount structures 7 * - super-block tables 8 * - filesystem drivers list 9 * - mount system call 10 * - umount system call 11 * - ustat system call 12 * 13 * GK 2/5/95 - Changed to support mounting the root fs via NFS 14 * 15 * Added kerneld support: Jacques Gelinas and Bjorn Ekwall 16 * Added change_root: Werner Almesberger & Hans Lermen, Feb '96 17 * Added options to /proc/mounts: 18 * Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996. 19 * Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998 20 * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000 21 */ 22 23 #include <linux/module.h> 24 #include <linux/slab.h> 25 #include <linux/acct.h> 26 #include <linux/blkdev.h> 27 #include <linux/mount.h> 28 #include <linux/security.h> 29 #include <linux/writeback.h> /* for the emergency remount stuff */ 30 #include <linux/idr.h> 31 #include <linux/mutex.h> 32 #include <linux/backing-dev.h> 33 #include "internal.h" 34 35 36 LIST_HEAD(super_blocks); /* every superblock, linked through s_list */ 37 DEFINE_SPINLOCK(sb_lock); /* taken around walks and updates of super_blocks and s_count */ 38 39 /** 40 * alloc_super - create new superblock 41 * @type: filesystem type superblock should belong to 42 * 43 * Allocates and initializes a new &struct super_block. alloc_super() 44 * returns a pointer to the new superblock or %NULL if allocation had failed. 
45 */ 46 static struct super_block *alloc_super(struct file_system_type *type) 47 { 48 struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); 49 static const struct super_operations default_op; 50 51 if (s) { 52 if (security_sb_alloc(s)) { 53 kfree(s); 54 s = NULL; 55 goto out; 56 } 57 INIT_LIST_HEAD(&s->s_files); 58 INIT_LIST_HEAD(&s->s_instances); 59 INIT_HLIST_HEAD(&s->s_anon); 60 INIT_LIST_HEAD(&s->s_inodes); 61 INIT_LIST_HEAD(&s->s_dentry_lru); 62 init_rwsem(&s->s_umount); 63 mutex_init(&s->s_lock); 64 lockdep_set_class(&s->s_umount, &type->s_umount_key); 65 /* 66 * The locking rules for s_lock are up to the 67 * filesystem. For example ext3fs has different 68 * lock ordering than usbfs: 69 */ 70 lockdep_set_class(&s->s_lock, &type->s_lock_key); 71 /* 72 * sget() can have s_umount recursion. 73 * 74 * When it cannot find a suitable sb, it allocates a new 75 * one (this one), and tries again to find a suitable old 76 * one. 77 * 78 * In case that succeeds, it will acquire the s_umount 79 * lock of the old one. Since these are clearly distrinct 80 * locks, and this object isn't exposed yet, there's no 81 * risk of deadlocks. 82 * 83 * Annotate this by putting this lock in a different 84 * subclass. 85 */ 86 down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); 87 s->s_count = 1; 88 atomic_set(&s->s_active, 1); 89 mutex_init(&s->s_vfs_rename_mutex); 90 lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); 91 mutex_init(&s->s_dquot.dqio_mutex); 92 mutex_init(&s->s_dquot.dqonoff_mutex); 93 init_rwsem(&s->s_dquot.dqptr_sem); 94 init_waitqueue_head(&s->s_wait_unfrozen); 95 s->s_maxbytes = MAX_NON_LFS; 96 s->s_op = &default_op; 97 s->s_time_gran = 1000000000; 98 } 99 out: 100 return s; 101 } 102 103 /** 104 * destroy_super - frees a superblock 105 * @s: superblock to free 106 * 107 * Frees a superblock. 
108 */ 109 static inline void destroy_super(struct super_block *s) 110 { 111 security_sb_free(s); 112 kfree(s->s_subtype); 113 kfree(s->s_options); 114 kfree(s); 115 } 116 117 /* Superblock refcounting */ 118 119 /* 120 * Drop a superblock's refcount. The caller must hold sb_lock. 121 */ 122 void __put_super(struct super_block *sb) 123 { 124 if (!--sb->s_count) { 125 list_del_init(&sb->s_list); 126 destroy_super(sb); 127 } 128 } 129 130 /** 131 * put_super - drop a temporary reference to superblock 132 * @sb: superblock in question 133 * 134 * Drops a temporary reference, frees superblock if there's no 135 * references left. 136 */ 137 void put_super(struct super_block *sb) 138 { 139 spin_lock(&sb_lock); 140 __put_super(sb); 141 spin_unlock(&sb_lock); 142 } 143 144 145 /** 146 * deactivate_locked_super - drop an active reference to superblock 147 * @s: superblock to deactivate 148 * 149 * Drops an active reference to superblock, converting it into a temprory 150 * one if there is no other active references left. In that case we 151 * tell fs driver to shut it down and drop the temporary reference we 152 * had just acquired. 153 * 154 * Caller holds exclusive lock on superblock; that lock is released. 155 */ 156 void deactivate_locked_super(struct super_block *s) 157 { 158 struct file_system_type *fs = s->s_type; 159 if (atomic_dec_and_test(&s->s_active)) { 160 fs->kill_sb(s); 161 put_filesystem(fs); 162 put_super(s); 163 } else { 164 up_write(&s->s_umount); 165 } 166 } 167 168 EXPORT_SYMBOL(deactivate_locked_super); 169 170 /** 171 * deactivate_super - drop an active reference to superblock 172 * @s: superblock to deactivate 173 * 174 * Variant of deactivate_locked_super(), except that superblock is *not* 175 * locked by caller. If we are going to drop the final active reference, 176 * lock will be acquired prior to that. 
177 */ 178 void deactivate_super(struct super_block *s) 179 { 180 if (!atomic_add_unless(&s->s_active, -1, 1)) { 181 down_write(&s->s_umount); 182 deactivate_locked_super(s); 183 } 184 } 185 186 EXPORT_SYMBOL(deactivate_super); 187 188 /** 189 * grab_super - acquire an active reference 190 * @s: reference we are trying to make active 191 * 192 * Tries to acquire an active reference. grab_super() is used when we 193 * had just found a superblock in super_blocks or fs_type->fs_supers 194 * and want to turn it into a full-blown active reference. grab_super() 195 * is called with sb_lock held and drops it. Returns 1 in case of 196 * success, 0 if we had failed (superblock contents was already dead or 197 * dying when grab_super() had been called). 198 */ 199 static int grab_super(struct super_block *s) __releases(sb_lock) 200 { 201 if (atomic_inc_not_zero(&s->s_active)) { 202 spin_unlock(&sb_lock); 203 return 1; 204 } 205 /* it's going away */ 206 s->s_count++; 207 spin_unlock(&sb_lock); 208 /* wait for it to die */ 209 down_write(&s->s_umount); 210 up_write(&s->s_umount); 211 put_super(s); 212 return 0; 213 } 214 215 /* 216 * Superblock locking. We really ought to get rid of these two. 217 */ 218 void lock_super(struct super_block * sb) 219 { 220 get_fs_excl(); 221 mutex_lock(&sb->s_lock); 222 } 223 224 void unlock_super(struct super_block * sb) 225 { 226 put_fs_excl(); 227 mutex_unlock(&sb->s_lock); 228 } 229 230 EXPORT_SYMBOL(lock_super); 231 EXPORT_SYMBOL(unlock_super); 232 233 /** 234 * generic_shutdown_super - common helper for ->kill_sb() 235 * @sb: superblock to kill 236 * 237 * generic_shutdown_super() does all fs-independent work on superblock 238 * shutdown. Typical ->kill_sb() should pick all fs-specific objects 239 * that need destruction out of superblock, call generic_shutdown_super() 240 * and release aforementioned objects. Note: dentries and inodes _are_ 241 * taken care of and do not need specific handling. 
242 * 243 * Upon calling this function, the filesystem may no longer alter or 244 * rearrange the set of dentries belonging to this super_block, nor may it 245 * change the attachments of dentries to inodes. 246 */ 247 void generic_shutdown_super(struct super_block *sb) 248 { 249 const struct super_operations *sop = sb->s_op; 250 251 252 if (sb->s_root) { 253 shrink_dcache_for_umount(sb); 254 sync_filesystem(sb); 255 get_fs_excl(); 256 sb->s_flags &= ~MS_ACTIVE; 257 258 /* bad name - it should be evict_inodes() */ 259 invalidate_inodes(sb); 260 261 if (sop->put_super) 262 sop->put_super(sb); 263 264 /* Forget any remaining inodes */ 265 if (invalidate_inodes(sb)) { 266 printk("VFS: Busy inodes after unmount of %s. " 267 "Self-destruct in 5 seconds. Have a nice day...\n", 268 sb->s_id); 269 } 270 put_fs_excl(); 271 } 272 spin_lock(&sb_lock); 273 /* should be initialized for __put_super_and_need_restart() */ 274 list_del_init(&sb->s_instances); 275 spin_unlock(&sb_lock); 276 up_write(&sb->s_umount); 277 } 278 279 EXPORT_SYMBOL(generic_shutdown_super); 280 281 /** 282 * sget - find or create a superblock 283 * @type: filesystem type superblock should belong to 284 * @test: comparison callback 285 * @set: setup callback 286 * @data: argument to each of them 287 */ 288 struct super_block *sget(struct file_system_type *type, 289 int (*test)(struct super_block *,void *), 290 int (*set)(struct super_block *,void *), 291 void *data) 292 { 293 struct super_block *s = NULL; 294 struct super_block *old; 295 int err; 296 297 retry: 298 spin_lock(&sb_lock); 299 if (test) { 300 list_for_each_entry(old, &type->fs_supers, s_instances) { 301 if (!test(old, data)) 302 continue; 303 if (!grab_super(old)) 304 goto retry; 305 if (s) { 306 up_write(&s->s_umount); 307 destroy_super(s); 308 s = NULL; 309 } 310 down_write(&old->s_umount); 311 if (unlikely(!(old->s_flags & MS_BORN))) { 312 deactivate_locked_super(old); 313 goto retry; 314 } 315 return old; 316 } 317 } 318 if (!s) { 319 
spin_unlock(&sb_lock); 320 s = alloc_super(type); 321 if (!s) 322 return ERR_PTR(-ENOMEM); 323 goto retry; 324 } 325 326 err = set(s, data); 327 if (err) { 328 spin_unlock(&sb_lock); 329 up_write(&s->s_umount); 330 destroy_super(s); 331 return ERR_PTR(err); 332 } 333 s->s_type = type; 334 strlcpy(s->s_id, type->name, sizeof(s->s_id)); 335 list_add_tail(&s->s_list, &super_blocks); 336 list_add(&s->s_instances, &type->fs_supers); 337 spin_unlock(&sb_lock); 338 get_filesystem(type); 339 return s; 340 } 341 342 EXPORT_SYMBOL(sget); 343 344 void drop_super(struct super_block *sb) 345 { 346 up_read(&sb->s_umount); 347 put_super(sb); 348 } 349 350 EXPORT_SYMBOL(drop_super); 351 352 /** 353 * sync_supers - helper for periodic superblock writeback 354 * 355 * Call the write_super method if present on all dirty superblocks in 356 * the system. This is for the periodic writeback used by most older 357 * filesystems. For data integrity superblock writeback use 358 * sync_filesystems() instead. 359 * 360 * Note: check the dirty flag before waiting, so we don't 361 * hold up the sync while mounting a device. (The newly 362 * mounted device won't need syncing.) 
363 */ 364 void sync_supers(void) 365 { 366 struct super_block *sb, *p = NULL; 367 368 spin_lock(&sb_lock); 369 list_for_each_entry(sb, &super_blocks, s_list) { 370 if (list_empty(&sb->s_instances)) 371 continue; 372 if (sb->s_op->write_super && sb->s_dirt) { 373 sb->s_count++; 374 spin_unlock(&sb_lock); 375 376 down_read(&sb->s_umount); 377 if (sb->s_root && sb->s_dirt) 378 sb->s_op->write_super(sb); 379 up_read(&sb->s_umount); 380 381 spin_lock(&sb_lock); 382 if (p) 383 __put_super(p); 384 p = sb; 385 } 386 } 387 if (p) 388 __put_super(p); 389 spin_unlock(&sb_lock); 390 } 391 392 /** 393 * iterate_supers - call function for all active superblocks 394 * @f: function to call 395 * @arg: argument to pass to it 396 * 397 * Scans the superblock list and calls given function, passing it 398 * locked superblock and given argument. 399 */ 400 void iterate_supers(void (*f)(struct super_block *, void *), void *arg) 401 { 402 struct super_block *sb, *p = NULL; 403 404 spin_lock(&sb_lock); 405 list_for_each_entry(sb, &super_blocks, s_list) { 406 if (list_empty(&sb->s_instances)) 407 continue; 408 sb->s_count++; 409 spin_unlock(&sb_lock); 410 411 down_read(&sb->s_umount); 412 if (sb->s_root) 413 f(sb, arg); 414 up_read(&sb->s_umount); 415 416 spin_lock(&sb_lock); 417 if (p) 418 __put_super(p); 419 p = sb; 420 } 421 if (p) 422 __put_super(p); 423 spin_unlock(&sb_lock); 424 } 425 426 /** 427 * get_super - get the superblock of a device 428 * @bdev: device to get the superblock for 429 * 430 * Scans the superblock list and finds the superblock of the file system 431 * mounted on the device given. %NULL is returned if no match is found. 
432 */ 433 434 struct super_block *get_super(struct block_device *bdev) 435 { 436 struct super_block *sb; 437 438 if (!bdev) 439 return NULL; 440 441 spin_lock(&sb_lock); 442 rescan: 443 list_for_each_entry(sb, &super_blocks, s_list) { 444 if (list_empty(&sb->s_instances)) 445 continue; 446 if (sb->s_bdev == bdev) { 447 sb->s_count++; 448 spin_unlock(&sb_lock); 449 down_read(&sb->s_umount); 450 /* still alive? */ 451 if (sb->s_root) 452 return sb; 453 up_read(&sb->s_umount); 454 /* nope, got unmounted */ 455 spin_lock(&sb_lock); 456 __put_super(sb); 457 goto rescan; 458 } 459 } 460 spin_unlock(&sb_lock); 461 return NULL; 462 } 463 464 EXPORT_SYMBOL(get_super); 465 466 /** 467 * get_active_super - get an active reference to the superblock of a device 468 * @bdev: device to get the superblock for 469 * 470 * Scans the superblock list and finds the superblock of the file system 471 * mounted on the device given. Returns the superblock with an active 472 * reference or %NULL if none was found. 473 */ 474 struct super_block *get_active_super(struct block_device *bdev) 475 { 476 struct super_block *sb; 477 478 if (!bdev) 479 return NULL; 480 481 restart: 482 spin_lock(&sb_lock); 483 list_for_each_entry(sb, &super_blocks, s_list) { 484 if (list_empty(&sb->s_instances)) 485 continue; 486 if (sb->s_bdev == bdev) { 487 if (grab_super(sb)) /* drops sb_lock */ 488 return sb; 489 else 490 goto restart; 491 } 492 } 493 spin_unlock(&sb_lock); 494 return NULL; 495 } 496 497 struct super_block *user_get_super(dev_t dev) 498 { 499 struct super_block *sb; 500 501 spin_lock(&sb_lock); 502 rescan: 503 list_for_each_entry(sb, &super_blocks, s_list) { 504 if (list_empty(&sb->s_instances)) 505 continue; 506 if (sb->s_dev == dev) { 507 sb->s_count++; 508 spin_unlock(&sb_lock); 509 down_read(&sb->s_umount); 510 /* still alive? 
*/ 511 if (sb->s_root) 512 return sb; 513 up_read(&sb->s_umount); 514 /* nope, got unmounted */ 515 spin_lock(&sb_lock); 516 __put_super(sb); 517 goto rescan; 518 } 519 } 520 spin_unlock(&sb_lock); 521 return NULL; 522 } 523 524 /** 525 * do_remount_sb - asks filesystem to change mount options. 526 * @sb: superblock in question 527 * @flags: numeric part of options 528 * @data: the rest of options 529 * @force: whether or not to force the change 530 * 531 * Alters the mount options of a mounted file system. 532 */ 533 int do_remount_sb(struct super_block *sb, int flags, void *data, int force) 534 { 535 int retval; 536 int remount_ro; 537 538 if (sb->s_frozen != SB_UNFROZEN) 539 return -EBUSY; 540 541 #ifdef CONFIG_BLOCK 542 if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev)) 543 return -EACCES; 544 #endif 545 546 if (flags & MS_RDONLY) 547 acct_auto_close(sb); 548 shrink_dcache_sb(sb); 549 sync_filesystem(sb); 550 551 remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); 552 553 /* If we are remounting RDONLY and current sb is read/write, 554 make sure there are no rw files opened */ 555 if (remount_ro) { 556 if (force) 557 mark_files_ro(sb); 558 else if (!fs_may_remount_ro(sb)) 559 return -EBUSY; 560 } 561 562 if (sb->s_op->remount_fs) { 563 retval = sb->s_op->remount_fs(sb, &flags, data); 564 if (retval) 565 return retval; 566 } 567 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); 568 569 /* 570 * Some filesystems modify their metadata via some other path than the 571 * bdev buffer cache (eg. use a private mapping, or directories in 572 * pagecache, etc). Also file data modifications go via their own 573 * mappings. So If we try to mount readonly then copy the filesystem 574 * from bdev, we could get stale data, so invalidate it to give a best 575 * effort at coherency. 
576 */ 577 if (remount_ro && sb->s_bdev) 578 invalidate_bdev(sb->s_bdev); 579 return 0; 580 } 581 582 static void do_emergency_remount(struct work_struct *work) 583 { 584 struct super_block *sb, *p = NULL; 585 586 spin_lock(&sb_lock); 587 list_for_each_entry(sb, &super_blocks, s_list) { 588 if (list_empty(&sb->s_instances)) 589 continue; 590 sb->s_count++; 591 spin_unlock(&sb_lock); 592 down_write(&sb->s_umount); 593 if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) { 594 /* 595 * What lock protects sb->s_flags?? 596 */ 597 do_remount_sb(sb, MS_RDONLY, NULL, 1); 598 } 599 up_write(&sb->s_umount); 600 spin_lock(&sb_lock); 601 if (p) 602 __put_super(p); 603 p = sb; 604 } 605 if (p) 606 __put_super(p); 607 spin_unlock(&sb_lock); 608 kfree(work); 609 printk("Emergency Remount complete\n"); 610 } 611 612 void emergency_remount(void) 613 { 614 struct work_struct *work; 615 616 work = kmalloc(sizeof(*work), GFP_ATOMIC); 617 if (work) { 618 INIT_WORK(work, do_emergency_remount); 619 schedule_work(work); 620 } 621 } 622 623 /* 624 * Unnamed block devices are dummy devices used by virtual 625 * filesystems which don't use real block-devices. -- jrs 626 */ 627 628 static DEFINE_IDA(unnamed_dev_ida); 629 static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ 630 static int unnamed_dev_start = 0; /* don't bother trying below it */ 631 632 int set_anon_super(struct super_block *s, void *data) 633 { 634 int dev; 635 int error; 636 637 retry: 638 if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0) 639 return -ENOMEM; 640 spin_lock(&unnamed_dev_lock); 641 error = ida_get_new_above(&unnamed_dev_ida, unnamed_dev_start, &dev); 642 if (!error) 643 unnamed_dev_start = dev + 1; 644 spin_unlock(&unnamed_dev_lock); 645 if (error == -EAGAIN) 646 /* We raced and lost with another CPU. 
*/ 647 goto retry; 648 else if (error) 649 return -EAGAIN; 650 651 if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { 652 spin_lock(&unnamed_dev_lock); 653 ida_remove(&unnamed_dev_ida, dev); 654 if (unnamed_dev_start > dev) 655 unnamed_dev_start = dev; 656 spin_unlock(&unnamed_dev_lock); 657 return -EMFILE; 658 } 659 s->s_dev = MKDEV(0, dev & MINORMASK); 660 s->s_bdi = &noop_backing_dev_info; 661 return 0; 662 } 663 664 EXPORT_SYMBOL(set_anon_super); 665 666 void kill_anon_super(struct super_block *sb) 667 { 668 int slot = MINOR(sb->s_dev); 669 670 generic_shutdown_super(sb); 671 spin_lock(&unnamed_dev_lock); 672 ida_remove(&unnamed_dev_ida, slot); 673 if (slot < unnamed_dev_start) 674 unnamed_dev_start = slot; 675 spin_unlock(&unnamed_dev_lock); 676 } 677 678 EXPORT_SYMBOL(kill_anon_super); 679 680 void kill_litter_super(struct super_block *sb) 681 { 682 if (sb->s_root) 683 d_genocide(sb->s_root); 684 kill_anon_super(sb); 685 } 686 687 EXPORT_SYMBOL(kill_litter_super); 688 689 static int ns_test_super(struct super_block *sb, void *data) 690 { 691 return sb->s_fs_info == data; 692 } 693 694 static int ns_set_super(struct super_block *sb, void *data) 695 { 696 sb->s_fs_info = data; 697 return set_anon_super(sb, NULL); 698 } 699 700 int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, 701 int (*fill_super)(struct super_block *, void *, int), 702 struct vfsmount *mnt) 703 { 704 struct super_block *sb; 705 706 sb = sget(fs_type, ns_test_super, ns_set_super, data); 707 if (IS_ERR(sb)) 708 return PTR_ERR(sb); 709 710 if (!sb->s_root) { 711 int err; 712 sb->s_flags = flags; 713 err = fill_super(sb, data, flags & MS_SILENT ? 
1 : 0); 714 if (err) { 715 deactivate_locked_super(sb); 716 return err; 717 } 718 719 sb->s_flags |= MS_ACTIVE; 720 } 721 722 simple_set_mnt(mnt, sb); 723 return 0; 724 } 725 726 EXPORT_SYMBOL(get_sb_ns); 727 728 #ifdef CONFIG_BLOCK 729 static int set_bdev_super(struct super_block *s, void *data) 730 { 731 s->s_bdev = data; 732 s->s_dev = s->s_bdev->bd_dev; 733 734 /* 735 * We set the bdi here to the queue backing, file systems can 736 * overwrite this in ->fill_super() 737 */ 738 s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info; 739 return 0; 740 } 741 742 static int test_bdev_super(struct super_block *s, void *data) 743 { 744 return (void *)s->s_bdev == data; 745 } 746 747 int get_sb_bdev(struct file_system_type *fs_type, 748 int flags, const char *dev_name, void *data, 749 int (*fill_super)(struct super_block *, void *, int), 750 struct vfsmount *mnt) 751 { 752 struct block_device *bdev; 753 struct super_block *s; 754 fmode_t mode = FMODE_READ; 755 int error = 0; 756 757 if (!(flags & MS_RDONLY)) 758 mode |= FMODE_WRITE; 759 760 bdev = open_bdev_exclusive(dev_name, mode, fs_type); 761 if (IS_ERR(bdev)) 762 return PTR_ERR(bdev); 763 764 /* 765 * once the super is inserted into the list by sget, s_umount 766 * will protect the lockfs code from trying to start a snapshot 767 * while we are mounting 768 */ 769 mutex_lock(&bdev->bd_fsfreeze_mutex); 770 if (bdev->bd_fsfreeze_count > 0) { 771 mutex_unlock(&bdev->bd_fsfreeze_mutex); 772 error = -EBUSY; 773 goto error_bdev; 774 } 775 s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); 776 mutex_unlock(&bdev->bd_fsfreeze_mutex); 777 if (IS_ERR(s)) 778 goto error_s; 779 780 if (s->s_root) { 781 if ((flags ^ s->s_flags) & MS_RDONLY) { 782 deactivate_locked_super(s); 783 error = -EBUSY; 784 goto error_bdev; 785 } 786 787 /* 788 * s_umount nests inside bd_mutex during 789 * __invalidate_device(). close_bdev_exclusive() 790 * acquires bd_mutex and can't be called under 791 * s_umount. Drop s_umount temporarily. 
This is safe 792 * as we're holding an active reference. 793 */ 794 up_write(&s->s_umount); 795 close_bdev_exclusive(bdev, mode); 796 down_write(&s->s_umount); 797 } else { 798 char b[BDEVNAME_SIZE]; 799 800 s->s_flags = flags; 801 s->s_mode = mode; 802 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); 803 sb_set_blocksize(s, block_size(bdev)); 804 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); 805 if (error) { 806 deactivate_locked_super(s); 807 goto error; 808 } 809 810 s->s_flags |= MS_ACTIVE; 811 bdev->bd_super = s; 812 } 813 814 simple_set_mnt(mnt, s); 815 return 0; 816 817 error_s: 818 error = PTR_ERR(s); 819 error_bdev: 820 close_bdev_exclusive(bdev, mode); 821 error: 822 return error; 823 } 824 825 EXPORT_SYMBOL(get_sb_bdev); 826 827 void kill_block_super(struct super_block *sb) 828 { 829 struct block_device *bdev = sb->s_bdev; 830 fmode_t mode = sb->s_mode; 831 832 bdev->bd_super = NULL; 833 generic_shutdown_super(sb); 834 sync_blockdev(bdev); 835 close_bdev_exclusive(bdev, mode); 836 } 837 838 EXPORT_SYMBOL(kill_block_super); 839 #endif 840 841 int get_sb_nodev(struct file_system_type *fs_type, 842 int flags, void *data, 843 int (*fill_super)(struct super_block *, void *, int), 844 struct vfsmount *mnt) 845 { 846 int error; 847 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); 848 849 if (IS_ERR(s)) 850 return PTR_ERR(s); 851 852 s->s_flags = flags; 853 854 error = fill_super(s, data, flags & MS_SILENT ? 
1 : 0); 855 if (error) { 856 deactivate_locked_super(s); 857 return error; 858 } 859 s->s_flags |= MS_ACTIVE; 860 simple_set_mnt(mnt, s); 861 return 0; 862 } 863 864 EXPORT_SYMBOL(get_sb_nodev); 865 866 static int compare_single(struct super_block *s, void *p) 867 { 868 return 1; 869 } 870 871 int get_sb_single(struct file_system_type *fs_type, 872 int flags, void *data, 873 int (*fill_super)(struct super_block *, void *, int), 874 struct vfsmount *mnt) 875 { 876 struct super_block *s; 877 int error; 878 879 s = sget(fs_type, compare_single, set_anon_super, NULL); 880 if (IS_ERR(s)) 881 return PTR_ERR(s); 882 if (!s->s_root) { 883 s->s_flags = flags; 884 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); 885 if (error) { 886 deactivate_locked_super(s); 887 return error; 888 } 889 s->s_flags |= MS_ACTIVE; 890 } else { 891 do_remount_sb(s, flags, data, 0); 892 } 893 simple_set_mnt(mnt, s); 894 return 0; 895 } 896 897 EXPORT_SYMBOL(get_sb_single); 898 899 struct vfsmount * 900 vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) 901 { 902 struct vfsmount *mnt; 903 char *secdata = NULL; 904 int error; 905 906 if (!type) 907 return ERR_PTR(-ENODEV); 908 909 error = -ENOMEM; 910 mnt = alloc_vfsmnt(name); 911 if (!mnt) 912 goto out; 913 914 if (flags & MS_KERNMOUNT) 915 mnt->mnt_flags = MNT_INTERNAL; 916 917 if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) { 918 secdata = alloc_secdata(); 919 if (!secdata) 920 goto out_mnt; 921 922 error = security_sb_copy_data(data, secdata); 923 if (error) 924 goto out_free_secdata; 925 } 926 927 error = type->get_sb(type, flags, name, data, mnt); 928 if (error < 0) 929 goto out_free_secdata; 930 BUG_ON(!mnt->mnt_sb); 931 WARN_ON(!mnt->mnt_sb->s_bdi); 932 mnt->mnt_sb->s_flags |= MS_BORN; 933 934 error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); 935 if (error) 936 goto out_sb; 937 938 /* 939 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE 940 * but s_maxbytes 
was an unsigned long long for many releases. Throw 941 * this warning for a little while to try and catch filesystems that 942 * violate this rule. This warning should be either removed or 943 * converted to a BUG() in 2.6.34. 944 */ 945 WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " 946 "negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes); 947 948 mnt->mnt_mountpoint = mnt->mnt_root; 949 mnt->mnt_parent = mnt; 950 up_write(&mnt->mnt_sb->s_umount); 951 free_secdata(secdata); 952 return mnt; 953 out_sb: 954 dput(mnt->mnt_root); 955 deactivate_locked_super(mnt->mnt_sb); 956 out_free_secdata: 957 free_secdata(secdata); 958 out_mnt: 959 free_vfsmnt(mnt); 960 out: 961 return ERR_PTR(error); 962 } 963 964 EXPORT_SYMBOL_GPL(vfs_kern_mount); 965 966 /** 967 * freeze_super - lock the filesystem and force it into a consistent state 968 * @sb: the super to lock 969 * 970 * Syncs the super to make sure the filesystem is consistent and calls the fs's 971 * freeze_fs. Subsequent calls to this without first thawing the fs will return 972 * -EBUSY. 
973 */ 974 int freeze_super(struct super_block *sb) 975 { 976 int ret; 977 978 atomic_inc(&sb->s_active); 979 down_write(&sb->s_umount); 980 if (sb->s_frozen) { 981 deactivate_locked_super(sb); 982 return -EBUSY; 983 } 984 985 if (sb->s_flags & MS_RDONLY) { 986 sb->s_frozen = SB_FREEZE_TRANS; 987 smp_wmb(); 988 up_write(&sb->s_umount); 989 return 0; 990 } 991 992 sb->s_frozen = SB_FREEZE_WRITE; 993 smp_wmb(); 994 995 sync_filesystem(sb); 996 997 sb->s_frozen = SB_FREEZE_TRANS; 998 smp_wmb(); 999 1000 sync_blockdev(sb->s_bdev); 1001 if (sb->s_op->freeze_fs) { 1002 ret = sb->s_op->freeze_fs(sb); 1003 if (ret) { 1004 printk(KERN_ERR 1005 "VFS:Filesystem freeze failed\n"); 1006 sb->s_frozen = SB_UNFROZEN; 1007 deactivate_locked_super(sb); 1008 return ret; 1009 } 1010 } 1011 up_write(&sb->s_umount); 1012 return 0; 1013 } 1014 EXPORT_SYMBOL(freeze_super); 1015 1016 /** 1017 * thaw_super -- unlock filesystem 1018 * @sb: the super to thaw 1019 * 1020 * Unlocks the filesystem and marks it writeable again after freeze_super(). 
1021 */ 1022 int thaw_super(struct super_block *sb) 1023 { 1024 int error; 1025 1026 down_write(&sb->s_umount); 1027 if (sb->s_frozen == SB_UNFROZEN) { 1028 up_write(&sb->s_umount); 1029 return -EINVAL; 1030 } 1031 1032 if (sb->s_flags & MS_RDONLY) 1033 goto out; 1034 1035 if (sb->s_op->unfreeze_fs) { 1036 error = sb->s_op->unfreeze_fs(sb); 1037 if (error) { 1038 printk(KERN_ERR 1039 "VFS:Filesystem thaw failed\n"); 1040 sb->s_frozen = SB_FREEZE_TRANS; 1041 up_write(&sb->s_umount); 1042 return error; 1043 } 1044 } 1045 1046 out: 1047 sb->s_frozen = SB_UNFROZEN; 1048 smp_wmb(); 1049 wake_up(&sb->s_wait_unfrozen); 1050 deactivate_locked_super(sb); 1051 1052 return 0; 1053 } 1054 EXPORT_SYMBOL(thaw_super); 1055 1056 static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) 1057 { 1058 int err; 1059 const char *subtype = strchr(fstype, '.'); 1060 if (subtype) { 1061 subtype++; 1062 err = -EINVAL; 1063 if (!subtype[0]) 1064 goto err; 1065 } else 1066 subtype = ""; 1067 1068 mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL); 1069 err = -ENOMEM; 1070 if (!mnt->mnt_sb->s_subtype) 1071 goto err; 1072 return mnt; 1073 1074 err: 1075 mntput(mnt); 1076 return ERR_PTR(err); 1077 } 1078 1079 struct vfsmount * 1080 do_kern_mount(const char *fstype, int flags, const char *name, void *data) 1081 { 1082 struct file_system_type *type = get_fs_type(fstype); 1083 struct vfsmount *mnt; 1084 if (!type) 1085 return ERR_PTR(-ENODEV); 1086 mnt = vfs_kern_mount(type, flags, name, data); 1087 if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && 1088 !mnt->mnt_sb->s_subtype) 1089 mnt = fs_set_subtype(mnt, fstype); 1090 put_filesystem(type); 1091 return mnt; 1092 } 1093 EXPORT_SYMBOL_GPL(do_kern_mount); 1094 1095 struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) 1096 { 1097 return vfs_kern_mount(type, MS_KERNMOUNT, type->name, data); 1098 } 1099 1100 EXPORT_SYMBOL_GPL(kern_mount_data); 1101