1 /* 2 * linux/fs/ext4/super.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * from 10 * 11 * linux/fs/minix/inode.c 12 * 13 * Copyright (C) 1991, 1992 Linus Torvalds 14 * 15 * Big-endian to little-endian byte-swapping/bitmaps by 16 * David S. Miller (davem@caip.rutgers.edu), 1995 17 */ 18 19 #include <linux/module.h> 20 #include <linux/string.h> 21 #include <linux/fs.h> 22 #include <linux/time.h> 23 #include <linux/jbd2.h> 24 #include <linux/slab.h> 25 #include <linux/init.h> 26 #include <linux/blkdev.h> 27 #include <linux/parser.h> 28 #include <linux/smp_lock.h> 29 #include <linux/buffer_head.h> 30 #include <linux/exportfs.h> 31 #include <linux/vfs.h> 32 #include <linux/random.h> 33 #include <linux/mount.h> 34 #include <linux/namei.h> 35 #include <linux/quotaops.h> 36 #include <linux/seq_file.h> 37 #include <linux/proc_fs.h> 38 #include <linux/marker.h> 39 #include <linux/log2.h> 40 #include <linux/crc16.h> 41 #include <asm/uaccess.h> 42 43 #include "ext4.h" 44 #include "ext4_jbd2.h" 45 #include "xattr.h" 46 #include "acl.h" 47 #include "namei.h" 48 #include "group.h" 49 50 struct proc_dir_entry *ext4_proc_root; 51 52 static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 53 unsigned long journal_devnum); 54 static int ext4_create_journal(struct super_block *, struct ext4_super_block *, 55 unsigned int); 56 static void ext4_commit_super(struct super_block *sb, 57 struct ext4_super_block *es, int sync); 58 static void ext4_mark_recovery_complete(struct super_block *sb, 59 struct ext4_super_block *es); 60 static void ext4_clear_journal_err(struct super_block *sb, 61 struct ext4_super_block *es); 62 static int ext4_sync_fs(struct super_block *sb, int wait); 63 static const char *ext4_decode_error(struct super_block *sb, int errno, 64 char nbuf[16]); 65 static int ext4_remount(struct super_block *sb, int *flags, char *data); 66 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); 67 static void ext4_unlockfs(struct super_block *sb); 68 static void ext4_write_super(struct super_block *sb); 69 static void ext4_write_super_lockfs(struct super_block *sb); 70 71 72 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, 73 struct ext4_group_desc *bg) 74 { 75 return le32_to_cpu(bg->bg_block_bitmap_lo) | 76 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 77 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); 78 } 79 80 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, 81 struct ext4_group_desc *bg) 82 { 83 return le32_to_cpu(bg->bg_inode_bitmap_lo) | 84 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 85 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); 86 } 87 88 ext4_fsblk_t ext4_inode_table(struct super_block *sb, 89 struct ext4_group_desc *bg) 90 { 91 return le32_to_cpu(bg->bg_inode_table_lo) | 92 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 93 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 94 } 95 96 void ext4_block_bitmap_set(struct super_block *sb, 97 struct ext4_group_desc *bg, ext4_fsblk_t blk) 98 { 99 bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk); 100 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 101 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); 102 } 103 104 void ext4_inode_bitmap_set(struct super_block *sb, 105 struct ext4_group_desc *bg, ext4_fsblk_t blk) 106 { 107 bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk); 108 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 109 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); 110 } 111 112 void ext4_inode_table_set(struct super_block *sb, 113 struct ext4_group_desc *bg, ext4_fsblk_t blk) 114 { 115 bg->bg_inode_table_lo = cpu_to_le32((u32)blk); 116 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 117 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); 118 } 119 120 /* 121 * Wrappers for jbd2_journal_start/end. 122 * 123 * The only special thing we need to do here is to make sure that all 124 * journal_end calls result in the superblock being marked dirty, so 125 * that sync() will call the filesystem's write_super callback if 126 * appropriate. 127 */ 128 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 129 { 130 journal_t *journal; 131 132 if (sb->s_flags & MS_RDONLY) 133 return ERR_PTR(-EROFS); 134 135 /* Special case here: if the journal has aborted behind our 136 * backs (eg. EIO in the commit thread), then we still need to 137 * take the FS itself readonly cleanly. */ 138 journal = EXT4_SB(sb)->s_journal; 139 if (is_journal_aborted(journal)) { 140 ext4_abort(sb, __func__, 141 "Detected aborted journal"); 142 return ERR_PTR(-EROFS); 143 } 144 145 return jbd2_journal_start(journal, nblocks); 146 } 147 148 /* 149 * The only special thing we need to do here is to make sure that all 150 * jbd2_journal_stop calls result in the superblock being marked dirty, so 151 * that sync() will call the filesystem's write_super callback if 152 * appropriate. 153 */ 154 int __ext4_journal_stop(const char *where, handle_t *handle) 155 { 156 struct super_block *sb; 157 int err; 158 int rc; 159 160 sb = handle->h_transaction->t_journal->j_private; 161 err = handle->h_err; 162 rc = jbd2_journal_stop(handle); 163 164 if (!err) 165 err = rc; 166 if (err) 167 __ext4_std_error(sb, where, err); 168 return err; 169 } 170 171 void ext4_journal_abort_handle(const char *caller, const char *err_fn, 172 struct buffer_head *bh, handle_t *handle, int err) 173 { 174 char nbuf[16]; 175 const char *errstr = ext4_decode_error(NULL, err, nbuf); 176 177 if (bh) 178 BUFFER_TRACE(bh, "abort"); 179 180 if (!handle->h_err) 181 handle->h_err = err; 182 183 if (is_handle_aborted(handle)) 184 return; 185 186 printk(KERN_ERR "%s: aborting transaction: %s in %s\n", 187 caller, errstr, err_fn); 188 189 jbd2_journal_abort_handle(handle); 190 } 191 192 /* Deal with the reporting of failure conditions on a filesystem such as 193 * inconsistencies detected or read IO failures. 194 * 195 * On ext2, we can store the error state of the filesystem in the 196 * superblock. That is not possible on ext4, because we may have other 197 * write ordering constraints on the superblock which prevent us from 198 * writing it out straight away; and given that the journal is about to 199 * be aborted, we can't rely on the current, or future, transactions to 200 * write out the superblock safely. 201 * 202 * We'll just use the jbd2_journal_abort() error code to record an error in 203 * the journal instead. On recovery, the journal will compain about 204 * that error until we've noted it down and cleared it. 205 */ 206 207 static void ext4_handle_error(struct super_block *sb) 208 { 209 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 210 211 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 212 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 213 214 if (sb->s_flags & MS_RDONLY) 215 return; 216 217 if (!test_opt(sb, ERRORS_CONT)) { 218 journal_t *journal = EXT4_SB(sb)->s_journal; 219 220 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 221 if (journal) 222 jbd2_journal_abort(journal, -EIO); 223 } 224 if (test_opt(sb, ERRORS_RO)) { 225 printk(KERN_CRIT "Remounting filesystem read-only\n"); 226 sb->s_flags |= MS_RDONLY; 227 } 228 ext4_commit_super(sb, es, 1); 229 if (test_opt(sb, ERRORS_PANIC)) 230 panic("EXT4-fs (device %s): panic forced after error\n", 231 sb->s_id); 232 } 233 234 void ext4_error(struct super_block *sb, const char *function, 235 const char *fmt, ...) 236 { 237 va_list args; 238 239 va_start(args, fmt); 240 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 241 vprintk(fmt, args); 242 printk("\n"); 243 va_end(args); 244 245 ext4_handle_error(sb); 246 } 247 248 static const char *ext4_decode_error(struct super_block *sb, int errno, 249 char nbuf[16]) 250 { 251 char *errstr = NULL; 252 253 switch (errno) { 254 case -EIO: 255 errstr = "IO failure"; 256 break; 257 case -ENOMEM: 258 errstr = "Out of memory"; 259 break; 260 case -EROFS: 261 if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT) 262 errstr = "Journal has aborted"; 263 else 264 errstr = "Readonly filesystem"; 265 break; 266 default: 267 /* If the caller passed in an extra buffer for unknown 268 * errors, textualise them now. Else we just return 269 * NULL. */ 270 if (nbuf) { 271 /* Check for truncated error codes... */ 272 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 273 errstr = nbuf; 274 } 275 break; 276 } 277 278 return errstr; 279 } 280 281 /* __ext4_std_error decodes expected errors from journaling functions 282 * automatically and invokes the appropriate error response. */ 283 284 void __ext4_std_error(struct super_block *sb, const char *function, int errno) 285 { 286 char nbuf[16]; 287 const char *errstr; 288 289 /* Special case: if the error is EROFS, and we're not already 290 * inside a transaction, then there's really no point in logging 291 * an error. */ 292 if (errno == -EROFS && journal_current_handle() == NULL && 293 (sb->s_flags & MS_RDONLY)) 294 return; 295 296 errstr = ext4_decode_error(sb, errno, nbuf); 297 printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", 298 sb->s_id, function, errstr); 299 300 ext4_handle_error(sb); 301 } 302 303 /* 304 * ext4_abort is a much stronger failure handler than ext4_error. The 305 * abort function may be used to deal with unrecoverable failures such 306 * as journal IO errors or ENOMEM at a critical moment in log management. 307 * 308 * We unconditionally force the filesystem into an ABORT|READONLY state, 309 * unless the error response on the fs has been set to panic in which 310 * case we take the easy way out and panic immediately. 311 */ 312 313 void ext4_abort(struct super_block *sb, const char *function, 314 const char *fmt, ...) 315 { 316 va_list args; 317 318 printk(KERN_CRIT "ext4_abort called.\n"); 319 320 va_start(args, fmt); 321 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 322 vprintk(fmt, args); 323 printk("\n"); 324 va_end(args); 325 326 if (test_opt(sb, ERRORS_PANIC)) 327 panic("EXT4-fs panic from previous error\n"); 328 329 if (sb->s_flags & MS_RDONLY) 330 return; 331 332 printk(KERN_CRIT "Remounting filesystem read-only\n"); 333 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 334 sb->s_flags |= MS_RDONLY; 335 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 336 if (EXT4_SB(sb)->s_journal) 337 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 338 } 339 340 void ext4_warning(struct super_block *sb, const char *function, 341 const char *fmt, ...) 342 { 343 va_list args; 344 345 va_start(args, fmt); 346 printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ", 347 sb->s_id, function); 348 vprintk(fmt, args); 349 printk("\n"); 350 va_end(args); 351 } 352 353 void ext4_update_dynamic_rev(struct super_block *sb) 354 { 355 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 356 357 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 358 return; 359 360 ext4_warning(sb, __func__, 361 "updating to rev %d because of new feature flag, " 362 "running e2fsck is recommended", 363 EXT4_DYNAMIC_REV); 364 365 es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO); 366 es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE); 367 es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV); 368 /* leave es->s_feature_*compat flags alone */ 369 /* es->s_uuid will be set by e2fsck if empty */ 370 371 /* 372 * The rest of the superblock fields should be zero, and if not it 373 * means they are likely already in use, so leave them alone. We 374 * can leave it up to e2fsck to clean up any inconsistencies there. 375 */ 376 } 377 378 /* 379 * Open the external journal device 380 */ 381 static struct block_device *ext4_blkdev_get(dev_t dev) 382 { 383 struct block_device *bdev; 384 char b[BDEVNAME_SIZE]; 385 386 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); 387 if (IS_ERR(bdev)) 388 goto fail; 389 return bdev; 390 391 fail: 392 printk(KERN_ERR "EXT4: failed to open journal device %s: %ld\n", 393 __bdevname(dev, b), PTR_ERR(bdev)); 394 return NULL; 395 } 396 397 /* 398 * Release the journal device 399 */ 400 static int ext4_blkdev_put(struct block_device *bdev) 401 { 402 bd_release(bdev); 403 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 404 } 405 406 static int ext4_blkdev_remove(struct ext4_sb_info *sbi) 407 { 408 struct block_device *bdev; 409 int ret = -ENODEV; 410 411 bdev = sbi->journal_bdev; 412 if (bdev) { 413 ret = ext4_blkdev_put(bdev); 414 sbi->journal_bdev = NULL; 415 } 416 return ret; 417 } 418 419 static inline struct inode *orphan_list_entry(struct list_head *l) 420 { 421 return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode; 422 } 423 424 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) 425 { 426 struct list_head *l; 427 428 printk(KERN_ERR "sb orphan head is %d\n", 429 le32_to_cpu(sbi->s_es->s_last_orphan)); 430 431 printk(KERN_ERR "sb_info orphan list:\n"); 432 list_for_each(l, &sbi->s_orphan) { 433 struct inode *inode = orphan_list_entry(l); 434 printk(KERN_ERR " " 435 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 436 inode->i_sb->s_id, inode->i_ino, inode, 437 inode->i_mode, inode->i_nlink, 438 NEXT_ORPHAN(inode)); 439 } 440 } 441 442 static void ext4_put_super(struct super_block *sb) 443 { 444 struct ext4_sb_info *sbi = EXT4_SB(sb); 445 struct ext4_super_block *es = sbi->s_es; 446 int i, err; 447 448 ext4_mb_release(sb); 449 ext4_ext_release(sb); 450 ext4_xattr_put_super(sb); 451 err = jbd2_journal_destroy(sbi->s_journal); 452 sbi->s_journal = NULL; 453 if (err < 0) 454 ext4_abort(sb, __func__, "Couldn't clean up the journal"); 455 456 if (!(sb->s_flags & MS_RDONLY)) { 457 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 458 es->s_state = cpu_to_le16(sbi->s_mount_state); 459 ext4_commit_super(sb, es, 1); 460 } 461 if (sbi->s_proc) { 462 remove_proc_entry("inode_readahead_blks", sbi->s_proc); 463 remove_proc_entry(sb->s_id, ext4_proc_root); 464 } 465 466 for (i = 0; i < sbi->s_gdb_count; i++) 467 brelse(sbi->s_group_desc[i]); 468 kfree(sbi->s_group_desc); 469 kfree(sbi->s_flex_groups); 470 percpu_counter_destroy(&sbi->s_freeblocks_counter); 471 percpu_counter_destroy(&sbi->s_freeinodes_counter); 472 percpu_counter_destroy(&sbi->s_dirs_counter); 473 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 474 brelse(sbi->s_sbh); 475 #ifdef CONFIG_QUOTA 476 for (i = 0; i < MAXQUOTAS; i++) 477 kfree(sbi->s_qf_names[i]); 478 #endif 479 480 /* Debugging code just in case the in-memory inode orphan list 481 * isn't empty. The on-disk one can be non-empty if we've 482 * detected an error and taken the fs readonly, but the 483 * in-memory list had better be clean by this point. */ 484 if (!list_empty(&sbi->s_orphan)) 485 dump_orphan_list(sb, sbi); 486 J_ASSERT(list_empty(&sbi->s_orphan)); 487 488 invalidate_bdev(sb->s_bdev); 489 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { 490 /* 491 * Invalidate the journal device's buffers. We don't want them 492 * floating about in memory - the physical journal device may 493 * hotswapped, and it breaks the `ro-after' testing code. 494 */ 495 sync_blockdev(sbi->journal_bdev); 496 invalidate_bdev(sbi->journal_bdev); 497 ext4_blkdev_remove(sbi); 498 } 499 sb->s_fs_info = NULL; 500 kfree(sbi); 501 return; 502 } 503 504 static struct kmem_cache *ext4_inode_cachep; 505 506 /* 507 * Called inside transaction, so use GFP_NOFS 508 */ 509 static struct inode *ext4_alloc_inode(struct super_block *sb) 510 { 511 struct ext4_inode_info *ei; 512 513 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); 514 if (!ei) 515 return NULL; 516 #ifdef CONFIG_EXT4_FS_POSIX_ACL 517 ei->i_acl = EXT4_ACL_NOT_CACHED; 518 ei->i_default_acl = EXT4_ACL_NOT_CACHED; 519 #endif 520 ei->vfs_inode.i_version = 1; 521 ei->vfs_inode.i_data.writeback_index = 0; 522 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 523 INIT_LIST_HEAD(&ei->i_prealloc_list); 524 spin_lock_init(&ei->i_prealloc_lock); 525 jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode); 526 ei->i_reserved_data_blocks = 0; 527 ei->i_reserved_meta_blocks = 0; 528 ei->i_allocated_meta_blocks = 0; 529 ei->i_delalloc_reserved_flag = 0; 530 spin_lock_init(&(ei->i_block_reservation_lock)); 531 return &ei->vfs_inode; 532 } 533 534 static void ext4_destroy_inode(struct inode *inode) 535 { 536 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 537 printk("EXT4 Inode %p: orphan list check failed!\n", 538 EXT4_I(inode)); 539 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, 540 EXT4_I(inode), sizeof(struct ext4_inode_info), 541 true); 542 dump_stack(); 543 } 544 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 545 } 546 547 static void init_once(void *foo) 548 { 549 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 550 551 INIT_LIST_HEAD(&ei->i_orphan); 552 #ifdef CONFIG_EXT4_FS_XATTR 553 init_rwsem(&ei->xattr_sem); 554 #endif 555 init_rwsem(&ei->i_data_sem); 556 inode_init_once(&ei->vfs_inode); 557 } 558 559 static int init_inodecache(void) 560 { 561 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", 562 sizeof(struct ext4_inode_info), 563 0, (SLAB_RECLAIM_ACCOUNT| 564 SLAB_MEM_SPREAD), 565 init_once); 566 if (ext4_inode_cachep == NULL) 567 return -ENOMEM; 568 return 0; 569 } 570 571 static void destroy_inodecache(void) 572 { 573 kmem_cache_destroy(ext4_inode_cachep); 574 } 575 576 static void ext4_clear_inode(struct inode *inode) 577 { 578 #ifdef CONFIG_EXT4_FS_POSIX_ACL 579 if (EXT4_I(inode)->i_acl && 580 EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { 581 posix_acl_release(EXT4_I(inode)->i_acl); 582 EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED; 583 } 584 if (EXT4_I(inode)->i_default_acl && 585 EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) { 586 posix_acl_release(EXT4_I(inode)->i_default_acl); 587 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; 588 } 589 #endif 590 ext4_discard_preallocations(inode); 591 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, 592 &EXT4_I(inode)->jinode); 593 } 594 595 static inline void ext4_show_quota_options(struct seq_file *seq, 596 struct super_block *sb) 597 { 598 #if defined(CONFIG_QUOTA) 599 struct ext4_sb_info *sbi = EXT4_SB(sb); 600 601 if (sbi->s_jquota_fmt) 602 seq_printf(seq, ",jqfmt=%s", 603 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0"); 604 605 if (sbi->s_qf_names[USRQUOTA]) 606 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 607 608 if (sbi->s_qf_names[GRPQUOTA]) 609 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 610 611 if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) 612 seq_puts(seq, ",usrquota"); 613 614 if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) 615 seq_puts(seq, ",grpquota"); 616 #endif 617 } 618 619 /* 620 * Show an option if 621 * - it's set to a non-default value OR 622 * - if the per-sb default is different from the global default 623 */ 624 static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) 625 { 626 int def_errors; 627 unsigned long def_mount_opts; 628 struct super_block *sb = vfs->mnt_sb; 629 struct ext4_sb_info *sbi = EXT4_SB(sb); 630 struct ext4_super_block *es = sbi->s_es; 631 632 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 633 def_errors = le16_to_cpu(es->s_errors); 634 635 if (sbi->s_sb_block != 1) 636 seq_printf(seq, ",sb=%llu", sbi->s_sb_block); 637 if (test_opt(sb, MINIX_DF)) 638 seq_puts(seq, ",minixdf"); 639 if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS)) 640 seq_puts(seq, ",grpid"); 641 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) 642 seq_puts(seq, ",nogrpid"); 643 if (sbi->s_resuid != EXT4_DEF_RESUID || 644 le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) { 645 seq_printf(seq, ",resuid=%u", sbi->s_resuid); 646 } 647 if (sbi->s_resgid != EXT4_DEF_RESGID || 648 le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { 649 seq_printf(seq, ",resgid=%u", sbi->s_resgid); 650 } 651 if (test_opt(sb, ERRORS_RO)) { 652 if (def_errors == EXT4_ERRORS_PANIC || 653 def_errors == EXT4_ERRORS_CONTINUE) { 654 seq_puts(seq, ",errors=remount-ro"); 655 } 656 } 657 if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) 658 seq_puts(seq, ",errors=continue"); 659 if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) 660 seq_puts(seq, ",errors=panic"); 661 if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16)) 662 seq_puts(seq, ",nouid32"); 663 if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) 664 seq_puts(seq, ",debug"); 665 if (test_opt(sb, OLDALLOC)) 666 seq_puts(seq, ",oldalloc"); 667 #ifdef CONFIG_EXT4_FS_XATTR 668 if (test_opt(sb, XATTR_USER) && 669 !(def_mount_opts & EXT4_DEFM_XATTR_USER)) 670 seq_puts(seq, ",user_xattr"); 671 if (!test_opt(sb, XATTR_USER) && 672 (def_mount_opts & EXT4_DEFM_XATTR_USER)) { 673 seq_puts(seq, ",nouser_xattr"); 674 } 675 #endif 676 #ifdef CONFIG_EXT4_FS_POSIX_ACL 677 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) 678 seq_puts(seq, ",acl"); 679 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) 680 seq_puts(seq, ",noacl"); 681 #endif 682 if (!test_opt(sb, RESERVATION)) 683 seq_puts(seq, ",noreservation"); 684 if (sbi->s_commit_interval) { 685 seq_printf(seq, ",commit=%u", 686 (unsigned) (sbi->s_commit_interval / HZ)); 687 } 688 /* 689 * We're changing the default of barrier mount option, so 690 * let's always display its mount state so it's clear what its 691 * status is. 692 */ 693 seq_puts(seq, ",barrier="); 694 seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); 695 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) 696 seq_puts(seq, ",journal_async_commit"); 697 if (test_opt(sb, NOBH)) 698 seq_puts(seq, ",nobh"); 699 if (!test_opt(sb, EXTENTS)) 700 seq_puts(seq, ",noextents"); 701 if (test_opt(sb, I_VERSION)) 702 seq_puts(seq, ",i_version"); 703 if (!test_opt(sb, DELALLOC)) 704 seq_puts(seq, ",nodelalloc"); 705 706 707 if (sbi->s_stripe) 708 seq_printf(seq, ",stripe=%lu", sbi->s_stripe); 709 /* 710 * journal mode get enabled in different ways 711 * So just print the value even if we didn't specify it 712 */ 713 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 714 seq_puts(seq, ",data=journal"); 715 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 716 seq_puts(seq, ",data=ordered"); 717 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 718 seq_puts(seq, ",data=writeback"); 719 720 if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) 721 seq_printf(seq, ",inode_readahead_blks=%u", 722 sbi->s_inode_readahead_blks); 723 724 if (test_opt(sb, DATA_ERR_ABORT)) 725 seq_puts(seq, ",data_err=abort"); 726 727 ext4_show_quota_options(seq, sb); 728 return 0; 729 } 730 731 732 static struct inode *ext4_nfs_get_inode(struct super_block *sb, 733 u64 ino, u32 generation) 734 { 735 struct inode *inode; 736 737 if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) 738 return ERR_PTR(-ESTALE); 739 if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) 740 return ERR_PTR(-ESTALE); 741 742 /* iget isn't really right if the inode is currently unallocated!! 743 * 744 * ext4_read_inode will return a bad_inode if the inode had been 745 * deleted, so we should be safe. 746 * 747 * Currently we don't know the generation for parent directory, so 748 * a generation of 0 means "accept any" 749 */ 750 inode = ext4_iget(sb, ino); 751 if (IS_ERR(inode)) 752 return ERR_CAST(inode); 753 if (generation && inode->i_generation != generation) { 754 iput(inode); 755 return ERR_PTR(-ESTALE); 756 } 757 758 return inode; 759 } 760 761 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, 762 int fh_len, int fh_type) 763 { 764 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 765 ext4_nfs_get_inode); 766 } 767 768 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, 769 int fh_len, int fh_type) 770 { 771 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 772 ext4_nfs_get_inode); 773 } 774 775 #ifdef CONFIG_QUOTA 776 #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") 777 #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 778 779 static int ext4_dquot_initialize(struct inode *inode, int type); 780 static int ext4_dquot_drop(struct inode *inode); 781 static int ext4_write_dquot(struct dquot *dquot); 782 static int ext4_acquire_dquot(struct dquot *dquot); 783 static int ext4_release_dquot(struct dquot *dquot); 784 static int ext4_mark_dquot_dirty(struct dquot *dquot); 785 static int ext4_write_info(struct super_block *sb, int type); 786 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 787 char *path, int remount); 788 static int ext4_quota_on_mount(struct super_block *sb, int type); 789 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 790 size_t len, loff_t off); 791 static ssize_t ext4_quota_write(struct super_block *sb, int type, 792 const char *data, size_t len, loff_t off); 793 794 static struct dquot_operations ext4_quota_operations = { 795 .initialize = ext4_dquot_initialize, 796 .drop = ext4_dquot_drop, 797 .alloc_space = dquot_alloc_space, 798 .alloc_inode = dquot_alloc_inode, 799 .free_space = dquot_free_space, 800 .free_inode = dquot_free_inode, 801 .transfer = dquot_transfer, 802 .write_dquot = ext4_write_dquot, 803 .acquire_dquot = ext4_acquire_dquot, 804 .release_dquot = ext4_release_dquot, 805 .mark_dirty = ext4_mark_dquot_dirty, 806 .write_info = ext4_write_info 807 }; 808 809 static struct quotactl_ops ext4_qctl_operations = { 810 .quota_on = ext4_quota_on, 811 .quota_off = vfs_quota_off, 812 .quota_sync = vfs_quota_sync, 813 .get_info = vfs_get_dqinfo, 814 .set_info = vfs_set_dqinfo, 815 .get_dqblk = vfs_get_dqblk, 816 .set_dqblk = vfs_set_dqblk 817 }; 818 #endif 819 820 static const struct super_operations ext4_sops = { 821 .alloc_inode = ext4_alloc_inode, 822 .destroy_inode = ext4_destroy_inode, 823 .write_inode = ext4_write_inode, 824 .dirty_inode = ext4_dirty_inode, 825 .delete_inode = ext4_delete_inode, 826 .put_super = ext4_put_super, 827 .write_super = ext4_write_super, 828 .sync_fs = ext4_sync_fs, 829 .write_super_lockfs = ext4_write_super_lockfs, 830 .unlockfs = ext4_unlockfs, 831 .statfs = ext4_statfs, 832 .remount_fs = ext4_remount, 833 .clear_inode = ext4_clear_inode, 834 .show_options = ext4_show_options, 835 #ifdef CONFIG_QUOTA 836 .quota_read = ext4_quota_read, 837 .quota_write = ext4_quota_write, 838 #endif 839 }; 840 841 static const struct export_operations ext4_export_ops = { 842 .fh_to_dentry = ext4_fh_to_dentry, 843 .fh_to_parent = ext4_fh_to_parent, 844 .get_parent = ext4_get_parent, 845 }; 846 847 enum { 848 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 849 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 850 Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, 851 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 852 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, 853 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, 854 Opt_journal_checksum, Opt_journal_async_commit, 855 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 856 Opt_data_err_abort, Opt_data_err_ignore, 857 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 858 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 859 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, 860 Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, 861 Opt_stripe, Opt_delalloc, Opt_nodelalloc, 862 Opt_inode_readahead_blks 863 }; 864 865 static const match_table_t tokens = { 866 {Opt_bsd_df, "bsddf"}, 867 {Opt_minix_df, "minixdf"}, 868 {Opt_grpid, "grpid"}, 869 {Opt_grpid, "bsdgroups"}, 870 {Opt_nogrpid, "nogrpid"}, 871 {Opt_nogrpid, "sysvgroups"}, 872 {Opt_resgid, "resgid=%u"}, 873 {Opt_resuid, "resuid=%u"}, 874 {Opt_sb, "sb=%u"}, 875 {Opt_err_cont, "errors=continue"}, 876 {Opt_err_panic, "errors=panic"}, 877 {Opt_err_ro, "errors=remount-ro"}, 878 {Opt_nouid32, "nouid32"}, 879 {Opt_debug, "debug"}, 880 {Opt_oldalloc, "oldalloc"}, 881 {Opt_orlov, "orlov"}, 882 {Opt_user_xattr, "user_xattr"}, 883 {Opt_nouser_xattr, "nouser_xattr"}, 884 {Opt_acl, "acl"}, 885 {Opt_noacl, "noacl"}, 886 {Opt_reservation, "reservation"}, 887 {Opt_noreservation, "noreservation"}, 888 {Opt_noload, "noload"}, 889 {Opt_nobh, "nobh"}, 890 {Opt_bh, "bh"}, 891 {Opt_commit, "commit=%u"}, 892 {Opt_journal_update, "journal=update"}, 893 {Opt_journal_inum, "journal=%u"}, 894 {Opt_journal_dev, "journal_dev=%u"}, 895 {Opt_journal_checksum, "journal_checksum"}, 896 {Opt_journal_async_commit, "journal_async_commit"}, 897 {Opt_abort, "abort"}, 898 {Opt_data_journal, "data=journal"}, 899 {Opt_data_ordered, "data=ordered"}, 900 {Opt_data_writeback, "data=writeback"}, 901 {Opt_data_err_abort, "data_err=abort"}, 902 {Opt_data_err_ignore, "data_err=ignore"}, 903 {Opt_offusrjquota, "usrjquota="}, 904 {Opt_usrjquota, "usrjquota=%s"}, 905 {Opt_offgrpjquota, "grpjquota="}, 906 {Opt_grpjquota, "grpjquota=%s"}, 907 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 908 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 909 {Opt_grpquota, "grpquota"}, 910 {Opt_noquota, "noquota"}, 911 {Opt_quota, "quota"}, 912 {Opt_usrquota, "usrquota"}, 913 {Opt_barrier, "barrier=%u"}, 914 {Opt_extents, "extents"}, 915 {Opt_noextents, "noextents"}, 916 {Opt_i_version, "i_version"}, 917 {Opt_stripe, "stripe=%u"}, 918 {Opt_resize, "resize"}, 919 {Opt_delalloc, "delalloc"}, 920 {Opt_nodelalloc, "nodelalloc"}, 921 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 922 {Opt_err, NULL}, 923 }; 924 925 static ext4_fsblk_t get_sb_block(void **data) 926 { 927 ext4_fsblk_t sb_block; 928 char *options = (char *) *data; 929 930 if (!options || strncmp(options, "sb=", 3) != 0) 931 return 1; /* Default location */ 932 options += 3; 933 /*todo: use simple_strtoll with >32bit ext4 */ 934 sb_block = simple_strtoul(options, &options, 0); 935 if (*options && *options != ',') { 936 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", 937 (char *) *data); 938 return 1; 939 } 940 if (*options == ',') 941 options++; 942 *data = (void *) options; 943 return sb_block; 944 } 945 946 static int parse_options(char *options, struct super_block *sb, 947 unsigned int *inum, unsigned long *journal_devnum, 948 ext4_fsblk_t *n_blocks_count, int is_remount) 949 { 950 struct ext4_sb_info *sbi = EXT4_SB(sb); 951 char *p; 952 substring_t args[MAX_OPT_ARGS]; 953 int data_opt = 0; 954 int option; 955 #ifdef CONFIG_QUOTA 956 int qtype, qfmt; 957 char *qname; 958 #endif 959 ext4_fsblk_t last_block; 960 961 if (!options) 962 return 1; 963 964 while ((p = strsep(&options, ",")) != NULL) { 965 int token; 966 if (!*p) 967 continue; 968 969 token = match_token(p, tokens, args); 970 switch (token) { 971 case Opt_bsd_df: 972 clear_opt(sbi->s_mount_opt, MINIX_DF); 973 break; 974 case Opt_minix_df: 975 set_opt(sbi->s_mount_opt, MINIX_DF); 976 break; 977 case Opt_grpid: 978 set_opt(sbi->s_mount_opt, GRPID); 979 break; 980 case Opt_nogrpid: 981 clear_opt(sbi->s_mount_opt, GRPID); 982 break; 983 case Opt_resuid: 984 if (match_int(&args[0], &option)) 985 return 0; 986 sbi->s_resuid = option; 987 break; 988 case Opt_resgid: 989 if (match_int(&args[0], &option)) 990 return 0; 991 sbi->s_resgid = option; 992 break; 993 case Opt_sb: 994 /* handled by get_sb_block() instead of here */ 995 /* *sb_block = match_int(&args[0]); */ 996 break; 997 case Opt_err_panic: 998 clear_opt(sbi->s_mount_opt, ERRORS_CONT); 999 clear_opt(sbi->s_mount_opt, ERRORS_RO); 1000 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 1001 break; 1002 case Opt_err_ro: 1003 clear_opt(sbi->s_mount_opt, ERRORS_CONT); 1004 clear_opt(sbi->s_mount_opt, ERRORS_PANIC); 1005 set_opt(sbi->s_mount_opt, ERRORS_RO); 1006 break; 1007 case Opt_err_cont: 1008 clear_opt(sbi->s_mount_opt, ERRORS_RO); 1009 clear_opt(sbi->s_mount_opt, ERRORS_PANIC); 1010 set_opt(sbi->s_mount_opt, ERRORS_CONT); 1011 break; 1012 case Opt_nouid32: 1013 set_opt(sbi->s_mount_opt, NO_UID32); 1014 break; 1015 case Opt_debug: 1016 set_opt(sbi->s_mount_opt, DEBUG); 1017 break; 1018 case Opt_oldalloc: 1019 set_opt(sbi->s_mount_opt, OLDALLOC); 1020 break; 1021 case Opt_orlov: 1022 clear_opt(sbi->s_mount_opt, OLDALLOC); 1023 break; 1024 #ifdef CONFIG_EXT4_FS_XATTR 1025 case Opt_user_xattr: 1026 set_opt(sbi->s_mount_opt, XATTR_USER); 1027 break; 1028 case Opt_nouser_xattr: 1029 clear_opt(sbi->s_mount_opt, XATTR_USER); 1030 break; 1031 #else 1032 case Opt_user_xattr: 1033 case Opt_nouser_xattr: 1034 printk(KERN_ERR "EXT4 (no)user_xattr options " 1035 "not supported\n"); 1036 break; 1037 #endif 1038 #ifdef CONFIG_EXT4_FS_POSIX_ACL 1039 case Opt_acl: 1040 set_opt(sbi->s_mount_opt, POSIX_ACL); 1041 break; 1042 case Opt_noacl: 1043 clear_opt(sbi->s_mount_opt, POSIX_ACL); 1044 break; 1045 #else 1046 case Opt_acl: 1047 case Opt_noacl: 1048 printk(KERN_ERR "EXT4 (no)acl options " 1049 "not supported\n"); 1050 break; 1051 #endif 1052 case Opt_reservation: 1053 set_opt(sbi->s_mount_opt, RESERVATION); 1054 break; 1055 case Opt_noreservation: 1056 clear_opt(sbi->s_mount_opt, RESERVATION); 1057 break; 1058 case Opt_journal_update: 1059 /* @@@ FIXME */ 1060 /* Eventually we will want to be able to create 1061 a journal file here. For now, only allow the 1062 user to specify an existing inode to be the 1063 journal file. */ 1064 if (is_remount) { 1065 printk(KERN_ERR "EXT4-fs: cannot specify " 1066 "journal on remount\n"); 1067 return 0; 1068 } 1069 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); 1070 break; 1071 case Opt_journal_inum: 1072 if (is_remount) { 1073 printk(KERN_ERR "EXT4-fs: cannot specify " 1074 "journal on remount\n"); 1075 return 0; 1076 } 1077 if (match_int(&args[0], &option)) 1078 return 0; 1079 *inum = option; 1080 break; 1081 case Opt_journal_dev: 1082 if (is_remount) { 1083 printk(KERN_ERR "EXT4-fs: cannot specify " 1084 "journal on remount\n"); 1085 return 0; 1086 } 1087 if (match_int(&args[0], &option)) 1088 return 0; 1089 *journal_devnum = option; 1090 break; 1091 case Opt_journal_checksum: 1092 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1093 break; 1094 case Opt_journal_async_commit: 1095 set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); 1096 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1097 break; 1098 case Opt_noload: 1099 set_opt(sbi->s_mount_opt, NOLOAD); 1100 break; 1101 case Opt_commit: 1102 if (match_int(&args[0], &option)) 1103 return 0; 1104 if (option < 0) 1105 return 0; 1106 if (option == 0) 1107 option = JBD2_DEFAULT_MAX_COMMIT_AGE; 1108 sbi->s_commit_interval = HZ * option; 1109 break; 1110 case Opt_data_journal: 1111 data_opt = EXT4_MOUNT_JOURNAL_DATA; 1112 goto datacheck; 1113 case Opt_data_ordered: 1114 data_opt = EXT4_MOUNT_ORDERED_DATA; 1115 goto datacheck; 1116 case Opt_data_writeback: 1117 data_opt = EXT4_MOUNT_WRITEBACK_DATA; 1118 datacheck: 1119 if (is_remount) { 1120 if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) 1121 != data_opt) { 1122 printk(KERN_ERR 1123 "EXT4-fs: cannot change data " 1124 "mode on remount\n"); 1125 return 0; 1126 } 1127 } else { 1128 sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS; 1129 sbi->s_mount_opt |= data_opt; 1130 } 1131 break; 1132 case Opt_data_err_abort: 1133 set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1134 break; 1135 case Opt_data_err_ignore: 1136 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1137 break; 1138 #ifdef CONFIG_QUOTA 1139 case Opt_usrjquota: 1140 qtype = USRQUOTA; 1141 goto set_qf_name; 1142 case Opt_grpjquota: 1143 qtype = GRPQUOTA; 1144 set_qf_name: 1145 if ((sb_any_quota_enabled(sb) || 1146 sb_any_quota_suspended(sb)) && 1147 !sbi->s_qf_names[qtype]) { 1148 printk(KERN_ERR 1149 "EXT4-fs: Cannot change journaled " 1150 "quota options when quota turned on.\n"); 1151 return 0; 1152 } 1153 qname = match_strdup(&args[0]); 1154 if (!qname) { 1155 printk(KERN_ERR 1156 "EXT4-fs: not enough memory for " 1157 "storing quotafile name.\n"); 1158 return 0; 1159 } 1160 if (sbi->s_qf_names[qtype] && 1161 strcmp(sbi->s_qf_names[qtype], qname)) { 1162 printk(KERN_ERR 1163 "EXT4-fs: %s quota file already " 1164 "specified.\n", QTYPE2NAME(qtype)); 1165 kfree(qname); 1166 return 0; 1167 } 1168 sbi->s_qf_names[qtype] = qname; 1169 if (strchr(sbi->s_qf_names[qtype], '/')) { 1170 printk(KERN_ERR 1171 "EXT4-fs: quotafile must be on " 1172 "filesystem root.\n"); 1173 kfree(sbi->s_qf_names[qtype]); 1174 sbi->s_qf_names[qtype] = NULL; 1175 return 0; 1176 } 1177 set_opt(sbi->s_mount_opt, QUOTA); 1178 break; 1179 case Opt_offusrjquota: 1180 qtype = USRQUOTA; 1181 goto clear_qf_name; 1182 case Opt_offgrpjquota: 1183 qtype = GRPQUOTA; 1184 clear_qf_name: 1185 if ((sb_any_quota_enabled(sb) || 1186 sb_any_quota_suspended(sb)) && 1187 sbi->s_qf_names[qtype]) { 1188 printk(KERN_ERR "EXT4-fs: Cannot change " 1189 "journaled quota options when " 1190 "quota turned on.\n"); 1191 return 0; 1192 } 1193 /* 1194 * The space will be released later when all options 1195 * are confirmed to be correct 1196 */ 1197 sbi->s_qf_names[qtype] = NULL; 1198 break; 1199 case Opt_jqfmt_vfsold: 1200 qfmt = QFMT_VFS_OLD; 1201 goto set_qf_format; 1202 case Opt_jqfmt_vfsv0: 1203 qfmt = QFMT_VFS_V0; 1204 set_qf_format: 1205 if ((sb_any_quota_enabled(sb) || 1206 sb_any_quota_suspended(sb)) && 1207 sbi->s_jquota_fmt != qfmt) { 1208 printk(KERN_ERR "EXT4-fs: Cannot change " 1209 "journaled quota options when " 1210 "quota turned on.\n"); 1211 return 0; 1212 } 1213 sbi->s_jquota_fmt = qfmt; 1214 break; 1215 case Opt_quota: 1216 case Opt_usrquota: 1217 set_opt(sbi->s_mount_opt, QUOTA); 1218 set_opt(sbi->s_mount_opt, USRQUOTA); 1219 break; 1220 case Opt_grpquota: 1221 set_opt(sbi->s_mount_opt, QUOTA); 1222 set_opt(sbi->s_mount_opt, GRPQUOTA); 1223 break; 1224 case Opt_noquota: 1225 if (sb_any_quota_enabled(sb)) { 1226 printk(KERN_ERR "EXT4-fs: Cannot change quota " 1227 "options when quota turned on.\n"); 1228 return 0; 1229 } 1230 clear_opt(sbi->s_mount_opt, QUOTA); 1231 clear_opt(sbi->s_mount_opt, USRQUOTA); 1232 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1233 break; 1234 #else 1235 case Opt_quota: 1236 case Opt_usrquota: 1237 case Opt_grpquota: 1238 printk(KERN_ERR 1239 "EXT4-fs: quota options not supported.\n"); 1240 break; 1241 case Opt_usrjquota: 1242 case Opt_grpjquota: 1243 case Opt_offusrjquota: 1244 case Opt_offgrpjquota: 1245 case Opt_jqfmt_vfsold: 1246 case Opt_jqfmt_vfsv0: 1247 printk(KERN_ERR 1248 "EXT4-fs: journaled quota options not " 1249 "supported.\n"); 1250 break; 1251 case Opt_noquota: 1252 break; 1253 #endif 1254 case Opt_abort: 1255 set_opt(sbi->s_mount_opt, ABORT); 1256 break; 1257 case Opt_barrier: 1258 if (match_int(&args[0], &option)) 1259 return 0; 1260 if (option) 1261 set_opt(sbi->s_mount_opt, BARRIER); 1262 else 1263 clear_opt(sbi->s_mount_opt, BARRIER); 1264 break; 1265 case Opt_ignore: 1266 break; 1267 case Opt_resize: 1268 if (!is_remount) { 1269 printk("EXT4-fs: resize option only available " 1270 "for remount\n"); 1271 return 0; 1272 } 1273 if (match_int(&args[0], &option) != 0) 1274 return 0; 1275 *n_blocks_count = option; 1276 break; 1277 case Opt_nobh: 1278 set_opt(sbi->s_mount_opt, NOBH); 1279 break; 1280 case Opt_bh: 1281 clear_opt(sbi->s_mount_opt, NOBH); 1282 break; 1283 case Opt_extents: 1284 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, 1285 EXT4_FEATURE_INCOMPAT_EXTENTS)) { 1286 ext4_warning(sb, __func__, 1287 "extents feature not enabled " 1288 "on this filesystem, use tune2fs\n"); 1289 return 0; 1290 } 1291 set_opt(sbi->s_mount_opt, EXTENTS); 1292 break; 1293 case Opt_noextents: 1294 /* 1295 * When e2fsprogs support resizing an already existing 1296 * ext3 file system to greater than 2**32 we need to 1297 * add support to block allocator to handle growing 1298 * already existing block mapped inode so that blocks 1299 * allocated for them fall within 2**32 1300 */ 1301 last_block = ext4_blocks_count(sbi->s_es) - 1; 1302 if (last_block > 0xffffffffULL) { 1303 printk(KERN_ERR "EXT4-fs: Filesystem too " 1304 "large to mount with " 1305 "-o noextents options\n"); 1306 return 0; 1307 } 1308 clear_opt(sbi->s_mount_opt, EXTENTS); 1309 break; 1310 case Opt_i_version: 1311 set_opt(sbi->s_mount_opt, I_VERSION); 1312 sb->s_flags |= MS_I_VERSION; 1313 break; 1314 case Opt_nodelalloc: 1315 clear_opt(sbi->s_mount_opt, DELALLOC); 1316 break; 1317 case Opt_stripe: 1318 if (match_int(&args[0], &option)) 1319 return 0; 1320 if (option < 0) 1321 return 0; 1322 sbi->s_stripe = option; 1323 break; 1324 case Opt_delalloc: 1325 set_opt(sbi->s_mount_opt, DELALLOC); 1326 break; 1327 case Opt_inode_readahead_blks: 1328 if (match_int(&args[0], &option)) 1329 return 0; 1330 if (option < 0 || option > (1 << 30)) 1331 return 0; 1332 sbi->s_inode_readahead_blks = option; 1333 break; 1334 default: 1335 printk(KERN_ERR 1336 "EXT4-fs: Unrecognized mount option \"%s\" " 1337 "or missing value\n", p); 1338 return 0; 1339 } 1340 } 1341 #ifdef CONFIG_QUOTA 1342 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1343 if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) && 1344 sbi->s_qf_names[USRQUOTA]) 1345 clear_opt(sbi->s_mount_opt, USRQUOTA); 1346 1347 if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) && 1348 sbi->s_qf_names[GRPQUOTA]) 1349 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1350 1351 if ((sbi->s_qf_names[USRQUOTA] && 1352 (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || 1353 (sbi->s_qf_names[GRPQUOTA] && 1354 (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { 1355 printk(KERN_ERR "EXT4-fs: old and new quota " 1356 "format mixing.\n"); 1357 return 0; 1358 } 1359 1360 if (!sbi->s_jquota_fmt) { 1361 printk(KERN_ERR "EXT4-fs: journaled quota format " 1362 "not specified.\n"); 1363 return 0; 1364 } 1365 } else { 1366 if (sbi->s_jquota_fmt) { 1367 printk(KERN_ERR "EXT4-fs: journaled quota format " 1368 "specified with no journaling " 1369 "enabled.\n"); 1370 return 0; 1371 } 1372 } 1373 #endif 1374 return 1; 1375 } 1376 1377 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, 1378 int read_only) 1379 { 1380 struct ext4_sb_info *sbi = EXT4_SB(sb); 1381 int res = 0; 1382 1383 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1384 printk(KERN_ERR "EXT4-fs warning: revision level too high, " 1385 "forcing read-only mode\n"); 1386 res = MS_RDONLY; 1387 } 1388 if (read_only) 1389 return res; 1390 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1391 printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " 1392 "running e2fsck is recommended\n"); 1393 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1394 printk(KERN_WARNING 1395 "EXT4-fs warning: mounting fs with errors, " 1396 "running e2fsck is recommended\n"); 1397 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1398 le16_to_cpu(es->s_mnt_count) >= 1399 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1400 printk(KERN_WARNING 1401 "EXT4-fs warning: maximal mount count reached, " 1402 "running e2fsck is recommended\n"); 1403 else if (le32_to_cpu(es->s_checkinterval) && 1404 (le32_to_cpu(es->s_lastcheck) + 1405 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1406 printk(KERN_WARNING 1407 "EXT4-fs warning: checktime reached, " 1408 "running e2fsck is recommended\n"); 1409 #if 0 1410 /* @@@ We _will_ want to clear the valid bit if we find 1411 * inconsistencies, to force a fsck at reboot. But for 1412 * a plain journaled filesystem we can keep it set as 1413 * valid forever! :) 1414 */ 1415 es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 1416 #endif 1417 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1418 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1419 le16_add_cpu(&es->s_mnt_count, 1); 1420 es->s_mtime = cpu_to_le32(get_seconds()); 1421 ext4_update_dynamic_rev(sb); 1422 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1423 1424 ext4_commit_super(sb, es, 1); 1425 if (test_opt(sb, DEBUG)) 1426 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%lu, " 1427 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 1428 sb->s_blocksize, 1429 sbi->s_groups_count, 1430 EXT4_BLOCKS_PER_GROUP(sb), 1431 EXT4_INODES_PER_GROUP(sb), 1432 sbi->s_mount_opt); 1433 1434 printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", 1435 sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : 1436 "external", EXT4_SB(sb)->s_journal->j_devname); 1437 return res; 1438 } 1439 1440 static int ext4_fill_flex_info(struct super_block *sb) 1441 { 1442 struct ext4_sb_info *sbi = EXT4_SB(sb); 1443 struct ext4_group_desc *gdp = NULL; 1444 struct buffer_head *bh; 1445 ext4_group_t flex_group_count; 1446 ext4_group_t flex_group; 1447 int groups_per_flex = 0; 1448 __u64 block_bitmap = 0; 1449 int i; 1450 1451 if (!sbi->s_es->s_log_groups_per_flex) { 1452 sbi->s_log_groups_per_flex = 0; 1453 return 1; 1454 } 1455 1456 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 1457 groups_per_flex = 1 << sbi->s_log_groups_per_flex; 1458 1459 /* We allocate both existing and potentially added groups */ 1460 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1461 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << 1462 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; 1463 sbi->s_flex_groups = kzalloc(flex_group_count * 1464 sizeof(struct flex_groups), GFP_KERNEL); 1465 if (sbi->s_flex_groups == NULL) { 1466 printk(KERN_ERR "EXT4-fs: not enough memory for " 1467 "%lu flex groups\n", flex_group_count); 1468 goto failed; 1469 } 1470 1471 gdp = ext4_get_group_desc(sb, 1, &bh); 1472 block_bitmap = ext4_block_bitmap(sb, gdp) - 1; 1473 1474 for (i = 0; i < sbi->s_groups_count; i++) { 1475 gdp = ext4_get_group_desc(sb, i, &bh); 1476 1477 flex_group = ext4_flex_group(sbi, i); 1478 sbi->s_flex_groups[flex_group].free_inodes += 1479 le16_to_cpu(gdp->bg_free_inodes_count); 1480 sbi->s_flex_groups[flex_group].free_blocks += 1481 le16_to_cpu(gdp->bg_free_blocks_count); 1482 } 1483 1484 return 1; 1485 failed: 1486 return 0; 1487 } 1488 1489 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, 1490 struct ext4_group_desc *gdp) 1491 { 1492 __u16 crc = 0; 1493 1494 if (sbi->s_es->s_feature_ro_compat & 1495 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { 1496 int offset = offsetof(struct ext4_group_desc, bg_checksum); 1497 __le32 le_group = cpu_to_le32(block_group); 1498 1499 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); 1500 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); 1501 crc = crc16(crc, (__u8 *)gdp, offset); 1502 offset += sizeof(gdp->bg_checksum); /* skip checksum */ 1503 /* for checksum of struct ext4_group_desc do the rest...*/ 1504 if ((sbi->s_es->s_feature_incompat & 1505 cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && 1506 offset < le16_to_cpu(sbi->s_es->s_desc_size)) 1507 crc = crc16(crc, (__u8 *)gdp + offset, 1508 le16_to_cpu(sbi->s_es->s_desc_size) - 1509 offset); 1510 } 1511 1512 return cpu_to_le16(crc); 1513 } 1514 1515 int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, 1516 struct ext4_group_desc *gdp) 1517 { 1518 if ((sbi->s_es->s_feature_ro_compat & 1519 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) && 1520 (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp))) 1521 return 0; 1522 1523 return 1; 1524 } 1525 1526 /* Called at mount-time, super-block is locked */ 1527 static int ext4_check_descriptors(struct super_block *sb) 1528 { 1529 struct ext4_sb_info *sbi = EXT4_SB(sb); 1530 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); 1531 ext4_fsblk_t last_block; 1532 ext4_fsblk_t block_bitmap; 1533 ext4_fsblk_t inode_bitmap; 1534 ext4_fsblk_t inode_table; 1535 int flexbg_flag = 0; 1536 ext4_group_t i; 1537 1538 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 1539 flexbg_flag = 1; 1540 1541 ext4_debug("Checking group descriptors"); 1542 1543 for (i = 0; i < sbi->s_groups_count; i++) { 1544 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 1545 1546 if (i == sbi->s_groups_count - 1 || flexbg_flag) 1547 last_block = ext4_blocks_count(sbi->s_es) - 1; 1548 else 1549 last_block = first_block + 1550 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 1551 1552 block_bitmap = ext4_block_bitmap(sb, gdp); 1553 if (block_bitmap < first_block || block_bitmap > last_block) { 1554 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1555 "Block bitmap for group %lu not in group " 1556 "(block %llu)!\n", i, block_bitmap); 1557 return 0; 1558 } 1559 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1560 if (inode_bitmap < first_block || inode_bitmap > last_block) { 1561 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1562 "Inode bitmap for group %lu not in group " 1563 "(block %llu)!\n", i, inode_bitmap); 1564 return 0; 1565 } 1566 inode_table = ext4_inode_table(sb, gdp); 1567 if (inode_table < first_block || 1568 inode_table + sbi->s_itb_per_group - 1 > last_block) { 1569 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1570 "Inode table for group %lu not in group " 1571 "(block %llu)!\n", i, inode_table); 1572 return 0; 1573 } 1574 spin_lock(sb_bgl_lock(sbi, i)); 1575 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1576 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1577 "Checksum for group %lu failed (%u!=%u)\n", 1578 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1579 gdp)), le16_to_cpu(gdp->bg_checksum)); 1580 if (!(sb->s_flags & MS_RDONLY)) { 1581 spin_unlock(sb_bgl_lock(sbi, i)); 1582 return 0; 1583 } 1584 } 1585 spin_unlock(sb_bgl_lock(sbi, i)); 1586 if (!flexbg_flag) 1587 first_block += EXT4_BLOCKS_PER_GROUP(sb); 1588 } 1589 1590 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 1591 sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); 1592 return 1; 1593 } 1594 1595 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at 1596 * the superblock) which were deleted from all directories, but held open by 1597 * a process at the time of a crash. We walk the list and try to delete these 1598 * inodes at recovery time (only with a read-write filesystem). 1599 * 1600 * In order to keep the orphan inode chain consistent during traversal (in 1601 * case of crash during recovery), we link each inode into the superblock 1602 * orphan list_head and handle it the same way as an inode deletion during 1603 * normal operation (which journals the operations for us). 1604 * 1605 * We only do an iget() and an iput() on each inode, which is very safe if we 1606 * accidentally point at an in-use or already deleted inode. The worst that 1607 * can happen in this case is that we get a "bit already cleared" message from 1608 * ext4_free_inode(). The only reason we would point at a wrong inode is if 1609 * e2fsck was run on this filesystem, and it must have already done the orphan 1610 * inode cleanup for us, so we can safely abort without any further action. 1611 */ 1612 static void ext4_orphan_cleanup(struct super_block *sb, 1613 struct ext4_super_block *es) 1614 { 1615 unsigned int s_flags = sb->s_flags; 1616 int nr_orphans = 0, nr_truncates = 0; 1617 #ifdef CONFIG_QUOTA 1618 int i; 1619 #endif 1620 if (!es->s_last_orphan) { 1621 jbd_debug(4, "no orphan inodes to clean up\n"); 1622 return; 1623 } 1624 1625 if (bdev_read_only(sb->s_bdev)) { 1626 printk(KERN_ERR "EXT4-fs: write access " 1627 "unavailable, skipping orphan cleanup.\n"); 1628 return; 1629 } 1630 1631 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 1632 if (es->s_last_orphan) 1633 jbd_debug(1, "Errors on filesystem, " 1634 "clearing orphan list.\n"); 1635 es->s_last_orphan = 0; 1636 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 1637 return; 1638 } 1639 1640 if (s_flags & MS_RDONLY) { 1641 printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n", 1642 sb->s_id); 1643 sb->s_flags &= ~MS_RDONLY; 1644 } 1645 #ifdef CONFIG_QUOTA 1646 /* Needed for iput() to work correctly and not trash data */ 1647 sb->s_flags |= MS_ACTIVE; 1648 /* Turn on quotas so that they are updated correctly */ 1649 for (i = 0; i < MAXQUOTAS; i++) { 1650 if (EXT4_SB(sb)->s_qf_names[i]) { 1651 int ret = ext4_quota_on_mount(sb, i); 1652 if (ret < 0) 1653 printk(KERN_ERR 1654 "EXT4-fs: Cannot turn on journaled " 1655 "quota: error %d\n", ret); 1656 } 1657 } 1658 #endif 1659 1660 while (es->s_last_orphan) { 1661 struct inode *inode; 1662 1663 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); 1664 if (IS_ERR(inode)) { 1665 es->s_last_orphan = 0; 1666 break; 1667 } 1668 1669 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 1670 DQUOT_INIT(inode); 1671 if (inode->i_nlink) { 1672 printk(KERN_DEBUG 1673 "%s: truncating inode %lu to %lld bytes\n", 1674 __func__, inode->i_ino, inode->i_size); 1675 jbd_debug(2, "truncating inode %lu to %lld bytes\n", 1676 inode->i_ino, inode->i_size); 1677 ext4_truncate(inode); 1678 nr_truncates++; 1679 } else { 1680 printk(KERN_DEBUG 1681 "%s: deleting unreferenced inode %lu\n", 1682 __func__, inode->i_ino); 1683 jbd_debug(2, "deleting unreferenced inode %lu\n", 1684 inode->i_ino); 1685 nr_orphans++; 1686 } 1687 iput(inode); /* The delete magic happens here! */ 1688 } 1689 1690 #define PLURAL(x) (x), ((x) == 1) ? "" : "s" 1691 1692 if (nr_orphans) 1693 printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", 1694 sb->s_id, PLURAL(nr_orphans)); 1695 if (nr_truncates) 1696 printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n", 1697 sb->s_id, PLURAL(nr_truncates)); 1698 #ifdef CONFIG_QUOTA 1699 /* Turn quotas off */ 1700 for (i = 0; i < MAXQUOTAS; i++) { 1701 if (sb_dqopt(sb)->files[i]) 1702 vfs_quota_off(sb, i, 0); 1703 } 1704 #endif 1705 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 1706 } 1707 /* 1708 * Maximal extent format file size. 1709 * Resulting logical blkno at s_maxbytes must fit in our on-disk 1710 * extent format containers, within a sector_t, and within i_blocks 1711 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, 1712 * so that won't be a limiting factor. 1713 * 1714 * Note, this does *not* consider any metadata overhead for vfs i_blocks. 1715 */ 1716 static loff_t ext4_max_size(int blkbits, int has_huge_files) 1717 { 1718 loff_t res; 1719 loff_t upper_limit = MAX_LFS_FILESIZE; 1720 1721 /* small i_blocks in vfs inode? */ 1722 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 1723 /* 1724 * CONFIG_LSF is not enabled implies the inode 1725 * i_block represent total blocks in 512 bytes 1726 * 32 == size of vfs inode i_blocks * 8 1727 */ 1728 upper_limit = (1LL << 32) - 1; 1729 1730 /* total blocks in file system block size */ 1731 upper_limit >>= (blkbits - 9); 1732 upper_limit <<= blkbits; 1733 } 1734 1735 /* 32-bit extent-start container, ee_block */ 1736 res = 1LL << 32; 1737 res <<= blkbits; 1738 res -= 1; 1739 1740 /* Sanity check against vm- & vfs- imposed limits */ 1741 if (res > upper_limit) 1742 res = upper_limit; 1743 1744 return res; 1745 } 1746 1747 /* 1748 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect 1749 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. 1750 * We need to be 1 filesystem block less than the 2^48 sector limit. 1751 */ 1752 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) 1753 { 1754 loff_t res = EXT4_NDIR_BLOCKS; 1755 int meta_blocks; 1756 loff_t upper_limit; 1757 /* This is calculated to be the largest file size for a 1758 * dense, bitmapped file such that the total number of 1759 * sectors in the file, including data and all indirect blocks, 1760 * does not exceed 2^48 -1 1761 * __u32 i_blocks_lo and _u16 i_blocks_high representing the 1762 * total number of 512 bytes blocks of the file 1763 */ 1764 1765 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 1766 /* 1767 * !has_huge_files or CONFIG_LSF is not enabled 1768 * implies the inode i_block represent total blocks in 1769 * 512 bytes 32 == size of vfs inode i_blocks * 8 1770 */ 1771 upper_limit = (1LL << 32) - 1; 1772 1773 /* total blocks in file system block size */ 1774 upper_limit >>= (bits - 9); 1775 1776 } else { 1777 /* 1778 * We use 48 bit ext4_inode i_blocks 1779 * With EXT4_HUGE_FILE_FL set the i_blocks 1780 * represent total number of blocks in 1781 * file system block size 1782 */ 1783 upper_limit = (1LL << 48) - 1; 1784 1785 } 1786 1787 /* indirect blocks */ 1788 meta_blocks = 1; 1789 /* double indirect blocks */ 1790 meta_blocks += 1 + (1LL << (bits-2)); 1791 /* tripple indirect blocks */ 1792 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); 1793 1794 upper_limit -= meta_blocks; 1795 upper_limit <<= bits; 1796 1797 res += 1LL << (bits-2); 1798 res += 1LL << (2*(bits-2)); 1799 res += 1LL << (3*(bits-2)); 1800 res <<= bits; 1801 if (res > upper_limit) 1802 res = upper_limit; 1803 1804 if (res > MAX_LFS_FILESIZE) 1805 res = MAX_LFS_FILESIZE; 1806 1807 return res; 1808 } 1809 1810 static ext4_fsblk_t descriptor_loc(struct super_block *sb, 1811 ext4_fsblk_t logical_sb_block, int nr) 1812 { 1813 struct ext4_sb_info *sbi = EXT4_SB(sb); 1814 ext4_group_t bg, first_meta_bg; 1815 int has_super = 0; 1816 1817 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 1818 1819 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || 1820 nr < first_meta_bg) 1821 return logical_sb_block + nr + 1; 1822 bg = sbi->s_desc_per_block * nr; 1823 if (ext4_bg_has_super(sb, bg)) 1824 has_super = 1; 1825 return (has_super + ext4_group_first_block_no(sb, bg)); 1826 } 1827 1828 /** 1829 * ext4_get_stripe_size: Get the stripe size. 1830 * @sbi: In memory super block info 1831 * 1832 * If we have specified it via mount option, then 1833 * use the mount option value. If the value specified at mount time is 1834 * greater than the blocks per group use the super block value. 1835 * If the super block value is greater than blocks per group return 0. 1836 * Allocator needs it be less than blocks per group. 1837 * 1838 */ 1839 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) 1840 { 1841 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); 1842 unsigned long stripe_width = 1843 le32_to_cpu(sbi->s_es->s_raid_stripe_width); 1844 1845 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) 1846 return sbi->s_stripe; 1847 1848 if (stripe_width <= sbi->s_blocks_per_group) 1849 return stripe_width; 1850 1851 if (stride <= sbi->s_blocks_per_group) 1852 return stride; 1853 1854 return 0; 1855 } 1856 1857 static int ext4_fill_super(struct super_block *sb, void *data, int silent) 1858 __releases(kernel_lock) 1859 __acquires(kernel_lock) 1860 1861 { 1862 struct buffer_head *bh; 1863 struct ext4_super_block *es = NULL; 1864 struct ext4_sb_info *sbi; 1865 ext4_fsblk_t block; 1866 ext4_fsblk_t sb_block = get_sb_block(&data); 1867 ext4_fsblk_t logical_sb_block; 1868 unsigned long offset = 0; 1869 unsigned int journal_inum = 0; 1870 unsigned long journal_devnum = 0; 1871 unsigned long def_mount_opts; 1872 struct inode *root; 1873 char *cp; 1874 int ret = -EINVAL; 1875 int blocksize; 1876 int db_count; 1877 int i; 1878 int needs_recovery, has_huge_files; 1879 __le32 features; 1880 __u64 blocks_count; 1881 int err; 1882 1883 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 1884 if (!sbi) 1885 return -ENOMEM; 1886 sb->s_fs_info = sbi; 1887 sbi->s_mount_opt = 0; 1888 sbi->s_resuid = EXT4_DEF_RESUID; 1889 sbi->s_resgid = EXT4_DEF_RESGID; 1890 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 1891 sbi->s_sb_block = sb_block; 1892 1893 unlock_kernel(); 1894 1895 /* Cleanup superblock name */ 1896 for (cp = sb->s_id; (cp = strchr(cp, '/'));) 1897 *cp = '!'; 1898 1899 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 1900 if (!blocksize) { 1901 printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); 1902 goto out_fail; 1903 } 1904 1905 /* 1906 * The ext4 superblock will not be buffer aligned for other than 1kB 1907 * block sizes. We need to calculate the offset from buffer start. 1908 */ 1909 if (blocksize != EXT4_MIN_BLOCK_SIZE) { 1910 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 1911 offset = do_div(logical_sb_block, blocksize); 1912 } else { 1913 logical_sb_block = sb_block; 1914 } 1915 1916 if (!(bh = sb_bread(sb, logical_sb_block))) { 1917 printk(KERN_ERR "EXT4-fs: unable to read superblock\n"); 1918 goto out_fail; 1919 } 1920 /* 1921 * Note: s_es must be initialized as soon as possible because 1922 * some ext4 macro-instructions depend on its value 1923 */ 1924 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 1925 sbi->s_es = es; 1926 sb->s_magic = le16_to_cpu(es->s_magic); 1927 if (sb->s_magic != EXT4_SUPER_MAGIC) 1928 goto cantfind_ext4; 1929 1930 /* Set defaults before we parse the mount options */ 1931 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 1932 if (def_mount_opts & EXT4_DEFM_DEBUG) 1933 set_opt(sbi->s_mount_opt, DEBUG); 1934 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) 1935 set_opt(sbi->s_mount_opt, GRPID); 1936 if (def_mount_opts & EXT4_DEFM_UID16) 1937 set_opt(sbi->s_mount_opt, NO_UID32); 1938 #ifdef CONFIG_EXT4_FS_XATTR 1939 if (def_mount_opts & EXT4_DEFM_XATTR_USER) 1940 set_opt(sbi->s_mount_opt, XATTR_USER); 1941 #endif 1942 #ifdef CONFIG_EXT4_FS_POSIX_ACL 1943 if (def_mount_opts & EXT4_DEFM_ACL) 1944 set_opt(sbi->s_mount_opt, POSIX_ACL); 1945 #endif 1946 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 1947 sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; 1948 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 1949 sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA; 1950 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) 1951 sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA; 1952 1953 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) 1954 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 1955 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) 1956 set_opt(sbi->s_mount_opt, ERRORS_CONT); 1957 else 1958 set_opt(sbi->s_mount_opt, ERRORS_RO); 1959 1960 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 1961 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 1962 1963 set_opt(sbi->s_mount_opt, RESERVATION); 1964 set_opt(sbi->s_mount_opt, BARRIER); 1965 1966 /* 1967 * turn on extents feature by default in ext4 filesystem 1968 * only if feature flag already set by mkfs or tune2fs. 1969 * Use -o noextents to turn it off 1970 */ 1971 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) 1972 set_opt(sbi->s_mount_opt, EXTENTS); 1973 else 1974 ext4_warning(sb, __func__, 1975 "extents feature not enabled on this filesystem, " 1976 "use tune2fs.\n"); 1977 1978 /* 1979 * enable delayed allocation by default 1980 * Use -o nodelalloc to turn it off 1981 */ 1982 set_opt(sbi->s_mount_opt, DELALLOC); 1983 1984 1985 if (!parse_options((char *) data, sb, &journal_inum, &journal_devnum, 1986 NULL, 0)) 1987 goto failed_mount; 1988 1989 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 1990 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 1991 1992 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && 1993 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || 1994 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 1995 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) 1996 printk(KERN_WARNING 1997 "EXT4-fs warning: feature flags set on rev 0 fs, " 1998 "running e2fsck is recommended\n"); 1999 2000 /* 2001 * Check feature flags regardless of the revision level, since we 2002 * previously didn't change the revision level when setting the flags, 2003 * so there is a chance incompat flags are set on a rev 0 filesystem. 2004 */ 2005 features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); 2006 if (features) { 2007 printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of " 2008 "unsupported optional features (%x).\n", 2009 sb->s_id, le32_to_cpu(features)); 2010 goto failed_mount; 2011 } 2012 features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); 2013 if (!(sb->s_flags & MS_RDONLY) && features) { 2014 printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of " 2015 "unsupported optional features (%x).\n", 2016 sb->s_id, le32_to_cpu(features)); 2017 goto failed_mount; 2018 } 2019 has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, 2020 EXT4_FEATURE_RO_COMPAT_HUGE_FILE); 2021 if (has_huge_files) { 2022 /* 2023 * Large file size enabled file system can only be 2024 * mount if kernel is build with CONFIG_LSF 2025 */ 2026 if (sizeof(root->i_blocks) < sizeof(u64) && 2027 !(sb->s_flags & MS_RDONLY)) { 2028 printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge " 2029 "files cannot be mounted read-write " 2030 "without CONFIG_LSF.\n", sb->s_id); 2031 goto failed_mount; 2032 } 2033 } 2034 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 2035 2036 if (blocksize < EXT4_MIN_BLOCK_SIZE || 2037 blocksize > EXT4_MAX_BLOCK_SIZE) { 2038 printk(KERN_ERR 2039 "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n", 2040 blocksize, sb->s_id); 2041 goto failed_mount; 2042 } 2043 2044 if (sb->s_blocksize != blocksize) { 2045 2046 /* Validate the filesystem blocksize */ 2047 if (!sb_set_blocksize(sb, blocksize)) { 2048 printk(KERN_ERR "EXT4-fs: bad block size %d.\n", 2049 blocksize); 2050 goto failed_mount; 2051 } 2052 2053 brelse(bh); 2054 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 2055 offset = do_div(logical_sb_block, blocksize); 2056 bh = sb_bread(sb, logical_sb_block); 2057 if (!bh) { 2058 printk(KERN_ERR 2059 "EXT4-fs: Can't read superblock on 2nd try.\n"); 2060 goto failed_mount; 2061 } 2062 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); 2063 sbi->s_es = es; 2064 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 2065 printk(KERN_ERR 2066 "EXT4-fs: Magic mismatch, very weird !\n"); 2067 goto failed_mount; 2068 } 2069 } 2070 2071 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, 2072 has_huge_files); 2073 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); 2074 2075 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { 2076 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; 2077 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; 2078 } else { 2079 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 2080 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 2081 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 2082 (!is_power_of_2(sbi->s_inode_size)) || 2083 (sbi->s_inode_size > blocksize)) { 2084 printk(KERN_ERR 2085 "EXT4-fs: unsupported inode size: %d\n", 2086 sbi->s_inode_size); 2087 goto failed_mount; 2088 } 2089 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) 2090 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); 2091 } 2092 sbi->s_desc_size = le16_to_cpu(es->s_desc_size); 2093 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { 2094 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || 2095 sbi->s_desc_size > EXT4_MAX_DESC_SIZE || 2096 !is_power_of_2(sbi->s_desc_size)) { 2097 printk(KERN_ERR 2098 "EXT4-fs: unsupported descriptor size %lu\n", 2099 sbi->s_desc_size); 2100 goto failed_mount; 2101 } 2102 } else 2103 sbi->s_desc_size = EXT4_MIN_DESC_SIZE; 2104 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 2105 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 2106 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) 2107 goto cantfind_ext4; 2108 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); 2109 if (sbi->s_inodes_per_block == 0) 2110 goto cantfind_ext4; 2111 sbi->s_itb_per_group = sbi->s_inodes_per_group / 2112 sbi->s_inodes_per_block; 2113 sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); 2114 sbi->s_sbh = bh; 2115 sbi->s_mount_state = le16_to_cpu(es->s_state); 2116 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 2117 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 2118 for (i = 0; i < 4; i++) 2119 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 2120 sbi->s_def_hash_version = es->s_def_hash_version; 2121 2122 if (sbi->s_blocks_per_group > blocksize * 8) { 2123 printk(KERN_ERR 2124 "EXT4-fs: #blocks per group too big: %lu\n", 2125 sbi->s_blocks_per_group); 2126 goto failed_mount; 2127 } 2128 if (sbi->s_inodes_per_group > blocksize * 8) { 2129 printk(KERN_ERR 2130 "EXT4-fs: #inodes per group too big: %lu\n", 2131 sbi->s_inodes_per_group); 2132 goto failed_mount; 2133 } 2134 2135 if (ext4_blocks_count(es) > 2136 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 2137 printk(KERN_ERR "EXT4-fs: filesystem on %s:" 2138 " too large to mount safely\n", sb->s_id); 2139 if (sizeof(sector_t) < 8) 2140 printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not " 2141 "enabled\n"); 2142 goto failed_mount; 2143 } 2144 2145 if (EXT4_BLOCKS_PER_GROUP(sb) == 0) 2146 goto cantfind_ext4; 2147 2148 /* ensure blocks_count calculation below doesn't sign-extend */ 2149 if (ext4_blocks_count(es) + EXT4_BLOCKS_PER_GROUP(sb) < 2150 le32_to_cpu(es->s_first_data_block) + 1) { 2151 printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu, " 2152 "first data block %u, blocks per group %lu\n", 2153 ext4_blocks_count(es), 2154 le32_to_cpu(es->s_first_data_block), 2155 EXT4_BLOCKS_PER_GROUP(sb)); 2156 goto failed_mount; 2157 } 2158 blocks_count = (ext4_blocks_count(es) - 2159 le32_to_cpu(es->s_first_data_block) + 2160 EXT4_BLOCKS_PER_GROUP(sb) - 1); 2161 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 2162 sbi->s_groups_count = blocks_count; 2163 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 2164 EXT4_DESC_PER_BLOCK(sb); 2165 sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), 2166 GFP_KERNEL); 2167 if (sbi->s_group_desc == NULL) { 2168 printk(KERN_ERR "EXT4-fs: not enough memory\n"); 2169 goto failed_mount; 2170 } 2171 2172 #ifdef CONFIG_PROC_FS 2173 if (ext4_proc_root) 2174 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 2175 2176 if (sbi->s_proc) 2177 proc_create_data("inode_readahead_blks", 0644, sbi->s_proc, 2178 &ext4_ui_proc_fops, 2179 &sbi->s_inode_readahead_blks); 2180 #endif 2181 2182 bgl_lock_init(&sbi->s_blockgroup_lock); 2183 2184 for (i = 0; i < db_count; i++) { 2185 block = descriptor_loc(sb, logical_sb_block, i); 2186 sbi->s_group_desc[i] = sb_bread(sb, block); 2187 if (!sbi->s_group_desc[i]) { 2188 printk(KERN_ERR "EXT4-fs: " 2189 "can't read group descriptor %d\n", i); 2190 db_count = i; 2191 goto failed_mount2; 2192 } 2193 } 2194 if (!ext4_check_descriptors(sb)) { 2195 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); 2196 goto failed_mount2; 2197 } 2198 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 2199 if (!ext4_fill_flex_info(sb)) { 2200 printk(KERN_ERR 2201 "EXT4-fs: unable to initialize " 2202 "flex_bg meta info!\n"); 2203 goto failed_mount2; 2204 } 2205 2206 sbi->s_gdb_count = db_count; 2207 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 2208 spin_lock_init(&sbi->s_next_gen_lock); 2209 2210 err = percpu_counter_init(&sbi->s_freeblocks_counter, 2211 ext4_count_free_blocks(sb)); 2212 if (!err) { 2213 err = percpu_counter_init(&sbi->s_freeinodes_counter, 2214 ext4_count_free_inodes(sb)); 2215 } 2216 if (!err) { 2217 err = percpu_counter_init(&sbi->s_dirs_counter, 2218 ext4_count_dirs(sb)); 2219 } 2220 if (!err) { 2221 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); 2222 } 2223 if (err) { 2224 printk(KERN_ERR "EXT4-fs: insufficient memory\n"); 2225 goto failed_mount3; 2226 } 2227 2228 sbi->s_stripe = ext4_get_stripe_size(sbi); 2229 2230 /* 2231 * set up enough so that it can read an inode 2232 */ 2233 sb->s_op = &ext4_sops; 2234 sb->s_export_op = &ext4_export_ops; 2235 sb->s_xattr = ext4_xattr_handlers; 2236 #ifdef CONFIG_QUOTA 2237 sb->s_qcop = &ext4_qctl_operations; 2238 sb->dq_op = &ext4_quota_operations; 2239 #endif 2240 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 2241 2242 sb->s_root = NULL; 2243 2244 needs_recovery = (es->s_last_orphan != 0 || 2245 EXT4_HAS_INCOMPAT_FEATURE(sb, 2246 EXT4_FEATURE_INCOMPAT_RECOVER)); 2247 2248 /* 2249 * The first inode we look at is the journal inode. Don't try 2250 * root first: it may be modified in the journal! 2251 */ 2252 if (!test_opt(sb, NOLOAD) && 2253 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 2254 if (ext4_load_journal(sb, es, journal_devnum)) 2255 goto failed_mount3; 2256 if (!(sb->s_flags & MS_RDONLY) && 2257 EXT4_SB(sb)->s_journal->j_failed_commit) { 2258 printk(KERN_CRIT "EXT4-fs error (device %s): " 2259 "ext4_fill_super: Journal transaction " 2260 "%u is corrupt\n", sb->s_id, 2261 EXT4_SB(sb)->s_journal->j_failed_commit); 2262 if (test_opt(sb, ERRORS_RO)) { 2263 printk(KERN_CRIT 2264 "Mounting filesystem read-only\n"); 2265 sb->s_flags |= MS_RDONLY; 2266 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2267 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2268 } 2269 if (test_opt(sb, ERRORS_PANIC)) { 2270 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2271 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2272 ext4_commit_super(sb, es, 1); 2273 printk(KERN_CRIT 2274 "EXT4-fs (device %s): mount failed\n", 2275 sb->s_id); 2276 goto failed_mount4; 2277 } 2278 } 2279 } else if (journal_inum) { 2280 if (ext4_create_journal(sb, es, journal_inum)) 2281 goto failed_mount3; 2282 } else { 2283 if (!silent) 2284 printk(KERN_ERR 2285 "ext4: No journal on filesystem on %s\n", 2286 sb->s_id); 2287 goto failed_mount3; 2288 } 2289 2290 if (ext4_blocks_count(es) > 0xffffffffULL && 2291 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 2292 JBD2_FEATURE_INCOMPAT_64BIT)) { 2293 printk(KERN_ERR "ext4: Failed to set 64-bit journal feature\n"); 2294 goto failed_mount4; 2295 } 2296 2297 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { 2298 jbd2_journal_set_features(sbi->s_journal, 2299 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 2300 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2301 } else if (test_opt(sb, JOURNAL_CHECKSUM)) { 2302 jbd2_journal_set_features(sbi->s_journal, 2303 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); 2304 jbd2_journal_clear_features(sbi->s_journal, 0, 0, 2305 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2306 } else { 2307 jbd2_journal_clear_features(sbi->s_journal, 2308 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 2309 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2310 } 2311 2312 /* We have now updated the journal if required, so we can 2313 * validate the data journaling mode. */ 2314 switch (test_opt(sb, DATA_FLAGS)) { 2315 case 0: 2316 /* No mode set, assume a default based on the journal 2317 * capabilities: ORDERED_DATA if the journal can 2318 * cope, else JOURNAL_DATA 2319 */ 2320 if (jbd2_journal_check_available_features 2321 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) 2322 set_opt(sbi->s_mount_opt, ORDERED_DATA); 2323 else 2324 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 2325 break; 2326 2327 case EXT4_MOUNT_ORDERED_DATA: 2328 case EXT4_MOUNT_WRITEBACK_DATA: 2329 if (!jbd2_journal_check_available_features 2330 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 2331 printk(KERN_ERR "EXT4-fs: Journal does not support " 2332 "requested data journaling mode\n"); 2333 goto failed_mount4; 2334 } 2335 default: 2336 break; 2337 } 2338 2339 if (test_opt(sb, NOBH)) { 2340 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { 2341 printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - " 2342 "its supported only with writeback mode\n"); 2343 clear_opt(sbi->s_mount_opt, NOBH); 2344 } 2345 } 2346 /* 2347 * The jbd2_journal_load will have done any necessary log recovery, 2348 * so we can safely mount the rest of the filesystem now. 2349 */ 2350 2351 root = ext4_iget(sb, EXT4_ROOT_INO); 2352 if (IS_ERR(root)) { 2353 printk(KERN_ERR "EXT4-fs: get root inode failed\n"); 2354 ret = PTR_ERR(root); 2355 goto failed_mount4; 2356 } 2357 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 2358 iput(root); 2359 printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n"); 2360 goto failed_mount4; 2361 } 2362 sb->s_root = d_alloc_root(root); 2363 if (!sb->s_root) { 2364 printk(KERN_ERR "EXT4-fs: get root dentry failed\n"); 2365 iput(root); 2366 ret = -ENOMEM; 2367 goto failed_mount4; 2368 } 2369 2370 ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY); 2371 2372 /* determine the minimum size of new large inodes, if present */ 2373 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { 2374 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 2375 EXT4_GOOD_OLD_INODE_SIZE; 2376 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 2377 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { 2378 if (sbi->s_want_extra_isize < 2379 le16_to_cpu(es->s_want_extra_isize)) 2380 sbi->s_want_extra_isize = 2381 le16_to_cpu(es->s_want_extra_isize); 2382 if (sbi->s_want_extra_isize < 2383 le16_to_cpu(es->s_min_extra_isize)) 2384 sbi->s_want_extra_isize = 2385 le16_to_cpu(es->s_min_extra_isize); 2386 } 2387 } 2388 /* Check if enough inode space is available */ 2389 if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > 2390 sbi->s_inode_size) { 2391 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 2392 EXT4_GOOD_OLD_INODE_SIZE; 2393 printk(KERN_INFO "EXT4-fs: required extra inode space not" 2394 "available.\n"); 2395 } 2396 2397 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 2398 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " 2399 "requested data journaling mode\n"); 2400 clear_opt(sbi->s_mount_opt, DELALLOC); 2401 } else if (test_opt(sb, DELALLOC)) 2402 printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); 2403 2404 ext4_ext_init(sb); 2405 err = ext4_mb_init(sb, needs_recovery); 2406 if (err) { 2407 printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", 2408 err); 2409 goto failed_mount4; 2410 } 2411 2412 /* 2413 * akpm: core read_super() calls in here with the superblock locked. 2414 * That deadlocks, because orphan cleanup needs to lock the superblock 2415 * in numerous places. Here we just pop the lock - it's relatively 2416 * harmless, because we are now ready to accept write_super() requests, 2417 * and aviro says that's the only reason for hanging onto the 2418 * superblock lock. 2419 */ 2420 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 2421 ext4_orphan_cleanup(sb, es); 2422 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 2423 if (needs_recovery) 2424 printk(KERN_INFO "EXT4-fs: recovery complete.\n"); 2425 ext4_mark_recovery_complete(sb, es); 2426 printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n", 2427 test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal": 2428 test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": 2429 "writeback"); 2430 2431 lock_kernel(); 2432 return 0; 2433 2434 cantfind_ext4: 2435 if (!silent) 2436 printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n", 2437 sb->s_id); 2438 goto failed_mount; 2439 2440 failed_mount4: 2441 jbd2_journal_destroy(sbi->s_journal); 2442 sbi->s_journal = NULL; 2443 failed_mount3: 2444 percpu_counter_destroy(&sbi->s_freeblocks_counter); 2445 percpu_counter_destroy(&sbi->s_freeinodes_counter); 2446 percpu_counter_destroy(&sbi->s_dirs_counter); 2447 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 2448 failed_mount2: 2449 for (i = 0; i < db_count; i++) 2450 brelse(sbi->s_group_desc[i]); 2451 kfree(sbi->s_group_desc); 2452 failed_mount: 2453 if (sbi->s_proc) { 2454 remove_proc_entry("inode_readahead_blks", sbi->s_proc); 2455 remove_proc_entry(sb->s_id, ext4_proc_root); 2456 } 2457 #ifdef CONFIG_QUOTA 2458 for (i = 0; i < MAXQUOTAS; i++) 2459 kfree(sbi->s_qf_names[i]); 2460 #endif 2461 ext4_blkdev_remove(sbi); 2462 brelse(bh); 2463 out_fail: 2464 sb->s_fs_info = NULL; 2465 kfree(sbi); 2466 lock_kernel(); 2467 return ret; 2468 } 2469 2470 /* 2471 * Setup any per-fs journal parameters now. We'll do this both on 2472 * initial mount, once the journal has been initialised but before we've 2473 * done any recovery; and again on any subsequent remount. 2474 */ 2475 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) 2476 { 2477 struct ext4_sb_info *sbi = EXT4_SB(sb); 2478 2479 if (sbi->s_commit_interval) 2480 journal->j_commit_interval = sbi->s_commit_interval; 2481 /* We could also set up an ext4-specific default for the commit 2482 * interval here, but for now we'll just fall back to the jbd 2483 * default. */ 2484 2485 spin_lock(&journal->j_state_lock); 2486 if (test_opt(sb, BARRIER)) 2487 journal->j_flags |= JBD2_BARRIER; 2488 else 2489 journal->j_flags &= ~JBD2_BARRIER; 2490 if (test_opt(sb, DATA_ERR_ABORT)) 2491 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; 2492 else 2493 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; 2494 spin_unlock(&journal->j_state_lock); 2495 } 2496 2497 static journal_t *ext4_get_journal(struct super_block *sb, 2498 unsigned int journal_inum) 2499 { 2500 struct inode *journal_inode; 2501 journal_t *journal; 2502 2503 /* First, test for the existence of a valid inode on disk. Bad 2504 * things happen if we iget() an unused inode, as the subsequent 2505 * iput() will try to delete it. */ 2506 2507 journal_inode = ext4_iget(sb, journal_inum); 2508 if (IS_ERR(journal_inode)) { 2509 printk(KERN_ERR "EXT4-fs: no journal found.\n"); 2510 return NULL; 2511 } 2512 if (!journal_inode->i_nlink) { 2513 make_bad_inode(journal_inode); 2514 iput(journal_inode); 2515 printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n"); 2516 return NULL; 2517 } 2518 2519 jbd_debug(2, "Journal inode found at %p: %lld bytes\n", 2520 journal_inode, journal_inode->i_size); 2521 if (!S_ISREG(journal_inode->i_mode)) { 2522 printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); 2523 iput(journal_inode); 2524 return NULL; 2525 } 2526 2527 journal = jbd2_journal_init_inode(journal_inode); 2528 if (!journal) { 2529 printk(KERN_ERR "EXT4-fs: Could not load journal inode\n"); 2530 iput(journal_inode); 2531 return NULL; 2532 } 2533 journal->j_private = sb; 2534 ext4_init_journal_params(sb, journal); 2535 return journal; 2536 } 2537 2538 static journal_t *ext4_get_dev_journal(struct super_block *sb, 2539 dev_t j_dev) 2540 { 2541 struct buffer_head *bh; 2542 journal_t *journal; 2543 ext4_fsblk_t start; 2544 ext4_fsblk_t len; 2545 int hblock, blocksize; 2546 ext4_fsblk_t sb_block; 2547 unsigned long offset; 2548 struct ext4_super_block *es; 2549 struct block_device *bdev; 2550 2551 bdev = ext4_blkdev_get(j_dev); 2552 if (bdev == NULL) 2553 return NULL; 2554 2555 if (bd_claim(bdev, sb)) { 2556 printk(KERN_ERR 2557 "EXT4: failed to claim external journal device.\n"); 2558 blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 2559 return NULL; 2560 } 2561 2562 blocksize = sb->s_blocksize; 2563 hblock = bdev_hardsect_size(bdev); 2564 if (blocksize < hblock) { 2565 printk(KERN_ERR 2566 "EXT4-fs: blocksize too small for journal device.\n"); 2567 goto out_bdev; 2568 } 2569 2570 sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; 2571 offset = EXT4_MIN_BLOCK_SIZE % blocksize; 2572 set_blocksize(bdev, blocksize); 2573 if (!(bh = __bread(bdev, sb_block, blocksize))) { 2574 printk(KERN_ERR "EXT4-fs: couldn't read superblock of " 2575 "external journal\n"); 2576 goto out_bdev; 2577 } 2578 2579 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 2580 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || 2581 !(le32_to_cpu(es->s_feature_incompat) & 2582 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { 2583 printk(KERN_ERR "EXT4-fs: external journal has " 2584 "bad superblock\n"); 2585 brelse(bh); 2586 goto out_bdev; 2587 } 2588 2589 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 2590 printk(KERN_ERR "EXT4-fs: journal UUID does not match\n"); 2591 brelse(bh); 2592 goto out_bdev; 2593 } 2594 2595 len = ext4_blocks_count(es); 2596 start = sb_block + 1; 2597 brelse(bh); /* we're done with the superblock */ 2598 2599 journal = jbd2_journal_init_dev(bdev, sb->s_bdev, 2600 start, len, blocksize); 2601 if (!journal) { 2602 printk(KERN_ERR "EXT4-fs: failed to create device journal\n"); 2603 goto out_bdev; 2604 } 2605 journal->j_private = sb; 2606 ll_rw_block(READ, 1, &journal->j_sb_buffer); 2607 wait_on_buffer(journal->j_sb_buffer); 2608 if (!buffer_uptodate(journal->j_sb_buffer)) { 2609 printk(KERN_ERR "EXT4-fs: I/O error on journal device\n"); 2610 goto out_journal; 2611 } 2612 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 2613 printk(KERN_ERR "EXT4-fs: External journal has more than one " 2614 "user (unsupported) - %d\n", 2615 be32_to_cpu(journal->j_superblock->s_nr_users)); 2616 goto out_journal; 2617 } 2618 EXT4_SB(sb)->journal_bdev = bdev; 2619 ext4_init_journal_params(sb, journal); 2620 return journal; 2621 out_journal: 2622 jbd2_journal_destroy(journal); 2623 out_bdev: 2624 ext4_blkdev_put(bdev); 2625 return NULL; 2626 } 2627 2628 static int ext4_load_journal(struct super_block *sb, 2629 struct ext4_super_block *es, 2630 unsigned long journal_devnum) 2631 { 2632 journal_t *journal; 2633 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); 2634 dev_t journal_dev; 2635 int err = 0; 2636 int really_read_only; 2637 2638 if (journal_devnum && 2639 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 2640 printk(KERN_INFO "EXT4-fs: external journal device major/minor " 2641 "numbers have changed\n"); 2642 journal_dev = new_decode_dev(journal_devnum); 2643 } else 2644 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 2645 2646 really_read_only = bdev_read_only(sb->s_bdev); 2647 2648 /* 2649 * Are we loading a blank journal or performing recovery after a 2650 * crash? For recovery, we need to check in advance whether we 2651 * can get read-write access to the device. 2652 */ 2653 2654 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 2655 if (sb->s_flags & MS_RDONLY) { 2656 printk(KERN_INFO "EXT4-fs: INFO: recovery " 2657 "required on readonly filesystem.\n"); 2658 if (really_read_only) { 2659 printk(KERN_ERR "EXT4-fs: write access " 2660 "unavailable, cannot proceed.\n"); 2661 return -EROFS; 2662 } 2663 printk(KERN_INFO "EXT4-fs: write access will " 2664 "be enabled during recovery.\n"); 2665 } 2666 } 2667 2668 if (journal_inum && journal_dev) { 2669 printk(KERN_ERR "EXT4-fs: filesystem has both journal " 2670 "and inode journals!\n"); 2671 return -EINVAL; 2672 } 2673 2674 if (journal_inum) { 2675 if (!(journal = ext4_get_journal(sb, journal_inum))) 2676 return -EINVAL; 2677 } else { 2678 if (!(journal = ext4_get_dev_journal(sb, journal_dev))) 2679 return -EINVAL; 2680 } 2681 2682 if (journal->j_flags & JBD2_BARRIER) 2683 printk(KERN_INFO "EXT4-fs: barriers enabled\n"); 2684 else 2685 printk(KERN_INFO "EXT4-fs: barriers disabled\n"); 2686 2687 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 2688 err = jbd2_journal_update_format(journal); 2689 if (err) { 2690 printk(KERN_ERR "EXT4-fs: error updating journal.\n"); 2691 jbd2_journal_destroy(journal); 2692 return err; 2693 } 2694 } 2695 2696 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) 2697 err = jbd2_journal_wipe(journal, !really_read_only); 2698 if (!err) 2699 err = jbd2_journal_load(journal); 2700 2701 if (err) { 2702 printk(KERN_ERR "EXT4-fs: error loading journal.\n"); 2703 jbd2_journal_destroy(journal); 2704 return err; 2705 } 2706 2707 EXT4_SB(sb)->s_journal = journal; 2708 ext4_clear_journal_err(sb, es); 2709 2710 if (journal_devnum && 2711 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 2712 es->s_journal_dev = cpu_to_le32(journal_devnum); 2713 sb->s_dirt = 1; 2714 2715 /* Make sure we flush the recovery flag to disk. */ 2716 ext4_commit_super(sb, es, 1); 2717 } 2718 2719 return 0; 2720 } 2721 2722 static int ext4_create_journal(struct super_block *sb, 2723 struct ext4_super_block *es, 2724 unsigned int journal_inum) 2725 { 2726 journal_t *journal; 2727 int err; 2728 2729 if (sb->s_flags & MS_RDONLY) { 2730 printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to " 2731 "create journal.\n"); 2732 return -EROFS; 2733 } 2734 2735 journal = ext4_get_journal(sb, journal_inum); 2736 if (!journal) 2737 return -EINVAL; 2738 2739 printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n", 2740 journal_inum); 2741 2742 err = jbd2_journal_create(journal); 2743 if (err) { 2744 printk(KERN_ERR "EXT4-fs: error creating journal.\n"); 2745 jbd2_journal_destroy(journal); 2746 return -EIO; 2747 } 2748 2749 EXT4_SB(sb)->s_journal = journal; 2750 2751 ext4_update_dynamic_rev(sb); 2752 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 2753 EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL); 2754 2755 es->s_journal_inum = cpu_to_le32(journal_inum); 2756 sb->s_dirt = 1; 2757 2758 /* Make sure we flush the recovery flag to disk. */ 2759 ext4_commit_super(sb, es, 1); 2760 2761 return 0; 2762 } 2763 2764 static void ext4_commit_super(struct super_block *sb, 2765 struct ext4_super_block *es, int sync) 2766 { 2767 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 2768 2769 if (!sbh) 2770 return; 2771 if (buffer_write_io_error(sbh)) { 2772 /* 2773 * Oh, dear. A previous attempt to write the 2774 * superblock failed. This could happen because the 2775 * USB device was yanked out. Or it could happen to 2776 * be a transient write error and maybe the block will 2777 * be remapped. Nothing we can do but to retry the 2778 * write and hope for the best. 2779 */ 2780 printk(KERN_ERR "ext4: previous I/O error to " 2781 "superblock detected for %s.\n", sb->s_id); 2782 clear_buffer_write_io_error(sbh); 2783 set_buffer_uptodate(sbh); 2784 } 2785 es->s_wtime = cpu_to_le32(get_seconds()); 2786 ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); 2787 es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); 2788 BUFFER_TRACE(sbh, "marking dirty"); 2789 mark_buffer_dirty(sbh); 2790 if (sync) { 2791 sync_dirty_buffer(sbh); 2792 if (buffer_write_io_error(sbh)) { 2793 printk(KERN_ERR "ext4: I/O error while writing " 2794 "superblock for %s.\n", sb->s_id); 2795 clear_buffer_write_io_error(sbh); 2796 set_buffer_uptodate(sbh); 2797 } 2798 } 2799 } 2800 2801 2802 /* 2803 * Have we just finished recovery? If so, and if we are mounting (or 2804 * remounting) the filesystem readonly, then we will end up with a 2805 * consistent fs on disk. Record that fact. 2806 */ 2807 static void ext4_mark_recovery_complete(struct super_block *sb, 2808 struct ext4_super_block *es) 2809 { 2810 journal_t *journal = EXT4_SB(sb)->s_journal; 2811 2812 jbd2_journal_lock_updates(journal); 2813 if (jbd2_journal_flush(journal) < 0) 2814 goto out; 2815 2816 lock_super(sb); 2817 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && 2818 sb->s_flags & MS_RDONLY) { 2819 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 2820 sb->s_dirt = 0; 2821 ext4_commit_super(sb, es, 1); 2822 } 2823 unlock_super(sb); 2824 2825 out: 2826 jbd2_journal_unlock_updates(journal); 2827 } 2828 2829 /* 2830 * If we are mounting (or read-write remounting) a filesystem whose journal 2831 * has recorded an error from a previous lifetime, move that error to the 2832 * main filesystem now. 2833 */ 2834 static void ext4_clear_journal_err(struct super_block *sb, 2835 struct ext4_super_block *es) 2836 { 2837 journal_t *journal; 2838 int j_errno; 2839 const char *errstr; 2840 2841 journal = EXT4_SB(sb)->s_journal; 2842 2843 /* 2844 * Now check for any error status which may have been recorded in the 2845 * journal by a prior ext4_error() or ext4_abort() 2846 */ 2847 2848 j_errno = jbd2_journal_errno(journal); 2849 if (j_errno) { 2850 char nbuf[16]; 2851 2852 errstr = ext4_decode_error(sb, j_errno, nbuf); 2853 ext4_warning(sb, __func__, "Filesystem error recorded " 2854 "from previous mount: %s", errstr); 2855 ext4_warning(sb, __func__, "Marking fs in need of " 2856 "filesystem check."); 2857 2858 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2859 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2860 ext4_commit_super(sb, es, 1); 2861 2862 jbd2_journal_clear_err(journal); 2863 } 2864 } 2865 2866 /* 2867 * Force the running and committing transactions to commit, 2868 * and wait on the commit. 2869 */ 2870 int ext4_force_commit(struct super_block *sb) 2871 { 2872 journal_t *journal; 2873 int ret; 2874 2875 if (sb->s_flags & MS_RDONLY) 2876 return 0; 2877 2878 journal = EXT4_SB(sb)->s_journal; 2879 sb->s_dirt = 0; 2880 ret = ext4_journal_force_commit(journal); 2881 return ret; 2882 } 2883 2884 /* 2885 * Ext4 always journals updates to the superblock itself, so we don't 2886 * have to propagate any other updates to the superblock on disk at this 2887 * point. (We can probably nuke this function altogether, and remove 2888 * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...) 2889 */ 2890 static void ext4_write_super(struct super_block *sb) 2891 { 2892 if (mutex_trylock(&sb->s_lock) != 0) 2893 BUG(); 2894 sb->s_dirt = 0; 2895 } 2896 2897 static int ext4_sync_fs(struct super_block *sb, int wait) 2898 { 2899 int ret = 0; 2900 2901 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); 2902 sb->s_dirt = 0; 2903 if (wait) 2904 ret = ext4_force_commit(sb); 2905 else 2906 jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL); 2907 return ret; 2908 } 2909 2910 /* 2911 * LVM calls this function before a (read-only) snapshot is created. This 2912 * gives us a chance to flush the journal completely and mark the fs clean. 2913 */ 2914 static void ext4_write_super_lockfs(struct super_block *sb) 2915 { 2916 sb->s_dirt = 0; 2917 2918 if (!(sb->s_flags & MS_RDONLY)) { 2919 journal_t *journal = EXT4_SB(sb)->s_journal; 2920 2921 /* Now we set up the journal barrier. */ 2922 jbd2_journal_lock_updates(journal); 2923 2924 /* 2925 * We don't want to clear needs_recovery flag when we failed 2926 * to flush the journal. 2927 */ 2928 if (jbd2_journal_flush(journal) < 0) 2929 return; 2930 2931 /* Journal blocked and flushed, clear needs_recovery flag. */ 2932 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 2933 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 2934 } 2935 } 2936 2937 /* 2938 * Called by LVM after the snapshot is done. We need to reset the RECOVER 2939 * flag here, even though the filesystem is not technically dirty yet. 2940 */ 2941 static void ext4_unlockfs(struct super_block *sb) 2942 { 2943 if (!(sb->s_flags & MS_RDONLY)) { 2944 lock_super(sb); 2945 /* Reser the needs_recovery flag before the fs is unlocked. */ 2946 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 2947 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 2948 unlock_super(sb); 2949 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 2950 } 2951 } 2952 2953 static int ext4_remount(struct super_block *sb, int *flags, char *data) 2954 { 2955 struct ext4_super_block *es; 2956 struct ext4_sb_info *sbi = EXT4_SB(sb); 2957 ext4_fsblk_t n_blocks_count = 0; 2958 unsigned long old_sb_flags; 2959 struct ext4_mount_options old_opts; 2960 ext4_group_t g; 2961 int err; 2962 #ifdef CONFIG_QUOTA 2963 int i; 2964 #endif 2965 2966 /* Store the original options */ 2967 old_sb_flags = sb->s_flags; 2968 old_opts.s_mount_opt = sbi->s_mount_opt; 2969 old_opts.s_resuid = sbi->s_resuid; 2970 old_opts.s_resgid = sbi->s_resgid; 2971 old_opts.s_commit_interval = sbi->s_commit_interval; 2972 #ifdef CONFIG_QUOTA 2973 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 2974 for (i = 0; i < MAXQUOTAS; i++) 2975 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 2976 #endif 2977 2978 /* 2979 * Allow the "check" option to be passed as a remount option. 2980 */ 2981 if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) { 2982 err = -EINVAL; 2983 goto restore_opts; 2984 } 2985 2986 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) 2987 ext4_abort(sb, __func__, "Abort forced by user"); 2988 2989 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2990 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 2991 2992 es = sbi->s_es; 2993 2994 ext4_init_journal_params(sb, sbi->s_journal); 2995 2996 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 2997 n_blocks_count > ext4_blocks_count(es)) { 2998 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) { 2999 err = -EROFS; 3000 goto restore_opts; 3001 } 3002 3003 if (*flags & MS_RDONLY) { 3004 /* 3005 * First of all, the unconditional stuff we have to do 3006 * to disable replay of the journal when we next remount 3007 */ 3008 sb->s_flags |= MS_RDONLY; 3009 3010 /* 3011 * OK, test if we are remounting a valid rw partition 3012 * readonly, and if so set the rdonly flag and then 3013 * mark the partition as valid again. 3014 */ 3015 if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) && 3016 (sbi->s_mount_state & EXT4_VALID_FS)) 3017 es->s_state = cpu_to_le16(sbi->s_mount_state); 3018 3019 /* 3020 * We have to unlock super so that we can wait for 3021 * transactions. 3022 */ 3023 unlock_super(sb); 3024 ext4_mark_recovery_complete(sb, es); 3025 lock_super(sb); 3026 } else { 3027 __le32 ret; 3028 if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, 3029 ~EXT4_FEATURE_RO_COMPAT_SUPP))) { 3030 printk(KERN_WARNING "EXT4-fs: %s: couldn't " 3031 "remount RDWR because of unsupported " 3032 "optional features (%x).\n", 3033 sb->s_id, le32_to_cpu(ret)); 3034 err = -EROFS; 3035 goto restore_opts; 3036 } 3037 3038 /* 3039 * Make sure the group descriptor checksums 3040 * are sane. If they aren't, refuse to 3041 * remount r/w. 3042 */ 3043 for (g = 0; g < sbi->s_groups_count; g++) { 3044 struct ext4_group_desc *gdp = 3045 ext4_get_group_desc(sb, g, NULL); 3046 3047 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { 3048 printk(KERN_ERR 3049 "EXT4-fs: ext4_remount: " 3050 "Checksum for group %lu failed (%u!=%u)\n", 3051 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), 3052 le16_to_cpu(gdp->bg_checksum)); 3053 err = -EINVAL; 3054 goto restore_opts; 3055 } 3056 } 3057 3058 /* 3059 * If we have an unprocessed orphan list hanging 3060 * around from a previously readonly bdev mount, 3061 * require a full umount/remount for now. 3062 */ 3063 if (es->s_last_orphan) { 3064 printk(KERN_WARNING "EXT4-fs: %s: couldn't " 3065 "remount RDWR because of unprocessed " 3066 "orphan inode list. Please " 3067 "umount/remount instead.\n", 3068 sb->s_id); 3069 err = -EINVAL; 3070 goto restore_opts; 3071 } 3072 3073 /* 3074 * Mounting a RDONLY partition read-write, so reread 3075 * and store the current valid flag. (It may have 3076 * been changed by e2fsck since we originally mounted 3077 * the partition.) 3078 */ 3079 ext4_clear_journal_err(sb, es); 3080 sbi->s_mount_state = le16_to_cpu(es->s_state); 3081 if ((err = ext4_group_extend(sb, es, n_blocks_count))) 3082 goto restore_opts; 3083 if (!ext4_setup_super(sb, es, 0)) 3084 sb->s_flags &= ~MS_RDONLY; 3085 } 3086 } 3087 #ifdef CONFIG_QUOTA 3088 /* Release old quota file names */ 3089 for (i = 0; i < MAXQUOTAS; i++) 3090 if (old_opts.s_qf_names[i] && 3091 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 3092 kfree(old_opts.s_qf_names[i]); 3093 #endif 3094 return 0; 3095 restore_opts: 3096 sb->s_flags = old_sb_flags; 3097 sbi->s_mount_opt = old_opts.s_mount_opt; 3098 sbi->s_resuid = old_opts.s_resuid; 3099 sbi->s_resgid = old_opts.s_resgid; 3100 sbi->s_commit_interval = old_opts.s_commit_interval; 3101 #ifdef CONFIG_QUOTA 3102 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 3103 for (i = 0; i < MAXQUOTAS; i++) { 3104 if (sbi->s_qf_names[i] && 3105 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 3106 kfree(sbi->s_qf_names[i]); 3107 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 3108 } 3109 #endif 3110 return err; 3111 } 3112 3113 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) 3114 { 3115 struct super_block *sb = dentry->d_sb; 3116 struct ext4_sb_info *sbi = EXT4_SB(sb); 3117 struct ext4_super_block *es = sbi->s_es; 3118 u64 fsid; 3119 3120 if (test_opt(sb, MINIX_DF)) { 3121 sbi->s_overhead_last = 0; 3122 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { 3123 ext4_group_t ngroups = sbi->s_groups_count, i; 3124 ext4_fsblk_t overhead = 0; 3125 smp_rmb(); 3126 3127 /* 3128 * Compute the overhead (FS structures). This is constant 3129 * for a given filesystem unless the number of block groups 3130 * changes so we cache the previous value until it does. 3131 */ 3132 3133 /* 3134 * All of the blocks before first_data_block are 3135 * overhead 3136 */ 3137 overhead = le32_to_cpu(es->s_first_data_block); 3138 3139 /* 3140 * Add the overhead attributed to the superblock and 3141 * block group descriptors. If the sparse superblocks 3142 * feature is turned on, then not all groups have this. 3143 */ 3144 for (i = 0; i < ngroups; i++) { 3145 overhead += ext4_bg_has_super(sb, i) + 3146 ext4_bg_num_gdb(sb, i); 3147 cond_resched(); 3148 } 3149 3150 /* 3151 * Every block group has an inode bitmap, a block 3152 * bitmap, and an inode table. 3153 */ 3154 overhead += ngroups * (2 + sbi->s_itb_per_group); 3155 sbi->s_overhead_last = overhead; 3156 smp_wmb(); 3157 sbi->s_blocks_last = ext4_blocks_count(es); 3158 } 3159 3160 buf->f_type = EXT4_SUPER_MAGIC; 3161 buf->f_bsize = sb->s_blocksize; 3162 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 3163 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - 3164 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); 3165 ext4_free_blocks_count_set(es, buf->f_bfree); 3166 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 3167 if (buf->f_bfree < ext4_r_blocks_count(es)) 3168 buf->f_bavail = 0; 3169 buf->f_files = le32_to_cpu(es->s_inodes_count); 3170 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); 3171 es->s_free_inodes_count = cpu_to_le32(buf->f_ffree); 3172 buf->f_namelen = EXT4_NAME_LEN; 3173 fsid = le64_to_cpup((void *)es->s_uuid) ^ 3174 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 3175 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 3176 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 3177 return 0; 3178 } 3179 3180 /* Helper function for writing quotas on sync - we need to start transaction before quota file 3181 * is locked for write. Otherwise the are possible deadlocks: 3182 * Process 1 Process 2 3183 * ext4_create() quota_sync() 3184 * jbd2_journal_start() write_dquot() 3185 * DQUOT_INIT() down(dqio_mutex) 3186 * down(dqio_mutex) jbd2_journal_start() 3187 * 3188 */ 3189 3190 #ifdef CONFIG_QUOTA 3191 3192 static inline struct inode *dquot_to_inode(struct dquot *dquot) 3193 { 3194 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; 3195 } 3196 3197 static int ext4_dquot_initialize(struct inode *inode, int type) 3198 { 3199 handle_t *handle; 3200 int ret, err; 3201 3202 /* We may create quota structure so we need to reserve enough blocks */ 3203 handle = ext4_journal_start(inode, 2*EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)); 3204 if (IS_ERR(handle)) 3205 return PTR_ERR(handle); 3206 ret = dquot_initialize(inode, type); 3207 err = ext4_journal_stop(handle); 3208 if (!ret) 3209 ret = err; 3210 return ret; 3211 } 3212 3213 static int ext4_dquot_drop(struct inode *inode) 3214 { 3215 handle_t *handle; 3216 int ret, err; 3217 3218 /* We may delete quota structure so we need to reserve enough blocks */ 3219 handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb)); 3220 if (IS_ERR(handle)) { 3221 /* 3222 * We call dquot_drop() anyway to at least release references 3223 * to quota structures so that umount does not hang. 3224 */ 3225 dquot_drop(inode); 3226 return PTR_ERR(handle); 3227 } 3228 ret = dquot_drop(inode); 3229 err = ext4_journal_stop(handle); 3230 if (!ret) 3231 ret = err; 3232 return ret; 3233 } 3234 3235 static int ext4_write_dquot(struct dquot *dquot) 3236 { 3237 int ret, err; 3238 handle_t *handle; 3239 struct inode *inode; 3240 3241 inode = dquot_to_inode(dquot); 3242 handle = ext4_journal_start(inode, 3243 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 3244 if (IS_ERR(handle)) 3245 return PTR_ERR(handle); 3246 ret = dquot_commit(dquot); 3247 err = ext4_journal_stop(handle); 3248 if (!ret) 3249 ret = err; 3250 return ret; 3251 } 3252 3253 static int ext4_acquire_dquot(struct dquot *dquot) 3254 { 3255 int ret, err; 3256 handle_t *handle; 3257 3258 handle = ext4_journal_start(dquot_to_inode(dquot), 3259 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 3260 if (IS_ERR(handle)) 3261 return PTR_ERR(handle); 3262 ret = dquot_acquire(dquot); 3263 err = ext4_journal_stop(handle); 3264 if (!ret) 3265 ret = err; 3266 return ret; 3267 } 3268 3269 static int ext4_release_dquot(struct dquot *dquot) 3270 { 3271 int ret, err; 3272 handle_t *handle; 3273 3274 handle = ext4_journal_start(dquot_to_inode(dquot), 3275 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 3276 if (IS_ERR(handle)) { 3277 /* Release dquot anyway to avoid endless cycle in dqput() */ 3278 dquot_release(dquot); 3279 return PTR_ERR(handle); 3280 } 3281 ret = dquot_release(dquot); 3282 err = ext4_journal_stop(handle); 3283 if (!ret) 3284 ret = err; 3285 return ret; 3286 } 3287 3288 static int ext4_mark_dquot_dirty(struct dquot *dquot) 3289 { 3290 /* Are we journaling quotas? */ 3291 if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 3292 EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 3293 dquot_mark_dquot_dirty(dquot); 3294 return ext4_write_dquot(dquot); 3295 } else { 3296 return dquot_mark_dquot_dirty(dquot); 3297 } 3298 } 3299 3300 static int ext4_write_info(struct super_block *sb, int type) 3301 { 3302 int ret, err; 3303 handle_t *handle; 3304 3305 /* Data block + inode block */ 3306 handle = ext4_journal_start(sb->s_root->d_inode, 2); 3307 if (IS_ERR(handle)) 3308 return PTR_ERR(handle); 3309 ret = dquot_commit_info(sb, type); 3310 err = ext4_journal_stop(handle); 3311 if (!ret) 3312 ret = err; 3313 return ret; 3314 } 3315 3316 /* 3317 * Turn on quotas during mount time - we need to find 3318 * the quota file and such... 3319 */ 3320 static int ext4_quota_on_mount(struct super_block *sb, int type) 3321 { 3322 return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], 3323 EXT4_SB(sb)->s_jquota_fmt, type); 3324 } 3325 3326 /* 3327 * Standard function to be called on quota_on 3328 */ 3329 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 3330 char *name, int remount) 3331 { 3332 int err; 3333 struct path path; 3334 3335 if (!test_opt(sb, QUOTA)) 3336 return -EINVAL; 3337 /* When remounting, no checks are needed and in fact, name is NULL */ 3338 if (remount) 3339 return vfs_quota_on(sb, type, format_id, name, remount); 3340 3341 err = kern_path(name, LOOKUP_FOLLOW, &path); 3342 if (err) 3343 return err; 3344 3345 /* Quotafile not on the same filesystem? */ 3346 if (path.mnt->mnt_sb != sb) { 3347 path_put(&path); 3348 return -EXDEV; 3349 } 3350 /* Journaling quota? */ 3351 if (EXT4_SB(sb)->s_qf_names[type]) { 3352 /* Quotafile not in fs root? */ 3353 if (path.dentry->d_parent != sb->s_root) 3354 printk(KERN_WARNING 3355 "EXT4-fs: Quota file not on filesystem root. " 3356 "Journaled quota will not work.\n"); 3357 } 3358 3359 /* 3360 * When we journal data on quota file, we have to flush journal to see 3361 * all updates to the file when we bypass pagecache... 3362 */ 3363 if (ext4_should_journal_data(path.dentry->d_inode)) { 3364 /* 3365 * We don't need to lock updates but journal_flush() could 3366 * otherwise be livelocked... 3367 */ 3368 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 3369 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 3370 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3371 if (err) { 3372 path_put(&path); 3373 return err; 3374 } 3375 } 3376 3377 err = vfs_quota_on_path(sb, type, format_id, &path); 3378 path_put(&path); 3379 return err; 3380 } 3381 3382 /* Read data from quotafile - avoid pagecache and such because we cannot afford 3383 * acquiring the locks... As quota files are never truncated and quota code 3384 * itself serializes the operations (and noone else should touch the files) 3385 * we don't have to be afraid of races */ 3386 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 3387 size_t len, loff_t off) 3388 { 3389 struct inode *inode = sb_dqopt(sb)->files[type]; 3390 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 3391 int err = 0; 3392 int offset = off & (sb->s_blocksize - 1); 3393 int tocopy; 3394 size_t toread; 3395 struct buffer_head *bh; 3396 loff_t i_size = i_size_read(inode); 3397 3398 if (off > i_size) 3399 return 0; 3400 if (off+len > i_size) 3401 len = i_size-off; 3402 toread = len; 3403 while (toread > 0) { 3404 tocopy = sb->s_blocksize - offset < toread ? 3405 sb->s_blocksize - offset : toread; 3406 bh = ext4_bread(NULL, inode, blk, 0, &err); 3407 if (err) 3408 return err; 3409 if (!bh) /* A hole? */ 3410 memset(data, 0, tocopy); 3411 else 3412 memcpy(data, bh->b_data+offset, tocopy); 3413 brelse(bh); 3414 offset = 0; 3415 toread -= tocopy; 3416 data += tocopy; 3417 blk++; 3418 } 3419 return len; 3420 } 3421 3422 /* Write to quotafile (we know the transaction is already started and has 3423 * enough credits) */ 3424 static ssize_t ext4_quota_write(struct super_block *sb, int type, 3425 const char *data, size_t len, loff_t off) 3426 { 3427 struct inode *inode = sb_dqopt(sb)->files[type]; 3428 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 3429 int err = 0; 3430 int offset = off & (sb->s_blocksize - 1); 3431 int tocopy; 3432 int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; 3433 size_t towrite = len; 3434 struct buffer_head *bh; 3435 handle_t *handle = journal_current_handle(); 3436 3437 if (!handle) { 3438 printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)" 3439 " cancelled because transaction is not started.\n", 3440 (unsigned long long)off, (unsigned long long)len); 3441 return -EIO; 3442 } 3443 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 3444 while (towrite > 0) { 3445 tocopy = sb->s_blocksize - offset < towrite ? 3446 sb->s_blocksize - offset : towrite; 3447 bh = ext4_bread(handle, inode, blk, 1, &err); 3448 if (!bh) 3449 goto out; 3450 if (journal_quota) { 3451 err = ext4_journal_get_write_access(handle, bh); 3452 if (err) { 3453 brelse(bh); 3454 goto out; 3455 } 3456 } 3457 lock_buffer(bh); 3458 memcpy(bh->b_data+offset, data, tocopy); 3459 flush_dcache_page(bh->b_page); 3460 unlock_buffer(bh); 3461 if (journal_quota) 3462 err = ext4_journal_dirty_metadata(handle, bh); 3463 else { 3464 /* Always do at least ordered writes for quotas */ 3465 err = ext4_jbd2_file_inode(handle, inode); 3466 mark_buffer_dirty(bh); 3467 } 3468 brelse(bh); 3469 if (err) 3470 goto out; 3471 offset = 0; 3472 towrite -= tocopy; 3473 data += tocopy; 3474 blk++; 3475 } 3476 out: 3477 if (len == towrite) { 3478 mutex_unlock(&inode->i_mutex); 3479 return err; 3480 } 3481 if (inode->i_size < off+len-towrite) { 3482 i_size_write(inode, off+len-towrite); 3483 EXT4_I(inode)->i_disksize = inode->i_size; 3484 } 3485 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 3486 ext4_mark_inode_dirty(handle, inode); 3487 mutex_unlock(&inode->i_mutex); 3488 return len - towrite; 3489 } 3490 3491 #endif 3492 3493 static int ext4_get_sb(struct file_system_type *fs_type, 3494 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 3495 { 3496 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); 3497 } 3498 3499 #ifdef CONFIG_PROC_FS 3500 static int ext4_ui_proc_show(struct seq_file *m, void *v) 3501 { 3502 unsigned int *p = m->private; 3503 3504 seq_printf(m, "%u\n", *p); 3505 return 0; 3506 } 3507 3508 static int ext4_ui_proc_open(struct inode *inode, struct file *file) 3509 { 3510 return single_open(file, ext4_ui_proc_show, PDE(inode)->data); 3511 } 3512 3513 static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf, 3514 size_t cnt, loff_t *ppos) 3515 { 3516 unsigned int *p = PDE(file->f_path.dentry->d_inode)->data; 3517 char str[32]; 3518 unsigned long value; 3519 3520 if (cnt >= sizeof(str)) 3521 return -EINVAL; 3522 if (copy_from_user(str, buf, cnt)) 3523 return -EFAULT; 3524 value = simple_strtol(str, NULL, 0); 3525 if (value < 0) 3526 return -ERANGE; 3527 *p = value; 3528 return cnt; 3529 } 3530 3531 const struct file_operations ext4_ui_proc_fops = { 3532 .owner = THIS_MODULE, 3533 .open = ext4_ui_proc_open, 3534 .read = seq_read, 3535 .llseek = seq_lseek, 3536 .release = single_release, 3537 .write = ext4_ui_proc_write, 3538 }; 3539 #endif 3540 3541 static struct file_system_type ext4_fs_type = { 3542 .owner = THIS_MODULE, 3543 .name = "ext4", 3544 .get_sb = ext4_get_sb, 3545 .kill_sb = kill_block_super, 3546 .fs_flags = FS_REQUIRES_DEV, 3547 }; 3548 3549 #ifdef CONFIG_EXT4DEV_COMPAT 3550 static int ext4dev_get_sb(struct file_system_type *fs_type, 3551 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 3552 { 3553 printk(KERN_WARNING "EXT4-fs: Update your userspace programs " 3554 "to mount using ext4\n"); 3555 printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility " 3556 "will go away by 2.6.31\n"); 3557 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); 3558 } 3559 3560 static struct file_system_type ext4dev_fs_type = { 3561 .owner = THIS_MODULE, 3562 .name = "ext4dev", 3563 .get_sb = ext4dev_get_sb, 3564 .kill_sb = kill_block_super, 3565 .fs_flags = FS_REQUIRES_DEV, 3566 }; 3567 MODULE_ALIAS("ext4dev"); 3568 #endif 3569 3570 static int __init init_ext4_fs(void) 3571 { 3572 int err; 3573 3574 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 3575 err = init_ext4_mballoc(); 3576 if (err) 3577 return err; 3578 3579 err = init_ext4_xattr(); 3580 if (err) 3581 goto out2; 3582 err = init_inodecache(); 3583 if (err) 3584 goto out1; 3585 err = register_filesystem(&ext4_fs_type); 3586 if (err) 3587 goto out; 3588 #ifdef CONFIG_EXT4DEV_COMPAT 3589 err = register_filesystem(&ext4dev_fs_type); 3590 if (err) { 3591 unregister_filesystem(&ext4_fs_type); 3592 goto out; 3593 } 3594 #endif 3595 return 0; 3596 out: 3597 destroy_inodecache(); 3598 out1: 3599 exit_ext4_xattr(); 3600 out2: 3601 exit_ext4_mballoc(); 3602 return err; 3603 } 3604 3605 static void __exit exit_ext4_fs(void) 3606 { 3607 unregister_filesystem(&ext4_fs_type); 3608 #ifdef CONFIG_EXT4DEV_COMPAT 3609 unregister_filesystem(&ext4dev_fs_type); 3610 #endif 3611 destroy_inodecache(); 3612 exit_ext4_xattr(); 3613 exit_ext4_mballoc(); 3614 remove_proc_entry("fs/ext4", NULL); 3615 } 3616 3617 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 3618 MODULE_DESCRIPTION("Fourth Extended Filesystem with extents"); 3619 MODULE_LICENSE("GPL"); 3620 module_init(init_ext4_fs) 3621 module_exit(exit_ext4_fs) 3622