/*
 *  linux/fs/ext4/super.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/inode.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 */

#include <linux/module.h>
#include <linux/string.h>
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/vmalloc.h>
#include <linux/jbd2.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/parser.h>
#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/exportfs.h>
#include <linux/vfs.h>
#include <linux/random.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/quotaops.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/ctype.h>
#include <linux/marker.h>
#include <linux/log2.h>
#include <linux/crc16.h>
#include <asm/uaccess.h>

#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"

/*
 * Default for the mb_history length.  Exported as the module parameter
 * "default_mb_history_length" (int, permissions 0644).
 */
static int default_mb_history_length = 1000;

module_param_named(default_mb_history_length, default_mb_history_length,
		   int, 0644);
MODULE_PARM_DESC(default_mb_history_length,
		 "Default number of entries saved for mb_history");

/*
 * Parent proc directory entry; ext4_put_super() removes the per-superblock
 * entry (named after sb->s_id) from underneath it.
 */
struct proc_dir_entry *ext4_proc_root;
/* NOTE(review): presumably the kset that the per-sb s_kobj objects join -
 * its users are outside this part of the file, confirm there. */
static struct kset *ext4_kset;

/* Forward declarations of static functions used before their definitions. */
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
			     unsigned long journal_devnum);
static int ext4_commit_super(struct super_block *sb, int sync);
static void ext4_mark_recovery_complete(struct super_block *sb,
					struct ext4_super_block *es);
static void ext4_clear_journal_err(struct super_block *sb,
				   struct ext4_super_block *es);
static int ext4_sync_fs(struct super_block *sb, int wait);
68 static const char *ext4_decode_error(struct super_block *sb, int errno, 69 char nbuf[16]); 70 static int ext4_remount(struct super_block *sb, int *flags, char *data); 71 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); 72 static int ext4_unfreeze(struct super_block *sb); 73 static void ext4_write_super(struct super_block *sb); 74 static int ext4_freeze(struct super_block *sb); 75 76 77 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, 78 struct ext4_group_desc *bg) 79 { 80 return le32_to_cpu(bg->bg_block_bitmap_lo) | 81 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 82 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); 83 } 84 85 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, 86 struct ext4_group_desc *bg) 87 { 88 return le32_to_cpu(bg->bg_inode_bitmap_lo) | 89 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 90 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); 91 } 92 93 ext4_fsblk_t ext4_inode_table(struct super_block *sb, 94 struct ext4_group_desc *bg) 95 { 96 return le32_to_cpu(bg->bg_inode_table_lo) | 97 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 98 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 99 } 100 101 __u32 ext4_free_blks_count(struct super_block *sb, 102 struct ext4_group_desc *bg) 103 { 104 return le16_to_cpu(bg->bg_free_blocks_count_lo) | 105 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 106 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); 107 } 108 109 __u32 ext4_free_inodes_count(struct super_block *sb, 110 struct ext4_group_desc *bg) 111 { 112 return le16_to_cpu(bg->bg_free_inodes_count_lo) | 113 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 114 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); 115 } 116 117 __u32 ext4_used_dirs_count(struct super_block *sb, 118 struct ext4_group_desc *bg) 119 { 120 return le16_to_cpu(bg->bg_used_dirs_count_lo) | 121 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 
122 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); 123 } 124 125 __u32 ext4_itable_unused_count(struct super_block *sb, 126 struct ext4_group_desc *bg) 127 { 128 return le16_to_cpu(bg->bg_itable_unused_lo) | 129 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 130 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); 131 } 132 133 void ext4_block_bitmap_set(struct super_block *sb, 134 struct ext4_group_desc *bg, ext4_fsblk_t blk) 135 { 136 bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk); 137 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 138 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); 139 } 140 141 void ext4_inode_bitmap_set(struct super_block *sb, 142 struct ext4_group_desc *bg, ext4_fsblk_t blk) 143 { 144 bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk); 145 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 146 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); 147 } 148 149 void ext4_inode_table_set(struct super_block *sb, 150 struct ext4_group_desc *bg, ext4_fsblk_t blk) 151 { 152 bg->bg_inode_table_lo = cpu_to_le32((u32)blk); 153 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 154 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); 155 } 156 157 void ext4_free_blks_set(struct super_block *sb, 158 struct ext4_group_desc *bg, __u32 count) 159 { 160 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); 161 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 162 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16); 163 } 164 165 void ext4_free_inodes_set(struct super_block *sb, 166 struct ext4_group_desc *bg, __u32 count) 167 { 168 bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count); 169 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 170 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16); 171 } 172 173 void ext4_used_dirs_set(struct super_block *sb, 174 struct ext4_group_desc *bg, __u32 count) 175 { 176 bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count); 177 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 178 
bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16); 179 } 180 181 void ext4_itable_unused_set(struct super_block *sb, 182 struct ext4_group_desc *bg, __u32 count) 183 { 184 bg->bg_itable_unused_lo = cpu_to_le16((__u16)count); 185 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 186 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); 187 } 188 189 /* 190 * Wrappers for jbd2_journal_start/end. 191 * 192 * The only special thing we need to do here is to make sure that all 193 * journal_end calls result in the superblock being marked dirty, so 194 * that sync() will call the filesystem's write_super callback if 195 * appropriate. 196 */ 197 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 198 { 199 journal_t *journal; 200 201 if (sb->s_flags & MS_RDONLY) 202 return ERR_PTR(-EROFS); 203 204 /* Special case here: if the journal has aborted behind our 205 * backs (eg. EIO in the commit thread), then we still need to 206 * take the FS itself readonly cleanly. */ 207 journal = EXT4_SB(sb)->s_journal; 208 if (journal) { 209 if (is_journal_aborted(journal)) { 210 ext4_abort(sb, __func__, "Detected aborted journal"); 211 return ERR_PTR(-EROFS); 212 } 213 return jbd2_journal_start(journal, nblocks); 214 } 215 /* 216 * We're not journaling, return the appropriate indication. 217 */ 218 current->journal_info = EXT4_NOJOURNAL_HANDLE; 219 return current->journal_info; 220 } 221 222 /* 223 * The only special thing we need to do here is to make sure that all 224 * jbd2_journal_stop calls result in the superblock being marked dirty, so 225 * that sync() will call the filesystem's write_super callback if 226 * appropriate. 227 */ 228 int __ext4_journal_stop(const char *where, handle_t *handle) 229 { 230 struct super_block *sb; 231 int err; 232 int rc; 233 234 if (!ext4_handle_valid(handle)) { 235 /* 236 * Do this here since we don't call jbd2_journal_stop() in 237 * no-journal mode. 
238 */ 239 current->journal_info = NULL; 240 return 0; 241 } 242 sb = handle->h_transaction->t_journal->j_private; 243 err = handle->h_err; 244 rc = jbd2_journal_stop(handle); 245 246 if (!err) 247 err = rc; 248 if (err) 249 __ext4_std_error(sb, where, err); 250 return err; 251 } 252 253 void ext4_journal_abort_handle(const char *caller, const char *err_fn, 254 struct buffer_head *bh, handle_t *handle, int err) 255 { 256 char nbuf[16]; 257 const char *errstr = ext4_decode_error(NULL, err, nbuf); 258 259 BUG_ON(!ext4_handle_valid(handle)); 260 261 if (bh) 262 BUFFER_TRACE(bh, "abort"); 263 264 if (!handle->h_err) 265 handle->h_err = err; 266 267 if (is_handle_aborted(handle)) 268 return; 269 270 printk(KERN_ERR "%s: aborting transaction: %s in %s\n", 271 caller, errstr, err_fn); 272 273 jbd2_journal_abort_handle(handle); 274 } 275 276 /* Deal with the reporting of failure conditions on a filesystem such as 277 * inconsistencies detected or read IO failures. 278 * 279 * On ext2, we can store the error state of the filesystem in the 280 * superblock. That is not possible on ext4, because we may have other 281 * write ordering constraints on the superblock which prevent us from 282 * writing it out straight away; and given that the journal is about to 283 * be aborted, we can't rely on the current, or future, transactions to 284 * write out the superblock safely. 285 * 286 * We'll just use the jbd2_journal_abort() error code to record an error in 287 * the journal instead. On recovery, the journal will compain about 288 * that error until we've noted it down and cleared it. 
289 */ 290 291 static void ext4_handle_error(struct super_block *sb) 292 { 293 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 294 295 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 296 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 297 298 if (sb->s_flags & MS_RDONLY) 299 return; 300 301 if (!test_opt(sb, ERRORS_CONT)) { 302 journal_t *journal = EXT4_SB(sb)->s_journal; 303 304 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 305 if (journal) 306 jbd2_journal_abort(journal, -EIO); 307 } 308 if (test_opt(sb, ERRORS_RO)) { 309 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 310 sb->s_flags |= MS_RDONLY; 311 } 312 ext4_commit_super(sb, 1); 313 if (test_opt(sb, ERRORS_PANIC)) 314 panic("EXT4-fs (device %s): panic forced after error\n", 315 sb->s_id); 316 } 317 318 void ext4_error(struct super_block *sb, const char *function, 319 const char *fmt, ...) 320 { 321 va_list args; 322 323 va_start(args, fmt); 324 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 325 vprintk(fmt, args); 326 printk("\n"); 327 va_end(args); 328 329 ext4_handle_error(sb); 330 } 331 332 static const char *ext4_decode_error(struct super_block *sb, int errno, 333 char nbuf[16]) 334 { 335 char *errstr = NULL; 336 337 switch (errno) { 338 case -EIO: 339 errstr = "IO failure"; 340 break; 341 case -ENOMEM: 342 errstr = "Out of memory"; 343 break; 344 case -EROFS: 345 if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT) 346 errstr = "Journal has aborted"; 347 else 348 errstr = "Readonly filesystem"; 349 break; 350 default: 351 /* If the caller passed in an extra buffer for unknown 352 * errors, textualise them now. Else we just return 353 * NULL. */ 354 if (nbuf) { 355 /* Check for truncated error codes... */ 356 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 357 errstr = nbuf; 358 } 359 break; 360 } 361 362 return errstr; 363 } 364 365 /* __ext4_std_error decodes expected errors from journaling functions 366 * automatically and invokes the appropriate error response. 
*/ 367 368 void __ext4_std_error(struct super_block *sb, const char *function, int errno) 369 { 370 char nbuf[16]; 371 const char *errstr; 372 373 /* Special case: if the error is EROFS, and we're not already 374 * inside a transaction, then there's really no point in logging 375 * an error. */ 376 if (errno == -EROFS && journal_current_handle() == NULL && 377 (sb->s_flags & MS_RDONLY)) 378 return; 379 380 errstr = ext4_decode_error(sb, errno, nbuf); 381 printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", 382 sb->s_id, function, errstr); 383 384 ext4_handle_error(sb); 385 } 386 387 /* 388 * ext4_abort is a much stronger failure handler than ext4_error. The 389 * abort function may be used to deal with unrecoverable failures such 390 * as journal IO errors or ENOMEM at a critical moment in log management. 391 * 392 * We unconditionally force the filesystem into an ABORT|READONLY state, 393 * unless the error response on the fs has been set to panic in which 394 * case we take the easy way out and panic immediately. 395 */ 396 397 void ext4_abort(struct super_block *sb, const char *function, 398 const char *fmt, ...) 399 { 400 va_list args; 401 402 va_start(args, fmt); 403 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 404 vprintk(fmt, args); 405 printk("\n"); 406 va_end(args); 407 408 if (test_opt(sb, ERRORS_PANIC)) 409 panic("EXT4-fs panic from previous error\n"); 410 411 if (sb->s_flags & MS_RDONLY) 412 return; 413 414 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 415 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 416 sb->s_flags |= MS_RDONLY; 417 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 418 if (EXT4_SB(sb)->s_journal) 419 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 420 } 421 422 void ext4_msg (struct super_block * sb, const char *prefix, 423 const char *fmt, ...) 
424 { 425 va_list args; 426 427 va_start(args, fmt); 428 printk("%sEXT4-fs (%s): ", prefix, sb->s_id); 429 vprintk(fmt, args); 430 printk("\n"); 431 va_end(args); 432 } 433 434 void ext4_warning(struct super_block *sb, const char *function, 435 const char *fmt, ...) 436 { 437 va_list args; 438 439 va_start(args, fmt); 440 printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ", 441 sb->s_id, function); 442 vprintk(fmt, args); 443 printk("\n"); 444 va_end(args); 445 } 446 447 void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, 448 const char *function, const char *fmt, ...) 449 __releases(bitlock) 450 __acquires(bitlock) 451 { 452 va_list args; 453 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 454 455 va_start(args, fmt); 456 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 457 vprintk(fmt, args); 458 printk("\n"); 459 va_end(args); 460 461 if (test_opt(sb, ERRORS_CONT)) { 462 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 463 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 464 ext4_commit_super(sb, 0); 465 return; 466 } 467 ext4_unlock_group(sb, grp); 468 ext4_handle_error(sb); 469 /* 470 * We only get here in the ERRORS_RO case; relocking the group 471 * may be dangerous, but nothing bad will happen since the 472 * filesystem will have already been marked read/only and the 473 * journal has been aborted. We return 1 as a hint to callers 474 * who might what to use the return value from 475 * ext4_grp_locked_error() to distinguish beween the 476 * ERRORS_CONT and ERRORS_RO case, and perhaps return more 477 * aggressively from the ext4 function in question, with a 478 * more appropriate error code. 
479 */ 480 ext4_lock_group(sb, grp); 481 return; 482 } 483 484 void ext4_update_dynamic_rev(struct super_block *sb) 485 { 486 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 487 488 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 489 return; 490 491 ext4_warning(sb, __func__, 492 "updating to rev %d because of new feature flag, " 493 "running e2fsck is recommended", 494 EXT4_DYNAMIC_REV); 495 496 es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO); 497 es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE); 498 es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV); 499 /* leave es->s_feature_*compat flags alone */ 500 /* es->s_uuid will be set by e2fsck if empty */ 501 502 /* 503 * The rest of the superblock fields should be zero, and if not it 504 * means they are likely already in use, so leave them alone. We 505 * can leave it up to e2fsck to clean up any inconsistencies there. 506 */ 507 } 508 509 /* 510 * Open the external journal device 511 */ 512 static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) 513 { 514 struct block_device *bdev; 515 char b[BDEVNAME_SIZE]; 516 517 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); 518 if (IS_ERR(bdev)) 519 goto fail; 520 return bdev; 521 522 fail: 523 ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld", 524 __bdevname(dev, b), PTR_ERR(bdev)); 525 return NULL; 526 } 527 528 /* 529 * Release the journal device 530 */ 531 static int ext4_blkdev_put(struct block_device *bdev) 532 { 533 bd_release(bdev); 534 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 535 } 536 537 static int ext4_blkdev_remove(struct ext4_sb_info *sbi) 538 { 539 struct block_device *bdev; 540 int ret = -ENODEV; 541 542 bdev = sbi->journal_bdev; 543 if (bdev) { 544 ret = ext4_blkdev_put(bdev); 545 sbi->journal_bdev = NULL; 546 } 547 return ret; 548 } 549 550 static inline struct inode *orphan_list_entry(struct list_head *l) 551 { 552 return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode; 553 } 554 
555 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) 556 { 557 struct list_head *l; 558 559 ext4_msg(sb, KERN_ERR, "sb orphan head is %d", 560 le32_to_cpu(sbi->s_es->s_last_orphan)); 561 562 printk(KERN_ERR "sb_info orphan list:\n"); 563 list_for_each(l, &sbi->s_orphan) { 564 struct inode *inode = orphan_list_entry(l); 565 printk(KERN_ERR " " 566 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 567 inode->i_sb->s_id, inode->i_ino, inode, 568 inode->i_mode, inode->i_nlink, 569 NEXT_ORPHAN(inode)); 570 } 571 } 572 573 static void ext4_put_super(struct super_block *sb) 574 { 575 struct ext4_sb_info *sbi = EXT4_SB(sb); 576 struct ext4_super_block *es = sbi->s_es; 577 int i, err; 578 579 lock_super(sb); 580 lock_kernel(); 581 if (sb->s_dirt) 582 ext4_commit_super(sb, 1); 583 584 ext4_release_system_zone(sb); 585 ext4_mb_release(sb); 586 ext4_ext_release(sb); 587 ext4_xattr_put_super(sb); 588 if (sbi->s_journal) { 589 err = jbd2_journal_destroy(sbi->s_journal); 590 sbi->s_journal = NULL; 591 if (err < 0) 592 ext4_abort(sb, __func__, 593 "Couldn't clean up the journal"); 594 } 595 if (!(sb->s_flags & MS_RDONLY)) { 596 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 597 es->s_state = cpu_to_le16(sbi->s_mount_state); 598 ext4_commit_super(sb, 1); 599 } 600 if (sbi->s_proc) { 601 remove_proc_entry(sb->s_id, ext4_proc_root); 602 } 603 kobject_del(&sbi->s_kobj); 604 605 for (i = 0; i < sbi->s_gdb_count; i++) 606 brelse(sbi->s_group_desc[i]); 607 kfree(sbi->s_group_desc); 608 if (is_vmalloc_addr(sbi->s_flex_groups)) 609 vfree(sbi->s_flex_groups); 610 else 611 kfree(sbi->s_flex_groups); 612 percpu_counter_destroy(&sbi->s_freeblocks_counter); 613 percpu_counter_destroy(&sbi->s_freeinodes_counter); 614 percpu_counter_destroy(&sbi->s_dirs_counter); 615 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 616 brelse(sbi->s_sbh); 617 #ifdef CONFIG_QUOTA 618 for (i = 0; i < MAXQUOTAS; i++) 619 kfree(sbi->s_qf_names[i]); 620 #endif 621 
622 /* Debugging code just in case the in-memory inode orphan list 623 * isn't empty. The on-disk one can be non-empty if we've 624 * detected an error and taken the fs readonly, but the 625 * in-memory list had better be clean by this point. */ 626 if (!list_empty(&sbi->s_orphan)) 627 dump_orphan_list(sb, sbi); 628 J_ASSERT(list_empty(&sbi->s_orphan)); 629 630 invalidate_bdev(sb->s_bdev); 631 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { 632 /* 633 * Invalidate the journal device's buffers. We don't want them 634 * floating about in memory - the physical journal device may 635 * hotswapped, and it breaks the `ro-after' testing code. 636 */ 637 sync_blockdev(sbi->journal_bdev); 638 invalidate_bdev(sbi->journal_bdev); 639 ext4_blkdev_remove(sbi); 640 } 641 sb->s_fs_info = NULL; 642 /* 643 * Now that we are completely done shutting down the 644 * superblock, we need to actually destroy the kobject. 645 */ 646 unlock_kernel(); 647 unlock_super(sb); 648 kobject_put(&sbi->s_kobj); 649 wait_for_completion(&sbi->s_kobj_unregister); 650 kfree(sbi->s_blockgroup_lock); 651 kfree(sbi); 652 } 653 654 static struct kmem_cache *ext4_inode_cachep; 655 656 /* 657 * Called inside transaction, so use GFP_NOFS 658 */ 659 static struct inode *ext4_alloc_inode(struct super_block *sb) 660 { 661 struct ext4_inode_info *ei; 662 663 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); 664 if (!ei) 665 return NULL; 666 667 #ifdef CONFIG_EXT4_FS_POSIX_ACL 668 ei->i_acl = EXT4_ACL_NOT_CACHED; 669 ei->i_default_acl = EXT4_ACL_NOT_CACHED; 670 #endif 671 ei->vfs_inode.i_version = 1; 672 ei->vfs_inode.i_data.writeback_index = 0; 673 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 674 INIT_LIST_HEAD(&ei->i_prealloc_list); 675 spin_lock_init(&ei->i_prealloc_lock); 676 /* 677 * Note: We can be called before EXT4_SB(sb)->s_journal is set, 678 * therefore it can be null here. Don't check it, just initialize 679 * jinode. 
680 */ 681 jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode); 682 ei->i_reserved_data_blocks = 0; 683 ei->i_reserved_meta_blocks = 0; 684 ei->i_allocated_meta_blocks = 0; 685 ei->i_delalloc_reserved_flag = 0; 686 spin_lock_init(&(ei->i_block_reservation_lock)); 687 688 return &ei->vfs_inode; 689 } 690 691 static void ext4_destroy_inode(struct inode *inode) 692 { 693 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 694 ext4_msg(inode->i_sb, KERN_ERR, 695 "Inode %lu (%p): orphan list check failed!", 696 inode->i_ino, EXT4_I(inode)); 697 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, 698 EXT4_I(inode), sizeof(struct ext4_inode_info), 699 true); 700 dump_stack(); 701 } 702 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 703 } 704 705 static void init_once(void *foo) 706 { 707 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 708 709 INIT_LIST_HEAD(&ei->i_orphan); 710 #ifdef CONFIG_EXT4_FS_XATTR 711 init_rwsem(&ei->xattr_sem); 712 #endif 713 init_rwsem(&ei->i_data_sem); 714 inode_init_once(&ei->vfs_inode); 715 } 716 717 static int init_inodecache(void) 718 { 719 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", 720 sizeof(struct ext4_inode_info), 721 0, (SLAB_RECLAIM_ACCOUNT| 722 SLAB_MEM_SPREAD), 723 init_once); 724 if (ext4_inode_cachep == NULL) 725 return -ENOMEM; 726 return 0; 727 } 728 729 static void destroy_inodecache(void) 730 { 731 kmem_cache_destroy(ext4_inode_cachep); 732 } 733 734 static void ext4_clear_inode(struct inode *inode) 735 { 736 #ifdef CONFIG_EXT4_FS_POSIX_ACL 737 if (EXT4_I(inode)->i_acl && 738 EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { 739 posix_acl_release(EXT4_I(inode)->i_acl); 740 EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED; 741 } 742 if (EXT4_I(inode)->i_default_acl && 743 EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) { 744 posix_acl_release(EXT4_I(inode)->i_default_acl); 745 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; 746 } 747 #endif 748 ext4_discard_preallocations(inode); 749 if 
(EXT4_JOURNAL(inode)) 750 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, 751 &EXT4_I(inode)->jinode); 752 } 753 754 static inline void ext4_show_quota_options(struct seq_file *seq, 755 struct super_block *sb) 756 { 757 #if defined(CONFIG_QUOTA) 758 struct ext4_sb_info *sbi = EXT4_SB(sb); 759 760 if (sbi->s_jquota_fmt) 761 seq_printf(seq, ",jqfmt=%s", 762 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0"); 763 764 if (sbi->s_qf_names[USRQUOTA]) 765 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 766 767 if (sbi->s_qf_names[GRPQUOTA]) 768 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 769 770 if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) 771 seq_puts(seq, ",usrquota"); 772 773 if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) 774 seq_puts(seq, ",grpquota"); 775 #endif 776 } 777 778 /* 779 * Show an option if 780 * - it's set to a non-default value OR 781 * - if the per-sb default is different from the global default 782 */ 783 static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) 784 { 785 int def_errors; 786 unsigned long def_mount_opts; 787 struct super_block *sb = vfs->mnt_sb; 788 struct ext4_sb_info *sbi = EXT4_SB(sb); 789 struct ext4_super_block *es = sbi->s_es; 790 791 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 792 def_errors = le16_to_cpu(es->s_errors); 793 794 if (sbi->s_sb_block != 1) 795 seq_printf(seq, ",sb=%llu", sbi->s_sb_block); 796 if (test_opt(sb, MINIX_DF)) 797 seq_puts(seq, ",minixdf"); 798 if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS)) 799 seq_puts(seq, ",grpid"); 800 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) 801 seq_puts(seq, ",nogrpid"); 802 if (sbi->s_resuid != EXT4_DEF_RESUID || 803 le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) { 804 seq_printf(seq, ",resuid=%u", sbi->s_resuid); 805 } 806 if (sbi->s_resgid != EXT4_DEF_RESGID || 807 le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { 808 seq_printf(seq, ",resgid=%u", 
sbi->s_resgid); 809 } 810 if (test_opt(sb, ERRORS_RO)) { 811 if (def_errors == EXT4_ERRORS_PANIC || 812 def_errors == EXT4_ERRORS_CONTINUE) { 813 seq_puts(seq, ",errors=remount-ro"); 814 } 815 } 816 if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) 817 seq_puts(seq, ",errors=continue"); 818 if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) 819 seq_puts(seq, ",errors=panic"); 820 if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16)) 821 seq_puts(seq, ",nouid32"); 822 if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) 823 seq_puts(seq, ",debug"); 824 if (test_opt(sb, OLDALLOC)) 825 seq_puts(seq, ",oldalloc"); 826 #ifdef CONFIG_EXT4_FS_XATTR 827 if (test_opt(sb, XATTR_USER) && 828 !(def_mount_opts & EXT4_DEFM_XATTR_USER)) 829 seq_puts(seq, ",user_xattr"); 830 if (!test_opt(sb, XATTR_USER) && 831 (def_mount_opts & EXT4_DEFM_XATTR_USER)) { 832 seq_puts(seq, ",nouser_xattr"); 833 } 834 #endif 835 #ifdef CONFIG_EXT4_FS_POSIX_ACL 836 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) 837 seq_puts(seq, ",acl"); 838 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) 839 seq_puts(seq, ",noacl"); 840 #endif 841 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { 842 seq_printf(seq, ",commit=%u", 843 (unsigned) (sbi->s_commit_interval / HZ)); 844 } 845 if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) { 846 seq_printf(seq, ",min_batch_time=%u", 847 (unsigned) sbi->s_min_batch_time); 848 } 849 if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { 850 seq_printf(seq, ",max_batch_time=%u", 851 (unsigned) sbi->s_min_batch_time); 852 } 853 854 /* 855 * We're changing the default of barrier mount option, so 856 * let's always display its mount state so it's clear what its 857 * status is. 858 */ 859 seq_puts(seq, ",barrier="); 860 seq_puts(seq, test_opt(sb, BARRIER) ? 
"1" : "0"); 861 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) 862 seq_puts(seq, ",journal_async_commit"); 863 if (test_opt(sb, NOBH)) 864 seq_puts(seq, ",nobh"); 865 if (test_opt(sb, I_VERSION)) 866 seq_puts(seq, ",i_version"); 867 if (!test_opt(sb, DELALLOC)) 868 seq_puts(seq, ",nodelalloc"); 869 870 871 if (sbi->s_stripe) 872 seq_printf(seq, ",stripe=%lu", sbi->s_stripe); 873 /* 874 * journal mode get enabled in different ways 875 * So just print the value even if we didn't specify it 876 */ 877 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 878 seq_puts(seq, ",data=journal"); 879 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 880 seq_puts(seq, ",data=ordered"); 881 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 882 seq_puts(seq, ",data=writeback"); 883 884 if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) 885 seq_printf(seq, ",inode_readahead_blks=%u", 886 sbi->s_inode_readahead_blks); 887 888 if (test_opt(sb, DATA_ERR_ABORT)) 889 seq_puts(seq, ",data_err=abort"); 890 891 if (test_opt(sb, NO_AUTO_DA_ALLOC)) 892 seq_puts(seq, ",noauto_da_alloc"); 893 894 ext4_show_quota_options(seq, sb); 895 896 return 0; 897 } 898 899 static struct inode *ext4_nfs_get_inode(struct super_block *sb, 900 u64 ino, u32 generation) 901 { 902 struct inode *inode; 903 904 if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) 905 return ERR_PTR(-ESTALE); 906 if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) 907 return ERR_PTR(-ESTALE); 908 909 /* iget isn't really right if the inode is currently unallocated!! 910 * 911 * ext4_read_inode will return a bad_inode if the inode had been 912 * deleted, so we should be safe. 
913 * 914 * Currently we don't know the generation for parent directory, so 915 * a generation of 0 means "accept any" 916 */ 917 inode = ext4_iget(sb, ino); 918 if (IS_ERR(inode)) 919 return ERR_CAST(inode); 920 if (generation && inode->i_generation != generation) { 921 iput(inode); 922 return ERR_PTR(-ESTALE); 923 } 924 925 return inode; 926 } 927 928 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, 929 int fh_len, int fh_type) 930 { 931 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 932 ext4_nfs_get_inode); 933 } 934 935 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, 936 int fh_len, int fh_type) 937 { 938 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 939 ext4_nfs_get_inode); 940 } 941 942 /* 943 * Try to release metadata pages (indirect blocks, directories) which are 944 * mapped via the block device. Since these pages could have journal heads 945 * which would prevent try_to_free_buffers() from freeing them, we must use 946 * jbd2 layer's try_to_free_buffers() function to release them. 947 */ 948 static int bdev_try_to_free_page(struct super_block *sb, struct page *page, 949 gfp_t wait) 950 { 951 journal_t *journal = EXT4_SB(sb)->s_journal; 952 953 WARN_ON(PageChecked(page)); 954 if (!page_has_buffers(page)) 955 return 0; 956 if (journal) 957 return jbd2_journal_try_to_free_buffers(journal, page, 958 wait & ~__GFP_WAIT); 959 return try_to_free_buffers(page); 960 } 961 962 #ifdef CONFIG_QUOTA 963 #define QTYPE2NAME(t) ((t) == USRQUOTA ? 
"user" : "group") 964 #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 965 966 static int ext4_write_dquot(struct dquot *dquot); 967 static int ext4_acquire_dquot(struct dquot *dquot); 968 static int ext4_release_dquot(struct dquot *dquot); 969 static int ext4_mark_dquot_dirty(struct dquot *dquot); 970 static int ext4_write_info(struct super_block *sb, int type); 971 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 972 char *path, int remount); 973 static int ext4_quota_on_mount(struct super_block *sb, int type); 974 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 975 size_t len, loff_t off); 976 static ssize_t ext4_quota_write(struct super_block *sb, int type, 977 const char *data, size_t len, loff_t off); 978 979 static struct dquot_operations ext4_quota_operations = { 980 .initialize = dquot_initialize, 981 .drop = dquot_drop, 982 .alloc_space = dquot_alloc_space, 983 .reserve_space = dquot_reserve_space, 984 .claim_space = dquot_claim_space, 985 .release_rsv = dquot_release_reserved_space, 986 .get_reserved_space = ext4_get_reserved_space, 987 .alloc_inode = dquot_alloc_inode, 988 .free_space = dquot_free_space, 989 .free_inode = dquot_free_inode, 990 .transfer = dquot_transfer, 991 .write_dquot = ext4_write_dquot, 992 .acquire_dquot = ext4_acquire_dquot, 993 .release_dquot = ext4_release_dquot, 994 .mark_dirty = ext4_mark_dquot_dirty, 995 .write_info = ext4_write_info, 996 .alloc_dquot = dquot_alloc, 997 .destroy_dquot = dquot_destroy, 998 }; 999 1000 static struct quotactl_ops ext4_qctl_operations = { 1001 .quota_on = ext4_quota_on, 1002 .quota_off = vfs_quota_off, 1003 .quota_sync = vfs_quota_sync, 1004 .get_info = vfs_get_dqinfo, 1005 .set_info = vfs_set_dqinfo, 1006 .get_dqblk = vfs_get_dqblk, 1007 .set_dqblk = vfs_set_dqblk 1008 }; 1009 #endif 1010 1011 static const struct super_operations ext4_sops = { 1012 .alloc_inode = ext4_alloc_inode, 1013 .destroy_inode = 
ext4_destroy_inode, 1014 .write_inode = ext4_write_inode, 1015 .dirty_inode = ext4_dirty_inode, 1016 .delete_inode = ext4_delete_inode, 1017 .put_super = ext4_put_super, 1018 .sync_fs = ext4_sync_fs, 1019 .freeze_fs = ext4_freeze, 1020 .unfreeze_fs = ext4_unfreeze, 1021 .statfs = ext4_statfs, 1022 .remount_fs = ext4_remount, 1023 .clear_inode = ext4_clear_inode, 1024 .show_options = ext4_show_options, 1025 #ifdef CONFIG_QUOTA 1026 .quota_read = ext4_quota_read, 1027 .quota_write = ext4_quota_write, 1028 #endif 1029 .bdev_try_to_free_page = bdev_try_to_free_page, 1030 }; 1031 1032 static const struct super_operations ext4_nojournal_sops = { 1033 .alloc_inode = ext4_alloc_inode, 1034 .destroy_inode = ext4_destroy_inode, 1035 .write_inode = ext4_write_inode, 1036 .dirty_inode = ext4_dirty_inode, 1037 .delete_inode = ext4_delete_inode, 1038 .write_super = ext4_write_super, 1039 .put_super = ext4_put_super, 1040 .statfs = ext4_statfs, 1041 .remount_fs = ext4_remount, 1042 .clear_inode = ext4_clear_inode, 1043 .show_options = ext4_show_options, 1044 #ifdef CONFIG_QUOTA 1045 .quota_read = ext4_quota_read, 1046 .quota_write = ext4_quota_write, 1047 #endif 1048 .bdev_try_to_free_page = bdev_try_to_free_page, 1049 }; 1050 1051 static const struct export_operations ext4_export_ops = { 1052 .fh_to_dentry = ext4_fh_to_dentry, 1053 .fh_to_parent = ext4_fh_to_parent, 1054 .get_parent = ext4_get_parent, 1055 }; 1056 1057 enum { 1058 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 1059 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 1060 Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, 1061 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 1062 Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh, 1063 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, 1064 Opt_journal_update, Opt_journal_dev, 1065 Opt_journal_checksum, Opt_journal_async_commit, 1066 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1067 
Opt_data_err_abort, Opt_data_err_ignore, Opt_mb_history_length, 1068 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1069 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 1070 Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, 1071 Opt_usrquota, Opt_grpquota, Opt_i_version, 1072 Opt_stripe, Opt_delalloc, Opt_nodelalloc, 1073 Opt_block_validity, Opt_noblock_validity, 1074 Opt_inode_readahead_blks, Opt_journal_ioprio 1075 }; 1076 1077 static const match_table_t tokens = { 1078 {Opt_bsd_df, "bsddf"}, 1079 {Opt_minix_df, "minixdf"}, 1080 {Opt_grpid, "grpid"}, 1081 {Opt_grpid, "bsdgroups"}, 1082 {Opt_nogrpid, "nogrpid"}, 1083 {Opt_nogrpid, "sysvgroups"}, 1084 {Opt_resgid, "resgid=%u"}, 1085 {Opt_resuid, "resuid=%u"}, 1086 {Opt_sb, "sb=%u"}, 1087 {Opt_err_cont, "errors=continue"}, 1088 {Opt_err_panic, "errors=panic"}, 1089 {Opt_err_ro, "errors=remount-ro"}, 1090 {Opt_nouid32, "nouid32"}, 1091 {Opt_debug, "debug"}, 1092 {Opt_oldalloc, "oldalloc"}, 1093 {Opt_orlov, "orlov"}, 1094 {Opt_user_xattr, "user_xattr"}, 1095 {Opt_nouser_xattr, "nouser_xattr"}, 1096 {Opt_acl, "acl"}, 1097 {Opt_noacl, "noacl"}, 1098 {Opt_noload, "noload"}, 1099 {Opt_nobh, "nobh"}, 1100 {Opt_bh, "bh"}, 1101 {Opt_commit, "commit=%u"}, 1102 {Opt_min_batch_time, "min_batch_time=%u"}, 1103 {Opt_max_batch_time, "max_batch_time=%u"}, 1104 {Opt_journal_update, "journal=update"}, 1105 {Opt_journal_dev, "journal_dev=%u"}, 1106 {Opt_journal_checksum, "journal_checksum"}, 1107 {Opt_journal_async_commit, "journal_async_commit"}, 1108 {Opt_abort, "abort"}, 1109 {Opt_data_journal, "data=journal"}, 1110 {Opt_data_ordered, "data=ordered"}, 1111 {Opt_data_writeback, "data=writeback"}, 1112 {Opt_data_err_abort, "data_err=abort"}, 1113 {Opt_data_err_ignore, "data_err=ignore"}, 1114 {Opt_mb_history_length, "mb_history_length=%u"}, 1115 {Opt_offusrjquota, "usrjquota="}, 1116 {Opt_usrjquota, "usrjquota=%s"}, 1117 {Opt_offgrpjquota, "grpjquota="}, 1118 {Opt_grpjquota, "grpjquota=%s"}, 
1119 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 1120 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 1121 {Opt_grpquota, "grpquota"}, 1122 {Opt_noquota, "noquota"}, 1123 {Opt_quota, "quota"}, 1124 {Opt_usrquota, "usrquota"}, 1125 {Opt_barrier, "barrier=%u"}, 1126 {Opt_barrier, "barrier"}, 1127 {Opt_nobarrier, "nobarrier"}, 1128 {Opt_i_version, "i_version"}, 1129 {Opt_stripe, "stripe=%u"}, 1130 {Opt_resize, "resize"}, 1131 {Opt_delalloc, "delalloc"}, 1132 {Opt_nodelalloc, "nodelalloc"}, 1133 {Opt_block_validity, "block_validity"}, 1134 {Opt_noblock_validity, "noblock_validity"}, 1135 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1136 {Opt_journal_ioprio, "journal_ioprio=%u"}, 1137 {Opt_auto_da_alloc, "auto_da_alloc=%u"}, 1138 {Opt_auto_da_alloc, "auto_da_alloc"}, 1139 {Opt_noauto_da_alloc, "noauto_da_alloc"}, 1140 {Opt_err, NULL}, 1141 }; 1142 1143 static ext4_fsblk_t get_sb_block(void **data) 1144 { 1145 ext4_fsblk_t sb_block; 1146 char *options = (char *) *data; 1147 1148 if (!options || strncmp(options, "sb=", 3) != 0) 1149 return 1; /* Default location */ 1150 1151 options += 3; 1152 /* TODO: use simple_strtoll with >32bit ext4 */ 1153 sb_block = simple_strtoul(options, &options, 0); 1154 if (*options && *options != ',') { 1155 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", 1156 (char *) *data); 1157 return 1; 1158 } 1159 if (*options == ',') 1160 options++; 1161 *data = (void *) options; 1162 1163 return sb_block; 1164 } 1165 1166 #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) 1167 1168 static int parse_options(char *options, struct super_block *sb, 1169 unsigned long *journal_devnum, 1170 unsigned int *journal_ioprio, 1171 ext4_fsblk_t *n_blocks_count, int is_remount) 1172 { 1173 struct ext4_sb_info *sbi = EXT4_SB(sb); 1174 char *p; 1175 substring_t args[MAX_OPT_ARGS]; 1176 int data_opt = 0; 1177 int option; 1178 #ifdef CONFIG_QUOTA 1179 int qtype, qfmt; 1180 char *qname; 1181 #endif 1182 1183 if (!options) 1184 return 1; 1185 1186 
while ((p = strsep(&options, ",")) != NULL) { 1187 int token; 1188 if (!*p) 1189 continue; 1190 1191 token = match_token(p, tokens, args); 1192 switch (token) { 1193 case Opt_bsd_df: 1194 clear_opt(sbi->s_mount_opt, MINIX_DF); 1195 break; 1196 case Opt_minix_df: 1197 set_opt(sbi->s_mount_opt, MINIX_DF); 1198 break; 1199 case Opt_grpid: 1200 set_opt(sbi->s_mount_opt, GRPID); 1201 break; 1202 case Opt_nogrpid: 1203 clear_opt(sbi->s_mount_opt, GRPID); 1204 break; 1205 case Opt_resuid: 1206 if (match_int(&args[0], &option)) 1207 return 0; 1208 sbi->s_resuid = option; 1209 break; 1210 case Opt_resgid: 1211 if (match_int(&args[0], &option)) 1212 return 0; 1213 sbi->s_resgid = option; 1214 break; 1215 case Opt_sb: 1216 /* handled by get_sb_block() instead of here */ 1217 /* *sb_block = match_int(&args[0]); */ 1218 break; 1219 case Opt_err_panic: 1220 clear_opt(sbi->s_mount_opt, ERRORS_CONT); 1221 clear_opt(sbi->s_mount_opt, ERRORS_RO); 1222 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 1223 break; 1224 case Opt_err_ro: 1225 clear_opt(sbi->s_mount_opt, ERRORS_CONT); 1226 clear_opt(sbi->s_mount_opt, ERRORS_PANIC); 1227 set_opt(sbi->s_mount_opt, ERRORS_RO); 1228 break; 1229 case Opt_err_cont: 1230 clear_opt(sbi->s_mount_opt, ERRORS_RO); 1231 clear_opt(sbi->s_mount_opt, ERRORS_PANIC); 1232 set_opt(sbi->s_mount_opt, ERRORS_CONT); 1233 break; 1234 case Opt_nouid32: 1235 set_opt(sbi->s_mount_opt, NO_UID32); 1236 break; 1237 case Opt_debug: 1238 set_opt(sbi->s_mount_opt, DEBUG); 1239 break; 1240 case Opt_oldalloc: 1241 set_opt(sbi->s_mount_opt, OLDALLOC); 1242 break; 1243 case Opt_orlov: 1244 clear_opt(sbi->s_mount_opt, OLDALLOC); 1245 break; 1246 #ifdef CONFIG_EXT4_FS_XATTR 1247 case Opt_user_xattr: 1248 set_opt(sbi->s_mount_opt, XATTR_USER); 1249 break; 1250 case Opt_nouser_xattr: 1251 clear_opt(sbi->s_mount_opt, XATTR_USER); 1252 break; 1253 #else 1254 case Opt_user_xattr: 1255 case Opt_nouser_xattr: 1256 ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported"); 1257 break; 
1258 #endif 1259 #ifdef CONFIG_EXT4_FS_POSIX_ACL 1260 case Opt_acl: 1261 set_opt(sbi->s_mount_opt, POSIX_ACL); 1262 break; 1263 case Opt_noacl: 1264 clear_opt(sbi->s_mount_opt, POSIX_ACL); 1265 break; 1266 #else 1267 case Opt_acl: 1268 case Opt_noacl: 1269 ext4_msg(sb, KERN_ERR, "(no)acl options not supported"); 1270 break; 1271 #endif 1272 case Opt_journal_update: 1273 /* @@@ FIXME */ 1274 /* Eventually we will want to be able to create 1275 a journal file here. For now, only allow the 1276 user to specify an existing inode to be the 1277 journal file. */ 1278 if (is_remount) { 1279 ext4_msg(sb, KERN_ERR, 1280 "Cannot specify journal on remount"); 1281 return 0; 1282 } 1283 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); 1284 break; 1285 case Opt_journal_dev: 1286 if (is_remount) { 1287 ext4_msg(sb, KERN_ERR, 1288 "Cannot specify journal on remount"); 1289 return 0; 1290 } 1291 if (match_int(&args[0], &option)) 1292 return 0; 1293 *journal_devnum = option; 1294 break; 1295 case Opt_journal_checksum: 1296 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1297 break; 1298 case Opt_journal_async_commit: 1299 set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); 1300 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1301 break; 1302 case Opt_noload: 1303 set_opt(sbi->s_mount_opt, NOLOAD); 1304 break; 1305 case Opt_commit: 1306 if (match_int(&args[0], &option)) 1307 return 0; 1308 if (option < 0) 1309 return 0; 1310 if (option == 0) 1311 option = JBD2_DEFAULT_MAX_COMMIT_AGE; 1312 sbi->s_commit_interval = HZ * option; 1313 break; 1314 case Opt_max_batch_time: 1315 if (match_int(&args[0], &option)) 1316 return 0; 1317 if (option < 0) 1318 return 0; 1319 if (option == 0) 1320 option = EXT4_DEF_MAX_BATCH_TIME; 1321 sbi->s_max_batch_time = option; 1322 break; 1323 case Opt_min_batch_time: 1324 if (match_int(&args[0], &option)) 1325 return 0; 1326 if (option < 0) 1327 return 0; 1328 sbi->s_min_batch_time = option; 1329 break; 1330 case Opt_data_journal: 1331 data_opt = EXT4_MOUNT_JOURNAL_DATA; 
1332 goto datacheck; 1333 case Opt_data_ordered: 1334 data_opt = EXT4_MOUNT_ORDERED_DATA; 1335 goto datacheck; 1336 case Opt_data_writeback: 1337 data_opt = EXT4_MOUNT_WRITEBACK_DATA; 1338 datacheck: 1339 if (is_remount) { 1340 if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) 1341 != data_opt) { 1342 ext4_msg(sb, KERN_ERR, 1343 "Cannot change data mode on remount"); 1344 return 0; 1345 } 1346 } else { 1347 sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS; 1348 sbi->s_mount_opt |= data_opt; 1349 } 1350 break; 1351 case Opt_data_err_abort: 1352 set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1353 break; 1354 case Opt_data_err_ignore: 1355 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1356 break; 1357 case Opt_mb_history_length: 1358 if (match_int(&args[0], &option)) 1359 return 0; 1360 if (option < 0) 1361 return 0; 1362 sbi->s_mb_history_max = option; 1363 break; 1364 #ifdef CONFIG_QUOTA 1365 case Opt_usrjquota: 1366 qtype = USRQUOTA; 1367 goto set_qf_name; 1368 case Opt_grpjquota: 1369 qtype = GRPQUOTA; 1370 set_qf_name: 1371 if (sb_any_quota_loaded(sb) && 1372 !sbi->s_qf_names[qtype]) { 1373 ext4_msg(sb, KERN_ERR, 1374 "Cannot change journaled " 1375 "quota options when quota turned on"); 1376 return 0; 1377 } 1378 qname = match_strdup(&args[0]); 1379 if (!qname) { 1380 ext4_msg(sb, KERN_ERR, 1381 "Not enough memory for " 1382 "storing quotafile name"); 1383 return 0; 1384 } 1385 if (sbi->s_qf_names[qtype] && 1386 strcmp(sbi->s_qf_names[qtype], qname)) { 1387 ext4_msg(sb, KERN_ERR, 1388 "%s quota file already " 1389 "specified", QTYPE2NAME(qtype)); 1390 kfree(qname); 1391 return 0; 1392 } 1393 sbi->s_qf_names[qtype] = qname; 1394 if (strchr(sbi->s_qf_names[qtype], '/')) { 1395 ext4_msg(sb, KERN_ERR, 1396 "quotafile must be on " 1397 "filesystem root"); 1398 kfree(sbi->s_qf_names[qtype]); 1399 sbi->s_qf_names[qtype] = NULL; 1400 return 0; 1401 } 1402 set_opt(sbi->s_mount_opt, QUOTA); 1403 break; 1404 case Opt_offusrjquota: 1405 qtype = USRQUOTA; 1406 goto clear_qf_name; 1407 
case Opt_offgrpjquota: 1408 qtype = GRPQUOTA; 1409 clear_qf_name: 1410 if (sb_any_quota_loaded(sb) && 1411 sbi->s_qf_names[qtype]) { 1412 ext4_msg(sb, KERN_ERR, "Cannot change " 1413 "journaled quota options when " 1414 "quota turned on"); 1415 return 0; 1416 } 1417 /* 1418 * The space will be released later when all options 1419 * are confirmed to be correct 1420 */ 1421 sbi->s_qf_names[qtype] = NULL; 1422 break; 1423 case Opt_jqfmt_vfsold: 1424 qfmt = QFMT_VFS_OLD; 1425 goto set_qf_format; 1426 case Opt_jqfmt_vfsv0: 1427 qfmt = QFMT_VFS_V0; 1428 set_qf_format: 1429 if (sb_any_quota_loaded(sb) && 1430 sbi->s_jquota_fmt != qfmt) { 1431 ext4_msg(sb, KERN_ERR, "Cannot change " 1432 "journaled quota options when " 1433 "quota turned on"); 1434 return 0; 1435 } 1436 sbi->s_jquota_fmt = qfmt; 1437 break; 1438 case Opt_quota: 1439 case Opt_usrquota: 1440 set_opt(sbi->s_mount_opt, QUOTA); 1441 set_opt(sbi->s_mount_opt, USRQUOTA); 1442 break; 1443 case Opt_grpquota: 1444 set_opt(sbi->s_mount_opt, QUOTA); 1445 set_opt(sbi->s_mount_opt, GRPQUOTA); 1446 break; 1447 case Opt_noquota: 1448 if (sb_any_quota_loaded(sb)) { 1449 ext4_msg(sb, KERN_ERR, "Cannot change quota " 1450 "options when quota turned on"); 1451 return 0; 1452 } 1453 clear_opt(sbi->s_mount_opt, QUOTA); 1454 clear_opt(sbi->s_mount_opt, USRQUOTA); 1455 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1456 break; 1457 #else 1458 case Opt_quota: 1459 case Opt_usrquota: 1460 case Opt_grpquota: 1461 ext4_msg(sb, KERN_ERR, 1462 "quota options not supported"); 1463 break; 1464 case Opt_usrjquota: 1465 case Opt_grpjquota: 1466 case Opt_offusrjquota: 1467 case Opt_offgrpjquota: 1468 case Opt_jqfmt_vfsold: 1469 case Opt_jqfmt_vfsv0: 1470 ext4_msg(sb, KERN_ERR, 1471 "journaled quota options not supported"); 1472 break; 1473 case Opt_noquota: 1474 break; 1475 #endif 1476 case Opt_abort: 1477 set_opt(sbi->s_mount_opt, ABORT); 1478 break; 1479 case Opt_nobarrier: 1480 clear_opt(sbi->s_mount_opt, BARRIER); 1481 break; 1482 case 
Opt_barrier: 1483 if (match_int(&args[0], &option)) { 1484 set_opt(sbi->s_mount_opt, BARRIER); 1485 break; 1486 } 1487 if (option) 1488 set_opt(sbi->s_mount_opt, BARRIER); 1489 else 1490 clear_opt(sbi->s_mount_opt, BARRIER); 1491 break; 1492 case Opt_ignore: 1493 break; 1494 case Opt_resize: 1495 if (!is_remount) { 1496 ext4_msg(sb, KERN_ERR, 1497 "resize option only available " 1498 "for remount"); 1499 return 0; 1500 } 1501 if (match_int(&args[0], &option) != 0) 1502 return 0; 1503 *n_blocks_count = option; 1504 break; 1505 case Opt_nobh: 1506 set_opt(sbi->s_mount_opt, NOBH); 1507 break; 1508 case Opt_bh: 1509 clear_opt(sbi->s_mount_opt, NOBH); 1510 break; 1511 case Opt_i_version: 1512 set_opt(sbi->s_mount_opt, I_VERSION); 1513 sb->s_flags |= MS_I_VERSION; 1514 break; 1515 case Opt_nodelalloc: 1516 clear_opt(sbi->s_mount_opt, DELALLOC); 1517 break; 1518 case Opt_stripe: 1519 if (match_int(&args[0], &option)) 1520 return 0; 1521 if (option < 0) 1522 return 0; 1523 sbi->s_stripe = option; 1524 break; 1525 case Opt_delalloc: 1526 set_opt(sbi->s_mount_opt, DELALLOC); 1527 break; 1528 case Opt_block_validity: 1529 set_opt(sbi->s_mount_opt, BLOCK_VALIDITY); 1530 break; 1531 case Opt_noblock_validity: 1532 clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY); 1533 break; 1534 case Opt_inode_readahead_blks: 1535 if (match_int(&args[0], &option)) 1536 return 0; 1537 if (option < 0 || option > (1 << 30)) 1538 return 0; 1539 if (!is_power_of_2(option)) { 1540 ext4_msg(sb, KERN_ERR, 1541 "EXT4-fs: inode_readahead_blks" 1542 " must be a power of 2"); 1543 return 0; 1544 } 1545 sbi->s_inode_readahead_blks = option; 1546 break; 1547 case Opt_journal_ioprio: 1548 if (match_int(&args[0], &option)) 1549 return 0; 1550 if (option < 0 || option > 7) 1551 break; 1552 *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 1553 option); 1554 break; 1555 case Opt_noauto_da_alloc: 1556 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1557 break; 1558 case Opt_auto_da_alloc: 1559 if (match_int(&args[0], 
&option)) { 1560 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); 1561 break; 1562 } 1563 if (option) 1564 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); 1565 else 1566 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1567 break; 1568 default: 1569 ext4_msg(sb, KERN_ERR, 1570 "Unrecognized mount option \"%s\" " 1571 "or missing value", p); 1572 return 0; 1573 } 1574 } 1575 #ifdef CONFIG_QUOTA 1576 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1577 if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) && 1578 sbi->s_qf_names[USRQUOTA]) 1579 clear_opt(sbi->s_mount_opt, USRQUOTA); 1580 1581 if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) && 1582 sbi->s_qf_names[GRPQUOTA]) 1583 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1584 1585 if ((sbi->s_qf_names[USRQUOTA] && 1586 (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || 1587 (sbi->s_qf_names[GRPQUOTA] && 1588 (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { 1589 ext4_msg(sb, KERN_ERR, "old and new quota " 1590 "format mixing"); 1591 return 0; 1592 } 1593 1594 if (!sbi->s_jquota_fmt) { 1595 ext4_msg(sb, KERN_ERR, "journaled quota format " 1596 "not specified"); 1597 return 0; 1598 } 1599 } else { 1600 if (sbi->s_jquota_fmt) { 1601 ext4_msg(sb, KERN_ERR, "journaled quota format " 1602 "specified with no journaling " 1603 "enabled"); 1604 return 0; 1605 } 1606 } 1607 #endif 1608 return 1; 1609 } 1610 1611 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, 1612 int read_only) 1613 { 1614 struct ext4_sb_info *sbi = EXT4_SB(sb); 1615 int res = 0; 1616 1617 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1618 ext4_msg(sb, KERN_ERR, "revision level too high, " 1619 "forcing read-only mode"); 1620 res = MS_RDONLY; 1621 } 1622 if (read_only) 1623 return res; 1624 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1625 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " 1626 "running e2fsck is recommended"); 1627 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1628 ext4_msg(sb, KERN_WARNING, 1629 "warning: 
mounting fs with errors, " 1630 "running e2fsck is recommended"); 1631 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1632 le16_to_cpu(es->s_mnt_count) >= 1633 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1634 ext4_msg(sb, KERN_WARNING, 1635 "warning: maximal mount count reached, " 1636 "running e2fsck is recommended"); 1637 else if (le32_to_cpu(es->s_checkinterval) && 1638 (le32_to_cpu(es->s_lastcheck) + 1639 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1640 ext4_msg(sb, KERN_WARNING, 1641 "warning: checktime reached, " 1642 "running e2fsck is recommended"); 1643 if (!sbi->s_journal) 1644 es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 1645 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1646 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1647 le16_add_cpu(&es->s_mnt_count, 1); 1648 es->s_mtime = cpu_to_le32(get_seconds()); 1649 ext4_update_dynamic_rev(sb); 1650 if (sbi->s_journal) 1651 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1652 1653 ext4_commit_super(sb, 1); 1654 if (test_opt(sb, DEBUG)) 1655 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1656 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 1657 sb->s_blocksize, 1658 sbi->s_groups_count, 1659 EXT4_BLOCKS_PER_GROUP(sb), 1660 EXT4_INODES_PER_GROUP(sb), 1661 sbi->s_mount_opt); 1662 1663 if (EXT4_SB(sb)->s_journal) { 1664 ext4_msg(sb, KERN_INFO, "%s journal on %s", 1665 EXT4_SB(sb)->s_journal->j_inode ? 
"internal" : 1666 "external", EXT4_SB(sb)->s_journal->j_devname); 1667 } else { 1668 ext4_msg(sb, KERN_INFO, "no journal"); 1669 } 1670 return res; 1671 } 1672 1673 static int ext4_fill_flex_info(struct super_block *sb) 1674 { 1675 struct ext4_sb_info *sbi = EXT4_SB(sb); 1676 struct ext4_group_desc *gdp = NULL; 1677 ext4_group_t flex_group_count; 1678 ext4_group_t flex_group; 1679 int groups_per_flex = 0; 1680 size_t size; 1681 int i; 1682 1683 if (!sbi->s_es->s_log_groups_per_flex) { 1684 sbi->s_log_groups_per_flex = 0; 1685 return 1; 1686 } 1687 1688 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 1689 groups_per_flex = 1 << sbi->s_log_groups_per_flex; 1690 1691 /* We allocate both existing and potentially added groups */ 1692 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1693 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << 1694 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; 1695 size = flex_group_count * sizeof(struct flex_groups); 1696 sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); 1697 if (sbi->s_flex_groups == NULL) { 1698 sbi->s_flex_groups = vmalloc(size); 1699 if (sbi->s_flex_groups) 1700 memset(sbi->s_flex_groups, 0, size); 1701 } 1702 if (sbi->s_flex_groups == NULL) { 1703 ext4_msg(sb, KERN_ERR, "not enough memory for " 1704 "%u flex groups", flex_group_count); 1705 goto failed; 1706 } 1707 1708 for (i = 0; i < sbi->s_groups_count; i++) { 1709 gdp = ext4_get_group_desc(sb, i, NULL); 1710 1711 flex_group = ext4_flex_group(sbi, i); 1712 atomic_set(&sbi->s_flex_groups[flex_group].free_inodes, 1713 ext4_free_inodes_count(sb, gdp)); 1714 atomic_set(&sbi->s_flex_groups[flex_group].free_blocks, 1715 ext4_free_blks_count(sb, gdp)); 1716 atomic_set(&sbi->s_flex_groups[flex_group].used_dirs, 1717 ext4_used_dirs_count(sb, gdp)); 1718 } 1719 1720 return 1; 1721 failed: 1722 return 0; 1723 } 1724 1725 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, 1726 struct ext4_group_desc *gdp) 1727 { 1728 
__u16 crc = 0; 1729 1730 if (sbi->s_es->s_feature_ro_compat & 1731 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { 1732 int offset = offsetof(struct ext4_group_desc, bg_checksum); 1733 __le32 le_group = cpu_to_le32(block_group); 1734 1735 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); 1736 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); 1737 crc = crc16(crc, (__u8 *)gdp, offset); 1738 offset += sizeof(gdp->bg_checksum); /* skip checksum */ 1739 /* for checksum of struct ext4_group_desc do the rest...*/ 1740 if ((sbi->s_es->s_feature_incompat & 1741 cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && 1742 offset < le16_to_cpu(sbi->s_es->s_desc_size)) 1743 crc = crc16(crc, (__u8 *)gdp + offset, 1744 le16_to_cpu(sbi->s_es->s_desc_size) - 1745 offset); 1746 } 1747 1748 return cpu_to_le16(crc); 1749 } 1750 1751 int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, 1752 struct ext4_group_desc *gdp) 1753 { 1754 if ((sbi->s_es->s_feature_ro_compat & 1755 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) && 1756 (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp))) 1757 return 0; 1758 1759 return 1; 1760 } 1761 1762 /* Called at mount-time, super-block is locked */ 1763 static int ext4_check_descriptors(struct super_block *sb) 1764 { 1765 struct ext4_sb_info *sbi = EXT4_SB(sb); 1766 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); 1767 ext4_fsblk_t last_block; 1768 ext4_fsblk_t block_bitmap; 1769 ext4_fsblk_t inode_bitmap; 1770 ext4_fsblk_t inode_table; 1771 int flexbg_flag = 0; 1772 ext4_group_t i; 1773 1774 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 1775 flexbg_flag = 1; 1776 1777 ext4_debug("Checking group descriptors"); 1778 1779 for (i = 0; i < sbi->s_groups_count; i++) { 1780 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 1781 1782 if (i == sbi->s_groups_count - 1 || flexbg_flag) 1783 last_block = ext4_blocks_count(sbi->s_es) - 1; 1784 else 1785 last_block = 
first_block + 1786 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 1787 1788 block_bitmap = ext4_block_bitmap(sb, gdp); 1789 if (block_bitmap < first_block || block_bitmap > last_block) { 1790 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 1791 "Block bitmap for group %u not in group " 1792 "(block %llu)!", i, block_bitmap); 1793 return 0; 1794 } 1795 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1796 if (inode_bitmap < first_block || inode_bitmap > last_block) { 1797 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 1798 "Inode bitmap for group %u not in group " 1799 "(block %llu)!", i, inode_bitmap); 1800 return 0; 1801 } 1802 inode_table = ext4_inode_table(sb, gdp); 1803 if (inode_table < first_block || 1804 inode_table + sbi->s_itb_per_group - 1 > last_block) { 1805 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 1806 "Inode table for group %u not in group " 1807 "(block %llu)!", i, inode_table); 1808 return 0; 1809 } 1810 ext4_lock_group(sb, i); 1811 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1812 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 1813 "Checksum for group %u failed (%u!=%u)", 1814 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1815 gdp)), le16_to_cpu(gdp->bg_checksum)); 1816 if (!(sb->s_flags & MS_RDONLY)) { 1817 ext4_unlock_group(sb, i); 1818 return 0; 1819 } 1820 } 1821 ext4_unlock_group(sb, i); 1822 if (!flexbg_flag) 1823 first_block += EXT4_BLOCKS_PER_GROUP(sb); 1824 } 1825 1826 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 1827 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); 1828 return 1; 1829 } 1830 1831 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at 1832 * the superblock) which were deleted from all directories, but held open by 1833 * a process at the time of a crash. We walk the list and try to delete these 1834 * inodes at recovery time (only with a read-write filesystem). 
1835 * 1836 * In order to keep the orphan inode chain consistent during traversal (in 1837 * case of crash during recovery), we link each inode into the superblock 1838 * orphan list_head and handle it the same way as an inode deletion during 1839 * normal operation (which journals the operations for us). 1840 * 1841 * We only do an iget() and an iput() on each inode, which is very safe if we 1842 * accidentally point at an in-use or already deleted inode. The worst that 1843 * can happen in this case is that we get a "bit already cleared" message from 1844 * ext4_free_inode(). The only reason we would point at a wrong inode is if 1845 * e2fsck was run on this filesystem, and it must have already done the orphan 1846 * inode cleanup for us, so we can safely abort without any further action. 1847 */ 1848 static void ext4_orphan_cleanup(struct super_block *sb, 1849 struct ext4_super_block *es) 1850 { 1851 unsigned int s_flags = sb->s_flags; 1852 int nr_orphans = 0, nr_truncates = 0; 1853 #ifdef CONFIG_QUOTA 1854 int i; 1855 #endif 1856 if (!es->s_last_orphan) { 1857 jbd_debug(4, "no orphan inodes to clean up\n"); 1858 return; 1859 } 1860 1861 if (bdev_read_only(sb->s_bdev)) { 1862 ext4_msg(sb, KERN_ERR, "write access " 1863 "unavailable, skipping orphan cleanup"); 1864 return; 1865 } 1866 1867 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 1868 if (es->s_last_orphan) 1869 jbd_debug(1, "Errors on filesystem, " 1870 "clearing orphan list.\n"); 1871 es->s_last_orphan = 0; 1872 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 1873 return; 1874 } 1875 1876 if (s_flags & MS_RDONLY) { 1877 ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); 1878 sb->s_flags &= ~MS_RDONLY; 1879 } 1880 #ifdef CONFIG_QUOTA 1881 /* Needed for iput() to work correctly and not trash data */ 1882 sb->s_flags |= MS_ACTIVE; 1883 /* Turn on quotas so that they are updated correctly */ 1884 for (i = 0; i < MAXQUOTAS; i++) { 1885 if (EXT4_SB(sb)->s_qf_names[i]) { 1886 int ret = 
ext4_quota_on_mount(sb, i); 1887 if (ret < 0) 1888 ext4_msg(sb, KERN_ERR, 1889 "Cannot turn on journaled " 1890 "quota: error %d", ret); 1891 } 1892 } 1893 #endif 1894 1895 while (es->s_last_orphan) { 1896 struct inode *inode; 1897 1898 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); 1899 if (IS_ERR(inode)) { 1900 es->s_last_orphan = 0; 1901 break; 1902 } 1903 1904 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 1905 vfs_dq_init(inode); 1906 if (inode->i_nlink) { 1907 ext4_msg(sb, KERN_DEBUG, 1908 "%s: truncating inode %lu to %lld bytes", 1909 __func__, inode->i_ino, inode->i_size); 1910 jbd_debug(2, "truncating inode %lu to %lld bytes\n", 1911 inode->i_ino, inode->i_size); 1912 ext4_truncate(inode); 1913 nr_truncates++; 1914 } else { 1915 ext4_msg(sb, KERN_DEBUG, 1916 "%s: deleting unreferenced inode %lu", 1917 __func__, inode->i_ino); 1918 jbd_debug(2, "deleting unreferenced inode %lu\n", 1919 inode->i_ino); 1920 nr_orphans++; 1921 } 1922 iput(inode); /* The delete magic happens here! */ 1923 } 1924 1925 #define PLURAL(x) (x), ((x) == 1) ? "" : "s" 1926 1927 if (nr_orphans) 1928 ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted", 1929 PLURAL(nr_orphans)); 1930 if (nr_truncates) 1931 ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", 1932 PLURAL(nr_truncates)); 1933 #ifdef CONFIG_QUOTA 1934 /* Turn quotas off */ 1935 for (i = 0; i < MAXQUOTAS; i++) { 1936 if (sb_dqopt(sb)->files[i]) 1937 vfs_quota_off(sb, i, 0); 1938 } 1939 #endif 1940 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 1941 } 1942 1943 /* 1944 * Maximal extent format file size. 1945 * Resulting logical blkno at s_maxbytes must fit in our on-disk 1946 * extent format containers, within a sector_t, and within i_blocks 1947 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, 1948 * so that won't be a limiting factor. 1949 * 1950 * Note, this does *not* consider any metadata overhead for vfs i_blocks. 
1951 */ 1952 static loff_t ext4_max_size(int blkbits, int has_huge_files) 1953 { 1954 loff_t res; 1955 loff_t upper_limit = MAX_LFS_FILESIZE; 1956 1957 /* small i_blocks in vfs inode? */ 1958 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 1959 /* 1960 * CONFIG_LBD is not enabled implies the inode 1961 * i_block represent total blocks in 512 bytes 1962 * 32 == size of vfs inode i_blocks * 8 1963 */ 1964 upper_limit = (1LL << 32) - 1; 1965 1966 /* total blocks in file system block size */ 1967 upper_limit >>= (blkbits - 9); 1968 upper_limit <<= blkbits; 1969 } 1970 1971 /* 32-bit extent-start container, ee_block */ 1972 res = 1LL << 32; 1973 res <<= blkbits; 1974 res -= 1; 1975 1976 /* Sanity check against vm- & vfs- imposed limits */ 1977 if (res > upper_limit) 1978 res = upper_limit; 1979 1980 return res; 1981 } 1982 1983 /* 1984 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect 1985 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. 1986 * We need to be 1 filesystem block less than the 2^48 sector limit. 1987 */ 1988 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) 1989 { 1990 loff_t res = EXT4_NDIR_BLOCKS; 1991 int meta_blocks; 1992 loff_t upper_limit; 1993 /* This is calculated to be the largest file size for a dense, block 1994 * mapped file such that the file's total number of 512-byte sectors, 1995 * including data and all indirect blocks, does not exceed (2^48 - 1). 1996 * 1997 * __u32 i_blocks_lo and _u16 i_blocks_high represent the total 1998 * number of 512-byte sectors of the file. 
1999 */ 2000 2001 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 2002 /* 2003 * !has_huge_files or CONFIG_LBD not enabled implies that 2004 * the inode i_block field represents total file blocks in 2005 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8 2006 */ 2007 upper_limit = (1LL << 32) - 1; 2008 2009 /* total blocks in file system block size */ 2010 upper_limit >>= (bits - 9); 2011 2012 } else { 2013 /* 2014 * We use 48 bit ext4_inode i_blocks 2015 * With EXT4_HUGE_FILE_FL set the i_blocks 2016 * represent total number of blocks in 2017 * file system block size 2018 */ 2019 upper_limit = (1LL << 48) - 1; 2020 2021 } 2022 2023 /* indirect blocks */ 2024 meta_blocks = 1; 2025 /* double indirect blocks */ 2026 meta_blocks += 1 + (1LL << (bits-2)); 2027 /* tripple indirect blocks */ 2028 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); 2029 2030 upper_limit -= meta_blocks; 2031 upper_limit <<= bits; 2032 2033 res += 1LL << (bits-2); 2034 res += 1LL << (2*(bits-2)); 2035 res += 1LL << (3*(bits-2)); 2036 res <<= bits; 2037 if (res > upper_limit) 2038 res = upper_limit; 2039 2040 if (res > MAX_LFS_FILESIZE) 2041 res = MAX_LFS_FILESIZE; 2042 2043 return res; 2044 } 2045 2046 static ext4_fsblk_t descriptor_loc(struct super_block *sb, 2047 ext4_fsblk_t logical_sb_block, int nr) 2048 { 2049 struct ext4_sb_info *sbi = EXT4_SB(sb); 2050 ext4_group_t bg, first_meta_bg; 2051 int has_super = 0; 2052 2053 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 2054 2055 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || 2056 nr < first_meta_bg) 2057 return logical_sb_block + nr + 1; 2058 bg = sbi->s_desc_per_block * nr; 2059 if (ext4_bg_has_super(sb, bg)) 2060 has_super = 1; 2061 2062 return (has_super + ext4_group_first_block_no(sb, bg)); 2063 } 2064 2065 /** 2066 * ext4_get_stripe_size: Get the stripe size. 
2067 * @sbi: In memory super block info 2068 * 2069 * If we have specified it via mount option, then 2070 * use the mount option value. If the value specified at mount time is 2071 * greater than the blocks per group use the super block value. 2072 * If the super block value is greater than blocks per group return 0. 2073 * Allocator needs it be less than blocks per group. 2074 * 2075 */ 2076 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) 2077 { 2078 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); 2079 unsigned long stripe_width = 2080 le32_to_cpu(sbi->s_es->s_raid_stripe_width); 2081 2082 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) 2083 return sbi->s_stripe; 2084 2085 if (stripe_width <= sbi->s_blocks_per_group) 2086 return stripe_width; 2087 2088 if (stride <= sbi->s_blocks_per_group) 2089 return stride; 2090 2091 return 0; 2092 } 2093 2094 /* sysfs supprt */ 2095 2096 struct ext4_attr { 2097 struct attribute attr; 2098 ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); 2099 ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, 2100 const char *, size_t); 2101 int offset; 2102 }; 2103 2104 static int parse_strtoul(const char *buf, 2105 unsigned long max, unsigned long *value) 2106 { 2107 char *endp; 2108 2109 while (*buf && isspace(*buf)) 2110 buf++; 2111 *value = simple_strtoul(buf, &endp, 0); 2112 while (*endp && isspace(*endp)) 2113 endp++; 2114 if (*endp || *value > max) 2115 return -EINVAL; 2116 2117 return 0; 2118 } 2119 2120 static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a, 2121 struct ext4_sb_info *sbi, 2122 char *buf) 2123 { 2124 return snprintf(buf, PAGE_SIZE, "%llu\n", 2125 (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); 2126 } 2127 2128 static ssize_t session_write_kbytes_show(struct ext4_attr *a, 2129 struct ext4_sb_info *sbi, char *buf) 2130 { 2131 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2132 2133 return snprintf(buf, PAGE_SIZE, "%lu\n", 
2134 (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2135 sbi->s_sectors_written_start) >> 1); 2136 } 2137 2138 static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, 2139 struct ext4_sb_info *sbi, char *buf) 2140 { 2141 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2142 2143 return snprintf(buf, PAGE_SIZE, "%llu\n", 2144 sbi->s_kbytes_written + 2145 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2146 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 2147 } 2148 2149 static ssize_t inode_readahead_blks_store(struct ext4_attr *a, 2150 struct ext4_sb_info *sbi, 2151 const char *buf, size_t count) 2152 { 2153 unsigned long t; 2154 2155 if (parse_strtoul(buf, 0x40000000, &t)) 2156 return -EINVAL; 2157 2158 if (!is_power_of_2(t)) 2159 return -EINVAL; 2160 2161 sbi->s_inode_readahead_blks = t; 2162 return count; 2163 } 2164 2165 static ssize_t sbi_ui_show(struct ext4_attr *a, 2166 struct ext4_sb_info *sbi, char *buf) 2167 { 2168 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); 2169 2170 return snprintf(buf, PAGE_SIZE, "%u\n", *ui); 2171 } 2172 2173 static ssize_t sbi_ui_store(struct ext4_attr *a, 2174 struct ext4_sb_info *sbi, 2175 const char *buf, size_t count) 2176 { 2177 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); 2178 unsigned long t; 2179 2180 if (parse_strtoul(buf, 0xffffffff, &t)) 2181 return -EINVAL; 2182 *ui = t; 2183 return count; 2184 } 2185 2186 #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ 2187 static struct ext4_attr ext4_attr_##_name = { \ 2188 .attr = {.name = __stringify(_name), .mode = _mode }, \ 2189 .show = _show, \ 2190 .store = _store, \ 2191 .offset = offsetof(struct ext4_sb_info, _elname), \ 2192 } 2193 #define EXT4_ATTR(name, mode, show, store) \ 2194 static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) 2195 2196 #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) 2197 #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, 
name##_store) 2198 #define EXT4_RW_ATTR_SBI_UI(name, elname) \ 2199 EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) 2200 #define ATTR_LIST(name) &ext4_attr_##name.attr 2201 2202 EXT4_RO_ATTR(delayed_allocation_blocks); 2203 EXT4_RO_ATTR(session_write_kbytes); 2204 EXT4_RO_ATTR(lifetime_write_kbytes); 2205 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, 2206 inode_readahead_blks_store, s_inode_readahead_blks); 2207 EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); 2208 EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); 2209 EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); 2210 EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); 2211 EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); 2212 EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); 2213 2214 static struct attribute *ext4_attrs[] = { 2215 ATTR_LIST(delayed_allocation_blocks), 2216 ATTR_LIST(session_write_kbytes), 2217 ATTR_LIST(lifetime_write_kbytes), 2218 ATTR_LIST(inode_readahead_blks), 2219 ATTR_LIST(mb_stats), 2220 ATTR_LIST(mb_max_to_scan), 2221 ATTR_LIST(mb_min_to_scan), 2222 ATTR_LIST(mb_order2_req), 2223 ATTR_LIST(mb_stream_req), 2224 ATTR_LIST(mb_group_prealloc), 2225 NULL, 2226 }; 2227 2228 static ssize_t ext4_attr_show(struct kobject *kobj, 2229 struct attribute *attr, char *buf) 2230 { 2231 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2232 s_kobj); 2233 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); 2234 2235 return a->show ? a->show(a, sbi, buf) : 0; 2236 } 2237 2238 static ssize_t ext4_attr_store(struct kobject *kobj, 2239 struct attribute *attr, 2240 const char *buf, size_t len) 2241 { 2242 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2243 s_kobj); 2244 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); 2245 2246 return a->store ? 
a->store(a, sbi, buf, len) : 0; 2247 } 2248 2249 static void ext4_sb_release(struct kobject *kobj) 2250 { 2251 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2252 s_kobj); 2253 complete(&sbi->s_kobj_unregister); 2254 } 2255 2256 2257 static struct sysfs_ops ext4_attr_ops = { 2258 .show = ext4_attr_show, 2259 .store = ext4_attr_store, 2260 }; 2261 2262 static struct kobj_type ext4_ktype = { 2263 .default_attrs = ext4_attrs, 2264 .sysfs_ops = &ext4_attr_ops, 2265 .release = ext4_sb_release, 2266 }; 2267 2268 static int ext4_fill_super(struct super_block *sb, void *data, int silent) 2269 __releases(kernel_lock) 2270 __acquires(kernel_lock) 2271 { 2272 struct buffer_head *bh; 2273 struct ext4_super_block *es = NULL; 2274 struct ext4_sb_info *sbi; 2275 ext4_fsblk_t block; 2276 ext4_fsblk_t sb_block = get_sb_block(&data); 2277 ext4_fsblk_t logical_sb_block; 2278 unsigned long offset = 0; 2279 unsigned long journal_devnum = 0; 2280 unsigned long def_mount_opts; 2281 struct inode *root; 2282 char *cp; 2283 const char *descr; 2284 int ret = -EINVAL; 2285 int blocksize; 2286 unsigned int db_count; 2287 unsigned int i; 2288 int needs_recovery, has_huge_files; 2289 int features; 2290 __u64 blocks_count; 2291 int err; 2292 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 2293 2294 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 2295 if (!sbi) 2296 return -ENOMEM; 2297 2298 sbi->s_blockgroup_lock = 2299 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); 2300 if (!sbi->s_blockgroup_lock) { 2301 kfree(sbi); 2302 return -ENOMEM; 2303 } 2304 sb->s_fs_info = sbi; 2305 sbi->s_mount_opt = 0; 2306 sbi->s_resuid = EXT4_DEF_RESUID; 2307 sbi->s_resgid = EXT4_DEF_RESGID; 2308 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 2309 sbi->s_sb_block = sb_block; 2310 sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part, 2311 sectors[1]); 2312 2313 unlock_kernel(); 2314 2315 /* Cleanup superblock name */ 2316 for (cp = sb->s_id; (cp = strchr(cp, '/'));) 
2317 *cp = '!'; 2318 2319 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 2320 if (!blocksize) { 2321 ext4_msg(sb, KERN_ERR, "unable to set blocksize"); 2322 goto out_fail; 2323 } 2324 2325 /* 2326 * The ext4 superblock will not be buffer aligned for other than 1kB 2327 * block sizes. We need to calculate the offset from buffer start. 2328 */ 2329 if (blocksize != EXT4_MIN_BLOCK_SIZE) { 2330 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 2331 offset = do_div(logical_sb_block, blocksize); 2332 } else { 2333 logical_sb_block = sb_block; 2334 } 2335 2336 if (!(bh = sb_bread(sb, logical_sb_block))) { 2337 ext4_msg(sb, KERN_ERR, "unable to read superblock"); 2338 goto out_fail; 2339 } 2340 /* 2341 * Note: s_es must be initialized as soon as possible because 2342 * some ext4 macro-instructions depend on its value 2343 */ 2344 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 2345 sbi->s_es = es; 2346 sb->s_magic = le16_to_cpu(es->s_magic); 2347 if (sb->s_magic != EXT4_SUPER_MAGIC) 2348 goto cantfind_ext4; 2349 sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); 2350 2351 /* Set defaults before we parse the mount options */ 2352 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 2353 if (def_mount_opts & EXT4_DEFM_DEBUG) 2354 set_opt(sbi->s_mount_opt, DEBUG); 2355 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) 2356 set_opt(sbi->s_mount_opt, GRPID); 2357 if (def_mount_opts & EXT4_DEFM_UID16) 2358 set_opt(sbi->s_mount_opt, NO_UID32); 2359 #ifdef CONFIG_EXT4_FS_XATTR 2360 if (def_mount_opts & EXT4_DEFM_XATTR_USER) 2361 set_opt(sbi->s_mount_opt, XATTR_USER); 2362 #endif 2363 #ifdef CONFIG_EXT4_FS_POSIX_ACL 2364 if (def_mount_opts & EXT4_DEFM_ACL) 2365 set_opt(sbi->s_mount_opt, POSIX_ACL); 2366 #endif 2367 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 2368 sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; 2369 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 2370 sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA; 
2371 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) 2372 sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA; 2373 2374 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) 2375 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 2376 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) 2377 set_opt(sbi->s_mount_opt, ERRORS_CONT); 2378 else 2379 set_opt(sbi->s_mount_opt, ERRORS_RO); 2380 2381 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 2382 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 2383 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; 2384 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; 2385 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 2386 sbi->s_mb_history_max = default_mb_history_length; 2387 2388 set_opt(sbi->s_mount_opt, BARRIER); 2389 2390 /* 2391 * enable delayed allocation by default 2392 * Use -o nodelalloc to turn it off 2393 */ 2394 set_opt(sbi->s_mount_opt, DELALLOC); 2395 2396 if (!parse_options((char *) data, sb, &journal_devnum, 2397 &journal_ioprio, NULL, 0)) 2398 goto failed_mount; 2399 2400 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2401 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 2402 2403 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && 2404 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || 2405 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 2406 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) 2407 ext4_msg(sb, KERN_WARNING, 2408 "feature flags set on rev 0 fs, " 2409 "running e2fsck is recommended"); 2410 2411 /* 2412 * Check feature flags regardless of the revision level, since we 2413 * previously didn't change the revision level when setting the flags, 2414 * so there is a chance incompat flags are set on a rev 0 filesystem. 
2415 */ 2416 features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); 2417 if (features) { 2418 ext4_msg(sb, KERN_ERR, 2419 "Couldn't mount because of " 2420 "unsupported optional features (%x)", 2421 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & 2422 ~EXT4_FEATURE_INCOMPAT_SUPP)); 2423 goto failed_mount; 2424 } 2425 features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); 2426 if (!(sb->s_flags & MS_RDONLY) && features) { 2427 ext4_msg(sb, KERN_ERR, 2428 "Couldn't mount RDWR because of " 2429 "unsupported optional features (%x)", 2430 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & 2431 ~EXT4_FEATURE_RO_COMPAT_SUPP)); 2432 goto failed_mount; 2433 } 2434 has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, 2435 EXT4_FEATURE_RO_COMPAT_HUGE_FILE); 2436 if (has_huge_files) { 2437 /* 2438 * Large file size enabled file system can only be 2439 * mount if kernel is build with CONFIG_LBD 2440 */ 2441 if (sizeof(root->i_blocks) < sizeof(u64) && 2442 !(sb->s_flags & MS_RDONLY)) { 2443 ext4_msg(sb, KERN_ERR, "Filesystem with huge " 2444 "files cannot be mounted read-write " 2445 "without CONFIG_LBD"); 2446 goto failed_mount; 2447 } 2448 } 2449 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 2450 2451 if (blocksize < EXT4_MIN_BLOCK_SIZE || 2452 blocksize > EXT4_MAX_BLOCK_SIZE) { 2453 ext4_msg(sb, KERN_ERR, 2454 "Unsupported filesystem blocksize %d", blocksize); 2455 goto failed_mount; 2456 } 2457 2458 if (sb->s_blocksize != blocksize) { 2459 /* Validate the filesystem blocksize */ 2460 if (!sb_set_blocksize(sb, blocksize)) { 2461 ext4_msg(sb, KERN_ERR, "bad block size %d", 2462 blocksize); 2463 goto failed_mount; 2464 } 2465 2466 brelse(bh); 2467 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 2468 offset = do_div(logical_sb_block, blocksize); 2469 bh = sb_bread(sb, logical_sb_block); 2470 if (!bh) { 2471 ext4_msg(sb, KERN_ERR, 2472 "Can't read superblock on 2nd try"); 2473 goto failed_mount; 2474 } 2475 es = (struct 
ext4_super_block *)(((char *)bh->b_data) + offset); 2476 sbi->s_es = es; 2477 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 2478 ext4_msg(sb, KERN_ERR, 2479 "Magic mismatch, very weird!"); 2480 goto failed_mount; 2481 } 2482 } 2483 2484 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, 2485 has_huge_files); 2486 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); 2487 2488 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { 2489 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; 2490 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; 2491 } else { 2492 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 2493 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 2494 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 2495 (!is_power_of_2(sbi->s_inode_size)) || 2496 (sbi->s_inode_size > blocksize)) { 2497 ext4_msg(sb, KERN_ERR, 2498 "unsupported inode size: %d", 2499 sbi->s_inode_size); 2500 goto failed_mount; 2501 } 2502 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) 2503 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); 2504 } 2505 2506 sbi->s_desc_size = le16_to_cpu(es->s_desc_size); 2507 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { 2508 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || 2509 sbi->s_desc_size > EXT4_MAX_DESC_SIZE || 2510 !is_power_of_2(sbi->s_desc_size)) { 2511 ext4_msg(sb, KERN_ERR, 2512 "unsupported descriptor size %lu", 2513 sbi->s_desc_size); 2514 goto failed_mount; 2515 } 2516 } else 2517 sbi->s_desc_size = EXT4_MIN_DESC_SIZE; 2518 2519 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 2520 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 2521 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) 2522 goto cantfind_ext4; 2523 2524 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); 2525 if (sbi->s_inodes_per_block == 0) 2526 goto cantfind_ext4; 2527 sbi->s_itb_per_group = sbi->s_inodes_per_group / 2528 sbi->s_inodes_per_block; 2529 
sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); 2530 sbi->s_sbh = bh; 2531 sbi->s_mount_state = le16_to_cpu(es->s_state); 2532 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 2533 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 2534 2535 for (i = 0; i < 4; i++) 2536 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 2537 sbi->s_def_hash_version = es->s_def_hash_version; 2538 i = le32_to_cpu(es->s_flags); 2539 if (i & EXT2_FLAGS_UNSIGNED_HASH) 2540 sbi->s_hash_unsigned = 3; 2541 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { 2542 #ifdef __CHAR_UNSIGNED__ 2543 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); 2544 sbi->s_hash_unsigned = 3; 2545 #else 2546 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); 2547 #endif 2548 sb->s_dirt = 1; 2549 } 2550 2551 if (sbi->s_blocks_per_group > blocksize * 8) { 2552 ext4_msg(sb, KERN_ERR, 2553 "#blocks per group too big: %lu", 2554 sbi->s_blocks_per_group); 2555 goto failed_mount; 2556 } 2557 if (sbi->s_inodes_per_group > blocksize * 8) { 2558 ext4_msg(sb, KERN_ERR, 2559 "#inodes per group too big: %lu", 2560 sbi->s_inodes_per_group); 2561 goto failed_mount; 2562 } 2563 2564 if (ext4_blocks_count(es) > 2565 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 2566 ext4_msg(sb, KERN_ERR, "filesystem" 2567 " too large to mount safely"); 2568 if (sizeof(sector_t) < 8) 2569 ext4_msg(sb, KERN_WARNING, "CONFIG_LBD not enabled"); 2570 goto failed_mount; 2571 } 2572 2573 if (EXT4_BLOCKS_PER_GROUP(sb) == 0) 2574 goto cantfind_ext4; 2575 2576 /* check blocks count against device size */ 2577 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; 2578 if (blocks_count && ext4_blocks_count(es) > blocks_count) { 2579 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " 2580 "exceeds size of device (%llu blocks)", 2581 ext4_blocks_count(es), blocks_count); 2582 goto failed_mount; 2583 } 2584 2585 /* 2586 * It makes no sense for the first data block to be beyond the end 2587 * of 
the filesystem. 2588 */ 2589 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { 2590 ext4_msg(sb, KERN_WARNING, "bad geometry: first data" 2591 "block %u is beyond end of filesystem (%llu)", 2592 le32_to_cpu(es->s_first_data_block), 2593 ext4_blocks_count(es)); 2594 goto failed_mount; 2595 } 2596 blocks_count = (ext4_blocks_count(es) - 2597 le32_to_cpu(es->s_first_data_block) + 2598 EXT4_BLOCKS_PER_GROUP(sb) - 1); 2599 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 2600 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { 2601 ext4_msg(sb, KERN_WARNING, "groups count too large: %u " 2602 "(block count %llu, first data block %u, " 2603 "blocks per group %lu)", sbi->s_groups_count, 2604 ext4_blocks_count(es), 2605 le32_to_cpu(es->s_first_data_block), 2606 EXT4_BLOCKS_PER_GROUP(sb)); 2607 goto failed_mount; 2608 } 2609 sbi->s_groups_count = blocks_count; 2610 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 2611 EXT4_DESC_PER_BLOCK(sb); 2612 sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), 2613 GFP_KERNEL); 2614 if (sbi->s_group_desc == NULL) { 2615 ext4_msg(sb, KERN_ERR, "not enough memory"); 2616 goto failed_mount; 2617 } 2618 2619 #ifdef CONFIG_PROC_FS 2620 if (ext4_proc_root) 2621 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 2622 #endif 2623 2624 bgl_lock_init(sbi->s_blockgroup_lock); 2625 2626 for (i = 0; i < db_count; i++) { 2627 block = descriptor_loc(sb, logical_sb_block, i); 2628 sbi->s_group_desc[i] = sb_bread(sb, block); 2629 if (!sbi->s_group_desc[i]) { 2630 ext4_msg(sb, KERN_ERR, 2631 "can't read group descriptor %d", i); 2632 db_count = i; 2633 goto failed_mount2; 2634 } 2635 } 2636 if (!ext4_check_descriptors(sb)) { 2637 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 2638 goto failed_mount2; 2639 } 2640 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 2641 if (!ext4_fill_flex_info(sb)) { 2642 ext4_msg(sb, KERN_ERR, 2643 "unable to initialize " 2644 
"flex_bg meta info!"); 2645 goto failed_mount2; 2646 } 2647 2648 sbi->s_gdb_count = db_count; 2649 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 2650 spin_lock_init(&sbi->s_next_gen_lock); 2651 2652 err = percpu_counter_init(&sbi->s_freeblocks_counter, 2653 ext4_count_free_blocks(sb)); 2654 if (!err) { 2655 err = percpu_counter_init(&sbi->s_freeinodes_counter, 2656 ext4_count_free_inodes(sb)); 2657 } 2658 if (!err) { 2659 err = percpu_counter_init(&sbi->s_dirs_counter, 2660 ext4_count_dirs(sb)); 2661 } 2662 if (!err) { 2663 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); 2664 } 2665 if (err) { 2666 ext4_msg(sb, KERN_ERR, "insufficient memory"); 2667 goto failed_mount3; 2668 } 2669 2670 sbi->s_stripe = ext4_get_stripe_size(sbi); 2671 2672 /* 2673 * set up enough so that it can read an inode 2674 */ 2675 if (!test_opt(sb, NOLOAD) && 2676 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) 2677 sb->s_op = &ext4_sops; 2678 else 2679 sb->s_op = &ext4_nojournal_sops; 2680 sb->s_export_op = &ext4_export_ops; 2681 sb->s_xattr = ext4_xattr_handlers; 2682 #ifdef CONFIG_QUOTA 2683 sb->s_qcop = &ext4_qctl_operations; 2684 sb->dq_op = &ext4_quota_operations; 2685 #endif 2686 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 2687 mutex_init(&sbi->s_orphan_lock); 2688 mutex_init(&sbi->s_resize_lock); 2689 2690 sb->s_root = NULL; 2691 2692 needs_recovery = (es->s_last_orphan != 0 || 2693 EXT4_HAS_INCOMPAT_FEATURE(sb, 2694 EXT4_FEATURE_INCOMPAT_RECOVER)); 2695 2696 /* 2697 * The first inode we look at is the journal inode. Don't try 2698 * root first: it may be modified in the journal! 
2699 */ 2700 if (!test_opt(sb, NOLOAD) && 2701 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 2702 if (ext4_load_journal(sb, es, journal_devnum)) 2703 goto failed_mount3; 2704 if (!(sb->s_flags & MS_RDONLY) && 2705 EXT4_SB(sb)->s_journal->j_failed_commit) { 2706 ext4_msg(sb, KERN_CRIT, "error: " 2707 "ext4_fill_super: Journal transaction " 2708 "%u is corrupt", 2709 EXT4_SB(sb)->s_journal->j_failed_commit); 2710 if (test_opt(sb, ERRORS_RO)) { 2711 ext4_msg(sb, KERN_CRIT, 2712 "Mounting filesystem read-only"); 2713 sb->s_flags |= MS_RDONLY; 2714 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2715 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2716 } 2717 if (test_opt(sb, ERRORS_PANIC)) { 2718 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2719 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2720 ext4_commit_super(sb, 1); 2721 goto failed_mount4; 2722 } 2723 } 2724 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 2725 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 2726 ext4_msg(sb, KERN_ERR, "required journal recovery " 2727 "suppressed and not mounted read-only"); 2728 goto failed_mount4; 2729 } else { 2730 clear_opt(sbi->s_mount_opt, DATA_FLAGS); 2731 set_opt(sbi->s_mount_opt, WRITEBACK_DATA); 2732 sbi->s_journal = NULL; 2733 needs_recovery = 0; 2734 goto no_journal; 2735 } 2736 2737 if (ext4_blocks_count(es) > 0xffffffffULL && 2738 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 2739 JBD2_FEATURE_INCOMPAT_64BIT)) { 2740 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); 2741 goto failed_mount4; 2742 } 2743 2744 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { 2745 jbd2_journal_set_features(sbi->s_journal, 2746 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 2747 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2748 } else if (test_opt(sb, JOURNAL_CHECKSUM)) { 2749 jbd2_journal_set_features(sbi->s_journal, 2750 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); 2751 jbd2_journal_clear_features(sbi->s_journal, 0, 0, 2752 
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2753 } else { 2754 jbd2_journal_clear_features(sbi->s_journal, 2755 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 2756 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2757 } 2758 2759 /* We have now updated the journal if required, so we can 2760 * validate the data journaling mode. */ 2761 switch (test_opt(sb, DATA_FLAGS)) { 2762 case 0: 2763 /* No mode set, assume a default based on the journal 2764 * capabilities: ORDERED_DATA if the journal can 2765 * cope, else JOURNAL_DATA 2766 */ 2767 if (jbd2_journal_check_available_features 2768 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) 2769 set_opt(sbi->s_mount_opt, ORDERED_DATA); 2770 else 2771 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 2772 break; 2773 2774 case EXT4_MOUNT_ORDERED_DATA: 2775 case EXT4_MOUNT_WRITEBACK_DATA: 2776 if (!jbd2_journal_check_available_features 2777 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 2778 ext4_msg(sb, KERN_ERR, "Journal does not support " 2779 "requested data journaling mode"); 2780 goto failed_mount4; 2781 } 2782 default: 2783 break; 2784 } 2785 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 2786 2787 no_journal: 2788 2789 if (test_opt(sb, NOBH)) { 2790 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { 2791 ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " 2792 "its supported only with writeback mode"); 2793 clear_opt(sbi->s_mount_opt, NOBH); 2794 } 2795 } 2796 /* 2797 * The jbd2_journal_load will have done any necessary log recovery, 2798 * so we can safely mount the rest of the filesystem now. 
2799 */ 2800 2801 root = ext4_iget(sb, EXT4_ROOT_INO); 2802 if (IS_ERR(root)) { 2803 ext4_msg(sb, KERN_ERR, "get root inode failed"); 2804 ret = PTR_ERR(root); 2805 goto failed_mount4; 2806 } 2807 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 2808 iput(root); 2809 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); 2810 goto failed_mount4; 2811 } 2812 sb->s_root = d_alloc_root(root); 2813 if (!sb->s_root) { 2814 ext4_msg(sb, KERN_ERR, "get root dentry failed"); 2815 iput(root); 2816 ret = -ENOMEM; 2817 goto failed_mount4; 2818 } 2819 2820 ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY); 2821 2822 /* determine the minimum size of new large inodes, if present */ 2823 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { 2824 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 2825 EXT4_GOOD_OLD_INODE_SIZE; 2826 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 2827 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { 2828 if (sbi->s_want_extra_isize < 2829 le16_to_cpu(es->s_want_extra_isize)) 2830 sbi->s_want_extra_isize = 2831 le16_to_cpu(es->s_want_extra_isize); 2832 if (sbi->s_want_extra_isize < 2833 le16_to_cpu(es->s_min_extra_isize)) 2834 sbi->s_want_extra_isize = 2835 le16_to_cpu(es->s_min_extra_isize); 2836 } 2837 } 2838 /* Check if enough inode space is available */ 2839 if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > 2840 sbi->s_inode_size) { 2841 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 2842 EXT4_GOOD_OLD_INODE_SIZE; 2843 ext4_msg(sb, KERN_INFO, "required extra inode space not" 2844 "available"); 2845 } 2846 2847 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 2848 ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - " 2849 "requested data journaling mode"); 2850 clear_opt(sbi->s_mount_opt, DELALLOC); 2851 } else if (test_opt(sb, DELALLOC)) 2852 ext4_msg(sb, KERN_INFO, "delayed allocation enabled"); 2853 2854 err = ext4_setup_system_zone(sb); 2855 if (err) { 2856 ext4_msg(sb, KERN_ERR, "failed to initialize system " 2857 
"zone (%d)\n", err); 2858 goto failed_mount4; 2859 } 2860 2861 ext4_ext_init(sb); 2862 err = ext4_mb_init(sb, needs_recovery); 2863 if (err) { 2864 ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)", 2865 err); 2866 goto failed_mount4; 2867 } 2868 2869 sbi->s_kobj.kset = ext4_kset; 2870 init_completion(&sbi->s_kobj_unregister); 2871 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, 2872 "%s", sb->s_id); 2873 if (err) { 2874 ext4_mb_release(sb); 2875 ext4_ext_release(sb); 2876 goto failed_mount4; 2877 }; 2878 2879 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 2880 ext4_orphan_cleanup(sb, es); 2881 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 2882 if (needs_recovery) { 2883 ext4_msg(sb, KERN_INFO, "recovery complete"); 2884 ext4_mark_recovery_complete(sb, es); 2885 } 2886 if (EXT4_SB(sb)->s_journal) { 2887 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 2888 descr = " journalled data mode"; 2889 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 2890 descr = " ordered data mode"; 2891 else 2892 descr = " writeback data mode"; 2893 } else 2894 descr = "out journal"; 2895 2896 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr); 2897 2898 lock_kernel(); 2899 return 0; 2900 2901 cantfind_ext4: 2902 if (!silent) 2903 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); 2904 goto failed_mount; 2905 2906 failed_mount4: 2907 ext4_msg(sb, KERN_ERR, "mount failed"); 2908 ext4_release_system_zone(sb); 2909 if (sbi->s_journal) { 2910 jbd2_journal_destroy(sbi->s_journal); 2911 sbi->s_journal = NULL; 2912 } 2913 failed_mount3: 2914 if (sbi->s_flex_groups) { 2915 if (is_vmalloc_addr(sbi->s_flex_groups)) 2916 vfree(sbi->s_flex_groups); 2917 else 2918 kfree(sbi->s_flex_groups); 2919 } 2920 percpu_counter_destroy(&sbi->s_freeblocks_counter); 2921 percpu_counter_destroy(&sbi->s_freeinodes_counter); 2922 percpu_counter_destroy(&sbi->s_dirs_counter); 2923 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 2924 failed_mount2: 2925 
for (i = 0; i < db_count; i++) 2926 brelse(sbi->s_group_desc[i]); 2927 kfree(sbi->s_group_desc); 2928 failed_mount: 2929 if (sbi->s_proc) { 2930 remove_proc_entry(sb->s_id, ext4_proc_root); 2931 } 2932 #ifdef CONFIG_QUOTA 2933 for (i = 0; i < MAXQUOTAS; i++) 2934 kfree(sbi->s_qf_names[i]); 2935 #endif 2936 ext4_blkdev_remove(sbi); 2937 brelse(bh); 2938 out_fail: 2939 sb->s_fs_info = NULL; 2940 kfree(sbi->s_blockgroup_lock); 2941 kfree(sbi); 2942 lock_kernel(); 2943 return ret; 2944 } 2945 2946 /* 2947 * Setup any per-fs journal parameters now. We'll do this both on 2948 * initial mount, once the journal has been initialised but before we've 2949 * done any recovery; and again on any subsequent remount. 2950 */ 2951 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) 2952 { 2953 struct ext4_sb_info *sbi = EXT4_SB(sb); 2954 2955 journal->j_commit_interval = sbi->s_commit_interval; 2956 journal->j_min_batch_time = sbi->s_min_batch_time; 2957 journal->j_max_batch_time = sbi->s_max_batch_time; 2958 2959 spin_lock(&journal->j_state_lock); 2960 if (test_opt(sb, BARRIER)) 2961 journal->j_flags |= JBD2_BARRIER; 2962 else 2963 journal->j_flags &= ~JBD2_BARRIER; 2964 if (test_opt(sb, DATA_ERR_ABORT)) 2965 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; 2966 else 2967 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; 2968 spin_unlock(&journal->j_state_lock); 2969 } 2970 2971 static journal_t *ext4_get_journal(struct super_block *sb, 2972 unsigned int journal_inum) 2973 { 2974 struct inode *journal_inode; 2975 journal_t *journal; 2976 2977 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 2978 2979 /* First, test for the existence of a valid inode on disk. Bad 2980 * things happen if we iget() an unused inode, as the subsequent 2981 * iput() will try to delete it. 
*/ 2982 2983 journal_inode = ext4_iget(sb, journal_inum); 2984 if (IS_ERR(journal_inode)) { 2985 ext4_msg(sb, KERN_ERR, "no journal found"); 2986 return NULL; 2987 } 2988 if (!journal_inode->i_nlink) { 2989 make_bad_inode(journal_inode); 2990 iput(journal_inode); 2991 ext4_msg(sb, KERN_ERR, "journal inode is deleted"); 2992 return NULL; 2993 } 2994 2995 jbd_debug(2, "Journal inode found at %p: %lld bytes\n", 2996 journal_inode, journal_inode->i_size); 2997 if (!S_ISREG(journal_inode->i_mode)) { 2998 ext4_msg(sb, KERN_ERR, "invalid journal inode"); 2999 iput(journal_inode); 3000 return NULL; 3001 } 3002 3003 journal = jbd2_journal_init_inode(journal_inode); 3004 if (!journal) { 3005 ext4_msg(sb, KERN_ERR, "Could not load journal inode"); 3006 iput(journal_inode); 3007 return NULL; 3008 } 3009 journal->j_private = sb; 3010 ext4_init_journal_params(sb, journal); 3011 return journal; 3012 } 3013 3014 static journal_t *ext4_get_dev_journal(struct super_block *sb, 3015 dev_t j_dev) 3016 { 3017 struct buffer_head *bh; 3018 journal_t *journal; 3019 ext4_fsblk_t start; 3020 ext4_fsblk_t len; 3021 int hblock, blocksize; 3022 ext4_fsblk_t sb_block; 3023 unsigned long offset; 3024 struct ext4_super_block *es; 3025 struct block_device *bdev; 3026 3027 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3028 3029 bdev = ext4_blkdev_get(j_dev, sb); 3030 if (bdev == NULL) 3031 return NULL; 3032 3033 if (bd_claim(bdev, sb)) { 3034 ext4_msg(sb, KERN_ERR, 3035 "failed to claim external journal device"); 3036 blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 3037 return NULL; 3038 } 3039 3040 blocksize = sb->s_blocksize; 3041 hblock = bdev_logical_block_size(bdev); 3042 if (blocksize < hblock) { 3043 ext4_msg(sb, KERN_ERR, 3044 "blocksize too small for journal device"); 3045 goto out_bdev; 3046 } 3047 3048 sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; 3049 offset = EXT4_MIN_BLOCK_SIZE % blocksize; 3050 set_blocksize(bdev, blocksize); 3051 if (!(bh = __bread(bdev, sb_block, 
blocksize))) { 3052 ext4_msg(sb, KERN_ERR, "couldn't read superblock of " 3053 "external journal"); 3054 goto out_bdev; 3055 } 3056 3057 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 3058 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || 3059 !(le32_to_cpu(es->s_feature_incompat) & 3060 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { 3061 ext4_msg(sb, KERN_ERR, "external journal has " 3062 "bad superblock"); 3063 brelse(bh); 3064 goto out_bdev; 3065 } 3066 3067 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 3068 ext4_msg(sb, KERN_ERR, "journal UUID does not match"); 3069 brelse(bh); 3070 goto out_bdev; 3071 } 3072 3073 len = ext4_blocks_count(es); 3074 start = sb_block + 1; 3075 brelse(bh); /* we're done with the superblock */ 3076 3077 journal = jbd2_journal_init_dev(bdev, sb->s_bdev, 3078 start, len, blocksize); 3079 if (!journal) { 3080 ext4_msg(sb, KERN_ERR, "failed to create device journal"); 3081 goto out_bdev; 3082 } 3083 journal->j_private = sb; 3084 ll_rw_block(READ, 1, &journal->j_sb_buffer); 3085 wait_on_buffer(journal->j_sb_buffer); 3086 if (!buffer_uptodate(journal->j_sb_buffer)) { 3087 ext4_msg(sb, KERN_ERR, "I/O error on journal device"); 3088 goto out_journal; 3089 } 3090 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 3091 ext4_msg(sb, KERN_ERR, "External journal has more than one " 3092 "user (unsupported) - %d", 3093 be32_to_cpu(journal->j_superblock->s_nr_users)); 3094 goto out_journal; 3095 } 3096 EXT4_SB(sb)->journal_bdev = bdev; 3097 ext4_init_journal_params(sb, journal); 3098 return journal; 3099 3100 out_journal: 3101 jbd2_journal_destroy(journal); 3102 out_bdev: 3103 ext4_blkdev_put(bdev); 3104 return NULL; 3105 } 3106 3107 static int ext4_load_journal(struct super_block *sb, 3108 struct ext4_super_block *es, 3109 unsigned long journal_devnum) 3110 { 3111 journal_t *journal; 3112 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); 3113 dev_t journal_dev; 3114 int err = 0; 3115 int 
really_read_only; 3116 3117 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3118 3119 if (journal_devnum && 3120 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3121 ext4_msg(sb, KERN_INFO, "external journal device major/minor " 3122 "numbers have changed"); 3123 journal_dev = new_decode_dev(journal_devnum); 3124 } else 3125 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 3126 3127 really_read_only = bdev_read_only(sb->s_bdev); 3128 3129 /* 3130 * Are we loading a blank journal or performing recovery after a 3131 * crash? For recovery, we need to check in advance whether we 3132 * can get read-write access to the device. 3133 */ 3134 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 3135 if (sb->s_flags & MS_RDONLY) { 3136 ext4_msg(sb, KERN_INFO, "INFO: recovery " 3137 "required on readonly filesystem"); 3138 if (really_read_only) { 3139 ext4_msg(sb, KERN_ERR, "write access " 3140 "unavailable, cannot proceed"); 3141 return -EROFS; 3142 } 3143 ext4_msg(sb, KERN_INFO, "write access will " 3144 "be enabled during recovery"); 3145 } 3146 } 3147 3148 if (journal_inum && journal_dev) { 3149 ext4_msg(sb, KERN_ERR, "filesystem has both journal " 3150 "and inode journals!"); 3151 return -EINVAL; 3152 } 3153 3154 if (journal_inum) { 3155 if (!(journal = ext4_get_journal(sb, journal_inum))) 3156 return -EINVAL; 3157 } else { 3158 if (!(journal = ext4_get_dev_journal(sb, journal_dev))) 3159 return -EINVAL; 3160 } 3161 3162 if (journal->j_flags & JBD2_BARRIER) 3163 ext4_msg(sb, KERN_INFO, "barriers enabled"); 3164 else 3165 ext4_msg(sb, KERN_INFO, "barriers disabled"); 3166 3167 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 3168 err = jbd2_journal_update_format(journal); 3169 if (err) { 3170 ext4_msg(sb, KERN_ERR, "error updating journal"); 3171 jbd2_journal_destroy(journal); 3172 return err; 3173 } 3174 } 3175 3176 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) 3177 err = 
jbd2_journal_wipe(journal, !really_read_only); 3178 if (!err) 3179 err = jbd2_journal_load(journal); 3180 3181 if (err) { 3182 ext4_msg(sb, KERN_ERR, "error loading journal"); 3183 jbd2_journal_destroy(journal); 3184 return err; 3185 } 3186 3187 EXT4_SB(sb)->s_journal = journal; 3188 ext4_clear_journal_err(sb, es); 3189 3190 if (journal_devnum && 3191 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3192 es->s_journal_dev = cpu_to_le32(journal_devnum); 3193 3194 /* Make sure we flush the recovery flag to disk. */ 3195 ext4_commit_super(sb, 1); 3196 } 3197 3198 return 0; 3199 } 3200 3201 static int ext4_commit_super(struct super_block *sb, int sync) 3202 { 3203 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 3204 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 3205 int error = 0; 3206 3207 if (!sbh) 3208 return error; 3209 if (buffer_write_io_error(sbh)) { 3210 /* 3211 * Oh, dear. A previous attempt to write the 3212 * superblock failed. This could happen because the 3213 * USB device was yanked out. Or it could happen to 3214 * be a transient write error and maybe the block will 3215 * be remapped. Nothing we can do but to retry the 3216 * write and hope for the best. 
3217 */ 3218 ext4_msg(sb, KERN_ERR, "previous I/O error to " 3219 "superblock detected"); 3220 clear_buffer_write_io_error(sbh); 3221 set_buffer_uptodate(sbh); 3222 } 3223 es->s_wtime = cpu_to_le32(get_seconds()); 3224 es->s_kbytes_written = 3225 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + 3226 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 3227 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 3228 ext4_free_blocks_count_set(es, percpu_counter_sum_positive( 3229 &EXT4_SB(sb)->s_freeblocks_counter)); 3230 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( 3231 &EXT4_SB(sb)->s_freeinodes_counter)); 3232 sb->s_dirt = 0; 3233 BUFFER_TRACE(sbh, "marking dirty"); 3234 mark_buffer_dirty(sbh); 3235 if (sync) { 3236 error = sync_dirty_buffer(sbh); 3237 if (error) 3238 return error; 3239 3240 error = buffer_write_io_error(sbh); 3241 if (error) { 3242 ext4_msg(sb, KERN_ERR, "I/O error while writing " 3243 "superblock"); 3244 clear_buffer_write_io_error(sbh); 3245 set_buffer_uptodate(sbh); 3246 } 3247 } 3248 return error; 3249 } 3250 3251 /* 3252 * Have we just finished recovery? If so, and if we are mounting (or 3253 * remounting) the filesystem readonly, then we will end up with a 3254 * consistent fs on disk. Record that fact. 
3255 */ 3256 static void ext4_mark_recovery_complete(struct super_block *sb, 3257 struct ext4_super_block *es) 3258 { 3259 journal_t *journal = EXT4_SB(sb)->s_journal; 3260 3261 if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 3262 BUG_ON(journal != NULL); 3263 return; 3264 } 3265 jbd2_journal_lock_updates(journal); 3266 if (jbd2_journal_flush(journal) < 0) 3267 goto out; 3268 3269 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && 3270 sb->s_flags & MS_RDONLY) { 3271 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3272 ext4_commit_super(sb, 1); 3273 } 3274 3275 out: 3276 jbd2_journal_unlock_updates(journal); 3277 } 3278 3279 /* 3280 * If we are mounting (or read-write remounting) a filesystem whose journal 3281 * has recorded an error from a previous lifetime, move that error to the 3282 * main filesystem now. 3283 */ 3284 static void ext4_clear_journal_err(struct super_block *sb, 3285 struct ext4_super_block *es) 3286 { 3287 journal_t *journal; 3288 int j_errno; 3289 const char *errstr; 3290 3291 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3292 3293 journal = EXT4_SB(sb)->s_journal; 3294 3295 /* 3296 * Now check for any error status which may have been recorded in the 3297 * journal by a prior ext4_error() or ext4_abort() 3298 */ 3299 3300 j_errno = jbd2_journal_errno(journal); 3301 if (j_errno) { 3302 char nbuf[16]; 3303 3304 errstr = ext4_decode_error(sb, j_errno, nbuf); 3305 ext4_warning(sb, __func__, "Filesystem error recorded " 3306 "from previous mount: %s", errstr); 3307 ext4_warning(sb, __func__, "Marking fs in need of " 3308 "filesystem check."); 3309 3310 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 3311 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 3312 ext4_commit_super(sb, 1); 3313 3314 jbd2_journal_clear_err(journal); 3315 } 3316 } 3317 3318 /* 3319 * Force the running and committing transactions to commit, 3320 * and wait on the commit. 
3321 */ 3322 int ext4_force_commit(struct super_block *sb) 3323 { 3324 journal_t *journal; 3325 int ret = 0; 3326 3327 if (sb->s_flags & MS_RDONLY) 3328 return 0; 3329 3330 journal = EXT4_SB(sb)->s_journal; 3331 if (journal) 3332 ret = ext4_journal_force_commit(journal); 3333 3334 return ret; 3335 } 3336 3337 static void ext4_write_super(struct super_block *sb) 3338 { 3339 lock_super(sb); 3340 ext4_commit_super(sb, 1); 3341 unlock_super(sb); 3342 } 3343 3344 static int ext4_sync_fs(struct super_block *sb, int wait) 3345 { 3346 int ret = 0; 3347 tid_t target; 3348 3349 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); 3350 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { 3351 if (wait) 3352 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); 3353 } 3354 return ret; 3355 } 3356 3357 /* 3358 * LVM calls this function before a (read-only) snapshot is created. This 3359 * gives us a chance to flush the journal completely and mark the fs clean. 3360 */ 3361 static int ext4_freeze(struct super_block *sb) 3362 { 3363 int error = 0; 3364 journal_t *journal; 3365 3366 if (sb->s_flags & MS_RDONLY) 3367 return 0; 3368 3369 journal = EXT4_SB(sb)->s_journal; 3370 3371 /* Now we set up the journal barrier. */ 3372 jbd2_journal_lock_updates(journal); 3373 3374 /* 3375 * Don't clear the needs_recovery flag if we failed to flush 3376 * the journal. 3377 */ 3378 error = jbd2_journal_flush(journal); 3379 if (error < 0) { 3380 out: 3381 jbd2_journal_unlock_updates(journal); 3382 return error; 3383 } 3384 3385 /* Journal blocked and flushed, clear needs_recovery flag. */ 3386 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3387 error = ext4_commit_super(sb, 1); 3388 if (error) 3389 goto out; 3390 return 0; 3391 } 3392 3393 /* 3394 * Called by LVM after the snapshot is done. We need to reset the RECOVER 3395 * flag here, even though the filesystem is not technically dirty yet. 
3396 */ 3397 static int ext4_unfreeze(struct super_block *sb) 3398 { 3399 if (sb->s_flags & MS_RDONLY) 3400 return 0; 3401 3402 lock_super(sb); 3403 /* Reset the needs_recovery flag before the fs is unlocked. */ 3404 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3405 ext4_commit_super(sb, 1); 3406 unlock_super(sb); 3407 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3408 return 0; 3409 } 3410 3411 static int ext4_remount(struct super_block *sb, int *flags, char *data) 3412 { 3413 struct ext4_super_block *es; 3414 struct ext4_sb_info *sbi = EXT4_SB(sb); 3415 ext4_fsblk_t n_blocks_count = 0; 3416 unsigned long old_sb_flags; 3417 struct ext4_mount_options old_opts; 3418 ext4_group_t g; 3419 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 3420 int err; 3421 #ifdef CONFIG_QUOTA 3422 int i; 3423 #endif 3424 3425 lock_kernel(); 3426 3427 /* Store the original options */ 3428 lock_super(sb); 3429 old_sb_flags = sb->s_flags; 3430 old_opts.s_mount_opt = sbi->s_mount_opt; 3431 old_opts.s_resuid = sbi->s_resuid; 3432 old_opts.s_resgid = sbi->s_resgid; 3433 old_opts.s_commit_interval = sbi->s_commit_interval; 3434 old_opts.s_min_batch_time = sbi->s_min_batch_time; 3435 old_opts.s_max_batch_time = sbi->s_max_batch_time; 3436 #ifdef CONFIG_QUOTA 3437 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 3438 for (i = 0; i < MAXQUOTAS; i++) 3439 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 3440 #endif 3441 if (sbi->s_journal && sbi->s_journal->j_task->io_context) 3442 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; 3443 3444 /* 3445 * Allow the "check" option to be passed as a remount option. 3446 */ 3447 if (!parse_options(data, sb, NULL, &journal_ioprio, 3448 &n_blocks_count, 1)) { 3449 err = -EINVAL; 3450 goto restore_opts; 3451 } 3452 3453 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) 3454 ext4_abort(sb, __func__, "Abort forced by user"); 3455 3456 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3457 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? 
MS_POSIXACL : 0); 3458 3459 es = sbi->s_es; 3460 3461 if (sbi->s_journal) { 3462 ext4_init_journal_params(sb, sbi->s_journal); 3463 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 3464 } 3465 3466 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 3467 n_blocks_count > ext4_blocks_count(es)) { 3468 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) { 3469 err = -EROFS; 3470 goto restore_opts; 3471 } 3472 3473 if (*flags & MS_RDONLY) { 3474 /* 3475 * First of all, the unconditional stuff we have to do 3476 * to disable replay of the journal when we next remount 3477 */ 3478 sb->s_flags |= MS_RDONLY; 3479 3480 /* 3481 * OK, test if we are remounting a valid rw partition 3482 * readonly, and if so set the rdonly flag and then 3483 * mark the partition as valid again. 3484 */ 3485 if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) && 3486 (sbi->s_mount_state & EXT4_VALID_FS)) 3487 es->s_state = cpu_to_le16(sbi->s_mount_state); 3488 3489 if (sbi->s_journal) 3490 ext4_mark_recovery_complete(sb, es); 3491 } else { 3492 int ret; 3493 if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, 3494 ~EXT4_FEATURE_RO_COMPAT_SUPP))) { 3495 ext4_msg(sb, KERN_WARNING, "couldn't " 3496 "remount RDWR because of unsupported " 3497 "optional features (%x)", 3498 (le32_to_cpu(sbi->s_es->s_feature_ro_compat) & 3499 ~EXT4_FEATURE_RO_COMPAT_SUPP)); 3500 err = -EROFS; 3501 goto restore_opts; 3502 } 3503 3504 /* 3505 * Make sure the group descriptor checksums 3506 * are sane. If they aren't, refuse to remount r/w. 
3507 */ 3508 for (g = 0; g < sbi->s_groups_count; g++) { 3509 struct ext4_group_desc *gdp = 3510 ext4_get_group_desc(sb, g, NULL); 3511 3512 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { 3513 ext4_msg(sb, KERN_ERR, 3514 "ext4_remount: Checksum for group %u failed (%u!=%u)", 3515 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), 3516 le16_to_cpu(gdp->bg_checksum)); 3517 err = -EINVAL; 3518 goto restore_opts; 3519 } 3520 } 3521 3522 /* 3523 * If we have an unprocessed orphan list hanging 3524 * around from a previously readonly bdev mount, 3525 * require a full umount/remount for now. 3526 */ 3527 if (es->s_last_orphan) { 3528 ext4_msg(sb, KERN_WARNING, "Couldn't " 3529 "remount RDWR because of unprocessed " 3530 "orphan inode list. Please " 3531 "umount/remount instead"); 3532 err = -EINVAL; 3533 goto restore_opts; 3534 } 3535 3536 /* 3537 * Mounting a RDONLY partition read-write, so reread 3538 * and store the current valid flag. (It may have 3539 * been changed by e2fsck since we originally mounted 3540 * the partition.) 
3541 */ 3542 if (sbi->s_journal) 3543 ext4_clear_journal_err(sb, es); 3544 sbi->s_mount_state = le16_to_cpu(es->s_state); 3545 if ((err = ext4_group_extend(sb, es, n_blocks_count))) 3546 goto restore_opts; 3547 if (!ext4_setup_super(sb, es, 0)) 3548 sb->s_flags &= ~MS_RDONLY; 3549 } 3550 } 3551 ext4_setup_system_zone(sb); 3552 if (sbi->s_journal == NULL) 3553 ext4_commit_super(sb, 1); 3554 3555 #ifdef CONFIG_QUOTA 3556 /* Release old quota file names */ 3557 for (i = 0; i < MAXQUOTAS; i++) 3558 if (old_opts.s_qf_names[i] && 3559 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 3560 kfree(old_opts.s_qf_names[i]); 3561 #endif 3562 unlock_super(sb); 3563 unlock_kernel(); 3564 return 0; 3565 3566 restore_opts: 3567 sb->s_flags = old_sb_flags; 3568 sbi->s_mount_opt = old_opts.s_mount_opt; 3569 sbi->s_resuid = old_opts.s_resuid; 3570 sbi->s_resgid = old_opts.s_resgid; 3571 sbi->s_commit_interval = old_opts.s_commit_interval; 3572 sbi->s_min_batch_time = old_opts.s_min_batch_time; 3573 sbi->s_max_batch_time = old_opts.s_max_batch_time; 3574 #ifdef CONFIG_QUOTA 3575 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 3576 for (i = 0; i < MAXQUOTAS; i++) { 3577 if (sbi->s_qf_names[i] && 3578 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 3579 kfree(sbi->s_qf_names[i]); 3580 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 3581 } 3582 #endif 3583 unlock_super(sb); 3584 unlock_kernel(); 3585 return err; 3586 } 3587 3588 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) 3589 { 3590 struct super_block *sb = dentry->d_sb; 3591 struct ext4_sb_info *sbi = EXT4_SB(sb); 3592 struct ext4_super_block *es = sbi->s_es; 3593 u64 fsid; 3594 3595 if (test_opt(sb, MINIX_DF)) { 3596 sbi->s_overhead_last = 0; 3597 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { 3598 ext4_group_t i, ngroups = ext4_get_groups_count(sb); 3599 ext4_fsblk_t overhead = 0; 3600 3601 /* 3602 * Compute the overhead (FS structures). 
This is constant 3603 * for a given filesystem unless the number of block groups 3604 * changes so we cache the previous value until it does. 3605 */ 3606 3607 /* 3608 * All of the blocks before first_data_block are 3609 * overhead 3610 */ 3611 overhead = le32_to_cpu(es->s_first_data_block); 3612 3613 /* 3614 * Add the overhead attributed to the superblock and 3615 * block group descriptors. If the sparse superblocks 3616 * feature is turned on, then not all groups have this. 3617 */ 3618 for (i = 0; i < ngroups; i++) { 3619 overhead += ext4_bg_has_super(sb, i) + 3620 ext4_bg_num_gdb(sb, i); 3621 cond_resched(); 3622 } 3623 3624 /* 3625 * Every block group has an inode bitmap, a block 3626 * bitmap, and an inode table. 3627 */ 3628 overhead += ngroups * (2 + sbi->s_itb_per_group); 3629 sbi->s_overhead_last = overhead; 3630 smp_wmb(); 3631 sbi->s_blocks_last = ext4_blocks_count(es); 3632 } 3633 3634 buf->f_type = EXT4_SUPER_MAGIC; 3635 buf->f_bsize = sb->s_blocksize; 3636 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 3637 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - 3638 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); 3639 ext4_free_blocks_count_set(es, buf->f_bfree); 3640 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 3641 if (buf->f_bfree < ext4_r_blocks_count(es)) 3642 buf->f_bavail = 0; 3643 buf->f_files = le32_to_cpu(es->s_inodes_count); 3644 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); 3645 es->s_free_inodes_count = cpu_to_le32(buf->f_ffree); 3646 buf->f_namelen = EXT4_NAME_LEN; 3647 fsid = le64_to_cpup((void *)es->s_uuid) ^ 3648 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 3649 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 3650 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 3651 3652 return 0; 3653 } 3654 3655 /* Helper function for writing quotas on sync - we need to start transaction 3656 * before quota file is locked for write. 
Otherwise the are possible deadlocks: 3657 * Process 1 Process 2 3658 * ext4_create() quota_sync() 3659 * jbd2_journal_start() write_dquot() 3660 * vfs_dq_init() down(dqio_mutex) 3661 * down(dqio_mutex) jbd2_journal_start() 3662 * 3663 */ 3664 3665 #ifdef CONFIG_QUOTA 3666 3667 static inline struct inode *dquot_to_inode(struct dquot *dquot) 3668 { 3669 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; 3670 } 3671 3672 static int ext4_write_dquot(struct dquot *dquot) 3673 { 3674 int ret, err; 3675 handle_t *handle; 3676 struct inode *inode; 3677 3678 inode = dquot_to_inode(dquot); 3679 handle = ext4_journal_start(inode, 3680 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 3681 if (IS_ERR(handle)) 3682 return PTR_ERR(handle); 3683 ret = dquot_commit(dquot); 3684 err = ext4_journal_stop(handle); 3685 if (!ret) 3686 ret = err; 3687 return ret; 3688 } 3689 3690 static int ext4_acquire_dquot(struct dquot *dquot) 3691 { 3692 int ret, err; 3693 handle_t *handle; 3694 3695 handle = ext4_journal_start(dquot_to_inode(dquot), 3696 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 3697 if (IS_ERR(handle)) 3698 return PTR_ERR(handle); 3699 ret = dquot_acquire(dquot); 3700 err = ext4_journal_stop(handle); 3701 if (!ret) 3702 ret = err; 3703 return ret; 3704 } 3705 3706 static int ext4_release_dquot(struct dquot *dquot) 3707 { 3708 int ret, err; 3709 handle_t *handle; 3710 3711 handle = ext4_journal_start(dquot_to_inode(dquot), 3712 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 3713 if (IS_ERR(handle)) { 3714 /* Release dquot anyway to avoid endless cycle in dqput() */ 3715 dquot_release(dquot); 3716 return PTR_ERR(handle); 3717 } 3718 ret = dquot_release(dquot); 3719 err = ext4_journal_stop(handle); 3720 if (!ret) 3721 ret = err; 3722 return ret; 3723 } 3724 3725 static int ext4_mark_dquot_dirty(struct dquot *dquot) 3726 { 3727 /* Are we journaling quotas? 
*/ 3728 if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 3729 EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 3730 dquot_mark_dquot_dirty(dquot); 3731 return ext4_write_dquot(dquot); 3732 } else { 3733 return dquot_mark_dquot_dirty(dquot); 3734 } 3735 } 3736 3737 static int ext4_write_info(struct super_block *sb, int type) 3738 { 3739 int ret, err; 3740 handle_t *handle; 3741 3742 /* Data block + inode block */ 3743 handle = ext4_journal_start(sb->s_root->d_inode, 2); 3744 if (IS_ERR(handle)) 3745 return PTR_ERR(handle); 3746 ret = dquot_commit_info(sb, type); 3747 err = ext4_journal_stop(handle); 3748 if (!ret) 3749 ret = err; 3750 return ret; 3751 } 3752 3753 /* 3754 * Turn on quotas during mount time - we need to find 3755 * the quota file and such... 3756 */ 3757 static int ext4_quota_on_mount(struct super_block *sb, int type) 3758 { 3759 return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], 3760 EXT4_SB(sb)->s_jquota_fmt, type); 3761 } 3762 3763 /* 3764 * Standard function to be called on quota_on 3765 */ 3766 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 3767 char *name, int remount) 3768 { 3769 int err; 3770 struct path path; 3771 3772 if (!test_opt(sb, QUOTA)) 3773 return -EINVAL; 3774 /* When remounting, no checks are needed and in fact, name is NULL */ 3775 if (remount) 3776 return vfs_quota_on(sb, type, format_id, name, remount); 3777 3778 err = kern_path(name, LOOKUP_FOLLOW, &path); 3779 if (err) 3780 return err; 3781 3782 /* Quotafile not on the same filesystem? */ 3783 if (path.mnt->mnt_sb != sb) { 3784 path_put(&path); 3785 return -EXDEV; 3786 } 3787 /* Journaling quota? */ 3788 if (EXT4_SB(sb)->s_qf_names[type]) { 3789 /* Quotafile not in fs root? */ 3790 if (path.dentry->d_parent != sb->s_root) 3791 ext4_msg(sb, KERN_WARNING, 3792 "Quota file not on filesystem root. 
" 3793 "Journaled quota will not work"); 3794 } 3795 3796 /* 3797 * When we journal data on quota file, we have to flush journal to see 3798 * all updates to the file when we bypass pagecache... 3799 */ 3800 if (EXT4_SB(sb)->s_journal && 3801 ext4_should_journal_data(path.dentry->d_inode)) { 3802 /* 3803 * We don't need to lock updates but journal_flush() could 3804 * otherwise be livelocked... 3805 */ 3806 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 3807 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 3808 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3809 if (err) { 3810 path_put(&path); 3811 return err; 3812 } 3813 } 3814 3815 err = vfs_quota_on_path(sb, type, format_id, &path); 3816 path_put(&path); 3817 return err; 3818 } 3819 3820 /* Read data from quotafile - avoid pagecache and such because we cannot afford 3821 * acquiring the locks... As quota files are never truncated and quota code 3822 * itself serializes the operations (and noone else should touch the files) 3823 * we don't have to be afraid of races */ 3824 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 3825 size_t len, loff_t off) 3826 { 3827 struct inode *inode = sb_dqopt(sb)->files[type]; 3828 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 3829 int err = 0; 3830 int offset = off & (sb->s_blocksize - 1); 3831 int tocopy; 3832 size_t toread; 3833 struct buffer_head *bh; 3834 loff_t i_size = i_size_read(inode); 3835 3836 if (off > i_size) 3837 return 0; 3838 if (off+len > i_size) 3839 len = i_size-off; 3840 toread = len; 3841 while (toread > 0) { 3842 tocopy = sb->s_blocksize - offset < toread ? 3843 sb->s_blocksize - offset : toread; 3844 bh = ext4_bread(NULL, inode, blk, 0, &err); 3845 if (err) 3846 return err; 3847 if (!bh) /* A hole? 
*/ 3848 memset(data, 0, tocopy); 3849 else 3850 memcpy(data, bh->b_data+offset, tocopy); 3851 brelse(bh); 3852 offset = 0; 3853 toread -= tocopy; 3854 data += tocopy; 3855 blk++; 3856 } 3857 return len; 3858 } 3859 3860 /* Write to quotafile (we know the transaction is already started and has 3861 * enough credits) */ 3862 static ssize_t ext4_quota_write(struct super_block *sb, int type, 3863 const char *data, size_t len, loff_t off) 3864 { 3865 struct inode *inode = sb_dqopt(sb)->files[type]; 3866 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 3867 int err = 0; 3868 int offset = off & (sb->s_blocksize - 1); 3869 int tocopy; 3870 int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; 3871 size_t towrite = len; 3872 struct buffer_head *bh; 3873 handle_t *handle = journal_current_handle(); 3874 3875 if (EXT4_SB(sb)->s_journal && !handle) { 3876 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" 3877 " cancelled because transaction is not started", 3878 (unsigned long long)off, (unsigned long long)len); 3879 return -EIO; 3880 } 3881 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 3882 while (towrite > 0) { 3883 tocopy = sb->s_blocksize - offset < towrite ? 
3884 sb->s_blocksize - offset : towrite; 3885 bh = ext4_bread(handle, inode, blk, 1, &err); 3886 if (!bh) 3887 goto out; 3888 if (journal_quota) { 3889 err = ext4_journal_get_write_access(handle, bh); 3890 if (err) { 3891 brelse(bh); 3892 goto out; 3893 } 3894 } 3895 lock_buffer(bh); 3896 memcpy(bh->b_data+offset, data, tocopy); 3897 flush_dcache_page(bh->b_page); 3898 unlock_buffer(bh); 3899 if (journal_quota) 3900 err = ext4_handle_dirty_metadata(handle, NULL, bh); 3901 else { 3902 /* Always do at least ordered writes for quotas */ 3903 err = ext4_jbd2_file_inode(handle, inode); 3904 mark_buffer_dirty(bh); 3905 } 3906 brelse(bh); 3907 if (err) 3908 goto out; 3909 offset = 0; 3910 towrite -= tocopy; 3911 data += tocopy; 3912 blk++; 3913 } 3914 out: 3915 if (len == towrite) { 3916 mutex_unlock(&inode->i_mutex); 3917 return err; 3918 } 3919 if (inode->i_size < off+len-towrite) { 3920 i_size_write(inode, off+len-towrite); 3921 EXT4_I(inode)->i_disksize = inode->i_size; 3922 } 3923 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 3924 ext4_mark_inode_dirty(handle, inode); 3925 mutex_unlock(&inode->i_mutex); 3926 return len - towrite; 3927 } 3928 3929 #endif 3930 3931 static int ext4_get_sb(struct file_system_type *fs_type, int flags, 3932 const char *dev_name, void *data, struct vfsmount *mnt) 3933 { 3934 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); 3935 } 3936 3937 static struct file_system_type ext4_fs_type = { 3938 .owner = THIS_MODULE, 3939 .name = "ext4", 3940 .get_sb = ext4_get_sb, 3941 .kill_sb = kill_block_super, 3942 .fs_flags = FS_REQUIRES_DEV, 3943 }; 3944 3945 #ifdef CONFIG_EXT4DEV_COMPAT 3946 static int ext4dev_get_sb(struct file_system_type *fs_type, int flags, 3947 const char *dev_name, void *data,struct vfsmount *mnt) 3948 { 3949 printk(KERN_WARNING "EXT4-fs (%s): Update your userspace programs " 3950 "to mount using ext4\n", dev_name); 3951 printk(KERN_WARNING "EXT4-fs (%s): ext4dev backwards compatibility " 3952 "will go 
away by 2.6.31\n", dev_name); 3953 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); 3954 } 3955 3956 static struct file_system_type ext4dev_fs_type = { 3957 .owner = THIS_MODULE, 3958 .name = "ext4dev", 3959 .get_sb = ext4dev_get_sb, 3960 .kill_sb = kill_block_super, 3961 .fs_flags = FS_REQUIRES_DEV, 3962 }; 3963 MODULE_ALIAS("ext4dev"); 3964 #endif 3965 3966 static int __init init_ext4_fs(void) 3967 { 3968 int err; 3969 3970 err = init_ext4_system_zone(); 3971 if (err) 3972 return err; 3973 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 3974 if (!ext4_kset) 3975 goto out4; 3976 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 3977 err = init_ext4_mballoc(); 3978 if (err) 3979 goto out3; 3980 3981 err = init_ext4_xattr(); 3982 if (err) 3983 goto out2; 3984 err = init_inodecache(); 3985 if (err) 3986 goto out1; 3987 err = register_filesystem(&ext4_fs_type); 3988 if (err) 3989 goto out; 3990 #ifdef CONFIG_EXT4DEV_COMPAT 3991 err = register_filesystem(&ext4dev_fs_type); 3992 if (err) { 3993 unregister_filesystem(&ext4_fs_type); 3994 goto out; 3995 } 3996 #endif 3997 return 0; 3998 out: 3999 destroy_inodecache(); 4000 out1: 4001 exit_ext4_xattr(); 4002 out2: 4003 exit_ext4_mballoc(); 4004 out3: 4005 remove_proc_entry("fs/ext4", NULL); 4006 kset_unregister(ext4_kset); 4007 out4: 4008 exit_ext4_system_zone(); 4009 return err; 4010 } 4011 4012 static void __exit exit_ext4_fs(void) 4013 { 4014 unregister_filesystem(&ext4_fs_type); 4015 #ifdef CONFIG_EXT4DEV_COMPAT 4016 unregister_filesystem(&ext4dev_fs_type); 4017 #endif 4018 destroy_inodecache(); 4019 exit_ext4_xattr(); 4020 exit_ext4_mballoc(); 4021 remove_proc_entry("fs/ext4", NULL); 4022 kset_unregister(ext4_kset); 4023 exit_ext4_system_zone(); 4024 } 4025 4026 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 4027 MODULE_DESCRIPTION("Fourth Extended Filesystem"); 4028 MODULE_LICENSE("GPL"); 4029 module_init(init_ext4_fs) 4030 
module_exit(exit_ext4_fs) 4031