1 /* 2 * linux/fs/ext4/super.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * from 10 * 11 * linux/fs/minix/inode.c 12 * 13 * Copyright (C) 1991, 1992 Linus Torvalds 14 * 15 * Big-endian to little-endian byte-swapping/bitmaps by 16 * David S. Miller (davem@caip.rutgers.edu), 1995 17 */ 18 19 #include <linux/module.h> 20 #include <linux/string.h> 21 #include <linux/fs.h> 22 #include <linux/time.h> 23 #include <linux/jbd2.h> 24 #include <linux/slab.h> 25 #include <linux/init.h> 26 #include <linux/blkdev.h> 27 #include <linux/parser.h> 28 #include <linux/smp_lock.h> 29 #include <linux/buffer_head.h> 30 #include <linux/exportfs.h> 31 #include <linux/vfs.h> 32 #include <linux/random.h> 33 #include <linux/mount.h> 34 #include <linux/namei.h> 35 #include <linux/quotaops.h> 36 #include <linux/seq_file.h> 37 #include <linux/proc_fs.h> 38 #include <linux/marker.h> 39 #include <linux/log2.h> 40 #include <linux/crc16.h> 41 #include <asm/uaccess.h> 42 43 #include "ext4.h" 44 #include "ext4_jbd2.h" 45 #include "xattr.h" 46 #include "acl.h" 47 #include "namei.h" 48 #include "group.h" 49 50 struct proc_dir_entry *ext4_proc_root; 51 52 static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 53 unsigned long journal_devnum); 54 static int ext4_commit_super(struct super_block *sb, 55 struct ext4_super_block *es, int sync); 56 static void ext4_mark_recovery_complete(struct super_block *sb, 57 struct ext4_super_block *es); 58 static void ext4_clear_journal_err(struct super_block *sb, 59 struct ext4_super_block *es); 60 static int ext4_sync_fs(struct super_block *sb, int wait); 61 static const char *ext4_decode_error(struct super_block *sb, int errno, 62 char nbuf[16]); 63 static int ext4_remount(struct super_block *sb, int *flags, char *data); 64 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); 65 static int ext4_unfreeze(struct super_block *sb); 66 static void ext4_write_super(struct super_block *sb); 67 static int ext4_freeze(struct super_block *sb); 68 69 70 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, 71 struct ext4_group_desc *bg) 72 { 73 return le32_to_cpu(bg->bg_block_bitmap_lo) | 74 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 75 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); 76 } 77 78 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, 79 struct ext4_group_desc *bg) 80 { 81 return le32_to_cpu(bg->bg_inode_bitmap_lo) | 82 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 83 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); 84 } 85 86 ext4_fsblk_t ext4_inode_table(struct super_block *sb, 87 struct ext4_group_desc *bg) 88 { 89 return le32_to_cpu(bg->bg_inode_table_lo) | 90 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 91 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 92 } 93 94 __u32 ext4_free_blks_count(struct super_block *sb, 95 struct ext4_group_desc *bg) 96 { 97 return le16_to_cpu(bg->bg_free_blocks_count_lo) | 98 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 99 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); 100 } 101 102 __u32 ext4_free_inodes_count(struct super_block *sb, 103 struct ext4_group_desc *bg) 104 { 105 return le16_to_cpu(bg->bg_free_inodes_count_lo) | 106 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 107 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); 108 } 109 110 __u32 ext4_used_dirs_count(struct super_block *sb, 111 struct ext4_group_desc *bg) 112 { 113 return le16_to_cpu(bg->bg_used_dirs_count_lo) | 114 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 115 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); 116 } 117 118 __u32 ext4_itable_unused_count(struct super_block *sb, 119 struct ext4_group_desc *bg) 120 { 121 return le16_to_cpu(bg->bg_itable_unused_lo) | 122 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 123 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); 124 } 125 126 void ext4_block_bitmap_set(struct super_block *sb, 127 struct ext4_group_desc *bg, ext4_fsblk_t blk) 128 { 129 bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk); 130 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 131 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); 132 } 133 134 void ext4_inode_bitmap_set(struct super_block *sb, 135 struct ext4_group_desc *bg, ext4_fsblk_t blk) 136 { 137 bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk); 138 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 139 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); 140 } 141 142 void ext4_inode_table_set(struct super_block *sb, 143 struct ext4_group_desc *bg, ext4_fsblk_t blk) 144 { 145 bg->bg_inode_table_lo = cpu_to_le32((u32)blk); 146 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 147 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); 148 } 149 150 void ext4_free_blks_set(struct super_block *sb, 151 struct ext4_group_desc *bg, __u32 count) 152 { 153 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); 154 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 155 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16); 156 } 157 158 void ext4_free_inodes_set(struct super_block *sb, 159 struct ext4_group_desc *bg, __u32 count) 160 { 161 bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count); 162 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 163 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16); 164 } 165 166 void ext4_used_dirs_set(struct super_block *sb, 167 struct ext4_group_desc *bg, __u32 count) 168 { 169 bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count); 170 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 171 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16); 172 } 173 174 void ext4_itable_unused_set(struct super_block *sb, 175 struct ext4_group_desc *bg, __u32 count) 176 { 177 bg->bg_itable_unused_lo = cpu_to_le16((__u16)count); 178 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 179 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); 180 } 181 182 /* 183 * Wrappers for jbd2_journal_start/end. 184 * 185 * The only special thing we need to do here is to make sure that all 186 * journal_end calls result in the superblock being marked dirty, so 187 * that sync() will call the filesystem's write_super callback if 188 * appropriate. 189 */ 190 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 191 { 192 journal_t *journal; 193 194 if (sb->s_flags & MS_RDONLY) 195 return ERR_PTR(-EROFS); 196 197 /* Special case here: if the journal has aborted behind our 198 * backs (eg. EIO in the commit thread), then we still need to 199 * take the FS itself readonly cleanly. */ 200 journal = EXT4_SB(sb)->s_journal; 201 if (journal) { 202 if (is_journal_aborted(journal)) { 203 ext4_abort(sb, __func__, 204 "Detected aborted journal"); 205 return ERR_PTR(-EROFS); 206 } 207 return jbd2_journal_start(journal, nblocks); 208 } 209 /* 210 * We're not journaling, return the appropriate indication. 211 */ 212 current->journal_info = EXT4_NOJOURNAL_HANDLE; 213 return current->journal_info; 214 } 215 216 /* 217 * The only special thing we need to do here is to make sure that all 218 * jbd2_journal_stop calls result in the superblock being marked dirty, so 219 * that sync() will call the filesystem's write_super callback if 220 * appropriate. 221 */ 222 int __ext4_journal_stop(const char *where, handle_t *handle) 223 { 224 struct super_block *sb; 225 int err; 226 int rc; 227 228 if (!ext4_handle_valid(handle)) { 229 /* 230 * Do this here since we don't call jbd2_journal_stop() in 231 * no-journal mode. 232 */ 233 current->journal_info = NULL; 234 return 0; 235 } 236 sb = handle->h_transaction->t_journal->j_private; 237 err = handle->h_err; 238 rc = jbd2_journal_stop(handle); 239 240 if (!err) 241 err = rc; 242 if (err) 243 __ext4_std_error(sb, where, err); 244 return err; 245 } 246 247 void ext4_journal_abort_handle(const char *caller, const char *err_fn, 248 struct buffer_head *bh, handle_t *handle, int err) 249 { 250 char nbuf[16]; 251 const char *errstr = ext4_decode_error(NULL, err, nbuf); 252 253 BUG_ON(!ext4_handle_valid(handle)); 254 255 if (bh) 256 BUFFER_TRACE(bh, "abort"); 257 258 if (!handle->h_err) 259 handle->h_err = err; 260 261 if (is_handle_aborted(handle)) 262 return; 263 264 printk(KERN_ERR "%s: aborting transaction: %s in %s\n", 265 caller, errstr, err_fn); 266 267 jbd2_journal_abort_handle(handle); 268 } 269 270 /* Deal with the reporting of failure conditions on a filesystem such as 271 * inconsistencies detected or read IO failures. 272 * 273 * On ext2, we can store the error state of the filesystem in the 274 * superblock. That is not possible on ext4, because we may have other 275 * write ordering constraints on the superblock which prevent us from 276 * writing it out straight away; and given that the journal is about to 277 * be aborted, we can't rely on the current, or future, transactions to 278 * write out the superblock safely. 279 * 280 * We'll just use the jbd2_journal_abort() error code to record an error in 281 * the journal instead. On recovery, the journal will compain about 282 * that error until we've noted it down and cleared it. 283 */ 284 285 static void ext4_handle_error(struct super_block *sb) 286 { 287 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 288 289 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 290 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 291 292 if (sb->s_flags & MS_RDONLY) 293 return; 294 295 if (!test_opt(sb, ERRORS_CONT)) { 296 journal_t *journal = EXT4_SB(sb)->s_journal; 297 298 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 299 if (journal) 300 jbd2_journal_abort(journal, -EIO); 301 } 302 if (test_opt(sb, ERRORS_RO)) { 303 printk(KERN_CRIT "Remounting filesystem read-only\n"); 304 sb->s_flags |= MS_RDONLY; 305 } 306 ext4_commit_super(sb, es, 1); 307 if (test_opt(sb, ERRORS_PANIC)) 308 panic("EXT4-fs (device %s): panic forced after error\n", 309 sb->s_id); 310 } 311 312 void ext4_error(struct super_block *sb, const char *function, 313 const char *fmt, ...) 314 { 315 va_list args; 316 317 va_start(args, fmt); 318 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 319 vprintk(fmt, args); 320 printk("\n"); 321 va_end(args); 322 323 ext4_handle_error(sb); 324 } 325 326 static const char *ext4_decode_error(struct super_block *sb, int errno, 327 char nbuf[16]) 328 { 329 char *errstr = NULL; 330 331 switch (errno) { 332 case -EIO: 333 errstr = "IO failure"; 334 break; 335 case -ENOMEM: 336 errstr = "Out of memory"; 337 break; 338 case -EROFS: 339 if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT) 340 errstr = "Journal has aborted"; 341 else 342 errstr = "Readonly filesystem"; 343 break; 344 default: 345 /* If the caller passed in an extra buffer for unknown 346 * errors, textualise them now. Else we just return 347 * NULL. */ 348 if (nbuf) { 349 /* Check for truncated error codes... */ 350 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 351 errstr = nbuf; 352 } 353 break; 354 } 355 356 return errstr; 357 } 358 359 /* __ext4_std_error decodes expected errors from journaling functions 360 * automatically and invokes the appropriate error response. */ 361 362 void __ext4_std_error(struct super_block *sb, const char *function, int errno) 363 { 364 char nbuf[16]; 365 const char *errstr; 366 367 /* Special case: if the error is EROFS, and we're not already 368 * inside a transaction, then there's really no point in logging 369 * an error. */ 370 if (errno == -EROFS && journal_current_handle() == NULL && 371 (sb->s_flags & MS_RDONLY)) 372 return; 373 374 errstr = ext4_decode_error(sb, errno, nbuf); 375 printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", 376 sb->s_id, function, errstr); 377 378 ext4_handle_error(sb); 379 } 380 381 /* 382 * ext4_abort is a much stronger failure handler than ext4_error. The 383 * abort function may be used to deal with unrecoverable failures such 384 * as journal IO errors or ENOMEM at a critical moment in log management. 385 * 386 * We unconditionally force the filesystem into an ABORT|READONLY state, 387 * unless the error response on the fs has been set to panic in which 388 * case we take the easy way out and panic immediately. 389 */ 390 391 void ext4_abort(struct super_block *sb, const char *function, 392 const char *fmt, ...) 393 { 394 va_list args; 395 396 printk(KERN_CRIT "ext4_abort called.\n"); 397 398 va_start(args, fmt); 399 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 400 vprintk(fmt, args); 401 printk("\n"); 402 va_end(args); 403 404 if (test_opt(sb, ERRORS_PANIC)) 405 panic("EXT4-fs panic from previous error\n"); 406 407 if (sb->s_flags & MS_RDONLY) 408 return; 409 410 printk(KERN_CRIT "Remounting filesystem read-only\n"); 411 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 412 sb->s_flags |= MS_RDONLY; 413 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 414 if (EXT4_SB(sb)->s_journal) 415 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 416 } 417 418 void ext4_warning(struct super_block *sb, const char *function, 419 const char *fmt, ...) 420 { 421 va_list args; 422 423 va_start(args, fmt); 424 printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ", 425 sb->s_id, function); 426 vprintk(fmt, args); 427 printk("\n"); 428 va_end(args); 429 } 430 431 void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, 432 const char *function, const char *fmt, ...) 433 __releases(bitlock) 434 __acquires(bitlock) 435 { 436 va_list args; 437 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 438 439 va_start(args, fmt); 440 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 441 vprintk(fmt, args); 442 printk("\n"); 443 va_end(args); 444 445 if (test_opt(sb, ERRORS_CONT)) { 446 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 447 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 448 ext4_commit_super(sb, es, 0); 449 return; 450 } 451 ext4_unlock_group(sb, grp); 452 ext4_handle_error(sb); 453 /* 454 * We only get here in the ERRORS_RO case; relocking the group 455 * may be dangerous, but nothing bad will happen since the 456 * filesystem will have already been marked read/only and the 457 * journal has been aborted. We return 1 as a hint to callers 458 * who might what to use the return value from 459 * ext4_grp_locked_error() to distinguish beween the 460 * ERRORS_CONT and ERRORS_RO case, and perhaps return more 461 * aggressively from the ext4 function in question, with a 462 * more appropriate error code. 463 */ 464 ext4_lock_group(sb, grp); 465 return; 466 } 467 468 469 void ext4_update_dynamic_rev(struct super_block *sb) 470 { 471 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 472 473 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 474 return; 475 476 ext4_warning(sb, __func__, 477 "updating to rev %d because of new feature flag, " 478 "running e2fsck is recommended", 479 EXT4_DYNAMIC_REV); 480 481 es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO); 482 es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE); 483 es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV); 484 /* leave es->s_feature_*compat flags alone */ 485 /* es->s_uuid will be set by e2fsck if empty */ 486 487 /* 488 * The rest of the superblock fields should be zero, and if not it 489 * means they are likely already in use, so leave them alone. We 490 * can leave it up to e2fsck to clean up any inconsistencies there. 491 */ 492 } 493 494 /* 495 * Open the external journal device 496 */ 497 static struct block_device *ext4_blkdev_get(dev_t dev) 498 { 499 struct block_device *bdev; 500 char b[BDEVNAME_SIZE]; 501 502 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); 503 if (IS_ERR(bdev)) 504 goto fail; 505 return bdev; 506 507 fail: 508 printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n", 509 __bdevname(dev, b), PTR_ERR(bdev)); 510 return NULL; 511 } 512 513 /* 514 * Release the journal device 515 */ 516 static int ext4_blkdev_put(struct block_device *bdev) 517 { 518 bd_release(bdev); 519 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 520 } 521 522 static int ext4_blkdev_remove(struct ext4_sb_info *sbi) 523 { 524 struct block_device *bdev; 525 int ret = -ENODEV; 526 527 bdev = sbi->journal_bdev; 528 if (bdev) { 529 ret = ext4_blkdev_put(bdev); 530 sbi->journal_bdev = NULL; 531 } 532 return ret; 533 } 534 535 static inline struct inode *orphan_list_entry(struct list_head *l) 536 { 537 return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode; 538 } 539 540 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) 541 { 542 struct list_head *l; 543 544 printk(KERN_ERR "sb orphan head is %d\n", 545 le32_to_cpu(sbi->s_es->s_last_orphan)); 546 547 printk(KERN_ERR "sb_info orphan list:\n"); 548 list_for_each(l, &sbi->s_orphan) { 549 struct inode *inode = orphan_list_entry(l); 550 printk(KERN_ERR " " 551 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 552 inode->i_sb->s_id, inode->i_ino, inode, 553 inode->i_mode, inode->i_nlink, 554 NEXT_ORPHAN(inode)); 555 } 556 } 557 558 static void ext4_put_super(struct super_block *sb) 559 { 560 struct ext4_sb_info *sbi = EXT4_SB(sb); 561 struct ext4_super_block *es = sbi->s_es; 562 int i, err; 563 564 ext4_mb_release(sb); 565 ext4_ext_release(sb); 566 ext4_xattr_put_super(sb); 567 if (sbi->s_journal) { 568 err = jbd2_journal_destroy(sbi->s_journal); 569 sbi->s_journal = NULL; 570 if (err < 0) 571 ext4_abort(sb, __func__, 572 "Couldn't clean up the journal"); 573 } 574 if (!(sb->s_flags & MS_RDONLY)) { 575 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 576 es->s_state = cpu_to_le16(sbi->s_mount_state); 577 ext4_commit_super(sb, es, 1); 578 } 579 if (sbi->s_proc) { 580 remove_proc_entry("inode_readahead_blks", sbi->s_proc); 581 remove_proc_entry(sb->s_id, ext4_proc_root); 582 } 583 584 for (i = 0; i < sbi->s_gdb_count; i++) 585 brelse(sbi->s_group_desc[i]); 586 kfree(sbi->s_group_desc); 587 kfree(sbi->s_flex_groups); 588 percpu_counter_destroy(&sbi->s_freeblocks_counter); 589 percpu_counter_destroy(&sbi->s_freeinodes_counter); 590 percpu_counter_destroy(&sbi->s_dirs_counter); 591 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 592 brelse(sbi->s_sbh); 593 #ifdef CONFIG_QUOTA 594 for (i = 0; i < MAXQUOTAS; i++) 595 kfree(sbi->s_qf_names[i]); 596 #endif 597 598 /* Debugging code just in case the in-memory inode orphan list 599 * isn't empty. The on-disk one can be non-empty if we've 600 * detected an error and taken the fs readonly, but the 601 * in-memory list had better be clean by this point. */ 602 if (!list_empty(&sbi->s_orphan)) 603 dump_orphan_list(sb, sbi); 604 J_ASSERT(list_empty(&sbi->s_orphan)); 605 606 invalidate_bdev(sb->s_bdev); 607 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { 608 /* 609 * Invalidate the journal device's buffers. We don't want them 610 * floating about in memory - the physical journal device may 611 * hotswapped, and it breaks the `ro-after' testing code. 612 */ 613 sync_blockdev(sbi->journal_bdev); 614 invalidate_bdev(sbi->journal_bdev); 615 ext4_blkdev_remove(sbi); 616 } 617 sb->s_fs_info = NULL; 618 kfree(sbi); 619 return; 620 } 621 622 static struct kmem_cache *ext4_inode_cachep; 623 624 /* 625 * Called inside transaction, so use GFP_NOFS 626 */ 627 static struct inode *ext4_alloc_inode(struct super_block *sb) 628 { 629 struct ext4_inode_info *ei; 630 631 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); 632 if (!ei) 633 return NULL; 634 #ifdef CONFIG_EXT4_FS_POSIX_ACL 635 ei->i_acl = EXT4_ACL_NOT_CACHED; 636 ei->i_default_acl = EXT4_ACL_NOT_CACHED; 637 #endif 638 ei->vfs_inode.i_version = 1; 639 ei->vfs_inode.i_data.writeback_index = 0; 640 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 641 INIT_LIST_HEAD(&ei->i_prealloc_list); 642 spin_lock_init(&ei->i_prealloc_lock); 643 /* 644 * Note: We can be called before EXT4_SB(sb)->s_journal is set, 645 * therefore it can be null here. Don't check it, just initialize 646 * jinode. 647 */ 648 jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode); 649 ei->i_reserved_data_blocks = 0; 650 ei->i_reserved_meta_blocks = 0; 651 ei->i_allocated_meta_blocks = 0; 652 ei->i_delalloc_reserved_flag = 0; 653 spin_lock_init(&(ei->i_block_reservation_lock)); 654 return &ei->vfs_inode; 655 } 656 657 static void ext4_destroy_inode(struct inode *inode) 658 { 659 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 660 printk("EXT4 Inode %p: orphan list check failed!\n", 661 EXT4_I(inode)); 662 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, 663 EXT4_I(inode), sizeof(struct ext4_inode_info), 664 true); 665 dump_stack(); 666 } 667 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 668 } 669 670 static void init_once(void *foo) 671 { 672 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 673 674 INIT_LIST_HEAD(&ei->i_orphan); 675 #ifdef CONFIG_EXT4_FS_XATTR 676 init_rwsem(&ei->xattr_sem); 677 #endif 678 init_rwsem(&ei->i_data_sem); 679 inode_init_once(&ei->vfs_inode); 680 } 681 682 static int init_inodecache(void) 683 { 684 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", 685 sizeof(struct ext4_inode_info), 686 0, (SLAB_RECLAIM_ACCOUNT| 687 SLAB_MEM_SPREAD), 688 init_once); 689 if (ext4_inode_cachep == NULL) 690 return -ENOMEM; 691 return 0; 692 } 693 694 static void destroy_inodecache(void) 695 { 696 kmem_cache_destroy(ext4_inode_cachep); 697 } 698 699 static void ext4_clear_inode(struct inode *inode) 700 { 701 #ifdef CONFIG_EXT4_FS_POSIX_ACL 702 if (EXT4_I(inode)->i_acl && 703 EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { 704 posix_acl_release(EXT4_I(inode)->i_acl); 705 EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED; 706 } 707 if (EXT4_I(inode)->i_default_acl && 708 EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) { 709 posix_acl_release(EXT4_I(inode)->i_default_acl); 710 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; 711 } 712 #endif 713 ext4_discard_preallocations(inode); 714 if (EXT4_JOURNAL(inode)) 715 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, 716 &EXT4_I(inode)->jinode); 717 } 718 719 static inline void ext4_show_quota_options(struct seq_file *seq, 720 struct super_block *sb) 721 { 722 #if defined(CONFIG_QUOTA) 723 struct ext4_sb_info *sbi = EXT4_SB(sb); 724 725 if (sbi->s_jquota_fmt) 726 seq_printf(seq, ",jqfmt=%s", 727 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0"); 728 729 if (sbi->s_qf_names[USRQUOTA]) 730 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 731 732 if (sbi->s_qf_names[GRPQUOTA]) 733 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 734 735 if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) 736 seq_puts(seq, ",usrquota"); 737 738 if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) 739 seq_puts(seq, ",grpquota"); 740 #endif 741 } 742 743 /* 744 * Show an option if 745 * - it's set to a non-default value OR 746 * - if the per-sb default is different from the global default 747 */ 748 static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) 749 { 750 int def_errors; 751 unsigned long def_mount_opts; 752 struct super_block *sb = vfs->mnt_sb; 753 struct ext4_sb_info *sbi = EXT4_SB(sb); 754 struct ext4_super_block *es = sbi->s_es; 755 756 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 757 def_errors = le16_to_cpu(es->s_errors); 758 759 if (sbi->s_sb_block != 1) 760 seq_printf(seq, ",sb=%llu", sbi->s_sb_block); 761 if (test_opt(sb, MINIX_DF)) 762 seq_puts(seq, ",minixdf"); 763 if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS)) 764 seq_puts(seq, ",grpid"); 765 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) 766 seq_puts(seq, ",nogrpid"); 767 if (sbi->s_resuid != EXT4_DEF_RESUID || 768 le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) { 769 seq_printf(seq, ",resuid=%u", sbi->s_resuid); 770 } 771 if (sbi->s_resgid != EXT4_DEF_RESGID || 772 le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { 773 seq_printf(seq, ",resgid=%u", sbi->s_resgid); 774 } 775 if (test_opt(sb, ERRORS_RO)) { 776 if (def_errors == EXT4_ERRORS_PANIC || 777 def_errors == EXT4_ERRORS_CONTINUE) { 778 seq_puts(seq, ",errors=remount-ro"); 779 } 780 } 781 if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) 782 seq_puts(seq, ",errors=continue"); 783 if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) 784 seq_puts(seq, ",errors=panic"); 785 if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16)) 786 seq_puts(seq, ",nouid32"); 787 if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) 788 seq_puts(seq, ",debug"); 789 if (test_opt(sb, OLDALLOC)) 790 seq_puts(seq, ",oldalloc"); 791 #ifdef CONFIG_EXT4_FS_XATTR 792 if (test_opt(sb, XATTR_USER) && 793 !(def_mount_opts & EXT4_DEFM_XATTR_USER)) 794 seq_puts(seq, ",user_xattr"); 795 if (!test_opt(sb, XATTR_USER) && 796 (def_mount_opts & EXT4_DEFM_XATTR_USER)) { 797 seq_puts(seq, ",nouser_xattr"); 798 } 799 #endif 800 #ifdef CONFIG_EXT4_FS_POSIX_ACL 801 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) 802 seq_puts(seq, ",acl"); 803 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) 804 seq_puts(seq, ",noacl"); 805 #endif 806 if (!test_opt(sb, RESERVATION)) 807 seq_puts(seq, ",noreservation"); 808 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { 809 seq_printf(seq, ",commit=%u", 810 (unsigned) (sbi->s_commit_interval / HZ)); 811 } 812 if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) { 813 seq_printf(seq, ",min_batch_time=%u", 814 (unsigned) sbi->s_min_batch_time); 815 } 816 if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { 817 seq_printf(seq, ",max_batch_time=%u", 818 (unsigned) sbi->s_min_batch_time); 819 } 820 821 /* 822 * We're changing the default of barrier mount option, so 823 * let's always display its mount state so it's clear what its 824 * status is. 825 */ 826 seq_puts(seq, ",barrier="); 827 seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); 828 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) 829 seq_puts(seq, ",journal_async_commit"); 830 if (test_opt(sb, NOBH)) 831 seq_puts(seq, ",nobh"); 832 if (test_opt(sb, I_VERSION)) 833 seq_puts(seq, ",i_version"); 834 if (!test_opt(sb, DELALLOC)) 835 seq_puts(seq, ",nodelalloc"); 836 837 838 if (sbi->s_stripe) 839 seq_printf(seq, ",stripe=%lu", sbi->s_stripe); 840 /* 841 * journal mode get enabled in different ways 842 * So just print the value even if we didn't specify it 843 */ 844 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 845 seq_puts(seq, ",data=journal"); 846 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 847 seq_puts(seq, ",data=ordered"); 848 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 849 seq_puts(seq, ",data=writeback"); 850 851 if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) 852 seq_printf(seq, ",inode_readahead_blks=%u", 853 sbi->s_inode_readahead_blks); 854 855 if (test_opt(sb, DATA_ERR_ABORT)) 856 seq_puts(seq, ",data_err=abort"); 857 858 ext4_show_quota_options(seq, sb); 859 return 0; 860 } 861 862 863 static struct inode *ext4_nfs_get_inode(struct super_block *sb, 864 u64 ino, u32 generation) 865 { 866 struct inode *inode; 867 868 if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) 869 return ERR_PTR(-ESTALE); 870 if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) 871 return ERR_PTR(-ESTALE); 872 873 /* iget isn't really right if the inode is currently unallocated!! 874 * 875 * ext4_read_inode will return a bad_inode if the inode had been 876 * deleted, so we should be safe. 877 * 878 * Currently we don't know the generation for parent directory, so 879 * a generation of 0 means "accept any" 880 */ 881 inode = ext4_iget(sb, ino); 882 if (IS_ERR(inode)) 883 return ERR_CAST(inode); 884 if (generation && inode->i_generation != generation) { 885 iput(inode); 886 return ERR_PTR(-ESTALE); 887 } 888 889 return inode; 890 } 891 892 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, 893 int fh_len, int fh_type) 894 { 895 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 896 ext4_nfs_get_inode); 897 } 898 899 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, 900 int fh_len, int fh_type) 901 { 902 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 903 ext4_nfs_get_inode); 904 } 905 906 /* 907 * Try to release metadata pages (indirect blocks, directories) which are 908 * mapped via the block device. Since these pages could have journal heads 909 * which would prevent try_to_free_buffers() from freeing them, we must use 910 * jbd2 layer's try_to_free_buffers() function to release them. 911 */ 912 static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait) 913 { 914 journal_t *journal = EXT4_SB(sb)->s_journal; 915 916 WARN_ON(PageChecked(page)); 917 if (!page_has_buffers(page)) 918 return 0; 919 if (journal) 920 return jbd2_journal_try_to_free_buffers(journal, page, 921 wait & ~__GFP_WAIT); 922 return try_to_free_buffers(page); 923 } 924 925 #ifdef CONFIG_QUOTA 926 #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") 927 #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 928 929 static int ext4_write_dquot(struct dquot *dquot); 930 static int ext4_acquire_dquot(struct dquot *dquot); 931 static int ext4_release_dquot(struct dquot *dquot); 932 static int ext4_mark_dquot_dirty(struct dquot *dquot); 933 static int ext4_write_info(struct super_block *sb, int type); 934 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 935 char *path, int remount); 936 static int ext4_quota_on_mount(struct super_block *sb, int type); 937 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 938 size_t len, loff_t off); 939 static ssize_t ext4_quota_write(struct super_block *sb, int type, 940 const char *data, size_t len, loff_t off); 941 942 static struct dquot_operations ext4_quota_operations = { 943 .initialize = dquot_initialize, 944 .drop = dquot_drop, 945 .alloc_space = dquot_alloc_space, 946 .reserve_space = dquot_reserve_space, 947 .claim_space = dquot_claim_space, 948 .release_rsv = dquot_release_reserved_space, 949 .get_reserved_space = ext4_get_reserved_space, 950 .alloc_inode = dquot_alloc_inode, 951 .free_space = dquot_free_space, 952 .free_inode = dquot_free_inode, 953 .transfer = dquot_transfer, 954 .write_dquot = ext4_write_dquot, 955 .acquire_dquot = ext4_acquire_dquot, 956 .release_dquot = ext4_release_dquot, 957 .mark_dirty = ext4_mark_dquot_dirty, 958 .write_info = ext4_write_info, 959 .alloc_dquot = dquot_alloc, 960 .destroy_dquot = dquot_destroy, 961 }; 962 963 static struct quotactl_ops ext4_qctl_operations = { 964 .quota_on = ext4_quota_on, 965 .quota_off = vfs_quota_off, 966 .quota_sync = vfs_quota_sync, 967 .get_info = vfs_get_dqinfo, 968 .set_info = vfs_set_dqinfo, 969 .get_dqblk = vfs_get_dqblk, 970 .set_dqblk = vfs_set_dqblk 971 }; 972 #endif 973 974 static const struct super_operations ext4_sops = { 975 .alloc_inode = ext4_alloc_inode, 976 .destroy_inode = ext4_destroy_inode, 977 .write_inode = ext4_write_inode, 978 .dirty_inode = ext4_dirty_inode, 979 .delete_inode = ext4_delete_inode, 980 .put_super = ext4_put_super, 981 .write_super = ext4_write_super, 982 .sync_fs = ext4_sync_fs, 983 .freeze_fs = ext4_freeze, 984 .unfreeze_fs = ext4_unfreeze, 985 .statfs = ext4_statfs, 986 .remount_fs = ext4_remount, 987 .clear_inode = ext4_clear_inode, 988 .show_options = ext4_show_options, 989 #ifdef CONFIG_QUOTA 990 .quota_read = ext4_quota_read, 991 .quota_write = ext4_quota_write, 992 #endif 993 .bdev_try_to_free_page = bdev_try_to_free_page, 994 }; 995 996 static const struct export_operations ext4_export_ops = { 997 .fh_to_dentry = ext4_fh_to_dentry, 998 .fh_to_parent = ext4_fh_to_parent, 999 .get_parent = ext4_get_parent, 1000 }; 1001 1002 enum { 1003 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 1004 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 1005 Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, 1006 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 1007 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, 1008 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, 1009 Opt_journal_update, Opt_journal_dev, 1010 Opt_journal_checksum, Opt_journal_async_commit, 1011 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1012 Opt_data_err_abort, Opt_data_err_ignore, 1013 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1014 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 1015 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, 1016 Opt_grpquota, Opt_i_version, 1017 Opt_stripe, Opt_delalloc, Opt_nodelalloc, 1018 Opt_inode_readahead_blks, Opt_journal_ioprio 1019 }; 1020 1021 static const match_table_t tokens = { 1022 {Opt_bsd_df, "bsddf"}, 1023 {Opt_minix_df, "minixdf"}, 1024 {Opt_grpid, "grpid"}, 1025 {Opt_grpid, "bsdgroups"}, 1026 {Opt_nogrpid, "nogrpid"}, 1027 {Opt_nogrpid, "sysvgroups"}, 1028 {Opt_resgid, "resgid=%u"}, 1029 {Opt_resuid, "resuid=%u"}, 1030 {Opt_sb, "sb=%u"}, 1031 {Opt_err_cont, "errors=continue"}, 1032 {Opt_err_panic, "errors=panic"}, 1033 {Opt_err_ro, "errors=remount-ro"}, 1034 {Opt_nouid32, "nouid32"}, 1035 {Opt_debug, "debug"}, 1036 {Opt_oldalloc, "oldalloc"}, 1037 {Opt_orlov, "orlov"}, 1038 {Opt_user_xattr, "user_xattr"}, 1039 {Opt_nouser_xattr, "nouser_xattr"}, 1040 {Opt_acl, "acl"}, 1041 {Opt_noacl, "noacl"}, 1042 {Opt_reservation, "reservation"}, 1043 {Opt_noreservation, "noreservation"}, 1044 {Opt_noload, "noload"}, 1045 {Opt_nobh, "nobh"}, 1046 {Opt_bh, "bh"}, 1047 {Opt_commit, "commit=%u"}, 1048 {Opt_min_batch_time, "min_batch_time=%u"}, 1049 {Opt_max_batch_time, "max_batch_time=%u"}, 1050 {Opt_journal_update, "journal=update"}, 1051 {Opt_journal_dev, "journal_dev=%u"}, 1052 {Opt_journal_checksum, "journal_checksum"}, 1053 {Opt_journal_async_commit, "journal_async_commit"}, 1054 {Opt_abort, "abort"}, 1055 {Opt_data_journal, "data=journal"}, 1056 {Opt_data_ordered, "data=ordered"}, 1057 {Opt_data_writeback, "data=writeback"}, 1058 {Opt_data_err_abort, "data_err=abort"}, 1059 {Opt_data_err_ignore, "data_err=ignore"}, 1060 {Opt_offusrjquota, "usrjquota="}, 1061 {Opt_usrjquota, "usrjquota=%s"}, 1062 {Opt_offgrpjquota, "grpjquota="}, 1063 {Opt_grpjquota, "grpjquota=%s"}, 1064 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 1065 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 1066 {Opt_grpquota, "grpquota"}, 1067 {Opt_noquota, "noquota"}, 1068 {Opt_quota, "quota"}, 1069 {Opt_usrquota, "usrquota"}, 1070 {Opt_barrier, "barrier=%u"}, 1071 {Opt_i_version, "i_version"}, 1072 {Opt_stripe, "stripe=%u"}, 1073 {Opt_resize, "resize"}, 1074 {Opt_delalloc, "delalloc"}, 1075 {Opt_nodelalloc, "nodelalloc"}, 1076 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1077 {Opt_journal_ioprio, "journal_ioprio=%u"}, 1078 {Opt_err, NULL}, 1079 }; 1080 1081 static ext4_fsblk_t get_sb_block(void **data) 1082 { 1083 ext4_fsblk_t sb_block; 1084 char *options = (char *) *data; 1085 1086 if (!options || strncmp(options, "sb=", 3) != 0) 1087 return 1; /* Default location */ 1088 options += 3; 1089 /*todo: use simple_strtoll with >32bit ext4 */ 1090 sb_block = simple_strtoul(options, &options, 0); 1091 if (*options && *options != ',') { 1092 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", 1093 (char *) *data); 1094 return 1; 1095 } 1096 if (*options == ',') 1097 options++; 1098 *data = (void *) options; 1099 return sb_block; 1100 } 1101 1102 #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) 1103 1104 static int parse_options(char *options, struct super_block *sb, 1105 unsigned long *journal_devnum, 1106 unsigned int *journal_ioprio, 1107 ext4_fsblk_t *n_blocks_count, int is_remount) 1108 { 1109 struct ext4_sb_info *sbi = EXT4_SB(sb); 1110 char *p; 1111 substring_t args[MAX_OPT_ARGS]; 1112 int data_opt = 0; 1113 int option; 1114 #ifdef CONFIG_QUOTA 1115 int qtype, qfmt; 1116 char *qname; 1117 #endif 1118 1119 if (!options) 1120 return 1; 1121 1122 while ((p = strsep(&options, ",")) != NULL) { 1123 int token; 1124 if (!*p) 1125 continue; 1126 1127 token = match_token(p, tokens, args); 1128 switch (token) { 1129 case Opt_bsd_df: 1130 clear_opt(sbi->s_mount_opt, MINIX_DF); 1131 break; 1132 case Opt_minix_df: 1133 set_opt(sbi->s_mount_opt, MINIX_DF); 1134 break; 1135 case Opt_grpid: 1136 set_opt(sbi->s_mount_opt, GRPID); 1137 break; 1138 case Opt_nogrpid: 1139 clear_opt(sbi->s_mount_opt, GRPID); 1140 break; 1141 case Opt_resuid: 1142 if (match_int(&args[0], &option)) 1143 return 0; 1144 sbi->s_resuid = option; 1145 break; 1146 case Opt_resgid: 1147 if (match_int(&args[0], &option)) 1148 return 0; 1149 sbi->s_resgid = option; 1150 break; 1151 case Opt_sb: 1152 /* handled by get_sb_block() instead of here */ 1153 /* *sb_block = match_int(&args[0]); */ 1154 break; 1155 case Opt_err_panic: 1156 clear_opt(sbi->s_mount_opt, ERRORS_CONT); 1157 clear_opt(sbi->s_mount_opt, ERRORS_RO); 1158 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 1159 break; 1160 case Opt_err_ro: 1161 clear_opt(sbi->s_mount_opt, ERRORS_CONT); 1162 clear_opt(sbi->s_mount_opt, ERRORS_PANIC); 1163 set_opt(sbi->s_mount_opt, ERRORS_RO); 1164 break; 1165 case Opt_err_cont: 1166 clear_opt(sbi->s_mount_opt, ERRORS_RO); 1167 clear_opt(sbi->s_mount_opt, ERRORS_PANIC); 1168 set_opt(sbi->s_mount_opt, ERRORS_CONT); 1169 break; 1170 case Opt_nouid32: 1171 set_opt(sbi->s_mount_opt, NO_UID32); 1172 break; 1173 case Opt_debug: 1174 set_opt(sbi->s_mount_opt, DEBUG); 1175 break; 1176 case Opt_oldalloc: 1177 set_opt(sbi->s_mount_opt, OLDALLOC); 1178 break; 1179 case Opt_orlov: 1180 clear_opt(sbi->s_mount_opt, OLDALLOC); 1181 break; 1182 #ifdef CONFIG_EXT4_FS_XATTR 1183 case Opt_user_xattr: 1184 set_opt(sbi->s_mount_opt, XATTR_USER); 1185 break; 1186 case Opt_nouser_xattr: 1187 clear_opt(sbi->s_mount_opt, XATTR_USER); 1188 break; 1189 #else 1190 case Opt_user_xattr: 1191 case Opt_nouser_xattr: 1192 printk(KERN_ERR "EXT4 (no)user_xattr options " 1193 "not supported\n"); 1194 break; 1195 #endif 1196 #ifdef CONFIG_EXT4_FS_POSIX_ACL 1197 case Opt_acl: 1198 set_opt(sbi->s_mount_opt, POSIX_ACL); 1199 break; 1200 case Opt_noacl: 1201 clear_opt(sbi->s_mount_opt, POSIX_ACL); 1202 break; 1203 #else 1204 case Opt_acl: 1205 case Opt_noacl: 1206 printk(KERN_ERR "EXT4 (no)acl options " 1207 "not supported\n"); 1208 break; 1209 #endif 1210 case Opt_reservation: 1211 set_opt(sbi->s_mount_opt, RESERVATION); 1212 break; 1213 case Opt_noreservation: 1214 clear_opt(sbi->s_mount_opt, RESERVATION); 1215 break; 1216 case Opt_journal_update: 1217 /* @@@ FIXME */ 1218 /* Eventually we will want to be able to create 1219 a journal file here. For now, only allow the 1220 user to specify an existing inode to be the 1221 journal file. */ 1222 if (is_remount) { 1223 printk(KERN_ERR "EXT4-fs: cannot specify " 1224 "journal on remount\n"); 1225 return 0; 1226 } 1227 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); 1228 break; 1229 case Opt_journal_dev: 1230 if (is_remount) { 1231 printk(KERN_ERR "EXT4-fs: cannot specify " 1232 "journal on remount\n"); 1233 return 0; 1234 } 1235 if (match_int(&args[0], &option)) 1236 return 0; 1237 *journal_devnum = option; 1238 break; 1239 case Opt_journal_checksum: 1240 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1241 break; 1242 case Opt_journal_async_commit: 1243 set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); 1244 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1245 break; 1246 case Opt_noload: 1247 set_opt(sbi->s_mount_opt, NOLOAD); 1248 break; 1249 case Opt_commit: 1250 if (match_int(&args[0], &option)) 1251 return 0; 1252 if (option < 0) 1253 return 0; 1254 if (option == 0) 1255 option = JBD2_DEFAULT_MAX_COMMIT_AGE; 1256 sbi->s_commit_interval = HZ * option; 1257 break; 1258 case Opt_max_batch_time: 1259 if (match_int(&args[0], &option)) 1260 return 0; 1261 if (option < 0) 1262 return 0; 1263 if (option == 0) 1264 option = EXT4_DEF_MAX_BATCH_TIME; 1265 sbi->s_max_batch_time = option; 1266 break; 1267 case Opt_min_batch_time: 1268 if (match_int(&args[0], &option)) 1269 return 0; 1270 if (option < 0) 1271 return 0; 1272 sbi->s_min_batch_time = option; 1273 break; 1274 case Opt_data_journal: 1275 data_opt = EXT4_MOUNT_JOURNAL_DATA; 1276 goto datacheck; 1277 case Opt_data_ordered: 1278 data_opt = EXT4_MOUNT_ORDERED_DATA; 1279 goto datacheck; 1280 case Opt_data_writeback: 1281 data_opt = EXT4_MOUNT_WRITEBACK_DATA; 1282 datacheck: 1283 if (is_remount) { 1284 if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) 1285 != data_opt) { 1286 printk(KERN_ERR 1287 "EXT4-fs: cannot change data " 1288 "mode on remount\n"); 1289 return 0; 1290 } 1291 } else { 1292 sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS; 1293 sbi->s_mount_opt |= data_opt; 1294 } 1295 break; 1296 case Opt_data_err_abort: 1297 set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1298 break; 1299 case Opt_data_err_ignore: 1300 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1301 break; 1302 #ifdef CONFIG_QUOTA 1303 case Opt_usrjquota: 1304 qtype = USRQUOTA; 1305 goto set_qf_name; 1306 case Opt_grpjquota: 1307 qtype = GRPQUOTA; 1308 set_qf_name: 1309 if (sb_any_quota_loaded(sb) && 1310 !sbi->s_qf_names[qtype]) { 1311 printk(KERN_ERR 1312 "EXT4-fs: Cannot change journaled " 1313 "quota options when quota turned on.\n"); 1314 return 0; 1315 } 1316 qname = match_strdup(&args[0]); 1317 if (!qname) { 1318 printk(KERN_ERR 1319 "EXT4-fs: not enough memory for " 1320 "storing quotafile name.\n"); 1321 return 0; 1322 } 1323 if (sbi->s_qf_names[qtype] && 1324 strcmp(sbi->s_qf_names[qtype], qname)) { 1325 printk(KERN_ERR 1326 "EXT4-fs: %s quota file already " 1327 "specified.\n", QTYPE2NAME(qtype)); 1328 kfree(qname); 1329 return 0; 1330 } 1331 sbi->s_qf_names[qtype] = qname; 1332 if (strchr(sbi->s_qf_names[qtype], '/')) { 1333 printk(KERN_ERR 1334 "EXT4-fs: quotafile must be on " 1335 "filesystem root.\n"); 1336 kfree(sbi->s_qf_names[qtype]); 1337 sbi->s_qf_names[qtype] = NULL; 1338 return 0; 1339 } 1340 set_opt(sbi->s_mount_opt, QUOTA); 1341 break; 1342 case Opt_offusrjquota: 1343 qtype = USRQUOTA; 1344 goto clear_qf_name; 1345 case Opt_offgrpjquota: 1346 qtype = GRPQUOTA; 1347 clear_qf_name: 1348 if (sb_any_quota_loaded(sb) && 1349 sbi->s_qf_names[qtype]) { 1350 printk(KERN_ERR "EXT4-fs: Cannot change " 1351 "journaled quota options when " 1352 "quota turned on.\n"); 1353 return 0; 1354 } 1355 /* 1356 * The space will be released later when all options 1357 * are confirmed to be correct 1358 */ 1359 sbi->s_qf_names[qtype] = NULL; 1360 break; 1361 case Opt_jqfmt_vfsold: 1362 qfmt = QFMT_VFS_OLD; 1363 goto set_qf_format; 1364 case Opt_jqfmt_vfsv0: 1365 qfmt = QFMT_VFS_V0; 1366 set_qf_format: 1367 if (sb_any_quota_loaded(sb) && 1368 sbi->s_jquota_fmt != qfmt) { 1369 printk(KERN_ERR "EXT4-fs: Cannot change " 1370 "journaled quota options when " 1371 "quota turned on.\n"); 1372 return 0; 1373 } 1374 sbi->s_jquota_fmt = qfmt; 1375 break; 1376 case Opt_quota: 1377 case Opt_usrquota: 1378 set_opt(sbi->s_mount_opt, QUOTA); 1379 set_opt(sbi->s_mount_opt, USRQUOTA); 1380 break; 1381 case Opt_grpquota: 1382 set_opt(sbi->s_mount_opt, QUOTA); 1383 set_opt(sbi->s_mount_opt, GRPQUOTA); 1384 break; 1385 case Opt_noquota: 1386 if (sb_any_quota_loaded(sb)) { 1387 printk(KERN_ERR "EXT4-fs: Cannot change quota " 1388 "options when quota turned on.\n"); 1389 return 0; 1390 } 1391 clear_opt(sbi->s_mount_opt, QUOTA); 1392 clear_opt(sbi->s_mount_opt, USRQUOTA); 1393 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1394 break; 1395 #else 1396 case Opt_quota: 1397 case Opt_usrquota: 1398 case Opt_grpquota: 1399 printk(KERN_ERR 1400 "EXT4-fs: quota options not supported.\n"); 1401 break; 1402 case Opt_usrjquota: 1403 case Opt_grpjquota: 1404 case Opt_offusrjquota: 1405 case Opt_offgrpjquota: 1406 case Opt_jqfmt_vfsold: 1407 case Opt_jqfmt_vfsv0: 1408 printk(KERN_ERR 1409 "EXT4-fs: journaled quota options not " 1410 "supported.\n"); 1411 break; 1412 case Opt_noquota: 1413 break; 1414 #endif 1415 case Opt_abort: 1416 set_opt(sbi->s_mount_opt, ABORT); 1417 break; 1418 case Opt_barrier: 1419 if (match_int(&args[0], &option)) 1420 return 0; 1421 if (option) 1422 set_opt(sbi->s_mount_opt, BARRIER); 1423 else 1424 clear_opt(sbi->s_mount_opt, BARRIER); 1425 break; 1426 case Opt_ignore: 1427 break; 1428 case Opt_resize: 1429 if (!is_remount) { 1430 printk("EXT4-fs: resize option only available " 1431 "for remount\n"); 1432 return 0; 1433 } 1434 if (match_int(&args[0], &option) != 0) 1435 return 0; 1436 *n_blocks_count = option; 1437 break; 1438 case Opt_nobh: 1439 set_opt(sbi->s_mount_opt, NOBH); 1440 break; 1441 case Opt_bh: 1442 clear_opt(sbi->s_mount_opt, NOBH); 1443 break; 1444 case Opt_i_version: 1445 set_opt(sbi->s_mount_opt, I_VERSION); 1446 sb->s_flags |= MS_I_VERSION; 1447 break; 1448 case Opt_nodelalloc: 1449 clear_opt(sbi->s_mount_opt, DELALLOC); 1450 break; 1451 case Opt_stripe: 1452 if (match_int(&args[0], &option)) 1453 return 0; 1454 if (option < 0) 1455 return 0; 1456 sbi->s_stripe = option; 1457 break; 1458 case Opt_delalloc: 1459 set_opt(sbi->s_mount_opt, DELALLOC); 1460 break; 1461 case Opt_inode_readahead_blks: 1462 if (match_int(&args[0], &option)) 1463 return 0; 1464 if (option < 0 || option > (1 << 30)) 1465 return 0; 1466 sbi->s_inode_readahead_blks = option; 1467 break; 1468 case Opt_journal_ioprio: 1469 if (match_int(&args[0], &option)) 1470 return 0; 1471 if (option < 0 || option > 7) 1472 break; 1473 *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 1474 option); 1475 break; 1476 default: 1477 printk(KERN_ERR 1478 "EXT4-fs: Unrecognized mount option \"%s\" " 1479 "or missing value\n", p); 1480 return 0; 1481 } 1482 } 1483 #ifdef CONFIG_QUOTA 1484 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1485 if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) && 1486 sbi->s_qf_names[USRQUOTA]) 1487 clear_opt(sbi->s_mount_opt, USRQUOTA); 1488 1489 if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) && 1490 sbi->s_qf_names[GRPQUOTA]) 1491 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1492 1493 if ((sbi->s_qf_names[USRQUOTA] && 1494 (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || 1495 (sbi->s_qf_names[GRPQUOTA] && 1496 (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { 1497 printk(KERN_ERR "EXT4-fs: old and new quota " 1498 "format mixing.\n"); 1499 return 0; 1500 } 1501 1502 if (!sbi->s_jquota_fmt) { 1503 printk(KERN_ERR "EXT4-fs: journaled quota format " 1504 "not specified.\n"); 1505 return 0; 1506 } 1507 } else { 1508 if (sbi->s_jquota_fmt) { 1509 printk(KERN_ERR "EXT4-fs: journaled quota format " 1510 "specified with no journaling " 1511 "enabled.\n"); 1512 return 0; 1513 } 1514 } 1515 #endif 1516 return 1; 1517 } 1518 1519 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, 1520 int read_only) 1521 { 1522 struct ext4_sb_info *sbi = EXT4_SB(sb); 1523 int res = 0; 1524 1525 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1526 printk(KERN_ERR "EXT4-fs warning: revision level too high, " 1527 "forcing read-only mode\n"); 1528 res = MS_RDONLY; 1529 } 1530 if (read_only) 1531 return res; 1532 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1533 printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " 1534 "running e2fsck is recommended\n"); 1535 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1536 printk(KERN_WARNING 1537 "EXT4-fs warning: mounting fs with errors, " 1538 "running e2fsck is recommended\n"); 1539 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1540 le16_to_cpu(es->s_mnt_count) >= 1541 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1542 printk(KERN_WARNING 1543 "EXT4-fs warning: maximal mount count reached, " 1544 "running e2fsck is recommended\n"); 1545 else if (le32_to_cpu(es->s_checkinterval) && 1546 (le32_to_cpu(es->s_lastcheck) + 1547 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1548 printk(KERN_WARNING 1549 "EXT4-fs warning: checktime reached, " 1550 "running e2fsck is recommended\n"); 1551 if (!sbi->s_journal) 1552 es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 1553 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1554 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1555 le16_add_cpu(&es->s_mnt_count, 1); 1556 es->s_mtime = cpu_to_le32(get_seconds()); 1557 ext4_update_dynamic_rev(sb); 1558 if (sbi->s_journal) 1559 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1560 1561 ext4_commit_super(sb, es, 1); 1562 if (test_opt(sb, DEBUG)) 1563 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1564 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 1565 sb->s_blocksize, 1566 sbi->s_groups_count, 1567 EXT4_BLOCKS_PER_GROUP(sb), 1568 EXT4_INODES_PER_GROUP(sb), 1569 sbi->s_mount_opt); 1570 1571 if (EXT4_SB(sb)->s_journal) { 1572 printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", 1573 sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : 1574 "external", EXT4_SB(sb)->s_journal->j_devname); 1575 } else { 1576 printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id); 1577 } 1578 return res; 1579 } 1580 1581 static int ext4_fill_flex_info(struct super_block *sb) 1582 { 1583 struct ext4_sb_info *sbi = EXT4_SB(sb); 1584 struct ext4_group_desc *gdp = NULL; 1585 struct buffer_head *bh; 1586 ext4_group_t flex_group_count; 1587 ext4_group_t flex_group; 1588 int groups_per_flex = 0; 1589 int i; 1590 1591 if (!sbi->s_es->s_log_groups_per_flex) { 1592 sbi->s_log_groups_per_flex = 0; 1593 return 1; 1594 } 1595 1596 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 1597 groups_per_flex = 1 << sbi->s_log_groups_per_flex; 1598 1599 /* We allocate both existing and potentially added groups */ 1600 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1601 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << 1602 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; 1603 sbi->s_flex_groups = kzalloc(flex_group_count * 1604 sizeof(struct flex_groups), GFP_KERNEL); 1605 if (sbi->s_flex_groups == NULL) { 1606 printk(KERN_ERR "EXT4-fs: not enough memory for " 1607 "%u flex groups\n", flex_group_count); 1608 goto failed; 1609 } 1610 1611 for (i = 0; i < sbi->s_groups_count; i++) { 1612 gdp = ext4_get_group_desc(sb, i, &bh); 1613 1614 flex_group = ext4_flex_group(sbi, i); 1615 sbi->s_flex_groups[flex_group].free_inodes += 1616 ext4_free_inodes_count(sb, gdp); 1617 sbi->s_flex_groups[flex_group].free_blocks += 1618 ext4_free_blks_count(sb, gdp); 1619 } 1620 1621 return 1; 1622 failed: 1623 return 0; 1624 } 1625 1626 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, 1627 struct ext4_group_desc *gdp) 1628 { 1629 __u16 crc = 0; 1630 1631 if (sbi->s_es->s_feature_ro_compat & 1632 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { 1633 int offset = offsetof(struct ext4_group_desc, bg_checksum); 1634 __le32 le_group = cpu_to_le32(block_group); 1635 1636 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); 1637 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); 1638 crc = crc16(crc, (__u8 *)gdp, offset); 1639 offset += sizeof(gdp->bg_checksum); /* skip checksum */ 1640 /* for checksum of struct ext4_group_desc do the rest...*/ 1641 if ((sbi->s_es->s_feature_incompat & 1642 cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && 1643 offset < le16_to_cpu(sbi->s_es->s_desc_size)) 1644 crc = crc16(crc, (__u8 *)gdp + offset, 1645 le16_to_cpu(sbi->s_es->s_desc_size) - 1646 offset); 1647 } 1648 1649 return cpu_to_le16(crc); 1650 } 1651 1652 int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, 1653 struct ext4_group_desc *gdp) 1654 { 1655 if ((sbi->s_es->s_feature_ro_compat & 1656 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) && 1657 (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp))) 1658 return 0; 1659 1660 return 1; 1661 } 1662 1663 /* Called at mount-time, super-block is locked */ 1664 static int ext4_check_descriptors(struct super_block *sb) 1665 { 1666 struct ext4_sb_info *sbi = EXT4_SB(sb); 1667 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); 1668 ext4_fsblk_t last_block; 1669 ext4_fsblk_t block_bitmap; 1670 ext4_fsblk_t inode_bitmap; 1671 ext4_fsblk_t inode_table; 1672 int flexbg_flag = 0; 1673 ext4_group_t i; 1674 1675 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 1676 flexbg_flag = 1; 1677 1678 ext4_debug("Checking group descriptors"); 1679 1680 for (i = 0; i < sbi->s_groups_count; i++) { 1681 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 1682 1683 if (i == sbi->s_groups_count - 1 || flexbg_flag) 1684 last_block = ext4_blocks_count(sbi->s_es) - 1; 1685 else 1686 last_block = first_block + 1687 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 1688 1689 block_bitmap = ext4_block_bitmap(sb, gdp); 1690 if (block_bitmap < first_block || block_bitmap > last_block) { 1691 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1692 "Block bitmap for group %u not in group " 1693 "(block %llu)!\n", i, block_bitmap); 1694 return 0; 1695 } 1696 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1697 if (inode_bitmap < first_block || inode_bitmap > last_block) { 1698 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1699 "Inode bitmap for group %u not in group " 1700 "(block %llu)!\n", i, inode_bitmap); 1701 return 0; 1702 } 1703 inode_table = ext4_inode_table(sb, gdp); 1704 if (inode_table < first_block || 1705 inode_table + sbi->s_itb_per_group - 1 > last_block) { 1706 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1707 "Inode table for group %u not in group " 1708 "(block %llu)!\n", i, inode_table); 1709 return 0; 1710 } 1711 spin_lock(sb_bgl_lock(sbi, i)); 1712 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1713 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1714 "Checksum for group %u failed (%u!=%u)\n", 1715 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1716 gdp)), le16_to_cpu(gdp->bg_checksum)); 1717 if (!(sb->s_flags & MS_RDONLY)) { 1718 spin_unlock(sb_bgl_lock(sbi, i)); 1719 return 0; 1720 } 1721 } 1722 spin_unlock(sb_bgl_lock(sbi, i)); 1723 if (!flexbg_flag) 1724 first_block += EXT4_BLOCKS_PER_GROUP(sb); 1725 } 1726 1727 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 1728 sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); 1729 return 1; 1730 } 1731 1732 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at 1733 * the superblock) which were deleted from all directories, but held open by 1734 * a process at the time of a crash. We walk the list and try to delete these 1735 * inodes at recovery time (only with a read-write filesystem). 1736 * 1737 * In order to keep the orphan inode chain consistent during traversal (in 1738 * case of crash during recovery), we link each inode into the superblock 1739 * orphan list_head and handle it the same way as an inode deletion during 1740 * normal operation (which journals the operations for us). 1741 * 1742 * We only do an iget() and an iput() on each inode, which is very safe if we 1743 * accidentally point at an in-use or already deleted inode. The worst that 1744 * can happen in this case is that we get a "bit already cleared" message from 1745 * ext4_free_inode(). The only reason we would point at a wrong inode is if 1746 * e2fsck was run on this filesystem, and it must have already done the orphan 1747 * inode cleanup for us, so we can safely abort without any further action. 1748 */ 1749 static void ext4_orphan_cleanup(struct super_block *sb, 1750 struct ext4_super_block *es) 1751 { 1752 unsigned int s_flags = sb->s_flags; 1753 int nr_orphans = 0, nr_truncates = 0; 1754 #ifdef CONFIG_QUOTA 1755 int i; 1756 #endif 1757 if (!es->s_last_orphan) { 1758 jbd_debug(4, "no orphan inodes to clean up\n"); 1759 return; 1760 } 1761 1762 if (bdev_read_only(sb->s_bdev)) { 1763 printk(KERN_ERR "EXT4-fs: write access " 1764 "unavailable, skipping orphan cleanup.\n"); 1765 return; 1766 } 1767 1768 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 1769 if (es->s_last_orphan) 1770 jbd_debug(1, "Errors on filesystem, " 1771 "clearing orphan list.\n"); 1772 es->s_last_orphan = 0; 1773 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 1774 return; 1775 } 1776 1777 if (s_flags & MS_RDONLY) { 1778 printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n", 1779 sb->s_id); 1780 sb->s_flags &= ~MS_RDONLY; 1781 } 1782 #ifdef CONFIG_QUOTA 1783 /* Needed for iput() to work correctly and not trash data */ 1784 sb->s_flags |= MS_ACTIVE; 1785 /* Turn on quotas so that they are updated correctly */ 1786 for (i = 0; i < MAXQUOTAS; i++) { 1787 if (EXT4_SB(sb)->s_qf_names[i]) { 1788 int ret = ext4_quota_on_mount(sb, i); 1789 if (ret < 0) 1790 printk(KERN_ERR 1791 "EXT4-fs: Cannot turn on journaled " 1792 "quota: error %d\n", ret); 1793 } 1794 } 1795 #endif 1796 1797 while (es->s_last_orphan) { 1798 struct inode *inode; 1799 1800 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); 1801 if (IS_ERR(inode)) { 1802 es->s_last_orphan = 0; 1803 break; 1804 } 1805 1806 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 1807 vfs_dq_init(inode); 1808 if (inode->i_nlink) { 1809 printk(KERN_DEBUG 1810 "%s: truncating inode %lu to %lld bytes\n", 1811 __func__, inode->i_ino, inode->i_size); 1812 jbd_debug(2, "truncating inode %lu to %lld bytes\n", 1813 inode->i_ino, inode->i_size); 1814 ext4_truncate(inode); 1815 nr_truncates++; 1816 } else { 1817 printk(KERN_DEBUG 1818 "%s: deleting unreferenced inode %lu\n", 1819 __func__, inode->i_ino); 1820 jbd_debug(2, "deleting unreferenced inode %lu\n", 1821 inode->i_ino); 1822 nr_orphans++; 1823 } 1824 iput(inode); /* The delete magic happens here! */ 1825 } 1826 1827 #define PLURAL(x) (x), ((x) == 1) ? "" : "s" 1828 1829 if (nr_orphans) 1830 printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", 1831 sb->s_id, PLURAL(nr_orphans)); 1832 if (nr_truncates) 1833 printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n", 1834 sb->s_id, PLURAL(nr_truncates)); 1835 #ifdef CONFIG_QUOTA 1836 /* Turn quotas off */ 1837 for (i = 0; i < MAXQUOTAS; i++) { 1838 if (sb_dqopt(sb)->files[i]) 1839 vfs_quota_off(sb, i, 0); 1840 } 1841 #endif 1842 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 1843 } 1844 /* 1845 * Maximal extent format file size. 1846 * Resulting logical blkno at s_maxbytes must fit in our on-disk 1847 * extent format containers, within a sector_t, and within i_blocks 1848 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, 1849 * so that won't be a limiting factor. 1850 * 1851 * Note, this does *not* consider any metadata overhead for vfs i_blocks. 1852 */ 1853 static loff_t ext4_max_size(int blkbits, int has_huge_files) 1854 { 1855 loff_t res; 1856 loff_t upper_limit = MAX_LFS_FILESIZE; 1857 1858 /* small i_blocks in vfs inode? */ 1859 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 1860 /* 1861 * CONFIG_LBD is not enabled implies the inode 1862 * i_block represent total blocks in 512 bytes 1863 * 32 == size of vfs inode i_blocks * 8 1864 */ 1865 upper_limit = (1LL << 32) - 1; 1866 1867 /* total blocks in file system block size */ 1868 upper_limit >>= (blkbits - 9); 1869 upper_limit <<= blkbits; 1870 } 1871 1872 /* 32-bit extent-start container, ee_block */ 1873 res = 1LL << 32; 1874 res <<= blkbits; 1875 res -= 1; 1876 1877 /* Sanity check against vm- & vfs- imposed limits */ 1878 if (res > upper_limit) 1879 res = upper_limit; 1880 1881 return res; 1882 } 1883 1884 /* 1885 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect 1886 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. 1887 * We need to be 1 filesystem block less than the 2^48 sector limit. 1888 */ 1889 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) 1890 { 1891 loff_t res = EXT4_NDIR_BLOCKS; 1892 int meta_blocks; 1893 loff_t upper_limit; 1894 /* This is calculated to be the largest file size for a 1895 * dense, bitmapped file such that the total number of 1896 * sectors in the file, including data and all indirect blocks, 1897 * does not exceed 2^48 -1 1898 * __u32 i_blocks_lo and _u16 i_blocks_high representing the 1899 * total number of 512 bytes blocks of the file 1900 */ 1901 1902 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 1903 /* 1904 * !has_huge_files or CONFIG_LBD is not enabled 1905 * implies the inode i_block represent total blocks in 1906 * 512 bytes 32 == size of vfs inode i_blocks * 8 1907 */ 1908 upper_limit = (1LL << 32) - 1; 1909 1910 /* total blocks in file system block size */ 1911 upper_limit >>= (bits - 9); 1912 1913 } else { 1914 /* 1915 * We use 48 bit ext4_inode i_blocks 1916 * With EXT4_HUGE_FILE_FL set the i_blocks 1917 * represent total number of blocks in 1918 * file system block size 1919 */ 1920 upper_limit = (1LL << 48) - 1; 1921 1922 } 1923 1924 /* indirect blocks */ 1925 meta_blocks = 1; 1926 /* double indirect blocks */ 1927 meta_blocks += 1 + (1LL << (bits-2)); 1928 /* tripple indirect blocks */ 1929 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); 1930 1931 upper_limit -= meta_blocks; 1932 upper_limit <<= bits; 1933 1934 res += 1LL << (bits-2); 1935 res += 1LL << (2*(bits-2)); 1936 res += 1LL << (3*(bits-2)); 1937 res <<= bits; 1938 if (res > upper_limit) 1939 res = upper_limit; 1940 1941 if (res > MAX_LFS_FILESIZE) 1942 res = MAX_LFS_FILESIZE; 1943 1944 return res; 1945 } 1946 1947 static ext4_fsblk_t descriptor_loc(struct super_block *sb, 1948 ext4_fsblk_t logical_sb_block, int nr) 1949 { 1950 struct ext4_sb_info *sbi = EXT4_SB(sb); 1951 ext4_group_t bg, first_meta_bg; 1952 int has_super = 0; 1953 1954 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 1955 1956 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || 1957 nr < first_meta_bg) 1958 return logical_sb_block + nr + 1; 1959 bg = sbi->s_desc_per_block * nr; 1960 if (ext4_bg_has_super(sb, bg)) 1961 has_super = 1; 1962 return (has_super + ext4_group_first_block_no(sb, bg)); 1963 } 1964 1965 /** 1966 * ext4_get_stripe_size: Get the stripe size. 1967 * @sbi: In memory super block info 1968 * 1969 * If we have specified it via mount option, then 1970 * use the mount option value. If the value specified at mount time is 1971 * greater than the blocks per group use the super block value. 1972 * If the super block value is greater than blocks per group return 0. 1973 * Allocator needs it be less than blocks per group. 1974 * 1975 */ 1976 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) 1977 { 1978 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); 1979 unsigned long stripe_width = 1980 le32_to_cpu(sbi->s_es->s_raid_stripe_width); 1981 1982 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) 1983 return sbi->s_stripe; 1984 1985 if (stripe_width <= sbi->s_blocks_per_group) 1986 return stripe_width; 1987 1988 if (stride <= sbi->s_blocks_per_group) 1989 return stride; 1990 1991 return 0; 1992 } 1993 1994 static int ext4_fill_super(struct super_block *sb, void *data, int silent) 1995 __releases(kernel_lock) 1996 __acquires(kernel_lock) 1997 1998 { 1999 struct buffer_head *bh; 2000 struct ext4_super_block *es = NULL; 2001 struct ext4_sb_info *sbi; 2002 ext4_fsblk_t block; 2003 ext4_fsblk_t sb_block = get_sb_block(&data); 2004 ext4_fsblk_t logical_sb_block; 2005 unsigned long offset = 0; 2006 unsigned long journal_devnum = 0; 2007 unsigned long def_mount_opts; 2008 struct inode *root; 2009 char *cp; 2010 const char *descr; 2011 int ret = -EINVAL; 2012 int blocksize; 2013 unsigned int db_count; 2014 unsigned int i; 2015 int needs_recovery, has_huge_files; 2016 int features; 2017 __u64 blocks_count; 2018 int err; 2019 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 2020 2021 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 2022 if (!sbi) 2023 return -ENOMEM; 2024 sb->s_fs_info = sbi; 2025 sbi->s_mount_opt = 0; 2026 sbi->s_resuid = EXT4_DEF_RESUID; 2027 sbi->s_resgid = EXT4_DEF_RESGID; 2028 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 2029 sbi->s_sb_block = sb_block; 2030 2031 unlock_kernel(); 2032 2033 /* Cleanup superblock name */ 2034 for (cp = sb->s_id; (cp = strchr(cp, '/'));) 2035 *cp = '!'; 2036 2037 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 2038 if (!blocksize) { 2039 printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); 2040 goto out_fail; 2041 } 2042 2043 /* 2044 * The ext4 superblock will not be buffer aligned for other than 1kB 2045 * block sizes. We need to calculate the offset from buffer start. 2046 */ 2047 if (blocksize != EXT4_MIN_BLOCK_SIZE) { 2048 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 2049 offset = do_div(logical_sb_block, blocksize); 2050 } else { 2051 logical_sb_block = sb_block; 2052 } 2053 2054 if (!(bh = sb_bread(sb, logical_sb_block))) { 2055 printk(KERN_ERR "EXT4-fs: unable to read superblock\n"); 2056 goto out_fail; 2057 } 2058 /* 2059 * Note: s_es must be initialized as soon as possible because 2060 * some ext4 macro-instructions depend on its value 2061 */ 2062 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 2063 sbi->s_es = es; 2064 sb->s_magic = le16_to_cpu(es->s_magic); 2065 if (sb->s_magic != EXT4_SUPER_MAGIC) 2066 goto cantfind_ext4; 2067 2068 /* Set defaults before we parse the mount options */ 2069 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 2070 if (def_mount_opts & EXT4_DEFM_DEBUG) 2071 set_opt(sbi->s_mount_opt, DEBUG); 2072 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) 2073 set_opt(sbi->s_mount_opt, GRPID); 2074 if (def_mount_opts & EXT4_DEFM_UID16) 2075 set_opt(sbi->s_mount_opt, NO_UID32); 2076 #ifdef CONFIG_EXT4_FS_XATTR 2077 if (def_mount_opts & EXT4_DEFM_XATTR_USER) 2078 set_opt(sbi->s_mount_opt, XATTR_USER); 2079 #endif 2080 #ifdef CONFIG_EXT4_FS_POSIX_ACL 2081 if (def_mount_opts & EXT4_DEFM_ACL) 2082 set_opt(sbi->s_mount_opt, POSIX_ACL); 2083 #endif 2084 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 2085 sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; 2086 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 2087 sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA; 2088 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) 2089 sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA; 2090 2091 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) 2092 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 2093 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) 2094 set_opt(sbi->s_mount_opt, ERRORS_CONT); 2095 else 2096 set_opt(sbi->s_mount_opt, ERRORS_RO); 2097 2098 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 2099 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 2100 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; 2101 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; 2102 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 2103 2104 set_opt(sbi->s_mount_opt, RESERVATION); 2105 set_opt(sbi->s_mount_opt, BARRIER); 2106 2107 /* 2108 * enable delayed allocation by default 2109 * Use -o nodelalloc to turn it off 2110 */ 2111 set_opt(sbi->s_mount_opt, DELALLOC); 2112 2113 2114 if (!parse_options((char *) data, sb, &journal_devnum, 2115 &journal_ioprio, NULL, 0)) 2116 goto failed_mount; 2117 2118 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2119 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 2120 2121 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && 2122 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || 2123 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 2124 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) 2125 printk(KERN_WARNING 2126 "EXT4-fs warning: feature flags set on rev 0 fs, " 2127 "running e2fsck is recommended\n"); 2128 2129 /* 2130 * Check feature flags regardless of the revision level, since we 2131 * previously didn't change the revision level when setting the flags, 2132 * so there is a chance incompat flags are set on a rev 0 filesystem. 2133 */ 2134 features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); 2135 if (features) { 2136 printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of " 2137 "unsupported optional features (%x).\n", sb->s_id, 2138 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & 2139 ~EXT4_FEATURE_INCOMPAT_SUPP)); 2140 goto failed_mount; 2141 } 2142 features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); 2143 if (!(sb->s_flags & MS_RDONLY) && features) { 2144 printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of " 2145 "unsupported optional features (%x).\n", sb->s_id, 2146 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & 2147 ~EXT4_FEATURE_RO_COMPAT_SUPP)); 2148 goto failed_mount; 2149 } 2150 has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, 2151 EXT4_FEATURE_RO_COMPAT_HUGE_FILE); 2152 if (has_huge_files) { 2153 /* 2154 * Large file size enabled file system can only be 2155 * mount if kernel is build with CONFIG_LBD 2156 */ 2157 if (sizeof(root->i_blocks) < sizeof(u64) && 2158 !(sb->s_flags & MS_RDONLY)) { 2159 printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge " 2160 "files cannot be mounted read-write " 2161 "without CONFIG_LBD.\n", sb->s_id); 2162 goto failed_mount; 2163 } 2164 } 2165 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 2166 2167 if (blocksize < EXT4_MIN_BLOCK_SIZE || 2168 blocksize > EXT4_MAX_BLOCK_SIZE) { 2169 printk(KERN_ERR 2170 "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n", 2171 blocksize, sb->s_id); 2172 goto failed_mount; 2173 } 2174 2175 if (sb->s_blocksize != blocksize) { 2176 2177 /* Validate the filesystem blocksize */ 2178 if (!sb_set_blocksize(sb, blocksize)) { 2179 printk(KERN_ERR "EXT4-fs: bad block size %d.\n", 2180 blocksize); 2181 goto failed_mount; 2182 } 2183 2184 brelse(bh); 2185 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 2186 offset = do_div(logical_sb_block, blocksize); 2187 bh = sb_bread(sb, logical_sb_block); 2188 if (!bh) { 2189 printk(KERN_ERR 2190 "EXT4-fs: Can't read superblock on 2nd try.\n"); 2191 goto failed_mount; 2192 } 2193 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); 2194 sbi->s_es = es; 2195 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 2196 printk(KERN_ERR 2197 "EXT4-fs: Magic mismatch, very weird !\n"); 2198 goto failed_mount; 2199 } 2200 } 2201 2202 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, 2203 has_huge_files); 2204 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); 2205 2206 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { 2207 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; 2208 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; 2209 } else { 2210 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 2211 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 2212 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 2213 (!is_power_of_2(sbi->s_inode_size)) || 2214 (sbi->s_inode_size > blocksize)) { 2215 printk(KERN_ERR 2216 "EXT4-fs: unsupported inode size: %d\n", 2217 sbi->s_inode_size); 2218 goto failed_mount; 2219 } 2220 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) 2221 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); 2222 } 2223 sbi->s_desc_size = le16_to_cpu(es->s_desc_size); 2224 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { 2225 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || 2226 sbi->s_desc_size > EXT4_MAX_DESC_SIZE || 2227 !is_power_of_2(sbi->s_desc_size)) { 2228 printk(KERN_ERR 2229 "EXT4-fs: unsupported descriptor size %lu\n", 2230 sbi->s_desc_size); 2231 goto failed_mount; 2232 } 2233 } else 2234 sbi->s_desc_size = EXT4_MIN_DESC_SIZE; 2235 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 2236 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 2237 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) 2238 goto cantfind_ext4; 2239 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); 2240 if (sbi->s_inodes_per_block == 0) 2241 goto cantfind_ext4; 2242 sbi->s_itb_per_group = sbi->s_inodes_per_group / 2243 sbi->s_inodes_per_block; 2244 sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); 2245 sbi->s_sbh = bh; 2246 sbi->s_mount_state = le16_to_cpu(es->s_state); 2247 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 2248 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 2249 for (i = 0; i < 4; i++) 2250 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 2251 sbi->s_def_hash_version = es->s_def_hash_version; 2252 i = le32_to_cpu(es->s_flags); 2253 if (i & EXT2_FLAGS_UNSIGNED_HASH) 2254 sbi->s_hash_unsigned = 3; 2255 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { 2256 #ifdef __CHAR_UNSIGNED__ 2257 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); 2258 sbi->s_hash_unsigned = 3; 2259 #else 2260 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); 2261 #endif 2262 sb->s_dirt = 1; 2263 } 2264 2265 if (sbi->s_blocks_per_group > blocksize * 8) { 2266 printk(KERN_ERR 2267 "EXT4-fs: #blocks per group too big: %lu\n", 2268 sbi->s_blocks_per_group); 2269 goto failed_mount; 2270 } 2271 if (sbi->s_inodes_per_group > blocksize * 8) { 2272 printk(KERN_ERR 2273 "EXT4-fs: #inodes per group too big: %lu\n", 2274 sbi->s_inodes_per_group); 2275 goto failed_mount; 2276 } 2277 2278 if (ext4_blocks_count(es) > 2279 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 2280 printk(KERN_ERR "EXT4-fs: filesystem on %s:" 2281 " too large to mount safely\n", sb->s_id); 2282 if (sizeof(sector_t) < 8) 2283 printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not " 2284 "enabled\n"); 2285 goto failed_mount; 2286 } 2287 2288 if (EXT4_BLOCKS_PER_GROUP(sb) == 0) 2289 goto cantfind_ext4; 2290 2291 /* 2292 * It makes no sense for the first data block to be beyond the end 2293 * of the filesystem. 2294 */ 2295 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { 2296 printk(KERN_WARNING "EXT4-fs: bad geometry: first data" 2297 "block %u is beyond end of filesystem (%llu)\n", 2298 le32_to_cpu(es->s_first_data_block), 2299 ext4_blocks_count(es)); 2300 goto failed_mount; 2301 } 2302 blocks_count = (ext4_blocks_count(es) - 2303 le32_to_cpu(es->s_first_data_block) + 2304 EXT4_BLOCKS_PER_GROUP(sb) - 1); 2305 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 2306 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { 2307 printk(KERN_WARNING "EXT4-fs: groups count too large: %u " 2308 "(block count %llu, first data block %u, " 2309 "blocks per group %lu)\n", sbi->s_groups_count, 2310 ext4_blocks_count(es), 2311 le32_to_cpu(es->s_first_data_block), 2312 EXT4_BLOCKS_PER_GROUP(sb)); 2313 goto failed_mount; 2314 } 2315 sbi->s_groups_count = blocks_count; 2316 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 2317 EXT4_DESC_PER_BLOCK(sb); 2318 sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), 2319 GFP_KERNEL); 2320 if (sbi->s_group_desc == NULL) { 2321 printk(KERN_ERR "EXT4-fs: not enough memory\n"); 2322 goto failed_mount; 2323 } 2324 2325 #ifdef CONFIG_PROC_FS 2326 if (ext4_proc_root) 2327 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 2328 2329 if (sbi->s_proc) 2330 proc_create_data("inode_readahead_blks", 0644, sbi->s_proc, 2331 &ext4_ui_proc_fops, 2332 &sbi->s_inode_readahead_blks); 2333 #endif 2334 2335 bgl_lock_init(&sbi->s_blockgroup_lock); 2336 2337 for (i = 0; i < db_count; i++) { 2338 block = descriptor_loc(sb, logical_sb_block, i); 2339 sbi->s_group_desc[i] = sb_bread(sb, block); 2340 if (!sbi->s_group_desc[i]) { 2341 printk(KERN_ERR "EXT4-fs: " 2342 "can't read group descriptor %d\n", i); 2343 db_count = i; 2344 goto failed_mount2; 2345 } 2346 } 2347 if (!ext4_check_descriptors(sb)) { 2348 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); 2349 goto failed_mount2; 2350 } 2351 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 2352 if (!ext4_fill_flex_info(sb)) { 2353 printk(KERN_ERR 2354 "EXT4-fs: unable to initialize " 2355 "flex_bg meta info!\n"); 2356 goto failed_mount2; 2357 } 2358 2359 sbi->s_gdb_count = db_count; 2360 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 2361 spin_lock_init(&sbi->s_next_gen_lock); 2362 2363 err = percpu_counter_init(&sbi->s_freeblocks_counter, 2364 ext4_count_free_blocks(sb)); 2365 if (!err) { 2366 err = percpu_counter_init(&sbi->s_freeinodes_counter, 2367 ext4_count_free_inodes(sb)); 2368 } 2369 if (!err) { 2370 err = percpu_counter_init(&sbi->s_dirs_counter, 2371 ext4_count_dirs(sb)); 2372 } 2373 if (!err) { 2374 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); 2375 } 2376 if (err) { 2377 printk(KERN_ERR "EXT4-fs: insufficient memory\n"); 2378 goto failed_mount3; 2379 } 2380 2381 sbi->s_stripe = ext4_get_stripe_size(sbi); 2382 2383 /* 2384 * set up enough so that it can read an inode 2385 */ 2386 sb->s_op = &ext4_sops; 2387 sb->s_export_op = &ext4_export_ops; 2388 sb->s_xattr = ext4_xattr_handlers; 2389 #ifdef CONFIG_QUOTA 2390 sb->s_qcop = &ext4_qctl_operations; 2391 sb->dq_op = &ext4_quota_operations; 2392 #endif 2393 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 2394 2395 sb->s_root = NULL; 2396 2397 needs_recovery = (es->s_last_orphan != 0 || 2398 EXT4_HAS_INCOMPAT_FEATURE(sb, 2399 EXT4_FEATURE_INCOMPAT_RECOVER)); 2400 2401 /* 2402 * The first inode we look at is the journal inode. Don't try 2403 * root first: it may be modified in the journal! 2404 */ 2405 if (!test_opt(sb, NOLOAD) && 2406 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 2407 if (ext4_load_journal(sb, es, journal_devnum)) 2408 goto failed_mount3; 2409 if (!(sb->s_flags & MS_RDONLY) && 2410 EXT4_SB(sb)->s_journal->j_failed_commit) { 2411 printk(KERN_CRIT "EXT4-fs error (device %s): " 2412 "ext4_fill_super: Journal transaction " 2413 "%u is corrupt\n", sb->s_id, 2414 EXT4_SB(sb)->s_journal->j_failed_commit); 2415 if (test_opt(sb, ERRORS_RO)) { 2416 printk(KERN_CRIT 2417 "Mounting filesystem read-only\n"); 2418 sb->s_flags |= MS_RDONLY; 2419 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2420 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2421 } 2422 if (test_opt(sb, ERRORS_PANIC)) { 2423 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2424 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2425 ext4_commit_super(sb, es, 1); 2426 goto failed_mount4; 2427 } 2428 } 2429 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 2430 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 2431 printk(KERN_ERR "EXT4-fs: required journal recovery " 2432 "suppressed and not mounted read-only\n"); 2433 goto failed_mount4; 2434 } else { 2435 clear_opt(sbi->s_mount_opt, DATA_FLAGS); 2436 set_opt(sbi->s_mount_opt, WRITEBACK_DATA); 2437 sbi->s_journal = NULL; 2438 needs_recovery = 0; 2439 goto no_journal; 2440 } 2441 2442 if (ext4_blocks_count(es) > 0xffffffffULL && 2443 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 2444 JBD2_FEATURE_INCOMPAT_64BIT)) { 2445 printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n"); 2446 goto failed_mount4; 2447 } 2448 2449 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { 2450 jbd2_journal_set_features(sbi->s_journal, 2451 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 2452 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2453 } else if (test_opt(sb, JOURNAL_CHECKSUM)) { 2454 jbd2_journal_set_features(sbi->s_journal, 2455 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); 2456 jbd2_journal_clear_features(sbi->s_journal, 0, 0, 2457 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2458 } else { 2459 jbd2_journal_clear_features(sbi->s_journal, 2460 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 2461 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2462 } 2463 2464 /* We have now updated the journal if required, so we can 2465 * validate the data journaling mode. */ 2466 switch (test_opt(sb, DATA_FLAGS)) { 2467 case 0: 2468 /* No mode set, assume a default based on the journal 2469 * capabilities: ORDERED_DATA if the journal can 2470 * cope, else JOURNAL_DATA 2471 */ 2472 if (jbd2_journal_check_available_features 2473 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) 2474 set_opt(sbi->s_mount_opt, ORDERED_DATA); 2475 else 2476 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 2477 break; 2478 2479 case EXT4_MOUNT_ORDERED_DATA: 2480 case EXT4_MOUNT_WRITEBACK_DATA: 2481 if (!jbd2_journal_check_available_features 2482 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 2483 printk(KERN_ERR "EXT4-fs: Journal does not support " 2484 "requested data journaling mode\n"); 2485 goto failed_mount4; 2486 } 2487 default: 2488 break; 2489 } 2490 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 2491 2492 no_journal: 2493 2494 if (test_opt(sb, NOBH)) { 2495 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { 2496 printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - " 2497 "its supported only with writeback mode\n"); 2498 clear_opt(sbi->s_mount_opt, NOBH); 2499 } 2500 } 2501 /* 2502 * The jbd2_journal_load will have done any necessary log recovery, 2503 * so we can safely mount the rest of the filesystem now. 2504 */ 2505 2506 root = ext4_iget(sb, EXT4_ROOT_INO); 2507 if (IS_ERR(root)) { 2508 printk(KERN_ERR "EXT4-fs: get root inode failed\n"); 2509 ret = PTR_ERR(root); 2510 goto failed_mount4; 2511 } 2512 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 2513 iput(root); 2514 printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n"); 2515 goto failed_mount4; 2516 } 2517 sb->s_root = d_alloc_root(root); 2518 if (!sb->s_root) { 2519 printk(KERN_ERR "EXT4-fs: get root dentry failed\n"); 2520 iput(root); 2521 ret = -ENOMEM; 2522 goto failed_mount4; 2523 } 2524 2525 ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY); 2526 2527 /* determine the minimum size of new large inodes, if present */ 2528 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { 2529 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 2530 EXT4_GOOD_OLD_INODE_SIZE; 2531 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 2532 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { 2533 if (sbi->s_want_extra_isize < 2534 le16_to_cpu(es->s_want_extra_isize)) 2535 sbi->s_want_extra_isize = 2536 le16_to_cpu(es->s_want_extra_isize); 2537 if (sbi->s_want_extra_isize < 2538 le16_to_cpu(es->s_min_extra_isize)) 2539 sbi->s_want_extra_isize = 2540 le16_to_cpu(es->s_min_extra_isize); 2541 } 2542 } 2543 /* Check if enough inode space is available */ 2544 if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > 2545 sbi->s_inode_size) { 2546 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 2547 EXT4_GOOD_OLD_INODE_SIZE; 2548 printk(KERN_INFO "EXT4-fs: required extra inode space not" 2549 "available.\n"); 2550 } 2551 2552 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 2553 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " 2554 "requested data journaling mode\n"); 2555 clear_opt(sbi->s_mount_opt, DELALLOC); 2556 } else if (test_opt(sb, DELALLOC)) 2557 printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); 2558 2559 ext4_ext_init(sb); 2560 err = ext4_mb_init(sb, needs_recovery); 2561 if (err) { 2562 printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", 2563 err); 2564 goto failed_mount4; 2565 } 2566 2567 /* 2568 * akpm: core read_super() calls in here with the superblock locked. 2569 * That deadlocks, because orphan cleanup needs to lock the superblock 2570 * in numerous places. Here we just pop the lock - it's relatively 2571 * harmless, because we are now ready to accept write_super() requests, 2572 * and aviro says that's the only reason for hanging onto the 2573 * superblock lock. 2574 */ 2575 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 2576 ext4_orphan_cleanup(sb, es); 2577 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 2578 if (needs_recovery) { 2579 printk(KERN_INFO "EXT4-fs: recovery complete.\n"); 2580 ext4_mark_recovery_complete(sb, es); 2581 } 2582 if (EXT4_SB(sb)->s_journal) { 2583 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 2584 descr = " journalled data mode"; 2585 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 2586 descr = " ordered data mode"; 2587 else 2588 descr = " writeback data mode"; 2589 } else 2590 descr = "out journal"; 2591 2592 printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n", 2593 sb->s_id, descr); 2594 2595 lock_kernel(); 2596 return 0; 2597 2598 cantfind_ext4: 2599 if (!silent) 2600 printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n", 2601 sb->s_id); 2602 goto failed_mount; 2603 2604 failed_mount4: 2605 printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id); 2606 if (sbi->s_journal) { 2607 jbd2_journal_destroy(sbi->s_journal); 2608 sbi->s_journal = NULL; 2609 } 2610 failed_mount3: 2611 percpu_counter_destroy(&sbi->s_freeblocks_counter); 2612 percpu_counter_destroy(&sbi->s_freeinodes_counter); 2613 percpu_counter_destroy(&sbi->s_dirs_counter); 2614 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 2615 failed_mount2: 2616 for (i = 0; i < db_count; i++) 2617 brelse(sbi->s_group_desc[i]); 2618 kfree(sbi->s_group_desc); 2619 failed_mount: 2620 if (sbi->s_proc) { 2621 remove_proc_entry("inode_readahead_blks", sbi->s_proc); 2622 remove_proc_entry(sb->s_id, ext4_proc_root); 2623 } 2624 #ifdef CONFIG_QUOTA 2625 for (i = 0; i < MAXQUOTAS; i++) 2626 kfree(sbi->s_qf_names[i]); 2627 #endif 2628 ext4_blkdev_remove(sbi); 2629 brelse(bh); 2630 out_fail: 2631 sb->s_fs_info = NULL; 2632 kfree(sbi); 2633 lock_kernel(); 2634 return ret; 2635 } 2636 2637 /* 2638 * Setup any per-fs journal parameters now. We'll do this both on 2639 * initial mount, once the journal has been initialised but before we've 2640 * done any recovery; and again on any subsequent remount. 2641 */ 2642 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) 2643 { 2644 struct ext4_sb_info *sbi = EXT4_SB(sb); 2645 2646 journal->j_commit_interval = sbi->s_commit_interval; 2647 journal->j_min_batch_time = sbi->s_min_batch_time; 2648 journal->j_max_batch_time = sbi->s_max_batch_time; 2649 2650 spin_lock(&journal->j_state_lock); 2651 if (test_opt(sb, BARRIER)) 2652 journal->j_flags |= JBD2_BARRIER; 2653 else 2654 journal->j_flags &= ~JBD2_BARRIER; 2655 if (test_opt(sb, DATA_ERR_ABORT)) 2656 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; 2657 else 2658 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; 2659 spin_unlock(&journal->j_state_lock); 2660 } 2661 2662 static journal_t *ext4_get_journal(struct super_block *sb, 2663 unsigned int journal_inum) 2664 { 2665 struct inode *journal_inode; 2666 journal_t *journal; 2667 2668 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 2669 2670 /* First, test for the existence of a valid inode on disk. Bad 2671 * things happen if we iget() an unused inode, as the subsequent 2672 * iput() will try to delete it. */ 2673 2674 journal_inode = ext4_iget(sb, journal_inum); 2675 if (IS_ERR(journal_inode)) { 2676 printk(KERN_ERR "EXT4-fs: no journal found.\n"); 2677 return NULL; 2678 } 2679 if (!journal_inode->i_nlink) { 2680 make_bad_inode(journal_inode); 2681 iput(journal_inode); 2682 printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n"); 2683 return NULL; 2684 } 2685 2686 jbd_debug(2, "Journal inode found at %p: %lld bytes\n", 2687 journal_inode, journal_inode->i_size); 2688 if (!S_ISREG(journal_inode->i_mode)) { 2689 printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); 2690 iput(journal_inode); 2691 return NULL; 2692 } 2693 2694 journal = jbd2_journal_init_inode(journal_inode); 2695 if (!journal) { 2696 printk(KERN_ERR "EXT4-fs: Could not load journal inode\n"); 2697 iput(journal_inode); 2698 return NULL; 2699 } 2700 journal->j_private = sb; 2701 ext4_init_journal_params(sb, journal); 2702 return journal; 2703 } 2704 2705 static journal_t *ext4_get_dev_journal(struct super_block *sb, 2706 dev_t j_dev) 2707 { 2708 struct buffer_head *bh; 2709 journal_t *journal; 2710 ext4_fsblk_t start; 2711 ext4_fsblk_t len; 2712 int hblock, blocksize; 2713 ext4_fsblk_t sb_block; 2714 unsigned long offset; 2715 struct ext4_super_block *es; 2716 struct block_device *bdev; 2717 2718 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 2719 2720 bdev = ext4_blkdev_get(j_dev); 2721 if (bdev == NULL) 2722 return NULL; 2723 2724 if (bd_claim(bdev, sb)) { 2725 printk(KERN_ERR 2726 "EXT4-fs: failed to claim external journal device.\n"); 2727 blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 2728 return NULL; 2729 } 2730 2731 blocksize = sb->s_blocksize; 2732 hblock = bdev_hardsect_size(bdev); 2733 if (blocksize < hblock) { 2734 printk(KERN_ERR 2735 "EXT4-fs: blocksize too small for journal device.\n"); 2736 goto out_bdev; 2737 } 2738 2739 sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; 2740 offset = EXT4_MIN_BLOCK_SIZE % blocksize; 2741 set_blocksize(bdev, blocksize); 2742 if (!(bh = __bread(bdev, sb_block, blocksize))) { 2743 printk(KERN_ERR "EXT4-fs: couldn't read superblock of " 2744 "external journal\n"); 2745 goto out_bdev; 2746 } 2747 2748 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 2749 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || 2750 !(le32_to_cpu(es->s_feature_incompat) & 2751 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { 2752 printk(KERN_ERR "EXT4-fs: external journal has " 2753 "bad superblock\n"); 2754 brelse(bh); 2755 goto out_bdev; 2756 } 2757 2758 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 2759 printk(KERN_ERR "EXT4-fs: journal UUID does not match\n"); 2760 brelse(bh); 2761 goto out_bdev; 2762 } 2763 2764 len = ext4_blocks_count(es); 2765 start = sb_block + 1; 2766 brelse(bh); /* we're done with the superblock */ 2767 2768 journal = jbd2_journal_init_dev(bdev, sb->s_bdev, 2769 start, len, blocksize); 2770 if (!journal) { 2771 printk(KERN_ERR "EXT4-fs: failed to create device journal\n"); 2772 goto out_bdev; 2773 } 2774 journal->j_private = sb; 2775 ll_rw_block(READ, 1, &journal->j_sb_buffer); 2776 wait_on_buffer(journal->j_sb_buffer); 2777 if (!buffer_uptodate(journal->j_sb_buffer)) { 2778 printk(KERN_ERR "EXT4-fs: I/O error on journal device\n"); 2779 goto out_journal; 2780 } 2781 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 2782 printk(KERN_ERR "EXT4-fs: External journal has more than one " 2783 "user (unsupported) - %d\n", 2784 be32_to_cpu(journal->j_superblock->s_nr_users)); 2785 goto out_journal; 2786 } 2787 EXT4_SB(sb)->journal_bdev = bdev; 2788 ext4_init_journal_params(sb, journal); 2789 return journal; 2790 out_journal: 2791 jbd2_journal_destroy(journal); 2792 out_bdev: 2793 ext4_blkdev_put(bdev); 2794 return NULL; 2795 } 2796 2797 static int ext4_load_journal(struct super_block *sb, 2798 struct ext4_super_block *es, 2799 unsigned long journal_devnum) 2800 { 2801 journal_t *journal; 2802 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); 2803 dev_t journal_dev; 2804 int err = 0; 2805 int really_read_only; 2806 2807 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 2808 2809 if (journal_devnum && 2810 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 2811 printk(KERN_INFO "EXT4-fs: external journal device major/minor " 2812 "numbers have changed\n"); 2813 journal_dev = new_decode_dev(journal_devnum); 2814 } else 2815 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 2816 2817 really_read_only = bdev_read_only(sb->s_bdev); 2818 2819 /* 2820 * Are we loading a blank journal or performing recovery after a 2821 * crash? For recovery, we need to check in advance whether we 2822 * can get read-write access to the device. 2823 */ 2824 2825 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 2826 if (sb->s_flags & MS_RDONLY) { 2827 printk(KERN_INFO "EXT4-fs: INFO: recovery " 2828 "required on readonly filesystem.\n"); 2829 if (really_read_only) { 2830 printk(KERN_ERR "EXT4-fs: write access " 2831 "unavailable, cannot proceed.\n"); 2832 return -EROFS; 2833 } 2834 printk(KERN_INFO "EXT4-fs: write access will " 2835 "be enabled during recovery.\n"); 2836 } 2837 } 2838 2839 if (journal_inum && journal_dev) { 2840 printk(KERN_ERR "EXT4-fs: filesystem has both journal " 2841 "and inode journals!\n"); 2842 return -EINVAL; 2843 } 2844 2845 if (journal_inum) { 2846 if (!(journal = ext4_get_journal(sb, journal_inum))) 2847 return -EINVAL; 2848 } else { 2849 if (!(journal = ext4_get_dev_journal(sb, journal_dev))) 2850 return -EINVAL; 2851 } 2852 2853 if (journal->j_flags & JBD2_BARRIER) 2854 printk(KERN_INFO "EXT4-fs: barriers enabled\n"); 2855 else 2856 printk(KERN_INFO "EXT4-fs: barriers disabled\n"); 2857 2858 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 2859 err = jbd2_journal_update_format(journal); 2860 if (err) { 2861 printk(KERN_ERR "EXT4-fs: error updating journal.\n"); 2862 jbd2_journal_destroy(journal); 2863 return err; 2864 } 2865 } 2866 2867 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) 2868 err = jbd2_journal_wipe(journal, !really_read_only); 2869 if (!err) 2870 err = jbd2_journal_load(journal); 2871 2872 if (err) { 2873 printk(KERN_ERR "EXT4-fs: error loading journal.\n"); 2874 jbd2_journal_destroy(journal); 2875 return err; 2876 } 2877 2878 EXT4_SB(sb)->s_journal = journal; 2879 ext4_clear_journal_err(sb, es); 2880 2881 if (journal_devnum && 2882 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 2883 es->s_journal_dev = cpu_to_le32(journal_devnum); 2884 sb->s_dirt = 1; 2885 2886 /* Make sure we flush the recovery flag to disk. */ 2887 ext4_commit_super(sb, es, 1); 2888 } 2889 2890 return 0; 2891 } 2892 2893 static int ext4_commit_super(struct super_block *sb, 2894 struct ext4_super_block *es, int sync) 2895 { 2896 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 2897 int error = 0; 2898 2899 if (!sbh) 2900 return error; 2901 if (buffer_write_io_error(sbh)) { 2902 /* 2903 * Oh, dear. A previous attempt to write the 2904 * superblock failed. This could happen because the 2905 * USB device was yanked out. Or it could happen to 2906 * be a transient write error and maybe the block will 2907 * be remapped. Nothing we can do but to retry the 2908 * write and hope for the best. 2909 */ 2910 printk(KERN_ERR "EXT4-fs: previous I/O error to " 2911 "superblock detected for %s.\n", sb->s_id); 2912 clear_buffer_write_io_error(sbh); 2913 set_buffer_uptodate(sbh); 2914 } 2915 es->s_wtime = cpu_to_le32(get_seconds()); 2916 ext4_free_blocks_count_set(es, percpu_counter_sum_positive( 2917 &EXT4_SB(sb)->s_freeblocks_counter)); 2918 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( 2919 &EXT4_SB(sb)->s_freeinodes_counter)); 2920 2921 BUFFER_TRACE(sbh, "marking dirty"); 2922 mark_buffer_dirty(sbh); 2923 if (sync) { 2924 error = sync_dirty_buffer(sbh); 2925 if (error) 2926 return error; 2927 2928 error = buffer_write_io_error(sbh); 2929 if (error) { 2930 printk(KERN_ERR "EXT4-fs: I/O error while writing " 2931 "superblock for %s.\n", sb->s_id); 2932 clear_buffer_write_io_error(sbh); 2933 set_buffer_uptodate(sbh); 2934 } 2935 } 2936 return error; 2937 } 2938 2939 2940 /* 2941 * Have we just finished recovery? If so, and if we are mounting (or 2942 * remounting) the filesystem readonly, then we will end up with a 2943 * consistent fs on disk. Record that fact. 2944 */ 2945 static void ext4_mark_recovery_complete(struct super_block *sb, 2946 struct ext4_super_block *es) 2947 { 2948 journal_t *journal = EXT4_SB(sb)->s_journal; 2949 2950 if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 2951 BUG_ON(journal != NULL); 2952 return; 2953 } 2954 jbd2_journal_lock_updates(journal); 2955 if (jbd2_journal_flush(journal) < 0) 2956 goto out; 2957 2958 lock_super(sb); 2959 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && 2960 sb->s_flags & MS_RDONLY) { 2961 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 2962 sb->s_dirt = 0; 2963 ext4_commit_super(sb, es, 1); 2964 } 2965 unlock_super(sb); 2966 2967 out: 2968 jbd2_journal_unlock_updates(journal); 2969 } 2970 2971 /* 2972 * If we are mounting (or read-write remounting) a filesystem whose journal 2973 * has recorded an error from a previous lifetime, move that error to the 2974 * main filesystem now. 2975 */ 2976 static void ext4_clear_journal_err(struct super_block *sb, 2977 struct ext4_super_block *es) 2978 { 2979 journal_t *journal; 2980 int j_errno; 2981 const char *errstr; 2982 2983 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 2984 2985 journal = EXT4_SB(sb)->s_journal; 2986 2987 /* 2988 * Now check for any error status which may have been recorded in the 2989 * journal by a prior ext4_error() or ext4_abort() 2990 */ 2991 2992 j_errno = jbd2_journal_errno(journal); 2993 if (j_errno) { 2994 char nbuf[16]; 2995 2996 errstr = ext4_decode_error(sb, j_errno, nbuf); 2997 ext4_warning(sb, __func__, "Filesystem error recorded " 2998 "from previous mount: %s", errstr); 2999 ext4_warning(sb, __func__, "Marking fs in need of " 3000 "filesystem check."); 3001 3002 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 3003 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 3004 ext4_commit_super(sb, es, 1); 3005 3006 jbd2_journal_clear_err(journal); 3007 } 3008 } 3009 3010 /* 3011 * Force the running and committing transactions to commit, 3012 * and wait on the commit. 3013 */ 3014 int ext4_force_commit(struct super_block *sb) 3015 { 3016 journal_t *journal; 3017 int ret = 0; 3018 3019 if (sb->s_flags & MS_RDONLY) 3020 return 0; 3021 3022 journal = EXT4_SB(sb)->s_journal; 3023 if (journal) { 3024 sb->s_dirt = 0; 3025 ret = ext4_journal_force_commit(journal); 3026 } 3027 3028 return ret; 3029 } 3030 3031 /* 3032 * Ext4 always journals updates to the superblock itself, so we don't 3033 * have to propagate any other updates to the superblock on disk at this 3034 * point. (We can probably nuke this function altogether, and remove 3035 * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...) 3036 */ 3037 static void ext4_write_super(struct super_block *sb) 3038 { 3039 if (EXT4_SB(sb)->s_journal) { 3040 if (mutex_trylock(&sb->s_lock) != 0) 3041 BUG(); 3042 sb->s_dirt = 0; 3043 } else { 3044 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 3045 } 3046 } 3047 3048 static int ext4_sync_fs(struct super_block *sb, int wait) 3049 { 3050 int ret = 0; 3051 tid_t target; 3052 3053 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); 3054 sb->s_dirt = 0; 3055 if (EXT4_SB(sb)->s_journal) { 3056 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, 3057 &target)) { 3058 if (wait) 3059 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, 3060 target); 3061 } 3062 } else { 3063 ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait); 3064 } 3065 return ret; 3066 } 3067 3068 /* 3069 * LVM calls this function before a (read-only) snapshot is created. This 3070 * gives us a chance to flush the journal completely and mark the fs clean. 3071 */ 3072 static int ext4_freeze(struct super_block *sb) 3073 { 3074 int error = 0; 3075 journal_t *journal; 3076 sb->s_dirt = 0; 3077 3078 if (!(sb->s_flags & MS_RDONLY)) { 3079 journal = EXT4_SB(sb)->s_journal; 3080 3081 if (journal) { 3082 /* Now we set up the journal barrier. */ 3083 jbd2_journal_lock_updates(journal); 3084 3085 /* 3086 * We don't want to clear needs_recovery flag when we 3087 * failed to flush the journal. 3088 */ 3089 error = jbd2_journal_flush(journal); 3090 if (error < 0) 3091 goto out; 3092 } 3093 3094 /* Journal blocked and flushed, clear needs_recovery flag. */ 3095 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3096 error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 3097 if (error) 3098 goto out; 3099 } 3100 return 0; 3101 out: 3102 jbd2_journal_unlock_updates(journal); 3103 return error; 3104 } 3105 3106 /* 3107 * Called by LVM after the snapshot is done. We need to reset the RECOVER 3108 * flag here, even though the filesystem is not technically dirty yet. 3109 */ 3110 static int ext4_unfreeze(struct super_block *sb) 3111 { 3112 if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) { 3113 lock_super(sb); 3114 /* Reser the needs_recovery flag before the fs is unlocked. */ 3115 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3116 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 3117 unlock_super(sb); 3118 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3119 } 3120 return 0; 3121 } 3122 3123 static int ext4_remount(struct super_block *sb, int *flags, char *data) 3124 { 3125 struct ext4_super_block *es; 3126 struct ext4_sb_info *sbi = EXT4_SB(sb); 3127 ext4_fsblk_t n_blocks_count = 0; 3128 unsigned long old_sb_flags; 3129 struct ext4_mount_options old_opts; 3130 ext4_group_t g; 3131 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 3132 int err; 3133 #ifdef CONFIG_QUOTA 3134 int i; 3135 #endif 3136 3137 /* Store the original options */ 3138 old_sb_flags = sb->s_flags; 3139 old_opts.s_mount_opt = sbi->s_mount_opt; 3140 old_opts.s_resuid = sbi->s_resuid; 3141 old_opts.s_resgid = sbi->s_resgid; 3142 old_opts.s_commit_interval = sbi->s_commit_interval; 3143 old_opts.s_min_batch_time = sbi->s_min_batch_time; 3144 old_opts.s_max_batch_time = sbi->s_max_batch_time; 3145 #ifdef CONFIG_QUOTA 3146 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 3147 for (i = 0; i < MAXQUOTAS; i++) 3148 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 3149 #endif 3150 if (sbi->s_journal && sbi->s_journal->j_task->io_context) 3151 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; 3152 3153 /* 3154 * Allow the "check" option to be passed as a remount option. 3155 */ 3156 if (!parse_options(data, sb, NULL, &journal_ioprio, 3157 &n_blocks_count, 1)) { 3158 err = -EINVAL; 3159 goto restore_opts; 3160 } 3161 3162 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) 3163 ext4_abort(sb, __func__, "Abort forced by user"); 3164 3165 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3166 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 3167 3168 es = sbi->s_es; 3169 3170 if (sbi->s_journal) { 3171 ext4_init_journal_params(sb, sbi->s_journal); 3172 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 3173 } 3174 3175 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 3176 n_blocks_count > ext4_blocks_count(es)) { 3177 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) { 3178 err = -EROFS; 3179 goto restore_opts; 3180 } 3181 3182 if (*flags & MS_RDONLY) { 3183 /* 3184 * First of all, the unconditional stuff we have to do 3185 * to disable replay of the journal when we next remount 3186 */ 3187 sb->s_flags |= MS_RDONLY; 3188 3189 /* 3190 * OK, test if we are remounting a valid rw partition 3191 * readonly, and if so set the rdonly flag and then 3192 * mark the partition as valid again. 3193 */ 3194 if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) && 3195 (sbi->s_mount_state & EXT4_VALID_FS)) 3196 es->s_state = cpu_to_le16(sbi->s_mount_state); 3197 3198 /* 3199 * We have to unlock super so that we can wait for 3200 * transactions. 3201 */ 3202 if (sbi->s_journal) { 3203 unlock_super(sb); 3204 ext4_mark_recovery_complete(sb, es); 3205 lock_super(sb); 3206 } 3207 } else { 3208 int ret; 3209 if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, 3210 ~EXT4_FEATURE_RO_COMPAT_SUPP))) { 3211 printk(KERN_WARNING "EXT4-fs: %s: couldn't " 3212 "remount RDWR because of unsupported " 3213 "optional features (%x).\n", sb->s_id, 3214 (le32_to_cpu(sbi->s_es->s_feature_ro_compat) & 3215 ~EXT4_FEATURE_RO_COMPAT_SUPP)); 3216 err = -EROFS; 3217 goto restore_opts; 3218 } 3219 3220 /* 3221 * Make sure the group descriptor checksums 3222 * are sane. If they aren't, refuse to 3223 * remount r/w. 3224 */ 3225 for (g = 0; g < sbi->s_groups_count; g++) { 3226 struct ext4_group_desc *gdp = 3227 ext4_get_group_desc(sb, g, NULL); 3228 3229 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { 3230 printk(KERN_ERR 3231 "EXT4-fs: ext4_remount: " 3232 "Checksum for group %u failed (%u!=%u)\n", 3233 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), 3234 le16_to_cpu(gdp->bg_checksum)); 3235 err = -EINVAL; 3236 goto restore_opts; 3237 } 3238 } 3239 3240 /* 3241 * If we have an unprocessed orphan list hanging 3242 * around from a previously readonly bdev mount, 3243 * require a full umount/remount for now. 3244 */ 3245 if (es->s_last_orphan) { 3246 printk(KERN_WARNING "EXT4-fs: %s: couldn't " 3247 "remount RDWR because of unprocessed " 3248 "orphan inode list. Please " 3249 "umount/remount instead.\n", 3250 sb->s_id); 3251 err = -EINVAL; 3252 goto restore_opts; 3253 } 3254 3255 /* 3256 * Mounting a RDONLY partition read-write, so reread 3257 * and store the current valid flag. (It may have 3258 * been changed by e2fsck since we originally mounted 3259 * the partition.) 3260 */ 3261 if (sbi->s_journal) 3262 ext4_clear_journal_err(sb, es); 3263 sbi->s_mount_state = le16_to_cpu(es->s_state); 3264 if ((err = ext4_group_extend(sb, es, n_blocks_count))) 3265 goto restore_opts; 3266 if (!ext4_setup_super(sb, es, 0)) 3267 sb->s_flags &= ~MS_RDONLY; 3268 } 3269 } 3270 if (sbi->s_journal == NULL) 3271 ext4_commit_super(sb, es, 1); 3272 3273 #ifdef CONFIG_QUOTA 3274 /* Release old quota file names */ 3275 for (i = 0; i < MAXQUOTAS; i++) 3276 if (old_opts.s_qf_names[i] && 3277 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 3278 kfree(old_opts.s_qf_names[i]); 3279 #endif 3280 return 0; 3281 restore_opts: 3282 sb->s_flags = old_sb_flags; 3283 sbi->s_mount_opt = old_opts.s_mount_opt; 3284 sbi->s_resuid = old_opts.s_resuid; 3285 sbi->s_resgid = old_opts.s_resgid; 3286 sbi->s_commit_interval = old_opts.s_commit_interval; 3287 sbi->s_min_batch_time = old_opts.s_min_batch_time; 3288 sbi->s_max_batch_time = old_opts.s_max_batch_time; 3289 #ifdef CONFIG_QUOTA 3290 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 3291 for (i = 0; i < MAXQUOTAS; i++) { 3292 if (sbi->s_qf_names[i] && 3293 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 3294 kfree(sbi->s_qf_names[i]); 3295 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 3296 } 3297 #endif 3298 return err; 3299 } 3300 3301 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) 3302 { 3303 struct super_block *sb = dentry->d_sb; 3304 struct ext4_sb_info *sbi = EXT4_SB(sb); 3305 struct ext4_super_block *es = sbi->s_es; 3306 u64 fsid; 3307 3308 if (test_opt(sb, MINIX_DF)) { 3309 sbi->s_overhead_last = 0; 3310 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { 3311 ext4_group_t ngroups = sbi->s_groups_count, i; 3312 ext4_fsblk_t overhead = 0; 3313 smp_rmb(); 3314 3315 /* 3316 * Compute the overhead (FS structures). This is constant 3317 * for a given filesystem unless the number of block groups 3318 * changes so we cache the previous value until it does. 3319 */ 3320 3321 /* 3322 * All of the blocks before first_data_block are 3323 * overhead 3324 */ 3325 overhead = le32_to_cpu(es->s_first_data_block); 3326 3327 /* 3328 * Add the overhead attributed to the superblock and 3329 * block group descriptors. If the sparse superblocks 3330 * feature is turned on, then not all groups have this. 3331 */ 3332 for (i = 0; i < ngroups; i++) { 3333 overhead += ext4_bg_has_super(sb, i) + 3334 ext4_bg_num_gdb(sb, i); 3335 cond_resched(); 3336 } 3337 3338 /* 3339 * Every block group has an inode bitmap, a block 3340 * bitmap, and an inode table. 3341 */ 3342 overhead += ngroups * (2 + sbi->s_itb_per_group); 3343 sbi->s_overhead_last = overhead; 3344 smp_wmb(); 3345 sbi->s_blocks_last = ext4_blocks_count(es); 3346 } 3347 3348 buf->f_type = EXT4_SUPER_MAGIC; 3349 buf->f_bsize = sb->s_blocksize; 3350 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 3351 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - 3352 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); 3353 ext4_free_blocks_count_set(es, buf->f_bfree); 3354 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 3355 if (buf->f_bfree < ext4_r_blocks_count(es)) 3356 buf->f_bavail = 0; 3357 buf->f_files = le32_to_cpu(es->s_inodes_count); 3358 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); 3359 es->s_free_inodes_count = cpu_to_le32(buf->f_ffree); 3360 buf->f_namelen = EXT4_NAME_LEN; 3361 fsid = le64_to_cpup((void *)es->s_uuid) ^ 3362 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 3363 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 3364 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 3365 return 0; 3366 } 3367 3368 /* Helper function for writing quotas on sync - we need to start transaction before quota file 3369 * is locked for write. Otherwise the are possible deadlocks: 3370 * Process 1 Process 2 3371 * ext4_create() quota_sync() 3372 * jbd2_journal_start() write_dquot() 3373 * vfs_dq_init() down(dqio_mutex) 3374 * down(dqio_mutex) jbd2_journal_start() 3375 * 3376 */ 3377 3378 #ifdef CONFIG_QUOTA 3379 3380 static inline struct inode *dquot_to_inode(struct dquot *dquot) 3381 { 3382 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; 3383 } 3384 3385 static int ext4_write_dquot(struct dquot *dquot) 3386 { 3387 int ret, err; 3388 handle_t *handle; 3389 struct inode *inode; 3390 3391 inode = dquot_to_inode(dquot); 3392 handle = ext4_journal_start(inode, 3393 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 3394 if (IS_ERR(handle)) 3395 return PTR_ERR(handle); 3396 ret = dquot_commit(dquot); 3397 err = ext4_journal_stop(handle); 3398 if (!ret) 3399 ret = err; 3400 return ret; 3401 } 3402 3403 static int ext4_acquire_dquot(struct dquot *dquot) 3404 { 3405 int ret, err; 3406 handle_t *handle; 3407 3408 handle = ext4_journal_start(dquot_to_inode(dquot), 3409 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 3410 if (IS_ERR(handle)) 3411 return PTR_ERR(handle); 3412 ret = dquot_acquire(dquot); 3413 err = ext4_journal_stop(handle); 3414 if (!ret) 3415 ret = err; 3416 return ret; 3417 } 3418 3419 static int ext4_release_dquot(struct dquot *dquot) 3420 { 3421 int ret, err; 3422 handle_t *handle; 3423 3424 handle = ext4_journal_start(dquot_to_inode(dquot), 3425 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 3426 if (IS_ERR(handle)) { 3427 /* Release dquot anyway to avoid endless cycle in dqput() */ 3428 dquot_release(dquot); 3429 return PTR_ERR(handle); 3430 } 3431 ret = dquot_release(dquot); 3432 err = ext4_journal_stop(handle); 3433 if (!ret) 3434 ret = err; 3435 return ret; 3436 } 3437 3438 static int ext4_mark_dquot_dirty(struct dquot *dquot) 3439 { 3440 /* Are we journaling quotas? */ 3441 if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 3442 EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 3443 dquot_mark_dquot_dirty(dquot); 3444 return ext4_write_dquot(dquot); 3445 } else { 3446 return dquot_mark_dquot_dirty(dquot); 3447 } 3448 } 3449 3450 static int ext4_write_info(struct super_block *sb, int type) 3451 { 3452 int ret, err; 3453 handle_t *handle; 3454 3455 /* Data block + inode block */ 3456 handle = ext4_journal_start(sb->s_root->d_inode, 2); 3457 if (IS_ERR(handle)) 3458 return PTR_ERR(handle); 3459 ret = dquot_commit_info(sb, type); 3460 err = ext4_journal_stop(handle); 3461 if (!ret) 3462 ret = err; 3463 return ret; 3464 } 3465 3466 /* 3467 * Turn on quotas during mount time - we need to find 3468 * the quota file and such... 3469 */ 3470 static int ext4_quota_on_mount(struct super_block *sb, int type) 3471 { 3472 return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], 3473 EXT4_SB(sb)->s_jquota_fmt, type); 3474 } 3475 3476 /* 3477 * Standard function to be called on quota_on 3478 */ 3479 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 3480 char *name, int remount) 3481 { 3482 int err; 3483 struct path path; 3484 3485 if (!test_opt(sb, QUOTA)) 3486 return -EINVAL; 3487 /* When remounting, no checks are needed and in fact, name is NULL */ 3488 if (remount) 3489 return vfs_quota_on(sb, type, format_id, name, remount); 3490 3491 err = kern_path(name, LOOKUP_FOLLOW, &path); 3492 if (err) 3493 return err; 3494 3495 /* Quotafile not on the same filesystem? */ 3496 if (path.mnt->mnt_sb != sb) { 3497 path_put(&path); 3498 return -EXDEV; 3499 } 3500 /* Journaling quota? */ 3501 if (EXT4_SB(sb)->s_qf_names[type]) { 3502 /* Quotafile not in fs root? */ 3503 if (path.dentry->d_parent != sb->s_root) 3504 printk(KERN_WARNING 3505 "EXT4-fs: Quota file not on filesystem root. " 3506 "Journaled quota will not work.\n"); 3507 } 3508 3509 /* 3510 * When we journal data on quota file, we have to flush journal to see 3511 * all updates to the file when we bypass pagecache... 3512 */ 3513 if (EXT4_SB(sb)->s_journal && 3514 ext4_should_journal_data(path.dentry->d_inode)) { 3515 /* 3516 * We don't need to lock updates but journal_flush() could 3517 * otherwise be livelocked... 3518 */ 3519 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 3520 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 3521 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3522 if (err) { 3523 path_put(&path); 3524 return err; 3525 } 3526 } 3527 3528 err = vfs_quota_on_path(sb, type, format_id, &path); 3529 path_put(&path); 3530 return err; 3531 } 3532 3533 /* Read data from quotafile - avoid pagecache and such because we cannot afford 3534 * acquiring the locks... As quota files are never truncated and quota code 3535 * itself serializes the operations (and noone else should touch the files) 3536 * we don't have to be afraid of races */ 3537 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 3538 size_t len, loff_t off) 3539 { 3540 struct inode *inode = sb_dqopt(sb)->files[type]; 3541 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 3542 int err = 0; 3543 int offset = off & (sb->s_blocksize - 1); 3544 int tocopy; 3545 size_t toread; 3546 struct buffer_head *bh; 3547 loff_t i_size = i_size_read(inode); 3548 3549 if (off > i_size) 3550 return 0; 3551 if (off+len > i_size) 3552 len = i_size-off; 3553 toread = len; 3554 while (toread > 0) { 3555 tocopy = sb->s_blocksize - offset < toread ? 3556 sb->s_blocksize - offset : toread; 3557 bh = ext4_bread(NULL, inode, blk, 0, &err); 3558 if (err) 3559 return err; 3560 if (!bh) /* A hole? */ 3561 memset(data, 0, tocopy); 3562 else 3563 memcpy(data, bh->b_data+offset, tocopy); 3564 brelse(bh); 3565 offset = 0; 3566 toread -= tocopy; 3567 data += tocopy; 3568 blk++; 3569 } 3570 return len; 3571 } 3572 3573 /* Write to quotafile (we know the transaction is already started and has 3574 * enough credits) */ 3575 static ssize_t ext4_quota_write(struct super_block *sb, int type, 3576 const char *data, size_t len, loff_t off) 3577 { 3578 struct inode *inode = sb_dqopt(sb)->files[type]; 3579 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 3580 int err = 0; 3581 int offset = off & (sb->s_blocksize - 1); 3582 int tocopy; 3583 int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; 3584 size_t towrite = len; 3585 struct buffer_head *bh; 3586 handle_t *handle = journal_current_handle(); 3587 3588 if (EXT4_SB(sb)->s_journal && !handle) { 3589 printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)" 3590 " cancelled because transaction is not started.\n", 3591 (unsigned long long)off, (unsigned long long)len); 3592 return -EIO; 3593 } 3594 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 3595 while (towrite > 0) { 3596 tocopy = sb->s_blocksize - offset < towrite ? 3597 sb->s_blocksize - offset : towrite; 3598 bh = ext4_bread(handle, inode, blk, 1, &err); 3599 if (!bh) 3600 goto out; 3601 if (journal_quota) { 3602 err = ext4_journal_get_write_access(handle, bh); 3603 if (err) { 3604 brelse(bh); 3605 goto out; 3606 } 3607 } 3608 lock_buffer(bh); 3609 memcpy(bh->b_data+offset, data, tocopy); 3610 flush_dcache_page(bh->b_page); 3611 unlock_buffer(bh); 3612 if (journal_quota) 3613 err = ext4_handle_dirty_metadata(handle, NULL, bh); 3614 else { 3615 /* Always do at least ordered writes for quotas */ 3616 err = ext4_jbd2_file_inode(handle, inode); 3617 mark_buffer_dirty(bh); 3618 } 3619 brelse(bh); 3620 if (err) 3621 goto out; 3622 offset = 0; 3623 towrite -= tocopy; 3624 data += tocopy; 3625 blk++; 3626 } 3627 out: 3628 if (len == towrite) { 3629 mutex_unlock(&inode->i_mutex); 3630 return err; 3631 } 3632 if (inode->i_size < off+len-towrite) { 3633 i_size_write(inode, off+len-towrite); 3634 EXT4_I(inode)->i_disksize = inode->i_size; 3635 } 3636 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 3637 ext4_mark_inode_dirty(handle, inode); 3638 mutex_unlock(&inode->i_mutex); 3639 return len - towrite; 3640 } 3641 3642 #endif 3643 3644 static int ext4_get_sb(struct file_system_type *fs_type, 3645 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 3646 { 3647 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); 3648 } 3649 3650 #ifdef CONFIG_PROC_FS 3651 static int ext4_ui_proc_show(struct seq_file *m, void *v) 3652 { 3653 unsigned int *p = m->private; 3654 3655 seq_printf(m, "%u\n", *p); 3656 return 0; 3657 } 3658 3659 static int ext4_ui_proc_open(struct inode *inode, struct file *file) 3660 { 3661 return single_open(file, ext4_ui_proc_show, PDE(inode)->data); 3662 } 3663 3664 static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf, 3665 size_t cnt, loff_t *ppos) 3666 { 3667 unsigned long *p = PDE(file->f_path.dentry->d_inode)->data; 3668 char str[32]; 3669 3670 if (cnt >= sizeof(str)) 3671 return -EINVAL; 3672 if (copy_from_user(str, buf, cnt)) 3673 return -EFAULT; 3674 3675 *p = simple_strtoul(str, NULL, 0); 3676 return cnt; 3677 } 3678 3679 const struct file_operations ext4_ui_proc_fops = { 3680 .owner = THIS_MODULE, 3681 .open = ext4_ui_proc_open, 3682 .read = seq_read, 3683 .llseek = seq_lseek, 3684 .release = single_release, 3685 .write = ext4_ui_proc_write, 3686 }; 3687 #endif 3688 3689 static struct file_system_type ext4_fs_type = { 3690 .owner = THIS_MODULE, 3691 .name = "ext4", 3692 .get_sb = ext4_get_sb, 3693 .kill_sb = kill_block_super, 3694 .fs_flags = FS_REQUIRES_DEV, 3695 }; 3696 3697 #ifdef CONFIG_EXT4DEV_COMPAT 3698 static int ext4dev_get_sb(struct file_system_type *fs_type, 3699 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 3700 { 3701 printk(KERN_WARNING "EXT4-fs: Update your userspace programs " 3702 "to mount using ext4\n"); 3703 printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility " 3704 "will go away by 2.6.31\n"); 3705 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); 3706 } 3707 3708 static struct file_system_type ext4dev_fs_type = { 3709 .owner = THIS_MODULE, 3710 .name = "ext4dev", 3711 .get_sb = ext4dev_get_sb, 3712 .kill_sb = kill_block_super, 3713 .fs_flags = FS_REQUIRES_DEV, 3714 }; 3715 MODULE_ALIAS("ext4dev"); 3716 #endif 3717 3718 static int __init init_ext4_fs(void) 3719 { 3720 int err; 3721 3722 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 3723 err = init_ext4_mballoc(); 3724 if (err) 3725 return err; 3726 3727 err = init_ext4_xattr(); 3728 if (err) 3729 goto out2; 3730 err = init_inodecache(); 3731 if (err) 3732 goto out1; 3733 err = register_filesystem(&ext4_fs_type); 3734 if (err) 3735 goto out; 3736 #ifdef CONFIG_EXT4DEV_COMPAT 3737 err = register_filesystem(&ext4dev_fs_type); 3738 if (err) { 3739 unregister_filesystem(&ext4_fs_type); 3740 goto out; 3741 } 3742 #endif 3743 return 0; 3744 out: 3745 destroy_inodecache(); 3746 out1: 3747 exit_ext4_xattr(); 3748 out2: 3749 exit_ext4_mballoc(); 3750 return err; 3751 } 3752 3753 static void __exit exit_ext4_fs(void) 3754 { 3755 unregister_filesystem(&ext4_fs_type); 3756 #ifdef CONFIG_EXT4DEV_COMPAT 3757 unregister_filesystem(&ext4dev_fs_type); 3758 #endif 3759 destroy_inodecache(); 3760 exit_ext4_xattr(); 3761 exit_ext4_mballoc(); 3762 remove_proc_entry("fs/ext4", NULL); 3763 } 3764 3765 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 3766 MODULE_DESCRIPTION("Fourth Extended Filesystem"); 3767 MODULE_LICENSE("GPL"); 3768 module_init(init_ext4_fs) 3769 module_exit(exit_ext4_fs) 3770