1 /* 2 * linux/fs/ext4/super.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * from 10 * 11 * linux/fs/minix/inode.c 12 * 13 * Copyright (C) 1991, 1992 Linus Torvalds 14 * 15 * Big-endian to little-endian byte-swapping/bitmaps by 16 * David S. Miller (davem@caip.rutgers.edu), 1995 17 */ 18 19 #include <linux/module.h> 20 #include <linux/string.h> 21 #include <linux/fs.h> 22 #include <linux/time.h> 23 #include <linux/vmalloc.h> 24 #include <linux/jbd2.h> 25 #include <linux/slab.h> 26 #include <linux/init.h> 27 #include <linux/blkdev.h> 28 #include <linux/parser.h> 29 #include <linux/smp_lock.h> 30 #include <linux/buffer_head.h> 31 #include <linux/exportfs.h> 32 #include <linux/vfs.h> 33 #include <linux/random.h> 34 #include <linux/mount.h> 35 #include <linux/namei.h> 36 #include <linux/quotaops.h> 37 #include <linux/seq_file.h> 38 #include <linux/proc_fs.h> 39 #include <linux/ctype.h> 40 #include <linux/log2.h> 41 #include <linux/crc16.h> 42 #include <asm/uaccess.h> 43 44 #include "ext4.h" 45 #include "ext4_jbd2.h" 46 #include "xattr.h" 47 #include "acl.h" 48 #include "mballoc.h" 49 50 #define CREATE_TRACE_POINTS 51 #include <trace/events/ext4.h> 52 53 struct proc_dir_entry *ext4_proc_root; 54 static struct kset *ext4_kset; 55 56 static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 57 unsigned long journal_devnum); 58 static int ext4_commit_super(struct super_block *sb, int sync); 59 static void ext4_mark_recovery_complete(struct super_block *sb, 60 struct ext4_super_block *es); 61 static void ext4_clear_journal_err(struct super_block *sb, 62 struct ext4_super_block *es); 63 static int ext4_sync_fs(struct super_block *sb, int wait); 64 static const char *ext4_decode_error(struct super_block *sb, int errno, 65 char nbuf[16]); 66 static int ext4_remount(struct super_block *sb, int *flags, char *data); 67 static 
int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); 68 static int ext4_unfreeze(struct super_block *sb); 69 static void ext4_write_super(struct super_block *sb); 70 static int ext4_freeze(struct super_block *sb); 71 72 73 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, 74 struct ext4_group_desc *bg) 75 { 76 return le32_to_cpu(bg->bg_block_bitmap_lo) | 77 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 78 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); 79 } 80 81 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, 82 struct ext4_group_desc *bg) 83 { 84 return le32_to_cpu(bg->bg_inode_bitmap_lo) | 85 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 86 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); 87 } 88 89 ext4_fsblk_t ext4_inode_table(struct super_block *sb, 90 struct ext4_group_desc *bg) 91 { 92 return le32_to_cpu(bg->bg_inode_table_lo) | 93 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 94 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 95 } 96 97 __u32 ext4_free_blks_count(struct super_block *sb, 98 struct ext4_group_desc *bg) 99 { 100 return le16_to_cpu(bg->bg_free_blocks_count_lo) | 101 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 102 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); 103 } 104 105 __u32 ext4_free_inodes_count(struct super_block *sb, 106 struct ext4_group_desc *bg) 107 { 108 return le16_to_cpu(bg->bg_free_inodes_count_lo) | 109 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 110 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); 111 } 112 113 __u32 ext4_used_dirs_count(struct super_block *sb, 114 struct ext4_group_desc *bg) 115 { 116 return le16_to_cpu(bg->bg_used_dirs_count_lo) | 117 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 
118 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); 119 } 120 121 __u32 ext4_itable_unused_count(struct super_block *sb, 122 struct ext4_group_desc *bg) 123 { 124 return le16_to_cpu(bg->bg_itable_unused_lo) | 125 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 126 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); 127 } 128 129 void ext4_block_bitmap_set(struct super_block *sb, 130 struct ext4_group_desc *bg, ext4_fsblk_t blk) 131 { 132 bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk); 133 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 134 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); 135 } 136 137 void ext4_inode_bitmap_set(struct super_block *sb, 138 struct ext4_group_desc *bg, ext4_fsblk_t blk) 139 { 140 bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk); 141 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 142 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); 143 } 144 145 void ext4_inode_table_set(struct super_block *sb, 146 struct ext4_group_desc *bg, ext4_fsblk_t blk) 147 { 148 bg->bg_inode_table_lo = cpu_to_le32((u32)blk); 149 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 150 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); 151 } 152 153 void ext4_free_blks_set(struct super_block *sb, 154 struct ext4_group_desc *bg, __u32 count) 155 { 156 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); 157 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 158 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16); 159 } 160 161 void ext4_free_inodes_set(struct super_block *sb, 162 struct ext4_group_desc *bg, __u32 count) 163 { 164 bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count); 165 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 166 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16); 167 } 168 169 void ext4_used_dirs_set(struct super_block *sb, 170 struct ext4_group_desc *bg, __u32 count) 171 { 172 bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count); 173 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 174 
bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16); 175 } 176 177 void ext4_itable_unused_set(struct super_block *sb, 178 struct ext4_group_desc *bg, __u32 count) 179 { 180 bg->bg_itable_unused_lo = cpu_to_le16((__u16)count); 181 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 182 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); 183 } 184 185 186 /* Just increment the non-pointer handle value */ 187 static handle_t *ext4_get_nojournal(void) 188 { 189 handle_t *handle = current->journal_info; 190 unsigned long ref_cnt = (unsigned long)handle; 191 192 BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT); 193 194 ref_cnt++; 195 handle = (handle_t *)ref_cnt; 196 197 current->journal_info = handle; 198 return handle; 199 } 200 201 202 /* Decrement the non-pointer handle value */ 203 static void ext4_put_nojournal(handle_t *handle) 204 { 205 unsigned long ref_cnt = (unsigned long)handle; 206 207 BUG_ON(ref_cnt == 0); 208 209 ref_cnt--; 210 handle = (handle_t *)ref_cnt; 211 212 current->journal_info = handle; 213 } 214 215 /* 216 * Wrappers for jbd2_journal_start/end. 217 * 218 * The only special thing we need to do here is to make sure that all 219 * journal_end calls result in the superblock being marked dirty, so 220 * that sync() will call the filesystem's write_super callback if 221 * appropriate. 222 */ 223 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 224 { 225 journal_t *journal; 226 227 if (sb->s_flags & MS_RDONLY) 228 return ERR_PTR(-EROFS); 229 230 /* Special case here: if the journal has aborted behind our 231 * backs (eg. EIO in the commit thread), then we still need to 232 * take the FS itself readonly cleanly. 
*/ 233 journal = EXT4_SB(sb)->s_journal; 234 if (journal) { 235 if (is_journal_aborted(journal)) { 236 ext4_abort(sb, __func__, "Detected aborted journal"); 237 return ERR_PTR(-EROFS); 238 } 239 return jbd2_journal_start(journal, nblocks); 240 } 241 return ext4_get_nojournal(); 242 } 243 244 /* 245 * The only special thing we need to do here is to make sure that all 246 * jbd2_journal_stop calls result in the superblock being marked dirty, so 247 * that sync() will call the filesystem's write_super callback if 248 * appropriate. 249 */ 250 int __ext4_journal_stop(const char *where, handle_t *handle) 251 { 252 struct super_block *sb; 253 int err; 254 int rc; 255 256 if (!ext4_handle_valid(handle)) { 257 ext4_put_nojournal(handle); 258 return 0; 259 } 260 sb = handle->h_transaction->t_journal->j_private; 261 err = handle->h_err; 262 rc = jbd2_journal_stop(handle); 263 264 if (!err) 265 err = rc; 266 if (err) 267 __ext4_std_error(sb, where, err); 268 return err; 269 } 270 271 void ext4_journal_abort_handle(const char *caller, const char *err_fn, 272 struct buffer_head *bh, handle_t *handle, int err) 273 { 274 char nbuf[16]; 275 const char *errstr = ext4_decode_error(NULL, err, nbuf); 276 277 BUG_ON(!ext4_handle_valid(handle)); 278 279 if (bh) 280 BUFFER_TRACE(bh, "abort"); 281 282 if (!handle->h_err) 283 handle->h_err = err; 284 285 if (is_handle_aborted(handle)) 286 return; 287 288 printk(KERN_ERR "%s: aborting transaction: %s in %s\n", 289 caller, errstr, err_fn); 290 291 jbd2_journal_abort_handle(handle); 292 } 293 294 /* Deal with the reporting of failure conditions on a filesystem such as 295 * inconsistencies detected or read IO failures. 296 * 297 * On ext2, we can store the error state of the filesystem in the 298 * superblock. 
That is not possible on ext4, because we may have other 299 * write ordering constraints on the superblock which prevent us from 300 * writing it out straight away; and given that the journal is about to 301 * be aborted, we can't rely on the current, or future, transactions to 302 * write out the superblock safely. 303 * 304 * We'll just use the jbd2_journal_abort() error code to record an error in 305 * the journal instead. On recovery, the journal will compain about 306 * that error until we've noted it down and cleared it. 307 */ 308 309 static void ext4_handle_error(struct super_block *sb) 310 { 311 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 312 313 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 314 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 315 316 if (sb->s_flags & MS_RDONLY) 317 return; 318 319 if (!test_opt(sb, ERRORS_CONT)) { 320 journal_t *journal = EXT4_SB(sb)->s_journal; 321 322 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; 323 if (journal) 324 jbd2_journal_abort(journal, -EIO); 325 } 326 if (test_opt(sb, ERRORS_RO)) { 327 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 328 sb->s_flags |= MS_RDONLY; 329 } 330 ext4_commit_super(sb, 1); 331 if (test_opt(sb, ERRORS_PANIC)) 332 panic("EXT4-fs (device %s): panic forced after error\n", 333 sb->s_id); 334 } 335 336 void __ext4_error(struct super_block *sb, const char *function, 337 const char *fmt, ...) 338 { 339 va_list args; 340 341 va_start(args, fmt); 342 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 343 vprintk(fmt, args); 344 printk("\n"); 345 va_end(args); 346 347 ext4_handle_error(sb); 348 } 349 350 void ext4_error_inode(const char *function, struct inode *inode, 351 const char *fmt, ...) 
352 { 353 va_list args; 354 355 va_start(args, fmt); 356 printk(KERN_CRIT "EXT4-fs error (device %s): %s: inode #%lu: (comm %s) ", 357 inode->i_sb->s_id, function, inode->i_ino, current->comm); 358 vprintk(fmt, args); 359 printk("\n"); 360 va_end(args); 361 362 ext4_handle_error(inode->i_sb); 363 } 364 365 void ext4_error_file(const char *function, struct file *file, 366 const char *fmt, ...) 367 { 368 va_list args; 369 struct inode *inode = file->f_dentry->d_inode; 370 char pathname[80], *path; 371 372 va_start(args, fmt); 373 path = d_path(&(file->f_path), pathname, sizeof(pathname)); 374 if (!path) 375 path = "(unknown)"; 376 printk(KERN_CRIT 377 "EXT4-fs error (device %s): %s: inode #%lu (comm %s path %s): ", 378 inode->i_sb->s_id, function, inode->i_ino, current->comm, path); 379 vprintk(fmt, args); 380 printk("\n"); 381 va_end(args); 382 383 ext4_handle_error(inode->i_sb); 384 } 385 386 static const char *ext4_decode_error(struct super_block *sb, int errno, 387 char nbuf[16]) 388 { 389 char *errstr = NULL; 390 391 switch (errno) { 392 case -EIO: 393 errstr = "IO failure"; 394 break; 395 case -ENOMEM: 396 errstr = "Out of memory"; 397 break; 398 case -EROFS: 399 if (!sb || (EXT4_SB(sb)->s_journal && 400 EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)) 401 errstr = "Journal has aborted"; 402 else 403 errstr = "Readonly filesystem"; 404 break; 405 default: 406 /* If the caller passed in an extra buffer for unknown 407 * errors, textualise them now. Else we just return 408 * NULL. */ 409 if (nbuf) { 410 /* Check for truncated error codes... */ 411 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 412 errstr = nbuf; 413 } 414 break; 415 } 416 417 return errstr; 418 } 419 420 /* __ext4_std_error decodes expected errors from journaling functions 421 * automatically and invokes the appropriate error response. 
*/ 422 423 void __ext4_std_error(struct super_block *sb, const char *function, int errno) 424 { 425 char nbuf[16]; 426 const char *errstr; 427 428 /* Special case: if the error is EROFS, and we're not already 429 * inside a transaction, then there's really no point in logging 430 * an error. */ 431 if (errno == -EROFS && journal_current_handle() == NULL && 432 (sb->s_flags & MS_RDONLY)) 433 return; 434 435 errstr = ext4_decode_error(sb, errno, nbuf); 436 printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", 437 sb->s_id, function, errstr); 438 439 ext4_handle_error(sb); 440 } 441 442 /* 443 * ext4_abort is a much stronger failure handler than ext4_error. The 444 * abort function may be used to deal with unrecoverable failures such 445 * as journal IO errors or ENOMEM at a critical moment in log management. 446 * 447 * We unconditionally force the filesystem into an ABORT|READONLY state, 448 * unless the error response on the fs has been set to panic in which 449 * case we take the easy way out and panic immediately. 450 */ 451 452 void ext4_abort(struct super_block *sb, const char *function, 453 const char *fmt, ...) 454 { 455 va_list args; 456 457 va_start(args, fmt); 458 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 459 vprintk(fmt, args); 460 printk("\n"); 461 va_end(args); 462 463 if (test_opt(sb, ERRORS_PANIC)) 464 panic("EXT4-fs panic from previous error\n"); 465 466 if (sb->s_flags & MS_RDONLY) 467 return; 468 469 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 470 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 471 sb->s_flags |= MS_RDONLY; 472 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; 473 if (EXT4_SB(sb)->s_journal) 474 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 475 } 476 477 void ext4_msg (struct super_block * sb, const char *prefix, 478 const char *fmt, ...) 
479 { 480 va_list args; 481 482 va_start(args, fmt); 483 printk("%sEXT4-fs (%s): ", prefix, sb->s_id); 484 vprintk(fmt, args); 485 printk("\n"); 486 va_end(args); 487 } 488 489 void __ext4_warning(struct super_block *sb, const char *function, 490 const char *fmt, ...) 491 { 492 va_list args; 493 494 va_start(args, fmt); 495 printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ", 496 sb->s_id, function); 497 vprintk(fmt, args); 498 printk("\n"); 499 va_end(args); 500 } 501 502 void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, 503 const char *function, const char *fmt, ...) 504 __releases(bitlock) 505 __acquires(bitlock) 506 { 507 va_list args; 508 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 509 510 va_start(args, fmt); 511 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 512 vprintk(fmt, args); 513 printk("\n"); 514 va_end(args); 515 516 if (test_opt(sb, ERRORS_CONT)) { 517 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 518 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 519 ext4_commit_super(sb, 0); 520 return; 521 } 522 ext4_unlock_group(sb, grp); 523 ext4_handle_error(sb); 524 /* 525 * We only get here in the ERRORS_RO case; relocking the group 526 * may be dangerous, but nothing bad will happen since the 527 * filesystem will have already been marked read/only and the 528 * journal has been aborted. We return 1 as a hint to callers 529 * who might what to use the return value from 530 * ext4_grp_locked_error() to distinguish beween the 531 * ERRORS_CONT and ERRORS_RO case, and perhaps return more 532 * aggressively from the ext4 function in question, with a 533 * more appropriate error code. 
534 */ 535 ext4_lock_group(sb, grp); 536 return; 537 } 538 539 void ext4_update_dynamic_rev(struct super_block *sb) 540 { 541 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 542 543 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 544 return; 545 546 ext4_warning(sb, 547 "updating to rev %d because of new feature flag, " 548 "running e2fsck is recommended", 549 EXT4_DYNAMIC_REV); 550 551 es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO); 552 es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE); 553 es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV); 554 /* leave es->s_feature_*compat flags alone */ 555 /* es->s_uuid will be set by e2fsck if empty */ 556 557 /* 558 * The rest of the superblock fields should be zero, and if not it 559 * means they are likely already in use, so leave them alone. We 560 * can leave it up to e2fsck to clean up any inconsistencies there. 561 */ 562 } 563 564 /* 565 * Open the external journal device 566 */ 567 static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) 568 { 569 struct block_device *bdev; 570 char b[BDEVNAME_SIZE]; 571 572 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); 573 if (IS_ERR(bdev)) 574 goto fail; 575 return bdev; 576 577 fail: 578 ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld", 579 __bdevname(dev, b), PTR_ERR(bdev)); 580 return NULL; 581 } 582 583 /* 584 * Release the journal device 585 */ 586 static int ext4_blkdev_put(struct block_device *bdev) 587 { 588 bd_release(bdev); 589 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 590 } 591 592 static int ext4_blkdev_remove(struct ext4_sb_info *sbi) 593 { 594 struct block_device *bdev; 595 int ret = -ENODEV; 596 597 bdev = sbi->journal_bdev; 598 if (bdev) { 599 ret = ext4_blkdev_put(bdev); 600 sbi->journal_bdev = NULL; 601 } 602 return ret; 603 } 604 605 static inline struct inode *orphan_list_entry(struct list_head *l) 606 { 607 return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode; 608 } 609 610 
static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) 611 { 612 struct list_head *l; 613 614 ext4_msg(sb, KERN_ERR, "sb orphan head is %d", 615 le32_to_cpu(sbi->s_es->s_last_orphan)); 616 617 printk(KERN_ERR "sb_info orphan list:\n"); 618 list_for_each(l, &sbi->s_orphan) { 619 struct inode *inode = orphan_list_entry(l); 620 printk(KERN_ERR " " 621 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 622 inode->i_sb->s_id, inode->i_ino, inode, 623 inode->i_mode, inode->i_nlink, 624 NEXT_ORPHAN(inode)); 625 } 626 } 627 628 static void ext4_put_super(struct super_block *sb) 629 { 630 struct ext4_sb_info *sbi = EXT4_SB(sb); 631 struct ext4_super_block *es = sbi->s_es; 632 int i, err; 633 634 flush_workqueue(sbi->dio_unwritten_wq); 635 destroy_workqueue(sbi->dio_unwritten_wq); 636 637 lock_super(sb); 638 lock_kernel(); 639 if (sb->s_dirt) 640 ext4_commit_super(sb, 1); 641 642 if (sbi->s_journal) { 643 err = jbd2_journal_destroy(sbi->s_journal); 644 sbi->s_journal = NULL; 645 if (err < 0) 646 ext4_abort(sb, __func__, 647 "Couldn't clean up the journal"); 648 } 649 650 ext4_release_system_zone(sb); 651 ext4_mb_release(sb); 652 ext4_ext_release(sb); 653 ext4_xattr_put_super(sb); 654 655 if (!(sb->s_flags & MS_RDONLY)) { 656 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 657 es->s_state = cpu_to_le16(sbi->s_mount_state); 658 ext4_commit_super(sb, 1); 659 } 660 if (sbi->s_proc) { 661 remove_proc_entry(sb->s_id, ext4_proc_root); 662 } 663 kobject_del(&sbi->s_kobj); 664 665 for (i = 0; i < sbi->s_gdb_count; i++) 666 brelse(sbi->s_group_desc[i]); 667 kfree(sbi->s_group_desc); 668 if (is_vmalloc_addr(sbi->s_flex_groups)) 669 vfree(sbi->s_flex_groups); 670 else 671 kfree(sbi->s_flex_groups); 672 percpu_counter_destroy(&sbi->s_freeblocks_counter); 673 percpu_counter_destroy(&sbi->s_freeinodes_counter); 674 percpu_counter_destroy(&sbi->s_dirs_counter); 675 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 676 brelse(sbi->s_sbh); 677 
#ifdef CONFIG_QUOTA 678 for (i = 0; i < MAXQUOTAS; i++) 679 kfree(sbi->s_qf_names[i]); 680 #endif 681 682 /* Debugging code just in case the in-memory inode orphan list 683 * isn't empty. The on-disk one can be non-empty if we've 684 * detected an error and taken the fs readonly, but the 685 * in-memory list had better be clean by this point. */ 686 if (!list_empty(&sbi->s_orphan)) 687 dump_orphan_list(sb, sbi); 688 J_ASSERT(list_empty(&sbi->s_orphan)); 689 690 invalidate_bdev(sb->s_bdev); 691 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { 692 /* 693 * Invalidate the journal device's buffers. We don't want them 694 * floating about in memory - the physical journal device may 695 * hotswapped, and it breaks the `ro-after' testing code. 696 */ 697 sync_blockdev(sbi->journal_bdev); 698 invalidate_bdev(sbi->journal_bdev); 699 ext4_blkdev_remove(sbi); 700 } 701 sb->s_fs_info = NULL; 702 /* 703 * Now that we are completely done shutting down the 704 * superblock, we need to actually destroy the kobject. 705 */ 706 unlock_kernel(); 707 unlock_super(sb); 708 kobject_put(&sbi->s_kobj); 709 wait_for_completion(&sbi->s_kobj_unregister); 710 kfree(sbi->s_blockgroup_lock); 711 kfree(sbi); 712 } 713 714 static struct kmem_cache *ext4_inode_cachep; 715 716 /* 717 * Called inside transaction, so use GFP_NOFS 718 */ 719 static struct inode *ext4_alloc_inode(struct super_block *sb) 720 { 721 struct ext4_inode_info *ei; 722 723 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); 724 if (!ei) 725 return NULL; 726 727 ei->vfs_inode.i_version = 1; 728 ei->vfs_inode.i_data.writeback_index = 0; 729 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 730 INIT_LIST_HEAD(&ei->i_prealloc_list); 731 spin_lock_init(&ei->i_prealloc_lock); 732 /* 733 * Note: We can be called before EXT4_SB(sb)->s_journal is set, 734 * therefore it can be null here. Don't check it, just initialize 735 * jinode. 
736 */ 737 jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode); 738 ei->i_reserved_data_blocks = 0; 739 ei->i_reserved_meta_blocks = 0; 740 ei->i_allocated_meta_blocks = 0; 741 ei->i_da_metadata_calc_len = 0; 742 ei->i_delalloc_reserved_flag = 0; 743 spin_lock_init(&(ei->i_block_reservation_lock)); 744 #ifdef CONFIG_QUOTA 745 ei->i_reserved_quota = 0; 746 #endif 747 INIT_LIST_HEAD(&ei->i_completed_io_list); 748 spin_lock_init(&ei->i_completed_io_lock); 749 ei->cur_aio_dio = NULL; 750 ei->i_sync_tid = 0; 751 ei->i_datasync_tid = 0; 752 753 return &ei->vfs_inode; 754 } 755 756 static void ext4_destroy_inode(struct inode *inode) 757 { 758 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 759 ext4_msg(inode->i_sb, KERN_ERR, 760 "Inode %lu (%p): orphan list check failed!", 761 inode->i_ino, EXT4_I(inode)); 762 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, 763 EXT4_I(inode), sizeof(struct ext4_inode_info), 764 true); 765 dump_stack(); 766 } 767 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 768 } 769 770 static void init_once(void *foo) 771 { 772 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 773 774 INIT_LIST_HEAD(&ei->i_orphan); 775 #ifdef CONFIG_EXT4_FS_XATTR 776 init_rwsem(&ei->xattr_sem); 777 #endif 778 init_rwsem(&ei->i_data_sem); 779 inode_init_once(&ei->vfs_inode); 780 } 781 782 static int init_inodecache(void) 783 { 784 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", 785 sizeof(struct ext4_inode_info), 786 0, (SLAB_RECLAIM_ACCOUNT| 787 SLAB_MEM_SPREAD), 788 init_once); 789 if (ext4_inode_cachep == NULL) 790 return -ENOMEM; 791 return 0; 792 } 793 794 static void destroy_inodecache(void) 795 { 796 kmem_cache_destroy(ext4_inode_cachep); 797 } 798 799 static void ext4_clear_inode(struct inode *inode) 800 { 801 dquot_drop(inode); 802 ext4_discard_preallocations(inode); 803 if (EXT4_JOURNAL(inode)) 804 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, 805 &EXT4_I(inode)->jinode); 806 } 807 808 static inline void 
ext4_show_quota_options(struct seq_file *seq, 809 struct super_block *sb) 810 { 811 #if defined(CONFIG_QUOTA) 812 struct ext4_sb_info *sbi = EXT4_SB(sb); 813 814 if (sbi->s_jquota_fmt) { 815 char *fmtname = ""; 816 817 switch (sbi->s_jquota_fmt) { 818 case QFMT_VFS_OLD: 819 fmtname = "vfsold"; 820 break; 821 case QFMT_VFS_V0: 822 fmtname = "vfsv0"; 823 break; 824 case QFMT_VFS_V1: 825 fmtname = "vfsv1"; 826 break; 827 } 828 seq_printf(seq, ",jqfmt=%s", fmtname); 829 } 830 831 if (sbi->s_qf_names[USRQUOTA]) 832 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 833 834 if (sbi->s_qf_names[GRPQUOTA]) 835 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 836 837 if (test_opt(sb, USRQUOTA)) 838 seq_puts(seq, ",usrquota"); 839 840 if (test_opt(sb, GRPQUOTA)) 841 seq_puts(seq, ",grpquota"); 842 #endif 843 } 844 845 /* 846 * Show an option if 847 * - it's set to a non-default value OR 848 * - if the per-sb default is different from the global default 849 */ 850 static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) 851 { 852 int def_errors; 853 unsigned long def_mount_opts; 854 struct super_block *sb = vfs->mnt_sb; 855 struct ext4_sb_info *sbi = EXT4_SB(sb); 856 struct ext4_super_block *es = sbi->s_es; 857 858 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 859 def_errors = le16_to_cpu(es->s_errors); 860 861 if (sbi->s_sb_block != 1) 862 seq_printf(seq, ",sb=%llu", sbi->s_sb_block); 863 if (test_opt(sb, MINIX_DF)) 864 seq_puts(seq, ",minixdf"); 865 if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS)) 866 seq_puts(seq, ",grpid"); 867 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) 868 seq_puts(seq, ",nogrpid"); 869 if (sbi->s_resuid != EXT4_DEF_RESUID || 870 le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) { 871 seq_printf(seq, ",resuid=%u", sbi->s_resuid); 872 } 873 if (sbi->s_resgid != EXT4_DEF_RESGID || 874 le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { 875 seq_printf(seq, 
",resgid=%u", sbi->s_resgid); 876 } 877 if (test_opt(sb, ERRORS_RO)) { 878 if (def_errors == EXT4_ERRORS_PANIC || 879 def_errors == EXT4_ERRORS_CONTINUE) { 880 seq_puts(seq, ",errors=remount-ro"); 881 } 882 } 883 if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) 884 seq_puts(seq, ",errors=continue"); 885 if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) 886 seq_puts(seq, ",errors=panic"); 887 if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16)) 888 seq_puts(seq, ",nouid32"); 889 if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) 890 seq_puts(seq, ",debug"); 891 if (test_opt(sb, OLDALLOC)) 892 seq_puts(seq, ",oldalloc"); 893 #ifdef CONFIG_EXT4_FS_XATTR 894 if (test_opt(sb, XATTR_USER) && 895 !(def_mount_opts & EXT4_DEFM_XATTR_USER)) 896 seq_puts(seq, ",user_xattr"); 897 if (!test_opt(sb, XATTR_USER) && 898 (def_mount_opts & EXT4_DEFM_XATTR_USER)) { 899 seq_puts(seq, ",nouser_xattr"); 900 } 901 #endif 902 #ifdef CONFIG_EXT4_FS_POSIX_ACL 903 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) 904 seq_puts(seq, ",acl"); 905 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) 906 seq_puts(seq, ",noacl"); 907 #endif 908 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { 909 seq_printf(seq, ",commit=%u", 910 (unsigned) (sbi->s_commit_interval / HZ)); 911 } 912 if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) { 913 seq_printf(seq, ",min_batch_time=%u", 914 (unsigned) sbi->s_min_batch_time); 915 } 916 if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { 917 seq_printf(seq, ",max_batch_time=%u", 918 (unsigned) sbi->s_min_batch_time); 919 } 920 921 /* 922 * We're changing the default of barrier mount option, so 923 * let's always display its mount state so it's clear what its 924 * status is. 925 */ 926 seq_puts(seq, ",barrier="); 927 seq_puts(seq, test_opt(sb, BARRIER) ? 
"1" : "0"); 928 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) 929 seq_puts(seq, ",journal_async_commit"); 930 if (test_opt(sb, NOBH)) 931 seq_puts(seq, ",nobh"); 932 if (test_opt(sb, I_VERSION)) 933 seq_puts(seq, ",i_version"); 934 if (!test_opt(sb, DELALLOC)) 935 seq_puts(seq, ",nodelalloc"); 936 937 938 if (sbi->s_stripe) 939 seq_printf(seq, ",stripe=%lu", sbi->s_stripe); 940 /* 941 * journal mode get enabled in different ways 942 * So just print the value even if we didn't specify it 943 */ 944 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 945 seq_puts(seq, ",data=journal"); 946 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 947 seq_puts(seq, ",data=ordered"); 948 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 949 seq_puts(seq, ",data=writeback"); 950 951 if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) 952 seq_printf(seq, ",inode_readahead_blks=%u", 953 sbi->s_inode_readahead_blks); 954 955 if (test_opt(sb, DATA_ERR_ABORT)) 956 seq_puts(seq, ",data_err=abort"); 957 958 if (test_opt(sb, NO_AUTO_DA_ALLOC)) 959 seq_puts(seq, ",noauto_da_alloc"); 960 961 if (test_opt(sb, DISCARD)) 962 seq_puts(seq, ",discard"); 963 964 if (test_opt(sb, NOLOAD)) 965 seq_puts(seq, ",norecovery"); 966 967 if (test_opt(sb, DIOREAD_NOLOCK)) 968 seq_puts(seq, ",dioread_nolock"); 969 970 ext4_show_quota_options(seq, sb); 971 972 return 0; 973 } 974 975 static struct inode *ext4_nfs_get_inode(struct super_block *sb, 976 u64 ino, u32 generation) 977 { 978 struct inode *inode; 979 980 if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) 981 return ERR_PTR(-ESTALE); 982 if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) 983 return ERR_PTR(-ESTALE); 984 985 /* iget isn't really right if the inode is currently unallocated!! 986 * 987 * ext4_read_inode will return a bad_inode if the inode had been 988 * deleted, so we should be safe. 
989 * 990 * Currently we don't know the generation for parent directory, so 991 * a generation of 0 means "accept any" 992 */ 993 inode = ext4_iget(sb, ino); 994 if (IS_ERR(inode)) 995 return ERR_CAST(inode); 996 if (generation && inode->i_generation != generation) { 997 iput(inode); 998 return ERR_PTR(-ESTALE); 999 } 1000 1001 return inode; 1002 } 1003 1004 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, 1005 int fh_len, int fh_type) 1006 { 1007 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 1008 ext4_nfs_get_inode); 1009 } 1010 1011 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, 1012 int fh_len, int fh_type) 1013 { 1014 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 1015 ext4_nfs_get_inode); 1016 } 1017 1018 /* 1019 * Try to release metadata pages (indirect blocks, directories) which are 1020 * mapped via the block device. Since these pages could have journal heads 1021 * which would prevent try_to_free_buffers() from freeing them, we must use 1022 * jbd2 layer's try_to_free_buffers() function to release them. 1023 */ 1024 static int bdev_try_to_free_page(struct super_block *sb, struct page *page, 1025 gfp_t wait) 1026 { 1027 journal_t *journal = EXT4_SB(sb)->s_journal; 1028 1029 WARN_ON(PageChecked(page)); 1030 if (!page_has_buffers(page)) 1031 return 0; 1032 if (journal) 1033 return jbd2_journal_try_to_free_buffers(journal, page, 1034 wait & ~__GFP_WAIT); 1035 return try_to_free_buffers(page); 1036 } 1037 1038 #ifdef CONFIG_QUOTA 1039 #define QTYPE2NAME(t) ((t) == USRQUOTA ? 
"user" : "group") 1040 #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 1041 1042 static int ext4_write_dquot(struct dquot *dquot); 1043 static int ext4_acquire_dquot(struct dquot *dquot); 1044 static int ext4_release_dquot(struct dquot *dquot); 1045 static int ext4_mark_dquot_dirty(struct dquot *dquot); 1046 static int ext4_write_info(struct super_block *sb, int type); 1047 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 1048 char *path, int remount); 1049 static int ext4_quota_on_mount(struct super_block *sb, int type); 1050 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 1051 size_t len, loff_t off); 1052 static ssize_t ext4_quota_write(struct super_block *sb, int type, 1053 const char *data, size_t len, loff_t off); 1054 1055 static const struct dquot_operations ext4_quota_operations = { 1056 #ifdef CONFIG_QUOTA 1057 .get_reserved_space = ext4_get_reserved_space, 1058 #endif 1059 .write_dquot = ext4_write_dquot, 1060 .acquire_dquot = ext4_acquire_dquot, 1061 .release_dquot = ext4_release_dquot, 1062 .mark_dirty = ext4_mark_dquot_dirty, 1063 .write_info = ext4_write_info, 1064 .alloc_dquot = dquot_alloc, 1065 .destroy_dquot = dquot_destroy, 1066 }; 1067 1068 static const struct quotactl_ops ext4_qctl_operations = { 1069 .quota_on = ext4_quota_on, 1070 .quota_off = vfs_quota_off, 1071 .quota_sync = vfs_quota_sync, 1072 .get_info = vfs_get_dqinfo, 1073 .set_info = vfs_set_dqinfo, 1074 .get_dqblk = vfs_get_dqblk, 1075 .set_dqblk = vfs_set_dqblk 1076 }; 1077 #endif 1078 1079 static const struct super_operations ext4_sops = { 1080 .alloc_inode = ext4_alloc_inode, 1081 .destroy_inode = ext4_destroy_inode, 1082 .write_inode = ext4_write_inode, 1083 .dirty_inode = ext4_dirty_inode, 1084 .delete_inode = ext4_delete_inode, 1085 .put_super = ext4_put_super, 1086 .sync_fs = ext4_sync_fs, 1087 .freeze_fs = ext4_freeze, 1088 .unfreeze_fs = ext4_unfreeze, 1089 .statfs = ext4_statfs, 1090 
.remount_fs = ext4_remount, 1091 .clear_inode = ext4_clear_inode, 1092 .show_options = ext4_show_options, 1093 #ifdef CONFIG_QUOTA 1094 .quota_read = ext4_quota_read, 1095 .quota_write = ext4_quota_write, 1096 #endif 1097 .bdev_try_to_free_page = bdev_try_to_free_page, 1098 }; 1099 1100 static const struct super_operations ext4_nojournal_sops = { 1101 .alloc_inode = ext4_alloc_inode, 1102 .destroy_inode = ext4_destroy_inode, 1103 .write_inode = ext4_write_inode, 1104 .dirty_inode = ext4_dirty_inode, 1105 .delete_inode = ext4_delete_inode, 1106 .write_super = ext4_write_super, 1107 .put_super = ext4_put_super, 1108 .statfs = ext4_statfs, 1109 .remount_fs = ext4_remount, 1110 .clear_inode = ext4_clear_inode, 1111 .show_options = ext4_show_options, 1112 #ifdef CONFIG_QUOTA 1113 .quota_read = ext4_quota_read, 1114 .quota_write = ext4_quota_write, 1115 #endif 1116 .bdev_try_to_free_page = bdev_try_to_free_page, 1117 }; 1118 1119 static const struct export_operations ext4_export_ops = { 1120 .fh_to_dentry = ext4_fh_to_dentry, 1121 .fh_to_parent = ext4_fh_to_parent, 1122 .get_parent = ext4_get_parent, 1123 }; 1124 1125 enum { 1126 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 1127 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 1128 Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, 1129 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 1130 Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh, 1131 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, 1132 Opt_journal_update, Opt_journal_dev, 1133 Opt_journal_checksum, Opt_journal_async_commit, 1134 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1135 Opt_data_err_abort, Opt_data_err_ignore, 1136 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1137 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 1138 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, 1139 Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, 1140 
Opt_stripe, Opt_delalloc, Opt_nodelalloc, 1141 Opt_block_validity, Opt_noblock_validity, 1142 Opt_inode_readahead_blks, Opt_journal_ioprio, 1143 Opt_dioread_nolock, Opt_dioread_lock, 1144 Opt_discard, Opt_nodiscard, 1145 }; 1146 1147 static const match_table_t tokens = { 1148 {Opt_bsd_df, "bsddf"}, 1149 {Opt_minix_df, "minixdf"}, 1150 {Opt_grpid, "grpid"}, 1151 {Opt_grpid, "bsdgroups"}, 1152 {Opt_nogrpid, "nogrpid"}, 1153 {Opt_nogrpid, "sysvgroups"}, 1154 {Opt_resgid, "resgid=%u"}, 1155 {Opt_resuid, "resuid=%u"}, 1156 {Opt_sb, "sb=%u"}, 1157 {Opt_err_cont, "errors=continue"}, 1158 {Opt_err_panic, "errors=panic"}, 1159 {Opt_err_ro, "errors=remount-ro"}, 1160 {Opt_nouid32, "nouid32"}, 1161 {Opt_debug, "debug"}, 1162 {Opt_oldalloc, "oldalloc"}, 1163 {Opt_orlov, "orlov"}, 1164 {Opt_user_xattr, "user_xattr"}, 1165 {Opt_nouser_xattr, "nouser_xattr"}, 1166 {Opt_acl, "acl"}, 1167 {Opt_noacl, "noacl"}, 1168 {Opt_noload, "noload"}, 1169 {Opt_noload, "norecovery"}, 1170 {Opt_nobh, "nobh"}, 1171 {Opt_bh, "bh"}, 1172 {Opt_commit, "commit=%u"}, 1173 {Opt_min_batch_time, "min_batch_time=%u"}, 1174 {Opt_max_batch_time, "max_batch_time=%u"}, 1175 {Opt_journal_update, "journal=update"}, 1176 {Opt_journal_dev, "journal_dev=%u"}, 1177 {Opt_journal_checksum, "journal_checksum"}, 1178 {Opt_journal_async_commit, "journal_async_commit"}, 1179 {Opt_abort, "abort"}, 1180 {Opt_data_journal, "data=journal"}, 1181 {Opt_data_ordered, "data=ordered"}, 1182 {Opt_data_writeback, "data=writeback"}, 1183 {Opt_data_err_abort, "data_err=abort"}, 1184 {Opt_data_err_ignore, "data_err=ignore"}, 1185 {Opt_offusrjquota, "usrjquota="}, 1186 {Opt_usrjquota, "usrjquota=%s"}, 1187 {Opt_offgrpjquota, "grpjquota="}, 1188 {Opt_grpjquota, "grpjquota=%s"}, 1189 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 1190 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 1191 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, 1192 {Opt_grpquota, "grpquota"}, 1193 {Opt_noquota, "noquota"}, 1194 {Opt_quota, "quota"}, 1195 {Opt_usrquota, "usrquota"}, 1196 {Opt_barrier, 
"barrier=%u"}, 1197 {Opt_barrier, "barrier"}, 1198 {Opt_nobarrier, "nobarrier"}, 1199 {Opt_i_version, "i_version"}, 1200 {Opt_stripe, "stripe=%u"}, 1201 {Opt_resize, "resize"}, 1202 {Opt_delalloc, "delalloc"}, 1203 {Opt_nodelalloc, "nodelalloc"}, 1204 {Opt_block_validity, "block_validity"}, 1205 {Opt_noblock_validity, "noblock_validity"}, 1206 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1207 {Opt_journal_ioprio, "journal_ioprio=%u"}, 1208 {Opt_auto_da_alloc, "auto_da_alloc=%u"}, 1209 {Opt_auto_da_alloc, "auto_da_alloc"}, 1210 {Opt_noauto_da_alloc, "noauto_da_alloc"}, 1211 {Opt_dioread_nolock, "dioread_nolock"}, 1212 {Opt_dioread_lock, "dioread_lock"}, 1213 {Opt_discard, "discard"}, 1214 {Opt_nodiscard, "nodiscard"}, 1215 {Opt_err, NULL}, 1216 }; 1217 1218 static ext4_fsblk_t get_sb_block(void **data) 1219 { 1220 ext4_fsblk_t sb_block; 1221 char *options = (char *) *data; 1222 1223 if (!options || strncmp(options, "sb=", 3) != 0) 1224 return 1; /* Default location */ 1225 1226 options += 3; 1227 /* TODO: use simple_strtoll with >32bit ext4 */ 1228 sb_block = simple_strtoul(options, &options, 0); 1229 if (*options && *options != ',') { 1230 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", 1231 (char *) *data); 1232 return 1; 1233 } 1234 if (*options == ',') 1235 options++; 1236 *data = (void *) options; 1237 1238 return sb_block; 1239 } 1240 1241 #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) 1242 static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n" 1243 "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n"; 1244 1245 #ifdef CONFIG_QUOTA 1246 static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) 1247 { 1248 struct ext4_sb_info *sbi = EXT4_SB(sb); 1249 char *qname; 1250 1251 if (sb_any_quota_loaded(sb) && 1252 !sbi->s_qf_names[qtype]) { 1253 ext4_msg(sb, KERN_ERR, 1254 "Cannot change journaled " 1255 "quota options when quota turned on"); 1256 return 0; 1257 } 
1258 qname = match_strdup(args); 1259 if (!qname) { 1260 ext4_msg(sb, KERN_ERR, 1261 "Not enough memory for storing quotafile name"); 1262 return 0; 1263 } 1264 if (sbi->s_qf_names[qtype] && 1265 strcmp(sbi->s_qf_names[qtype], qname)) { 1266 ext4_msg(sb, KERN_ERR, 1267 "%s quota file already specified", QTYPE2NAME(qtype)); 1268 kfree(qname); 1269 return 0; 1270 } 1271 sbi->s_qf_names[qtype] = qname; 1272 if (strchr(sbi->s_qf_names[qtype], '/')) { 1273 ext4_msg(sb, KERN_ERR, 1274 "quotafile must be on filesystem root"); 1275 kfree(sbi->s_qf_names[qtype]); 1276 sbi->s_qf_names[qtype] = NULL; 1277 return 0; 1278 } 1279 set_opt(sbi->s_mount_opt, QUOTA); 1280 return 1; 1281 } 1282 1283 static int clear_qf_name(struct super_block *sb, int qtype) 1284 { 1285 1286 struct ext4_sb_info *sbi = EXT4_SB(sb); 1287 1288 if (sb_any_quota_loaded(sb) && 1289 sbi->s_qf_names[qtype]) { 1290 ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options" 1291 " when quota turned on"); 1292 return 0; 1293 } 1294 /* 1295 * The space will be released later when all options are confirmed 1296 * to be correct 1297 */ 1298 sbi->s_qf_names[qtype] = NULL; 1299 return 1; 1300 } 1301 #endif 1302 1303 static int parse_options(char *options, struct super_block *sb, 1304 unsigned long *journal_devnum, 1305 unsigned int *journal_ioprio, 1306 ext4_fsblk_t *n_blocks_count, int is_remount) 1307 { 1308 struct ext4_sb_info *sbi = EXT4_SB(sb); 1309 char *p; 1310 substring_t args[MAX_OPT_ARGS]; 1311 int data_opt = 0; 1312 int option; 1313 #ifdef CONFIG_QUOTA 1314 int qfmt; 1315 #endif 1316 1317 if (!options) 1318 return 1; 1319 1320 while ((p = strsep(&options, ",")) != NULL) { 1321 int token; 1322 if (!*p) 1323 continue; 1324 1325 /* 1326 * Initialize args struct so we know whether arg was 1327 * found; some options take optional arguments. 
1328 */ 1329 args[0].to = args[0].from = 0; 1330 token = match_token(p, tokens, args); 1331 switch (token) { 1332 case Opt_bsd_df: 1333 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1334 clear_opt(sbi->s_mount_opt, MINIX_DF); 1335 break; 1336 case Opt_minix_df: 1337 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1338 set_opt(sbi->s_mount_opt, MINIX_DF); 1339 1340 break; 1341 case Opt_grpid: 1342 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1343 set_opt(sbi->s_mount_opt, GRPID); 1344 1345 break; 1346 case Opt_nogrpid: 1347 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1348 clear_opt(sbi->s_mount_opt, GRPID); 1349 1350 break; 1351 case Opt_resuid: 1352 if (match_int(&args[0], &option)) 1353 return 0; 1354 sbi->s_resuid = option; 1355 break; 1356 case Opt_resgid: 1357 if (match_int(&args[0], &option)) 1358 return 0; 1359 sbi->s_resgid = option; 1360 break; 1361 case Opt_sb: 1362 /* handled by get_sb_block() instead of here */ 1363 /* *sb_block = match_int(&args[0]); */ 1364 break; 1365 case Opt_err_panic: 1366 clear_opt(sbi->s_mount_opt, ERRORS_CONT); 1367 clear_opt(sbi->s_mount_opt, ERRORS_RO); 1368 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 1369 break; 1370 case Opt_err_ro: 1371 clear_opt(sbi->s_mount_opt, ERRORS_CONT); 1372 clear_opt(sbi->s_mount_opt, ERRORS_PANIC); 1373 set_opt(sbi->s_mount_opt, ERRORS_RO); 1374 break; 1375 case Opt_err_cont: 1376 clear_opt(sbi->s_mount_opt, ERRORS_RO); 1377 clear_opt(sbi->s_mount_opt, ERRORS_PANIC); 1378 set_opt(sbi->s_mount_opt, ERRORS_CONT); 1379 break; 1380 case Opt_nouid32: 1381 set_opt(sbi->s_mount_opt, NO_UID32); 1382 break; 1383 case Opt_debug: 1384 set_opt(sbi->s_mount_opt, DEBUG); 1385 break; 1386 case Opt_oldalloc: 1387 set_opt(sbi->s_mount_opt, OLDALLOC); 1388 break; 1389 case Opt_orlov: 1390 clear_opt(sbi->s_mount_opt, OLDALLOC); 1391 break; 1392 #ifdef CONFIG_EXT4_FS_XATTR 1393 case Opt_user_xattr: 1394 set_opt(sbi->s_mount_opt, XATTR_USER); 1395 break; 1396 case 
Opt_nouser_xattr: 1397 clear_opt(sbi->s_mount_opt, XATTR_USER); 1398 break; 1399 #else 1400 case Opt_user_xattr: 1401 case Opt_nouser_xattr: 1402 ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported"); 1403 break; 1404 #endif 1405 #ifdef CONFIG_EXT4_FS_POSIX_ACL 1406 case Opt_acl: 1407 set_opt(sbi->s_mount_opt, POSIX_ACL); 1408 break; 1409 case Opt_noacl: 1410 clear_opt(sbi->s_mount_opt, POSIX_ACL); 1411 break; 1412 #else 1413 case Opt_acl: 1414 case Opt_noacl: 1415 ext4_msg(sb, KERN_ERR, "(no)acl options not supported"); 1416 break; 1417 #endif 1418 case Opt_journal_update: 1419 /* @@@ FIXME */ 1420 /* Eventually we will want to be able to create 1421 a journal file here. For now, only allow the 1422 user to specify an existing inode to be the 1423 journal file. */ 1424 if (is_remount) { 1425 ext4_msg(sb, KERN_ERR, 1426 "Cannot specify journal on remount"); 1427 return 0; 1428 } 1429 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); 1430 break; 1431 case Opt_journal_dev: 1432 if (is_remount) { 1433 ext4_msg(sb, KERN_ERR, 1434 "Cannot specify journal on remount"); 1435 return 0; 1436 } 1437 if (match_int(&args[0], &option)) 1438 return 0; 1439 *journal_devnum = option; 1440 break; 1441 case Opt_journal_checksum: 1442 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1443 break; 1444 case Opt_journal_async_commit: 1445 set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); 1446 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1447 break; 1448 case Opt_noload: 1449 set_opt(sbi->s_mount_opt, NOLOAD); 1450 break; 1451 case Opt_commit: 1452 if (match_int(&args[0], &option)) 1453 return 0; 1454 if (option < 0) 1455 return 0; 1456 if (option == 0) 1457 option = JBD2_DEFAULT_MAX_COMMIT_AGE; 1458 sbi->s_commit_interval = HZ * option; 1459 break; 1460 case Opt_max_batch_time: 1461 if (match_int(&args[0], &option)) 1462 return 0; 1463 if (option < 0) 1464 return 0; 1465 if (option == 0) 1466 option = EXT4_DEF_MAX_BATCH_TIME; 1467 sbi->s_max_batch_time = option; 1468 break; 1469 case 
Opt_min_batch_time: 1470 if (match_int(&args[0], &option)) 1471 return 0; 1472 if (option < 0) 1473 return 0; 1474 sbi->s_min_batch_time = option; 1475 break; 1476 case Opt_data_journal: 1477 data_opt = EXT4_MOUNT_JOURNAL_DATA; 1478 goto datacheck; 1479 case Opt_data_ordered: 1480 data_opt = EXT4_MOUNT_ORDERED_DATA; 1481 goto datacheck; 1482 case Opt_data_writeback: 1483 data_opt = EXT4_MOUNT_WRITEBACK_DATA; 1484 datacheck: 1485 if (is_remount) { 1486 if (test_opt(sb, DATA_FLAGS) != data_opt) { 1487 ext4_msg(sb, KERN_ERR, 1488 "Cannot change data mode on remount"); 1489 return 0; 1490 } 1491 } else { 1492 clear_opt(sbi->s_mount_opt, DATA_FLAGS); 1493 sbi->s_mount_opt |= data_opt; 1494 } 1495 break; 1496 case Opt_data_err_abort: 1497 set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1498 break; 1499 case Opt_data_err_ignore: 1500 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1501 break; 1502 #ifdef CONFIG_QUOTA 1503 case Opt_usrjquota: 1504 if (!set_qf_name(sb, USRQUOTA, &args[0])) 1505 return 0; 1506 break; 1507 case Opt_grpjquota: 1508 if (!set_qf_name(sb, GRPQUOTA, &args[0])) 1509 return 0; 1510 break; 1511 case Opt_offusrjquota: 1512 if (!clear_qf_name(sb, USRQUOTA)) 1513 return 0; 1514 break; 1515 case Opt_offgrpjquota: 1516 if (!clear_qf_name(sb, GRPQUOTA)) 1517 return 0; 1518 break; 1519 1520 case Opt_jqfmt_vfsold: 1521 qfmt = QFMT_VFS_OLD; 1522 goto set_qf_format; 1523 case Opt_jqfmt_vfsv0: 1524 qfmt = QFMT_VFS_V0; 1525 goto set_qf_format; 1526 case Opt_jqfmt_vfsv1: 1527 qfmt = QFMT_VFS_V1; 1528 set_qf_format: 1529 if (sb_any_quota_loaded(sb) && 1530 sbi->s_jquota_fmt != qfmt) { 1531 ext4_msg(sb, KERN_ERR, "Cannot change " 1532 "journaled quota options when " 1533 "quota turned on"); 1534 return 0; 1535 } 1536 sbi->s_jquota_fmt = qfmt; 1537 break; 1538 case Opt_quota: 1539 case Opt_usrquota: 1540 set_opt(sbi->s_mount_opt, QUOTA); 1541 set_opt(sbi->s_mount_opt, USRQUOTA); 1542 break; 1543 case Opt_grpquota: 1544 set_opt(sbi->s_mount_opt, QUOTA); 1545 
set_opt(sbi->s_mount_opt, GRPQUOTA); 1546 break; 1547 case Opt_noquota: 1548 if (sb_any_quota_loaded(sb)) { 1549 ext4_msg(sb, KERN_ERR, "Cannot change quota " 1550 "options when quota turned on"); 1551 return 0; 1552 } 1553 clear_opt(sbi->s_mount_opt, QUOTA); 1554 clear_opt(sbi->s_mount_opt, USRQUOTA); 1555 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1556 break; 1557 #else 1558 case Opt_quota: 1559 case Opt_usrquota: 1560 case Opt_grpquota: 1561 ext4_msg(sb, KERN_ERR, 1562 "quota options not supported"); 1563 break; 1564 case Opt_usrjquota: 1565 case Opt_grpjquota: 1566 case Opt_offusrjquota: 1567 case Opt_offgrpjquota: 1568 case Opt_jqfmt_vfsold: 1569 case Opt_jqfmt_vfsv0: 1570 case Opt_jqfmt_vfsv1: 1571 ext4_msg(sb, KERN_ERR, 1572 "journaled quota options not supported"); 1573 break; 1574 case Opt_noquota: 1575 break; 1576 #endif 1577 case Opt_abort: 1578 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; 1579 break; 1580 case Opt_nobarrier: 1581 clear_opt(sbi->s_mount_opt, BARRIER); 1582 break; 1583 case Opt_barrier: 1584 if (args[0].from) { 1585 if (match_int(&args[0], &option)) 1586 return 0; 1587 } else 1588 option = 1; /* No argument, default to 1 */ 1589 if (option) 1590 set_opt(sbi->s_mount_opt, BARRIER); 1591 else 1592 clear_opt(sbi->s_mount_opt, BARRIER); 1593 break; 1594 case Opt_ignore: 1595 break; 1596 case Opt_resize: 1597 if (!is_remount) { 1598 ext4_msg(sb, KERN_ERR, 1599 "resize option only available " 1600 "for remount"); 1601 return 0; 1602 } 1603 if (match_int(&args[0], &option) != 0) 1604 return 0; 1605 *n_blocks_count = option; 1606 break; 1607 case Opt_nobh: 1608 set_opt(sbi->s_mount_opt, NOBH); 1609 break; 1610 case Opt_bh: 1611 clear_opt(sbi->s_mount_opt, NOBH); 1612 break; 1613 case Opt_i_version: 1614 set_opt(sbi->s_mount_opt, I_VERSION); 1615 sb->s_flags |= MS_I_VERSION; 1616 break; 1617 case Opt_nodelalloc: 1618 clear_opt(sbi->s_mount_opt, DELALLOC); 1619 break; 1620 case Opt_stripe: 1621 if (match_int(&args[0], &option)) 1622 return 0; 1623 if 
(option < 0) 1624 return 0; 1625 sbi->s_stripe = option; 1626 break; 1627 case Opt_delalloc: 1628 set_opt(sbi->s_mount_opt, DELALLOC); 1629 break; 1630 case Opt_block_validity: 1631 set_opt(sbi->s_mount_opt, BLOCK_VALIDITY); 1632 break; 1633 case Opt_noblock_validity: 1634 clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY); 1635 break; 1636 case Opt_inode_readahead_blks: 1637 if (match_int(&args[0], &option)) 1638 return 0; 1639 if (option < 0 || option > (1 << 30)) 1640 return 0; 1641 if (!is_power_of_2(option)) { 1642 ext4_msg(sb, KERN_ERR, 1643 "EXT4-fs: inode_readahead_blks" 1644 " must be a power of 2"); 1645 return 0; 1646 } 1647 sbi->s_inode_readahead_blks = option; 1648 break; 1649 case Opt_journal_ioprio: 1650 if (match_int(&args[0], &option)) 1651 return 0; 1652 if (option < 0 || option > 7) 1653 break; 1654 *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 1655 option); 1656 break; 1657 case Opt_noauto_da_alloc: 1658 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1659 break; 1660 case Opt_auto_da_alloc: 1661 if (args[0].from) { 1662 if (match_int(&args[0], &option)) 1663 return 0; 1664 } else 1665 option = 1; /* No argument, default to 1 */ 1666 if (option) 1667 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); 1668 else 1669 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1670 break; 1671 case Opt_discard: 1672 set_opt(sbi->s_mount_opt, DISCARD); 1673 break; 1674 case Opt_nodiscard: 1675 clear_opt(sbi->s_mount_opt, DISCARD); 1676 break; 1677 case Opt_dioread_nolock: 1678 set_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 1679 break; 1680 case Opt_dioread_lock: 1681 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 1682 break; 1683 default: 1684 ext4_msg(sb, KERN_ERR, 1685 "Unrecognized mount option \"%s\" " 1686 "or missing value", p); 1687 return 0; 1688 } 1689 } 1690 #ifdef CONFIG_QUOTA 1691 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1692 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) 1693 clear_opt(sbi->s_mount_opt, USRQUOTA); 1694 1695 if 
(test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) 1696 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1697 1698 if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { 1699 ext4_msg(sb, KERN_ERR, "old and new quota " 1700 "format mixing"); 1701 return 0; 1702 } 1703 1704 if (!sbi->s_jquota_fmt) { 1705 ext4_msg(sb, KERN_ERR, "journaled quota format " 1706 "not specified"); 1707 return 0; 1708 } 1709 } else { 1710 if (sbi->s_jquota_fmt) { 1711 ext4_msg(sb, KERN_ERR, "journaled quota format " 1712 "specified with no journaling " 1713 "enabled"); 1714 return 0; 1715 } 1716 } 1717 #endif 1718 return 1; 1719 } 1720 1721 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, 1722 int read_only) 1723 { 1724 struct ext4_sb_info *sbi = EXT4_SB(sb); 1725 int res = 0; 1726 1727 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1728 ext4_msg(sb, KERN_ERR, "revision level too high, " 1729 "forcing read-only mode"); 1730 res = MS_RDONLY; 1731 } 1732 if (read_only) 1733 return res; 1734 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1735 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " 1736 "running e2fsck is recommended"); 1737 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1738 ext4_msg(sb, KERN_WARNING, 1739 "warning: mounting fs with errors, " 1740 "running e2fsck is recommended"); 1741 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1742 le16_to_cpu(es->s_mnt_count) >= 1743 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1744 ext4_msg(sb, KERN_WARNING, 1745 "warning: maximal mount count reached, " 1746 "running e2fsck is recommended"); 1747 else if (le32_to_cpu(es->s_checkinterval) && 1748 (le32_to_cpu(es->s_lastcheck) + 1749 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1750 ext4_msg(sb, KERN_WARNING, 1751 "warning: checktime reached, " 1752 "running e2fsck is recommended"); 1753 if (!sbi->s_journal) 1754 es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 1755 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1756 
es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1757 le16_add_cpu(&es->s_mnt_count, 1); 1758 es->s_mtime = cpu_to_le32(get_seconds()); 1759 ext4_update_dynamic_rev(sb); 1760 if (sbi->s_journal) 1761 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1762 1763 ext4_commit_super(sb, 1); 1764 if (test_opt(sb, DEBUG)) 1765 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1766 "bpg=%lu, ipg=%lu, mo=%04x]\n", 1767 sb->s_blocksize, 1768 sbi->s_groups_count, 1769 EXT4_BLOCKS_PER_GROUP(sb), 1770 EXT4_INODES_PER_GROUP(sb), 1771 sbi->s_mount_opt); 1772 1773 return res; 1774 } 1775 1776 static int ext4_fill_flex_info(struct super_block *sb) 1777 { 1778 struct ext4_sb_info *sbi = EXT4_SB(sb); 1779 struct ext4_group_desc *gdp = NULL; 1780 ext4_group_t flex_group_count; 1781 ext4_group_t flex_group; 1782 int groups_per_flex = 0; 1783 size_t size; 1784 int i; 1785 1786 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 1787 groups_per_flex = 1 << sbi->s_log_groups_per_flex; 1788 1789 if (groups_per_flex < 2) { 1790 sbi->s_log_groups_per_flex = 0; 1791 return 1; 1792 } 1793 1794 /* We allocate both existing and potentially added groups */ 1795 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1796 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << 1797 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; 1798 size = flex_group_count * sizeof(struct flex_groups); 1799 sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); 1800 if (sbi->s_flex_groups == NULL) { 1801 sbi->s_flex_groups = vmalloc(size); 1802 if (sbi->s_flex_groups) 1803 memset(sbi->s_flex_groups, 0, size); 1804 } 1805 if (sbi->s_flex_groups == NULL) { 1806 ext4_msg(sb, KERN_ERR, "not enough memory for " 1807 "%u flex groups", flex_group_count); 1808 goto failed; 1809 } 1810 1811 for (i = 0; i < sbi->s_groups_count; i++) { 1812 gdp = ext4_get_group_desc(sb, i, NULL); 1813 1814 flex_group = ext4_flex_group(sbi, i); 1815 atomic_add(ext4_free_inodes_count(sb, gdp), 1816 
&sbi->s_flex_groups[flex_group].free_inodes); 1817 atomic_add(ext4_free_blks_count(sb, gdp), 1818 &sbi->s_flex_groups[flex_group].free_blocks); 1819 atomic_add(ext4_used_dirs_count(sb, gdp), 1820 &sbi->s_flex_groups[flex_group].used_dirs); 1821 } 1822 1823 return 1; 1824 failed: 1825 return 0; 1826 } 1827 1828 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, 1829 struct ext4_group_desc *gdp) 1830 { 1831 __u16 crc = 0; 1832 1833 if (sbi->s_es->s_feature_ro_compat & 1834 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { 1835 int offset = offsetof(struct ext4_group_desc, bg_checksum); 1836 __le32 le_group = cpu_to_le32(block_group); 1837 1838 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); 1839 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); 1840 crc = crc16(crc, (__u8 *)gdp, offset); 1841 offset += sizeof(gdp->bg_checksum); /* skip checksum */ 1842 /* for checksum of struct ext4_group_desc do the rest...*/ 1843 if ((sbi->s_es->s_feature_incompat & 1844 cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && 1845 offset < le16_to_cpu(sbi->s_es->s_desc_size)) 1846 crc = crc16(crc, (__u8 *)gdp + offset, 1847 le16_to_cpu(sbi->s_es->s_desc_size) - 1848 offset); 1849 } 1850 1851 return cpu_to_le16(crc); 1852 } 1853 1854 int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, 1855 struct ext4_group_desc *gdp) 1856 { 1857 if ((sbi->s_es->s_feature_ro_compat & 1858 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) && 1859 (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp))) 1860 return 0; 1861 1862 return 1; 1863 } 1864 1865 /* Called at mount-time, super-block is locked */ 1866 static int ext4_check_descriptors(struct super_block *sb) 1867 { 1868 struct ext4_sb_info *sbi = EXT4_SB(sb); 1869 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); 1870 ext4_fsblk_t last_block; 1871 ext4_fsblk_t block_bitmap; 1872 ext4_fsblk_t inode_bitmap; 1873 ext4_fsblk_t inode_table; 1874 int flexbg_flag = 0; 
1875 ext4_group_t i; 1876 1877 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 1878 flexbg_flag = 1; 1879 1880 ext4_debug("Checking group descriptors"); 1881 1882 for (i = 0; i < sbi->s_groups_count; i++) { 1883 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 1884 1885 if (i == sbi->s_groups_count - 1 || flexbg_flag) 1886 last_block = ext4_blocks_count(sbi->s_es) - 1; 1887 else 1888 last_block = first_block + 1889 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 1890 1891 block_bitmap = ext4_block_bitmap(sb, gdp); 1892 if (block_bitmap < first_block || block_bitmap > last_block) { 1893 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 1894 "Block bitmap for group %u not in group " 1895 "(block %llu)!", i, block_bitmap); 1896 return 0; 1897 } 1898 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1899 if (inode_bitmap < first_block || inode_bitmap > last_block) { 1900 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 1901 "Inode bitmap for group %u not in group " 1902 "(block %llu)!", i, inode_bitmap); 1903 return 0; 1904 } 1905 inode_table = ext4_inode_table(sb, gdp); 1906 if (inode_table < first_block || 1907 inode_table + sbi->s_itb_per_group - 1 > last_block) { 1908 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 1909 "Inode table for group %u not in group " 1910 "(block %llu)!", i, inode_table); 1911 return 0; 1912 } 1913 ext4_lock_group(sb, i); 1914 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1915 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 1916 "Checksum for group %u failed (%u!=%u)", 1917 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1918 gdp)), le16_to_cpu(gdp->bg_checksum)); 1919 if (!(sb->s_flags & MS_RDONLY)) { 1920 ext4_unlock_group(sb, i); 1921 return 0; 1922 } 1923 } 1924 ext4_unlock_group(sb, i); 1925 if (!flexbg_flag) 1926 first_block += EXT4_BLOCKS_PER_GROUP(sb); 1927 } 1928 1929 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 1930 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); 1931 
return 1; 1932 } 1933 1934 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at 1935 * the superblock) which were deleted from all directories, but held open by 1936 * a process at the time of a crash. We walk the list and try to delete these 1937 * inodes at recovery time (only with a read-write filesystem). 1938 * 1939 * In order to keep the orphan inode chain consistent during traversal (in 1940 * case of crash during recovery), we link each inode into the superblock 1941 * orphan list_head and handle it the same way as an inode deletion during 1942 * normal operation (which journals the operations for us). 1943 * 1944 * We only do an iget() and an iput() on each inode, which is very safe if we 1945 * accidentally point at an in-use or already deleted inode. The worst that 1946 * can happen in this case is that we get a "bit already cleared" message from 1947 * ext4_free_inode(). The only reason we would point at a wrong inode is if 1948 * e2fsck was run on this filesystem, and it must have already done the orphan 1949 * inode cleanup for us, so we can safely abort without any further action. 
1950 */ 1951 static void ext4_orphan_cleanup(struct super_block *sb, 1952 struct ext4_super_block *es) 1953 { 1954 unsigned int s_flags = sb->s_flags; 1955 int nr_orphans = 0, nr_truncates = 0; 1956 #ifdef CONFIG_QUOTA 1957 int i; 1958 #endif 1959 if (!es->s_last_orphan) { 1960 jbd_debug(4, "no orphan inodes to clean up\n"); 1961 return; 1962 } 1963 1964 if (bdev_read_only(sb->s_bdev)) { 1965 ext4_msg(sb, KERN_ERR, "write access " 1966 "unavailable, skipping orphan cleanup"); 1967 return; 1968 } 1969 1970 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 1971 if (es->s_last_orphan) 1972 jbd_debug(1, "Errors on filesystem, " 1973 "clearing orphan list.\n"); 1974 es->s_last_orphan = 0; 1975 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 1976 return; 1977 } 1978 1979 if (s_flags & MS_RDONLY) { 1980 ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); 1981 sb->s_flags &= ~MS_RDONLY; 1982 } 1983 #ifdef CONFIG_QUOTA 1984 /* Needed for iput() to work correctly and not trash data */ 1985 sb->s_flags |= MS_ACTIVE; 1986 /* Turn on quotas so that they are updated correctly */ 1987 for (i = 0; i < MAXQUOTAS; i++) { 1988 if (EXT4_SB(sb)->s_qf_names[i]) { 1989 int ret = ext4_quota_on_mount(sb, i); 1990 if (ret < 0) 1991 ext4_msg(sb, KERN_ERR, 1992 "Cannot turn on journaled " 1993 "quota: error %d", ret); 1994 } 1995 } 1996 #endif 1997 1998 while (es->s_last_orphan) { 1999 struct inode *inode; 2000 2001 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); 2002 if (IS_ERR(inode)) { 2003 es->s_last_orphan = 0; 2004 break; 2005 } 2006 2007 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 2008 dquot_initialize(inode); 2009 if (inode->i_nlink) { 2010 ext4_msg(sb, KERN_DEBUG, 2011 "%s: truncating inode %lu to %lld bytes", 2012 __func__, inode->i_ino, inode->i_size); 2013 jbd_debug(2, "truncating inode %lu to %lld bytes\n", 2014 inode->i_ino, inode->i_size); 2015 ext4_truncate(inode); 2016 nr_truncates++; 2017 } else { 2018 ext4_msg(sb, 
KERN_DEBUG, 2019 "%s: deleting unreferenced inode %lu", 2020 __func__, inode->i_ino); 2021 jbd_debug(2, "deleting unreferenced inode %lu\n", 2022 inode->i_ino); 2023 nr_orphans++; 2024 } 2025 iput(inode); /* The delete magic happens here! */ 2026 } 2027 2028 #define PLURAL(x) (x), ((x) == 1) ? "" : "s" 2029 2030 if (nr_orphans) 2031 ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted", 2032 PLURAL(nr_orphans)); 2033 if (nr_truncates) 2034 ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", 2035 PLURAL(nr_truncates)); 2036 #ifdef CONFIG_QUOTA 2037 /* Turn quotas off */ 2038 for (i = 0; i < MAXQUOTAS; i++) { 2039 if (sb_dqopt(sb)->files[i]) 2040 vfs_quota_off(sb, i, 0); 2041 } 2042 #endif 2043 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 2044 } 2045 2046 /* 2047 * Maximal extent format file size. 2048 * Resulting logical blkno at s_maxbytes must fit in our on-disk 2049 * extent format containers, within a sector_t, and within i_blocks 2050 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, 2051 * so that won't be a limiting factor. 2052 * 2053 * Note, this does *not* consider any metadata overhead for vfs i_blocks. 2054 */ 2055 static loff_t ext4_max_size(int blkbits, int has_huge_files) 2056 { 2057 loff_t res; 2058 loff_t upper_limit = MAX_LFS_FILESIZE; 2059 2060 /* small i_blocks in vfs inode? 
*/ 2061 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 2062 /* 2063 * CONFIG_LBDAF is not enabled implies the inode 2064 * i_block represent total blocks in 512 bytes 2065 * 32 == size of vfs inode i_blocks * 8 2066 */ 2067 upper_limit = (1LL << 32) - 1; 2068 2069 /* total blocks in file system block size */ 2070 upper_limit >>= (blkbits - 9); 2071 upper_limit <<= blkbits; 2072 } 2073 2074 /* 32-bit extent-start container, ee_block */ 2075 res = 1LL << 32; 2076 res <<= blkbits; 2077 res -= 1; 2078 2079 /* Sanity check against vm- & vfs- imposed limits */ 2080 if (res > upper_limit) 2081 res = upper_limit; 2082 2083 return res; 2084 } 2085 2086 /* 2087 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect 2088 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. 2089 * We need to be 1 filesystem block less than the 2^48 sector limit. 2090 */ 2091 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) 2092 { 2093 loff_t res = EXT4_NDIR_BLOCKS; 2094 int meta_blocks; 2095 loff_t upper_limit; 2096 /* This is calculated to be the largest file size for a dense, block 2097 * mapped file such that the file's total number of 512-byte sectors, 2098 * including data and all indirect blocks, does not exceed (2^48 - 1). 2099 * 2100 * __u32 i_blocks_lo and _u16 i_blocks_high represent the total 2101 * number of 512-byte sectors of the file. 
2102 */ 2103 2104 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 2105 /* 2106 * !has_huge_files or CONFIG_LBDAF not enabled implies that 2107 * the inode i_block field represents total file blocks in 2108 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8 2109 */ 2110 upper_limit = (1LL << 32) - 1; 2111 2112 /* total blocks in file system block size */ 2113 upper_limit >>= (bits - 9); 2114 2115 } else { 2116 /* 2117 * We use 48 bit ext4_inode i_blocks 2118 * With EXT4_HUGE_FILE_FL set the i_blocks 2119 * represent total number of blocks in 2120 * file system block size 2121 */ 2122 upper_limit = (1LL << 48) - 1; 2123 2124 } 2125 2126 /* indirect blocks */ 2127 meta_blocks = 1; 2128 /* double indirect blocks */ 2129 meta_blocks += 1 + (1LL << (bits-2)); 2130 /* tripple indirect blocks */ 2131 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); 2132 2133 upper_limit -= meta_blocks; 2134 upper_limit <<= bits; 2135 2136 res += 1LL << (bits-2); 2137 res += 1LL << (2*(bits-2)); 2138 res += 1LL << (3*(bits-2)); 2139 res <<= bits; 2140 if (res > upper_limit) 2141 res = upper_limit; 2142 2143 if (res > MAX_LFS_FILESIZE) 2144 res = MAX_LFS_FILESIZE; 2145 2146 return res; 2147 } 2148 2149 static ext4_fsblk_t descriptor_loc(struct super_block *sb, 2150 ext4_fsblk_t logical_sb_block, int nr) 2151 { 2152 struct ext4_sb_info *sbi = EXT4_SB(sb); 2153 ext4_group_t bg, first_meta_bg; 2154 int has_super = 0; 2155 2156 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 2157 2158 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || 2159 nr < first_meta_bg) 2160 return logical_sb_block + nr + 1; 2161 bg = sbi->s_desc_per_block * nr; 2162 if (ext4_bg_has_super(sb, bg)) 2163 has_super = 1; 2164 2165 return (has_super + ext4_group_first_block_no(sb, bg)); 2166 } 2167 2168 /** 2169 * ext4_get_stripe_size: Get the stripe size. 
2170 * @sbi: In memory super block info 2171 * 2172 * If we have specified it via mount option, then 2173 * use the mount option value. If the value specified at mount time is 2174 * greater than the blocks per group use the super block value. 2175 * If the super block value is greater than blocks per group return 0. 2176 * Allocator needs it be less than blocks per group. 2177 * 2178 */ 2179 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) 2180 { 2181 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); 2182 unsigned long stripe_width = 2183 le32_to_cpu(sbi->s_es->s_raid_stripe_width); 2184 2185 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) 2186 return sbi->s_stripe; 2187 2188 if (stripe_width <= sbi->s_blocks_per_group) 2189 return stripe_width; 2190 2191 if (stride <= sbi->s_blocks_per_group) 2192 return stride; 2193 2194 return 0; 2195 } 2196 2197 /* sysfs supprt */ 2198 2199 struct ext4_attr { 2200 struct attribute attr; 2201 ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); 2202 ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, 2203 const char *, size_t); 2204 int offset; 2205 }; 2206 2207 static int parse_strtoul(const char *buf, 2208 unsigned long max, unsigned long *value) 2209 { 2210 char *endp; 2211 2212 *value = simple_strtoul(skip_spaces(buf), &endp, 0); 2213 endp = skip_spaces(endp); 2214 if (*endp || *value > max) 2215 return -EINVAL; 2216 2217 return 0; 2218 } 2219 2220 static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a, 2221 struct ext4_sb_info *sbi, 2222 char *buf) 2223 { 2224 return snprintf(buf, PAGE_SIZE, "%llu\n", 2225 (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); 2226 } 2227 2228 static ssize_t session_write_kbytes_show(struct ext4_attr *a, 2229 struct ext4_sb_info *sbi, char *buf) 2230 { 2231 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2232 2233 return snprintf(buf, PAGE_SIZE, "%lu\n", 2234 (part_stat_read(sb->s_bdev->bd_part, sectors[1]) 
- 2235 sbi->s_sectors_written_start) >> 1); 2236 } 2237 2238 static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, 2239 struct ext4_sb_info *sbi, char *buf) 2240 { 2241 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2242 2243 return snprintf(buf, PAGE_SIZE, "%llu\n", 2244 (unsigned long long)(sbi->s_kbytes_written + 2245 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2246 EXT4_SB(sb)->s_sectors_written_start) >> 1))); 2247 } 2248 2249 static ssize_t inode_readahead_blks_store(struct ext4_attr *a, 2250 struct ext4_sb_info *sbi, 2251 const char *buf, size_t count) 2252 { 2253 unsigned long t; 2254 2255 if (parse_strtoul(buf, 0x40000000, &t)) 2256 return -EINVAL; 2257 2258 if (!is_power_of_2(t)) 2259 return -EINVAL; 2260 2261 sbi->s_inode_readahead_blks = t; 2262 return count; 2263 } 2264 2265 static ssize_t sbi_ui_show(struct ext4_attr *a, 2266 struct ext4_sb_info *sbi, char *buf) 2267 { 2268 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); 2269 2270 return snprintf(buf, PAGE_SIZE, "%u\n", *ui); 2271 } 2272 2273 static ssize_t sbi_ui_store(struct ext4_attr *a, 2274 struct ext4_sb_info *sbi, 2275 const char *buf, size_t count) 2276 { 2277 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); 2278 unsigned long t; 2279 2280 if (parse_strtoul(buf, 0xffffffff, &t)) 2281 return -EINVAL; 2282 *ui = t; 2283 return count; 2284 } 2285 2286 #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ 2287 static struct ext4_attr ext4_attr_##_name = { \ 2288 .attr = {.name = __stringify(_name), .mode = _mode }, \ 2289 .show = _show, \ 2290 .store = _store, \ 2291 .offset = offsetof(struct ext4_sb_info, _elname), \ 2292 } 2293 #define EXT4_ATTR(name, mode, show, store) \ 2294 static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) 2295 2296 #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) 2297 #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) 2298 #define 
EXT4_RW_ATTR_SBI_UI(name, elname) \ 2299 EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) 2300 #define ATTR_LIST(name) &ext4_attr_##name.attr 2301 2302 EXT4_RO_ATTR(delayed_allocation_blocks); 2303 EXT4_RO_ATTR(session_write_kbytes); 2304 EXT4_RO_ATTR(lifetime_write_kbytes); 2305 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, 2306 inode_readahead_blks_store, s_inode_readahead_blks); 2307 EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); 2308 EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); 2309 EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); 2310 EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); 2311 EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); 2312 EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); 2313 EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); 2314 EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); 2315 2316 static struct attribute *ext4_attrs[] = { 2317 ATTR_LIST(delayed_allocation_blocks), 2318 ATTR_LIST(session_write_kbytes), 2319 ATTR_LIST(lifetime_write_kbytes), 2320 ATTR_LIST(inode_readahead_blks), 2321 ATTR_LIST(inode_goal), 2322 ATTR_LIST(mb_stats), 2323 ATTR_LIST(mb_max_to_scan), 2324 ATTR_LIST(mb_min_to_scan), 2325 ATTR_LIST(mb_order2_req), 2326 ATTR_LIST(mb_stream_req), 2327 ATTR_LIST(mb_group_prealloc), 2328 ATTR_LIST(max_writeback_mb_bump), 2329 NULL, 2330 }; 2331 2332 static ssize_t ext4_attr_show(struct kobject *kobj, 2333 struct attribute *attr, char *buf) 2334 { 2335 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2336 s_kobj); 2337 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); 2338 2339 return a->show ? 
a->show(a, sbi, buf) : 0; 2340 } 2341 2342 static ssize_t ext4_attr_store(struct kobject *kobj, 2343 struct attribute *attr, 2344 const char *buf, size_t len) 2345 { 2346 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2347 s_kobj); 2348 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); 2349 2350 return a->store ? a->store(a, sbi, buf, len) : 0; 2351 } 2352 2353 static void ext4_sb_release(struct kobject *kobj) 2354 { 2355 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2356 s_kobj); 2357 complete(&sbi->s_kobj_unregister); 2358 } 2359 2360 2361 static struct sysfs_ops ext4_attr_ops = { 2362 .show = ext4_attr_show, 2363 .store = ext4_attr_store, 2364 }; 2365 2366 static struct kobj_type ext4_ktype = { 2367 .default_attrs = ext4_attrs, 2368 .sysfs_ops = &ext4_attr_ops, 2369 .release = ext4_sb_release, 2370 }; 2371 2372 /* 2373 * Check whether this filesystem can be mounted based on 2374 * the features present and the RDONLY/RDWR mount requested. 2375 * Returns 1 if this filesystem can be mounted as requested, 2376 * 0 if it cannot be. 
2377 */ 2378 static int ext4_feature_set_ok(struct super_block *sb, int readonly) 2379 { 2380 if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) { 2381 ext4_msg(sb, KERN_ERR, 2382 "Couldn't mount because of " 2383 "unsupported optional features (%x)", 2384 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & 2385 ~EXT4_FEATURE_INCOMPAT_SUPP)); 2386 return 0; 2387 } 2388 2389 if (readonly) 2390 return 1; 2391 2392 /* Check that feature set is OK for a read-write mount */ 2393 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { 2394 ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " 2395 "unsupported optional features (%x)", 2396 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & 2397 ~EXT4_FEATURE_RO_COMPAT_SUPP)); 2398 return 0; 2399 } 2400 /* 2401 * Large file size enabled file system can only be mounted 2402 * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF 2403 */ 2404 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { 2405 if (sizeof(blkcnt_t) < sizeof(u64)) { 2406 ext4_msg(sb, KERN_ERR, "Filesystem with huge files " 2407 "cannot be mounted RDWR without " 2408 "CONFIG_LBDAF"); 2409 return 0; 2410 } 2411 } 2412 return 1; 2413 } 2414 2415 static int ext4_fill_super(struct super_block *sb, void *data, int silent) 2416 __releases(kernel_lock) 2417 __acquires(kernel_lock) 2418 { 2419 struct buffer_head *bh; 2420 struct ext4_super_block *es = NULL; 2421 struct ext4_sb_info *sbi; 2422 ext4_fsblk_t block; 2423 ext4_fsblk_t sb_block = get_sb_block(&data); 2424 ext4_fsblk_t logical_sb_block; 2425 unsigned long offset = 0; 2426 unsigned long journal_devnum = 0; 2427 unsigned long def_mount_opts; 2428 struct inode *root; 2429 char *cp; 2430 const char *descr; 2431 int ret = -EINVAL; 2432 int blocksize; 2433 unsigned int db_count; 2434 unsigned int i; 2435 int needs_recovery, has_huge_files; 2436 __u64 blocks_count; 2437 int err; 2438 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 2439 
2440 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 2441 if (!sbi) 2442 return -ENOMEM; 2443 2444 sbi->s_blockgroup_lock = 2445 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); 2446 if (!sbi->s_blockgroup_lock) { 2447 kfree(sbi); 2448 return -ENOMEM; 2449 } 2450 sb->s_fs_info = sbi; 2451 sbi->s_mount_opt = 0; 2452 sbi->s_resuid = EXT4_DEF_RESUID; 2453 sbi->s_resgid = EXT4_DEF_RESGID; 2454 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 2455 sbi->s_sb_block = sb_block; 2456 sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part, 2457 sectors[1]); 2458 2459 unlock_kernel(); 2460 2461 /* Cleanup superblock name */ 2462 for (cp = sb->s_id; (cp = strchr(cp, '/'));) 2463 *cp = '!'; 2464 2465 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 2466 if (!blocksize) { 2467 ext4_msg(sb, KERN_ERR, "unable to set blocksize"); 2468 goto out_fail; 2469 } 2470 2471 /* 2472 * The ext4 superblock will not be buffer aligned for other than 1kB 2473 * block sizes. We need to calculate the offset from buffer start. 
2474 */ 2475 if (blocksize != EXT4_MIN_BLOCK_SIZE) { 2476 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 2477 offset = do_div(logical_sb_block, blocksize); 2478 } else { 2479 logical_sb_block = sb_block; 2480 } 2481 2482 if (!(bh = sb_bread(sb, logical_sb_block))) { 2483 ext4_msg(sb, KERN_ERR, "unable to read superblock"); 2484 goto out_fail; 2485 } 2486 /* 2487 * Note: s_es must be initialized as soon as possible because 2488 * some ext4 macro-instructions depend on its value 2489 */ 2490 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 2491 sbi->s_es = es; 2492 sb->s_magic = le16_to_cpu(es->s_magic); 2493 if (sb->s_magic != EXT4_SUPER_MAGIC) 2494 goto cantfind_ext4; 2495 sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); 2496 2497 /* Set defaults before we parse the mount options */ 2498 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 2499 if (def_mount_opts & EXT4_DEFM_DEBUG) 2500 set_opt(sbi->s_mount_opt, DEBUG); 2501 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { 2502 ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups", 2503 "2.6.38"); 2504 set_opt(sbi->s_mount_opt, GRPID); 2505 } 2506 if (def_mount_opts & EXT4_DEFM_UID16) 2507 set_opt(sbi->s_mount_opt, NO_UID32); 2508 #ifdef CONFIG_EXT4_FS_XATTR 2509 if (def_mount_opts & EXT4_DEFM_XATTR_USER) 2510 set_opt(sbi->s_mount_opt, XATTR_USER); 2511 #endif 2512 #ifdef CONFIG_EXT4_FS_POSIX_ACL 2513 if (def_mount_opts & EXT4_DEFM_ACL) 2514 set_opt(sbi->s_mount_opt, POSIX_ACL); 2515 #endif 2516 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 2517 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 2518 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 2519 set_opt(sbi->s_mount_opt, ORDERED_DATA); 2520 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) 2521 set_opt(sbi->s_mount_opt, WRITEBACK_DATA); 2522 2523 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) 2524 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 2525 else if 
(le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) 2526 set_opt(sbi->s_mount_opt, ERRORS_CONT); 2527 else 2528 set_opt(sbi->s_mount_opt, ERRORS_RO); 2529 2530 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 2531 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 2532 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; 2533 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; 2534 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 2535 2536 set_opt(sbi->s_mount_opt, BARRIER); 2537 2538 /* 2539 * enable delayed allocation by default 2540 * Use -o nodelalloc to turn it off 2541 */ 2542 set_opt(sbi->s_mount_opt, DELALLOC); 2543 2544 if (!parse_options((char *) data, sb, &journal_devnum, 2545 &journal_ioprio, NULL, 0)) 2546 goto failed_mount; 2547 2548 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2549 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 2550 2551 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && 2552 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || 2553 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 2554 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) 2555 ext4_msg(sb, KERN_WARNING, 2556 "feature flags set on rev 0 fs, " 2557 "running e2fsck is recommended"); 2558 2559 /* 2560 * Check feature flags regardless of the revision level, since we 2561 * previously didn't change the revision level when setting the flags, 2562 * so there is a chance incompat flags are set on a rev 0 filesystem. 
2563 */ 2564 if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) 2565 goto failed_mount; 2566 2567 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 2568 2569 if (blocksize < EXT4_MIN_BLOCK_SIZE || 2570 blocksize > EXT4_MAX_BLOCK_SIZE) { 2571 ext4_msg(sb, KERN_ERR, 2572 "Unsupported filesystem blocksize %d", blocksize); 2573 goto failed_mount; 2574 } 2575 2576 if (sb->s_blocksize != blocksize) { 2577 /* Validate the filesystem blocksize */ 2578 if (!sb_set_blocksize(sb, blocksize)) { 2579 ext4_msg(sb, KERN_ERR, "bad block size %d", 2580 blocksize); 2581 goto failed_mount; 2582 } 2583 2584 brelse(bh); 2585 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 2586 offset = do_div(logical_sb_block, blocksize); 2587 bh = sb_bread(sb, logical_sb_block); 2588 if (!bh) { 2589 ext4_msg(sb, KERN_ERR, 2590 "Can't read superblock on 2nd try"); 2591 goto failed_mount; 2592 } 2593 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); 2594 sbi->s_es = es; 2595 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 2596 ext4_msg(sb, KERN_ERR, 2597 "Magic mismatch, very weird!"); 2598 goto failed_mount; 2599 } 2600 } 2601 2602 has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, 2603 EXT4_FEATURE_RO_COMPAT_HUGE_FILE); 2604 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, 2605 has_huge_files); 2606 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); 2607 2608 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { 2609 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; 2610 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; 2611 } else { 2612 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 2613 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 2614 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 2615 (!is_power_of_2(sbi->s_inode_size)) || 2616 (sbi->s_inode_size > blocksize)) { 2617 ext4_msg(sb, KERN_ERR, 2618 "unsupported inode size: %d", 2619 sbi->s_inode_size); 2620 goto failed_mount; 2621 } 2622 if (sbi->s_inode_size > 
EXT4_GOOD_OLD_INODE_SIZE) 2623 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); 2624 } 2625 2626 sbi->s_desc_size = le16_to_cpu(es->s_desc_size); 2627 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { 2628 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || 2629 sbi->s_desc_size > EXT4_MAX_DESC_SIZE || 2630 !is_power_of_2(sbi->s_desc_size)) { 2631 ext4_msg(sb, KERN_ERR, 2632 "unsupported descriptor size %lu", 2633 sbi->s_desc_size); 2634 goto failed_mount; 2635 } 2636 } else 2637 sbi->s_desc_size = EXT4_MIN_DESC_SIZE; 2638 2639 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 2640 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 2641 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) 2642 goto cantfind_ext4; 2643 2644 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); 2645 if (sbi->s_inodes_per_block == 0) 2646 goto cantfind_ext4; 2647 sbi->s_itb_per_group = sbi->s_inodes_per_group / 2648 sbi->s_inodes_per_block; 2649 sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); 2650 sbi->s_sbh = bh; 2651 sbi->s_mount_state = le16_to_cpu(es->s_state); 2652 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 2653 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 2654 2655 for (i = 0; i < 4; i++) 2656 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 2657 sbi->s_def_hash_version = es->s_def_hash_version; 2658 i = le32_to_cpu(es->s_flags); 2659 if (i & EXT2_FLAGS_UNSIGNED_HASH) 2660 sbi->s_hash_unsigned = 3; 2661 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { 2662 #ifdef __CHAR_UNSIGNED__ 2663 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); 2664 sbi->s_hash_unsigned = 3; 2665 #else 2666 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); 2667 #endif 2668 sb->s_dirt = 1; 2669 } 2670 2671 if (sbi->s_blocks_per_group > blocksize * 8) { 2672 ext4_msg(sb, KERN_ERR, 2673 "#blocks per group too big: %lu", 2674 sbi->s_blocks_per_group); 2675 goto failed_mount; 2676 } 2677 if 
(sbi->s_inodes_per_group > blocksize * 8) { 2678 ext4_msg(sb, KERN_ERR, 2679 "#inodes per group too big: %lu", 2680 sbi->s_inodes_per_group); 2681 goto failed_mount; 2682 } 2683 2684 /* 2685 * Test whether we have more sectors than will fit in sector_t, 2686 * and whether the max offset is addressable by the page cache. 2687 */ 2688 if ((ext4_blocks_count(es) > 2689 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || 2690 (ext4_blocks_count(es) > 2691 (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) { 2692 ext4_msg(sb, KERN_ERR, "filesystem" 2693 " too large to mount safely on this system"); 2694 if (sizeof(sector_t) < 8) 2695 ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); 2696 ret = -EFBIG; 2697 goto failed_mount; 2698 } 2699 2700 if (EXT4_BLOCKS_PER_GROUP(sb) == 0) 2701 goto cantfind_ext4; 2702 2703 /* check blocks count against device size */ 2704 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; 2705 if (blocks_count && ext4_blocks_count(es) > blocks_count) { 2706 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " 2707 "exceeds size of device (%llu blocks)", 2708 ext4_blocks_count(es), blocks_count); 2709 goto failed_mount; 2710 } 2711 2712 /* 2713 * It makes no sense for the first data block to be beyond the end 2714 * of the filesystem. 
2715 */ 2716 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { 2717 ext4_msg(sb, KERN_WARNING, "bad geometry: first data" 2718 "block %u is beyond end of filesystem (%llu)", 2719 le32_to_cpu(es->s_first_data_block), 2720 ext4_blocks_count(es)); 2721 goto failed_mount; 2722 } 2723 blocks_count = (ext4_blocks_count(es) - 2724 le32_to_cpu(es->s_first_data_block) + 2725 EXT4_BLOCKS_PER_GROUP(sb) - 1); 2726 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 2727 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { 2728 ext4_msg(sb, KERN_WARNING, "groups count too large: %u " 2729 "(block count %llu, first data block %u, " 2730 "blocks per group %lu)", sbi->s_groups_count, 2731 ext4_blocks_count(es), 2732 le32_to_cpu(es->s_first_data_block), 2733 EXT4_BLOCKS_PER_GROUP(sb)); 2734 goto failed_mount; 2735 } 2736 sbi->s_groups_count = blocks_count; 2737 sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, 2738 (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); 2739 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 2740 EXT4_DESC_PER_BLOCK(sb); 2741 sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), 2742 GFP_KERNEL); 2743 if (sbi->s_group_desc == NULL) { 2744 ext4_msg(sb, KERN_ERR, "not enough memory"); 2745 goto failed_mount; 2746 } 2747 2748 #ifdef CONFIG_PROC_FS 2749 if (ext4_proc_root) 2750 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 2751 #endif 2752 2753 bgl_lock_init(sbi->s_blockgroup_lock); 2754 2755 for (i = 0; i < db_count; i++) { 2756 block = descriptor_loc(sb, logical_sb_block, i); 2757 sbi->s_group_desc[i] = sb_bread(sb, block); 2758 if (!sbi->s_group_desc[i]) { 2759 ext4_msg(sb, KERN_ERR, 2760 "can't read group descriptor %d", i); 2761 db_count = i; 2762 goto failed_mount2; 2763 } 2764 } 2765 if (!ext4_check_descriptors(sb)) { 2766 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 2767 goto failed_mount2; 2768 } 2769 if (EXT4_HAS_INCOMPAT_FEATURE(sb, 
EXT4_FEATURE_INCOMPAT_FLEX_BG)) 2770 if (!ext4_fill_flex_info(sb)) { 2771 ext4_msg(sb, KERN_ERR, 2772 "unable to initialize " 2773 "flex_bg meta info!"); 2774 goto failed_mount2; 2775 } 2776 2777 sbi->s_gdb_count = db_count; 2778 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 2779 spin_lock_init(&sbi->s_next_gen_lock); 2780 2781 err = percpu_counter_init(&sbi->s_freeblocks_counter, 2782 ext4_count_free_blocks(sb)); 2783 if (!err) { 2784 err = percpu_counter_init(&sbi->s_freeinodes_counter, 2785 ext4_count_free_inodes(sb)); 2786 } 2787 if (!err) { 2788 err = percpu_counter_init(&sbi->s_dirs_counter, 2789 ext4_count_dirs(sb)); 2790 } 2791 if (!err) { 2792 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); 2793 } 2794 if (err) { 2795 ext4_msg(sb, KERN_ERR, "insufficient memory"); 2796 goto failed_mount3; 2797 } 2798 2799 sbi->s_stripe = ext4_get_stripe_size(sbi); 2800 sbi->s_max_writeback_mb_bump = 128; 2801 2802 /* 2803 * set up enough so that it can read an inode 2804 */ 2805 if (!test_opt(sb, NOLOAD) && 2806 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) 2807 sb->s_op = &ext4_sops; 2808 else 2809 sb->s_op = &ext4_nojournal_sops; 2810 sb->s_export_op = &ext4_export_ops; 2811 sb->s_xattr = ext4_xattr_handlers; 2812 #ifdef CONFIG_QUOTA 2813 sb->s_qcop = &ext4_qctl_operations; 2814 sb->dq_op = &ext4_quota_operations; 2815 #endif 2816 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 2817 mutex_init(&sbi->s_orphan_lock); 2818 mutex_init(&sbi->s_resize_lock); 2819 2820 sb->s_root = NULL; 2821 2822 needs_recovery = (es->s_last_orphan != 0 || 2823 EXT4_HAS_INCOMPAT_FEATURE(sb, 2824 EXT4_FEATURE_INCOMPAT_RECOVER)); 2825 2826 /* 2827 * The first inode we look at is the journal inode. Don't try 2828 * root first: it may be modified in the journal! 
2829 */ 2830 if (!test_opt(sb, NOLOAD) && 2831 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 2832 if (ext4_load_journal(sb, es, journal_devnum)) 2833 goto failed_mount3; 2834 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 2835 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 2836 ext4_msg(sb, KERN_ERR, "required journal recovery " 2837 "suppressed and not mounted read-only"); 2838 goto failed_mount_wq; 2839 } else { 2840 clear_opt(sbi->s_mount_opt, DATA_FLAGS); 2841 set_opt(sbi->s_mount_opt, WRITEBACK_DATA); 2842 sbi->s_journal = NULL; 2843 needs_recovery = 0; 2844 goto no_journal; 2845 } 2846 2847 if (ext4_blocks_count(es) > 0xffffffffULL && 2848 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 2849 JBD2_FEATURE_INCOMPAT_64BIT)) { 2850 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); 2851 goto failed_mount_wq; 2852 } 2853 2854 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { 2855 jbd2_journal_set_features(sbi->s_journal, 2856 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 2857 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2858 } else if (test_opt(sb, JOURNAL_CHECKSUM)) { 2859 jbd2_journal_set_features(sbi->s_journal, 2860 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); 2861 jbd2_journal_clear_features(sbi->s_journal, 0, 0, 2862 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2863 } else { 2864 jbd2_journal_clear_features(sbi->s_journal, 2865 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 2866 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2867 } 2868 2869 /* We have now updated the journal if required, so we can 2870 * validate the data journaling mode. 
*/ 2871 switch (test_opt(sb, DATA_FLAGS)) { 2872 case 0: 2873 /* No mode set, assume a default based on the journal 2874 * capabilities: ORDERED_DATA if the journal can 2875 * cope, else JOURNAL_DATA 2876 */ 2877 if (jbd2_journal_check_available_features 2878 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) 2879 set_opt(sbi->s_mount_opt, ORDERED_DATA); 2880 else 2881 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 2882 break; 2883 2884 case EXT4_MOUNT_ORDERED_DATA: 2885 case EXT4_MOUNT_WRITEBACK_DATA: 2886 if (!jbd2_journal_check_available_features 2887 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 2888 ext4_msg(sb, KERN_ERR, "Journal does not support " 2889 "requested data journaling mode"); 2890 goto failed_mount_wq; 2891 } 2892 default: 2893 break; 2894 } 2895 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 2896 2897 no_journal: 2898 if (test_opt(sb, NOBH)) { 2899 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { 2900 ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " 2901 "its supported only with writeback mode"); 2902 clear_opt(sbi->s_mount_opt, NOBH); 2903 } 2904 if (test_opt(sb, DIOREAD_NOLOCK)) { 2905 ext4_msg(sb, KERN_WARNING, "dioread_nolock option is " 2906 "not supported with nobh mode"); 2907 goto failed_mount_wq; 2908 } 2909 } 2910 EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); 2911 if (!EXT4_SB(sb)->dio_unwritten_wq) { 2912 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); 2913 goto failed_mount_wq; 2914 } 2915 2916 /* 2917 * The jbd2_journal_load will have done any necessary log recovery, 2918 * so we can safely mount the rest of the filesystem now. 
2919 */ 2920 2921 root = ext4_iget(sb, EXT4_ROOT_INO); 2922 if (IS_ERR(root)) { 2923 ext4_msg(sb, KERN_ERR, "get root inode failed"); 2924 ret = PTR_ERR(root); 2925 goto failed_mount4; 2926 } 2927 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 2928 iput(root); 2929 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); 2930 goto failed_mount4; 2931 } 2932 sb->s_root = d_alloc_root(root); 2933 if (!sb->s_root) { 2934 ext4_msg(sb, KERN_ERR, "get root dentry failed"); 2935 iput(root); 2936 ret = -ENOMEM; 2937 goto failed_mount4; 2938 } 2939 2940 ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY); 2941 2942 /* determine the minimum size of new large inodes, if present */ 2943 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { 2944 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 2945 EXT4_GOOD_OLD_INODE_SIZE; 2946 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 2947 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { 2948 if (sbi->s_want_extra_isize < 2949 le16_to_cpu(es->s_want_extra_isize)) 2950 sbi->s_want_extra_isize = 2951 le16_to_cpu(es->s_want_extra_isize); 2952 if (sbi->s_want_extra_isize < 2953 le16_to_cpu(es->s_min_extra_isize)) 2954 sbi->s_want_extra_isize = 2955 le16_to_cpu(es->s_min_extra_isize); 2956 } 2957 } 2958 /* Check if enough inode space is available */ 2959 if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > 2960 sbi->s_inode_size) { 2961 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 2962 EXT4_GOOD_OLD_INODE_SIZE; 2963 ext4_msg(sb, KERN_INFO, "required extra inode space not" 2964 "available"); 2965 } 2966 2967 if (test_opt(sb, DELALLOC) && 2968 (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) { 2969 ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - " 2970 "requested data journaling mode"); 2971 clear_opt(sbi->s_mount_opt, DELALLOC); 2972 } 2973 if (test_opt(sb, DIOREAD_NOLOCK)) { 2974 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 2975 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " 2976 "option - 
requested data journaling mode"); 2977 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 2978 } 2979 if (sb->s_blocksize < PAGE_SIZE) { 2980 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " 2981 "option - block size is too small"); 2982 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 2983 } 2984 } 2985 2986 err = ext4_setup_system_zone(sb); 2987 if (err) { 2988 ext4_msg(sb, KERN_ERR, "failed to initialize system " 2989 "zone (%d)\n", err); 2990 goto failed_mount4; 2991 } 2992 2993 ext4_ext_init(sb); 2994 err = ext4_mb_init(sb, needs_recovery); 2995 if (err) { 2996 ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)", 2997 err); 2998 goto failed_mount4; 2999 } 3000 3001 sbi->s_kobj.kset = ext4_kset; 3002 init_completion(&sbi->s_kobj_unregister); 3003 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, 3004 "%s", sb->s_id); 3005 if (err) { 3006 ext4_mb_release(sb); 3007 ext4_ext_release(sb); 3008 goto failed_mount4; 3009 }; 3010 3011 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 3012 ext4_orphan_cleanup(sb, es); 3013 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 3014 if (needs_recovery) { 3015 ext4_msg(sb, KERN_INFO, "recovery complete"); 3016 ext4_mark_recovery_complete(sb, es); 3017 } 3018 if (EXT4_SB(sb)->s_journal) { 3019 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 3020 descr = " journalled data mode"; 3021 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 3022 descr = " ordered data mode"; 3023 else 3024 descr = " writeback data mode"; 3025 } else 3026 descr = "out journal"; 3027 3028 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr); 3029 3030 lock_kernel(); 3031 return 0; 3032 3033 cantfind_ext4: 3034 if (!silent) 3035 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); 3036 goto failed_mount; 3037 3038 failed_mount4: 3039 ext4_msg(sb, KERN_ERR, "mount failed"); 3040 destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); 3041 failed_mount_wq: 3042 ext4_release_system_zone(sb); 3043 if (sbi->s_journal) { 3044 
jbd2_journal_destroy(sbi->s_journal); 3045 sbi->s_journal = NULL; 3046 } 3047 failed_mount3: 3048 if (sbi->s_flex_groups) { 3049 if (is_vmalloc_addr(sbi->s_flex_groups)) 3050 vfree(sbi->s_flex_groups); 3051 else 3052 kfree(sbi->s_flex_groups); 3053 } 3054 percpu_counter_destroy(&sbi->s_freeblocks_counter); 3055 percpu_counter_destroy(&sbi->s_freeinodes_counter); 3056 percpu_counter_destroy(&sbi->s_dirs_counter); 3057 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 3058 failed_mount2: 3059 for (i = 0; i < db_count; i++) 3060 brelse(sbi->s_group_desc[i]); 3061 kfree(sbi->s_group_desc); 3062 failed_mount: 3063 if (sbi->s_proc) { 3064 remove_proc_entry(sb->s_id, ext4_proc_root); 3065 } 3066 #ifdef CONFIG_QUOTA 3067 for (i = 0; i < MAXQUOTAS; i++) 3068 kfree(sbi->s_qf_names[i]); 3069 #endif 3070 ext4_blkdev_remove(sbi); 3071 brelse(bh); 3072 out_fail: 3073 sb->s_fs_info = NULL; 3074 kfree(sbi->s_blockgroup_lock); 3075 kfree(sbi); 3076 lock_kernel(); 3077 return ret; 3078 } 3079 3080 /* 3081 * Setup any per-fs journal parameters now. We'll do this both on 3082 * initial mount, once the journal has been initialised but before we've 3083 * done any recovery; and again on any subsequent remount. 
*/
static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	/* timing parameters are copied straight from the mount-time values */
	journal->j_commit_interval = sbi->s_commit_interval;
	journal->j_min_batch_time = sbi->s_min_batch_time;
	journal->j_max_batch_time = sbi->s_max_batch_time;

	/* j_flags is updated under j_state_lock to mirror the mount options */
	spin_lock(&journal->j_state_lock);
	if (test_opt(sb, BARRIER))
		journal->j_flags |= JBD2_BARRIER;
	else
		journal->j_flags &= ~JBD2_BARRIER;
	if (test_opt(sb, DATA_ERR_ABORT))
		journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
	else
		journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
	spin_unlock(&journal->j_state_lock);
}

/*
 * Build a journal on top of the inode numbered @journal_inum.
 *
 * The inode must be readable, still linked (i_nlink != 0) and a regular
 * file.  Returns the initialised journal, or NULL after logging an error;
 * on every failure path the inode reference taken here is dropped again.
 * On success the reference is not dropped here (presumably the journal
 * keeps it -- confirm against jbd2_journal_init_inode()).
 */
static journal_t *ext4_get_journal(struct super_block *sb,
				   unsigned int journal_inum)
{
	struct inode *journal_inode;
	journal_t *journal;

	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));

	/* First, test for the existence of a valid inode on disk. Bad
	 * things happen if we iget() an unused inode, as the subsequent
	 * iput() will try to delete it. */

	journal_inode = ext4_iget(sb, journal_inum);
	if (IS_ERR(journal_inode)) {
		ext4_msg(sb, KERN_ERR, "no journal found");
		return NULL;
	}
	if (!journal_inode->i_nlink) {
		/* marked bad before iput() -- see the comment above */
		make_bad_inode(journal_inode);
		iput(journal_inode);
		ext4_msg(sb, KERN_ERR, "journal inode is deleted");
		return NULL;
	}

	jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
		  journal_inode, journal_inode->i_size);
	if (!S_ISREG(journal_inode->i_mode)) {
		ext4_msg(sb, KERN_ERR, "invalid journal inode");
		iput(journal_inode);
		return NULL;
	}

	journal = jbd2_journal_init_inode(journal_inode);
	if (!journal) {
		ext4_msg(sb, KERN_ERR, "Could not load journal inode");
		iput(journal_inode);
		return NULL;
	}
	journal->j_private = sb;
	ext4_init_journal_params(sb, journal);
	return journal;
}

/*
 * Build a journal on the external block device @j_dev.
 *
 * The device is opened and claimed for @sb, its ext4 super block is read
 * and checked (magic, JOURNAL_DEV incompat flag, UUID equal to this
 * filesystem's s_journal_uuid), and a jbd2 journal is created over the
 * blocks following that super block.  The journal super block must be
 * readable and list exactly one user.
 *
 * Returns the journal with sbi->journal_bdev set, or NULL after logging an
 * error and releasing the device.
 */
static journal_t *ext4_get_dev_journal(struct super_block *sb,
				       dev_t j_dev)
{
	struct buffer_head *bh;
	journal_t *journal;
	ext4_fsblk_t start;
	ext4_fsblk_t len;
	int hblock, blocksize;
	ext4_fsblk_t sb_block;
	unsigned long offset;
	struct ext4_super_block *es;
	struct block_device *bdev;

	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));

	bdev = ext4_blkdev_get(j_dev, sb);
	if (bdev == NULL)
		return NULL;

	if (bd_claim(bdev, sb)) {
		ext4_msg(sb, KERN_ERR,
			"failed to claim external journal device");
		/* not claimed, so a plain blkdev_put() is used here */
		blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
		return NULL;
	}

	blocksize = sb->s_blocksize;
	hblock = bdev_logical_block_size(bdev);
	if (blocksize < hblock) {
		ext4_msg(sb, KERN_ERR,
			"blocksize too small for journal device");
		goto out_bdev;
	}

	/*
	 * Locate the journal device's super block: the block containing byte
	 * offset EXT4_MIN_BLOCK_SIZE, and the offset within that block.
	 */
	sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
	offset = EXT4_MIN_BLOCK_SIZE % blocksize;
	set_blocksize(bdev, blocksize);
	if (!(bh = __bread(bdev, sb_block, blocksize))) {
		ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
		       "external journal");
		goto out_bdev;
	}

	es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
	if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
	    !(le32_to_cpu(es->s_feature_incompat) &
	      EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
		ext4_msg(sb, KERN_ERR, "external journal has "
		       "bad superblock");
		brelse(bh);
		goto out_bdev;
	}

	if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
		ext4_msg(sb, KERN_ERR, "journal UUID does not match");
		brelse(bh);
		goto out_bdev;
	}

	len = ext4_blocks_count(es);
	start = sb_block + 1;
	brelse(bh);	/* we're done with the superblock */

	journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
					start, len, blocksize);
	if (!journal) {
		ext4_msg(sb, KERN_ERR, "failed to create device journal");
		goto out_bdev;
	}
	journal->j_private = sb;
	/* read the journal super block synchronously before inspecting it */
	ll_rw_block(READ, 1, &journal->j_sb_buffer);
	wait_on_buffer(journal->j_sb_buffer);
	if (!buffer_uptodate(journal->j_sb_buffer)) {
		ext4_msg(sb, KERN_ERR, "I/O error on journal device");
		goto out_journal;
	}
	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
		ext4_msg(sb, KERN_ERR, "External journal has more than one "
					"user (unsupported) - %d",
			be32_to_cpu(journal->j_superblock->s_nr_users));
		goto out_journal;
	}
	EXT4_SB(sb)->journal_bdev = bdev;
	ext4_init_journal_params(sb, journal);
	return journal;

out_journal:
	jbd2_journal_destroy(journal);
out_bdev:
	ext4_blkdev_put(bdev);
	return NULL;
}

static int ext4_load_journal(struct super_block *sb,
			     struct ext4_super_block *es,
			     unsigned long journal_devnum)
{
	journal_t *journal;
	unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
	dev_t journal_dev;
	int err = 0;
	int
really_read_only; 3250 3251 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3252 3253 if (journal_devnum && 3254 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3255 ext4_msg(sb, KERN_INFO, "external journal device major/minor " 3256 "numbers have changed"); 3257 journal_dev = new_decode_dev(journal_devnum); 3258 } else 3259 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 3260 3261 really_read_only = bdev_read_only(sb->s_bdev); 3262 3263 /* 3264 * Are we loading a blank journal or performing recovery after a 3265 * crash? For recovery, we need to check in advance whether we 3266 * can get read-write access to the device. 3267 */ 3268 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 3269 if (sb->s_flags & MS_RDONLY) { 3270 ext4_msg(sb, KERN_INFO, "INFO: recovery " 3271 "required on readonly filesystem"); 3272 if (really_read_only) { 3273 ext4_msg(sb, KERN_ERR, "write access " 3274 "unavailable, cannot proceed"); 3275 return -EROFS; 3276 } 3277 ext4_msg(sb, KERN_INFO, "write access will " 3278 "be enabled during recovery"); 3279 } 3280 } 3281 3282 if (journal_inum && journal_dev) { 3283 ext4_msg(sb, KERN_ERR, "filesystem has both journal " 3284 "and inode journals!"); 3285 return -EINVAL; 3286 } 3287 3288 if (journal_inum) { 3289 if (!(journal = ext4_get_journal(sb, journal_inum))) 3290 return -EINVAL; 3291 } else { 3292 if (!(journal = ext4_get_dev_journal(sb, journal_dev))) 3293 return -EINVAL; 3294 } 3295 3296 if (!(journal->j_flags & JBD2_BARRIER)) 3297 ext4_msg(sb, KERN_INFO, "barriers disabled"); 3298 3299 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 3300 err = jbd2_journal_update_format(journal); 3301 if (err) { 3302 ext4_msg(sb, KERN_ERR, "error updating journal"); 3303 jbd2_journal_destroy(journal); 3304 return err; 3305 } 3306 } 3307 3308 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) 3309 err = jbd2_journal_wipe(journal, !really_read_only); 3310 if (!err) 3311 err = 
jbd2_journal_load(journal); 3312 3313 if (err) { 3314 ext4_msg(sb, KERN_ERR, "error loading journal"); 3315 jbd2_journal_destroy(journal); 3316 return err; 3317 } 3318 3319 EXT4_SB(sb)->s_journal = journal; 3320 ext4_clear_journal_err(sb, es); 3321 3322 if (journal_devnum && 3323 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3324 es->s_journal_dev = cpu_to_le32(journal_devnum); 3325 3326 /* Make sure we flush the recovery flag to disk. */ 3327 ext4_commit_super(sb, 1); 3328 } 3329 3330 return 0; 3331 } 3332 3333 static int ext4_commit_super(struct super_block *sb, int sync) 3334 { 3335 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 3336 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 3337 int error = 0; 3338 3339 if (!sbh) 3340 return error; 3341 if (buffer_write_io_error(sbh)) { 3342 /* 3343 * Oh, dear. A previous attempt to write the 3344 * superblock failed. This could happen because the 3345 * USB device was yanked out. Or it could happen to 3346 * be a transient write error and maybe the block will 3347 * be remapped. Nothing we can do but to retry the 3348 * write and hope for the best. 3349 */ 3350 ext4_msg(sb, KERN_ERR, "previous I/O error to " 3351 "superblock detected"); 3352 clear_buffer_write_io_error(sbh); 3353 set_buffer_uptodate(sbh); 3354 } 3355 /* 3356 * If the file system is mounted read-only, don't update the 3357 * superblock write time. This avoids updating the superblock 3358 * write time when we are mounting the root file system 3359 * read/only but we need to replay the journal; at that point, 3360 * for people who are east of GMT and who make their clock 3361 * tick in localtime for Windows bug-for-bug compatibility, 3362 * the clock is set in the future, and this will cause e2fsck 3363 * to complain and force a full file system check. 
3364 */ 3365 if (!(sb->s_flags & MS_RDONLY)) 3366 es->s_wtime = cpu_to_le32(get_seconds()); 3367 es->s_kbytes_written = 3368 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + 3369 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 3370 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 3371 ext4_free_blocks_count_set(es, percpu_counter_sum_positive( 3372 &EXT4_SB(sb)->s_freeblocks_counter)); 3373 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( 3374 &EXT4_SB(sb)->s_freeinodes_counter)); 3375 sb->s_dirt = 0; 3376 BUFFER_TRACE(sbh, "marking dirty"); 3377 mark_buffer_dirty(sbh); 3378 if (sync) { 3379 error = sync_dirty_buffer(sbh); 3380 if (error) 3381 return error; 3382 3383 error = buffer_write_io_error(sbh); 3384 if (error) { 3385 ext4_msg(sb, KERN_ERR, "I/O error while writing " 3386 "superblock"); 3387 clear_buffer_write_io_error(sbh); 3388 set_buffer_uptodate(sbh); 3389 } 3390 } 3391 return error; 3392 } 3393 3394 /* 3395 * Have we just finished recovery? If so, and if we are mounting (or 3396 * remounting) the filesystem readonly, then we will end up with a 3397 * consistent fs on disk. Record that fact. 
3398 */ 3399 static void ext4_mark_recovery_complete(struct super_block *sb, 3400 struct ext4_super_block *es) 3401 { 3402 journal_t *journal = EXT4_SB(sb)->s_journal; 3403 3404 if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 3405 BUG_ON(journal != NULL); 3406 return; 3407 } 3408 jbd2_journal_lock_updates(journal); 3409 if (jbd2_journal_flush(journal) < 0) 3410 goto out; 3411 3412 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && 3413 sb->s_flags & MS_RDONLY) { 3414 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3415 ext4_commit_super(sb, 1); 3416 } 3417 3418 out: 3419 jbd2_journal_unlock_updates(journal); 3420 } 3421 3422 /* 3423 * If we are mounting (or read-write remounting) a filesystem whose journal 3424 * has recorded an error from a previous lifetime, move that error to the 3425 * main filesystem now. 3426 */ 3427 static void ext4_clear_journal_err(struct super_block *sb, 3428 struct ext4_super_block *es) 3429 { 3430 journal_t *journal; 3431 int j_errno; 3432 const char *errstr; 3433 3434 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3435 3436 journal = EXT4_SB(sb)->s_journal; 3437 3438 /* 3439 * Now check for any error status which may have been recorded in the 3440 * journal by a prior ext4_error() or ext4_abort() 3441 */ 3442 3443 j_errno = jbd2_journal_errno(journal); 3444 if (j_errno) { 3445 char nbuf[16]; 3446 3447 errstr = ext4_decode_error(sb, j_errno, nbuf); 3448 ext4_warning(sb, "Filesystem error recorded " 3449 "from previous mount: %s", errstr); 3450 ext4_warning(sb, "Marking fs in need of filesystem check."); 3451 3452 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 3453 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 3454 ext4_commit_super(sb, 1); 3455 3456 jbd2_journal_clear_err(journal); 3457 } 3458 } 3459 3460 /* 3461 * Force the running and committing transactions to commit, 3462 * and wait on the commit. 
3463 */ 3464 int ext4_force_commit(struct super_block *sb) 3465 { 3466 journal_t *journal; 3467 int ret = 0; 3468 3469 if (sb->s_flags & MS_RDONLY) 3470 return 0; 3471 3472 journal = EXT4_SB(sb)->s_journal; 3473 if (journal) 3474 ret = ext4_journal_force_commit(journal); 3475 3476 return ret; 3477 } 3478 3479 static void ext4_write_super(struct super_block *sb) 3480 { 3481 lock_super(sb); 3482 ext4_commit_super(sb, 1); 3483 unlock_super(sb); 3484 } 3485 3486 static int ext4_sync_fs(struct super_block *sb, int wait) 3487 { 3488 int ret = 0; 3489 tid_t target; 3490 struct ext4_sb_info *sbi = EXT4_SB(sb); 3491 3492 trace_ext4_sync_fs(sb, wait); 3493 flush_workqueue(sbi->dio_unwritten_wq); 3494 if (jbd2_journal_start_commit(sbi->s_journal, &target)) { 3495 if (wait) 3496 jbd2_log_wait_commit(sbi->s_journal, target); 3497 } 3498 return ret; 3499 } 3500 3501 /* 3502 * LVM calls this function before a (read-only) snapshot is created. This 3503 * gives us a chance to flush the journal completely and mark the fs clean. 3504 */ 3505 static int ext4_freeze(struct super_block *sb) 3506 { 3507 int error = 0; 3508 journal_t *journal; 3509 3510 if (sb->s_flags & MS_RDONLY) 3511 return 0; 3512 3513 journal = EXT4_SB(sb)->s_journal; 3514 3515 /* Now we set up the journal barrier. */ 3516 jbd2_journal_lock_updates(journal); 3517 3518 /* 3519 * Don't clear the needs_recovery flag if we failed to flush 3520 * the journal. 3521 */ 3522 error = jbd2_journal_flush(journal); 3523 if (error < 0) { 3524 out: 3525 jbd2_journal_unlock_updates(journal); 3526 return error; 3527 } 3528 3529 /* Journal blocked and flushed, clear needs_recovery flag. */ 3530 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3531 error = ext4_commit_super(sb, 1); 3532 if (error) 3533 goto out; 3534 return 0; 3535 } 3536 3537 /* 3538 * Called by LVM after the snapshot is done. We need to reset the RECOVER 3539 * flag here, even though the filesystem is not technically dirty yet. 
3540 */ 3541 static int ext4_unfreeze(struct super_block *sb) 3542 { 3543 if (sb->s_flags & MS_RDONLY) 3544 return 0; 3545 3546 lock_super(sb); 3547 /* Reset the needs_recovery flag before the fs is unlocked. */ 3548 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3549 ext4_commit_super(sb, 1); 3550 unlock_super(sb); 3551 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3552 return 0; 3553 } 3554 3555 static int ext4_remount(struct super_block *sb, int *flags, char *data) 3556 { 3557 struct ext4_super_block *es; 3558 struct ext4_sb_info *sbi = EXT4_SB(sb); 3559 ext4_fsblk_t n_blocks_count = 0; 3560 unsigned long old_sb_flags; 3561 struct ext4_mount_options old_opts; 3562 ext4_group_t g; 3563 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 3564 int err; 3565 #ifdef CONFIG_QUOTA 3566 int i; 3567 #endif 3568 3569 lock_kernel(); 3570 3571 /* Store the original options */ 3572 lock_super(sb); 3573 old_sb_flags = sb->s_flags; 3574 old_opts.s_mount_opt = sbi->s_mount_opt; 3575 old_opts.s_resuid = sbi->s_resuid; 3576 old_opts.s_resgid = sbi->s_resgid; 3577 old_opts.s_commit_interval = sbi->s_commit_interval; 3578 old_opts.s_min_batch_time = sbi->s_min_batch_time; 3579 old_opts.s_max_batch_time = sbi->s_max_batch_time; 3580 #ifdef CONFIG_QUOTA 3581 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 3582 for (i = 0; i < MAXQUOTAS; i++) 3583 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 3584 #endif 3585 if (sbi->s_journal && sbi->s_journal->j_task->io_context) 3586 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; 3587 3588 /* 3589 * Allow the "check" option to be passed as a remount option. 3590 */ 3591 if (!parse_options(data, sb, NULL, &journal_ioprio, 3592 &n_blocks_count, 1)) { 3593 err = -EINVAL; 3594 goto restore_opts; 3595 } 3596 3597 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) 3598 ext4_abort(sb, __func__, "Abort forced by user"); 3599 3600 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3601 (test_opt(sb, POSIX_ACL) ? 
MS_POSIXACL : 0); 3602 3603 es = sbi->s_es; 3604 3605 if (sbi->s_journal) { 3606 ext4_init_journal_params(sb, sbi->s_journal); 3607 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 3608 } 3609 3610 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 3611 n_blocks_count > ext4_blocks_count(es)) { 3612 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { 3613 err = -EROFS; 3614 goto restore_opts; 3615 } 3616 3617 if (*flags & MS_RDONLY) { 3618 /* 3619 * First of all, the unconditional stuff we have to do 3620 * to disable replay of the journal when we next remount 3621 */ 3622 sb->s_flags |= MS_RDONLY; 3623 3624 /* 3625 * OK, test if we are remounting a valid rw partition 3626 * readonly, and if so set the rdonly flag and then 3627 * mark the partition as valid again. 3628 */ 3629 if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) && 3630 (sbi->s_mount_state & EXT4_VALID_FS)) 3631 es->s_state = cpu_to_le16(sbi->s_mount_state); 3632 3633 if (sbi->s_journal) 3634 ext4_mark_recovery_complete(sb, es); 3635 } else { 3636 /* Make sure we can mount this feature set readwrite */ 3637 if (!ext4_feature_set_ok(sb, 0)) { 3638 err = -EROFS; 3639 goto restore_opts; 3640 } 3641 /* 3642 * Make sure the group descriptor checksums 3643 * are sane. If they aren't, refuse to remount r/w. 3644 */ 3645 for (g = 0; g < sbi->s_groups_count; g++) { 3646 struct ext4_group_desc *gdp = 3647 ext4_get_group_desc(sb, g, NULL); 3648 3649 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { 3650 ext4_msg(sb, KERN_ERR, 3651 "ext4_remount: Checksum for group %u failed (%u!=%u)", 3652 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), 3653 le16_to_cpu(gdp->bg_checksum)); 3654 err = -EINVAL; 3655 goto restore_opts; 3656 } 3657 } 3658 3659 /* 3660 * If we have an unprocessed orphan list hanging 3661 * around from a previously readonly bdev mount, 3662 * require a full umount/remount for now. 
3663 */ 3664 if (es->s_last_orphan) { 3665 ext4_msg(sb, KERN_WARNING, "Couldn't " 3666 "remount RDWR because of unprocessed " 3667 "orphan inode list. Please " 3668 "umount/remount instead"); 3669 err = -EINVAL; 3670 goto restore_opts; 3671 } 3672 3673 /* 3674 * Mounting a RDONLY partition read-write, so reread 3675 * and store the current valid flag. (It may have 3676 * been changed by e2fsck since we originally mounted 3677 * the partition.) 3678 */ 3679 if (sbi->s_journal) 3680 ext4_clear_journal_err(sb, es); 3681 sbi->s_mount_state = le16_to_cpu(es->s_state); 3682 if ((err = ext4_group_extend(sb, es, n_blocks_count))) 3683 goto restore_opts; 3684 if (!ext4_setup_super(sb, es, 0)) 3685 sb->s_flags &= ~MS_RDONLY; 3686 } 3687 } 3688 ext4_setup_system_zone(sb); 3689 if (sbi->s_journal == NULL) 3690 ext4_commit_super(sb, 1); 3691 3692 #ifdef CONFIG_QUOTA 3693 /* Release old quota file names */ 3694 for (i = 0; i < MAXQUOTAS; i++) 3695 if (old_opts.s_qf_names[i] && 3696 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 3697 kfree(old_opts.s_qf_names[i]); 3698 #endif 3699 unlock_super(sb); 3700 unlock_kernel(); 3701 return 0; 3702 3703 restore_opts: 3704 sb->s_flags = old_sb_flags; 3705 sbi->s_mount_opt = old_opts.s_mount_opt; 3706 sbi->s_resuid = old_opts.s_resuid; 3707 sbi->s_resgid = old_opts.s_resgid; 3708 sbi->s_commit_interval = old_opts.s_commit_interval; 3709 sbi->s_min_batch_time = old_opts.s_min_batch_time; 3710 sbi->s_max_batch_time = old_opts.s_max_batch_time; 3711 #ifdef CONFIG_QUOTA 3712 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 3713 for (i = 0; i < MAXQUOTAS; i++) { 3714 if (sbi->s_qf_names[i] && 3715 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 3716 kfree(sbi->s_qf_names[i]); 3717 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 3718 } 3719 #endif 3720 unlock_super(sb); 3721 unlock_kernel(); 3722 return err; 3723 } 3724 3725 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) 3726 { 3727 struct super_block *sb = dentry->d_sb; 3728 struct 
ext4_sb_info *sbi = EXT4_SB(sb); 3729 struct ext4_super_block *es = sbi->s_es; 3730 u64 fsid; 3731 3732 if (test_opt(sb, MINIX_DF)) { 3733 sbi->s_overhead_last = 0; 3734 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { 3735 ext4_group_t i, ngroups = ext4_get_groups_count(sb); 3736 ext4_fsblk_t overhead = 0; 3737 3738 /* 3739 * Compute the overhead (FS structures). This is constant 3740 * for a given filesystem unless the number of block groups 3741 * changes so we cache the previous value until it does. 3742 */ 3743 3744 /* 3745 * All of the blocks before first_data_block are 3746 * overhead 3747 */ 3748 overhead = le32_to_cpu(es->s_first_data_block); 3749 3750 /* 3751 * Add the overhead attributed to the superblock and 3752 * block group descriptors. If the sparse superblocks 3753 * feature is turned on, then not all groups have this. 3754 */ 3755 for (i = 0; i < ngroups; i++) { 3756 overhead += ext4_bg_has_super(sb, i) + 3757 ext4_bg_num_gdb(sb, i); 3758 cond_resched(); 3759 } 3760 3761 /* 3762 * Every block group has an inode bitmap, a block 3763 * bitmap, and an inode table. 
3764 */ 3765 overhead += ngroups * (2 + sbi->s_itb_per_group); 3766 sbi->s_overhead_last = overhead; 3767 smp_wmb(); 3768 sbi->s_blocks_last = ext4_blocks_count(es); 3769 } 3770 3771 buf->f_type = EXT4_SUPER_MAGIC; 3772 buf->f_bsize = sb->s_blocksize; 3773 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 3774 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - 3775 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); 3776 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 3777 if (buf->f_bfree < ext4_r_blocks_count(es)) 3778 buf->f_bavail = 0; 3779 buf->f_files = le32_to_cpu(es->s_inodes_count); 3780 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); 3781 buf->f_namelen = EXT4_NAME_LEN; 3782 fsid = le64_to_cpup((void *)es->s_uuid) ^ 3783 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 3784 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 3785 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 3786 3787 return 0; 3788 } 3789 3790 /* Helper function for writing quotas on sync - we need to start transaction 3791 * before quota file is locked for write. 
Otherwise the are possible deadlocks: 3792 * Process 1 Process 2 3793 * ext4_create() quota_sync() 3794 * jbd2_journal_start() write_dquot() 3795 * dquot_initialize() down(dqio_mutex) 3796 * down(dqio_mutex) jbd2_journal_start() 3797 * 3798 */ 3799 3800 #ifdef CONFIG_QUOTA 3801 3802 static inline struct inode *dquot_to_inode(struct dquot *dquot) 3803 { 3804 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; 3805 } 3806 3807 static int ext4_write_dquot(struct dquot *dquot) 3808 { 3809 int ret, err; 3810 handle_t *handle; 3811 struct inode *inode; 3812 3813 inode = dquot_to_inode(dquot); 3814 handle = ext4_journal_start(inode, 3815 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 3816 if (IS_ERR(handle)) 3817 return PTR_ERR(handle); 3818 ret = dquot_commit(dquot); 3819 err = ext4_journal_stop(handle); 3820 if (!ret) 3821 ret = err; 3822 return ret; 3823 } 3824 3825 static int ext4_acquire_dquot(struct dquot *dquot) 3826 { 3827 int ret, err; 3828 handle_t *handle; 3829 3830 handle = ext4_journal_start(dquot_to_inode(dquot), 3831 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 3832 if (IS_ERR(handle)) 3833 return PTR_ERR(handle); 3834 ret = dquot_acquire(dquot); 3835 err = ext4_journal_stop(handle); 3836 if (!ret) 3837 ret = err; 3838 return ret; 3839 } 3840 3841 static int ext4_release_dquot(struct dquot *dquot) 3842 { 3843 int ret, err; 3844 handle_t *handle; 3845 3846 handle = ext4_journal_start(dquot_to_inode(dquot), 3847 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 3848 if (IS_ERR(handle)) { 3849 /* Release dquot anyway to avoid endless cycle in dqput() */ 3850 dquot_release(dquot); 3851 return PTR_ERR(handle); 3852 } 3853 ret = dquot_release(dquot); 3854 err = ext4_journal_stop(handle); 3855 if (!ret) 3856 ret = err; 3857 return ret; 3858 } 3859 3860 static int ext4_mark_dquot_dirty(struct dquot *dquot) 3861 { 3862 /* Are we journaling quotas? 
*/ 3863 if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 3864 EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 3865 dquot_mark_dquot_dirty(dquot); 3866 return ext4_write_dquot(dquot); 3867 } else { 3868 return dquot_mark_dquot_dirty(dquot); 3869 } 3870 } 3871 3872 static int ext4_write_info(struct super_block *sb, int type) 3873 { 3874 int ret, err; 3875 handle_t *handle; 3876 3877 /* Data block + inode block */ 3878 handle = ext4_journal_start(sb->s_root->d_inode, 2); 3879 if (IS_ERR(handle)) 3880 return PTR_ERR(handle); 3881 ret = dquot_commit_info(sb, type); 3882 err = ext4_journal_stop(handle); 3883 if (!ret) 3884 ret = err; 3885 return ret; 3886 } 3887 3888 /* 3889 * Turn on quotas during mount time - we need to find 3890 * the quota file and such... 3891 */ 3892 static int ext4_quota_on_mount(struct super_block *sb, int type) 3893 { 3894 return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], 3895 EXT4_SB(sb)->s_jquota_fmt, type); 3896 } 3897 3898 /* 3899 * Standard function to be called on quota_on 3900 */ 3901 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 3902 char *name, int remount) 3903 { 3904 int err; 3905 struct path path; 3906 3907 if (!test_opt(sb, QUOTA)) 3908 return -EINVAL; 3909 /* When remounting, no checks are needed and in fact, name is NULL */ 3910 if (remount) 3911 return vfs_quota_on(sb, type, format_id, name, remount); 3912 3913 err = kern_path(name, LOOKUP_FOLLOW, &path); 3914 if (err) 3915 return err; 3916 3917 /* Quotafile not on the same filesystem? */ 3918 if (path.mnt->mnt_sb != sb) { 3919 path_put(&path); 3920 return -EXDEV; 3921 } 3922 /* Journaling quota? */ 3923 if (EXT4_SB(sb)->s_qf_names[type]) { 3924 /* Quotafile not in fs root? */ 3925 if (path.dentry->d_parent != sb->s_root) 3926 ext4_msg(sb, KERN_WARNING, 3927 "Quota file not on filesystem root. 
" 3928 "Journaled quota will not work"); 3929 } 3930 3931 /* 3932 * When we journal data on quota file, we have to flush journal to see 3933 * all updates to the file when we bypass pagecache... 3934 */ 3935 if (EXT4_SB(sb)->s_journal && 3936 ext4_should_journal_data(path.dentry->d_inode)) { 3937 /* 3938 * We don't need to lock updates but journal_flush() could 3939 * otherwise be livelocked... 3940 */ 3941 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 3942 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 3943 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3944 if (err) { 3945 path_put(&path); 3946 return err; 3947 } 3948 } 3949 3950 err = vfs_quota_on_path(sb, type, format_id, &path); 3951 path_put(&path); 3952 return err; 3953 } 3954 3955 /* Read data from quotafile - avoid pagecache and such because we cannot afford 3956 * acquiring the locks... As quota files are never truncated and quota code 3957 * itself serializes the operations (and noone else should touch the files) 3958 * we don't have to be afraid of races */ 3959 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 3960 size_t len, loff_t off) 3961 { 3962 struct inode *inode = sb_dqopt(sb)->files[type]; 3963 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 3964 int err = 0; 3965 int offset = off & (sb->s_blocksize - 1); 3966 int tocopy; 3967 size_t toread; 3968 struct buffer_head *bh; 3969 loff_t i_size = i_size_read(inode); 3970 3971 if (off > i_size) 3972 return 0; 3973 if (off+len > i_size) 3974 len = i_size-off; 3975 toread = len; 3976 while (toread > 0) { 3977 tocopy = sb->s_blocksize - offset < toread ? 3978 sb->s_blocksize - offset : toread; 3979 bh = ext4_bread(NULL, inode, blk, 0, &err); 3980 if (err) 3981 return err; 3982 if (!bh) /* A hole? 
*/ 3983 memset(data, 0, tocopy); 3984 else 3985 memcpy(data, bh->b_data+offset, tocopy); 3986 brelse(bh); 3987 offset = 0; 3988 toread -= tocopy; 3989 data += tocopy; 3990 blk++; 3991 } 3992 return len; 3993 } 3994 3995 /* Write to quotafile (we know the transaction is already started and has 3996 * enough credits) */ 3997 static ssize_t ext4_quota_write(struct super_block *sb, int type, 3998 const char *data, size_t len, loff_t off) 3999 { 4000 struct inode *inode = sb_dqopt(sb)->files[type]; 4001 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 4002 int err = 0; 4003 int offset = off & (sb->s_blocksize - 1); 4004 int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; 4005 struct buffer_head *bh; 4006 handle_t *handle = journal_current_handle(); 4007 4008 if (EXT4_SB(sb)->s_journal && !handle) { 4009 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" 4010 " cancelled because transaction is not started", 4011 (unsigned long long)off, (unsigned long long)len); 4012 return -EIO; 4013 } 4014 /* 4015 * Since we account only one data block in transaction credits, 4016 * then it is impossible to cross a block boundary. 
4017 */ 4018 if (sb->s_blocksize - offset < len) { 4019 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" 4020 " cancelled because not block aligned", 4021 (unsigned long long)off, (unsigned long long)len); 4022 return -EIO; 4023 } 4024 4025 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 4026 bh = ext4_bread(handle, inode, blk, 1, &err); 4027 if (!bh) 4028 goto out; 4029 if (journal_quota) { 4030 err = ext4_journal_get_write_access(handle, bh); 4031 if (err) { 4032 brelse(bh); 4033 goto out; 4034 } 4035 } 4036 lock_buffer(bh); 4037 memcpy(bh->b_data+offset, data, len); 4038 flush_dcache_page(bh->b_page); 4039 unlock_buffer(bh); 4040 if (journal_quota) 4041 err = ext4_handle_dirty_metadata(handle, NULL, bh); 4042 else { 4043 /* Always do at least ordered writes for quotas */ 4044 err = ext4_jbd2_file_inode(handle, inode); 4045 mark_buffer_dirty(bh); 4046 } 4047 brelse(bh); 4048 out: 4049 if (err) { 4050 mutex_unlock(&inode->i_mutex); 4051 return err; 4052 } 4053 if (inode->i_size < off + len) { 4054 i_size_write(inode, off + len); 4055 EXT4_I(inode)->i_disksize = inode->i_size; 4056 } 4057 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 4058 ext4_mark_inode_dirty(handle, inode); 4059 mutex_unlock(&inode->i_mutex); 4060 return len; 4061 } 4062 4063 #endif 4064 4065 static int ext4_get_sb(struct file_system_type *fs_type, int flags, 4066 const char *dev_name, void *data, struct vfsmount *mnt) 4067 { 4068 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); 4069 } 4070 4071 #if !defined(CONTIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 4072 static struct file_system_type ext2_fs_type = { 4073 .owner = THIS_MODULE, 4074 .name = "ext2", 4075 .get_sb = ext4_get_sb, 4076 .kill_sb = kill_block_super, 4077 .fs_flags = FS_REQUIRES_DEV, 4078 }; 4079 4080 static inline void register_as_ext2(void) 4081 { 4082 int err = register_filesystem(&ext2_fs_type); 4083 if (err) 4084 printk(KERN_WARNING 4085 
"EXT4-fs: Unable to register as ext2 (%d)\n", err); 4086 } 4087 4088 static inline void unregister_as_ext2(void) 4089 { 4090 unregister_filesystem(&ext2_fs_type); 4091 } 4092 MODULE_ALIAS("ext2"); 4093 #else 4094 static inline void register_as_ext2(void) { } 4095 static inline void unregister_as_ext2(void) { } 4096 #endif 4097 4098 #if !defined(CONTIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 4099 static struct file_system_type ext3_fs_type = { 4100 .owner = THIS_MODULE, 4101 .name = "ext3", 4102 .get_sb = ext4_get_sb, 4103 .kill_sb = kill_block_super, 4104 .fs_flags = FS_REQUIRES_DEV, 4105 }; 4106 4107 static inline void register_as_ext3(void) 4108 { 4109 int err = register_filesystem(&ext3_fs_type); 4110 if (err) 4111 printk(KERN_WARNING 4112 "EXT4-fs: Unable to register as ext3 (%d)\n", err); 4113 } 4114 4115 static inline void unregister_as_ext3(void) 4116 { 4117 unregister_filesystem(&ext3_fs_type); 4118 } 4119 MODULE_ALIAS("ext3"); 4120 #else 4121 static inline void register_as_ext3(void) { } 4122 static inline void unregister_as_ext3(void) { } 4123 #endif 4124 4125 static struct file_system_type ext4_fs_type = { 4126 .owner = THIS_MODULE, 4127 .name = "ext4", 4128 .get_sb = ext4_get_sb, 4129 .kill_sb = kill_block_super, 4130 .fs_flags = FS_REQUIRES_DEV, 4131 }; 4132 4133 static int __init init_ext4_fs(void) 4134 { 4135 int err; 4136 4137 err = init_ext4_system_zone(); 4138 if (err) 4139 return err; 4140 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 4141 if (!ext4_kset) 4142 goto out4; 4143 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 4144 err = init_ext4_mballoc(); 4145 if (err) 4146 goto out3; 4147 4148 err = init_ext4_xattr(); 4149 if (err) 4150 goto out2; 4151 err = init_inodecache(); 4152 if (err) 4153 goto out1; 4154 register_as_ext2(); 4155 register_as_ext3(); 4156 err = register_filesystem(&ext4_fs_type); 4157 if (err) 4158 goto out; 4159 return 0; 4160 out: 4161 unregister_as_ext2(); 4162 
unregister_as_ext3(); 4163 destroy_inodecache(); 4164 out1: 4165 exit_ext4_xattr(); 4166 out2: 4167 exit_ext4_mballoc(); 4168 out3: 4169 remove_proc_entry("fs/ext4", NULL); 4170 kset_unregister(ext4_kset); 4171 out4: 4172 exit_ext4_system_zone(); 4173 return err; 4174 } 4175 4176 static void __exit exit_ext4_fs(void) 4177 { 4178 unregister_as_ext2(); 4179 unregister_as_ext3(); 4180 unregister_filesystem(&ext4_fs_type); 4181 destroy_inodecache(); 4182 exit_ext4_xattr(); 4183 exit_ext4_mballoc(); 4184 remove_proc_entry("fs/ext4", NULL); 4185 kset_unregister(ext4_kset); 4186 exit_ext4_system_zone(); 4187 } 4188 4189 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 4190 MODULE_DESCRIPTION("Fourth Extended Filesystem"); 4191 MODULE_LICENSE("GPL"); 4192 module_init(init_ext4_fs) 4193 module_exit(exit_ext4_fs) 4194