1 /* 2 * linux/fs/ext4/super.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * from 10 * 11 * linux/fs/minix/inode.c 12 * 13 * Copyright (C) 1991, 1992 Linus Torvalds 14 * 15 * Big-endian to little-endian byte-swapping/bitmaps by 16 * David S. Miller (davem@caip.rutgers.edu), 1995 17 */ 18 19 #include <linux/module.h> 20 #include <linux/string.h> 21 #include <linux/fs.h> 22 #include <linux/time.h> 23 #include <linux/vmalloc.h> 24 #include <linux/jbd2.h> 25 #include <linux/slab.h> 26 #include <linux/init.h> 27 #include <linux/blkdev.h> 28 #include <linux/parser.h> 29 #include <linux/smp_lock.h> 30 #include <linux/buffer_head.h> 31 #include <linux/exportfs.h> 32 #include <linux/vfs.h> 33 #include <linux/random.h> 34 #include <linux/mount.h> 35 #include <linux/namei.h> 36 #include <linux/quotaops.h> 37 #include <linux/seq_file.h> 38 #include <linux/proc_fs.h> 39 #include <linux/ctype.h> 40 #include <linux/log2.h> 41 #include <linux/crc16.h> 42 #include <asm/uaccess.h> 43 44 #include "ext4.h" 45 #include "ext4_jbd2.h" 46 #include "xattr.h" 47 #include "acl.h" 48 #include "mballoc.h" 49 50 #define CREATE_TRACE_POINTS 51 #include <trace/events/ext4.h> 52 53 struct proc_dir_entry *ext4_proc_root; 54 static struct kset *ext4_kset; 55 56 static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 57 unsigned long journal_devnum); 58 static int ext4_commit_super(struct super_block *sb, int sync); 59 static void ext4_mark_recovery_complete(struct super_block *sb, 60 struct ext4_super_block *es); 61 static void ext4_clear_journal_err(struct super_block *sb, 62 struct ext4_super_block *es); 63 static int ext4_sync_fs(struct super_block *sb, int wait); 64 static const char *ext4_decode_error(struct super_block *sb, int errno, 65 char nbuf[16]); 66 static int ext4_remount(struct super_block *sb, int *flags, char *data); 67 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); 68 static int ext4_unfreeze(struct super_block *sb); 69 static void ext4_write_super(struct super_block *sb); 70 static int ext4_freeze(struct super_block *sb); 71 static int ext4_get_sb(struct file_system_type *fs_type, int flags, 72 const char *dev_name, void *data, struct vfsmount *mnt); 73 74 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 75 static struct file_system_type ext3_fs_type = { 76 .owner = THIS_MODULE, 77 .name = "ext3", 78 .get_sb = ext4_get_sb, 79 .kill_sb = kill_block_super, 80 .fs_flags = FS_REQUIRES_DEV, 81 }; 82 #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) 83 #else 84 #define IS_EXT3_SB(sb) (0) 85 #endif 86 87 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, 88 struct ext4_group_desc *bg) 89 { 90 return le32_to_cpu(bg->bg_block_bitmap_lo) | 91 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 92 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); 93 } 94 95 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, 96 struct ext4_group_desc *bg) 97 { 98 return le32_to_cpu(bg->bg_inode_bitmap_lo) | 99 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 100 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); 101 } 102 103 ext4_fsblk_t ext4_inode_table(struct super_block *sb, 104 struct ext4_group_desc *bg) 105 { 106 return le32_to_cpu(bg->bg_inode_table_lo) | 107 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 108 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 109 } 110 111 __u32 ext4_free_blks_count(struct super_block *sb, 112 struct ext4_group_desc *bg) 113 { 114 return le16_to_cpu(bg->bg_free_blocks_count_lo) | 115 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 116 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); 117 } 118 119 __u32 ext4_free_inodes_count(struct super_block *sb, 120 struct ext4_group_desc *bg) 121 { 122 return le16_to_cpu(bg->bg_free_inodes_count_lo) | 123 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 124 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); 125 } 126 127 __u32 ext4_used_dirs_count(struct super_block *sb, 128 struct ext4_group_desc *bg) 129 { 130 return le16_to_cpu(bg->bg_used_dirs_count_lo) | 131 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 132 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); 133 } 134 135 __u32 ext4_itable_unused_count(struct super_block *sb, 136 struct ext4_group_desc *bg) 137 { 138 return le16_to_cpu(bg->bg_itable_unused_lo) | 139 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 140 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); 141 } 142 143 void ext4_block_bitmap_set(struct super_block *sb, 144 struct ext4_group_desc *bg, ext4_fsblk_t blk) 145 { 146 bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk); 147 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 148 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); 149 } 150 151 void ext4_inode_bitmap_set(struct super_block *sb, 152 struct ext4_group_desc *bg, ext4_fsblk_t blk) 153 { 154 bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk); 155 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 156 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); 157 } 158 159 void ext4_inode_table_set(struct super_block *sb, 160 struct ext4_group_desc *bg, ext4_fsblk_t blk) 161 { 162 bg->bg_inode_table_lo = cpu_to_le32((u32)blk); 163 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 164 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); 165 } 166 167 void ext4_free_blks_set(struct super_block *sb, 168 struct ext4_group_desc *bg, __u32 count) 169 { 170 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); 171 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 172 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16); 173 } 174 175 void ext4_free_inodes_set(struct super_block *sb, 176 struct ext4_group_desc *bg, __u32 count) 177 { 178 bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count); 179 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 180 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16); 181 } 182 183 void ext4_used_dirs_set(struct super_block *sb, 184 struct ext4_group_desc *bg, __u32 count) 185 { 186 bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count); 187 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 188 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16); 189 } 190 191 void ext4_itable_unused_set(struct super_block *sb, 192 struct ext4_group_desc *bg, __u32 count) 193 { 194 bg->bg_itable_unused_lo = cpu_to_le16((__u16)count); 195 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 196 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); 197 } 198 199 200 /* Just increment the non-pointer handle value */ 201 static handle_t *ext4_get_nojournal(void) 202 { 203 handle_t *handle = current->journal_info; 204 unsigned long ref_cnt = (unsigned long)handle; 205 206 BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT); 207 208 ref_cnt++; 209 handle = (handle_t *)ref_cnt; 210 211 current->journal_info = handle; 212 return handle; 213 } 214 215 216 /* Decrement the non-pointer handle value */ 217 static void ext4_put_nojournal(handle_t *handle) 218 { 219 unsigned long ref_cnt = (unsigned long)handle; 220 221 BUG_ON(ref_cnt == 0); 222 223 ref_cnt--; 224 handle = (handle_t *)ref_cnt; 225 226 current->journal_info = handle; 227 } 228 229 /* 230 * Wrappers for jbd2_journal_start/end. 231 * 232 * The only special thing we need to do here is to make sure that all 233 * journal_end calls result in the superblock being marked dirty, so 234 * that sync() will call the filesystem's write_super callback if 235 * appropriate. 236 */ 237 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 238 { 239 journal_t *journal; 240 241 if (sb->s_flags & MS_RDONLY) 242 return ERR_PTR(-EROFS); 243 244 /* Special case here: if the journal has aborted behind our 245 * backs (eg. EIO in the commit thread), then we still need to 246 * take the FS itself readonly cleanly. */ 247 journal = EXT4_SB(sb)->s_journal; 248 if (journal) { 249 if (is_journal_aborted(journal)) { 250 ext4_abort(sb, __func__, "Detected aborted journal"); 251 return ERR_PTR(-EROFS); 252 } 253 return jbd2_journal_start(journal, nblocks); 254 } 255 return ext4_get_nojournal(); 256 } 257 258 /* 259 * The only special thing we need to do here is to make sure that all 260 * jbd2_journal_stop calls result in the superblock being marked dirty, so 261 * that sync() will call the filesystem's write_super callback if 262 * appropriate. 263 */ 264 int __ext4_journal_stop(const char *where, handle_t *handle) 265 { 266 struct super_block *sb; 267 int err; 268 int rc; 269 270 if (!ext4_handle_valid(handle)) { 271 ext4_put_nojournal(handle); 272 return 0; 273 } 274 sb = handle->h_transaction->t_journal->j_private; 275 err = handle->h_err; 276 rc = jbd2_journal_stop(handle); 277 278 if (!err) 279 err = rc; 280 if (err) 281 __ext4_std_error(sb, where, err); 282 return err; 283 } 284 285 void ext4_journal_abort_handle(const char *caller, const char *err_fn, 286 struct buffer_head *bh, handle_t *handle, int err) 287 { 288 char nbuf[16]; 289 const char *errstr = ext4_decode_error(NULL, err, nbuf); 290 291 BUG_ON(!ext4_handle_valid(handle)); 292 293 if (bh) 294 BUFFER_TRACE(bh, "abort"); 295 296 if (!handle->h_err) 297 handle->h_err = err; 298 299 if (is_handle_aborted(handle)) 300 return; 301 302 printk(KERN_ERR "%s: aborting transaction: %s in %s\n", 303 caller, errstr, err_fn); 304 305 jbd2_journal_abort_handle(handle); 306 } 307 308 /* Deal with the reporting of failure conditions on a filesystem such as 309 * inconsistencies detected or read IO failures. 310 * 311 * On ext2, we can store the error state of the filesystem in the 312 * superblock. That is not possible on ext4, because we may have other 313 * write ordering constraints on the superblock which prevent us from 314 * writing it out straight away; and given that the journal is about to 315 * be aborted, we can't rely on the current, or future, transactions to 316 * write out the superblock safely. 317 * 318 * We'll just use the jbd2_journal_abort() error code to record an error in 319 * the journal instead. On recovery, the journal will complain about 320 * that error until we've noted it down and cleared it. 321 */ 322 323 static void ext4_handle_error(struct super_block *sb) 324 { 325 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 326 327 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 328 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 329 330 if (sb->s_flags & MS_RDONLY) 331 return; 332 333 if (!test_opt(sb, ERRORS_CONT)) { 334 journal_t *journal = EXT4_SB(sb)->s_journal; 335 336 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; 337 if (journal) 338 jbd2_journal_abort(journal, -EIO); 339 } 340 if (test_opt(sb, ERRORS_RO)) { 341 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 342 sb->s_flags |= MS_RDONLY; 343 } 344 ext4_commit_super(sb, 1); 345 if (test_opt(sb, ERRORS_PANIC)) 346 panic("EXT4-fs (device %s): panic forced after error\n", 347 sb->s_id); 348 } 349 350 void __ext4_error(struct super_block *sb, const char *function, 351 const char *fmt, ...) 352 { 353 va_list args; 354 355 va_start(args, fmt); 356 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 357 vprintk(fmt, args); 358 printk("\n"); 359 va_end(args); 360 361 ext4_handle_error(sb); 362 } 363 364 void ext4_error_inode(const char *function, struct inode *inode, 365 const char *fmt, ...) 366 { 367 va_list args; 368 369 va_start(args, fmt); 370 printk(KERN_CRIT "EXT4-fs error (device %s): %s: inode #%lu: (comm %s) ", 371 inode->i_sb->s_id, function, inode->i_ino, current->comm); 372 vprintk(fmt, args); 373 printk("\n"); 374 va_end(args); 375 376 ext4_handle_error(inode->i_sb); 377 } 378 379 void ext4_error_file(const char *function, struct file *file, 380 const char *fmt, ...) 381 { 382 va_list args; 383 struct inode *inode = file->f_dentry->d_inode; 384 char pathname[80], *path; 385 386 va_start(args, fmt); 387 path = d_path(&(file->f_path), pathname, sizeof(pathname)); 388 if (!path) 389 path = "(unknown)"; 390 printk(KERN_CRIT 391 "EXT4-fs error (device %s): %s: inode #%lu (comm %s path %s): ", 392 inode->i_sb->s_id, function, inode->i_ino, current->comm, path); 393 vprintk(fmt, args); 394 printk("\n"); 395 va_end(args); 396 397 ext4_handle_error(inode->i_sb); 398 } 399 400 static const char *ext4_decode_error(struct super_block *sb, int errno, 401 char nbuf[16]) 402 { 403 char *errstr = NULL; 404 405 switch (errno) { 406 case -EIO: 407 errstr = "IO failure"; 408 break; 409 case -ENOMEM: 410 errstr = "Out of memory"; 411 break; 412 case -EROFS: 413 if (!sb || (EXT4_SB(sb)->s_journal && 414 EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)) 415 errstr = "Journal has aborted"; 416 else 417 errstr = "Readonly filesystem"; 418 break; 419 default: 420 /* If the caller passed in an extra buffer for unknown 421 * errors, textualise them now. Else we just return 422 * NULL. */ 423 if (nbuf) { 424 /* Check for truncated error codes... */ 425 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 426 errstr = nbuf; 427 } 428 break; 429 } 430 431 return errstr; 432 } 433 434 /* __ext4_std_error decodes expected errors from journaling functions 435 * automatically and invokes the appropriate error response. */ 436 437 void __ext4_std_error(struct super_block *sb, const char *function, int errno) 438 { 439 char nbuf[16]; 440 const char *errstr; 441 442 /* Special case: if the error is EROFS, and we're not already 443 * inside a transaction, then there's really no point in logging 444 * an error. */ 445 if (errno == -EROFS && journal_current_handle() == NULL && 446 (sb->s_flags & MS_RDONLY)) 447 return; 448 449 errstr = ext4_decode_error(sb, errno, nbuf); 450 printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", 451 sb->s_id, function, errstr); 452 453 ext4_handle_error(sb); 454 } 455 456 /* 457 * ext4_abort is a much stronger failure handler than ext4_error. The 458 * abort function may be used to deal with unrecoverable failures such 459 * as journal IO errors or ENOMEM at a critical moment in log management. 460 * 461 * We unconditionally force the filesystem into an ABORT|READONLY state, 462 * unless the error response on the fs has been set to panic in which 463 * case we take the easy way out and panic immediately. 464 */ 465 466 void ext4_abort(struct super_block *sb, const char *function, 467 const char *fmt, ...) 468 { 469 va_list args; 470 471 va_start(args, fmt); 472 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 473 vprintk(fmt, args); 474 printk("\n"); 475 va_end(args); 476 477 if (test_opt(sb, ERRORS_PANIC)) 478 panic("EXT4-fs panic from previous error\n"); 479 480 if (sb->s_flags & MS_RDONLY) 481 return; 482 483 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 484 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 485 sb->s_flags |= MS_RDONLY; 486 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; 487 if (EXT4_SB(sb)->s_journal) 488 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 489 } 490 491 void ext4_msg (struct super_block * sb, const char *prefix, 492 const char *fmt, ...) 493 { 494 va_list args; 495 496 va_start(args, fmt); 497 printk("%sEXT4-fs (%s): ", prefix, sb->s_id); 498 vprintk(fmt, args); 499 printk("\n"); 500 va_end(args); 501 } 502 503 void __ext4_warning(struct super_block *sb, const char *function, 504 const char *fmt, ...) 505 { 506 va_list args; 507 508 va_start(args, fmt); 509 printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ", 510 sb->s_id, function); 511 vprintk(fmt, args); 512 printk("\n"); 513 va_end(args); 514 } 515 516 void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, 517 const char *function, const char *fmt, ...) 518 __releases(bitlock) 519 __acquires(bitlock) 520 { 521 va_list args; 522 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 523 524 va_start(args, fmt); 525 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 526 vprintk(fmt, args); 527 printk("\n"); 528 va_end(args); 529 530 if (test_opt(sb, ERRORS_CONT)) { 531 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 532 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 533 ext4_commit_super(sb, 0); 534 return; 535 } 536 ext4_unlock_group(sb, grp); 537 ext4_handle_error(sb); 538 /* 539 * We only get here in the ERRORS_RO case; relocking the group 540 * may be dangerous, but nothing bad will happen since the 541 * filesystem will have already been marked read/only and the 542 * journal has been aborted. We return 1 as a hint to callers 543 * who might what to use the return value from 544 * ext4_grp_locked_error() to distinguish beween the 545 * ERRORS_CONT and ERRORS_RO case, and perhaps return more 546 * aggressively from the ext4 function in question, with a 547 * more appropriate error code. 548 */ 549 ext4_lock_group(sb, grp); 550 return; 551 } 552 553 void ext4_update_dynamic_rev(struct super_block *sb) 554 { 555 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 556 557 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 558 return; 559 560 ext4_warning(sb, 561 "updating to rev %d because of new feature flag, " 562 "running e2fsck is recommended", 563 EXT4_DYNAMIC_REV); 564 565 es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO); 566 es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE); 567 es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV); 568 /* leave es->s_feature_*compat flags alone */ 569 /* es->s_uuid will be set by e2fsck if empty */ 570 571 /* 572 * The rest of the superblock fields should be zero, and if not it 573 * means they are likely already in use, so leave them alone. We 574 * can leave it up to e2fsck to clean up any inconsistencies there. 575 */ 576 } 577 578 /* 579 * Open the external journal device 580 */ 581 static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) 582 { 583 struct block_device *bdev; 584 char b[BDEVNAME_SIZE]; 585 586 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); 587 if (IS_ERR(bdev)) 588 goto fail; 589 return bdev; 590 591 fail: 592 ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld", 593 __bdevname(dev, b), PTR_ERR(bdev)); 594 return NULL; 595 } 596 597 /* 598 * Release the journal device 599 */ 600 static int ext4_blkdev_put(struct block_device *bdev) 601 { 602 bd_release(bdev); 603 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 604 } 605 606 static int ext4_blkdev_remove(struct ext4_sb_info *sbi) 607 { 608 struct block_device *bdev; 609 int ret = -ENODEV; 610 611 bdev = sbi->journal_bdev; 612 if (bdev) { 613 ret = ext4_blkdev_put(bdev); 614 sbi->journal_bdev = NULL; 615 } 616 return ret; 617 } 618 619 static inline struct inode *orphan_list_entry(struct list_head *l) 620 { 621 return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode; 622 } 623 624 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) 625 { 626 struct list_head *l; 627 628 ext4_msg(sb, KERN_ERR, "sb orphan head is %d", 629 le32_to_cpu(sbi->s_es->s_last_orphan)); 630 631 printk(KERN_ERR "sb_info orphan list:\n"); 632 list_for_each(l, &sbi->s_orphan) { 633 struct inode *inode = orphan_list_entry(l); 634 printk(KERN_ERR " " 635 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 636 inode->i_sb->s_id, inode->i_ino, inode, 637 inode->i_mode, inode->i_nlink, 638 NEXT_ORPHAN(inode)); 639 } 640 } 641 642 static void ext4_put_super(struct super_block *sb) 643 { 644 struct ext4_sb_info *sbi = EXT4_SB(sb); 645 struct ext4_super_block *es = sbi->s_es; 646 int i, err; 647 648 flush_workqueue(sbi->dio_unwritten_wq); 649 destroy_workqueue(sbi->dio_unwritten_wq); 650 651 lock_super(sb); 652 lock_kernel(); 653 if (sb->s_dirt) 654 ext4_commit_super(sb, 1); 655 656 if (sbi->s_journal) { 657 err = jbd2_journal_destroy(sbi->s_journal); 658 sbi->s_journal = NULL; 659 if (err < 0) 660 ext4_abort(sb, __func__, 661 "Couldn't clean up the journal"); 662 } 663 664 ext4_release_system_zone(sb); 665 ext4_mb_release(sb); 666 ext4_ext_release(sb); 667 ext4_xattr_put_super(sb); 668 669 if (!(sb->s_flags & MS_RDONLY)) { 670 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 671 es->s_state = cpu_to_le16(sbi->s_mount_state); 672 ext4_commit_super(sb, 1); 673 } 674 if (sbi->s_proc) { 675 remove_proc_entry(sb->s_id, ext4_proc_root); 676 } 677 kobject_del(&sbi->s_kobj); 678 679 for (i = 0; i < sbi->s_gdb_count; i++) 680 brelse(sbi->s_group_desc[i]); 681 kfree(sbi->s_group_desc); 682 if (is_vmalloc_addr(sbi->s_flex_groups)) 683 vfree(sbi->s_flex_groups); 684 else 685 kfree(sbi->s_flex_groups); 686 percpu_counter_destroy(&sbi->s_freeblocks_counter); 687 percpu_counter_destroy(&sbi->s_freeinodes_counter); 688 percpu_counter_destroy(&sbi->s_dirs_counter); 689 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 690 brelse(sbi->s_sbh); 691 #ifdef CONFIG_QUOTA 692 for (i = 0; i < MAXQUOTAS; i++) 693 kfree(sbi->s_qf_names[i]); 694 #endif 695 696 /* Debugging code just in case the in-memory inode orphan list 697 * isn't empty. The on-disk one can be non-empty if we've 698 * detected an error and taken the fs readonly, but the 699 * in-memory list had better be clean by this point. */ 700 if (!list_empty(&sbi->s_orphan)) 701 dump_orphan_list(sb, sbi); 702 J_ASSERT(list_empty(&sbi->s_orphan)); 703 704 invalidate_bdev(sb->s_bdev); 705 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { 706 /* 707 * Invalidate the journal device's buffers. We don't want them 708 * floating about in memory - the physical journal device may 709 * hotswapped, and it breaks the `ro-after' testing code. 710 */ 711 sync_blockdev(sbi->journal_bdev); 712 invalidate_bdev(sbi->journal_bdev); 713 ext4_blkdev_remove(sbi); 714 } 715 sb->s_fs_info = NULL; 716 /* 717 * Now that we are completely done shutting down the 718 * superblock, we need to actually destroy the kobject. 719 */ 720 unlock_kernel(); 721 unlock_super(sb); 722 kobject_put(&sbi->s_kobj); 723 wait_for_completion(&sbi->s_kobj_unregister); 724 kfree(sbi->s_blockgroup_lock); 725 kfree(sbi); 726 } 727 728 static struct kmem_cache *ext4_inode_cachep; 729 730 /* 731 * Called inside transaction, so use GFP_NOFS 732 */ 733 static struct inode *ext4_alloc_inode(struct super_block *sb) 734 { 735 struct ext4_inode_info *ei; 736 737 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); 738 if (!ei) 739 return NULL; 740 741 ei->vfs_inode.i_version = 1; 742 ei->vfs_inode.i_data.writeback_index = 0; 743 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 744 INIT_LIST_HEAD(&ei->i_prealloc_list); 745 spin_lock_init(&ei->i_prealloc_lock); 746 /* 747 * Note: We can be called before EXT4_SB(sb)->s_journal is set, 748 * therefore it can be null here. Don't check it, just initialize 749 * jinode. 750 */ 751 jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode); 752 ei->i_reserved_data_blocks = 0; 753 ei->i_reserved_meta_blocks = 0; 754 ei->i_allocated_meta_blocks = 0; 755 ei->i_da_metadata_calc_len = 0; 756 ei->i_delalloc_reserved_flag = 0; 757 spin_lock_init(&(ei->i_block_reservation_lock)); 758 #ifdef CONFIG_QUOTA 759 ei->i_reserved_quota = 0; 760 #endif 761 INIT_LIST_HEAD(&ei->i_completed_io_list); 762 spin_lock_init(&ei->i_completed_io_lock); 763 ei->cur_aio_dio = NULL; 764 ei->i_sync_tid = 0; 765 ei->i_datasync_tid = 0; 766 767 return &ei->vfs_inode; 768 } 769 770 static void ext4_destroy_inode(struct inode *inode) 771 { 772 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 773 ext4_msg(inode->i_sb, KERN_ERR, 774 "Inode %lu (%p): orphan list check failed!", 775 inode->i_ino, EXT4_I(inode)); 776 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, 777 EXT4_I(inode), sizeof(struct ext4_inode_info), 778 true); 779 dump_stack(); 780 } 781 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 782 } 783 784 static void init_once(void *foo) 785 { 786 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 787 788 INIT_LIST_HEAD(&ei->i_orphan); 789 #ifdef CONFIG_EXT4_FS_XATTR 790 init_rwsem(&ei->xattr_sem); 791 #endif 792 init_rwsem(&ei->i_data_sem); 793 inode_init_once(&ei->vfs_inode); 794 } 795 796 static int init_inodecache(void) 797 { 798 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", 799 sizeof(struct ext4_inode_info), 800 0, (SLAB_RECLAIM_ACCOUNT| 801 SLAB_MEM_SPREAD), 802 init_once); 803 if (ext4_inode_cachep == NULL) 804 return -ENOMEM; 805 return 0; 806 } 807 808 static void destroy_inodecache(void) 809 { 810 kmem_cache_destroy(ext4_inode_cachep); 811 } 812 813 static void ext4_clear_inode(struct inode *inode) 814 { 815 dquot_drop(inode); 816 ext4_discard_preallocations(inode); 817 if (EXT4_JOURNAL(inode)) 818 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, 819 &EXT4_I(inode)->jinode); 820 } 821 822 static inline void ext4_show_quota_options(struct seq_file *seq, 823 struct super_block *sb) 824 { 825 #if defined(CONFIG_QUOTA) 826 struct ext4_sb_info *sbi = EXT4_SB(sb); 827 828 if (sbi->s_jquota_fmt) { 829 char *fmtname = ""; 830 831 switch (sbi->s_jquota_fmt) { 832 case QFMT_VFS_OLD: 833 fmtname = "vfsold"; 834 break; 835 case QFMT_VFS_V0: 836 fmtname = "vfsv0"; 837 break; 838 case QFMT_VFS_V1: 839 fmtname = "vfsv1"; 840 break; 841 } 842 seq_printf(seq, ",jqfmt=%s", fmtname); 843 } 844 845 if (sbi->s_qf_names[USRQUOTA]) 846 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 847 848 if (sbi->s_qf_names[GRPQUOTA]) 849 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 850 851 if (test_opt(sb, USRQUOTA)) 852 seq_puts(seq, ",usrquota"); 853 854 if (test_opt(sb, GRPQUOTA)) 855 seq_puts(seq, ",grpquota"); 856 #endif 857 } 858 859 /* 860 * Show an option if 861 * - it's set to a non-default value OR 862 * - if the per-sb default is different from the global default 863 */ 864 static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) 865 { 866 int def_errors; 867 unsigned long def_mount_opts; 868 struct super_block *sb = vfs->mnt_sb; 869 struct ext4_sb_info *sbi = EXT4_SB(sb); 870 struct ext4_super_block *es = sbi->s_es; 871 872 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 873 def_errors = le16_to_cpu(es->s_errors); 874 875 if (sbi->s_sb_block != 1) 876 seq_printf(seq, ",sb=%llu", sbi->s_sb_block); 877 if (test_opt(sb, MINIX_DF)) 878 seq_puts(seq, ",minixdf"); 879 if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS)) 880 seq_puts(seq, ",grpid"); 881 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) 882 seq_puts(seq, ",nogrpid"); 883 if (sbi->s_resuid != EXT4_DEF_RESUID || 884 le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) { 885 seq_printf(seq, ",resuid=%u", sbi->s_resuid); 886 } 887 if (sbi->s_resgid != EXT4_DEF_RESGID || 888 le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { 889 seq_printf(seq, ",resgid=%u", sbi->s_resgid); 890 } 891 if (test_opt(sb, ERRORS_RO)) { 892 if (def_errors == EXT4_ERRORS_PANIC || 893 def_errors == EXT4_ERRORS_CONTINUE) { 894 seq_puts(seq, ",errors=remount-ro"); 895 } 896 } 897 if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) 898 seq_puts(seq, ",errors=continue"); 899 if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) 900 seq_puts(seq, ",errors=panic"); 901 if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16)) 902 seq_puts(seq, ",nouid32"); 903 if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) 904 seq_puts(seq, ",debug"); 905 if (test_opt(sb, OLDALLOC)) 906 seq_puts(seq, ",oldalloc"); 907 #ifdef CONFIG_EXT4_FS_XATTR 908 if (test_opt(sb, XATTR_USER) && 909 !(def_mount_opts & EXT4_DEFM_XATTR_USER)) 910 seq_puts(seq, ",user_xattr"); 911 if (!test_opt(sb, XATTR_USER) && 912 (def_mount_opts & EXT4_DEFM_XATTR_USER)) { 913 seq_puts(seq, ",nouser_xattr"); 914 } 915 #endif 916 #ifdef CONFIG_EXT4_FS_POSIX_ACL 917 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) 918 seq_puts(seq, ",acl"); 919 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) 920 seq_puts(seq, ",noacl"); 921 #endif 922 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { 923 seq_printf(seq, ",commit=%u", 924 (unsigned) (sbi->s_commit_interval / HZ)); 925 } 926 if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) { 927 seq_printf(seq, ",min_batch_time=%u", 928 (unsigned) sbi->s_min_batch_time); 929 } 930 if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { 931 seq_printf(seq, ",max_batch_time=%u", 932 (unsigned) sbi->s_min_batch_time); 933 } 934 935 /* 936 * We're changing the default of barrier mount option, so 937 * let's always display its mount state so it's clear what its 938 * status is. 939 */ 940 seq_puts(seq, ",barrier="); 941 seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); 942 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) 943 seq_puts(seq, ",journal_async_commit"); 944 if (test_opt(sb, NOBH)) 945 seq_puts(seq, ",nobh"); 946 if (test_opt(sb, I_VERSION)) 947 seq_puts(seq, ",i_version"); 948 if (!test_opt(sb, DELALLOC)) 949 seq_puts(seq, ",nodelalloc"); 950 951 952 if (sbi->s_stripe) 953 seq_printf(seq, ",stripe=%lu", sbi->s_stripe); 954 /* 955 * journal mode get enabled in different ways 956 * So just print the value even if we didn't specify it 957 */ 958 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 959 seq_puts(seq, ",data=journal"); 960 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 961 seq_puts(seq, ",data=ordered"); 962 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 963 seq_puts(seq, ",data=writeback"); 964 965 if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) 966 seq_printf(seq, ",inode_readahead_blks=%u", 967 sbi->s_inode_readahead_blks); 968 969 if (test_opt(sb, DATA_ERR_ABORT)) 970 seq_puts(seq, ",data_err=abort"); 971 972 if (test_opt(sb, NO_AUTO_DA_ALLOC)) 973 seq_puts(seq, ",noauto_da_alloc"); 974 975 if (test_opt(sb, DISCARD)) 976 seq_puts(seq, ",discard"); 977 978 if (test_opt(sb, NOLOAD)) 979 seq_puts(seq, ",norecovery"); 980 981 if (test_opt(sb, DIOREAD_NOLOCK)) 982 seq_puts(seq, ",dioread_nolock"); 983 984 ext4_show_quota_options(seq, sb); 985 986 return 0; 987 } 988 989 static struct inode *ext4_nfs_get_inode(struct super_block *sb, 990 u64 ino, u32 generation) 991 { 992 struct inode *inode; 993 994 if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) 995 return ERR_PTR(-ESTALE); 996 if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) 997 return ERR_PTR(-ESTALE); 998 999 /* iget isn't really right if the inode is currently unallocated!! 1000 * 1001 * ext4_read_inode will return a bad_inode if the inode had been 1002 * deleted, so we should be safe. 1003 * 1004 * Currently we don't know the generation for parent directory, so 1005 * a generation of 0 means "accept any" 1006 */ 1007 inode = ext4_iget(sb, ino); 1008 if (IS_ERR(inode)) 1009 return ERR_CAST(inode); 1010 if (generation && inode->i_generation != generation) { 1011 iput(inode); 1012 return ERR_PTR(-ESTALE); 1013 } 1014 1015 return inode; 1016 } 1017 1018 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, 1019 int fh_len, int fh_type) 1020 { 1021 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 1022 ext4_nfs_get_inode); 1023 } 1024 1025 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, 1026 int fh_len, int fh_type) 1027 { 1028 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 1029 ext4_nfs_get_inode); 1030 } 1031 1032 /* 1033 * Try to release metadata pages (indirect blocks, directories) which are 1034 * mapped via the block device. Since these pages could have journal heads 1035 * which would prevent try_to_free_buffers() from freeing them, we must use 1036 * jbd2 layer's try_to_free_buffers() function to release them. 1037 */ 1038 static int bdev_try_to_free_page(struct super_block *sb, struct page *page, 1039 gfp_t wait) 1040 { 1041 journal_t *journal = EXT4_SB(sb)->s_journal; 1042 1043 WARN_ON(PageChecked(page)); 1044 if (!page_has_buffers(page)) 1045 return 0; 1046 if (journal) 1047 return jbd2_journal_try_to_free_buffers(journal, page, 1048 wait & ~__GFP_WAIT); 1049 return try_to_free_buffers(page); 1050 } 1051 1052 #ifdef CONFIG_QUOTA 1053 #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") 1054 #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 1055 1056 static int ext4_write_dquot(struct dquot *dquot); 1057 static int ext4_acquire_dquot(struct dquot *dquot); 1058 static int ext4_release_dquot(struct dquot *dquot); 1059 static int ext4_mark_dquot_dirty(struct dquot *dquot); 1060 static int ext4_write_info(struct super_block *sb, int type); 1061 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 1062 char *path, int remount); 1063 static int ext4_quota_on_mount(struct super_block *sb, int type); 1064 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 1065 size_t len, loff_t off); 1066 static ssize_t ext4_quota_write(struct super_block *sb, int type, 1067 const char *data, size_t len, loff_t off); 1068 1069 static const struct dquot_operations ext4_quota_operations = { 1070 #ifdef CONFIG_QUOTA 1071 .get_reserved_space = ext4_get_reserved_space, 1072 #endif 1073 .write_dquot = ext4_write_dquot, 1074 .acquire_dquot = ext4_acquire_dquot, 1075 .release_dquot = ext4_release_dquot, 1076 .mark_dirty = ext4_mark_dquot_dirty, 1077 .write_info = ext4_write_info, 1078 .alloc_dquot = dquot_alloc, 1079 .destroy_dquot = dquot_destroy, 1080 }; 1081 1082 static const struct quotactl_ops ext4_qctl_operations = { 1083 .quota_on = ext4_quota_on, 1084 .quota_off = vfs_quota_off, 1085 .quota_sync = vfs_quota_sync, 1086 .get_info = vfs_get_dqinfo, 1087 .set_info = vfs_set_dqinfo, 1088 .get_dqblk = vfs_get_dqblk, 1089 .set_dqblk = vfs_set_dqblk 1090 }; 1091 #endif 1092 1093 static const struct super_operations ext4_sops = { 1094 .alloc_inode = ext4_alloc_inode, 1095 .destroy_inode = ext4_destroy_inode, 1096 .write_inode = ext4_write_inode, 1097 .dirty_inode = ext4_dirty_inode, 1098 .delete_inode = ext4_delete_inode, 1099 .put_super = ext4_put_super, 1100 .sync_fs = ext4_sync_fs, 1101 .freeze_fs = ext4_freeze, 1102 .unfreeze_fs = ext4_unfreeze, 1103 .statfs = ext4_statfs, 1104 .remount_fs = ext4_remount, 1105 .clear_inode = ext4_clear_inode, 1106 .show_options = ext4_show_options, 1107 #ifdef CONFIG_QUOTA 1108 .quota_read = ext4_quota_read, 1109 .quota_write = ext4_quota_write, 1110 #endif 1111 .bdev_try_to_free_page = bdev_try_to_free_page, 1112 }; 1113 1114 static const struct super_operations ext4_nojournal_sops = { 1115 .alloc_inode = ext4_alloc_inode, 1116 .destroy_inode = ext4_destroy_inode, 1117 .write_inode = ext4_write_inode, 1118 .dirty_inode = ext4_dirty_inode, 1119 .delete_inode = ext4_delete_inode, 1120 .write_super = ext4_write_super, 1121 .put_super = ext4_put_super, 1122 .statfs = ext4_statfs, 1123 .remount_fs = ext4_remount, 1124 .clear_inode = ext4_clear_inode, 1125 .show_options = ext4_show_options, 1126 #ifdef CONFIG_QUOTA 1127 .quota_read = ext4_quota_read, 1128 .quota_write = ext4_quota_write, 1129 #endif 1130 .bdev_try_to_free_page = bdev_try_to_free_page, 1131 }; 1132 1133 static const struct export_operations ext4_export_ops = { 1134 .fh_to_dentry = ext4_fh_to_dentry, 1135 .fh_to_parent = ext4_fh_to_parent, 1136 .get_parent = ext4_get_parent, 1137 }; 1138 1139 enum { 1140 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 1141 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 1142 Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, 1143 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 1144 Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh, 1145 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, 1146 Opt_journal_update, Opt_journal_dev, 1147 Opt_journal_checksum, Opt_journal_async_commit, 1148 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1149 Opt_data_err_abort, Opt_data_err_ignore, 1150 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1151 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 1152 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, 1153 Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, 1154 Opt_stripe, Opt_delalloc, Opt_nodelalloc, 1155 Opt_block_validity, Opt_noblock_validity, 1156 Opt_inode_readahead_blks, Opt_journal_ioprio, 1157 Opt_dioread_nolock, Opt_dioread_lock, 1158 Opt_discard, Opt_nodiscard, 1159 }; 1160 1161 static const match_table_t tokens = { 1162 {Opt_bsd_df, "bsddf"}, 1163 {Opt_minix_df, "minixdf"}, 1164 {Opt_grpid, "grpid"}, 1165 {Opt_grpid, "bsdgroups"}, 1166 {Opt_nogrpid, "nogrpid"}, 1167 {Opt_nogrpid, "sysvgroups"}, 1168 {Opt_resgid, "resgid=%u"}, 1169 {Opt_resuid, "resuid=%u"}, 1170 {Opt_sb, "sb=%u"}, 1171 {Opt_err_cont, "errors=continue"}, 1172 {Opt_err_panic, "errors=panic"}, 1173 {Opt_err_ro, "errors=remount-ro"}, 1174 {Opt_nouid32, "nouid32"}, 1175 {Opt_debug, "debug"}, 1176 {Opt_oldalloc, "oldalloc"}, 1177 {Opt_orlov, "orlov"}, 1178 {Opt_user_xattr, "user_xattr"}, 1179 {Opt_nouser_xattr, "nouser_xattr"}, 1180 {Opt_acl, "acl"}, 1181 {Opt_noacl, "noacl"}, 1182 {Opt_noload, "noload"}, 1183 {Opt_noload, "norecovery"}, 1184 {Opt_nobh, "nobh"}, 1185 {Opt_bh, "bh"}, 1186 {Opt_commit, "commit=%u"}, 1187 {Opt_min_batch_time, "min_batch_time=%u"}, 1188 {Opt_max_batch_time, "max_batch_time=%u"}, 1189 {Opt_journal_update, "journal=update"}, 1190 {Opt_journal_dev, "journal_dev=%u"}, 1191 {Opt_journal_checksum, "journal_checksum"}, 1192 {Opt_journal_async_commit, "journal_async_commit"}, 1193 {Opt_abort, "abort"}, 1194 {Opt_data_journal, "data=journal"}, 1195 {Opt_data_ordered, "data=ordered"}, 1196 {Opt_data_writeback, "data=writeback"}, 1197 {Opt_data_err_abort, "data_err=abort"}, 1198 {Opt_data_err_ignore, "data_err=ignore"}, 1199 {Opt_offusrjquota, "usrjquota="}, 1200 {Opt_usrjquota, "usrjquota=%s"}, 1201 {Opt_offgrpjquota, "grpjquota="}, 1202 {Opt_grpjquota, "grpjquota=%s"}, 1203 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 1204 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 1205 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, 1206 {Opt_grpquota, "grpquota"}, 1207 {Opt_noquota, "noquota"}, 1208 {Opt_quota, "quota"}, 1209 {Opt_usrquota, "usrquota"}, 1210 {Opt_barrier, "barrier=%u"}, 1211 {Opt_barrier, "barrier"}, 1212 {Opt_nobarrier, "nobarrier"}, 1213 {Opt_i_version, "i_version"}, 1214 {Opt_stripe, "stripe=%u"}, 1215 {Opt_resize, "resize"}, 1216 {Opt_delalloc, "delalloc"}, 1217 {Opt_nodelalloc, "nodelalloc"}, 1218 {Opt_block_validity, "block_validity"}, 1219 {Opt_noblock_validity, "noblock_validity"}, 1220 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1221 {Opt_journal_ioprio, "journal_ioprio=%u"}, 1222 {Opt_auto_da_alloc, "auto_da_alloc=%u"}, 1223 {Opt_auto_da_alloc, "auto_da_alloc"}, 1224 {Opt_noauto_da_alloc, "noauto_da_alloc"}, 1225 {Opt_dioread_nolock, "dioread_nolock"}, 1226 {Opt_dioread_lock, "dioread_lock"}, 1227 {Opt_discard, "discard"}, 1228 {Opt_nodiscard, "nodiscard"}, 1229 {Opt_err, NULL}, 1230 }; 1231 1232 static ext4_fsblk_t get_sb_block(void **data) 1233 { 1234 ext4_fsblk_t sb_block; 1235 char *options = (char *) *data; 1236 1237 if (!options || strncmp(options, "sb=", 3) != 0) 1238 return 1; /* Default location */ 1239 1240 options += 3; 1241 /* TODO: use simple_strtoll with >32bit ext4 */ 1242 sb_block = simple_strtoul(options, &options, 0); 1243 if (*options && *options != ',') { 1244 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", 1245 (char *) *data); 1246 return 1; 1247 } 1248 if (*options == ',') 1249 options++; 1250 *data = (void *) options; 1251 1252 return sb_block; 1253 } 1254 1255 #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) 1256 static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n" 1257 "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n"; 1258 1259 #ifdef CONFIG_QUOTA 1260 static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) 1261 { 1262 struct ext4_sb_info *sbi = EXT4_SB(sb); 1263 char *qname; 1264 1265 if (sb_any_quota_loaded(sb) && 1266 !sbi->s_qf_names[qtype]) { 1267 ext4_msg(sb, KERN_ERR, 1268 "Cannot change journaled " 1269 "quota options when quota turned on"); 1270 return 0; 1271 } 1272 qname = match_strdup(args); 1273 if (!qname) { 1274 ext4_msg(sb, KERN_ERR, 1275 "Not enough memory for storing quotafile name"); 1276 return 0; 1277 } 1278 if (sbi->s_qf_names[qtype] && 1279 strcmp(sbi->s_qf_names[qtype], qname)) { 1280 ext4_msg(sb, KERN_ERR, 1281 "%s quota file already specified", QTYPE2NAME(qtype)); 1282 kfree(qname); 1283 return 0; 1284 } 1285 sbi->s_qf_names[qtype] = qname; 1286 if (strchr(sbi->s_qf_names[qtype], '/')) { 1287 ext4_msg(sb, KERN_ERR, 1288 "quotafile must be on filesystem root"); 1289 kfree(sbi->s_qf_names[qtype]); 1290 sbi->s_qf_names[qtype] = NULL; 1291 return 0; 1292 } 1293 set_opt(sbi->s_mount_opt, QUOTA); 1294 return 1; 1295 } 1296 1297 static int clear_qf_name(struct super_block *sb, int qtype) 1298 { 1299 1300 struct ext4_sb_info *sbi = EXT4_SB(sb); 1301 1302 if (sb_any_quota_loaded(sb) && 1303 sbi->s_qf_names[qtype]) { 1304 ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options" 1305 " when quota turned on"); 1306 return 0; 1307 } 1308 /* 1309 * The space will be released later when all options are confirmed 1310 * to be correct 1311 */ 1312 sbi->s_qf_names[qtype] = NULL; 1313 return 1; 1314 } 1315 #endif 1316 1317 static int parse_options(char *options, struct super_block *sb, 1318 unsigned long *journal_devnum, 1319 unsigned int *journal_ioprio, 1320 ext4_fsblk_t *n_blocks_count, int is_remount) 1321 { 1322 struct ext4_sb_info *sbi = EXT4_SB(sb); 1323 char *p; 1324 substring_t args[MAX_OPT_ARGS]; 1325 int data_opt = 0; 1326 int option; 1327 #ifdef CONFIG_QUOTA 1328 int qfmt; 1329 #endif 1330 1331 if (!options) 1332 return 1; 1333 1334 while ((p = strsep(&options, ",")) != NULL) { 1335 int token; 1336 if (!*p) 1337 continue; 1338 1339 /* 1340 * Initialize args struct so we know whether arg was 1341 * found; some options take optional arguments. 1342 */ 1343 args[0].to = args[0].from = 0; 1344 token = match_token(p, tokens, args); 1345 switch (token) { 1346 case Opt_bsd_df: 1347 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1348 clear_opt(sbi->s_mount_opt, MINIX_DF); 1349 break; 1350 case Opt_minix_df: 1351 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1352 set_opt(sbi->s_mount_opt, MINIX_DF); 1353 1354 break; 1355 case Opt_grpid: 1356 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1357 set_opt(sbi->s_mount_opt, GRPID); 1358 1359 break; 1360 case Opt_nogrpid: 1361 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1362 clear_opt(sbi->s_mount_opt, GRPID); 1363 1364 break; 1365 case Opt_resuid: 1366 if (match_int(&args[0], &option)) 1367 return 0; 1368 sbi->s_resuid = option; 1369 break; 1370 case Opt_resgid: 1371 if (match_int(&args[0], &option)) 1372 return 0; 1373 sbi->s_resgid = option; 1374 break; 1375 case Opt_sb: 1376 /* handled by get_sb_block() instead of here */ 1377 /* *sb_block = match_int(&args[0]); */ 1378 break; 1379 case Opt_err_panic: 1380 clear_opt(sbi->s_mount_opt, ERRORS_CONT); 1381 clear_opt(sbi->s_mount_opt, ERRORS_RO); 1382 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 1383 break; 1384 case Opt_err_ro: 1385 clear_opt(sbi->s_mount_opt, ERRORS_CONT); 1386 clear_opt(sbi->s_mount_opt, ERRORS_PANIC); 1387 set_opt(sbi->s_mount_opt, ERRORS_RO); 1388 break; 1389 case Opt_err_cont: 1390 clear_opt(sbi->s_mount_opt, ERRORS_RO); 1391 clear_opt(sbi->s_mount_opt, ERRORS_PANIC); 1392 set_opt(sbi->s_mount_opt, ERRORS_CONT); 1393 break; 1394 case Opt_nouid32: 1395 set_opt(sbi->s_mount_opt, NO_UID32); 1396 break; 1397 case Opt_debug: 1398 set_opt(sbi->s_mount_opt, DEBUG); 1399 break; 1400 case Opt_oldalloc: 1401 set_opt(sbi->s_mount_opt, OLDALLOC); 1402 break; 1403 case Opt_orlov: 1404 clear_opt(sbi->s_mount_opt, OLDALLOC); 1405 break; 1406 #ifdef CONFIG_EXT4_FS_XATTR 1407 case Opt_user_xattr: 1408 set_opt(sbi->s_mount_opt, XATTR_USER); 1409 break; 1410 case Opt_nouser_xattr: 1411 clear_opt(sbi->s_mount_opt, XATTR_USER); 1412 break; 1413 #else 1414 case Opt_user_xattr: 1415 case Opt_nouser_xattr: 1416 ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported"); 1417 break; 1418 #endif 1419 #ifdef CONFIG_EXT4_FS_POSIX_ACL 1420 case Opt_acl: 1421 set_opt(sbi->s_mount_opt, POSIX_ACL); 1422 break; 1423 case Opt_noacl: 1424 clear_opt(sbi->s_mount_opt, POSIX_ACL); 1425 break; 1426 #else 1427 case Opt_acl: 1428 case Opt_noacl: 1429 ext4_msg(sb, KERN_ERR, "(no)acl options not supported"); 1430 break; 1431 #endif 1432 case Opt_journal_update: 1433 /* @@@ FIXME */ 1434 /* Eventually we will want to be able to create 1435 a journal file here. For now, only allow the 1436 user to specify an existing inode to be the 1437 journal file. */ 1438 if (is_remount) { 1439 ext4_msg(sb, KERN_ERR, 1440 "Cannot specify journal on remount"); 1441 return 0; 1442 } 1443 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); 1444 break; 1445 case Opt_journal_dev: 1446 if (is_remount) { 1447 ext4_msg(sb, KERN_ERR, 1448 "Cannot specify journal on remount"); 1449 return 0; 1450 } 1451 if (match_int(&args[0], &option)) 1452 return 0; 1453 *journal_devnum = option; 1454 break; 1455 case Opt_journal_checksum: 1456 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1457 break; 1458 case Opt_journal_async_commit: 1459 set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); 1460 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1461 break; 1462 case Opt_noload: 1463 set_opt(sbi->s_mount_opt, NOLOAD); 1464 break; 1465 case Opt_commit: 1466 if (match_int(&args[0], &option)) 1467 return 0; 1468 if (option < 0) 1469 return 0; 1470 if (option == 0) 1471 option = JBD2_DEFAULT_MAX_COMMIT_AGE; 1472 sbi->s_commit_interval = HZ * option; 1473 break; 1474 case Opt_max_batch_time: 1475 if (match_int(&args[0], &option)) 1476 return 0; 1477 if (option < 0) 1478 return 0; 1479 if (option == 0) 1480 option = EXT4_DEF_MAX_BATCH_TIME; 1481 sbi->s_max_batch_time = option; 1482 break; 1483 case Opt_min_batch_time: 1484 if (match_int(&args[0], &option)) 1485 return 0; 1486 if (option < 0) 1487 return 0; 1488 sbi->s_min_batch_time = option; 1489 break; 1490 case Opt_data_journal: 1491 data_opt = EXT4_MOUNT_JOURNAL_DATA; 1492 goto datacheck; 1493 case Opt_data_ordered: 1494 data_opt = EXT4_MOUNT_ORDERED_DATA; 1495 goto datacheck; 1496 case Opt_data_writeback: 1497 data_opt = EXT4_MOUNT_WRITEBACK_DATA; 1498 datacheck: 1499 if (is_remount) { 1500 if (test_opt(sb, DATA_FLAGS) != data_opt) { 1501 ext4_msg(sb, KERN_ERR, 1502 "Cannot change data mode on remount"); 1503 return 0; 1504 } 1505 } else { 1506 clear_opt(sbi->s_mount_opt, DATA_FLAGS); 1507 sbi->s_mount_opt |= data_opt; 1508 } 1509 break; 1510 case Opt_data_err_abort: 1511 set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1512 break; 1513 case Opt_data_err_ignore: 1514 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1515 break; 1516 #ifdef CONFIG_QUOTA 1517 case Opt_usrjquota: 1518 if (!set_qf_name(sb, USRQUOTA, &args[0])) 1519 return 0; 1520 break; 1521 case Opt_grpjquota: 1522 if (!set_qf_name(sb, GRPQUOTA, &args[0])) 1523 return 0; 1524 break; 1525 case Opt_offusrjquota: 1526 if (!clear_qf_name(sb, USRQUOTA)) 1527 return 0; 1528 break; 1529 case Opt_offgrpjquota: 1530 if (!clear_qf_name(sb, GRPQUOTA)) 1531 return 0; 1532 break; 1533 1534 case Opt_jqfmt_vfsold: 1535 qfmt = QFMT_VFS_OLD; 1536 goto set_qf_format; 1537 case Opt_jqfmt_vfsv0: 1538 qfmt = QFMT_VFS_V0; 1539 goto set_qf_format; 1540 case Opt_jqfmt_vfsv1: 1541 qfmt = QFMT_VFS_V1; 1542 set_qf_format: 1543 if (sb_any_quota_loaded(sb) && 1544 sbi->s_jquota_fmt != qfmt) { 1545 ext4_msg(sb, KERN_ERR, "Cannot change " 1546 "journaled quota options when " 1547 "quota turned on"); 1548 return 0; 1549 } 1550 sbi->s_jquota_fmt = qfmt; 1551 break; 1552 case Opt_quota: 1553 case Opt_usrquota: 1554 set_opt(sbi->s_mount_opt, QUOTA); 1555 set_opt(sbi->s_mount_opt, USRQUOTA); 1556 break; 1557 case Opt_grpquota: 1558 set_opt(sbi->s_mount_opt, QUOTA); 1559 set_opt(sbi->s_mount_opt, GRPQUOTA); 1560 break; 1561 case Opt_noquota: 1562 if (sb_any_quota_loaded(sb)) { 1563 ext4_msg(sb, KERN_ERR, "Cannot change quota " 1564 "options when quota turned on"); 1565 return 0; 1566 } 1567 clear_opt(sbi->s_mount_opt, QUOTA); 1568 clear_opt(sbi->s_mount_opt, USRQUOTA); 1569 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1570 break; 1571 #else 1572 case Opt_quota: 1573 case Opt_usrquota: 1574 case Opt_grpquota: 1575 ext4_msg(sb, KERN_ERR, 1576 "quota options not supported"); 1577 break; 1578 case Opt_usrjquota: 1579 case Opt_grpjquota: 1580 case Opt_offusrjquota: 1581 case Opt_offgrpjquota: 1582 case Opt_jqfmt_vfsold: 1583 case Opt_jqfmt_vfsv0: 1584 case Opt_jqfmt_vfsv1: 1585 ext4_msg(sb, KERN_ERR, 1586 "journaled quota options not supported"); 1587 break; 1588 case Opt_noquota: 1589 break; 1590 #endif 1591 case Opt_abort: 1592 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; 1593 break; 1594 case Opt_nobarrier: 1595 clear_opt(sbi->s_mount_opt, BARRIER); 1596 break; 1597 case Opt_barrier: 1598 if (args[0].from) { 1599 if (match_int(&args[0], &option)) 1600 return 0; 1601 } else 1602 option = 1; /* No argument, default to 1 */ 1603 if (option) 1604 set_opt(sbi->s_mount_opt, BARRIER); 1605 else 1606 clear_opt(sbi->s_mount_opt, BARRIER); 1607 break; 1608 case Opt_ignore: 1609 break; 1610 case Opt_resize: 1611 if (!is_remount) { 1612 ext4_msg(sb, KERN_ERR, 1613 "resize option only available " 1614 "for remount"); 1615 return 0; 1616 } 1617 if (match_int(&args[0], &option) != 0) 1618 return 0; 1619 *n_blocks_count = option; 1620 break; 1621 case Opt_nobh: 1622 set_opt(sbi->s_mount_opt, NOBH); 1623 break; 1624 case Opt_bh: 1625 clear_opt(sbi->s_mount_opt, NOBH); 1626 break; 1627 case Opt_i_version: 1628 set_opt(sbi->s_mount_opt, I_VERSION); 1629 sb->s_flags |= MS_I_VERSION; 1630 break; 1631 case Opt_nodelalloc: 1632 clear_opt(sbi->s_mount_opt, DELALLOC); 1633 break; 1634 case Opt_stripe: 1635 if (match_int(&args[0], &option)) 1636 return 0; 1637 if (option < 0) 1638 return 0; 1639 sbi->s_stripe = option; 1640 break; 1641 case Opt_delalloc: 1642 set_opt(sbi->s_mount_opt, DELALLOC); 1643 break; 1644 case Opt_block_validity: 1645 set_opt(sbi->s_mount_opt, BLOCK_VALIDITY); 1646 break; 1647 case Opt_noblock_validity: 1648 clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY); 1649 break; 1650 case Opt_inode_readahead_blks: 1651 if (match_int(&args[0], &option)) 1652 return 0; 1653 if (option < 0 || option > (1 << 30)) 1654 return 0; 1655 if (!is_power_of_2(option)) { 1656 ext4_msg(sb, KERN_ERR, 1657 "EXT4-fs: inode_readahead_blks" 1658 " must be a power of 2"); 1659 return 0; 1660 } 1661 sbi->s_inode_readahead_blks = option; 1662 break; 1663 case Opt_journal_ioprio: 1664 if (match_int(&args[0], &option)) 1665 return 0; 1666 if (option < 0 || option > 7) 1667 break; 1668 *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 1669 option); 1670 break; 1671 case Opt_noauto_da_alloc: 1672 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1673 break; 1674 case Opt_auto_da_alloc: 1675 if (args[0].from) { 1676 if (match_int(&args[0], &option)) 1677 return 0; 1678 } else 1679 option = 1; /* No argument, default to 1 */ 1680 if (option) 1681 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); 1682 else 1683 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1684 break; 1685 case Opt_discard: 1686 set_opt(sbi->s_mount_opt, DISCARD); 1687 break; 1688 case Opt_nodiscard: 1689 clear_opt(sbi->s_mount_opt, DISCARD); 1690 break; 1691 case Opt_dioread_nolock: 1692 set_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 1693 break; 1694 case Opt_dioread_lock: 1695 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 1696 break; 1697 default: 1698 ext4_msg(sb, KERN_ERR, 1699 "Unrecognized mount option \"%s\" " 1700 "or missing value", p); 1701 return 0; 1702 } 1703 } 1704 #ifdef CONFIG_QUOTA 1705 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1706 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) 1707 clear_opt(sbi->s_mount_opt, USRQUOTA); 1708 1709 if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) 1710 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1711 1712 if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { 1713 ext4_msg(sb, KERN_ERR, "old and new quota " 1714 "format mixing"); 1715 return 0; 1716 } 1717 1718 if (!sbi->s_jquota_fmt) { 1719 ext4_msg(sb, KERN_ERR, "journaled quota format " 1720 "not specified"); 1721 return 0; 1722 } 1723 } else { 1724 if (sbi->s_jquota_fmt) { 1725 ext4_msg(sb, KERN_ERR, "journaled quota format " 1726 "specified with no journaling " 1727 "enabled"); 1728 return 0; 1729 } 1730 } 1731 #endif 1732 return 1; 1733 } 1734 1735 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, 1736 int read_only) 1737 { 1738 struct ext4_sb_info *sbi = EXT4_SB(sb); 1739 int res = 0; 1740 1741 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1742 ext4_msg(sb, KERN_ERR, "revision level too high, " 1743 "forcing read-only mode"); 1744 res = MS_RDONLY; 1745 } 1746 if (read_only) 1747 return res; 1748 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1749 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " 1750 "running e2fsck is recommended"); 1751 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1752 ext4_msg(sb, KERN_WARNING, 1753 "warning: mounting fs with errors, " 1754 "running e2fsck is recommended"); 1755 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1756 le16_to_cpu(es->s_mnt_count) >= 1757 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1758 ext4_msg(sb, KERN_WARNING, 1759 "warning: maximal mount count reached, " 1760 "running e2fsck is recommended"); 1761 else if (le32_to_cpu(es->s_checkinterval) && 1762 (le32_to_cpu(es->s_lastcheck) + 1763 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1764 ext4_msg(sb, KERN_WARNING, 1765 "warning: checktime reached, " 1766 "running e2fsck is recommended"); 1767 if (!sbi->s_journal) 1768 es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 1769 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1770 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1771 le16_add_cpu(&es->s_mnt_count, 1); 1772 es->s_mtime = cpu_to_le32(get_seconds()); 1773 ext4_update_dynamic_rev(sb); 1774 if (sbi->s_journal) 1775 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1776 1777 ext4_commit_super(sb, 1); 1778 if (test_opt(sb, DEBUG)) 1779 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1780 "bpg=%lu, ipg=%lu, mo=%04x]\n", 1781 sb->s_blocksize, 1782 sbi->s_groups_count, 1783 EXT4_BLOCKS_PER_GROUP(sb), 1784 EXT4_INODES_PER_GROUP(sb), 1785 sbi->s_mount_opt); 1786 1787 return res; 1788 } 1789 1790 static int ext4_fill_flex_info(struct super_block *sb) 1791 { 1792 struct ext4_sb_info *sbi = EXT4_SB(sb); 1793 struct ext4_group_desc *gdp = NULL; 1794 ext4_group_t flex_group_count; 1795 ext4_group_t flex_group; 1796 int groups_per_flex = 0; 1797 size_t size; 1798 int i; 1799 1800 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 1801 groups_per_flex = 1 << sbi->s_log_groups_per_flex; 1802 1803 if (groups_per_flex < 2) { 1804 sbi->s_log_groups_per_flex = 0; 1805 return 1; 1806 } 1807 1808 /* We allocate both existing and potentially added groups */ 1809 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1810 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << 1811 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; 1812 size = flex_group_count * sizeof(struct flex_groups); 1813 sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); 1814 if (sbi->s_flex_groups == NULL) { 1815 sbi->s_flex_groups = vmalloc(size); 1816 if (sbi->s_flex_groups) 1817 memset(sbi->s_flex_groups, 0, size); 1818 } 1819 if (sbi->s_flex_groups == NULL) { 1820 ext4_msg(sb, KERN_ERR, "not enough memory for " 1821 "%u flex groups", flex_group_count); 1822 goto failed; 1823 } 1824 1825 for (i = 0; i < sbi->s_groups_count; i++) { 1826 gdp = ext4_get_group_desc(sb, i, NULL); 1827 1828 flex_group = ext4_flex_group(sbi, i); 1829 atomic_add(ext4_free_inodes_count(sb, gdp), 1830 &sbi->s_flex_groups[flex_group].free_inodes); 1831 atomic_add(ext4_free_blks_count(sb, gdp), 1832 &sbi->s_flex_groups[flex_group].free_blocks); 1833 atomic_add(ext4_used_dirs_count(sb, gdp), 1834 &sbi->s_flex_groups[flex_group].used_dirs); 1835 } 1836 1837 return 1; 1838 failed: 1839 return 0; 1840 } 1841 1842 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, 1843 struct ext4_group_desc *gdp) 1844 { 1845 __u16 crc = 0; 1846 1847 if (sbi->s_es->s_feature_ro_compat & 1848 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { 1849 int offset = offsetof(struct ext4_group_desc, bg_checksum); 1850 __le32 le_group = cpu_to_le32(block_group); 1851 1852 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); 1853 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); 1854 crc = crc16(crc, (__u8 *)gdp, offset); 1855 offset += sizeof(gdp->bg_checksum); /* skip checksum */ 1856 /* for checksum of struct ext4_group_desc do the rest...*/ 1857 if ((sbi->s_es->s_feature_incompat & 1858 cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && 1859 offset < le16_to_cpu(sbi->s_es->s_desc_size)) 1860 crc = crc16(crc, (__u8 *)gdp + offset, 1861 le16_to_cpu(sbi->s_es->s_desc_size) - 1862 offset); 1863 } 1864 1865 return cpu_to_le16(crc); 1866 } 1867 1868 int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, 1869 struct ext4_group_desc *gdp) 1870 { 1871 if ((sbi->s_es->s_feature_ro_compat & 1872 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) && 1873 (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp))) 1874 return 0; 1875 1876 return 1; 1877 } 1878 1879 /* Called at mount-time, super-block is locked */ 1880 static int ext4_check_descriptors(struct super_block *sb) 1881 { 1882 struct ext4_sb_info *sbi = EXT4_SB(sb); 1883 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); 1884 ext4_fsblk_t last_block; 1885 ext4_fsblk_t block_bitmap; 1886 ext4_fsblk_t inode_bitmap; 1887 ext4_fsblk_t inode_table; 1888 int flexbg_flag = 0; 1889 ext4_group_t i; 1890 1891 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 1892 flexbg_flag = 1; 1893 1894 ext4_debug("Checking group descriptors"); 1895 1896 for (i = 0; i < sbi->s_groups_count; i++) { 1897 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 1898 1899 if (i == sbi->s_groups_count - 1 || flexbg_flag) 1900 last_block = ext4_blocks_count(sbi->s_es) - 1; 1901 else 1902 last_block = first_block + 1903 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 1904 1905 block_bitmap = ext4_block_bitmap(sb, gdp); 1906 if (block_bitmap < first_block || block_bitmap > last_block) { 1907 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 1908 "Block bitmap for group %u not in group " 1909 "(block %llu)!", i, block_bitmap); 1910 return 0; 1911 } 1912 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1913 if (inode_bitmap < first_block || inode_bitmap > last_block) { 1914 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 1915 "Inode bitmap for group %u not in group " 1916 "(block %llu)!", i, inode_bitmap); 1917 return 0; 1918 } 1919 inode_table = ext4_inode_table(sb, gdp); 1920 if (inode_table < first_block || 1921 inode_table + sbi->s_itb_per_group - 1 > last_block) { 1922 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 1923 "Inode table for group %u not in group " 1924 "(block %llu)!", i, inode_table); 1925 return 0; 1926 } 1927 ext4_lock_group(sb, i); 1928 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1929 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 1930 "Checksum for group %u failed (%u!=%u)", 1931 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1932 gdp)), le16_to_cpu(gdp->bg_checksum)); 1933 if (!(sb->s_flags & MS_RDONLY)) { 1934 ext4_unlock_group(sb, i); 1935 return 0; 1936 } 1937 } 1938 ext4_unlock_group(sb, i); 1939 if (!flexbg_flag) 1940 first_block += EXT4_BLOCKS_PER_GROUP(sb); 1941 } 1942 1943 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 1944 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); 1945 return 1; 1946 } 1947 1948 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at 1949 * the superblock) which were deleted from all directories, but held open by 1950 * a process at the time of a crash. We walk the list and try to delete these 1951 * inodes at recovery time (only with a read-write filesystem). 1952 * 1953 * In order to keep the orphan inode chain consistent during traversal (in 1954 * case of crash during recovery), we link each inode into the superblock 1955 * orphan list_head and handle it the same way as an inode deletion during 1956 * normal operation (which journals the operations for us). 1957 * 1958 * We only do an iget() and an iput() on each inode, which is very safe if we 1959 * accidentally point at an in-use or already deleted inode. The worst that 1960 * can happen in this case is that we get a "bit already cleared" message from 1961 * ext4_free_inode(). The only reason we would point at a wrong inode is if 1962 * e2fsck was run on this filesystem, and it must have already done the orphan 1963 * inode cleanup for us, so we can safely abort without any further action. 1964 */ 1965 static void ext4_orphan_cleanup(struct super_block *sb, 1966 struct ext4_super_block *es) 1967 { 1968 unsigned int s_flags = sb->s_flags; 1969 int nr_orphans = 0, nr_truncates = 0; 1970 #ifdef CONFIG_QUOTA 1971 int i; 1972 #endif 1973 if (!es->s_last_orphan) { 1974 jbd_debug(4, "no orphan inodes to clean up\n"); 1975 return; 1976 } 1977 1978 if (bdev_read_only(sb->s_bdev)) { 1979 ext4_msg(sb, KERN_ERR, "write access " 1980 "unavailable, skipping orphan cleanup"); 1981 return; 1982 } 1983 1984 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 1985 if (es->s_last_orphan) 1986 jbd_debug(1, "Errors on filesystem, " 1987 "clearing orphan list.\n"); 1988 es->s_last_orphan = 0; 1989 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 1990 return; 1991 } 1992 1993 if (s_flags & MS_RDONLY) { 1994 ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); 1995 sb->s_flags &= ~MS_RDONLY; 1996 } 1997 #ifdef CONFIG_QUOTA 1998 /* Needed for iput() to work correctly and not trash data */ 1999 sb->s_flags |= MS_ACTIVE; 2000 /* Turn on quotas so that they are updated correctly */ 2001 for (i = 0; i < MAXQUOTAS; i++) { 2002 if (EXT4_SB(sb)->s_qf_names[i]) { 2003 int ret = ext4_quota_on_mount(sb, i); 2004 if (ret < 0) 2005 ext4_msg(sb, KERN_ERR, 2006 "Cannot turn on journaled " 2007 "quota: error %d", ret); 2008 } 2009 } 2010 #endif 2011 2012 while (es->s_last_orphan) { 2013 struct inode *inode; 2014 2015 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); 2016 if (IS_ERR(inode)) { 2017 es->s_last_orphan = 0; 2018 break; 2019 } 2020 2021 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 2022 dquot_initialize(inode); 2023 if (inode->i_nlink) { 2024 ext4_msg(sb, KERN_DEBUG, 2025 "%s: truncating inode %lu to %lld bytes", 2026 __func__, inode->i_ino, inode->i_size); 2027 jbd_debug(2, "truncating inode %lu to %lld bytes\n", 2028 inode->i_ino, inode->i_size); 2029 ext4_truncate(inode); 2030 nr_truncates++; 2031 } else { 2032 ext4_msg(sb, KERN_DEBUG, 2033 "%s: deleting unreferenced inode %lu", 2034 __func__, inode->i_ino); 2035 jbd_debug(2, "deleting unreferenced inode %lu\n", 2036 inode->i_ino); 2037 nr_orphans++; 2038 } 2039 iput(inode); /* The delete magic happens here! */ 2040 } 2041 2042 #define PLURAL(x) (x), ((x) == 1) ? "" : "s" 2043 2044 if (nr_orphans) 2045 ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted", 2046 PLURAL(nr_orphans)); 2047 if (nr_truncates) 2048 ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", 2049 PLURAL(nr_truncates)); 2050 #ifdef CONFIG_QUOTA 2051 /* Turn quotas off */ 2052 for (i = 0; i < MAXQUOTAS; i++) { 2053 if (sb_dqopt(sb)->files[i]) 2054 vfs_quota_off(sb, i, 0); 2055 } 2056 #endif 2057 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 2058 } 2059 2060 /* 2061 * Maximal extent format file size. 2062 * Resulting logical blkno at s_maxbytes must fit in our on-disk 2063 * extent format containers, within a sector_t, and within i_blocks 2064 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, 2065 * so that won't be a limiting factor. 2066 * 2067 * Note, this does *not* consider any metadata overhead for vfs i_blocks. 2068 */ 2069 static loff_t ext4_max_size(int blkbits, int has_huge_files) 2070 { 2071 loff_t res; 2072 loff_t upper_limit = MAX_LFS_FILESIZE; 2073 2074 /* small i_blocks in vfs inode? */ 2075 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 2076 /* 2077 * CONFIG_LBDAF is not enabled implies the inode 2078 * i_block represent total blocks in 512 bytes 2079 * 32 == size of vfs inode i_blocks * 8 2080 */ 2081 upper_limit = (1LL << 32) - 1; 2082 2083 /* total blocks in file system block size */ 2084 upper_limit >>= (blkbits - 9); 2085 upper_limit <<= blkbits; 2086 } 2087 2088 /* 32-bit extent-start container, ee_block */ 2089 res = 1LL << 32; 2090 res <<= blkbits; 2091 res -= 1; 2092 2093 /* Sanity check against vm- & vfs- imposed limits */ 2094 if (res > upper_limit) 2095 res = upper_limit; 2096 2097 return res; 2098 } 2099 2100 /* 2101 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect 2102 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. 2103 * We need to be 1 filesystem block less than the 2^48 sector limit. 2104 */ 2105 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) 2106 { 2107 loff_t res = EXT4_NDIR_BLOCKS; 2108 int meta_blocks; 2109 loff_t upper_limit; 2110 /* This is calculated to be the largest file size for a dense, block 2111 * mapped file such that the file's total number of 512-byte sectors, 2112 * including data and all indirect blocks, does not exceed (2^48 - 1). 2113 * 2114 * __u32 i_blocks_lo and _u16 i_blocks_high represent the total 2115 * number of 512-byte sectors of the file. 2116 */ 2117 2118 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 2119 /* 2120 * !has_huge_files or CONFIG_LBDAF not enabled implies that 2121 * the inode i_block field represents total file blocks in 2122 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8 2123 */ 2124 upper_limit = (1LL << 32) - 1; 2125 2126 /* total blocks in file system block size */ 2127 upper_limit >>= (bits - 9); 2128 2129 } else { 2130 /* 2131 * We use 48 bit ext4_inode i_blocks 2132 * With EXT4_HUGE_FILE_FL set the i_blocks 2133 * represent total number of blocks in 2134 * file system block size 2135 */ 2136 upper_limit = (1LL << 48) - 1; 2137 2138 } 2139 2140 /* indirect blocks */ 2141 meta_blocks = 1; 2142 /* double indirect blocks */ 2143 meta_blocks += 1 + (1LL << (bits-2)); 2144 /* tripple indirect blocks */ 2145 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); 2146 2147 upper_limit -= meta_blocks; 2148 upper_limit <<= bits; 2149 2150 res += 1LL << (bits-2); 2151 res += 1LL << (2*(bits-2)); 2152 res += 1LL << (3*(bits-2)); 2153 res <<= bits; 2154 if (res > upper_limit) 2155 res = upper_limit; 2156 2157 if (res > MAX_LFS_FILESIZE) 2158 res = MAX_LFS_FILESIZE; 2159 2160 return res; 2161 } 2162 2163 static ext4_fsblk_t descriptor_loc(struct super_block *sb, 2164 ext4_fsblk_t logical_sb_block, int nr) 2165 { 2166 struct ext4_sb_info *sbi = EXT4_SB(sb); 2167 ext4_group_t bg, first_meta_bg; 2168 int has_super = 0; 2169 2170 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 2171 2172 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || 2173 nr < first_meta_bg) 2174 return logical_sb_block + nr + 1; 2175 bg = sbi->s_desc_per_block * nr; 2176 if (ext4_bg_has_super(sb, bg)) 2177 has_super = 1; 2178 2179 return (has_super + ext4_group_first_block_no(sb, bg)); 2180 } 2181 2182 /** 2183 * ext4_get_stripe_size: Get the stripe size. 2184 * @sbi: In memory super block info 2185 * 2186 * If we have specified it via mount option, then 2187 * use the mount option value. If the value specified at mount time is 2188 * greater than the blocks per group use the super block value. 2189 * If the super block value is greater than blocks per group return 0. 2190 * Allocator needs it be less than blocks per group. 2191 * 2192 */ 2193 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) 2194 { 2195 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); 2196 unsigned long stripe_width = 2197 le32_to_cpu(sbi->s_es->s_raid_stripe_width); 2198 2199 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) 2200 return sbi->s_stripe; 2201 2202 if (stripe_width <= sbi->s_blocks_per_group) 2203 return stripe_width; 2204 2205 if (stride <= sbi->s_blocks_per_group) 2206 return stride; 2207 2208 return 0; 2209 } 2210 2211 /* sysfs supprt */ 2212 2213 struct ext4_attr { 2214 struct attribute attr; 2215 ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); 2216 ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, 2217 const char *, size_t); 2218 int offset; 2219 }; 2220 2221 static int parse_strtoul(const char *buf, 2222 unsigned long max, unsigned long *value) 2223 { 2224 char *endp; 2225 2226 *value = simple_strtoul(skip_spaces(buf), &endp, 0); 2227 endp = skip_spaces(endp); 2228 if (*endp || *value > max) 2229 return -EINVAL; 2230 2231 return 0; 2232 } 2233 2234 static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a, 2235 struct ext4_sb_info *sbi, 2236 char *buf) 2237 { 2238 return snprintf(buf, PAGE_SIZE, "%llu\n", 2239 (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); 2240 } 2241 2242 static ssize_t session_write_kbytes_show(struct ext4_attr *a, 2243 struct ext4_sb_info *sbi, char *buf) 2244 { 2245 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2246 2247 return snprintf(buf, PAGE_SIZE, "%lu\n", 2248 (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2249 sbi->s_sectors_written_start) >> 1); 2250 } 2251 2252 static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, 2253 struct ext4_sb_info *sbi, char *buf) 2254 { 2255 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2256 2257 return snprintf(buf, PAGE_SIZE, "%llu\n", 2258 (unsigned long long)(sbi->s_kbytes_written + 2259 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2260 EXT4_SB(sb)->s_sectors_written_start) >> 1))); 2261 } 2262 2263 static ssize_t inode_readahead_blks_store(struct ext4_attr *a, 2264 struct ext4_sb_info *sbi, 2265 const char *buf, size_t count) 2266 { 2267 unsigned long t; 2268 2269 if (parse_strtoul(buf, 0x40000000, &t)) 2270 return -EINVAL; 2271 2272 if (!is_power_of_2(t)) 2273 return -EINVAL; 2274 2275 sbi->s_inode_readahead_blks = t; 2276 return count; 2277 } 2278 2279 static ssize_t sbi_ui_show(struct ext4_attr *a, 2280 struct ext4_sb_info *sbi, char *buf) 2281 { 2282 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); 2283 2284 return snprintf(buf, PAGE_SIZE, "%u\n", *ui); 2285 } 2286 2287 static ssize_t sbi_ui_store(struct ext4_attr *a, 2288 struct ext4_sb_info *sbi, 2289 const char *buf, size_t count) 2290 { 2291 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); 2292 unsigned long t; 2293 2294 if (parse_strtoul(buf, 0xffffffff, &t)) 2295 return -EINVAL; 2296 *ui = t; 2297 return count; 2298 } 2299 2300 #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ 2301 static struct ext4_attr ext4_attr_##_name = { \ 2302 .attr = {.name = __stringify(_name), .mode = _mode }, \ 2303 .show = _show, \ 2304 .store = _store, \ 2305 .offset = offsetof(struct ext4_sb_info, _elname), \ 2306 } 2307 #define EXT4_ATTR(name, mode, show, store) \ 2308 static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) 2309 2310 #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) 2311 #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) 2312 #define EXT4_RW_ATTR_SBI_UI(name, elname) \ 2313 EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) 2314 #define ATTR_LIST(name) &ext4_attr_##name.attr 2315 2316 EXT4_RO_ATTR(delayed_allocation_blocks); 2317 EXT4_RO_ATTR(session_write_kbytes); 2318 EXT4_RO_ATTR(lifetime_write_kbytes); 2319 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, 2320 inode_readahead_blks_store, s_inode_readahead_blks); 2321 EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); 2322 EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); 2323 EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); 2324 EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); 2325 EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); 2326 EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); 2327 EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); 2328 EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); 2329 2330 static struct attribute *ext4_attrs[] = { 2331 ATTR_LIST(delayed_allocation_blocks), 2332 ATTR_LIST(session_write_kbytes), 2333 ATTR_LIST(lifetime_write_kbytes), 2334 ATTR_LIST(inode_readahead_blks), 2335 ATTR_LIST(inode_goal), 2336 ATTR_LIST(mb_stats), 2337 ATTR_LIST(mb_max_to_scan), 2338 ATTR_LIST(mb_min_to_scan), 2339 ATTR_LIST(mb_order2_req), 2340 ATTR_LIST(mb_stream_req), 2341 ATTR_LIST(mb_group_prealloc), 2342 ATTR_LIST(max_writeback_mb_bump), 2343 NULL, 2344 }; 2345 2346 static ssize_t ext4_attr_show(struct kobject *kobj, 2347 struct attribute *attr, char *buf) 2348 { 2349 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2350 s_kobj); 2351 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); 2352 2353 return a->show ? a->show(a, sbi, buf) : 0; 2354 } 2355 2356 static ssize_t ext4_attr_store(struct kobject *kobj, 2357 struct attribute *attr, 2358 const char *buf, size_t len) 2359 { 2360 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2361 s_kobj); 2362 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); 2363 2364 return a->store ? a->store(a, sbi, buf, len) : 0; 2365 } 2366 2367 static void ext4_sb_release(struct kobject *kobj) 2368 { 2369 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2370 s_kobj); 2371 complete(&sbi->s_kobj_unregister); 2372 } 2373 2374 2375 static const struct sysfs_ops ext4_attr_ops = { 2376 .show = ext4_attr_show, 2377 .store = ext4_attr_store, 2378 }; 2379 2380 static struct kobj_type ext4_ktype = { 2381 .default_attrs = ext4_attrs, 2382 .sysfs_ops = &ext4_attr_ops, 2383 .release = ext4_sb_release, 2384 }; 2385 2386 /* 2387 * Check whether this filesystem can be mounted based on 2388 * the features present and the RDONLY/RDWR mount requested. 2389 * Returns 1 if this filesystem can be mounted as requested, 2390 * 0 if it cannot be. 2391 */ 2392 static int ext4_feature_set_ok(struct super_block *sb, int readonly) 2393 { 2394 if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) { 2395 ext4_msg(sb, KERN_ERR, 2396 "Couldn't mount because of " 2397 "unsupported optional features (%x)", 2398 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & 2399 ~EXT4_FEATURE_INCOMPAT_SUPP)); 2400 return 0; 2401 } 2402 2403 if (readonly) 2404 return 1; 2405 2406 /* Check that feature set is OK for a read-write mount */ 2407 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { 2408 ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " 2409 "unsupported optional features (%x)", 2410 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & 2411 ~EXT4_FEATURE_RO_COMPAT_SUPP)); 2412 return 0; 2413 } 2414 /* 2415 * Large file size enabled file system can only be mounted 2416 * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF 2417 */ 2418 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { 2419 if (sizeof(blkcnt_t) < sizeof(u64)) { 2420 ext4_msg(sb, KERN_ERR, "Filesystem with huge files " 2421 "cannot be mounted RDWR without " 2422 "CONFIG_LBDAF"); 2423 return 0; 2424 } 2425 } 2426 return 1; 2427 } 2428 2429 static int ext4_fill_super(struct super_block *sb, void *data, int silent) 2430 __releases(kernel_lock) 2431 __acquires(kernel_lock) 2432 { 2433 struct buffer_head *bh; 2434 struct ext4_super_block *es = NULL; 2435 struct ext4_sb_info *sbi; 2436 ext4_fsblk_t block; 2437 ext4_fsblk_t sb_block = get_sb_block(&data); 2438 ext4_fsblk_t logical_sb_block; 2439 unsigned long offset = 0; 2440 unsigned long journal_devnum = 0; 2441 unsigned long def_mount_opts; 2442 struct inode *root; 2443 char *cp; 2444 const char *descr; 2445 int ret = -EINVAL; 2446 int blocksize; 2447 unsigned int db_count; 2448 unsigned int i; 2449 int needs_recovery, has_huge_files; 2450 __u64 blocks_count; 2451 int err; 2452 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 2453 2454 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 2455 if (!sbi) 2456 return -ENOMEM; 2457 2458 sbi->s_blockgroup_lock = 2459 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); 2460 if (!sbi->s_blockgroup_lock) { 2461 kfree(sbi); 2462 return -ENOMEM; 2463 } 2464 sb->s_fs_info = sbi; 2465 sbi->s_mount_opt = 0; 2466 sbi->s_resuid = EXT4_DEF_RESUID; 2467 sbi->s_resgid = EXT4_DEF_RESGID; 2468 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 2469 sbi->s_sb_block = sb_block; 2470 sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part, 2471 sectors[1]); 2472 2473 unlock_kernel(); 2474 2475 /* Cleanup superblock name */ 2476 for (cp = sb->s_id; (cp = strchr(cp, '/'));) 2477 *cp = '!'; 2478 2479 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 2480 if (!blocksize) { 2481 ext4_msg(sb, KERN_ERR, "unable to set blocksize"); 2482 goto out_fail; 2483 } 2484 2485 /* 2486 * The ext4 superblock will not be buffer aligned for other than 1kB 2487 * block sizes. We need to calculate the offset from buffer start. 2488 */ 2489 if (blocksize != EXT4_MIN_BLOCK_SIZE) { 2490 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 2491 offset = do_div(logical_sb_block, blocksize); 2492 } else { 2493 logical_sb_block = sb_block; 2494 } 2495 2496 if (!(bh = sb_bread(sb, logical_sb_block))) { 2497 ext4_msg(sb, KERN_ERR, "unable to read superblock"); 2498 goto out_fail; 2499 } 2500 /* 2501 * Note: s_es must be initialized as soon as possible because 2502 * some ext4 macro-instructions depend on its value 2503 */ 2504 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 2505 sbi->s_es = es; 2506 sb->s_magic = le16_to_cpu(es->s_magic); 2507 if (sb->s_magic != EXT4_SUPER_MAGIC) 2508 goto cantfind_ext4; 2509 sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); 2510 2511 /* Set defaults before we parse the mount options */ 2512 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 2513 if (def_mount_opts & EXT4_DEFM_DEBUG) 2514 set_opt(sbi->s_mount_opt, DEBUG); 2515 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { 2516 ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups", 2517 "2.6.38"); 2518 set_opt(sbi->s_mount_opt, GRPID); 2519 } 2520 if (def_mount_opts & EXT4_DEFM_UID16) 2521 set_opt(sbi->s_mount_opt, NO_UID32); 2522 #ifdef CONFIG_EXT4_FS_XATTR 2523 if (def_mount_opts & EXT4_DEFM_XATTR_USER) 2524 set_opt(sbi->s_mount_opt, XATTR_USER); 2525 #endif 2526 #ifdef CONFIG_EXT4_FS_POSIX_ACL 2527 if (def_mount_opts & EXT4_DEFM_ACL) 2528 set_opt(sbi->s_mount_opt, POSIX_ACL); 2529 #endif 2530 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 2531 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 2532 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 2533 set_opt(sbi->s_mount_opt, ORDERED_DATA); 2534 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) 2535 set_opt(sbi->s_mount_opt, WRITEBACK_DATA); 2536 2537 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) 2538 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 2539 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) 2540 set_opt(sbi->s_mount_opt, ERRORS_CONT); 2541 else 2542 set_opt(sbi->s_mount_opt, ERRORS_RO); 2543 2544 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 2545 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 2546 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; 2547 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; 2548 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 2549 2550 set_opt(sbi->s_mount_opt, BARRIER); 2551 2552 /* 2553 * enable delayed allocation by default 2554 * Use -o nodelalloc to turn it off 2555 */ 2556 if (!IS_EXT3_SB(sb)) 2557 set_opt(sbi->s_mount_opt, DELALLOC); 2558 2559 if (!parse_options((char *) data, sb, &journal_devnum, 2560 &journal_ioprio, NULL, 0)) 2561 goto failed_mount; 2562 2563 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2564 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 2565 2566 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && 2567 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || 2568 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 2569 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) 2570 ext4_msg(sb, KERN_WARNING, 2571 "feature flags set on rev 0 fs, " 2572 "running e2fsck is recommended"); 2573 2574 /* 2575 * Check feature flags regardless of the revision level, since we 2576 * previously didn't change the revision level when setting the flags, 2577 * so there is a chance incompat flags are set on a rev 0 filesystem. 2578 */ 2579 if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) 2580 goto failed_mount; 2581 2582 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 2583 2584 if (blocksize < EXT4_MIN_BLOCK_SIZE || 2585 blocksize > EXT4_MAX_BLOCK_SIZE) { 2586 ext4_msg(sb, KERN_ERR, 2587 "Unsupported filesystem blocksize %d", blocksize); 2588 goto failed_mount; 2589 } 2590 2591 if (sb->s_blocksize != blocksize) { 2592 /* Validate the filesystem blocksize */ 2593 if (!sb_set_blocksize(sb, blocksize)) { 2594 ext4_msg(sb, KERN_ERR, "bad block size %d", 2595 blocksize); 2596 goto failed_mount; 2597 } 2598 2599 brelse(bh); 2600 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 2601 offset = do_div(logical_sb_block, blocksize); 2602 bh = sb_bread(sb, logical_sb_block); 2603 if (!bh) { 2604 ext4_msg(sb, KERN_ERR, 2605 "Can't read superblock on 2nd try"); 2606 goto failed_mount; 2607 } 2608 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); 2609 sbi->s_es = es; 2610 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 2611 ext4_msg(sb, KERN_ERR, 2612 "Magic mismatch, very weird!"); 2613 goto failed_mount; 2614 } 2615 } 2616 2617 has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, 2618 EXT4_FEATURE_RO_COMPAT_HUGE_FILE); 2619 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, 2620 has_huge_files); 2621 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); 2622 2623 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { 2624 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; 2625 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; 2626 } else { 2627 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 2628 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 2629 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 2630 (!is_power_of_2(sbi->s_inode_size)) || 2631 (sbi->s_inode_size > blocksize)) { 2632 ext4_msg(sb, KERN_ERR, 2633 "unsupported inode size: %d", 2634 sbi->s_inode_size); 2635 goto failed_mount; 2636 } 2637 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) 2638 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); 2639 } 2640 2641 sbi->s_desc_size = le16_to_cpu(es->s_desc_size); 2642 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { 2643 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || 2644 sbi->s_desc_size > EXT4_MAX_DESC_SIZE || 2645 !is_power_of_2(sbi->s_desc_size)) { 2646 ext4_msg(sb, KERN_ERR, 2647 "unsupported descriptor size %lu", 2648 sbi->s_desc_size); 2649 goto failed_mount; 2650 } 2651 } else 2652 sbi->s_desc_size = EXT4_MIN_DESC_SIZE; 2653 2654 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 2655 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 2656 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) 2657 goto cantfind_ext4; 2658 2659 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); 2660 if (sbi->s_inodes_per_block == 0) 2661 goto cantfind_ext4; 2662 sbi->s_itb_per_group = sbi->s_inodes_per_group / 2663 sbi->s_inodes_per_block; 2664 sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); 2665 sbi->s_sbh = bh; 2666 sbi->s_mount_state = le16_to_cpu(es->s_state); 2667 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 2668 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 2669 2670 for (i = 0; i < 4; i++) 2671 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 2672 sbi->s_def_hash_version = es->s_def_hash_version; 2673 i = le32_to_cpu(es->s_flags); 2674 if (i & EXT2_FLAGS_UNSIGNED_HASH) 2675 sbi->s_hash_unsigned = 3; 2676 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { 2677 #ifdef __CHAR_UNSIGNED__ 2678 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); 2679 sbi->s_hash_unsigned = 3; 2680 #else 2681 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); 2682 #endif 2683 sb->s_dirt = 1; 2684 } 2685 2686 if (sbi->s_blocks_per_group > blocksize * 8) { 2687 ext4_msg(sb, KERN_ERR, 2688 "#blocks per group too big: %lu", 2689 sbi->s_blocks_per_group); 2690 goto failed_mount; 2691 } 2692 if (sbi->s_inodes_per_group > blocksize * 8) { 2693 ext4_msg(sb, KERN_ERR, 2694 "#inodes per group too big: %lu", 2695 sbi->s_inodes_per_group); 2696 goto failed_mount; 2697 } 2698 2699 /* 2700 * Test whether we have more sectors than will fit in sector_t, 2701 * and whether the max offset is addressable by the page cache. 2702 */ 2703 if ((ext4_blocks_count(es) > 2704 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || 2705 (ext4_blocks_count(es) > 2706 (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) { 2707 ext4_msg(sb, KERN_ERR, "filesystem" 2708 " too large to mount safely on this system"); 2709 if (sizeof(sector_t) < 8) 2710 ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); 2711 ret = -EFBIG; 2712 goto failed_mount; 2713 } 2714 2715 if (EXT4_BLOCKS_PER_GROUP(sb) == 0) 2716 goto cantfind_ext4; 2717 2718 /* check blocks count against device size */ 2719 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; 2720 if (blocks_count && ext4_blocks_count(es) > blocks_count) { 2721 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " 2722 "exceeds size of device (%llu blocks)", 2723 ext4_blocks_count(es), blocks_count); 2724 goto failed_mount; 2725 } 2726 2727 /* 2728 * It makes no sense for the first data block to be beyond the end 2729 * of the filesystem. 2730 */ 2731 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { 2732 ext4_msg(sb, KERN_WARNING, "bad geometry: first data" 2733 "block %u is beyond end of filesystem (%llu)", 2734 le32_to_cpu(es->s_first_data_block), 2735 ext4_blocks_count(es)); 2736 goto failed_mount; 2737 } 2738 blocks_count = (ext4_blocks_count(es) - 2739 le32_to_cpu(es->s_first_data_block) + 2740 EXT4_BLOCKS_PER_GROUP(sb) - 1); 2741 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 2742 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { 2743 ext4_msg(sb, KERN_WARNING, "groups count too large: %u " 2744 "(block count %llu, first data block %u, " 2745 "blocks per group %lu)", sbi->s_groups_count, 2746 ext4_blocks_count(es), 2747 le32_to_cpu(es->s_first_data_block), 2748 EXT4_BLOCKS_PER_GROUP(sb)); 2749 goto failed_mount; 2750 } 2751 sbi->s_groups_count = blocks_count; 2752 sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, 2753 (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); 2754 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 2755 EXT4_DESC_PER_BLOCK(sb); 2756 sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), 2757 GFP_KERNEL); 2758 if (sbi->s_group_desc == NULL) { 2759 ext4_msg(sb, KERN_ERR, "not enough memory"); 2760 goto failed_mount; 2761 } 2762 2763 #ifdef CONFIG_PROC_FS 2764 if (ext4_proc_root) 2765 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 2766 #endif 2767 2768 bgl_lock_init(sbi->s_blockgroup_lock); 2769 2770 for (i = 0; i < db_count; i++) { 2771 block = descriptor_loc(sb, logical_sb_block, i); 2772 sbi->s_group_desc[i] = sb_bread(sb, block); 2773 if (!sbi->s_group_desc[i]) { 2774 ext4_msg(sb, KERN_ERR, 2775 "can't read group descriptor %d", i); 2776 db_count = i; 2777 goto failed_mount2; 2778 } 2779 } 2780 if (!ext4_check_descriptors(sb)) { 2781 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 2782 goto failed_mount2; 2783 } 2784 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 2785 if (!ext4_fill_flex_info(sb)) { 2786 ext4_msg(sb, KERN_ERR, 2787 "unable to initialize " 2788 "flex_bg meta info!"); 2789 goto failed_mount2; 2790 } 2791 2792 sbi->s_gdb_count = db_count; 2793 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 2794 spin_lock_init(&sbi->s_next_gen_lock); 2795 2796 err = percpu_counter_init(&sbi->s_freeblocks_counter, 2797 ext4_count_free_blocks(sb)); 2798 if (!err) { 2799 err = percpu_counter_init(&sbi->s_freeinodes_counter, 2800 ext4_count_free_inodes(sb)); 2801 } 2802 if (!err) { 2803 err = percpu_counter_init(&sbi->s_dirs_counter, 2804 ext4_count_dirs(sb)); 2805 } 2806 if (!err) { 2807 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); 2808 } 2809 if (err) { 2810 ext4_msg(sb, KERN_ERR, "insufficient memory"); 2811 goto failed_mount3; 2812 } 2813 2814 sbi->s_stripe = ext4_get_stripe_size(sbi); 2815 sbi->s_max_writeback_mb_bump = 128; 2816 2817 /* 2818 * set up enough so that it can read an inode 2819 */ 2820 if (!test_opt(sb, NOLOAD) && 2821 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) 2822 sb->s_op = &ext4_sops; 2823 else 2824 sb->s_op = &ext4_nojournal_sops; 2825 sb->s_export_op = &ext4_export_ops; 2826 sb->s_xattr = ext4_xattr_handlers; 2827 #ifdef CONFIG_QUOTA 2828 sb->s_qcop = &ext4_qctl_operations; 2829 sb->dq_op = &ext4_quota_operations; 2830 #endif 2831 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 2832 mutex_init(&sbi->s_orphan_lock); 2833 mutex_init(&sbi->s_resize_lock); 2834 2835 sb->s_root = NULL; 2836 2837 needs_recovery = (es->s_last_orphan != 0 || 2838 EXT4_HAS_INCOMPAT_FEATURE(sb, 2839 EXT4_FEATURE_INCOMPAT_RECOVER)); 2840 2841 /* 2842 * The first inode we look at is the journal inode. Don't try 2843 * root first: it may be modified in the journal! 2844 */ 2845 if (!test_opt(sb, NOLOAD) && 2846 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 2847 if (ext4_load_journal(sb, es, journal_devnum)) 2848 goto failed_mount3; 2849 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 2850 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 2851 ext4_msg(sb, KERN_ERR, "required journal recovery " 2852 "suppressed and not mounted read-only"); 2853 goto failed_mount_wq; 2854 } else { 2855 clear_opt(sbi->s_mount_opt, DATA_FLAGS); 2856 set_opt(sbi->s_mount_opt, WRITEBACK_DATA); 2857 sbi->s_journal = NULL; 2858 needs_recovery = 0; 2859 goto no_journal; 2860 } 2861 2862 if (ext4_blocks_count(es) > 0xffffffffULL && 2863 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 2864 JBD2_FEATURE_INCOMPAT_64BIT)) { 2865 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); 2866 goto failed_mount_wq; 2867 } 2868 2869 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { 2870 jbd2_journal_set_features(sbi->s_journal, 2871 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 2872 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2873 } else if (test_opt(sb, JOURNAL_CHECKSUM)) { 2874 jbd2_journal_set_features(sbi->s_journal, 2875 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); 2876 jbd2_journal_clear_features(sbi->s_journal, 0, 0, 2877 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2878 } else { 2879 jbd2_journal_clear_features(sbi->s_journal, 2880 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 2881 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2882 } 2883 2884 /* We have now updated the journal if required, so we can 2885 * validate the data journaling mode. */ 2886 switch (test_opt(sb, DATA_FLAGS)) { 2887 case 0: 2888 /* No mode set, assume a default based on the journal 2889 * capabilities: ORDERED_DATA if the journal can 2890 * cope, else JOURNAL_DATA 2891 */ 2892 if (jbd2_journal_check_available_features 2893 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) 2894 set_opt(sbi->s_mount_opt, ORDERED_DATA); 2895 else 2896 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 2897 break; 2898 2899 case EXT4_MOUNT_ORDERED_DATA: 2900 case EXT4_MOUNT_WRITEBACK_DATA: 2901 if (!jbd2_journal_check_available_features 2902 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 2903 ext4_msg(sb, KERN_ERR, "Journal does not support " 2904 "requested data journaling mode"); 2905 goto failed_mount_wq; 2906 } 2907 default: 2908 break; 2909 } 2910 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 2911 2912 no_journal: 2913 if (test_opt(sb, NOBH)) { 2914 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { 2915 ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " 2916 "its supported only with writeback mode"); 2917 clear_opt(sbi->s_mount_opt, NOBH); 2918 } 2919 if (test_opt(sb, DIOREAD_NOLOCK)) { 2920 ext4_msg(sb, KERN_WARNING, "dioread_nolock option is " 2921 "not supported with nobh mode"); 2922 goto failed_mount_wq; 2923 } 2924 } 2925 EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); 2926 if (!EXT4_SB(sb)->dio_unwritten_wq) { 2927 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); 2928 goto failed_mount_wq; 2929 } 2930 2931 /* 2932 * The jbd2_journal_load will have done any necessary log recovery, 2933 * so we can safely mount the rest of the filesystem now. 2934 */ 2935 2936 root = ext4_iget(sb, EXT4_ROOT_INO); 2937 if (IS_ERR(root)) { 2938 ext4_msg(sb, KERN_ERR, "get root inode failed"); 2939 ret = PTR_ERR(root); 2940 goto failed_mount4; 2941 } 2942 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 2943 iput(root); 2944 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); 2945 goto failed_mount4; 2946 } 2947 sb->s_root = d_alloc_root(root); 2948 if (!sb->s_root) { 2949 ext4_msg(sb, KERN_ERR, "get root dentry failed"); 2950 iput(root); 2951 ret = -ENOMEM; 2952 goto failed_mount4; 2953 } 2954 2955 ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY); 2956 2957 /* determine the minimum size of new large inodes, if present */ 2958 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { 2959 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 2960 EXT4_GOOD_OLD_INODE_SIZE; 2961 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 2962 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { 2963 if (sbi->s_want_extra_isize < 2964 le16_to_cpu(es->s_want_extra_isize)) 2965 sbi->s_want_extra_isize = 2966 le16_to_cpu(es->s_want_extra_isize); 2967 if (sbi->s_want_extra_isize < 2968 le16_to_cpu(es->s_min_extra_isize)) 2969 sbi->s_want_extra_isize = 2970 le16_to_cpu(es->s_min_extra_isize); 2971 } 2972 } 2973 /* Check if enough inode space is available */ 2974 if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > 2975 sbi->s_inode_size) { 2976 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 2977 EXT4_GOOD_OLD_INODE_SIZE; 2978 ext4_msg(sb, KERN_INFO, "required extra inode space not" 2979 "available"); 2980 } 2981 2982 if (test_opt(sb, DELALLOC) && 2983 (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) { 2984 ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - " 2985 "requested data journaling mode"); 2986 clear_opt(sbi->s_mount_opt, DELALLOC); 2987 } 2988 if (test_opt(sb, DIOREAD_NOLOCK)) { 2989 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 2990 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " 2991 "option - requested data journaling mode"); 2992 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 2993 } 2994 if (sb->s_blocksize < PAGE_SIZE) { 2995 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " 2996 "option - block size is too small"); 2997 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 2998 } 2999 } 3000 3001 err = ext4_setup_system_zone(sb); 3002 if (err) { 3003 ext4_msg(sb, KERN_ERR, "failed to initialize system " 3004 "zone (%d)\n", err); 3005 goto failed_mount4; 3006 } 3007 3008 ext4_ext_init(sb); 3009 err = ext4_mb_init(sb, needs_recovery); 3010 if (err) { 3011 ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)", 3012 err); 3013 goto failed_mount4; 3014 } 3015 3016 sbi->s_kobj.kset = ext4_kset; 3017 init_completion(&sbi->s_kobj_unregister); 3018 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, 3019 "%s", sb->s_id); 3020 if (err) { 3021 ext4_mb_release(sb); 3022 ext4_ext_release(sb); 3023 goto failed_mount4; 3024 }; 3025 3026 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 3027 ext4_orphan_cleanup(sb, es); 3028 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 3029 if (needs_recovery) { 3030 ext4_msg(sb, KERN_INFO, "recovery complete"); 3031 ext4_mark_recovery_complete(sb, es); 3032 } 3033 if (EXT4_SB(sb)->s_journal) { 3034 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 3035 descr = " journalled data mode"; 3036 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 3037 descr = " ordered data mode"; 3038 else 3039 descr = " writeback data mode"; 3040 } else 3041 descr = "out journal"; 3042 3043 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr); 3044 3045 lock_kernel(); 3046 return 0; 3047 3048 cantfind_ext4: 3049 if (!silent) 3050 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); 3051 goto failed_mount; 3052 3053 failed_mount4: 3054 ext4_msg(sb, KERN_ERR, "mount failed"); 3055 destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); 3056 failed_mount_wq: 3057 ext4_release_system_zone(sb); 3058 if (sbi->s_journal) { 3059 jbd2_journal_destroy(sbi->s_journal); 3060 sbi->s_journal = NULL; 3061 } 3062 failed_mount3: 3063 if (sbi->s_flex_groups) { 3064 if (is_vmalloc_addr(sbi->s_flex_groups)) 3065 vfree(sbi->s_flex_groups); 3066 else 3067 kfree(sbi->s_flex_groups); 3068 } 3069 percpu_counter_destroy(&sbi->s_freeblocks_counter); 3070 percpu_counter_destroy(&sbi->s_freeinodes_counter); 3071 percpu_counter_destroy(&sbi->s_dirs_counter); 3072 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 3073 failed_mount2: 3074 for (i = 0; i < db_count; i++) 3075 brelse(sbi->s_group_desc[i]); 3076 kfree(sbi->s_group_desc); 3077 failed_mount: 3078 if (sbi->s_proc) { 3079 remove_proc_entry(sb->s_id, ext4_proc_root); 3080 } 3081 #ifdef CONFIG_QUOTA 3082 for (i = 0; i < MAXQUOTAS; i++) 3083 kfree(sbi->s_qf_names[i]); 3084 #endif 3085 ext4_blkdev_remove(sbi); 3086 brelse(bh); 3087 out_fail: 3088 sb->s_fs_info = NULL; 3089 kfree(sbi->s_blockgroup_lock); 3090 kfree(sbi); 3091 lock_kernel(); 3092 return ret; 3093 } 3094 3095 /* 3096 * Setup any per-fs journal parameters now. We'll do this both on 3097 * initial mount, once the journal has been initialised but before we've 3098 * done any recovery; and again on any subsequent remount. 3099 */ 3100 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) 3101 { 3102 struct ext4_sb_info *sbi = EXT4_SB(sb); 3103 3104 journal->j_commit_interval = sbi->s_commit_interval; 3105 journal->j_min_batch_time = sbi->s_min_batch_time; 3106 journal->j_max_batch_time = sbi->s_max_batch_time; 3107 3108 spin_lock(&journal->j_state_lock); 3109 if (test_opt(sb, BARRIER)) 3110 journal->j_flags |= JBD2_BARRIER; 3111 else 3112 journal->j_flags &= ~JBD2_BARRIER; 3113 if (test_opt(sb, DATA_ERR_ABORT)) 3114 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; 3115 else 3116 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; 3117 spin_unlock(&journal->j_state_lock); 3118 } 3119 3120 static journal_t *ext4_get_journal(struct super_block *sb, 3121 unsigned int journal_inum) 3122 { 3123 struct inode *journal_inode; 3124 journal_t *journal; 3125 3126 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3127 3128 /* First, test for the existence of a valid inode on disk. Bad 3129 * things happen if we iget() an unused inode, as the subsequent 3130 * iput() will try to delete it. */ 3131 3132 journal_inode = ext4_iget(sb, journal_inum); 3133 if (IS_ERR(journal_inode)) { 3134 ext4_msg(sb, KERN_ERR, "no journal found"); 3135 return NULL; 3136 } 3137 if (!journal_inode->i_nlink) { 3138 make_bad_inode(journal_inode); 3139 iput(journal_inode); 3140 ext4_msg(sb, KERN_ERR, "journal inode is deleted"); 3141 return NULL; 3142 } 3143 3144 jbd_debug(2, "Journal inode found at %p: %lld bytes\n", 3145 journal_inode, journal_inode->i_size); 3146 if (!S_ISREG(journal_inode->i_mode)) { 3147 ext4_msg(sb, KERN_ERR, "invalid journal inode"); 3148 iput(journal_inode); 3149 return NULL; 3150 } 3151 3152 journal = jbd2_journal_init_inode(journal_inode); 3153 if (!journal) { 3154 ext4_msg(sb, KERN_ERR, "Could not load journal inode"); 3155 iput(journal_inode); 3156 return NULL; 3157 } 3158 journal->j_private = sb; 3159 ext4_init_journal_params(sb, journal); 3160 return journal; 3161 } 3162 3163 static journal_t *ext4_get_dev_journal(struct super_block *sb, 3164 dev_t j_dev) 3165 { 3166 struct buffer_head *bh; 3167 journal_t *journal; 3168 ext4_fsblk_t start; 3169 ext4_fsblk_t len; 3170 int hblock, blocksize; 3171 ext4_fsblk_t sb_block; 3172 unsigned long offset; 3173 struct ext4_super_block *es; 3174 struct block_device *bdev; 3175 3176 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3177 3178 bdev = ext4_blkdev_get(j_dev, sb); 3179 if (bdev == NULL) 3180 return NULL; 3181 3182 if (bd_claim(bdev, sb)) { 3183 ext4_msg(sb, KERN_ERR, 3184 "failed to claim external journal device"); 3185 blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 3186 return NULL; 3187 } 3188 3189 blocksize = sb->s_blocksize; 3190 hblock = bdev_logical_block_size(bdev); 3191 if (blocksize < hblock) { 3192 ext4_msg(sb, KERN_ERR, 3193 "blocksize too small for journal device"); 3194 goto out_bdev; 3195 } 3196 3197 sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; 3198 offset = EXT4_MIN_BLOCK_SIZE % blocksize; 3199 set_blocksize(bdev, blocksize); 3200 if (!(bh = __bread(bdev, sb_block, blocksize))) { 3201 ext4_msg(sb, KERN_ERR, "couldn't read superblock of " 3202 "external journal"); 3203 goto out_bdev; 3204 } 3205 3206 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 3207 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || 3208 !(le32_to_cpu(es->s_feature_incompat) & 3209 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { 3210 ext4_msg(sb, KERN_ERR, "external journal has " 3211 "bad superblock"); 3212 brelse(bh); 3213 goto out_bdev; 3214 } 3215 3216 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 3217 ext4_msg(sb, KERN_ERR, "journal UUID does not match"); 3218 brelse(bh); 3219 goto out_bdev; 3220 } 3221 3222 len = ext4_blocks_count(es); 3223 start = sb_block + 1; 3224 brelse(bh); /* we're done with the superblock */ 3225 3226 journal = jbd2_journal_init_dev(bdev, sb->s_bdev, 3227 start, len, blocksize); 3228 if (!journal) { 3229 ext4_msg(sb, KERN_ERR, "failed to create device journal"); 3230 goto out_bdev; 3231 } 3232 journal->j_private = sb; 3233 ll_rw_block(READ, 1, &journal->j_sb_buffer); 3234 wait_on_buffer(journal->j_sb_buffer); 3235 if (!buffer_uptodate(journal->j_sb_buffer)) { 3236 ext4_msg(sb, KERN_ERR, "I/O error on journal device"); 3237 goto out_journal; 3238 } 3239 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 3240 ext4_msg(sb, KERN_ERR, "External journal has more than one " 3241 "user (unsupported) - %d", 3242 be32_to_cpu(journal->j_superblock->s_nr_users)); 3243 goto out_journal; 3244 } 3245 EXT4_SB(sb)->journal_bdev = bdev; 3246 ext4_init_journal_params(sb, journal); 3247 return journal; 3248 3249 out_journal: 3250 jbd2_journal_destroy(journal); 3251 out_bdev: 3252 ext4_blkdev_put(bdev); 3253 return NULL; 3254 } 3255 3256 static int ext4_load_journal(struct super_block *sb, 3257 struct ext4_super_block *es, 3258 unsigned long journal_devnum) 3259 { 3260 journal_t *journal; 3261 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); 3262 dev_t journal_dev; 3263 int err = 0; 3264 int really_read_only; 3265 3266 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3267 3268 if (journal_devnum && 3269 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3270 ext4_msg(sb, KERN_INFO, "external journal device major/minor " 3271 "numbers have changed"); 3272 journal_dev = new_decode_dev(journal_devnum); 3273 } else 3274 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 3275 3276 really_read_only = bdev_read_only(sb->s_bdev); 3277 3278 /* 3279 * Are we loading a blank journal or performing recovery after a 3280 * crash? For recovery, we need to check in advance whether we 3281 * can get read-write access to the device. 3282 */ 3283 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 3284 if (sb->s_flags & MS_RDONLY) { 3285 ext4_msg(sb, KERN_INFO, "INFO: recovery " 3286 "required on readonly filesystem"); 3287 if (really_read_only) { 3288 ext4_msg(sb, KERN_ERR, "write access " 3289 "unavailable, cannot proceed"); 3290 return -EROFS; 3291 } 3292 ext4_msg(sb, KERN_INFO, "write access will " 3293 "be enabled during recovery"); 3294 } 3295 } 3296 3297 if (journal_inum && journal_dev) { 3298 ext4_msg(sb, KERN_ERR, "filesystem has both journal " 3299 "and inode journals!"); 3300 return -EINVAL; 3301 } 3302 3303 if (journal_inum) { 3304 if (!(journal = ext4_get_journal(sb, journal_inum))) 3305 return -EINVAL; 3306 } else { 3307 if (!(journal = ext4_get_dev_journal(sb, journal_dev))) 3308 return -EINVAL; 3309 } 3310 3311 if (!(journal->j_flags & JBD2_BARRIER)) 3312 ext4_msg(sb, KERN_INFO, "barriers disabled"); 3313 3314 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 3315 err = jbd2_journal_update_format(journal); 3316 if (err) { 3317 ext4_msg(sb, KERN_ERR, "error updating journal"); 3318 jbd2_journal_destroy(journal); 3319 return err; 3320 } 3321 } 3322 3323 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) 3324 err = jbd2_journal_wipe(journal, !really_read_only); 3325 if (!err) 3326 err = jbd2_journal_load(journal); 3327 3328 if (err) { 3329 ext4_msg(sb, KERN_ERR, "error loading journal"); 3330 jbd2_journal_destroy(journal); 3331 return err; 3332 } 3333 3334 EXT4_SB(sb)->s_journal = journal; 3335 ext4_clear_journal_err(sb, es); 3336 3337 if (journal_devnum && 3338 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3339 es->s_journal_dev = cpu_to_le32(journal_devnum); 3340 3341 /* Make sure we flush the recovery flag to disk. */ 3342 ext4_commit_super(sb, 1); 3343 } 3344 3345 return 0; 3346 } 3347 3348 static int ext4_commit_super(struct super_block *sb, int sync) 3349 { 3350 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 3351 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 3352 int error = 0; 3353 3354 if (!sbh) 3355 return error; 3356 if (buffer_write_io_error(sbh)) { 3357 /* 3358 * Oh, dear. A previous attempt to write the 3359 * superblock failed. This could happen because the 3360 * USB device was yanked out. Or it could happen to 3361 * be a transient write error and maybe the block will 3362 * be remapped. Nothing we can do but to retry the 3363 * write and hope for the best. 3364 */ 3365 ext4_msg(sb, KERN_ERR, "previous I/O error to " 3366 "superblock detected"); 3367 clear_buffer_write_io_error(sbh); 3368 set_buffer_uptodate(sbh); 3369 } 3370 /* 3371 * If the file system is mounted read-only, don't update the 3372 * superblock write time. This avoids updating the superblock 3373 * write time when we are mounting the root file system 3374 * read/only but we need to replay the journal; at that point, 3375 * for people who are east of GMT and who make their clock 3376 * tick in localtime for Windows bug-for-bug compatibility, 3377 * the clock is set in the future, and this will cause e2fsck 3378 * to complain and force a full file system check. 3379 */ 3380 if (!(sb->s_flags & MS_RDONLY)) 3381 es->s_wtime = cpu_to_le32(get_seconds()); 3382 es->s_kbytes_written = 3383 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + 3384 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 3385 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 3386 ext4_free_blocks_count_set(es, percpu_counter_sum_positive( 3387 &EXT4_SB(sb)->s_freeblocks_counter)); 3388 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( 3389 &EXT4_SB(sb)->s_freeinodes_counter)); 3390 sb->s_dirt = 0; 3391 BUFFER_TRACE(sbh, "marking dirty"); 3392 mark_buffer_dirty(sbh); 3393 if (sync) { 3394 error = sync_dirty_buffer(sbh); 3395 if (error) 3396 return error; 3397 3398 error = buffer_write_io_error(sbh); 3399 if (error) { 3400 ext4_msg(sb, KERN_ERR, "I/O error while writing " 3401 "superblock"); 3402 clear_buffer_write_io_error(sbh); 3403 set_buffer_uptodate(sbh); 3404 } 3405 } 3406 return error; 3407 } 3408 3409 /* 3410 * Have we just finished recovery? If so, and if we are mounting (or 3411 * remounting) the filesystem readonly, then we will end up with a 3412 * consistent fs on disk. Record that fact. 3413 */ 3414 static void ext4_mark_recovery_complete(struct super_block *sb, 3415 struct ext4_super_block *es) 3416 { 3417 journal_t *journal = EXT4_SB(sb)->s_journal; 3418 3419 if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 3420 BUG_ON(journal != NULL); 3421 return; 3422 } 3423 jbd2_journal_lock_updates(journal); 3424 if (jbd2_journal_flush(journal) < 0) 3425 goto out; 3426 3427 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && 3428 sb->s_flags & MS_RDONLY) { 3429 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3430 ext4_commit_super(sb, 1); 3431 } 3432 3433 out: 3434 jbd2_journal_unlock_updates(journal); 3435 } 3436 3437 /* 3438 * If we are mounting (or read-write remounting) a filesystem whose journal 3439 * has recorded an error from a previous lifetime, move that error to the 3440 * main filesystem now. 3441 */ 3442 static void ext4_clear_journal_err(struct super_block *sb, 3443 struct ext4_super_block *es) 3444 { 3445 journal_t *journal; 3446 int j_errno; 3447 const char *errstr; 3448 3449 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3450 3451 journal = EXT4_SB(sb)->s_journal; 3452 3453 /* 3454 * Now check for any error status which may have been recorded in the 3455 * journal by a prior ext4_error() or ext4_abort() 3456 */ 3457 3458 j_errno = jbd2_journal_errno(journal); 3459 if (j_errno) { 3460 char nbuf[16]; 3461 3462 errstr = ext4_decode_error(sb, j_errno, nbuf); 3463 ext4_warning(sb, "Filesystem error recorded " 3464 "from previous mount: %s", errstr); 3465 ext4_warning(sb, "Marking fs in need of filesystem check."); 3466 3467 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 3468 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 3469 ext4_commit_super(sb, 1); 3470 3471 jbd2_journal_clear_err(journal); 3472 } 3473 } 3474 3475 /* 3476 * Force the running and committing transactions to commit, 3477 * and wait on the commit. 3478 */ 3479 int ext4_force_commit(struct super_block *sb) 3480 { 3481 journal_t *journal; 3482 int ret = 0; 3483 3484 if (sb->s_flags & MS_RDONLY) 3485 return 0; 3486 3487 journal = EXT4_SB(sb)->s_journal; 3488 if (journal) 3489 ret = ext4_journal_force_commit(journal); 3490 3491 return ret; 3492 } 3493 3494 static void ext4_write_super(struct super_block *sb) 3495 { 3496 lock_super(sb); 3497 ext4_commit_super(sb, 1); 3498 unlock_super(sb); 3499 } 3500 3501 static int ext4_sync_fs(struct super_block *sb, int wait) 3502 { 3503 int ret = 0; 3504 tid_t target; 3505 struct ext4_sb_info *sbi = EXT4_SB(sb); 3506 3507 trace_ext4_sync_fs(sb, wait); 3508 flush_workqueue(sbi->dio_unwritten_wq); 3509 if (jbd2_journal_start_commit(sbi->s_journal, &target)) { 3510 if (wait) 3511 jbd2_log_wait_commit(sbi->s_journal, target); 3512 } 3513 return ret; 3514 } 3515 3516 /* 3517 * LVM calls this function before a (read-only) snapshot is created. This 3518 * gives us a chance to flush the journal completely and mark the fs clean. 3519 */ 3520 static int ext4_freeze(struct super_block *sb) 3521 { 3522 int error = 0; 3523 journal_t *journal; 3524 3525 if (sb->s_flags & MS_RDONLY) 3526 return 0; 3527 3528 journal = EXT4_SB(sb)->s_journal; 3529 3530 /* Now we set up the journal barrier. */ 3531 jbd2_journal_lock_updates(journal); 3532 3533 /* 3534 * Don't clear the needs_recovery flag if we failed to flush 3535 * the journal. 3536 */ 3537 error = jbd2_journal_flush(journal); 3538 if (error < 0) { 3539 out: 3540 jbd2_journal_unlock_updates(journal); 3541 return error; 3542 } 3543 3544 /* Journal blocked and flushed, clear needs_recovery flag. */ 3545 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3546 error = ext4_commit_super(sb, 1); 3547 if (error) 3548 goto out; 3549 return 0; 3550 } 3551 3552 /* 3553 * Called by LVM after the snapshot is done. We need to reset the RECOVER 3554 * flag here, even though the filesystem is not technically dirty yet. 3555 */ 3556 static int ext4_unfreeze(struct super_block *sb) 3557 { 3558 if (sb->s_flags & MS_RDONLY) 3559 return 0; 3560 3561 lock_super(sb); 3562 /* Reset the needs_recovery flag before the fs is unlocked. */ 3563 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3564 ext4_commit_super(sb, 1); 3565 unlock_super(sb); 3566 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3567 return 0; 3568 } 3569 3570 static int ext4_remount(struct super_block *sb, int *flags, char *data) 3571 { 3572 struct ext4_super_block *es; 3573 struct ext4_sb_info *sbi = EXT4_SB(sb); 3574 ext4_fsblk_t n_blocks_count = 0; 3575 unsigned long old_sb_flags; 3576 struct ext4_mount_options old_opts; 3577 ext4_group_t g; 3578 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 3579 int err; 3580 #ifdef CONFIG_QUOTA 3581 int i; 3582 #endif 3583 3584 lock_kernel(); 3585 3586 /* Store the original options */ 3587 lock_super(sb); 3588 old_sb_flags = sb->s_flags; 3589 old_opts.s_mount_opt = sbi->s_mount_opt; 3590 old_opts.s_resuid = sbi->s_resuid; 3591 old_opts.s_resgid = sbi->s_resgid; 3592 old_opts.s_commit_interval = sbi->s_commit_interval; 3593 old_opts.s_min_batch_time = sbi->s_min_batch_time; 3594 old_opts.s_max_batch_time = sbi->s_max_batch_time; 3595 #ifdef CONFIG_QUOTA 3596 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 3597 for (i = 0; i < MAXQUOTAS; i++) 3598 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 3599 #endif 3600 if (sbi->s_journal && sbi->s_journal->j_task->io_context) 3601 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; 3602 3603 /* 3604 * Allow the "check" option to be passed as a remount option. 3605 */ 3606 if (!parse_options(data, sb, NULL, &journal_ioprio, 3607 &n_blocks_count, 1)) { 3608 err = -EINVAL; 3609 goto restore_opts; 3610 } 3611 3612 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) 3613 ext4_abort(sb, __func__, "Abort forced by user"); 3614 3615 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3616 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 3617 3618 es = sbi->s_es; 3619 3620 if (sbi->s_journal) { 3621 ext4_init_journal_params(sb, sbi->s_journal); 3622 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 3623 } 3624 3625 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 3626 n_blocks_count > ext4_blocks_count(es)) { 3627 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { 3628 err = -EROFS; 3629 goto restore_opts; 3630 } 3631 3632 if (*flags & MS_RDONLY) { 3633 /* 3634 * First of all, the unconditional stuff we have to do 3635 * to disable replay of the journal when we next remount 3636 */ 3637 sb->s_flags |= MS_RDONLY; 3638 3639 /* 3640 * OK, test if we are remounting a valid rw partition 3641 * readonly, and if so set the rdonly flag and then 3642 * mark the partition as valid again. 3643 */ 3644 if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) && 3645 (sbi->s_mount_state & EXT4_VALID_FS)) 3646 es->s_state = cpu_to_le16(sbi->s_mount_state); 3647 3648 if (sbi->s_journal) 3649 ext4_mark_recovery_complete(sb, es); 3650 } else { 3651 /* Make sure we can mount this feature set readwrite */ 3652 if (!ext4_feature_set_ok(sb, 0)) { 3653 err = -EROFS; 3654 goto restore_opts; 3655 } 3656 /* 3657 * Make sure the group descriptor checksums 3658 * are sane. If they aren't, refuse to remount r/w. 3659 */ 3660 for (g = 0; g < sbi->s_groups_count; g++) { 3661 struct ext4_group_desc *gdp = 3662 ext4_get_group_desc(sb, g, NULL); 3663 3664 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { 3665 ext4_msg(sb, KERN_ERR, 3666 "ext4_remount: Checksum for group %u failed (%u!=%u)", 3667 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), 3668 le16_to_cpu(gdp->bg_checksum)); 3669 err = -EINVAL; 3670 goto restore_opts; 3671 } 3672 } 3673 3674 /* 3675 * If we have an unprocessed orphan list hanging 3676 * around from a previously readonly bdev mount, 3677 * require a full umount/remount for now. 3678 */ 3679 if (es->s_last_orphan) { 3680 ext4_msg(sb, KERN_WARNING, "Couldn't " 3681 "remount RDWR because of unprocessed " 3682 "orphan inode list. Please " 3683 "umount/remount instead"); 3684 err = -EINVAL; 3685 goto restore_opts; 3686 } 3687 3688 /* 3689 * Mounting a RDONLY partition read-write, so reread 3690 * and store the current valid flag. (It may have 3691 * been changed by e2fsck since we originally mounted 3692 * the partition.) 3693 */ 3694 if (sbi->s_journal) 3695 ext4_clear_journal_err(sb, es); 3696 sbi->s_mount_state = le16_to_cpu(es->s_state); 3697 if ((err = ext4_group_extend(sb, es, n_blocks_count))) 3698 goto restore_opts; 3699 if (!ext4_setup_super(sb, es, 0)) 3700 sb->s_flags &= ~MS_RDONLY; 3701 } 3702 } 3703 ext4_setup_system_zone(sb); 3704 if (sbi->s_journal == NULL) 3705 ext4_commit_super(sb, 1); 3706 3707 #ifdef CONFIG_QUOTA 3708 /* Release old quota file names */ 3709 for (i = 0; i < MAXQUOTAS; i++) 3710 if (old_opts.s_qf_names[i] && 3711 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 3712 kfree(old_opts.s_qf_names[i]); 3713 #endif 3714 unlock_super(sb); 3715 unlock_kernel(); 3716 return 0; 3717 3718 restore_opts: 3719 sb->s_flags = old_sb_flags; 3720 sbi->s_mount_opt = old_opts.s_mount_opt; 3721 sbi->s_resuid = old_opts.s_resuid; 3722 sbi->s_resgid = old_opts.s_resgid; 3723 sbi->s_commit_interval = old_opts.s_commit_interval; 3724 sbi->s_min_batch_time = old_opts.s_min_batch_time; 3725 sbi->s_max_batch_time = old_opts.s_max_batch_time; 3726 #ifdef CONFIG_QUOTA 3727 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 3728 for (i = 0; i < MAXQUOTAS; i++) { 3729 if (sbi->s_qf_names[i] && 3730 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 3731 kfree(sbi->s_qf_names[i]); 3732 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 3733 } 3734 #endif 3735 unlock_super(sb); 3736 unlock_kernel(); 3737 return err; 3738 } 3739 3740 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) 3741 { 3742 struct super_block *sb = dentry->d_sb; 3743 struct ext4_sb_info *sbi = EXT4_SB(sb); 3744 struct ext4_super_block *es = sbi->s_es; 3745 u64 fsid; 3746 3747 if (test_opt(sb, MINIX_DF)) { 3748 sbi->s_overhead_last = 0; 3749 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { 3750 ext4_group_t i, ngroups = ext4_get_groups_count(sb); 3751 ext4_fsblk_t overhead = 0; 3752 3753 /* 3754 * Compute the overhead (FS structures). This is constant 3755 * for a given filesystem unless the number of block groups 3756 * changes so we cache the previous value until it does. 3757 */ 3758 3759 /* 3760 * All of the blocks before first_data_block are 3761 * overhead 3762 */ 3763 overhead = le32_to_cpu(es->s_first_data_block); 3764 3765 /* 3766 * Add the overhead attributed to the superblock and 3767 * block group descriptors. If the sparse superblocks 3768 * feature is turned on, then not all groups have this. 3769 */ 3770 for (i = 0; i < ngroups; i++) { 3771 overhead += ext4_bg_has_super(sb, i) + 3772 ext4_bg_num_gdb(sb, i); 3773 cond_resched(); 3774 } 3775 3776 /* 3777 * Every block group has an inode bitmap, a block 3778 * bitmap, and an inode table. 3779 */ 3780 overhead += ngroups * (2 + sbi->s_itb_per_group); 3781 sbi->s_overhead_last = overhead; 3782 smp_wmb(); 3783 sbi->s_blocks_last = ext4_blocks_count(es); 3784 } 3785 3786 buf->f_type = EXT4_SUPER_MAGIC; 3787 buf->f_bsize = sb->s_blocksize; 3788 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 3789 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - 3790 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); 3791 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 3792 if (buf->f_bfree < ext4_r_blocks_count(es)) 3793 buf->f_bavail = 0; 3794 buf->f_files = le32_to_cpu(es->s_inodes_count); 3795 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); 3796 buf->f_namelen = EXT4_NAME_LEN; 3797 fsid = le64_to_cpup((void *)es->s_uuid) ^ 3798 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 3799 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 3800 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 3801 3802 return 0; 3803 } 3804 3805 /* Helper function for writing quotas on sync - we need to start transaction 3806 * before quota file is locked for write. Otherwise the are possible deadlocks: 3807 * Process 1 Process 2 3808 * ext4_create() quota_sync() 3809 * jbd2_journal_start() write_dquot() 3810 * dquot_initialize() down(dqio_mutex) 3811 * down(dqio_mutex) jbd2_journal_start() 3812 * 3813 */ 3814 3815 #ifdef CONFIG_QUOTA 3816 3817 static inline struct inode *dquot_to_inode(struct dquot *dquot) 3818 { 3819 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; 3820 } 3821 3822 static int ext4_write_dquot(struct dquot *dquot) 3823 { 3824 int ret, err; 3825 handle_t *handle; 3826 struct inode *inode; 3827 3828 inode = dquot_to_inode(dquot); 3829 handle = ext4_journal_start(inode, 3830 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 3831 if (IS_ERR(handle)) 3832 return PTR_ERR(handle); 3833 ret = dquot_commit(dquot); 3834 err = ext4_journal_stop(handle); 3835 if (!ret) 3836 ret = err; 3837 return ret; 3838 } 3839 3840 static int ext4_acquire_dquot(struct dquot *dquot) 3841 { 3842 int ret, err; 3843 handle_t *handle; 3844 3845 handle = ext4_journal_start(dquot_to_inode(dquot), 3846 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 3847 if (IS_ERR(handle)) 3848 return PTR_ERR(handle); 3849 ret = dquot_acquire(dquot); 3850 err = ext4_journal_stop(handle); 3851 if (!ret) 3852 ret = err; 3853 return ret; 3854 } 3855 3856 static int ext4_release_dquot(struct dquot *dquot) 3857 { 3858 int ret, err; 3859 handle_t *handle; 3860 3861 handle = ext4_journal_start(dquot_to_inode(dquot), 3862 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 3863 if (IS_ERR(handle)) { 3864 /* Release dquot anyway to avoid endless cycle in dqput() */ 3865 dquot_release(dquot); 3866 return PTR_ERR(handle); 3867 } 3868 ret = dquot_release(dquot); 3869 err = ext4_journal_stop(handle); 3870 if (!ret) 3871 ret = err; 3872 return ret; 3873 } 3874 3875 static int ext4_mark_dquot_dirty(struct dquot *dquot) 3876 { 3877 /* Are we journaling quotas? */ 3878 if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 3879 EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 3880 dquot_mark_dquot_dirty(dquot); 3881 return ext4_write_dquot(dquot); 3882 } else { 3883 return dquot_mark_dquot_dirty(dquot); 3884 } 3885 } 3886 3887 static int ext4_write_info(struct super_block *sb, int type) 3888 { 3889 int ret, err; 3890 handle_t *handle; 3891 3892 /* Data block + inode block */ 3893 handle = ext4_journal_start(sb->s_root->d_inode, 2); 3894 if (IS_ERR(handle)) 3895 return PTR_ERR(handle); 3896 ret = dquot_commit_info(sb, type); 3897 err = ext4_journal_stop(handle); 3898 if (!ret) 3899 ret = err; 3900 return ret; 3901 } 3902 3903 /* 3904 * Turn on quotas during mount time - we need to find 3905 * the quota file and such... 3906 */ 3907 static int ext4_quota_on_mount(struct super_block *sb, int type) 3908 { 3909 return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], 3910 EXT4_SB(sb)->s_jquota_fmt, type); 3911 } 3912 3913 /* 3914 * Standard function to be called on quota_on 3915 */ 3916 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 3917 char *name, int remount) 3918 { 3919 int err; 3920 struct path path; 3921 3922 if (!test_opt(sb, QUOTA)) 3923 return -EINVAL; 3924 /* When remounting, no checks are needed and in fact, name is NULL */ 3925 if (remount) 3926 return vfs_quota_on(sb, type, format_id, name, remount); 3927 3928 err = kern_path(name, LOOKUP_FOLLOW, &path); 3929 if (err) 3930 return err; 3931 3932 /* Quotafile not on the same filesystem? */ 3933 if (path.mnt->mnt_sb != sb) { 3934 path_put(&path); 3935 return -EXDEV; 3936 } 3937 /* Journaling quota? */ 3938 if (EXT4_SB(sb)->s_qf_names[type]) { 3939 /* Quotafile not in fs root? */ 3940 if (path.dentry->d_parent != sb->s_root) 3941 ext4_msg(sb, KERN_WARNING, 3942 "Quota file not on filesystem root. " 3943 "Journaled quota will not work"); 3944 } 3945 3946 /* 3947 * When we journal data on quota file, we have to flush journal to see 3948 * all updates to the file when we bypass pagecache... 3949 */ 3950 if (EXT4_SB(sb)->s_journal && 3951 ext4_should_journal_data(path.dentry->d_inode)) { 3952 /* 3953 * We don't need to lock updates but journal_flush() could 3954 * otherwise be livelocked... 3955 */ 3956 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 3957 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 3958 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3959 if (err) { 3960 path_put(&path); 3961 return err; 3962 } 3963 } 3964 3965 err = vfs_quota_on_path(sb, type, format_id, &path); 3966 path_put(&path); 3967 return err; 3968 } 3969 3970 /* Read data from quotafile - avoid pagecache and such because we cannot afford 3971 * acquiring the locks... As quota files are never truncated and quota code 3972 * itself serializes the operations (and noone else should touch the files) 3973 * we don't have to be afraid of races */ 3974 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 3975 size_t len, loff_t off) 3976 { 3977 struct inode *inode = sb_dqopt(sb)->files[type]; 3978 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 3979 int err = 0; 3980 int offset = off & (sb->s_blocksize - 1); 3981 int tocopy; 3982 size_t toread; 3983 struct buffer_head *bh; 3984 loff_t i_size = i_size_read(inode); 3985 3986 if (off > i_size) 3987 return 0; 3988 if (off+len > i_size) 3989 len = i_size-off; 3990 toread = len; 3991 while (toread > 0) { 3992 tocopy = sb->s_blocksize - offset < toread ? 3993 sb->s_blocksize - offset : toread; 3994 bh = ext4_bread(NULL, inode, blk, 0, &err); 3995 if (err) 3996 return err; 3997 if (!bh) /* A hole? */ 3998 memset(data, 0, tocopy); 3999 else 4000 memcpy(data, bh->b_data+offset, tocopy); 4001 brelse(bh); 4002 offset = 0; 4003 toread -= tocopy; 4004 data += tocopy; 4005 blk++; 4006 } 4007 return len; 4008 } 4009 4010 /* Write to quotafile (we know the transaction is already started and has 4011 * enough credits) */ 4012 static ssize_t ext4_quota_write(struct super_block *sb, int type, 4013 const char *data, size_t len, loff_t off) 4014 { 4015 struct inode *inode = sb_dqopt(sb)->files[type]; 4016 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 4017 int err = 0; 4018 int offset = off & (sb->s_blocksize - 1); 4019 int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; 4020 struct buffer_head *bh; 4021 handle_t *handle = journal_current_handle(); 4022 4023 if (EXT4_SB(sb)->s_journal && !handle) { 4024 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" 4025 " cancelled because transaction is not started", 4026 (unsigned long long)off, (unsigned long long)len); 4027 return -EIO; 4028 } 4029 /* 4030 * Since we account only one data block in transaction credits, 4031 * then it is impossible to cross a block boundary. 4032 */ 4033 if (sb->s_blocksize - offset < len) { 4034 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" 4035 " cancelled because not block aligned", 4036 (unsigned long long)off, (unsigned long long)len); 4037 return -EIO; 4038 } 4039 4040 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 4041 bh = ext4_bread(handle, inode, blk, 1, &err); 4042 if (!bh) 4043 goto out; 4044 if (journal_quota) { 4045 err = ext4_journal_get_write_access(handle, bh); 4046 if (err) { 4047 brelse(bh); 4048 goto out; 4049 } 4050 } 4051 lock_buffer(bh); 4052 memcpy(bh->b_data+offset, data, len); 4053 flush_dcache_page(bh->b_page); 4054 unlock_buffer(bh); 4055 if (journal_quota) 4056 err = ext4_handle_dirty_metadata(handle, NULL, bh); 4057 else { 4058 /* Always do at least ordered writes for quotas */ 4059 err = ext4_jbd2_file_inode(handle, inode); 4060 mark_buffer_dirty(bh); 4061 } 4062 brelse(bh); 4063 out: 4064 if (err) { 4065 mutex_unlock(&inode->i_mutex); 4066 return err; 4067 } 4068 if (inode->i_size < off + len) { 4069 i_size_write(inode, off + len); 4070 EXT4_I(inode)->i_disksize = inode->i_size; 4071 } 4072 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 4073 ext4_mark_inode_dirty(handle, inode); 4074 mutex_unlock(&inode->i_mutex); 4075 return len; 4076 } 4077 4078 #endif 4079 4080 static int ext4_get_sb(struct file_system_type *fs_type, int flags, 4081 const char *dev_name, void *data, struct vfsmount *mnt) 4082 { 4083 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); 4084 } 4085 4086 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 4087 static struct file_system_type ext2_fs_type = { 4088 .owner = THIS_MODULE, 4089 .name = "ext2", 4090 .get_sb = ext4_get_sb, 4091 .kill_sb = kill_block_super, 4092 .fs_flags = FS_REQUIRES_DEV, 4093 }; 4094 4095 static inline void register_as_ext2(void) 4096 { 4097 int err = register_filesystem(&ext2_fs_type); 4098 if (err) 4099 printk(KERN_WARNING 4100 "EXT4-fs: Unable to register as ext2 (%d)\n", err); 4101 } 4102 4103 static inline void unregister_as_ext2(void) 4104 { 4105 unregister_filesystem(&ext2_fs_type); 4106 } 4107 MODULE_ALIAS("ext2"); 4108 #else 4109 static inline void register_as_ext2(void) { } 4110 static inline void unregister_as_ext2(void) { } 4111 #endif 4112 4113 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 4114 static inline void register_as_ext3(void) 4115 { 4116 int err = register_filesystem(&ext3_fs_type); 4117 if (err) 4118 printk(KERN_WARNING 4119 "EXT4-fs: Unable to register as ext3 (%d)\n", err); 4120 } 4121 4122 static inline void unregister_as_ext3(void) 4123 { 4124 unregister_filesystem(&ext3_fs_type); 4125 } 4126 MODULE_ALIAS("ext3"); 4127 #else 4128 static inline void register_as_ext3(void) { } 4129 static inline void unregister_as_ext3(void) { } 4130 #endif 4131 4132 static struct file_system_type ext4_fs_type = { 4133 .owner = THIS_MODULE, 4134 .name = "ext4", 4135 .get_sb = ext4_get_sb, 4136 .kill_sb = kill_block_super, 4137 .fs_flags = FS_REQUIRES_DEV, 4138 }; 4139 4140 static int __init init_ext4_fs(void) 4141 { 4142 int err; 4143 4144 err = init_ext4_system_zone(); 4145 if (err) 4146 return err; 4147 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 4148 if (!ext4_kset) 4149 goto out4; 4150 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 4151 err = init_ext4_mballoc(); 4152 if (err) 4153 goto out3; 4154 4155 err = init_ext4_xattr(); 4156 if (err) 4157 goto out2; 4158 err = init_inodecache(); 4159 if (err) 4160 goto out1; 4161 register_as_ext2(); 4162 register_as_ext3(); 4163 err = register_filesystem(&ext4_fs_type); 4164 if (err) 4165 goto out; 4166 return 0; 4167 out: 4168 unregister_as_ext2(); 4169 unregister_as_ext3(); 4170 destroy_inodecache(); 4171 out1: 4172 exit_ext4_xattr(); 4173 out2: 4174 exit_ext4_mballoc(); 4175 out3: 4176 remove_proc_entry("fs/ext4", NULL); 4177 kset_unregister(ext4_kset); 4178 out4: 4179 exit_ext4_system_zone(); 4180 return err; 4181 } 4182 4183 static void __exit exit_ext4_fs(void) 4184 { 4185 unregister_as_ext2(); 4186 unregister_as_ext3(); 4187 unregister_filesystem(&ext4_fs_type); 4188 destroy_inodecache(); 4189 exit_ext4_xattr(); 4190 exit_ext4_mballoc(); 4191 remove_proc_entry("fs/ext4", NULL); 4192 kset_unregister(ext4_kset); 4193 exit_ext4_system_zone(); 4194 } 4195 4196 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 4197 MODULE_DESCRIPTION("Fourth Extended Filesystem"); 4198 MODULE_LICENSE("GPL"); 4199 module_init(init_ext4_fs) 4200 module_exit(exit_ext4_fs) 4201